Beispiel #1
0
def generate(X, seqType, args):
    """
    Encode every nucleotide by its transition/transversion scores and save the result.

    # Reference: It is a very common feature.

    Each base maps to four scores: 0 against itself, 1 against its transition
    partner and 5 against a transversion. Judging from the values, the column
    order appears to be A, T/U, C, G (TODO confirm). The key 'p' is the
    all-zero row used for padding.

    :param X: sequences to encode; forwarded unchanged to ``utils.processMono``
    :param seqType: 'DNA' or 'RNA'. 'PROT' prints an error and returns; any
        other value returns silently.
    :param args: run options; forwarded unchanged to ``utils.processMono``
    :return: None. The encoded data is handed to ``save.datasetSave`` with tag 'tt'.
    """
    if seqType == 'PROT':
        print(CRED + "Error: The 'Transition-Transversion' feature is NOT applicable for PROT." + CEND)
        return None
    if seqType not in ('DNA', 'RNA'):
        # Previously an unknown type fell through with no table defined and
        # crashed on the lookup below; bail out like the sibling encoders do.
        return None

    # DNA and RNA share the same scores; only the T/U symbol differs.
    fourth_base = 'T' if seqType == 'DNA' else 'U'
    d = {
        'A': [0, 5, 5, 1],
        'C': [5, 1, 0, 5],
        'G': [1, 5, 5, 0],
        fourth_base: [5, 0, 1, 5],
        'p': [0, 0, 0, 0],  # padding
    }

    X = utils.processMono(X, d, args)

    # Four scores per position for both nucleotide alphabets.
    totalFeature = 4
    save.datasetSave(X, totalFeature, 'tt')
#end-def
Beispiel #2
0
def generate(X, seqType, args):
    """
    Count, for every sequence, the occurrences of each k-mer with k = 1 .. args.kTuple.

    # Reference-1: (http://rosalind.info/glossary/k-mer-composition/) # It is also called "k-mer composition".
    # Reference-2: iRecSpot-EF: https://www.sciencedirect.com/science/article/abs/pii/S0010482518302981

    :param X: iterable of sequences; each one is sliced and searched with
        ``count``, so string-like items are expected
    :param seqType: 'DNA', 'RNA' or 'PROT'; passed to ``utils.sequenceElements``
        to obtain the alphabet
    :param args: options object; ``terminusLength`` (prefix length that is
        analysed) and ``kTuple`` (largest k) are read
    :return: None. The count matrix is handed to ``save.datasetSave`` with tag 'fkmer'.
    """
    elements = utils.sequenceElements(seqType)

    # The vocabulary depends only on the alphabet and kTuple, so it is built
    # once here instead of once per sequence. Order: all 1-mers, then all
    # 2-mers, ... each in itertools.product order.
    vocabulary = [
        ''.join(letters)
        for k in range(1, args.kTuple + 1)
        for letters in itertools.product(elements, repeat=k)
    ]

    T = []
    for x in X:
        prefix = x[:args.terminusLength]
        # NOTE(review): str.count counts NON-overlapping occurrences, e.g.
        # 'AAA'.count('AA') == 1. Kept as in the original; confirm this is the
        # intended definition of k-mer frequency.
        T.append(np.array([prefix.count(kmer) for kmer in vocabulary]))
    T = np.array(T)

    totalFeature = 0
    if seqType == 'DNA' or seqType == 'RNA':
        totalFeature = np.sum([4**k for k in range(1, args.kTuple + 1)])
    elif seqType == 'PROT':
        totalFeature = np.sum([20**k for k in range(1, args.kTuple + 1)])

    save.datasetSave(T, totalFeature, 'fkmer')
def generate(X, seqType, args):
    """
    Encode every residue of a protein sequence by its BLOSUM62 row and save the result.

    # Reference-1: http://rosalind.info/glossary/blosum62/
    # Reference-2: https://www.pnas.org/content/89/22/10915/

    :param X: sequences to encode; forwarded unchanged to ``utils.processMono``
    :param seqType: only 'PROT' is encoded. 'DNA'/'RNA' print an error and
        return; any other value returns silently.
    :param args: run options; forwarded unchanged to ``utils.processMono``
    :return: None. The encoded data is handed to ``save.datasetSave`` with tag 'blosum62'.
    """
    if seqType != 'PROT':
        if seqType == 'DNA' or seqType == 'RNA':
            print(CRED + "Error: The 'BLOSUM62' feature is NOT applicable for DNA/RNA." + CEND)
        return None

    # One 20-value row per residue; rows and columns both follow the
    # alphabetical one-letter order A C D E F G H I K L M N P Q R S T V W Y.
    d = {
        'A': [4, 0, -2, -1, -2, 0, -2, -1, -1, -1, -1, -2, -1, -1, -1, 1, 0, 0, -3, -2],
        'C': [0, 9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -3, -3, -3, -1, -1, -1, -2, -2],
        'D': [-2, -3, 6, 2, -3, -1, -1, -3, -1, -4, -3, 1, -1, 0, -2, 0, -1, -3, -4, -3],
        'E': [-1, -4, 2, 5, -3, -2, 0, -3, 1, -3, -2, 0, -1, 2, 0, 0, -1, -2, -3, -2],
        'F': [-2, -2, -3, -3, 6, -3, -1, 0, -3, 0, 0, -3, -4, -3, -3, -2, -2, -1, 1, 3],
        'G': [0, -3, -1, -2, -3, 6, -2, -4, -2, -4, -3, 0, -2, -2, -2, 0, -2, -3, -2, -3],
        'H': [-2, -3, -1, 0, -1, -2, 8, -3, -1, -3, -2, 1, -2, 0, 0, -1, -2, -3, -2, 2],
        'I': [-1, -1, -3, -3, 0, -4, -3, 4, -3, 2, 1, -3, -3, -3, -3, -2, -1, 3, -3, -1],
        'K': [-1, -3, -1, 1, -3, -2, -1, -3, 5, -2, -1, 0, -1, 1, 2, 0, -1, -2, -3, -2],
        'L': [-1, -1, -4, -3, 0, -4, -3, 2, -2, 4, 2, -3, -3, -2, -2, -2, -1, 1, -2, -1],
        'M': [-1, -1, -3, -2, 0, -3, -2, 1, -1, 2, 5, -2, -2, 0, -1, -1, -1, 1, -1, -1],
        'N': [-2, -3, 1, 0, -3, 0, 1, -3, 0, -3, -2, 6, -2, 0, 0, 1, 0, -3, -4, -2],
        'P': [-1, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, 7, -1, -2, -1, -1, -2, -4, -3],
        'Q': [-1, -3, 0, 2, -3, -2, 0, -3, 1, -2, 0, 0, -1, 5, 1, 0, -1, -2, -2, -1],
        'R': [-1, -3, -2, 0, -3, -2, 0, -3, 2, -2, -1, 0, -2, 1, 5, -1, -1, -3, -3, -2],
        'S': [1, -1, 0, 0, -2, 0, -1, -2, 0, -2, -1, 1, -1, 0, -1, 4, 1, -2, -3, -2],
        'T': [0, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 1, 5, 0, -2, -2],
        'V': [0, -1, -3, -2, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, 0, 4, -3, -1],
        'W': [-3, -2, -4, -3, 1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 11, 2],
        'Y': [-2, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, 2, 7],
        'p': [0] * 20,  # padding
    }

    X = utils.processMono(X, d, args)

    # Only the protein path reaches this point, so the width is always 20.
    totalFeature = 20
    save.datasetSave(X, totalFeature, 'blosum62')
Beispiel #4
0
def generate(X, seqType, args):
    """
    Position-specific one-hot encoding of gapped symbol pairs.

    For every gap size g = 1 .. args.gGap each window of length 2 + g is
    reduced to its first and last symbol, and that pair is one-hot encoded over
    all ordered pairs of the alphabet. The per-gap matrices of one sequence are
    concatenated side by side.

    :param X: iterable of sequences (sliceable; string-like items expected)
    :param seqType: 'DNA', 'RNA' or 'PROT'; passed to ``utils.sequenceElements``
        to obtain the alphabet
    :param args: options object; ``terminusLength`` and ``gGap`` are read
    :return: None. The encoded data is handed to ``save.datasetSave`` with tag 'pg11'.
    """
    elements = utils.sequenceElements(seqType)
    pair_keys = [''.join(pair) for pair in itertools.product(elements, repeat=2)]

    # All-zero template copied for every window (instead of rebuilding the
    # whole dict each time) and an all-zero row used for padding. The padding
    # width is taken from the vocabulary so it always matches the one-hot rows,
    # and it is defined for every seqType.
    blank = dict.fromkeys(pair_keys, 0)
    pad_row = [0] * len(pair_keys)

    # Every per-gap matrix is padded to the row count of the smallest gap
    # (window length 3) on a full-length sequence.
    target_rows = args.terminusLength - (2 + 1) + 1

    T = []
    for x in X:
        x = x[:args.terminusLength]
        merged = []
        for gap in range(1, args.gGap + 1):
            rows = []
            # presumably utils.kmers yields the sliding windows of the given
            # length - TODO confirm against utils
            for kmer in utils.kmers(x, 2 + gap):
                one_hot = dict(blank)
                one_hot[kmer[0] + kmer[-1]] = 1
                rows.append(list(one_hot.values()))
            # Pad by the number of rows actually missing. The original derived
            # this from the sequence length, which overshoots when the
            # sequence is shorter than the window.
            rows.extend([pad_row] * max(0, target_rows - len(rows)))
            merged.append(np.array(rows))
        T.append(np.concatenate(merged, axis=1))
    T = np.array(T)

    totalFeature = 0
    if seqType == 'DNA' or seqType == 'RNA':
        totalFeature = (4 * args.gGap * 4)
    elif seqType == 'PROT':
        totalFeature = (20 * args.gGap * 20)

    save.datasetSave(T, totalFeature, 'pg11')


#end-def
Beispiel #5
0
def generate(X, seqType, args):
    """
    Encode every residue by its physicochemical class (8 one-hot columns) and save the result.

    # Reference: https://www.biorxiv.org/content/10.1101/332171v2.full.pdf (Supp: Table-5)

    :param X: sequences to encode; forwarded unchanged to ``utils.processMono``
    :param seqType: only 'PROT' is encoded. 'DNA'/'RNA' print an error and
        return; any other value returns silently.
    :param args: run options; forwarded unchanged to ``utils.processMono``
    :return: None. The encoded data is handed to ``save.datasetSave`` with tag 'pcpP3'.
    """
    if seqType != 'PROT':
        if seqType == 'DNA' or seqType == 'RNA':
            print(CRED + "Error: The 'Physicochemical Properties-P3' feature is NOT applicable for DNA/RNA." + CEND)
        return None

    # Residues belonging to each column, in column order. Every residue
    # appears in exactly one class, so each row is a one-hot vector.
    class_members = (
        'DE',     # Column1: Acidic
        'RHK',    # Column2: Basic
        'YFW',    # Column3: Aromatic side chain
        'ILVAG',  # Column4: Aliphatic side chain
        'P',      # Column5: Cyclic
        'MC',     # Column6: Sulfur containing
        'ST',     # Column7: Hydroxyl containing
        'QN',     # Column8: Acidic amide
    )

    d = {}
    for residue in 'ACDEFGHIKLMNPQRSTVWY':
        d[residue] = [1 if residue in members else 0 for members in class_members]
    d['p'] = [0] * len(class_members)  # padding

    X = utils.processMono(X, d, args)

    # Only the protein path reaches this point: one column per class.
    totalFeature = 8
    save.datasetSave(X, totalFeature, 'pcpP3')
Beispiel #6
0
def generate(X, seqType, args):
    """
    One-hot (binary profile) encoding of every symbol of a sequence.

    # Reference: https://doi.org/10.1016/j.omtn.2019.04.025 (It is also called "identity matrix".)

    :param X: sequences to encode; forwarded unchanged to ``utils.processMono``
    :param seqType: 'DNA', 'RNA' or 'PROT'; any other value returns silently
    :param args: run options; forwarded unchanged to ``utils.processMono``
    :return: None. The encoded data is handed to ``save.datasetSave`` with tag 'bpf'.
    """
    # Symbol order fixes which column carries the 1 for each symbol.
    alphabets = {
        'DNA': 'ACGT',
        'RNA': 'ACGU',
        'PROT': 'ACDEFGHIKLMNPQRSTVWY',
    }
    alphabet = alphabets.get(seqType)
    if alphabet is None:
        # Previously an unknown type fell through with no table defined and
        # crashed on the lookup below; bail out like the sibling encoders do.
        return None

    width = len(alphabet)
    # Row i of the identity matrix for the i-th symbol, plus an all-zero
    # padding row under the key 'p'.
    d = {
        symbol: [1 if col == row else 0 for col in range(width)]
        for row, symbol in enumerate(alphabet)
    }
    d['p'] = [0] * width  # padding

    X = utils.processMono(X, d, args)

    # 4 columns for DNA/RNA, 20 for proteins.
    totalFeature = width
    save.datasetSave(X, totalFeature, 'bpf')


#end-def
Beispiel #7
0
def generate(X, seqType, args):
    """
    Encode every RNA dinucleotide by 22 physicochemical property values and save the result.

    # Reference: repRNA

    :param X: sequences to encode; forwarded unchanged to ``utils.processDi``
    :param seqType: only 'RNA' is encoded. 'PROT'/'DNA' print an error and
        return; any other value returns silently.
    :param args: run options; forwarded unchanged to ``utils.processDi``
    :return: None. The encoded data is handed to ``save.datasetSave`` with tag 'pcpR1'.
    """
    if seqType == 'PROT' or seqType == 'DNA':
        print(CRED + "Error: The 'Physicochemical Properties-R1' feature is NOT applicable for PROT and DNA." + CEND)
        return None
    if seqType != 'RNA':
        # Previously an unknown type fell through with no table defined and
        # crashed on the lookup below; bail out like the sibling encoders do.
        return None

    # Number of property values stored per dinucleotide.
    totalFeature = 22

    d = {
        'AA': [
            2.000, 0.000, -6.600, -6.820, -18.400, -19.000, -0.900, -0.930,
            0.000, 0.000, 0.023, 0.040, 0.000, 2.000, 3.180, 7.000, -0.080,
            -1.270, -13.700, 0.000, -0.800, 31.000
        ],
        'AC': [
            1.000, 1.000, -10.200, -11.400, -26.200, -29.500, -2.100,
            -2.240, 1.000, 0.000, 0.083, 0.140, 0.000, 1.000, 3.240, 4.800,
            0.230, -1.430, -13.800, 0.000, 0.800, 32.000
        ],
        'AG': [
            1.000, 0.000, -7.600, -10.480, -19.200, -27.100, -1.700,
            -2.080, 1.000, 1.000, 0.035, 0.080, 0.000, 2.000, 3.300, 8.500,
            -0.040, -1.500, -14.000, 0.000, 0.500, 30.000
        ],
        'AU': [
            1.000, 0.000, -5.700, -9.380, -15.500, -26.700, -0.900, -1.100,
            0.000, 0.000, 0.090, 0.140, 1.000, 1.000, 3.240, 7.100, -0.060,
            -1.360, -15.400, 1.000, 1.100, 33.000
        ],
        'CA': [
            1.000, 1.000, -10.500, -10.440, -27.800, -26.900, -1.800,
            -2.110, 1.000, 0.000, 0.118, 0.210, 0.000, 1.000, 3.090, 9.900,
            0.110, -1.460, -14.400, 0.000, 1.000, 31.000
        ],
        'CC': [
            0.000, 2.000, -12.200, -13.390, -29.700, -32.700, -2.900,
            -3.260, 2.000, 0.000, 0.349, 0.490, 0.000, 0.000, 3.320, 8.700,
            -0.010, -1.780, -11.100, 0.000, 0.300, 32.000
        ],
        'CG': [
            0.000, 1.000, -8.000, -10.640, -19.400, -26.700, -2.000,
            -2.360, 2.000, 1.000, 0.193, 0.350, 1.000, 1.000, 3.300,
            12.100, 0.300, -1.890, -15.600, 0.000, -0.100, 27.000
        ],
        'CU': [
            0.000, 1.000, -7.600, -10.480, -19.200, -27.100, -1.700,
            -2.080, 1.000, 0.000, 0.378, 0.520, 1.000, 0.000, 3.300, 8.500,
            -0.040, -1.500, -14.000, 1.000, 0.500, 30.000
        ],
        'GA': [
            1.000, 0.000, -13.300, -12.440, -35.500, -32.500, -2.300,
            -2.350, 1.000, 1.000, 0.048, 0.100, 1.000, 2.000, 3.380, 9.400,
            0.070, -1.700, -14.200, 0.000, 1.300, 32.000
        ],
        'GC': [
            0.000, 1.000, -14.200, -14.880, -34.900, -36.900, -3.400,
            -3.420, 2.000, 1.000, 0.146, 0.260, 1.000, 1.000, 3.220, 6.100,
            0.070, -1.390, -16.900, 0.000, 0.000, 35.000
        ],
        'GG': [
            0.000, 0.000, -12.200, -13.390, -29.700, -32.700, -2.900,
            -3.260, 2.000, 2.000, 0.065, 0.170, 2.000, 2.000, 3.320,
            12.100, -0.010, -1.780, -11.100, 0.000, 0.300, 32.000
        ],
        'GU': [
            0.000, 0.000, -10.200, -11.400, -26.200, -29.500, -2.100,
            -2.240, 1.000, 1.000, 0.160, 0.270, 2.000, 1.000, 3.240, 4.800,
            0.230, -1.430, -13.800, 1.000, 0.800, 32.000
        ],
        'UA': [
            1.000, 0.000, -8.100, -7.690, -22.600, -20.500, -1.100, -1.330,
            0.000, 0.000, 0.112, 0.210, 1.000, 1.000, 3.260, 10.700,
            -0.020, -1.450, -16.000, 1.000, -0.200, 32.000
        ],
        'UC': [
            0.000, 1.000, -10.200, -12.440, -26.200, -32.500, -2.100,
            -2.350, 1.000, 0.000, 0.359, 0.480, 1.000, 0.000, 3.380, 9.400,
            0.070, -1.700, -14.200, 1.000, 1.300, 32.000
        ],
        'UG': [
            0.000, 0.000, -7.600, -10.440, -19.200, -26.900, -1.700,
            -2.110, 1.000, 1.000, 0.224, 0.340, 1.000, 1.000, 3.090, 9.900,
            0.110, -1.460, -14.400, 1.000, 1.000, 31.000
        ],
        'UU': [
            0.000, 0.000, -6.600, -6.820, -18.400, -19.000, -0.900, -0.930,
            0.000, 0.000, 0.389, 0.440, 2.000, 0.000, 3.180, 7.000, -0.080,
            -1.270, -13.700, 2.000, -0.800, 31.000
        ],
        # The padding row must be as wide as the property rows. The original
        # listed 26 zeros here although every dinucleotide carries 22 values.
        'p': [0] * totalFeature,  # padding
    }

    X = utils.processDi(X, d, args)

    save.datasetSave(X, totalFeature, 'pcpR1')


#end-def
Beispiel #8
0
def generate(X, seqType, args):
    """
    Position-specific one-hot encoding of every k-mer (k = args.kTuple) of a sequence.

    Each sliding window is one-hot encoded over all k-mers of the alphabet;
    sequences shorter than ``args.terminusLength`` are padded with all-zero
    rows so that every sequence yields the same number of rows.

    :param X: iterable of sequences (sliceable; string-like items expected)
    :param seqType: 'DNA', 'RNA' or 'PROT'; passed to ``utils.sequenceElements``
        to obtain the alphabet
    :param args: options object; ``terminusLength`` and ``kTuple`` are read
    :return: None. The encoded data is handed to ``save.datasetSave`` with tag 'pkmer'.
    """
    elements = utils.sequenceElements(seqType)
    vocabulary = [
        ''.join(letters)
        for letters in itertools.product(elements, repeat=args.kTuple)
    ]

    # All-zero template copied for every window (instead of rebuilding the
    # whole dict each time) and an all-zero row used for padding. The padding
    # width is taken from the vocabulary so it always matches the one-hot rows,
    # and it is defined for every seqType.
    blank = dict.fromkeys(vocabulary, 0)
    pad_row = [0] * len(vocabulary)

    terminusLength = args.terminusLength
    # Number of windows in a full-length sequence.
    target_rows = terminusLength - args.kTuple + 1

    T = []
    for x in X:
        x = x[:terminusLength]
        rows = []
        # presumably utils.kmers yields the sliding windows of length kTuple -
        # TODO confirm against utils
        for kmer in utils.kmers(x, args.kTuple):
            one_hot = dict(blank)
            one_hot[kmer] = 1
            rows.append(list(one_hot.values()))
        # Pad by the number of rows actually missing. The original derived this
        # from the sequence length, which overshoots when the sequence is
        # shorter than kTuple.
        rows.extend([pad_row] * max(0, target_rows - len(rows)))
        T.append(np.array(rows))
    T = np.array(T)

    totalFeature = 0
    if seqType == 'DNA' or seqType == 'RNA':
        totalFeature = (4**args.kTuple)
    elif seqType == 'PROT':
        totalFeature = (20**args.kTuple)

    save.datasetSave(T, totalFeature, 'pkmer')


#end-def
Beispiel #9
0
def generate(X, seqType, args):
    """
    Encode every residue by membership in ten physicochemical property sets and save the result.

    # Reference: https://doi.org/10.1093/bioinformatics/bty451 (Supp: Table S2)

    :param X: sequences to encode; forwarded unchanged to ``utils.processMono``
    :param seqType: only 'PROT' is encoded. 'DNA'/'RNA' print an error and
        return; any other value returns silently.
    :param args: run options; forwarded unchanged to ``utils.processMono``
    :return: None. The encoded data is handed to ``save.datasetSave`` with tag 'pcpP1'.
    """
    if seqType != 'PROT':
        if seqType == 'DNA' or seqType == 'RNA':
            print(CRED + "Error: The 'Physicochemical Properties-P1' feature is NOT applicable for DNA/RNA." + CEND)
        return None

    # Residues having each property, in column order. A residue may belong to
    # several sets, so a row can contain more than one 1.
    property_members = (
        'FYWH',           # Column1:  Aromatic
        'DE',             # Column2:  Negative
        'KHR',            # Column3:  Positive
        'NQSDECTKRHYW',   # Column4:  Polar
        'AGCTIVLKHFYWM',  # Column5:  Hydrophobic
        'IVL',            # Column6:  Aliphatic
        'ASGC',           # Column7:  Tiny
        'KHRDE',          # Column8:  Charged
        'PNDTCAGSV',      # Column9:  Small
        'P',              # Column10: Proline
    )

    d = {}
    for residue in 'ARNDCQEGHILKMFPSTWYV':
        d[residue] = [1 if residue in members else 0 for members in property_members]
    d['p'] = [0] * len(property_members)  # padding

    X = utils.processMono(X, d, args)

    # Only the protein path reaches this point: one column per property.
    totalFeature = 10
    save.datasetSave(X, totalFeature, 'pcpP1')
Beispiel #10
0
def generate(X, seqType, args):
    """
    Encode every residue of a protein sequence by its PAM250 row and save the result.

    # Reference-1: http://rosalind.info/glossary/pam250/
    # Reference-2: http://profs.scienze.univr.it/~liptak/ALBioinfo/2011_2012/files/pam1.pdf (M.O. Dayhoff et al.)

    :param X: sequences to encode; forwarded unchanged to ``utils.processMono``
    :param seqType: only 'PROT' is encoded. 'DNA'/'RNA' print an error and
        return; any other value returns silently.
    :param args: run options; forwarded unchanged to ``utils.processMono``
    :return: None. The encoded data is handed to ``save.datasetSave`` with tag 'pam250'.
    """
    if seqType != 'PROT':
        if seqType == 'DNA' or seqType == 'RNA':
            print(CRED + "Error: The 'PAM250' feature is NOT applicable for DNA/RNA." + CEND)
        return None

    # One 20-value row per residue; rows and columns both follow the
    # alphabetical one-letter order A C D E F G H I K L M N P Q R S T V W Y.
    d = {
        'A': [2, -2, 0, 0, -3, 1, -1, -1, -1, -2, -1, 0, 1, 0, -2, 1, 1, 0, -6, -3],
        'C': [-2, 12, -5, -5, -4, -3, -3, -2, -5, -6, -5, -4, -3, -5, -4, 0, -2, -2, -8, 0],
        'D': [0, -5, 4, 3, -6, 1, 1, -2, 0, -4, -3, 2, -1, 2, -1, 0, 0, -2, -7, -4],
        'E': [0, -5, 3, 4, -5, 0, 1, -2, 0, -3, -2, 1, -1, 2, -1, 0, 0, -2, -7, -4],
        'F': [-3, -4, -6, -5, 9, -5, -2, 1, -5, 2, 0, -3, -5, -5, -4, -3, -3, -1, 0, 7],
        'G': [1, -3, 1, 0, -5, 5, -2, -3, -2, -4, -3, 0, 0, -1, -3, 1, 0, -1, -7, -5],
        'H': [-1, -3, 1, 1, -2, -2, 6, -2, 0, -2, -2, 2, 0, 3, 2, -1, -1, -2, -3, 0],
        'I': [-1, -2, -2, -2, 1, -3, -2, 5, -2, 2, 2, -2, -2, -2, -2, -1, 0, 4, -5, -1],
        'K': [-1, -5, 0, 0, -5, -2, 0, -2, 5, -3, 0, 1, -1, 1, 3, 0, 0, -2, -3, -4],
        'L': [-2, -6, -4, -3, 2, -4, -2, 2, -3, 6, 4, -3, -3, -2, -3, -3, -2, 2, -2, -1],
        'M': [-1, -5, -3, -2, 0, -3, -2, 2, 0, 4, 6, -2, -2, -1, 0, -2, -1, 2, -4, -2],
        'N': [0, -4, 2, 1, -3, 0, 2, -2, 1, -3, -2, 2, 0, 1, 0, 1, 0, -2, -4, -2],
        'P': [1, -3, -1, -1, -5, 0, 0, -2, -1, -3, -2, 0, 6, 0, 0, 1, 0, -1, -6, -5],
        'Q': [0, -5, 2, 2, -5, -1, 3, -2, 1, -2, -1, 1, 0, 4, 1, -1, -1, -2, -5, -4],
        'R': [-2, -4, -1, -1, -4, -3, 2, -2, 3, -3, 0, 0, 0, 1, 6, 0, -1, -2, 2, -4],
        'S': [1, 0, 0, 0, -3, 1, -1, -1, 0, -3, -2, 1, 1, -1, 0, 2, 1, -1, -2, -3],
        'T': [1, -2, 0, 0, -3, 0, -1, 0, 0, -2, -1, 0, 0, -1, -1, 1, 3, 0, -5, -3],
        'V': [0, -2, -2, -2, -1, -1, -2, 4, -2, 2, 2, -2, -1, -2, -2, -1, 0, 4, -6, -2],
        'W': [-6, -8, -7, -7, 0, -7, -3, -5, -3, -2, -4, -4, -6, -5, 2, -2, -5, -6, 17, 0],
        'Y': [-3, 0, -4, -4, 7, -5, 0, -1, -4, -1, -2, -2, -5, -4, -4, -3, -3, -2, 0, 10],
        'p': [0] * 20,  # padding
    }

    X = utils.processMono(X, d, args)

    # Only the protein path reaches this point, so the width is always 20.
    totalFeature = 20
    save.datasetSave(X, totalFeature, 'pam250')
Beispiel #11
0
def generate(X, seqType, args):
    """
    Encode every residue by its group (1, 2 or 3) for seven physicochemical properties.

    # Reference: https://doi.org/10.1093/bioinformatics/bty451 (Supp: Table S3)

    :param X: sequences to encode; forwarded unchanged to ``utils.processMono``
    :param seqType: only 'PROT' is encoded. 'DNA'/'RNA' print an error and
        return; any other value returns silently.
    :param args: run options; forwarded unchanged to ``utils.processMono``
    :return: None. The encoded data is handed to ``save.datasetSave`` with tag 'pcpP2'.
    """
    if seqType != 'PROT':
        if seqType == 'DNA' or seqType == 'RNA':
            print(CRED + "Error: The 'Physicochemical Properties-P2' feature is NOT applicable for DNA/RNA." + CEND)
        return None

    # Group membership per property (Group-1 / Group-2 / Group-3):
    #   Hydrophobicity:                  {A, C, F, G, H, I, L, M, N, P, Q, S, T, V, W, Y} / {D, E} / {K, R}
    #   Normalized Van der Waals volume: {C, F, I, L, M, V, W} / {A, G, H, P, S, T, Y} / {D, E, K, N, Q, R}
    #   Polarity:                        {A, C, D, G, P, S, T} / {E, I, L, N, Q, V} / {F, H, K, M, R, W, Y}
    #   Polarizability:                  {C, F, I, L, M, V, W, Y} / {A, G, P, S, T} / {D, E, H, K, N, Q, R}
    #   Charge:                          {A, D, G, S, T} / {C, E, I, L, N, P, Q, V} / {F, H, K, M, R, W, Y}
    #   Secondary structures:            {D, G, N, P, S} / {A, E, H, K, L, M, Q, R} / {C, F, I, T, V, W, Y}
    #   Solvent accessibility:           {A, C, F, G, I, L, V, W} / {H, M, P, S, T, Y} / {D, E, K, N, R, Q}
    #
    # NOTE(review): judging from the rows below, the 21 columns are laid out
    # property by property - three consecutive columns (Group-1, Group-2,
    # Group-3) for each property in the order listed above - rather than all
    # Group-1 columns first. Confirm against the reference table.
    d = {
        'A': [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0],
        'R': [0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1],
        'N': [1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1],
        'D': [0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1],
        'C': [1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0],
        'Q': [1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1],
        'E': [0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1],
        'G': [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0],
        'H': [1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0],
        'I': [1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0],
        'L': [1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0],
        'K': [0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1],
        'M': [1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0],
        'F': [1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0],
        'P': [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0],
        'S': [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0],
        'T': [1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0],
        'W': [1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0],
        'Y': [1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0],
        'V': [1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0],
        'p': [0] * 21,  # padding
    }

    X = utils.processMono(X, d, args)

    # Only the protein path reaches this point: 7 properties x 3 groups.
    totalFeature = 21
    save.datasetSave(X, totalFeature, 'pcpP2')
Beispiel #12
0
def generate(X, seqType, args):
    '''
    Produce the 'pcpD2' feature set: map DNA trinucleotides to 12 numeric
    physicochemical values each and hand the encoded result to the saver.

    # Reference: repRNA
    :param X: input sequences; forwarded unchanged to utils.processTri
              (presumably an iterable of sequence strings -- confirm against caller).
    :param seqType: sequence alphabet. Only 'DNA' is encoded. 'PROT' and 'RNA'
                    print an error message; any other value is rejected silently.
    :param args: run options; forwarded unchanged to utils.processTri.
    :return: always None. On success the output is written through
             save.datasetSave(X, 12, 'pcpD2').
    '''

    # Guard clauses: this feature is defined for DNA only. Returning early for
    # *every* non-DNA type (not just PROT/RNA) keeps the lookup table below from
    # being referenced while unbound, which previously raised UnboundLocalError
    # for unrecognised seqType values.
    if seqType != 'DNA':
        if seqType == 'PROT' or seqType == 'RNA':
            print(CRED + 'Error: The \'Physicochemical Properties-D2\' feature is NOT applicable for PROT and RNA.' + CEND)
        return None

    # Trinucleotide -> 12 property values. 'p' is the all-zero padding symbol.
    d = {
        'GGG': [5.7000, 5.8500, 3.0000, 13.0000, 5.8270, 5.8270, 3.3110, 3.8680, 622.4000, 103.3887, 6.0000, 3.5360],
        'GGA': [6.2000, 5.0000, 2.0000, -5.0000, 4.9907, 4.9907, 3.8190, 3.5810, 622.4000, 103.3887, 3.8000, 4.7990],
        'GGC': [8.2000, 9.1000, 3.0000, 45.0000, 9.0823, 9.0823, 1.3870, 2.4480, 622.4000, 103.3887, 10.0000, 1.3090],
        'GGT': [5.2000, 5.3000, 2.0000, 8.0000, 5.3160, 5.3160, 3.6190, 4.1560, 622.4000, 103.3887, 5.4000, 3.8780],
        'GAG': [6.6000, 6.0000, 2.0000, 8.0000, 5.9806, 5.9806, 3.2210, 3.3530, 621.4000, 103.2226, 5.4000, 3.8780],
        'GAA': [5.1000, 4.0500, 1.0000, -12.0000, 4.0633, 4.0633, 4.3850, 4.2140, 621.4000, 103.2226, 3.0000, 5.2640],
        'GAC': [5.6000, 5.5000, 2.0000, 8.0000, 5.5164, 5.5164, 3.4980, 3.9250, 621.4000, 103.2226, 5.4000, 3.8780],
        'GAT': [3.6000, 4.4500, 1.0000, 7.0000, 4.4432, 4.4432, 4.1530, 5.0870, 621.4000, 103.2226, 5.3000, 3.9350],
        'GCG': [4.3000, 5.9000, 3.0000, 25.0000, 5.8914, 5.8914, 3.2750, 4.6780, 622.4000, 103.3887, 7.5000, 2.6910],
        'GCA': [7.5000, 6.7500, 2.0000, 13.0000, 6.7553, 6.7553, 2.7540, 2.8420, 622.4000, 103.3887, 6.0000, 3.5360],
        'GCC': [8.2000, 9.1000, 3.0000, 45.0000, 9.0823, 9.0823, 1.3870, 2.4480, 622.4000, 103.3887, 10.0000, 1.3090],
        'GCT': [6.3000, 6.9000, 2.0000, 25.0000, 6.8829, 6.8829, 2.6830, 3.5240, 622.4000, 103.3887, 7.5000, 2.6910],
        'GTG': [6.8000, 6.6500, 2.0000, 17.0000, 6.6255, 6.6255, 2.8320, 3.2390, 621.4000, 103.2226, 6.5000, 3.2530],
        'GTA': [6.4000, 5.0500, 1.0000, -6.0000, 5.0673, 5.0673, 3.7700, 3.4670, 621.4000, 103.2226, 3.7000, 4.8570],
        'GTC': [5.6000, 5.5000, 2.0000, 8.0000, 5.5164, 5.5164, 3.4980, 3.9250, 621.4000, 103.2226, 5.4000, 3.8780],
        'GTT': [1.6000, 2.6500, 1.0000, -6.0000, 2.6412, 2.6412, 5.2600, 6.2720, 621.4000, 103.2226, 3.7000, 4.8570],
        'AGG': [4.7000, 5.0500, 2.0000, 8.0000, 5.0523, 5.0523, 3.7820, 4.4450, 622.4000, 103.3887, 5.4000, 3.8780],
        'AGA': [6.5000, 4.9000, 1.0000, -9.0000, 4.8884, 4.8884, 3.8790, 3.4100, 622.4000, 103.3887, 3.3000, 5.0890],
        'AGC': [6.3000, 6.9000, 2.0000, 25.0000, 6.8829, 6.8829, 2.6830, 3.5240, 622.4000, 103.3887, 7.5000, 2.6910],
        'AGT': [2.0000, 3.9000, 1.0000, 11.0000, 3.9232, 3.9232, 4.4710, 6.0330, 622.4000, 103.3887, 5.8000, 3.6500],
        'AAG': [4.2000, 4.7000, 1.0000, 6.0000, 4.6992, 4.6992, 3.9950, 4.7360, 621.4000, 103.2226, 5.2000, 3.9920],
        'AAA': [0.1000, 0.0500, 0.0000, -36.0000, 0.0633, 0.0633, 6.8820, 7.1760, 621.4000, 103.2226, 0.0000, 7.0450],
        'AAC': [1.6000, 2.6500, 1.0000, -6.0000, 2.6412, 2.6412, 5.2600, 6.2720, 621.4000, 103.2226, 3.7000, 4.8570],
        'AAT': [0.0000, 0.3500, 0.0000, -30.0000, 0.3500, 0.3500, 6.6980, 7.2370, 621.4000, 103.2226, 0.7000, 6.6240],
        'ACG': [5.2000, 5.3000, 2.0000, 8.0000, 5.3055, 5.3055, 3.6250, 4.1560, 622.4000, 103.3887, 5.4000, 3.8780],
        'ACA': [5.8000, 5.5000, 1.0000, 6.0000, 5.4903, 5.4903, 3.5160, 3.8100, 622.4000, 103.3887, 5.2000, 3.9920],
        'ACC': [5.2000, 5.3000, 2.0000, 8.0000, 5.3160, 5.3160, 3.6190, 4.1560, 622.4000, 103.3887, 5.4000, 3.8780],
        'ACT': [2.0000, 3.9000, 1.0000, 11.0000, 3.9232, 3.9232, 4.4710, 6.0330, 622.4000, 103.3887, 5.8000, 3.6500],
        'ATG': [8.7000, 7.7000, 1.0000, 18.0000, 7.7171, 7.7171, 2.1850, 2.1690, 621.4000, 103.2226, 6.7000, 3.1400],
        'ATA': [9.7000, 6.2500, 0.0000, -13.0000, 6.2734, 6.2734, 3.0470, 1.6130, 621.4000, 103.2226, 2.8000, 5.3810],
        'ATC': [3.6000, 4.4500, 1.0000, 7.0000, 4.4432, 4.4432, 4.1530, 5.0870, 621.4000, 103.2226, 5.3000, 3.9350],
        'ATT': [0.0000, 0.3500, 0.0000, -30.0000, 0.3500, 0.3500, 6.6980, 7.2370, 621.4000, 103.2226, 0.7000, 6.6240],
        'CGG': [3.0000, 3.8500, 3.0000, 2.0000, 3.8690, 3.8690, 4.5020, 5.4400, 622.4000, 103.3887, 4.7000, 4.2790],
        'CGA': [5.8000, 7.0500, 2.0000, 31.0000, 7.0720, 7.0720, 2.5700, 3.8100, 622.4000, 103.3887, 8.3000, 2.2450],
        'CGC': [4.3000, 5.9000, 3.0000, 25.0000, 5.8914, 5.8914, 3.2750, 4.6780, 622.4000, 103.3887, 7.5000, 2.6910],
        'CGT': [5.2000, 5.3000, 2.0000, 8.0000, 5.3055, 5.3055, 3.6250, 4.1560, 622.4000, 103.3887, 5.4000, 3.8780],
        'CAG': [9.6000, 6.9000, 2.0000, -2.0000, 6.8996, 6.8996, 2.6710, 1.6680, 621.4000, 103.2226, 4.2000, 4.5670],
        'CAA': [6.2000, 4.7500, 1.0000, -9.0000, 4.7618, 4.7618, 3.9580, 3.5810, 621.4000, 103.2226, 3.3000, 5.0890],
        'CAC': [6.8000, 6.6500, 2.0000, 17.0000, 6.6255, 6.6255, 2.8320, 3.2390, 621.4000, 103.2226, 6.5000, 3.2530],
        'CAT': [8.7000, 7.7000, 1.0000, 18.0000, 7.7171, 7.7171, 2.1850, 2.1690, 621.4000, 103.2226, 6.7000, 3.1400],
        'CCG': [3.0000, 3.8500, 3.0000, 2.0000, 3.8690, 3.8690, 4.5020, 5.4400, 622.4000, 103.3887, 4.7000, 4.2790],
        'CCA': [0.7000, 3.0500, 2.0000, 8.0000, 3.0587, 3.0587, 5.0000, 6.8130, 622.4000, 103.3887, 5.4000, 3.8780],
        'CCC': [5.7000, 5.8500, 3.0000, 13.0000, 5.8270, 5.8270, 3.3110, 3.8680, 622.4000, 103.3887, 6.0000, 3.5360],
        'CCT': [4.7000, 5.0500, 2.0000, 8.0000, 5.0523, 5.0523, 3.7820, 4.4450, 622.4000, 103.3887, 5.4000, 3.8780],
        'CTG': [9.6000, 6.9000, 2.0000, -2.0000, 6.8996, 6.8996, 2.6710, 1.6680, 621.4000, 103.2226, 4.2000, 4.5670],
        'CTA': [7.8000, 5.0000, 1.0000, -18.0000, 5.0030, 5.0030, 3.8130, 2.6730, 621.4000, 103.2226, 2.2000, 5.7340],
        'CTC': [6.6000, 6.0000, 2.0000, 8.0000, 5.9806, 5.9806, 3.2210, 3.3530, 621.4000, 103.2226, 5.4000, 3.8780],
        'CTT': [4.2000, 4.7000, 1.0000, 6.0000, 4.6992, 4.6992, 3.9950, 4.7360, 621.4000, 103.2226, 5.2000, 3.9920],
        'TGG': [0.7000, 3.0500, 2.0000, 8.0000, 3.0587, 3.0587, 5.0000, 6.8130, 622.4000, 103.3887, 5.4000, 3.8780],
        # NOTE(review): the 7th value (10.0000) differs from the reverse-complement
        # entry 'TCA' (2.1970) although the other reverse-complement pairs in this
        # table agree, and it equals this row's 1st value -- possibly a
        # transcription slip. Value kept as-is; confirm against the reference data.
        'TGA': [10.0000, 7.7000, 1.0000, 8.0000, 7.7000, 7.7000, 10.0000, 1.4470, 622.4000, 103.3887, 5.4000, 3.8780],
        'TGC': [7.5000, 6.7500, 2.0000, 13.0000, 6.7553, 6.7553, 2.7540, 2.8420, 622.4000, 103.3887, 6.0000, 3.5360],
        'TGT': [5.8000, 5.5000, 1.0000, 6.0000, 5.4903, 5.4903, 3.5160, 3.8100, 622.4000, 103.3887, 5.2000, 3.9920],
        'TAG': [7.8000, 5.0000, 1.0000, -18.0000, 5.0030, 5.0030, 3.8130, 2.6730, 621.4000, 103.2226, 2.2000, 5.7340],
        'TAA': [7.3000, 4.6500, 0.0000, -20.0000, 4.6709, 4.6709, 4.0130, 2.9550, 621.4000, 103.2226, 2.0000, 5.8520],
        'TAC': [6.4000, 5.0500, 1.0000, -6.0000, 5.0673, 5.0673, 3.7700, 3.4670, 621.4000, 103.2226, 3.7000, 4.8570],
        'TAT': [9.7000, 6.2500, 0.0000, -13.0000, 6.2734, 6.2734, 3.0470, 1.6130, 621.4000, 103.2226, 2.8000, 5.3810],
        'TCG': [5.8000, 7.0500, 2.0000, 31.0000, 7.0720, 7.0720, 2.5700, 3.8100, 622.4000, 103.3887, 8.3000, 2.2450],
        'TCA': [10.0000, 7.7000, 1.0000, 8.0000, 7.7000, 7.7000, 2.1970, 1.4470, 622.4000, 103.3887, 5.4000, 3.8780],
        'TCC': [6.2000, 5.0000, 2.0000, -5.0000, 4.9907, 4.9907, 3.8190, 3.5810, 622.4000, 103.3887, 3.8000, 4.7990],
        'TCT': [6.5000, 4.9000, 1.0000, -9.0000, 4.8884, 4.8884, 3.8790, 3.4100, 622.4000, 103.3887, 3.3000, 5.0890],
        'TTG': [6.2000, 4.7500, 1.0000, -9.0000, 4.7618, 4.7618, 3.9580, 3.5810, 621.4000, 103.2226, 3.3000, 5.0890],
        'TTA': [7.3000, 4.6500, 0.0000, -20.0000, 4.6709, 4.6709, 4.0130, 2.9550, 621.4000, 103.2226, 2.0000, 5.8520],
        'TTC': [5.1000, 4.0500, 1.0000, -12.0000, 4.0633, 4.0633, 4.3850, 4.2140, 621.4000, 103.2226, 3.0000, 5.2640],
        # NOTE(review): the 7th value (0.1000) differs from the reverse-complement
        # entry 'AAA' (6.8820) and equals this row's 1st value -- same pattern as
        # 'TGA' above. Value kept as-is; confirm against the reference data.
        'TTT': [0.1000, 0.0500, 0.0000, -36.0000, 0.0633, 0.0633, 0.1000, 7.1760, 621.4000, 103.2226, 0.0000, 7.0450],
        'p': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # padding
    }

    X = utils.processTri(X, d, args)

    # Only the DNA path reaches this point, so the per-trinucleotide feature
    # width is always the length of one table row.
    totalFeature = 12

    save.datasetSave(X, totalFeature, 'pcpD2')
#end-def