Example #1
0
def generate(X, seqType, args):
    '''
    Z-scale encoding (pcpP4).

    Builds a lookup table holding five Z-scale descriptors per amino acid,
    passes it to utils.processMono together with X and args, and forwards
    the result to save.datasetSave under the tag 'pcpP4'.

    # Reference-1: Z-Scale (iFeature)
    # Reference-2: https://pubs.acs.org/doi/suppl/10.1021/jm9700575/suppl_file/jm2481.pdf
    :param X: handed unchanged to utils.processMono (presumably the input sequences -- confirm with caller)
    :param seqType: 'PROT' is encoded; 'DNA'/'RNA' only print an error; any other value is silently ignored
    :param args: handed unchanged to utils.processMono
    :return: always None (the encoded data goes to save.datasetSave, it is not returned)
    '''

    # Guard clause: this descriptor exists for proteins only.
    if seqType != 'PROT':
        if seqType in ('DNA', 'RNA'):
            # CRED / CEND are defined elsewhere in the module
            # (presumably terminal colour codes -- not visible here).
            message = "Error: The 'Physicochemical Properties-P4' feature is NOT applicable for DNA/RNA."
            print(CRED + message + CEND)
        return None

    # One row per residue, five descriptors per row; 'p' is the all-zero row.
    zscale = {
        'A': [0.24, -2.32, 0.60, -0.14, 1.30],
        'C': [0.84, -1.67, 3.71, 0.18, -2.65],
        'D': [3.98, 0.93, 1.93, -2.46, 0.75],
        'E': [3.11, 0.26, -0.11, -0.34, -0.25],
        'F': [-4.22, 1.94, 1.06, 0.54, -0.62],
        'G': [2.05, -4.06, 0.36, -0.82, -0.38],
        'H': [2.47, 1.95, 0.26, 3.90, 0.09],
        'I': [-3.89, -1.73, -1.71, -0.84, 0.26],
        'K': [2.29, 0.89, -2.49, 1.49, 0.31],
        'L': [-4.28, -1.30, -1.49, -0.72, 0.84],
        'M': [-2.85, -0.22, 0.47, 1.94, -0.98],
        'N': [3.05, 1.62, 1.04, -1.15, 1.61],
        'P': [-1.66, 0.27, 1.84, 0.70, 2.00],
        'Q': [1.75, 0.50, -1.44, -1.34, 0.66],
        'R': [3.52, 2.50, -3.50, 1.99, -0.17],
        'S': [2.39, -1.07, 1.15, -1.39, 0.67],
        'T': [0.75, -2.18, -1.12, -1.46, -0.40],
        'V': [-2.59, -2.64, -1.54, -0.85, -0.02],
        'W': [-4.36, 3.94, 0.59, 3.44, -1.59],
        'Y': [-2.54, 2.44, 0.43, 0.04, -1.47],
        'p': [0, 0, 0, 0, 0],
    }

    encoded = utils.processMono(X, zscale, args)

    # Five descriptors per residue.
    featureCount = 5
    save.datasetSave(encoded, featureCount, 'pcpP4')
Example #2
0
def generate(X, seqType, args):
    '''
    Seven-value physicochemical encoding (pcpP5).

    Builds a lookup table holding seven values per amino acid, passes it to
    utils.processMono together with X and args, and forwards the result to
    save.datasetSave under the tag 'pcpP5'.

    # Reference: Spider-2
    :param X: handed unchanged to utils.processMono (presumably the input sequences -- confirm with caller)
    :param seqType: 'PROT' is encoded; 'DNA'/'RNA' only print an error; any other value is silently ignored
    :param args: handed unchanged to utils.processMono
    :return: always None (the encoded data goes to save.datasetSave, it is not returned)
    '''

    # Guard clause: this descriptor exists for proteins only.
    if seqType != 'PROT':
        if seqType in ('DNA', 'RNA'):
            # CRED / CEND are defined elsewhere in the module
            # (presumably terminal colour codes -- not visible here).
            message = "Error: The 'Physicochemical Properties-P5' feature is NOT applicable for DNA/RNA."
            print(CRED + message + CEND)
        return None

    # One row per residue, seven values per row; 'p' is the all-zero row.
    properties = {
        'A': [-0.350, -0.680, -0.677, -0.171, -0.170, 0.900, -0.476],
        'C': [-0.140, -0.329, -0.359, 0.508, -0.114, -0.652, 0.476],
        'D': [-0.213, -0.417, -0.281, -0.767, -0.900, -0.155, -0.635],
        'E': [-0.230, -0.241, -0.058, -0.696, -0.868, 0.900, -0.582],
        'F': [0.363, 0.373, 0.412, 0.646, -0.272, 0.155, 0.318],
        'G': [-0.900, -0.900, -0.900, -0.342, -0.179, -0.900, -0.900],
        'H': [0.384, 0.110, 0.138, -0.271, 0.195, -0.031, -0.106],
        'I': [0.900, -0.066, -0.009, 0.652, -0.186, 0.155, 0.688],
        'K': [-0.088, 0.066, 0.163, -0.889, 0.727, 0.279, -0.265],
        'L': [0.213, -0.066, -0.009, 0.596, -0.186, 0.714, -0.053],
        'M': [0.110, 0.066, 0.087, 0.337, -0.262, 0.652, -0.001],
        'N': [-0.213, -0.329, -0.243, -0.674, -0.075, -0.403, -0.529],
        'P': [0.247, -0.900, -0.294, 0.055, -0.010, -0.900, 0.106],
        'Q': [-0.230, -0.110, -0.020, -0.464, -0.276, 0.528, -0.371],
        'R': [0.105, 0.373, 0.466, -0.900, 0.900, 0.528, -0.371],
        'S': [-0.337, -0.637, -0.544, -0.364, -0.265, -0.466, -0.212],
        'T': [0.402, -0.417, -0.321, -0.199, -0.288, -0.403, 0.212],
        'V': [0.677, -0.285, -0.232, 0.331, -0.191, -0.031, 0.900],
        'W': [0.479, 0.900, 0.900, 0.900, -0.209, 0.279, 0.529],
        'Y': [0.363, 0.417, 0.541, 0.188, -0.274, -0.155, 0.476],
        'p': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    }

    encoded = utils.processMono(X, properties, args)

    # Seven values per residue.
    featureCount = 7
    save.datasetSave(encoded, featureCount, 'pcpP5')
Example #3
0
def generate(X, seqType, args):
    '''
    Transition-Transversion encoding (tt).

    Builds a lookup table with four values per nucleotide, passes it to
    utils.processMono together with X and args, and forwards the result to
    save.datasetSave under the tag 'tt'.

    # Reference: It is a very common feature.
    :param X: handed unchanged to utils.processMono (presumably the input sequences -- confirm with caller)
    :param seqType: 'DNA' or 'RNA' is encoded; 'PROT' only prints an error; any other value is silently ignored
    :param args: handed unchanged to utils.processMono
    :return: always None (the encoded data goes to save.datasetSave, it is not returned)
    '''

    # The DNA and RNA tables differ only in the key of the fourth base.
    if seqType == 'DNA':
        fourthBase = 'T'
    elif seqType == 'RNA':
        fourthBase = 'U'
    else:
        if seqType == 'PROT':
            # CRED / CEND are defined elsewhere in the module
            # (presumably terminal colour codes -- not visible here).
            print(CRED + "Error: The 'Transition-Transversion' feature is NOT applicable for PROT." + CEND)
        # Unsupported types used to fall through with the table undefined and
        # crash at the processMono call; return None instead, as the sibling
        # encoders do.
        return None

    d = {
        'A': [0, 5, 5, 1],
        'C': [5, 1, 0, 5],
        'G': [1, 5, 5, 0],
        fourthBase: [5, 0, 1, 5],
        'p': [0, 0, 0, 0],  # padding
    }

    X = utils.processMono(X, d, args)

    # Four values per nucleotide.
    totalFeature = 4
    save.datasetSave(X, totalFeature, 'tt')
#end-def
Example #4
0
def generate(X, seqType, args):
    '''
    BLOSUM62 encoding (blosum62).

    Builds a lookup table with one 20-value BLOSUM62 row per amino acid,
    passes it to utils.processMono together with X and args, and forwards
    the result to save.datasetSave under the tag 'blosum62'.

    # Reference-1: http://rosalind.info/glossary/blosum62/
    # Reference-2: https://www.pnas.org/content/89/22/10915/
    :param X: handed unchanged to utils.processMono (presumably the input sequences -- confirm with caller)
    :param seqType: 'PROT' is encoded; 'DNA'/'RNA' only print an error; any other value is silently ignored
    :param args: handed unchanged to utils.processMono
    :return: always None (the encoded data goes to save.datasetSave, it is not returned)
    '''

    # Guard clause: substitution scores exist for proteins only.
    if seqType != 'PROT':
        if seqType in ('DNA', 'RNA'):
            # CRED / CEND are defined elsewhere in the module
            # (presumably terminal colour codes -- not visible here).
            message = "Error: The 'BLOSUM62' feature is NOT applicable for DNA/RNA."
            print(CRED + message + CEND)
        return None

    # Columns follow the same residue order as the keys (A, C, D, ..., W, Y);
    # 'p' is the all-zero row.
    blosum = {
        'A': [4, 0, -2, -1, -2, 0, -2, -1, -1, -1, -1, -2, -1, -1, -1, 1, 0, 0, -3, -2],
        'C': [0, 9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -3, -3, -3, -1, -1, -1, -2, -2],
        'D': [-2, -3, 6, 2, -3, -1, -1, -3, -1, -4, -3, 1, -1, 0, -2, 0, -1, -3, -4, -3],
        'E': [-1, -4, 2, 5, -3, -2, 0, -3, 1, -3, -2, 0, -1, 2, 0, 0, -1, -2, -3, -2],
        'F': [-2, -2, -3, -3, 6, -3, -1, 0, -3, 0, 0, -3, -4, -3, -3, -2, -2, -1, 1, 3],
        'G': [0, -3, -1, -2, -3, 6, -2, -4, -2, -4, -3, 0, -2, -2, -2, 0, -2, -3, -2, -3],
        'H': [-2, -3, -1, 0, -1, -2, 8, -3, -1, -3, -2, 1, -2, 0, 0, -1, -2, -3, -2, 2],
        'I': [-1, -1, -3, -3, 0, -4, -3, 4, -3, 2, 1, -3, -3, -3, -3, -2, -1, 3, -3, -1],
        'K': [-1, -3, -1, 1, -3, -2, -1, -3, 5, -2, -1, 0, -1, 1, 2, 0, -1, -2, -3, -2],
        'L': [-1, -1, -4, -3, 0, -4, -3, 2, -2, 4, 2, -3, -3, -2, -2, -2, -1, 1, -2, -1],
        'M': [-1, -1, -3, -2, 0, -3, -2, 1, -1, 2, 5, -2, -2, 0, -1, -1, -1, 1, -1, -1],
        'N': [-2, -3, 1, 0, -3, 0, 1, -3, 0, -3, -2, 6, -2, 0, 0, 1, 0, -3, -4, -2],
        'P': [-1, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, 7, -1, -2, -1, -1, -2, -4, -3],
        'Q': [-1, -3, 0, 2, -3, -2, 0, -3, 1, -2, 0, 0, -1, 5, 1, 0, -1, -2, -2, -1],
        'R': [-1, -3, -2, 0, -3, -2, 0, -3, 2, -2, -1, 0, -2, 1, 5, -1, -1, -3, -3, -2],
        'S': [1, -1, 0, 0, -2, 0, -1, -2, 0, -2, -1, 1, -1, 0, -1, 4, 1, -2, -3, -2],
        'T': [0, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 1, 5, 0, -2, -2],
        'V': [0, -1, -3, -2, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, 0, 4, -3, -1],
        'W': [-3, -2, -4, -3, 1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 11, 2],
        'Y': [-2, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, 2, 7],
        'p': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    }

    encoded = utils.processMono(X, blosum, args)

    # Twenty scores per residue.
    featureCount = 20
    save.datasetSave(encoded, featureCount, 'blosum62')
Example #5
0
def generate(X, seqType, args):
    '''
    Eight-class one-hot physicochemical encoding (pcpP3).

    Assigns every amino acid to one of eight residue classes, builds the
    matching one-hot lookup table, passes it to utils.processMono together
    with X and args, and forwards the result to save.datasetSave under the
    tag 'pcpP3'.

    # Reference: https://www.biorxiv.org/content/10.1101/332171v2.full.pdf (Supp: Table-5)
    :param X: handed unchanged to utils.processMono (presumably the input sequences -- confirm with caller)
    :param seqType: 'PROT' is encoded; 'DNA'/'RNA' only print an error; any other value is silently ignored
    :param args: handed unchanged to utils.processMono
    :return: always None (the encoded data goes to save.datasetSave, it is not returned)
    '''

    # Guard clause: residue classes exist for proteins only.
    if seqType != 'PROT':
        if seqType in ('DNA', 'RNA'):
            # CRED / CEND are defined elsewhere in the module
            # (presumably terminal colour codes -- not visible here).
            message = "Error: The 'Physicochemical Properties-P3' feature is NOT applicable for DNA/RNA."
            print(CRED + message + CEND)
        return None

    # One class per output column, in column order.
    residueClasses = (
        'DE',     # Column1 --> Acidic
        'RHK',    # Column2 --> Basic
        'YFW',    # Column3 --> Aromatic side chain
        'ILVAG',  # Column4 --> Aliphatic side chain
        'P',      # Column5 --> Cyclic
        'MC',     # Column6 --> Sulfur containing
        'ST',     # Column7 --> Hydroxyl containing
        'QN',     # Column8 --> Acidic amide
    )

    # Each residue belongs to exactly one class, so each row is one-hot.
    onehot = {}
    for residue in 'ACDEFGHIKLMNPQRSTVWY':
        onehot[residue] = [int(residue in members) for members in residueClasses]
    onehot['p'] = [0] * len(residueClasses)  # padding

    encoded = utils.processMono(X, onehot, args)

    # Eight class flags per residue.
    featureCount = 8
    save.datasetSave(encoded, featureCount, 'pcpP3')
Example #6
0
def generate(X, seqType, args):
    '''
    Binary profile / one-hot encoding (bpf).

    Builds an identity-matrix lookup table over the alphabet of the given
    sequence type, passes it to utils.processMono together with X and args,
    and forwards the result to save.datasetSave under the tag 'bpf'.

    # Reference: https://doi.org/10.1016/j.omtn.2019.04.025 (It is also called "identity matrix".)
    :param X: handed unchanged to utils.processMono (presumably the input sequences -- confirm with caller)
    :param seqType: 'DNA', 'RNA' or 'PROT'; any other value is silently ignored
    :param args: handed unchanged to utils.processMono
    :return: always None (the encoded data goes to save.datasetSave, it is not returned)
    '''

    # Symbol order fixes which column carries the 1 for each symbol.
    if seqType == 'DNA':
        alphabet = 'ACGT'
    elif seqType == 'RNA':
        alphabet = 'ACGU'
    elif seqType == 'PROT':
        # Protein/Peptide One-Zero Encoding
        alphabet = 'ACDEFGHIKLMNPQRSTVWY'
    else:
        # Unsupported types used to fall through with the table undefined and
        # crash at the processMono call; return None instead, as the sibling
        # encoders do.
        return None

    width = len(alphabet)

    # Row i has a single 1 in column i (identity matrix).
    d = {
        symbol: [int(column == row) for column in range(width)]
        for row, symbol in enumerate(alphabet)
    }
    d['p'] = [0] * width  # padding

    X = utils.processMono(X, d, args)

    # One column per alphabet symbol: 4 for DNA/RNA, 20 for PROT.
    totalFeature = width
    save.datasetSave(X, totalFeature, 'bpf')


#end-def
Example #7
0
def generate(X, seqType, args):
    '''
    Ten-property binary physicochemical encoding (pcpP1).

    Flags, for every amino acid, which of ten property sets it belongs to,
    passes the resulting lookup table to utils.processMono together with X
    and args, and forwards the result to save.datasetSave under the tag
    'pcpP1'.

    # Reference: https://doi.org/10.1093/bioinformatics/bty451 (Supp: Table S2)
    :param X: handed unchanged to utils.processMono (presumably the input sequences -- confirm with caller)
    :param seqType: 'PROT' is encoded; 'DNA'/'RNA' only print an error; any other value is silently ignored
    :param args: handed unchanged to utils.processMono
    :return: always None (the encoded data goes to save.datasetSave, it is not returned)
    '''

    # Guard clause: these properties exist for proteins only.
    if seqType != 'PROT':
        if seqType in ('DNA', 'RNA'):
            # CRED / CEND are defined elsewhere in the module
            # (presumably terminal colour codes -- not visible here).
            message = "Error: The 'Physicochemical Properties-P1' feature is NOT applicable for DNA/RNA."
            print(CRED + message + CEND)
        return None

    # One property set per output column, in column order.
    # A residue may belong to several sets.
    propertySets = (
        'FYWH',           # Column1  --> Aromatic
        'DE',             # Column2  --> Negative
        'KHR',            # Column3  --> Positive
        'NQSDECTKRHYW',   # Column4  --> Polar
        'AGCTIVLKHFYWM',  # Column5  --> Hydrophobic
        'IVL',            # Column6  --> Aliphatic
        'ASGC',           # Column7  --> Tiny
        'KHRDE',          # Column8  --> Charged
        'PNDTCAGSV',      # Column9  --> Small
        'P',              # Column10 --> Proline
    )

    # Keys are inserted in the same residue order as the original table.
    flags = {}
    for residue in 'ARNDCQEGHILKMFPSTWYV':
        flags[residue] = [int(residue in members) for members in propertySets]
    flags['p'] = [0] * len(propertySets)

    encoded = utils.processMono(X, flags, args)

    # Ten property flags per residue.
    featureCount = 10
    save.datasetSave(encoded, featureCount, 'pcpP1')
Example #8
0
def generate(X, seqType, args):
    '''
    PAM250 encoding (pam250).

    Builds a lookup table with one 20-value PAM250 row per amino acid,
    passes it to utils.processMono together with X and args, and forwards
    the result to save.datasetSave under the tag 'pam250'.

    # Reference-1: http://rosalind.info/glossary/pam250/
    # Reference-2: http://profs.scienze.univr.it/~liptak/ALBioinfo/2011_2012/files/pam1.pdf (M.O. Dayhoff et al.)
    :param X: handed unchanged to utils.processMono (presumably the input sequences -- confirm with caller)
    :param seqType: 'PROT' is encoded; 'DNA'/'RNA' only print an error; any other value is silently ignored
    :param args: handed unchanged to utils.processMono
    :return: always None (the encoded data goes to save.datasetSave, it is not returned)
    '''

    # Guard clause: substitution scores exist for proteins only.
    if seqType != 'PROT':
        if seqType in ('DNA', 'RNA'):
            # CRED / CEND are defined elsewhere in the module
            # (presumably terminal colour codes -- not visible here).
            message = "Error: The 'PAM250' feature is NOT applicable for DNA/RNA."
            print(CRED + message + CEND)
        return None

    # Columns follow the same residue order as the keys (A, C, D, ..., W, Y);
    # 'p' is the all-zero row.
    pam = {
        'A': [2, -2, 0, 0, -3, 1, -1, -1, -1, -2, -1, 0, 1, 0, -2, 1, 1, 0, -6, -3],
        'C': [-2, 12, -5, -5, -4, -3, -3, -2, -5, -6, -5, -4, -3, -5, -4, 0, -2, -2, -8, 0],
        'D': [0, -5, 4, 3, -6, 1, 1, -2, 0, -4, -3, 2, -1, 2, -1, 0, 0, -2, -7, -4],
        'E': [0, -5, 3, 4, -5, 0, 1, -2, 0, -3, -2, 1, -1, 2, -1, 0, 0, -2, -7, -4],
        'F': [-3, -4, -6, -5, 9, -5, -2, 1, -5, 2, 0, -3, -5, -5, -4, -3, -3, -1, 0, 7],
        'G': [1, -3, 1, 0, -5, 5, -2, -3, -2, -4, -3, 0, 0, -1, -3, 1, 0, -1, -7, -5],
        'H': [-1, -3, 1, 1, -2, -2, 6, -2, 0, -2, -2, 2, 0, 3, 2, -1, -1, -2, -3, 0],
        'I': [-1, -2, -2, -2, 1, -3, -2, 5, -2, 2, 2, -2, -2, -2, -2, -1, 0, 4, -5, -1],
        'K': [-1, -5, 0, 0, -5, -2, 0, -2, 5, -3, 0, 1, -1, 1, 3, 0, 0, -2, -3, -4],
        'L': [-2, -6, -4, -3, 2, -4, -2, 2, -3, 6, 4, -3, -3, -2, -3, -3, -2, 2, -2, -1],
        'M': [-1, -5, -3, -2, 0, -3, -2, 2, 0, 4, 6, -2, -2, -1, 0, -2, -1, 2, -4, -2],
        'N': [0, -4, 2, 1, -3, 0, 2, -2, 1, -3, -2, 2, 0, 1, 0, 1, 0, -2, -4, -2],
        'P': [1, -3, -1, -1, -5, 0, 0, -2, -1, -3, -2, 0, 6, 0, 0, 1, 0, -1, -6, -5],
        'Q': [0, -5, 2, 2, -5, -1, 3, -2, 1, -2, -1, 1, 0, 4, 1, -1, -1, -2, -5, -4],
        'R': [-2, -4, -1, -1, -4, -3, 2, -2, 3, -3, 0, 0, 0, 1, 6, 0, -1, -2, 2, -4],
        'S': [1, 0, 0, 0, -3, 1, -1, -1, 0, -3, -2, 1, 1, -1, 0, 2, 1, -1, -2, -3],
        'T': [1, -2, 0, 0, -3, 0, -1, 0, 0, -2, -1, 0, 0, -1, -1, 1, 3, 0, -5, -3],
        'V': [0, -2, -2, -2, -1, -1, -2, 4, -2, 2, 2, -2, -1, -2, -2, -1, 0, 4, -6, -2],
        'W': [-6, -8, -7, -7, 0, -7, -3, -5, -3, -2, -4, -4, -6, -5, 2, -2, -5, -6, 17, 0],
        'Y': [-3, 0, -4, -4, 7, -5, 0, -1, -4, -1, -2, -2, -5, -4, -4, -3, -3, -2, 0, 10],
        'p': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    }

    encoded = utils.processMono(X, pam, args)

    # Twenty scores per residue.
    featureCount = 20
    save.datasetSave(encoded, featureCount, 'pam250')
Example #9
0
def generate(X, seqType, args):
    '''
    Seven-property, three-group binary encoding (pcpP2).

    For each of seven properties every amino acid falls into exactly one of
    three groups. The lookup table holds, per residue, three group flags for
    each property (21 values). The table is passed to utils.processMono
    together with X and args, and the result is forwarded to
    save.datasetSave under the tag 'pcpP2'.

    # Reference: https://doi.org/10.1093/bioinformatics/bty451 (Supp: Table S3)
    :param X: handed unchanged to utils.processMono (presumably the input sequences -- confirm with caller)
    :param seqType: 'PROT' is encoded; 'DNA'/'RNA' only print an error; any other value is silently ignored
    :param args: handed unchanged to utils.processMono
    :return: always None (the encoded data goes to save.datasetSave, it is not returned)
    '''

    # Guard clause: these properties exist for proteins only.
    if seqType != 'PROT':
        if seqType in ('DNA', 'RNA'):
            # CRED / CEND are defined elsewhere in the module
            # (presumably terminal colour codes -- not visible here).
            message = "Error: The 'Physicochemical Properties-P2' feature is NOT applicable for DNA/RNA."
            print(CRED + message + CEND)
        return None

    # Column layout is property-major: each property contributes three
    # consecutive columns (Group-1, Group-2, Group-3 membership).
    propertyGroups = (
        # Columns 1-3   --> Hydrophobicity
        ('ACFGHILMNPQSTVWY', 'DE', 'KR'),
        # Columns 4-6   --> Normalized Van der Waals volume
        ('CFILMVW', 'AGHPSTY', 'DEKNQR'),
        # Columns 7-9   --> Polarity
        ('ACDGPST', 'EILNQV', 'FHKMRWY'),
        # Columns 10-12 --> Polarizibility
        ('CFILMVWY', 'AGPST', 'DEHKNQR'),
        # Columns 13-15 --> Charge
        ('ADGST', 'CEILNPQV', 'FHKMRWY'),
        # Columns 16-18 --> Secondary structures
        ('DGNPS', 'AEHKLMQR', 'CFITVWY'),
        # Columns 19-21 --> Solvent accessibility
        ('ACFGILVW', 'HMPSTY', 'DEKNRQ'),
    )

    # Keys are inserted in the same residue order as the original table.
    flags = {}
    for residue in 'ARNDCQEGHILKMFPSTWYV':
        row = []
        for groups in propertyGroups:
            row.extend(int(residue in members) for members in groups)
        flags[residue] = row
    flags['p'] = [0] * (3 * len(propertyGroups))

    encoded = utils.processMono(X, flags, args)

    # 7 properties x 3 groups.
    featureCount = 21
    save.datasetSave(encoded, featureCount, 'pcpP2')