예제 #1
0
def getIndexChar(char):
    """Look up ``char`` in a fixed selection of AAindex records.

    The aaindex module is (re)initialised from the current directory on
    every call, then each hard-coded accession is queried for ``char``.

    Args:
        char: Key passed to each record's ``get`` (presumably a one-letter
            amino-acid code -- TODO confirm).

    Returns:
        list: The looked-up values, in accession order.  An accession whose
        lookup raises is reported on stdout and contributes no entry, so the
        result can be shorter than the accession list.
    """
    import aaindex
    aaindex.init(path='.')

    # Only this fixed selection is queried; enumerating every key of the
    # database beforehand had no effect on the result.
    listIndices = [
        'KARS160114', 'KARS160106', 'MIYS990101', 'CASG920101', 'FAUJ880111',
        'FAUJ880112'
    ]

    listComputedValue = []
    for indice in listIndices:
        try:
            x = aaindex.get(indice)
            listComputedValue.append(x.get(char))
            print(indice)
        except Exception:
            # Best effort: report the failing accession and carry on with
            # the remaining ones.
            print("ECHEC AVEC INDICES : " + indice)
    return listComputedValue
예제 #2
0
def get_aaindex_features(AA):
    """
    Return the values of twelve AAindex records for the amino acid ``AA``.

    The aaindex module is initialised from '../data/aaindex' on every call.
    All twelve records are fetched first and then each is queried with
    ``AA``; the returned list follows the order of the accessions below.
    """
    aaindex.init(path='../data/aaindex')
    accessions = (
        'ARGP820101',  # Hydrophobicity
        'HOPT810101',  # Hydrophilicity
        'GRAR740102',  # Polarity
        'CHAM820101',  # Polarizability
        'WERD780101',  # Propensity
        'JANJ780101',  # Average accessible surface area
        'LEVM760105',  # Radius of gyration of side chain
        'KRIW790103',  # Side chain volume
        'KLEP840101',  # Charge
        'FAUJ880109',  # Number of hydrogen bond donors
        'FASG760101',  # Molecular weight
        'VELV850101',  # Electron-ion interaction potential
    )
    records = [aaindex.get(accession) for accession in accessions]
    return [record.get(AA) for record in records]
예제 #3
0
 def _load_score_tab(self, **kwargs):
     """Build a table of AAindex values and store it in ``self._scoretab``.

     The table is a DataFrame indexed by ``Const.AMINO_ACIDS`` with one
     column per record held by the aaindex module after initialisation.

     Args:
         **kwargs: Optional ``aaindex_dir`` naming the directory handed to
             ``aaindex.init``; defaults to 'datasets/aaindex/'.
     """
     # dict.get replaces the Python-2-only has_key() test.
     aaindex_dir = kwargs.get('aaindex_dir', 'datasets/aaindex/')
     aaindex.init(aaindex_dir, '1')
     scoretab = pd.DataFrame(index=list(Const.AMINO_ACIDS))
     # items() exists on both Python 2 and 3, unlike iteritems().
     for key, rec in aaindex._aaindex.items():
         scoretab[key] = [rec.get(aa) for aa in Const.AMINO_ACIDS]
     self._scoretab = scoretab
예제 #4
0
def getIndexSeq(seq):
    """Sum, per AAindex accession, the record values of every item in ``seq``.

    The accessions come from ``getIndicesUsed()``.  The aaindex module is
    (re)initialised from the current directory on every call.

    Args:
        seq: Iterable whose items are passed to each record's ``get``
            (presumably one-letter amino-acid codes -- TODO confirm).

    Returns:
        list: One running total per accession, in the order returned by
        ``getIndicesUsed()``.  A lookup or addition that raises is reported
        on stdout and leaves that accession's total unchanged for that item.
    """
    import aaindex
    aaindex.init(path='.')

    listIndices = getIndicesUsed()
    listComputedValue = [0] * len(listIndices)

    for char in seq:
        for i, indice in enumerate(listIndices):
            try:
                listComputedValue[i] += aaindex.get(indice).get(char)
            except Exception:
                # Best effort: report the accession and keep accumulating.
                print("ECHEC AVEC INDICES : " + indice)

    return listComputedValue
예제 #5
0
def encode_aaindex_features(sequences):
    """Encode each sequence as a flat row of AAindex descriptor values.

    For every sequence, each residue is expanded into one value per record
    named in ``HQI8_descriptors`` (residue-major order), using
    ``get_aaindex_feature``.

    Args:
        sequences: Iterable of sequences; each sequence is iterated residue
            by residue.

    Returns:
        numpy.ndarray: float64 array with one row per sequence.
        NOTE(review): this presumably requires all sequences to have the
        same length so the rows form a rectangular array -- confirm with
        callers.
    """
    aaindex.init(path='.', index='1')
    aai_recs = [aaindex.get(d) for d in HQI8_descriptors]
    # np.float_ was removed in NumPy 2.0; np.float64 is the dtype it aliased.
    return np.array([[get_aaindex_feature(aa, r, seq) for aa in seq
                      for r in aai_recs] for seq in sequences],
                    dtype=np.float64)
예제 #6
0
#_author: Julian Zubek

import numpy as np
import aaindex

# Initialise the aaindex module when this module is imported, reading from
# the current directory (path='.').
# NOTE(review): index='1' presumably restricts loading to the aaindex1 data
# file -- confirm against the aaindex module.
aaindex.init(path='.', index='1')
# Eight AAindex-style accession identifiers.
# NOTE(review): presumably one record is fetched per entry by the encoding
# code -- verify against callers.
HQI8_descriptors = ["BLAM930101", "BIOV880101", "MAXF760101", "TSAJ990101",
                    "NAKH920108", "CEDJ970104", "LIFS790101", "MIYS990104"]


def get_aaindex_feature(amino_acid, aai_record, seq):
    """Return the value of ``aai_record`` for one amino-acid symbol.

    Special symbols are resolved before the record is consulted:
    "B" and "Z" average the values of D/N and E/Q respectively, "O" uses
    the value of "K", "U" uses the value of "C", and "X", "*" and "-" map
    to 0.0.  Any other symbol is looked up directly.

    If the resulting value is not a float, a message naming the symbol, the
    sequence ``seq`` and the record is printed and the process exits with
    status -1.
    """
    lookup = aai_record.get
    # Ambiguous symbols: mean of the two residues they may stand for.
    averaged_pairs = {"B": ("D", "N"), "Z": ("E", "Q")}
    # Symbols that borrow the value of another residue.
    substitutes = {"O": "K", "U": "C"}

    if amino_acid in averaged_pairs:
        first, second = averaged_pairs[amino_acid]
        val = (lookup(first) + lookup(second)) / 2
    elif amino_acid in substitutes:
        val = lookup(substitutes[amino_acid])
    elif amino_acid in "X*-":
        val = 0.0
    else:
        val = lookup(amino_acid)

    # A non-float (e.g. None) means the record has no value for the symbol.
    if not isinstance(val, float):
        print("""Unrecognised amino acid symbol {0} found in sequence {1}
                 for descriptor {2}""".format(amino_acid, seq, aai_record))
        exit(-1)
    return val
예제 #7
0
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 20 18:07:22 2017

@author: Patrick
"""
from __future__ import division
import aaindex
from aa_indeces import aai_to_get
from math import cos, sin, radians, degrees
# Initialise the aaindex module at import time, reading from the current
# directory (path='.').
aaindex.init(path='.')


def aaf(sequence, identifier, mode):
    """Aggregate the values of one AAindex record over a sequence.

    Args:
        sequence: Iterable of keys passed to the record's ``get``
            (presumably one-letter amino-acid codes -- TODO confirm).
            Must support ``len`` when ``mode`` is "mean".
        identifier: Key handed to ``aaindex.get`` to select the record.
        mode: "mean", "total" or "max".

    Returns:
        The mean, the sum or the maximum of the per-residue values,
        depending on ``mode``; ``None`` for any other mode.

    Raises:
        ZeroDivisionError: ``mode`` is "mean" and ``sequence`` is empty.
        ValueError: ``mode`` is "max" and ``sequence`` is empty.
    """
    record = aaindex.get(identifier)

    # Look each residue up once and reuse the value for both the running
    # total and the maximum.
    values = [record.get(aa) for aa in sequence]

    # Explicit left-to-right accumulation rather than sum(): newer Python
    # versions use compensated float summation in sum(), which could change
    # the last bits of the result.
    total = 0
    for value in values:
        total = total + value

    if mode == "mean":
        return total / len(sequence)
    if mode == "total":
        return total
    if mode == "max":
        return max(values)
    # Unknown modes yield None, as before.
    return None


def aaf_angular(sequence, identifier, mode):
예제 #8
0
def get_vector(structure, path_file_pssm, path_aaindex, path_file_rsa,
               path_file_asa, list_dssp_features):
    """
    Build the feature vector of every residue of a structure.

    Returns a list with one feature list per residue.  Each feature list
    holds, in order: the ASA value, the RSA value, the two PSSM-derived
    values, the QIPI value, the pseudo hydrophobicity, the twelve AAindex
    values (in the order of ``accessions`` below) and finally the entries
    of ``list_dssp_features`` for that residue.
    """
    aaindex.init(path_aaindex)

    accessions = (
        'ARGP820101',  # Hydrophobicity
        'HOPT810101',  # Hydrophilicity
        'GRAR740102',  # Polarity
        'CHAM820101',  # Polarizability
        'WERD780101',  # Propensity
        'JANJ780101',  # Average accessible surface area
        'LEVM760105',  # Radius of gyration of side chain
        'KRIW790103',  # Side chain volume
        'KLEP840101',  # Charge
        'FAUJ880109',  # Number of hydrogen bond donors
        'FASG760101',  # Molecular weight
        'VELV850101',  # Electron-ion interaction potential
    )
    records = [aaindex.get(accession) for accession in accessions]
    # The pseudo hydrophobicity helper needs these two records explicitly.
    hydrophobicity, charge = records[0], records[8]

    # Per-residue values read from the ASA, RSA and PSSM files.
    asa_values = get_asa_value(path_file_asa)
    rsa_values = get_rsa_relative(path_file_rsa)
    ipp_values, rwgrmtp_values = get_pssm_value(path_file_pssm)

    QIPI = {
        'H': 1.147, 'R': 1.346, 'K': 0.784, 'A': 0.841, 'V': 0.994,
        'I': 1.084, 'L': 1.144, 'M': 1.451, 'P': 1.109, 'F': 1.334,
        'W': 1.284, 'Y': 1.368, 'G': 0.823, 'C': 1.172, 'S': 0.873,
        'T': 0.966, 'N': 0.958, 'Q': 0.909, 'D': 0.830, 'E': 0.805,
    }

    # NOTE(review): every iteration overwrites ``sequence``, so only the
    # last peptide built from the structure is used (and the name is
    # undefined when no peptide is built) -- confirm this is intended.
    builder = PPBuilder()
    for peptide in builder.build_peptides(structure):
        sequence = list(peptide.get_sequence())

    vectors = []
    for position, residue in enumerate(sequence):
        features = [
            asa_values[position],
            rsa_values[position],
            ipp_values[position],
            rwgrmtp_values[position],
            QIPI[residue],
            get_pseudo_hydrophobicity(residue, hydrophobicity, charge),
        ]
        features.extend(record.get(residue) for record in records)
        vectors.append(features + list_dssp_features[position])
    return vectors
예제 #9
0
from db_mapping.up_parsing import UPParser

# NOTE(review): judging by the name and extension this is an SQLite database
# file; it is not used in the visible code -- confirm its role.
hqi_db = "hqi.sqlite"

# liu_indices: hydrophobicity, hydrophilicity, volumes of side chains of
# amino acids, polarity, polarizability, solvent-accessible surface area,
# net charge index
liu_indices = [
    "ARGP820101", "HOPT810101", "KRIW790101", "GRAR740102", "CHAM820101",
    "ROSG850101", "KLEP840101"
]
# hqi8_indices: a second set of eight AAindex accession identifiers.
hqi8_indices = [
    "BLAM930101", "BIOV880101", "MAXF760101", "TSAJ990101", "NAKH920108",
    "CEDJ970104", "LIFS790101", "MIYS990104"
]

# Initialise the aaindex module at import time from the current directory.
# NOTE(review): index='1' presumably restricts loading to the aaindex1 data
# file -- confirm against the aaindex module.
aaindex.init(path='.', index='1')
# Fetch the record for every accession once, in list order, so later lookups
# can reuse them.
aai_recs = [aaindex.get(d) for d in liu_indices]
hqi8_recs = [aaindex.get(d) for d in hqi8_indices]


def get_aaindex_feature(amino_acid, aai_record):
    # B and Z are ambiguous amino acids.
    if amino_acid == "B":
        val = (aai_record.get("D") + aai_record.get("N")) / 2
    elif amino_acid == "Z":
        val = (aai_record.get("E") + aai_record.get("Q")) / 2
    elif amino_acid == "O":
        val = aai_record.get("K")
    elif amino_acid == "U":
        val = aai_record.get("C")
    elif amino_acid in "X*-":
예제 #10
0
import pandas as pd
import numpy as np
import pickle
import aaindex
from Bio import SeqIO

# Initialise the aaindex module at import time from the ./aaindex/ directory.
aaindex.init("./aaindex/")


def comp_sites(a, b, idx):
    """
    Dissimilarity between amino acids ``a`` and ``b`` under AAindex entry ``idx``.

    Computed as ``get(a, a) + get(b, b) - 2 * get(a, b)`` on the record
    returned by ``aaindex.get(idx)``.  Returns None when any of the three
    lookups yields None.
    """
    matrix = aaindex.get(idx)
    self_a = matrix.get(a, a)
    self_b = matrix.get(b, b)
    cross = matrix.get(a, b)

    # A missing entry in the record makes the distance undefined.
    if self_a is None or self_b is None or cross is None:
        return None
    return self_a + self_b - 2 * cross


def pair_idx_encode(s1, s2, idx):
    """
    Calculate dissimilarity between two sequences
예제 #11
0
from encoding_pairs import batch_idxList_encode

import pickle
import aaindex

# Initialise the aaindex module at import time from the ./aaindex/ directory.
aaindex.init('./aaindex/')


def encode_subtype(subtype):
    """Run ``batch_idxList_encode`` on the sequence file of ``subtype``.

    The sequence file path is '../data/<subtype>_seqs'; the index key list
    is unpickled from './aaindex/indexKeyList_2_pickle'.  Nothing is
    returned.
    """
    sequence_path = '../data/' + subtype + '_seqs'
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use with a trusted, locally generated key list.
    with open('./aaindex/indexKeyList_2_pickle', 'rb') as handle:
        index_keys = pickle.load(handle)

    batch_idxList_encode(sequence_path, index_keys)