def getIndexChar(char):
    """Return the values of a fixed set of AAindex entries for one residue.

    Args:
        char: One-letter amino-acid code passed to each record's ``get``.

    Returns:
        A list with one value per index that could be read, in the order of
        ``listIndices``.  An index whose lookup raises is reported on stdout
        and contributes nothing, so the result may be shorter than
        ``listIndices``.
    """
    import aaindex

    aaindex.init(path='.')

    listIndices = [
        'KARS160114', 'KARS160106', 'MIYS990101',
        'CASG920101', 'FAUJ880111', 'FAUJ880112',
    ]

    listComputedValue = []
    for indice in listIndices:
        try:
            x = aaindex.get(indice)
            listComputedValue.append(x.get(char))
            print(indice)
        except Exception:
            # Best effort: report the failing index ("ECHEC" = failure) and
            # carry on with the remaining ones.
            print("ECHEC AVEC INDICES : " + indice)
    return listComputedValue
def get_aaindex_features(AA):
    """Look up twelve physicochemical AAindex values for one amino acid.

    The database is (re)initialised from ``../data/aaindex`` on every call.

    Args:
        AA: Amino-acid symbol handed to each record's ``get``.

    Returns:
        A list of twelve values, ordered as the accession numbers below.
    """
    aaindex.init(path='../data/aaindex')

    accession_ids = (
        'ARGP820101',  # hydrophobicity
        'HOPT810101',  # hydrophilicity
        'GRAR740102',  # polarity
        'CHAM820101',  # polarizability
        'WERD780101',  # propensity
        'JANJ780101',  # average accessible surface area
        'LEVM760105',  # radius of gyration of side chain
        'KRIW790103',  # side chain volume
        'KLEP840101',  # charge
        'FAUJ880109',  # number of hydrogen bond donors
        'FASG760101',  # molecular weight
        'VELV850101',  # electron-ion interaction potential
    )

    # Fetch every record first, then query them, as two separate passes.
    records = [aaindex.get(accession) for accession in accession_ids]
    return [record.get(AA) for record in records]
def _load_score_tab(self, **kwargs):
    """Build the amino-acid x index score table and store it on the instance.

    Keyword Args:
        aaindex_dir: Directory passed to ``aaindex.init``; defaults to
            ``'datasets/aaindex/'``.

    Side effects:
        Sets ``self._scoretab`` to a DataFrame indexed by
        ``Const.AMINO_ACIDS`` with one column per loaded AAindex record.
    """
    # dict.get / dict.items work on both Python 2 and 3, unlike
    # has_key / iteritems.
    aaindex_dir = kwargs.get('aaindex_dir', 'datasets/aaindex/')
    aaindex.init(aaindex_dir, '1')

    scoretab = pd.DataFrame(index=list(Const.AMINO_ACIDS))
    for key, rec in aaindex._aaindex.items():
        scoretab[key] = [rec.get(aa) for aa in Const.AMINO_ACIDS]
    self._scoretab = scoretab
def getIndexSeq(seq):
    """Sum, per AAindex entry, the values of every residue in a sequence.

    Args:
        seq: Iterable of one-letter amino-acid codes.

    Returns:
        A list parallel to ``getIndicesUsed()`` holding the accumulated value
        of each index over ``seq``.  A residue/index pair whose lookup or
        addition raises is reported on stdout and skipped, leaving the
        running total for that index unchanged.
    """
    import aaindex

    aaindex.init(path='.')

    listIndices = getIndicesUsed()
    listComputedValue = [0] * len(listIndices)

    for char in seq:
        for i, indice in enumerate(listIndices):
            try:
                x = aaindex.get(indice)
                listComputedValue[i] += x.get(char)
            except Exception:
                # Best effort: report the failing index ("ECHEC" = failure)
                # and keep accumulating the others.
                print("ECHEC AVEC INDICES : " + indice)
    return listComputedValue
def encode_aaindex_features(sequences):
    """Encode sequences as float arrays of HQI8 AAindex values.

    Args:
        sequences: Iterable of amino-acid sequences.

    Returns:
        A ``float64`` NumPy array with one row per sequence.  Each row lists,
        residue by residue, the value of every record named in
        ``HQI8_descriptors``.
        NOTE(review): rows only stack into a rectangular array when all
        sequences have the same length - confirm against callers.
    """
    aaindex.init(path='.', index='1')
    aai_recs = [aaindex.get(d) for d in HQI8_descriptors]

    encoded = [
        [get_aaindex_feature(aa, r, seq) for aa in seq for r in aai_recs]
        for seq in sequences
    ]
    # np.float_ was removed in NumPy 2.0; np.float64 is the type it aliased.
    return np.array(encoded, dtype=np.float64)
#_author: Julian Zubek

import sys

import numpy as np
import aaindex

aaindex.init(path='.', index='1')

HQI8_descriptors = [
    "BLAM930101", "BIOV880101", "MAXF760101", "TSAJ990101",
    "NAKH920108", "CEDJ970104", "LIFS790101", "MIYS990104",
]


def get_aaindex_feature(amino_acid, aai_record, seq):
    """Return the value of one AAindex record for one residue symbol.

    Args:
        amino_acid: Residue symbol to look up.
        aai_record: Object exposing ``get(symbol)`` for the index.
        seq: Sequence the residue came from; used only in the error message.

    Returns:
        The float value for ``amino_acid``.  ``B`` and ``Z`` are scored as
        the mean of D/N and E/Q, ``O`` as K, ``U`` as C, and any of
        ``X``, ``*``, ``-`` as ``0.0``.

    Raises:
        SystemExit: with status -1, after printing a message, when the
            resolved value is not a float (e.g. an unknown symbol).
    """
    # B and Z are ambiguous amino acids.
    if amino_acid == "B":
        val = (aai_record.get("D") + aai_record.get("N")) / 2
    elif amino_acid == "Z":
        val = (aai_record.get("E") + aai_record.get("Q")) / 2
    elif amino_acid == "O":
        val = aai_record.get("K")
    elif amino_acid == "U":
        val = aai_record.get("C")
    elif amino_acid in "X*-":
        val = 0.0
    else:
        val = aai_record.get(amino_acid)

    # Checking for "None" type in case of an unspecified amino acid character.
    if not isinstance(val, float):
        print("Unrecognised amino acid symbol {0} found in sequence {1} "
              "for descriptor {2}".format(amino_acid, seq, aai_record))
        # sys.exit instead of the site-provided exit(), which is not
        # guaranteed to exist outside interactive sessions.
        sys.exit(-1)
    return val
# -*- coding: utf-8 -*- """ Created on Wed Dec 20 18:07:22 2017 @author: Patrick """ from __future__ import division import aaindex from aa_indeces import aai_to_get from math import cos, sin, radians, degrees aaindex.init(path='.') def aaf(sequence, identifier, mode): x = aaindex.get(identifier) total = 0 total_list = [] for aa in sequence: total = total + x.get(aa) total_list.append(x.get(aa)) if mode == "mean": return total / len(sequence) elif mode == "total": return total elif mode == "max": return max(total_list) def aaf_angular(sequence, identifier, mode):
def get_vector(structure,
               path_file_pssm,
               path_aaindex,
               path_file_rsa,
               path_file_asa,
               list_dssp_features):
    """Build one feature list per residue of ``structure``.

    Args:
        structure: Object handed to ``PPBuilder().build_peptides``.
        path_file_pssm: Path given to ``get_pssm_value``.
        path_aaindex: Directory given to ``aaindex.init``.
        path_file_rsa: Path given to ``get_rsa_relative``.
        path_file_asa: Path given to ``get_asa_value``.
        list_dssp_features: Indexable of lists; entry ``i`` is appended to
            the features of the residue at position ``i``.

    Returns:
        A list with one entry per residue: ASA, RSA, the two PSSM values,
        QIPI, pseudo hydrophobicity, twelve AAindex values, then the DSSP
        features.

    NOTE(review): the position index restarts at 0 for every polypeptide
    returned by ``build_peptides``, so all per-residue inputs are read from
    their start again for each peptide - confirm this is intended.
    """
    aaindex.init(path_aaindex)

    # AAindex accessions in output order.
    accessions = (
        'ARGP820101',  # hydrophobicity
        'HOPT810101',  # hydrophilicity
        'GRAR740102',  # polarity
        'CHAM820101',  # polarizability
        'WERD780101',  # propensity
        'JANJ780101',  # average accessible surface area
        'LEVM760105',  # radius of gyration of side chain
        'KRIW790103',  # side chain volume
        'KLEP840101',  # charge
        'FAUJ880109',  # number of hydrogen bond donors
        'FASG760101',  # molecular weight
        'VELV850101',  # electron-ion interaction potential
    )
    records = [aaindex.get(accession) for accession in accessions]
    hydrophobicity_rec = records[accessions.index('ARGP820101')]
    charge_rec = records[accessions.index('KLEP840101')]

    asa_values = get_asa_value(path_file_asa)
    rsa_values = get_rsa_relative(path_file_rsa)
    ipp_values, rwgrmtp_values = get_pssm_value(path_file_pssm)

    qipi = {
        'H': 1.147, 'R': 1.346, 'K': 0.784, 'A': 0.841, 'V': 0.994,
        'I': 1.084, 'L': 1.144, 'M': 1.451, 'P': 1.109, 'F': 1.334,
        'W': 1.284, 'Y': 1.368, 'G': 0.823, 'C': 1.172, 'S': 0.873,
        'T': 0.966, 'N': 0.958, 'Q': 0.909, 'D': 0.830, 'E': 0.805,
    }

    vectors = []
    for peptide in PPBuilder().build_peptides(structure):
        residues = list(peptide.get_sequence())
        for pos, residue in enumerate(residues):
            features = [
                asa_values[pos],
                rsa_values[pos],
                ipp_values[pos],
                rwgrmtp_values[pos],
                qipi[residue],
                get_pseudo_hydrophobicity(residue,
                                          hydrophobicity_rec,
                                          charge_rec),
            ]
            features.extend(record.get(residue) for record in records)
            vectors.append(features + list_dssp_features[pos])

    return vectors
from db_mapping.up_parsing import UPParser

hqi_db = "hqi.sqlite"

# liu_indices: hydrophobicity, hydrophilicity, volumes of side chains of
# amino acids, polarity, polarizability, solvent-accessible surface area,
# net charge index
liu_indices = [ "ARGP820101", "HOPT810101", "KRIW790101", "GRAR740102", "CHAM820101", "ROSG850101", "KLEP840101" ]

# Second, eight-entry set of AAindex accession numbers.
hqi8_indices = [ "BLAM930101", "BIOV880101", "MAXF760101", "TSAJ990101", "NAKH920108", "CEDJ970104", "LIFS790101", "MIYS990104" ]

# Load the database once at import time and resolve both accession lists to
# their records.
aaindex.init(path='.', index='1')
aai_recs = [aaindex.get(d) for d in liu_indices]
hqi8_recs = [aaindex.get(d) for d in hqi8_indices]


# Looks up a value for `amino_acid` in `aai_record`, with special handling for
# non-standard symbols.  NOTE: the source is cut off inside this function, so
# only the visible branches are shown and nothing below is completed.
def get_aaindex_feature(amino_acid, aai_record):
    # B and Z are ambiguous amino acids.
    if amino_acid == "B":
        # Mean of the D and N values.
        val = (aai_record.get("D") + aai_record.get("N")) / 2
    elif amino_acid == "Z":
        # Mean of the E and Q values.
        val = (aai_record.get("E") + aai_record.get("Q")) / 2
    elif amino_acid == "O":
        # O is scored as K.
        val = aai_record.get("K")
    elif amino_acid == "U":
        # U is scored as C.
        val = aai_record.get("C")
    elif amino_acid in "X*-":
import pandas as pd import numpy as np import pickle import aaindex from Bio import SeqIO aaindex.init("./aaindex/") def comp_sites(a, b, idx): """ Calculate dissimilarity between amino acids from AAIndex """ x = aaindex.get(idx) m = x.get(a, a) n = x.get(b, b) k = x.get(a, b) NoneType = type(None) if isinstance(m, NoneType) or isinstance(n, NoneType) or isinstance( k, NoneType): # print "Unattended gaps in this AAIndex matrice" dist = None else: dist = m + n - 2 * k return dist def pair_idx_encode(s1, s2, idx): """ Calculate dissimilarity between two sequences
from encoding_pairs import batch_idxList_encode
import pickle
import aaindex

aaindex.init('./aaindex/')


def encode_subtype(subtype):
    """Run the batch pair encoding for the sequence file of one subtype.

    Args:
        subtype: String used to build the input path
            ``'../data/<subtype>_seqs'``.

    The list of index keys is unpickled from
    ``./aaindex/indexKeyList_2_pickle`` and passed, together with the
    sequence file path, to ``batch_idxList_encode``.
    """
    sequence_path = '../data/' + subtype + '_seqs'

    # The pickle is a local project artefact, not external input.
    with open('./aaindex/indexKeyList_2_pickle', 'rb') as handle:
        index_keys = pickle.load(handle)

    batch_idxList_encode(sequence_path, index_keys)