def extract_named_descriptors_of_seq(sequence):
    """
    Build a map ("descriptor" -> value) for a sequence of amino acids.

    Sequence-level descriptors (pseudo amino acid composition, amino acid
    composition and CTD) are only computed when the sequence is longer than
    three residues; the pseudo amino acid composition is requested with
    ``lamda=3``. Molecule-level descriptors (kappa, charge, Moreau-Broto
    autocorrelation, molecular properties and Basak indices) are computed
    from ``Chem.MolFromSequence`` and merged in afterwards; on a key clash
    the later group wins.

    The Geary, Moran and MOE descriptor families are not included.

    :param sequence: amino acid sequence; converted with ``str()`` before use
    :return: dict mapping descriptor name to value
    """
    seq_text = str(sequence)
    descriptors = {}

    if len(seq_text) > 3:
        descriptors = PseudoAAC.GetPseudoAAC(
            seq_text,
            lamda=3,
            weight=0.05,
            AAP=[
                _Hydrophobicity, _hydrophilicity, _residuemass, _pK1, _pK2,
                _pI
            ])
        sequence_level = (
            AAComposition_extra_PS.CalculateAAComposition,
            CTD.CalculateCTD,
        )
        for compute in sequence_level:
            descriptors.update(compute(seq_text))

    molecule = Chem.MolFromSequence(seq_text)
    molecule_level = (
        kappa.GetKappa,
        charge.GetCharge,
        moreaubroto.GetMoreauBrotoAuto,
        molproperty.GetMolecularProperty,
        basak.Getbasak,
    )
    for compute in molecule_level:
        descriptors.update(compute(molecule))

    return descriptors
def main_process(self, str_pep_file, str_des_out):
    """
    Generate structures for the peptides of a file and compute descriptors.

    The peptide file is read through ``Str_DS_class().read_pep_file``, the
    structures are generated with ``Str_DS_class().structure_gen`` and every
    molecule found in ``./strs/<peptide>.sdf`` is turned into one descriptor
    row via ``Str_DS_class().new_Des_gene``.

    :param str_pep_file: peptide input file (presumably a path; it is handed
        unchanged to ``read_pep_file``); also stored on the instance
    :param str_des_out: descriptor output location; only stored on the
        instance here, nothing is written by this method
    :return: tuple ``(descriptor DataFrame, list_class_label)`` where the
        second element is returned by ``read_pep_file``
    """
    self.str_pep_file = str_pep_file
    self.str_des_out = str_des_out

    my_pep, list_class_label = Str_DS_class().read_pep_file(
        self.str_pep_file)
    Str_DS_class().structure_gen(my_pep)

    sValues = []

    # The progress bar is sized by the number of .sdf files on disk, while
    # the loop below walks the peptide names; stale files in ./strs make the
    # two disagree.  NOTE(review): confirm whether len(my_pep) is intended.
    sdf_names = glob.glob(os.path.join("./strs", '*.sdf'))
    n = len(sdf_names)

    # Single-argument call form prints the same text on Python 2 and 3.
    print("Descriptors being calculated !")
    bar2 = Bar('Processing SDF file', fill='>', max=n)

    for sdn in my_pep:
        mols = Chem.SDMolSupplier(os.path.join("./strs", sdn + ".sdf"))
        for mol in mols:
            sValue = Str_DS_class().new_Des_gene(mol)
            sValues.append(sValue)
        # One tick per peptide file, not per molecule.
        bar2.next()

    sDF1 = pd.DataFrame(sValues)
    bar2.finish()
    print("Descripor calculation has been finished ")

    return sDF1, list_class_label
def smiles_converter(self, smi, L):
    """
    Compute descriptors for one SMILES string and append them to ``L``.

    Constitutional, connectivity and MACCS fingerprint descriptors are
    mandatory: if any of them fails, nothing that follows is computed.
    Kappa, MOE and charge descriptors are optional and skipped individually
    on failure. All descriptor groups collected are merged into a single
    dict (later groups overwrite earlier ones on a key clash), the stripped
    SMILES is stored under ``"smiles"`` and the dict is appended to ``L``.
    When no descriptor group could be computed, ``L`` is left untouched.

    :param smi: SMILES string of the molecule
    :param L: list that receives the merged descriptor dict (mutated in place)
    :return: None
    """
    groups = []
    try:
        mol = Chem.MolFromSmiles(smi)
        groups.append(constitution.GetConstitutional(mol))
        groups.append(connectivity.GetConnectivity(mol))
        groups.append(fingerprint.CalculateMACCSFingerprint(mol)[1])

        # Optional groups: best effort, a failure only drops that group.
        # ``Exception`` (not a bare except) so that KeyboardInterrupt and
        # SystemExit still stop a long-running batch.
        try:
            groups.append(kappa.GetKappa(mol))
        except Exception:
            pass

        try:
            # ``drug`` is not defined in this function, so it is shared
            # state that gets reloaded for this SMILES here.
            drug.ReadMolFromSmile(smi)
            groups.append(drug.GetMOE())
        except Exception:
            pass

        try:
            groups.append(charge.GetCharge(mol))
        except Exception:
            pass
    except Exception:
        # A mandatory group failed (e.g. unparsable SMILES): keep whatever
        # was collected before the failure.
        pass

    if groups:
        # Merged into one dict; duplicate keys keep the value of the last
        # group that defines them.
        super_dict = {}
        for group in groups:
            super_dict.update(group.items())
        super_dict["smiles"] = smi.strip()
        L.append(super_dict)
def get_f1_f2(row):
    """
    Compute both drug-protein interaction feature sets for one row.

    The protein sequence is looked up from ``row['target_id']``, its amino
    acid composition is combined with the kappa descriptors of
    ``row['smiles']`` and the two resulting feature sets are appended to the
    module-level ``result_list_F1`` / ``result_list_F2``. Rows that fail at
    any step are recorded in the module-level ``target_id_errors`` instead.

    Whenever ``row.name`` is a multiple of 500 the three lists are dumped to
    CSV files under ``export_path`` as a checkpoint.

    :param row: mapping with ``'target_id'`` and ``'smiles'`` entries and a
        numeric ``name`` attribute (presumably a row of a pandas DataFrame
        processed with ``apply(axis=1)`` -- confirm against the caller)
    :return: None
    """
    try:
        # ``dpi`` is not defined in this function: the sequence loaded here
        # is read back by GetAAComp() through that shared object.
        protein_sequence = dpi.GetProteinSequenceFromID(row['target_id'])
        dpi.ReadProteinSequence(protein_sequence)
        aa_composition = dpi.GetAAComp()  # amino acid composition

        molecule = Chem.MolFromSmiles(row['smiles'])
        kappa_descriptors = kappa.GetKappa(molecule)

        if row.name % 500 == 0:
            # Checkpoint: results are flushed incrementally so that a long
            # run can be inspected or recovered.
            pd.DataFrame(result_list_F1).to_csv(
                export_path + "export_partial_f1.csv")
            pd.DataFrame(result_list_F2).to_csv(
                export_path + "export_partial_f2.csv")
            # smiles and target_id of the rows that raised errors
            pd.DataFrame(target_id_errors).to_csv(export_path + "errors.csv")

        # Compute both feature sets before storing either one, so that a
        # failure in the second cannot leave the two result lists with
        # different lengths.
        feature_1 = dpi.GetDPIFeature1(kappa_descriptors, aa_composition)
        feature_2 = dpi.GetDPIFeature2(kappa_descriptors, aa_composition)
        result_list_F1.append(feature_1)
        result_list_F2.append(feature_2)
    except Exception:
        # Best effort: remember the failing row and carry on.  ``Exception``
        # (not a bare except) keeps Ctrl-C able to stop the run.
        dic = {'smiles': row['smiles'], 'target_id': row['target_id']}
        target_id_errors.append(dic)

    # NOTE(review): assumed to be a per-row progress trace; confirm it was
    # not meant to run only for failing rows.
    print(row.name)