def set_reaction(self, product_smiles):
    """Apply ``self.rxn`` to a product and keep ring-consistent reactant sets.

    Args:
        product_smiles: Value handed to ``SM`` to build the product molecule
            (``SM`` is defined elsewhere; presumably a SMILES parser --
            confirm against the file's imports).

    Returns:
        A list of the reactant sets from ``self.rxn.RunReactants`` whose
        summed ring count minus the product's ring count equals
        ``self.ring_change_count``. Returns ``[]`` when the product cannot
        be built or its rings cannot be counted.

    Raises:
        SystemExit: If running the reaction itself fails. Diagnostics are
            printed first. The exit status is left at the default, as in
            the previous ``exit()`` call.
    """
    # ``except Exception`` (not a bare ``except``) so that Ctrl-C and
    # interpreter shutdown are never swallowed by these handlers.
    try:
        product_mol = SM(product_smiles)
        prod_num_rings = CalcNumRings(product_mol)
    except Exception:
        print("error in the product", product_smiles)
        return []

    try:
        reactant_list = self.rxn.RunReactants([product_mol])
    except Exception:
        print("Reaction failed")
        print(self.reaction_name, self.smarts, product_smiles)
        # Same effect as ``exit()`` but does not depend on the ``site``
        # module having injected that helper into builtins.
        raise SystemExit

    approved_reactants = []
    for reactant_mol in reactant_list:
        # condition 1 - conserved ring count
        try:
            for r in reactant_mol:
                Chem.SanitizeMol(r)
            ring_total = np.sum([CalcNumRings(r) for r in reactant_mol])
            if ring_total - prod_num_rings == self.ring_change_count:
                approved_reactants.append(reactant_mol)
        except Exception:
            # Best effort: report the offending reactant set and move on.
            print("could not sanitize ", product_smiles,
                  ".".join([MS(r) for r in reactant_mol]))
    return approved_reactants
def convert_to_featmorgan(SMILES):
    """Return a feature-Morgan bit fingerprint as a list of floats.

    ``MS`` (defined elsewhere; presumably turns the SMILES input into a
    molecule here -- confirm) is applied to ``SMILES``. The result is
    fingerprinted with ``Chem.GetMorganFingerprintAsBitVect`` using radius 2,
    ``nBits=1024`` and ``useFeatures=True``. The returned list holds one float
    per character of the fingerprint's bit string.
    """
    molecule = MS(SMILES)
    fingerprint = Chem.GetMorganFingerprintAsBitVect(
        molecule, 2, nBits=1024, useFeatures=True)
    bit_chars = list(fingerprint.ToBitString())
    bit_values = np.asarray(bit_chars, dtype=float)
    return list(bit_values)
def decompose(self, mol_id, smiles, reaction_func):
    """Run a decomposition reaction and format each distinct result as a record.

    Args:
        mol_id: Identifier of the molecule; converted with ``str`` for output.
        smiles: String passed to ``reaction_func.run_reaction`` and embedded
            in every record.
        reaction_func: Object providing ``run_reaction(smiles)`` (an iterable
            of reactant groups) and a ``reaction_name`` string.

    Returns:
        A list of strings of the form
        ``"<mol_id>|<smiles>|<reaction_name>|<r1>.<r2>..."`` in first-seen
        order, with repeated records removed. Each ``<r>`` is the result of
        ``MS`` on one reactant (``MS`` is defined elsewhere; presumably it
        returns a SMILES string -- confirm).
    """
    # perform the decomposition reaction
    reactant_list = reaction_func.run_reaction(smiles)

    output_list = []
    # The previous check compared a list of reactant SMILES against the
    # already formatted strings in output_list, which can never match, so
    # duplicates were always logged. Compare formatted records instead.
    seen_records = set()
    for reactant_mol in reactant_list:
        # converts reactant rdkit mol objects to smiles
        reactant_pair = [MS(mol) for mol in reactant_mol]
        record = (str(mol_id) + "|" + smiles + "|"
                  + reaction_func.reaction_name + "|"
                  + ".".join(reactant_pair))
        # makes sure we don't log duplicates of the decomposition reaction
        if record not in seen_records:
            seen_records.add(record)
            output_list.append(record)
    return output_list
try: mol_desc = convert_to_morgan(mol) dist_val1 = tanimoto_dist(mol_desc, template1) dist_val2 = tanimoto_dist(mol_desc, template2) ave_dist = np.average([dist_val1, dist_val2]) zinc_dist_list.append(ave_dist) except: #desc_func(mol) print("skipped smiles: ") pass return zinc_dist_list template_file = open(args['template_file'], "r") template_desc_list = [ convert_to_morgan(MS(template.strip("\n"))) for template in template_file ] template_file.close() print("inputs", zinc_split_index, split_num) os.system("echo calc_time > zinc_" + str(zinc_split_index) + ".out") #first calculation sorted_dist = [] sorted_index = [] sorted_file_label = [] for zf_index, zf in enumerate( np.array_split(zinc_files, split_num)[zinc_split_index]): st = time.time()
def convert_to_tor(SMILES):
    """Return the ``Tor`` fingerprint of the input as a list of floats.

    ``MS`` and ``Tor`` are defined elsewhere (presumably a SMILES-to-molecule
    conversion and a torsion fingerprint -- confirm). The returned list holds
    one float per character of the fingerprint's bit string.
    """
    molecule = MS(SMILES)
    fingerprint = Tor(molecule)
    bit_chars = list(fingerprint.ToBitString())
    bit_values = np.asarray(bit_chars, dtype=float)
    return list(bit_values)
def convert_to_atompair(SMILES):
    """Return the ``AtomPair`` fingerprint of the input as a list of floats.

    ``MS`` and ``AtomPair`` are defined elsewhere (presumably a
    SMILES-to-molecule conversion and an atom-pair fingerprint -- confirm).
    The returned list holds one float per character of the fingerprint's
    bit string.
    """
    molecule = MS(SMILES)
    fingerprint = AtomPair(molecule)
    bit_chars = list(fingerprint.ToBitString())
    bit_values = np.asarray(bit_chars, dtype=float)
    return list(bit_values)
def convert_to_clogp(SMILES):
    """Return ``MolLogP`` evaluated on the molecule built from ``SMILES``.

    ``MS`` and ``MolLogP`` are defined elsewhere (presumably a
    SMILES-to-molecule conversion and a logP estimator -- confirm).
    """
    molecule = MS(SMILES)
    clogp_value = MolLogP(molecule)
    return clogp_value
def convert_to_MW(SMILES):
    """Return ``Chem.Descriptors.ExactMolWt`` for the molecule from ``SMILES``.

    ``MS`` is defined elsewhere (presumably a SMILES-to-molecule
    conversion -- confirm).
    """
    molecule = MS(SMILES)
    exact_weight = Chem.Descriptors.ExactMolWt(molecule)
    return exact_weight
def convert_to_maccs(SMILES):
    """Return the MACCS keys fingerprint of the input as a list of floats.

    ``MS`` is defined elsewhere (presumably a SMILES-to-molecule
    conversion -- confirm). The fingerprint comes from
    ``Chem.GetMACCSKeysFingerprint``. The returned list holds one float per
    character of its bit string.
    """
    molecule = MS(SMILES)
    fingerprint = Chem.GetMACCSKeysFingerprint(molecule)
    bit_chars = list(fingerprint.ToBitString())
    bit_values = np.asarray(bit_chars, dtype=float)
    return list(bit_values)
def convert_to_num_rings(SMILES):
    """Return ``CalcNumRings`` for the molecule built from ``SMILES``.

    ``MS`` is defined elsewhere (presumably a SMILES-to-molecule
    conversion -- confirm).
    """
    molecule = MS(SMILES)
    ring_count = rdkit.Chem.rdMolDescriptors.CalcNumRings(molecule)
    return ring_count
def convert_to_num_aromatic_hetero(SMILES):
    """Return ``CalcNumAromaticHeterocycles`` for the molecule from ``SMILES``.

    ``MS`` is defined elsewhere (presumably a SMILES-to-molecule
    conversion -- confirm).
    """
    molecule = MS(SMILES)
    heterocycle_count = rdkit.Chem.rdMolDescriptors.CalcNumAromaticHeterocycles(
        molecule)
    return heterocycle_count
def convert_to_HBD(SMILES):
    """Return ``CalcNumHBD`` for the molecule built from ``SMILES``.

    ``MS`` is defined elsewhere (presumably a SMILES-to-molecule
    conversion -- confirm).
    """
    molecule = MS(SMILES)
    donor_count = rdkit.Chem.rdMolDescriptors.CalcNumHBD(molecule)
    return donor_count
def convert_to_cats(SMILES):
    """Return ``cats.getCATs2D`` evaluated on the molecule from ``SMILES``.

    ``MS`` and ``cats`` are defined elsewhere (presumably a
    SMILES-to-molecule conversion and a CATS descriptor module -- confirm).
    """
    molecule = MS(SMILES)
    descriptor = cats.getCATs2D(molecule)
    return descriptor
def convert_to_rdkit(SMILES):
    """Return the ``RDKFingerprint`` of the input as a list of floats.

    ``MS`` and ``RDKFingerprint`` are defined elsewhere (presumably a
    SMILES-to-molecule conversion and RDKit's topological fingerprint --
    confirm). The returned list holds one float per character of the
    fingerprint's bit string.
    """
    molecule = MS(SMILES)
    fingerprint = RDKFingerprint(molecule)
    bit_chars = list(fingerprint.ToBitString())
    bit_values = np.asarray(bit_chars, dtype=float)
    return list(bit_values)