def classify(sdf, label, lambdas):
    """Assign every molecule of an SD file to a class and write the result.

    Each record of ``sdf`` is read, its ``label`` property is converted to a
    number with ``floatify`` and the number is tested against the predicates
    in ``lambdas``. The key of the first predicate returning a truthy value
    is stored on the molecule as property ``label + "_class"`` and the
    molecule is written to ``"<prefix>_class.sdf"``, where ``<prefix>`` is
    the part of ``sdf`` before its first ``".sdf"``.

    A record is skipped, with a message on stderr, when it could not be
    read (the supplier yields ``None``), when ``floatify`` returns ``None``,
    or when no predicate matches. If writing a molecule fails, it is rebuilt
    from its ``"SMILES"`` property with fresh 2D coordinates and written
    again; if any step of that fallback fails the molecule is skipped.

    The zero-based record index is printed to stdout as a progress marker
    and is the number used in every diagnostic message.

    Args:
        sdf: Path of the input SD file.
        label: Name of the molecule property holding the value to classify.
        lambdas: Mapping of class name to a one-argument predicate. The
            predicates are tried in the mapping's iteration order and the
            first match wins. Class names are handed to ``SetProp`` as-is
            (presumably they must be strings -- verify against callers).

    Returns:
        None. The output is the SD file written next to the input.

    Note:
        A readable record that lacks the ``label`` property is not handled
        specially; whatever ``GetProp`` raises propagates to the caller. The
        writer is closed in that case too.
    """
    new_filename = "%s_class.sdf" % sdf.split('.sdf')[0]
    new_label = label + "_class"
    sdm = ForwardSDMolSupplier(sdf, strictParsing=False, removeHs=False,
                               sanitize=False)
    sdw = SDWriter(new_filename)
    try:
        for index, mol in enumerate(sdm):
            print(index)
            sys.stdout.flush()

            if mol is None:
                print("%d rdkit couldn't read molecule" % index,
                      file=sys.stderr)
                sys.stderr.flush()
                continue

            raw_value = mol.GetProp(label)
            prop = floatify(raw_value)
            if prop is None:
                print("couldn't convert %s to float or int...skip" % raw_value,
                      file=sys.stderr)
                sys.stderr.flush()
                continue

            # First predicate that accepts the value decides the class.
            c = None
            for class_name, predicate in lambdas.items():
                if predicate(prop):
                    c = class_name
                    print("hit %s" % class_name)
                    sys.stdout.flush()
                    break

            if c is None:
                print("%d no prop range matched '%s' ..skip"
                      % (index, raw_value), prop, type(prop), file=sys.stderr)
                sys.stderr.flush()
                sys.stdout.flush()
                continue

            mol.SetProp(new_label, c)
            try:
                sdw.write(mol)
            except Exception:
                print("couldn't write mol %d to file, try to build mol from "
                      "smiles" % index, file=sys.stderr)
                # The whole rebuild is guarded: a missing SMILES property or
                # an unparsable SMILES must skip this molecule, not abort
                # the run.
                try:
                    rebuilt = MolFromSmiles(mol.GetProp("SMILES"))
                    AllChem.Compute2DCoords(rebuilt)
                    rebuilt.SetProp(new_label, c)
                    sdw.write(rebuilt)
                except Exception:
                    print("couldn't write mol %d to file...skip" % index,
                          file=sys.stderr)
    finally:
        # Always release the output file, even when a record raises.
        sdw.close()
def extract_substructure_information(self, radii, smi_dict):
    """Collect, per fingerprint bit, the ids of the molecules that set it.

    For every entry of ``smi_dict`` a molecule is built from the SMILES
    string, a Morgan fingerprint of radius ``max(radii)`` is computed, and
    each bit whose first recorded entry has a radius contained in ``radii``
    is merged into ``self.substructure_dictionary`` through
    ``self._combine_dicts``.

    Args:
        radii: Collection of radii to keep. It must be non-empty
            (``max(radii)`` is evaluated) and support ``in`` tests.
        smi_dict: Mapping of key to SMILES string. The key is used as the
            molecule id when ``self.name_field`` is None; otherwise the id
            is read from the molecule property named by ``self.name_field``.

    Side effects:
        Sets ``self.radii``, ``self.nb_substructures`` and
        ``self.max_radius``; appends one id per molecule to
        ``self.mols_ids``; rebinds ``self.substructure_dictionary``.

    Raises:
        ValueError: If ``radii`` is empty.

    Note:
        An unparsable SMILES is not handled here (``MolFromSmiles``
        presumably yields None for it -- verify); the failure surfaces from
        the fingerprint call.
    """
    self.radii = radii
    # Loop-invariant: evaluated once instead of once per molecule.
    max_radius = max(radii)

    for akey, smiles in smi_dict.items():
        info = {}
        mol = MolFromSmiles(smiles)
        # Called only to populate `info`; the fingerprint itself is unused.
        _GetMorganFingerprint(mol, max_radius, bitInfo=info)

        if self.name_field is None:
            mol_id = akey
        else:
            mol_id = mol.GetProp(self.name_field)
        self.mols_ids.append(mol_id)

        # hits[0][1] is the second field of the first entry recorded for the
        # bit (presumably an (atom index, radius) pair -- confirm against
        # the RDKit bitInfo documentation).
        substructure_dictionary = {
            bit: [mol_id]
            for bit, hits in info.items()
            if hits[0][1] in radii
        }
        self.substructure_dictionary = self._combine_dicts(
            substructure_dictionary, self.substructure_dictionary)

    self.nb_substructures = len(self.substructure_dictionary)
    self.max_radius = max_radius