Beispiel #1
0
def classify(sdf, label, lambdas):
    """Tag each molecule of an SD file with a class and write a new SD file.

    Every molecule read from ``sdf`` has its ``label`` property converted to a
    number with ``floatify``. The predicates in ``lambdas`` are tried in
    iteration order, and the key of the first predicate that accepts the value
    is stored on the molecule as the property ``<label>_class``. Tagged
    molecules are written to ``<name>_class.sdf``, where ``<name>`` is the
    part of ``sdf`` before its first ``.sdf``.

    Molecules that cannot be read, whose property cannot be converted, that
    match no predicate, or that cannot be written (even after being rebuilt
    from their ``SMILES`` property) are skipped and reported on stderr. The
    0-based position of every record is printed to stdout as progress, and
    the same 0-based position is used in all stderr messages.

    Args:
        sdf: Path of the input SD file.
        label: Name of the molecule property holding the value to classify.
        lambdas: Mapping of class name to a one-argument predicate that is
            called with the converted property value.
            NOTE(review): keys are passed to ``SetProp`` and are presumably
            expected to be strings -- confirm against callers.
    """
    new_filename = "%s_class.sdf" % sdf.split('.sdf')[0]
    new_label = label + "_class"
    sdm = ForwardSDMolSupplier(sdf,
                               strictParsing=False,
                               removeHs=False,
                               sanitize=False)
    sdw = SDWriter(new_filename)
    # try/finally guarantees the writer is closed (and its output flushed)
    # even when an unexpected exception aborts the loop.
    try:
        for counter, mol in enumerate(sdm):
            print(counter)
            sys.stdout.flush()
            if mol is None:
                print("%d rdkit couldn't read molecule" % counter,
                      file=sys.stderr)
                sys.stderr.flush()
                continue
            prop = floatify(mol.GetProp(label))
            if prop is None:
                print("couldn't convert %s to float or int...skip" %
                      mol.GetProp(label),
                      file=sys.stderr)
                sys.stderr.flush()
                continue
            c = None
            for k, predicate in lambdas.items():
                if predicate(prop):
                    c = k
                    print("hit %s" % k)
                    sys.stdout.flush()
                    break
            if c is None:
                print("%d no prop range matched '%s' ..skip" %
                      (counter, mol.GetProp(label)),
                      prop,
                      type(prop),
                      file=sys.stderr)
                sys.stderr.flush()
                sys.stdout.flush()
                continue
            mol.SetProp(new_label, c)
            try:
                sdw.write(mol)
            except Exception:
                print(
                    "couldn't write mol %d to file, try to build mol from smiles"
                    % counter,
                    file=sys.stderr)
                # The whole fallback is guarded: a missing SMILES property or
                # an unparsable SMILES must only skip this molecule, not abort
                # the run. The rebuilt molecule carries only the class
                # property, exactly as in the original fallback.
                try:
                    rebuilt = MolFromSmiles(mol.GetProp("SMILES"))
                    if rebuilt is None:
                        raise ValueError("SMILES could not be parsed")
                    AllChem.Compute2DCoords(rebuilt)
                    rebuilt.SetProp(new_label, c)
                    sdw.write(rebuilt)
                except Exception:
                    print("couldn't write mol %d to file...skip" % counter,
                          file=sys.stderr)
    finally:
        sdw.close()
 def extract_substructure_information(self, radii, smi_dict):
     """Record which molecules contain which fingerprint substructures.

     For every entry of ``smi_dict`` a molecule is built from the SMILES
     string and fingerprinted up to ``max(radii)``. Each fingerprint
     identifier whose first recorded environment has a radius contained in
     ``radii`` is merged into ``self.substructure_dictionary`` as
     ``{identifier: [mol_id]}`` via ``self._combine_dicts``.

     Side effects: sets ``self.radii``, ``self.max_radius`` and
     ``self.nb_substructures``; appends one id per molecule to
     ``self.mols_ids``; rebinds ``self.substructure_dictionary``.

     Args:
         radii: Non-empty collection of radii to keep; ``max(radii)`` is
             used as the fingerprint radius.
         smi_dict: Mapping of molecule key to SMILES string.

     Raises:
         ValueError: If ``radii`` is empty (raised by ``max``).
     """
     self.radii = radii
     # Loop-invariant, so computed once instead of once per molecule.
     max_radius = max(radii)
     for akey, smiles in smi_dict.items():
         info = {}
         mol = MolFromSmiles(smiles)
         # Called only for its side effect of filling ``info``; the returned
         # fingerprint itself is not needed here.
         _GetMorganFingerprint(mol, max_radius, bitInfo=info)
         # NOTE(review): a molecule freshly built from SMILES presumably
         # carries no properties, so GetProp(self.name_field) may fail when
         # name_field is set -- confirm the intended source of the name.
         mol_id = akey if self.name_field is None else mol.GetProp(
             self.name_field)
         self.mols_ids.append(mol_id)
         # v[0][1]: presumably the radius of the first (atom, radius)
         # environment recorded for identifier k -- verify against the
         # bitInfo layout of the fingerprint function in use.
         substructure_dictionary = {
             k: [mol_id]
             for k, v in info.items() if v[0][1] in radii
         }
         self.substructure_dictionary = self._combine_dicts(
             substructure_dictionary, self.substructure_dictionary)
     self.nb_substructures = len(self.substructure_dictionary)
     self.max_radius = max_radius