def main():
    """Build and print the fragment network for one SMILES from the command line.

    Command-line options (parsed with argparse):

    * ``--smiles``: the SMILES string to process (required).
    * ``--id``: identifier stored alongside the molecule; "smiles1" if omitted.
    * ``--standardize``: pass the parsed molecule through ``standardize`` first.
    * ``--isomeric`` / ``--non_isomeric``: set the ``iso_flag`` handed to
      ``NodeHolder`` (defaults to True).
    * ``-v`` / ``-vv``: verbosity 1 or 2 (mutually exclusive, defaults to 0).

    The nodes, edges and attributes of the network returned by
    ``build_network`` are printed, followed by the node and edge counts.
    The process exits with status 1 if no SMILES is given or if the SMILES
    cannot be parsed.
    """
    parser = argparse.ArgumentParser(
        description="Convert a SMILES to nodes, edges and attributes"
    )
    parser.add_argument("--smiles")
    parser.add_argument("--id")
    parser.add_argument("--standardize", action="store_true")
    parser.add_argument("--isomeric", dest="iso_flag", action="store_true")
    parser.add_argument("--non_isomeric", dest="iso_flag", action="store_false")

    group = parser.add_mutually_exclusive_group()
    group.add_argument("-v", dest="verbosity", action="store_const", const=1)
    group.add_argument("-vv", dest="verbosity", action="store_const", const=2)

    parser.set_defaults(verbosity=0)
    parser.set_defaults(iso_flag=True)

    args = parser.parse_args()

    # A SMILES string is the only mandatory input.
    if not args.smiles:
        print('ERROR: Must specify a SMILES')
        sys.exit(1)

    print("Original SMILES: " + args.smiles)
    mol = Chem.MolFromSmiles(args.smiles)
    # MolFromSmiles signals a parse failure by returning None rather than
    # raising; stop here instead of crashing in the calls that follow.
    if mol is None:
        print('ERROR: Could not parse SMILES: ' + args.smiles)
        sys.exit(1)

    if args.standardize:
        mol = standardize(mol)
        print("Standardized SMILES: " + Chem.MolToSmiles(mol))

    smiles = Chem.CanonSmiles(Chem.MolToSmiles(mol, isomericSmiles=True))
    print("Canonical SMILES: " + smiles)

    # Named mol_id (not id) so the builtin id() is not shadowed.
    mol_id = args.id
    if mol_id is None:
        mol_id = "smiles1"

    attrs = [Attr(smiles, ["EM", mol_id])]

    # Build the network
    node_holder = NodeHolder(iso_flag=args.iso_flag)
    max_frags = 0
    node_holder = build_network(attrs, node_holder, max_frags,
                                smiles, args.verbosity)

    # Write the data out
    for node in node_holder.node_list:
        print(str(node))

    for edge in node_holder.get_edges():
        print(str(edge))

    for attr in attrs:
        print(str(attr))

    print("Number of nodes: " + str(len(node_holder.node_list)))
    print("Number of edges: " + str(len(node_holder.get_edges())))
def process(smiles):
    """Parse a SMILES string and standardize the resulting molecule.

    :param smiles: The SMILES string to parse.
    :return: A ``(mol, num_frags)`` tuple. ``mol`` is the value returned by
             ``standardize`` for the parsed molecule and ``num_frags`` is the
             number of fragments ``Chem.GetMolFrags`` reports for the parsed
             (pre-standardization) molecule. ``(None, 0)`` is returned when
             the SMILES cannot be parsed or a ``ValueError`` is raised while
             processing it.
    """
    try:
        parsed = Chem.MolFromSmiles(smiles)
        if parsed is None:
            return None, 0
        # Count fragments on the molecule as read, before standardization.
        frags = Chem.GetMolFrags(parsed, asMols=True)
        return standardize(parsed), len(frags)
    except ValueError as ve:
        sys.stderr.write("Failed to process molecule\n")
        # Exceptions have no ``message`` attribute on Python 3; str() works
        # on every Python version. The trailing newline keeps the diagnostic
        # on its own line.
        sys.stderr.write(str(ve) + "\n")
        return None, 0
# Molecule 1: show the canonical isomeric and non-isomeric SMILES side by side.
print("\nIsomeric molecule 1")
iso1 = Chem.MolToSmiles(mol1, canonical=True, isomericSmiles=True)
noniso1 = Chem.MolToSmiles(mol1, canonical=True, isomericSmiles=False)
print("\n".join([
    "Read: " + smiles1,
    "Isomeric: " + iso1,
    "NonIsomeric: " + noniso1,
    # Original author's expectation: iso and noniso SMILES differ here.
    "Is isomeric? " + str(iso1 != noniso1),
]))

# Molecule 2: same report for the second input.
print("\nIsomeric molecule 2")
iso2 = Chem.MolToSmiles(mol2, canonical=True, isomericSmiles=True)
noniso2 = Chem.MolToSmiles(mol2, canonical=True, isomericSmiles=False)
print("\n".join([
    "Read: " + smiles2,
    "Isomeric: " + iso2,
    "NonIsomeric: " + noniso2,
    # Original author's expectation: iso and noniso SMILES are the same here.
    "Is isomeric? " + str(iso2 != noniso2),
]))

# Molecule 3: standardize first, then report and compare with molecule 1.
print("\nNon-standardized molecule 3")
std3 = standardize(mol3)  # original author's note: gives back a RDKit RWMol
iso3 = Chem.MolToSmiles(std3, canonical=True, isomericSmiles=True)
noniso3 = Chem.MolToSmiles(std3, canonical=True, isomericSmiles=False)
print("\n".join([
    # Original author's note: standardization changes the SMILES that was read.
    "Read: " + smiles3,
    "Isomeric: " + iso3,
    "NonIsomeric: " + noniso3,
    "Is isomeric? " + str(iso3 != noniso3),
    "Is smiles1? " + str(iso1 == iso3),
]))
def standardise_mol(mol, osmiles):
    """Standardise an RDKit mol object.

    The molecule is passed through ``standardize`` and, if that yields a
    result, canonical isomeric and non-isomeric SMILES are generated from it
    along with InChI values (via ``gen_inchi``). The heavy atom count and
    the number of ring atoms are taken from the molecule as passed in.

    Exceptions raised by the underlying calls are logged as warnings and
    missing results as errors; nothing is raised for those failures.

    NOTE(review): unlike ``standardise``, this function does not create the
    module ``logger``; it presumably relies on it already being set up.

    :param mol: The RDKit molecule to standardise. A falsy value (such as
                ``None``) is logged as an error and nothing is generated.
    :param osmiles: The original (non-standard) SMILES, used in log messages
    :return: A ``StandardInfo`` namedtuple built from (std, iso, noniso,
             hac, rac, inchis, inchik, noniso_inchis, noniso_inchik,
             iso_inchis, iso_inchik), with ``hac`` and ``rac`` rendered as
             strings. ``std`` is None unless both the isomeric and the
             non-isomeric SMILES were generated.
    """
    std = None
    iso = None
    noniso = None
    inchis = None
    inchik = None
    noniso_inchis = None
    noniso_inchik = None
    iso_inchis = None
    iso_inchik = None
    hac = 0
    rac = 0

    if not mol:
        logger.error(
            'Got nothing from MolFromSmiles(%s).'
            ' Skipping this Vendor compound', osmiles)
    else:
        # Got a molecule.
        #
        # Get the heavy atom and ring atom counts and try to (safely)
        # standardise.
        hac = mol.GetNumHeavyAtoms()
        rac = sum(1 for atom in mol.GetAtoms() if atom.IsInRing())

        try:
            std = standardize(mol)
        except Exception as e:
            # Log the exception itself: ``e.message`` does not exist on
            # Python 3 and would raise AttributeError from this handler.
            logger.warning('standardize(%s) exception: "%s"', osmiles, e)
        if not std:
            logger.error(
                'Got nothing from standardize(%s).'
                ' Skipping this Vendor compound', osmiles)

    if std:
        # We have a standard representation,
        # Try to generate the isomeric version...
        try:
            iso = Chem.MolToSmiles(std, isomericSmiles=True, canonical=True)
        except Exception as e:
            logger.warning('MolToSmiles(%s, iso) exception: "%s"', osmiles, e)
        if not iso:
            logger.error(
                'Got nothing from MolToSmiles(%s, iso).'
                ' Skipping this Vendor compound', osmiles)

        # ...and then the non-isomeric version, plus the InChI values.
        # This is attempted even if the isomeric form failed, so that every
        # problem with the compound is logged.
        try:
            noniso = Chem.MolToSmiles(std, isomericSmiles=False, canonical=True)
            noniso_mol = Chem.MolFromSmiles(noniso)
            try:
                inchis, inchik = gen_inchi(noniso_mol, '')
                iso_inchis, iso_inchik = gen_inchi(std, '/FixedH')
                noniso_inchis, noniso_inchik = gen_inchi(noniso_mol, '/FixedH')
            except Exception:
                # InChI generation is best-effort: values not yet assigned
                # when the failure happened stay None.
                logger.warning('gen_inchi exception for %s', noniso)
        except Exception as e:
            logger.warning('MolToSmiles(%s, noniso) exception: "%s"',
                           osmiles, e)
        if not noniso:
            logger.error(
                'Got nothing from MolToSmiles(%s, noniso).'
                ' Skipping this Vendor compound', osmiles)

    # If anything went wrong, set std to None.
    # It's "all-or-nothing"...
    if not iso or not noniso:
        std = None

    return StandardInfo(std, iso, noniso, str(hac), str(rac),
                        inchis, inchik,
                        noniso_inchis, noniso_inchik,
                        iso_inchis, iso_inchik)
def standardise(osmiles):
    """Standardise a vendor (original) SMILES.

    The SMILES is parsed with RDKit, passed through ``standardize`` and, if
    that yields a result, rendered as canonical isomeric and non-isomeric
    SMILES. The heavy atom count (hac) is taken from the molecule as parsed.

    Exceptions raised by the underlying calls are logged as warnings and
    missing results as errors; nothing is raised for those failures. The
    module logger is created on first use if it does not exist yet.

    :param osmiles: The original (non-standard) SMILES
    :return: A ``StandardInfo`` namedtuple built from (std, iso, noniso,
             hac), with ``hac`` rendered as a string. ``std`` is None unless
             both the isomeric and the non-isomeric SMILES were generated.
    """
    global logger

    # Create our logger if it does not exist
    if not logger:
        logger = logging.getLogger(__name__)

    std = None
    iso = None
    noniso = None
    hac = 0

    mol = None
    try:
        mol = Chem.MolFromSmiles(osmiles)
    except Exception as e:
        # Log the exception itself: ``e.message`` does not exist on
        # Python 3 and would raise AttributeError from this handler.
        logger.warning('MolFromSmiles(%s) exception: "%s"', osmiles, e)

    if not mol:
        logger.error('Got nothing from MolFromSmiles(%s).'
                     ' Skipping this Vendor compound', osmiles)
    else:
        # Got a molecule.
        #
        # Get the HAC and try to (safely) standardise.
        hac = mol.GetNumHeavyAtoms()
        try:
            std = standardize(mol)
        except Exception as e:
            logger.warning('standardize(%s) exception: "%s"', osmiles, e)
        if not std:
            logger.error('Got nothing from standardize(%s).'
                         ' Skipping this Vendor compound', osmiles)

    if std:
        # We have a standard representation,
        # Try to generate the isomeric version...
        try:
            iso = Chem.MolToSmiles(std, isomericSmiles=True, canonical=True)
        except Exception as e:
            logger.warning('MolToSmiles(%s, iso) exception: "%s"', osmiles, e)
        if not iso:
            logger.error('Got nothing from MolToSmiles(%s, iso).'
                         ' Skipping this Vendor compound', osmiles)

        # ...and then the non-isomeric version. This is attempted even if
        # the isomeric form failed, so that both problems get logged.
        try:
            noniso = Chem.MolToSmiles(std, isomericSmiles=False, canonical=True)
        except Exception as e:
            logger.warning('MolToSmiles(%s, noniso) exception: "%s"',
                           osmiles, e)
        if not noniso:
            logger.error('Got nothing from MolToSmiles(%s, noniso).'
                         ' Skipping this Vendor compound', osmiles)

    # If anything went wrong, set std to None.
    # It's "all-or-nothing"...
    if not iso or not noniso:
        std = None

    return StandardInfo(std, iso, noniso, str(hac))