예제 #1
0
def main():
    """Fragment a single SMILES given on the command line and print the result.

    Command-line options:
      --smiles        the SMILES to process (required)
      --id            identifier stored with the molecule (default "smiles1")
      --standardize   run the molecule through standardize() first
      --isomeric / --non_isomeric
                      value passed to NodeHolder as iso_flag (default True)
      -v / -vv        verbosity 1 or 2 (default 0), passed to build_network

    The nodes, edges and attributes of the built network are printed to
    stdout, followed by the node and edge counts. Exits with status 1 when
    no SMILES is supplied or when the SMILES cannot be parsed.
    """
    parser = argparse.ArgumentParser(
        description="Convert a SMILES to nodes, edges and attributes"
    )
    parser.add_argument("--smiles")
    parser.add_argument("--id")
    parser.add_argument("--standardize", action="store_true")
    parser.add_argument("--isomeric", dest="iso_flag", action="store_true")
    parser.add_argument("--non_isomeric", dest="iso_flag", action="store_false")

    group = parser.add_mutually_exclusive_group()
    group.add_argument("-v", dest="verbosity", action="store_const", const=1)
    group.add_argument("-vv", dest="verbosity", action="store_const", const=2)

    parser.set_defaults(verbosity=0)
    parser.set_defaults(iso_flag=True)

    args = parser.parse_args()

    # The SMILES is the only mandatory input.
    if not args.smiles:
        print('ERROR: Must specify a SMILES')
        sys.exit(1)

    print("Original SMILES: " + args.smiles)
    mol = Chem.MolFromSmiles(args.smiles)
    if mol is None:
        # MolFromSmiles signals a parse failure by returning None; without
        # this guard the calls below fail with an unhelpful traceback.
        print('ERROR: Could not parse SMILES: ' + args.smiles)
        sys.exit(1)
    if args.standardize:
        mol = standardize(mol)
        print("Standardized SMILES: " + Chem.MolToSmiles(mol))
    smiles = Chem.CanonSmiles(Chem.MolToSmiles(mol, isomericSmiles=True))
    print("Canonical SMILES: " + smiles)

    # 'mol_id' rather than 'id' so the builtin is not shadowed.
    mol_id = args.id if args.id is not None else "smiles1"
    attrs = [Attr(smiles, ["EM", mol_id])]

    # Build the network
    node_holder = NodeHolder(iso_flag=args.iso_flag)
    max_frags = 0  # NOTE(review): presumably 0 means "no fragment limit" - confirm
    node_holder = build_network(attrs, node_holder,
                                max_frags, smiles, args.verbosity)

    # Write the data out
    edges = node_holder.get_edges()

    for node in node_holder.node_list:
        print(str(node))

    for edge in edges:
        print(str(edge))

    for attr in attrs:
        print(str(attr))

    print("Number of nodes: " + str(len(node_holder.node_list)))
    print("Number of edges: " + str(len(edges)))
예제 #2
0
def process(smiles):
    """Parse a SMILES string and standardize the resulting molecule.

    :param smiles: The SMILES string to parse.

    :return: A ``(mol, num_frags)`` tuple. ``mol`` is the value returned by
             standardize() and ``num_frags`` is the number of fragments
             Chem.GetMolFrags() reports for the parsed molecule (counted
             before standardization). ``(None, 0)`` is returned when the
             SMILES cannot be parsed or a ValueError is raised on the way.
    """
    try:
        m = Chem.MolFromSmiles(smiles)
        if m is None:
            return None, 0
        frags = Chem.GetMolFrags(m, asMols=True)
        mol = standardize(m)
        return mol, len(frags)
    except ValueError as ve:
        sys.stderr.write("Failed to process molecule\n")
        # Exceptions have no '.message' attribute on Python 3; str() works
        # everywhere and keeps the handler itself from raising.
        sys.stderr.write(str(ve) + "\n")

    return None, 0
예제 #3
0
def _smiles_pair(mol):
    """Return the canonical (isomeric, non-isomeric) SMILES pair for *mol*."""
    iso = Chem.MolToSmiles(mol, isomericSmiles=True, canonical=True)
    noniso = Chem.MolToSmiles(mol, isomericSmiles=False, canonical=True)
    return iso, noniso


def _show(read, iso, noniso):
    """Print the input SMILES, both generated forms and whether they differ."""
    print("Read:        " + read)
    print("Isomeric:    " + iso)
    print("NonIsomeric: " + noniso)
    # The molecule counts as isomeric when the two forms are not identical.
    print("Is isomeric? " + str(iso != noniso))


print("\nIsomeric molecule 1")
iso1, noniso1 = _smiles_pair(mol1)
_show(smiles1, iso1, noniso1)  # iso and noniso smiles are different

print("\nIsomeric molecule 2")
iso2, noniso2 = _smiles_pair(mol2)
_show(smiles2, iso2, noniso2)  # iso and noniso smiles are the same

print("\nNon-standardized molecule 3")
std3 = standardize(mol3)  # gives back a RDKit RWMol
iso3, noniso3 = _smiles_pair(std3)
_show(smiles3, iso3, noniso3)  # standardization changes the smiles
print("Is smiles1?  " + str(iso1 == iso3))
예제 #4
0
def standardise_mol(mol, osmiles):
    """Standardise an RDKit mol object.

    The heavy-atom and ring-atom counts are taken from the incoming
    molecule, which is then passed through standardize(). Isomeric and
    non-isomeric canonical SMILES are generated from the standard form,
    along with InChI values obtained from gen_inchi().

    :param mol: The molecule to standardise. A falsy value (e.g. None from
                a failed parse) is logged and yields an "empty" result.
    :param osmiles: The original (non-standard) SMILES; used only to make
                    the log messages more useful.

    :return: A StandardInfo namedtuple containing the standard molecule
             representations and info. Errors are logged and, on error,
             the standard form will be returned as None. The heavy-atom
             and ring-atom counts are returned as strings.
    """
    global logger

    # Create our logger if it does not exist
    # (the same lazy initialisation standardise() performs).
    if not logger:
        logger = logging.getLogger(__name__)

    # Standardise and update global maps...
    # And try and handle and report any catastrophic errors
    # from dependent modules/functions.

    std = None
    iso = None
    noniso = None
    inchis = None
    inchik = None
    noniso_inchis = None
    noniso_inchik = None
    iso_inchis = None
    iso_inchik = None

    hac = 0
    rac = 0

    if not mol:
        logger.error(
            'Got nothing from MolFromSmiles(%s).'
            ' Skipping this Vendor compound', osmiles)
    else:

        # Got a molecule.
        #
        # Get the HAC and ring-atom count and try to (safely) standardise.

        hac = mol.GetNumHeavyAtoms()
        rac = sum(1 for atom in mol.GetAtoms() if atom.IsInRing())

        try:
            std = standardize(mol)
        except Exception as e:
            # Pass the exception itself: '.message' does not exist on
            # Python 3 and would make this handler raise AttributeError.
            logger.warning('standardize(%s) exception: "%s"', osmiles, e)
        if not std:
            logger.error(
                'Got nothing from standardize(%s).'
                ' Skipping this Vendor compound', osmiles)

    if std:

        # We have a standard representation,
        # Try to generate the isomeric version...

        try:
            iso = Chem.MolToSmiles(std, isomericSmiles=True, canonical=True)
        except Exception as e:
            logger.warning('MolToSmiles(%s, iso) exception: "%s"', osmiles, e)
        if not iso:
            logger.error(
                'Got nothing from MolToSmiles(%s, iso).'
                ' Skipping this Vendor compound', osmiles)

        # ...and then the non-isomeric version, plus the InChI values.

        try:
            noniso = Chem.MolToSmiles(std,
                                      isomericSmiles=False,
                                      canonical=True)
            noniso_mol = Chem.MolFromSmiles(noniso)
            try:
                inchis, inchik = gen_inchi(noniso_mol, '')
                iso_inchis, iso_inchik = gen_inchi(std, '/FixedH')
                noniso_inchis, noniso_inchik = gen_inchi(noniso_mol,
                                                         '/FixedH')
            except Exception:
                # InChI generation is best-effort: values assigned before
                # the failure are kept and the rest stay None.
                logger.warning('gen_inchi exception for %s', noniso)

        except Exception as e:
            logger.warning('MolToSmiles(%s, noniso) exception: "%s"',
                           osmiles, e)
        if not noniso:
            logger.error(
                'Got nothing from MolToSmiles(%s, noniso).'
                ' Skipping this Vendor compound', osmiles)

    # If anything went wrong, set std to None.
    # It's "all-or-nothing"...
    if not iso or not noniso:
        std = None

    return StandardInfo(std, iso, noniso, str(hac), str(rac), inchis, inchik,
                        noniso_inchis, noniso_inchik, iso_inchis, iso_inchik)
def standardise(osmiles):
    """Given a vendor (original) SMILES this method standardises
    it into a canonical form and returns a namedtuple that contains
    the standard form, the isomeric form, the non-isomeric form and
    the heavy atom count (hac).

    :param osmiles: The original (non-standard) SMILES

    :return: A StandardInfo namedtuple containing the standard molecule
             representations and info. Errors are logged and, on error,
             the standard form will be returned as None. The heavy atom
             count is returned as a string.
    """
    global logger

    # Create our logger if it does not exist
    if not logger:
        logger = logging.getLogger(__name__)

    # Standardise and update global maps...
    # And try and handle and report any catastrophic errors
    # from dependent modules/functions.

    std = None
    iso = None
    noniso = None
    hac = 0

    mol = None
    try:
        mol = Chem.MolFromSmiles(osmiles)
    except Exception as e:
        # Pass the exception itself: '.message' does not exist on
        # Python 3 and would make this handler raise AttributeError.
        logger.warning('MolFromSmiles(%s) exception: "%s"', osmiles, e)

    if not mol:
        logger.error('Got nothing from MolFromSmiles(%s).'
                     ' Skipping this Vendor compound', osmiles)
    else:

        # Got a molecule.
        #
        # Get the HAC and try to (safely) standardise.

        hac = mol.GetNumHeavyAtoms()
        try:
            std = standardize(mol)
        except Exception as e:
            logger.warning('standardize(%s) exception: "%s"', osmiles, e)
        if not std:
            logger.error('Got nothing from standardize(%s).'
                         ' Skipping this Vendor compound', osmiles)

    if std:

        # We have a standard representation,
        # Try to generate the isomeric version...

        try:
            iso = Chem.MolToSmiles(std, isomericSmiles=True, canonical=True)
        except Exception as e:
            logger.warning('MolToSmiles(%s, iso) exception: "%s"', osmiles, e)
        if not iso:
            logger.error('Got nothing from MolToSmiles(%s, iso).'
                         ' Skipping this Vendor compound', osmiles)

        # ...and then the non-isomeric version.

        try:
            noniso = Chem.MolToSmiles(std, isomericSmiles=False, canonical=True)
        except Exception as e:
            logger.warning('MolToSmiles(%s, noniso) exception: "%s"',
                           osmiles, e)
        if not noniso:
            logger.error('Got nothing from MolToSmiles(%s, noniso).'
                         ' Skipping this Vendor compound', osmiles)

    # If anything went wrong, set std to None.
    # It's "all-or-nothing"...
    if not iso or not noniso:
        std = None

    return StandardInfo(std, iso, noniso, str(hac))