def test_get_compound_hash_two_blocks():
    """Check ``utils.get_compound_hash`` with ``inchi_blocks`` set to 2 and 1.

    The same stereo-annotated SMILES is hashed twice as a "Starting Compound";
    the compound id is expected to differ between the two settings while the
    returned InChI key stays the same.
    """
    stereo_smiles = "C([C@@H]1[C@H]([C@@H]([C@H]([C@H](O1)O)O)O)O)O"

    two_block_result = utils.get_compound_hash(
        stereo_smiles, "Starting Compound", inchi_blocks=2
    )
    assert two_block_result == (
        "Cf95a3c17f908e427c3127b4e8c3d8575c286d6ce",
        "WQZGKKKJIJFFOK-DVKNGEFBSA-N",
    )

    one_block_result = utils.get_compound_hash(
        stereo_smiles, "Starting Compound", inchi_blocks=1
    )
    assert one_block_result == (
        "C9ab1a08d72c90a8167d1f3a668d8f1138e534a07",
        "WQZGKKKJIJFFOK-DVKNGEFBSA-N",
    )
def test_get_compound_hash():
    """Check ``utils.get_compound_hash`` for "CCO" under each compound type.

    Each entry pairs a compound type with the ``(compound id, InChI key)``
    tuple expected for it; the entries are checked in order.
    """
    expected_by_type = [
        (
            "Coreactant",
            ("Xa41fe8492d86f214ba494e3d04da2f0854c0e2ea", "LFQSCWFLJHTTHZ"),
        ),
        (
            "Predicted",
            (
                "Ca41fe8492d86f214ba494e3d04da2f0854c0e2ea",
                "LFQSCWFLJHTTHZ-UHFFFAOYSA-N",
            ),
        ),
        (
            "Starting Compound",
            (
                "Ca41fe8492d86f214ba494e3d04da2f0854c0e2ea",
                "LFQSCWFLJHTTHZ-UHFFFAOYSA-N",
            ),
        ),
    ]
    for cpd_type, expected in expected_by_type:
        assert utils.get_compound_hash("CCO", cpd_type) == expected
Exemple #3
0
def import_mol_dir(mine_db: MINE,
                   target: str,
                   name_field: str = "Name",
                   overwrite: bool = False) -> None:
    """Import a directory of molfiles into a MINE database.

    Every file in ``target`` whose name ends in ``.mol`` is parsed with RDKit;
    files that fail to parse are skipped. The compound name is the file name
    without its ``.mol`` extension. After the directory has been processed, a
    "MolFiles Imported" record is written to the database metadata.

    Parameters
    ----------
    mine_db : MINE
        The database to import the compounds into.
    target : str
        Directory containing the molfiles to import.
    name_field : str, optional
        Field for the compound name, by default "Name".
    overwrite : bool, optional
        If False (the default) and a compound with the same hash already
        exists, the name is added to that compound's ``name_field`` set
        instead of inserting the compound again. Otherwise the compound is
        passed to ``mine_db.insert_compound``.
    """
    extension = ".mol"
    for file_name in os.listdir(target):
        # Match on the real extension: a substring test would also pick up
        # names such as "x.mol2" or "x.mol.bak".
        if not file_name.endswith(extension):
            continue

        # MolFromMolFile (rdkit) generates Mol objects from .mol files
        mol = AllChem.MolFromMolFile(os.path.join(target, file_name))
        # Skip files for which no Mol object could be generated
        if not mol:
            continue

        # Drop exactly the extension. str.rstrip(".mol") would strip any
        # trailing run of the characters ".", "m", "o", "l" and so mangle
        # names like "methanol.mol" into "methan".
        name = file_name[:-len(extension)]

        # Create hashkey for the compound
        # NOTE(review): assumes utils.get_compound_hash(mol) returns the value
        # stored as the compound's "_id" -- confirm against the utils version
        # in use.
        cpdhash = utils.get_compound_hash(mol)

        # If we don't want to overwrite, and the compound (cpdhash) already
        # exists, just record the extra name on the existing compound
        if not overwrite and mine_db.compounds.count({"_id": cpdhash}):
            mine_db.compounds.update({"_id": cpdhash},
                                     {"$addToSet": {
                                         name_field: name
                                     }})
        # Otherwise insert the compound into the database
        else:
            mine_db.insert_compound(
                mol,
                compound_dict={
                    name_field: [name],
                    "Generation": 0
                },
                pubchem_db=None,
                kegg_db=None,
                modelseed_db=None,
            )

    # Add to log file (metadata)
    mine_db.meta_data.insert({
        "Timestamp": datetime.datetime.now(),
        "Action": "MolFiles Imported",
        "Filepath": target,
    })
    def _gen_compound(mol):
        """Return the compound record for a predicted molecule, or None.

        The molecule is cleaned up and sanitized with RDKit logging disabled.
        ``None`` is returned when sanitization fails, when the resulting
        SMILES contains a "." (more than one fragment), or when no compound id
        can be computed. If the compound id is already present in
        ``local_cpds`` that existing record is returned; otherwise a new
        "Predicted" record is built.

        ``explicit_h``, ``local_cpds`` and ``generation`` are read from
        outside this function.
        """
        rkl.DisableLog("rdApp.*")
        try:
            if explicit_h:
                mol = RemoveHs(mol)

            # resolve potential tautomers and choose first one
            mol_smiles = MolToSmiles(mol, True)
            if "n" in mol_smiles:
                mol_smiles = utils.postsanitize_smiles([mol_smiles])[0][0]
                mol = MolFromSmiles(mol_smiles)

            SanitizeMol(mol)

        # TODO: logger
        # Get lots of "Explicit valence greater than permitted" errors here
        # This is for predicted compounds that are infeasible, so we throw them out
        # Only ordinary errors are swallowed; KeyboardInterrupt / SystemExit
        # must still propagate.
        except Exception:
            return None
        finally:
            # Always restore RDKit logging, including when the compound is
            # thrown out above.
            rkl.EnableLog("rdApp.*")

        mol_smiles = MolToSmiles(mol, True)
        if "." in mol_smiles:
            return None

        cpd_id, inchi_key = utils.get_compound_hash(mol_smiles, "Predicted")
        if not cpd_id:
            return None

        # Reuse the record of a compound that has already been seen.
        if cpd_id in local_cpds:
            return local_cpds[cpd_id]

        return {
            "ID": None,
            "_id": cpd_id,
            "SMILES": mol_smiles,
            "InChI_key": inchi_key,
            "Type": "Predicted",
            "Generation": generation,
            "atom_count": utils.get_atom_count(mol),
            "Reactant_in": [],
            "Product_of": [],
            "Expand": True,
            "Formula": CalcMolFormula(mol),
            "last_tani": 0,
        }
Exemple #5
0
 def parse_comps(field):
     """Parse one ' // '-separated compound field into a half reaction.

     Repeated compound names are counted, and that count is used as the
     stoichiometric coefficient. Each compound is looked up in
     ``metacyc2hash``; compounds not yet in ``inserted`` are inserted into
     ``mine_db`` with generation 0 and added to ``inserted``.

     Returns a tuple ``(half_rxn, atoms)`` where ``half_rxn`` is a list of
     ``utils.stoich_tuple`` entries and ``atoms`` is a ``Counter`` of element
     symbol -> total atom count, weighted by stoichiometry.

     Raises ``ValueError`` if a compound name is not in ``metacyc2hash``.
     """
     atoms = collections.Counter()
     half_rxn = []
     compounds = collections.Counter(field.split(' // '))
     for comp, stoich in compounds.items():
         if comp not in metacyc2hash:
             raise ValueError('Undefined Compound: %s' % comp)

         mol = metacyc2hash[comp]
         # Raw string: "\d" in a plain literal is an invalid escape sequence.
         formula_parts = re.findall(r'([A-Z][a-z]*)(\d*)',
                                    AllChem.CalcMolFormula(mol))
         for element, count in formula_parts:
             # An element without an explicit count occurs once.
             atoms[element] += int(count or 1) * stoich

         if comp not in inserted:
             mine_db.insert_compound(mol, {'Generation': 0})
             inserted.add(comp)
         half_rxn.append(
             utils.stoich_tuple(stoich, utils.get_compound_hash(mol)))
     return half_rxn, atoms