def test_get_compound_hash_two_blocks():
    """Check hashing of a stereo SMILES with inchi_blocks set to 2 and to 1."""
    stereo_smiles = "C([C@@H]1[C@H]([C@@H]([C@H]([C@H](O1)O)O)O)O)O"
    # Both calls are expected to report the same InChI key; only the id differs.
    expected_inchi_key = "WQZGKKKJIJFFOK-DVKNGEFBSA-N"

    two_block_result = utils.get_compound_hash(
        stereo_smiles, "Starting Compound", inchi_blocks=2
    )
    assert two_block_result == (
        "Cf95a3c17f908e427c3127b4e8c3d8575c286d6ce",
        expected_inchi_key,
    )

    one_block_result = utils.get_compound_hash(
        stereo_smiles, "Starting Compound", inchi_blocks=1
    )
    assert one_block_result == (
        "C9ab1a08d72c90a8167d1f3a668d8f1138e534a07",
        expected_inchi_key,
    )
def test_get_compound_hash():
    """Check the (id, InChI key) pair returned for "CCO" per compound type."""
    # (compound type, expected return value), checked in this order.
    cases = [
        (
            "Coreactant",
            ("Xa41fe8492d86f214ba494e3d04da2f0854c0e2ea", "LFQSCWFLJHTTHZ"),
        ),
        (
            "Predicted",
            (
                "Ca41fe8492d86f214ba494e3d04da2f0854c0e2ea",
                "LFQSCWFLJHTTHZ-UHFFFAOYSA-N",
            ),
        ),
        (
            "Starting Compound",
            (
                "Ca41fe8492d86f214ba494e3d04da2f0854c0e2ea",
                "LFQSCWFLJHTTHZ-UHFFFAOYSA-N",
            ),
        ),
    ]
    for cpd_type, expected in cases:
        assert utils.get_compound_hash("CCO", cpd_type) == expected
def import_mol_dir(mine_db: MINE, target: str, name_field: str = "Name", overwrite: bool = False) -> None:
    """Import a directory of molfiles into a MINE database.

    Every file in ``target`` whose name ends in ``.mol`` is parsed with RDKit.
    Files that cannot be parsed are skipped. One metadata record describing
    the import is written after all files have been processed.

    Parameters
    ----------
    mine_db : MINE
        The database to import the compounds into.
    target : str
        Directory containing the .mol files to import.
    name_field : str, optional
        Field for the compound name, by default "Name".
    overwrite : bool, optional
        Replace old compounds with new ones if a collision happens, by default
        False. When False and the compound already exists, the file's name is
        only added to the existing compound's ``name_field`` set.
    """
    mol_extension = ".mol"

    for filename in os.listdir(target):
        # Match on the extension only; a plain substring test would also pick
        # up names such as "x.mol.bak".
        if not filename.endswith(mol_extension):
            continue

        # MolFromMolFile (rdkit) generates Mol objects from .mol files
        mol = AllChem.MolFromMolFile(os.path.join(target, filename))
        # Skip files for which no Mol object could be generated
        if not mol:
            continue

        # The compound name is the file name without its extension. This must
        # be a suffix removal: str.rstrip(".mol") strips any trailing '.', 'm',
        # 'o' or 'l' characters and would turn "methanol.mol" into "methan".
        name = os.path.splitext(filename)[0]

        # Create hashkey for the compound
        # NOTE(review): the result is used directly as the document `_id`;
        # confirm utils.get_compound_hash accepts a Mol and returns a single
        # hash value in this code path.
        cpdhash = utils.get_compound_hash(mol)

        # NOTE(review): count/update/insert look like legacy collection
        # methods - confirm they exist in the installed database driver.
        if not overwrite and mine_db.compounds.count({"_id": cpdhash}):
            # Compound already stored and we must not overwrite it: just
            # record the additional name on the existing document.
            mine_db.compounds.update(
                {"_id": cpdhash},
                {"$addToSet": {name_field: name}},
            )
        else:
            # New compound, or overwriting is allowed: insert it.
            mine_db.insert_compound(
                mol,
                compound_dict={name_field: [name], "Generation": 0},
                pubchem_db=None,
                kegg_db=None,
                modelseed_db=None,
            )

    # Add to log file (metadata)
    mine_db.meta_data.insert({
        "Timestamp": datetime.datetime.now(),
        "Action": "MolFiles Imported",
        "Filepath": target,
    })
def _gen_compound(mol):
    """Build, or look up, the compound record for a predicted molecule.

    Parameters
    ----------
    mol
        RDKit molecule to turn into a compound record.

    Returns
    -------
    dict or None
        The existing entry from ``local_cpds`` when the compound id is already
        known, otherwise a freshly built compound dict. None is returned when
        the molecule cannot be processed/sanitized, when its SMILES contains
        a "." or when no compound id is produced.
    """
    # Silence RDKit logging while sanitizing; it is re-enabled in `finally`
    # so that a failed molecule does not leave logging switched off.
    rkl.DisableLog("rdApp.*")
    try:
        if explicit_h:
            mol = RemoveHs(mol)

        # resolve potential tautomers and choose first one
        mol_smiles = MolToSmiles(mol, True)
        if "n" in mol_smiles:
            mol_smiles = utils.postsanitize_smiles([mol_smiles])[0][0]
            mol = MolFromSmiles(mol_smiles)

        SanitizeMol(mol)
    # TODO: logger
    # Get lots of "Explicit valence greater than permitted" errors here
    # This is for predicted compounds that are infeasible, so we throw them out
    except Exception:
        # Exception rather than BaseException, so KeyboardInterrupt and
        # SystemExit still propagate.
        return None
    finally:
        rkl.EnableLog("rdApp.*")

    mol_smiles = MolToSmiles(mol, True)
    if "." in mol_smiles:
        return None

    cpd_id, inchi_key = utils.get_compound_hash(mol_smiles, "Predicted")
    if not cpd_id:
        return None

    # Reuse the record when this compound has been seen before.
    if cpd_id in local_cpds:
        return local_cpds[cpd_id]

    return {
        "ID": None,
        "_id": cpd_id,
        "SMILES": mol_smiles,
        "InChI_key": inchi_key,
        "Type": "Predicted",
        "Generation": generation,
        "atom_count": utils.get_atom_count(mol),
        "Reactant_in": [],
        "Product_of": [],
        "Expand": True,
        "Formula": CalcMolFormula(mol),
        "last_tani": 0,
    }
def parse_comps(field):
    """Parse a ' // '-separated compound field into a half reaction.

    Each distinct compound name in ``field`` is looked up in ``metacyc2hash``;
    the number of times it occurs is used as its stoichiometry. Compounds not
    yet recorded in ``inserted`` are inserted into ``mine_db`` with generation
    0 and then recorded.

    Parameters
    ----------
    field : str
        Compound names separated by ' // '.

    Returns
    -------
    tuple
        ``(half_rxn, atoms)`` where ``half_rxn`` is a list of
        ``utils.stoich_tuple`` entries and ``atoms`` is a
        ``collections.Counter`` of element symbol to total count, weighted by
        stoichiometry.

    Raises
    ------
    ValueError
        If a compound name is not present in ``metacyc2hash``.
    """
    atoms = collections.Counter()
    compounds = collections.Counter(field.split(' // '))
    half_rxn = []
    for comp, stoich in compounds.items():
        if comp not in metacyc2hash:
            raise ValueError('Undefined Compound: %s' % comp)

        mol = metacyc2hash[comp]
        formula = AllChem.CalcMolFormula(mol)
        # Raw string: '\d' in a normal literal is an invalid escape sequence.
        for element, count in re.findall(r'([A-Z][a-z]*)(\d*)', formula):
            # An element without an explicit number occurs once.
            atoms[element] += (int(count) if count else 1) * stoich

        if comp not in inserted:
            mine_db.insert_compound(mol, {'Generation': 0})
            inserted.add(comp)

        half_rxn.append(
            utils.stoich_tuple(stoich, utils.get_compound_hash(mol)))
    return half_rxn, atoms