def import_smiles(mine_db: MINE, target: str) -> None:
    """Import the compounds of a SMILES file into a MINE database.

    Parameters
    ----------
    mine_db : MINE
        The database the compounds are inserted into.
    target : str
        Path of the SMILES (.smi) file to read. The file is parsed as
        tab-delimited, with the compound name taken from the first column.
    """
    # SmilesMolSupplier (rdkit) generates Mol objects from a smiles file
    mols = AllChem.SmilesMolSupplier(target, delimiter="\t", nameColumn=0)

    for mol in mols:
        # Entries that did not produce a usable Mol object are skipped
        if not mol:
            continue
        # The Mol's own properties (GetPropsAsDict) become the compound dict
        mine_db.insert_compound(
            mol,
            compound_dict=mol.GetPropsAsDict(),
            pubchem_db=None,
            kegg_db=None,
            modelseed_db=None,
        )

    # Add to log file (metadata). The action previously read "SDF Imported",
    # copied from import_sdf, which mislabelled SMILES imports in the log.
    mine_db.meta_data.insert({
        "Timestamp": datetime.datetime.now(),
        "Action": "SMILES Imported",
        "Filepath": target,
    })
def import_sdf(mine_db: MINE, target: str) -> None:
    """Import the compounds of an SDF file into a MINE database.

    Parameters
    ----------
    mine_db : MINE
        The database the compounds are inserted into.
    target : str
        Path of the SDF file to read.
    """
    # SDMolSupplier (rdkit) takes entries from sdf file and returns Mol objects
    sdf_gen = AllChem.SDMolSupplier(target)

    for mol in sdf_gen:
        # The supplier yields None for records rdkit cannot parse; calling
        # mol.GetPropsAsDict() on those would abort the whole import, so
        # they are skipped (same guard as import_smiles).
        if not mol:
            continue
        mine_db.insert_compound(
            mol,
            compound_dict=mol.GetPropsAsDict(),
            pubchem_db=None,
            kegg_db=None,
            modelseed_db=None,
        )

    # Add to log file (metadata)
    mine_db.meta_data.insert({
        "Timestamp": datetime.datetime.now(),
        "Action": "SDF Imported",
        "Filepath": target,
    })
def save_to_MINE(self, db_id):
    """Save compounds, reactions and reaction rules to a MINE database.

    :param db_id: The name of the target database
    :type db_id: str
    """
    db = MINE(db_id)
    bulk_c = db.compounds.initialize_unordered_bulk_op()
    bulk_r = db.reactions.initialize_unordered_bulk_op()

    # For every reaction this loop:
    #   1. Adds reaction links to the participating compounds
    #   2. Adds source information to the product compounds
    #   3. Increments the prediction count of each reaction rule used
    #   4. Queues the reaction on the reactions bulk operation
    for rxn in self.reactions.values():
        for reactant in rxn['Reactants']:
            self.compounds[reactant.c_id]['Reactant_in'].append(rxn['_id'])
        for product in rxn['Products']:
            self.compounds[product.c_id]['Product_of'].append(rxn['_id'])
            # Don't track sources of coreactants
            if product.c_id[0] == 'X':
                continue
            # A distinct name is used inside the comprehension so it can
            # never clobber the product loop variable.
            self.compounds[product.c_id]['Sources'].append({
                "Compounds": [r.c_id for r in rxn['Reactants']],
                "Operators": list(rxn["Operators"])
            })
        # Iterate the number of reactions predicted
        for op in rxn['Reaction_rules']:
            self.rxn_rules[op][1]['Reactions_predicted'] += 1
        db.insert_reaction(rxn, bulk=bulk_r)

    if self.reactions:
        bulk_r.execute()
        db.meta_data.insert({
            "Timestamp": datetime.datetime.now(),
            "Action": "Reactions Inserted"
        })

    # Same guard as for reactions: executing a bulk operation with nothing
    # queued raises in pymongo, and there is nothing to log in that case.
    if self.compounds:
        for comp_dict in self.compounds.values():
            db.insert_compound(AllChem.MolFromSmiles(comp_dict['SMILES']),
                               comp_dict, bulk=bulk_c)
        bulk_c.execute()
        db.meta_data.insert({
            "Timestamp": datetime.datetime.now(),
            "Action": "Compounds Inserted"
        })

    for rule in self.rxn_rules.values():
        # There are fewer reaction rules so bulk operations are not
        # really faster.
        db.operators.save(rule[1])

    db.build_indexes()
def import_mol_dir(mine_db: MINE, target: str, name_field: str = "Name", overwrite: bool = False) -> None:
    """Import a directory of molfiles into a MINE database.

    Parameters
    ----------
    mine_db : MINE
        The database the compounds are inserted into.
    target : str
        Directory containing the .mol files to import.
    name_field : str, optional
        Field for the compound name, by default "Name".
    overwrite : bool, optional
        Replace old compounds with new ones if a collision happens, by
        default False. When False and the compound already exists, the
        file's name is added to the existing compound's name field instead.
    """
    for file in os.listdir(target):
        # splitext separates the true extension from the base name.
        # str.rstrip(".mol") strips any trailing '.', 'm', 'o' or 'l'
        # characters (e.g. "methanol.mol" -> "methan"), and a substring
        # test for ".mol" would also accept names such as "x.mol2".
        name, extension = os.path.splitext(file)
        if extension != ".mol":
            continue

        # MolFromMolFile (rdkit) generates Mol objects from .mol files
        mol = AllChem.MolFromMolFile(os.path.join(target, file))
        # Skip files for which no Mol object was generated
        if not mol:
            continue

        # Create hashkey for the compound
        cpdhash = utils.get_compound_hash(mol)

        if not overwrite and mine_db.compounds.count({"_id": cpdhash}):
            # Compound already stored and must not be overwritten: only
            # record this file's name as an additional name for it.
            mine_db.compounds.update(
                {"_id": cpdhash},
                {"$addToSet": {name_field: name}},
            )
        else:
            # Overwriting is allowed or the compound is new: insert it
            mine_db.insert_compound(
                mol,
                compound_dict={name_field: [name], "Generation": 0},
                pubchem_db=None,
                kegg_db=None,
                modelseed_db=None,
            )

    # Add to log file (metadata)
    mine_db.meta_data.insert({
        "Timestamp": datetime.datetime.now(),
        "Action": "MolFiles Imported",
        "Filepath": target,
    })