Example #1
0
def import_smiles(mine_db: MINE, target: str) -> None:
    """Import the compounds of a SMILES file into a MINE database.

    Every molecule produced by RDKit's ``SmilesMolSupplier`` is inserted into
    the database together with the properties reported by
    ``GetPropsAsDict()``. Entries for which the supplier yields no molecule
    are skipped. Afterwards a record of the import is added to the database's
    ``meta_data`` collection.

    Parameters
    ----------
    mine_db : MINE
        The database to import the compounds into.
    target : str
        Path of the SMILES file (.smi) to read. The file is read with a tab
        delimiter and ``nameColumn=0``.
    """
    # SmilesMolSupplier (rdkit) generates Mol objects from smiles file (.smi)
    mols = AllChem.SmilesMolSupplier(target, delimiter="\t", nameColumn=0)
    for mol in mols:
        # Skip entries that did not yield a usable Mol object
        if not mol:
            continue
        # Compound properties come straight from the rdkit Mol object
        mine_db.insert_compound(
            mol,
            compound_dict=mol.GetPropsAsDict(),
            pubchem_db=None,
            kegg_db=None,
            modelseed_db=None,
        )
    # Add to log file (metadata). The action names the format that was
    # actually imported (previously this was logged as an SDF import).
    mine_db.meta_data.insert({
        "Timestamp": datetime.datetime.now(),
        "Action": "SMILES Imported",
        "Filepath": target,
    })
Example #2
0
def import_sdf(mine_db: MINE, target: str) -> None:
    """Import the compounds of an SDF file into a MINE database.

    Every molecule produced by RDKit's ``SDMolSupplier`` is inserted into the
    database together with the properties reported by ``GetPropsAsDict()``.
    Entries that RDKit could not parse are skipped. Afterwards a record of
    the import is added to the database's ``meta_data`` collection.

    Parameters
    ----------
    mine_db : MINE
        The database to import the compounds into.
    target : str
        Path of the SDF file to read.
    """
    # SDMolSupplier (rdkit) takes entries from sdf file and returns Mol objects
    sdf_gen = AllChem.SDMolSupplier(target)
    for mol in sdf_gen:
        # The supplier yields None for entries it fails to parse; calling
        # GetPropsAsDict() on those would raise AttributeError.
        if mol is None:
            continue
        mine_db.insert_compound(
            mol,
            compound_dict=mol.GetPropsAsDict(),
            pubchem_db=None,
            kegg_db=None,
            modelseed_db=None,
        )
    # Add to log file (metadata)
    mine_db.meta_data.insert({
        "Timestamp": datetime.datetime.now(),
        "Action": "SDF Imported",
        "Filepath": target,
    })
Example #3
0
    def save_to_MINE(self, db_id):
        """Save reactions, compounds and reaction rules to a MINE database.

        Reactions are cross-linked to their compounds and queued in an
        unordered bulk operation, then compounds are queued and written the
        same way. Reaction rules are saved one at a time, and finally the
        database indexes are built. A ``meta_data`` record is written after
        each bulk insert.

        :param db_id: The name of the target database
        :type db_id: str
        """
        db = MINE(db_id)
        bulk_c = db.compounds.initialize_unordered_bulk_op()
        bulk_r = db.reactions.initialize_unordered_bulk_op()

        # For every reaction this loop:
        #   1. Adds reaction links to the participating compounds
        #   2. Adds source information to the product compounds
        #   3. Increments the prediction counter of each reaction rule used
        #   4. Queues the reaction for bulk insertion
        for rxn in self.reactions.values():
            rxn_id = rxn['_id']
            reactant_ids = [reactant.c_id for reactant in rxn['Reactants']]
            for c_id in reactant_ids:
                self.compounds[c_id]['Reactant_in'].append(rxn_id)
            for product in rxn['Products']:
                self.compounds[product.c_id]['Product_of'].append(rxn_id)
                # Don't track sources of coreactants
                if product.c_id[0] == 'X':
                    continue
                self.compounds[product.c_id]['Sources'].append({
                    # Each source entry gets its own list so entries never
                    # alias one another
                    "Compounds": list(reactant_ids),
                    "Operators": list(rxn["Operators"])
                })
            # Iterate the number of reactions predicted
            for op in rxn['Reaction_rules']:
                self.rxn_rules[op][1]['Reactions_predicted'] += 1
            db.insert_reaction(rxn, bulk=bulk_r)
        # Executing a bulk operation with nothing queued is an error, so only
        # execute when there is something to write.
        if self.reactions:
            bulk_r.execute()
            db.meta_data.insert({
                "Timestamp": datetime.datetime.now(),
                "Action": "Reactions Inserted"
            })

        # Same guard as for reactions: skip the bulk write when there are no
        # compounds instead of failing on an empty bulk operation.
        if self.compounds:
            for comp_dict in self.compounds.values():
                db.insert_compound(AllChem.MolFromSmiles(comp_dict['SMILES']),
                                   comp_dict,
                                   bulk=bulk_c)
            bulk_c.execute()
            db.meta_data.insert({
                "Timestamp": datetime.datetime.now(),
                "Action": "Compounds Inserted"
            })

        for rule in self.rxn_rules.values():
            # There are fewer reaction rules so bulk operations are not
            # really faster.
            db.operators.save(rule[1])
        db.build_indexes()
Example #4
0
def import_mol_dir(mine_db: MINE,
                   target: str,
                   name_field: str = "Name",
                   overwrite: bool = False) -> None:
    """Import a directory of molfiles into a MINE database.

    Every file in ``target`` whose name ends in ``.mol`` is parsed with RDKit.
    Files that do not yield a molecule are skipped. The compound name is the
    file name without its ``.mol`` extension. Afterwards a record of the
    import is added to the database's ``meta_data`` collection.

    Parameters
    ----------
    mine_db : MINE
        The database to import the compounds into.
    target : str
        Directory containing the .mol files to import.
    name_field : str, optional
        Field for the compound name, by default "Name".
    overwrite : bool, optional
        If False (the default) and a compound with the same hash is already
        in the database, only the new name is added to that compound's
        ``name_field``. Otherwise the compound is passed to
        ``insert_compound``.
    """
    mol_ext = ".mol"
    for filename in os.listdir(target):
        # Match on the extension only, so names such as "x.mol2" or
        # "x.mol.bak" are not picked up.
        if not filename.endswith(mol_ext):
            continue
        # MolFromMolFile (rdkit) generates Mol objects from .mol files
        mol = AllChem.MolFromMolFile(os.path.join(target, filename))
        # Skip files for which no Mol object was generated
        if not mol:
            continue
        # Slice the extension off. str.rstrip(".mol") would strip any
        # trailing '.', 'm', 'o', 'l' characters, turning "methanol.mol"
        # into "methan".
        name = filename[:-len(mol_ext)]
        # Create hashkey for the compound
        cpdhash = utils.get_compound_hash(mol)
        if not overwrite and mine_db.compounds.count({"_id": cpdhash}):
            # Compound already stored and must not be replaced: just record
            # the additional name on the existing entry.
            mine_db.compounds.update({"_id": cpdhash},
                                     {"$addToSet": {
                                         name_field: name
                                     }})
        else:
            # New compound, or overwriting was requested: insert it
            mine_db.insert_compound(
                mol,
                compound_dict={
                    name_field: [name],
                    "Generation": 0
                },
                pubchem_db=None,
                kegg_db=None,
                modelseed_db=None,
            )
    # Add to log file (metadata)
    mine_db.meta_data.insert({
        "Timestamp": datetime.datetime.now(),
        "Action": "MolFiles Imported",
        "Filepath": target,
    })