Example #1
def import_smiles(mine_db: MINE, target: str) -> None:
    """Imports a smiles file as a MINE database.

    Parameters
    ----------
    mine_db : MINE
        The database to import the compounds into.
    target : str
        Path to the SMILES (.smi) file to import.
    """
    # SmilesMolSupplier (rdkit) generates Mol objects from smiles file (.smi)
    mols = AllChem.SmilesMolSupplier(target, delimiter="\t", nameColumn=0)
    # Go through each generated mol file and add molecule to MINE database
    # Stores compound properties in dict (GetPropsAsDict() from rdkit Mol
    # class)
    for mol in mols:
        if mol:
            mine_db.insert_compound(
                mol,
                compound_dict=mol.GetPropsAsDict(),
                pubchem_db=None,
                kegg_db=None,
                modelseed_db=None,
            )
    # Add to log file (metadata)
    mine_db.meta_data.insert({
        "Timestamp": datetime.datetime.now(),
        "Action": "SDF Imported",
        "Filepath": target,
    })
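A minimal usage sketch for the importer above. It assumes a reachable local MongoDB and that MINE is importable as in the later examples; the database name and .smi path are hypothetical placeholders.

from minedatabase.databases import MINE

if __name__ == "__main__":
    db = MINE("example_mine")           # hypothetical database name
    import_smiles(db, "compounds.smi")  # hypothetical tab-delimited SMILES file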
Example #2
def import_sdf(mine_db: MINE, target: str) -> None:
    """Imports a SDF file as a MINE database.

    Parameters
    ----------
    mine_db : MINE
        The database to import the compounds into.
    target : str
        Path to the SDF file to import.
    """
    # SDMolSupplier (rdkit) takes entries from sdf file and returns Mol objects
    sdf_gen = AllChem.SDMolSupplier(target)
    # Go through each generated Mol object and add each to MINE database
    for mol in sdf_gen:
        # Skip entries RDKit could not parse (SDMolSupplier yields None for them)
        if mol is None:
            continue
        mine_db.insert_compound(
            mol,
            compound_dict=mol.GetPropsAsDict(),
            pubchem_db=None,
            kegg_db=None,
            modelseed_db=None,
        )
    # Add to log file (metadata)
    mine_db.meta_data.insert({
        "Timestamp": datetime.datetime.now(),
        "Action": "SDF Imported",
        "Filepath": target,
    })
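The SDF importer follows the same pattern; a brief sketch with hypothetical names, again assuming a running MongoDB:

if __name__ == "__main__":
    db = MINE("example_mine")         # hypothetical database name
    import_sdf(db, "structures.sdf")  # hypothetical SDF file path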
Example #3
def import_mol_dir(mine_db: MINE,
                   target: str,
                   name_field: str = "Name",
                   overwrite: bool = False) -> None:
    """Imports a directory of molfiles as a MINE database.

    Parameters
    ----------
    mine_db : MINE
        The database to import the compounds into.
    target : str
        Directory containing the .mol files to import.
    name_field : str, optional
        Field for the compound name, by default "Name".
    overwrite : bool, optional
        Replace old compounds with new ones if a collision happens, by default False.
    """
    # For each .mol file in the directory of the target folder (path):
    for file in os.listdir(target):
        if file.endswith(".mol"):
            # MolFromMolFile (rdkit) generates Mol objects from .mol files
            mol = AllChem.MolFromMolFile(os.path.join(target, file))
            # Strip the .mol extension to get the compound name
            name = os.path.splitext(file)[0]
            # Check that Mol object is successfully generated
            if mol:
                # Create hashkey for the compound
                cpdhash = utils.get_compound_hash(mol)
                # If we don't want to overwrite, and the compound (cpdhash)
                # already exists, then add an extra cpdhash for that molecule
                if not overwrite and mine_db.compounds.count({"_id": cpdhash}):
                    mine_db.compounds.update({"_id": cpdhash},
                                             {"$addToSet": {
                                                 name_field: name
                                             }})
                # If we don't care about overwriting, just insert the new
                # compound into the database
                else:
                    mine_db.insert_compound(
                        mol,
                        compound_dict={
                            name_field: [name],
                            "Generation": 0
                        },
                        pubchem_db=None,
                        kegg_db=None,
                        modelseed_db=None,
                    )
    # Add to log file (metadata)
    mine_db.meta_data.insert({
        "Timestamp": datetime.datetime.now(),
        "Action": "MolFiles Imported",
        "Filepath": target,
    })
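A sketch of importing a directory of molfiles, where the directory is a hypothetical folder of *.mol files and each compound is named after its file:

if __name__ == "__main__":
    db = MINE("example_mine")  # hypothetical database name
    import_mol_dir(db, "molfiles", name_field="Name", overwrite=False)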
Example #4
def export_sdf(mine_db: MINE,
               dir_path: str,
               max_compounds: int = None) -> None:
    """Exports compounds from the database as an MDL SDF file.

    Parameters
    ----------
    mine_db : MINE
        MINE object that contains the database.
    dir_path : str
        Directory for files.
    max_compounds : int, optional
        Maximum number of compounds per file, by default None.
    """

    # Make sure that all compounds point to all their reactants
    if not mine_db.compounds.find_one({"Product_of": {"$exists": 1}}):
        mine_db.add_rxn_pointers()

    print(
        f"Exporting {mine_db.compounds.count()} compounds from {mine_db.name}"
        " as an SDF file")
    target = utils.prevent_overwrite(
        os.path.join(dir_path, mine_db.name) + "_1.sdf")
    # SDWriter (rdkit) writes Mol objects to SD files
    writer = AllChem.SDWriter(target)
    writer.SetKekulize(True)
    n_files = 1
    for compound in mine_db.compounds.find():
        # Convert SMILES string to Mol object, replacing 'CoA' and 'R' by '*'
        mol = AllChem.MolFromSmiles(compound["SMILES"], True, {
            "CoA": "*",
            "R": "*"
        })
        # if Mol object successfully generated, annotate properties
        if mol:
            mol.SetProp("_id", compound["_id"])
            mol.SetProp("Generation", str(compound["Generation"]))
            if "Reactant_in" in compound:
                mol.SetProp("Reactant_in", str(compound["Reactant_in"]))
            if "Product_of" in compound:
                mol.SetProp("Product_of", str(compound["Product_of"]))
            writer.write(mol)
            # Start writing a new sdf file if the maximum (set by user) has
            # been reached for the current file
            if max_compounds and (writer.NumMols() >= max_compounds):
                n_files += 1
                target = utils.prevent_overwrite(
                    os.path.join(dir_path, mine_db.name) + f"_{n_files}.sdf")
                writer.close()
                writer = AllChem.SDWriter(target)
                writer.SetKekulize(True)
    writer.close()
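A sketch of exporting an already-populated MINE to SDF, splitting the output into a new file every 10,000 compounds; the database name and output directory are hypothetical:

if __name__ == "__main__":
    db = MINE("example_mine")
    export_sdf(db, "./exports", max_compounds=10000)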
Example #5
def export_smiles(mine_db: MINE,
                  dir_path: str,
                  max_compounds: int = None) -> None:
    """Exports compounds from the database as a SMILES file.

    Parameters
    ----------
    mine_db : MINE
        MINE object that contains the database.
    dir_path : str
        Directory for files.
    max_compounds : int, optional
        Maximum number of compounds per file, by default None.
    """
    header = ["SMILES", "_id", "Generation", "Reactant_in", "Product_of"]
    # Make sure that all compounds point to all their reactants
    if not mine_db.compounds.find_one({"Product_of": {"$exists": 1}}):
        mine_db.add_rxn_pointers()

    print(
        f"Exporting {mine_db.compounds.count()} compounds from {mine_db.name()}"
        " as SMILES file")
    target = open(
        utils.prevent_overwrite(
            os.path.join(dir_path, mine_db.name) + "_1.smiles"), "w")

    # DictWriter writes each compound document as one tab-delimited row,
    # with the columns given by the header fields (via writerow)
    writer = csv.DictWriter(target, fieldnames=header, dialect="excel-tab")
    n_files = 1
    i = 0
    for compound in mine_db.compounds.find({}, dict([(x, 1) for x in header])):
        writer.writerow(compound)
        i += 1
        # If max compounds per file has been set by user and our number of
        # compounds that we have written so far is divisible by the max number,
        # then we start a new file
        if max_compounds and not i % max_compounds:
            n_files += 1
            target = open(
                utils.prevent_overwrite(
                    os.path.join(dir_path, mine_db.name) +
                    f"_{n_files}.smiles"),
                "w",
            )
            writer = csv.DictWriter(target,
                                    fieldnames=header,
                                    dialect="excel-tab")
Example #6
def test_save_target_mine(default_rule, smiles_dict, coreactant_dict):
    """Test saving the target run to a MINE."""
    delete_database("MINE_test")
    pk = pickaxe.Pickaxe(database="MINE_test", explicit_h=True)
    pk.operators["2.7.1.a"] = default_rule
    pk._load_coreactant(coreactant_dict["ATP"])
    pk._load_coreactant(coreactant_dict["ADP"])
    pk._add_compound("FADH", smiles_dict["FADH"], cpd_type="Starting Compound")
    pk.load_targets(file_dir / "../data/test_targets.csv")
    pk.transform_all(generations=2)
    pk.prune_network_to_targets()

    pk.save_to_mine()
    mine_db = MINE("MINE_test")

    try:
        assert mine_db.compounds.estimated_document_count() == 6
        assert mine_db.reactions.estimated_document_count() == 4
        assert mine_db.operators.estimated_document_count() == 1
        assert mine_db.operators.find_one()["Reactions_predicted"] == 4
        start_comp = mine_db.target_compounds.find_one()
        assert start_comp["InChI_key"] == "RYNUDNWPSBJQQY-UHFFFAOYSA-N"
        assert all([i in start_comp.keys() for i in ["_id", "SMILES", "InChI_key"]])
    finally:
        delete_database("MINE_test")
Example #7
def test_db():
    """Create a test MINE database for testing."""
    datafile_path = file_dir / "data/testing_db.json"
    try:
        testdb = MINE("mongotest")
        with open(datafile_path) as infile:
            jsondb = json.load(infile)
        for doc in jsondb[0]:
            if testdb.compounds.find_one({"_id": doc["_id"]}):
                testdb.compounds.replace_one({"_id": doc["_id"]}, doc)
            else:
                testdb.compounds.insert_one(doc)
        for doc in jsondb[1]:
            if testdb.reactions.find_one({"_id": doc["_id"]}):
                testdb.reactions.replace_one({"_id": doc["_id"]}, doc)
            else:
                testdb.reactions.insert_one(doc)
        for doc in jsondb[2]:
            if testdb.operators.find_one({"_id": doc["_id"]}):
                testdb.operators.replace_one({"_id": doc["_id"]}, doc)
            else:
                testdb.operators.insert_one(doc)

    except ServerSelectionTimeoutError:
        print("No Mongo DB server detected")

    yield testdb
    delete_database("mongotest")
Example #8
def test_db():
    """Create a test MINE database. Created and torn down before and after each
    test it is used in."""
    print(os.path.dirname(__file__))
    datafile_path = os.path.join(os.path.dirname(__file__),
                                 'data/testing_db.json')
    delete_database("mongotest")
    try:
        testdb = MINE("mongotest")
        with open(datafile_path) as infile:
            jsondb = json.load(infile)
        for doc in jsondb[0]:
            if testdb.compounds.find_one({'_id': doc['_id']}):
                testdb.compounds.replace_one({'_id': doc['_id']}, doc)
            else:
                testdb.compounds.insert_one(doc)
        for doc in jsondb[1]:
            if testdb.reactions.find_one({'_id': doc['_id']}):
                testdb.reactions.replace_one({'_id': doc['_id']}, doc)
            else:
                testdb.reactions.insert_one(doc)
        for doc in jsondb[2]:
            if testdb.operators.find_one({'_id': doc['_id']}):
                testdb.operators.replace_one({'_id': doc['_id']}, doc)
            else:
                testdb.operators.insert_one(doc)

    except ServerSelectionTimeoutError:
        print('No Mongo DB server detected')

    yield testdb
Example #9
def test_save_as_mine_multiprocess(default_rule, smiles_dict, coreactant_dict):
    """
    GIVEN a Pickaxe expansion
    WHEN that expansion is saved as a MINE DB in the MongoDB
    THEN make sure that all features are saved in the MongoDB as expected
    """
    delete_database('MINE_test')
    pk = pickaxe.Pickaxe(database='MINE_test', image_dir=DATA_DIR)
    pk.operators['2.7.1.a'] = default_rule
    pk = multiprocess(pk, smiles_dict, coreactant_dict)
    pk.save_to_mine(num_workers=2)
    mine_db = MINE('MINE_test')
    try:
        assert mine_db.compounds.estimated_document_count() == 31
        assert mine_db.reactions.estimated_document_count() == 49
        assert mine_db.operators.estimated_document_count() == 1
        assert os.path.exists(DATA_DIR +
                              '/X9c29f84930a190d9086a46c344020283c85fb917.svg')
        start_comp = mine_db.compounds.find_one({'Type': 'Starting Compound'})
        assert len(start_comp['Reactant_in']) > 0
        # Don't track sources of coreactants
        coreactant = mine_db.compounds.find_one({'Type': 'Coreactant'})
        assert 'Product_of' not in coreactant
        assert 'Reactant_in' not in coreactant
        product = mine_db.compounds.find_one({'Generation': 2})
        assert len(product['Product_of']) > 0
        assert product['Type'] == 'Predicted'
    finally:
        delete_database('MINE_test')
        purge(DATA_DIR, r".*\.svg$")
Example #10
def test_db():
    """Create a test MINE database. Created and torn down before and after each
    test it is used in."""
    try:
        testdb = MINE("mongotest")
    except ServerSelectionTimeoutError:
        print('No Mongo DB server detected')
    yield testdb
Example #11
    def save_to_MINE(self, db_id):
        """Save compounds to a MINE database.
        
        :param db_id: The name of the target database
        :type db_id: basestring
        """
        db = MINE(db_id)
        bulk_c = db.compounds.initialize_unordered_bulk_op()
        bulk_r = db.reactions.initialize_unordered_bulk_op()

        # This loop performs four operations on each reaction:
        #   1. Convert stoich_tuples to dicts with hashes
        #   2. Add reaction links to compounds
        #   3. Add source information to compounds
        #   4. Increment the count of reactions predicted for each relevant
        #      reaction rule
        for rxn in self.reactions.values():
            for x in rxn['Reactants']:
                self.compounds[x.c_id]['Reactant_in'].append(rxn['_id'])
            for x in rxn['Products']:
                self.compounds[x.c_id]['Product_of'].append(rxn['_id'])
                # Don't track sources of coreactants
                if x.c_id[0] == 'X':
                    continue
                self.compounds[x.c_id]['Sources'].append({
                    "Compounds": [x.c_id for x in rxn['Reactants']],
                    "Operators":
                    list(rxn["Operators"])
                })
            # Increment the number of reactions predicted
            for op in rxn['Reaction_rules']:
                self.rxn_rules[op][1]['Reactions_predicted'] += 1
            db.insert_reaction(rxn, bulk=bulk_r)
        if self.reactions:
            bulk_r.execute()
            db.meta_data.insert({
                "Timestamp": datetime.datetime.now(),
                "Action": "Reactions Inserted"
            })

        for comp_dict in self.compounds.values():
            db.insert_compound(AllChem.MolFromSmiles(comp_dict['SMILES']),
                               comp_dict,
                               bulk=bulk_c)
        bulk_c.execute()
        db.meta_data.insert({
            "Timestamp": datetime.datetime.now(),
            "Action": "Compounds Inserted"
        })

        for x in self.rxn_rules.values():
            # There are fewer reaction rules so bulk operations are not
            # really faster.
            db.operators.save(x[1])
        db.build_indexes()
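A brief usage sketch, assuming pk is a Pickaxe object that has already run an expansion (as in the test examples in this collection) and that a local MongoDB is reachable; "example_mine" is a hypothetical database name:

pk.save_to_MINE("example_mine")
check_db = MINE("example_mine")
print(check_db.compounds.estimated_document_count(), "compounds saved")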
Example #12
 def load_compound_set(self,
                       compound_file=None,
                       structure_field=None,
                       id_field='id'):
     """If a compound file is provided, this function loads the compounds
     into it's internal dictionary. If not, it attempts to find the
     compounds in it's associated MINE database.
     
     :param compound_file: Path to a file containing compounds as tsv
     :type compound_file: basestring
     :param structure_field: the name of the column containing the
         structure representation as InChI or SMILES (Default: 'structure')
     :type structure_field: str
     :param id_field: the name of the column containing the desired
         compound ID (Default: 'id')
     :type id_field: str
     :return: compound SMILES
     :rtype: list
     """
     compound_smiles = []
     if compound_file:
         for line in utils.file_to_dict_list(compound_file):
             mol = self._mol_from_dict(line, structure_field)
             if not mol:
                 continue
             # Add compound to internal dictionary as a starting
             # compound and store SMILES string to be returned
             smi = AllChem.MolToSmiles(mol, True)
             _id = line[id_field]
             # Do not operate on inorganic compounds
             if "C" in smi or "c" in smi:
                 AllChem.SanitizeMol(mol)
                 self._add_compound(_id,
                                    smi,
                                    mol=mol,
                                    type='Starting Compound')
                 compound_smiles.append(smi)
     # If a MINE database is being used instead, search for compounds
     # annotated as starting compounds and return those as a list of
     # SMILES strings
     elif self.mine:
         db = MINE(self.mine)
         for compound in db.compounds.find():
             _id = compound['_id']
             smi = compound['SMILES']
             # Assume unannotated compounds are starting compounds
             if 'type' not in compound:
                 compound['Type'] = 'Starting Compound'
             self._add_compound(_id, smi, type=compound['Type'])
             compound_smiles.append(smi)
     else:
         raise ValueError('No input file or database specified for '
                          'starting compounds')
     print("%s compounds loaded" % len(compound_smiles))
     return compound_smiles
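A sketch of loading starting compounds from a tab-separated file, assuming a Pickaxe object constructed as in the later constructor example; the file name and its two columns below are hypothetical:

# starting_compounds.tsv (hypothetical contents):
# id        structure
# glucose   OC1OC(CO)C(O)C(O)C1O
pk = Pickaxe()
starting_smiles = pk.load_compound_set(compound_file="starting_compounds.tsv",
                                       structure_field="structure",
                                       id_field="id")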
Example #13
def test_mongo_cli():
    """Test command line interface writing to mongo."""
    mine = MINE("tests")
    os.chdir(file_dir / "../data/../..")
    rc = subprocess.call(
        "python minedatabase/pickaxe.py -d tests -r tests/data/test_cd_rxn_rule.tsv",
        shell=True,
    )
    assert not rc

    try:
        assert mine.compounds.estimated_document_count() == 51
    finally:
        mine.client.drop_database("tests")

    purge(file_dir / "..", r".*\.svg$")
Example #14
def test_save_no_rxn_mine():
    """
    GIVEN a Pickaxe object with no expansion
    WHEN that Pickaxe object is saved into a MINE DB in the MongoDB
    THEN check that starting compounds are present and that no reactions exist
    """
    delete_database('MINE_test')
    pk = pickaxe.Pickaxe(database='MINE_test')
    pk.load_compound_set(compound_file=DATA_DIR + '/test_compounds.tsv')
    pk.save_to_mine(num_workers=1)
    mine_db = MINE('MINE_test')
    try:
        assert mine_db.compounds.estimated_document_count() == 14
        assert mine_db.reactions.estimated_document_count() == 0
    finally:
        delete_database('MINE_test')
Example #15
def make_box_plots(db_list, prop_list=('Mass', 'logP', 'NP_likeness')):
    """Draw box plots comparing compound properties across MINE databases."""
    df = pandas.DataFrame()
    for db_name in db_list:
        db = MINE(db_name)
        new_name = str(db_name.replace('exp2', 'MINE').split('-')[0])
        l = []
        cursor = db.compounds.find(
            dict([(x, {
                '$exists': 1
            }) for x in prop_list]),
            dict([('_id', 0)] + [(x, 1) for x in prop_list]))
        for x in cursor:
            x['DB'] = new_name
            l.append(x)
        df = df.append(l)
    f, ax = plt.subplots(1, len(prop_list))
    for i, prop in enumerate(prop_list):
        seaborn.boxplot(x='DB', y=prop, data=df, ax=ax[i], showfliers=False)
    plt.tight_layout()
    plt.savefig("MINE property comparison.png")
Example #16
def make_fp_heatmap(db_name, fp_type='MACCS', n_rows=25):
    """Plot a heatmap of fingerprint-bit prevalence by generation for a MINE database."""
    db = MINE(db_name)
    data = defaultdict(Counter)
    for comp in db.compounds.find({}, {"_id": 0, "Generation": 1, fp_type: 1}):
        if fp_type in comp and int(comp['Generation']) > -1:
            data[int(comp['Generation'])].update(comp[fp_type])
    df = pandas.DataFrame(data)
    df_norm = df.div(df.max(axis=0), axis=1)
    if not n_rows:
        df_top = df_norm
    else:
        df_norm['range'] = df_norm.max(axis=1) - df_norm.min(axis=1)
        df_top = df_norm.sort_values('range', ascending=False).head(
            int(n_rows)).iloc[:, :-1]
    hm = seaborn.heatmap(df_top)
    hm.collections[0].colorbar.set_label("Prevalence")
    plt.xlabel('Generation')
    plt.ylabel(fp_type + " bit")
    plt.yticks(rotation=0)
    plt.savefig(db_name + '_fp_heatmap.png')
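A sketch, assuming a database whose compound documents carry a "MACCS" fingerprint field and a "Generation" field; the database name is hypothetical and the figure is written next to the script:

make_fp_heatmap("example_mine", fp_type="MACCS", n_rows=25)
# output: example_mine_fp_heatmap.png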
Example #17
def test_save_as_mine(default_rule, smiles_dict, coreactant_dict):
    """Test saving compounds to database.

    GIVEN a Pickaxe expansion
    WHEN that expansion is saved as a MINE DB in the MongoDB
    THEN make sure that all features are saved in the MongoDB as expected
    """
    DATA_DIR = (file_dir / "../data").resolve()
    delete_database("MINE_test")
    pk = pickaxe.Pickaxe(database="MINE_test", image_dir=DATA_DIR, explicit_h=True)
    pk.operators["2.7.1.a"] = default_rule
    pk._load_coreactant(coreactant_dict["ATP"])
    pk._load_coreactant(coreactant_dict["ADP"])
    pk._add_compound("FADH", smiles_dict["FADH"], cpd_type="Starting Compound")
    pk.transform_all(generations=2)
    pk.save_to_mine(processes=1)
    mine_db = MINE("MINE_test")

    try:

        assert mine_db.compounds.estimated_document_count() == 31
        assert mine_db.reactions.estimated_document_count() == 49
        assert mine_db.operators.estimated_document_count() == 1
        assert mine_db.operators.find_one()["Reactions_predicted"] == 49
        assert os.path.exists(
            DATA_DIR / "X9c29f84930a190d9086a46c344020283c85fb917.svg"
        )
        start_comp = mine_db.compounds.find_one({"Type": "Starting Compound"})
        assert len(start_comp["Reactant_in"]) > 0
        # Don't track sources of coreactants
        coreactant = mine_db.compounds.find_one({"Type": "Coreactant"})
        assert "Product_of" not in coreactant
        assert "Reactant_in" not in coreactant
        product = mine_db.compounds.find_one({"Generation": 2})
        assert len(product["Product_of"]) > 0
        assert product["Type"] == "Predicted"
    finally:
        delete_database("MINE_test")
        purge(DATA_DIR, r".*\.svg$")
Example #18
def make_violin_plots(db_list, prop_list=('Mass', 'logP', 'NP_likeness')):
    """Draw split violin plots comparing compound properties across MINE databases."""
    df = pandas.DataFrame()
    for db_name in db_list:
        db = MINE(db_name)
        l = []
        cursor = db.compounds.find({"Type": {
            '$ne': 'Coreactant'
        }}, dict([('_id', 0), ('Type', 1)] + [(x, 1) for x in prop_list]))
        for x in cursor:
            x['DB'] = str(db_name.strip('exp2'))
            l.append(x)
        df = df.append(l)
    f, ax = plt.subplots(1, len(prop_list))
    for i, prop in enumerate(prop_list):
        seaborn.violinplot(split=True,
                           hue='Type',
                           x='DB',
                           y=prop,
                           data=df,
                           ax=ax[i])
        if i > 0:
            ax[i].legend_.remove()
    plt.tight_layout()
    plt.savefig("MINE property comparison.png")
Example #19
    def __init__(self,
                 rule_list=None,
                 coreactant_list=None,
                 explicit_h=True,
                 kekulize=True,
                 neutralise=True,
                 errors=True,
                 racemize=False,
                 database=None,
                 image_dir=None):
        """This class generates new compounds from user-specified starting
        compounds using a set of SMARTS-based reaction rules. It may be
        initialized with a text file containing the reaction rules and
        coreactants or this may be done on an ad hoc basis.

        :param rule_list: Path to a list of reaction rules in TSV form
        :type rule_list: str
        :param coreactant_list: Path to list of coreactants in TSV form
        :type coreactant_list: str
        :param explicit_h: Explicitly represent bound hydrogen atoms
        :type explicit_h: bool
        :param kekulize: Kekulize structures before applying reaction rules
        :type kekulize: bool
        :param neutralise: Remove charges on structure before applying reaction
            rules
        :type neutralise: bool
        :param errors: Print underlying RDKit warnings and halt on error
        :type errors: bool
        :param racemize: Enumerate all possible chiral forms of a molecule if
            unspecified stereocenters exist
        :type racemize: bool
        :param database: Name of desired Mongo Database
        :type database: str
        :param image_dir: Path to desired image folder
        :type image_dir: str
        """
        self.rxn_rules = {}
        self.coreactants = {}
        self._raw_compounds = {}
        self.compounds = {}
        self.reactions = {}
        self.generation = 0
        self.explicit_h = explicit_h
        self.kekulize = kekulize
        self.racemize = racemize
        self.neutralise = neutralise
        self.image_dir = image_dir
        self.errors = errors
        self.fragmented_mols = False
        self.radical_check = False
        self.structure_field = None
        # If a database is specified, warn when it already contains compounds,
        # since an expansion will overwrite existing entries
        if database:
            self.mine = database
            db = MINE(database)
            if db.compounds.count():
                print(
                    "Warning: expansion will overwrite existing compounds and"
                    " operators!")
        else:
            self.mine = None

        # Use RDLogger to control RDKit logging. SetLevel sets the verbosity
        # (0 - debug, 1 - info, 2 - warning, 3 - critical); 4 silences all output.
        from rdkit import RDLogger
        lg = RDLogger.logger()
        if not errors:
            lg.setLevel(4)

        # Load coreactants (if any) into Pickaxe object
        if coreactant_list:
            with open(coreactant_list) as infile:
                for coreactant in infile:
                    self._load_coreactant(coreactant)

        # Load rules (if any) into Pickaxe object
        if rule_list:
            self.load_rxn_rules(rule_list)
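A construction sketch with hypothetical rule and coreactant file paths; errors=False silences RDKit logging and database=None skips the MongoDB overwrite warning:

pk = Pickaxe(rule_list="reaction_rules.tsv",     # hypothetical TSV of SMARTS rules
             coreactant_list="coreactants.tsv",  # hypothetical TSV of coreactants
             explicit_h=True,
             kekulize=True,
             errors=False,
             database=None)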
Example #20
    mine_db.meta_data.insert({
        "Timestamp": datetime.datetime.now(),
        "Action": "MolFiles Imported",
        "Filepath": target
    })


if __name__ == '__main__':
    # User inputs task as first argument (export-sdf, export-smi, export-mol,
    #  import-sdf, import-smi, or import-mol)
    task = sys.argv[1]
    # User inputs database name as second argument
    db_name = sys.argv[2]
    # User inputs file path as third argument
    path = sys.argv[3]
    database = MINE(db_name)
    if task == 'export-sdf':
        # If a maximum molecules per file is specified (fourth argument
        # entered by user), then pass that to the export function.
        if len(sys.argv) == 5:
            export_sdf(database, path, int(sys.argv[4]))
        # Otherwise, assume an unlimited number of molecules per file
        else:
            export_sdf(database, path)
    elif task == 'export-smi':
        # If a maximum molecules per file is specified (fourth argument
        # entered by user), then pass that to the export function.
        if len(sys.argv) == 5:
            export_smiles(database, path, int(sys.argv[4]))
        # Otherwise, assume an unlimited number of molecules per file
        else:
            export_smiles(database, path)
Example #21
                abrv[row['Abbreviation'].strip()] = c_id
                if pic_dir:
                    rc = subprocess.call("/Applications/ChemAxon/JChem/bin/molconvert -o %s/temp.png png:-a,w500 -s "
                                         "'%s'" % (pic_dir, row['SMILES'].strip()), shell=True)
                    if not rc:
                        os.rename(pic_dir + "temp.png", pic_dir + c_id + ".png")
            else:
                print("Failed to parse %s" % row['SMILES'])
        else:
            print('SMILES missing from %s' % row.name)

    reactions['Type of Reaction'].fillna(method='ffill', inplace=True)
    for i, row in reactions.iterrows():
        if row['Equation (Abbreviations)']:
            rxn = row[['Metabolite', 'Equation (full names)']].to_dict()
            if isinstance(row['PMID or doi'], str):
                rxn['References'] = row['PMID or doi'].strip().split('; ')
            else:
                rxn['References'] = [str(row['PMID or doi'])]

            rxn['Type'] = str(row['Type of Reaction']).strip()
            rxn['Notes'] = str(row['Comments']).strip()
            rxn['Reactants'], rxn['Products'] = utils.parse_text_rxn(row['Equation (Abbreviations)'], ' = ', ' + ', abrv)
            rxn['InChI_hash'] = utils._calculate_rxn_hash(mine_db, rxn['Reactants'], rxn['Products'])
            mine_db.insert_reaction(rxn)
        else:
            print('RXN missing from %s' % row.name)

if __name__ == '__main__':
    mine = MINE(sys.argv[1])
    load_cdmine_rxns(mine, sys.argv[2])
Example #22
def delete_database(name):
    """Delete a MINE database and close the client connection."""
    mine = MINE(name)
    mine.client.drop_database(name)
    mine.client.close()
Example #23
import pandas
import seaborn
import matplotlib.pyplot as plt
import numpy
from minedatabase.databases import MINE
import sys

db = MINE(sys.argv[1])
fields = ['Compounds', 'Compound_ids', 'Reactions', 'Operators']


def pw_jaccard(series, reduce=numpy.median):
    pw = []
    for i, x in enumerate(series):
        tc = []
        for j, y in enumerate(series):
            if i != j:
                tc.append(len(x & y) / float(len(x | y)))
        pw.append(reduce(tc))
    return pw


keys = {}
results = []
for model in db.models.find():
    results.append([model['_id']] + [
        set([y[0] for y in model[x]]) if isinstance(model[x][0], list)
        else set(model[x])
        for x in fields
    ])
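A small worked example of the pairwise Jaccard helper on toy sets; the values are illustrative only and not drawn from a real MINE model collection:

toy_sets = [{'C1', 'C2', 'C3'}, {'C2', 'C3'}, {'C3', 'C4'}]
print(pw_jaccard(toy_sets))                     # median Jaccard similarity per set
print(pw_jaccard(toy_sets, reduce=numpy.mean))  # mean instead of median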
Example #24
                    print(r_atoms, p_atoms)
                    raise ValueError('Unbalanced Reaction: %s' %
                                     rxn['MetaCyc ID'])
                if sorted(rxn['Reactants']) == sorted(rxn['Products']):
                    raise ValueError('No Change: %s' % rxn['MetaCyc ID'])

            except ValueError as e:
                print(e)
                continue
            mine_db.insert_reaction(rxn)
            """reactions = pd.read_csv(csv_path, sep='\t', error_bad_lines=False).fillna("")
                for i, row in reactions.iterrows():
                    rxn = row[['MetaCyc ID']].to_dict()
                    rxn['Metabolite'], rxn['Type'] = "", "" """


def add_metacyc_comps(metacyc_db, mine_db):
    c_ids = set(mine_db.reactions.distinct("Reactants.c_id"))
    c_ids |= set(mine_db.reactions.distinct("Products.c_id"))
    for _id in c_ids:
        if not mine_db.compounds.count({"_id": _id}):
            comp = metacyc_db.compounds.find_one({"_id": _id})
            mine_db.compounds.insert(comp)


if __name__ == '__main__':
    AllChem.WrapLogs()
    db = MINE(sys.argv[1])
    hash_dict = dict_from_sdf(sys.argv[2])
    add_metacyc_rxns(db, sys.argv[3], hash_dict)
    add_metacyc_comps(db, MINE(sys.argv[1]))
Example #25
    mine_db.meta_data.insert({
        "Timestamp": datetime.datetime.now(),
        "Action": "MolFiles Imported",
        "Filepath": target
    })


if __name__ == '__main__':
    # User inputs task as first argument (export-sdf, export-smi, export-mol,
    #  import-sdf, import-smi, or import-mol)
    TASK = sys.argv[1]
    # User inputs database name as second argument
    DB_NAME = sys.argv[2]
    # User inputs file path as third argument
    PATH = sys.argv[3]
    database = MINE(DB_NAME)  # pylint: disable=invalid-name
    if TASK == 'export-sdf':
        # If a maximum molecules per file is specified (fourth argument
        # entered by user), then pass that to the export function.
        if len(sys.argv) == 5:
            export_sdf(database, PATH, int(sys.argv[4]))
        # Otherwise, assume an unlimited number of molecules per file
        else:
            export_sdf(database, PATH)
    elif TASK == 'export-smi':
        # If a maximum molecules per file is specified (fourth argument
        # entered by user), then pass that to the export function.
        if len(sys.argv) == 5:
            export_smiles(database, PATH, int(sys.argv[4]))
        # Otherwise, assume an unlimited number of molecules per file
        else:
            export_smiles(database, PATH)
Example #26
def delete_database(name):
    """Delete database."""
    mine = MINE(name)
    mine.client.drop_database(name)
    mine.client.close()
Example #27
    'cpd02857',
    'cpd00031',
    'cpd00038',
    'cpd00126',
    'cpd00241',
    'cpd00295',
    'cpd02552',
    'cpd00338',
    'cpd00683',
    'cpd00171',
    'cpd00198',
    'cpd00238',
    'cpd01977',
    'cpd00051',
    'cpd02069',
]

db = MINE('plant_spontanious')
rxn_ids = set()
for cpd_id in top_30:
    cpd = db.compounds.find_one({"DB_links": {
        'Model_SEED': cpd_id
    }}, {'Reactant_in': 1})
    if cpd:
        print(cpd_id)
        rxn_ids.update(cpd.get('Reactant_in'))
    else:
        print("Can't find: {}".format(cpd_id))
print("Printing {} rxns".format(len(rxn_ids)))
export_inchi_rxns(db, "./", list(rxn_ids))
Example #28
        if cleanup:
            os.remove(os.path.join(result_dir, spec_file))


if __name__ == "__main__":

    # pylint: disable=invalid-name
    # collect user input
    if sys.argv[1] == "calculate":
        db_name = sys.argv[2]
        file_dir = sys.argv[3]
        job_comp_number = int(sys.argv[4])
        spec_type = sys.argv[5]
        if len(sys.argv) == 7:
            job_template = sys.argv[6]
        else:
            job_template = None

        db = MINE(db_name)
        start_cfm_jobs(file_dir,
                       db,
                       spec_type,
                       job_template=job_template,
                       job_comp_number=job_comp_number)

    if sys.argv[1] == 'load':
        result_dir = sys.argv[2]
        spec_type = sys.argv[3]
        dbs = [MINE(x) for x in sys.argv[4:]]
        load_cfm_results(result_dir, dbs, spec_type=spec_type)