Esempio n. 1
0
    def link_to_external_database(self,
                                  external_database,
                                  compound=None,
                                  match_field="Inchikey",
                                  fields_to_copy=None):
        """Copy fields from matching compounds in an external database.

        Looks for compounds in another database (e.g. PubChem) whose
        ``match_field`` value equals that of the given compound and merges
        the requested fields into the compound.

        :param external_database: The name of the database to search for
            matching compounds
        :type external_database: str
        :param compound: The compound to search for external links. If
            falsy (``None`` or empty), every compound in this database is
            linked and saved back instead.
        :type compound: dict
        :param match_field: The field to search on for matching compounds
        :type match_field: str
        :param fields_to_copy: Data to copy into the mine database. The first
            element of each entry is the field name in the external database;
            the second is the field name in the MINE database where the data
            will be copied. ``None`` (the default) is treated as an empty
            list, i.e. nothing is copied.
        :type fields_to_copy: list(tuple)
        :return: When ``compound`` is given, the compound after merging,
            passed through ``utils.convert_sets_to_lists``. When linking the
            whole database, ``None``.
        :rtype: dict or None
        """
        # The default of None must not reach the loops below, which iterate
        # over fields_to_copy unconditionally.
        if fields_to_copy is None:
            fields_to_copy = []

        if compound:
            ext = MINE(external_database)
            # Request only the external fields that will be copied
            projection = {"_id": 0}
            projection.update({field[0]: 1 for field in fields_to_copy})
            # Find compounds with the same match_field value in the external
            # database
            query = {match_field: compound[match_field]}
            for ext_comp in ext.compounds.find(query, projection):
                for field in fields_to_copy:
                    if field[0] in ext_comp:
                        # dict_merge merges two dictionaries using sets to
                        # avoid duplicate values
                        utils.dict_merge(
                            compound,
                            utils.save_dotted_field(
                                field[1],
                                utils.get_dotted_field(ext_comp, field[0])))
            return utils.convert_sets_to_lists(compound)

        # If compound is None, link all compounds in database
        else:
            # NOTE(review): if self.compounds is a PyMongo collection, save()
            # is no longer available in PyMongo 4 -- confirm pinned version.
            for comp in self.compounds.find():
                self.compounds.save(
                    self.link_to_external_database(
                        external_database,
                        compound=comp,
                        match_field=match_field,
                        fields_to_copy=fields_to_copy))
Esempio n. 2
0
def export_mol(mine_db, target, name_field='_id'):
    """Exports compounds from the database as MDL molfiles

    One ``<name>.mol`` file is written into ``target`` for each compound
    whose ``_id`` starts with "C".

    :param mine_db: The database to export
    :type mine_db: a MINE object
    :param target: a directory in which to place the files. It is created,
        together with any missing parent directories, if it does not exist.
    :type target: str
    :param name_field: the field to provide names for the mol files. Must be
        unique & universal
    :type name_field: str
    :raises ValueError: if name_field is not present on every compound
    :return: None
    :rtype: None
    """
    # Create the output directory. makedirs with exist_ok=True also builds
    # missing parent directories and tolerates an already existing directory,
    # which a bare exists()/mkdir() pair does not.
    os.makedirs(target, exist_ok=True)

    # Let user know if an id does not exist for every compound in database
    # NOTE(review): if compounds is a PyMongo collection, Cursor.count() is
    # gone in PyMongo 4 -- confirm the pinned driver version.
    total = mine_db.compounds.find().count()
    named = mine_db.compounds.find({name_field: {'$exists': 1}}).count()
    if total != named:
        raise ValueError(
            '%s does not exist for every compound in the database' %
            name_field)

    for compound in mine_db.compounds.find({'_id': {'$regex': '^C'}}):
        # Create Mol object from SMILES code for each compound using
        # MolFromSmiles (rdkit), replacing CoA and R with *.
        # NOTE(review): the positional True appears to be RDKit's `sanitize`
        # argument rather than a stereochemistry switch -- confirm.
        mol = AllChem.MolFromSmiles(compound['SMILES'], True, {
            'CoA': '*',
            'R': "*"
        })
        # Dotted names address nested fields; flatten the value onto the
        # compound so it can be read with a plain key below.
        if "." in name_field:
            compound[name_field] = utils.get_dotted_field(compound, name_field)
        # Make things more compact and look nicer
        if isinstance(compound[name_field], list):
            compound[name_field] = ','.join(compound[name_field])
        # Use MolToMolFile (rdkit) to create a mol file from the Mol object
        # with the file path specified.
        AllChem.MolToMolFile(
            mol, os.path.join(target, compound[name_field] + '.mol'))
Esempio n. 3
0
def export_mol(mine_db: MINE, target: str, name_field: str = "_id") -> None:
    """Exports compounds from the database as MDL molfiles

    One ``<name>.mol`` file is written into ``target`` for each compound
    whose ``_id`` starts with "C".

    Parameters
    ----------
    mine_db : MINE
        MINE object that contains the database.
    target : str
        Directory in which to place the files. It is created, together with
        any missing parent directories, if it does not exist.
    name_field : str, optional
        Field to provide names for the mol files. Must be unique and
        universal. By default, "_id".

    Raises
    ------
    ValueError
        If name_field is not present on every compound in the database.
    """
    # Create the output directory. makedirs with exist_ok=True also builds
    # missing parent directories and tolerates an already existing directory,
    # which a bare exists()/mkdir() pair does not.
    os.makedirs(target, exist_ok=True)

    # Let user know if an id does not exist for every compound in database
    # NOTE(review): if compounds is a PyMongo collection, Cursor.count() is
    # gone in PyMongo 4 -- confirm the pinned driver version.
    total = mine_db.compounds.find().count()
    named = mine_db.compounds.find({name_field: {"$exists": 1}}).count()
    if total != named:
        raise ValueError(
            f"{name_field} does not exist for every compound in the database")

    for compound in mine_db.compounds.find({"_id": {"$regex": "^C"}}):
        # Create Mol object from SMILES code for each compound using
        # MolFromSmiles (rdkit), replacing CoA and R with *.
        # NOTE(review): the positional True appears to be RDKit's `sanitize`
        # argument rather than a stereochemistry switch -- confirm.
        mol = AllChem.MolFromSmiles(compound["SMILES"], True, {
            "CoA": "*",
            "R": "*"
        })
        # Dotted names address nested fields; flatten the value onto the
        # compound so it can be read with a plain key below.
        if "." in name_field:
            compound[name_field] = utils.get_dotted_field(compound, name_field)
        # Make things more compact and look nicer
        if isinstance(compound[name_field], list):
            compound[name_field] = ",".join(compound[name_field])
        # Use MolToMolFile (rdkit) to create a mol file from the Mol object
        # with the file path specified.
        AllChem.MolToMolFile(
            mol, os.path.join(target, compound[name_field] + ".mol"))
Esempio n. 4
0
def make_hash_dict(db, key_field):
    """Build a lookup from each value stored under key_field to a compound id.

    Only compounds that have ``key_field`` are queried. The value read from
    each compound via ``utils.get_dotted_field`` is iterated, and every item
    becomes a key mapped to that compound's ``_id``. When the same item
    occurs for several compounds, the last one returned by the query wins.

    :param db: object exposing a ``compounds`` collection with ``find``
    :param key_field: name of the field whose items become dictionary keys
    :type key_field: str
    :return: mapping of item -> compound ``_id``
    :rtype: dict
    """
    query = {key_field: {'$exists': 1}}
    projection = {key_field: 1}
    return {
        item: doc['_id']
        for doc in db.compounds.find(query, projection)
        for item in utils.get_dotted_field(doc, key_field)
    }