Beispiel #1
0
def export_sdf(mine_db: MINE,
               dir_path: str,
               max_compounds: int = None) -> None:
    """Exports compounds from the database as one or more MDL SDF files.

    Files are named ``<mine_db.name>_<n>.sdf`` (n = 1, 2, ...) inside
    ``dir_path``; each candidate path is passed through
    ``utils.prevent_overwrite`` before it is opened. Compounds whose SMILES
    string cannot be converted into a Mol object are skipped.

    Each exported molecule carries the properties ``_id`` and ``Generation``
    and, when present on the compound, ``Reactant_in`` and ``Product_of``.

    Parameters
    ----------
    mine_db : MINE
        MINE object that contains the database.
    dir_path : str
        Directory for files.
    max_compounds : int, optional
        Maximum number of compounds per file, by default None (everything is
        written to a single file).
    """

    # The reaction pointers are exported as SD properties, so have the
    # database add them if no compound has a "Product_of" field yet.
    if not mine_db.compounds.find_one({"Product_of": {"$exists": 1}}):
        mine_db.add_rxn_pointers()

    print(
        f"Exporting {mine_db.compounds.count()} compounds from {mine_db.name}"
        " as an SDF file")

    base_path = os.path.join(dir_path, mine_db.name)
    # 'CoA' and 'R' are replaced by '*' before the SMILES string is parsed
    smiles_replacements = {"CoA": "*", "R": "*"}

    def _open_writer(file_number):
        """Open a kekulizing SDWriter (rdkit) for the given file number."""
        target = utils.prevent_overwrite(f"{base_path}_{file_number}.sdf")
        new_writer = AllChem.SDWriter(target)
        new_writer.SetKekulize(True)
        return new_writer

    n_files = 1
    writer = _open_writer(n_files)
    try:
        for compound in mine_db.compounds.find():
            mol = AllChem.MolFromSmiles(compound["SMILES"], True,
                                        smiles_replacements)
            # Skip compounds for which no Mol object could be generated
            if not mol:
                continue

            mol.SetProp("_id", compound["_id"])
            mol.SetProp("Generation", str(compound["Generation"]))
            if "Reactant_in" in compound:
                mol.SetProp("Reactant_in", str(compound["Reactant_in"]))
            if "Product_of" in compound:
                mol.SetProp("Product_of", str(compound["Product_of"]))
            writer.write(mol)

            # Start writing a new sdf file if the maximum (set by user) has
            # been reached for the current file. The next writer is opened
            # before the current one is closed so that `writer` always refers
            # to a writer that still needs closing.
            if max_compounds and writer.NumMols() >= max_compounds:
                next_writer = _open_writer(n_files + 1)
                writer.close()
                writer = next_writer
                n_files += 1
    finally:
        # Flush and release the current file even if an error interrupted
        # the export.
        writer.close()
Beispiel #2
0
def export_smiles(mine_db: MINE,
                  dir_path: str,
                  max_compounds: int = None) -> None:
    """Exports compounds from the database as one or more SMILES files.

    Files are named ``<mine_db.name>_<n>.smiles`` (n = 1, 2, ...) inside
    ``dir_path``; each candidate path is passed through
    ``utils.prevent_overwrite`` before it is opened. Every compound becomes
    one tab-separated row with the columns SMILES, _id, Generation,
    Reactant_in and Product_of. No header row is written.

    Parameters
    ----------
    mine_db : MINE
        MINE object that contains the database.
    dir_path : str
        Directory for files.
    max_compounds : int, optional
        Maximum number of compounds per file, by default None (everything is
        written to a single file).
    """
    header = ["SMILES", "_id", "Generation", "Reactant_in", "Product_of"]

    # The reaction pointers are exported as columns, so have the database
    # add them if no compound has a "Product_of" field yet.
    if not mine_db.compounds.find_one({"Product_of": {"$exists": 1}}):
        mine_db.add_rxn_pointers()

    print(
        f"Exporting {mine_db.compounds.count()} compounds from {mine_db.name}"
        " as SMILES file")

    base_path = os.path.join(dir_path, mine_db.name)
    # Only request the fields that are actually written out
    projection = {field: 1 for field in header}

    def _open_target(file_number):
        """Open the output file for the given file number."""
        path = utils.prevent_overwrite(f"{base_path}_{file_number}.smiles")
        # newline="" lets the csv module control line endings itself
        return open(path, "w", newline="")

    def _make_writer(file_obj):
        """Create a DictWriter emitting one tab-separated row per compound."""
        return csv.DictWriter(file_obj,
                              fieldnames=header,
                              dialect="excel-tab")

    n_files = 1
    target = _open_target(n_files)
    try:
        writer = _make_writer(target)
        compounds = mine_db.compounds.find({}, projection)
        for i, compound in enumerate(compounds, start=1):
            writer.writerow(compound)
            # If max compounds per file has been set by user and the number
            # of compounds written so far is divisible by the max number,
            # then we start a new file. The next file is opened before the
            # current one is closed so that `target` always refers to a file
            # that still needs closing.
            if max_compounds and not i % max_compounds:
                next_target = _open_target(n_files + 1)
                target.close()
                target = next_target
                n_files += 1
                writer = _make_writer(target)
    finally:
        # Flush and release the current file even if an error interrupted
        # the export.
        target.close()