예제 #1
0
 def setUp(self):
     """Write two small Dataset files into a fresh temporary directory.

     ``dataset1.pbtxt`` holds one populated reaction; ``dataset2.pbtxt``
     holds that same reaction followed by an empty one. The file paths are
     stored on ``self.dataset1_filename`` and ``self.dataset2_filename``.
     """
     super().setUp()
     # Keep RDKit quiet so the test output stays readable.
     RDLogger.logger().setLevel(RDLogger.CRITICAL)
     self.test_subdirectory = tempfile.mkdtemp(dir=flags.FLAGS.test_tmpdir)

     # reaction1: one input with a single limiting component, plus one
     # outcome carrying a conversion value.
     reaction1 = reaction_pb2.Reaction()
     component = reaction1.inputs['dummy_input'].components.add()
     identifier = component.identifiers.add(type='CUSTOM')
     identifier.details = 'custom_identifier'
     identifier.value = 'custom_value'
     component.is_limiting = reaction_pb2.Boolean.TRUE
     component.mass.value = 1
     component.mass.units = reaction_pb2.Mass.GRAM
     outcome = reaction1.outcomes.add()
     outcome.conversion.value = 75

     self.dataset1_filename = os.path.join(self.test_subdirectory,
                                           'dataset1.pbtxt')
     message_helpers.write_message(
         dataset_pb2.Dataset(reactions=[reaction1]), self.dataset1_filename)

     # The second dataset adds a completely empty reaction.
     empty_reaction = reaction_pb2.Reaction()
     self.dataset2_filename = os.path.join(self.test_subdirectory,
                                           'dataset2.pbtxt')
     message_helpers.write_message(
         dataset_pb2.Dataset(reactions=[reaction1, empty_reaction]),
         self.dataset2_filename)
예제 #2
0
def suppress_warnings():
    """Silence RDKit log output below CRITICAL and ignore two warning classes.

    Installs "ignore" filters for ``UserWarning`` and ``FutureWarning``.
    """
    from rdkit import RDLogger
    import warnings

    RDLogger.logger().setLevel(RDLogger.CRITICAL)
    for noisy_category in (UserWarning, FutureWarning):
        warnings.filterwarnings(action="ignore", category=noisy_category)
예제 #3
0
def check_bondtype_change(reactions):
    '''
    Keep a reaction only if one of its actions edits an existing bond.

    A bond may be broken, formed (three kinds) or changed (three kinds);
    this function checks for the "changed" case.

    ``reactions[0]`` is parsed as the reactant SMILES and ``reactions[2]``
    as a ``;``-separated list of ``a-b-value`` actions. Each action is
    stored under the key ``"<min>-<max>"`` of its two integers with the
    value ``int(float(value) - 1)``. If any bond of the reactant molecule
    has atom-map numbers matching a stored key whose value is not -1, the
    input ``reactions`` is returned unchanged; otherwise ``['', '', '']``.
    '''
    from rdkit import RDLogger
    RDLogger.logger().setLevel(RDLogger.CRITICAL)

    reactant_smiles, action_string = reactions[0], reactions[2]

    mol = Chem.MolFromSmiles(reactant_smiles)

    bond_type_to_channel = {
        Chem.BondType.SINGLE: 0,
        Chem.BondType.DOUBLE: 1,
        Chem.BondType.TRIPLE: 2,
        Chem.BondType.AROMATIC: 3
    }

    value_by_pair = {}
    for action in action_string.split(';'):
        fields = action.split('-')
        # The value is computed before the key, as in a plain
        # ``d[key] = value`` statement.
        stored_value = int(float(fields[2]) - 1)
        first, second = int(fields[0]), int(fields[1])
        pair = str(min(first, second)) + '-' + str(max(first, second))
        value_by_pair[pair] = stored_value

    for bond in mol.GetBonds():
        # Result unused, but the lookup raises KeyError for a bond type
        # outside the four listed above.
        _channel = bond_type_to_channel[bond.GetBondType()]
        begin_map = bond.GetBeginAtom().GetAtomMapNum()
        end_map = bond.GetEndAtom().GetAtomMapNum()
        pair = str(min(begin_map, end_map)) + '-' + str(
            max(begin_map, end_map))
        if value_by_pair.get(pair, -1) != -1:
            return reactions

    return ['', '', '']
예제 #4
0
def disable_rdkit_logging():
    """
    Turn down RDKit's log output.

    Sets the RDKit logger level to ERROR and disables the 'rdApp.error'
    log.
    """
    rkl.logger().setLevel(rkl.ERROR)
    rkrb.DisableLog('rdApp.error')
예제 #5
0
    def __init__(self, moli, molj):
        """
        Initialization function

        Stores both molecules, fingerprints each of them and stores the
        similarity of the two fingerprints.

        Parameters
        ----------

        moli : RDKit molecule object
            the first molecule used to perform the fingerprint calculation
        molj : RDKit molecule object
            the second molecule used to perform the fingerprint calculation

        Notes
        -----
        NOTE(review): ``options`` is read below but is not a parameter of
        this method, so it must come from an enclosing/module scope --
        confirm that such a name exists, otherwise this raises NameError.
        """

        # Configure the logging format and level
        logging.basicConfig(level=logging.INFO,
                            format='%(levelname)s:\t%(message)s')

        # Keep references to the molecules that were passed in
        self.moli, self.molj = moli, molj

        # RDKit messages are only left enabled in 'pedantic' mode
        quiet = not options.verbose == 'pedantic'
        if quiet:
            RDLogger.logger().setLevel(RDLogger.CRITICAL)

        self.fps_moli = FingerprintMols.FingerprintMol(self.moli)
        self.fps_molj = FingerprintMols.FingerprintMol(self.molj)
        self.fps_tan = DataStructs.FingerprintSimilarity(self.fps_moli,
                                                         self.fps_molj)
예제 #6
0
def count_valid_samples(smiles, rdkit=True):
    """Count the SMILES strings that the selected toolkit can round-trip.

    Parameters
    ----------
    smiles : iterable of str
        Candidate SMILES strings.
    rdkit : bool, optional
        True (default): parse and re-write each string with RDKit.
        False: parse and re-write each string with ``pybel``.

    Returns
    -------
    count : int
        Number of strings that were converted successfully.
    goods : list
        The re-written SMILES of those strings, in input order.
    """
    if rdkit:
        from rdkit import Chem
        from rdkit import RDLogger

        RDLogger.logger().setLevel(RDLogger.CRITICAL)

        def to_smiles(smi):
            mol = Chem.MolFromSmiles(smi)
            # An unparsable SMILES gives None; report that explicitly
            # instead of relying on MolToSmiles(None) raising.
            if mol is None:
                return None
            return Chem.MolToSmiles(mol)
    else:
        import pybel

        def to_smiles(smi):
            return pybel.readstring("smi", smi).write("smi")

    goods = []
    for smi in smiles:
        try:
            converted = to_smiles(smi)
        except Exception:
            # Best effort: a string the toolkit rejects is simply not
            # counted. KeyboardInterrupt/SystemExit still propagate.
            continue
        if converted is not None:
            goods.append(converted)
    return len(goods), goods
예제 #7
0
def canonicalize(
    compounds: Mapping[str, Chem.Mol],
    standardize: bool = False,
    standardizer: str = "chembl",
    progress_callback: Optional[Callable] = None,
    timeout: Optional[int] = None,
) -> Tuple[Mapping[str, Tuple[Chem.Mol, Mapping[str, bool]]], List[str]]:
    """Run ``canonicalize_compound`` on every compound, one at a time.

    Each call goes through a ``concurrent.process`` wrapper configured
    with ``timeout``. Compounds whose call times out or raises are
    reported on stdout and collected in the "skipped" list.

    Args:
        compounds: mapping from compound key to molecule.
        standardize: not read by this function.
        standardizer: key into ``STANDARDIZERS`` selecting the
            standardizer passed to ``canonicalize_compound``.
        progress_callback: if given, called with the running index on
            every 100th compound (starting with index 0).
        timeout: forwarded to ``concurrent.process``.

    Returns:
        ``(results, skipped)``: results keyed by compound key, and the
        keys of the compounds that were skipped.
    """

    @concurrent.process(timeout=timeout)
    def process_compound(*args, **kwargs):
        return canonicalize_compound(*args, **kwargs)

    canonicalizer_fun = tautomer.TautomerCanonicalizer()
    standardizer_fun = STANDARDIZERS[standardizer]
    results = {}
    skipped = []
    # Quieten RDKit's warning messages.
    RDLogger.logger().setLevel(RDLogger.ERROR)

    for index, (name, mol) in enumerate(compounds.items()):
        if progress_callback is not None and index % 100 == 0:
            progress_callback(index)
        pending = process_compound(
            mol, canonicalizer_fun=canonicalizer_fun, standardizer_fun=standardizer_fun
        )
        try:
            outcome = pending.result()
        except TimeoutError:
            print(f"Processing `{name}` took longer than {timeout}s. Skipping.")
            skipped.append(name)
        except Exception as error:
            print(f"Error canonicalizing {name}. Skipping.\n{error}")
            skipped.append(name)
        else:
            results[name] = outcome
    return (results, skipped)
예제 #8
0
    def test_mcs(self):
        """Compare pairwise MCS mappings against the pickled reference data.

        ``MCS.getMapping`` is evaluated for every pair ``i < j`` of
        molecules in ``self.inst``, once with default arguments and once
        with ``hydrogens=True``; both result dictionaries must equal the
        two entries stored in ``test/basic/MCS.pickle``.
        """
        # The context manager guarantees the fixture file is closed.
        # NOTE: pickle.load is only acceptable because this is a fixture
        # shipped with the tests, not external input.
        with open('test/basic/MCS.pickle', 'rb') as f:
            data = pickle.load(f)
        data_no_hydrogens = data[0]
        data_hydrogens = data[1]

        db = self.inst

        nohyds = {}
        hyds = {}

        lg = RDLogger.logger()
        lg.setLevel(RDLogger.CRITICAL)

        for i in range(0, db.nums()):
            for j in range(i + 1, db.nums()):
                MCS_no_hyds = MCS.getMapping(db[i].getMolecule(),
                                             db[j].getMolecule())
                MCS_hyds = MCS.getMapping(db[i].getMolecule(),
                                          db[j].getMolecule(),
                                          hydrogens=True)
                nohyds[(i, j)] = MCS_no_hyds
                hyds[(i, j)] = MCS_hyds

        # Comparing the dictionaries directly lets unittest show the
        # differing entries on failure.
        self.assertEqual(nohyds, data_no_hydrogens)
        self.assertEqual(hyds, data_hydrogens)
예제 #9
0
def main(parameters):
    """Run the whole prediction pipeline for one query file.

    ``parameters[0]`` is the query (input) file and ``parameters[1]`` the
    output path. The steps are: load lookup data, read the query
    molecules, load the database, search targets in a process pool,
    score the results in a second pool, write the outputs, and print the
    elapsed time.
    """
    start = time.time()
    # Raise the RDKit log threshold to ERROR to suppress warnings
    lg = RDLogger.logger()
    lg.setLevel(RDLogger.ERROR)

    query_file = parameters[0]  #location of input file
    output_path = parameters[1]  #path for output file

    # load the drug name dictionary and the precision matrix
    # (both are called for their side effects; return values are unused)
    dictionary()
    precision()

    # STEP #1: read the query compounds
    print('reading input file...........')
    # leading "-" characters are stripped from the file name before reading
    input_molecules = read_input_smiles(query_file.lstrip("-"))
    # STEP #2: load the database compounds
    print('loading database file........')
    # stored in a module-level global
    # (presumably so the pool workers can read it -- confirm)
    global dbComps
    dbComps = read_db_file()
    print('Done!')

    # STEP #3: search for 10 targets
    print('searching for 10 targets........')
    # split the input molecules into one single-entry dictionary per
    # molecule so they can be processed in parallel
    input_molecules_list = [{
        k: input_molecules[k]
    } for k in input_molecules.keys()]
    pool = Pool(processes=workers)
    target_dict = pool.map(search_10_target, input_molecules_list)
    pool.close()
    pool.join()
    print("search: Done!")

    # STEP #4: calculate and sort the scores
    print('calculating scores.........')

    pool = Pool(processes=workers)
    # input_molecules is rebound here to the list returned by pool.map
    input_molecules = pool.map(calculate_scores, target_dict)
    pool.close()
    pool.join()
    print("scores calculation: Done!")
    # STEP #5: output a txt file

    print('creating output directory and file.......')
    output(input_molecules, query_file, output_path)
    # same results, file in json format
    outputjson(query_file, output_path)
    print("output: Done")

    usedTime = time.time() - start

    # report total and per-molecule time; the else branch avoids a
    # division by zero when there were no results
    if len(input_molecules) > 0:
        print("\nTime elapsed: " + str(usedTime) + " seconds, " +
              str(usedTime / float(len(input_molecules))) +
              " seconds per input molecule.\n")
        #print ("\nTime elapsed to read the database: "+ str(x_time))
    else:
        print("\nTime elapsed: " + str(usedTime) +
              " seconds, 0 seconds per input molecule.\n")
예제 #10
0
 def __init__(self, logging_level=logging.INFO):
     """Initialise the base class as "Populate" and quieten RDKit.

     The three table names held on the instance (RAW_DATA_DB, COUNTS_DB,
     LIGANDS_DB) are passed to the base class as ``tables_to_drop``.
     """
     super().__init__(
         "Populate",
         tables_to_drop=[self.RAW_DATA_DB, self.COUNTS_DB, self.LIGANDS_DB],
         logging_level=logging_level)
     RDLogger.logger().setLevel(RDLogger.CRITICAL)
예제 #11
0
def disable_rdkit_logging():
    """
    Turn down RDKit's log output.

    Sets the RDKit logger level to ERROR and disables the 'rdApp.error'
    log.
    """
    from rdkit import rdBase
    from rdkit import RDLogger

    RDLogger.logger().setLevel(RDLogger.ERROR)
    rdBase.DisableLog('rdApp.error')
예제 #12
0
def remove_salts(mol, dictionary=True, *args, **kwargs):
    """Removes salts from a molecule.

    This function removes detected salts following a salts dictionary by
    default.

    Parameters
    ----------
    mol: rdkit.Chem.Mol
        The molecule to be modified.
    dictionary: bool, optional
        True (default): Activates the use of the salt dictionary; every
        entry is tried in turn against the molecule.
        False: Uses the standard StripMol functionality, provided by
        rdkit.Chem.SaltRemover.
    *args, **kwargs
        Only used when `dictionary` is False: forwarded unchanged to the
        ``SaltRemover`` constructor (for example ``defnData`` to supply
        custom salt definitions).

    Returns
    -------
    mol: rdkit.Chem.Mol
        The molecule with salts removed. If nothing was stripped in
        dictionary mode, this is the input molecule itself.

    Notes
    -----
    The Salts Dictionary
        The dictionary used is a derived version from the ChEMBL salt
        dictionary, created for the standardiser application by Francis
        Atkinson. The salts are stored as list of (neutral) SMILES.

    Calling this function sets the RDKit logger level to ERROR.
    """
    RDLogger.logger().setLevel(RDLogger.ERROR)

    if not dictionary:
        return SaltRemover(*args, **kwargs).StripMol(mol)

    salts = _extract_row_from_csv(0)
    salt_names = _extract_row_from_csv(1)

    # SMILES of the current molecule; it only changes when a salt is
    # actually stripped, so it is updated there rather than recomputed
    # on every iteration.
    current_smiles = Chem.MolToSmiles(mol)
    for index, salt in enumerate(salts):
        salt_name = salt_names[index]
        stripped_mol = SaltRemover(defnData=salt).StripMol(mol)
        stripped_smiles = Chem.MolToSmiles(stripped_mol)
        if stripped_smiles != current_smiles:
            logging.debug("Following salt was stripped: %s", salt_name)
            mol, current_smiles = stripped_mol, stripped_smiles

    return mol
예제 #13
0
def set_up_logging(logger_name):
    """Return the logger called ``logger_name``, set to DEBUG level.

    Also applies a timestamped basicConfig format and restricts RDKit
    logging to CRITICAL.
    """
    logging.basicConfig(format='%(asctime)s - %(levelname)s: %(message)s')
    named_logger = logging.getLogger(logger_name)
    named_logger.setLevel(logging.DEBUG)

    # RDKit only reports critical messages from here on.
    RDLogger.logger().setLevel(RDLogger.CRITICAL)

    return named_logger
예제 #14
0
def split_sdf(sdf_file_name, outdir="data/"):
    """Load an SDF file, keep complete rows and write one file per ligand.

    The SDF is loaded into a dataframe, the two PDB-ID columns are merged
    into a single comma-joined "PDB IDs" column, and rows with missing
    values are dropped. Each remaining row is a ligand, indexed by its
    position.

    Returns ``(uligs, prots_ligs)``: ``uligs`` maps ligand index to its
    row (PDB IDs, SMILES, IC50 (nM), Molecule) and ``prots_ligs`` maps
    each PDB id to the list of ligand indices that mention it. The
    Molecule of every ligand is passed to ``write_lig_file`` with the
    target path ``<outdir>/lig/lig<index>.lig``.
    """
    print("Loading sdf.")
    # Read the SDF into a dataframe with RDKit logging turned down.
    RDLogger.logger().setLevel(RDLogger.CRITICAL)
    df = PandasTools.LoadSDF(sdf_file_name,
                             smilesName='SMILES',
                             molColName='Molecule',
                             includeFingerprints=False)
    print("Raw cols = ", [str(x) for x in df.columns])

    # Keep only the required columns and merge the two PDB columns.
    complex_col = 'PDB ID(s) for Ligand-Target Complex'
    chain_col = 'PDB ID(s) of Target Chain'
    kept_cols = ['SMILES', 'IC50 (nM)', 'Molecule']
    df_selected = df[[complex_col, chain_col] + kept_cols].copy()
    df_selected["PDB IDs"] = (df_selected[complex_col] + ',' +
                              df_selected[chain_col])
    print("Selected cols = ", [str(x) for x in df_selected.columns])
    df_selected = df_selected[["PDB IDs"] + kept_cols]

    # Treat empty strings and a lone comma as missing, then drop them.
    df_selected = (df_selected.replace('', np.nan)
                   .replace(',', np.nan)
                   .dropna())

    r_rows = len(df.index)
    s_rows = len(df_selected.index)
    print("Raw rows = ", r_rows)
    print("Sel rows = ", s_rows)
    print("Keep pct = %.2f%s" %
          (((float(s_rows) / float(r_rows)) * 100.0), '%'))

    # Index the ligands, and the ligands seen for each protein.
    print("Building protein-ligand dictionary.")
    uligs = {}
    prots_ligs = {}
    for lndx, row in enumerate(df_selected.values):
        for pdb in row[0].split(','):
            if pdb != '':
                prots_ligs.setdefault(pdb, []).append(lndx)
        uligs[lndx] = row
    print("Unique proteins = ", len(prots_ligs))

    # One .lig file per ligand; column 3 of the row is the Molecule.
    print("Writing per-ligand output files.")
    for lig_index, lig in uligs.items():
        write_lig_file(lig[3], outdir + "/lig/lig%s.lig" % str(lig_index))
    return uligs, prots_ligs
예제 #15
0
def findDuplicates(sdf, name, out):
    """Report pairs of molecules in an SD file that share an InChIKey prefix.

    Every readable molecule is identified by its InChIKey with the last
    three characters removed. All pairs ``i < j`` with the same
    identifier are written to ``out`` as tab-separated lines
    (``i, j, namei, namej, smilesi, smilesj``), and the number of pairs
    is printed.

    Parameters
    ----------
    sdf : str
        Path of the SD file to read.
    name : str
        Molecule property used as the molecule name; when it cannot be
        read, ``mol<NNNNNNNN>`` (position in the file) is used instead.
    out : str
        Path of the report file to write.

    Notes
    -----
    Indices ``i`` and ``j`` count only the molecules that were kept, i.e.
    molecules that could not be read or converted are not numbered.
    """
    lg = RDLogger.logger()
    lg.setLevel(RDLogger.ERROR)

    suppl = Chem.SDMolSupplier(sdf, removeHs=False, sanitize=False)

    idlist = []
    nmlist = []
    smlist = []

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('reading SDFile...')
    for counter, mol in enumerate(suppl, 1):
        if mol is None:
            continue
        try:
            inchi = Chem.MolToInchi(mol)
            inkey = Chem.InchiToInchiKey(inchi)
            smile = Chem.MolToSmiles(mol)
        except Exception:
            # Best effort: molecules that cannot be converted are skipped.
            continue

        try:
            ni = mol.GetProp(name)
        except Exception:
            ni = 'mol%0.8d' % counter

        idlist.append(inkey[:-3])
        nmlist.append(ni)
        smlist.append(smile)

    print('analizing duplicates...')

    # Group indices by identifier so only true duplicates are paired,
    # instead of comparing every molecule with every other one.
    # Each index list is ascending, which keeps the (i, j) output order.
    members = {}
    for index, key in enumerate(idlist):
        members.setdefault(key, []).append(index)

    duplicates = 0
    with open(out, 'w+') as fo:
        fo.write('i\tj\tnamei\tnamej\tsmilesi\tsmilesj\n')
        for i, key in enumerate(idlist):
            for j in members[key]:
                if j <= i:
                    continue
                line = '\t'.join([str(i), str(j), nmlist[i], nmlist[j],
                                  smlist[i], smlist[j]])
                fo.write(line + '\n')
                duplicates += 1

    print('\n%d duplicate molecules found' % duplicates)
예제 #16
0
def main():
    """Write the pairwise fingerprint-similarity table of a set of ligands.

    Reads PDB ids (one per line) from ``<pdb_list_file>``, loads each
    ligand from ``<pdbbind_dir>/<pdb>/<pdb>_ligand.sdf`` (falling back to
    the ``.mol2`` file when the SDF yields no molecule), computes a
    2048-bit Morgan fingerprint of radius 2 for it, and saves the
    all-against-all similarity matrix as CSV to ``<output_file>``.
    Ligands that cannot be loaded or fingerprinted are left out.
    """
    RDLogger.logger().setLevel(RDLogger.CRITICAL)

    # command line arguments
    args = docopt(__doc__)
    pdb_list_file = args['<pdb_list_file>']
    pdbbind_dir = args['<pdbbind_dir>']
    output_file = args['<output_file>']

    with open(pdb_list_file, 'r') as handle:
        pdbs = [entry.strip() for entry in handle]

    # one fingerprint per ligand that RDKit can read
    fingerprints = {}
    for pdb in pdbs:
        # the .sdf shipped with PDBbind is tried first
        sdf = os.path.join(pdbbind_dir, pdb, f'{pdb}_ligand.sdf')
        mol = next(Chem.SDMolSupplier(sdf, removeHs=False))

        if mol is None:
            # fall back to the .mol2 file
            mol2 = os.path.join(pdbbind_dir, pdb, f'{pdb}_ligand.mol2')
            mol = Chem.MolFromMol2File(mol2, removeHs=False)

        if mol is None:
            # neither file could be parsed: leave this ligand out
            continue

        try:
            fingerprint = AllChem.GetMorganFingerprintAsBitVect(mol,
                                                                2,
                                                                nBits=2048)
        except ValueError as e:
            print(e)
            continue
        fingerprints[pdb] = fingerprint

    # similarity of every fingerprint against every fingerprint
    tc = {}
    for pdb1, fp1 in fingerprints.items():
        tc[pdb1] = {
            pdb2: DataStructs.FingerprintSimilarity(fp1, fp2)
            for pdb2, fp2 in fingerprints.items()
        }

    pd.DataFrame(tc).to_csv(output_file)
예제 #17
0
def readsdfiles(fname):
    """ store every individual SD record of a gzipped, concatenated SDFile

    Records are pulled from the file with ``readnextSDfile`` until it
    returns ``'EOF'``; each one is handed to ``writedb`` together with
    the insert statement for ``<schema>.sdfile``. Progress is printed
    every 50000 records and the connection is flushed at the end.
    """
    print('readsdfile ', fname)
    sql = 'insert into ' + schema + '.sdfile (%s) values %s;'
    RDLogger.logger().setLevel(RDLogger.CRITICAL)

    count = 0
    with gzip.open(fname, 'r') as file:
        while True:
            sdrecord = readnextSDfile(file)
            if sdrecord == 'EOF':
                break
            writedb(conn, sdrecord, sql)
            count += 1
            if count % 50000 == 0:
                print('readsdfiles records', count)

    flush(conn)
    print("wrote ", count, " records")
예제 #18
0
def configure_worker(options=None, **kwargs):
    """Load the neural network context recommender for this worker.

    Does nothing unless ``options['queues']`` (a comma-separated string)
    contains ``CORRESPONDING_QUEUE``. Otherwise a
    ``NeuralNetContextRecommender`` is created, loaded and stored in the
    module-level ``recommender`` global.

    Args:
        options: mapping of worker options; ``None`` (the default) or a
            mapping without ``'queues'`` makes this a no-op.
        **kwargs: accepted and ignored.
    """
    # ``None`` replaces the former mutable ``{}`` default.
    if not options or 'queues' not in options:
        return
    if CORRESPONDING_QUEUE not in options['queues'].split(','):
        return
    print('### STARTING UP A NEURAL NETWORK CONTEXT RECOMMENDER WORKER ###')

    global recommender

    # Setting logging low
    from rdkit import RDLogger
    lg = RDLogger.logger()
    lg.setLevel(RDLogger.CRITICAL)
    try:
        recommender = NeuralNetContextRecommender()
        recommender.load()
    except Exception as e:
        # Best effort: report the failure and let the worker start anyway.
        print(e)
    else:
        # Only claim success when loading actually succeeded.
        print('Loaded context recommendation model')

    print('### NEURAL NETWORK CONTEXT RECOMMENDER STARTED UP ###')
예제 #19
0
def configure_worker(options=None, **kwargs):
    """Load the nearest neighbor context recommender for this worker.

    Does nothing unless ``options['queues']`` (a comma-separated string)
    contains ``CORRESPONDING_QUEUE``. Otherwise an
    ``NNContextRecommender`` is created, its model is loaded from the
    paths in ``gc.CONTEXT_REC`` and it is stored in the module-level
    ``recommender`` global.

    Args:
        options: mapping of worker options; ``None`` (the default) or a
            mapping without ``'queues'`` makes this a no-op.
        **kwargs: accepted and ignored.
    """
    # ``None`` replaces the former mutable ``{}`` default.
    if not options or 'queues' not in options:
        return
    if CORRESPONDING_QUEUE not in options['queues'].split(','):
        return
    print('### STARTING UP A NEAREST NEIGHBOR CONTEXT RECOMMENDER WORKER ###')

    global recommender

    # Setting logging low
    from rdkit import RDLogger
    lg = RDLogger.logger()
    lg.setLevel(RDLogger.CRITICAL)
    try:
        recommender = NNContextRecommender()
        recommender.load_nn_model(model_path=gc.CONTEXT_REC['model_path'],
                                  info_path=gc.CONTEXT_REC['info_path'])
    except Exception as e:
        # Best effort: report the failure and let the worker start anyway.
        print(e)
    else:
        # Only claim success when loading actually succeeded.
        print('Loaded context recommendation model')

    print('### NEAREST NEIGHBOR CONTEXT RECOMMENDER STARTED UP ###')
예제 #20
0
def load_csv(csv_file_name):
    """Read two-column CSV lines and attach a parsed molecule to each.

    The file is split on newlines and each line on commas. Empty lines
    are skipped silently; lines that do not have exactly two fields are
    printed and skipped. For the remaining lines the second field is
    parsed with ``Chem.MolFromSmiles`` and appended, giving
    ``[field0, field1, mol]``. Accepted ligands from the first ten lines
    are printed, followed by "..." if line index 10 is also accepted.

    Returns the list of accepted ``[field0, field1, mol]`` entries.
    """
    print("Loading CSV.")
    # Read the raw lines with RDKit logging turned down.
    RDLogger.logger().setLevel(RDLogger.CRITICAL)
    with open(csv_file_name, "r") as csvf:
        raw_lines = csvf.read().split("\n")
    ligands = [line.split(",") for line in raw_lines]

    # Turn the SMILES column into mol objects.
    print("Converting ligands to mol objects.")
    valid_ligands = []
    for ndx, ligand in enumerate(ligands):
        if ligand == [""]:
            continue
        if len(ligand) != 2:
            print(ligand)
            continue
        ligand.append(Chem.MolFromSmiles(ligand[1]))
        valid_ligands.append(ligand)
        if ndx < 10:
            print(ligand)
        elif ndx == 10:
            print("...")
    print("Done creating mol objects.")
    return valid_ligands
예제 #21
0
import warnings
import os
import shutil
import logging
import argparse
from rdkit import RDLogger
from os.path import join, basename, abspath

from .molecular import Molecule, CACHE_SETTINGS
from .ga import GAPopulation, GAInput
from .convenience_tools import (tar_output, errorhandler, streamhandler,
                                archive_output, kill_macromodel)
from .ga import plotting as plot

# Ignore all Python warnings and set the RDKit logger level to CRITICAL.
warnings.filterwarnings("ignore")
RDLogger.logger().setLevel(RDLogger.CRITICAL)

# Attach the imported error and stream handlers to the root logger.
rootlogger = logging.getLogger()
rootlogger.addHandler(errorhandler)
rootlogger.addHandler(streamhandler)

# Logger named after this module.
logger = logging.getLogger(__name__)


class GAProgress:
    """
    Deals with logging the GA's progress.

    Attributes
    ----------
예제 #22
0
from __future__ import print_function

from rdkit import RDLogger
# Restrict RDKit logging to critical messages; the named constant
# replaces the bare level number 4.
lg = RDLogger.logger()
lg.setLevel(RDLogger.CRITICAL)

import rdkit.Chem as Chem
import rdkit.Chem.AllChem as AllChem
from rdkit import DataStructs
import pandas as pd
import numpy as np
from tqdm import tqdm
import json
import sys

from retrosim.utils.generate_retro_templates import process_an_example
from retrosim.data.get_data import get_data_df, split_data_df

from joblib import Parallel, delayed
import multiprocessing
num_cores = multiprocessing.cpu_count()

from rdchiral.main import rdchiralRun, rdchiralReaction, rdchiralReactants
import os

# Directory containing this file, and the directory above it.
SCRIPT_ROOT = os.path.dirname(__file__)
PROJ_ROOT = os.path.dirname(SCRIPT_ROOT)

############### DEFINITIONS FOR VALIDATION SEARCH ########################
# Labels for the fingerprint types and the similarity metrics
# (presumably the options swept by the validation search -- confirm).
all_getfp_labels = ['Morgan2noFeat', 'Morgan3noFeat', 'Morgan2Feat', 'Morgan3Feat']
all_similarity_labels = ['Tanimoto', 'Dice', 'TverskyA', 'TverskyB',]
예제 #23
0
    # Output is either a fixed name in an output directory
    # or a prefixed filename (without an output directory)
    if args.output_is_prefix:
        output_filename = '{}.{}'.format(args.output, output_filename)
    else:
        # Create the output directory
        if os.path.exists(args.output):
            logger.error('Output exists')
            sys.exit(1)
        os.mkdir(args.output)
        os.chmod(args.output, 0o777)
        output_filename = os.path.join(args.output,
                                       '{}'.format(output_filename))

    # Suppress basic RDKit logging...
    RDLogger.logger().setLevel(RDLogger.ERROR)

    # Report any limiting...?
    if args.limit:
        logger.warning('Limiting processing to first {:,} molecules'.format(
            args.limit))

    # Before we open the output file,
    # get a list of all the input files (the prefix may be the same,
    # so we don't want our file in the list of files to be processed).
    real_files = glob.glob('{}/{}*'.format(args.vendor_dir,
                                           args.vendor_prefix))

    # Open the file we'll write the standardised data set to.
    # A text, tab-separated file.
    logger.info('Writing %s...', output_filename)
예제 #24
0
    except UnboundLocalError:
        print("something wrong with IRC")
        ts_found_opt = None
    _dict.update({"IRC check": ts_found})


    return _dict, ts_found_opt, ts_found


if __name__ == '__main__':
    FILE_1 = sys.argv[1] #reactant .xyz file
    FILE_2 = sys.argv[2] #product .xyz file
    SUCCESS_PKL = sys.argv[3] #.pkl file with dataframe to save result if search successful
    FAIL_PKL = sys.argv[4] #.pkl faile with daraframe to save result if search unsuccessful
    METHOD = 'ub3lyp/6-31G(d,p)' #Specify the method for the Gaussian calculations
    LG = RDLogger.logger()
    LG.setLevel(RDLogger.ERROR)


    with open("log_err.txt", 'w') as err:
        with redirect_stderr(err):
            with open("log.txt", 'w') as out:
                with redirect_stdout(out):
                    # create empty dictionary to save results
                    DICT = {}

                    #Get the xTB path for reactant and product
                    PATH_FILE, OUTFILE, N_PATH = find_xtb_path(FILE_1, FILE_2)
                    DICT = xtb_path_parameter(N_PATH, OUTFILE, DICT)

                    #extract path structures and do sp energy calculations
예제 #25
0
파일: fixer.py 프로젝트: oddt/oddt
def UFFConstrainedOptimize(mol, moving_atoms=None, fixed_atoms=None,
                           cutoff=5., verbose=False):
    """Minimize a molecule using UFF forcefield with a set of moving/fixed
    atoms. If both moving and fixed atoms are provided, fixed_atoms parameter
    will be ignored.  The minimization is done in-place (without copying
    molecule).

    Parameters
    ----------
        mol: rdkit.Chem.rdchem.Mol
            Molecule to be minimized.
        moving_atoms: array-like (default=None)
            Indices of freely moving atoms. If None, moving atoms are all
            atoms not listed in `fixed_atoms`. These two arguments are
            mutually exclusive.
        fixed_atoms: array-like (default=None)
            Indices of fixed atoms. Only used when `moving_atoms` is None.
            These two arguments are mutually exclusive.
        cutoff: float (default=5.)
            Distance cutoff: atoms within this distance of a moving atom
            (expanded to whole residues) form the minimized submolecule;
            also passed to the force field as `vdwThresh`.
        verbose: bool (default=False)
            If False, RDKit logging is set to CRITICAL while minimizing
            and set to INFO afterwards.

    Returns
    -------
        mol: rdkit.Chem.rdchem.Mol
            Molecule with minimized `moving_atoms`

    Raises
    ------
        ValueError
            If neither `moving_atoms` nor `fixed_atoms` is given.
    """
    # Validate before touching the logger, so a bad call cannot leave
    # RDKit logging muted.
    if moving_atoms is None and fixed_atoms is None:
        raise ValueError('You must supply at least one set of moving/fixed '
                         'atoms.')

    logger = RDLogger.logger()
    if not verbose:
        logger.setLevel(RDLogger.CRITICAL)

    try:
        if moving_atoms is None:
            all_atoms = set(range(mol.GetNumAtoms()))
            moving_atoms = list(all_atoms.difference(fixed_atoms))
        # set for the membership tests below
        moving_set = set(moving_atoms)

        # extract submolecules containing atoms within cutoff
        mol_conf = mol.GetConformer(-1)
        pos = np.array([mol_conf.GetAtomPosition(i)
                        for i in range(mol_conf.GetNumAtoms())])
        mask = (cdist(pos, pos[moving_atoms]) <= cutoff).any(axis=1)
        near_atoms = set(np.where(mask)[0].tolist())

        # expand to whole residues
        pocket_residues = OrderedDict()
        protein_residues = GetResidues(mol)
        for res_id in protein_residues.keys():
            if any(res_aix in near_atoms
                   for res_aix in protein_residues[res_id]):
                pocket_residues[res_id] = protein_residues[res_id]
        amap = list(chain(*pocket_residues.values()))

        # TODO: above a certain threshold, making a submol is redundant
        submol = AtomListToSubMol(mol, amap, includeConformer=True)
        # initialize ring info
        Chem.GetSSSR(submol)
        ff = UFFGetMoleculeForceField(submol, vdwThresh=cutoff,
                                      ignoreInterfragInteractions=False)
        # every submolecule atom that is not a moving atom is held fixed
        for submol_id, atom_id in enumerate(amap):
            if atom_id not in moving_set:
                ff.AddFixedPoint(submol_id)
        ff.Initialize()
        ff.Minimize(energyTol=1e-4, forceTol=1e-3, maxIts=2000)

        # copy the minimized positions back into the original molecule
        conf = mol.GetConformer(-1)
        submol_conf = submol.GetConformer(-1)
        for submol_idx, mol_idx in enumerate(amap):
            conf.SetAtomPosition(mol_idx,
                                 submol_conf.GetAtomPosition(submol_idx))
    finally:
        # FIXME: there's no getLevel method, so we set to default level.
        # Done in ``finally`` so an error cannot leave logging muted.
        if not verbose:
            logger.setLevel(RDLogger.INFO)

    return mol
예제 #26
0
""" RDKit interface
"""

from rdkit import RDLogger
from rdkit.Chem import Draw
import rdkit.Chem as _rd_chem
import rdkit.Chem.AllChem as _rd_all_chem
from automol import util
import automol.geom.base
import automol.graph.base

# Set the RDKit logger level to ERROR when this module is imported.
_LOGGER = RDLogger.logger()
_LOGGER.setLevel(RDLogger.ERROR)


# inchi
def from_inchi(ich, print_debug=False):
    """ Build an RDKit molecule object from an InChI string.

        The conversion is done with `treatWarningAsError=False`. When it
        yields `None` and `print_debug` is set, a message naming the
        InChI string is printed.

        :param ich: InChI string
        :type ich: str
        :param print_debug: print a debug message if the conversion fails
        :type print_debug: bool
        :rtype: RDKit molecule object
    """
    rdm = _rd_chem.inchi.MolFromInchi(ich, treatWarningAsError=False)
    conversion_failed = rdm is None
    if conversion_failed and print_debug:
        print(f'rdm fails for {ich} by returning {rdm}')

    return rdm
'''
Modified from https://github.com/wengong-jin/nips17-rexgen/blob/master/USPTO/core-wln-global/mol_graph.py
'''

import chainer

import numpy as np
from rdkit import Chem
from rdkit import RDLogger
from tqdm import tqdm

from chainer_chemistry.dataset.preprocessors.gwm_preprocessor import GGNNGWMPreprocessor

# Set the RDKit logger level to CRITICAL when this module is imported.
rdl = RDLogger.logger()
rdl.setLevel(RDLogger.CRITICAL)
# Element symbols known to this module; the last entry is the literal
# 'unknown' (presumably the bucket for any other element -- confirm).
elem_list = [
    'C', 'N', 'O', 'S', 'F', 'Si', 'P', 'Cl', 'Br', 'Mg', 'Na', 'Ca', 'Fe',
    'As', 'Al', 'I', 'B', 'V', 'K', 'Tl', 'Yb', 'Sb', 'Sn', 'Ag', 'Pd', 'Co',
    'Se', 'Ti', 'Zn', 'H', 'Li', 'Ge', 'Cu', 'Au', 'Ni', 'Cd', 'In', 'Mn',
    'Zr', 'Cr', 'Pt', 'Hg', 'Pb', 'W', 'Ru', 'Nb', 'Re', 'Te', 'Rh', 'Tc',
    'Ba', 'Bi', 'Hf', 'Mo', 'U', 'Sm', 'Os', 'Ir', 'Ce', 'Gd', 'Ga', 'Cs',
    'unknown'
]


def read_data(path):
    data = []
    with open(path, 'r') as f:
        for line in f:
            r, action = line.strip('\r\n ').split()
            if len(r.split('>')) != 3 or r.split('>')[1] != '':
예제 #28
0
파일: __init__.py 프로젝트: ridderl/sygma
"""SyGMa: Systematically Generating potential Metabolites"""

from builtins import str
import argparse
import sygma
import sys
from rdkit import Chem, RDLogger
# Set the RDKit logger level to ERROR when this module is imported.
RDLogger.logger().setLevel(RDLogger.ERROR)
import logging
# Default logging configuration, plus the logger used by this module.
logging.basicConfig()
logger = logging.getLogger('sygma')

def run_sygma(args, file=sys.stdout):
    """Run a two-phase scenario on the parent molecule and write the result.

    ``args`` supplies ``loglevel``, ``phase1``, ``phase2``, ``parentmol``
    (a SMILES string) and ``outputtype``. With ``outputtype == "sdf"``
    the tree writes itself to ``file``; with ``"smiles"`` one
    ``<smiles> <score>`` line per entry is written. Any other output
    type writes nothing. Always returns None.
    """
    logger.setLevel(args.loglevel.upper())
    phases = [
        [sygma.ruleset['phase1'], args.phase1],
        [sygma.ruleset['phase2'], args.phase2],
    ]
    scenario = sygma.Scenario(phases)

    metabolic_tree = scenario.run(Chem.MolFromSmiles(args.parentmol))
    metabolic_tree.calc_scores()

    output_type = args.outputtype
    if output_type == "sdf":
        metabolic_tree.write_sdf(file)
    elif output_type == "smiles":
        lines = [m + " " + str(s) for m, s in metabolic_tree.to_smiles()]
        file.write("\n".join(lines) + '\n')
    return None

def get_sygma_parser():
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument('--version', action='version', version='%(prog)s ' + sygma.__version__)
예제 #29
0
from collections import Counter

import tqdm
import networkx as nx

from loguru import logger
from rdkit import RDLogger
from rdkit.Chem import rdMolHash, MolToSmiles, rdmolops
from rdkit.Chem.rdMolDescriptors import CalcNumRings

from scaffoldgraph.io import *
from scaffoldgraph.utils import canonize_smiles
from .fragment import get_murcko_scaffold, get_annotated_murcko_scaffold
from .scaffold import Scaffold

# Module-level handle on the RDKit logger.
rdlogger = RDLogger.logger()


def init_molecule_name(mol):
    """Give a molecule a '_Name' property if it has none or an empty one.

    The generated name is the molecule hash string from ``rdMolHash``.
    Molecules that already carry a non-empty name are left untouched.
    """
    if mol.HasProp('_Name') and mol.GetProp('_Name') != '':
        return
    mol.SetProp('_Name', rdMolHash.GenerateMoleculeHashString(mol))


class ScaffoldGraph(nx.DiGraph, ABC):
    """Abstract base class for ScaffoldGraphs"""

    def __init__(self, graph=None, fragmenter=None):
        """
        Initialize a ScaffoldGraph object
예제 #30
0
파일: inchi.py 프로젝트: CKannas/rdkit
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

INCHI_AVAILABLE = True

import rdinchi
import logging
from rdkit import RDLogger
# RDKit logger whose methods are the values of the lookup table below.
logger = RDLogger.logger()

# Maps each standard-library logging level to the RDKit logger method
# of the same name.
logLevelToLogFunctionLookup = {
        logging.INFO : logger.info,
        logging.DEBUG : logger.debug,
        logging.WARNING : logger.warning,
        logging.CRITICAL : logger.critical,
        logging.ERROR : logger.error
        }

class InchiReadWriteError(Exception):
    """Exception type for InChI read/write errors; adds nothing to Exception."""

def MolFromInchi(inchi, sanitize=True, removeHs=True, logLevel=None,
        treatWarningAsError=False):
    """Construct a molecule from a InChI string
예제 #31
0
from molgym.envs.rewards import RewardFunction
from molgym.envs.rewards.multiobjective import AdditiveReward
from molgym.envs.rewards.oneshot import OneShotScore
from molgym.envs.rewards.tuned import LogisticCombination
from molgym.envs.simple import Molecule
from molgym.envs.rewards.rdkit import LogP, QEDReward, SAScore, CycleLength
from molgym.envs.rewards.mpnn import MPNNReward
from molgym.utils.conversions import convert_nx_to_smiles, convert_smiles_to_nx
from molgym.mpnn.layers import custom_objects
from tensorflow.keras.models import load_model

# Set up the loggers: configure the root handler's message format, then create
# the 'RL-Logger' logger and let it emit everything from DEBUG upwards.
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger('RL-Logger')
logger.setLevel(logging.DEBUG)
# Raise the RDKit logger threshold to CRITICAL so that lower-severity RDKit
# messages are suppressed.
rdkit_logger = RDLogger.logger()
rdkit_logger.setLevel(RDLogger.CRITICAL)


def get_platform_info():
    """Describe the machine and interpreter running this process.

    Returns:
        dict: Values reported by the ``platform`` module plus the core count
        from ``os.cpu_count()``, under the keys ``processor``,
        ``python_version``, ``python_compiler``, ``hostname``, ``os``,
        ``cpu_name`` and ``n_cores``.
    """
    # NOTE: 'processor' holds platform.machine() while 'cpu_name' holds
    # platform.processor(); the key names are kept as-is for consumers.
    info = dict(
        processor=platform.machine(),
        python_version=platform.python_version(),
        python_compiler=platform.python_compiler(),
        hostname=platform.node(),
        os=platform.platform(),
        cpu_name=platform.processor(),
        n_cores=os.cpu_count(),
    )
    return info
def generate_substructures(input_file, output_file="all_test_substructures.txt"):
    """Enumerate the valid substructures of every structure in a SMILES file.

    Each non-blank line of ``input_file`` must hold a SMILES string and an
    identifier separated by a tab.  For every structure, all atom
    environments (every radius below the atom count, rooted at every atom)
    are converted to SMILES; a candidate is kept when it is non-empty,
    differs from the parent SMILES, can be parsed and sanitized by RDKit, and
    matches the parent as a substructure.

    Side effects:
        * Sets the process-wide RDKit logger to CRITICAL (not restored).
        * Writes ``output_file`` with one line per identifier, sorted by
          identifier: the substructure SMILES joined by ``.``, a tab, then
          the identifier.
        * Prints a short summary with the total number of substructures.

    Parameters
    ----------
    input_file : str
        Path to the tab-separated structure file (SMILES, identifier).
    output_file : str
        Path of the substructure table to write.  Defaults to
        ``all_test_substructures.txt`` in the working directory.

    Returns
    -------
    tuple
        ``(structure_combo_list, official_subs_dict)``.  The first item is a
        list of ``[smiles, mol, identifier]`` entries, where ``smiles`` is
        the parent SMILES as re-written by ``Chem.MolToSmiles``.  The second
        maps each identifier to its list of substructure SMILES, or to
        ``['<NA>']`` when none was found.
    """
    # Read the structures.  Iterating the file and skipping blank lines keeps
    # the last record even when the file does not end with a newline.
    structure_combo_list = []
    with open(input_file) as file_object:
        for raw_line in file_object:
            line = raw_line.rstrip('\n')
            if not line.strip():
                continue
            fields = line.split('\t')
            structure_id = fields[1]
            structure_mol = Chem.MolFromSmiles(fields[0])
            structure_smile = Chem.MolToSmiles(structure_mol)
            structure_combo_list.append(
                [structure_smile, structure_mol, structure_id])

    # The RDKit log level is process-global, so set it once instead of on
    # every candidate; fragment SMILES routinely fail to parse.
    RDLogger.logger().setLevel(RDLogger.CRITICAL)

    official_subs_dict = {}
    for structure_smile, structure_mol, structure_id in structure_combo_list:
        valid_sub_list = []
        # Candidate SMILES already evaluated for this structure; the outcome
        # for a given SMILES is deterministic, so each is tested only once.
        tested = set()
        nr_of_atoms = structure_mol.GetNumAtoms()

        for radius in range(nr_of_atoms):
            for atom_idx in range(nr_of_atoms):
                env = Chem.FindAtomEnvironmentOfRadiusN(
                    structure_mol, radius, atom_idx)
                sub_smile = Chem.MolToSmiles(
                    Chem.PathToSubmol(structure_mol, env))
                if (sub_smile == '' or sub_smile == structure_smile
                        or sub_smile in tested):
                    continue
                tested.add(sub_smile)

                # Round-trip through SMILES; None means RDKit rejected it.
                submol = Chem.MolFromSmiles(sub_smile)
                if submol is None:
                    continue
                try:
                    Chem.SanitizeMol(submol)
                    is_match = structure_mol.HasSubstructMatch(submol)
                except Exception:
                    # Best effort: a fragment RDKit cannot process is skipped,
                    # but KeyboardInterrupt/SystemExit still propagate.
                    continue
                if is_match:
                    valid_sub_list.append(sub_smile)

        # Repeated identifiers accumulate into one list; the '<NA>' marker is
        # only stored when the identifier has no entry yet.
        if valid_sub_list:
            official_subs_dict.setdefault(structure_id, []).extend(
                valid_sub_list)
        elif structure_id not in official_subs_dict:
            official_subs_dict[structure_id] = ['<NA>']

    with open(output_file, 'w') as db_file:
        for key in sorted(official_subs_dict):
            db_file.write('.'.join(official_subs_dict[key]) + '\t' + key + '\n')

    print('~~~~~~~~~~~~~~~~~~~~~~~~~~')
    print('All possible substructures')
    print(sum(len(subs) for subs in official_subs_dict.values()))

    return structure_combo_list, official_subs_dict
예제 #33
0
파일: fixer.py 프로젝트: zchwang/oddt
def UFFConstrainedOptimize(mol,
                           moving_atoms=None,
                           fixed_atoms=None,
                           cutoff=5.,
                           verbose=False):
    """Minimize a molecule using UFF forcefield with a set of moving/fixed
    atoms. If both moving and fixed atoms are provided, fixed_atoms parameter
    will be ignored.  The minimization is done in-place (without copying
    molecule).

    Parameters
    ----------
        mol: rdkit.Chem.rdchem.Mol
            Molecule to be minimized.
        moving_atoms: array-like (default=None)
            Indices of freely moving atoms. If None, the moving atoms are
            all atoms not listed in `fixed_atoms`.
        fixed_atoms: array-like (default=None)
            Indices of fixed atoms. Only used when `moving_atoms` is None.
        cutoff: float (default=5.)
            Distance cutoff used both to select the atoms around the moving
            atoms and as the UFF `vdwThresh`.
        verbose: bool (default=False)
            If False, the RDKit logger is set to CRITICAL for the duration
            of the call and set to INFO afterwards.

    Returns
    -------
        mol: rdkit.Chem.rdchem.Mol
            Molecule with minimized `moving_atoms`

    Raises
    ------
        ValueError
            If neither `moving_atoms` nor `fixed_atoms` is given.
    """
    # Validate before touching the process-wide RDKit logger, so a bad call
    # cannot leave logging muted.
    if moving_atoms is None and fixed_atoms is None:
        raise ValueError('You must supply at least one set of moving/fixed '
                         'atoms.')

    logger = RDLogger.logger()
    if not verbose:
        logger.setLevel(RDLogger.CRITICAL)

    try:
        if moving_atoms is None:
            all_atoms = set(range(mol.GetNumAtoms()))
            moving_atoms = list(all_atoms.difference(fixed_atoms))
        else:
            # A list keeps the NumPy fancy-indexing below well defined for
            # any iterable of indices (a tuple would be read as a
            # multi-dimensional index).
            moving_atoms = list(moving_atoms)
        moving_set = set(moving_atoms)

        # extract submolecules containing atoms within cutoff
        mol_conf = mol.GetConformer(-1)
        pos = np.array([mol_conf.GetAtomPosition(i)
                        for i in range(mol_conf.GetNumAtoms())])
        mask = (cdist(pos, pos[moving_atoms]) <= cutoff).any(axis=1)
        within_cutoff = set(np.where(mask)[0].tolist())

        # expand to whole residues
        pocket_residues = OrderedDict()
        for res_id, res_atoms in GetResidues(mol).items():
            if any(res_aix in within_cutoff for res_aix in res_atoms):
                pocket_residues[res_id] = res_atoms
        amap = list(chain.from_iterable(pocket_residues.values()))

        # TODO: above a certain threshold making a submol is redundant
        submol = AtomListToSubMol(mol, amap, includeConformer=True)
        # initialize ring info
        Chem.GetSSSR(submol)
        ff = UFFGetMoleculeForceField(submol,
                                      vdwThresh=cutoff,
                                      ignoreInterfragInteractions=False)
        # Every pocket atom that is not explicitly moving is pinned in place.
        for submol_id, atom_id in enumerate(amap):
            if atom_id not in moving_set:
                ff.AddFixedPoint(submol_id)
        ff.Initialize()
        ff.Minimize(energyTol=1e-4, forceTol=1e-3, maxIts=2000)

        # copy the minimized positions back onto the original molecule
        submol_conf = submol.GetConformer(-1)
        for submol_idx, mol_idx in enumerate(amap):
            mol_conf.SetAtomPosition(mol_idx,
                                     submol_conf.GetAtomPosition(submol_idx))
    finally:
        # FIXME: there's no getLevel method, so we set to default level
        if not verbose:
            logger.setLevel(RDLogger.INFO)

    return mol
예제 #34
0
from rdkit import RDConfig

import sys, time, math
from rdkit.ML.Data import Stats
import rdkit.DistanceGeometry as DG
from rdkit import Chem
import numpy
from rdkit.Chem import rdDistGeom as MolDG
from rdkit.Chem import ChemicalFeatures
from rdkit.Chem import ChemicalForceFields
from rdkit.Chem.Pharm3D import Pharmacophore, ExcludedVolume
from rdkit import Geometry
# Module-level dict, initially empty; presumably collects timing data --
# confirm against the code that fills it.
_times = {}

# NOTE: this binds the name ``logging`` to RDKit's RDLogger module, not to the
# standard-library ``logging`` package.
from rdkit import RDLogger as logging
logger = logging.logger()
# Default feature length; units and consumers are not visible here -- TODO confirm.
defaultFeatLength = 2.0


def GetAtomHeavyNeighbors(atom):
    """ returns a list of the heavy-atom neighbors of the
  atom passed in:

  >>> m = Chem.MolFromSmiles('CCO')
  >>> l = GetAtomHeavyNeighbors(m.GetAtomWithIdx(0))
  >>> len(l)
  1
  >>> isinstance(l[0],Chem.Atom)
  True
  >>> l[0].GetIdx()
  1
예제 #35
0
    plt.plot(bayes_recall, bayes_precision, 'k-', color='red', label='BayesGrad(Ours)')
    plt.axhline(y=vanilla_precision[-1], color='gray', linestyle='--')
    plt.legend()
    plt.xlabel("recall")
    plt.ylabel("precision")
    if save_path:
        print('saved to ', save_path)
        plt.savefig(save_path)
        # plt.savefig('artificial_pr.eps')
    else:
        plt.show()


if __name__ == '__main__':
    # Silence RDKit messages below CRITICAL; RDKit reports errors while the
    # Tox21 dataset is preprocessed.
    lg = RDLogger.logger()
    lg.setLevel(RDLogger.CRITICAL)
    # show INFO level log from chainer chemistry
    logging.basicConfig(level=logging.INFO)

    args = parse()
    # --- extracting configs ---
    dirpath = args.dirpath
    json_path = os.path.join(dirpath, 'args.json')
    if not os.path.exists(json_path):
        # Report the missing path itself (the message previously formatted
        # the ``json`` module object).
        raise ValueError(
            'json_path {} not found! Execute train_tox21.py beforehand.'.format(json_path))
    with open(json_path, 'r') as f:
        train_args = json.load(f)

    method = train_args['method']
예제 #36
0
#   @@ All Rights Reserved @@
#  This file is part of the RDKit.
#  The contents are covered by the terms of the BSD license
#  which is included in the file license.txt, found at the root
#  of the RDKit source tree.
#
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Lipinski,Descriptors,Crippen
from rdkit.Dbase.DbConnection import DbConnect
from rdkit.Dbase import DbModule
import re

# Set up the logger.  NOTE: ``logging`` here is rdkit.RDLogger, not the
# standard-library logging package.
import rdkit.RDLogger as logging
logger = logging.logger()
# Set the RDKit logger level to INFO.
logger.setLevel(logging.INFO)

def ProcessMol(mol,typeConversions,globalProps,nDone,nameProp='_Name',nameCol='compound_id',
               redraw=False,keepHs=False,
               skipProps=False,addComputedProps=False,
               skipSmiles=False,
               uniqNames=None,namesSeen=None):
  if not mol:
    raise ValueError('no molecule')
  if keepHs:
    Chem.SanitizeMol(mol)
  try:
    nm = mol.GetProp(nameProp)
  except KeyError:
    nm = None