Example #1
import sys
sys.path.insert(0, './Modules/')

import numpy as np

from file_reader import read_file
from mol_utils import get_fragments
from build_encoding import get_encodings, encode_molecule, decode_molecule, encode_list, save_decodings
from models import build_models
from training import train
from rewards import clean_good
from rdkit import rdBase
import logging
logging.getLogger().setLevel(logging.INFO)
rdBase.DisableLog('rdApp.error')


def main(fragment_file, lead_file):
    fragment_mols = read_file(fragment_file)
    lead_mols = read_file(lead_file)
    fragment_mols += lead_mols

    logging.info("Read %s molecules for fragmentation library",
                 len(fragment_mols))
    logging.info("Read %s lead moleculs", len(lead_mols))

    fragments, used_mols = get_fragments(fragment_mols)
    logging.info("Num fragments: %s", len(fragments))
    logging.info("Total molecules used: %s", len(used_mols))
    assert len(fragments)
    assert len(used_mols)
Example #2
from rdkit import rdBase


def disable_rdkit_log():
    rdBase.DisableLog('rdApp.*')
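
# A minimal usage sketch (not part of the original snippet): silence RDKit,
# then restore output with the matching EnableLog call when needed.
disable_rdkit_log()          # silence all RDKit logs
rdBase.EnableLog('rdApp.*')  # turn them back on when verbose output is wanted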
Example #3
import os
import shutil
from pathlib import Path
from threading import Timer

import numpy as np

from chemgrams import get_arpa_vocab, KenLMDeepSMILESLanguageModel, DeepSMILESLanguageModelUtils, DeepSMILESTokenizer
from chemgrams.logger import get_logger, log_top_best
from chemgrams.tanimotoscorer import TanimotoScorer
from chemgrams.sascorer import sascorer
from chemgrams.cyclescorer import CycleScorer
from chemgrams.training import KenLMTrainer

from openbabel import pybel
from deepsmiles import Converter
from rdkit import rdBase, Chem
rdBase.DisableLog('rdApp.error')
rdBase.DisableLog('rdApp.warning')
logger = get_logger('chemgrams.log')

THIS_DIR = os.path.dirname(os.path.abspath(__file__))

logger.info(os.path.basename(__file__))
logger.info(
    "KenLMDeepSMILESLanguageModel('../models/chembl_25_deepsmiles_klm_10gram_200503.klm', vocab)"
)
logger.info("TanimotoScorer(abilify, radius=6)")
logger.info("num_iterations = 100")
logger.info("time per iteration = 45 min.")
logger.info("keep_top_n = 20000 of all (including duplicates)")

vocab = get_arpa_vocab('../models/chembl_25_deepsmiles_klm_10gram_200503.arpa')
Example #4
from rdkit import rdBase


def disable_rdkit_log():
    """Disable all RDKit log output."""
    rdBase.DisableLog('rdApp.*')
Example #5
from rdkit.Chem.MolStandardize import rdMolStandardize
from rdkit.Chem import PandasTools

from rdkit import rdBase

#import sys
#sys.path.append("../
from chemical_curation.modification_graph import Modification
from chemical_curation.modification_graph import Modification_Graph

import molvs.normalize
import molvs.fragment
import molvs.tautomer
import molvs.metal

rdBase.DisableLog('rdApp.*')

import logging
import math  # for rounding
import os
import pathlib

import pandas

#list of atoms allowed for dragon descriptor calculation
dragon_allowed_atoms = set([
    "H", "B", "C", "N", "O", "F", "Al", "Si", "P", "S", "Cl", "Cr", "Mn", "Fe",
    "Co", "Ni", "Cu", "Zn", "Ga", "Ge", "As", "Se", "Br", "Mo", "Ag", "Cd",
    "In", "Sn", "Sb", "Te", "I", "Gd", "Pt", "Au", "Hg", "Ti", "Pb", "Bi"
Example #6
    def __init__(self, name, params={}, use_gpu=True, verbose=True):
        """Parameter initialization.

        Arguments
        -----------

            - name. String which will be used to identify the
            model in any folders or files created.

            - params. Optional. Dictionary containing the parameters
            that the user wishes to specify.

            - use_gpu. Boolean specifying whether a GPU should be
            used. True by default.

            - verbose. Boolean specifying whether progress output
            should be printed inline.

        """

        self.verbose = verbose

        # Print logo. Isn't it cool?
        # (Although it is cool, we won't print it if you don't want)
        if self.verbose:
            print(__logo__.format(__version__))

        # Set minimum verbosity for RDKit, Keras and TF backends
        os.environ['TF_CPP_MIN_VLOG_LEVEL'] = '3'
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
        logging.set_verbosity(logging.INFO)
        rdBase.DisableLog('rdApp.error')

        # Set configuration for GPU
        self.config = tf.ConfigProto()
        self.config.gpu_options.allow_growth = True

        # Set parameters
        self.PREFIX = name

        self.PRETRAIN_GEN_EPOCHS = params.get('PRETRAIN_GEN_EPOCHS', 240)
        self.PRETRAIN_DIS_EPOCHS = params.get('PRETRAIN_DIS_EPOCHS', 50)
        self.GEN_ITERATIONS = params.get('GEN_ITERATIONS', 2)
        self.GEN_BATCH_SIZE = params.get('GEN_BATCH_SIZE', 64)

        self.SEED = params.get('SEED')
        random.seed(self.SEED)
        np.random.seed(self.SEED)

        self.DIS_BATCH_SIZE = params.get('DIS_BATCH_SIZE', 64)
        self.DIS_EPOCHS = params.get('DIS_EPOCHS', 3)
        self.EPOCH_SAVES = params.get('EPOCH_SAVES', 20)
        self.CHK_PATH = params.get(
            'CHK_PATH',
            os.path.join(os.getcwd(), 'checkpoints/{}'.format(self.PREFIX)))

        self.GEN_EMB_DIM = params.get('GEN_EMB_DIM', 32)
        self.GEN_HIDDEN_DIM = params.get('GEN_HIDDEN_DIM', 32)
        self.START_TOKEN = params.get('START_TOKEN', 0)
        self.SAMPLE_NUM = params.get('SAMPLE_NUM', 6400)
        self.BIG_SAMPLE_NUM = params.get('BIG_SAMPLE_NUM', self.SAMPLE_NUM * 5)
        self.LAMBDA = params.get('LAMBDA', 0.5)

        # If MAX_LENGTH is not specified by the user, it is determined
        # later, when the training set is loaded.
        if 'MAX_LENGTH' in params:
            self.MAX_LENGTH = params['MAX_LENGTH']

        self.DIS_EMB_DIM = params.get('DIS_EMB_DIM', 64)
        self.DIS_FILTER_SIZES = params.get(
            'DIS_FILTER_SIZES', [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20])
        self.DIS_NUM_FILTERS = params.get(
            'DIS_NUM_FILTERS',
            [100, 200, 200, 200, 200, 100, 100, 100, 100, 100, 160, 160])
        self.DIS_DROPOUT = params.get('DIS_DROPOUT', 0.75)
        self.DIS_L2REG = params.get('DIS_L2REG', 0.2)

        self.AV_METRICS = get_metrics()
        self.LOADINGS = metrics_loading()

        self.PRETRAINED = False
        self.SESS_LOADED = False
        self.USERDEF_METRIC = False
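
# A hedged usage sketch (the enclosing class is cut off in this snippet, so
# `Model` below is a placeholder name): defaults can be overridden via `params`.
#
#     model = Model('my_run', params={'GEN_BATCH_SIZE': 128, 'SEED': 42})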
Example #7
import abc
import time
from copy import copy
from typing import List, Set

import rdkit.rdBase as rkrb
import rdkit.RDLogger as rkl

from minedatabase.pickaxe import Pickaxe

logger = rkl.logger()
logger.setLevel(rkl.ERROR)
rkrb.DisableLog("rdApp.error")


class Filter(metaclass=abc.ABCMeta):
    """Abstract base class used to generate filters.

    The Filter class provides the framework for interaction with pickaxe expansions.
    Each filter subclass must inherit properties from the Filter class.
    All subclasses must implement properties and methods decorated with
    @abc.abstractmethod. Feel free to override other non-private methods as
    well, such as _pre_print() and _post_print().
    """
    @property
    @abc.abstractmethod
    def filter_name(self) -> str:
        """Obtain name of filter."""
        pass

    @abc.abstractmethod
Example #8
            "R4": "c(c:[*:5]):[*:4]",
            "R5": "c(c:[*:5]):[*:4]",
            "R6": "F[*:6]"
        }]
        expected_items = {
            "Core": [
                "O=C(c1cncn1[*:2])[*:1]",
                "c1c([*:2])[*:3]c2nc([*:6])[*:5]:[*:4]c2[*:1]1"
            ],
            "R1": ["CN[*:1]", "c(:[*:1]):[*:1]"],
            "R2": ["CC[*:2]", "Br[*:2]"],
            "R3": ["", "n(:[*:3]):[*:3]"],
            "R4": ["", "c(c:[*:5]):[*:4]"],
            "R5": ["", "c(c:[*:5]):[*:4]"],
            "R6": ["", "F[*:6]"]
        }
        params.labels = RGroupLabels.AutoDetect
        params.alignment = RGroupCoreAlignment.MCS
        multicorergd_test(cores, params, expected_rows, expected_items)
        # test pre-labelled with dummy atom labels, no autodetect
        # in this case there is no difference from autodetect as the RGD code
        # cannot tell the difference between query atoms and dummy R-groups
        params.labels = RGroupLabels.DummyAtomLabels | RGroupLabels.RelabelDuplicateLabels
        params.alignment = RGroupCoreAlignment.MCS
        multicorergd_test(cores, params, expected_rows, expected_items)


if __name__ == '__main__':
    rdBase.DisableLog("rdApp.debug")
    unittest.main()
Example #9
    max_conformers = MAXCONF
    gaussian_input = "%NProcShared=" + "NPROCESSES" "\n%Mem=" + "MEMORY" + "MB"
else:
    struc = "CO[C@@H]2/C=[O+]\[C@@H]1CO[C@@H](C)O[C@H]1[C@@H]2OC"
    struc_mode = "smiles"
    ion_mode = ""
    file_name = "FILE_NAME"
    output_dir = "Y:/AIMS/Testing/"
    sdf_output_file = output_dir + file_name + ".sdf"
    AIMS_log_file = output_dir + file_name + ".log"
    com_output_folder = output_dir + "gaussian_files/"
    max_conformers = 20
    gaussian_input = "%NProcShared=24\n%Mem=55000MB"

#%% Initializing variables and linking to input and output
rdBase.DisableLog("rdApp.*")

# Calculation parameters
optimization = "PM6 opt freq"
multiplicity = 1
potential = AllChem.ETKDG()
potential.randomSeed = 42  # use the same seed to ensure reproducibility
Nconf = 500
ion_mode = ion_mode.lower()

selected_metabolites = Chem.SDWriter(sdf_output_file)

generated_mols = list()
omitted_mols = list()
charged_mols = list()
chiral_error_mols = list()
Example #10
import argparse
import os
import sys
import time
from itertools import chain

from rdkit import Chem
from rdkit import rdBase
from rdkit.Chem import AllChem
from tqdm import tqdm

from pyflow.mol.mol_utils import valid_smiles

rdBase.DisableLog('rdApp.warning')
"""
This script substitutes a given core molecule with the standard set of
spacers and linkers developed by Biruk Abreha and Steven Lopez. Substitutions
are performed on the core at the positions indicated by uranium (U) atoms.
The conformers are written to PDB files in a folder named after the given
molecule name.

USAGE: python pymolgen.py molecule_name smiles
"""

# reaction SMILES for linkers
linker_rxns = {
    'unsubstituted': '[*:1]([U])>>[*:1]([H])',
    'benzene': '[*:1]([U])>>[*:1](c2ccc([Y])cc2)',
    'pyridine': '[*:1]([U])>>[*:1](c2ncc([Y])cc2)',
    'pyrimidine': '[*:1]([U])>>[*:1](c2ncc([Y])cn2)',
    'tetrazine': '[*:1]([U])>>[*:1](c2nnc([Y])nn2)',
Example #11
"""Contains functions needed to process reaction SMILES and their tokens"""
from __future__ import absolute_import, division, print_function, unicode_literals

import logging
import re
from functools import partial
from typing import Any, List

import numpy as np
from rdkit import Chem, rdBase

LOGGER = logging.getLogger("attnmapper:smiles_utils")

# rdBase.DisableLog("rdApp.error")
rdBase.DisableLog("rdApp.warning")

SMI_REGEX_PATTERN = r"(\[[^\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\(|\)|\.|=|#|-|\+|\\|\/|:|~|@|\?|>>?|\*|\$|\%[0-9]{2}|[0-9])"
BAD_TOKS = ["[CLS]", "[SEP]"]  # Default Bad Tokens


def tokenize(smiles: str) -> List[str]:
    """Tokenize a SMILES molecule or reaction"""
    regex = re.compile(SMI_REGEX_PATTERN)
    tokens = regex.findall(smiles)
    assert smiles == "".join(tokens)
    return tokens
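
# Quick illustrative check (not from the original file): the pattern splits a
# SMILES string into atom- and bond-level tokens, e.g.
# tokenize("CC(=O)Oc1ccccc1") ==
#     ['C', 'C', '(', '=', 'O', ')', 'O', 'c', '1', 'c', 'c', 'c', 'c', 'c', '1']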


def get_atom_types(smiles: str):
    """Return atomic numbers for every token in (reaction) SMILES"""
    atom_tokens = get_atom_tokens_mask(smiles)
"""
Written by Jan H. Jensen 2018
"""
import random

import numpy as np
from rdkit import Chem
from rdkit import rdBase
from rdkit.Chem import AllChem

import synergetic_molecule_generator.crossover as co

rdBase.DisableLog("rdApp.error")


def delete_atom():
    choices = [
        "[*:1]~[D1]>>[*:1]",
        "[*:1]~[D2]~[*:2]>>[*:1]-[*:2]",
        "[*:1]~[D3](~[*;!H0:2])~[*:3]>>[*:1]-[*:2]-[*:3]",
        "[*:1]~[D4](~[*;!H0:2])(~[*;!H0:3])~[*:4]>>[*:1]-[*:2]-[*:3]-[*:4]",
        "[*:1]~[D4](~[*;!H0;!H1:2])(~[*:3])~[*:4]>>[*:1]-[*:2](-[*:3])-[*:4]",
    ]
    p = [0.25, 0.25, 0.25, 0.1875, 0.0625]

    return np.random.choice(choices, p=p)
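
# A hedged application sketch (illustrative molecule, standard RDKit calls):
# rxn = AllChem.ReactionFromSmarts(delete_atom())
# products = rxn.RunReactants((Chem.MolFromSmiles("CCO"),))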


def append_atom():
    choices = [
        ["single", ["C", "N", "O", "F", "S", "Cl", "Br"], 7 * [1.0 / 7.0]],
Example #13
#  All Rights Reserved
#
#  This file is part of the RDKit.
#  The contents are covered by the terms of the BSD license
#  which is included in the file license.txt, found at the root
#  of the RDKit source tree.
#

import unittest
from rdkit import Chem
from rdkit.Chem.Scaffolds import rdScaffoldNetwork
from rdkit import RDConfig
from rdkit import rdBase
import pickle

rdBase.DisableLog("rdApp.info")


class TestCase(unittest.TestCase):
    def setUp(self):
        pass

    def test1Basics(self):
        smis = ["c1ccccc1CC1NC(=O)CCC1", "c1cccnc1CC1NC(=O)CCC1"]
        ms = [Chem.MolFromSmiles(x) for x in smis]
        params = rdScaffoldNetwork.ScaffoldNetworkParams()

        net = rdScaffoldNetwork.CreateScaffoldNetwork(ms, params)
        self.assertEqual(len(net.nodes), 12)
        self.assertEqual(len(net.edges), 12)
        self.assertEqual(len(net.counts), len(net.nodes))
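
        # Inspection sketch (illustrative, not part of the original test):
        # net.nodes holds the scaffold SMILES, e.g. print(list(net.nodes)[:3])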
Example #14
    def __init__(self,
                 n_node,
                 dim_node,
                 dim_edge,
                 dim_y,
                 mu_prior,
                 cov_prior,
                 dim_h=50,
                 dim_z=100,
                 dim_f=500,
                 n_mpnn_step=3,
                 n_dummy=5,
                 batch_size=20,
                 lr=0.0005,
                 useGPU=True,
                 use_PREFERENCE=False):

        warnings.filterwarnings('ignore')
        tf.logging.set_verbosity(tf.logging.ERROR)
        rdBase.DisableLog('rdApp.error')
        rdBase.DisableLog('rdApp.warning')

        self.dim_R = 2 if use_PREFERENCE else 1

        self.n_node = n_node
        self.dim_node = dim_node
        self.dim_edge = dim_edge
        self.dim_y = dim_y

        self.mu_prior = mu_prior
        self.cov_prior = cov_prior

        self.dim_h = dim_h
        self.dim_z = dim_z
        self.dim_f = dim_f
        self.n_mpnn_step = n_mpnn_step
        self.n_dummy = n_dummy
        self.batch_size = batch_size
        self.lr = lr

        # variables
        self.G = tf.Graph()
        self.G.as_default()

        self.node = tf.placeholder(
            tf.float32, [self.batch_size, self.n_node, self.dim_node])
        self.edge = tf.placeholder(
            tf.float32,
            [self.batch_size, self.n_node, self.n_node, self.dim_edge])
        self.property = tf.placeholder(tf.float32,
                                       [self.batch_size, self.dim_y])

        self.latent = self._encoder(self.batch_size,
                                    self.node,
                                    self.edge,
                                    self.property,
                                    self.n_mpnn_step,
                                    self.dim_h,
                                    self.dim_h * 2,
                                    self.dim_z * 2,
                                    0,
                                    name='encoder',
                                    reuse=False)
        self.latent_mu, self.latent_lsgms = tf.split(self.latent,
                                                     [self.dim_z, self.dim_z],
                                                     1)

        self.latent_epsilon = tf.random_normal([self.batch_size, self.dim_z],
                                               0., 1.)
        self.latent_sample = tf.add(
            self.latent_mu,
            tf.multiply(tf.exp(0.5 * self.latent_lsgms), self.latent_epsilon))
        self.latent_sample2 = tf.concat([self.latent_sample, self.property], 1)

        self.rec_node, self.rec_edge = self._generator(self.batch_size,
                                                       self.latent_sample2,
                                                       self.n_mpnn_step,
                                                       name='generator',
                                                       reuse=False)

        self.new_latent = tf.random_normal([self.batch_size, self.dim_z], 0.,
                                           1.)
        mngen = tf.contrib.distributions.MultivariateNormalFullCovariance(
            loc=self.mu_prior, covariance_matrix=self.cov_prior)
        self.new_y = tf.dtypes.cast(mngen.sample(self.batch_size, self.dim_y),
                                    tf.float32)
        self.new_latent2 = tf.concat([self.new_latent, self.new_y], 1)

        self.new_node, self.new_edge = self._generator(self.batch_size,
                                                       self.new_latent2,
                                                       self.n_mpnn_step,
                                                       name='generator',
                                                       reuse=True)

        self.node_pad = tf.pad(
            self.node, tf.constant([[0, 0], [0, self.n_dummy], [0, 0]]),
            'CONSTANT')
        self.edge_pad = tf.pad(
            self.edge,
            tf.constant([[0, 0], [0, self.n_dummy], [0, self.n_dummy],
                         [0, 0]]), 'CONSTANT')

        # auxiliary
        self.R_rec = self._encoder(self.batch_size,
                                   self.rec_node,
                                   self.rec_edge,
                                   None,
                                   self.n_mpnn_step,
                                   self.dim_h,
                                   self.dim_h * 2,
                                   self.dim_R,
                                   0,
                                   name='auxiliary/R',
                                   reuse=False)
        self.R_fake = self._encoder(self.batch_size,
                                    self.new_node,
                                    self.new_edge,
                                    None,
                                    self.n_mpnn_step,
                                    self.dim_h,
                                    self.dim_h * 2,
                                    self.dim_R,
                                    0,
                                    name='auxiliary/R',
                                    reuse=True)
        self.R_real = self._encoder(self.batch_size,
                                    self.node_pad,
                                    self.edge_pad,
                                    None,
                                    self.n_mpnn_step,
                                    self.dim_h,
                                    self.dim_h * 2,
                                    self.dim_R,
                                    0,
                                    name='auxiliary/R',
                                    reuse=True)

        self.R_rec_t = tf.placeholder(tf.float32,
                                      [self.batch_size, self.dim_R])
        self.R_fake_t = tf.placeholder(tf.float32,
                                       [self.batch_size, self.dim_R])
        self.R_real_t = tf.placeholder(tf.float32,
                                       [self.batch_size, self.dim_R])

        self.y_rec = self._encoder(self.batch_size,
                                   self.rec_node,
                                   self.rec_edge,
                                   None,
                                   self.n_mpnn_step,
                                   self.dim_h,
                                   self.dim_h * 2,
                                   self.dim_y,
                                   0,
                                   name='auxiliary/Y',
                                   reuse=False)
        self.y_fake = self._encoder(self.batch_size,
                                    self.new_node,
                                    self.new_edge,
                                    None,
                                    self.n_mpnn_step,
                                    self.dim_h,
                                    self.dim_h * 2,
                                    self.dim_y,
                                    0,
                                    name='auxiliary/Y',
                                    reuse=True)
        self.y_real = self._encoder(self.batch_size,
                                    self.node_pad,
                                    self.edge_pad,
                                    None,
                                    self.n_mpnn_step,
                                    self.dim_h,
                                    self.dim_h * 2,
                                    self.dim_y,
                                    0,
                                    name='auxiliary/Y',
                                    reuse=True)

        # session
        self.saver = tf.train.Saver()
        if useGPU:
            self.sess = tf.Session()
        else:
            config = tf.ConfigProto(device_count={'GPU': 0})
            self.sess = tf.Session(config=config)