import sys
# Put the local ./Modules/ directory first on the import path so the
# project modules imported below resolve from there.
sys.path.insert(0, './Modules/')

import numpy as np

from file_reader import read_file
from mol_utils import get_fragments
from build_encoding import get_encodings, encode_molecule, decode_molecule, encode_list, save_decodings
from models import build_models
from training import train
from rewards import clean_good
from rdkit import rdBase
import logging
# Root logger emits INFO and above; RDKit's 'rdApp.error' channel is disabled.
logging.getLogger().setLevel(logging.INFO)
rdBase.DisableLog('rdApp.error')


def main(fragment_file, lead_file):
    """Load the fragment and lead molecule files and fragment them.

    Only the part of the body visible in this chunk is described here;
    NOTE(review): the function may continue below - confirm against the
    full file.

    Arguments:
        fragment_file: passed to ``read_file`` to load the molecules used
            for the fragmentation library.
        lead_file: passed to ``read_file`` to load the lead molecules.

    Raises:
        AssertionError: if ``get_fragments`` yields no fragments or no used
            molecules (these checks are plain ``assert`` statements, so
            they are skipped when Python runs with ``-O``).
    """
    fragment_mols = read_file(fragment_file)
    lead_mols = read_file(lead_file)
    # The lead molecules are added to the fragmentation library as well.
    # presumably read_file returns a list, making this an in-place extend
    # - TODO confirm.
    fragment_mols += lead_mols

    # This count is taken after the leads were appended, so it includes them.
    logging.info("Read %s molecules for fragmentation library", len(fragment_mols))
    logging.info("Read %s lead moleculs", len(lead_mols))

    fragments, used_mols = get_fragments(fragment_mols)
    logging.info("Num fragments: %s", len(fragments))
    logging.info("Total molecules used: %s", len(used_mols))

    # Stop early when fragmentation produced nothing to work with.
    assert len(fragments)
    assert len(used_mols)
def disable_rdkit_log():
    """Disable RDKit logging for the ``rdApp.*`` channel spec."""
    channel_spec = 'rdApp.*'
    rdBase.DisableLog(channel_spec)
from pathlib import Path
import shutil
from threading import Timer

import numpy as np

from chemgrams import get_arpa_vocab, KenLMDeepSMILESLanguageModel, DeepSMILESLanguageModelUtils, DeepSMILESTokenizer
from chemgrams.logger import get_logger, log_top_best
from chemgrams.tanimotoscorer import TanimotoScorer
from chemgrams.sascorer import sascorer
from chemgrams.cyclescorer import CycleScorer
from chemgrams.training import KenLMTrainer

from openbabel import pybel
from deepsmiles import Converter
from rdkit import rdBase, Chem
# Disable RDKit's 'rdApp.error' and 'rdApp.warning' log channels.
rdBase.DisableLog('rdApp.error')
rdBase.DisableLog('rdApp.warning')

logger = get_logger('chemgrams.log')
# NOTE(review): `os` is used on the next two lines but no `import os` is
# visible in this chunk - confirm it is imported earlier in the file.
THIS_DIR = os.path.dirname(os.path.abspath(__file__))

# Record which script is running, followed by the run configuration.
# The configuration lines are literal strings: nothing in them is evaluated,
# so they must be kept in sync by hand with the values used by the script.
logger.info(os.path.basename(__file__))
logger.info(
    "KenLMDeepSMILESLanguageModel('../models/chembl_25_deepsmiles_klm_10gram_200503.klm', vocab)"
)
logger.info("TanimotoScorer(abilify, radius=6)")
logger.info("num_iterations = 100")
logger.info("time per iteration = 45 min.")
logger.info("keep_top_n = 20000 of all (including duplicates)")

# Vocabulary loaded from the .arpa file (path is relative to the working
# directory, not to THIS_DIR).
vocab = get_arpa_vocab('../models/chembl_25_deepsmiles_klm_10gram_200503.arpa')
def disable_rdkit_log():
    """Disable RDKit logging for the ``rdApp.*`` wildcard spec."""
    silence = rdBase.DisableLog
    silence('rdApp.*')
from rdkit.Chem.MolStandardize import rdMolStandardize from rdkit.Chem import PandasTools from rdkit import rdBase #import sys #sys.path.append("../ from chemical_curation.modification_graph import Modification from chemical_curation.modification_graph import Modification_Graph import molvs.normalize import molvs.fragment import molvs.tautomer import molvs.metal rdBase.DisableLog('rdApp.*') import os import math #for rounding import pandas import logging import pathlib #list of atoms allowed for dragon descriptor calculation dragon_allowed_atoms = set([ "H", "B", "C", "N", "O", "F", "Al", "Si", "P", "S", "Cl", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Ga", "Ge", "As", "Se", "Br", "Mo", "Ag", "Cd", "In", "Sn", "Sb", "Te", "I", "Gd", "Pt", "Au", "Hg", "Ti", "Pb", "Bi"
def __init__(self, name, params=None, use_gpu=True, verbose=True):
    """Parameter initialization.

    Arguments
    -----------

        - name. String which will be used to identify the
        model in any folders or files created.

        - params. Optional. Dictionary containing the parameters
        that the user wishes to specify. Any key that is absent
        falls back to the default given in the body below.
        ``None`` (the default) is treated as an empty dictionary.

        - use_gpu. Boolean specifying whether a GPU should be
        used. True by default. It is accepted here but not read
        anywhere in this initializer.

        - verbose. Boolean specifying whether output must be
        produced in-line.

    """
    # A None sentinel replaces the former `params={}` default so that no
    # mutable object is shared between calls.
    if params is None:
        params = {}

    self.verbose = verbose

    # Print logo. Isn't it cool?
    # (Although it is cool, we won't print it if you don't want)
    if self.verbose:
        print(__logo__.format(__version__))

    # Set minimum verbosity for RDKit, Keras and TF backends
    os.environ['TF_CPP_MIN_VLOG_LEVEL'] = '3'
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    logging.set_verbosity(logging.INFO)
    rdBase.DisableLog('rdApp.error')

    # Set configuration for GPU
    self.config = tf.ConfigProto()
    self.config.gpu_options.allow_growth = True

    # Set parameters
    self.PREFIX = name

    # Pre-training and training schedule
    self.PRETRAIN_GEN_EPOCHS = params.get('PRETRAIN_GEN_EPOCHS', 240)
    self.PRETRAIN_DIS_EPOCHS = params.get('PRETRAIN_DIS_EPOCHS', 50)
    self.GEN_ITERATIONS = params.get('GEN_ITERATIONS', 2)
    self.GEN_BATCH_SIZE = params.get('GEN_BATCH_SIZE', 64)

    # Seed both the stdlib and the NumPy generators. A seed of None lets
    # each library pick its own entropy source.
    self.SEED = params.get('SEED', None)
    random.seed(self.SEED)
    np.random.seed(self.SEED)

    self.DIS_BATCH_SIZE = params.get('DIS_BATCH_SIZE', 64)
    self.DIS_EPOCHS = params.get('DIS_EPOCHS', 3)
    self.EPOCH_SAVES = params.get('EPOCH_SAVES', 20)

    # The default checkpoint path is only computed when the user gave
    # none, so os.getcwd() is not called needlessly.
    if 'CHK_PATH' in params:
        self.CHK_PATH = params['CHK_PATH']
    else:
        self.CHK_PATH = os.path.join(os.getcwd(),
                                     'checkpoints/{}'.format(self.PREFIX))

    # Generator
    self.GEN_EMB_DIM = params.get('GEN_EMB_DIM', 32)
    self.GEN_HIDDEN_DIM = params.get('GEN_HIDDEN_DIM', 32)
    self.START_TOKEN = params.get('START_TOKEN', 0)
    self.SAMPLE_NUM = params.get('SAMPLE_NUM', 6400)

    # The default depends on SAMPLE_NUM, so it is derived only when the
    # user did not provide an explicit value.
    if 'BIG_SAMPLE_NUM' in params:
        self.BIG_SAMPLE_NUM = params['BIG_SAMPLE_NUM']
    else:
        self.BIG_SAMPLE_NUM = self.SAMPLE_NUM * 5

    self.LAMBDA = params.get('LAMBDA', 0.5)

    # In case this parameter is not specified by the user,
    # it will be determined later, in the training set
    # loading.
    if 'MAX_LENGTH' in params:
        self.MAX_LENGTH = params['MAX_LENGTH']

    # Discriminator
    self.DIS_EMB_DIM = params.get('DIS_EMB_DIM', 64)
    self.DIS_FILTER_SIZES = params.get(
        'DIS_FILTER_SIZES',
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20])
    # Bug fix: this used to read params['DIS_FILTER_SIZES'], so a
    # user-supplied DIS_NUM_FILTERS was silently replaced by the filter
    # sizes (or raised KeyError when only DIS_NUM_FILTERS was given).
    self.DIS_NUM_FILTERS = params.get(
        'DIS_NUM_FILTERS',
        [100, 200, 200, 200, 200, 100, 100, 100, 100, 100, 160, 160])
    self.DIS_DROPOUT = params.get('DIS_DROPOUT', 0.75)
    self.DIS_L2REG = params.get('DIS_L2REG', 0.2)

    # Metric registries and state flags, all starting as False
    self.AV_METRICS = get_metrics()
    self.LOADINGS = metrics_loading()

    self.PRETRAINED = False
    self.SESS_LOADED = False
    self.USERDEF_METRIC = False
import abc import time from copy import copy from typing import List, Set import rdkit.rdBase as rkrb import rdkit.RDLogger as rkl from minedatabase.pickaxe import Pickaxe logger = rkl.logger() logger.setLevel(rkl.ERROR) rkrb.DisableLog("rdApp.error") class Filter(metaclass=abc.ABCMeta): """Abstract base class used to generate filters. The Filter class provides the framework for interaction with pickaxe expansions. Each filter subclass must inherit properties from the Filter class. All subclasses must implement properties and methods decorated with @abc.abstractmethod. Feel free to override other non-private methods as well, such as _pre_print() and _post_print(). """ @property @abc.abstractmethod def filter_name(self) -> str: """Obtain name of filter.""" pass @abc.abstractmethod
"R4": "c(c:[*:5]):[*:4]", "R5": "c(c:[*:5]):[*:4]", "R6": "F[*:6]" }] expected_items = { "Core": [ "O=C(c1cncn1[*:2])[*:1]", "c1c([*:2])[*:3]c2nc([*:6])[*:5]:[*:4]c2[*:1]1" ], "R1": ["CN[*:1]", "c(:[*:1]):[*:1]"], "R2": ["CC[*:2]", "Br[*:2]"], "R3": ["", "n(:[*:3]):[*:3]"], "R4": ["", "c(c:[*:5]):[*:4]"], "R5": ["", "c(c:[*:5]):[*:4]"], "R6": ["", "F[*:6]"] } params.labels = RGroupLabels.AutoDetect params.alignment = RGroupCoreAlignment.MCS multicorergd_test(cores, params, expected_rows, expected_items) # test pre-labelled with dummy atom labels, no autodetect # in this case there is no difference from autodetect as the RGD code # cannot tell the difference between query atoms and dummy R-groups params.labels = RGroupLabels.DummyAtomLabels | RGroupLabels.RelabelDuplicateLabels params.alignment = RGroupCoreAlignment.MCS multicorergd_test(cores, params, expected_rows, expected_items) if __name__ == '__main__': rdBase.DisableLog("rdApp.debug") unittest.main()
max_conformers = MAXCONF gaussian_input = "%NProcShared=" + "NPROCESSES" "\n%Mem=" + "MEMORY" + "MB" else: struc = "CO[C@@H]2/C=[O+]\[C@@H]1CO[C@@H](C)O[C@H]1[C@@H]2OC" struc_mode = "smiles" ion_mode = "" file_name = "FILE_NAME" output_dir = "Y:/AIMS/Testing/" sdf_output_file = output_dir + file_name + ".sdf" AIMS_log_file = output_dir + file_name + ".log" com_output_folder = output_dir + "gaussian_files/" max_conformers = 20 gaussian_input = "%NProcShared=24\n%Mem=55000MB" #%% Initializing variables and linking to input and output rdBase.DisableLog("rdApp.*") # Calculation parameters optimization = "PM6 opt freq" multiplicity = 1 potential = AllChem.ETKDG() potential.randomSeed = 42 # use the same seed to ensure reproducability Nconf = 500 ion_mode = ion_mode.lower() selected_metabolites = Chem.SDWriter(sdf_output_file) generated_mols = list() omitted_mols = list() charged_mols = list() chiral_error_mols = list()
import argparse import os import sys import time from itertools import chain from rdkit import Chem from rdkit import rdBase from rdkit.Chem import AllChem from tqdm import tqdm from pyflow.mol.mol_utils import valid_smiles rdBase.DisableLog('rdApp.warning') """ This is a script which substitutes a given core molecule with the standard set of spacers and linkers developed by Biruk Abreha and Steven Lopez. The script will perform substitutions on the core at position indicated using Uranium (U). The conformers are written to PDB files in a folder named after the given molecule name. USAGE: python pymolgen.py molecule_name smiles """ # reaction SMILES for linkers linker_rxns = { 'unsubstituted': '[*:1]([U])>>[*:1]([H])', 'benzene': '[*:1]([U])>>[*:1](c2ccc([Y])cc2)', 'pyridine': '[*:1]([U])>>[*:1](c2ncc([Y])cc2)', 'pyrimidine': '[*:1]([U])>>[*:1](c2ncc([Y])cn2)', 'tetrazine': '[*:1]([U])>>[*:1](c2nnc([Y])nn2)',
"""Contains functions needed to process reaction SMILES and their tokens""" from __future__ import absolute_import, division, print_function, unicode_literals import logging import re from functools import partial from typing import Any, List import numpy as np from rdkit import Chem, rdBase LOGGER = logging.getLogger("attnmapper:smiles_utils") # rdBase.DisableLog("rdApp.error") rdBase.DisableLog("rdApp.warning") SMI_REGEX_PATTERN = r"(\[[^\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\(|\)|\.|=|#|-|\+|\\|\/|:|~|@|\?|>>?|\*|\$|\%[0-9]{2}|[0-9])" BAD_TOKS = ["[CLS]", "[SEP]"] # Default Bad Tokens def tokenize(smiles: str) -> List[str]: """Tokenize a SMILES molecule or reaction""" regex = re.compile(SMI_REGEX_PATTERN) tokens = [token for token in regex.findall(smiles)] assert smiles == "".join(tokens) return tokens def get_atom_types(smiles: str): """Return atomic numbers for every token in (reaction) SMILES""" atom_tokens = get_atom_tokens_mask(smiles)
""" Written by Jan H. Jensen 2018 """ import random import numpy as np from rdkit import Chem from rdkit import rdBase from rdkit.Chem import AllChem import synergetic_molecule_generator.crossover as co rdBase.DisableLog("rdApp.error") def delete_atom(): choices = [ "[*:1]~[D1]>>[*:1]", "[*:1]~[D2]~[*:2]>>[*:1]-[*:2]", "[*:1]~[D3](~[*;!H0:2])~[*:3]>>[*:1]-[*:2]-[*:3]", "[*:1]~[D4](~[*;!H0:2])(~[*;!H0:3])~[*:4]>>[*:1]-[*:2]-[*:3]-[*:4]", "[*:1]~[D4](~[*;!H0;!H1:2])(~[*:3])~[*:4]>>[*:1]-[*:2](-[*:3])-[*:4]", ] p = [0.25, 0.25, 0.25, 0.1875, 0.0625] return np.random.choice(choices, p=p) def append_atom(): choices = [ ["single", ["C", "N", "O", "F", "S", "Cl", "Br"], 7 * [1.0 / 7.0]],
# All Rights Reserved # # This file is part of the RDKit. # The contents are covered by the terms of the BSD license # which is included in the file license.txt, found at the root # of the RDKit source tree. # import unittest from rdkit import Chem from rdkit.Chem.Scaffolds import rdScaffoldNetwork from rdkit import RDConfig from rdkit import rdBase import pickle rdBase.DisableLog("rdApp.info") class TestCase(unittest.TestCase): def setUp(self): pass def test1Basics(self): smis = ["c1ccccc1CC1NC(=O)CCC1", "c1cccnc1CC1NC(=O)CCC1"] ms = [Chem.MolFromSmiles(x) for x in smis] params = rdScaffoldNetwork.ScaffoldNetworkParams() net = rdScaffoldNetwork.CreateScaffoldNetwork(ms, params) self.assertEqual(len(net.nodes), 12) self.assertEqual(len(net.edges), 12) self.assertEqual(len(net.counts), len(net.nodes))
def __init__(self, n_node, dim_node, dim_edge, dim_y, mu_prior, cov_prior, dim_h=50, dim_z=100, dim_f=500, n_mpnn_step=3, n_dummy=5, batch_size=20, lr=0.0005, useGPU=True, use_PREFERENCE=False):
    """Store the hyper-parameters, build the TensorFlow ops and open a session.

    Arguments:
        n_node: size of the node axis of the `node` / `edge` placeholders.
        dim_node: size of the last axis of the `node` placeholder.
        dim_edge: size of the last axis of the `edge` placeholder.
        dim_y: width of the `property` placeholder; also the output width
            requested from the 'auxiliary/Y' encoders.
        mu_prior: `loc` of the multivariate normal that `new_y` is drawn from.
        cov_prior: `covariance_matrix` of that same distribution.
        dim_h: passed to `_encoder` (together with `dim_h * 2`).
        dim_z: width of each half of the encoder output (`latent_mu`,
            `latent_lsgms`) and of the random latent tensors.
        dim_f: stored on the instance; not otherwise used in this method.
        n_mpnn_step: passed to every `_encoder` / `_generator` call.
        n_dummy: number of zero rows/columns appended to the node axes of
            the input placeholders to form `node_pad` / `edge_pad`.
        batch_size: fixed leading dimension of every placeholder.
        lr: stored on the instance; not otherwise used in this method.
        useGPU: when False the session is created with
            `device_count={'GPU': 0}`.
        use_PREFERENCE: selects `dim_R` (2 when true, 1 otherwise), the
            output width of the 'auxiliary/R' encoders.

    Side effects: ignores all Python warnings process-wide, sets TF logging
    to ERROR and disables RDKit's 'rdApp.error' / 'rdApp.warning' channels.
    """
    # Silence Python warnings, TensorFlow logging and RDKit logging.
    warnings.filterwarnings('ignore')
    tf.logging.set_verbosity(tf.logging.ERROR)
    rdBase.DisableLog('rdApp.error')
    rdBase.DisableLog('rdApp.warning')

    # Output width of the auxiliary 'R' networks.
    if use_PREFERENCE:
        self.dim_R = 2
    else:
        self.dim_R = 1

    self.n_node = n_node
    self.dim_node = dim_node
    self.dim_edge = dim_edge
    self.dim_y = dim_y

    self.mu_prior = mu_prior
    self.cov_prior = cov_prior

    self.dim_h = dim_h
    self.dim_z = dim_z
    self.dim_f = dim_f
    self.n_mpnn_step = n_mpnn_step
    self.n_dummy = n_dummy
    self.batch_size = batch_size
    self.lr = lr

    # variables
    self.G = tf.Graph()
    # NOTE(review): the result of as_default() is discarded and there is no
    # `with` block, so this call likely does not make self.G the default
    # graph for the ops created below - confirm the intent. Do not change it
    # blindly: the session at the end is created without `graph=self.G`.
    self.G.as_default()

    # Input placeholders with fully static shapes:
    #   node:     [batch_size, n_node, dim_node]
    #   edge:     [batch_size, n_node, n_node, dim_edge]
    #   property: [batch_size, dim_y]
    self.node = tf.placeholder(
        tf.float32, [self.batch_size, self.n_node, self.dim_node])
    self.edge = tf.placeholder(
        tf.float32,
        [self.batch_size, self.n_node, self.n_node, self.dim_edge])
    self.property = tf.placeholder(tf.float32,
                                   [self.batch_size, self.dim_y])

    # Encoder output is requested with width dim_z * 2 and then split along
    # axis 1 into two dim_z-wide halves.
    self.latent = self._encoder(self.batch_size,
                                self.node,
                                self.edge,
                                self.property,
                                self.n_mpnn_step,
                                self.dim_h,
                                self.dim_h * 2,
                                self.dim_z * 2,
                                0,
                                name='encoder',
                                reuse=False)
    self.latent_mu, self.latent_lsgms = tf.split(self.latent,
                                                 [self.dim_z, self.dim_z],
                                                 1)

    # latent_sample = latent_mu + exp(0.5 * latent_lsgms) * epsilon, with
    # epsilon drawn from a standard normal.
    # presumably latent_lsgms is a log-variance - TODO confirm.
    self.latent_epsilon = tf.random_normal([self.batch_size, self.dim_z],
                                           0., 1.)
    self.latent_sample = tf.add(
        self.latent_mu,
        tf.multiply(tf.exp(0.5 * self.latent_lsgms), self.latent_epsilon))

    # Reconstruction path: the generator receives the sampled latent
    # concatenated with the input property along axis 1.
    self.latent_sample2 = tf.concat([self.latent_sample, self.property], 1)
    self.rec_node, self.rec_edge = self._generator(self.batch_size,
                                                   self.latent_sample2,
                                                   self.n_mpnn_step,
                                                   name='generator',
                                                   reuse=False)

    # Generation path: a standard-normal latent concatenated with a property
    # vector drawn from the (mu_prior, cov_prior) multivariate normal.
    self.new_latent = tf.random_normal([self.batch_size, self.dim_z], 0.,
                                       1.)
    mngen = tf.contrib.distributions.MultivariateNormalFullCovariance(
        loc=self.mu_prior, covariance_matrix=self.cov_prior)
    # NOTE(review): in the TF1 distributions API the signature appears to be
    # sample(sample_shape, seed, name), which would make self.dim_y act as a
    # seed here rather than as part of the shape - confirm before touching.
    self.new_y = tf.dtypes.cast(mngen.sample(self.batch_size, self.dim_y),
                                tf.float32)
    self.new_latent2 = tf.concat([self.new_latent, self.new_y], 1)
    # Same name with reuse=True as the first generator call.
    # presumably this shares its variables - depends on _generator; confirm.
    self.new_node, self.new_edge = self._generator(self.batch_size,
                                                   self.new_latent2,
                                                   self.n_mpnn_step,
                                                   name='generator',
                                                   reuse=True)

    # Pad the real inputs with n_dummy zero entries at the end of each node
    # axis (axis 1 for node; axes 1 and 2 for edge).
    # presumably this matches the generator's output size - TODO confirm.
    self.node_pad = tf.pad(
        self.node, tf.constant([[0, 0], [0, self.n_dummy], [0, 0]]),
        'CONSTANT')
    self.edge_pad = tf.pad(
        self.edge,
        tf.constant([[0, 0], [0, self.n_dummy], [0, self.n_dummy],
                     [0, 0]]), 'CONSTANT')

    # auxiliary
    # 'auxiliary/R': dim_R-wide outputs for the reconstructed, generated and
    # (padded) real graphs. No property input is passed (None). The first
    # call uses reuse=False, the next two reuse=True under the same name.
    self.R_rec = self._encoder(self.batch_size,
                               self.rec_node,
                               self.rec_edge,
                               None,
                               self.n_mpnn_step,
                               self.dim_h,
                               self.dim_h * 2,
                               self.dim_R,
                               0,
                               name='auxiliary/R',
                               reuse=False)
    self.R_fake = self._encoder(self.batch_size,
                                self.new_node,
                                self.new_edge,
                                None,
                                self.n_mpnn_step,
                                self.dim_h,
                                self.dim_h * 2,
                                self.dim_R,
                                0,
                                name='auxiliary/R',
                                reuse=True)
    self.R_real = self._encoder(self.batch_size,
                                self.node_pad,
                                self.edge_pad,
                                None,
                                self.n_mpnn_step,
                                self.dim_h,
                                self.dim_h * 2,
                                self.dim_R,
                                0,
                                name='auxiliary/R',
                                reuse=True)

    # Placeholders of shape [batch_size, dim_R], one per R output above.
    # presumably these carry the targets for the R networks - TODO confirm.
    self.R_rec_t = tf.placeholder(tf.float32,
                                  [self.batch_size, self.dim_R])
    self.R_fake_t = tf.placeholder(tf.float32,
                                   [self.batch_size, self.dim_R])
    self.R_real_t = tf.placeholder(tf.float32,
                                   [self.batch_size, self.dim_R])

    # 'auxiliary/Y': same three inputs, but dim_y-wide outputs.
    self.y_rec = self._encoder(self.batch_size,
                               self.rec_node,
                               self.rec_edge,
                               None,
                               self.n_mpnn_step,
                               self.dim_h,
                               self.dim_h * 2,
                               self.dim_y,
                               0,
                               name='auxiliary/Y',
                               reuse=False)
    self.y_fake = self._encoder(self.batch_size,
                                self.new_node,
                                self.new_edge,
                                None,
                                self.n_mpnn_step,
                                self.dim_h,
                                self.dim_h * 2,
                                self.dim_y,
                                0,
                                name='auxiliary/Y',
                                reuse=True)
    self.y_real = self._encoder(self.batch_size,
                                self.node_pad,
                                self.edge_pad,
                                None,
                                self.n_mpnn_step,
                                self.dim_h,
                                self.dim_h * 2,
                                self.dim_y,
                                0,
                                name='auxiliary/Y',
                                reuse=True)

    # session
    # The Saver is created after all the networks above have been built.
    self.saver = tf.train.Saver()
    if useGPU:
        self.sess = tf.Session()
    else:
        # A device count of 0 for 'GPU' keeps the session on the CPU.
        config = tf.ConfigProto(device_count={'GPU': 0})
        self.sess = tf.Session(config=config)