def setUp(self):
    """Write two small Dataset text protos into a fresh temporary directory.

    ``dataset1.pbtxt`` holds a single populated reaction; ``dataset2.pbtxt``
    holds that same reaction followed by an empty one. The paths are stored
    on ``self.dataset1_filename`` and ``self.dataset2_filename``.
    """
    super().setUp()
    # Suppress RDKit warnings to clean up the test output.
    rdkit_logger = RDLogger.logger()
    rdkit_logger.setLevel(RDLogger.CRITICAL)
    self.test_subdirectory = tempfile.mkdtemp(dir=flags.FLAGS.test_tmpdir)

    # A reaction with one input component and one outcome.
    populated = reaction_pb2.Reaction()
    component = populated.inputs['dummy_input'].components.add()
    identifier = component.identifiers.add(type='CUSTOM')
    identifier.details = 'custom_identifier'
    identifier.value = 'custom_value'
    component.is_limiting = reaction_pb2.Boolean.TRUE
    component.mass.value = 1
    component.mass.units = reaction_pb2.Mass.GRAM
    outcome = populated.outcomes.add()
    outcome.conversion.value = 75

    self.dataset1_filename = os.path.join(self.test_subdirectory,
                                          'dataset1.pbtxt')
    message_helpers.write_message(
        dataset_pb2.Dataset(reactions=[populated]), self.dataset1_filename)

    # The second dataset appends a reaction that is left completely empty.
    empty = reaction_pb2.Reaction()
    self.dataset2_filename = os.path.join(self.test_subdirectory,
                                          'dataset2.pbtxt')
    message_helpers.write_message(
        dataset_pb2.Dataset(reactions=[populated, empty]),
        self.dataset2_filename)
def suppress_warnings():
    """
    Silence RDKit log messages below CRITICAL and ignore Python
    UserWarning / FutureWarning for a cleaner readout.
    """
    from rdkit import RDLogger
    import warnings

    rdkit_logger = RDLogger.logger()
    rdkit_logger.setLevel(RDLogger.CRITICAL)
    for ignored_category in (UserWarning, FutureWarning):
        warnings.filterwarnings(action="ignore", category=ignored_category)
def check_bondtype_change(reactions):
    """Keep a reaction only if it changes the type of an existing bond.

    A bond may be broken, formed (three kinds) or changed (three kinds);
    this function checks for the "changed" case.

    Parameters
    ----------
    reactions : sequence
        ``reactions[0]`` is the atom-mapped reactant SMILES and
        ``reactions[2]`` is a ``;``-separated list of actions, each written
        as ``<map_num>-<map_num>-<number>``.

    Returns
    -------
    sequence
        ``reactions`` itself when some bond of the reactant molecule is
        named by an action whose third field minus one is not ``-1``;
        otherwise ``['', '', '']``.
    """
    from rdkit import RDLogger
    RDLogger.logger().setLevel(RDLogger.CRITICAL)

    reactants = reactions[0]
    actions = reactions[2]
    mol = Chem.MolFromSmiles(reactants)
    bond_type_to_channel = {
        Chem.BondType.SINGLE: 0,
        Chem.BondType.DOUBLE: 1,
        Chem.BondType.TRIPLE: 2,
        Chem.BondType.AROMATIC: 3
    }

    # Map "low-high" atom-map-number pairs to (third action field - 1).
    actions_dict = {}
    for action in actions.split(';'):
        fields = action.split('-')
        first, second = int(fields[0]), int(fields[1])
        pair = str(min(first, second)) + '-' + str(max(first, second))
        actions_dict[pair] = int(float(fields[2]) - 1)

    for bond in mol.GetBonds():
        # The channel index itself is not needed, but the lookup is kept so
        # that an unsupported bond type still raises KeyError as before.
        bond_type_to_channel[bond.GetBondType()]
        i = bond.GetBeginAtom().GetAtomMapNum()
        j = bond.GetEndAtom().GetAtomMapNum()
        key = str(min(i, j)) + '-' + str(max(i, j))
        # Direct dict lookup instead of rebuilding a key list per bond; a
        # missing key behaves like the -1 value that is skipped anyway.
        if actions_dict.get(key, -1) != -1:
            return reactions
    return ['', '', '']
def disable_rdkit_logging():
    """
    Raises the RDKit logger level to ERROR and disables the 'rdApp.error'
    log channel.
    """
    rkl.logger().setLevel(rkl.ERROR)
    rkrb.DisableLog('rdApp.error')
def __init__(self, moli, molj, options=None):
    """
    Initialization function

    Parameters
    ----------

    moli : RDKit molecule object
        the first molecule used to perform the fingerprint calculation
    molj : RDKit molecule object
        the second molecule used to perform the fingerprint calculation
    options : argparse python object, optional
        the list of user options; only its ``verbose`` attribute is read.
        When omitted, a module-level ``options`` object is used if one
        exists, which is what the body previously relied on implicitly.

    """

    # Set logging level and format
    logging.basicConfig(format='%(levelname)s:\t%(message)s',
                        level=logging.INFO)

    # Local pointers to the passed molecules
    self.moli = moli
    self.molj = molj

    # ``options`` used to be read without being a parameter, which raises
    # NameError unless a global of that name happens to be defined.
    if options is None:
        options = globals().get('options')

    # RDKit messages are silenced unless the user asked for pedantic output.
    if getattr(options, 'verbose', None) != 'pedantic':
        lg = RDLogger.logger()
        lg.setLevel(RDLogger.CRITICAL)

    self.fps_moli = FingerprintMols.FingerprintMol(self.moli)
    self.fps_molj = FingerprintMols.FingerprintMol(self.molj)
    self.fps_tan = DataStructs.FingerprintSimilarity(self.fps_moli,
                                                     self.fps_molj)
def count_valid_samples(smiles, rdkit=True):
    """Count the SMILES strings that the chosen toolkit can round-trip.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings to check.
    rdkit : bool, optional
        If true (default) RDKit is used (and its logger is set to
        CRITICAL); otherwise ``pybel`` is used.

    Returns
    -------
    tuple
        ``(count, goods)`` where ``goods`` lists the SMILES strings written
        back by the toolkit for every input that could be parsed, and
        ``count`` is ``len(goods)``.
    """
    if rdkit:
        from rdkit import Chem
        from rdkit import RDLogger
        lg = RDLogger.logger()
        lg.setLevel(RDLogger.CRITICAL)

        def toMol(smi):
            # Any toolkit error (including an unparsable SMILES) means "invalid".
            try:
                mol = Chem.MolFromSmiles(smi)
                return Chem.MolToSmiles(mol)
            except Exception:
                return None
    else:
        import pybel

        def toMol(smi):
            try:
                m = pybel.readstring("smi", smi)
                return m.write("smi")
            except Exception:
                return None

    # ``except Exception`` (rather than a bare except) lets KeyboardInterrupt
    # and SystemExit propagate instead of being counted as invalid input.
    goods = []
    for smi in smiles:
        rewritten = toMol(smi)
        if rewritten is not None:
            goods.append(rewritten)
    return len(goods), goods
def canonicalize(
    compounds: Mapping[str, Chem.Mol],
    standardize: bool = False,
    standardizer: str = "chembl",
    progress_callback: Optional[Callable] = None,
    timeout: Optional[int] = None,
) -> Tuple[Mapping[str, Tuple[Chem.Mol, Mapping[str, bool]]], List[str]]:
    """Canonicalize every compound, each call guarded by ``timeout``.

    ``canonicalize_compound`` is invoked once per entry of ``compounds``
    through a ``concurrent.process`` wrapper. Entries whose call times out
    or raises are reported on stdout and collected in the skipped list.
    ``progress_callback``, when given, is called with the running index on
    every 100th compound. ``standardize`` is accepted but not read here.

    Returns a ``(results, skipped)`` tuple: results keyed like ``compounds``
    and the list of keys that were skipped.
    """

    @concurrent.process(timeout=timeout)
    def process_compound(*args, **kwargs):
        return canonicalize_compound(*args, **kwargs)

    canonicalizer_fun = tautomer.TautomerCanonicalizer()
    results = {}
    failed_keys = []
    standardizer_fun = STANDARDIZERS[standardizer]

    # Suppress pesky warning messages
    RDLogger.logger().setLevel(RDLogger.ERROR)

    report_progress = progress_callback is not None
    for index, (k, mol) in enumerate(compounds.items()):
        if index % 100 == 0 and report_progress:
            progress_callback(index)

        pending = process_compound(
            mol,
            canonicalizer_fun=canonicalizer_fun,
            standardizer_fun=standardizer_fun,
        )
        try:
            results[k] = pending.result()
        except TimeoutError:
            print(f"Processing `{k}` took longer than {timeout}s. Skipping.")
            failed_keys.append(k)
        except Exception as error:
            print(f"Error canonicalizing {k}. Skipping.\n{error}")
            failed_keys.append(k)

    return (results, failed_keys)
def test_mcs(self):
    """Compare freshly computed MCS mappings with the pickled reference.

    For every unordered pair of molecules in ``self.inst`` the mapping is
    computed twice (without and with ``hydrogens=True``) and the resulting
    dictionaries must equal entries 0 and 1 of ``test/basic/MCS.pickle``.
    """
    # The context manager closes the fixture file, which was previously
    # left open for the rest of the test run.
    with open('test/basic/MCS.pickle', 'rb') as f:
        data = pickle.load(f)
    data_no_hydrogens = data[0]
    data_hydrogens = data[1]

    db = self.inst
    nohyds = {}
    hyds = {}

    lg = RDLogger.logger()
    lg.setLevel(RDLogger.CRITICAL)

    for i in range(0, db.nums()):
        for j in range(i + 1, db.nums()):
            MCS_no_hyds = MCS.getMapping(db[i].getMolecule(),
                                         db[j].getMolecule())
            MCS_hyds = MCS.getMapping(db[i].getMolecule(),
                                      db[j].getMolecule(),
                                      hydrogens=True)
            nohyds[(i, j)] = MCS_no_hyds
            hyds[(i, j)] = MCS_hyds

    self.assertEqual(True, nohyds == data_no_hydrogens)
    self.assertEqual(True, hyds == data_hydrogens)
def main(parameters):
    """Run the whole search pipeline for one query file.

    ``parameters[0]`` is the location of the input file and
    ``parameters[1]`` the path for the output file. The steps are: load the
    lookup data, read the query compounds, load the database into the
    global ``dbComps``, search targets and calculate scores in worker
    pools, write the text and JSON outputs, and print the elapsed time.
    """
    global dbComps

    def run_in_pool(task, items):
        # One short-lived pool per stage, closed and joined before returning.
        pool = Pool(processes=workers)
        outcome = pool.map(task, items)
        pool.close()
        pool.join()
        return outcome

    t_begin = time.time()

    # suppress warnings
    RDLogger.logger().setLevel(RDLogger.ERROR)

    query_file, output_path = parameters[0], parameters[1]

    # load the drug name dictionary and the precision matrix
    dictionary()
    precision()

    # STEP #1: read the query compounds
    print('reading input file...........')
    input_molecules = read_input_smiles(query_file.lstrip("-"))

    # STEP #2: load the database compounds
    print('loading database file........')
    dbComps = read_db_file()
    print('Done!')

    # STEP #3: search for 10 targets
    print('searching for 10 targets........')
    # one single-entry dictionary per input molecule, processed in parallel
    single_entry_dicts = [{name: input_molecules[name]}
                          for name in input_molecules.keys()]
    target_dict = run_in_pool(search_10_target, single_entry_dicts)
    print("search: Done!")

    # STEP #4: calculate and sort the scores
    print('calculating scores.........')
    input_molecules = run_in_pool(calculate_scores, target_dict)
    print("scores calculation: Done!")

    # STEP #5: output a txt file
    print('creating output directory and file.......')
    output(input_molecules, query_file, output_path)
    # file in json format
    outputjson(query_file, output_path)
    print("output: Done")

    elapsed = time.time() - t_begin
    molecule_count = len(input_molecules)
    if molecule_count > 0:
        per_molecule = str(elapsed / float(molecule_count))
    else:
        per_molecule = "0"
    print("\nTime elapsed: " + str(elapsed) + " seconds, " + per_molecule +
          " seconds per input molecule.\n")
def __init__(self, logging_level=logging.INFO):
    """Initialise the "Populate" step and quieten RDKit.

    The parent initialiser receives the step name, the logging level and
    the three tables to drop; the RDKit logger is then set to CRITICAL.
    """
    dropped_tables = [self.RAW_DATA_DB, self.COUNTS_DB, self.LIGANDS_DB]
    super().__init__("Populate",
                     logging_level=logging_level,
                     tables_to_drop=dropped_tables)
    RDLogger.logger().setLevel(RDLogger.CRITICAL)
def disable_rdkit_logging():
    """
    Raises the RDKit logger level to ERROR and disables the 'rdApp.error'
    log channel.
    """
    import rdkit.rdBase as rkrb
    import rdkit.RDLogger as rkl

    rkl.logger().setLevel(rkl.ERROR)
    rkrb.DisableLog('rdApp.error')
def remove_salts(mol, dictionary=True, *args, **kwargs):
    """Removes salts from a molecule.

    This function removes detected salts following a salts dictionary by
    default.

    Parameters
    ----------
    mol: rdkit.Chem.Mol
        The molecule to be modified.
    dictionary: bool, optional
        True (default): Activates the use of the salt dictionary. Each
        dictionary entry is tried in turn and the molecule is replaced
        whenever stripping changes its SMILES. ``*args``/``**kwargs`` are
        ignored in this mode.
        False: Uses the standard StripMol functionality, provided by
        rdkit.Chem.SaltRemover.
    defnData: list of str, optional
        If the dictionary is set to False, a custom dictionary can be set
        up; it is forwarded to ``SaltRemover`` together with any other
        extra arguments. If not given, rdkit default values are used.

    Returns
    -------
    mol: rdkit.Chem.Mol
        A new molecule with salts removed.

    Notes
    -----
    The Salts Dictionary
        The dictionary used is a derived version from the ChEMBL salt
        dictionary, created for the standardiser application by Francis
        Atkinson. The salts are stored as list of (neutral) SMILES.
    """
    lg = RDLogger.logger()
    lg.setLevel(RDLogger.ERROR)

    if not dictionary:
        return SaltRemover(*args, **kwargs).StripMol(mol)

    salts = _extract_row_from_csv(0)
    salt_names = _extract_row_from_csv(1)

    # SMILES of the current molecule; only refreshed when a salt is stripped
    # instead of being recomputed for every dictionary entry.
    current_smiles = Chem.MolToSmiles(mol)
    for salt, salt_name in zip(salts, salt_names):
        stripped_mol = SaltRemover(defnData=salt).StripMol(mol)
        stripped_smiles = Chem.MolToSmiles(stripped_mol)
        if stripped_smiles != current_smiles:
            logging.debug("Following salt was stripped: %s", salt_name)
            mol = stripped_mol
            current_smiles = stripped_smiles
    return mol
def set_up_logging(logger_name):
    """Configure logging and return the logger called ``logger_name``.

    The root handler format is set through ``logging.basicConfig``, the
    returned logger is set to DEBUG and the RDKit logger to CRITICAL.
    """
    logging.basicConfig(format='%(asctime)s - %(levelname)s: %(message)s')

    named_logger = logging.getLogger(logger_name)
    named_logger.setLevel(logging.DEBUG)

    # Set rdkit logger to critical
    RDLogger.logger().setLevel(RDLogger.CRITICAL)

    return named_logger
def split_sdf(sdf_file_name, outdir="data/"):
    """Load an SDF file and write one ``.lig`` file per complete row.

    The file is read into a dataframe, reduced to the merged PDB-id column
    plus the SMILES, IC50 and molecule columns, and rows with missing data
    are dropped. Each remaining row's molecule is passed to
    ``write_lig_file`` with the target ``<outdir>/lig/lig<row index>.lig``.

    Returns ``(uligs, prots_ligs)``: ``uligs`` maps row index to the row
    values, ``prots_ligs`` maps each PDB id to the list of row indices that
    mention it.
    """
    print("Loading sdf.")

    # Parse the SDF file into a Pandas dataframe.
    RDLogger.logger().setLevel(RDLogger.CRITICAL)
    df = PandasTools.LoadSDF(sdf_file_name,
                             smilesName='SMILES',
                             molColName='Molecule',
                             includeFingerprints=False)
    print("Raw cols = ", [str(x) for x in df.columns])

    # Select only the needed columns and merge the two PDB cols.
    complex_col = 'PDB ID(s) for Ligand-Target Complex'
    chain_col = 'PDB ID(s) of Target Chain'
    kept_cols = ['SMILES', 'IC50 (nM)', 'Molecule']
    df_selected = df[[complex_col, chain_col] + kept_cols].copy()
    df_selected["PDB IDs"] = (df_selected[complex_col] + ',' +
                              df_selected[chain_col])
    print("Selected cols = ", [str(x) for x in df_selected.columns])
    df_selected = df_selected[["PDB IDs"] + kept_cols]

    # Drop any rows with missing data.
    df_selected = (df_selected.replace('', np.nan)
                   .replace(',', np.nan)
                   .dropna())
    r_rows = len(df.index)
    s_rows = len(df_selected.index)
    keep_pct = (float(s_rows) / float(r_rows)) * 100.0
    print("Raw rows = ", r_rows)
    print("Sel rows = ", s_rows)
    print("Keep pct = %.2f%s" % (keep_pct, '%'))

    # Build ligand dictionary and a protein dictionary.
    print("Building protein-ligand dictionary.")
    uligs = {}
    prots_ligs = {}
    for lndx, row in enumerate(df_selected.values):
        for pdb in row[0].split(','):
            if pdb != '':
                prots_ligs.setdefault(pdb, []).append(lndx)
        uligs[lndx] = row
    print("Unique proteins = ", len(prots_ligs))

    # Write out .lig files and return the data dictionaries.
    print("Writing per-ligand output files.")
    for key, lig in uligs.items():
        write_lig_file(lig[3], outdir + "/lig/lig%s.lig" % str(key))
    return uligs, prots_ligs
def findDuplicates (sdf, name, out):
    """Report molecules of an SD file that share the same truncated InChIKey.

    sdf:  path of the SD file to read (hydrogens kept, no sanitization).
    name: molecule property used as display name; when it cannot be read
          the name falls back to ``mol<record number, 8 digits>``.
    out:  path of the tab-separated report that is (over)written.

    Molecules that cannot be read, or for which the InChI, InChIKey or
    SMILES cannot be generated, are skipped. Two molecules are duplicates
    when their InChIKeys agree after dropping the last three characters.
    The ``i``/``j`` columns are positions in the list of retained
    molecules, not record numbers in the SD file.
    """
    lg = RDLogger.logger()
    lg.setLevel(RDLogger.ERROR)

    suppl = Chem.SDMolSupplier(sdf, removeHs=False, sanitize=False)

    idlist = []
    nmlist = []
    smlist = []

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('reading SDFile...')
    for counter, mol in enumerate(suppl, 1):
        if mol is None:
            continue
        try:
            inchi = Chem.MolToInchi(mol)
            inkey = Chem.InchiToInchiKey(inchi)
            smile = Chem.MolToSmiles(mol)
        except Exception:
            # Best effort: molecules that cannot be converted are skipped.
            continue
        try:
            ni = mol.GetProp(name)
        except Exception:
            ni = 'mol%0.8d' % counter

        idlist.append(inkey[:-3])
        nmlist.append(ni)
        smlist.append(smile)

    print('analizing duplicates...')

    # Group positions by key so only molecules sharing a key are paired,
    # instead of comparing every pair of molecules.
    positions = {}
    for idx, key in enumerate(idlist):
        positions.setdefault(key, []).append(idx)

    duplicates = 0
    with open(out, 'w+') as fo:
        fo.write('i\tj\tnamei\tnamej\tsmilesi\tsmilesj\n')
        # Iterating i in order and each group in insertion order keeps the
        # report sorted by (i, j), exactly like the all-pairs scan.
        for i, key in enumerate(idlist):
            for j in positions[key]:
                if j <= i:
                    continue
                line = str(i)+'\t'+str(j)+'\t'+nmlist[i]+'\t'+nmlist[j]+'\t'+smlist[i]+'\t'+smlist[j]
                fo.write(line+'\n')
                duplicates += 1

    print('\n%d duplicate molecules found' % duplicates)
def main():
    """Write a CSV of pairwise fingerprint similarities between ligands.

    The ids listed in ``<pdb_list_file>`` are looked up under
    ``<pdbbind_dir>``; each ligand is read from its ``.sdf`` (falling back
    to the ``.mol2``), fingerprinted, and the all-against-all similarity
    table is saved to ``<output_file>``. Ligands that cannot be parsed or
    fingerprinted are left out.
    """
    RDLogger.logger().setLevel(RDLogger.CRITICAL)

    # parse command line arguments
    cli = docopt(__doc__)
    pdb_list_file = cli['<pdb_list_file>']
    pdbbind_dir = cli['<pdbbind_dir>']
    output_file = cli['<output_file>']

    with open(pdb_list_file, 'r') as handle:
        pdbs = [entry.strip() for entry in handle]

    # load ligands and compute features
    fingerprints = {}
    for pdb in pdbs:
        ligand_dir = os.path.join(pdbbind_dir, pdb)

        # prefer to use the .sdf provided by PDBbind
        sdf_path = os.path.join(ligand_dir, f'{pdb}_ligand.sdf')
        mol = next(Chem.SDMolSupplier(sdf_path, removeHs=False))

        # but we'll try the .mol2 if RDKit can't parse the .sdf
        if mol is None:
            mol2_path = os.path.join(ligand_dir, f'{pdb}_ligand.mol2')
            mol = Chem.MolFromMol2File(mol2_path, removeHs=False)
            # skip the ligand if RDKit can't parse the .mol2
            if mol is None:
                continue

        try:
            fingerprints[pdb] = AllChem.GetMorganFingerprintAsBitVect(
                mol, 2, nBits=2048)
        except ValueError as err:
            print(err)
            continue

    similarity = {}
    for pdb1, fp1 in fingerprints.items():
        similarity[pdb1] = {
            pdb2: DataStructs.FingerprintSimilarity(fp1, fp2)
            for pdb2, fp2 in fingerprints.items()
        }

    pd.DataFrame(similarity).to_csv(output_file)
def readsdfiles(fname):
    """
    Read every individual SD record from the gzip-compressed, concatenated
    SDFile ``fname`` and hand each one to ``writedb``; progress is printed
    every 50000 records and the connection is flushed at the end.
    """
    print('readsdfile ', fname)
    sql = 'insert into ' + schema + '.sdfile (%s) values %s;'

    RDLogger.logger().setLevel(RDLogger.CRITICAL)

    count = 0
    with gzip.open(fname, 'r') as file:
        while True:
            sdrecord = readnextSDfile(file)
            # The reader signals the end of the input with the string 'EOF'.
            if sdrecord == 'EOF':
                break
            writedb(conn, sdrecord, sql)
            count += 1
            if (count % 50000 == 0):
                print('readsdfiles records', count)

    flush(conn)
    print("wrote ", count, " records")
def configure_worker(options=None, **kwargs):
    """Load the neural network context recommender for a matching worker.

    Nothing happens unless ``options`` has a ``'queues'`` entry whose
    comma-separated value contains ``CORRESPONDING_QUEUE``. Otherwise the
    RDKit logger is set to CRITICAL and the model is loaded into the
    module-level ``recommender``. A loading failure is printed rather than
    raised, so the worker still starts.

    ``options`` defaults to ``None`` (treated as "no options") instead of a
    shared mutable ``{}``; extra keyword arguments are accepted and ignored.
    """
    global recommender

    if options is None or 'queues' not in options:
        return
    if CORRESPONDING_QUEUE not in options['queues'].split(','):
        return
    print('### STARTING UP A NEURAL NETWORK CONTEXT RECOMMENDER WORKER ###')

    # Setting logging low
    from rdkit import RDLogger
    lg = RDLogger.logger()
    lg.setLevel(RDLogger.CRITICAL)

    try:
        recommender = NeuralNetContextRecommender()
        recommender.load()
    except Exception as e:
        # Best effort: report the problem and let the worker come up anyway.
        print(e)
    else:
        # Only claim success when loading actually succeeded.
        print('Loaded context recommendation model')
    print('### NEURAL NETWORK CONTEXT RECOMMENDER STARTED UP ###')
def configure_worker(options=None, **kwargs):
    """Load the nearest neighbor context recommender for a matching worker.

    Nothing happens unless ``options`` has a ``'queues'`` entry whose
    comma-separated value contains ``CORRESPONDING_QUEUE``. Otherwise the
    RDKit logger is set to CRITICAL and the model is loaded into the
    module-level ``recommender`` from the paths in ``gc.CONTEXT_REC``. A
    loading failure is printed rather than raised, so the worker still
    starts.

    ``options`` defaults to ``None`` (treated as "no options") instead of a
    shared mutable ``{}``; extra keyword arguments are accepted and ignored.
    """
    global recommender

    if options is None or 'queues' not in options:
        return
    if CORRESPONDING_QUEUE not in options['queues'].split(','):
        return
    print('### STARTING UP A NEAREST NEIGHBOR CONTEXT RECOMMENDER WORKER ###')

    # Setting logging low
    from rdkit import RDLogger
    lg = RDLogger.logger()
    lg.setLevel(RDLogger.CRITICAL)

    try:
        recommender = NNContextRecommender()
        recommender.load_nn_model(model_path=gc.CONTEXT_REC['model_path'],
                                  info_path=gc.CONTEXT_REC['info_path'])
    except Exception as e:
        # Best effort: report the problem and let the worker come up anyway.
        print(e)
    else:
        # Only claim success when loading actually succeeded.
        print('Loaded context recommendation model')
    print('### NEAREST NEIGHBOR CONTEXT RECOMMENDER STARTED UP ###')
def load_csv(csv_file_name):
    """Read a two-column CSV and attach an RDKit molecule to each row.

    Every line is split on commas. Empty lines are skipped silently; lines
    that do not have exactly two fields are printed and skipped. For the
    remaining rows ``Chem.MolFromSmiles`` of the second field is appended as
    a third element. Rows whose line index is below 10 are echoed.

    Returns the list of three-element rows.
    """
    print("Loading CSV.")

    # Parse the CSV file.
    RDLogger.logger().setLevel(RDLogger.CRITICAL)
    with open(csv_file_name, "r") as csvf:
        rows = [raw_line.split(",") for raw_line in csvf.read().split("\n")]

    # Convert to mol objects.
    print("Converting ligands to mol objects.")
    valid_ligands = []
    for line_no, fields in enumerate(rows):
        if fields == [""]:
            continue
        if len(fields) != 2:
            print(fields)
            continue

        fields.append(Chem.MolFromSmiles(fields[1]))
        valid_ligands.append(fields)

        # Echo only the first rows, then a single ellipsis marker.
        if line_no < 10:
            print(fields)
        elif line_no == 10:
            print("...")

    print("Done creating mol objects.")
    return valid_ligands
import warnings import os import shutil import logging import argparse from rdkit import RDLogger from os.path import join, basename, abspath from .molecular import Molecule, CACHE_SETTINGS from .ga import GAPopulation, GAInput from .convenience_tools import (tar_output, errorhandler, streamhandler, archive_output, kill_macromodel) from .ga import plotting as plot warnings.filterwarnings("ignore") RDLogger.logger().setLevel(RDLogger.CRITICAL) # Get the loggers. rootlogger = logging.getLogger() rootlogger.addHandler(errorhandler) rootlogger.addHandler(streamhandler) logger = logging.getLogger(__name__) class GAProgress: """ Deals with logging the GA's progress. Attributes ----------
# Module setup: imports, RDKit log level, worker count, paths and the label
# lists used by the validation search.
from __future__ import print_function
from rdkit import RDLogger
# Configure the RDKit logger before the remaining RDKit imports run.
lg = RDLogger.logger()
# NOTE(review): 4 is presumably RDLogger.CRITICAL - confirm and use the
# named constant.
lg.setLevel(4)
import rdkit.Chem as Chem
import rdkit.Chem.AllChem as AllChem
from rdkit import DataStructs
import pandas as pd
import numpy as np
from tqdm import tqdm
import json
import sys
from retrosim.utils.generate_retro_templates import process_an_example
from retrosim.data.get_data import get_data_df, split_data_df
from joblib import Parallel, delayed
import multiprocessing
# Number of CPUs reported by the operating system.
num_cores = multiprocessing.cpu_count()
from rdchiral.main import rdchiralRun, rdchiralReaction, rdchiralReactants
import os
# Directory containing this script, and its parent directory.
SCRIPT_ROOT = os.path.dirname(__file__)
PROJ_ROOT = os.path.dirname(SCRIPT_ROOT)

############### DEFINITIONS FOR VALIDATION SEARCH ########################
# Labels of the fingerprint and similarity variants.
all_getfp_labels = ['Morgan2noFeat', 'Morgan3noFeat', 'Morgan2Feat', 'Morgan3Feat']
all_similarity_labels = ['Tanimoto', 'Dice', 'TverskyA', 'TverskyB',]
# Output is either s fixed name in an output directory # or a prefixed filename (without an output directory) if args.output_is_prefix: output_filename = '{}.{}'.format(args.output, output_filename) else: # Create the output directory if os.path.exists(args.output): logger.error('Output exists') sys.exit(1) os.mkdir(args.output) os.chmod(args.output, 0o777) output_filename = os.path.join(args.output, '{}'.format(output_filename)) # Suppress basic RDKit logging... RDLogger.logger().setLevel(RDLogger.ERROR) # Report any limiting...? if args.limit: logger.warning('Limiting processing to first {:,} molecules'.format( args.limit)) # Before we open the output file # get a lit of all the input files (the prefix may be the same) # so we don't want our file in the list of files to be processed) real_files = glob.glob('{}/{}*'.format(args.vendor_dir, args.vendor_prefix)) # Open the file we'll write the standardised data set to. # A text, tab-separated file. logger.info('Writing %s...', output_filename)
except UnboundLocalError: print("something wrong with IRC") ts_found_opt = None _dict.update({"IRC check": ts_found}) return _dict, ts_found_opt, ts_found if __name__ == '__main__': FILE_1 = sys.argv[1] #reactant .xyz file FILE_2 = sys.argv[2] #product .xyz file SUCCESS_PKL = sys.argv[3] #.pkl file with dataframe to save result if search successful FAIL_PKL = sys.argv[4] #.pkl faile with daraframe to save result if search unsuccessful METHOD = 'ub3lyp/6-31G(d,p)' #Specify the method for the Gaussian calculations LG = RDLogger.logger() LG.setLevel(RDLogger.ERROR) with open("log_err.txt", 'w') as err: with redirect_stderr(err): with open("log.txt", 'w') as out: with redirect_stdout(out): # create empty dictionary to save results DICT = {} #Get the xTB path for reactant and product PATH_FILE, OUTFILE, N_PATH = find_xtb_path(FILE_1, FILE_2) DICT = xtb_path_parameter(N_PATH, OUTFILE, DICT) #extract path structures and do sp energy calculations
def UFFConstrainedOptimize(mol, moving_atoms=None, fixed_atoms=None,
                           cutoff=5., verbose=False):
    """Minimize a molecule using UFF forcefield with a set of moving/fixed
    atoms. If both moving and fixed atoms are provided, fixed_atoms parameter
    will be ignored. The minimization is done in-place (without copying
    molecule).

    Parameters
    ----------
    mol: rdkit.Chem.rdchem.Mol
        Molecule to be minimized.
    moving_atoms: array-like (default=None)
        Indices of freely moving atoms. If None, they are every atom that
        is not listed in `fixed_atoms`. These two arguments are mutually
        exclusive.
    fixed_atoms: array-like (default=None)
        Indices of fixed atoms. Only used when `moving_atoms` is None.
        These two arguments are mutually exclusive.
    cutoff: float (default=5.)
        Distance cutoff for the UFF minimization. Atoms within this
        distance of any moving atom (expanded to whole residues) form the
        minimized sub-molecule; the value is also passed to the force field
        as `vdwThresh`.
    verbose: bool (default=False)
        If False, the RDKit logger is set to CRITICAL while the function
        runs and set to INFO afterwards.

    Returns
    -------
    mol: rdkit.Chem.rdchem.Mol
        Molecule with mimimized `moving_atoms`

    Raises
    ------
    ValueError
        If neither `moving_atoms` nor `fixed_atoms` is given.
    """
    # Validate before touching the logger so an invalid call leaves the
    # global RDKit log level untouched.
    if moving_atoms is None and fixed_atoms is None:
        raise ValueError('You must supply at least one set of moving/fixed '
                         'atoms.')

    logger = RDLogger.logger()
    if not verbose:
        logger.setLevel(RDLogger.CRITICAL)

    try:
        if moving_atoms is None:
            all_atoms = set(range(mol.GetNumAtoms()))
            moving_atoms = list(all_atoms.difference(fixed_atoms))
        # Set for the per-atom membership test in the fixed-point loop.
        moving_set = set(moving_atoms)

        # extract submolecules containing atoms within cutoff
        mol_conf = mol.GetConformer(-1)
        pos = np.array([mol_conf.GetAtomPosition(i)
                        for i in range(mol_conf.GetNumAtoms())])
        mask = (cdist(pos, pos[moving_atoms]) <= cutoff).any(axis=1)
        amap = np.where(mask)[0].tolist()

        # expand to whole residues
        pocket_residues = OrderedDict()
        protein_residues = GetResidues(mol)
        for res_id in protein_residues.keys():
            if any(1 for res_aix in protein_residues[res_id]
                   if res_aix in amap):
                pocket_residues[res_id] = protein_residues[res_id]
        amap = list(chain(*pocket_residues.values()))

        # TODO: above certain threshold its making a submolis redundant
        submol = AtomListToSubMol(mol, amap, includeConformer=True)
        # initialize ring info
        Chem.GetSSSR(submol)
        ff = UFFGetMoleculeForceField(submol, vdwThresh=cutoff,
                                      ignoreInterfragInteractions=False)
        # Every sub-molecule atom that is not a moving atom is pinned.
        for submol_id, atom_id in enumerate(amap):
            if atom_id not in moving_set:
                ff.AddFixedPoint(submol_id)

        ff.Initialize()
        ff.Minimize(energyTol=1e-4, forceTol=1e-3, maxIts=2000)

        # copy the minimized positions back into the original molecule
        conf = mol.GetConformer(-1)
        submol_conf = submol.GetConformer(-1)
        for submol_idx, mol_idx in enumerate(amap):
            conf.SetAtomPosition(mol_idx,
                                 submol_conf.GetAtomPosition(submol_idx))
    finally:
        # FIXME: there's no getLevel method, so we set to default level.
        # Done in ``finally`` so a failure does not leave RDKit silenced.
        if not verbose:
            logger.setLevel(RDLogger.INFO)
    return mol
""" RDKit interface
"""

from rdkit import RDLogger
from rdkit.Chem import Draw
import rdkit.Chem as _rd_chem
import rdkit.Chem.AllChem as _rd_all_chem
from automol import util
import automol.geom.base
import automol.graph.base

# Only RDKit messages of level ERROR and above are emitted once this module
# has been imported.
_LOGGER = RDLogger.logger()
_LOGGER.setLevel(RDLogger.ERROR)


# inchi
def from_inchi(ich, print_debug=False):
    """ Generate an RDKit molecule object from an InChI string.

        The result of ``MolFromInchi`` is returned unchanged, so callers
        must be prepared for ``None``; with ``print_debug`` set, a message
        is printed in that case.

        :param ich: InChI string
        :type ich: str
        :param print_debug: control the printing of a debug message
        :type print_debug: bool
        :rtype: RDKit molecule object
    """

    # InChI warnings are not escalated to errors.
    rdm = _rd_chem.inchi.MolFromInchi(ich, treatWarningAsError=False)
    if rdm is None and print_debug:
        print(f'rdm fails for {ich} by returning {rdm}')

    return rdm
''' Modified from https://github.com/wengong-jin/nips17-rexgen/blob/master/USPTO/core-wln-global/mol_graph.py ''' import chainer import numpy as np from rdkit import Chem from rdkit import RDLogger from tqdm import tqdm from chainer_chemistry.dataset.preprocessors.gwm_preprocessor import GGNNGWMPreprocessor rdl = RDLogger.logger() rdl.setLevel(RDLogger.CRITICAL) elem_list = [ 'C', 'N', 'O', 'S', 'F', 'Si', 'P', 'Cl', 'Br', 'Mg', 'Na', 'Ca', 'Fe', 'As', 'Al', 'I', 'B', 'V', 'K', 'Tl', 'Yb', 'Sb', 'Sn', 'Ag', 'Pd', 'Co', 'Se', 'Ti', 'Zn', 'H', 'Li', 'Ge', 'Cu', 'Au', 'Ni', 'Cd', 'In', 'Mn', 'Zr', 'Cr', 'Pt', 'Hg', 'Pb', 'W', 'Ru', 'Nb', 'Re', 'Te', 'Rh', 'Tc', 'Ba', 'Bi', 'Hf', 'Mo', 'U', 'Sm', 'Os', 'Ir', 'Ce', 'Gd', 'Ga', 'Cs', 'unknown' ] def read_data(path): data = [] with open(path, 'r') as f: for line in f: r, action = line.strip('\r\n ').split() if len(r.split('>')) != 3 or r.split('>')[1] != '':
"""SyGMa: Systematically Generating potential Metabolites""" from builtins import str import argparse import sygma import sys from rdkit import Chem, RDLogger RDLogger.logger().setLevel(RDLogger.ERROR) import logging logging.basicConfig() logger = logging.getLogger('sygma') def run_sygma(args, file=sys.stdout): logger.setLevel(args.loglevel.upper()) scenario = sygma.Scenario([ [sygma.ruleset['phase1'], args.phase1], [sygma.ruleset['phase2'], args.phase2] ]) parent = Chem.MolFromSmiles(args.parentmol) metabolic_tree = scenario.run(parent) metabolic_tree.calc_scores() if args.outputtype == "sdf": metabolic_tree.write_sdf(file) elif args.outputtype == "smiles": file.write("\n".join([m+" "+str(s) for m,s in metabolic_tree.to_smiles()])+'\n') return None def get_sygma_parser(): ap = argparse.ArgumentParser(description=__doc__) ap.add_argument('--version', action='version', version='%(prog)s ' + sygma.__version__)
from collections import Counter import tqdm import networkx as nx from loguru import logger from rdkit import RDLogger from rdkit.Chem import rdMolHash, MolToSmiles, rdmolops from rdkit.Chem.rdMolDescriptors import CalcNumRings from scaffoldgraph.io import * from scaffoldgraph.utils import canonize_smiles from .fragment import get_murcko_scaffold, get_annotated_murcko_scaffold from .scaffold import Scaffold rdlogger = RDLogger.logger() def init_molecule_name(mol): """Initialize the name of a molecule if not provided""" if not mol.HasProp('_Name') or mol.GetProp('_Name') == '': n = rdMolHash.GenerateMoleculeHashString(mol) mol.SetProp('_Name', n) class ScaffoldGraph(nx.DiGraph, ABC): """Abstract base class for ScaffoldGraphs""" def __init__(self, graph=None, fragmenter=None): """ Initialize a ScaffoldGraph object
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # INCHI_AVAILABLE = True import rdinchi import logging from rdkit import RDLogger logger = RDLogger.logger() logLevelToLogFunctionLookup = { logging.INFO : logger.info, logging.DEBUG : logger.debug, logging.WARNING : logger.warning, logging.CRITICAL : logger.critical, logging.ERROR : logger.error } class InchiReadWriteError(Exception): pass def MolFromInchi(inchi, sanitize=True, removeHs=True, logLevel=None, treatWarningAsError=False): """Construct a molecule from a InChI string
from molgym.envs.rewards import RewardFunction
from molgym.envs.rewards.multiobjective import AdditiveReward
from molgym.envs.rewards.oneshot import OneShotScore
from molgym.envs.rewards.tuned import LogisticCombination
from molgym.envs.simple import Molecule
from molgym.envs.rewards.rdkit import LogP, QEDReward, SAScore, CycleLength
from molgym.envs.rewards.mpnn import MPNNReward
from molgym.utils.conversions import convert_nx_to_smiles, convert_smiles_to_nx
from molgym.mpnn.layers import custom_objects
from tensorflow.keras.models import load_model

# Set up the logger
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger('RL-Logger')
logger.setLevel(logging.DEBUG)
# RDKit itself is limited to CRITICAL messages.
rdkit_logger = RDLogger.logger()
rdkit_logger.setLevel(RDLogger.CRITICAL)


def get_platform_info():
    """Get information about the computer running this process

    Returns a dictionary with the keys ``processor``, ``python_version``,
    ``python_compiler``, ``hostname``, ``os``, ``cpu_name`` and ``n_cores``,
    filled from the ``platform`` module and ``os.cpu_count()``.
    """

    return {
        'processor': platform.machine(),
        'python_version': platform.python_version(),
        'python_compiler': platform.python_compiler(),
        'hostname': platform.node(),
        'os': platform.platform(),
        'cpu_name': platform.processor(),
        'n_cores': os.cpu_count()
    }
def generate_substructures(input_file):
    """
    Takes all text from the input file containing the structures' SMILES
    string and identifier, and generates the substructures of each structure.

    Every atom environment returned by ``Chem.FindAtomEnvironmentOfRadiusN``
    is turned into a sub-molecule; its SMILES is kept when it is non-empty,
    differs from the whole structure, can be sanitized and matches the
    parent structure. The result is also written to
    ``all_test_substructures.txt`` (one line per identifier, sorted, with
    the substructures joined by ``.``) and the total count is printed.

    input_file: structure txt file with structure SMILES and identifier
        (tab separated). The text after the last newline is ignored, so the
        file is expected to end with a newline.

    Returns ``(structure_combo_list, official_subs_dict)``: a list of
    ``[smiles, mol, identifier]`` entries and a dictionary mapping each
    identifier to its substructure SMILES (``['<NA>']`` when none was found).
    """
    official_subs_dict = {}
    with open(input_file) as file_object:
        contents = file_object.read()

    # Create a structure list
    structure_combo_list = []
    for line in contents.split('\n')[:-1]:
        fields = line.split('\t')
        structure_id = fields[1]
        structure_mol = Chem.MolFromSmiles(fields[0])
        structure_smile = Chem.MolToSmiles(structure_mol)
        structure_combo_list.append(
            [structure_smile, structure_mol, structure_id])

    # Silence RDKit once, before the candidate sub-molecules are parsed,
    # instead of reconfiguring the logger for every candidate.
    RDLogger.logger().setLevel(RDLogger.CRITICAL)

    # Generate the substructures for each structure in the class
    for structure_smile, structure_mol, structure_id in structure_combo_list:
        valid_sub_list = []
        seen_smiles = set()
        nr_of_atoms = structure_mol.GetNumAtoms()

        # Try every (radius, atom index) combination for this structure.
        for radius in range(nr_of_atoms):
            for atom_idx in range(nr_of_atoms):
                env = Chem.FindAtomEnvironmentOfRadiusN(
                    structure_mol, radius, atom_idx)
                sub_smile = Chem.MolToSmiles(
                    Chem.PathToSubmol(structure_mol, env))
                if (sub_smile == '' or sub_smile == structure_smile
                        or sub_smile in seen_smiles):
                    continue
                # Re-parse the SMILES to get a standalone molecule.
                submol = Chem.MolFromSmiles(sub_smile)
                try:
                    Chem.SanitizeMol(submol)
                    is_match = structure_mol.HasSubstructMatch(submol)
                except Exception:
                    # Candidates RDKit cannot parse, sanitize or match are
                    # skipped on purpose.
                    continue
                if is_match:
                    seen_smiles.add(sub_smile)
                    valid_sub_list.append(sub_smile)

        # Write each substructure per structure in a dictionary
        if valid_sub_list:
            official_subs_dict.setdefault(structure_id,
                                          []).extend(valid_sub_list)
        elif structure_id not in official_subs_dict:
            official_subs_dict[structure_id] = ['<NA>']

    with open("all_test_substructures.txt", 'w') as db_file:
        for name in sorted(official_subs_dict):
            db_file.write('.'.join(official_subs_dict[name]) + '\t' + name +
                          '\n')

    print('~~~~~~~~~~~~~~~~~~~~~~~~~~')
    print('All possible substructures')
    nr_of_subs = sum(len(subs) for subs in official_subs_dict.values())
    print(nr_of_subs)
    return structure_combo_list, official_subs_dict
def UFFConstrainedOptimize(mol, moving_atoms=None, fixed_atoms=None,
                           cutoff=5., verbose=False):
    """Minimize a molecule using UFF forcefield with a set of moving/fixed
    atoms. If both moving and fixed atoms are provided, fixed_atoms parameter
    will be ignored. The minimization is done in-place (without copying
    molecule).

    Parameters
    ----------
    mol: rdkit.Chem.rdchem.Mol
        Molecule to be minimized.
    moving_atoms: array-like (default=None)
        Indices of freely moving atoms. If None, they are every atom that
        is not listed in `fixed_atoms`. These two arguments are mutually
        exclusive.
    fixed_atoms: array-like (default=None)
        Indices of fixed atoms. Only used when `moving_atoms` is None.
        These two arguments are mutually exclusive.
    cutoff: float (default=5.)
        Distance cutoff for the UFF minimization. Atoms within this
        distance of any moving atom (expanded to whole residues) form the
        minimized sub-molecule; the value is also passed to the force field
        as `vdwThresh`.
    verbose: bool (default=False)
        If False, the RDKit logger is set to CRITICAL while the function
        runs and set to INFO afterwards.

    Returns
    -------
    mol: rdkit.Chem.rdchem.Mol
        Molecule with mimimized `moving_atoms`

    Raises
    ------
    ValueError
        If neither `moving_atoms` nor `fixed_atoms` is given.
    """
    # Validate before touching the logger so an invalid call leaves the
    # global RDKit log level untouched.
    if moving_atoms is None and fixed_atoms is None:
        raise ValueError('You must supply at least one set of moving/fixed '
                         'atoms.')

    logger = RDLogger.logger()
    if not verbose:
        logger.setLevel(RDLogger.CRITICAL)

    try:
        if moving_atoms is None:
            all_atoms = set(range(mol.GetNumAtoms()))
            moving_atoms = list(all_atoms.difference(fixed_atoms))
        # Set for the per-atom membership test in the fixed-point loop.
        moving_set = set(moving_atoms)

        # extract submolecules containing atoms within cutoff
        mol_conf = mol.GetConformer(-1)
        pos = np.array([mol_conf.GetAtomPosition(i)
                        for i in range(mol_conf.GetNumAtoms())])
        mask = (cdist(pos, pos[moving_atoms]) <= cutoff).any(axis=1)
        amap = np.where(mask)[0].tolist()

        # expand to whole residues
        pocket_residues = OrderedDict()
        protein_residues = GetResidues(mol)
        for res_id in protein_residues.keys():
            if any(1 for res_aix in protein_residues[res_id]
                   if res_aix in amap):
                pocket_residues[res_id] = protein_residues[res_id]
        amap = list(chain(*pocket_residues.values()))

        # TODO: above certain threshold its making a submolis redundant
        submol = AtomListToSubMol(mol, amap, includeConformer=True)
        # initialize ring info
        Chem.GetSSSR(submol)
        ff = UFFGetMoleculeForceField(submol, vdwThresh=cutoff,
                                      ignoreInterfragInteractions=False)
        # Every sub-molecule atom that is not a moving atom is pinned.
        for submol_id, atom_id in enumerate(amap):
            if atom_id not in moving_set:
                ff.AddFixedPoint(submol_id)

        ff.Initialize()
        ff.Minimize(energyTol=1e-4, forceTol=1e-3, maxIts=2000)

        # copy the minimized positions back into the original molecule
        conf = mol.GetConformer(-1)
        submol_conf = submol.GetConformer(-1)
        for submol_idx, mol_idx in enumerate(amap):
            conf.SetAtomPosition(mol_idx,
                                 submol_conf.GetAtomPosition(submol_idx))
    finally:
        # FIXME: there's no getLevel method, so we set to default level.
        # Done in ``finally`` so a failure does not leave RDKit silenced.
        if not verbose:
            logger.setLevel(RDLogger.INFO)
    return mol
from rdkit import RDConfig import sys, time, math from rdkit.ML.Data import Stats import rdkit.DistanceGeometry as DG from rdkit import Chem import numpy from rdkit.Chem import rdDistGeom as MolDG from rdkit.Chem import ChemicalFeatures from rdkit.Chem import ChemicalForceFields from rdkit.Chem.Pharm3D import Pharmacophore, ExcludedVolume from rdkit import Geometry _times = {} from rdkit import RDLogger as logging logger = logging.logger() defaultFeatLength = 2.0 def GetAtomHeavyNeighbors(atom): """ returns a list of the heavy-atom neighbors of the atom passed in: >>> m = Chem.MolFromSmiles('CCO') >>> l = GetAtomHeavyNeighbors(m.GetAtomWithIdx(0)) >>> len(l) 1 >>> isinstance(l[0],Chem.Atom) True >>> l[0].GetIdx() 1
plt.plot(bayes_recall, bayes_precision, 'k-', color='red', label='BayesGrad(Ours)') plt.axhline(y=vanilla_precision[-1], color='gray', linestyle='--') plt.legend() plt.xlabel("recall") plt.ylabel("precision") if save_path: print('saved to ', save_path) plt.savefig(save_path) # plt.savefig('artificial_pr.eps') else: plt.show() if __name__ == '__main__': # Disable errors by RDKit occurred in preprocessing Tox21 dataset. lg = RDLogger.logger() lg.setLevel(RDLogger.CRITICAL) # show INFO level log from chainer chemistry logging.basicConfig(level=logging.INFO) args = parse() # --- extracting configs --- dirpath = args.dirpath json_path = os.path.join(dirpath, 'args.json') if not os.path.exists(json_path): raise ValueError( 'json_path {} not found! Execute train_tox21.py beforehand.'.format(json)) with open(json_path, 'r') as f: train_args = json.load(f) method = train_args['method']
# @@ All Rights Reserved @@ # This file is part of the RDKit. # The contents are covered by the terms of the BSD license # which is included in the file license.txt, found at the root # of the RDKit source tree. # from rdkit import Chem from rdkit.Chem import AllChem from rdkit.Chem import Lipinski,Descriptors,Crippen from rdkit.Dbase.DbConnection import DbConnect from rdkit.Dbase import DbModule import re #set up the logger: import rdkit.RDLogger as logging logger = logging.logger() logger.setLevel(logging.INFO) def ProcessMol(mol,typeConversions,globalProps,nDone,nameProp='_Name',nameCol='compound_id', redraw=False,keepHs=False, skipProps=False,addComputedProps=False, skipSmiles=False, uniqNames=None,namesSeen=None): if not mol: raise ValueError('no molecule') if keepHs: Chem.SanitizeMol(mol) try: nm = mol.GetProp(nameProp) except KeyError: nm = None