def rdchiralRunText(reaction_smarts, reactant_smiles, **kwargs):
    '''Convenience entry point: run a reaction given plain strings.

    Builds an rdchiralReaction from the SMARTS and an rdchiralReactants from
    the SMILES on every call, then hands both to rdchiralRun. Because both
    objects are re-initialized each time, library code should construct them
    once and call rdchiralRun() directly instead.

    Any extra keyword arguments are forwarded to rdchiralRun unchanged, and
    its return value is returned as-is.'''
    # Arguments are evaluated left to right, so the template is still
    # initialized before the reactants.
    return rdchiralRun(
        rdchiralReaction(reaction_smarts),
        rdchiralReactants(reactant_smiles),
        **kwargs
    )
def get_outcomes(self, smiles, mincount, prioritizers, start_at=-1, end_at=-1,
                 singleonly=False, stop_if=False, template_count=10000,
                 max_cum_prob=1.0, mode='Maximum', depth=None):
    '''
    Run a one-step retrosynthetic expansion of the target SMILES.

    The target is canonicalized, every template yielded by
    self.top_templates() is applied to it in turn, and each precursor
    produced is added to a RetroResult, which is returned.

    prioritizers is a 2-tuple (precursor_prioritizer, template_prioritizer).
    template_count and max_cum_prob are pushed into the template
    prioritizer; singleonly and stop_if are forwarded to
    apply_one_template; mode is forwarded to RetroResult.add_precursor;
    depth is forwarded to the RetroResult constructor. start_at and end_at
    are accepted but not used in this function.

    Note: when a prioritizer is missing, a message is printed but execution
    continues.
    '''
    precursor_prioritizer, template_prioritizer = prioritizers

    # Both prioritizers are expected to be truthy; only a warning is emitted.
    if not template_prioritizer or not precursor_prioritizer:
        print('Template prioritizer and/or precursor prioritizer are missing. Exiting...')

    self.mincount = mincount
    self.get_precursor_prioritizers(precursor_prioritizer)
    self.get_template_prioritizers(template_prioritizer)
    self.template_prioritizer.set_max_templates(template_count)
    self.template_prioritizer.set_max_cum_prob(max_cum_prob)

    # Round-trip through RDKit to obtain the canonical isomeric SMILES.
    parsed = Chem.MolFromSmiles(smiles)
    smiles = Chem.MolToSmiles(parsed, isomericSmiles=True)
    # In chiral mode the templates operate on an rdchiralReactants wrapper.
    target = rdchiralReactants(smiles) if self.chiral else parsed

    expansion = RetroResult(smiles, depth)
    for tmpl in self.top_templates(smiles):
        candidates = self.apply_one_template(
            target, smiles, tmpl, singleonly=singleonly, stop_if=stop_if)
        for candidate in candidates:
            expansion.add_precursor(
                candidate, self.precursor_prioritizer, mode=mode)
    return expansion
def rdchiralRunText(reaction_smarts, reactant_smiles, **kwargs):
    '''Apply a reaction template to reactants, both given as text.

    The template and the reactants are initialized from scratch on every
    call, which is slow; library code should build an rdchiralReaction and
    an rdchiralReactants once and call rdchiralRun() with them directly.

    Args:
        reaction_smarts (str): Reaction SMARTS string
        reactant_smiles (str): Reactant SMILES string
        **kwargs: forwarded unchanged to `rdchiralRun`

    Returns:
        Whatever `rdchiralRun` returns for the initialized template and
        reactants.
    '''
    template = rdchiralReaction(reaction_smarts)
    substrate = rdchiralReactants(reactant_smiles)
    outcomes = rdchiralRun(template, substrate, **kwargs)
    return outcomes
final_outcomes.add(smiles_new) mapped_outcomes[smiles_new] = (mapped_outcome, atoms_changed) ############################################################################### # One last fix for consolidating multiple stereospecified products... if combine_enantiomers: final_outcomes = combine_enantiomers_into_racemic(final_outcomes) ############################################################################### if return_mapped: return list(final_outcomes), mapped_outcomes else: return list(final_outcomes) if __name__ == '__main__': # Directly use SMILES/SMARTS reaction_smarts = '[C:1][OH:2]>>[C:1][O:2][C]' reactant_smiles = 'OCC(=O)OCCCO' outcomes = rdchiralRunText(reaction_smarts, reactant_smiles) print(outcomes) # Pre-initialize rxn = rdchiralReaction(reaction_smarts) reactants = rdchiralReactants(reactant_smiles) outcomes = rdchiralRun(rxn, reactants) print(outcomes) # Get list of atoms that changed as well outcomes, mapped_outcomes = rdchiralRun(rxn, reactants, return_mapped=True) print(outcomes, mapped_outcomes)
def apply_one_template_by_idx(self, _id, smiles, template_idx, calculate_next_probs=True, **kwargs):
    """Takes a SMILES and applies the template with given index.

    This is useful in the MCTS code.

    Args:
        _id (int): Not used; passed through to output.
        smiles (str): SMILES of molecule to apply template to. It is
            canonicalized before use, and the canonical form is what
            appears in the output tuples.
        template_idx (int): Index into self.templates of the template to
            apply.
        calculate_next_probs (bool, optional): Whether to calculate template
            relevance probabilities for precursors (default: {True})
        **kwargs: Optional settings, popped by name:
            apply_fast_filter (default True), filter_threshold (default
            0.75), use_ban_list (default True), template_count (default
            100) and max_cum_prob (default 0.995).

    Returns:
        list of 5-tuples (_id, smiles, template_idx, reactants,
        filter_score). With calculate_next_probs, ``reactants`` is a list of
        (reactant_smiles, probs, indices, value) tuples; otherwise it is the
        raw list of reactant SMILES. If the target is banned an empty list
        is returned. If the template gives no surviving outcome, a single
        dummy outcome with an empty reactant list and score 0.0 is returned.
    """
    apply_fast_filter = kwargs.pop('apply_fast_filter', True)
    filter_threshold = kwargs.pop('filter_threshold', 0.75)
    use_ban_list = kwargs.pop('use_ban_list', True)
    template_count = kwargs.pop('template_count', 100)
    max_cum_prob = kwargs.pop('max_cum_prob', 0.995)

    # Lazily load the fast filter the first time it is needed.
    if apply_fast_filter and not self.fast_filter:
        self.load_fast_filter()
    self.get_template_prioritizers(gc.relevance)

    # Define mol to operate on
    mol = Chem.MolFromSmiles(smiles)
    smiles = Chem.MolToSmiles(mol, isomericSmiles=True)  # to canonicalize
    if self.chiral:
        mol = rdchiralReactants(smiles)

    all_outcomes = []
    seen_reactants = {}
    # A set gives O(1) duplicate checks; the joined SMILES strings are
    # hashable and insertion order is never needed.
    seen_reactant_combos = set()

    if use_ban_list and smiles in BANNED_SMILES:
        return all_outcomes

    for smiles_list in self.apply_one_template_smilesonly(
            mol, smiles, self.templates[template_idx]):
        # Avoid duplicate outcomes (e.g., by symmetry)
        reactant_smiles = '.'.join(smiles_list)
        if reactant_smiles in seen_reactant_combos:
            continue
        seen_reactant_combos.add(reactant_smiles)

        # Should we add this to the results?
        filter_score = 1.0
        if apply_fast_filter:
            filter_flag, filter_score = self.fast_filter.filter_with_threshold(
                reactant_smiles, smiles, filter_threshold)
            if not filter_flag:
                continue

        if not calculate_next_probs:
            all_outcomes.append(
                (_id, smiles, template_idx, smiles_list, filter_score))
            continue

        # Attach template relevance scores to each precursor, computing
        # them only once per distinct reactant within this call.
        reactants = []
        for reactant_smi in smiles_list:
            if reactant_smi not in seen_reactants:
                probs, indeces = self.template_prioritizer.get_topk_from_smi(
                    reactant_smi, k=template_count)
                # Keep entries up to and including the first one at which
                # the cumulative probability reaches max_cum_prob.
                reached = np.argwhere(np.cumsum(probs) >= max_cum_prob)
                if len(reached):
                    truncate_to = reached[0][0] + 1
                else:
                    truncate_to = template_count
                # current value assigned to precursor (note: may replace
                # with real value function)
                value = 1
                seen_reactants[reactant_smi] = (
                    reactant_smi, probs[:truncate_to],
                    indeces[:truncate_to], value)
            reactants.append(seen_reactants[reactant_smi])
        all_outcomes.append(
            (_id, smiles, template_idx, reactants, filter_score))

    if not all_outcomes:
        all_outcomes.append(
            (_id, smiles, template_idx, [], 0.0))  # dummy outcome
    return all_outcomes
def get_outcomes(self, smiles, mincount, prioritizers, **kwargs):
    """Expand a target molecule by one retrosynthetic step.

    The target is canonicalized, then every template yielded by
    self.top_templates() is applied to it. Each resulting precursor is
    optionally screened by the fast filter before being added to the
    result.

    Args:
        smiles (str): Product SMILES string to find precursors for.
        mincount (int): Stored on self.mincount.
        prioritizers (2-tuple): (precursor_prioritizer, template_prioritizer)
            identifiers, handed to self.get_precursor_prioritizers and
            self.get_template_prioritizers respectively.
        **kwargs: 'apply_fast_filter' (default True), 'filter_threshold'
            (default 0.75) and 'use_ban_list' (default True) are popped
            here; whatever remains is forwarded to self.top_templates and
            to RetroResult.add_precursor.

    Returns:
        RetroResult: the collected precursors. It is empty when the ban list
        is enabled and the canonical target is in BANNED_SMILES.
    """
    apply_fast_filter = kwargs.pop('apply_fast_filter', True)
    filter_threshold = kwargs.pop('filter_threshold', 0.75)
    use_ban_list = kwargs.pop('use_ban_list', True)

    # Load the fast filter on demand.
    if apply_fast_filter and not self.fast_filter:
        self.load_fast_filter()

    precursor_prioritizer, template_prioritizer = prioritizers
    if not template_prioritizer or not precursor_prioritizer:
        MyLogger.print_and_log(
            'Template prioritizer and/or precursor prioritizer are missing. Exiting...',
            retro_transformer_loc, level=3)

    self.mincount = mincount
    self.get_precursor_prioritizers(precursor_prioritizer)
    self.get_template_prioritizers(template_prioritizer)

    # Canonicalize the target; chiral mode works on an rdchiral wrapper.
    parsed = Chem.MolFromSmiles(smiles)
    smiles = Chem.MolToSmiles(parsed, isomericSmiles=True)
    target = rdchiralReactants(smiles) if self.chiral else parsed

    expansion = RetroResult(smiles)
    if use_ban_list and smiles in BANNED_SMILES:
        return expansion

    for tmpl in self.top_templates(smiles, **kwargs):
        for candidate in self.apply_one_template(target, smiles, tmpl):
            if apply_fast_filter:
                joined = '.'.join(candidate.smiles_list)
                passed, score = self.fast_filter.filter_with_threshold(
                    joined, smiles, filter_threshold)
                if not passed:
                    # Rejected by the fast filter: drop this precursor.
                    continue
                candidate.plausibility = score
            expansion.add_precursor(
                candidate, self.precursor_prioritizer, **kwargs)
    return expansion
def apply_one_template_by_idx(self, _id, smiles, template_idx, calculate_next_probs=True, **kwargs):
    '''Takes a SMILES and applies the template with index template_idx.

    Returns a list of (_id, smiles, template_idx, reactants, filter_score)
    tuples, where smiles is the canonicalized target. When
    calculate_next_probs is True, reactants is a list of
    (reactant_smiles, probs, indices, value) tuples carrying the template
    relevance probabilities of each precursor; otherwise it is the list of
    reactant SMILES.

    _id just gets carried through to the output.

    Optional keyword arguments (popped from kwargs): apply_fast_filter
    (default True), filter_threshold (default 0.75), use_ban_list (default
    True), template_count (default 100), max_cum_prob (default 0.995).

    If the target is in self.banned_smiles (and use_ban_list is set) an
    empty list is returned. If the template produces no surviving outcome,
    a single dummy outcome with empty reactants and score 0.0 is returned.

    This is useful in the MCTS code.'''
    apply_fast_filter = kwargs.pop('apply_fast_filter', True)
    filter_threshold = kwargs.pop('filter_threshold', 0.75)
    use_ban_list = kwargs.pop('use_ban_list', True)
    template_count = kwargs.pop('template_count', 100)
    max_cum_prob = kwargs.pop('max_cum_prob', 0.995)

    if apply_fast_filter and not self.fast_filter:
        self.load_fast_filter()
    self.get_template_prioritizers(gc.relevance)

    # Define mol to operate on
    mol = Chem.MolFromSmiles(smiles)
    smiles = Chem.MolToSmiles(mol, isomericSmiles=True)  # to canonicalize
    if self.chiral:
        mol = rdchiralReactants(smiles)

    all_outcomes = []
    seen_reactants = {}
    # Set rather than list: membership is tested once per outcome, and a
    # list would make duplicate detection quadratic in the outcome count.
    seen_reactant_combos = set()

    if use_ban_list and smiles in self.banned_smiles:
        return all_outcomes

    for smiles_list in self.apply_one_template_smilesonly(
            mol, smiles, self.templates[template_idx]):
        # Avoid duplicate outcomes (e.g., by symmetry)
        reactant_smiles = '.'.join(smiles_list)
        if reactant_smiles in seen_reactant_combos:
            continue
        seen_reactant_combos.add(reactant_smiles)

        # Should we add this to the results?
        filter_score = 1.0
        if apply_fast_filter:
            filter_flag, filter_score = self.fast_filter.filter_with_threshold(
                reactant_smiles, smiles, filter_threshold)
            if not filter_flag:
                continue

        # Should we calculate template relevance scores for each precursor?
        if calculate_next_probs:
            reactants = []
            for reactant_smi in smiles_list:
                if reactant_smi not in seen_reactants:
                    probs, indeces = self.template_prioritizer.get_topk_from_smi(
                        reactant_smi, k=template_count)
                    # Truncate at the first position where the cumulative
                    # probability reaches max_cum_prob (inclusive); fall
                    # back to template_count if it never does.
                    cutoff_hits = np.argwhere(
                        np.cumsum(probs) >= max_cum_prob)
                    if len(cutoff_hits):
                        truncate_to = cutoff_hits[0][0] + 1
                    else:
                        truncate_to = template_count
                    # current value assigned to precursor (note: may
                    # replace with real value function)
                    value = 1
                    # Cache so a reactant shared by several outcomes is
                    # only scored once.
                    seen_reactants[reactant_smi] = (
                        reactant_smi, probs[:truncate_to],
                        indeces[:truncate_to], value)
                reactants.append(seen_reactants[reactant_smi])
            all_outcomes.append(
                (_id, smiles, template_idx, reactants, filter_score))
        else:
            all_outcomes.append(
                (_id, smiles, template_idx, smiles_list, filter_score))

    if not all_outcomes:
        all_outcomes.append(
            (_id, smiles, template_idx, [], 0.0))  # dummy outcome
    return all_outcomes
def apply_one_template_by_idx(
        self, _id, smiles, template_idx, calculate_next_probs=True,
        fast_filter_threshold=0.75, max_num_templates=100, max_cum_prob=0.995,
        template_prioritizer=None, template_set=None, fast_filter=None,
        use_ban_list=True,
):
    """Applies one template by index.

    Args:
        _id (int): Pathway id used by tree builder; passed through to the
            output tuples unchanged.
        smiles (str): SMILES string of molecule to apply template to. It is
            canonicalized before use and the canonical form is what appears
            in the output.
        template_idx (int): Index of template to apply.
        calculate_next_probs (bool): Flag to calculate probabilities
            (template relevance scores) for precursors generated by
            template application.
        fast_filter_threshold (float): Outcomes whose fast filter score is
            below this threshold are discarded.
        max_num_templates (int): Forwarded to the template prioritizer when
            calculating next probabilities.
        max_cum_prob (float): Forwarded to the template prioritizer when
            calculating next probabilities.
        template_prioritizer (Prioritizer): Use to override prioritizer
            created during initialization. It is called as
            ``predict(smiles, max_num_templates=..., max_cum_prob=...)`` and
            must return ``(scores, indices)`` objects that provide
            ``tolist()``.
        template_set (str): Name of template set to use when multiple
            template sets are available. Defaults to ``self.template_set``.
        fast_filter (callable): Use to override the fast filter created
            during initialization. Called as
            ``fast_filter(reactant_smiles, smiles)``; its result is compared
            against ``fast_filter_threshold``.
        use_ban_list (bool): When True, a target found in BANNED_SMILES is
            not expanded and only the dummy outcome is returned.

    Returns:
        List of outcomes with
        (_id, smiles, template_idx, precursors, fast_filter_score).
        ``precursors`` is a list of (smiles, scores, indices, value) tuples
        when ``calculate_next_probs`` is True, otherwise the precursor's
        ``'smiles_split'`` value. If nothing survives, a single dummy
        outcome ``(_id, smiles, template_idx, [], 0.0)`` is returned.
    """
    if template_prioritizer is None:
        template_prioritizer = self.template_prioritizer
    if template_set is None:
        template_set = self.template_set
    # Identity check: `== None` would dispatch to a custom __eq__ if the
    # supplied filter object defines one.
    if fast_filter is None:
        fast_filter = self.fast_filter

    mol = Chem.MolFromSmiles(smiles)
    smiles = Chem.MolToSmiles(mol, isomericSmiles=True)
    mol = rdchiralReactants(smiles)

    if use_ban_list and smiles in BANNED_SMILES:
        return [(_id, smiles, template_idx, [], 0.0)]  # dummy outcome

    all_outcomes = []
    seen_reactants = {}
    # Set for O(1) duplicate detection; order is never needed.
    seen_reactant_combos = set()

    template = self.get_one_template_by_idx(template_idx, template_set)
    template['rxn'] = rdchiralReaction(template['reaction_smarts'])

    for precursor in self.apply_one_template(mol, template):
        reactant_smiles = precursor['smiles']
        if reactant_smiles in seen_reactant_combos:
            continue
        seen_reactant_combos.add(reactant_smiles)

        fast_filter_score = fast_filter(reactant_smiles, smiles)
        if fast_filter_score < fast_filter_threshold:
            continue

        if not calculate_next_probs:
            all_outcomes.append(
                (_id, smiles, template_idx, precursor['smiles_split'],
                 fast_filter_score))
            continue

        reactants = []
        for reactant_smi in precursor['smiles_split']:
            if reactant_smi not in seen_reactants:
                scores, indeces = template_prioritizer.predict(
                    reactant_smi,
                    max_num_templates=max_num_templates,
                    max_cum_prob=max_cum_prob)
                # scores and indeces will be passed through celery, need
                # to be lists
                scores = scores.tolist()
                indeces = indeces.tolist()
                value = 1
                # Cache so each distinct reactant is scored only once.
                seen_reactants[reactant_smi] = (
                    reactant_smi, scores, indeces, value)
            reactants.append(seen_reactants[reactant_smi])
        all_outcomes.append(
            (_id, smiles, template_idx, reactants, fast_filter_score))

    if not all_outcomes:
        all_outcomes.append(
            (_id, smiles, template_idx, [], 0.0))  # dummy outcome
    return all_outcomes
def get_outcomes(self, smiles, precursor_prioritizer=None,
                 template_set=None, template_prioritizer=None,
                 fast_filter=None, fast_filter_threshold=0.75,
                 max_num_templates=100, max_cum_prob=0.995,
                 cluster=None, cluster_settings={},
                 use_ban_list=True, **kwargs):
    """Performs a one-step retrosynthesis given a SMILES string.

    Applies each transformation template sequentially to given target
    molecule to perform retrosynthesis. Precursors that yield the same
    joined SMILES are merged into a single entry.

    Args:
        smiles (str): Target SMILES string to find precursors for. It is
            canonicalized before use.
        precursor_prioritizer (optional, callable): Use to override
            prioritizer created during initialization. Called with the list
            of precursor dictionaries; its return value becomes the result
            list.
        template_set (optional, str): Template set handed to
            ``self.order_templates_by_indices``. Defaults to
            ``self.template_set``.
        template_prioritizer (optional, Prioritizer): Use to override
            prioritizer created during initialization. It is called as
            ``predict(smiles, max_num_templates=..., max_cum_prob=...)`` and
            must return ``(scores, indices)``.
        fast_filter (optional, callable): Use to override fast filter
            created during initialization. Called as
            ``fast_filter(reactants, product)`` with SMILES strings; its
            result is stored as the precursor's ``'plausibility'``.
        fast_filter_threshold (float): Precursors whose plausibility is
            below this threshold are discarded.
        max_num_templates (int): Forwarded to the template prioritizer.
        max_cum_prob (float): Forwarded to the template prioritizer.
        cluster (optional, callable): Use to override cluster method.
            Called as ``cluster(target, outcomes, **cluster_settings)``; the
            result is indexed by position to obtain each outcome's group id.
        cluster_settings (optional, dict): Keyword settings passed to the
            clustering method. Passing None explicitly selects
            ``self.cluster_settings``.
        use_ban_list (bool): When True, a target found in BANNED_SMILES is
            not expanded and an empty list is returned.
        **kwargs: Accepted for compatibility; not used in this method.

    Returns:
        list: Precursor dictionaries as returned by
        ``precursor_prioritizer``. In addition to the keys set by
        ``apply_one_template``, each carries ``'template_score'``,
        ``'plausibility'``, ``'tforms'`` (list of template ids),
        ``'rank'`` (1-based discovery order, assigned before
        prioritization) and ``'group_id'``.
    """
    # NOTE(review): the `{}` default for cluster_settings is kept for
    # interface compatibility. It is only ever unpacked with `**` below and
    # never mutated, so sharing it across calls is harmless here.
    if template_set is None:
        template_set = self.template_set
    if template_prioritizer is None:
        template_prioritizer = self.template_prioritizer
    if precursor_prioritizer is None:
        precursor_prioritizer = self.precursor_prioritizer
    # Identity checks: `== None` would dispatch to a custom __eq__ if the
    # supplied object defines one.
    if fast_filter is None:
        fast_filter = self.fast_filter
    if cluster is None:
        cluster = self.cluster
    if cluster_settings is None:
        cluster_settings = self.cluster_settings

    mol = Chem.MolFromSmiles(smiles)
    smiles = Chem.MolToSmiles(mol, isomericSmiles=True)
    mol = rdchiralReactants(smiles)

    results = []
    # Maps joined precursor SMILES -> position in `results`, so duplicates
    # found via different templates can be merged.
    smiles_to_index = {}

    if use_ban_list and smiles in BANNED_SMILES:
        return results

    scores, indices = template_prioritizer.predict(
        smiles, max_num_templates=max_num_templates,
        max_cum_prob=max_cum_prob)
    templates = self.order_templates_by_indices(indices, template_set)

    for template, score in zip(templates, scores):
        for precursor in self.apply_one_template(mol, template):
            precursor['template_score'] = score
            joined_smiles = '.'.join(precursor['smiles_split'])
            precursor['plausibility'] = fast_filter(joined_smiles, smiles)

            # skip if no transformation happened or plausibility is below
            # threshold
            if (joined_smiles == smiles
                    or precursor['plausibility'] < fast_filter_threshold):
                continue

            if joined_smiles in smiles_to_index:
                # Same precursor set already recorded: merge template ids
                # and example counts, and keep the best template score.
                res = results[smiles_to_index[joined_smiles]]
                res['tforms'].add(precursor['template_id'])
                res['num_examples'] += precursor['num_examples']
                res['template_score'] = max(res['template_score'], score)
            else:
                precursor['tforms'] = {precursor['template_id']}
                smiles_to_index[joined_smiles] = len(results)
                results.append(precursor)

    for rank, result in enumerate(results, 1):
        # Convert the working set to a list for the returned record.
        result['tforms'] = list(result['tforms'])
        result['rank'] = rank

    results = precursor_prioritizer(results)

    cluster_ids = cluster(smiles, results, **cluster_settings)
    for i, precursor in enumerate(results):
        precursor['group_id'] = cluster_ids[i]

    return results