Example #1
0
def rdchiralRunText(reaction_smarts, reactant_smiles, **kwargs):
    '''Convenience entry point: run a reaction given only text inputs.

    Builds a fresh rdchiralReaction from the SMARTS string and a fresh
    rdchiralReactants from the SMILES string on every call, then hands both
    to rdchiralRun together with any extra keyword arguments. Because both
    objects are re-initialized each time (which is slow), library code should
    initialize them once and call rdchiralRun directly instead.'''
    return rdchiralRun(
        rdchiralReaction(reaction_smarts),
        rdchiralReactants(reactant_smiles),
        **kwargs
    )
Example #2
0
def rdchiralRunText(reaction_smarts, reactant_smiles, **kwargs):
    '''One-shot helper that goes from SMARTS/SMILES text to reaction outcomes.

    The template and the reactants are both initialized inside this call,
    which is slow; for repeated use, create the rdchiralReaction and
    rdchiralReactants objects once and pass them to rdchiralRun yourself.
    Any keyword arguments are forwarded unchanged to rdchiralRun.'''
    template = rdchiralReaction(reaction_smarts)
    substrate = rdchiralReactants(reactant_smiles)
    outcomes = rdchiralRun(template, substrate, **kwargs)
    return outcomes
Example #3
0
    def get_outcomes(self, smiles, mincount, prioritizers,
                     start_at=-1, end_at=-1,
                     singleonly=False, stop_if=False,
                     template_count=10000, max_cum_prob=1.0,
                     mode='Maximum', depth=None):
        '''
        One-step retrosynthetic expansion of a target molecule.

        The target SMILES is canonicalized, the configured templates are
        applied one after another, and every precursor produced is collected
        into a RetroResult, which is returned.

        `prioritizers` is a 2-tuple ordered as
        (precursor_prioritizer, template_prioritizer). `start_at` and
        `end_at` are accepted but not used by this method.
        '''
        precursor_prioritizer, template_prioritizer = prioritizers

        # Only a message is emitted when a prioritizer is missing; execution
        # continues with whatever was supplied.
        if not template_prioritizer or not precursor_prioritizer:
            print(
                'Template prioritizer and/or precursor prioritizer are missing. Exiting...')

        self.mincount = mincount
        self.get_precursor_prioritizers(precursor_prioritizer)
        self.get_template_prioritizers(template_prioritizer)
        self.template_prioritizer.set_max_templates(template_count)
        self.template_prioritizer.set_max_cum_prob(max_cum_prob)

        # Round-trip through RDKit to obtain the canonical isomeric SMILES.
        target = Chem.MolFromSmiles(smiles)
        canonical_smiles = Chem.MolToSmiles(target, isomericSmiles=True)
        if self.chiral:
            # Chiral mode operates on the rdchiral wrapper instead of the Mol.
            target = rdchiralReactants(canonical_smiles)

        result = RetroResult(canonical_smiles, depth)

        for template in self.top_templates(canonical_smiles):
            precursors = self.apply_one_template(
                target, canonical_smiles, template,
                singleonly=singleonly, stop_if=stop_if)
            for precursor in precursors:
                result.add_precursor(
                    precursor, self.precursor_prioritizer, mode=mode)

        return result
Example #4
0
def rdchiralRunText(reaction_smarts, reactant_smiles, **kwargs):
    '''Run a reaction directly from a SMARTS string and a SMILES string.

    Both the template and the reactants are initialized from text on every
    call, which is slow. Library code should instead build the
    rdchiralReaction and rdchiralReactants objects once and call
    `rdchiralRun` on them.

    Args:
        reaction_smarts (str): Reaction SMARTS used to build the template.
        reactant_smiles (str): SMILES used to build the reactants.
        **kwargs: Forwarded unchanged to `rdchiralRun`.

    Returns:
        Whatever `rdchiralRun` returns for the initialized template and
        reactants.
    '''
    return rdchiralRun(
        rdchiralReaction(reaction_smarts),
        rdchiralReactants(reactant_smiles),
        **kwargs
    )
Example #5
0
        final_outcomes.add(smiles_new)
        mapped_outcomes[smiles_new] = (mapped_outcome, atoms_changed)
    ###############################################################################
    # One last fix for consolidating multiple stereospecified products...
    if combine_enantiomers:
        final_outcomes = combine_enantiomers_into_racemic(final_outcomes)
    ###############################################################################
    if return_mapped:
        return list(final_outcomes), mapped_outcomes
    else:
        return list(final_outcomes)


if __name__ == '__main__':
    # Shared demo inputs: a template and a reactant given as plain text.
    reaction_smarts = '[C:1][OH:2]>>[C:1][O:2][C]'
    reactant_smiles = 'OCC(=O)OCCCO'

    # 1) Convenience path: everything is initialized from text inside the call.
    outcomes = rdchiralRunText(reaction_smarts, reactant_smiles)
    print(outcomes)

    # 2) Explicit path: initialize the template and reactants once, then run.
    rxn = rdchiralReaction(reaction_smarts)
    reactants = rdchiralReactants(reactant_smiles)
    outcomes = rdchiralRun(rxn, reactants)
    print(outcomes)

    # 3) Same run, additionally requesting the mapped outcomes.
    outcomes, mapped_outcomes = rdchiralRun(
        rxn, reactants, return_mapped=True)
    print(outcomes, mapped_outcomes)
Example #6
0
    def apply_one_template_by_idx(self,
                                  _id,
                                  smiles,
                                  template_idx,
                                  calculate_next_probs=True,
                                  **kwargs):
        """Takes a SMILES and applies the template with given index.

        This is useful in the MCTS code.

        Args:
            _id (int): Not used; passed through to output.
            smiles (str): SMILES of molecule to apply template to.
            template_idx (int): Index of template to be used.
            calculate_next_probs (bool, optional): Whether to calculate template
                relevance probabilities for precursors (default: {True})
            **kwargs: Additional optional arguments. Recognized keys:
                apply_fast_filter (default True), filter_threshold
                (default 0.75), use_ban_list (default True), template_count
                (default 100) and max_cum_prob (default 0.995).

        Returns:
            list of 5-tuples of (int, str, int, list, float): Result of
                applying given template to the molecule including the template
                relevance probabilities of all resulting precursors when
                calculate_next_probs is True. An empty list is returned when
                the canonical SMILES is banned; a single dummy outcome
                ``(_id, smiles, template_idx, [], 0.0)`` is returned when the
                template yields nothing that survives filtering.
        """
        # QUESTION: Why are these not just optional named arguments?
        apply_fast_filter = kwargs.pop('apply_fast_filter', True)
        filter_threshold = kwargs.pop('filter_threshold', 0.75)
        use_ban_list = kwargs.pop('use_ban_list', True)
        template_count = kwargs.pop('template_count', 100)
        max_cum_prob = kwargs.pop('max_cum_prob', 0.995)
        if apply_fast_filter and not self.fast_filter:
            self.load_fast_filter()
        self.get_template_prioritizers(gc.relevance)

        # Define mol to operate on
        mol = Chem.MolFromSmiles(smiles)
        smiles = Chem.MolToSmiles(mol, isomericSmiles=True)  # to canonicalize

        all_outcomes = []

        # Reject banned targets before building the chiral reactant object,
        # which would be wasted work for a molecule we refuse to expand.
        if use_ban_list and smiles in BANNED_SMILES:
            return all_outcomes

        if self.chiral:
            mol = rdchiralReactants(smiles)

        # Cache of per-reactant tuples so each precursor is scored only once.
        seen_reactants = {}
        # A set gives O(1) duplicate detection (was an O(n) list scan).
        seen_reactant_combos = set()

        for smiles_list in self.apply_one_template_smilesonly(
                mol, smiles, self.templates[template_idx]):
            # Avoid duplicate outcomes (e.g., by symmetry)
            reactant_smiles = '.'.join(smiles_list)
            if reactant_smiles in seen_reactant_combos:
                continue
            seen_reactant_combos.add(reactant_smiles)

            # Should we add this to the results?
            filter_score = 1.0
            if apply_fast_filter:
                filter_flag, filter_score = self.fast_filter.filter_with_threshold(
                    reactant_smiles, smiles, filter_threshold)
                if not filter_flag:
                    continue

            if not calculate_next_probs:
                # Raw precursor SMILES are reported without relevance scores.
                all_outcomes.append(
                    (_id, smiles, template_idx, smiles_list, filter_score))
                continue

            # Calculate template relevance scores for each precursor.
            reactants = []
            for reactant_smi in smiles_list:
                if reactant_smi not in seen_reactants:
                    probs, indeces = self.template_prioritizer.get_topk_from_smi(
                        reactant_smi, k=template_count)
                    # Keep entries up to and including the first one at which
                    # the cumulative probability reaches max_cum_prob; if it
                    # is never reached, fall back to template_count.
                    cutoff_hits = np.argwhere(
                        np.cumsum(probs) >= max_cum_prob)
                    if len(cutoff_hits):
                        truncate_to = cutoff_hits[0][0] + 1
                    else:
                        truncate_to = template_count
                    value = 1  # current value assigned to precursor (note: may replace with real value function)
                    # Save to dict
                    seen_reactants[reactant_smi] = (reactant_smi,
                                                    probs[:truncate_to],
                                                    indeces[:truncate_to],
                                                    value)
                reactants.append(seen_reactants[reactant_smi])

            all_outcomes.append(
                (_id, smiles, template_idx, reactants, filter_score))

        if not all_outcomes:
            all_outcomes.append(
                (_id, smiles, template_idx, [], 0.0))  # dummy outcome

        return all_outcomes
Example #7
0
    def get_outcomes(self, smiles, mincount, prioritizers, **kwargs):
        """Run a one-step retrosynthetic expansion of a product SMILES.

        The product is canonicalized, then every template yielded by
        ``top_templates`` is applied in turn and the resulting precursors are
        collected, optionally after screening with the fast filter.

        Args:
            smiles (str): Product SMILES string to find precursors for.
            mincount (int): Minimum template popularity; stored on the
                instance as ``self.mincount``.
            prioritizers (2-tuple): ``(precursor_prioritizer,
                template_prioritizer)``, in that order.
            **kwargs: ``apply_fast_filter`` (default True),
                ``filter_threshold`` (default 0.75) and ``use_ban_list``
                (default True) are consumed here; everything else is
                forwarded to ``top_templates`` and ``add_precursor``.

        Returns:
            RetroResult: The expansion result. It is returned empty when the
                canonical SMILES is in the ban list and the ban list is
                enabled.
        """
        use_ban_list = kwargs.pop('use_ban_list', True)
        filter_threshold = kwargs.pop('filter_threshold', 0.75)
        apply_fast_filter = kwargs.pop('apply_fast_filter', True)
        if apply_fast_filter and not self.fast_filter:
            self.load_fast_filter()

        precursor_prioritizer, template_prioritizer = prioritizers
        # Check modules:
        if not template_prioritizer or not precursor_prioritizer:
            MyLogger.print_and_log(
                'Template prioritizer and/or precursor prioritizer are missing. Exiting...',
                retro_transformer_loc,
                level=3)
        self.mincount = mincount
        self.get_precursor_prioritizers(precursor_prioritizer)
        self.get_template_prioritizers(template_prioritizer)

        # Canonicalize the target; chiral mode swaps in the rdchiral wrapper.
        target = Chem.MolFromSmiles(smiles)
        canonical_smiles = Chem.MolToSmiles(target, isomericSmiles=True)
        if self.chiral:
            target = rdchiralReactants(canonical_smiles)

        result = RetroResult(canonical_smiles)

        if use_ban_list and canonical_smiles in BANNED_SMILES:
            return result

        for template in self.top_templates(canonical_smiles, **kwargs):
            for precursor in self.apply_one_template(
                    target, canonical_smiles, template):
                if apply_fast_filter:
                    # Screen the joined precursor set; rejected ones are
                    # dropped, accepted ones record their score.
                    joined = '.'.join(precursor.smiles_list)
                    accepted, score = self.fast_filter.filter_with_threshold(
                        joined, canonical_smiles, filter_threshold)
                    if not accepted:
                        continue
                    precursor.plausibility = score
                result.add_precursor(precursor, self.precursor_prioritizer,
                                     **kwargs)
        return result
Example #8
0
    def apply_one_template_by_idx(self,
                                  _id,
                                  smiles,
                                  template_idx,
                                  calculate_next_probs=True,
                                  **kwargs):
        '''Takes a SMILES and applies the template with index template_idx.

        Returns results including the template relevance probabilities of all
        resulting precursors when calculate_next_probs is True.

        _id just gets carried through to every outcome tuple.

        This is useful in the MCTS code.

        Args:
            _id: Not used here; copied into each outcome.
            smiles (str): SMILES of the molecule to apply the template to.
            template_idx (int): Index into self.templates.
            calculate_next_probs (bool): Whether to score each precursor with
                the template prioritizer.
            **kwargs: Recognized keys are apply_fast_filter (default True),
                filter_threshold (default 0.75), use_ban_list (default True),
                template_count (default 100) and max_cum_prob (default 0.995).

        Returns:
            list of 5-tuples (_id, smiles, template_idx, reactants,
            filter_score). The list is empty when the canonical SMILES is
            banned, and contains one dummy outcome
            (_id, smiles, template_idx, [], 0.0) when nothing survives.
        '''

        apply_fast_filter = kwargs.pop('apply_fast_filter', True)
        filter_threshold = kwargs.pop('filter_threshold', 0.75)
        use_ban_list = kwargs.pop('use_ban_list', True)
        template_count = kwargs.pop('template_count', 100)
        max_cum_prob = kwargs.pop('max_cum_prob', 0.995)
        if apply_fast_filter and not self.fast_filter:
            self.load_fast_filter()
        self.get_template_prioritizers(gc.relevance)

        # Define mol to operate on
        mol = Chem.MolFromSmiles(smiles)
        smiles = Chem.MolToSmiles(mol, isomericSmiles=True)  # to canonicalize

        all_outcomes = []

        # Reject banned targets before building the chiral reactant object,
        # which would be wasted work for a molecule we refuse to expand.
        if use_ban_list and smiles in self.banned_smiles:
            return all_outcomes

        if self.chiral:
            mol = rdchiralReactants(smiles)

        # Cache of per-reactant tuples so each precursor is scored only once.
        seen_reactants = {}
        # A set gives O(1) duplicate detection (was an O(n) list scan).
        seen_reactant_combos = set()

        for smiles_list in self.apply_one_template_smilesonly(
                mol, smiles, self.templates[template_idx]):
            # Avoid duplicate outcomes (e.g., by symmetry)
            reactant_smiles = '.'.join(smiles_list)
            if reactant_smiles in seen_reactant_combos:
                continue
            seen_reactant_combos.add(reactant_smiles)

            # Should we add this to the results?
            filter_score = 1.0
            if apply_fast_filter:
                filter_flag, filter_score = self.fast_filter.filter_with_threshold(
                    reactant_smiles, smiles, filter_threshold)
                if not filter_flag:
                    continue

            if not calculate_next_probs:
                # Raw precursor SMILES are reported without relevance scores.
                all_outcomes.append(
                    (_id, smiles, template_idx, smiles_list, filter_score))
                continue

            # Calculate template relevance scores for each precursor.
            reactants = []
            for reactant_smi in smiles_list:
                if reactant_smi not in seen_reactants:
                    probs, indeces = self.template_prioritizer.get_topk_from_smi(
                        reactant_smi, k=template_count)
                    # Keep entries up to and including the first one at which
                    # the cumulative probability reaches max_cum_prob; if it
                    # is never reached, fall back to template_count.
                    cutoff_hits = np.argwhere(
                        np.cumsum(probs) >= max_cum_prob)
                    if len(cutoff_hits):
                        truncate_to = cutoff_hits[0][0] + 1
                    else:
                        truncate_to = template_count
                    value = 1  # current value assigned to precursor (note: may replace with real value function)
                    # Save to dict
                    seen_reactants[reactant_smi] = (reactant_smi,
                                                    probs[:truncate_to],
                                                    indeces[:truncate_to],
                                                    value)
                reactants.append(seen_reactants[reactant_smi])

            all_outcomes.append(
                (_id, smiles, template_idx, reactants, filter_score))

        if not all_outcomes:
            all_outcomes.append(
                (_id, smiles, template_idx, [], 0.0))  # dummy outcome

        return all_outcomes
Example #9
0
    def apply_one_template_by_idx(
        self,
        _id,
        smiles,
        template_idx,
        calculate_next_probs=True,
        fast_filter_threshold=0.75,
        max_num_templates=100,
        max_cum_prob=0.995,
        template_prioritizer=None,
        template_set=None,
        fast_filter=None,
        use_ban_list=True,
    ):
        """Applies one template by index.

        Args:
            _id (int): Pathway id used by tree builder; copied unchanged into
                every outcome tuple.
            smiles (str): SMILES string of molecule to apply template to.
            template_idx (int): index of template to apply.
            calculate_next_probs (bool): Flag to calculate probabilities
                (template relevance scores) for precursors generated by
                template application.
            fast_filter_threshold (float): Fast filter threshold; precursor
                sets whose fast filter score is below this value are
                discarded.
            max_num_templates (int): Maximum number of template scores and
                indices to return when calculating next probabilities.
            max_cum_prob (float): Maximum cumulative probabilities to use
                when returning next probabilities.
            template_prioritizer (Prioritizer): Use to override
                prioritizer created during initialization. It must provide a
                predict method that is called here as
                ``predict(smiles, max_num_templates=..., max_cum_prob=...)``
                and returns ``(scores, indices)``, each exposing ``tolist()``.
            template_set (str): Name of template set to use when multiple
                template sets are available. Defaults to ``self.template_set``.
            fast_filter (callable): Use to override ``self.fast_filter``.
                Called as ``fast_filter(reactant_smiles, product_smiles)`` and
                must return a score comparable to ``fast_filter_threshold``.
            use_ban_list (bool): When True, a target whose canonical SMILES is
                in BANNED_SMILES is not expanded.

        Returns:
            List of outcomes with (_id, smiles, template_idx, precursors,
            fast_filter_score). A single dummy outcome
            ``(_id, smiles, template_idx, [], 0.0)`` is returned when the
            target is banned or when no precursor passes the checks.
        """
        if template_prioritizer is None:
            template_prioritizer = self.template_prioritizer

        if template_set is None:
            template_set = self.template_set

        # Identity test: `== None` would invoke a custom __eq__ if the
        # supplied filter object defines one.
        if fast_filter is None:
            fast_filter = self.fast_filter

        # Canonicalize the target SMILES.
        mol = Chem.MolFromSmiles(smiles)
        smiles = Chem.MolToSmiles(mol, isomericSmiles=True)

        # Reject banned targets before paying for reactant initialization.
        if use_ban_list and smiles in BANNED_SMILES:
            return [(_id, smiles, template_idx, [], 0.0)]  # dummy outcome

        mol = rdchiralReactants(smiles)

        all_outcomes = []
        # Cache of per-reactant tuples so each precursor is scored only once.
        seen_reactants = {}
        # A set gives O(1) duplicate detection (was an O(n) list scan).
        seen_reactant_combos = set()

        template = self.get_one_template_by_idx(template_idx, template_set)
        template['rxn'] = rdchiralReaction(template['reaction_smarts'])

        for precursor in self.apply_one_template(mol, template):
            reactant_smiles = precursor['smiles']
            if reactant_smiles in seen_reactant_combos:
                continue
            seen_reactant_combos.add(reactant_smiles)

            fast_filter_score = fast_filter(reactant_smiles, smiles)
            if fast_filter_score < fast_filter_threshold:
                continue

            if not calculate_next_probs:
                # Raw precursor SMILES are reported without relevance scores.
                all_outcomes.append(
                    (_id, smiles, template_idx, precursor['smiles_split'],
                     fast_filter_score))
                continue

            reactants = []
            for reactant_smi in precursor['smiles_split']:
                if reactant_smi not in seen_reactants:
                    scores, indeces = template_prioritizer.predict(
                        reactant_smi,
                        max_num_templates=max_num_templates,
                        max_cum_prob=max_cum_prob)
                    # scores and indeces will be passed through celery, need to be lists
                    scores = scores.tolist()
                    indeces = indeces.tolist()
                    value = 1
                    seen_reactants[reactant_smi] = (reactant_smi, scores,
                                                    indeces, value)
                reactants.append(seen_reactants[reactant_smi])
            all_outcomes.append(
                (_id, smiles, template_idx, reactants, fast_filter_score))

        if not all_outcomes:
            all_outcomes.append(
                (_id, smiles, template_idx, [], 0.0))  # dummy outcome

        return all_outcomes
Example #10
0
    def get_outcomes(self,
                     smiles,
                     precursor_prioritizer=None,
                     template_set=None,
                     template_prioritizer=None,
                     fast_filter=None,
                     fast_filter_threshold=0.75,
                     max_num_templates=100,
                     max_cum_prob=0.995,
                     cluster=None,
                     cluster_settings={},
                     use_ban_list=True,
                     **kwargs):
        """Performs a one-step retrosynthesis given a SMILES string.

        Applies each transformation template sequentially to given target
        molecule to perform retrosynthesis.

        Args:
            smiles (str): Target SMILES string to find precursors for.
            precursor_prioritizer (optional, callable): Use to override
                prioritizer created during initialization. This can be
                any callable function that reorders a list of precursor
                dictionary objects.
            template_set (optional, str): Template set passed to
                ``order_templates_by_indices``. Defaults to
                ``self.template_set``.
            template_prioritizer (optional, Prioritizer): Use to override
                prioritizer created during initialization. It must provide a
                predict method that is called here as
                ``predict(smiles, max_num_templates=..., max_cum_prob=...)``
                and returns ``(scores, indices)``.
            fast_filter (optional, callable): Use to override fast filter
                created during initialization. This can be any callable
                function that accepts (reactants, products) smiles strings
                as arguments and returns a score.
            fast_filter_threshold (float): Precursors whose fast filter score
                is below this value are discarded.
            max_num_templates (int): Forwarded to the template prioritizer.
            max_cum_prob (float): Forwarded to the template prioritizer.
            cluster (optional, callable): Use to override cluster method.
                This can be any callable that accepts
                (target, outcomes, **cluster_settings) where target is a smiles
                string, outcomes is a list of precursor dictionaries, and
                cluster_settings are cluster specific cluster settings.
            cluster_settings (optional, dict): Dictionary of cluster specific
                settings to be passed to clustering method. Note that the
                default is an empty dict, so ``self.cluster_settings`` is only
                used when None is passed explicitly. The dict is only
                unpacked, never mutated.
            use_ban_list (bool): When True, a target whose canonical SMILES is
                in BANNED_SMILES yields an empty result.
            **kwargs: Accepted for compatibility; not used by this method.

        Returns:
            list of dict: Precursor dictionaries in the order produced by
                ``precursor_prioritizer``. Besides the keys provided by
                ``apply_one_template``, each carries ``template_score``,
                ``plausibility``, ``tforms`` (list of template ids),
                ``rank`` (1-based position before prioritization) and
                ``group_id`` (from ``cluster``). Empty when the target is
                banned.
        """

        if template_set is None:
            template_set = self.template_set

        if template_prioritizer is None:
            template_prioritizer = self.template_prioritizer

        if precursor_prioritizer is None:
            precursor_prioritizer = self.precursor_prioritizer

        # Identity tests: `== None` would invoke a custom __eq__ if the
        # supplied object defines one.
        if fast_filter is None:
            fast_filter = self.fast_filter

        if cluster is None:
            cluster = self.cluster

        if cluster_settings is None:
            cluster_settings = self.cluster_settings

        # Canonicalize the target SMILES.
        mol = Chem.MolFromSmiles(smiles)
        smiles = Chem.MolToSmiles(mol, isomericSmiles=True)

        results = []

        # Reject banned targets before paying for reactant initialization.
        if use_ban_list and smiles in BANNED_SMILES:
            return results

        mol = rdchiralReactants(smiles)

        # Maps joined precursor SMILES -> position in `results`, so outcomes
        # reached through several templates are merged into one entry.
        smiles_to_index = {}

        scores, indices = template_prioritizer.predict(
            smiles,
            max_num_templates=max_num_templates,
            max_cum_prob=max_cum_prob)

        templates = self.order_templates_by_indices(indices, template_set)

        for template, score in zip(templates, scores):
            for precursor in self.apply_one_template(mol, template):
                joined_smiles = '.'.join(precursor['smiles_split'])
                # Skip no-op transformations before calling the fast filter;
                # they are discarded regardless of their score.
                if joined_smiles == smiles:
                    continue
                precursor['template_score'] = score
                precursor['plausibility'] = fast_filter(joined_smiles, smiles)
                if precursor['plausibility'] < fast_filter_threshold:
                    continue

                existing_index = smiles_to_index.get(joined_smiles)
                if existing_index is not None:
                    # Same precursors seen before: merge template provenance
                    # and keep the best template score.
                    res = results[existing_index]
                    res['tforms'].add(precursor['template_id'])
                    res['num_examples'] += precursor['num_examples']
                    res['template_score'] = max(res['template_score'], score)
                else:
                    precursor['tforms'] = {precursor['template_id']}
                    smiles_to_index[joined_smiles] = len(results)
                    results.append(precursor)

        # Ranks reflect insertion order, assigned before the precursor
        # prioritizer is applied.
        for rank, result in enumerate(results, 1):
            result['tforms'] = list(result['tforms'])
            result['rank'] = rank

        results = precursor_prioritizer(results)
        cluster_ids = cluster(smiles, results, **cluster_settings)
        for (i, precursor) in enumerate(results):
            precursor['group_id'] = cluster_ids[i]
        return results