Esempio n. 1
0
            def get_ready_result(is_ready):
                """Yield one ForwardResult for each index listed in ``is_ready``.

                Args:
                    is_ready: Iterable of indices into ``self.pending_results``.

                Yields:
                    tuple: ``(result, is_ready)`` -- the ForwardResult rebuilt
                    from the fetched outcomes, together with the same
                    ``is_ready`` object that was passed in.
                """
                for index in is_ready:
                    pending = self.pending_results[index]
                    # Fetch the (smiles, outcomes) payload, then call forget()
                    # on the pending result before rebuilding the products.
                    smiles, outcomes = pending.get(timeout=0.2)
                    pending.forget()

                    result = ForwardResult(smiles)
                    for outcome in outcomes:
                        product = ForwardProduct(
                            smiles_list=outcome['smiles_list'],
                            smiles=outcome['smiles'],
                            template_ids=outcome['template_ids'],
                            num_examples=outcome['num_examples'],
                            edits=outcome['edits'],
                        )
                        result.add_product(product)
                    yield result, is_ready
Esempio n. 2
0
    def get_candidate_edits(self, smiles):
        """Gather products from the pending results into candidate edits.

        Polls ``self.pending_results`` for as long as
        ``self.waiting_for_results()`` is true, merging every product whose
        SMILES is not one of the reactant fragments into a single
        ForwardResult, and finally calls ``self.stop_expansion()``.

        Args:
            smiles (str): Reactant SMILES; '.'-separated fragments are treated
                as individual reactants when filtering products.

        Returns:
            tuple: ``(all_results, candidate_edits)`` where ``all_results`` is
            a list of ``product.as_dict()`` values and ``candidate_edits`` is
            a list of ``(product.get_smiles(), product.get_edits())`` pairs.
        """
        candidate_edits = []
        stiched_result = ForwardResult(smiles)

        # Drop atom-map numbers before re-rendering the reactants, so the
        # fragment strings compared against product SMILES carry no mapping.
        rct_temp = Chem.MolFromSmiles(smiles)
        for atom in rct_temp.GetAtoms():
            atom.ClearProp('molAtomMapNumber')
        split_smiles = Chem.MolToSmiles(rct_temp).split('.')
        print('SPLIT SMILES FOR GET_CANDIDATE_EDITS: {}'.format(split_smiles))

        all_results = []
        is_ready = [
            i for (i, res) in enumerate(self.pending_results) if res.ready()
        ]
        while self.waiting_for_results():
            try:
                for result, is_ready in self.get_ready_result(is_ready):
                    for product in result.products:
                        # Products identical to a reactant fragment are not
                        # kept as candidates.
                        if product.smiles not in split_smiles:
                            stiched_result.add_product(product)
                time.sleep(0.5)
                # Discard the results that were just consumed, then look for
                # newly finished ones among those that remain.
                self.pending_results = [
                    res for (i, res) in enumerate(self.pending_results)
                    if i not in is_ready
                ]
                is_ready = [
                    i for (i, res) in enumerate(self.pending_results)
                    if res.ready()
                ]
            except Exception as e:
                # Best effort: report the failure and poll again. Note that the
                # sleep and the pending-list refresh above are skipped for the
                # iteration in which the exception was raised.
                print(e)

        for product in stiched_result.products:
            all_results.append(product.as_dict())
            candidate_edits.append((product.get_smiles(), product.get_edits()))

        self.stop_expansion()
        return (all_results, candidate_edits)
Esempio n. 3
0
def test_batch(ft, smiles, template_count, size):
    """Compare batched template application against a pickled expectation.

    Runs ``ft.get_outcomes`` over the templates in consecutive windows of
    ``size``, merges all products into one ForwardResult and compares them,
    in order, with the products stored in ``expected/<size>.pkl`` next to
    this file.

    Args:
        ft: Transformer object exposing ``get_outcomes``.
        smiles (str): Reactant SMILES passed to every batch.
        template_count (int): Total number of templates to cover.
        size (int): Number of templates per batch.

    Returns:
        bool: True when both product lists have the same length and every
        pair of products has equal ``as_dict()`` output, False otherwise.
    """
    outcomes = [
        ft.get_outcomes(smiles,
                        100,
                        start_at=start_at,
                        end_at=start_at + size,
                        template_prioritization=gc.popularity)
        for start_at in range(0, template_count, size)
    ]

    unique_res = ForwardResult(smiles)
    # Only the result half of each (smiles, result) pair is needed; a
    # throwaway name avoids shadowing the ``smiles`` parameter.
    for _, batch_result in outcomes:
        unique_res.add_products(batch_result.products)

    expected_path = os.path.join(os.path.dirname(__file__),
                                 'expected/' + str(size) + '.pkl')
    # NOTE: pickle.load executes arbitrary code from the file; this must only
    # ever read fixtures shipped with the repository.
    with open(expected_path, 'rb') as f:
        expected = pickle.load(f).get_products()

    result = unique_res.get_products()
    if len(result) != len(expected):
        return False
    for expected_product, actual_product in zip(expected, result):
        if expected_product.as_dict() != actual_product.as_dict():
            return False
    return True
Esempio n. 4
0
    def get_outcomes(self,
                     smiles,
                     mincount,
                     template_prioritization,
                     start_at=-1,
                     end_at=-1,
                     singleonly=True,
                     stop_if=False,
                     template_count=10000,
                     max_cum_prob=1.0):
        """Performs a one-step synthesis reaction for a given SMILES string.

        Each candidate in the returned result is built from the
        ForwardProduct objects produced by ``apply_one_template``.

        Args:
            smiles (str): SMILES string of the reactant molecule(s) that the
                templates are applied to.
            mincount (int): Popularity threshold; only templates whose
                ``'count'`` is strictly greater than this value are applied.
            template_prioritization: Specifies method to use for ordering
                templates. ``gc.popularity`` uses ``self.templates`` as
                loaded; anything else goes through
                ``self.template_prioritizer``.
            start_at (int, optional): Index of first prioritized template to
                use. Negative values (including the default) start from the
                first template. (default: {-1})
            end_at (int, optional): Index of prioritized template to stop
                before; -1 or any value past the end means "through the last
                template". (default: {-1})
            singleonly (bool, optional): Whether to reduce each product to the
                largest (longest) one. (default: {True})
            stop_if (bool or string, optional): SMILES string of molecule to
                stop at if found, or False for no target. (default: {False})
            template_count (int, optional): Maximum number of templates to use.
                Only applied when the prioritizer is used. (default: {10000})
            max_cum_prob (float, optional): Maximum cumulative probability of
                all templates used. Only applied when the prioritizer is used.
                (default: {1.0})

        Returns:
            tuple: ``(smiles, result)`` where ``smiles`` is the SMILES
            re-generated from the parsed molecule and ``result`` is a list of
            product dicts when ``self.celery`` is truthy, otherwise a
            ForwardResult.
        """
        self.get_template_prioritizers(template_prioritization)
        # Get sorted by popularity during loading.
        if template_prioritization == gc.popularity:
            prioritized_templates = self.templates
        else:
            self.template_prioritizer.set_max_templates(template_count)
            self.template_prioritizer.set_max_cum_prob(max_cum_prob)
            prioritized_templates = self.template_prioritizer.get_priority(
                (self.templates, smiles))
        self.mincount = mincount
        self.start_at = start_at
        self.singleonly = singleonly
        self.stop_if = stop_if

        # NOTE(review): the upper bound is clamped against self.templates even
        # though indexing below uses prioritized_templates -- confirm the
        # prioritizer never returns fewer entries than self.templates.
        if end_at == -1 or end_at >= len(self.templates):
            self.end_at = len(self.templates)
        else:
            self.end_at = end_at
        # Define mol to operate on
        mol = Chem.MolFromSmiles(smiles)
        clean_reactant_mapping(mol)
        # Label every atom with a 1-based isotope number.
        for atom_idx, atom in enumerate(mol.GetAtoms()):
            atom.SetIsotope(atom_idx + 1)
        smiles = Chem.MolToSmiles(
            mol, isomericSmiles=USE_STEREOCHEMISTRY)  # to canonicalize
        # Initialize results object
        if self.celery:
            result = []
        else:
            result = ForwardResult(smiles)

        # A negative start_at (including the -1 default) means "from the first
        # template". Without clamping, range(-1, ...) would index
        # prioritized_templates[-1] and apply the last template before the
        # first one.
        first_index = max(self.start_at, 0)
        for i in range(first_index, self.end_at):

            # only use templates between the specified boundaries.
            template = prioritized_templates[i]
            if template['count'] > mincount:
                products = self.apply_one_template(mol,
                                                   smiles,
                                                   template,
                                                   singleonly=singleonly,
                                                   stop_if=stop_if)
                if self.celery:
                    # Celery mode collects plain dicts built from each
                    # product's attributes instead of ForwardProduct objects.
                    for product in products:
                        result.append({
                            'smiles_list': product.smiles_list,
                            'smiles': product.smiles,
                            'edits': product.edits,
                            'template_ids': product.template_ids,
                            'num_examples': product.num_examples
                        })
                else:
                    result.add_products(products)

        return (smiles, result)
Esempio n. 5
0
        return results


if __name__ == '__main__':
    # Demo driver: apply all templates to one molecule in batches of varying
    # size and print how many products each batch size yields.
    MyLogger.initialize_logFile()
    ft = ForwardTransformer(mincount=10)
    ft.load()

    template_count = ft.template_count()
    smiles = 'NC(=O)[C@H](CCC=O)N1C(=O)c2ccccc2C1=O'
    for batch_size in range(100, 1000, 100):
        print()
        print(batch_size)
        outcomes = [
            ft.get_outcomes(smiles,
                            100,
                            start_at=start_at,
                            end_at=start_at + batch_size,
                            template_prioritization=gc.popularity)
            for start_at in range(0, template_count, batch_size)
        ]
        print('Ran {} batches of {} templates'.format(len(outcomes),
                                                      batch_size))
        unique_res = ForwardResult(smiles)

        # Unpack into a throwaway name: rebinding ``smiles`` here would make
        # every later batch size run on the string returned by get_outcomes
        # instead of the original input.
        for _, result in outcomes:
            unique_res.add_products(result.products)
        print(len(unique_res.products))
Esempio n. 6
0
    def get_outcomes(self,
                     smiles,
                     mincount,
                     template_prioritization,
                     start_at=-1,
                     end_at=-1,
                     singleonly=True,
                     stop_if=False,
                     template_count=10000,
                     max_cum_prob=1.0):
        """Apply the prioritized templates to ``smiles`` and collect products.

        Each candidate in the returned result is built from the
        ForwardProduct objects produced by ``apply_one_template``.

        Args:
            smiles (str): SMILES string of the molecule(s) that the templates
                are applied to.
            mincount (int): Only templates whose ``'count'`` is strictly
                greater than this value are applied.
            template_prioritization: Ordering method. ``gc.popularity`` uses
                ``self.templates`` as loaded; anything else goes through
                ``self.template_prioritizer``.
            start_at (int, optional): Index of the first prioritized template
                to use. Negative values (including the default) start from
                the first template.
            end_at (int, optional): Index of the prioritized template to stop
                before; -1 or any value past the end means "through the last
                template".
            singleonly (bool, optional): Forwarded to ``apply_one_template``.
            stop_if (bool or str, optional): Forwarded to
                ``apply_one_template``.
            template_count (int, optional): Passed to the prioritizer's
                ``set_max_templates`` when the prioritizer is used.
            max_cum_prob (float, optional): Passed to the prioritizer's
                ``set_max_cum_prob`` when the prioritizer is used.

        Returns:
            tuple: ``(smiles, result)`` where ``smiles`` is the SMILES
            re-generated from the parsed molecule and ``result`` is a list of
            product dicts when ``self.celery`` is truthy, otherwise a
            ForwardResult.
        """
        self.get_template_prioritizers(template_prioritization)
        # Get sorted by popularity during loading.
        if template_prioritization == gc.popularity:
            prioritized_templates = self.templates
        else:
            self.template_prioritizer.set_max_templates(template_count)
            self.template_prioritizer.set_max_cum_prob(max_cum_prob)
            prioritized_templates = self.template_prioritizer.get_priority(
                (self.templates, smiles))
        self.mincount = mincount
        self.start_at = start_at
        self.singleonly = singleonly
        self.stop_if = stop_if

        # NOTE(review): the upper bound is clamped against self.templates even
        # though indexing below uses prioritized_templates -- confirm the
        # prioritizer never returns fewer entries than self.templates.
        if end_at == -1 or end_at >= len(self.templates):
            self.end_at = len(self.templates)
        else:
            self.end_at = end_at
        # Define mol to operate on
        mol = Chem.MolFromSmiles(smiles)
        clean_reactant_mapping(mol)
        smiles = Chem.MolToSmiles(
            mol, isomericSmiles=USE_STEREOCHEMISTRY)  # to canonicalize
        # Initialize results object
        if self.celery:
            result = []
        else:
            result = ForwardResult(smiles)

        # A negative start_at (including the -1 default) means "from the first
        # template". Without clamping, range(-1, ...) would index
        # prioritized_templates[-1] and apply the last template before the
        # first one.
        first_index = max(self.start_at, 0)
        for i in range(first_index, self.end_at):

            # only use templates between the specified boundaries.
            template = prioritized_templates[i]
            if template['count'] > mincount:
                products = self.apply_one_template(mol,
                                                   smiles,
                                                   template,
                                                   singleonly=singleonly,
                                                   stop_if=stop_if)
                if self.celery:
                    # Celery mode collects plain dicts built from each
                    # product's attributes instead of ForwardProduct objects.
                    for product in products:
                        result.append({
                            'smiles_list': product.smiles_list,
                            'smiles': product.smiles,
                            'edits': product.edits,
                            'template_ids': product.template_ids,
                            'num_examples': product.num_examples
                        })
                else:
                    result.add_products(products)

        return (smiles, result)