Example #1
0
    def __init__(self,
                 celery=False,
                 rank_inclusion=10,
                 prob_inclusion=0.2,
                 max_contexts=10,
                 single_solv=True,
                 with_smiles=True,
                 context_recommender=None,
                 batch_size=500):
        """Store evaluation settings and bind a reaction-evaluation strategy.

        Args:
            celery (bool): if True, evaluations run as asynchronous celery
                tasks; otherwise a local Evaluator instance is called.
            rank_inclusion (int): stored as self.rank_threshold.
            prob_inclusion (float): stored as self.prob_threshold.
            max_contexts (int): stored as self.max_contexts.
            single_solv (bool): stored as self.single_solv.
            with_smiles (bool): stored as self.with_smiles.
            context_recommender: recommender identifier, stored as
                self.recommender.
            batch_size (int): forwarded to the local Evaluator call.
        """
        self.celery = celery
        self.single_solv = single_solv
        self.with_smiles = with_smiles
        self.batch_size = batch_size
        self.evaluator = Evaluator(celery=self.celery)
        self.rank_threshold = rank_inclusion
        self.prob_threshold = prob_inclusion
        self.recommender = context_recommender
        self.max_contexts = max_contexts
        # Flipped to True once a context recommender has been loaded
        # (presumably by a loader method elsewhere on this class — confirm).
        self._loaded_context_recommender = False

        if self.celery:

            # Celery path: dispatch to the 'evaluate' task and block up to
            # 120 seconds for the result.
            def evaluate_reaction(reactant_smiles,
                                  target,
                                  contexts,
                                  worker_no=0):
                # NOTE(review): self.mincount / self.forward_scorer /
                # self.template_count are not assigned in __init__; they must
                # be set on the instance before this closure runs — confirm.
                res = evaluate.apply_async(args=(reactant_smiles, target,
                                                 contexts),
                                           kwargs={
                                               'mincount':
                                               self.mincount,
                                               'forward_scorer':
                                               self.forward_scorer,
                                               'template_count':
                                               self.template_count
                                           })
                return res.get(120)
        else:

            # Local path: evaluate synchronously with the in-process
            # Evaluator constructed above.
            def evaluate_reaction(reactant_smiles,
                                  target,
                                  contexts,
                                  worker_no=0):
                return self.evaluator.evaluate(
                    reactant_smiles,
                    target,
                    contexts,
                    mincount=self.mincount,
                    forward_scorer=self.forward_scorer,
                    nproc=self.nproc,
                    batch_size=self.batch_size,
                    worker_no=worker_no,
                    template_count=self.template_count)

        # Expose the selected strategy under a single attribute.
        self.evaluate_reaction = evaluate_reaction
Example #2
0
def configure_coordinator(options=None, **kwargs):
    """Initialize the module-level Evaluator for a scoring-coordinator worker.

    Worker start-up hook: proceeds only when CORRESPONDING_QUEUE appears in
    the comma-separated queue names in ``options['queues']``; otherwise it is
    a no-op.  On success it rebinds the module-level ``evaluator`` global.

    Args:
        options (dict or None): worker options; only the 'queues' entry is
            read.  ``None`` is treated as an empty mapping.
        **kwargs: ignored; accepted for hook-signature compatibility.
    """
    # BUG FIX: the default was a mutable ``{}`` shared across calls; use the
    # None-sentinel idiom instead.
    if options is None:
        options = {}
    if 'queues' not in options:
        return
    if CORRESPONDING_QUEUE not in options['queues'].split(','):
        return
    print('### STARTING UP A SCORING COORDINATOR ###')

    global evaluator

    evaluator = Evaluator(celery=True)
    print('### SCORING COORDINATOR STARTED UP ###')
Example #3
0
def configure_coordinator(options=None, **kwargs):
    """Initialize the tree-builder coordinator for this worker.

    Worker start-up hook: proceeds only when CORRESPONDING_QUEUE appears in
    the comma-separated queue names in ``options['queues']``; otherwise it is
    a no-op.  On success it rebinds the module-level ``evaluator`` and
    ``treeBuilder`` globals.

    Args:
        options (dict or None): worker options; only the 'queues' entry is
            read.  ``None`` is treated as an empty mapping.
        **kwargs: ignored; accepted for hook-signature compatibility.
    """
    # BUG FIX: the default was a mutable ``{}`` shared across calls; use the
    # None-sentinel idiom instead.
    if options is None:
        options = {}
    if 'queues' not in options:
        return
    if CORRESPONDING_QUEUE not in options['queues'].split(','):
        return
    print('### STARTING UP A TREE BUILDER COORDINATOR ###')

    global treeBuilder
    global evaluator

    evaluator = Evaluator(celery=True)

    # Load known prices and hand the pricer to the tree builder.
    print('Loading prices...')
    pricer = Pricer()
    pricer.load()
    print('Loaded known prices')
    treeBuilder = TreeBuilder(celery=True, pricer=pricer)

    print('Finished initializing treebuilder coordinator')
###label rxns
# Re-key rxn_dict under sequential integer ids: rxn_le maps id -> original
# reaction key (used as a SMILES 'a>>b' string below), encoded_rxn_dict maps
# id -> the original value.
i = 0
rxn_le = {}
encoded_rxn_dict = {}
for key, value in rxn_dict.items():
    rxn_le[i] = key
    encoded_rxn_dict[i] = value
    i += 1

##load conditionprediction and forward evaluation models
# Condition-prediction model; file paths come from the global config (gc).
cont = NeuralNetContextRecommender()
cont.load_nn_model(model_path=gc.NEURALNET_CONTEXT_REC['model_path'],
                   info_path=gc.NEURALNET_CONTEXT_REC['info_path'],
                   weights_path=gc.NEURALNET_CONTEXT_REC['weights_path'])
# Forward-evaluation model runs in-process (no celery workers).
evaluator = Evaluator(celery=False)

from tqdm import tqdm
# Per-reaction condition results accumulator — populated further down;
# exact contents not visible in this chunk.
cond_dict = collections.defaultdict(list)

# Counter — purpose not visible in this chunk; presumably tracks progress
# or hits in the loop below — confirm.
ctr = 0
for key, value in tqdm(encoded_rxn_dict.items()):
    rxn = rxn_le[key]
    rsmi = rxn.split('>>')[0]
    psmi = rxn.split('>>')[1]
    uncleaned_contexts = cont.get_n_conditions(rxn, n=10, return_separate=True)
    contexts = cont.get_n_conditions(rxn, n=10, return_separate=False)
    contexts = [context_cleaner.clean_context(context) for context in contexts]
    try:
        eval_res = evaluator.evaluate(
Example #5
0
class TreeEvaluator():
    '''
    Class for the evaluation of the found retrosynthetic tree
    '''
    def __init__(self,
                 celery=False,
                 rank_inclusion=10,
                 prob_inclusion=0.2,
                 max_contexts=10,
                 single_solv=True,
                 with_smiles=True,
                 context_recommender=None,
                 batch_size=500):
        """Store evaluation settings and bind a reaction-evaluation strategy.

        Args:
            celery (bool): if True, evaluations run as asynchronous celery
                tasks; otherwise a local Evaluator instance is called.
            rank_inclusion (int): stored as self.rank_threshold.
            prob_inclusion (float): stored as self.prob_threshold.
            max_contexts (int): maximum number of contexts the recommender
                may return.
            single_solv (bool): forwarded to context recommendation.
            with_smiles (bool): forwarded to context recommendation.
            context_recommender: recommender identifier, stored as
                self.recommender.
            batch_size (int): forwarded to the local Evaluator call.
        """
        self.celery = celery
        self.single_solv = single_solv
        self.with_smiles = with_smiles
        self.batch_size = batch_size
        self.evaluator = Evaluator(celery=self.celery)
        self.rank_threshold = rank_inclusion
        self.prob_threshold = prob_inclusion
        self.recommender = context_recommender
        self.max_contexts = max_contexts
        # Flipped to True by load_context_recommender on first use.
        self._loaded_context_recommender = False

        if self.celery:

            # Celery path: dispatch to the 'evaluate' task and block up to
            # 120 seconds for the result.
            def evaluate_reaction(reactant_smiles,
                                  target,
                                  contexts,
                                  worker_no=0):
                # NOTE: self.mincount / self.forward_scorer /
                # self.template_count are not set here; evaluate_trees (or
                # evaluate_tree with reset=True) assigns them before use.
                res = evaluate.apply_async(args=(reactant_smiles, target,
                                                 contexts),
                                           kwargs={
                                               'mincount':
                                               self.mincount,
                                               'forward_scorer':
                                               self.forward_scorer,
                                               'template_count':
                                               self.template_count
                                           })
                return res.get(120)
        else:

            # Local path: evaluate synchronously with the in-process
            # Evaluator constructed above.
            def evaluate_reaction(reactant_smiles,
                                  target,
                                  contexts,
                                  worker_no=0):
                return self.evaluator.evaluate(
                    reactant_smiles,
                    target,
                    contexts,
                    mincount=self.mincount,
                    forward_scorer=self.forward_scorer,
                    nproc=self.nproc,
                    batch_size=self.batch_size,
                    worker_no=worker_no,
                    template_count=self.template_count)

        # Expose the selected strategy under a single attribute.
        self.evaluate_reaction = evaluate_reaction

    def load_context_recommender(self):
        if not self.celery:
            self.context_recommender = model_loader.load_Context_Recommender(
                self.recommender, max_contexts=self.max_contexts)

        if self.celery:

            def get_contexts(rxn, n):
                res = get_context_recommendations.apply_async(
                    args=(rxn, ),
                    kwargs={
                        'n': n,
                        'singleSlvt': self.single_solv,
                        'with_smiles': self.with_smiles,
                        'context_recommender': self.recommender
                    })
                return res.get(120)
        else:

            def get_contexts(rxn, n):
                return self.context_recommender.get_n_conditions(
                    rxn,
                    n=n,
                    singleSlvt=self.single_solv,
                    with_smiles=self.with_smiles)

        self.get_contexts = get_contexts
        self._loaded_context_recommender = True

    def get_context_prioritizer(self, context_method):
        """Select, load, and store the context prioritizer for the method."""
        if context_method == gc.probability:
            prioritizer = ProbabilityContextPrioritizer()
        elif context_method == gc.rank:
            prioritizer = RankContextPrioritizer()
        else:
            # Unknown method: warn and fall back to the default prioritizer.
            MyLogger.print_and_log(
                'Specified prioritization method does not exist. Using default method.',
                treeEvaluator_loc,
                level=1)
            prioritizer = DefaultPrioritizer()
        self.context_prioritizer = prioritizer
        self.context_prioritizer.load_model()

    def get_top_context(self, evaluation):
        return self.context_prioritizer.get_priority(evaluation)[0]

    def reset(self):
        if self.celery:
            self.evaluation_dict = {}
        else:
            self.evaluation_queue = Queue()
            self.results_queue = Queue()
            self.workers = []
            self.manager = Manager()
            self.done = self.manager.Value('i', 0)
            self.paused = self.manager.Value('i', 0)
            self.idle = self.manager.list()
            self.evaluation_dict = self.manager.dict()
        self.scored_trees = []

    def is_plausible(self, result):
        prob = result['target']['prob']
        rank = result['target']['rank']
        return prob > self.prob_threshold and rank < self.rank_threshold

    def score_step(self, template_score, forward_score):
        """Combine template and forward scores per self.tree_scorer.

        Unknown scorer settings are logged and fall back to the product.
        """
        scorer = self.tree_scorer
        if scorer == gc.templateonly:
            return template_score
        if scorer == gc.forwardonly:
            return forward_score
        if scorer != gc.product:
            MyLogger.print_and_log(
                'Specified tree scoring method is not implemented. Returning product',
                treeEvaluator_loc,
                level=2)
        return template_score * forward_score

    def evaluate_trees(self,
                       tree_list,
                       context_recommender='',
                       context_scoring_method='',
                       forward_scoring_method='',
                       tree_scoring_method='',
                       rank_threshold=5,
                       prob_threshold=0.2,
                       mincount=25,
                       nproc=1,
                       batch_size=500,
                       n=10,
                       nproc_t=1,
                       parallel=False,
                       template_count=10000):
        """Evaluate a list of retrosynthetic trees, serially or in parallel.

        Stores the evaluation settings on the instance, then either calls
        evaluate_tree on each tree in turn (parallel=False) or distributes
        the trees over ``nproc_t`` worker processes through the evaluation
        queue.

        Args:
            tree_list (list): trees to evaluate.
            nproc_t (int): number of tree-level worker processes.
            parallel (bool): evaluate trees in child processes when True.
            (remaining arguments are stored on the instance and forwarded to
            evaluate_tree.)

        Returns:
            list: the accumulated ``self.scored_trees`` results.
        """
        self.reset()
        self.recommender = context_recommender
        self.get_context_prioritizer(context_scoring_method)
        self.rank_threshold = rank_threshold
        self.prob_threshold = prob_threshold
        self.mincount = mincount
        self.nproc = nproc
        self.batch_size = batch_size
        self.forward_scorer = forward_scoring_method
        self.tree_scorer = tree_scoring_method
        self.template_count = template_count

        if not parallel:
            for tree in tree_list:
                self.scored_trees.append(
                    self.evaluate_tree(
                        tree,
                        context_recommender=context_recommender,
                        context_scoring_method=context_scoring_method,
                        forward_scoring_method=forward_scoring_method,
                        tree_scoring_method=tree_scoring_method,
                        rank_threshold=rank_threshold,
                        prob_threshold=prob_threshold,
                        is_target=True,
                        mincount=mincount,
                        nproc=1,
                        batch_size=batch_size,
                        n=n,
                        template_count=template_count))
        else:

            # BUG FIX: the worker was defined as work(self, i) and stored as
            # a plain instance attribute.  Functions assigned to an instance
            # are NOT bound, so Process(target=self.work, args=(i,)) invoked
            # work(i) with ``i`` landing in the ``self`` slot and no value
            # for ``i`` -> TypeError in every child.  ``self`` is now
            # captured from the enclosing scope instead.
            def work(i):
                while True:
                    # If done, stop.
                    if self.done.value:
                        MyLogger.print_and_log(
                            'Worker {} saw done signal, terminating'.format(i),
                            treeEvaluator_loc)
                        break
                    # If paused, wait and check again.
                    if self.paused.value:
                        time.sleep(1)
                        continue

                    # Grab something off the queue.
                    try:
                        tree = self.evaluation_queue.get(
                            timeout=0.5)  # short timeout
                        self.idle[i] = False
                        # BUG FIX: with is_target=True, evaluate_tree returns
                        # a dict {'tree', 'plausible', 'score'}; the old
                        # two-name unpacking raised ValueError (swallowed by
                        # the broad except), so no result ever reached the
                        # queue.  Unpack the dict explicitly instead.
                        result = self.evaluate_tree(
                            tree,
                            context_recommender=context_recommender,
                            context_scoring_method=context_scoring_method,
                            forward_scoring_method=forward_scoring_method,
                            tree_scoring_method=tree_scoring_method,
                            rank_threshold=rank_threshold,
                            prob_threshold=prob_threshold,
                            is_target=True,
                            mincount=mincount,
                            nproc=1,
                            batch_size=batch_size,
                            n=n,
                            worker_no=i,
                            template_count=template_count)
                        self.results_queue.put([
                            result['tree'], result['plausible'],
                            result['score']
                        ])
                    except VanillaQueue.Empty:
                        # Nothing queued right now; mark idle and re-poll.
                        pass
                    except Exception as e:
                        print(e)
                    self.idle[i] = True
            self.work = work

            self.spin_up_workers(nproc_t)
            self.populate_queue(tree_list)
            self.get_scored_trees()

        return self.scored_trees

    def evaluate_tree(self,
                      tree,
                      context_recommender='',
                      context_scoring_method='',
                      forward_scoring_method='',
                      tree_scoring_method='',
                      rank_threshold=5,
                      prob_threshold=0.2,
                      mincount=25,
                      nproc=1,
                      batch_size=500,
                      n=10,
                      is_target=False,
                      reset=False,
                      worker_no=0,
                      template_count=10000):
        if is_target and reset:
            self.reset()
            self.get_context_prioritizer(context_scoring_method)
            self.rank_threshold = rank_threshold
            self.prob_threshold = prob_threshold
            self.mincount = mincount
            self.recommender = context_recommender
            self.nproc = nproc
            self.batch_size = batch_size
            self.forward_scorer = forward_scoring_method
            self.tree_scorer = tree_scoring_method
            self.template_count = template_count

        if not tree['children']:
            # Reached the end of the synthesis tree -> Stop
            if is_target:
                return {'tree': tree, 'plausible': True, 'score': 1.0}
            else:
                return True, 1.0
        else:
            if self.celery:
                from celery.result import allow_join_result
            else:
                from makeit.utilities.with_dummy import with_dummy as allow_join_result
            with allow_join_result():
                target = tree['smiles']
                reaction = tree['children'][0]
                reactants = [child['smiles'] for child in reaction['children']]
                reaction_smiles = reaction['smiles']
                necessary_reagent = reaction['necessary_reagent']
                ###############################################################
                # If reaction encountered before: get data from dict.
                ###############################################################
                if reaction_smiles in self.evaluation_dict:
                    evaluation = self.evaluation_dict[reaction_smiles]
                ###############################################################
                # Otherwise create data
                ###############################################################
                else:
                    #                   # TODO: better way of deciding if context recommendation is needed
                    contexts = None
                    if gc.forward_scoring_needs_context_necessary_reagent[
                            forward_scoring_method]:
                        if not self._loaded_context_recommender:
                            self.load_context_recommender()
                        if necessary_reagent:
                            contexts = self.get_contexts(reaction_smiles, 1)
                            if contexts is not None and len(
                                    contexts) > 0 and len(
                                        contexts[0]) >= 3 and contexts[0][2]:
                                reactants.extend(
                                    contexts[0][2].split('.'))  # add reagents
                        elif gc.forward_scoring_needs_context[
                                forward_scoring_method]:
                            contexts = self.get_contexts(reaction_smiles, n)
                        elif self.recommender != gc.nearest_neighbor:  #the not using the nearest neighbor model:
                            contexts = self.get_contexts(reaction_smiles, 1)
                        contexts = [
                            context_cleaner.clean_context(context)
                            for context in contexts
                        ]
                    if not contexts:
                        contexts = ['n/a']
                    # remove context without parsible smiles string

                    evaluation = self.evaluate_reaction('.'.join(reactants),
                                                        target,
                                                        contexts,
                                                        worker_no=worker_no)
                    self.evaluation_dict[reaction_smiles] = evaluation
                ###############################################################
                # Process data
                ###############################################################
                if len(evaluation) == 1:
                    top_result = evaluation[0]
                else:
                    top_result = self.get_top_context(evaluation)
                # Add evaluation information to the reaction
                MyLogger.print_and_log(
                    'Evaluated reaction: {} - ranked {} with a {}% probability.'
                    .format(reaction_smiles, top_result['target']['rank'],
                            top_result['target']['prob'] * 100.0),
                    treeEvaluator_loc)

                score = self.score_step(reaction['template_score'],
                                        top_result['target']['prob'])

                plausible = self.is_plausible(top_result)
                print((reaction_smiles, plausible))
                all_children_plausible = True
                for child in reaction['children']:
                    # TODO: pproperly pass arguments to next evaluate_tree call
                    child_plausible, score_child = self.evaluate_tree(
                        child,
                        context_recommender=context_recommender,
                        context_scoring_method=context_scoring_method,
                        forward_scoring_method=forward_scoring_method,
                        tree_scoring_method=tree_scoring_method,
                        rank_threshold=rank_threshold,
                        prob_threshold=prob_threshold,
                        mincount=mincount,
                        nproc=nproc,
                        batch_size=batch_size,
                        n=n,
                        is_target=False,
                        reset=False,
                        worker_no=worker_no,
                        template_count=template_count)
                    score *= score_child
                    if not child_plausible:
                        all_children_plausible = False

                if all_children_plausible and plausible and is_target:
                    MyLogger.print_and_log('Found a fully plausible tree!',
                                           treeEvaluator_loc)
                elif is_target:
                    MyLogger.print_and_log(
                        'Evaluated tree has unfeasible children.',
                        treeEvaluator_loc)

                reaction['top_product'] = {
                    'smiles': top_result['top_product']['smiles'],
                    'score': top_result['top_product']['score'],
                    'prob': top_result['top_product']['prob'],
                }
                reaction['forward_score'] = top_result['target']['prob']
                reaction['cumul_score'] = score
                reaction['rank'] = top_result['target']['rank']
                reaction['templates'] = top_result['target']['template_ids']
                reaction['context'] = top_result['context']
                # overwrite
                tree['children'] = [reaction]
                if is_target:
                    return {
                        'tree': tree,
                        'plausible': plausible and all_children_plausible,
                        'score': score
                    }
                else:
                    return plausible and all_children_plausible, score

    #############################################################
    # MULTIPROCESSING CODE
    #############################################################
    def spin_up_workers(self, nproc_t):
        """Launch nproc_t child processes that run self.work."""
        self.running = True
        MyLogger.print_and_log(
            'Tree evaluator spinning off {} child processes'.format(nproc_t),
            treeEvaluator_loc)
        for worker_id in range(nproc_t):
            # Mark the slot busy before the process starts polling.
            self.idle.append(False)
            proc = Process(target=self.work, args=(worker_id, ))
            self.workers.append(proc)
            proc.start()

    def populate_queue(self, tree_list):
        for tree in tree_list:
            self.evaluation_queue.put(tree)

    def waiting_for_results(self):
        time.sleep(0.05)
        waiting = [self.evaluation_queue.empty()]
        waiting.append(self.results_queue.empty())
        waiting.extend(self.idle)
        return (not all(waiting))

    def get_scored_trees(self):
        while self.waiting_for_results():
            try:
                tree, plausible, score = self.results_queue.get(0.2)
                self.scored_trees.append({
                    'tree': tree,
                    'plausible': plausible,
                    'score': score
                })
            except VanillaQueue.Empty:
                pass
        self.terminate()

    def terminate(self):
        if not self.running:
            return
        self.done.value = 1
        MyLogger.print_and_log('Terminating tree evaluation process.',
                               treeEvaluator_loc)
        time.sleep(1)
        for p in self.workers:
            if p and p.is_alive():
                p.terminate()
        MyLogger.print_and_log('All tree evaluation processes done.',
                               treeEvaluator_loc)
        self.running = False