def __init__(self, celery=False, rank_inclusion=10, prob_inclusion=0.2, max_contexts=10, single_solv=True, with_smiles=True, context_recommender=None, batch_size=500):
    """Store evaluation settings and build the ``evaluate_reaction`` callable.

    Args:
        celery (bool): if True, reaction evaluation is dispatched as a
            celery task; otherwise the local Evaluator is called directly.
        rank_inclusion (int): rank threshold, stored as self.rank_threshold.
        prob_inclusion (float): probability threshold, stored as
            self.prob_threshold.
        max_contexts (int): maximum number of reaction contexts to request.
        single_solv (bool): restrict recommended contexts to one solvent.
        with_smiles (bool): request SMILES strings in recommended contexts.
        context_recommender: identifier of the context recommender to use.
        batch_size (int): batch size forwarded to the local Evaluator.
    """
    self.celery = celery
    self.single_solv = single_solv
    self.with_smiles = with_smiles
    self.batch_size = batch_size
    self.evaluator = Evaluator(celery=self.celery)
    self.rank_threshold = rank_inclusion
    self.prob_threshold = prob_inclusion
    self.recommender = context_recommender
    self.max_contexts = max_contexts
    # Context recommender is loaded lazily on first use.
    self._loaded_context_recommender = False

    if self.celery:
        def evaluate_reaction(reactant_smiles, target, contexts, worker_no=0):
            # Dispatch to the celery `evaluate` task; block up to 120 s
            # for the result.
            # NOTE(review): reads self.mincount, self.forward_scorer and
            # self.template_count, which are NOT set in this constructor --
            # they must be assigned elsewhere before this callable is used
            # (AttributeError otherwise).
            res = evaluate.apply_async(args=(reactant_smiles, target, contexts), kwargs={
                'mincount': self.mincount,
                'forward_scorer': self.forward_scorer,
                'template_count': self.template_count
            })
            return res.get(120)
    else:
        def evaluate_reaction(reactant_smiles, target, contexts, worker_no=0):
            # Same contract as the celery variant, evaluated in-process.
            # NOTE(review): also reads self.mincount / self.forward_scorer /
            # self.nproc / self.template_count, set outside this constructor.
            return self.evaluator.evaluate(
                reactant_smiles, target, contexts,
                mincount=self.mincount, forward_scorer=self.forward_scorer,
                nproc=self.nproc, batch_size=self.batch_size,
                worker_no=worker_no, template_count=self.template_count)
    self.evaluate_reaction = evaluate_reaction
def configure_coordinator(options=None, **kwargs):
    """Worker-init hook: start a scoring coordinator on matching workers.

    Only acts when this worker consumes CORRESPONDING_QUEUE (taken from the
    comma-separated ``queues`` option); otherwise returns immediately. On a
    match, creates the module-level celery-backed Evaluator used by the
    scoring tasks.

    Args:
        options: dict of worker options; only the 'queues' entry is read.
            Default changed from the mutable ``{}`` to ``None`` (same
            behavior, avoids the shared-mutable-default pitfall).
        **kwargs: ignored; accepted for signal-handler compatibility.
    """
    options = options or {}
    if 'queues' not in options:
        return
    if CORRESPONDING_QUEUE not in options['queues'].split(','):
        return
    print('### STARTING UP A SCORING COORDINATOR ###')
    global evaluator
    evaluator = Evaluator(celery=True)
    print('### SCORING COORDINATOR STARTED UP ###')
def configure_coordinator(options=None, **kwargs):
    """Worker-init hook: start a tree builder coordinator on matching workers.

    Only acts when this worker consumes CORRESPONDING_QUEUE (taken from the
    comma-separated ``queues`` option); otherwise returns immediately. On a
    match, creates the module-level celery-backed Evaluator and a TreeBuilder
    backed by a freshly loaded Pricer.

    Args:
        options: dict of worker options; only the 'queues' entry is read.
            Default changed from the mutable ``{}`` to ``None`` (same
            behavior, avoids the shared-mutable-default pitfall).
        **kwargs: ignored; accepted for signal-handler compatibility.
    """
    options = options or {}
    if 'queues' not in options:
        return
    if CORRESPONDING_QUEUE not in options['queues'].split(','):
        return
    print('### STARTING UP A TREE BUILDER COORDINATOR ###')
    global treeBuilder
    global evaluator
    evaluator = Evaluator(celery=True)
    # Prices must be loaded before the tree builder can cost leaf chemicals.
    print('Loading prices...')
    pricer = Pricer()
    pricer.load()
    print('Loaded known prices')
    treeBuilder = TreeBuilder(celery=True, pricer=pricer)
    print('Finished initializing treebuilder coordinator')
###label rxns i = 0 rxn_le = {} encoded_rxn_dict = {} for key, value in rxn_dict.items(): rxn_le[i] = key encoded_rxn_dict[i] = value i += 1 ##load conditionprediction and forward evaluation models cont = NeuralNetContextRecommender() cont.load_nn_model(model_path=gc.NEURALNET_CONTEXT_REC['model_path'], info_path=gc.NEURALNET_CONTEXT_REC['info_path'], weights_path=gc.NEURALNET_CONTEXT_REC['weights_path']) evaluator = Evaluator(celery=False) from tqdm import tqdm cond_dict = collections.defaultdict(list) ctr = 0 for key, value in tqdm(encoded_rxn_dict.items()): rxn = rxn_le[key] rsmi = rxn.split('>>')[0] psmi = rxn.split('>>')[1] uncleaned_contexts = cont.get_n_conditions(rxn, n=10, return_separate=True) contexts = cont.get_n_conditions(rxn, n=10, return_separate=False) contexts = [context_cleaner.clean_context(context) for context in contexts] try: eval_res = evaluator.evaluate(
class TreeEvaluator():
    '''
    Class for the evaluation of the found retrosynthetic tree.

    Scores each reaction step of a tree with a forward evaluator under
    recommended reaction contexts and attaches plausibility / score
    information to the tree nodes. Work runs either through celery task
    queues (celery=True) or through local multiprocessing workers.
    '''

    def __init__(self, celery=False, rank_inclusion=10, prob_inclusion=0.2,
                 max_contexts=10, single_solv=True, with_smiles=True,
                 context_recommender=None, batch_size=500):
        """Store evaluation settings and build the evaluate_reaction callable.

        celery -- if True, evaluation is dispatched as a celery task;
            otherwise the local Evaluator is called directly.
        rank_inclusion -- rank threshold (stored as self.rank_threshold).
        prob_inclusion -- probability threshold (self.prob_threshold).
        max_contexts -- maximum number of reaction contexts to request.
        single_solv -- restrict recommended contexts to one solvent.
        with_smiles -- request SMILES strings in recommended contexts.
        context_recommender -- identifier of the context recommender.
        batch_size -- batch size forwarded to the local Evaluator.
        """
        self.celery = celery
        self.single_solv = single_solv
        self.with_smiles = with_smiles
        self.batch_size = batch_size
        self.evaluator = Evaluator(celery=self.celery)
        self.rank_threshold = rank_inclusion
        self.prob_threshold = prob_inclusion
        self.recommender = context_recommender
        self.max_contexts = max_contexts
        # Context recommender is loaded lazily on first use.
        self._loaded_context_recommender = False

        if self.celery:
            def evaluate_reaction(reactant_smiles, target, contexts, worker_no=0):
                # Dispatch to the celery `evaluate` task; block up to 120 s.
                # NOTE(review): self.mincount / self.forward_scorer /
                # self.template_count are set by evaluate_trees /
                # evaluate_tree, not here -- calling this earlier raises
                # AttributeError.
                res = evaluate.apply_async(args=(reactant_smiles, target, contexts), kwargs={
                    'mincount': self.mincount,
                    'forward_scorer': self.forward_scorer,
                    'template_count': self.template_count
                })
                return res.get(120)
        else:
            def evaluate_reaction(reactant_smiles, target, contexts, worker_no=0):
                # Same contract as the celery variant, evaluated in-process.
                return self.evaluator.evaluate(
                    reactant_smiles, target, contexts,
                    mincount=self.mincount, forward_scorer=self.forward_scorer,
                    nproc=self.nproc, batch_size=self.batch_size,
                    worker_no=worker_no, template_count=self.template_count)
        self.evaluate_reaction = evaluate_reaction

    def load_context_recommender(self):
        """Load the context recommender and build self.get_contexts.

        In celery mode recommendations come from the
        `get_context_recommendations` task; otherwise a recommender model
        is loaded in-process. Called lazily from evaluate_tree.
        """
        if not self.celery:
            self.context_recommender = model_loader.load_Context_Recommender(
                self.recommender, max_contexts=self.max_contexts)
        if self.celery:
            def get_contexts(rxn, n):
                # Ask the celery task for n recommendations; block up to 120 s.
                res = get_context_recommendations.apply_async(
                    args=(rxn, ), kwargs={
                        'n': n,
                        'singleSlvt': self.single_solv,
                        'with_smiles': self.with_smiles,
                        'context_recommender': self.recommender
                    })
                return res.get(120)
        else:
            def get_contexts(rxn, n):
                # In-process recommendation through the loaded model.
                return self.context_recommender.get_n_conditions(
                    rxn, n=n, singleSlvt=self.single_solv,
                    with_smiles=self.with_smiles)
        self.get_contexts = get_contexts
        self._loaded_context_recommender = True

    def get_context_prioritizer(self, context_method):
        """Instantiate and load the context prioritization model."""
        if context_method == gc.probability:
            self.context_prioritizer = ProbabilityContextPrioritizer()
        elif context_method == gc.rank:
            self.context_prioritizer = RankContextPrioritizer()
        else:
            MyLogger.print_and_log(
                'Specified prioritization method does not exist. Using default method.',
                treeEvaluator_loc, level=1)
            self.context_prioritizer = DefaultPrioritizer()
        self.context_prioritizer.load_model()

    def get_top_context(self, evaluation):
        """Return the highest-priority result among the context evaluations."""
        return self.context_prioritizer.get_priority(evaluation)[0]

    def reset(self):
        """Reset evaluation bookkeeping.

        In celery mode only a plain cache dict is needed; in local mode the
        multiprocessing machinery (queues, manager-shared flags, worker
        list) is re-created as well.
        """
        if self.celery:
            self.evaluation_dict = {}
        else:
            self.evaluation_queue = Queue()
            self.results_queue = Queue()
            self.workers = []
            self.manager = Manager()
            # Shared signed ints used as boolean flags across processes.
            self.done = self.manager.Value('i', 0)
            self.paused = self.manager.Value('i', 0)
            self.idle = self.manager.list()
            self.evaluation_dict = self.manager.dict()
        self.scored_trees = []

    def is_plausible(self, result):
        """True if prob is strictly above and rank strictly below threshold."""
        prob = result['target']['prob']
        rank = result['target']['rank']
        return prob > self.prob_threshold and rank < self.rank_threshold

    def score_step(self, template_score, forward_score):
        """Combine template and forward scores per self.tree_scorer."""
        if self.tree_scorer == gc.templateonly:
            return template_score
        elif self.tree_scorer == gc.forwardonly:
            return forward_score
        elif self.tree_scorer == gc.product:
            return template_score * forward_score
        else:
            # Unknown method: fall back to the product, matching gc.product.
            MyLogger.print_and_log(
                'Specified tree scoring method is not implemented. Returning product',
                treeEvaluator_loc, level=2)
            return template_score * forward_score

    def evaluate_trees(self, tree_list, context_recommender='', context_scoring_method='',
                       forward_scoring_method='', tree_scoring_method='', rank_threshold=5,
                       prob_threshold=0.2, mincount=25, nproc=1, batch_size=500, n=10,
                       nproc_t=1, parallel=False, template_count=10000):
        """Evaluate a list of retrosynthetic trees.

        Configures the evaluator, then scores every tree either serially or
        with nproc_t worker processes (parallel=True). Returns the list of
        {'tree', 'plausible', 'score'} dicts accumulated in
        self.scored_trees.
        """
        self.reset()
        self.recommender = context_recommender
        self.get_context_prioritizer(context_scoring_method)
        self.rank_threshold = rank_threshold
        self.prob_threshold = prob_threshold
        self.mincount = mincount
        self.nproc = nproc
        self.batch_size = batch_size
        self.forward_scorer = forward_scoring_method
        self.tree_scorer = tree_scoring_method
        self.template_count = template_count
        if not parallel:
            for tree in tree_list:
                self.scored_trees.append(
                    self.evaluate_tree(
                        tree,
                        context_recommender=context_recommender,
                        context_scoring_method=context_scoring_method,
                        forward_scoring_method=forward_scoring_method,
                        tree_scoring_method=tree_scoring_method,
                        rank_threshold=rank_threshold,
                        prob_threshold=prob_threshold,
                        is_target=True,
                        mincount=mincount,
                        nproc=1,
                        batch_size=batch_size,
                        n=n,
                        template_count=template_count))
        else:
            # BUGFIX: `work` used to be declared as `def work(self, i)` and
            # stored as an instance attribute (self.work = work). Functions
            # assigned to *instances* are not bound methods, so
            # Process(target=self.work, args=(i,)) called it with a single
            # argument and every worker died with a TypeError. `self` is
            # now captured from the enclosing scope instead.
            def work(i):
                while True:
                    # If done, stop
                    if self.done.value:
                        MyLogger.print_and_log(
                            'Worker {} saw done signal, terminating'.format(i),
                            treeEvaluator_loc)
                        break
                    # If paused, wait and check again
                    if self.paused.value:
                        time.sleep(1)
                        continue
                    # Grab something off the queue
                    try:
                        tree = self.evaluation_queue.get(timeout=0.5)  # short timeout
                        self.idle[i] = False
                        plausible, score = self.evaluate_tree(
                            tree,
                            context_recommender=context_recommender,
                            context_scoring_method=context_scoring_method,
                            forward_scoring_method=forward_scoring_method,
                            tree_scoring_method=tree_scoring_method,
                            rank_threshold=rank_threshold,
                            prob_threshold=prob_threshold,
                            is_target=True,
                            mincount=mincount,
                            nproc=1,
                            batch_size=batch_size,
                            n=n,
                            worker_no=i,
                            template_count=template_count)
                        self.results_queue.put([tree, plausible, score])
                    except VanillaQueue.Empty:
                        pass
                    except Exception as e:
                        print(e)
                    self.idle[i] = True
            self.work = work
            self.spin_up_workers(nproc_t)
            self.populate_queue(tree_list)
            self.get_scored_trees()
        return self.scored_trees

    def evaluate_tree(self, tree, context_recommender='', context_scoring_method='',
                      forward_scoring_method='', tree_scoring_method='', rank_threshold=5,
                      prob_threshold=0.2, mincount=25, nproc=1, batch_size=500, n=10,
                      is_target=False, reset=False, worker_no=0, template_count=10000):
        """Recursively evaluate a single retrosynthetic tree.

        For every reaction node: recommend contexts (when the forward scorer
        needs them), run the forward evaluator, pick the top context and
        attach score/rank/context data to the reaction dict. Step scores are
        multiplied down the tree.

        Returns {'tree', 'plausible', 'score'} when is_target, otherwise
        the tuple (plausible, score).
        """
        if is_target and reset:
            self.reset()
            self.get_context_prioritizer(context_scoring_method)
            self.rank_threshold = rank_threshold
            self.prob_threshold = prob_threshold
            self.mincount = mincount
            self.recommender = context_recommender
            self.nproc = nproc
            self.batch_size = batch_size
            self.forward_scorer = forward_scoring_method
            self.tree_scorer = tree_scoring_method
            self.template_count = template_count
        if not tree['children']:
            # Reached the end of the synthesis tree -> Stop
            if is_target:
                return {'tree': tree, 'plausible': True, 'score': 1.0}
            else:
                return True, 1.0
        else:
            if self.celery:
                from celery.result import allow_join_result
            else:
                from makeit.utilities.with_dummy import with_dummy as allow_join_result
            with allow_join_result():
                target = tree['smiles']
                reaction = tree['children'][0]
                reactants = [child['smiles'] for child in reaction['children']]
                reaction_smiles = reaction['smiles']
                necessary_reagent = reaction['necessary_reagent']
                ###############################################################
                # If reaction encountered before: get data from dict.
                ###############################################################
                if reaction_smiles in self.evaluation_dict:
                    evaluation = self.evaluation_dict[reaction_smiles]
                ###############################################################
                # Otherwise create data
                ###############################################################
                else:
                    # TODO: better way of deciding if context recommendation is needed
                    contexts = None
                    if gc.forward_scoring_needs_context_necessary_reagent[
                            forward_scoring_method]:
                        if not self._loaded_context_recommender:
                            self.load_context_recommender()
                        if necessary_reagent:
                            contexts = self.get_contexts(reaction_smiles, 1)
                            if contexts is not None and len(contexts) > 0 and \
                                    len(contexts[0]) >= 3 and contexts[0][2]:
                                reactants.extend(
                                    contexts[0][2].split('.'))  # add reagents
                    elif gc.forward_scoring_needs_context[forward_scoring_method]:
                        contexts = self.get_contexts(reaction_smiles, n)
                    elif self.recommender != gc.nearest_neighbor:
                        # then not using the nearest neighbor model:
                        contexts = self.get_contexts(reaction_smiles, 1)
                    # BUGFIX: cleaning used to run unconditionally, raising
                    # TypeError whenever no branch above produced contexts
                    # (contexts still None); the 'n/a' fallback below now
                    # actually gets a chance to apply.
                    if contexts:
                        contexts = [
                            context_cleaner.clean_context(context)
                            for context in contexts
                        ]
                    if not contexts:
                        contexts = ['n/a']
                    # remove context without parsible smiles string
                    evaluation = self.evaluate_reaction(
                        '.'.join(reactants), target, contexts,
                        worker_no=worker_no)
                    self.evaluation_dict[reaction_smiles] = evaluation
                ###############################################################
                # Process data
                ###############################################################
                if len(evaluation) == 1:
                    top_result = evaluation[0]
                else:
                    top_result = self.get_top_context(evaluation)
                # Add evaluation information to the reaction
                MyLogger.print_and_log(
                    'Evaluated reaction: {} - ranked {} with a {}% probability.'
                    .format(reaction_smiles, top_result['target']['rank'],
                            top_result['target']['prob'] * 100.0),
                    treeEvaluator_loc)
                score = self.score_step(reaction['template_score'],
                                        top_result['target']['prob'])
                plausible = self.is_plausible(top_result)
                print((reaction_smiles, plausible))
                all_children_plausible = True
                for child in reaction['children']:
                    # TODO: properly pass arguments to next evaluate_tree call
                    child_plausible, score_child = self.evaluate_tree(
                        child,
                        context_recommender=context_recommender,
                        context_scoring_method=context_scoring_method,
                        forward_scoring_method=forward_scoring_method,
                        tree_scoring_method=tree_scoring_method,
                        rank_threshold=rank_threshold,
                        prob_threshold=prob_threshold,
                        mincount=mincount,
                        nproc=nproc,
                        batch_size=batch_size,
                        n=n,
                        is_target=False,
                        reset=False,
                        worker_no=worker_no,
                        template_count=template_count)
                    score *= score_child
                    if not child_plausible:
                        all_children_plausible = False
                if all_children_plausible and plausible and is_target:
                    MyLogger.print_and_log('Found a fully plausible tree!',
                                           treeEvaluator_loc)
                elif is_target:
                    MyLogger.print_and_log(
                        'Evaluated tree has unfeasible children.',
                        treeEvaluator_loc)
                reaction['top_product'] = {
                    'smiles': top_result['top_product']['smiles'],
                    'score': top_result['top_product']['score'],
                    'prob': top_result['top_product']['prob'],
                }
                reaction['forward_score'] = top_result['target']['prob']
                reaction['cumul_score'] = score
                reaction['rank'] = top_result['target']['rank']
                reaction['templates'] = top_result['target']['template_ids']
                reaction['context'] = top_result['context']
                # overwrite
                tree['children'] = [reaction]
                if is_target:
                    return {
                        'tree': tree,
                        'plausible': plausible and all_children_plausible,
                        'score': score
                    }
                else:
                    return plausible and all_children_plausible, score

    #############################################################
    # MULTIPROCESSING CODE
    #############################################################
    def spin_up_workers(self, nproc_t):
        """Start nproc_t worker processes running self.work."""
        self.running = True
        MyLogger.print_and_log(
            'Tree evaluator spinning off {} child processes'.format(nproc_t),
            treeEvaluator_loc)
        for i in range(nproc_t):
            self.idle.append(False)
            p = Process(target=self.work, args=(i, ))
            self.workers.append(p)
            p.start()

    def populate_queue(self, tree_list):
        """Feed every tree to the shared evaluation queue."""
        for tree in tree_list:
            self.evaluation_queue.put(tree)

    def waiting_for_results(self):
        """True while any queue holds items or any worker is busy."""
        time.sleep(0.05)
        waiting = [self.evaluation_queue.empty()]
        waiting.append(self.results_queue.empty())
        waiting.extend(self.idle)
        return (not all(waiting))

    def get_scored_trees(self):
        """Drain the results queue into self.scored_trees, then shut down."""
        while self.waiting_for_results():
            try:
                # BUGFIX: the first positional parameter of Queue.get is
                # `block`, not `timeout`; get(0.2) was an indefinitely
                # blocking get (0.2 is truthy) and the Empty handler was
                # dead code. Use an explicit short timeout instead.
                tree, plausible, score = self.results_queue.get(timeout=0.2)
                self.scored_trees.append({
                    'tree': tree,
                    'plausible': plausible,
                    'score': score
                })
            except VanillaQueue.Empty:
                pass
        self.terminate()

    def terminate(self):
        """Signal workers to stop and force-kill any stragglers."""
        if not self.running:
            return
        self.done.value = 1
        MyLogger.print_and_log('Terminating tree evaluation process.',
                               treeEvaluator_loc)
        time.sleep(1)  # give workers a chance to see the done flag
        for p in self.workers:
            if p and p.is_alive():
                p.terminate()
        MyLogger.print_and_log('All tree evaluation processes done.',
                               treeEvaluator_loc)
        self.running = False