def test_product_against_rules():
    """Apply user-supplied reaction SMARTS to a target and return the one-step network.

    Reads ``target_smiles``, ``smarts`` and ``combine_enantiomers`` from the
    submitted form.  ``smarts`` is parsed as YAML and every entry of the parsed
    value is turned into an ``rdchiralReaction``.  A fresh ``Network`` is then
    restricted to those reactions and generated for a single step.

    Returns:
        ``jsonify(result=...)`` where the result is ``{'status': 'fail'}`` if
        parsing, rule construction or network generation raises, otherwise
        ``{'status': 'success', 'nodes': ..., 'edges': ...}`` with the values
        from ``network.get_visjs_nodes_and_edges()``.
    """
    target_smiles = request.form['target_smiles']
    smarts = request.form['smarts']

    # NOTE(security): `smarts` is untrusted form input.  FullLoader is kept so
    # behaviour is unchanged, but yaml.safe_load is the recommended choice for
    # external data - confirm nothing relies on FullLoader-only tags and switch.
    try:
        smarts_list = yaml.load(smarts, Loader=yaml.FullLoader)
    except Exception:
        return jsonify(result={'status': 'fail'})

    try:
        rxn_list = [rdchiralReaction(sma) for sma in smarts_list]
    except Exception:
        return jsonify(result={'status': 'fail'})

    try:
        network = Network()
        # The form sends the flag as the literal string 'true'; anything else is False.
        network.settings['combine_enantiomers'] = (
            request.form['combine_enantiomers'] == 'true')

        # Both the rule object and the network itself are pointed at the test rules only.
        network.rxn_obj.rxns = {'test_smarts': rxn_list}
        network.rxns = {'test_smarts': rxn_list}

        network.generate(target_smiles, 1)
        nodes, edges = network.get_visjs_nodes_and_edges()
        print(nodes)
        result = {'status': 'success', 'nodes': nodes, 'edges': edges}
    except Exception:
        # Deliberate best-effort: any failure is reported to the client as 'fail'.
        result = {'status': 'fail'}

    return jsonify(result=result)
def create_pathways_for_test(self, target_product):
    """Generate a network for ``target_product`` and return the pathways BFS finds in it.

    The network is configured from this object's ``specificity_threshold``,
    ``max_nodes`` and ``only_positive_specificity`` attributes, generated for
    ``self.num_steps`` steps, and searched with ``self.max_pathways`` and
    ``self.min_weight``.  A log line is written when the number of pathways
    equals ``self.max_pathways``.
    """
    network = Network()
    test_settings = {
        "combine_enantiomers": True,
        "remove_simple": True,
        "similarity_score_threshold": self.specificity_threshold,
        "num_enzymes": 1,
        "max_nodes": self.max_nodes,
        "prune_steps": 1,
        "only_postitive_enzyme_data": self.only_positive_specificity,
        'prune_on_substrates': False,
        'max_reactions': False,
        'include_experimental': False,
        'include_two_step': False,
    }
    network.settings.update(test_settings)
    network.generate(target_product, self.num_steps)

    search = BFS(network=network,
                 max_pathways=self.max_pathways,
                 min_weight=self.min_weight)
    search.run()
    found = search.get_pathways()

    hit_limit = len(found) == self.max_pathways
    if hit_limit:
        self.log(" ~max pathways reached")

    return found
def apply_chemical_steps_molecule():
    """Apply one chemical step to the submitted SMILES and return SVGs of the new substrates.

    The ``smiles`` form field is normalised with ``rdkit_smile``.  When that
    yields ``None`` or an empty string an empty ``mol_dict`` is returned.
    Otherwise a zero-step network is generated, ``add_chemical_step`` is called
    on it, bracketed ``[n*]`` atoms (n from 1 to 99) in the new substrate SMILES
    are replaced by ``*``, and each resulting SMILES is rendered with
    ``get_images.moltosvg``.

    Returns:
        ``jsonify(result={'mol_dict': {smiles: svg, ...}})``.
    """
    smiles = rdkit_smile(request.form['smiles'])

    nothing_to_do = smiles is None or smiles == ''
    if nothing_to_do:
        return jsonify(result={'mol_dict': {}})

    network = Network(target_smiles=smiles)
    network.generate(smiles, 0, calculate_scores=False)
    substrate_nodes, _reaction_nodes = network.add_chemical_step(smiles)

    # Collapse numbered wildcard atoms such as [1*] or [42*] into a plain '*'.
    cleaned = [
        re.sub(r"\[(?:[1-9]|[1-9][0-9])\*\]", '*', node_smiles)
        for node_smiles in substrate_nodes
    ]

    mol_dict = {}
    for cleaned_smiles in cleaned:
        molecule = Chem.MolFromSmiles(cleaned_smiles)
        mol_dict[cleaned_smiles] = get_images.moltosvg(
            molecule, molSize=(200, 200), kekulize=True)

    return jsonify(result={'mol_dict': mol_dict})
def _get_retrobiocat_pathways(self, row, reaction_names, combine_enantiomers=True):
    """Return BFS pathways to ``row['product_1_smiles']`` using only the named reactions.

    Args:
        row: mapping that provides the target under ``'product_1_smiles'``.
        reaction_names: container of reaction names; only entries of
            ``network.rxns`` whose name is in it are kept.
        combine_enantiomers: value written to the network's
            ``combine_enantiomers`` setting.

    Returns:
        The result of ``BFS.get_pathways()`` after a 5-step network generation,
        with pathway scoring switched off.
    """
    target = row['product_1_smiles']

    network = Network(print_log=False)
    overrides = {
        "calculate_complexities": True,
        "calculate_substrate_specificity": False,
        "get_building_blocks": False,
        "combine_enantiomers": combine_enantiomers,
    }
    network.settings.update(overrides)

    # Only want reactions in list
    network.rxns = {
        name: network.rxns[name]
        for name in network.rxns
        if name in reaction_names
    }

    network.generate(target, 5)

    search = BFS(network=network, print_log=False, score_pathways=False)
    search.run()
    return search.get_pathways()
def bfs_with_network_generation():
    """Generate a 5-step network for 'CCCCC=O' and return up to 10000 BFS pathways."""
    net = Network()
    net.generate('CCCCC=O', 5)

    search = BFS(network=net, max_pathways=10000)
    search.run()
    return search.get_pathways()
# NOTE(review): the next line is the tail of a definition that begins before this
# chunk. Its enclosing structure is not visible here, so it is left exactly as found.
pathway.other_varients_as_nodes.append(other_pathway.list_nodes) pathways.append(pathway) return pathways


def group_pathways(pathways, scores_to_use=None, by_enzyme=True):
    """Group the given pathways and collapse each group.

    Args:
        pathways: the pathways to group; passed to ``_generate_end_nodes_dict``.
        scores_to_use: forwarded to ``_generate_end_nodes_dict``; when ``None``
            the module-level ``default_scores_to_use`` is used instead.
        by_enzyme: forwarded unchanged to ``_collapse_groups``.

    Returns:
        Whatever ``_collapse_groups`` returns for the grouped pathways.
    """
    if scores_to_use == None:
        scores_to_use = default_scores_to_use

    end_nodes_dict = _generate_end_nodes_dict(
        pathways, scores_to_use)  # groups by end_nodes, reactions, scores
    grouped_pathways = _get_grouped_pathways(
        end_nodes_dict)  # converts dict to a list of lists
    new_pathways = _collapse_groups(grouped_pathways, by_enzyme)

    return new_pathways


if __name__ == '__main__':
    # Manual run: build a network, search it, group the pathways, keep the first ten.
    from retrobiocat_web.retro.generation.network_generation.network import Network
    from retrobiocat_web.retro.generation.pathway_generation.best_first_search import BFS

    network = Network(max_nodes=300)
    network.generate('[C@H]1(C2=CC=CC=C2)NCCCC1', 5)

    bfs = BFS(network=network, max_pathways=200)
    bfs.run()
    pathways = bfs.get_pathways()

    pathways = group_pathways(pathways)
    pathways = pathways[0:10]
def task_make_network(form_data):
    """Background job: build and score a network from form data, cache it in redis, return it.

    Progress is reported through ``job.meta['progress']`` as ``'started'``,
    ``'network_generated'`` and ``'scores_calculated'``.  The result dict is
    stored in redis under the job id as JSON with a 15 minute expiry and is
    also returned.

    Args:
        form_data: mapping of submitted form values; the keys read are the ones
            indexed below.

    Returns:
        dict with the vis.js nodes and edges plus JSON-encoded graph, settings
        and attribute data for the generated network.
    """
    job = get_current_job()

    def _report(stage):
        # Record the current stage on the job so callers polling it can see progress.
        job.meta['progress'] = stage
        job.save_meta()

    _report('started')

    network = Network(
        include_experimental=bool(form_data['include_experimental']),
        include_two_step=bool(form_data['include_two_step']),
        include_requires_absence_of_water=bool(
            form_data['include_requires_absence_of_water']),
        print_log=not current_app.config['PRODUCTION'],
    )

    form_settings = {
        "allow_backwards_steps": bool(form_data['allow_backwards']),
        "remove_simple": bool(form_data['remove_small']),
        "similarity_score_threshold": float(form_data['sub_thres']),
        "combine_enantiomers": bool(form_data['combine_enantiomers']),
        "num_enzymes": 1,
        "calculate_complexities": bool(form_data['calc_complexity']),
        "calculate_substrate_specificity": bool(form_data['sub_sim']),
        "max_nodes": int(form_data['max_initial_nodes']),
        "colour_reactions": form_data['colour_reactions'],
        "colour_arrows": form_data['colour_edges'],
        "show_negative_enzymes": form_data['show_neg_enz'],
        "only_postitive_enzyme_data": not form_data['show_neg_enz'],
        "max_reactions": form_data["max_reactions"],
        'only_reviewed_activity_data': bool(form_data["only_reviewed"]),
    }
    network.update_settings(form_settings)

    scoring_mode = form_data["specificity_scoring_mode"]
    if scoring_mode == 'Product + substrates (slower)':
        network.update_settings({'specificity_score_substrates': True})

    # Scores are computed as a separate stage so progress can be reported in between.
    network.generate(form_data['target_smiles'],
                     form_data['number_steps'],
                     calculate_scores=False)
    _report('network_generated')

    network.calculate_scores()
    _report('scores_calculated')

    nodes, edges = network.get_visjs_nodes_and_edges()

    vis_options = {}
    network_name = f"Network for {network.target_smiles!s}"

    result = {
        'save_id': str(uuid.uuid4()),
        'save_links': [],
        'save_name': network_name,
        'nodes': nodes,
        'edges': edges,
        'options': json.dumps(vis_options),
        'graph_dict': json.dumps(nx.to_dict_of_lists(network.graph)),
        'target_smiles': str(network.target_smiles),
        'network_options': json.dumps(network.settings),
        'attr_dict': json.dumps(network.attributes_dict()),
        'max_reactions': int(network.settings['max_reactions']),
    }

    current_app.redis.mset({job.id: json.dumps(result)})
    current_app.redis.expire(job.id, 15 * 60)  # 15 mins * 60 seconds

    return result
def select_best_enzyme(self, network):
    """Re-select ``selected_enzyme`` on every reaction node from its specificity scores.

    Does nothing unless ``network.settings['calculate_substrate_specificity']``
    equals ``True``.  For each node in ``network.reaction_nodes`` the node's
    ``possible_enzymes`` are walked in order; an enzyme replaces the current
    selection when its score is non-zero and greater than the running best, or
    when the absolute value of its score is greater than the running best.
    After a replacement the running best becomes ``abs(score)``.

    The node attribute dicts are modified in place; nothing is returned.
    """
    enabled = network.settings['calculate_substrate_specificity'] == True
    if not enabled:
        return

    for node in network.reaction_nodes:
        attrs = network.graph.nodes[node]['attributes']
        scores = attrs['specificity_scores']

        best_score = scores[attrs['selected_enzyme']]
        # NOTE(review): this flag starts True and is only ever reassigned True,
        # so the `== True` test below can never fail.  That looks unintended
        # (presumably it should track whether the current best is a negative
        # score) - confirm the intended rule before changing it.
        best_is_negative = True

        for enzyme in attrs['possible_enzymes']:
            score = scores[enzyme]

            replaces = score > best_score and score != 0
            if not replaces:
                replaces = abs(score) > best_score and best_is_negative == True

            if replaces:
                attrs['selected_enzyme'] = enzyme
                best_score = abs(score)
                if score < 0:
                    best_is_negative = True


if __name__ == '__main__':
    from retrobiocat_web.retro.generation.network_generation.network import Network

    network = Network()
    network.generate('CCCCC=O', 4)
    # evaluator calculate scores called during generate
class BFS:
    """Best First Search over a network graph, producing pathways as lists of nodes."""

    def __init__(self,
                 network=None,
                 target=None,
                 max_pathways=50000,
                 max_pathway_length=5,
                 min_weight=1,
                 use_random=False,
                 print_log=False,
                 score_pathways=True,
                 allow_longer_pathways=False):
        """
        Best First Search object, for generating pathways from a network
        After initialising, run search using the .run() method

        Args:
            network: a network object which has been generated.  When omitted,
                a network is created for ``target`` and expanded during the search.
            target: target SMILES; only used when ``network`` is not given.
            max_pathways: the maximum number of pathways to generate before stopping
            max_pathway_length: limit on the number of reaction steps; also used to
                filter pathways in ``get_pathways``.
            min_weight: the minimum weight to assign to zero complexity change (and Stop)
            use_random: set the bfs to use weighted random selection rather than
                always picking the best
            print_log: print log messages when true.
            score_pathways: passed to ``Pathway`` as ``calc_scores``.
            allow_longer_pathways: when true, ``get_pathways`` does not drop pathways
                with more reactions than ``max_pathway_length``.
        """
        self.score_pathways = score_pathways
        self.print_log = print_log
        self.min_weight = min_weight
        self.choices = {}  # context string -> {choice: weight}
        self.max_pathways = max_pathways
        self.max_pathway_length = max_pathway_length
        self.allow_longer_pathways = allow_longer_pathways
        self.pathways = []
        self.use_random = use_random
        self.network = network
        self.generate_network = False

        if self.network is None:
            self.target = node_analysis.rdkit_smile(target, warning=True)
            self.generate_network = True
            self.network = Network(target_smiles=self.target,
                                   number_steps=self.max_pathway_length,
                                   print_log=False)
            # Zero steps: the graph is grown on demand by _check_if_should_expand_network.
            self.network.generate(self.target, 0)
            self.log('BFS - will generate network')
        else:
            self.target = self.network.target_smiles

    def log(self, msg):
        """Print ``msg`` when logging is enabled."""
        if self.print_log:
            print(msg)

    def _get_context(self, nodes):
        """ Returns the pathway context, which is a string of node numbers"""
        graph_nodes = self.network.graph.nodes
        node_numbers = sorted(
            graph_nodes[node]['attributes']['node_num'] for node in nodes)
        # Each number is followed by '-', including the last one.
        return ''.join(str(num) + '-' for num in node_numbers)

    def _expand_network(self, smi):
        """Add one step to the network from ``smi``; return the new substrate then reaction nodes."""
        new_substrates, new_reactions = self.network.add_step(smi)
        return list(new_substrates) + list(new_reactions)

    def _get_choices(self, end_nodes):
        """ Returns a dict of reaction nodes (and Stop) which are choices for the next step,
        mapped to their weights"""
        graph = self.network.graph

        choices = ['Stop']
        for node in end_nodes:
            choices.extend(graph.successors(node))

        # 'Stop' counts as zero change in complexity.
        scores = [0] + [
            graph.nodes[choice]['attributes']['change_in_complexity']
            for choice in choices[1:]
        ]

        # invert changes so decreases in complexity are favoured
        inverted = [-score for score in scores]

        # Shift everything up so that no weight falls below min_weight.
        lowest = min(inverted)
        offset = -lowest if lowest < 0 else 0
        weights = [value + self.min_weight + offset for value in inverted]

        return dict(zip(choices, weights))

    def _pick_choice(self, context):
        """ Given a context, picks an option to extend (or stop) that pathway """
        options = self.choices[context]
        choices = list(options)
        scores = list(options.values())

        if self.use_random:
            return random.choices(choices, scores, k=1)[0]

        ranked = node_analysis.sort_by_score(choices, scores, reverse=False)
        return ranked[0]

    def _add_reaction(self, reaction_choice):
        """Return (reaction + its successors, its successors) for the chosen reaction node."""
        new_end_nodes = list(self.network.graph.successors(reaction_choice))
        added_nodes = [reaction_choice] + new_end_nodes
        return added_nodes, new_end_nodes

    def _check_pathway_has_end(self, nodes):
        """True when the subgraph induced by ``nodes`` has a node with no successors."""
        pathway_subgraph = self.network.graph.subgraph(nodes)
        end_nodes = node_analysis.get_nodes_with_no_successors(pathway_subgraph)
        return len(end_nodes) != 0

    def _make_pathway(self, nodes):
        """ Create pathway object from list of nodes"""
        return Pathway(nodes, self.network, calc_scores=self.score_pathways)

    def _check_if_should_expand_network(self, end_nodes, pathway_nodes):
        """Expand the network from unexpanded end nodes while the pathway is below the step limit.

        Only active when this object created its own network.
        """
        if not self.generate_network:
            return
        if self._num_reactions(pathway_nodes) >= self.max_pathway_length:
            return
        for node in end_nodes:
            if not list(self.network.graph.successors(node)):
                self._expand_network(node)

    def _is_node_already_in_pathway(self, current_nodes, new_nodes):
        """True when any of ``new_nodes`` is already in ``current_nodes``."""
        return any(node in current_nodes for node in new_nodes)

    def _num_reactions(self, nodes):
        """Count the nodes whose ``node_type`` attribute is 'reaction'."""
        graph_nodes = self.network.graph.nodes
        return sum(
            1 for node in nodes
            if graph_nodes[node]['attributes']['node_type'] == 'reaction')

    def run(self):
        """
        Generate pathways using best first search

        Returns: list of pathways, each a list of nodes (also stored on ``self.pathways``)
        """
        self.log('Run BFS')
        self.pathways = []
        self.choices = {}

        nodes = [self.target]
        context = self._get_context(nodes)
        self._check_if_should_expand_network(nodes, nodes)
        self.choices[context] = self._get_choices(nodes)
        start_context = context  # contexts are strings, so no copy is needed

        # Each pass of the outer loop walks from the target until one choice is
        # consumed; the search ends when the target has no choices left.
        while (len(self.pathways) < self.max_pathways
               and len(self.choices[start_context]) > 0):
            nodes = [self.network.target_smiles]
            context = self._get_context(nodes)
            steps = 0

            while len(self.choices[context]) > 0:
                if steps > self.max_pathway_length:
                    # Too long: close this context off (kept as a dict like every other entry).
                    self.choices[context] = {}
                    if self._check_pathway_has_end(nodes):
                        self.pathways.append(nodes)
                    break

                best_choice = self._pick_choice(context)

                if best_choice == 'Stop':
                    if self._check_pathway_has_end(nodes):
                        self.pathways.append(nodes)
                    self.choices[context].pop('Stop')
                    steps = 0
                    break

                steps += 1
                added_nodes, new_end_nodes = self._add_reaction(best_choice)

                if self._is_node_already_in_pathway(nodes, added_nodes):
                    # Taking this reaction would revisit a node; discard the choice.
                    self.choices[context].pop(best_choice)
                    break

                extended_nodes = nodes + added_nodes
                new_context = self._get_context(extended_nodes)
                if new_context not in self.choices:
                    self._check_if_should_expand_network(new_end_nodes,
                                                         extended_nodes)
                    self.choices[new_context] = self._get_choices(new_end_nodes)

                if len(self.choices[new_context]) == 0:
                    # Everything beyond this reaction is exhausted, so drop it here too.
                    self.choices[context].pop(best_choice)
                else:
                    nodes = extended_nodes
                    context = new_context

        self.log('BFS complete')
        if len(self.pathways) >= self.max_pathways:
            self.log('Max pathways reached')

        return self.pathways

    def get_pathways(self):
        """Build Pathway objects from the node lists found by ``run``.

        Pathways with more reactions than ``max_pathway_length`` are dropped
        unless ``allow_longer_pathways`` is set.
        """
        pathway_objects = []
        for list_nodes in self.pathways:
            pathway = self._make_pathway(list_nodes)
            if (self.allow_longer_pathways
                    or len(pathway.reactions) <= self.max_pathway_length):
                pathway_objects.append(pathway)
        return pathway_objects
def task_get_pathways(form_data):
    """Background job: generate a network, search it for pathways, evaluate and package them.

    Progress is reported through ``job.meta['progress']`` as ``'started'``,
    ``'network_generated'``, ``'network_scored'``, ``'pathways_generated'`` and
    ``'pathways_scored'``.  Network data is cached in redis under
    ``"<job id>__network"`` and the pathway settings under
    ``"<job id>__pathway_settings"``, both with a one hour expiry.

    Args:
        form_data: mapping of submitted form values; the keys read are the ones
            indexed below.

    Returns:
        None.  Results are delivered through redis and the ``package_*`` helpers.
    """
    job = get_current_job()
    job.meta['progress'] = 'started'
    job.save_meta()

    network = Network(print_log=not current_app.config['PRODUCTION'],
                      include_experimental=form_data['include_experimental'],
                      include_two_step=form_data['include_two_step'],
                      include_requires_absence_of_water=bool(
                          form_data['include_requires_absence_of_water']))

    network.update_settings({
        "remove_simple": bool(form_data['remove_small']),
        "combine_enantiomers": bool(form_data['combine_enantiomers']),
        'max_nodes': int(form_data['max_nodes']),
        'similarity_score_threshold': float(form_data['sub_thres']),
        'colour_reactions': form_data['colour_reactions'],
        "colour_arrows": form_data['colour_edges'],
        "show_negative_enzymes": form_data['show_neg_enz'],
        "only_postitive_enzyme_data": not form_data['show_neg_enz'],
        'only_reviewed_activity_data': bool(form_data["only_reviewed"])
    })

    if form_data["specificity_scoring_mode"] == 'Product + substrates (slower)':
        network.update_settings({'specificity_score_substrates': True})

    # Scores are computed as a separate stage so progress can be reported in between.
    network.generate(form_data['target_smiles'],
                     form_data['number_steps'],
                     calculate_scores=False)
    job.meta['progress'] = 'network_generated'
    job.save_meta()

    network.calculate_scores()
    job.meta['progress'] = 'network_scored'
    job.save_meta()

    network_data = {
        'graph_dict': json.dumps(nx.to_dict_of_lists(network.graph)),
        'target_smiles': str(network.target_smiles),
        'network_options': json.dumps(network.settings),
        'attr_dict': json.dumps(network.attributes_dict())
    }
    network_key = f"{job.id}__network"
    current_app.redis.mset({network_key: json.dumps(network_data)})
    current_app.redis.expire(network_key, 60 * 60)

    bfs = BFS(network=network,
              max_pathways=form_data['max_pathways'],
              max_pathway_length=form_data['number_steps'],
              min_weight=float(form_data['min_weight']),
              print_log=not current_app.config['PRODUCTION'])
    bfs.run()
    pathways = bfs.get_pathways()
    job.meta['progress'] = 'pathways_generated'
    job.save_meta()

    package_all_pathways(job.id, pathways)

    pathway_evaluator = evaluate_pathways(pathways, [
        form_data['weight_num_enzymes'],
        form_data['weight_complexity'],
        form_data['weight_starting'],
        form_data['weight_known_enzymes'],
        form_data['weight_diversity']
    ])
    package_evaluated_pathways(pathway_evaluator.df, job.id)
    package_visjs_pathways(job.id)
    job.meta['progress'] = 'pathways_scored'
    job.save_meta()

    options = {}
    if form_data['hierarchical'] == True:
        options.update({
            "layout": {
                "improvedLayout": 'true',
                'hierarchical': {
                    'direction': 'DU',
                    "sortMethod": "hubsize",
                    "nodeSpacing": 200,
                    "treeSpacing": 400
                }
            }
        })

    pathway_settings = {
        'weight_num_enzymes': form_data['weight_num_enzymes'],
        'weight_complexity': form_data['weight_complexity'],
        'weight_starting': form_data['weight_starting'],
        'weight_known_enzymes': form_data['weight_known_enzymes'],
        'weight_diversity': form_data['weight_diversity'],
        'options': options
    }

    settings_key = f"{job.id}__pathway_settings"
    current_app.redis.mset({settings_key: json.dumps(pathway_settings)})
    # Fix: give the key written above a TTL.  Previously only the bare job id
    # was expired, so the settings entry was left in redis with no expiry.
    current_app.redis.expire(settings_key, 60 * 60)
    # Original call kept in case another helper stores data under the bare job id.
    current_app.redis.expire(job.id, 60 * 60)
# NOTE(review): the next line is the tail of a definition that begins before this
# chunk. Its enclosing structure is not visible here, so it is left exactly as found.
smiles = node_analysis.rdkit_smile(smiles) listSmiles, listReactions = self.retrosynthesisEngine.single_step( smiles, self.retrorules_rxns, self.network.graph) self.network.get_node_types() if calculate_scores == True: self.network.evaluator.calculate_scores(self.network) return listSmiles, listReactions


if __name__ == '__main__':
    # Manual run: generate a 2-step network, then set the retrorules diameters,
    # load them and add one retrorules step for a different SMILES.
    from retrobiocat_web.retro.generation.network_generation.network import Network

    target = 'CCCCCO'
    network = Network()
    network.generate(target, 2)

    network.retrorules.diameters = [2]
    network.retrorules.load()
    network.retrorules.add_step('CCCCCC(C)=O')

    # The bare string literal below is an expression statement and is never
    # executed as code; it holds a disabled snippet and is kept byte-for-byte.
    """ file = str(Path(__file__).parents[3]) + '/data/reaction_rules/retrorules/retrorules_all.pkl' rxns = pickle.load(open(file, "rb")) for d in rxns: print(d) file_name = 'rules' + str(d) + '.pkl' with open(file_name, 'wb') as handle: pickle.dump(rxns[d], handle, protocol=pickle.HIGHEST_PROTOCOL) """