def get_pathways_to_substrates(row, pathways, ignore_substrate_two):
    """Select the pathways whose end nodes contain the substrates of ``row``.

    Args:
        row: mapping (e.g. a DataFrame row) providing the keys
            'Substrate 1 SMILES' and 'Substrate 2 SMILES'.
        pathways: iterable of pathway objects exposing ``end_nodes``.
        ignore_substrate_two: when truthy, a pathway only needs substrate
            one among its end nodes.

    Returns:
        list: the matching pathways, in their original order.
    """
    substrate_one = rdkit_smile(row['Substrate 1 SMILES'])
    substrate_two = rdkit_smile(row['Substrate 2 SMILES'])

    pathways_to_keep = []
    for pathway in pathways:
        # Substrate one is always required.
        if substrate_one not in pathway.end_nodes:
            continue

        if ignore_substrate_two:
            pathways_to_keep.append(pathway)
        # When substrate two is not ignored it must exist (not None) and
        # also be an end node; a None second substrate never matches.
        elif substrate_two is not None and substrate_two in pathway.end_nodes:
            pathways_to_keep.append(pathway)

    return pathways_to_keep
def _get_pathways_to_substrates(self, row, pathways, ignore_substrate_two):
    """Select the pathways whose end nodes contain the substrates of ``row``.

    Args:
        row: mapping providing the keys 'substrate_1_smiles' and
            'substrate_2_smiles'.
        pathways: iterable of pathway objects exposing ``end_nodes``.
        ignore_substrate_two: when truthy, a pathway only needs substrate
            one among its end nodes.

    Returns:
        list: the matching pathways, in their original order.
    """
    substrate_one = rdkit_smile(row['substrate_1_smiles'])
    substrate_two = rdkit_smile(row['substrate_2_smiles'])

    pathways_to_keep = []
    for pathway in pathways:
        # Substrate one is always required.
        if substrate_one not in pathway.end_nodes:
            continue

        if ignore_substrate_two:
            pathways_to_keep.append(pathway)
        # When substrate two is not ignored it must exist (not None) and
        # also be an end node; a None second substrate never matches.
        elif substrate_two is not None and substrate_two in pathway.end_nodes:
            pathways_to_keep.append(pathway)

    return pathways_to_keep
def fragment_molecule():
    """Fragment the posted SMILES with BRICS and return an SVG per fragment.

    Reads ``smiles`` from the request form. The JSON response carries
    ``result['mol_dict']``, mapping each fragment SMILES to its SVG; the
    mapping is empty when the SMILES is rejected by ``rdkit_smile`` or is
    an empty string.
    """
    smiles = rdkit_smile(request.form['smiles'])
    if smiles is None or smiles == '':
        return jsonify(result={'mol_dict': {}})

    parent = Chem.MolFromSmiles(smiles)
    fragments = list(
        BRICSDecompose(parent, minFragmentSize=5, keepNonLeafNodes=True))

    # Collapse numbered dummy atoms ([1*] .. [99*]) into a plain '*'.
    cleaned = [re.sub(r"\[(?:[1-9]|[1-9][0-9])\*\]", '*', fragment)
               for fragment in fragments]

    # Fragments that became identical after cleaning share a single entry.
    mol_dict = {
        fragment: get_images.moltosvg(Chem.MolFromSmiles(fragment),
                                      molSize=(200, 200), kekulize=True)
        for fragment in cleaned
    }
    return jsonify(result={'mol_dict': mol_dict})
def apply_chemical_steps_molecule():
    """Apply one chemical step to the posted SMILES and return SVGs.

    Reads ``smiles`` from the request form, builds a ``Network`` for it,
    calls ``add_chemical_step`` and renders every new substrate node. The
    JSON response carries ``result['mol_dict']`` (SMILES -> SVG); the
    mapping is empty when the SMILES is rejected by ``rdkit_smile`` or is
    an empty string.
    """
    smiles = rdkit_smile(request.form['smiles'])
    if smiles is None or smiles == '':
        return jsonify(result={'mol_dict': {}})

    network = Network(target_smiles=smiles)
    network.generate(smiles, 0, calculate_scores=False)
    # Only the substrate nodes are rendered; the reaction nodes are unused.
    substrate_nodes, _reaction_nodes = network.add_chemical_step(smiles)

    # Collapse numbered dummy atoms ([1*] .. [99*]) into a plain '*'.
    cleaned = [re.sub(r"\[(?:[1-9]|[1-9][0-9])\*\]", '*', node)
               for node in substrate_nodes]

    mol_dict = {
        node: get_images.moltosvg(Chem.MolFromSmiles(node),
                                  molSize=(200, 200), kekulize=True)
        for node in cleaned
    }
    return jsonify(result={'mol_dict': mol_dict})
def process_image(filename, osra_api_url, remove=True, log=False):
    """Send a stored image to the OSRA API and return the SMILES it finds.

    Args:
        filename: name of the image inside ``DATA_FOLDER``.
        osra_api_url: base URL of the OSRA service; the image is posted to
            ``<osra_api_url>/process_image``.
        remove: when truthy, delete the image file after the request.
        log: when truthy, print the processed filename and each SMILES.

    Returns:
        list of SMILES strings (the ``rdkit_smile`` form where that is not
        None, otherwise the string returned by the service), or ``False``
        when ``filename`` does not end with any of ``ALLOWED_FILE_TYPES``.
    """
    save_path = f'{DATA_FOLDER}/{filename}'

    if not any(filename.endswith(filetype) for filetype in ALLOWED_FILE_TYPES):
        return False

    url = f'{osra_api_url}/process_image'
    # Close the handle before the file is removed: the original leaked the
    # open file, and removing a file that is still open fails on Windows.
    with open(save_path, 'rb') as image_file:
        response = requests.post(url, files={'image': image_file})

    if remove:
        os.remove(save_path)

    list_smi = json.loads(response.text)

    # If rdkit can load the SMILES, return the rdkit form; else keep as-is.
    list_rdkit_smi = []
    for smi in list_smi:
        rd_smi = rdkit_smile(smi)
        list_rdkit_smi.append(rd_smi if rd_smi is not None else smi)

    if log:
        print(f"-- Processed file: {filename} --")
        for smi in list_rdkit_smi:
            print(smi)

    return list_rdkit_smi
def save_my_molecule():
    """Create or rename one of the current user's saved molecules.

    Reads ``smiles`` and ``name`` from the request form. Responds with a
    'danger' status when ``rdkit_smile`` returns None for the SMILES;
    otherwise the user's molecule with that SMILES is looked up (or
    created), its name is set and it is saved.
    """
    user = user_datastore.get_user(current_user.id)
    smiles = request.form['smiles']
    name = request.form['name']

    if rdkit_smile(smiles) is None:
        result = {'status': 'danger',
                  'msg': 'Please enter a valid SMILES',
                  'issues': []}
        return jsonify(result=result)

    mol_query = MyMolecule.objects(db.Q(owner=user) & db.Q(smiles=smiles))
    if len(mol_query) != 0:
        my_mol = mol_query[0]
    else:
        my_mol = MyMolecule(owner=user, smiles=smiles)
        # NOTE(review): the SVG is rendered only for newly created
        # molecules here - confirm this matches the intended nesting.
        try:
            mol = Chem.MolFromSmiles(smiles)
            my_mol.svg = get_images.moltosvg(mol, molSize=(100, 100),
                                             kekulize=True)
        except Exception:
            # Rendering is best effort; store an empty SVG on failure, but
            # no longer swallow KeyboardInterrupt / SystemExit.
            my_mol.svg = ''

    my_mol.name = name
    my_mol.save()

    result = {'status': 'success',
              'msg': 'Molecule saved',
              'issues': []}
    return jsonify(result=result)
def _negative_tests(self, negative_tests, list_rxns):
    """Check that none of the reactions fire on the negative test products.

    Args:
        negative_tests: YAML text that loads to an iterable of product
            SMILES on which the reactions must NOT produce an outcome.
        list_rxns: the reactions under test.

    Returns:
        True once all tests have been run (failures are reported through
        ``self.state`` / ``self.issues``), or None when the YAML cannot be
        loaded, ``rdkit_smile`` raises for a SMILES, or running the
        reactions raises.
    """
    empty_network = Network()
    rule_applicator = RuleApplicator(empty_network)
    rxns = {'tests': list_rxns}

    try:
        # NOTE(review): yaml.FullLoader is applied to supplied text; if
        # this text can come from untrusted users consider yaml.safe_load.
        negative_tests = yaml.load(negative_tests, Loader=yaml.FullLoader)
    except Exception:
        self.state = 'danger'
        self.issues.append('Could not load negative tests yaml')
        return

    # An empty YAML document loads as None; treat it as "no tests" instead
    # of failing with a TypeError when iterating.
    if negative_tests is None:
        negative_tests = []

    for test_product in negative_tests:
        try:
            rdkit_smile(test_product)
        except Exception:
            self.state = 'danger'
            self.issues.append(
                f'Negative test SMILE: {test_product} not accepted by rdkit'
            )
            return

    # The original ran this loop twice (once unguarded, once inside the
    # try), reporting every failure twice and letting exceptions escape.
    try:
        for test_product in negative_tests:
            reaction_outcomes = self._apply_reactions(
                empty_network, rule_applicator, test_product, rxns)
            if len(reaction_outcomes) != 0:
                self.state = 'danger'
                self.issues.append(
                    f'Reaction should not be outcomes for tested negative product: {test_product}'
                )
    except Exception:
        self.state = 'danger'
        self.issues.append('Problem running negative tests')
        return

    return True
def __init__(self, graph=None, target_smiles='', number_steps=0,
             max_nodes=False, include_two_step=False,
             include_experimental=False,
             include_requires_absence_of_water=False, print_log=False):
    """Set up the network state, its reaction set and default settings.

    Args:
        graph: directed graph to use; a fresh ``nx.DiGraph`` is created
            when omitted (None).
        target_smiles: SMILES of the target, normalised via ``rdkit_smile``.
        number_steps: stored on the instance as ``number_steps``.
        max_nodes: stored in ``settings['max_nodes']``.
        include_two_step: forwarded to ``RetroBioCat_Reactions`` and stored
            in the settings.
        include_experimental: forwarded to ``RetroBioCat_Reactions`` and
            stored in the settings.
        include_requires_absence_of_water: forwarded to
            ``RetroBioCat_Reactions`` and stored in the settings.
        print_log: forwarded to ``NetworkEvaluator`` and stored in
            ``settings['print_log']``.
    """
    # A default of ``nx.DiGraph()`` in the signature is evaluated once, so
    # every network created without a graph would share (and mutate) the
    # same graph object. Create a new graph per instance instead.
    self.graph = nx.DiGraph() if graph is None else graph
    self.number_steps = number_steps
    self.target_smiles = rdkit_smile(target_smiles, warning=True)

    self.rxn_obj = RetroBioCat_Reactions(
        include_experimental=include_experimental,
        include_two_step=include_two_step,
        include_requires_absence_of_water=include_requires_absence_of_water)
    self.rxns = self.rxn_obj.rxns

    self.substrate_nodes = []
    self.reaction_nodes = []
    self.end_nodes = []

    self.evaluator = NetworkEvaluator(self, print_log=print_log)
    self.retrosynthesisEngine = RetrosynthesisEngine(self)
    #self.retrorules = RetroRules(self)

    self.settings = {"allow_backwards_steps": False,
                     "add_if_precursor" : True,
                     "print_precursors" : False,
                     "combine_enantiomers" : True,
                     "clean_brackets" : True,
                     "print_log" : print_log,
                     "remove_simple": False,
                     "similarity_score_threshold": 0.6,
                     "num_enzymes": 1,
                     "complexity_score": "SCS",
                     "calculate_complexities" : True,
                     "calculate_substrate_specificity" : True,
                     "get_building_blocks" : True,
                     'building_blocks_db_mode' : 'in_stock',
                     "max_nodes": max_nodes,
                     "prune_steps" : 1,
                     "rank_pathways_by_enzyme" : True,
                     "only_postitive_enzyme_data": False,
                     "colour_substrates" : 'Starting material',  #Starting material, Relative complexity, or Off
                     "colour_reactions" : 'Substrate specificity',  #Substrate specificity, Complexity change, or Off
                     "colour_arrows" : "None",  #None or Complexity change
                     "show_negative_enzymes": True,
                     "display_cofactors" : True,
                     "molSize": (300,300),
                     'prune_on_substrates' : False,
                     'max_reactions' : False,
                     'specificity_score_substrates' : False,
                     'include_experimental' : include_experimental,
                     'include_two_step': include_two_step,
                     'include_requires_absence_of_water': include_requires_absence_of_water,
                     'rr_min_diameter': 2,
                     'rr_min_products': 10,
                     'rr_max_reactions': 1,
                     'aizynth_reaction_mode': 'policy',
                     'retrobiocat_reaction_mode': 'complexity',
                     'only_reviewed_activity_data': False}
def add_relative_complexity(graph, target_smile):
    """Annotate substrate nodes with their complexity relative to the target.

    For every node whose ``attributes['node_type']`` is 'substrate' and
    that has no 'relative_complexity' yet, store the node's complexity
    minus the complexity of the target node. The graph is modified in
    place and also returned.
    """
    target_node = rdkit_smile(target_smile)
    target_complexity = graph.nodes[target_node]['attributes']['complexity']

    for node in list(graph):
        attributes = graph.nodes[node]['attributes']
        if attributes['node_type'] != 'substrate':
            continue
        if 'relative_complexity' in attributes:
            continue  # already computed, leave untouched
        attributes['relative_complexity'] = (
            attributes['complexity'] - target_complexity)

    return graph
def apply_retrorules(self, smile, rxns, explicit_hydrogens=False):
    """Apply every reaction in ``rxns`` to ``smile`` and collect the products.

    Args:
        smile: SMILES of the molecule the reactions are run on.
        rxns: dict mapping reaction name -> reaction object exposing
            ``RunReactants``.
        explicit_hydrogens: when truthy, hydrogens are added before the
            reactions are run and removed again from each product.

    Returns:
        dict mapping reaction name -> list of product SMILES lists (one
        list per distinct reaction outcome). Reactions without a valid
        product are omitted; an unparsable ``smile`` gives ``{}``.
    """
    try:
        substrate_molecule = AllChem.MolFromSmiles(smile)
    except Exception:
        return {}

    # RDKit returns None for a SMILES it cannot parse instead of raising,
    # so the try/except above does not cover that case on its own.
    if substrate_molecule is None:
        return {}

    if explicit_hydrogens:
        substrate_molecule = rdmolops.AddHs(substrate_molecule)

    rxn_product_dict = {}
    for rxn_name, rxn in rxns.items():
        try:
            products = rxn.RunReactants((substrate_molecule, ))
        except Exception:
            products = []
            print('Error running reactants for: ' + str(smile))

        smiles_products = []
        for product in products:
            sub_list = []
            for product_mol in product:
                # Fall back to the unsplit molecule if splitting fails.
                fragments = [product_mol]

                if explicit_hydrogens:
                    product_mol = rdmolops.RemoveHs(product_mol)

                try:
                    fragments = rdmolops.GetMolFrags(product_mol, asMols=True)
                except Exception:
                    pass

                for fragment in fragments:
                    # Best effort: fragments that cannot be converted or
                    # validated are skipped.
                    try:
                        p_smile = AllChem.MolToSmiles(fragment)
                        p_smile = rdkit_smile(p_smile)
                        if self._check_valid_smile(p_smile, rxn_name=rxn_name):
                            sub_list.append(p_smile)
                    except Exception:
                        pass

            # Keep each distinct, non-empty outcome once.
            if sub_list and sub_list not in smiles_products:
                smiles_products.append(sub_list)

        if smiles_products:
            rxn_product_dict[rxn_name] = smiles_products

    return rxn_product_dict
def initialise_graph(self, target_smiles=''):
    """Reset ``self.graph`` to a new graph holding only the target node.

    A non-empty ``target_smiles`` replaces ``self.target_smiles`` (after
    ``rdkit_smile``); otherwise the existing target is reused.

    Returns:
        A one-element list containing the target SMILES.
    """
    if target_smiles != '':
        self.target_smiles = rdkit_smile(target_smiles, warning=True)

    self._log(f" -initialise graph, target smiles: {self.target_smiles}")

    self.graph = nx.DiGraph()
    target_attributes = {
        'name': self.target_smiles,
        'node_type': 'substrate',
        'node_num': 0,
        'substrate_num': 1,
    }
    self.graph.add_node(self.target_smiles, attributes=target_attributes)

    return [self.target_smiles]
def task_get_spec_data(form_data):
    """Background job: query substrate specificity data for a form submission.

    Marks the current job as started, runs
    ``SubstrateSpecificityScorer.querySpecificityDf`` with the form values
    and returns the processed activity records, or ``[]`` when the query
    yields None or an empty frame.
    """
    job = get_current_job()
    job.meta['progress'] = 'started'
    job.save_meta()
    print('Started')

    enzyme_names = form_data['enzymes'].split(", ")
    reactions = form_data['reactions'].split(", ")

    # An empty target is passed through untouched.
    target = form_data['target_smiles']
    product = rdkit_smile(target) if target != '' else target

    similarity_cutoff = form_data['similarity']
    num_choices = form_data['num_choices']
    data_level = form_data['data_level']
    max_hits = form_data['max_hits']
    include_auto_data = bool(form_data['auto_data'])
    only_reviewed = bool(form_data['only_reviewed'])

    scorer = molecular_similarity.SubstrateSpecificityScorer(print_log=False)
    activity_df = scorer.querySpecificityDf(
        product, reactions, enzyme_names,
        dataLevel=data_level,
        numEnzymes=num_choices,
        simCutoff=similarity_cutoff,
        numHits=max_hits,
        include_auto_generated=include_auto_data,
        only_reviewed=only_reviewed)

    if activity_df is None:
        print('Activity df is none')
        return []

    if len(activity_df.index) == 0:
        print('Len activity df index is 0')
        return []

    activity_df = activity_df[process_activity_data.COLUMNS].round(2)
    # Replace NaN with '' and booleans with their string names.
    for old_value, new_value in ((np.nan, ''), (True, 'True'), (False, 'False')):
        activity_df.replace(old_value, new_value, inplace=True)

    records = activity_df.to_dict(orient='records')
    records = process_activity_data.process_activity_data(records)
    return process_activity_data.smiles_to_svg(records)
def add_step(self, smiles, calculate_scores=True):
    """Add a single retrosynthetic step to the graph from one SMILES node.

    Args:
        smiles: SMILES of the node to expand (normalised via
            ``node_analysis.rdkit_smile``).
        calculate_scores: when truthy, re-run the network evaluator after
            the step has been added.

    Returns:
        tuple: the two values returned by
        ``retrosynthesisEngine.single_step`` (new SMILES, new reactions).
    """
    smiles = node_analysis.rdkit_smile(smiles)

    new_smiles, new_reactions = self.retrosynthesisEngine.single_step(
        smiles, self.retrorules_rxns, self.network.graph)

    self.network.get_node_types()

    if calculate_scores:
        self.network.evaluator.calculate_scores(self.network)

    return new_smiles, new_reactions
def _split_substrates_to_list(self, df):
    """Add a 'Substrates_list' column holding the parsed substrate SMILES.

    Each string in the 'Substrates' column is split on ", " and every part
    is passed through ``rdkit_smile``; non-string cells give an empty
    list. The frame is modified in place and returned.
    """
    def as_smiles_list(cell):
        # Non-string cells (e.g. missing values) carry no substrates.
        if not isinstance(cell, str):
            return []
        return [rdkit_smile(part) for part in cell.split(", ")]

    df['Substrates_list'] = [as_smiles_list(row['Substrates'])
                             for _, row in df.iterrows()]
    return df
def custom_reaction(self, product_smiles, substrate_smiles, reaction_name):
    """Add a user-defined reaction to ``self.graph`` and rescore the network.

    Returns:
        tuple: (new substrate nodes plus the product SMILES, new reaction
        nodes).
    """
    product_smiles = rdkit_smile(product_smiles, warning=True)

    added_substrates, added_reactions = self.retrosynthesisEngine.custom_reaction(
        self.graph, product_smiles, substrate_smiles, reaction_name)

    # Register the new nodes, making sure the product itself is tracked.
    self.substrate_nodes += added_substrates
    if product_smiles not in self.substrate_nodes:
        self.substrate_nodes.append(product_smiles)
    self.reaction_nodes += added_reactions

    self._log(f'Custom reaction added: {product_smiles}<--{added_reactions}<--{added_substrates}')

    self.calculate_scores()

    # The product is reported alongside the new substrates.
    added_substrates.append(product_smiles)
    return added_substrates, added_reactions
def __init__(self, network=None, target=None, max_pathways=50000,
             max_pathway_length=5, min_weight=1, use_random=False,
             print_log=False, score_pathways=True,
             allow_longer_pathways=False):
    """
    Best First Search object, for generating pathways from a network

    After initialising, run search using the .run() method

    Args:
        network: a network object which has been generated. When omitted
            (None), a network is generated from ``target`` instead.
        target: SMILES of the target; only used when no network is given.
        max_pathways: the maximum number of pathways to generate before stopping
        max_pathway_length: stored on the instance and used as
            ``number_steps`` when a network has to be generated.
        min_weight: the minimum weight to assign to zero complexity change (and Stop)
        use_random: set the bfs to use weighted random selection rather than always picking the best
        print_log: stored on the instance as ``print_log``.
        score_pathways: stored on the instance as ``score_pathways``.
        allow_longer_pathways: stored on the instance as
            ``allow_longer_pathways``.
    """
    self.score_pathways = score_pathways
    self.print_log = print_log
    self.min_weight = min_weight
    self.choices = {}
    self.max_pathways = max_pathways
    self.max_pathway_length = max_pathway_length
    self.allow_longer_pathways = allow_longer_pathways
    self.pathways = []
    self.use_random = use_random

    self.network = network
    self.generate_network = False

    # Identity check: None is a singleton and ``==`` could be intercepted
    # by a custom __eq__ on the network object.
    if self.network is None:
        self.target = node_analysis.rdkit_smile(target, warning=True)
        self.generate_network = True
        self.network = Network(target_smiles=self.target,
                               number_steps=self.max_pathway_length,
                               print_log=False)
        self.network.generate(self.target, 0)
        self.log('BFS - will generate network')
    else:
        self.target = self.network.target_smiles
def _lit_end_nodes_as_rdkit_smiles(self, lit_end_nodes):
    """Return ``rdkit_smile`` applied to every node, in order."""
    return [rdkit_smile(end_node) for end_node in lit_end_nodes]
def convert_to_rdkit(smi):
    """Return ``rdkit_smile(smi)``, or None if the conversion raises."""
    try:
        return rdkit_smile(smi)
    except Exception:
        # Conversion errors map to None; unlike a bare ``except`` this
        # lets KeyboardInterrupt / SystemExit propagate.
        return None
def is_accepted_by_rdkit(form, field):
    """Form validator: reject non-empty SMILES that rdkit cannot handle.

    Args:
        form: the form being validated (unused).
        field: the field whose ``data`` holds the SMILES.

    Raises:
        ValidationError: when ``field.data`` is not an empty string and
            ``node_analysis.rdkit_smile`` returns None for it.
    """
    # The SMILES is converted first, as before; an empty field is allowed.
    if node_analysis.rdkit_smile(field.data) is None and field.data != '':
        raise ValidationError('SMILES not accepted by rdkit')
def df_to_json_tabulate(df, img_path='static/specificity_images/'):
    """Build the column definitions and row data for a specificity table.

    Args:
        df: DataFrame with the columns 'Substrate 1 SMILES',
            'Substrate 2 SMILES' and 'Product 1 SMILES'.
        img_path: directory handed to ``get_images.get_images_of_substrates``
            when the structure images are generated.

    Returns:
        tuple: ``(table_columns, table_data)``. ``table_data`` holds one
        dict per row with the id, the three SMILES (``''`` where
        ``rdkit_smile`` returned None) and an ``<img>`` tag for each.
    """
    table_columns = [
        {'title': "Substrate 1", 'field': "Substrate 1"},
        {'title': "Substrate 2", 'field': "Substrate 2"},
        {'title': "Product 1", 'field': "Product 1"},
        {'title': "Substrate 1 Structure", 'field': "Substrate 1 Structure",
         'formatter': "html", 'height': 50},
        {'title': "Substrate 2 Structure", 'field': "Substrate 2 Structure",
         'formatter': "html", 'height': 50, 'variableHeight': 'true'},
        {'title': "Product 1 Structure", 'field': "Product 1 Structure",
         'formatter': "html", 'variableHeight': 'true'},
        {'title': "Similarity", 'field': "Similarity"},
        {'title': "Active", 'field': "Active", 'formatter': "tickCross"},
    ]

    # NOTE(review): 'Similarity' and 'Active' columns are declared above,
    # but no row data is produced for them here - confirm this is intended.
    molecule_fields = ('Substrate 1', 'Substrate 2', 'Product 1')

    table_data = []
    for index, row in df.iterrows():
        smiles_by_field = {}
        for field_name in molecule_fields:
            smiles = node_analysis.rdkit_smile(str(row[field_name + ' SMILES']))
            smiles_by_field[field_name] = '' if smiles is None else smiles

        for smiles in smiles_by_field.values():
            if smiles != '':
                try:
                    get_images.get_images_of_substrates([smiles],
                                                        img_dir=img_path)
                except Exception:
                    # Image generation is best effort: a failed render only
                    # leaves a broken image link in the table.
                    pass

        row_dict = {'id': index}
        for field_name in molecule_fields:
            smiles = smiles_by_field[field_name]
            row_dict[field_name] = smiles
            # NOTE(review): the src prefix is fixed and does not follow a
            # non-default ``img_path`` - confirm before relying on it.
            row_dict[field_name + ' Structure'] = (
                '<img src="static/specificity_images/'
                + str(get_images.apply_smiles_to_filename_check(smiles))
                + '.png">')

        table_data.append(row_dict)

    return table_columns, table_data
def _rdkit_products(self, listSmi):
    """Return ``rdkit_smile`` applied to every SMILES in ``listSmi``, in order."""
    return [rdkit_smile(product_smi) for product_smi in listSmi]