def extract_side_chains(mol, remove_duplicates=False, mark='[*]'):
    """
    Extract side chains from a smiles string. Core is handled as Murcko scaffold.

    :param mol: {str} smiles string of a molecule.
    :param remove_duplicates: {bool} Keep or remove duplicates. When True, the first
        occurrence of every side chain is kept, in order of appearance.
    :param mark: {str} text that replaces the numbered dummy atoms (e.g. ``[1*]``)
        marking the attachment points.
    :return: {list} smiles strings of the side chains, attachment points replaced by
        ``mark``. An empty list is returned when the scaffold / side chain computation
        raises, and empty fragments are never included.
    """
    import re  # local import: only needed by this function

    m1 = MolFromSmiles(mol)
    try:
        core = MurckoScaffold.GetScaffoldForMol(m1)
        side_chain = ReplaceCore(m1, core)
        # isomericSmiles adds a number to the dummy atoms.
        smi = MolToSmiles(side_chain, isomericSmiles=True)
    except Exception:
        # Best effort, as before: any failure while splitting the molecule
        # yields "no side chains" (but KeyboardInterrupt etc. now propagate).
        return []

    # Replace every numbered dummy atom, not just labels 0-19. A callable is
    # used so that backslashes in `mark` are never interpreted by `re`.
    smi = re.sub(r'\[\d+\*\]', lambda _match: mark, smi)

    # ''.split('.') yields [''], so drop empty fragments explicitly.
    chains = [chain for chain in smi.split('.') if chain]
    if remove_duplicates:
        return list(dict.fromkeys(chains))
    return chains
def canonicalize(mol_list, showprogress=False):
    """
    Remove duplicate molecules by comparing their SMILES strings.

    Every non-None molecule is written to SMILES with ``MolToSmiles``, duplicate
    strings are dropped (first occurrence wins, so the output order is
    deterministic) and the remaining strings are parsed back with
    ``MolFromSmiles``.

    :param mol_list: iterable of molecule objects; ``None`` entries are skipped.
    :param showprogress: {bool} print a header and wrap both passes in ``tqdm``.
    :return: list with one re-parsed molecule per distinct SMILES string.
    """
    if showprogress:
        print('Canonicalising mols')
        progress = tqdm
    else:
        def progress(items):
            return items

    # dict.fromkeys de-duplicates while keeping first-seen order.
    unique_smiles = dict.fromkeys(
        MolToSmiles(mol) for mol in progress(mol_list) if mol is not None
    )
    return [MolFromSmiles(smi) for smi in progress(list(unique_smiles))]
def get_SMILES_objects(mols):
    """
    Convert one RDKit molecule, or a list/tuple of them, to SMILES.

    Each molecule is written with ``MolToSmiles`` and the result is passed
    through ``CanonSmiles``.

    :param mols: a single ``rdkit.Chem.rdchem.Mol`` (or subclass) or a list/tuple
        of molecules.
    :return: a list of SMILES strings for a list/tuple input, a single SMILES
        string for a single molecule, and ``None`` for any other input type.
    """
    if isinstance(mols, (list, tuple)):
        return [CanonSmiles(MolToSmiles(mol)) for mol in mols]

    # Recognise RDKit molecules (including subclasses) by class identity
    # rather than by comparing the repr of the type.
    is_rdkit_mol = any(
        cls.__module__ == 'rdkit.Chem.rdchem' and cls.__name__ == 'Mol'
        for cls in type(mols).__mro__
    )
    if is_rdkit_mol:
        return CanonSmiles(MolToSmiles(mols))

    # Unsupported input: keep the historical behaviour of returning None.
    return None
def canonicalize(smi_list, showprogress=False, min_heavy_atoms=17, max_logp=5):
    """
    Canonicalise, de-duplicate and filter a list of SMILES strings.

    SMILES that ``MolFromSmiles`` cannot parse are dropped. The remaining
    molecules are written back with ``MolToSmiles``, de-duplicated (first
    occurrence wins, so the output order is deterministic) and kept only when
    they have more than ``min_heavy_atoms`` heavy atoms and a Crippen logP of
    at most ``max_logp``.

    :param smi_list: iterable of SMILES strings.
    :param showprogress: {bool} print status messages and wrap both passes in ``tqdm``.
    :param min_heavy_atoms: {int} molecules need strictly more heavy atoms than this.
    :param max_logp: {float} largest accepted ``Crippen.MolLogP`` value (inclusive).
    :return: list of SMILES strings that passed both filters.
    """
    if showprogress:
        print('Canonicalising mols')
        progress = tqdm
    else:
        def progress(items):
            return items

    canonical = {}
    for smi in progress(smi_list):
        mol = MolFromSmiles(smi)
        if mol is not None:
            canonical[MolToSmiles(mol)] = None
    unique_smiles = list(canonical)

    if showprogress:
        print('Size of unfiltered final library: {}'.format(len(unique_smiles)))
        print('Filtering by n_heavy and logP:')

    final_list = []
    for smi in progress(unique_smiles):
        mol = MolFromSmiles(smi)
        # logP is only computed for molecules that pass the size filter.
        if mol.GetNumHeavyAtoms() > min_heavy_atoms and Crippen.MolLogP(mol) <= max_logp:
            final_list.append(smi)
    return final_list
def test_remove_stereo():
    """Check the SMILES produced after ``Filters.remove_stereo`` and ``Filters.commute_inchi``."""
    # Input SMILES carries a chiral tag; the expected output has none.
    chiral_input = MolFromSmiles('C[C@@H](C(=O)[O-])O')
    flattened = Filters.remove_stereo(chiral_input)
    assert MolToSmiles(flattened) == 'CC(O)C(=O)[O-]'

    # Input InChI carries a double-bond stereo layer (/b...); the expected
    # SMILES has no '/' or '\' bond markers.
    inchi_input = MolFromInchi(
        'InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+')
    flattened = Filters.remove_stereo(inchi_input)
    assert MolToSmiles(flattened) == 'OC1=NC(c2c[nH]c3ccc(O)cc23)=CC1=C1C(O)=Nc2ccccc21'

    # Expected to change tautomerism
    commuted = Filters.commute_inchi(flattened)
    assert MolToSmiles(commuted) == 'O=C1NC(C2=CNC3=C2C=C(O)C=C3)=CC1=C1C(=O)NC2=CC=CC=C21'
def AddMurckoToFrame(frame, molCol='ROMol', MurckoCol='Murcko_SMILES', Generic=False):
    '''
    Adds column with SMILES of Murcko scaffolds to pandas DataFrame.

    Generic set to true results in SMILES of generic framework.

    :param frame: pandas DataFrame, modified in place.
    :param molCol: name of the column holding the molecules.
    :param MurckoCol: name of the column that receives the scaffold SMILES.
    :param Generic: if True, the scaffold is passed through
        ``MurckoScaffold.MakeScaffoldGeneric`` before being written to SMILES.
    :return: None; the result is stored in ``frame[MurckoCol]``.
    '''
    def scaffold_smiles(mol):
        scaffold = MurckoScaffold.GetScaffoldForMol(mol)
        if Generic:
            scaffold = MurckoScaffold.MakeScaffoldGeneric(scaffold)
        return MolToSmiles(scaffold)

    # Only the molecule column is needed, so map over it directly instead of
    # building a row object per record with frame.apply(axis=1); this also
    # yields an empty column (rather than an empty frame) for an empty input.
    frame[MurckoCol] = frame[molCol].map(scaffold_smiles)
def retrieve_fragments(mol):
    """
    Split a molecule into its disconnected fragments.

    The molecule is written to SMILES and split on ``"."``; every non-empty
    fragment string is parsed back with ``MolFromSmiles``.

    :param mol: molecule accepted by ``MolToSmiles``.
    :return: tuple ``(fragments, fragment_smiles)`` of equal length, where
        ``fragments[i]`` is the molecule parsed from ``fragment_smiles[i]``.
    """
    # Filter before parsing so that empty strings are neither parsed nor
    # reported, which keeps both returned lists aligned.
    fragment_smiles = [smi for smi in MolToSmiles(mol).split(".") if smi != ""]
    fragments = [MolFromSmiles(smi) for smi in fragment_smiles]
    return fragments, fragment_smiles
def test_keep_biggest():
    """Check which component ``Filters.keep_biggest`` retains for multi-component inputs."""
    # (input SMILES, expected SMILES of the retained component)
    smiles_cases = [
        ('CCCC.CC', 'CCCC'),
        ('CCCCC.CC.[H].CCC', 'CCCCC'),
    ]
    for source, expected in smiles_cases:
        kept = Filters.keep_biggest(MolFromSmiles(source))
        assert MolToSmiles(kept) == expected

    # (input InChI, expected InChI of the retained component)
    inchi_cases = [
        (
            'InChI=1S/C5H12N2O2.C4H7NO4/c6-3-1-2-4(7)5(8)9;5-2(4(8)9)1-3(6)7/h4H,1-3,6-7H2,(H,8,9);2H,1,5H2,(H,6,7)(H,8,9)/t4-;2-/m00/s1',
            'InChI=1S/C4H7NO4/c5-2(4(8)9)1-3(6)7/h2H,1,5H2,(H,6,7)(H,8,9)/t2-/m0/s1',
        ),
        ('InChI=1S/Mo.4O/q;;;2*-1', 'InChI=1S/Mo'),
    ]
    for source, expected in inchi_cases:
        kept = Filters.keep_biggest(MolFromInchi(source))
        assert MolToInchi(kept) == expected
def structure_standardization(smi):
    """
    Standardization function to clean up smiles with RDKit.

    The input SMILES is converted into a mol object; SMILES that cannot be read
    are logged. Molecules whose atom count exceeds the configured
    ``max_num_atoms`` are discarded and logged. The remaining molecules are
    standardized with the MolVS toolkit
    (https://molvs.readthedocs.io/en/latest/index.html): charge parent, isotope
    parent, stereo parent (only when ``include_stereoinfo`` is ``False`` in the
    configuration), tautomer parent and a final ``standardize`` call. Molecules
    that raise ``ValueError`` or ``AttributeError`` during these steps are logged.

    Note that every call re-assigns ``tautomer.TAUTOMER_TRANSFORMS``, reloads
    ``MolVS_standardizer`` and re-reads the configuration.

    :param smi: Input SMILES from the given structure data file T4
    :return: Cleaned and standardized canonical SMILES of the given input SMILES,
        or ``np.nan`` when reading, the size check or the standardization fails.
    """
    tautomer.TAUTOMER_TRANSFORMS = update_tautomer_rules()
    importlib.reload(MolVS_standardizer)

    settings = ReadConfig().get_conf_dict(parameters.get_parameters())['standardization']
    atom_limit = settings['max_num_atoms']
    tautomer_limit = settings['max_num_tautomers']
    keep_stereo = settings['include_stereoinfo']
    standardizer = MolVS_standardizer.Standardizer(max_tautomers=tautomer_limit)

    # Read SMILES and convert it to RDKit mol object.
    mol = MolFromSmiles(smi)
    if mol is None:
        logging.error('Reading Error, ' + smi)
        return np.nan

    # Check the size of the molecule against the configured limit.
    within_size_limit = mol.GetNumAtoms() <= atom_limit
    if not within_size_limit:
        logging.error('Molecule too large, ' + smi)
        return np.nan

    try:
        # Standardize molecules using MolVS and RDKit.
        mol = standardizer.charge_parent(mol)
        mol = standardizer.isotope_parent(mol)
        if keep_stereo is False:
            mol = standardizer.stereo_parent(mol)
        mol = standardizer.tautomer_parent(mol)
        standardized = standardizer.standardize(mol)
        # Convert mol object back to SMILES.
        return MolToSmiles(standardized)
    except (ValueError, AttributeError) as e:
        # Write molecules that failed during standardization to the log file.
        logging.error('Standardization error, ' + smi + ', Error Type: ' + str(e))
        return np.nan
def pair_rxnts(mol1_list, mol2_list, rxn, debug=False):
    """
    Run a two-reactant reaction on every (mol1, mol2) combination.

    Explicit hydrogens are added to both reactants before ``rxn.RunReactants``
    is called. A ``tqdm`` progress bar is shown over ``mol1_list``.

    :param mol1_list: iterable of molecules used as first reactant.
    :param mol2_list: iterable of molecules used as second reactant; it is
        materialised once, so one-shot iterators are supported.
    :param rxn: reaction object providing ``RunReactants``.
    :param debug: {bool} log the raw product tuples and every product SMILES.
    :return: list of SMILES strings, one per product set (first product only).
    """
    # Add hydrogens once per molecule instead of once per pair.
    reactants2 = [Chem.AddHs(mol2) for mol2 in mol2_list]

    prod_list = []
    for mol1 in tqdm(mol1_list):
        reactant1 = Chem.AddHs(mol1)
        for reactant2 in reactants2:
            products = rxn.RunReactants((reactant1, reactant2))
            if debug:
                logging.info(products)
            # Iterating an empty result is a no-op, so no emptiness check is needed.
            for prod in products:
                prod_smiles = MolToSmiles(prod[0])
                if debug:
                    logging.info(prod_smiles)
                prod_list.append(prod_smiles)
    return prod_list
def _get_inputs(self, rxn_list, pickaxe):
    """Build reactant/product SMILES pairs for the given reactions.

    For every reaction id in ``rxn_list`` the SMILES of all reactants and
    products whose compound id starts with ``"C"`` are looked up in
    ``pickaxe.compounds`` and combined into every (reactant, product) pair.
    Each pair is keyed as ``"<rxn _id>_<pair index>"``.

    A pair is recorded as a failure when either SMILES is longer than 120
    characters, when either SMILES cannot be parsed, or when either
    hydrogen-stripped SMILES contains the letter ``"M"``.

    Args:
        rxn_list: iterable of reaction ids (rxn_list will be pickaxe eventually).
        pickaxe: object exposing ``reactions`` and ``compounds`` mappings.

    Returns:
        tuple: ``(input_info, input_fails)`` where ``input_info`` maps pair keys
        to ``[reactant_smiles, product_smiles]`` and ``input_fails`` maps the
        keys of rejected pairs to ``None``.
    """
    max_smiles_length = 120

    # Get reactions information
    def get_cpd_smiles(cpd_id):
        return pickaxe.compounds[cpd_id]["SMILES"]

    def side_smiles(side):
        # Only entries whose compound id starts with "C" are considered.
        return [get_cpd_smiles(v[1]) for v in side if v[1].startswith("C")]

    reactions_info = {}
    for rxn_id in rxn_list:
        rxn = pickaxe.reactions[rxn_id]
        reactants = side_smiles(rxn["Reactants"])
        products = side_smiles(rxn["Products"])
        reactions_info[rxn["_id"]] = list(product(reactants, products))

    # Process this information
    input_info = {}
    input_fails = {}
    for rxn_id, reaction_pairs in reactions_info.items():
        for i, (reactant_smiles, product_smiles) in enumerate(reaction_pairs):
            key = rxn_id + "_" + str(i)

            if (len(reactant_smiles) > max_smiles_length
                    or len(product_smiles) > max_smiles_length):
                input_fails[key] = None
                continue

            mol1 = MolFromSmiles(reactant_smiles)
            mol2 = MolFromSmiles(product_smiles)
            if mol1 is None or mol2 is None:
                # Unparsable SMILES: record the pair as failed instead of
                # crashing inside RemoveHs.
                input_fails[key] = None
                continue

            reactant_smiles = MolToSmiles(RemoveHs(mol1))
            product_smiles = MolToSmiles(RemoveHs(mol2))

            # TODO what does this fix? from original code
            if "M" in reactant_smiles or "M" in product_smiles:
                input_fails[key] = None
            else:
                input_info[key] = [reactant_smiles, product_smiles]

    return input_info, input_fails
def simple_rxn(mol_list, rxn, debug=False):
    """
    Apply a one-reactant reaction to every molecule of ``mol_list``.

    Explicit hydrogens are added to each molecule before ``rxn.RunReactants``
    is called; the first product of every returned product set is collected.

    :param mol_list: iterable of molecules.
    :param rxn: reaction object providing ``RunReactants``.
    :param debug: {bool} print the input SMILES, the raw result and each product.
    :return: list with the first product of every product set.
    """
    collected = []
    for reactant in mol_list:
        if debug:
            print('Input: ' + MolToSmiles(reactant))
        outcomes = rxn.RunReactants((Chem.AddHs(reactant), ))
        if debug:
            print('Products: {}'.format(outcomes))
        if outcomes == ():
            continue
        for outcome in outcomes:
            first_product = outcome[0]
            if debug:
                print(outcome)
                print(MolToSmiles(first_product))
            collected.append(first_product)
    return collected
def pair_rxnts(mol1_list, mol2_list, rxn, debug=False):
    """
    Run a two-reactant reaction on every (mol1, mol2) combination.

    Explicit hydrogens are added to both reactants before ``rxn.RunReactants``
    is called.

    :param mol1_list: iterable of molecules used as first reactant.
    :param mol2_list: iterable of molecules used as second reactant; it is
        materialised once, so one-shot iterators are supported.
    :param rxn: reaction object providing ``RunReactants``.
    :param debug: {bool} log the raw product tuples and every product SMILES.
    :return: list of molecules, the first product of every product set.
    """
    # Add hydrogens once per molecule instead of once per pair.
    reactants2 = [Chem.AddHs(mol2) for mol2 in mol2_list]

    prod_list = []
    for mol1 in mol1_list:
        reactant1 = Chem.AddHs(mol1)
        for reactant2 in reactants2:
            products = rxn.RunReactants((reactant1, reactant2))
            if debug:
                logging.info(products)
            # Iterating an empty result is a no-op, so no emptiness check is needed.
            for prod in products:
                if debug:
                    logging.info(MolToSmiles(prod[0]))
                prod_list.append(prod[0])
    return prod_list
def reaction(smarts,smiles1,smiles2):
    """
    Run a two-reactant reaction given as SMARTS on two SMILES strings.

    :param smarts: reaction SMARTS passed to ``ReactionFromSmarts``.
    :param smiles1: SMILES of the first reactant.
    :param smiles2: SMILES of the second reactant.
    :return: flat list with the isomeric SMILES of every molecule in every
        product set returned by ``RunReactants``.
    """
    rxn = ReactionFromSmarts(smarts)
    reactants = [MolFromSmiles(smiles1), MolFromSmiles(smiles2)]
    product_sets = rxn.RunReactants(reactants)
    return [
        MolToSmiles(mol, isomericSmiles=True)
        for product_set in product_sets
        for mol in product_set
    ]
def smiles(self):
    """SMILES representation of the fragment.

    Returns:
        str: result of ``MolToSmiles`` applied to ``self.molecule``.
    """
    fragment_smiles = MolToSmiles(self.molecule)
    return fragment_smiles
def time_kekulize(file_path: str, sample_size: int = -1):
    """Time kekulization of the SMILES in a file with selfies and with RDKit.

    The file is read relative to the directory of this module. Its first line
    is treated as a header and skipped; every remaining line (right-stripped)
    is one SMILES string. Both timings are printed, nothing is returned.

    :param file_path: path of the SMILES file, relative to this module's directory.
    :param sample_size: if positive, time a random sample of this many SMILES
        (``random.sample`` raises ``ValueError`` if the file holds fewer);
        otherwise time all of them.
    """
    curr_dir = os.path.dirname(__file__)
    file_path = os.path.join(curr_dir, file_path)

    # load data; slicing (unlike pop(0)) also tolerates an empty file
    with open(file_path, 'r') as file:
        smiles = [line.rstrip() for line in file][1:]

    if sample_size > 0:
        smiles = random.sample(smiles, sample_size)

    print(f"Timing Kekulization of {len(smiles)} SMILES from {file_path}")

    # time selfies kekulization
    # perf_counter is monotonic and high-resolution, unlike wall-clock time().
    start = time.perf_counter()
    for s in smiles:
        list(kekulize_parser(_parse_smiles(s)))
    selfies_time = time.perf_counter() - start
    print(f"--> selfies kekulize: {selfies_time:0.7f}s")

    # time RDKit kekulization
    start = time.perf_counter()
    for s in smiles:
        m = MolFromSmiles(s)
        Kekulize(m)
        MolToSmiles(m, kekuleSmiles=True)
    rdkit_time = time.perf_counter() - start
    print(f"--> RDKit kekulize: {rdkit_time:0.7f}s")
def graph(self, m):
    """Return a text key for molecule ``m``.

    The returned string is the isomeric SMILES of ``m`` followed by ``' H'``,
    a hydrogen count and the signed formal charge of ``m``
    (format ``"%s%s%d%+d"``).

    NOTE(review): a single-bond copy of the molecule is assembled in ``em``
    below, but the returned string is computed from ``m`` only (the line that
    used ``em.GetMol()`` is commented out). The construction is still executed,
    so any exception it raises still propagates.
    """
    from rdkit.Chem import EditableMol, Atom, rdchem
    # presumably the difference between two atom counts yields the number of
    # explicit hydrogen atoms; verify against GetNumAtoms' argument semantics
    hcount = m.GetNumAtoms(False) - m.GetNumAtoms(True)
    # create new molecule using single bonds only
    em = EditableMol(Mol())
    # nbridx[original atom index] -> index of the copied atom in `em`
    # (None for atoms that were not copied)
    nbridx = [None] * m.GetNumAtoms()
    iatom = 0
    for atom in m.GetAtoms():
        atnum = atom.GetAtomicNum()
        if atnum == 1 and atom.GetIsotope() == 1:
            #if atom.GetMass() > 1: pass
            # hydrogen atoms with isotope label 1 are counted, not copied
            hcount += 1
        else:
            newatom = Atom(atnum)
            #if atom.GetTotalDegree() == 0: newatom.SetNoImplicit(True) # otherwise [Na]. becomes [NaH].
            #newatom.SetFormalCharge(atom.GetFormalCharge())
            newatom.SetFormalCharge(0)
            em.AddAtom(newatom)
            aidx = atom.GetIdx()
            nbridx[aidx] = iatom
            iatom += 1
            # connect to neighbours that were already copied
            for a2 in atom.GetNeighbors():
                a2idx = nbridx[a2.GetIdx()]
                if a2idx != None:
                    # NOTE(review): `aidx` is the index in `m`, while `a2idx`
                    # is an index in `em`; they differ once an atom has been
                    # skipped above -- confirm this is intended.
                    em.AddBond(aidx, a2idx, rdchem.BondType.SINGLE)
    #cansmi = self.cansmiles(em.GetMol())
    cansmi = MolToSmiles(m, isomericSmiles=True)
    #cansmi = cansmi.replace('+','').replace('-','').replace('[N]','N').replace('[O]','O').replace('[C]','C').replace('[I]','I').replace('[S]','S').replace('[P]','P').replace('[B]','B').replace('[Br]','Br').replace('[Cl]','Cl')
    return "%s%s%d%+d" % (cansmi, ' H', hcount, GetFormalCharge(m))
def _report_top_scaffolds(groups, label, image_path, top_n=20):
    """Print the ``top_n`` most populated entries of ``groups`` and save them as a grid image."""
    print('\nNumber of distinct {}s: {}'.format(label, len(groups)))

    # Largest groups first; sorted() is stable, so ties keep insertion order.
    ranked = sorted(groups, key=lambda k: len(groups[k]), reverse=True)[:top_n]
    for rank, scaffold in enumerate(ranked, start=1):
        print('#{} {}: {}, size: {}'.format(rank, label, scaffold, len(groups[scaffold])))

    mols = [MolFromSmiles(scaffold) for scaffold in ranked]
    legends = ['counts = {}'.format(len(groups[scaffold])) for scaffold in ranked]
    img = Draw.MolsToGridImage(mols, molsPerRow=5, subImgSize=(200, 200), legends=legends)
    img.save(image_path)


def main(args):
    """Group the docked SMILES by Murcko scaffold and report the most common ones.

    Reads ``data/unique_docked_smiles.csv``, drops rows whose ``SMILES`` cannot
    be parsed, computes the Murcko scaffold and the generic scaffold of every
    remaining SMILES and writes them as extra columns to
    ``data/unique_docked_smiles_by_scaffold.csv``. The 20 most populated
    scaffolds and generic scaffolds are printed and drawn to
    ``data/scaff_grid.png`` and ``data/scaff_generic_grid.png``.

    :param args: unused by this function.
    """
    data = pd.read_csv('data/unique_docked_smiles.csv')

    # filter out invalid SMILES; copy so that the column assignments below act
    # on an independent frame rather than on a slice of the original.
    data['mol'] = data['SMILES'].apply(lambda x: MolFromSmiles(x))
    data = data[data['mol'].notnull()].copy()
    print('Number of unique SMILES: {}'.format(len(data)))

    # Create dictionary of key: scaffold and value: list of smiles
    scaffold_dict = {}
    scaffold_generic_dict = {}
    scaff_list = []
    scaff_generic_list = []
    for smiles in data['SMILES'].values:
        scaffold = MurckoScaffoldSmilesFromSmiles(smiles)
        scaffold_generic = MolToSmiles(MakeScaffoldGeneric(MolFromSmiles(scaffold)))
        scaff_list.append(scaffold)
        scaff_generic_list.append(scaffold_generic)
        scaffold_dict.setdefault(scaffold, []).append(smiles)
        scaffold_generic_dict.setdefault(scaffold_generic, []).append(smiles)

    data['scaffold'] = scaff_list
    data['scaffold_generic'] = scaff_generic_list
    data.drop(columns='mol').to_csv('data/unique_docked_smiles_by_scaffold.csv', index=False)

    _report_top_scaffolds(scaffold_dict, 'scaffold', 'data/scaff_grid.png')
    _report_top_scaffolds(scaffold_generic_dict, 'generic scaffold',
                          'data/scaff_generic_grid.png')
def get_annotated_murcko_scaffold(mol, scaffold=None, as_mol=True):
    """
    Return an annotated murcko scaffold where side chains are replaced
    with a dummy atom ('*').

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
    scaffold : rdkit.Chem.rdchem.Mol, optional
        If a murcko scaffold is already calculated for the `mol`, this can
        be supplied as a template. The default is None, in which case the
        scaffold is computed with ``MurckoScaffold.GetScaffoldForMol``.
    as_mol : bool, optional
        If True return rdkit.Chem.rdchem.Mol object else return a
        SMILES string representation. The default is True.

    Returns
    -------
    {str, rdkit.Chem.rdchem.Mol}
        Annotated Murcko scaffold. With ``as_mol=True`` the result of
        ``ReplaceSidechains`` is returned unchanged (it may be None); with
        ``as_mol=False`` a None result is reported as an empty string.

    """
    # Test for "not supplied" explicitly instead of relying on the
    # truthiness of a molecule object.
    if scaffold is None:
        scaffold = MurckoScaffold.GetScaffoldForMol(mol)
    annotated = rdmolops.ReplaceSidechains(mol, scaffold)
    if as_mol:
        return annotated
    if annotated is None:
        return ''
    return MolToSmiles(annotated)
def test_getitem(self, filled_fragmentsdb, expected_fragment):
    """Look up a fragment by identifier and compare its molecule and remaining fields."""
    expected_smiles = '[*]COP(=O)([O-])OP(=O)([O-])OC1OC(C(=O)[O-])C(O)C(O)C1O'

    record = filled_fragmentsdb['1muu_GDX_frag7']
    actual_smiles = MolToSmiles(record['mol'])
    assert actual_smiles == expected_smiles

    # The molecule is checked through its SMILES above, so it is removed
    # before the rest of the record is compared.
    del record['mol']
    assert record == expected_fragment
def get_desc_data(molecule):
    """
    Custom function that calculates and returns every available molecular
    descriptor in RDKit chemoinfo toolkit (except 'Ipc') with the
    corresponding header (name) for each.

    Descriptors that raise ``ValueError``, ``TypeError`` or
    ``ZeroDivisionError`` are reported on stdout and stored as NaN.

    :param molecule: rdkit's molecule object
    :return: values of descriptors (float array) and their headers
        (``'DESC_<name>'`` stored with dtype ``'S32'``, i.e. fixed-width
        32-byte strings)
    """
    desc_dict = dict(Descriptors.descList)
    # 'Ipc' is excluded, as before; pop() tolerates RDKit builds in which the
    # descriptor is not present, where list.remove() would raise.
    desc_dict.pop('Ipc', None)

    ans = {}
    for descname, desc in desc_dict.items():
        try:
            bin_value = desc(molecule)
        except (ValueError, TypeError, ZeroDivisionError) as exception:
            print(
                'Descriptor {} wasn\'t calculated for a molecule {} due to {}'.
                format(str(descname), str(MolToSmiles(molecule)), str(exception)))
            # A real float NaN instead of the string 'NaN'.
            bin_value = float('nan')
        ans['DESC_{}'.format(descname)] = bin_value

    molstring = np.fromiter(ans.values(), dtype=float)
    headers = np.fromiter(ans.keys(), dtype='S32')
    return molstring, headers
def _convert_depiction(self, idepic, itype='smiles', otype={'inchikey'}):
    """Convert a chemical depiction to other types of depictions.

    :param idepic: depiction to be converted, str
    :param itype: type of the input depiction, ``'smiles'`` or ``'inchi'``
    :param otype: types of depiction to generate, an iterable made of
        ``'smiles'``, ``'inchi'`` and/or ``'inchikey'``; a single string is
        also accepted. The default set is never modified.
    :return odepic: generated depictions, ``{"otype1": "odepic1", ..}``
    :raises NotImplementedError: on an unknown input or output type
    :raises self.DepictionError: when the input depiction cannot be imported
    """
    # Import (if needed)
    if itype == 'smiles':
        rdmol = MolFromSmiles(idepic, sanitize=True)
    elif itype == 'inchi':
        rdmol = MolFromInchi(idepic, sanitize=True)
    else:
        raise NotImplementedError(
            '"{}" is not a valid input type'.format(itype))
    if rdmol is None:  # Check import
        raise self.DepictionError(
            'Import error from depiction "{}" of type "{}"'.format(
                idepic, itype))

    # A bare string would otherwise be iterated character by character.
    requested = {otype} if isinstance(otype, str) else otype

    # Export
    odepic = dict()
    for item in requested:
        if item == 'smiles':
            # MolToSmiles is tricky, one may want to check the possible options..
            odepic[item] = MolToSmiles(rdmol)
        elif item == 'inchi':
            odepic[item] = MolToInchi(rdmol)
        elif item == 'inchikey':
            odepic[item] = MolToInchiKey(rdmol)
        else:
            # Report the offending item, not the whole collection.
            raise NotImplementedError(
                '"{}" is not a valid output type'.format(item))
    return odepic
def delete_atom_generate_smiles(origin_graph_data, delete_idx):
    '''
    Remove one atom from a molecular graph and return the SMILES of the rest.

    The input graph is deep-copied and left untouched. In the copy, row
    ``delete_idx`` of ``x`` is removed, every edge that touches the deleted
    atom is dropped from ``edge_index`` and ``edge_attr``, and node indices
    above ``delete_idx`` are shifted down by one.

    :param origin_graph_data: graph object with ``x`` (one row per atom),
        ``edge_index`` (2 x num_edges) and ``edge_attr`` (one row per edge).
    :param delete_idx: integer 0~N-1, index of the atom to delete.
    :return: SMILES string of the reduced graph, or None when the graph cannot
        be converted to a molecule / SMILES.
    '''
    graph_data = copy.deepcopy(origin_graph_data)
    graph_data.x = torch.cat(
        [graph_data.x[:delete_idx, :], graph_data.x[delete_idx + 1:, :]], 0)

    # Keep the edges whose two endpoints both differ from the deleted atom.
    # A boolean mask also works when no edge remains, where concatenating an
    # empty list of columns would raise.
    edge_index = graph_data.edge_index
    keep = (edge_index != delete_idx).all(dim=0)
    edge_index = edge_index[:, keep]
    graph_data.edge_attr = graph_data.edge_attr[keep]

    # Atoms after the deleted one move down by one position.
    graph_data.edge_index = edge_index - (edge_index > delete_idx).to(edge_index.dtype)

    try:
        mol = graph_data_obj_to_mol_simple(graph_data.x, graph_data.edge_index,
                                           graph_data.edge_attr)
        return MolToSmiles(mol)
    except Exception:
        # Best effort, as before: an unconvertible graph yields None.
        return None
def convert_depiction(idepic, itype='smiles', otype={'inchikey'}):
    """Convert chemical depiction to others type of depictions

    :param idepic: string depiction to be converted, str
    :param itype: type of depiction provided as input, ``'smiles'`` or ``'inchi'``
    :param otype: types of depiction to be generated, {"", "", ..}; each item is
        one of ``'smiles'``, ``'inchi'``, ``'inchikey'``. A single string is
        also accepted. The default set is never modified.
    :return odepic: generated depictions, {"otype1": "odepic1", ..}
    :raises NotImplementedError: on an unknown input or output type
    :raises Exception: when the input depiction cannot be imported

    Usage example:
    - convert_depiction(idepic='CCO', otype={'inchi', 'smiles', 'inchikey'})
    - convert_depiction(idepic='InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3', itype='inchi', otype={'inchi', 'smiles', 'inchikey'})
    """
    # Import (if needed)
    if itype == 'smiles':
        rdmol = MolFromSmiles(idepic, sanitize=True)
    elif itype == 'inchi':
        rdmol = MolFromInchi(idepic, sanitize=True)
    else:
        raise NotImplementedError('"{}" is not a valid input type'.format(itype))
    if rdmol is None:  # Check import
        raise Exception('Import error from depiction "{}" of type "{}"'.format(idepic, itype))

    # A bare string would otherwise be iterated character by character.
    requested = {otype} if isinstance(otype, str) else otype

    # Export
    odepic = dict()
    for item in requested:
        if item == 'smiles':
            # MolToSmiles is tricky, one may want to check the possible options..
            odepic[item] = MolToSmiles(rdmol)
        elif item == 'inchi':
            odepic[item] = MolToInchi(rdmol)
        elif item == 'inchikey':
            odepic[item] = MolToInchiKey(rdmol)
        else:
            # Report the offending item, not the whole collection.
            raise NotImplementedError('"{}" is not a valid output type'.format(item))
    return odepic
def test_parser_roundtrip_no_rule_sequence(self):
    """Parse ``smiles1`` into a graph tree, rebuild the graph directly from the tree, and expect the same SMILES back."""
    parsed_tree = hypergraph_parser(MolFromSmiles(smiles1))
    rebuilt_graph = graph_from_graph_tree(parsed_tree)
    rebuilt_smiles = MolToSmiles(to_mol(rebuilt_graph))
    assert rebuilt_smiles == smiles1
def test_parser_roundtrip(self):
    """Parse ``smiles1`` into a graph tree, rebuild the graph by evaluating the tree's rules, and expect the same SMILES back."""
    parsed_tree = hypergraph_parser(MolFromSmiles(smiles1))
    rebuilt_graph = evaluate_rules(parsed_tree.rules())
    rebuilt_smiles = MolToSmiles(to_mol(rebuilt_graph))
    assert rebuilt_smiles == smiles1
def pair_rxnts(mol1_list, mol2_list, rxn, debug=False):
    """
    Function that applies a two-reactant one-product reaction to every
    combination of the input RDKit molecules, returning the products as a
    list of SMILES strings.

    Explicit hydrogens are added to both reactants before ``rxn.RunReactants``
    is called; the first product of every product set is converted to SMILES.

    :param mol1_list: iterable of molecules used as first reactant.
    :param mol2_list: iterable of molecules used as second reactant; it is
        materialised once, so one-shot iterators are supported.
    :param rxn: reaction object providing ``RunReactants``.
    :param debug: {bool} log the raw product tuples and every product SMILES.
    :return: list of product SMILES strings.
    """
    # Add hydrogens once per molecule instead of once per pair.
    reactants2 = [Chem.AddHs(mol2) for mol2 in mol2_list]

    prod_list = []
    for mol1 in mol1_list:
        reactant1 = Chem.AddHs(mol1)
        for reactant2 in reactants2:
            products = rxn.RunReactants((reactant1, reactant2))
            if debug:
                logging.info(products)
            # Iterating an empty result is a no-op, so no emptiness check is needed.
            for prod in products:
                prod_smiles = MolToSmiles(prod[0])
                if debug:
                    logging.info(prod_smiles)
                prod_list.append(prod_smiles)
    return prod_list
def test_sequence_minimal():
    """Run the default ``Standardizer`` on an InChI input and compare its InChI and SMILES output."""
    # Violacein
    violacein_inchi = 'InChI=1S/C20H13N3O3/c24-10-5-6-15-12(7-10)14(9-21-15)17-8-13(19(25)23-17)18-11-3-1-2-4-16(11)22-20(18)26/h1-9,21,24H,(H,22,26)(H,23,25)/b18-13+'
    standardized = Standardizer().compute(MolFromInchi(violacein_inchi))
    # The InChI is expected to come back unchanged.
    assert MolToInchi(standardized) == violacein_inchi
    assert MolToSmiles(standardized) == 'OC1=NC(c2c[nH]c3ccc(O)cc23)=C/C1=C1\\C(O)=Nc2ccccc21'

    # L-Lactate
    mol = MolFromInchi('')
def is_same_mol(smiles1, smiles2):
    """Tell whether two SMILES strings describe the same molecule.

    Both strings are parsed with ``MolFromSmiles`` and written back with
    ``MolToSmiles``; the molecules are considered the same when the two
    resulting strings are equal. False is returned when either input is None
    or cannot be parsed.
    """
    if any(smi is None for smi in (smiles1, smiles2)):
        return False

    parsed = [MolFromSmiles(smiles1), MolFromSmiles(smiles2)]
    if any(mol is None for mol in parsed):
        return False

    first, second = [MolToSmiles(mol) for mol in parsed]
    return first == second
def write_BRICS_csv(BRICS_builds_gen, filename):
    """
    Write the SMILES of generated molecules to a one-column CSV file.

    The file starts with the header ``CompoundSMILES``; every molecule then
    gets one row holding its SMILES. ``UpdatePropertyCache(strict=False)`` is
    called on each molecule before it is converted.

    :param BRICS_builds_gen: iterable of batches, each an iterable of molecules.
        Every batch is traversed only once, so one-shot iterators are supported.
    :param filename: path of the CSV file to create (overwritten if present).
    """
    with open(filename, "w", newline="") as f:
        writer = csv.writer(f, delimiter=",")
        writer.writerow(['CompoundSMILES'])
        for mol_list in BRICS_builds_gen:
            rows = []
            for mol in mol_list:
                mol.UpdatePropertyCache(strict=False)
                rows.append([MolToSmiles(mol)])
            # Rows are collected first so a batch is written only if every
            # molecule in it could be converted.
            writer.writerows(rows)