def remove_bonds(mol, list_of_atomiso_bondsets_to_remove):
    """
    Cut the listed bonds of an rdkit mol and return the fragmented copy.

    Every bond to cut is described by the isotope labels of its two atoms.
    Isotope labels are used instead of atom indices so that the description
    of a bond stays valid while the molecule is being modified; the atoms
    are looked up again (via get_atom_w_iso_num) before each cut.

    Inputs:
    :param rdkit.Chem.rdchem.Mol mol: any rdkit mol. None is accepted and
        passed through as None.
    :param list list_of_atomiso_bondsets_to_remove: a list of 2-item
        sequences; each one holds the isotope labels of the two atoms that
        share the bond to cut. Empty entries are skipped.

    Returns:
    :returns: rdkit.Chem.rdchem.Mol new_mol: a copy of the input mol with
        the listed bonds cut (no dummy atoms are added). None is returned
        if mol is None, if the list is empty, if a listed bond cannot be
        found or cut, or if the result fails sanitization.

    Raises:
    :raises TypeError: if mol is not an rdkit Mol/RWMol, or if an entry of
        the list holds a number of labels other than 0 or 2.
    """

    # None's often end up in a pipeline use of RDKit so we handle this data
    # type as return None instead of raise TypeError
    if mol is None:
        return None

    # If mol is wrong data type (excluding None) raise TypeError
    if not isinstance(mol, (rdkit.Chem.rdchem.Mol, rdkit.Chem.rdchem.RWMol)):
        printout = "mol is the wrong data type. \n"
        printout = printout + "Input should be a rdkit.Chem.rdchem.Mol\n"
        printout = printout + "Input mol was {} type.".format(type(mol))
        raise TypeError(printout)

    # Nothing to cut: bail out before paying for a deep copy.
    if len(list_of_atomiso_bondsets_to_remove) == 0:
        return None

    # Work on a copy so the caller's molecule is never modified.
    new_mol = copy.deepcopy(mol)

    for atomiso_bondsets in list_of_atomiso_bondsets_to_remove:
        if len(atomiso_bondsets) == 0:
            continue
        if len(atomiso_bondsets) != 2:
            printout = "list_of_atomiso_bondsets_to_remove needs to be 2 isolabels for the atoms"
            raise TypeError(printout)

        # Translate the isotope labels into atom indices of the current mol.
        atom_1_idx = int(get_atom_w_iso_num(new_mol, atomiso_bondsets[0]))
        atom_2_idx = int(get_atom_w_iso_num(new_mol, atomiso_bondsets[1]))

        try:
            # FragmentOnBonds expects BOND indices, so resolve the bond
            # shared by the two atoms instead of passing atom indices.
            bond = new_mol.GetBondBetweenAtoms(atom_1_idx, atom_2_idx)
            if bond is None:
                # The two labelled atoms are not bonded; nothing to cut.
                return None
            new_mol = Chem.FragmentOnBonds(
                new_mol, [bond.GetIdx()], addDummies=False
            )
        except Exception:
            # Any RDKit failure while cutting means this bond set is unusable.
            return None

        # Validate after every cut so a broken intermediate is dropped early.
        new_mol = MOH.check_sanitization(new_mol)
        if new_mol is None:
            return None

    # Final check; also covers the case where every entry was empty.
    new_mol = MOH.check_sanitization(new_mol)
    if new_mol is None:
        return None
    return new_mol
def run_convert_on_single_pdb(pdb):
    """
    Convert one ligand PDB file into a SMILES entry.

    The entry is the isomeric SMILES string followed by a tab and the name
    of the ligand. The name is the basename of the file with ".pdb"
    removed.

    Inputs:
    :param str pdb: path to a pdb file

    Returns:
    :returns: str output_data: "<SMILES>\\t<name>" for the ligand, or None
        if the file could not be read, converted or sanitized.
    """

    # Default for every failure path; without it the return statement below
    # would raise UnboundLocalError whenever the conversion did not succeed.
    output_data = None

    try:
        mol = Chem.MolFromPDBFile(pdb)
        mol_sanitized = MOH.check_sanitization(mol)
        if mol_sanitized is not None:
            smiles = Chem.MolToSmiles(mol_sanitized, isomericSmiles=True)
            file_name = os.path.basename(pdb)
            file_stripped = file_name.replace(".pdb", "")
            output_data = smiles + "\t" + file_stripped
    except Exception:
        # Conversion is best-effort: a ligand that cannot be processed is
        # reported to the caller as None instead of aborting the whole run.
        output_data = None

    return output_data
def get_ligands_from_smi(smi_file):
    """
    Read the ligands listed in a .smi file.

    Each non-blank line is expected to hold a SMILES string followed by a
    ligand ID. Columns are split on tabs; if the line has no tab, it is
    split on any run of whitespace instead. Columns after the second one
    are ignored. Every SMILES is sanitized and rewritten as a canonical
    isomeric SMILES. Lines that cannot be parsed or sanitized are skipped.

    Inputs:
    :param str smi_file: Path to smiles file

    Returns:
    :returns: list list_of_ligands: A list of lists, one per imported
        ligand. list_of_ligands[0] = [SMILE, mol_id]
    """

    list_of_ligands = []
    # Number of non-blank lines, i.e. lines that should have held a ligand.
    attempted = 0

    with open(smi_file, "r") as smiles_file:
        for line_number, line in enumerate(smiles_file, 1):
            # Drop the line ending (also the "\r" of Windows files) so it
            # never ends up inside the ligand ID.
            line = line.rstrip("\r\n")
            if not line.strip():
                # Blank lines are not ligands and are not failures either.
                continue
            attempted = attempted + 1

            parts = line.split("\t")
            if len(parts) == 1:
                # No tab in the line: fall back to whitespace separation.
                parts = line.split()
            if len(parts) < 2:
                # A SMILES without an ID cannot be used.
                continue

            mol_string = parts[0]
            mol_id = parts[1]

            try:
                mol = Chem.MolFromSmiles(mol_string, sanitize=False)
            except Exception:
                print("Miss Formatted within .SMI. Line number {}".format(
                    str(line_number)))
                continue

            mol = MOH.check_sanitization(mol)
            if mol is None:
                continue

            mol_smile = Chem.MolToSmiles(mol, isomericSmiles=True,
                                         canonical=True)
            list_of_ligands.append([mol_smile, mol_id])

    print("Was able to import and sanitize {} ligands from the .smi.".format(
        len(list_of_ligands)))
    if attempted != len(list_of_ligands):
        print("\t Failed to sanitize/import {} ligands from the .smi".format(
            attempted - len(list_of_ligands)))
    print("########")

    return list_of_ligands
def make_list_of_all_unique_frags(fragment_list):
    """
    Turn fragmented molecules into a list of unique fragment SMILES.

    Every mol in fragment_list is split into its disconnected pieces. Each
    piece is sanitized, pieces with fewer than 3 atoms are discarded, the
    isotope labels are reset to 0 and the piece is converted into a
    canonical isomeric SMILES string. Duplicate strings are reduced to a
    single entry.

    Inputs:
    :param list fragment_list: list of fragmented rdkit mols which haven't
        been separated yet

    Returns:
    :returns: list clean_frag_list: List of unique sanitized SMILES strings
        from all objects in fragment_list, in the order in which each
        string was first produced. Isotope labels are also removed here.
    """

    clean_frag_list = []
    # SMILES already collected; gives O(1) duplicate checks while the list
    # keeps a reproducible first-seen order.
    seen_smiles = set()

    for fragments in fragment_list:
        frags = Chem.GetMolFrags(fragments, asMols=True, sanitizeFrags=False)
        for frag in frags:
            frag = MOH.check_sanitization(frag)
            if frag is None:
                continue

            # Remove fragments with fewer than 3 atoms
            if len(frag.GetAtoms()) < 3:
                continue

            # Isotope labels were only bookkeeping for the bond cutting;
            # clear them so identical fragments give identical SMILES.
            for atom in frag.GetAtoms():
                atom.SetIsotope(0)

            frag_smiles = Chem.MolToSmiles(frag, isomericSmiles=True,
                                           canonical=True)
            if frag_smiles in seen_smiles:
                continue
            seen_smiles.add(frag_smiles)
            clean_frag_list.append(frag_smiles)

    return clean_frag_list
def make_frag_list_for_one_mol(mol_info, frags_per_seed_lig, run_brics,
                               run_frag, c_c_bonds_off=False):
    """
    Fragment one ligand and return the parent plus its unique fragments.

    The ligand is sanitized, its atoms are labelled through
    label_iso_num_w_idx, and the sets of bonds to cut are obtained from
    get_rot_bond_permutations_to_cut. Each bond set is cut with
    remove_bonds and the resulting pieces are reduced to unique SMILES.
    BRICS fragments can be added as well. Every fragment receives its own
    ID from make_unique_lig_id.

    Inputs:
    :param list mol_info: list containing [mol_string, mol_id]
        mol_info[0] = the SMILE string of the parent mol
        mol_info[1] = the Unique ID of the parent mol
    :param int frags_per_seed_lig: -1 prints a summary of how many
        fragments were made.
        NOTE(review): no sub-sampling is implemented in this function;
        every unique fragment is returned whatever this value is. Confirm
        that this matches the intended use of the parameter.
    :param bool run_brics: whether to fragment using BRICS method
    :param bool run_frag: whether to fragment all bonds
    :param bool c_c_bonds_off: forwarded to
        get_rot_bond_permutations_to_cut; controls the handling of C-C
        bonds

    Returns:
    :returns: list final_frag_list: A list of lists. The first entry is the
        parent as [canonical SMILE, mol_id]; it is followed by one
        [SMILE, unique_id] entry per unique fragment. If no fragments could
        be made only the parent entry is returned.

    Raises:
    :raises Exception: if the parent molecule fails to sanitize.
    """

    mol_smile = mol_info[0]
    lig_id = mol_info[1]

    mol = Chem.MolFromSmiles(mol_smile, sanitize=False)
    mol = MOH.check_sanitization(mol)
    if mol is None:
        printout = "\nMolecule {} failed to sanitize. ".format(lig_id)
        printout = printout + "Could not make any fragments from it"
        raise Exception(printout)

    # Report the parent under its canonical SMILES from here on.
    mol_smile = Chem.MolToSmiles(mol, isomericSmiles=True, canonical=True)

    mol = label_iso_num_w_idx(mol)
    mol_copy = copy.deepcopy(mol)
    bonds_to_remove_permutations = get_rot_bond_permutations_to_cut(
        mol_copy, c_c_bonds_off)

    # Cut every bond set on a fresh copy of the labelled parent.
    fragment_list = []
    for bond_set_to_del in bonds_to_remove_permutations:
        mol_copy = copy.deepcopy(mol)
        fragmented_mol = remove_bonds(mol_copy, bond_set_to_del)
        if fragmented_mol is None:
            continue
        fragment_list.append(fragmented_mol)

    candidate_frags = []
    if run_frag is True:
        candidate_frags = list(make_list_of_all_unique_frags(fragment_list))

    if run_brics is True:
        mol_copy = copy.deepcopy(mol)
        bric_mols = get_brics_permutations(mol_copy, min_frag_size=3)
        candidate_frags.extend(bric_mols)

    # Remove duplicates while keeping the first-seen order, so the IDs
    # handed out below do not depend on set/hash ordering between runs.
    seen_frags = set()
    clean_frag_list = []
    for frag in candidate_frags:
        if frag in seen_frags:
            continue
        seen_frags.add(frag)
        clean_frag_list.append(frag)

    if len(clean_frag_list) == 0:
        printout = "\nNo fragments were made for {}.\n".format(lig_id)
        print(printout)
        return [[mol_smile, lig_id]]

    # The parent is always the first entry of the returned list.
    final_frag_list = [[mol_smile, lig_id]]

    if frags_per_seed_lig == -1:
        printout = "\nFor {}: {} fragmented were made.".format(
            lig_id, len(clean_frag_list))
        print(printout)

    for frag in clean_frag_list:
        # The ID helper is given the entries made so far.
        unique_lig_id = make_unique_lig_id(lig_id, final_frag_list)
        final_frag_list.append([frag, unique_lig_id])

    return final_frag_list
def run_all_for_fun_group(vars, fun_group, rxns_by_fun_group,
                          a_smiles_click_object):
    """
    Run all reaction tests for a single functional group.

    Every compound of the functional group's complementary library is
    sanitized and then reacted in each reaction listed for the group; the
    other reactant positions are filled with the reaction's example
    reactants. Compounds that are not reported as failed by any reaction
    are written to "<fun_group>.smi" inside the output folder.

    Inputs:
    :param dict vars: Dictionary of User variables; the keys
        "number_of_processors" and "output_folder" are read
    :param str fun_group: functional group name
    :param dict rxns_by_fun_group: Dictionary of rxns names organized by
        functional groups
    :param obj a_smiles_click_object: a a_smiles_click_object class
        object. Its complementary_mol_dict and reaction_dict attributes
        are read.

    Returns:
    :returns: list failed_to_react: one [rxn_name, failures] entry per
        reaction, where failures holds the non-None results returned for
        that reaction (presumably the names of the mols which failed to
        react -- confirm against react_with_multiple_reactants)
    :returns: list failed_to_sanitize: the library entries which failed to
        sanitize

    Raises:
    :raises Exception: if fun_group is not among the functional groups of
        one of its reactions.
    """
    # Local import: only needed to build the output path.
    import os

    # unpack variables
    complementary_mol_dict = a_smiles_click_object.complementary_mol_dict
    reaction_dict = a_smiles_click_object.reaction_dict
    number_of_processors = vars["number_of_processors"]
    output_folder = vars["output_folder"]

    smi_comp_file = complementary_mol_dict[fun_group]
    fun_group_list = get_usable_format(smi_comp_file)

    # Pair every library entry with its sanitized mol (kept as last item).
    fun_group_mol_list = []
    failed_to_sanitize = []
    for info in fun_group_list:
        mol = Chem.MolFromSmiles(info[0])
        mol = MOH.check_sanitization(mol)
        if mol is None:
            failed_to_sanitize.append(info)
            continue
        # Copy so the original entry stays a plain list of strings; it is
        # joined into an output line further down.
        entry_with_mol = copy.deepcopy(info)
        entry_with_mol.append(mol)
        fun_group_mol_list.append(entry_with_mol)

    # print info about failures
    if len(failed_to_sanitize) != 0:
        printout = "{} compounds ".format(len(failed_to_sanitize))
        printout = printout + "failed to sanitize from: {}".format(fun_group)
        print(printout)

    failed_to_react = []
    for rxn_name in rxns_by_fun_group[fun_group]:
        current_rxn_dict = reaction_dict[rxn_name]
        example_reactants, rxn_obj = get_rxn_and_examples(current_rxn_dict)
        functional_groups_rxn = current_rxn_dict["functional_groups"]

        # Reactant position taken by this functional group. If the group
        # is listed more than once, the last position is used.
        i_count_to_use = None
        for i_count, f_group in enumerate(functional_groups_rxn):
            if fun_group == f_group:
                i_count_to_use = i_count
        if i_count_to_use is None:
            raise Exception("This is a code error.")

        # One job per library mol: the mol goes into its own position and
        # the example reactants fill every other position.
        list_of_reactants = []
        for mol_info in fun_group_mol_list:
            reactants = tuple(
                mol_info[-1] if i_count == i_count_to_use
                else example_reactants[i_count]
                for i_count in range(len(functional_groups_rxn))
            )
            list_of_reactants.append((reactants, mol_info[1], rxn_obj))

        output = mp.multi_threading(list_of_reactants, number_of_processors,
                                    react_with_multiple_reactants)
        # Only non-None results are kept and reported as failures.
        output = [x for x in output if x is not None]
        failed_to_react.append([rxn_name, output])

        # print info about failures
        if len(output) != 0:
            printout = "{} compounds failed to react from {} ".format(
                len(output), fun_group)
            printout = printout + "in rxn: {}".format(rxn_name)
            print(printout)

    # Collect the failures of all reactions into one list.
    master_failed_to_react = []
    for fail_mol_list in failed_to_react:
        master_failed_to_react.extend(fail_mol_list[1])

    # Keep every library entry that was not reported as a failure.
    master_passes_reactions = []
    for mol_info in fun_group_list:
        if mol_info[1] in master_failed_to_react:
            continue
        master_passes_reactions.append(" ".join(mol_info))

    # write to output .smi file; os.path.join works whether or not
    # output_folder ends with a path separator
    output_smi = os.path.join(output_folder, fun_group + ".smi")
    with open(output_smi, "w") as f:
        f.write("\n".join(master_passes_reactions))

    return failed_to_react, failed_to_sanitize
def get_rxn_and_examples(current_rxn_dict):
    """
    Build the reaction object of a reaction and validate its examples.

    The example reactant SMILES are read from current_rxn_dict, where they
    are stored as the text of a list (e.g. "['CCO', 'CC(=O)O']"), and are
    converted into sanitized rdkit mols. The reaction object is created
    from the reaction SMARTS and finally run on the example reactants as a
    self-test.

    Inputs:
    :param dict current_rxn_dict: a dictionary of information about a
        reaction; the keys "reaction_name", "example_rxn_reactants" and
        "reaction_string" are read

    Returns:
    :returns: tuple example_rxn_reactants: a tuple of rdkit mol objects
        that are example compounds
    :returns: rdkit.Chem.rdChemReactions.ChemicalReaction rxn_obj: the
        reaction object to use

    Raises:
    :raises Exception: if an example compound fails to sanitize, if the
        reaction object cannot be created, or if the reaction fails to
        run on the example compounds.
    """

    rxn_name = current_rxn_dict["reaction_name"]

    # Test example reactants
    # Strip the list brackets, spaces and double quotes from the stored
    # text, then split it at the separators between the quoted entries.
    example_smiles_rxn_reactants = current_rxn_dict["example_rxn_reactants"]
    example_smiles_rxn_reactants = example_smiles_rxn_reactants.replace(
        "['", "").replace("']", "")
    example_smiles_rxn_reactants = example_smiles_rxn_reactants.replace(
        " ", "").replace('"', "")
    example_smiles_rxn_reactants = example_smiles_rxn_reactants.split("','")

    example_rxn_reactants = []
    for smile_str in example_smiles_rxn_reactants:
        # Remove any quote characters left over from the split.
        smile_str = smile_str.replace("'", "").replace('"', "")
        smile_str = smile_str.replace(" ", "")

        example_mol = Chem.MolFromSmiles(smile_str)
        example_mol = MOH.check_sanitization(example_mol)
        if example_mol is None:
            print(smile_str)
            printout = "example mol from rxn: {}".format(rxn_name)
            printout = printout + " failed to sanitize in RDKit"
            print(printout)
            raise Exception(printout)
        example_rxn_reactants.append(example_mol)

    # convert example_rxn_reactants to tuple
    example_rxn_reactants = tuple(example_rxn_reactants)

    reaction_string = current_rxn_dict["reaction_string"]
    try:
        rxn_obj = AllChem.ReactionFromSmarts(reaction_string)
        rxn_obj.Initialize()
    except Exception:
        printout = "rxn {} failed to be created. ".format(rxn_name)
        printout = printout + "Rxn SMART is flawed"
        print(printout)
        raise Exception(printout)

    # Demo on example reactants; any result other than None is treated as
    # a failed reaction.
    example_results = react_with_multiple_reactants(
        example_rxn_reactants, "test_reactions", rxn_obj)
    if example_results is not None:
        printout = "rxn {} failed to run on example compounds.".format(
            rxn_name)
        printout = printout + "\nPlease check example compounds"
        print(printout)
        raise Exception(printout)

    return example_rxn_reactants, rxn_obj