def load_in_reagents(library_title, path_to_file, reaction):
    """Function to load up a library of reagents - e.g. acyl chlorides

    Every readable molecule in the SD file is registered through
    add_new_comp, stored as a Reactant flagged as available, linked to
    ``reaction`` through its react_id relation and added to the ReactantLib
    named ``library_title`` (which is created if it does not exist yet).

    Takes a library title, the path to an SD file and a reaction
    Returns the ReactantLib the reactants were added to"""
    # Open the file
    my_mols = Chem.SDMolSupplier(path_to_file)
    # Get the library this corresponds to - one query, first match wins
    matching_libs = list(
        ReactantLib.objects.filter(library_name=library_title)[:1])
    if matching_libs:
        out_lib = matching_libs[0]
    else:
        out_lib = ReactantLib()
        out_lib.library_name = library_title
        out_lib.save()
    # Go through the compounds
    for rdmol in my_mols:
        # The supplier hands back None for records it cannot parse
        if rdmol is None:
            continue
        # Register the compound; other loaders in this module treat a None
        # result as "could not be registered", so skip it here as well
        dj_comp = add_new_comp(rdmol)
        if dj_comp is None:
            continue
        # Link it to the reaction
        my_r = Reactant()
        my_r.cmpd_id = dj_comp
        try:
            my_r.validate_unique()
            my_r.save()
        except ValidationError:
            # A Reactant already exists for this compound - reuse it
            my_r = Reactant.objects.get(cmpd_id=dj_comp)
        # Now update this
        my_r.is_available = True
        my_r.react_id.add(reaction)
        my_r.save()
        out_lib.reactant_id.add(my_r)
        out_lib.save()
    return out_lib
def load_in_follow_ups(library_title, path_to_file, reaction, mol_id):
    """Function to load in follow ups - ready made

    A new Process (not made by LLOOMMPPAA) is recorded for ``mol_id`` and
    ``reaction``. Every readable molecule in the SD file is registered
    through add_new_comp, stored as a Product linked to that Process and
    added to the ProductLib named ``library_title`` (created if missing).

    Takes a library title, the path to an SD file, a reaction and a mol id
    Returns the ProductLib the products were added to"""
    # Open the file
    my_mols = Chem.SDMolSupplier(path_to_file)
    # Get the library this corresponds to - one query, first match wins
    matching_libs = list(ProductLib.objects.filter(lib_name=library_title)[:1])
    if matching_libs:
        out_lib = matching_libs[0]
    else:
        out_lib = ProductLib()
        out_lib.lib_name = library_title
        out_lib.save()
    # Get the process
    my_process = Process()
    my_process.mol_id = mol_id
    my_process.is_made_lloommppaa = False
    my_process.reaction_id = reaction
    my_process.save()
    # Go through the compounds
    for rdmol in my_mols:
        # The supplier hands back None for records it cannot parse
        if rdmol is None:
            continue
        # Register the compound; other loaders in this module treat a None
        # result as "could not be registered", so skip it here as well
        dj_comp = add_new_comp(rdmol)
        if dj_comp is None:
            continue
        # Link it to the process
        my_p = Product()
        my_p.cmpd_id = dj_comp
        try:
            my_p.validate_unique()
            my_p.save()
        except ValidationError:
            # A Product already exists for this compound - reuse it
            my_p = Product.objects.get(cmpd_id=dj_comp)
        # Now update this
        my_p.process_id.add(my_process)
        my_p.save()
        out_lib.product_id.add(my_p)
        out_lib.save()
    return out_lib
def load_activity_data(target, file_path):
    """Function to load in a CSV file of activity data

    The "smiles" and "Activity" columns are mandatory; if either is absent
    a message is printed and the interpreter is stopped with sys.exit().
    The optional values fall back to units "pnM", ID "NONE", Source "IC50"
    and operator "NA" when the row does not provide them.

    Takes a Target object and a file path
    Returns None"""
    # Fields looking for
    all_fields = ["smiles", "Activity", "ID", "operator"]
    mandatory_fields = ["smiles", "Activity"]
    # Number of data lines (everything bar the header) for the percent clock
    with open(file_path) as count_handle:
        tot = len(count_handle.readlines()) - 1
    # Keep the file open for the whole loop in case the reader is lazy
    with open(file_path) as csv_handle:
        # Read the file into a CSV dict
        in_d = read_CSV(csv_handle)
        # An empty file may give no fieldnames at all
        fieldnames = in_d.fieldnames or []
        # Check to see if fields are missing
        missing_fields = [x for x in mandatory_fields if x not in fieldnames]
        if missing_fields:
            print("%s %s" % (" ".join(missing_fields), " fields required"))
            sys.exit()
        absent_fields = [x for x in all_fields if x not in fieldnames]
        if absent_fields:
            print("%s %s" % (" ".join(absent_fields), " fields missing"))
        if tot <= 0:
            print("No activity data")
            return
        old = -1
        print("Loading activity data")
        for i, row in enumerate(in_d):
            # Do the percent clock
            pct = i * 100 // tot
            if pct != old:
                old = pct
                sys.stdout.write("\r%d%% complete..." % old)
                sys.stdout.flush()
            smiles = str(row["smiles"])
            m = Chem.MolFromSmiles(smiles)
            if m is None:
                # Try again in case the SMILES needs un-escaping. The decode
                # has to be applied to the string, not to the (None) result
                # of the failed parse.
                m = Chem.MolFromSmiles(smiles.decode('string-escape'))
            if m is None:
                print("%s %s" % ("Error None molecule", row["smiles"]))
                continue
            comp_ref = add_new_comp(m)
            if comp_ref is None:
                continue
            # Now add the required information if no column is entered
            units = row.get("units")
            if units is None:
                units = "pnM"
            chid = row.get("ID")
            if chid is None:
                chid = "NONE"
            source = row.get("Source")
            if source is None:
                source = "IC50"
            operator = row.get("operator")
            if operator is None:
                operator = "NA"
            add_new_act(comp_ref, target, row["Activity"], units, chid,
                        source, operator)
    old = 100
    sys.stdout.write("\r%d%%" % old)
    sys.stdout.flush()
    print("\nAdding activity data complete")
    return None
def add_new_mol(rdmol, target):
    """Function to add a new bound Molecule object

    The molecule's "_Name" property is split on "_" into
    <pdb id>[_<ligand id>_<chain id>_<occupancy>]; when the trailing parts
    are absent or the occupancy is not a number the defaults "UNL", "Z"
    and 0.0 are used. If the pdb id cannot serve as a protein code (shorter
    than four characters, already used, or containing the target title) the
    molecule is compared against the stored molecules of the same compound
    and target: exact duplicates are dropped, anything else gets a fresh
    uuid-prefixed code. Note that this rewrites the "_Name" of ``rdmol``.

    Takes an RDKit molecule and a Target
    Returns None"""
    # NOTE(review): a second definition of add_new_mol appears further down
    # this module; at import time the later binding replaces this one.
    import re
    new_mol = Molecule()
    rdProps = rdmol.GetProp("_Name").split("_")
    comp_ref = add_new_comp(rdmol)
    if comp_ref is None:
        return None
    # To get rid of the .pdb suffix
    pdb_id = rdProps[0].split(".")[0]
    # If it's an SGC model entry -> rename it appropriately
    if rdmol.HasProp("name"):
        if re.match(r"^m\d\d\d$", rdmol.GetProp("name")):
            pdb_id = rdmol.GetProp("name") + "_" + str(target.pk)
    # Check that the name is unique and more than 3 characters long
    # and doesn't contain the target.title
    needs_new_id = (len(pdb_id) < 4
                    or Protein.objects.filter(code=pdb_id).exists()
                    or target.title in pdb_id)
    if needs_new_id:
        # First up check that this molecule has not been added before
        stored_mols = [Chem.MolFromMolBlock(str(x.sdf_info))
                       for x in Molecule.objects.filter(
                           prot_id__target_id=target, cmpd_id=comp_ref)]
        # Blank out the names so only structure and coordinates are compared
        rdmol.SetProp("_Name", "N")
        sd_block = Chem.MolToMolBlock(
            Chem.MolFromMolBlock(Chem.MolToMolBlock(rdmol)))
        for stored in stored_mols:
            # A stored block that no longer parses cannot be a match
            if stored is None:
                continue
            stored.SetProp("_Name", "N")
            if sd_block == Chem.MolToMolBlock(stored):
                # This EXACT mol is already in the database - just return
                return
        # We have not put this EXACT mol into the database
        # Now lets make an ID
        molid = uuid.uuid4().hex + "_" + pdb_id
        rdmol.SetProp("_Name", molid)
    else:
        molid = pdb_id
    # Make a protein object by which it is related in the DB
    new_mol.prot_id = Protein.objects.get_or_create(code=molid,
                                                    target_id=target)[0]
    new_mol.sdf_info = Chem.MolToMolBlock(rdmol)
    new_mol.smiles = Chem.MolToSmiles(rdmol, isomericSmiles=True)
    try:
        new_mol.lig_id = rdProps[1]
        new_mol.chain_id = rdProps[2]
        new_mol.occupancy = float(rdProps[3])
    except (IndexError, ValueError):
        # Name carried no (usable) ligand details - fall back to defaults
        new_mol.lig_id = "UNL"
        new_mol.chain_id = "Z"
        new_mol.occupancy = 0.0
    # Now link that compound back
    new_mol.cmpd_id = comp_ref
    try:
        new_mol.validate_unique()
        new_mol.save()
    except ValidationError:
        # Deliberate best effort: a molecule that clashes with an existing
        # row is simply not stored
        pass
def load_compounds(file_path):
    """Function to load compounds and make the MMPs

    Each readable molecule in the SD file is registered through
    add_new_comp, rebuilt from the registered SMILES and, unless its exact
    molecular weight is above 560, passed to make_mol_mmp. A running count
    of the readable molecules is printed as the file is processed.

    Takes a file path
    Returns None"""
    # NOTE(review): a second definition of load_compounds appears further
    # down this module; at import time the later binding replaces this one.
    mols = Chem.SDMolSupplier(file_path)
    counter = 0
    for m in mols:
        if m is None:
            print("NONE MOL")
            continue
        counter += 1
        print(counter)
        # add the new compound to the database
        comp_ref = add_new_comp(m)
        if comp_ref is None:
            continue
        new_m = Chem.MolFromSmiles(str(comp_ref.smiles))
        # The stored SMILES may fail to parse - report it like any other
        # unreadable molecule rather than crashing in the weight filter
        if new_m is None:
            print("NONE MOL")
            continue
        # Filter too big molecules
        if Descriptors.ExactMolWt(new_m) > 560:
            continue
        make_mol_mmp(new_m, id="cmp" + str(comp_ref.pk), target_id=None)
def load_compounds(file_path):
    """Function to load compounds and make the MMPs

    Each readable molecule in the SD file is registered through
    add_new_comp, rebuilt from the registered SMILES and, unless its exact
    molecular weight is above 560, passed to make_mol_mmp. A running count
    of the readable molecules is printed as the file is processed.

    Takes a file path
    Returns None"""
    supplier = Chem.SDMolSupplier(file_path)
    counter = 0
    for m in supplier:
        if m is None:
            print("NONE MOL")
            continue
        counter += 1
        print(counter)
        # add the new compound to the database
        comp_ref = add_new_comp(m)
        if comp_ref is None:
            continue
        new_m = Chem.MolFromSmiles(str(comp_ref.smiles))
        # The stored SMILES may fail to parse - report it like any other
        # unreadable molecule rather than crashing in the weight filter
        if new_m is None:
            print("NONE MOL")
            continue
        # Filter too big molecules
        if Descriptors.ExactMolWt(new_m) > 560:
            continue
        make_mol_mmp(new_m, id="cmp" + str(comp_ref.pk), target_id=None)
def add_new_mol(rdmol, target):
    """Function to add a new bound Molecule object

    The molecule's "_Name" property is split on "_" into
    <pdb id>[_<ligand id>_<chain id>_<occupancy>]; when the trailing parts
    are absent or the occupancy is not a number the defaults "UNL", "Z"
    and 0.0 are used. If the pdb id cannot serve as a protein code (shorter
    than four characters, already used, or containing the target title) the
    molecule is compared against the stored molecules of the same compound
    and target: exact duplicates are dropped, anything else gets a fresh
    uuid-prefixed code. Note that this rewrites the "_Name" of ``rdmol``.

    Takes an RDKit molecule and a Target
    Returns None"""
    import re
    new_mol = Molecule()
    rdProps = rdmol.GetProp("_Name").split("_")
    comp_ref = add_new_comp(rdmol)
    if comp_ref is None:
        return None
    # To get rid of the .pdb suffix
    pdb_id = rdProps[0].split(".")[0]
    # If it's an SGC model entry -> rename it appropriately
    if rdmol.HasProp("name"):
        if re.match(r"^m\d\d\d$", rdmol.GetProp("name")):
            pdb_id = rdmol.GetProp("name") + "_" + str(target.pk)
    # Check that the name is unique and more than 3 characters long
    # and doesn't contain the target.title
    needs_new_id = (len(pdb_id) < 4
                    or Protein.objects.filter(code=pdb_id).exists()
                    or target.title in pdb_id)
    if needs_new_id:
        # First up check that this molecule has not been added before
        stored_mols = [Chem.MolFromMolBlock(str(x.sdf_info))
                       for x in Molecule.objects.filter(
                           prot_id__target_id=target, cmpd_id=comp_ref)]
        # Blank out the names so only structure and coordinates are compared
        rdmol.SetProp("_Name", "N")
        sd_block = Chem.MolToMolBlock(
            Chem.MolFromMolBlock(Chem.MolToMolBlock(rdmol)))
        for stored in stored_mols:
            # A stored block that no longer parses cannot be a match
            if stored is None:
                continue
            stored.SetProp("_Name", "N")
            if sd_block == Chem.MolToMolBlock(stored):
                # This EXACT mol is already in the database - just return
                return
        # We have not put this EXACT mol into the database
        # Now lets make an ID
        molid = uuid.uuid4().hex + "_" + pdb_id
        rdmol.SetProp("_Name", molid)
    else:
        molid = pdb_id
    # Make a protein object by which it is related in the DB
    new_mol.prot_id = Protein.objects.get_or_create(code=molid,
                                                    target_id=target)[0]
    new_mol.sdf_info = Chem.MolToMolBlock(rdmol)
    new_mol.smiles = Chem.MolToSmiles(rdmol, isomericSmiles=True)
    try:
        new_mol.lig_id = rdProps[1]
        new_mol.chain_id = rdProps[2]
        new_mol.occupancy = float(rdProps[3])
    except (IndexError, ValueError):
        # Name carried no (usable) ligand details - fall back to defaults
        new_mol.lig_id = "UNL"
        new_mol.chain_id = "Z"
        new_mol.occupancy = 0.0
    # Now link that compound back
    new_mol.cmpd_id = comp_ref
    try:
        new_mol.validate_unique()
        new_mol.save()
    except ValidationError:
        # Deliberate best effort: a molecule that clashes with an existing
        # row is simply not stored
        pass
def create_lib(rxn, react_proc, lib_name):
    """Function to create a library from a reaction

    Every compound in ``react_proc.reactant_queue`` is reacted with the
    molecule built from ``react_proc.react_frag`` (both reactant orders are
    tried). Reactions that give no product, a first product set holding
    more than one molecule, or a product that fails the PAINS test are
    reported and skipped. Each accepted product is registered, linked to a
    new Process, added to a new ProductLib and to the product queue, and
    its reactant is taken off the reactant queue.

    ``lib_name`` is accepted but not used: the new library is named with a
    random uuid.

    Takes an RDKit reaction, a reaction process and a library name
    Returns the updated reaction process"""
    from LLOOMMPPAA.pains_filter import pains_test
    # Make the molecule fit for reaction
    rdmol = Chem.MolFromSmiles(str(react_proc.react_frag))
    p_lib = ProductLib()
    p_lib.lib_name = str(uuid.uuid4())
    p_lib.save()
    # Get the process
    my_process = Process()
    my_process.mol_id = react_proc.mol_id
    my_process.is_made_lloommppaa = False
    my_process.reaction_id = react_proc.react_id
    my_process.save()
    # Snapshot the queue: entries are removed from it inside the loop, so
    # iterate over a fixed list rather than the live relation
    my_cmpd = list(react_proc.reactant_queue.all())
    tot = len(my_cmpd)
    old = -1
    for i, cmpd in enumerate(my_cmpd):
        sys.stdout.write("\rCarried out reaction %d of %d..." % (i, tot))
        sys.stdout.flush()
        re_mol = Chem.MolFromSmiles(str(cmpd.smiles))
        # A reactant whose SMILES cannot be parsed cannot be reacted
        if re_mol is None:
            continue
        out_prods = rxn.RunReactants((re_mol, rdmol))
        if len(out_prods) == 0:
            # Try the reactants the other way round
            out_prods = rxn.RunReactants((rdmol, re_mol))
        if len(out_prods) == 0:
            print("NO PRODUCTS")
            print(Chem.MolToSmiles(rdmol, isomericSmiles=True))
            print(Chem.MolToSmiles(re_mol, isomericSmiles=True))
            continue
        products = out_prods[0]
        if len(products) > 1:
            print("MULTIPLE PRODUCTS")
            print(products)
            print(Chem.MolToSmiles(rdmol))
            print(Chem.MolToSmiles(re_mol))
            continue
        if pains_test(products[0]):
            print("PAINS FILTER SKIPPING!!!")
            continue
        # Register the compound; other loaders in this module treat a None
        # result as "could not be registered", so skip it here as well
        dj_comp = add_new_comp(products[0])
        if dj_comp is None:
            continue
        # Add it to the list of products
        existing_prods = Product.objects.filter(cmpd_id=dj_comp)
        if existing_prods:
            my_prod = existing_prods[0]
        else:
            my_prod = Product()
            my_prod.cmpd_id = dj_comp
            my_prod.save()
        my_prod.process_id.add(my_process)
        # Add the product to the product library
        p_lib.product_id.add(my_prod)
        p_lib.save()
        # Add it to the product queue
        react_proc.product_queue.add(dj_comp)
        react_proc.reactant_queue.remove(cmpd)
        # Only write the progress back when the whole percentage changes
        my_prg = int((float(i) / float(tot)) * 100)
        if my_prg != old:
            react_proc.stage_completion = my_prg
            old = my_prg
            react_proc.save()
    react_proc.products_id.add(p_lib)
    react_proc.save()
    # Now return this
    return react_proc
def register_targ_data(tot_d, target, save_map=None, overwrite=None):
    """Function to register a targets data

    ``tot_d`` maps a chain key to a dict from which the entries "smiles",
    "model_id", "path_to_pdb", "path_to_map", "chain" and "cmpd_id" are
    read. For every entry the PDB file (optionally gzipped) is read, a
    Protein coded <model>_<chain>_<target pk> is fetched or created and,
    if it is new or ``overwrite`` is set, its ligands, PDB block and
    (with ``save_map``) map are stored. Waters are stored when the protein
    has none yet or ``overwrite`` is set.

    Takes the data dict, a Target, and optional save_map / overwrite flags
    Returns the list of Proteins that were created or overwritten"""
    import gzip
    old = -1
    tot = len(tot_d)
    prots_made = []
    for tot_c, chain in enumerate(tot_d):
        # Do the percent clock
        pct = tot_c * 100 // tot
        if pct != old:
            old = pct
            sys.stdout.write("\rRegistering proteins %d%% complete..." % old)
            sys.stdout.flush()
        entry = tot_d[chain]
        smiles = entry["smiles"]
        model = entry["model_id"]
        # Get the PDB file
        pdb_path = entry["path_to_pdb"]
        if pdb_path[-3:] == ".gz":
            pdb_handle = gzip.open(pdb_path)
        else:
            pdb_handle = open(pdb_path)
        try:
            file_lines = pdb_handle.readlines()
        finally:
            pdb_handle.close()
        # If we're saving maps and the map exists - get the map file.
        # Reset per chain so a map is never carried over from another entry.
        map_lines = None
        if save_map and entry["path_to_map"]:
            with open(entry["path_to_map"]) as map_handle:
                map_lines = map_handle.readlines()
        # Get the mols from this (each ligand block is assigned only once)
        mols = []
        for block in get_ligs(file_lines):
            lig = assign_temp(block, smiles, model)
            if lig is not None:
                mols.append(lig)
        # Check that everything's ok with the protein
        apo_prot = Chem.MolFromPDBBlock(remove_hetatm(file_lines),
                                        sanitize=False)
        if not apo_prot:
            print("%s %s" % ("NONE PROTEIN: ",
                             model + "_" + entry["chain"] + "_"
                             + str(target.title)))
            continue
        # Check the protein exists
        prot_code = model + "_" + entry["chain"] + "_" + str(target.pk)
        my_prot, created = Protein.objects.get_or_create(target_id=target,
                                                         code=prot_code)
        # Only proceed if this has been created OR overwrite / refresh is set
        refresh = created or overwrite
        if refresh:
            prots_made.append(my_prot)
            # Delete the molecules for this protein
            Molecule.objects.filter(prot_id=my_prot).delete()
            # Loop through them
            for i, mol in enumerate(mols):
                # Find the reference compound; other loaders in this module
                # treat a None result as "could not be registered"
                comp_ref = add_new_comp(mol)
                if comp_ref is None:
                    continue
                # Find the molecule
                my_mol, mol_created = Molecule.objects.get_or_create(
                    smiles=smiles,
                    sdf_info=Chem.MolToMolBlock(mol, includeStereo=True),
                    lig_id=str(i), chain_id="A", cmpd_id=comp_ref,
                    occupancy=0.0, prot_id=my_prot)
                # Now add this to the user data
                if mol_created:
                    for user in UserData.objects.all():
                        user.new_mols.add(my_mol)
                        user.save()
                # RMSD to the first ligand; the first ligand itself gets 0.0
                # and is saved as well so the value reaches the database
                if i > 0:
                    my_mol.rmsd = AllChem.GetBestRMS(mols[0], mol)
                else:
                    my_mol.rmsd = 0.0
                my_mol.save()
                # Give the internal ID
                iidl = InternalIDLink()
                iidl.mol_id = my_mol
                iidl.internal_id = entry["cmpd_id"]
                iidl.save()
        if refresh:
            my_prot.pdb_info = remove_hetatm(file_lines)
            # If we have a map - add it
            if save_map and map_lines is not None:
                my_prot.cif_info = "".join(map_lines)
            my_prot.save()
        # Waters
        waters = Chem.MolFromPDBBlock(get_waters(file_lines))
        if waters is None:
            continue
        # Leave existing waters alone unless overwrite is set
        if not overwrite and Water.objects.filter(prot_id=my_prot).exists():
            continue
        conf = waters.GetConformer()
        # Delete them for this protein
        for w in Water.objects.filter(prot_id=my_prot):
            w.delete()
        for atom_idx in range(waters.GetNumAtoms()):
            cp = conf.GetAtomPosition(atom_idx)
            # Only the oxygen atoms are stored
            if waters.GetAtomWithIdx(atom_idx).GetSmarts() != "O":
                continue
            Water.objects.get_or_create(water_num=atom_idx + 1,
                                        prot_id=my_prot, target_id=target,
                                        x_com=cp.x, y_com=cp.y, z_com=cp.z)
    print("\nRegistered proteins")
    return prots_made