def is_glyco_amino_bond(atom1, atom2, verbose=False):
    """Return True when the two atoms join one sugar residue and one amino acid.

    The chemical type of each atom's parent residue is looked up with
    ``get_type`` (by ``resname``), upper-cased, and tested for membership in
    ``sugar_types`` and ``amino_types``.

    Parameters:
      atom1, atom2: atoms exposing ``parent().resname`` and ``quote()``.
      verbose: when true, print the atoms and the looked-up types.

    Returns:
      bool: True only if exactly one residue is a sugar and exactly one is an
      amino acid; False otherwise, including when either type is unknown.
    """
    type1 = get_type(atom1.parent().resname)
    type2 = get_type(atom2.parent().resname)
    if verbose:
        print('----- is_glyco_amino_bond -----')
        print(atom1.quote())
        print(atom2.quote())
        print(type1)
        print(type2)
        print(sugar_types)
        # An unknown residue has no type to upper-case; print None instead of
        # raising AttributeError from the diagnostic output.
        print(type1.upper() if type1 is not None else None)
        print(type2.upper() if type2 is not None else None)
    if type1 is None or type2 is None:
        return False
    sugars = 0
    aminos = 0
    for residue_type in (type1.upper(), type2.upper()):
        # A sugar match takes precedence over an amino-acid match.
        if residue_type in sugar_types:
            sugars += 1
        elif residue_type in amino_types:
            aminos += 1
    return sugars == 1 and aminos == 1
def is_o_glyco_bond(atom1, atom2):
    """Return True when the atoms join one sugar and one O-linking residue.

    A residue counts as a sugar when its upper-cased ``get_type`` result is in
    ``sugar_types``; otherwise it counts as an O-link when its ``resname`` is
    in ``o_linking_residues``.  If either residue has no known type the
    result is False.
    """
    first_name = atom1.parent().resname
    first_type = get_type(first_name)
    if first_type is None:
        return False
    second_name = atom2.parent().resname
    second_type = get_type(second_name)
    if second_type is None:
        return False

    sugar_count = 0
    o_link_count = 0
    for residue_name, residue_type in (
        (first_name, first_type),
        (second_name, second_type),
    ):
        # The sugar test wins; the O-link test is only tried for non-sugars.
        if residue_type.upper() in sugar_types:
            sugar_count += 1
        elif residue_name in o_linking_residues:
            o_link_count += 1
    return sugar_count == 1 and o_link_count == 1
def get_res_type(res_name):
    """(string) -> string

    Return the residue type for the residue name ``res_name``.

    The class reported by ``common_residue_names_get_class`` is used unless it
    is ``'other'``; in that case the result of ``get_type`` is used instead,
    with a missing type reported as the string ``'None'``.
    """
    res_type = common_residue_names_get_class(res_name)
    if res_type == 'other':
        res_type = get_type(res_name)
        # Callers always receive a string, so spell out the missing type.
        if res_type is None:
            res_type = 'None'
    return res_type
def is_glyco_bond(atom1, atom2, verbose=False):
    """Return True when both atoms belong to sugar residues.

    The chemical type of each atom's parent residue is looked up with
    ``get_type`` (by ``resname``) and its upper-cased form is tested for
    membership in ``sugar_types``.

    Parameters:
      atom1, atom2: atoms exposing ``parent().resname`` and ``quote()``.
      verbose: when true, print the atoms and the looked-up types.

    Returns:
      bool: False if either type is unknown or not a sugar type, else True.
    """
    type1 = get_type(atom1.parent().resname)
    type2 = get_type(atom2.parent().resname)
    if verbose:
        print('----- is_glyco_bond -----')
        print(atom1.quote())
        print(atom2.quote())
        print(type1)
        print(type2)
        print(sugar_types)
        # An unknown residue has no type to upper-case; print None instead of
        # raising AttributeError from the diagnostic output.
        print(type1.upper() if type1 is not None else None)
        print(type2.upper() if type2 is not None else None)
    if type1 is None or type2 is None:
        return False
    if type1.upper() not in sugar_types:
        return False
    if type2.upper() not in sugar_types:
        return False
    #
    #if atom2.parent().resname in not_correct_sugars: return False
    return True
def get_classes(atom, important_only=False, verbose=False):
    """Classify the residue that ``atom`` belongs to.

    An ``empty()`` object is created with one boolean attribute per known
    class name.  ``common_saccharide`` is set when the residue's chemical
    type is in ``sugar_types``; the attribute named by ``get_class`` (after
    the UNK override and the "modified_*" -> "other" redirect) is set when it
    is one of the remaining known class names.

    Parameters:
      atom: atom exposing ``parent()`` (with ``resname``, ``altloc``,
        ``atoms()``) and ``element``.
      important_only: when true, return the first matching class name as a
        string (or "metal", see ``_filter_for_metal``) instead of the flags
        object.  If nothing matches, the flags object is returned.
      verbose: when true, print the intermediate classification.
    """
    def _residue_atom_count(atom):
        return len(atom.parent().parent().atoms())

    def _filter_for_metal(atom, class_name):
        # "common_element" is always checked against the metal list; "other"
        # only when the enclosing residue holds a single atom.
        if class_name == "common_element":
            check_element = True
        elif class_name == "other":
            check_element = _residue_atom_count(atom) == 1
        else:
            check_element = False
        if check_element:
            element = atom.element.strip().upper()
            if element in ad_hoc_single_metal_residue_element_types:
                return "metal"
        return class_name

    saccharide_attr = "common_saccharide"  # not in get_class
    get_class_attrs = [
        "common_water",
        "common_element",
        "common_small_molecule",
        "common_amino_acid",
        "common_rna_dna",
        "ccp4_mon_lib_rna_dna",
        "other",
        "uncommon_amino_acid",
        "unknown",
    ]
    redirect = {
        "modified_amino_acid": "other",
        "modified_rna_dna": "other",
    }

    residue = atom.parent()
    flags = empty()
    for name in [saccharide_attr] + get_class_attrs:
        setattr(flags, name, False)

    # only consider ccp4 names if not single atom - CD/Cd
    use_ccp4_names = len(residue.atoms()) != 1
    residue_class = get_class(
        residue.resname,
        consider_ccp4_mon_lib_rna_dna=use_ccp4_names)
    if verbose:
        print(' atom_group1: altloc="%s" resname="%s" class="%s"' % (
            residue.altloc,
            residue.resname,
            get_class(residue.resname,
                      consider_ccp4_mon_lib_rna_dna=use_ccp4_names),
        ))
    if residue.resname == 'UNK':  # does this need more checks
        residue_class = 'common_amino_acid'
    residue_class = redirect.get(residue_class, residue_class)
    if verbose:
        print('final class', residue_class)

    # The saccharide test comes first and uses the chemical type rather than
    # the get_class result.
    if residue.resname in one_letter_given_three_letter:
        chemical_type = "L-PEPTIDE LINKING"
    elif residue.resname in ["HOH"]:
        chemical_type = "NON-POLYMER"
    else:
        chemical_type = get_type(residue.resname)
    if chemical_type is not None and chemical_type.upper() in sugar_types:
        if important_only:
            return _filter_for_metal(atom, saccharide_attr)
        setattr(flags, saccharide_attr, True)

    # Every other class is flagged only when get_class named it.
    if residue_class in get_class_attrs:
        if important_only:
            return _filter_for_metal(atom, residue_class)
        setattr(flags, residue_class, True)
    return flags
def run_ligand_loop(params):
    """Loop over the ligand codes and generate restraint files for each one.

    Settings are read from ``params.restraints`` (``input``, ``control``,
    ``output`` and ``elbow`` scopes).  The function changes into the output
    directory for the duration of the loop, applies a series of filters to
    every code produced by ``generate_ligand_codes``, and for the codes that
    survive calls either ``get_amber_filenames_from_directory_tree`` or
    ``get_elbow_molecule_cif_filename_from_directory_tree``.  The MD5 returned
    by ``get_cc_md5`` is pickled under ``md5s/`` so that unchanged codes can
    be skipped on a later run.  A per-category summary is printed at the end.

    Parameters:
      params: parameter object exposing the ``restraints`` scope.

    Returns:
      None.

    Raises:
      AssertionError: from the ``assert`` statements kept from the original
        (listing skipped ligands, non-polymer filtering, an undetermined pH
        comparison, failed Amber calculation).
    """
    pr = params.restraints
    only_i = pr.input.only_i
    only_code = pr.input.only_code
    chunks_n = pr.control.chunks_n
    chunks_i = pr.control.chunks_i
    list_skip = pr.output.list_skipped_ligands
    amber = pr.control.amber
    min_length_smiles = pr.control.min_length_smiles
    max_length_smiles = pr.control.max_length_smiles
    only_type = pr.control.only_type

    def _as_int(value):
        # Best-effort conversion: values that are not int-like are kept as is.
        try:
            return int(value)
        except (TypeError, ValueError):
            return value

    def _make_dir(path):
        # The directory may already exist from a previous run.
        try:
            os.mkdir(path)
        except OSError:
            pass

    stats = {}

    def _record(category, code):
        stats.setdefault(category, set()).add(code)

    only_i = _as_int(only_i)
    chunks_n = _as_int(chunks_n)
    chunks_i = _as_int(chunks_i)
    if only_i == -1:
        only_i = None
    try:
        if only_code.lower() == "none":
            only_code = None
    except AttributeError:
        # only_code is not a string (e.g. None); leave it untouched.
        pass
    #
    print('hostname %s' % (os.environ.get("HOSTNAME", None),))
    #
    if list_skip:
        assert only_i is None, "Can only list skipped ligands if only_i is None"
        assert only_code is None, "Can only list skipped ligands if only_code is None"
    all_codes = set()
    run_codes = set()
    #
    cwd = os.getcwd()
    if pr.output.method_basis_dir:
        method_basis_dir = pr.output.method_basis_dir
    elif amber:
        method_basis_dir = "amber_library"
    else:
        method_basis_dir = get_directory_name_from_method_basis(
            pr.input.qm_method,
            pr.input.qm_basis,
            pr.input.qm_solvent_model,
        )
    print('\n Directory for output %s' % (method_basis_dir,))
    _make_dir(method_basis_dir)
    os.chdir(method_basis_dir)
    try:
        _make_dir("output")
        for i, ligand_code in enumerate(
                generate_ligand_codes(
                    ligand_list_filename=pr.control.ligand_list)):
            _record("all", ligand_code)
            if list_skip:
                all_codes.add(ligand_code)
                # NOTE(review): unconditional abort kept from the original;
                # listing skipped ligands looks deliberately disabled.
                assert 0
            if only_i is not None and only_i != i + 1:
                continue
            if chunks_i is not None and chunks_n is not None:
                if chunks_i != i % chunks_n + 1:
                    continue
            if only_code is not None and ligand_code.upper() != only_code.upper():
                continue
            if only_code is not None:
                print('%s %s %s %s %s' % (
                    i + 1, ligand_code, only_i, chunks_i, chunks_n))
            ligand_type = get_type(ligand_code)
            print('%s %s' % (ligand_code, ligand_type))
            if only_type is not None:
                # A code without a known type cannot match the requested type.
                if (ligand_type is None
                        or only_type.lower() != ligand_type.lower()):
                    _record("polymer", ligand_code)
                    continue
            # never run
            if ligand_code in skip_codes:
                print("Ligand skipped for any of a number of reasons")
                _record("skip", ligand_code)
                continue
            # don't reproduce the geostd and monomer lib
            if not amber and pr.control.skip_ligands_in_library:
                if ligand_code in geostd_codes:
                    print('\n\tCode %s in GeoStd' % ligand_code)
                    _record("geostd", ligand_code)
                    continue
                if ligand_code in mon_lib_codes:
                    print('\n\tCode %s in Monomer Library' % ligand_code)
                    _record("monlib", ligand_code)
                    continue
            # Without a chunk count, chunks_i is compared to the code itself.
            if (chunks_n is None and chunks_i is not None
                    and str(chunks_i) != ligand_code):
                continue
            if pr.control.only_non_polymers:
                print(pr.control.only_non_polymers)
                print(type(pr.control.only_non_polymers))
                if ligand_is_polymer(ligand_code):
                    _record("polymer", ligand_code)
                    continue
                else:
                    # NOTE(review): else pairing inferred from the collapsed
                    # original; confirm it belongs to the polymer test.
                    assert 0
            smiles = get_smiles(ligand_code)
            if not smiles:
                print('no SMILES found')
                _record("no smiles", ligand_code)
                continue
            if len(smiles) > max_length_smiles:
                print('SMILES too long %s %s %s' % (
                    max_length_smiles, len(smiles), smiles))
                _record("smiles too long", ligand_code)
                continue
            if len(smiles) <= min_length_smiles:
                print('SMILES too short %s %s %s' % (
                    min_length_smiles, len(smiles), smiles))
                _record("smiles too short", ligand_code)
                continue
            # A ligand whose SMILES contains a metal is recorded and skipped,
            # so that every ligand lands in one statistics category.
            # NOTE(review): the collapsed original used a for/else here whose
            # nesting is ambiguous; confirm that skipping is intended.
            metal = None
            for e in metals:
                if smiles.find(e) > -1:
                    metal = e
                    break
            if metal is not None:
                print('SMILES has metal %s %s' % (metal, smiles))
                _record("smiles has metal", ligand_code)
                continue
            print('%5d %3s "%s"' % (i + 1, ligand_code, smiles))
            #
            if pr.control.run_if_pH_same != Auto:
                same = is_same_molecule_regardless_of_pH(
                    ligand_code,
                    pr.control.run_if_pH_same,
                )
                if same is not None:
                    if pr.control.run_if_pH_same:
                        if not same:
                            print('\n\tpH has an effect')
                            continue
                    else:
                        if same:
                            print('\n\tpH has no effect')
                            continue
                else:
                    # NOTE(review): else pairing inferred; an undetermined
                    # comparison aborts.
                    assert 0
            #
            if list_skip:
                run_codes.add(ligand_code)
                continue
            #
            if not os.path.exists("md5s"):
                os.mkdir("md5s")
            md5_filename = os.path.join("md5s", "%s.md5" % ligand_code)
            cc_md5 = get_cc_md5(ligand_code)
            if not pr.control.ignore_md5:
                old_md5 = None
                if os.path.exists(md5_filename):
                    # The pickle is the one written at the end of this loop.
                    with open(md5_filename, "rb") as f:
                        old_md5 = pickle.load(f)
                if old_md5 == cc_md5:
                    print("\n\tChemical Component file unchanged\n")
                    continue
            #
            _record("run", ligand_code)
            stats.setdefault("final", set())
            ff = os.path.join(
                ligand_code[0].lower(),
                "%s.final.pdb" % ligand_code,
            )
            if os.path.exists(ff):
                stats["final"].add(ligand_code)
            if pr.control.dry_run:
                print("\n\tRunning %s\n" % ligand_code)
                continue
            #
            if amber:
                rc = get_amber_filenames_from_directory_tree(
                    ligand_code,
                    ignore_output_files=pr.control.ignore_output_files,
                    pH=pr.elbow.pH,
                )
                if rc is None:
                    print('Calculation of Amber files failed')
                    assert 0
            else:
                get_elbow_molecule_cif_filename_from_directory_tree(
                    ligand_code,
                    params,
                )
            # should only do this on success
            with open(md5_filename, "wb") as f:
                pickle.dump(cc_md5, f)
            if only_i is not None:
                break
            if only_code is not None:
                break
    finally:
        # Restore the caller's working directory even if a ligand fails.
        os.chdir(cwd)
    #
    if list_skip:
        print('\n\nSkipped ligands')
        for i, ligand_code in enumerate(sorted(
                all_codes.difference(run_codes))):
            # Codes skipped for lack of a SMILES string are listed as empty.
            smiles = get_smiles(ligand_code) or ""
            outl = ""
            for e in metals:
                if smiles.find(e) > -1:
                    outl += "%s " % e.replace("[", "")
            print(" %-3d %-3s %-10s %-3d %s" % (
                i + 1, ligand_code, outl, len(smiles), smiles))
    print('Statistics')
    total = 0
    for act in stats:
        print(" %-20s : %5d %s" % (
            act, len(stats[act]), list(stats[act])[:10]))
        if act not in ["all", "final"]:
            total += len(stats[act])
    print(' Total : %d' % total)
    # Codes that were run but have no "<code>.final.pdb" file.
    missing = list(
        stats.get("run", set()).difference(stats.get("final", set())))
    print(' : %s %d' % (missing, len(missing)))
def ligand_is_polymer(code):
    """Unfinished stub: print the ligand code and its type, then abort.

    The type is looked up with ``get_type``; no polymer decision is made.

    Raises:
      AssertionError: always.  Raised explicitly so that running Python with
        ``-O`` cannot turn the stub into a silent ``None`` return.
    """
    print('code %s' % (code,))
    lt = get_type(code)
    print(lt)
    raise AssertionError("ligand_is_polymer is not implemented")
def run_ligand_loop(params):
    """Loop over the ligand codes and generate restraint files for each one.

    Settings are read from ``params.restraints`` (``input``, ``control``,
    ``output``, ``properties`` and ``elbow`` scopes).  The function changes
    into the output directory for the duration of the loop, applies a series
    of filters to every code produced by ``generate_ligand_codes``, and for
    the codes that survive calls either
    ``get_amber_filenames_from_directory_tree`` or
    ``get_elbow_molecule_cif_filename_from_directory_tree``.  The MD5 returned
    by ``get_cc_md5`` is pickled under ``md5s/`` so that unchanged codes can
    be skipped on a later run.  With ``properties.chromophore`` set, no
    restraints are generated; instead the recorded codes are scanned for
    single bonds that bridge two double bonds.  A per-category summary is
    printed at the end.

    Parameters:
      params: parameter object exposing the ``restraints`` scope.

    Returns:
      None.

    Raises:
      AssertionError: from the ``assert`` statements kept from the original
        (listing skipped ligands, external-program output, non-polymer
        filtering, an undetermined pH comparison, failed Amber calculation).
    """
    pr = params.restraints
    only_i = pr.input.only_i
    only_code = pr.input.only_code
    only_start = pr.input.only_start
    chunks_n = pr.control.chunks_n
    chunks_i = pr.control.chunks_i
    list_skip = pr.output.list_skipped_ligands
    amber = pr.control.amber
    min_length_smiles = pr.control.min_length_smiles
    max_length_smiles = pr.control.max_length_smiles
    only_type = pr.control.only_type
    #
    chromophore = pr.properties.chromophore
    #
    only_external_program = pr.control.only_external_program

    def _as_int(value):
        # Best-effort conversion: values that are not int-like are kept as is.
        try:
            return int(value)
        except (TypeError, ValueError):
            return value

    def _make_dir(path):
        # The directory may already exist from a previous run.
        try:
            os.mkdir(path)
        except OSError:
            pass

    stats = {}

    def _record(category, code):
        stats.setdefault(category, set()).add(code)

    only_i = _as_int(only_i)
    chunks_n = _as_int(chunks_n)
    chunks_i = _as_int(chunks_i)
    if only_i == -1:
        only_i = None
    try:
        if only_code.lower() == "none":
            only_code = None
    except AttributeError:
        # only_code is not a string (e.g. None); leave it untouched.
        pass
    #
    print('hostname %s' % (os.environ.get("HOSTNAME", None),))
    #
    if list_skip:
        assert only_i is None, "Can only list skipped ligands if only_i is None"
        assert only_code is None, "Can only list skipped ligands if only_code is None"
    all_codes = set()
    run_codes = set()
    #
    cwd = os.getcwd()
    if pr.output.method_basis_dir:
        method_basis_dir = pr.output.method_basis_dir
    elif amber:
        method_basis_dir = "amber_library"
    else:
        method_basis_dir = get_directory_name_from_method_basis(
            pr.input.qm_method,
            pr.input.qm_basis,
            pr.input.qm_solvent_model,
        )
    print('\n Directory for output %s' % (method_basis_dir,))
    _make_dir(method_basis_dir)
    os.chdir(method_basis_dir)
    try:
        _make_dir("output")
        for i, ligand_code in enumerate(generate_ligand_codes(
                ligand_list_filename=pr.control.ligand_list)):
            _record("all", ligand_code)
            if list_skip:
                all_codes.add(ligand_code)
                # NOTE(review): unconditional abort kept from the original;
                # listing skipped ligands looks deliberately disabled.
                assert 0
            if only_i is not None and only_i != i + 1:
                continue
            if chunks_i is not None and chunks_n is not None:
                if chunks_i != i % chunks_n + 1:
                    continue
            if only_code is not None and ligand_code.upper() != only_code.upper():
                continue
            if only_code is not None:
                print('%s %s %s %s %s' % (
                    i + 1, ligand_code, only_i, chunks_i, chunks_n))
            if only_start is not None:
                if not ligand_code.startswith(only_start):
                    continue
            ligand_type = get_type(ligand_code)
            print(' %5d %3s %s' % (i, ligand_code, ligand_type))
            if only_type is not None:
                # A code without a known type cannot match the requested type.
                if (ligand_type is None
                        or only_type.lower() != ligand_type.lower()):
                    _record("polymer", ligand_code)
                    continue
            # never run
            if ligand_code in skip_codes:
                print("Ligand skipped for any of a number of reasons")
                _record("skip", ligand_code)
                continue
            # don't reproduce the geostd and monomer lib
            if not amber and pr.control.skip_ligands_in_library:
                if ligand_code in geostd_codes:
                    print('\n\tCode %s in GeoStd' % ligand_code)
                    _record("geostd", ligand_code)
                    continue
                if ligand_code in mon_lib_codes:
                    print('\n\tCode %s in Monomer Library' % ligand_code)
                    _record("monlib", ligand_code)
                    continue
            if only_external_program:
                # The external script decides per ligand; it must print
                # exactly one line and only 'True' lets the ligand through.
                # NOTE(review): the command is built by string formatting and
                # run through a shell; both values come from the run
                # parameters and the ligand list.
                lines = os.popen('iotbx.python %s %s' % (
                    only_external_program, ligand_code)).read()
                lines = lines.splitlines()
                assert len(lines) == 1
                if lines[0] not in ['True']:
                    print('external program skipping %s' % (ligand_code,))
                    continue
            # Without a chunk count, chunks_i is compared to the code itself.
            if (chunks_n is None and chunks_i is not None
                    and str(chunks_i) != ligand_code):
                continue
            if pr.control.only_non_polymers:
                print(pr.control.only_non_polymers)
                print(type(pr.control.only_non_polymers))
                if ligand_is_polymer(ligand_code):
                    _record("polymer", ligand_code)
                    continue
                else:
                    # NOTE(review): else pairing inferred from the collapsed
                    # original; confirm it belongs to the polymer test.
                    assert 0
            smiles = get_smiles(ligand_code)
            if not smiles:
                print('no SMILES found')
                _record("no smiles", ligand_code)
                continue
            if len(smiles) > max_length_smiles:
                print('SMILES too long %s %s %s' % (
                    max_length_smiles, len(smiles), smiles))
                _record("smiles too long", ligand_code)
                continue
            if len(smiles) <= min_length_smiles:
                print('SMILES too short %s %s %s' % (
                    min_length_smiles, len(smiles), smiles))
                _record("smiles too short", ligand_code)
                continue
            # A ligand whose SMILES contains a metal is recorded and skipped,
            # so that every ligand lands in one statistics category.
            # NOTE(review): the collapsed original used a for/else here whose
            # nesting is ambiguous; confirm that skipping is intended.
            metal = None
            for e in metals:
                if smiles.find(e) > -1:
                    metal = e
                    break
            if metal is not None:
                print('SMILES has metal %s %s' % (metal, smiles))
                _record("smiles has metal", ligand_code)
                continue
            print('%5d %3s "%s"' % (i + 1, ligand_code, smiles))
            #
            if pr.control.run_if_pH_same != Auto:
                same = is_same_molecule_regardless_of_pH(
                    ligand_code,
                    pr.control.run_if_pH_same,
                )
                if same is not None:
                    if pr.control.run_if_pH_same:
                        if not same:
                            print('\n\tpH has an effect')
                            continue
                    else:
                        if same:
                            print('\n\tpH has no effect')
                            continue
                else:
                    # NOTE(review): else pairing inferred; an undetermined
                    # comparison aborts.
                    assert 0
            #
            if list_skip:
                run_codes.add(ligand_code)
                continue
            #
            if not os.path.exists("md5s"):
                os.mkdir("md5s")
            md5_filename = os.path.join("md5s", "%s.md5" % ligand_code)
            cc_md5 = get_cc_md5(ligand_code)
            if not pr.control.ignore_md5:
                old_md5 = None
                if os.path.exists(md5_filename):
                    # The pickle is the one written at the end of this loop.
                    with open(md5_filename, "rb") as f:
                        old_md5 = pickle.load(f)
                if old_md5 == cc_md5:
                    print("\n\tChemical Component file unchanged\n")
                    continue
            #
            _record("run", ligand_code)
            stats.setdefault("final", set())
            ff = os.path.join(
                ligand_code[0].lower(),
                "%s.final.pdb" % ligand_code,
            )
            if os.path.exists(ff):
                stats["final"].add(ligand_code)
            if pr.control.dry_run:
                print("\n\tRunning %s\n" % ligand_code)
                continue
            #
            # Chromophore mode only collects codes; the bond analysis runs
            # after the loop.
            if chromophore:
                continue
            #
            if amber:
                rc = get_amber_filenames_from_directory_tree(
                    ligand_code,
                    ignore_output_files=pr.control.ignore_output_files,
                    pH=pr.elbow.pH,
                )
                if rc is None:
                    print('Calculation of Amber files failed')
                    assert 0
            else:
                get_elbow_molecule_cif_filename_from_directory_tree(
                    ligand_code,
                    params,
                )
            # should only do this on success
            with open(md5_filename, "wb") as f:
                pickle.dump(cc_md5, f)
            if only_i is not None:
                break
            if only_code is not None:
                break
    finally:
        # Restore the caller's working directory even if a ligand fails.
        os.chdir(cwd)
    #
    # For every recorded code (all categories except "all"), collect the
    # single bonds whose two ends each lie on one of a pair of double bonds.
    double_single = {}
    if chromophore:
        for ligand_type, ligand_set in stats.items():
            if ligand_type in ['all']:
                continue
            for ligand_code in ligand_set:
                found = double_single.setdefault(ligand_code, [])
                mol = get_elbow_molecule_from_chemical_components(ligand_code)
                double_bonds = [b for b in mol.bonds if b.order == 2]
                single_bonds = [b for b in mol.bonds if b.order == 1]
                for bond1 in double_bonds:
                    for bond2 in double_bonds:
                        if bond1 == bond2:
                            continue
                        for bond3 in single_bonds:
                            if ((bond3[0] in bond1 or bond3[0] in bond2) and
                                    (bond3[1] in bond1 or bond3[1] in bond2)):
                                if bond3 not in found:
                                    found.append(bond3)
    if double_single:
        for ligand_code, singles in sorted(double_single.items()):
            if len(singles) == 0:
                continue
            print('CHROMOPHORE %s %s' % (ligand_code, len(singles)))
    #
    if list_skip:
        print('\n\nSkipped ligands')
        for i, ligand_code in enumerate(sorted(
                all_codes.difference(run_codes))):
            # Codes skipped for lack of a SMILES string are listed as empty.
            smiles = get_smiles(ligand_code) or ""
            outl = ""
            for e in metals:
                if smiles.find(e) > -1:
                    outl += "%s " % e.replace("[", "")
            print(" %-3d %-3s %-10s %-3d %s" % (
                i + 1, ligand_code, outl, len(smiles), smiles))
    print('Statistics')
    total = 0
    for act in stats:
        print(" %-20s : %5d %s" % (
            act, len(stats[act]), list(stats[act])[:10]))
        if act not in ["all", "final"]:
            total += len(stats[act])
    print(' Total : %d' % total)
    # Codes that were run but have no "<code>.final.pdb" file.
    missing = list(
        stats.get("run", set()).difference(stats.get("final", set())))
    print(' : %s %d' % (missing, len(missing)))
def ligand_is_polymer(code):
    """Unfinished stub: print the ligand code and its type, then abort.

    The type is looked up with ``get_type``; no polymer decision is made.

    Raises:
      AssertionError: always.  Raised explicitly so that running Python with
        ``-O`` cannot turn the stub into a silent ``None`` return.
    """
    print('code %s' % (code,))
    lt = get_type(code)
    print(lt)
    raise AssertionError("ligand_is_polymer is not implemented")
def is_glyco_bond(atom1, atom2, verbose=False):
    """Return True when both atoms belong to sugar residues.

    The chemical type of each atom's parent residue is looked up with
    ``get_type`` (by ``resname``) and its upper-cased form is tested for
    membership in ``sugar_types``.

    Parameters:
      atom1, atom2: atoms exposing ``parent().resname`` and ``quote()``.
      verbose: when true, print the atoms and the looked-up types.

    Returns:
      bool: False if either type is unknown or not a sugar type, else True.
    """
    type1 = get_type(atom1.parent().resname)
    type2 = get_type(atom2.parent().resname)
    if verbose:
        # Single-argument print calls behave the same on Python 2 and 3.
        print('----- is_glyco_bond -----')
        print(atom1.quote())
        print(atom2.quote())
        print(type1)
        print(type2)
        print(sugar_types)
        # An unknown residue has no type to upper-case; print None instead of
        # raising AttributeError from the diagnostic output.
        print(type1.upper() if type1 is not None else None)
        print(type2.upper() if type2 is not None else None)
    if type1 is None or type2 is None:
        return False
    if type1.upper() not in sugar_types:
        return False
    if type2.upper() not in sugar_types:
        return False
    #
    #if atom2.parent().resname in not_correct_sugars: return False
    return True
def get_classes(atom, important_only=False, verbose=False):
    """Classify the residue that ``atom`` belongs to.

    An ``empty()`` object is created with one boolean attribute per entry of
    ``attrs``.  ``common_saccharide`` is set when the residue's chemical type
    is in ``sugar_types``; the attribute named by ``get_class`` (after the
    "modified_*" -> "other" redirect) is set when it is one of the remaining
    entries.

    Parameters:
      atom: atom exposing ``parent()`` (with ``resname``, ``altloc``,
        ``atoms()``) and ``element``.
      important_only: when true, return the first matching class name as a
        string (or "metal", see ``_filter_for_metal``) instead of the flags
        object.  If nothing matches, the flags object is returned.
      verbose: when true, print the class reported by ``get_class``.
    """
    def _num_atoms_residue(atom):
        return len(atom.parent().parent().atoms())

    def _filter_for_metal(atom, class_name):
        # "common_element" is always checked against the metal list; "other"
        # only when the enclosing residue holds a single atom.
        if class_name == "common_element":
            if atom.element.strip().upper() in ad_hoc_single_metal_residue_element_types:
                return "metal"
        if class_name == "other":
            if _num_atoms_residue(atom) == 1:
                if atom.element.strip().upper() in ad_hoc_single_metal_residue_element_types:
                    return "metal"
        return class_name
    #
    attrs = [
        "common_saccharide",  # not in get_class
        "common_water",
        "common_element",
        "common_small_molecule",
        "common_amino_acid",
        "common_rna_dna",
        "ccp4_mon_lib_rna_dna",
        "other",
        "uncommon_amino_acid",
        "unknown",
    ]
    redirect = {
        "modified_amino_acid": "other",
        "modified_rna_dna": "other",
    }
    atom_group = atom.parent()
    classes = empty()
    for attr in attrs:
        setattr(classes, attr, False)
    # only consider ccp4 names if not single atom - CD/Cd
    consider_ccp4_mon_lib_rna_dna = True
    if len(atom_group.atoms()) == 1:
        consider_ccp4_mon_lib_rna_dna = False
    gc = get_class(atom_group.resname,
                   consider_ccp4_mon_lib_rna_dna=consider_ccp4_mon_lib_rna_dna)
    if verbose:
        # gc already holds the class for these arguments, so it is reused
        # rather than looked up again.  The single-argument print call works
        # on both Python 2 and 3.
        print(' atom_group1: altloc="%s" resname="%s" class="%s"' % (
            atom_group.altloc,
            atom_group.resname,
            gc,
        ))
    gc = redirect.get(gc, gc)
    for i, attr in enumerate(attrs):
        rc = None
        if i:
            rc = gc
        else:
            # First entry (common_saccharide): decided from the chemical
            # type, not from get_class.
            if atom_group.resname in one_letter_given_three_letter:
                gotten_type = "L-PEPTIDE LINKING"
            elif atom_group.resname in ["HOH"]:
                gotten_type = "NON-POLYMER"
            else:
                gotten_type = get_type(atom_group.resname)
            if gotten_type is not None:
                if gotten_type.upper() in sugar_types:
                    rc = attr
        if rc == attr:
            if important_only:
                return _filter_for_metal(atom, rc)
            setattr(classes, attr, True)
    return classes
def is_glyco_amino_bond(atom1, atom2, verbose=False):
    """Return True when the two atoms join one sugar residue and one amino acid.

    The chemical type of each atom's parent residue is looked up with
    ``get_type`` (by ``resname``), upper-cased, and tested for membership in
    ``sugar_types`` and ``amino_types``.

    Parameters:
      atom1, atom2: atoms exposing ``parent().resname`` and ``quote()``.
      verbose: when true, print the atoms and the looked-up types.

    Returns:
      bool: True only if exactly one residue is a sugar and exactly one is an
      amino acid; False otherwise, including when either type is unknown.
    """
    type1 = get_type(atom1.parent().resname)
    type2 = get_type(atom2.parent().resname)
    if verbose:
        # Single-argument print calls behave the same on Python 2 and 3.
        print('----- is_glyco_amino_bond -----')
        print(atom1.quote())
        print(atom2.quote())
        print(type1)
        print(type2)
        print(sugar_types)
        # An unknown residue has no type to upper-case; print None instead of
        # raising AttributeError from the diagnostic output.
        print(type1.upper() if type1 is not None else None)
        print(type2.upper() if type2 is not None else None)
    if type1 is None or type2 is None:
        return False
    sugars = 0
    aminos = 0
    for residue_type in (type1.upper(), type2.upper()):
        # A sugar match takes precedence over an amino-acid match.
        if residue_type in sugar_types:
            sugars += 1
        elif residue_type in amino_types:
            aminos += 1
    return sugars == 1 and aminos == 1