def validate_residues(self):
    """Fill ``self.residues`` with ValidationResidue objects for the model.

    Walks the residue triples produced by ``generate_protein_threes`` over
    ``self.pdb_hierarchy`` and appends, in order:

    * for the very first triple only, an extra entry built with ``index=0``;
    * for every triple, an entry built with the default ``index``;
    * for any triple whose ``end`` attribute is true, an extra entry built
      with ``index=2``.

    NOTE(review): ``index`` presumably selects which residue of the triple
    is validated (0 = first, 2 = last) -- confirm against ValidationResidue.
    """
    from mmtbx.conformation_dependent_library import generate_protein_threes
    from mmtbx.rotamer import ramachandran_eval, rotamer_eval

    # The evaluators are created once here and shared by every
    # ValidationResidue instead of being rebuilt per residue.
    rama_scorer = ramachandran_eval.RamachandranEval()
    rota_scorer = rotamer_eval.RotamerEval()
    rota_names = rotamer_eval.RotamerID()  # loads in the rotamer names

    def build(triple, **index_kw):
        # Single construction point so the shared evaluators are always
        # passed in the same positional order.
        return ValidationResidue(
            triple, rama_scorer, rota_scorer, rota_names, **index_kw)

    triples = generate_protein_threes(
        hierarchy=self.pdb_hierarchy,
        include_non_linked=True,
        backbone_only=False,
        geometry=None)

    for position, triple in enumerate(triples):
        if position == 0:
            self.residues.append(build(triple, index=0))
        self.residues.append(build(triple))
        if triple.end:
            self.residues.append(build(triple, index=2))
def run(args, log=sys.stdout):
    """List Ramachandran outliers for the model file named by ``args[0]``.

    The file is read with ``iotbx.pdb.input``, converted to a hierarchy and
    handed to ``list_rama_outliers_h`` together with the ``rama_eval``
    attribute of a fresh ``RamachandranEval``.  The returned report is
    written out followed by a line containing ``END``.

    Parameters:
      args: sequence whose first element is the path of the model file.
      log: writable stream receiving the output.  ``None`` is treated as
        ``sys.stdout``.

    Previously ``log`` was accepted but ignored and everything was printed
    to standard output; the report now goes to the stream the caller asked
    for.
    """
    if log is None:
        log = sys.stdout
    pdb_h = iotbx.pdb.input(
        source_info=None, file_name=args[0]).construct_hierarchy()
    r = ramachandran_eval.RamachandranEval()
    outp = list_rama_outliers_h(pdb_h, r.rama_eval)
    # log.write is used instead of print(..., file=log) so the block does not
    # depend on whether the file enables print_function on Python 2.
    log.write(str(outp) + "\n")
    log.write("END\n")
def find_region_max_value(rama_key, phi, psi, allow_outside=False):
    """Return the favored-region peak associated with a (phi, psi) point.

    The point is scored with ``RamachandranEval.evaluate`` and classified
    with ``ramalyze.evalScore``.  Unless ``allow_outside`` is true, a point
    that is not classified ``RAMALYZE_FAVORED`` yields ``None``.

    Otherwise the integer-truncated angles index ``fav_tables[rama_key]``.
    A non-zero table entry ``n`` maps to ``get_favored_peaks(rama_key)[n - 1]``.
    When the entry at the point itself is zero, the 3x3 neighbourhood is
    checked; if that is all zero too, the result is ``None`` unless
    ``allow_outside`` is true, in which case progressively larger windows
    are scanned until a non-zero entry is found.

    Parameters:
      rama_key: key selecting the table / peak list.
      phi, psi: angles in degrees (anything ``int()`` accepts).
      allow_outside: also search for a region when the point is not
        favored or no region is found next to it.

    Returns:
      An element of ``get_favored_peaks(rama_key)``, or ``None``.
    """
    from mmtbx.rotamer import ramachandran_eval
    from collections import Counter

    def wrap(angle):
        # Shift by whole turns; note that both 180 and -180 end up as 180.
        wrapped = int(angle)
        while wrapped >= 180:
            wrapped -= 360
        while wrapped <= -180:
            wrapped += 360
        return wrapped

    evaluator = ramachandran_eval.RamachandranEval()
    score = evaluator.evaluate(rama_key, [phi, psi])
    classification = ramalyze.evalScore(rama_key, score)
    if classification != RAMALYZE_FAVORED and not allow_outside:
        return None

    phi_deg = int(phi)
    psi_deg = int(psi)
    peaks = get_favored_peaks(rama_key)

    def region_at(d_phi, d_psi):
        # Table axes are offset by 180 so that wrapped angles are valid
        # non-negative indices.
        row = fav_tables[rama_key][wrap(phi_deg + d_phi) + 180]
        return row[wrap(psi_deg + d_psi) + 180]

    region = region_at(0, 0)
    if region != 0:
        return peaks[region - 1]

    # look around, rounding problems
    neighbours = [region_at(i, j) for i in (-1, 0, 1) for j in (-1, 0, 1)]
    for candidate in Counter(neighbours).elements():
        if candidate != 0:
            return peaks[candidate - 1]

    if not allow_outside:
        return None

    # More comprehensive search over growing windows.  The only way out of
    # this loop is finding a non-zero table entry.
    # NOTE(review): range(-c, c) is half-open, so each window excludes the
    # +c row/column and the first hit in scan order is returned, which is
    # not necessarily the geometrically nearest region.
    half_width = 1
    while True:
        for i in range(-half_width, half_width):
            for j in range(-half_width, half_width):
                region = region_at(i, j)
                if region != 0:
                    return peaks[region - 1]
        half_width += 2
def __init__(self, residue, sites_cart, mon_lib_srv, params,
             prev_residue=None, next_residue=None, next_next_residue=None,
             evaluate_backbone_callback=None):
    """Set up scorers and atom selections for one amino-acid residue.

    Parameters:
      residue: residue to work on; its ``resname`` must be classified as
        ``"common_amino_acid"`` by
        ``iotbx.pdb.common_residue_names_get_class``.
      sites_cart: coordinates; a deep copy is kept as ``self.sites_start``.
      mon_lib_srv: passed through to ``generate_sidechain_clusters``.
      params: not referenced in this method beyond ``adopt_init_args``.
      prev_residue, next_residue: when both are given, ``set_up_backrub``
        is called.
      next_next_residue: when also given, ``set_up_shear`` is called and
        the shear selections are used as the primary selection.
      evaluate_backbone_callback: not referenced in this method beyond
        ``adopt_init_args``.

    Attributes set here: ``rotamer_scorer``, ``ramachandran_scorer``,
    ``sidechain_clusters``, ``sites_start``, ``i_seqs_residue``,
    ``i_seqs_sidechain`` and ``i_seqs_primary``.
    """
    # Must stay the first statement: locals() is captured here, so binding
    # any other local earlier would pass it along as well.  Presumably this
    # stores each argument on self (self.residue is read further down).
    adopt_init_args(self, locals())
    from mmtbx.rotamer import rotamer_eval
    from mmtbx.rotamer import ramachandran_eval
    import iotbx.pdb
    from scitbx.array_family import flex

    classify = iotbx.pdb.common_residue_names_get_class
    assert classify(residue.resname) == "common_amino_acid", residue.resname

    self.rotamer_scorer = rotamer_eval.RotamerEval(data_version="8000")
    self.ramachandran_scorer = ramachandran_eval.RamachandranEval()
    self.sidechain_clusters = generate_sidechain_clusters(
        residue=residue,
        mon_lib_srv=mon_lib_srv)
    self.sites_start = sites_cart.deep_copy()
    self.i_seqs_residue = residue.atoms().extract_i_seq()

    # Atoms with these names are left out of the side-chain selection.
    # NOTE(review): "self" is not a plausible atom name; it looks like the
    # residue of a search-and-replace -- confirm what was intended.
    excluded_names = ("C", "N", "H", "CA", "CB", "self")
    self.i_seqs_sidechain = flex.size_t()
    for atom in self.residue.atoms():
        if atom.name.strip() in excluded_names:
            continue
        self.i_seqs_sidechain.append(atom.i_seq)

    if None in (prev_residue, next_residue):
        # A neighbour is missing: the primary selection is the residue's
        # own i_seq array (same object, not a copy).
        self.i_seqs_primary = self.i_seqs_residue
    else:
        self.i_seqs_primary = flex.size_t()
        self.set_up_backrub()
        if next_next_residue is None:
            selections = (self.backrub_i_seqs,)
        else:
            self.set_up_shear()
            selections = (
                self.shear_i_seqs_primary1,
                self.shear_i_seqs_primary2,
                self.shear_i_seqs_middle,
            )
        for selection in selections:
            for i_seq in selection:
                self.i_seqs_primary.append(i_seq)
def __init__(self, rama_type, rama_region, start_point, grid_size,
             corners_inside, use_allowed=False):
    """Build a grid of square cells covering one favored Ramachandran region.

    Grid points come from ``grid_over_favored._get_grid_points`` for each
    axis.  For every cell, its four corners are passed to
    ``find_region_max_value``; a corner counts as inside when the returned
    value is not ``None`` and its first element equals ``rama_region``.
    Cells with at least ``corners_inside`` such corners are stored as
    ``square`` objects carrying the score evaluated at the cell centre.
    Finally the number of cells is printed and ``self.grid.scale_to_1()``
    is called.

    Parameters:
      rama_type: key passed to ``find_region_max_value`` and
        ``RamachandranEval.evaluate``.
      rama_region: region identifier compared against ``reg[0]``.
      start_point: pair; element 0 seeds the x grid, element 1 the y grid.
      grid_size: edge length of a cell.
      corners_inside: minimum number of corners (0-4) that must lie in the
        region for a cell to be kept.
      use_allowed: accepted but not used by this method.
    """
    self.rama_type = rama_type
    self.rama_region = rama_region
    self.start_point = start_point
    self.grid_size = grid_size
    self.corners_inside = corners_inside
    self.r = ramachandran_eval.RamachandranEval()
    self.grid = grid()  # list of square objects
    points_x = grid_over_favored._get_grid_points(start_point[0], grid_size)
    points_y = grid_over_favored._get_grid_points(start_point[1], grid_size)
    for x in points_x:
        for y in points_y:
            n_inside = 0
            for dx in (0, 1):
                for dy in (0, 1):
                    reg = find_region_max_value(
                        rama_type, x + grid_size * dx, y + grid_size * dy)
                    if reg is not None and reg[0] == rama_region:
                        n_inside += 1
            if n_inside >= corners_inside:
                v = self.r.evaluate(
                    rama_type, [x + 0.5 * grid_size, y + 0.5 * grid_size])
                self.grid.append(
                    square((x, y), (x + grid_size, y + grid_size), v))
    # A single pre-formatted argument prints the same text whether print is
    # the Python 2 statement or the Python 3 function; the former
    # `print "...", n` form is a SyntaxError on Python 3.
    print(" Number of grid cells %d" % len(self.grid))
    self.grid.scale_to_1()
def __init__(self, pdb_hierarchy, outliers_only=False, show_errors=False,
             out=sys.stdout, quiet=False):
    """Run Ramachandran validation over every residue triple in a model.

    For each triple from ``generate_protein_threes`` that has phi/psi atoms
    and computable dihedrals, the central residue is assigned a residue
    type (general, glycine, cis/trans proline, pre-proline, Ile/Val),
    scored with ``RamachandranEval.evaluate`` and classified with
    ``self.evaluateScore``.  A ``ramachandran`` result is built for it and:

    * counted once per model/chain/resseq/icode key, and only for altloc
      ``''`` or ``'A'``, in ``n_total``, ``n_type`` and the validation
      counts;
    * appended to ``self.results`` when ``outliers_only`` is false or the
      residue is an outlier, skipping duplicates of a key whose altloc is
      blank;
    * for outliers, its atom i_seqs are added to ``self._outlier_i_seqs``.

    Afterwards the results are sorted by ``model_id + id_str()`` and
    ``out_percent`` / ``fav_percent`` are set from the outlier and favored
    fractions multiplied by 100.

    Parameters:
      pdb_hierarchy: model hierarchy.  If every atom i_seq is 0 the i_seqs
        are reset first.
      outliers_only: keep only outlier residues in ``self.results``.
      show_errors, out, quiet: not referenced in this method.

    NOTE(review): ``self.n_total`` and ``self.results`` are used but not
    created here; presumably ``validation.__init__`` provides them.
    """
    # Optimization hint: make it possible to pass
    # ramachandran_eval.RamachandranEval() from outside.
    # Better - convert this to using mmtbx.model.manager where
    # RamachandranEval is already available.
    validation.__init__(self)
    self.n_allowed = 0
    self.n_favored = 0
    self.n_type = [0] * 6
    self._outlier_i_seqs = flex.size_t()
    pdb_atoms = pdb_hierarchy.atoms()
    all_i_seqs = pdb_atoms.extract_i_seq()
    if all_i_seqs.all_eq(0):
        pdb_atoms.reset_i_seq()
    # The result is currently unused; the call is retained so that whatever
    # it does or raises for a given hierarchy is unchanged.
    use_segids = utils.use_segids_in_place_of_chainids(
        hierarchy=pdb_hierarchy)
    # Sets rather than lists: both are only ever tested for membership, once
    # per residue, which was quadratic in the number of residues with lists.
    count_keys = set()
    uniqueness_keys = set()
    r = ramachandran_eval.RamachandranEval()
    for three in generate_protein_threes(hierarchy=pdb_hierarchy,
                                         geometry=None):
        main_residue = three[1]
        phi_psi_atoms = three.get_phi_psi_atoms()
        if phi_psi_atoms is None:
            continue
        phi_atoms, psi_atoms = phi_psi_atoms
        phi = get_dihedral(phi_atoms)
        psi = get_dihedral(psi_atoms)
        # should find the CA of the center residue
        coords = get_center(main_residue)
        if phi is None or psi is None:
            continue

        # Residue-type classification; order matters (Gly and Pro take
        # precedence over pre-proline, which precedes Ile/Val).
        resname3 = main_residue.resname[0:3]
        res_type = RAMA_GENERAL
        if resname3 == "GLY":
            res_type = RAMA_GLYCINE
        elif resname3 == "PRO":
            if is_cis_peptide(three):
                res_type = RAMA_CISPRO
            else:
                res_type = RAMA_TRANSPRO
        elif three[2].resname == "PRO":
            res_type = RAMA_PREPRO
        elif resname3 == "ILE" or resname3 == "VAL":
            res_type = RAMA_ILE_VAL

        value = r.evaluate(res_types[res_type], [phi, psi])
        ramaType = self.evaluateScore(res_type, value)
        is_outlier = ramaType == RAMALYZE_OUTLIER
        # XXX only save kinemage data for outliers
        if is_outlier:
            c_alphas = get_cas_from_three(three)
            assert (len(c_alphas) == 3)
            markup = self.as_markup_for_kinemage(c_alphas)
        else:
            markup = None
        result = ramachandran(
            model_id=main_residue.parent().parent().parent().id,
            chain_id=main_residue.parent().parent().id,
            resseq=main_residue.resseq,
            icode=main_residue.icode,
            resname=main_residue.resname,
            altloc=get_altloc_from_three(three),
            segid=None,  # XXX ???
            phi=phi,
            psi=psi,
            rama_type=ramaType,
            res_type=res_type,
            score=value * 100,
            outlier=is_outlier,
            xyz=coords,
            markup=markup)
        result_key = (result.model_id + result.chain_id + result.resseq
                      + result.icode)
        # Count each residue position once, and only via its blank or 'A'
        # alternate, so alternates do not inflate the totals.
        if result.altloc in ('', 'A') and result_key not in count_keys:
            self.n_total += 1
            self.n_type[res_type] += 1
            self.add_to_validation_counts(ramaType)
            count_keys.add(result_key)
        if not outliers_only or is_outlier:
            # The threes/conformers method results in some redundant result
            # calculations in structures with alternates.  Tracking the
            # keys already stored prevents redundant results being added
            # to the final list.
            if result.altloc != '' or result_key not in uniqueness_keys:
                self.results.append(result)
                uniqueness_keys.add(result_key)
        if is_outlier:
            i_seqs = main_residue.atoms().extract_i_seq()
            assert (not i_seqs.all_eq(0))
            self._outlier_i_seqs.extend(i_seqs)
    self.results.sort(key=lambda res: res.model_id + res.id_str())
    out_count, out_percent = self.get_outliers_count_and_fraction()
    fav_count, fav_percent = self.get_favored_count_and_fraction()
    self.out_percent = out_percent * 100.0
    self.fav_percent = fav_percent * 100.0
def load_emsley8k_tables():
    """Load the six rama8000 data files into ``lookup_table`` objects.

    Each file under ``chem_data/rotarama_data`` is read as whitespace
    separated ``phi psi value`` rows; lines starting with ``#`` and blank
    lines are skipped.  Every row is classified through
    ``RamachandranEval().rama_eval`` (``get_score`` + ``evaluate_score``)
    using the table index paired with the file.

    A table is then filled over the grid of odd angles -179..179 (step 2) on
    both axes, phi-major:

    * outlier points store ``-1 + value / max_outlier_value``;
    * favored and allowed points store the file value unchanged;
    * grid points absent from the file store ``-1``.

    Returns:
      dict mapping the residue-class name (e.g. ``"general"``) to
      ``lookup_table(data, 180)``.

    Raises:
      RuntimeError: if a point is classified as something other than
        outlier, favored or allowed.
    """
    tables = {}
    name_to_file = [
        ("general", "rama8000-general-noGPIVpreP.data", 0),
        ("glycine", "rama8000-gly-sym.data", 1),
        ("cis-proline", "rama8000-cispro.data", 2),
        ("trans-proline", "rama8000-transpro.data", 3),
        ("pre-proline", "rama8000-prepro-noGP.data", 4),
        ("isoleucine or valine", "rama8000-ileval-nopreP.data", 5),
    ]
    # Fixed iteration order of the output grid (phi outer, psi inner).
    angles = range(-179, 180, 2)
    grid_keys = [(i, j) for i in angles for j in angles]
    R = ramachandran_eval.RamachandranEval()
    outlier = ramalyze.RAMALYZE_OUTLIER
    favored = ramalyze.RAMALYZE_FAVORED
    allowed = ramalyze.RAMALYZE_ALLOWED
    for (rama_key, file_name, table_index) in name_to_file:
        file_name = libtbx.env.find_in_repositories(
            relative_path="chem_data/rotarama_data/%s" % (file_name),
            test=os.path.isfile)
        di = {}
        status = {}
        outlier_vals = flex.double()
        with open(file_name, "r") as f:
            for line in f:
                # Blank lines used to reach the 3-way unpack and crash.
                if line[0] == "#" or not line.strip():
                    continue
                phi, psi, val = line.split()
                phi = int(float(phi))
                psi = int(float(psi))
                val = float(val)
                di[(phi, psi)] = val
                rama_score = R.rama_eval.get_score(table_index, phi, psi)
                evaluation = R.rama_eval.evaluate_score(
                    table_index, rama_score)
                if evaluation == outlier:
                    outlier_vals.append(val)
                    status[(phi, psi)] = outlier
                elif evaluation == favored:
                    status[(phi, psi)] = favored
                elif evaluation == allowed:
                    status[(phi, psi)] = allowed
                else:
                    raise RuntimeError("Not supposed to be here.")
        # Only the outlier maximum is needed: favored and allowed values
        # are stored as-is.
        max_outlier = flex.max(outlier_vals)
        data = flex.double()
        for key in grid_keys:
            if key in di:
                val = di[key]
                if status[key] == outlier:
                    val = -1 + val / max_outlier
            else:
                val = -1
            data.append(val)
        tables[rama_key] = lookup_table(data, 180)
    return tables