def one(hierarchy, method):
    """Annotate *hierarchy* with one search method and return the selections.

    Default secondary-structure parameters are extracted from
    ``sec_str_master_phil_str`` and only ``protein.search_method`` is
    overridden with *method*.  The annotation produced by ``ss_manager`` is
    deep-copied and, when ``filter_short`` is true, passed through
    ``remove_short_annotations``.  The helix and sheet selections are then
    intersected with the ``mcss`` selection.

    ``filter_short``, ``mcss`` and ``get_counts`` are not defined here; they
    are resolved from the surrounding scope.

    Returns a ``group_args`` with ``h_sel``, ``s_sel`` and ``counts`` (the
    result of ``get_counts`` for the two selections).
    """
    master_phil = iotbx.phil.parse(sec_str_master_phil_str)
    work_params = master_phil.fetch().extract()
    work_params.secondary_structure.protein.search_method = method

    cache = hierarchy.atom_selection_cache()
    manager = ss_manager(
        hierarchy,
        atom_selection_cache=cache,
        geometry_restraints_manager=None,
        sec_str_from_pdb_file=None,
        params=work_params.secondary_structure,
        was_initialized=False,
        verbose=-1,
        log=null_out())

    # Work on a copy so the manager's own annotation is left untouched.
    annotation = manager.actual_sec_str.deep_copy()
    if filter_short:
        annotation.remove_short_annotations(
            helix_min_len=4,
            sheet_min_len=4,
            keep_one_stranded_sheets=True)

    main_chain = cache.selection(mcss)
    helices = cache.selection(annotation.overall_helices_selection())
    sheets = cache.selection(annotation.overall_sheets_selection())

    # Restrict both selections to the atoms picked by mcss.
    helices = helices & main_chain
    sheets = sheets & main_chain

    return group_args(
        h_sel=helices,
        s_sel=sheets,
        counts=get_counts(hierarchy=hierarchy, h_sel=helices, s_sel=sheets))
def __init__(self, model, log):
    """Load the reference data and classify the residues of *model*.

    Steps performed:

    * locate ``chem_data/rama_z/top8000_rama_z_dict.pkl`` and
      ``chem_data/rama_z/rmsd.pkl`` via ``libtbx.env.find_in_repositories``
      and load them into ``self.db`` and ``self.rmsd_estimator``;
    * initialise the per-class containers (keys ``"H"``, ``"S"``, ``"L"``
      and, where present, ``"W"``);
    * run ``ss_manager`` with ``search_method = "from_ca"`` and
      ``from_ca_conservative = True`` and keep the helix / sheet selections
      obtained after ``remove_short_annotations``;
    * walk ``generate_protein_threes`` and, for every triple with phi/psi
      atoms that was not seen before, append
      ``["", rkey, resname, ss_type, phi, psi]`` to ``self.res_info`` and
      bump ``self.residue_counts[ss_type]``;
    * print every ``self.res_info`` entry to ``self.log``.

    Parameters:
      model: object providing ``get_atom_selection_cache()`` and
        ``get_hierarchy()``.
      log: file-like object stored as ``self.log`` and used as print target.
    """
    db_path, rmsd_path = [
        libtbx.env.find_in_repositories(
            relative_path=rel, test=os.path.isfile)
        for rel in ("chem_data/rama_z/top8000_rama_z_dict.pkl",
                    "chem_data/rama_z/rmsd.pkl")]
    self.log = log
    # Loading takes ~0.15 seconds, so no caching is attempted.
    self.db = easy_pickle.load(db_path)
    self.rmsd_estimator = easy_pickle.load(rmsd_path)

    self.calibration_values = dict(
        H=(-0.045355950779513175, 0.1951165524439217),
        S=(-0.0425581278436754, 0.20068584887814633),
        L=(-0.018457764754231075, 0.15788374669456848),
        W=(-0.016806654295023003, 0.12044960331869274))

    ss_classes = ("H", "S", "L")
    self.residue_counts = dict.fromkeys(ss_classes, 0)
    self.z_score = dict.fromkeys(ss_classes + ("W",))
    self.interpolation_fs = {ss: {} for ss in ss_classes}
    self.means = {ss: {} for ss in ss_classes}
    self.stds = {ss: {} for ss in ss_classes}
    self.phi_step = self.psi_step = 4
    self.n_phi_half = self.n_psi_half = 45
    self.res_info = []

    # Secondary-structure annotation derived with the "from_ca" method.
    cache = model.get_atom_selection_cache()
    master_phil = iotbx.phil.parse(sec_str_master_phil_str)
    extracted = master_phil.fetch().extract()
    extracted.secondary_structure.protein.search_method = "from_ca"
    extracted.secondary_structure.from_ca_conservative = True
    hierarchy = model.get_hierarchy()
    self.ssm = ss_manager(
        hierarchy,
        atom_selection_cache=cache,
        geometry_restraints_manager=None,
        sec_str_from_pdb_file=None,
        params=extracted.secondary_structure,
        was_initialized=False,
        mon_lib_srv=None,
        verbose=-1,
        log=null_out())

    annotation = self.ssm.actual_sec_str.deep_copy()
    annotation.remove_short_annotations(
        helix_min_len=4,
        sheet_min_len=4,
        keep_one_stranded_sheets=True)
    self.helix_sel = cache.selection(annotation.overall_helices_selection())
    self.sheet_sel = cache.selection(annotation.overall_sheets_selection())

    # A triple is identified by the i_seqs of its phi atoms plus the last
    # psi atom; each such key is processed only once.
    seen_keys = set()
    for triple in generate_protein_threes(hierarchy=hierarchy, geometry=None):
        central = triple[1]
        angle_atoms = triple.get_phi_psi_atoms()
        if angle_atoms is None:
            continue
        phi_atoms, psi_atoms = angle_atoms
        atom_key = "%s" % (
            [atom.i_seq for atom in phi_atoms] + [psi_atoms[-1].i_seq])
        if atom_key in seen_keys:
            continue
        phi, psi = triple.get_phi_psi_angles()
        rkey = triple.get_ramalyze_key()
        ss_type = self._figure_out_ss(triple)
        self.res_info.append(["", rkey, central.resname, ss_type, phi, psi])
        self.residue_counts[ss_type] += 1
        seen_keys.add(atom_key)

    # "W" is the total over the three classes.
    self.residue_counts["W"] = sum(
        self.residue_counts[ss] for ss in ss_classes)

    for record in self.res_info:
        print(record, file=self.log)
def __init__(self, models, log):
    """Load the reference data and classify the residues of every model.

    Steps performed:

    * locate ``chem_data/rama_z/top8000_rama_z_dict.pkl`` via
      ``libtbx.env.find_in_repositories`` and load it into ``self.db``
      (on Python 3 the keys and sub-keys are additionally registered in
      their ``to_str`` form);
    * initialise the per-class containers (keys ``"H"``, ``"S"``, ``"L"``
      and, where present, ``"W"``);
    * for each model, run ``ss_manager`` with
      ``search_method = "from_ca"`` and ``from_ca_conservative = True``,
      then walk ``generate_protein_threes`` and, for every triple with
      phi/psi atoms not seen before in that model, append
      ``["", rkey, resname, ss_type, phi, psi]`` to ``self.res_info`` and
      bump ``self.residue_counts[ss_type]``.

    When a model's hierarchy holds more than one model, only a detached
    copy of its first model is analysed.

    ``self.helix_sel`` and ``self.sheet_sel`` are reassigned for every
    model, so after construction they describe the last model processed;
    they are not set at all when *models* is empty.

    Parameters:
      models: sized iterable of objects providing ``get_hierarchy()`` and
        ``get_atom_selection_cache()``.
      log: stored as ``self.log``.
    """
    db_path = libtbx.env.find_in_repositories(
        relative_path="chem_data/rama_z/top8000_rama_z_dict.pkl",
        test=os.path.isfile)
    self.log = log
    # this takes ~0.15 seconds, so I don't see a need to cache it somehow.
    self.db = easy_pickle.load(db_path)

    # =====================================================================
    # change keys in pickle to Python 3 string
    # very temporary fix until pickle is updated
    if sys.version_info.major == 3:
        from libtbx.utils import to_str
        for key in list(self.db.keys()):
            sub_db = self.db[key]
            # The converted key points at the very same sub-dictionary, so
            # the converted sub-keys below are visible through both keys.
            self.db[to_str(key)] = sub_db
            for subkey in list(sub_db.keys()):
                sub_db[to_str(subkey)] = sub_db[subkey]
    # =====================================================================

    self.calibration_values = {
        'H': (-0.045355950779513175, 0.1951165524439217),
        'S': (-0.0425581278436754, 0.20068584887814633),
        'L': (-0.018457764754231075, 0.15788374669456848),
        'W': (-0.016806654295023003, 0.12044960331869274),
    }
    self.residue_counts = {"H": 0, "S": 0, "L": 0}
    self.z_score = {"H": None, "S": None, "L": None, 'W': None}
    self.means = {"H": {}, "S": {}, "L": {}}
    self.stds = {"H": {}, "S": {}, "L": {}}

    self.phi_step = 4
    self.psi_step = 4
    self.n_phi_half = 45
    self.n_psi_half = 45

    # this is needed to disable e.g. selection functionality when
    # multiple models are present
    self.n_models = len(models)
    self.res_info = []

    # Parsing the master phil string does not depend on the model, so it is
    # done once here; a fresh parameter extract is still made per model.
    sec_str_master_phil = iotbx.phil.parse(sec_str_master_phil_str)

    for model in models:
        if model.get_hierarchy().models_size() > 1:
            # Analyse only the first model, as a stand-alone hierarchy.
            hierarchy = iotbx.pdb.hierarchy.root()
            m = model.get_hierarchy().models()[0].detached_copy()
            hierarchy.append_model(m)
            asc = hierarchy.atom_selection_cache()
        else:
            hierarchy = model.get_hierarchy()
            asc = model.get_atom_selection_cache()

        ss_params = sec_str_master_phil.fetch().extract()
        ss_params.secondary_structure.protein.search_method = "from_ca"
        ss_params.secondary_structure.from_ca_conservative = True
        ssm = ss_manager(
            hierarchy,
            atom_selection_cache=asc,
            geometry_restraints_manager=None,
            sec_str_from_pdb_file=None,
            params=ss_params.secondary_structure,
            was_initialized=False,
            mon_lib_srv=None,
            verbose=-1,
            log=null_out(),
        )
        filtered_ann = ssm.actual_sec_str.deep_copy()
        filtered_ann.remove_short_annotations(
            helix_min_len=4, sheet_min_len=4, keep_one_stranded_sheets=True)
        # Overwritten on every iteration; _figure_out_ss is called below
        # while these still refer to the current model.
        self.helix_sel = asc.selection(
            filtered_ann.overall_helices_selection())
        self.sheet_sel = asc.selection(
            filtered_ann.overall_sheets_selection())

        # A triple is identified by the i_seqs of its phi atoms plus the
        # last psi atom; the set is reset for each model.
        used_atoms = set()
        for three in generate_protein_threes(
                hierarchy=hierarchy, geometry=None):
            main_residue = three[1]
            phi_psi_atoms = three.get_phi_psi_atoms()
            if phi_psi_atoms is None:
                continue
            phi_atoms, psi_atoms = phi_psi_atoms
            key = [x.i_seq for x in phi_atoms] + [psi_atoms[-1].i_seq]
            key = "%s" % key
            if key in used_atoms:
                continue
            phi, psi = three.get_phi_psi_angles()
            rkey = three.get_ramalyze_key()
            resname = main_residue.resname
            ss_type = self._figure_out_ss(three)
            self.res_info.append(["", rkey, resname, ss_type, phi, psi])
            self.residue_counts[ss_type] += 1
            used_atoms.add(key)

    # "W" is the total over the three classes, accumulated over all models.
    self.residue_counts["W"] = (
        self.residue_counts["H"]
        + self.residue_counts["S"]
        + self.residue_counts["L"])