def exercise_01 () :
  """
  Sanity check - don't crash when mean intensity for a bin is zero.

  Zero-valued reflections are appended to a random-structure amplitude set;
  French-Wilson scaling is expected to raise Sorry for the amplitudes and to
  run to completion for the corresponding intensities.
  """
  structure = random_structure.xray_structure(
    unit_cell=(50,50,50,90,90,90),
    space_group_symbol="P1",
    n_scatterers=1200,
    elements="random")
  amplitudes = abs(structure.structure_factors(d_min=1.5).f_calc())
  amplitudes = amplitudes.set_observation_type_xray_amplitude()
  # Reflections of the 1.4 A complete set that are absent from the 1.5 A data
  # are added back with a value of zero.
  full_set = amplitudes.complete_set(d_min=1.4)
  missing = full_set.lone_set(other=amplitudes)
  zeros = missing.array(data=flex.double(missing.size(), 0))
  zeros.set_observation_type_xray_amplitude()
  amplitudes = amplitudes.concatenate(other=zeros)
  sigmas = flex.double(amplitudes.size(), 0.1) + (amplitudes.data() * 0.03)
  amplitudes = amplitudes.customized_copy(sigmas=sigmas)
  # Amplitude input must be refused.
  try :
    french_wilson.french_wilson_scale(miller_array=amplitudes, log=null_out())
  except Sorry :
    pass
  else :
    raise Exception_expected
  # Intensity input must be processed without raising.
  intensities = amplitudes.f_as_f_sq().set_observation_type_xray_intensity()
  french_wilson.french_wilson_scale(miller_array=intensities, log=null_out())
def exercise_intensity_output () :
  """
  Check that phenix.fmodel can write intensities and rejects bad requests.

  Three runs are made: one with an explicit label (must produce an MTZ file
  whose first array is an X-ray intensity array), one without the label and
  one with the CNS format; the latter two are expected to raise Sorry.
  """
  output_file = "tst_fmodel_intensity.mtz"
  # Remove leftovers from earlier runs.  The output of *this* test must be
  # removed too, otherwise the isfile() assertion below can be satisfied by a
  # stale file.
  for stale_file in ("tst_fmodel_anomalous.mtz", output_file) :
    if (os.path.isfile(stale_file)) :
      os.remove(stale_file)
  pdb_file = make_fake_anomalous_data.write_pdb_input_cd_cl(
    file_base="tst_fmodel_anomalous")
  # phenix.fmodel (with wavelength)
  args = [
    pdb_file,
    "high_resolution=1.0",
    "wavelength=1.116",
    "obs_type=intensities",
    "type=real",
    "output.file_name=tst_fmodel_intensity.mtz",
    "r_free_flags_fraction=0.1",
  ]
  args2 = args + ["label=Imodel"]
  fmodel.run(args=args2, log=null_out())
  assert os.path.isfile(output_file)
  mtz_in = file_reader.any_file(output_file)
  assert mtz_in.file_server.miller_arrays[0].is_xray_intensity_array()
  # Without the label the run is expected to fail.
  try :
    fmodel.run(args=args, log=null_out())
  except Sorry :
    pass
  else :
    raise Exception_expected
  # Requesting CNS format is expected to fail as well.
  try :
    fmodel.run(args=args+["format=cns"], log=null_out())
  except Sorry :
    pass
  else :
    raise Exception_expected
def exercise () :
  """
  Run find_peaks_holes on generated calcium inputs and check its reports.

  Covers the default run (result and summary must be picklable), a run with
  filter_peaks_by_2fofc=1.0 and a run with include_peaks_near_model=True.
  The generated input files are removed at the end.
  """
  from mmtbx.regression.make_fake_anomalous_data import generate_calcium_inputs
  from mmtbx.command_line import find_peaks_holes
  mtz_file, pdb_file = generate_calcium_inputs(
    file_base = "tst_find_peaks_holes", anonymize = True)
  # --- default parameters
  run_log = StringIO()
  peaks_holes = find_peaks_holes.run(
    args=[pdb_file, mtz_file],
    out=run_log)
  peaks_holes.save_pdb_file(file_name="tst_fph_peaks.pdb", log=null_out())
  easy_pickle.dumps(peaks_holes)
  summary = peaks_holes.get_summary()
  easy_pickle.dumps(summary)
  summary_log = StringIO()
  summary.show(out=summary_log)
  lines = summary_log.getvalue().splitlines()
  assert (""" anomalous H2O (anomalous > 3): 1""" in lines)
  assert (""" anomalous non-water atoms: 0""" in lines)
  assert (""" mFo-DFc > 9: 0""" in lines)
  # --- peaks filtered by 2mFo-DFc level; check the summary
  peaks_holes = find_peaks_holes.run(
    args=[pdb_file, mtz_file, "filter_peaks_by_2fofc=1.0"],
    out=null_out())
  filtered_log = StringIO()
  peaks_holes.get_summary().show(out=filtered_log)
  lines = filtered_log.getvalue().splitlines()
  assert (""" anomalous > 3: 0""" in lines)
  # --- peaks near the model included; check the program output itself
  near_model_log = StringIO()
  peaks_holes = find_peaks_holes.run(
    args=[pdb_file, mtz_file, "include_peaks_near_model=True",],
    out=near_model_log)
  lines = near_model_log.getvalue().splitlines()
  assert (""" mFo-DFc > 9: 1""" in lines)
  os.remove(mtz_file)
  os.remove(pdb_file)
def exercise_heavy () :
  """
  Run validate_waters on a Cd/Cl model whose ions have been anonymized.

  The result object must give identical show() output before and after a
  pickle round trip, report at least one bad water and exactly two heavy
  ones.
  """
  from mmtbx.regression import make_fake_anomalous_data
  from mmtbx.command_line import validate_waters
  import mmtbx.ions.utils
  from iotbx.file_reader import any_file
  file_base = "tst_validate_waters_1"
  pdb_file = make_fake_anomalous_data.write_pdb_input_cd_cl(file_base=file_base)
  anomalous_scatterers = [
    group_args(selection="element CD", fp=-0.29, fdp=2.676),
    group_args(selection="element CL", fp=0.256, fdp=0.5),
  ]
  make_fake_anomalous_data.generate_mtz_file(
    file_base="tst_validate_waters_1",
    d_min=1.5,
    anomalous_scatterers=anomalous_scatterers)
  # Write the starting model with anonymized ions.
  pdb_in = any_file(pdb_file)
  hierarchy, n = mmtbx.ions.utils.anonymize_ions(
    pdb_in.file_object.hierarchy, log=null_out())
  hierarchy.write_pdb_file(
    "%s_start.pdb" % file_base,
    crystal_symmetry=pdb_in.file_object.crystal_symmetry())
  results = validate_waters.run(
    args=[
      "tst_validate_waters_1_start.pdb",
      "tst_validate_waters_1.mtz",
      "skip_twin_detection=True"],
    out=null_out())
  shown = StringIO()
  results.show(out=shown)
  # Pickle round trip must preserve the report.
  restored = easy_pickle.loads(easy_pickle.dumps(results))
  shown_restored = StringIO()
  restored.show(out=shown_restored)
  assert not show_diff(shown.getvalue(), shown_restored.getvalue())
  assert (results.n_bad >= 1) and (results.n_heavy == 2)
def find_sec_str(self, pdb_hierarchy):
  """
  Derive a secondary-structure annotation for pdb_hierarchy.

  The program used is chosen by
  self.params.secondary_structure.protein.search_method ("ksdssp",
  "mmtbx_dssp", "from_ca" or "cablam").  For any other value a warning is
  written to self.log and the result of annotation.from_records() called
  without arguments is returned.
  """
  method = self.params.secondary_structure.protein.search_method
  if method == "ksdssp":
    pdb_str = pdb_hierarchy.as_pdb_string()
    print >> self.log, " running ksdssp..."
    records, stderr = run_ksdssp_direct(pdb_str)
    return iotbx.pdb.secondary_structure.annotation.from_records(
      records=records,
      log=self.log)
  if method == "mmtbx_dssp":
    from mmtbx.secondary_structure import dssp
    print >> self.log, " running mmtbx.dssp..."
    dssp_result = dssp.dssp(
      pdb_hierarchy=pdb_hierarchy,
      pdb_atoms=self.pdb_atoms,
      out=null_out())
    return dssp_result.get_annotation()
  if method == "from_ca":
    from mmtbx.secondary_structure import find_ss_from_ca
    print >> self.log, " running find_ss_from_ca..."
    fss = find_ss_from_ca.find_secondary_structure(
      hierarchy=pdb_hierarchy,
      out=null_out())
    return fss.get_annotation()
  if method == "cablam":
    from mmtbx.validation import cablam
    print >> self.log, " running cablam..."
    cablam_results = cablam.cablamalyze(
      pdb_hierarchy = pdb_hierarchy,
      outliers_only=False,
      out=null_out(),
      quiet=False)
    return cablam_results.as_secondary_structure()
  # Unrecognised method name.
  print >> self.log, " WARNING: Unknown search method for SS. No SS found."
  return iotbx.pdb.secondary_structure.annotation.from_records()
def get_phaser_sad_llg_map_coefficients (
    fmodel,
    pdb_hierarchy,
    log=None,
    verbose=False) :
  """
  Calculates an anomalous log-likelihood gradient (LLG) map using the SAD
  target in Phaser.  This is essentially similar to an anomalous difference-
  difference map, but more sensitive.

  Only observations with data > 0 are passed to Phaser, together with the
  matching R-free flags.  Phaser output goes to log only when verbose is set
  and a log is given; otherwise it is discarded.  Raises Sorry if the phaser
  module is not available.  Returns the map coefficients from the SAD target.
  """
  if (not libtbx.env.has_module("phaser")) :
    raise Sorry("Phaser not available - required for SAD LLG maps.")
  from phaser.phenix_adaptors import sad_target
  assert (fmodel.f_model().anomalous_flag())
  positive = fmodel.f_obs().data()>0
  f_obs = fmodel.f_obs().select(positive)
  r_free_flags = fmodel.r_free_flags().common_set(other=f_obs)
  data = sad_target.data_adaptor(
    f_obs=f_obs,
    r_free_flags=r_free_flags,
    verbose=True)
  if (verbose) and (log is not None) :
    phaser_log = log
  else :
    phaser_log = null_out()
  data.output.setPackagePhenix(phaser_log)
  target = data.target(
    xray_structure=fmodel.xray_structure,
    pdb_hierarchy=pdb_hierarchy,
    log=null_out())
  target.set_f_calc(fmodel.f_model())
  return target.llg_map_coeffs()
def refine_window (self, window) :
  """
  Refine one window into difference density and collect the kept trials.

  Runs real_space_annealing.refine_into_difference_density on a deep copy of
  the processed model's hierarchy, wraps the filtered trials in an ensemble
  and filters them against self.sites_cart.  Returns the ensemble when at
  least one trial is kept (printing it to self.out first if
  self.asynchronous_output is set), otherwise None.
  """
  from mmtbx.building.alternate_conformations import real_space_annealing
  processed_pdb_file = self.get_processed_pdb_file()
  assert (processed_pdb_file is not None)
  hierarchy = processed_pdb_file.all_chain_proxies.pdb_hierarchy
  # Trial filtering is only reported in debug mode.
  if (self.debug) :
    trials_log = sys.stdout
  else :
    trials_log = null_out()
  annealing = real_space_annealing.refine_into_difference_density(
    fmodel=self.fmodel,
    pdb_hierarchy=hierarchy.deep_copy(),
    processed_pdb_file=processed_pdb_file,
    selection=window.selection,
    selection_score=window.residue_selection,
    params=self.params,
    nproc=self.nproc_2,
    out=null_out())
  refinements = annealing.get_filtered_trials(log=trials_log)
  result = ensemble(
    window=window,
    sites_trials=refinements)
  n_keep = result.filter_trials(
    sites_cart=self.sites_cart,
    min_rmsd=self.params.min_rmsd,
    min_dev=self.min_required_deviation)
  if not (n_keep > 0) :
    return None
  if (self.asynchronous_output) :
    print >> self.out, result
  return result
def format_all_for_phenix_refine(self,quiet=False,out=None,
      prefix="refinement.pdb_interpretation.ncs_group"):
  '''
  Return the NCS groups formatted as phil text for phenix.refine.

  When self._ncs_obj has a show() method, its phil output is converted with
  convert_phil_format (atom-level selections, see
  "format_phil_for_phenix_refine"); the default prefix is replaced by
  "pdb_interpretation" in that case.  Otherwise (only a spec file was
  provided) the older per-group format_for_phenix_refine() is used, which
  allows only residue-level selections.

  quiet  : when True nothing is written to out
  out    : stream for the formatted text (default: sys.stdout)
  prefix : phil scope prefix for the generated parameters

  Returns the formatted text.
  '''
  if hasattr(self._ncs_obj,'show'):
    if prefix == 'refinement.pdb_interpretation.ncs_group':
      prefix="pdb_interpretation"
    if quiet:
      out = null_out()
    elif out is None:
      out=sys.stdout
    all_text = self._ncs_obj.show(format='phil',log=null_out(),header=False)
    all_text = convert_phil_format(all_text,to_type=prefix)
    if all_text:
      if not quiet:
        print >> out, all_text + '\n'
    return all_text
  else:
    # this is only being used when only a spec file is provided
    if out is None:
      out=sys.stdout
    pieces = []
    for ncs_group in self._ncs_groups:
      text= ncs_group.format_for_phenix_refine(prefix=prefix)
      if text:
        if not quiet:
          out.write(text+'\n')
        # every group is preceded by a newline in the returned text
        pieces.append("\n"+text)
    return "".join(pieces)
def exercise_twin_detwin () :
  """
  Twin and then detwin calculated data with massage_data.

  After twinning with l,-k,h (fraction 0.3) reflections with |h| != |l| must
  differ from the input; after detwinning all amplitudes must agree with the
  input to within 0.01.  Writing the twinned data as .sca is also exercised.
  """
  random.seed(12345)
  flex.set_random_seed(12345)
  xrs = random_structure.xray_structure(
    unit_cell=(12,5,12,90,90,90),
    space_group_symbol="P1",
    n_scatterers=12,
    elements="random")
  fc = abs(xrs.structure_factors(d_min=1.5).f_calc())
  fc = fc.set_observation_type_xray_amplitude()
  mtz_file = "tmp_massage_in.mtz"
  fc.as_mtz_dataset(column_root_label="F").mtz_object().write(mtz_file)

  def run_massage (input_file, action, hklout) :
    # All runs share the same corrections, twin law and twin fraction.
    massage_data.run(
      args=[
        input_file,
        "aniso.action=None",
        "outlier.action=None",
        action,
        "twin_law='l,-k,h'",
        "fraction=0.3",
        hklout,
      ],
      out=null_out())

  # Twinning, MTZ output
  run_massage(mtz_file, "symmetry.action=twin",
    "hklout=tmp_massage_twinned.mtz")
  assert op.isfile("tmp_massage_twinned.mtz")
  twinned_in = file_reader.any_file("tmp_massage_twinned.mtz")
  fc_twin = twinned_in.file_server.miller_arrays[0].f_sq_as_f()
  fc_twin, fc_tmp = fc_twin.common_sets(other=fc)
  for hkl, f1, f2 in zip(fc_tmp.indices(), fc_tmp.data(), fc_twin.data()) :
    if (abs(hkl[0]) == abs(hkl[2])) :
      continue
    assert not approx_equal(f1, f2, eps=0.01, out=null_out()), (hkl, f1, f2)
  # Twinning, Scalepack output
  run_massage(mtz_file, "symmetry.action=twin",
    "hklout=tmp_massage_twinned.sca")
  assert op.isfile("tmp_massage_twinned.sca")
  # Detwinning of the twinned MTZ file
  run_massage("tmp_massage_twinned.mtz", "symmetry.action=detwin",
    "hklout=tmp_massage_detwinned.mtz")
  detwinned_in = file_reader.any_file("tmp_massage_detwinned.mtz")
  fc_detwin = detwinned_in.file_server.miller_arrays[0].f_sq_as_f()
  fc_detwin, fc_tmp = fc_detwin.common_sets(other=fc)
  # XXX we appear to lose some accuracy here, possibly due to the use of
  # MTZ format
  for hkl, f1, f2 in zip(fc_tmp.indices(), fc_tmp.data(), fc_detwin.data()) :
    assert approx_equal(f1, f2, eps=0.01), hkl
def exercise_full_validation () :
  """
  Run phenix.validation's analyze_all on a model written from pdb_str_1 and
  check the clashscore with the neutron and the n_gaussian scattering tables.
  """
  from phenix.validation import analyze_all
  import iotbx.phil
  # Close the file explicitly so its contents are on disk before the
  # validation reads it back.
  with open("tmp_validation_neutron.pdb", "w") as f :
    f.write(pdb_str_1)
  test_phil = iotbx.phil.parse("""
model_vs_data {
  pdb_file = tmp_validation_neutron.pdb
  scattering_table = *neutron
}
""")
  working_phil = analyze_all.model_vs_data_params.fetch(source=test_phil)
  params = working_phil.extract()
  validation = analyze_all.validation_result(
    params=params,
    tmp_dir=os.getcwd(),
    out=null_out(),
    quiet=True)
  c_score = validation.molprobity_result.get_clashscore()
  assert approx_equal(c_score, 58.82, eps=0.01)
  # Same model, X-ray scattering table
  params.model_vs_data.scattering_table = "n_gaussian"
  validation = analyze_all.validation_result(
    params=params,
    tmp_dir=os.getcwd(),
    out=null_out(),
    quiet=True)
  c_score = validation.molprobity_result.get_clashscore()
  assert approx_equal(c_score, 35.29, eps=0.01)
def exercise () :
  """
  Test apply_ncs_to_ligand: remove the ACT ligand from chain B, let the
  program rebuild it from NCS, and compare the result with the original
  coordinates.  Also unit-tests mmtbx.ncs.ligands.find_ncs_operators.
  """
  #
  # Test command-line program
  #
  pdb_in, mtz_in = make_inputs()
  pdb_file = file_reader.any_file(pdb_in, force_type="pdb")
  hierarchy = pdb_file.file_object.hierarchy
  old_ligand = None
  for chain in hierarchy.only_model().chains() :
    if (chain.id != "B") : continue
    for residue_group in chain.residue_groups() :
      atom_group = residue_group.only_atom_group()
      if (atom_group.resname == "ACT") :
        # keep a copy for the RMSD check, then delete it from the model
        old_ligand = atom_group.detached_copy()
        residue_group.remove_atom_group(atom_group)
        break
  assert old_ligand is not None
  # Close the file explicitly so the program below reads the complete model.
  with open("tst_ligand_ncs_start.pdb", "w") as f :
    f.write(hierarchy.as_pdb_string(
      crystal_symmetry=pdb_file.file_object.crystal_symmetry()))
  args = [
    "tst_ligand_ncs_start.pdb",
    mtz_in,
    "ligand_code=ACT",
  ]
  from mmtbx.command_line import apply_ncs_to_ligand
  if op.isfile("ncs_ligands.pdb") :
    os.remove("ncs_ligands.pdb")
  result = apply_ncs_to_ligand.run(args=args, out=null_out())
  assert result.n_ligands_new == 1
  assert op.isfile("ncs_ligands.pdb")
  pdb_out = file_reader.any_file("ncs_ligands.pdb", force_type="pdb")
  hierarchy_new = pdb_out.file_object.hierarchy
  new_ligand = None
  for chain in hierarchy_new.only_model().chains() :
    if (chain.id != "B") : continue
    for residue_group in chain.residue_groups() :
      atom_group = residue_group.only_atom_group()
      if (atom_group.resname == "ACT") :
        new_ligand = atom_group.detached_copy()
  assert new_ligand is not None
  rmsd = old_ligand.atoms().extract_xyz().rms_difference(
    new_ligand.atoms().extract_xyz())
  assert (rmsd < 0.5)
  #
  # Unit tests
  #
  import mmtbx.ncs.ligands
  operators = mmtbx.ncs.ligands.find_ncs_operators(hierarchy,
    log=null_out())
  assert len(operators) == 1
  group_ops = operators[0]
  assert len(group_ops) == 2
  assert (len(group_ops[0].selection) == 7)
  for g_op in group_ops:
    out = StringIO()
    g_op.show_summary(out=out, prefix=" ")
    assert out.getvalue().count("Rotation:") == 1
def __init__ (self,
    xray_structure,
    pdb_hierarchy,
    f_obs,
    r_free_flags,
    rigid_body_refine=False,
    optimize_b_factors=False,
    skip_twin_detection=False,
    scattering_table="n_gaussian") :
  """
  Compute R-factors for a model against data, optionally after rigid-body
  refinement with one group per chain.

  The unit cell of f_obs is combined with the space group of the model and
  applied to the model, the data and the flags (systematic absences are
  removed from the latter two).  Sets r_work_start / r_free_start (initial
  values) and r_work / r_free / xray_structure (final values; identical to
  the initial ones when rigid_body_refine is False).  optimize_b_factors is
  accepted but not used in this constructor.
  """
  from mmtbx.utils import fmodel_simple
  from cctbx import crystal
  self.r_work = None
  self.r_free = None
  self.xray_structure = None
  combined_symmetry = crystal.symmetry(
    unit_cell=f_obs.unit_cell(),
    space_group=xray_structure.space_group())
  xray_structure = xray_structure.customized_copy(
    crystal_symmetry=combined_symmetry)
  f_obs = f_obs.customized_copy(
    crystal_symmetry=combined_symmetry).eliminate_sys_absent()
  r_free_flags = r_free_flags.customized_copy(
    crystal_symmetry=combined_symmetry).eliminate_sys_absent()
  fmodel = fmodel_simple(
    f_obs=f_obs,
    r_free_flags=r_free_flags,
    xray_structures=[xray_structure],
    skip_twin_detection=skip_twin_detection,
    scattering_table=scattering_table)
  self.r_work_start = fmodel.r_work()
  self.r_free_start = fmodel.r_free()
  if (not rigid_body_refine) :
    # Nothing to refine: final values are the starting values.
    self.r_work = self.r_work_start
    self.r_free = self.r_free_start
    self.xray_structure = xray_structure
    return
  from mmtbx.refinement import rigid_body
  selection_strings = rigid_body.rigid_groups_from_pdb_chains(
    pdb_hierarchy=pdb_hierarchy,
    xray_structure=xray_structure,
    group_all_by_chain=True,
    check_for_atoms_on_special_positions=True,
    log=null_out())
  selections = [
    pdb_hierarchy.atom_selection_cache().selection(sele_str).iselection()
    for sele_str in selection_strings ]
  refined = rigid_body.manager(
    fmodel=fmodel,
    selections=selections,
    params=rigid_body.master_params.extract(),
    log=null_out())
  refined_fmodel = refined.fmodel
  self.xray_structure = refined_fmodel.xray_structure
  self.r_work = refined_fmodel.r_work()
  self.r_free = refined_fmodel.r_free()
def exercise_combine_symmetry () :
  """
  Test the extraction of symmetry from both a PDB file and an MTZ file.

  Part 1 combines a model with data carrying a slightly different unit cell
  and space group P2, and checks the resulting symmetry.  Part 2 uses data in
  P1 and requires load_model_and_data to raise Sorry mentioning
  "Incompatible space groups".
  """
  from mmtbx.regression import model_1yjp
  from libtbx.test_utils import Exception_expected
  import mmtbx.command_line
  import iotbx.pdb.hierarchy
  from cctbx import sgtbx
  from cctbx import uctbx
  # 1yjp, as usual
  pdb_in = iotbx.pdb.hierarchy.input(pdb_string=model_1yjp)
  xrs = pdb_in.input.xray_structure_simple()
  with open("tst_combine_symmetry.pdb", "w") as f :
    f.write(pdb_in.hierarchy.as_pdb_string(crystal_symmetry=xrs))
  f_calc = abs(xrs.structure_factors(d_min=1.5).f_calc())
  # Make up slightly more exact unit cell, but set SG to P2
  f_calc = f_calc.customized_copy(
    crystal_symmetry=f_calc.crystal_symmetry().customized_copy(
      space_group_info=sgtbx.space_group_info("P2"),
      unit_cell=uctbx.unit_cell((21.9371, 4.8659, 23.4774, 90.0, 107.0832,
        90.00))))
  flags = f_calc.generate_r_free_flags()
  mtz = f_calc.as_mtz_dataset(column_root_label="F")
  mtz.add_miller_array(flags, column_root_label="FreeR_flag")
  mtz.mtz_object().write("tst_combine_symmetry.mtz")
  cmdline = mmtbx.command_line.load_model_and_data(
    args=["tst_combine_symmetry.pdb", "tst_combine_symmetry.mtz"],
    master_phil=mmtbx.command_line.generate_master_phil_with_inputs(""),
    process_pdb_file=False,
    create_fmodel=True,
    out=null_out())
  symm = cmdline.xray_structure.crystal_symmetry()
  assert (approx_equal(symm.unit_cell().parameters(),
          (21.9371, 4.8659, 23.4774, 90.0, 107.0832, 90.0)))
  assert (str(symm.space_group_info()) == "P 1 21 1")
  # Part 2: incompatible space groups
  f_calc_2 = f_calc.customized_copy(
    crystal_symmetry=f_calc.crystal_symmetry().customized_copy(
      space_group_info=sgtbx.space_group_info("P1")))
  flags_2 = f_calc_2.generate_r_free_flags()
  mtz = f_calc_2.as_mtz_dataset(column_root_label="F")
  mtz.add_miller_array(flags_2, column_root_label="FreeR_flag")
  mtz.mtz_object().write("tst_combine_symmetry_2.mtz")
  try :
    cmdline = mmtbx.command_line.load_model_and_data(
      args=["tst_combine_symmetry.pdb", "tst_combine_symmetry_2.mtz"],
      master_phil=mmtbx.command_line.generic_simple_input_phil(),
      process_pdb_file=False,
      create_fmodel=True,
      out=null_out())
  except Sorry as s :
    assert ("Incompatible space groups" in str(s))
  else :
    # Without this clause the test would pass silently when no error is
    # raised at all.
    raise Exception_expected
def exercise () : from mmtbx.wwpdb import utils as wwpdb_utils for fn in ["3qgg.pdb", "3gqq.mtz", "3gqq-sf.cif"] : if (os.path.isfile(fn)) : os.remove(fn) pdb_id = "3gqq" wwpdb_utils.fetch_pdb_data("3gqq") program, program_full = wwpdb_utils.get_program("3gqq.pdb") assert (program == "PHENIX.REFINE") data = wwpdb_utils.find_data_arrays("3gqq.mtz", log=null_out()) filter = wwpdb_utils.filter_pdb_file("3gqq.pdb", log=null_out()) assert (filter.n_semet == 24) and (filter.n_unknown == 30) print "OK"
def exercise_space_group_handling () :
  """
  Check how cc_star handles the space group of unmerged data.

  Unmerged intensities labelled P2 must be accepted alongside the merged
  data written from model_1yjp; the same intensities left in P1 must make
  cc_star raise Sorry with the exact message asserted below.
  """
  from libtbx.test_utils import Exception_expected
  flex.set_random_seed(123456)
  random.seed(123456)
  base = "tst_cc_star_space_group"
  pdb_in = iotbx.pdb.hierarchy.input(pdb_string=model_1yjp)
  xrs = pdb_in.xray_structure_simple()
  xrs.set_inelastic_form_factors(
    photon=1.54,
    table="sasaki")
  fc = abs(xrs.structure_factors(d_min=1.5).f_calc()).average_bijvoet_mates()
  fc.set_observation_type_xray_amplitude()
  flags = fc.generate_r_free_flags()
  mtz = fc.as_mtz_dataset(column_root_label="F")
  mtz.add_miller_array(flags, column_root_label="FreeR_flag")
  mtz.mtz_object().write(base + ".mtz")
  # Unmerged data: the P1-expanded, shaken model plus a randomized copy
  xrs_p1 = xrs.expand_to_p1()
  xrs_p1.shake_sites_in_place(rms_difference=0.1)
  fc_p1 = xrs_p1.structure_factors(d_min=1.4).f_calc()
  fc_p1_extra = fc_p1.randomize_amplitude_and_phase(amplitude_error=1.0,
    phase_error_deg=0,
    random_seed=123456)
  fc_p1 = abs(fc_p1.concatenate(other=fc_p1_extra)).sort(
    by_value="packed_indices")
  fc_p1.set_observation_type_xray_amplitude()
  sg_p2 = sgtbx.space_group_info("P2")
  ic = fc_p1.f_as_f_sq().customized_copy(
    space_group_info=sg_p2,
    sigmas=flex.double(fc_p1.size(), 10.0))
  ic.export_as_scalepack_unmerged(file_name=base + ".sca")
  # Close the file explicitly so cc_star reads the complete model.
  with open(base + ".pdb", "w") as f :
    f.write(model_1yjp)
  args = [
    base + ".mtz",
    base + ".pdb",
    "unmerged_data=%s.sca" % base,
  ]
  cc_star.run(args=args, out=null_out())
  # now with .sca in P1 (raises Sorry)
  ic2 = fc_p1.f_as_f_sq().customized_copy(
    sigmas=flex.double(fc_p1.size(), 10.0))
  ic2.export_as_scalepack_unmerged(file_name=base + "_p1.sca")
  args = [
    base + ".mtz",
    base + ".pdb",
    "unmerged_data=%s_p1.sca" % base,
  ]
  try :
    cc_star.run(args=args, out=null_out())
  except Sorry as s :
    assert (str(s) == "Incompatible space groups in merged and unmerged data:P 1 21 1 versus P 1"), s
  else :
    # Without this clause the test would pass silently when no error is
    # raised at all.
    raise Exception_expected
def run(fn): ''' ''' pdb_hierarchy = pdb.hierarchy.input(file_name=fn).hierarchy ss = 'pepnames and (name ca or name n or name c) and altloc " "' select_str = 'chain L and resseq 104:end and %s' % ss select_str = 'chain H and resseq 114:end and pepnames and (name ca or name n or name c) and altloc " "' pdb_selection_bool = pdb_hierarchy.atom_selection_cache().selection(select_str) const_L = pdb_hierarchy.select(pdb_selection_bool) ss = 'pepnames and (name ca or name n or name c) and altloc " "' select_str = 'chain H and resseq 114:end and %s' % ss pdb_selection_bool = pdb_hierarchy.atom_selection_cache().selection(select_str) const_H = pdb_hierarchy.select(pdb_selection_bool) params = superpose_pdbs.master_params.extract() x = superpose_pdbs.manager( params, log=null_out(), write_output=False, save_lsq_fit_obj=True, pdb_hierarchy_fixed=const_H, pdb_hierarchy_moving=const_L) print 'ok'
def __init__ (self, fmodel, pdb_hierarchy, cc_min=0.8,
    molprobity_map_params=None) :
  """
  Set up the per-component result lists and run the residue-level
  real-space correlation calculation.

  fmodel, pdb_hierarchy  : passed to real_space_correlation.simple
  cc_min                 : accepted but not used in the visible part of this
                           constructor
  molprobity_map_params  : optional; when given, its map_file_name,
                           map_coefficients_file_name and
                           map_coefficients_label override the defaults

  Any exception raised by the calculation propagates to the caller.
  """
  from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
  from mmtbx import real_space_correlation
  validation.__init__(self)
  # arrays for different components
  self.everything = list()
  self.protein = list()
  self.other = list()
  self.water = list()
  # NOTE(review): aa_codes and rsc are not used within the visible part of
  # this constructor.
  aa_codes = one_letter_given_three_letter.keys()
  # redo real_space_corelation.simple to use map objects instead of filenames
  # (The former "except Exception, e: raise e" wrapper was removed: it
  # re-raised the same exception but replaced its traceback.)
  rsc_params = real_space_correlation.master_params().extract()
  rsc_params.detail="residue"
  rsc_params.map_1.fill_missing_reflections = False
  rsc_params.map_2.fill_missing_reflections = False
  if (molprobity_map_params is not None):
    rsc_params.map_file_name = molprobity_map_params.map_file_name
    rsc_params.map_coefficients_file_name = \
      molprobity_map_params.map_coefficients_file_name
    rsc_params.map_coefficients_label = \
      molprobity_map_params.map_coefficients_label
  rsc = real_space_correlation.simple(
    fmodel=fmodel,
    pdb_hierarchy=pdb_hierarchy,
    params=rsc_params,
    log=null_out())
def exercise () :
  # Compare two copies of an ACT ligand with
  # mmtbx.validation.ligands.compare_ligands and check that exactly one
  # comparison is returned, with a first RMSD value of 0.444.
  import mmtbx.validation.ligands
  import iotbx.pdb.hierarchy
  # NOTE(review): the column spacing inside the PDB records below appears
  # collapsed in this view; PDB records are column-sensitive, so confirm the
  # literals against the fixed-column layout before relying on them.
  pdb_1 = iotbx.pdb.hierarchy.input(pdb_string="""\
HETATM 1 C ACT X 1 0.496 0.209 0.702 1.00 20.00 A C
HETATM 2 O ACT X 1 1.691 0.458 0.395 1.00 20.00 A O
HETATM 3 OXT ACT X 1 0.100 0.388 1.883 1.00 20.00 A O-1
HETATM 4 CH3 ACT X 1 -0.463 -0.305 -0.349 1.00 20.00 A C
HETATM 5 H1 ACT X 1 -0.904 0.449 -0.784 1.00 20.00 A H
HETATM 6 H2 ACT X 1 -1.135 -0.874 0.074 1.00 20.00 A H
HETATM 7 H3 ACT X 1 0.029 -0.823 -1.014 1.00 20.00 A H
END""")
  pdb_2 = iotbx.pdb.hierarchy.input(pdb_string="""\
HETATM 1 C ACT X 1 0.412 -0.000 0.714 1.00 20.00 A C
HETATM 2 O ACT X 1 1.671 -0.000 0.714 1.00 20.00 A O
HETATM 3 OXT ACT X 1 -0.217 -0.000 1.804 1.00 20.00 A O-1
HETATM 4 CH3 ACT X 1 -0.344 -0.000 -0.597 1.00 20.00 A C
HETATM 5 H1 ACT X 1 -0.507 0.920 -0.878 1.00 20.00 A H
HETATM 6 H2 ACT X 1 -1.197 -0.460 -0.480 1.00 20.00 A H
HETATM 7 H3 ACT X 1 0.183 -0.460 -1.277 1.00 20.00 A H
""")
  # Output of the comparison itself is discarded.
  rmsds = mmtbx.validation.ligands.compare_ligands(
    ligand_code="ACT",
    hierarchy_1=pdb_1.hierarchy,
    hierarchy_2=pdb_2.hierarchy,
    out=null_out())
  assert (len(rmsds) == 1)
  assert approx_equal(rmsds[0][0], 0.444, eps=0.0001)
  print "OK"
def clean_up_ions (fmodel, model, params, log=None, verbose=True) :
  """
  Re-append all atoms with segid ION to the end of the model in sorted order.

  Parameters
  ----------
  fmodel : mmtbx.f_model.manager
  model : mmtbx.model.manager
  params : libtbx.phil.scope_extract
  log : file, optional
  verbose : bool, optional

  Returns
  -------
  mmtbx.model.manager
      An updated model with ions corrected.  The input model is returned
      unchanged when it contains no atoms with segid ION.
  """
  if (log is None) :
    log = null_out()
  import mmtbx.ions.utils
  is_ion = model.pdb_hierarchy().atom_selection_cache().selection(
    "segid ION")
  if (len(is_ion.iselection()) == 0) :
    print >> log, " No ions (segid=ION) found."
    return model
  n_sites_start = model.xray_structure.scatterers().size()
  # Split the model into the ions and everything else.
  new_model = model.select(~is_ion)
  ion_model = model.select(is_ion)
  ion_pdb_hierarchy = ion_model.pdb_hierarchy(sync_with_xray_structure=True)
  ion_xrs = ion_model.xray_structure
  perm = mmtbx.ions.utils.sort_atoms_permutation(
    pdb_atoms=ion_pdb_hierarchy.atoms(),
    xray_structure=ion_model.xray_structure)
  geometry = ion_model.restraints_manager.geometry
  nonbonded_types = geometry.nonbonded_types
  nonbonded_charges = geometry.nonbonded_charges
  sorted_ions = ion_pdb_hierarchy.atoms().select(perm)
  atom_names = []
  residue_names = []
  segids = []
  for atom in sorted_ions :
    atom_names.append(atom.name)
    residue_names.append(atom.fetch_labels().resname)
    segids.append("ION")
  # Put the ions back, in sorted order, as single atoms.
  new_model.append_single_atoms(
    new_xray_structure=ion_xrs.select(perm),
    atom_names=atom_names,
    residue_names=residue_names,
    nonbonded_types=nonbonded_types.select(perm),
    nonbonded_charges=nonbonded_charges.select(perm),
    chain_id=params.ion_chain_id,
    segids=segids,
    refine_occupancies=params.refine_ion_occupancies,
    refine_adp="isotropic",
    reset_labels=True)
  # No atom may have been lost or duplicated on the way.
  n_sites_end = new_model.xray_structure.scatterers().size()
  new_hierarchy = new_model.pdb_hierarchy()
  n_sites_pdb = new_hierarchy.atoms().size()
  assert (n_sites_start == n_sites_end == n_sites_pdb)
  new_selection = new_hierarchy.atom_selection_cache().selection("segid ION")
  final_ions = new_hierarchy.atoms().select(new_selection)
  if (verbose) :
    print >> log, " Final list of ions:"
    for atom in final_ions :
      print >> log, " %s" % atom.id_str()
    print >> log, ""
  fmodel.update_xray_structure(new_model.xray_structure)
  return new_model
def write_pdb_files (self, output_base, serial, serial_format="%04d",
    pause=0, pause_at_end=False, log=None) :
  """
  Write every frame in self._frames as a numbered PDB file.

  output_base   : file name prefix; files are "<output_base>_<serial>.pdb"
  serial        : number used for the first file written
  serial_format : format applied to the serial number
  pause         : number of extra copies of the first frame written before
                  the frames themselves
  pause_at_end  : when True (and pause != 0), also write `pause` copies of
                  the last frame after the frames
  log           : stream receiving one " wrote <name>" line per file
                  (default: discarded)

  Returns the serial number following the last file written.
  """
  if (log is None) :
    log = null_out()
  file_format = "%s_%s.pdb" % (output_base, serial_format)

  def write_frame (sites, k) :
    # Load one set of coordinates into the hierarchy, write it as file
    # number k and return the next serial number.
    self.pdb_hierarchy.atoms().set_xyz(sites)
    file_name = file_format % k
    self._write_pdb(file_name)
    log.write(" wrote %s\n" % os.path.basename(file_name))
    return k + 1

  k = serial
  if (pause != 0) :
    for j in range(pause) :
      k = write_frame(self._frames[0], k)
  for sites in self._frames :
    k = write_frame(sites, k)
  if (pause_at_end) and (pause != 0) :
    for j in range(pause) :
      k = write_frame(self._frames[-1], k)
  return k
def exercise_get_atom_selections (verbose=False) :
  """
  Check utils.get_atom_selections with disjoint and overlapping selections.

  Three single-residue selections must be accepted; two overlapping residue
  ranges must raise Sorry with the exact message asserted below.  With
  verbose set, PDB processing output goes to sys.stdout instead of being
  discarded.
  """
  from libtbx.test_utils import Exception_expected
  # NOTE(review): the column spacing inside the PDB records below appears
  # collapsed in this view; confirm against the fixed-column layout.
  pdb_in = """\
CRYST1 15.000 15.000 15.000 90.00 90.00 90.00 P 212121
HETATM 115 O HOH A 18 3.000 5.000 5.000 1.00 10.00 O
HETATM 115 O HOH A 19 5.000 5.000 8.000 1.00 10.00 O
HETATM 115 O HOH A 20 5.000 5.000 8.000 1.00 10.00 O
END"""
  log = null_out()
  if (verbose) :
    log = sys.stdout
  processed_pdb_files_srv = utils.process_pdb_file_srv(log=log)
  processed_pdb_file, pdb_inp = processed_pdb_files_srv.process_pdb_files(
    raw_records=pdb_in.splitlines())
  # Non-overlapping selections: must not raise.
  selections1 = utils.get_atom_selections(
    all_chain_proxies=processed_pdb_file.all_chain_proxies,
    xray_structure=processed_pdb_file.xray_structure(),
    selection_strings=["resseq 18", "resseq 19", "resseq 20"],
    parameter_name="refine.occupancy")
  # Residue 19 is in both ranges: must be rejected.
  try :
    selections2 = utils.get_atom_selections(
      all_chain_proxies=processed_pdb_file.all_chain_proxies,
      xray_structure=processed_pdb_file.xray_structure(),
      selection_strings=["resseq 18:19", "resseq 19:20"],
      parameter_name="refine.occupancy")
  except Sorry as s :
    assert (str(s) == """\
One or more overlapping selections for refine.occupancy:
resseq 18:19
resseq 19:20""")
  else :
    # Without this clause the test would pass silently when no error is
    # raised at all.
    raise Exception_expected
def prepare_inputs (prefix="tst_build_alt_confs") :
  """
  Write the input files for the alternate-conformation building tests.

  Writes pdb_raw to "<prefix>_in.pdb", generates "<prefix>.mtz" from it with
  phenix.fmodel, then writes "<prefix>_start.pdb": the same model with every
  multi-conformer residue reduced to its first atom group (occupancy 1.0,
  blank altloc).
  """
  pdb_in = "%s_in.pdb" % prefix
  # Close the file explicitly so fmodel.run reads the complete model.
  with open(pdb_in, "w") as f :
    f.write(pdb_raw)
  args = [
    pdb_in,
    "high_resolution=1.2",
    "type=real",
    "label=F",
    "add_sigmas=True",
    "r_free_flags_fraction=0.1",
    "random_seed=12345",
    "output.file_name=%s.mtz" % prefix,
  ]
  fmodel.run(args=args, log=null_out())
  pdb_file = file_reader.any_file(pdb_in)
  hierarchy = pdb_file.file_object.hierarchy
  xrs = pdb_file.file_object.xray_structure_simple()
  for chain in hierarchy.only_model().chains() :
    for residue_group in chain.residue_groups() :
      atom_groups = residue_group.atom_groups()
      if (len(atom_groups) > 1) :
        # Drop every conformer but the first ...
        while (len(atom_groups) > 1) :
          residue_group.remove_atom_group(atom_groups[-1])
          del atom_groups[-1]
        # ... and turn what is left into a fully occupied single conformer.
        for atom in residue_group.atoms() :
          atom.occ = 1.0
        atom_groups[0].altloc = ''
  assert hierarchy.atoms().extract_occ().all_eq(1.0)
  with open("%s_start.pdb" % prefix, "w") as f :
    f.write(hierarchy.as_pdb_string(crystal_symmetry=xrs))
def exercise_2():
  # Check that mmtbx.maps.utils.create_map_from_pdb_and_mtz runs on a model
  # containing unknown element symbols ("?", "X", "Z") together with map
  # coefficients labelled 2FOFCWT/PH2FOFCWT.  Nothing is asserted about the
  # output; the call only has to complete.
  import iotbx.pdb
  import mmtbx.maps.utils
  # NOTE(review): the column spacing inside the PDB records below appears
  # collapsed in this view; confirm against the fixed-column layout.
  pdb_str1="""
CRYST1 10.000 10.000 10.000 90.00 90.00 90.00 P 1
HETATM 1 C C 1 4.271 0.000 0.000 1.00 5.00 C
HETATM 2 ? ? 2 5.729 0.000 0.000 1.00 5.00 ?
HETATM 1 X D 1 4.271 0.000 0.000 1.00 5.00 X
HETATM 1 Z E 1 4.271 0.000 0.000 1.00 5.00 Z
END
"""
  pdb_str2="""
CRYST1 10.000 10.000 10.000 90.00 90.00 90.00 P 1
HETATM 1 C C 1 4.271 0.000 0.000 1.00 5.00 C
END
"""
  # The model written to disk comes from pdb_str1 ...
  pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_str1)
  pdb_inp.write_pdb_file(file_name = "tst_exercise_2_map_utils.pdb")
  # ... while the structure factors are calculated from pdb_str2.
  fc = iotbx.pdb.input(source_info=None,
    lines=pdb_str2).xray_structure_simple().structure_factors(d_min=2).f_calc()
  # Label decorator supplying the column names for the complex array.
  class dummy:
    def amplitudes(self):
      return "2FOFCWT"
    def phases(self,root_label=None):
      return "PH2FOFCWT"
  mtz_dataset = fc.as_mtz_dataset(column_root_label=dummy().amplitudes(),
    label_decorator=dummy())
  mtz_dataset.add_miller_array(miller_array=abs(fc),
    column_root_label="FOBS_X")
  mtz_object = mtz_dataset.mtz_object()
  mtz_object.write(file_name = "tst_exercise_2_map_utils.mtz")
  mfn1 = "tst_exercise_2_map_utils_output.mtz"
  mmtbx.maps.utils.create_map_from_pdb_and_mtz(
    pdb_file="tst_exercise_2_map_utils.pdb",
    mtz_file="tst_exercise_2_map_utils.mtz",
    output_file=mfn1,
    out=null_out())
def get_validated_residues_in_selection (self,
    selection,
    require_n_residues=None,
    log=None) :
  """
  Apply an atom selection to every stored model and keep the clean ones.

  For each hierarchy in self.pdb_hierarchies the selected residue groups
  (which must lie in a single chain) are looked up in the model's
  Ramachandran and rotamer validation results.  The returned list has one
  entry per model: the selected hierarchy, or None when the number of
  residues differs from require_n_residues (if given) or when any residue is
  a Ramachandran or rotamer outlier.
  """
  if (log is None) :
    log = null_out()
  results = []
  for i_model, hierarchy in enumerate(self.pdb_hierarchies) :
    sel_cache = self.selection_caches[i_model]
    rama, rota = self.validations[i_model]
    isel = sel_cache.selection(selection).iselection()
    hierarchy_sel = hierarchy.select(isel)
    residue_groups = hierarchy_sel.only_model().only_chain().residue_groups()
    wrong_size = ((require_n_residues is not None) and
                  (len(residue_groups) != require_n_residues))
    if (wrong_size) :
      results.append(None)
      continue
    for residue_group in residue_groups :
      atom_group = residue_group.only_atom_group()
      rama_result = rama.find_atom_group(other=atom_group)
      rota_result = rota.find_atom_group(other=atom_group)
      assert (not None in [rama_result, rota_result]), atom_group.id_str()
      if (rama_result.is_outlier()) or (rota_result.is_outlier()) :
        # one outlier disqualifies the whole model
        results.append(None)
        break
    else :
      results.append(hierarchy_sel)
  return results
def exercise () : if (os.path.isfile("tst_fmodel_anomalous.mtz")) : os.remove("tst_fmodel_anomalous.mtz") pdb_file = make_fake_anomalous_data.write_pdb_input_cd_cl( file_base="tst_fmodel_anomalous") # phenix.fmodel (with wavelength) args = [ pdb_file, "high_resolution=1.0", "wavelength=1.116", "label=F", "type=real", "output.file_name=tst_fmodel_anomalous.mtz", "r_free_flags_fraction=0.1", ] fmodel.run(args=args, log=null_out()) assert os.path.isfile("tst_fmodel_anomalous.mtz") mtz_in = file_reader.any_file("tst_fmodel_anomalous.mtz") array = mtz_in.file_server.miller_arrays[0] assert (array.anomalous_flag()) anom_diffs = array.anomalous_differences() assert approx_equal(flex.max(anom_diffs.data()), 5.72, eps=0.01) # mmtbx.fmodel_simple result = easy_run.call( "mmtbx.fmodel_simple \"%s\" tst_fmodel_anomalous.mtz high_resolution=2.0" % pdb_file) print "OK"
def exercise_emringer_residue_scan():
  """
  Run emringer on the regression model/map pair and compare, for each of the
  22 scanned residues, the current angle, the chosen peak angle and the peak
  density of the entry stored under key/index 1 of ``_angles`` against
  reference values.  Also checks the rolling-window threshold with and without
  an explicit setting, and that a run with ``sampling_angle=2`` completes.
  """
  def find_regression_file(relative_path):
    return libtbx.env.find_in_repositories(
      relative_path=relative_path,
      test=os.path.isfile)

  pdb_file = find_regression_file(
    "phenix_regression/mmtbx/em_ringer/tst_emringer_model.pdb")
  map_file = find_regression_file(
    "phenix_regression/mmtbx/em_ringer/tst_emringer_map.ccp4")
  assert (None not in [pdb_file, map_file])
  results, scoring, rolling = emringer.run(
    [pdb_file, map_file], out=null_out())
  # Make sure the right number of residues (22 out of 28) get scanned
  assert len(results)==22
  modelled_list = [
    290.742121792, 192.844056257, 45.4781110306, 294.247825632,
    303.618891108, 58.7694040824, 331.70068496, 46.7136045049,
    290.167261226, 304.261231829, 282.651244586, 268.729721112,
    195.972333785, 305.321933311, 314.81066224, 286.028424514,
    311.180807466, 313.004918133, 296.67781565, 296.949191638,
    169.644245088, 192.496265164]
  peak_list = [
    270, 180, 260, 75, 305, 30, 310, 90, 265, 270, 270,
    240, 280, 260, 310, 285, 295, 100, 260, 165, 155, 200]
  peak_rhos = [
    0.175600306502, 0.351591946536, 0.206238983746, 0.3269057296,
    0.68375562882, 0.251143527693, 0.29106077218, 0.199922124642,
    0.298461589197, 0.563313760047, 0.412696803251, 0.511080434089,
    0.310001828446, 0.228239176285, 0.563148497472, 0.490755919184,
    0.200978032127, 0.274929619102, 0.299229846335, 0.179215798655,
    0.150783734124, 0.210869945593]
  expected = zip(modelled_list, peak_list, peak_rhos)
  for i, (modelled, peak_chi, peak_rho) in enumerate(expected):
    # Make sure the modelled angle is correctly read
    assert approx_equal(results[i]._angles[1].angle_current, modelled)
    # Make sure the peak is chosen correctly
    assert approx_equal(results[i]._angles[1].peak_chi, peak_chi)
    # Make sure the peak rhos are correct
    assert approx_equal(results[i]._angles[1].peak_rho, peak_rho)
  results, scoring2, rolling2 = emringer.run(
    [pdb_file, map_file, "rolling_window_threshold=0.5"],
    out=null_out())
  assert rolling.threshold == 0
  assert rolling2.threshold == 0.5
  # a finer sampling angle only has to run to completion
  results, scoring2, rolling = emringer.run(
    [pdb_file, map_file, "sampling_angle=2"],
    out=null_out())
def run(args):
  """
  Run all joint exercises.

  :param args: either empty (3 trials, 30 dynamics steps, output discarded)
    or two values ``[n_trials, n_dynamics_steps]``; both are converted with
    int() and clamped to at least 1, and output goes to sys.stdout
  """
  assert len(args) in [0,2]
  if (len(args) == 0):
    n_trials = 3
    n_dynamics_steps = 30
    out = null_out()
  else:
    n_trials = max(1, int(args[0]))
    n_dynamics_steps = max(1, int(args[1]))
    out = sys.stdout
  show_times_at_exit()
  # comment out an entry to skip that exercise while debugging
  exercises = (
    exercise_six_dof,
    exercise_six_dof2,
    exercise_spherical,
    exercise_revolute,
    exercise_revolute2,
  )
  for exercise_function in exercises:
    exercise_function(
      out=out, n_trials=n_trials, n_dynamics_steps=n_dynamics_steps)
  # print as a function call, consistent with the print(..., file=...) usage
  # elsewhere in this file
  print("OK")
def get_structure_factors():
  """
  Read observed amplitudes from the CIF file '2qvz-sf.cif' and generate
  R-free flags for them.

  The first amplitude array found is used (Bijvoet mates averaged).  If an
  intensity array is seen before any amplitude array, it is converted with
  french_wilson() and kept as a fallback while the search continues.

  :returns: tuple (f_obs, r_free_flags)
  :raises RuntimeError: if neither an amplitude nor an intensity array
    was found
  """
  f_obs = None
  full_path_cif = '2qvz-sf.cif'
  # (a stray reader() call with no input, whose result was discarded, used to
  # precede this line)
  miller_arrays = iotbx.cif.reader(
    file_path=full_path_cif).as_miller_arrays(force_symmetry=True)
  for ma in miller_arrays:
    if ma.is_xray_amplitude_array():
      # Consider using Bijvoet mates
      ma = ma.average_bijvoet_mates()
      f_obs = abs(ma)
      break
    elif not f_obs and ma.is_xray_intensity_array():
      # Consider using Bijvoet mates
      ma = ma.average_bijvoet_mates()
      # convert i_obs to f_obs
      f_obs = abs(ma.french_wilson(log=null_out()))
  if f_obs:
    r_free_flags = f_obs.generate_r_free_flags()
  else:
    raise RuntimeError("Missing amplitude array.")
  return f_obs,r_free_flags
def enable_multiprocessing_if_possible(nproc=Auto, log=None):
  """
  Switch for using multiple CPUs with the pool_map function, usually called
  at the beginning of an app.

  If detect_problem() reports a problem, an nproc of Auto or None is reset
  to 1, while any other nproc raises Sorry.  Otherwise an informational
  banner is written to log and nproc is returned unchanged.

  :param nproc: default number of processors to use
  :param log: output handle for the banner; null_out() is used when None
  :returns: number of processors to use (None or Auto means automatic)
  :raises Sorry: if a problem was detected and nproc is an explicit value
    other than 0 or 1
  """
  if (nproc == 1) or (nproc == 0):
    return 1
  if (log is None):
    from libtbx.utils import null_out
    log = null_out()
  problems = detect_problem()
  if (problems is not None) and (problems is not Auto):
    if (nproc is Auto) or (nproc is None):
      return 1
    from libtbx.utils import Sorry
    raise Sorry("%s. Please use nproc=1 or nproc=Auto." % str(problems))
  # print as a function call, consistent with the print(..., file=...) usage
  # elsewhere in this file
  print(""" ****************************************************************** INFO: Some parts of this job will make use of multiple processors: ****************************************************************** nproc = %s Please ask your system administrator for advice about this, in particular if you run this job through a queuing system. """ % str(nproc), file=log)
  return nproc
def split_models(hierarchy,
    crystal_symmetry,
    output_base,
    original_file=None,
    log=None):
  """
  Write every model of a PDB hierarchy to its own file.

  Files are named ``<output_base>_<model id>.pdb``; the 1-based position of
  the model is used when its id is empty or only whitespace, so such models
  no longer overwrite each other.

  :param hierarchy: PDB hierarchy whose models() are written out
  :param crystal_symmetry: if not None, CRYST1/SCALE records are written first
  :param output_base: prefix of the output file names
  :param original_file: if not None, recorded in REMARK lines
  :param log: handle receiving one "Wrote ..." line per file; null_out() is
    used when None
  :returns: list of the file names written, in model order
  """
  if (log is None):
    log = null_out()
  import iotbx.pdb.hierarchy
  models = hierarchy.models()
  n_models = len(models)
  file_names = []
  for k, model in enumerate(models, start=1):
    new_hierarchy = iotbx.pdb.hierarchy.root()
    new_hierarchy.append_model(model.detached_copy())
    model_id = model.id.strip()
    if (model_id == ""):
      model_id = str(k)
    output_file = "%s_%s.pdb" % (output_base, model_id)
    # the context manager closes the file even if writing fails
    with open(output_file, "w") as f:
      if (crystal_symmetry is not None):
        print(iotbx.pdb.format_cryst1_and_scale_records(
          crystal_symmetry=crystal_symmetry,
          write_scale_records=True), file=f)
      print("REMARK Model %d of %d" % (k, n_models), file=f)
      if (original_file is not None):
        print("REMARK Original file:", file=f)
        print("REMARK %s" % original_file, file=f)
      f.write(new_hierarchy.as_pdb_string())
    file_names.append(output_file)
    print("Wrote %s" % output_file, file=log)
  return file_names
def exercise_1():
  """
  Regression test for xtriage on a small peptide data set.

  Writes the model (pdb_raw) and a sequence file, reads reference amplitudes
  from mmtbx/regression/mtz/tst_xtriage_fmodel.mtz, attaches synthetic sigmas
  and runs xtriage twice (completeness_as_non_anomalous True, then False),
  comparing the reported statistics with stored values.  It then exercises:
  data without sigmas, data expanded to P1, the resolution-limit and
  log-binned completeness analyses, and two centric-dominated data sets for
  which xtriage.run is expected to raise Sorry.
  """
  # NOTE(review): whitespace inside the multi-line literals of this function
  # is kept exactly as found in this file - confirm against upstream.
  pdb_raw = """\ CRYST1 23.000 6.666 25.000 90.00 107.08 90.00 P 1 21 1 2 ATOM 1 N GLY A 1 -9.009 4.612 6.102 1.00 16.77 N ATOM 2 CA GLY A 1 -9.052 4.207 4.651 1.00 16.57 C ATOM 3 C GLY A 1 -8.015 3.140 4.419 1.00 16.16 C ATOM 4 O GLY A 1 -7.523 2.521 5.381 1.00 16.78 O ATOM 5 N ASN A 2 -7.656 2.923 3.155 1.00 15.02 N ATOM 6 CA ASN A 2 -6.522 2.038 2.831 1.00 14.10 C ATOM 7 C ASN A 2 -5.241 2.537 3.427 1.00 13.13 C ATOM 8 O ASN A 2 -4.978 3.742 3.426 1.00 11.91 O ATOM 9 CB ASN A 2 -6.346 1.881 1.341 1.00 15.38 C ATOM 10 CG ASN A 2 -7.584 1.342 0.692 1.00 14.08 C ATOM 11 OD1 ASN A 2 -8.025 0.227 1.016 1.00 17.46 O ATOM 12 ND2 ASN A 2 -8.204 2.155 -0.169 1.00 11.72 N ATOM 13 N ASN A 3 -4.438 1.590 3.905 1.00 12.26 N ATOM 14 CA ASN A 3 -3.193 1.904 4.589 1.00 11.74 C ATOM 15 C ASN A 3 -1.955 1.332 3.895 1.00 11.10 C ATOM 16 O ASN A 3 -1.872 0.119 3.648 1.00 10.42 O ATOM 17 CB ASN A 3 -3.259 1.378 6.042 1.00 12.15 C ATOM 18 CG ASN A 3 -2.006 1.739 6.861 1.00 12.82 C ATOM 19 OD1 ASN A 3 -1.702 2.925 7.072 1.00 15.05 O ATOM 20 ND2 ASN A 3 -1.271 0.715 7.306 1.00 13.48 N ATOM 21 N MET A 4 -1.005 2.228 3.598 1.00 10.29 N ATOM 22 CA MET A 4 0.384 1.888 3.199 1.00 10.53 C ATOM 23 C MET A 4 1.435 2.606 4.088 1.00 10.24 C ATOM 24 O MET A 4 1.547 3.843 4.115 1.00 8.86 O ATOM 25 CB MET A 4 0.616 2.241 1.729 1.00 20.00 C ATOM 26 CG MET A 4 -0.207 1.416 0.754 1.00 20.00 C ATOM 27 SD MET A 4 0.132 -0.349 0.876 1.00 20.00 S ATOM 28 CE MET A 4 1.822 -0.411 0.285 1.00 20.00 C ATOM 29 N GLN A 5 2.154 1.821 4.871 1.00 10.38 N ATOM 30 CA GLN A 5 3.270 2.361 5.640 1.00 11.39 C ATOM 31 C GLN A 5 4.594 1.768 5.172 1.00 11.52 C ATOM 32 O GLN A 5 4.768 0.546 5.054 1.00 12.05 O ATOM 33 CB GLN A 5 3.056 2.183 7.147 1.00 11.96 C ATOM 34 CG GLN A 5 1.829 2.950 7.647 1.00 10.81 C ATOM 35 CD GLN A 5 1.344 2.414 8.954 1.00 13.10 C ATOM 36 OE1 GLN A 5 0.774 1.325 9.002 1.00 10.65 O ATOM 37 NE2 GLN A 5 1.549 3.187 10.039 1.00 12.30 N ATOM 38 N ASN A 6 5.514 2.664 4.856 1.00 11.99 N 
ATOM 39 CA ASN A 6 6.831 2.310 4.318 1.00 12.30 C ATOM 40 C ASN A 6 7.854 2.761 5.324 1.00 13.40 C ATOM 41 O ASN A 6 8.219 3.943 5.374 1.00 13.92 O ATOM 42 CB ASN A 6 7.065 3.016 2.993 1.00 12.13 C ATOM 43 CG ASN A 6 5.961 2.735 2.003 1.00 12.77 C ATOM 44 OD1 ASN A 6 5.798 1.604 1.551 1.00 14.27 O ATOM 45 ND2 ASN A 6 5.195 3.747 1.679 1.00 10.07 N ATOM 46 N TYR A 7 8.292 1.817 6.147 1.00 14.70 N ATOM 47 CA TYR A 7 9.159 2.144 7.299 1.00 15.18 C ATOM 48 C TYR A 7 10.603 2.331 6.885 1.00 15.91 C ATOM 49 O TYR A 7 11.041 1.811 5.855 1.00 15.76 O ATOM 50 CB TYR A 7 9.061 1.065 8.369 1.00 15.35 C ATOM 51 CG TYR A 7 7.665 0.929 8.902 1.00 14.45 C ATOM 52 CD1 TYR A 7 6.771 0.021 8.327 1.00 15.68 C ATOM 53 CD2 TYR A 7 7.210 1.756 9.920 1.00 14.80 C ATOM 54 CE1 TYR A 7 5.480 -0.094 8.796 1.00 13.46 C ATOM 55 CE2 TYR A 7 5.904 1.649 10.416 1.00 14.33 C ATOM 56 CZ TYR A 7 5.047 0.729 9.831 1.00 15.09 C ATOM 57 OH TYR A 7 3.766 0.589 10.291 1.00 14.39 O ATOM 58 OXT TYR A 7 11.358 2.999 7.612 1.00 17.49 O TER 59 TYR A 7 HETATM 1 CA CA A 8 10.431 1.858 3.216 1.00 30.00 CA HETATM 60 O HOH A 9 -6.471 5.227 7.124 1.00 22.62 O HETATM 62 O HOH A 10 -11.286 1.756 -1.468 1.00 17.08 O HETATM 63 O HOH A 11 11.808 4.179 9.970 1.00 23.99 O HETATM 64 O HOH A 12 13.605 1.327 9.198 1.00 26.17 O HETATM 65 O HOH A 13 -2.749 3.429 10.024 1.00 39.15 O HETATM 66 O HOH A 14 -1.500 0.682 10.967 1.00 43.49 O END """
  pdb_file = "tst_xtriage_in.pdb"
  open(pdb_file, "w").write(pdb_raw)
  # these arguments are only consumed by the commented-out fmodel.run call
  # below; they record how the reference MTZ file was produced
  fmodel_args = [
    pdb_file,
    "high_resolution=1.5",
    "k_sol=0.35",
    "b_sol=20",
    "wavelength=1.54",
    "add_random_error_to_amplitudes_percent=3",
    "random_seed=12345",
    "output.type=real",
    "output.label=F",
    "output.file_name=tst_xtriage_fmodel.mtz",
  ]
  # read it instead so python3 will be the same
  # fmodel.run(args=fmodel_args, log=null_out())
  hkl_file = libtbx.env.find_in_repositories(
    relative_path="mmtbx/regression/mtz/tst_xtriage_fmodel.mtz",
    test=os.path.isfile)
  mtz_in = file_reader.any_file(hkl_file).assert_file_type("hkl")
  f_obs = mtz_in.file_server.miller_arrays[0].remove_cone(0.1)
  data = f_obs.data()
  # add some outliers
  #data[17] = 20
  #data[334] = 26
  #data[1908] = 13
  # and sigmas
  sigf = flex.double(f_obs.size(), 0.1) + (f_obs.data() * 0.03)
  f_obs = f_obs.customized_copy(sigmas=sigf)
  mtz_file = "tst_xtriage_in.mtz"
  f_obs.as_mtz_dataset(column_root_label="F").mtz_object().write(mtz_file)
  seq_file = "tst_xtriage_in.fa"
  open(seq_file, "w").write("> tst_xtriage\nGNNMQNY")
  # check with completeness_as_non_anomalous=True
  xtriage_args = [
    mtz_file,
    pdb_file,
    seq_file,
    "log=tst_xtriage_1.log",
    "l_test_dhkl=2,2,2",
    "completeness_as_non_anomalous=True",
  ]
  result = xtriage.run(args=xtriage_args, out=null_out())
  test_pickle_consistency_and_size(result)
  assert (result.matthews.n_copies == 1)
  assert (str(result.matthews.table) == """\ Solvent content analysis Copies Solvent content Matthews coeff. P(solvent content) 1 0.472 2.33 1.000 """)
  data_strength = result.data_strength_and_completeness
  assert approx_equal(data_strength.data_strength.resolution_cut, 1.5351,
    eps=0.001)
  out1 = data_strength.low_resolution_completeness.format()
  assert (out1 == """\ --------------------------------------------------------- | Resolution range | N(obs)/N(possible) | Completeness | --------------------------------------------------------- | 21.9858 - 10.4368 | [6/7] | 0.857 | | 10.4368 - 8.4369 | [3/3] | 1.000 | | 8.4369 - 7.4172 | [3/4] | 0.750 | | 7.4172 - 6.7606 | [4/4] | 1.000 | | 6.7606 - 6.2882 | [5/5] | 1.000 | | 6.2882 - 5.9252 | [3/4] | 0.750 | | 5.9252 - 5.6337 | [7/7] | 1.000 | | 5.6337 - 5.3922 | [5/5] | 1.000 | | 5.3922 - 5.1874 | [4/4] | 1.000 | | 5.1874 - 5.0106 | [4/4] | 1.000 | ---------------------------------------------------------"""), out1
  # ANOMALOUS SIGNAL
  a_meas = result.anomalous_info.measurability
  #assert approx_equal(a_meas.high_d_cut, 4.7636, eps=0.0001) # Why it's None?
  assert approx_equal(a_meas.low_d_cut, 2.3566, eps=0.0001)
  # ABSOLUTE SCALING
  ws = result.wilson_scaling
  assert ("%.2f" % ws.iso_p_scale) == "0.65", ws.iso_p_scale
  assert ("%.2f" % ws.iso_b_wilson) == "14.42", ws.iso_b_wilson
  # FIXME these may need to be adjusted for different hardware/OS
  assert approx_equal(ws.aniso_p_scale, 0.64723, eps=0.001)
  assert approx_equal(
    ws.aniso_u_star,
    [0.00034229, 0.00475982, 0.000285989, -0.0, 8.95386085999e-05, 0.0])
  assert approx_equal(ws.aniso_b_cart, (13.218423, 16.840142, 12.948426, 1.0354e-15, -0.0685311, -7.92862e-16), 0.3)
  # convenience methods for GUI
  assert approx_equal(result.aniso_b_min, 12.895580)
  assert approx_equal(result.aniso_range_of_b, 3.804215)
  #
  assert approx_equal(
    ws.outlier_shell_table.data[0], # d_spacing
    [9.865131, 8.369653, 4.648634])
  assert approx_equal(
    ws.outlier_shell_table.data[1], # z_score
    [5.306713, 18.068284, 5.319230])
  assert (len(ws.outliers.acentric_outliers_table.data[0]) == 2)
  assert (ws.outliers.acentric_outliers_table.data[1] == [(0, -1, -1), (0, 1, 1)])
  assert approx_equal(ws.outliers.acentric_outliers_table.data[2], [3.507247, 3.315550])
  assert (ws.outliers.centric_outliers_table.data is None)
  assert (len(ws.ice_rings.table._rows) == 10)
  assert (ws.ice_rings.table._rows[0] == [' 3.897', ' 1.000', ' 0.76', ' 1.00']), \
    ws.ice_rings.table._rows[0]
  tw = result.twin_results
  wm = tw.wilson_moments
  out = StringIO()
  wm.show(out)
  assert not show_diff(
    out.getvalue(), """ ----------Wilson ratio and moments---------- Acentric reflections: <I^2>/<I>^2 :2.063 (untwinned: 2.000; perfect twin 1.500) <F>^2/<F^2> :0.778 (untwinned: 0.785; perfect twin 0.885) <|E^2 - 1|> :0.745 (untwinned: 0.736; perfect twin 0.541) Centric reflections: <I^2>/<I>^2 :3.076 (untwinned: 3.000; perfect twin 2.000) <F>^2/<F^2> :0.628 (untwinned: 0.637; perfect twin 0.785) <|E^2 - 1|> :0.999 (untwinned: 0.968; perfect twin 0.736) """)
  # XXX PDB validation server
  assert approx_equal(result.iso_b_wilson, 14.51, eps=0.1)
  assert approx_equal(result.aniso_b_ratio, 0.271, eps=0.1)
  assert (result.number_of_wilson_outliers == 2)
  assert approx_equal(result.l_test_mean_l, 0.481, eps=0.1)
  assert approx_equal(result.l_test_mean_l_squared, 0.322, eps=0.1)
  assert approx_equal(result.i_over_sigma_outer_shell, 10.71, eps=0.01)
  assert ("indicating pseudo-translationa" in result.patterson_verdict)
  # check relative Wilson
  # FIXME
  #result.relative_wilson.show()
  #assert (result.relative_wilson.n_outliers() == 0)
  #show_pickled_object_sizes(result)
  #
  # check with completeness_as_non_anomalous=False
  xtriage_args = [
    mtz_file,
    pdb_file,
    seq_file,
    "log=tst_xtriage_1.log",
    "l_test_dhkl=2,2,2",
    "completeness_as_non_anomalous=False",
  ]
  result = xtriage.run(args=xtriage_args, out=null_out())
  test_pickle_consistency_and_size(result)
  assert (result.matthews.n_copies == 1)
  assert (str(result.matthews.table) == """\ Solvent content analysis Copies Solvent content Matthews coeff. P(solvent content) 1 0.472 2.33 1.000 """)
  data_strength = result.data_strength_and_completeness
  assert approx_equal(data_strength.data_strength.resolution_cut, 1.5351,
    eps=0.001)
  out1 = data_strength.low_resolution_completeness.format()
  assert (out1 == """\ --------------------------------------------------------- | Resolution range | N(obs)/N(possible) | Completeness | --------------------------------------------------------- | 21.9858 - 10.4368 | [ 6/7 ] | 0.857 | | 10.4368 - 8.4369 | [ 3/3 ] | 1.000 | | 8.4369 - 7.4172 | [ 3/4 ] | 0.750 | | 7.4172 - 6.7606 | [ 4/4 ] | 1.000 | | 6.7606 - 6.2882 | [ 8/8 ] | 1.000 | | 6.2882 - 5.9252 | [ 4/5 ] | 0.800 | | 5.9252 - 5.6337 | [11/11] | 1.000 | | 5.6337 - 5.3922 | [ 7/7 ] | 1.000 | | 5.3922 - 5.1874 | [ 6/6 ] | 1.000 | | 5.1874 - 5.0106 | [ 7/7 ] | 1.000 | ---------------------------------------------------------"""), out1
  # ANOMALOUS SIGNAL
  a_meas = result.anomalous_info.measurability
  #assert approx_equal(a_meas.high_d_cut, 4.7636, eps=0.0001) # Why?
  assert approx_equal(a_meas.low_d_cut, 2.3565, eps=0.0001)
  # ABSOLUTE SCALING
  ws = result.wilson_scaling
  assert ("%.2f" % ws.iso_p_scale) == "0.65", ws.iso_p_scale
  assert ("%.2f" % ws.iso_b_wilson) == "14.42", ws.iso_b_wilson
  # FIXME these may need to be adjusted for different hardware/OS
  assert approx_equal(ws.aniso_p_scale, 0.64723, eps=0.001)
  assert approx_equal(
    ws.aniso_u_star,
    [0.00034473, 0.00479983, 0.000287162, -0.0, 9.00962e-05, 0.0], 6.e-5)
  assert approx_equal(ws.aniso_b_cart, [13.12, 16.69, 12.89, 0, -0.08, 0], 0.01)
  # convenience methods for GUI
  assert approx_equal(result.aniso_b_min, 12.9, 0.1)
  assert approx_equal(result.aniso_range_of_b, 3.8, 0.1)
  #
  assert approx_equal(
    ws.outlier_shell_table.data[0], # d_spacing
    [9.86, 8.36, 4.64], 0.02)
  assert approx_equal(
    ws.outlier_shell_table.data[1], # z_score
    [5.30, 18.06, 5.31], 0.01)
  assert (len(ws.outliers.acentric_outliers_table.data[0]) == 2)
  assert (ws.outliers.acentric_outliers_table.data[1] == [(0, -1, -1), (0, 1, 1)])
  assert approx_equal(ws.outliers.acentric_outliers_table.data[2], [3.5, 3.3], 0.1)
  assert (ws.outliers.centric_outliers_table.data is None)
  assert (len(ws.ice_rings.table._rows) == 10)
  assert (ws.ice_rings.table._rows[0] == [' 3.897', ' 1.000', ' 0.76', ' 1.00']), \
    ws.ice_rings.table._rows[0]
  tw = result.twin_results
  wm = tw.wilson_moments
  out = StringIO()
  wm.show(out)
  assert not show_diff(
    out.getvalue(), """ ----------Wilson ratio and moments---------- Acentric reflections: <I^2>/<I>^2 :2.063 (untwinned: 2.000; perfect twin 1.500) <F>^2/<F^2> :0.778 (untwinned: 0.785; perfect twin 0.885) <|E^2 - 1|> :0.745 (untwinned: 0.736; perfect twin 0.541) Centric reflections: <I^2>/<I>^2 :3.076 (untwinned: 3.000; perfect twin 2.000) <F>^2/<F^2> :0.628 (untwinned: 0.637; perfect twin 0.785) <|E^2 - 1|> :0.999 (untwinned: 0.968; perfect twin 0.736) """)
  # XXX PDB validation server
  assert approx_equal(result.iso_b_wilson, 14.51, eps=0.1)
  assert approx_equal(result.aniso_b_ratio, 0.271, eps=0.1)
  assert (result.number_of_wilson_outliers == 2)
  assert approx_equal(result.l_test_mean_l, 0.481, eps=0.1)
  assert approx_equal(result.l_test_mean_l_squared, 0.322, eps=0.1)
  assert approx_equal(result.i_over_sigma_outer_shell, 10.71, eps=0.01)
  assert ("indicating pseudo-translationa" in result.patterson_verdict)
  # check relative Wilson
  # FIXME
  #result.relative_wilson.show()
  #assert (result.relative_wilson.n_outliers() == 0)
  #show_pickled_object_sizes(result)
  #
  # test without sigmas
  f_obs_2 = f_obs.customized_copy(sigmas=None)
  mtz_file = "tst_xtriage_in_2.mtz"
  f_obs_2.as_mtz_dataset(column_root_label="F").mtz_object().write(mtz_file)
  xtriage_args = [
    mtz_file,
    pdb_file,
    seq_file,
    "log=tst_xtriage_1.log",
  ]
  result = xtriage.run(args=xtriage_args, out=null_out())
  # only has to run without raising; the returned summary is not inspected
  result.summarize_issues()
  # test in lower symmetry
  f_obs_3 = f_obs.expand_to_p1()
  mtz_file = "tst_xtriage_in_3.mtz"
  f_obs_3.as_mtz_dataset(column_root_label="F").mtz_object().write(mtz_file)
  xtriage_args = [
    mtz_file,
    seq_file,
    "log=tst_xtriage_2.log",
  ]
  result = xtriage.run(args=xtriage_args, out=null_out())
  assert ((
    1,
    'One or more symmetry operators suggest that the data has a higher crystallographic symmetry (P 2 1 1).',
    'Point group and R-factor analysis') in result.summarize_issues()._issues)
  # test with elliptical truncation
  # (the data are resolution-filtered under a different unit cell, then the
  # original symmetry is put back)
  f_obs_3 = f_obs.customized_copy(
    crystal_symmetry=crystal.symmetry((23, 5, 20, 90, 107.8, 90), "P 21"))
  f_obs_3 = f_obs_3.resolution_filter(d_min=1.5)
  f_obs_3 = f_obs_3.customized_copy(
    crystal_symmetry=f_obs.crystal_symmetry())
  reso = ds.analyze_resolution_limits(f_obs_3)
  out = StringIO()
  reso.show(out=out)
  assert ("max. difference between axes = 0.652" in out.getvalue()), \
    out.getvalue()
  assert ("elliptically truncated" in out.getvalue())
  # make sure the elliptical truncation detection still works in higher space
  # groups - we only need a miller.set for this
  miller_set = miller.build_set(crystal_symmetry=crystal.symmetry(
    (20, 20, 20, 90, 90, 90), "P422"),
    d_min=1.5,
    anomalous_flag=False)
  reso = ds.analyze_resolution_limits(miller_set)
  out = StringIO()
  reso.show(out=out)
  assert ("Resolution limits are within expected tolerances" in out.getvalue())
  # log binning
  out = StringIO()
  log_binned = ds.log_binned_completeness(f_obs_3)
  log_binned.show(out=out)
  assert ("""| 1.9724 - 1.5094 | 368/1230 | 29.9% |""" in out.getvalue()), out.getvalue()
  # test with no acentrics
  cf = f_obs.centric_flags().data()
  centrics = f_obs.select(cf)
  acentrics = f_obs.select(~cf)
  mtz_file = "tst_xtriage_in_3.mtz"
  centrics.as_mtz_dataset(column_root_label="F").mtz_object().write(mtz_file)
  args = [
    mtz_file,
    pdb_file,
    seq_file,
    "log=tst_xtriage_3.log",
  ]
  try:
    xtriage.run(args=args, out=null_out())
  except Sorry:
    pass
  else:
    raise Exception_expected
  # with only a handful of acentrics
  sel = flex.bool(acentrics.size(), False)
  for i in range(10):
    sel[i] = True
  f_obs_4 = centrics.concatenate(acentrics.select(sel))
  f_obs_4.as_mtz_dataset(column_root_label="F").mtz_object().write(mtz_file)
  try:
    xtriage.run(args=args, out=null_out())
  except Sorry:
    pass
  else:
    raise Exception_expected
def rama_z_score(self):
  """
  Run rama_z.rama_z on this object's model with logging discarded and
  return whatever its get_result() yields.
  """
  evaluation = rama_z.rama_z(
    models=[self.model],
    log=null_out())
  return evaluation.get_result()
def exercise_synthetic():
  """
  Run molprobity on synthetic data for several input variants.

  Structure factors are calculated to 1.5 A while R-free flags are generated
  only to 1.6 A; flags for the remaining reflections are set uniformly to
  True (case 1) or False (case 2).  Case 3 uses a two-MODEL copy of the
  structure, and the last run passes ``rotamer_library=8000``.  The runs only
  have to complete and show their results; no output is compared.
  """
  from mmtbx.regression import tst_build_alt_confs
  pdb_in = iotbx.pdb.hierarchy.input(pdb_string=tst_build_alt_confs.pdb_raw)
  xrs = pdb_in.input.xray_structure_simple()
  fc = abs(xrs.structure_factors(d_min=1.5).f_calc())
  flags = fc.resolution_filter(d_min=1.6).generate_r_free_flags()
  ls = fc.lone_set(other=flags)
  # case 1: no work set in high-res shell
  flags2 = ls.array(data=flex.bool(ls.size(), True))
  flags_all = flags.concatenate(other=flags2)
  mtz_out = fc.as_mtz_dataset(column_root_label="F")
  mtz_out.add_miller_array(flags_all, column_root_label="FreeR_flag")
  mtz_out.mtz_object().write("tst_molprobity_1.mtz")
  # context managers make sure the files are flushed and closed before
  # molprobity reads them
  with open("tst_molprobity_1.pdb", "w") as f:
    f.write(tst_build_alt_confs.pdb_raw)
  args = [
    "tst_molprobity_1.pdb",
    "tst_molprobity_1.mtz",
    "--kinemage",
    "--maps",
    "flags.clashscore=False",
    "flags.xtriage=True",
  ]
  result = molprobity.run(args=args,
    ignore_missing_modules=True,
    out=null_out()).validation
  out = StringIO()
  result.show(out=out)
  # case 2: no test set in high-res shell
  flags2 = ls.array(data=flex.bool(ls.size(), False))
  flags_all = flags.concatenate(other=flags2)
  mtz_out = fc.as_mtz_dataset(column_root_label="F")
  mtz_out.add_miller_array(flags_all, column_root_label="FreeR_flag")
  # NOTE(review): this dataset is never written to disk, so the run below
  # still reads the case 1 file - confirm whether a write was intended.
  result = molprobity.run(args=args,
    ignore_missing_modules=True,
    out=null_out()).validation
  out = StringIO()
  result.show(out=out)
  # case 3: multi-MODEL structure
  # XXX This is not a very sophisticated test - it only ensures that the
  # program does not crash.  We need a test for expected output...
  hierarchy = pdb_in.hierarchy
  model2 = hierarchy.only_model().detached_copy()
  hierarchy.append_model(model2)
  hierarchy.models()[0].id = "1"
  hierarchy.models()[1].id = "2"
  with open("tst_molprobity_multi_model.pdb", "w") as f:
    f.write(hierarchy.as_pdb_string())
  args = [
    "tst_molprobity_multi_model.pdb",
    "tst_molprobity_1.mtz",
    "--kinemage",
    "--maps",
  ]
  result = molprobity.run(args=args,
    ignore_missing_modules=True,
    out=null_out()).validation
  out = StringIO()
  result.show(out=out)
  # test rotamer distributions
  # NOTE(review): this file is written but the run below reads
  # tst_molprobity_1.pdb - confirm which one was meant.
  with open("tst_molprobity_misc1.pdb", "w") as f:
    f.write(tst_build_alt_confs.pdb_raw)
  args = [
    "tst_molprobity_1.pdb",
    "rotamer_library=8000",
  ]
  out = StringIO()
  result = molprobity.run(args=args,
    ignore_missing_modules=True,
    out=null_out()).validation
  result.show(outliers_only=False, out=out)
def exercise():
  """
  Check the hydrogen connectivity built by connectivity.determine_connectivity
  against the model's own restraints.

  For every hydrogen entry the bonded partner with its ideal distance, the
  angles to the a1/a2/a3/h1 neighbours, and any b1 neighbour are collected and
  compared with model.xh_connectivity_table(), with the ideal values of a
  fixed list of angle proxies, and with the expected b1 mapping {19: 8}.
  """
  pdb_inp = iotbx.pdb.input(lines=pdb_str.split("\n"), source_info=None)
  model = mmtbx.model.manager(model_input=pdb_inp, log=null_out())
  restraints_manager = model.get_restraints_manager()
  angle_proxies = restraints_manager.geometry.get_all_angle_proxies()
  connectivity_manager = connectivity.determine_connectivity(
    pdb_hierarchy=model.get_hierarchy(),
    geometry_restraints=restraints_manager.geometry)
  h_connectivity = connectivity_manager.h_connectivity
  # get bonds stored in connectivity
  bond_list = {}
  angle_list = {}
  # Created up front and filled per hydrogen: this used to be rebound inside
  # the loop, which left it undefined when no entry had a b1 neighbour and
  # kept only the last entry otherwise.
  third_nb_dict = {}
  for neighbors in h_connectivity:
    if (neighbors is None):
      continue
    ih = neighbors.ih
    a0 = neighbors.a0
    i_a0 = a0['iseq']
    a1 = neighbors.a1
    bond_list[ih] = [i_a0, a0['dist_ideal']]
    angle_list[tuple(sorted([ih, i_a0, a1['iseq']]))] = a1['angle_ideal']
    # optional further neighbours, each contributing one angle
    for extra in (neighbors.a2, neighbors.a3, neighbors.h1):
      if extra:
        key = tuple(sorted([ih, i_a0, extra['iseq']]))
        angle_list[key] = extra['angle_ideal']
    if neighbors.b1:
      third_nb_dict[ih] = neighbors.b1['iseq']
  bond_ctrl = {}
  for i in model.xh_connectivity_table():
    bond_ctrl[i[1]] = [i[0], i[3]]
  # List of angle restraints
  angles = [
    (4, 1, 12),
    (0, 1, 12),
    (2, 1, 12),
    (13, 4, 14),
    (5, 4, 14),
    (5, 4, 13),
    (1, 4, 13),
    (1, 4, 14),
    (8, 6, 15),
    (5, 6, 15),
    (9, 7, 16),
    (5, 7, 16),
    (10, 8, 17),
    (6, 8, 17),
    (10, 11, 19),
    (7, 9, 18),
    (10, 9, 18)]
  angle_ctrl = {}
  for ap in angle_proxies:
    if (ap.i_seqs in angles):
      angle_ctrl[tuple(sorted(ap.i_seqs))] = ap.angle_ideal
  # HH needs also third neighbors:
  third_nb_ctrl = {19: 8}
  assert (
    bond_list == bond_ctrl), '1-2 neighbors and distance_ideal are wrong'
  assert (
    angle_list == angle_ctrl), '1-3 neighbors and angle_ideal are wrong'
  assert (third_nb_dict == third_nb_ctrl), '1-4 neighbors are wrong'
def real_space_refine(
    pdb_hierarchy,
    fmodel,
    cif_objects,
    params,
    out,
    nproc=None,
    max_cycles=100, # arbitrarily large
    remediate=False):
  """
  Refine contiguous runs of modified atoms, optionally repeating while
  alt_confs.spread_alternates keeps splitting further residues.

  Each cycle reprocesses the hierarchy, collects the contiguous runs of atoms
  matching alt_confs.SELECTION_MODIFIED (after the first cycle only runs with
  at least one non-zero atom tmp flag), refines them through easy_mp.pool_map
  and writes the refined coordinates back to the atoms and to fmodel.

  :param pdb_hierarchy: model to refine; replaced by the reprocessed
    hierarchy on every cycle
  :param fmodel: updated in place with the refined structure
  :param cif_objects: passed through to building.reprocess_pdb
  :param params: read for cleanup.rsr_fofc_map_target and
    residue_fitting.expected_occupancy
  :param out: handle receiving progress messages
  :param nproc: number of processes given to easy_mp.pool_map
  :param max_cycles: upper bound on the number of cycles
  :param remediate: if False, exactly one cycle is run
  :returns: the refined hierarchy
  """
  from scitbx.array_family import flex
  i_cycle = 0
  while (i_cycle < max_cycles):
    print(" Cycle %d:" % (i_cycle + 1), file=out)
    # this keeps track of which residues were split in the previous cycle -
    # we only refine segments that have had residues added
    rebuilt_flags = pdb_hierarchy.atoms().extract_tmp_as_size_t()
    processed_pdb_file = building.reprocess_pdb(
      pdb_hierarchy=pdb_hierarchy,
      cif_objects=cif_objects,
      crystal_symmetry=fmodel.xray_structure,
      out=null_out())
    # get the 2mFo-DFc map without new alternates!
    pdb_hierarchy = processed_pdb_file.all_chain_proxies.pdb_hierarchy
    pdb_atoms = pdb_hierarchy.atoms()
    xray_structure = processed_pdb_file.xray_structure()
    # result unused here; the call itself is kept as in the original flow
    geometry_restraints_manager = \
      processed_pdb_file.geometry_restraints_manager(show_energies=False)
    fmodel.update_xray_structure(xray_structure)
    sele_cache = pdb_hierarchy.atom_selection_cache()
    # FIXME very inefficient when looping!
    # this will include both the newly built residues and the original atoms,
    # including residues split to allow for backbone flexibility.
    sele_split = sele_cache.selection(alt_confs.SELECTION_MODIFIED)
    sele_main_conf = sele_cache.selection(alt_confs.SELECTION_OLD)
    n_atoms = len(sele_split)
    assert (n_atoms > 0)
    k = 0
    fragments = []
    while (k < n_atoms):
      if (not sele_split[k]):
        k += 1
        continue
      current_fragment = flex.size_t()
      # the bounds check stops the scan when a run extends to the last atom;
      # without it the index ran past the end of the selection
      while (k < n_atoms) and (sele_split[k]):
        current_fragment.append(k)
        k += 1
      if (i_cycle > 0):
        frag_selection = flex.bool(sele_split.size(), current_fragment)
        flags = rebuilt_flags.select(frag_selection)
        if flags.all_eq(0):
          # nothing in this run is flagged as changed since the last cycle
          continue
      fragments.append(current_fragment)
    refine_fragments = rsr_fragments_parallel(
      pdb_hierarchy=pdb_hierarchy,
      fmodel=fmodel,
      processed_pdb_file=processed_pdb_file,
      sele_main_conf=sele_main_conf,
      rsr_fofc_map_target=(i_cycle == 0 and
        params.cleanup.rsr_fofc_map_target))
    refined = easy_mp.pool_map(
      fixed_func=refine_fragments,
      iterable=fragments,
      processes=nproc)
    sites_refined = pdb_atoms.extract_xyz()
    for result in refined:
      assert (result is not None)
      result.show(out=out, prefix=" ")
      sites_refined.set_selected(result.selection, result.sites_cart)
    pdb_atoms.set_xyz(sites_refined)
    xray_structure.set_sites_cart(sites_refined)
    fmodel.update_xray_structure(xray_structure)
    if (not remediate) or (max_cycles == 1):
      break
    # relabel the atoms split in this round so that only they are checked
    for atom in pdb_hierarchy.atoms():
      if (atom.segid == alt_confs.SEGID_NEW_SPLIT):
        atom.segid = alt_confs.SEGID_NEW_REBUILT
    print(" checking for conformational strain...", file=out)
    n_split = alt_confs.spread_alternates(
      pdb_hierarchy=pdb_hierarchy,
      new_occupancy=params.residue_fitting.expected_occupancy,
      split_all_adjacent=False,
      selection=alt_confs.SELECTION_NEW_REBUILT)
    if (n_split == 0):
      break
    print(" split another %d residue(s) - will re-run RSR" % \
      n_split, file=out)
    i_cycle += 1
  xray_structure = pdb_hierarchy.extract_xray_structure(
    crystal_symmetry=fmodel.xray_structure)
  fmodel.update_xray_structure(xray_structure,
    update_f_mask=True,
    update_f_calc=True)
  return pdb_hierarchy
def write_map_file(self):
  """
  Write self.map_coeff to an MTZ file and, if self.peak_search is set, a PDB
  file with the peaks and holes found in that map.

  The MTZ file name is self.output_file, or "FoFoPHFc.mtz" when that is None;
  the peaks file shares its base name with the suffix "_peaks.pdb".

  :returns: self.file_names, the list of files written
  """
  class map_coeffs_mtz_label_manager:
    """Label decorator handing back fixed amplitude and phase labels."""
    def __init__(self, amplitudes, phases):
      self._amplitudes = amplitudes
      self._phases = phases

    def amplitudes(self):
      return self._amplitudes

    def phases(self, root_label, anomalous_sign=None):
      assert anomalous_sign is None or not anomalous_sign
      return self._phases

  # output MTZ file with map coefficients
  history = flex.std_string()
  labels = map_coeffs_mtz_label_manager(amplitudes="FoFo", phases="PHFc")
  mtz_dataset = self.map_coeff.as_mtz_dataset(
    column_root_label=labels.amplitudes(),
    label_decorator=labels)
  history.append("> column label %s = phenix %s" % (
    labels.amplitudes(), "FoFoPHFc"))
  file_name = "FoFoPHFc.mtz" if self.output_file is None else self.output_file
  history.append("file name %s"%file_name)
  mtz_object = mtz_dataset.mtz_object()
  mtz_object.add_history(history)
  mtz_object.write(file_name=file_name)
  self.file_names = [ file_name ]
  if (not self.peak_search):
    return self.file_names

  from mmtbx.command_line import find_peaks_holes
  from mmtbx import find_peaks
  search_log = self.log
  if (self.silent):
    search_log = null_out()
  fmodel = self.fmodel

  def search(cutoff):
    # run one peak search at the given cutoff; the sites found are
    # orthogonalized with the model's unit cell
    found = find_peaks.manager(
      fmodel=fmodel,
      map_type=None,
      map_coeffs=self.map_coeff,
      map_cutoff=cutoff,
      params=self.peak_search_params,
      log=search_log).peaks_mapped()
    found.sites = fmodel.xray_structure.unit_cell().orthogonalize(found.sites)
    return found

  peaks = search(self.map_cutoff)
  holes = search(-self.map_cutoff)
  container = find_peaks_holes.peaks_holes_container(
    peaks=peaks,
    holes=holes,
    map_cutoff=self.map_cutoff)
  pdb_out = os.path.splitext(file_name)[0] + "_peaks.pdb"
  container.save_pdb_file(
    file_name=pdb_out,
    include_anom=False,
    include_water=False,
    log=search_log)
  self.file_names.append(pdb_out)
  return self.file_names
def _try_as_ccp4_map(self):
  """
  Open self.file_name with iotbx.map_manager.map_manager (logging discarded)
  and, once that has succeeded, record the file type "ccp4_map" and the
  manager object on this instance.
  """
  from iotbx.map_manager import map_manager
  from libtbx.utils import null_out
  manager = map_manager(
    file_name=str(self.file_name),
    log=null_out())
  # only reached when the constructor did not raise
  self._file_type = "ccp4_map"
  self._file_object = manager
def exercise_space_group_handling():
  """
  Regression test for symmetry handling of unmerged data in cc_star.run.

  Writes a merged MTZ file (amplitudes plus R-free flags) and a PDB file
  built from model_1yjp, then runs cc_star with unmerged data supplied as:

    1. a Scalepack file whose array was given space group P2 (must run),
    2. a Scalepack file written without that space group override (must
       raise Sorry with the "Incompatible space groups" message),
    3. a CIF file written from the P2 array (must run),
    4. a CIF file with a different unit cell (must raise Sorry mentioning
       "Incompatible symmetry definitions:").

  All files are written to the current working directory with the prefix
  "tst_cc_star_space_group".
  """
  flex.set_random_seed(123456)
  random.seed(123456)
  base = "tst_cc_star_space_group"
  pdb_in = iotbx.pdb.hierarchy.input(pdb_string=model_1yjp)
  xrs = pdb_in.xray_structure_simple()
  xrs.set_inelastic_form_factors(photon=1.54, table="sasaki")
  fc = abs(xrs.structure_factors(d_min=1.5).f_calc()).average_bijvoet_mates()
  fc.set_observation_type_xray_amplitude()
  flags = fc.generate_r_free_flags()
  mtz = fc.as_mtz_dataset(column_root_label="F")
  mtz.add_miller_array(flags, column_root_label="FreeR_flag")
  mtz.mtz_object().write(base + ".mtz")
  xrs_p1 = xrs.expand_to_p1()
  xrs_p1.shake_sites_in_place(rms_difference=0.1)
  fc_p1 = xrs_p1.structure_factors(d_min=1.4).f_calc()
  fc_p1_extra = fc_p1.randomize_amplitude_and_phase(amplitude_error=1.0,
    phase_error_deg=0, random_seed=123456)
  fc_p1 = abs(
    fc_p1.concatenate(other=fc_p1_extra)).sort(by_value="packed_indices")
  fc_p1.set_observation_type_xray_amplitude()
  sg_p2 = sgtbx.space_group_info("P2")
  ic = fc_p1.f_as_f_sq().customized_copy(
    space_group_info=sg_p2,
    sigmas=flex.double(fc_p1.size(), 10.0))
  ic.export_as_scalepack_unmerged(file_name=base + ".sca")
  # Close the model file explicitly so that its contents are flushed to
  # disk before cc_star reads it back.
  with open(base + ".pdb", "w") as f:
    f.write(model_1yjp)
  args = [
    base + ".mtz",
    base + ".pdb",
    "unmerged_data=%s.sca" % base,
  ]
  cc_star.run(args=args, out=null_out())
  # now with .sca in P1 (raises Sorry)
  ic2 = fc_p1.f_as_f_sq().customized_copy(
    sigmas=flex.double(fc_p1.size(), 10.0))
  ic2.export_as_scalepack_unmerged(file_name=base + "_p1.sca")
  args = [
    base + ".mtz",
    base + ".pdb",
    "unmerged_data=%s_p1.sca" % base,
  ]
  try:
    cc_star.run(args=args, out=null_out())
  except Sorry as s:
    assert (
      str(s) ==
      "Incompatible space groups in merged and unmerged data:P 1 21 1 versus P 1"
    ), s
  else:
    raise Exception_expected
  # now with CIF (complete symmetry)
  with open(base + ".cif", "w") as f:
    ic.as_cif_simple(array_type="meas", out=f)
  args = [
    base + ".mtz",
    base + ".pdb",
    "unmerged_data=%s.cif" % base,
  ]
  cc_star.run(args=args, out=null_out())
  # bad unit cell
  uc2 = uctbx.unit_cell((23, 6.5, 23.5, 90, 108, 90))
  ic3 = ic.customized_copy(unit_cell=uc2)
  with open(base + "_new_uc.cif", "w") as f:
    ic3.as_cif_simple(array_type="meas", out=f)
  args = [
    base + ".mtz",
    base + ".pdb",
    "unmerged_data=%s_new_uc.cif" % base,
  ]
  try:
    cc_star.run(args=args, out=null_out())
  except Sorry as s:
    assert ("Incompatible symmetry definitions:" in str(s)), s
  else:
    raise Exception_expected
def __init__(self,
    map_manager = None,
    resolution = None,
    molecular_mass = None,
    sequence = None,
    solvent_content = None):
  '''
   Create a mask (map object) with values of 1 near molecule

   Parameters are:
     map_manager: source of information about density (required)
     resolution : optional resolution of map; when not supplied it is
                  estimated from the map with d_min_from_map
     molecular_mass: optional mass (Da) of object in density
     sequence: optional sequence of object in density
     solvent_content : optional solvent_content of map; when not supplied
                  and molecular_mass or sequence is given, a starting value
                  is obtained from get_solvent_fraction

   Results are stored on the instance:
     self._mask, self._solvent_content : values returned by
                  get_iterated_solvent_fraction
     self._map_manager : copy of map_manager holding the mask, flagged
                  with set_is_mask(True)

   Raises Sorry if no solvent content could be determined.
  '''

  assert (map_manager is not None)

  # Fall back to a resolution estimated from the map itself
  if not resolution:
    from cctbx.maptbx import d_min_from_map
    resolution = d_min_from_map(
      map_data=map_manager.map_data(),
      unit_cell=map_manager.crystal_symmetry().unit_cell())

  self._crystal_symmetry = map_manager.crystal_symmetry()

  if (molecular_mass or sequence ) and (
        not solvent_content):
    # Try to get a good starting value of solvent_content

    from cctbx.maptbx.segment_and_split_map import get_solvent_fraction
    solvent_content = get_solvent_fraction(
      params = None,
      molecular_mass = molecular_mass,
      sequence = sequence,
      do_not_adjust_dalton_scale = True,
      crystal_symmetry = self._crystal_symmetry,
      out = null_out())

  # Now use automatic procedure to get a mask
  from cctbx.maptbx.segment_and_split_map import \
        get_iterated_solvent_fraction
  self._mask, self._solvent_content = get_iterated_solvent_fraction(
    crystal_symmetry = self._crystal_symmetry,
    fraction_of_max_mask_threshold = 0.05,
    # starting estimate; still None/false when nothing was supplied or
    # derived above
    solvent_content = solvent_content,
    cell_cutoff_for_solvent_from_mask = 1, # Use low-res method always
    use_solvent_content_for_threshold = True,
    mask_resolution = resolution,
    return_mask_and_solvent_fraction = True,
    map = map_manager.map_data(),
    verbose = False,
    out = null_out())

  if self._solvent_content is None:
    raise Sorry("Unable to get solvent content in auto-masking")

  # Set up map_manager with this mask
  self._map_manager = map_manager.customized_copy(map_data = self._mask)
  self._map_manager.set_is_mask(True)

  # Initialize soft mask
  self._is_soft_mask = False
  self._is_soft_mask_around_edges = False
def exercise_2():
  """
  Regression test running the xtriage analyses on the p9 Scalepack data
  from phenix_regression.

  The test is skipped with a warning when the data file cannot be found.
  When the p9 model from phenix_examples is available it is additionally
  used to compute calculated amplitudes and as a symmetry reference.

  Files written to the current directory: tmp_xtriage.pdb (only when the
  model is available), logfile3.log, and xtriage.json (only when
  phenix_dev is importable).
  """
  hkl_file = libtbx.env.find_in_repositories(
    relative_path="phenix_regression/wizards/data/p9_se_w2.sca",
    test=os.path.isfile)
  if (hkl_file is None):
    warnings.warn("phenix_regression not available, skipping test")
    return
  hkl_in = file_reader.any_file(hkl_file).assert_file_type("hkl")
  i_obs_raw = hkl_in.file_object.as_miller_arrays(
    merge_equivalents=False,
    crystal_symmetry=crystal.symmetry(
      space_group_symbol="I4",
      unit_cell=(113.949, 113.949, 32.474, 90, 90, 90)))[0]
  i_obs = i_obs_raw.merge_equivalents().array()
  # completeness and data strength
  cstats = ds.i_sigi_completeness_stats(i_obs)
  d_min_cut = cstats.resolution_cut
  assert approx_equal(d_min_cut, 2.150815)
  ws = ds.wilson_scaling(
    miller_array=i_obs,
    n_residues=120)
  # outliers - this shouldn't actually work, since it requires additional
  # processing steps on the input data
  try:
    outliers = ds.possible_outliers(i_obs)
  except AssertionError:
    pass
  else:
    raise Exception_expected
  ######################################################################
  # OVERALL ANALYSIS
  pdb_file = libtbx.env.find_in_repositories(
    relative_path="phenix_examples/p9-build/p9.pdb",
    test=os.path.isfile)
  f_calc = None
  if (pdb_file is not None):
    pdb_in = file_reader.any_file(pdb_file).assert_file_type("pdb")
    hierarchy = pdb_in.file_object.hierarchy
    xrs = pdb_in.file_object.xray_structure_simple(crystal_symmetry=i_obs)
    f_calc = xrs.structure_factors(d_min=i_obs.d_min()).f_calc()
    f_calc = abs(f_calc).generate_bijvoet_mates()
    f_calc = f_calc.set_observation_type_xray_amplitude()
    i_obs, f_calc = i_obs.common_sets(other=f_calc)
    # Close the file explicitly: it is read back by xtriage.run() below.
    with open("tmp_xtriage.pdb", "w") as pdb_out:
      pdb_out.write(hierarchy.as_pdb_string(crystal_symmetry=i_obs))
    pdb_file = "tmp_xtriage.pdb"
  params = xtriage.master_params.extract()
  params.scaling.input.asu_contents.n_residues = 141
  # The log file is only needed while the analyses run.
  with open("logfile3.log", "w") as text_out:
    result = xtriage.xtriage_analyses(
      miller_obs=i_obs,
      miller_calc=f_calc,
      params=params,
      unmerged_obs=i_obs_raw,
      text_out=text_out) #sys.stdout)
  # XXX there appears to be some system-dependence here, hence sloppy limits
  assert (15.5 < result.aniso_b_min < 15.9)
  assert (10 < result.aniso_range_of_b < 11)
  # check relative Wilson
  if (pdb_file is not None):
    assert (result.relative_wilson is not None)
    # FIXME
    #assert (result.relative_wilson.n_outliers() == 34)
  #show_pickled_object_sizes(result)
  test_pickle_consistency_and_size(result)
  # XXX PDB validation server
  assert approx_equal(result.iso_b_wilson, 18.33, eps=0.1)
  assert approx_equal(result.aniso_b_ratio, 0.546, eps=0.1)
  assert (result.number_of_wilson_outliers == 0)
  assert approx_equal(result.l_test_mean_l, 0.493, eps=0.1)
  assert approx_equal(result.l_test_mean_l_squared, 0.326, eps=0.1)
  assert approx_equal(result.i_over_sigma_outer_shell, 3.25, eps=0.1)
  assert approx_equal(result.overall_i_sig_i, 10.34, eps=0.1)
  assert approx_equal(
    result.anomalous_info.plan_sad_experiment_stats.get_overall(
      item="i_over_sigma_dict"), 10.61, eps=0.1)
  assert approx_equal(
    result.anomalous_info.plan_sad_experiment_stats.get_overall(
      item="anom_signal_dict"), 15.35, eps=0.1)
  assert ("No significant pseudotranslation is detected" in
          result.patterson_verdict)
  # test consistency of output after pickling and unpickling
  try:
    from phenix_dev.phenix_cloud import xtriage_json
  except ImportError:
    pass
  else:
    json_out = xtriage_json.json_output("p9.sca")
    result.show(out=json_out)
    with open("xtriage.json", "w") as json_file:
      json_file.write(json_out.export())
  # unmerged data
  assert result.merging_stats is not None
  out = StringIO()
  result.merging_stats.show(out=out)
  assert ("R-merge: 0.073" in out.getvalue())
  assert approx_equal(result.estimate_d_min(min_i_over_sigma=10), 1.9645,
    eps=0.001)
  # FIXME PDB doesn't actually have unit cell!
  # test detection of symmetry in reference file
  if (pdb_file is not None):
    args = [hkl_file, pdb_file]
    result = xtriage.run(args=args, out=null_out())
def find_alternate_residue(residue,
    pdb_hierarchy,
    fmodel,
    restraints_manager,
    params,
    verbose=False,
    debug=None,
    log=None):
  """
  Build trial alternate conformations for one residue at one or more
  assumed occupancies.

  For each occupancy a partial omit map is computed around the residue
  window, the residue is rebuilt into the difference map with
  rebuild_residue, and the outcome is wrapped in a residue_trial.

  Parameters:
    residue: atom group to rebuild
    pdb_hierarchy: model hierarchy; asserted to have unchanged coordinates
      when the function returns
    fmodel: fmodel manager (a deep copy is used for each omit map)
    restraints_manager: geometry restraints passed on to rebuild_residue
    params: parameter object; the attributes read here are window_size,
      expected_occupancy, omit_waters, backbone_sample_angle, anneal,
      annealing_temperature and simple_chi1_sampling
    verbose: accepted for interface compatibility; not used in this function
    debug: debugging level; None is treated as 0.  Values above 1 write
      the omit map (<prefix>.mtz) and each rebuilt model (<prefix>.pdb)
      and print timings to log.
    log: output stream (defaults to null_out())

  Returns the list of residue_trial objects, one per occupancy tried.
  """
  if (log is None):
    log = null_out()
  # debug defaults to None, which cannot be ordered against an int on
  # Python 3; normalise it once so the comparisons below are always valid.
  debug_level = 0 if (debug is None) else debug
  t1 = time.time()
  from scitbx.array_family import flex
  selection = flex.size_t()
  window = building.get_window_around_residue(residue,
    window_size=params.window_size)
  for pdb_object in window:
    selection.extend(pdb_object.atoms().extract_i_seq())
  assert (len(selection) > 0) and (not selection.all_eq(0))
  if (params.expected_occupancy is not None):
    assert (0.0 <= params.expected_occupancy <= 1.0)
    occupancies = [params.expected_occupancy]
  else:
    occupancies = [0.2, 0.3, 0.4, 0.5]
  trials = []
  # Kept to verify afterwards that the input model was not modified.
  sites_start_1d = pdb_hierarchy.atoms().extract_xyz().as_double()
  from mmtbx.rotamer import rotamer_eval
  rotamer_manager = rotamer_eval.RotamerEval(data_version="8000")
  id_str = residue.id_str()
  delete_selection = None
  if (params.omit_waters):
    delete_selection = building.get_nearby_water_selection(
      pdb_hierarchy=pdb_hierarchy,
      xray_structure=fmodel.xray_structure,
      selection=selection)
  for occupancy in occupancies:
    prefix = "%s_%.2f" % (id_str.replace(" ", "_"), occupancy)
    map_file_name = None
    if (debug_level > 1):
      map_file_name = prefix + ".mtz"
    two_fofc_map, fofc_map = alt_confs.get_partial_omit_map(
      fmodel=fmodel.deep_copy(),
      selection=selection,
      selection_delete=delete_selection,
      negate_surrounding=True,
      map_file_name=map_file_name,
      partial_occupancy=1.0 - occupancy)
    rebuild = rebuild_residue(
      target_map=fofc_map,
      pdb_hierarchy=pdb_hierarchy,
      xray_structure=fmodel.xray_structure,
      geometry_restraints_manager=restraints_manager,
      rotamer_eval=rotamer_manager,
      d_min=fmodel.f_obs().d_min())
    new_hierarchy = rebuild(
      atom_group=residue,
      window_size=params.window_size,
      backbone_sample_angle=params.backbone_sample_angle,
      anneal=params.anneal,
      annealing_temperature=params.annealing_temperature,
      use_chi1_sampling=params.simple_chi1_sampling,
      log=log)
    trial = residue_trial(
      residue=residue,
      new_hierarchy=new_hierarchy,
      occupancy=occupancy,
      rotamer_eval=rotamer_manager,
      fmodel=fmodel,
      two_fofc_map=two_fofc_map,
      fofc_map=fofc_map)
    trials.append(trial)
    if (debug_level > 1):
      with open("%s.pdb" % prefix, "w") as f:
        f.write(trial.new_hierarchy.as_pdb_string())
  sites_end_1d = pdb_hierarchy.atoms().extract_xyz().as_double()
  assert sites_start_1d.all_eq(sites_end_1d)
  t2 = time.time()
  if (debug_level > 1):
    print(" %d build trials (%s): %.3fs" % (len(occupancies),
      residue.id_str(), t2 - t1), file=log)
  return trials
def run(args, command_name = "phenix.fobs_minus_fobs_map", log=None):
  """
  Command-line driver for the Fobs-Fobs difference map calculation.

  Parameters:
    args: list of command-line arguments; an empty list is replaced by
      ["--help"]
    command_name: program name used in the usage string
    log: output stream (defaults to sys.stdout)

  The function reads two sets of observed data and a phasing model,
  applies the resolution and sigma cutoffs from the parameters, removes
  the optional omit selection from the model and calls
  compute_fo_minus_fo_map.

  Returns the file_names attribute of the compute_fo_minus_fo_map result.

  Raises Sorry for missing or inconsistent input (no model, no reflection
  data, incompatible space groups, no data left after filtering).
  """
  if(len(args) == 0): args = ["--help"]
  examples = """Examples:

phenix.fobs_minus_fobs_map f_obs_1_file=data1.mtz f_obs_2_file=data2.sca \
f_obs_1_label=FOBS1 f_obs_2_label=FOBS2 model.pdb

phenix.fobs_minus_fobs_map f_obs_1_file=data.mtz f_obs_2_file=data.mtz \
f_obs_1_label=FOBS1 f_obs_2_label=FOBS2 phase_source=model.pdb \
high_res=2.0 sigma_cutoff=2 scattering_table=neutron"""
  command_line = (iotbx_option_parser(
    usage="%s [options]" % command_name,
    description=examples)
    .option("--silent",
      action="store_true",
      help="Suppress output to the screen.")
    .enable_symmetry_comprehensive()
    ).process(args=args)
  #
  if (log is None):
    log = sys.stdout
  if(not command_line.options.silent):
    utils.print_header("phenix.fobs_minus_fobs_map", out = log)
    print("Command line arguments: ", file=log)
    print(args, file=log)
    print(file=log)
  #
  processed_args = utils.process_command_line_args(
    args=command_line.args,
    cmd_cs=command_line.symmetry,
    master_params=fo_minus_fo_master_params(),
    absolute_angle_tolerance=5,
    absolute_length_tolerance=1,
    log=log,
    suppress_symmetry_related_errors=True)
  working_phil = processed_args.params
  if(not command_line.options.silent):
    print("*** Parameters:", file=log)
    working_phil.show(out = log)
    print(file=log)
  params = working_phil.extract()
  if (params.ignore_non_isomorphous_unit_cells):
    if (None in [params.f_obs_1_file_name, params.f_obs_2_file_name,
        params.phase_source]):
      raise Sorry("The file parameters (f_obs_1_file_name, f_obs_2_file_name, "+
        "phase_source) must be specified explicitly when "+
        "ignore_non_isomorphous_unit_cells=True.")
    symm_manager = iotbx.symmetry.manager()
    pdb_in = iotbx.file_reader.any_file(params.phase_source, force_type="pdb")
    symm_manager.process_pdb_file(pdb_in)
    hkl_in_1 = iotbx.file_reader.any_file(params.f_obs_1_file_name,
      force_type="hkl")
    sg_err_1, uc_err_1 = symm_manager.process_reflections_file(hkl_in_1)
    hkl_in_2 = iotbx.file_reader.any_file(params.f_obs_2_file_name,
      force_type="hkl")
    sg_err_2, uc_err_2 = symm_manager.process_reflections_file(hkl_in_2)
    out = StringIO()
    symm_manager.show(out=out)
    if (sg_err_1) or (sg_err_2):
      raise Sorry(("Incompatible space groups in input files:\n%s\nAll files "+
        "must have the same point group (and ideally the same space group). "+
        "Please note that any symmetry information in the PDB file will be "+
        "used first.") % out.getvalue())
    elif (uc_err_1) or (uc_err_2):
      # Unit cell mismatches are tolerated in this mode, but reported.
      libtbx.call_back(message="warn",
        data=("Crystal symmetry mismatch:\n%s\nCalculations will continue "+
          "using the symmetry in the PDB file (or if not available, the "+
          "first reflection file), but the maps should be treated with "+
          "extreme suspicion.") % out.getvalue())
    crystal_symmetry = symm_manager.as_symmetry_object()
  else :
    # Re-process with symmetry errors enabled (output discarded) so that
    # inconsistent symmetry is reported as an error in the default mode.
    processed_args = utils.process_command_line_args(
      args=command_line.args,
      cmd_cs=command_line.symmetry,
      master_params=fo_minus_fo_master_params(),
      suppress_symmetry_related_errors = False,
      absolute_angle_tolerance=5,
      absolute_length_tolerance=1,
      log=StringIO())
    crystal_symmetry = processed_args.crystal_symmetry
  #
  pdb_file_names = processed_args.pdb_file_names
  if(len(processed_args.pdb_file_names) == 0):
    if(params.phase_source is not None):
      pdb_file_names = [params.phase_source]
    else:
      raise Sorry("No PDB file found.")
  # Extract Fobs1, Fobs2
  f_obss = []
  if(len(processed_args.reflection_files)==2):
    for reflection_file in processed_args.reflection_files:
      reflection_file_server = reflection_file_utils.reflection_file_server(
        crystal_symmetry = crystal_symmetry,
        force_symmetry   = True,
        reflection_files = [reflection_file],
        err              = null_out())
      # XXX UGLY !!!
      # It is not known which label belongs to which file, so the first
      # label is tried and the second is used when that fails.
      try:
        parameters = extract_xtal_data.data_and_flags_master_params().extract()
        if(params.f_obs_1_label is not None):
          parameters.labels = [params.f_obs_1_label]
        determine_data_and_flags_result = extract_xtal_data.run(
          reflection_file_server = reflection_file_server,
          keep_going             = True,
          parameters             = parameters)
      except Exception: # intentional; does not trap KeyboardInterrupt/SystemExit
        parameters = extract_xtal_data.data_and_flags_master_params().extract()
        if(params.f_obs_2_label is not None):
          parameters.labels = [params.f_obs_2_label]
        determine_data_and_flags_result = extract_xtal_data.run(
          reflection_file_server = reflection_file_server,
          keep_going             = True,
          parameters             = parameters)
      f_obss.append(determine_data_and_flags_result.f_obs)
  else:
    if([params.f_obs_1_file_name,params.f_obs_2_file_name].count(None)==2):
      raise Sorry("No reflection data file found.")
    for file_name, label in zip(
        [params.f_obs_1_file_name,params.f_obs_2_file_name],
        [params.f_obs_1_label,params.f_obs_2_label]):
      reflection_file = reflection_file_reader.any_reflection_file(
        file_name = file_name, ensure_read_access = False)
      reflection_file_server = reflection_file_utils.reflection_file_server(
        crystal_symmetry = crystal_symmetry,
        force_symmetry   = True,
        reflection_files = [reflection_file],
        err              = null_out())
      parameters = extract_xtal_data.data_and_flags_master_params().extract()
      if(label is not None):
        parameters.labels = [label]
      determine_data_and_flags_result = extract_xtal_data.run(
        reflection_file_server = reflection_file_server,
        parameters             = parameters,
        keep_going             = True)
      f_obss.append(determine_data_and_flags_result.f_obs)
  if(len(f_obss)!=2):
    # The original message was built from an undefined name ("errors").
    raise Sorry(
      "Expected exactly two sets of experimental data, found %d." %
      len(f_obss))
  if(not command_line.options.silent):
    for ifobs, fobs in enumerate(f_obss):
      print("*** Summary for data set %d:"%ifobs, file=log)
      fobs.show_comprehensive_summary(f = log)
      print(file=log)
  pdb_combined = combine_unique_pdb_files(file_names = pdb_file_names)
  pdb_combined.report_non_unique(out = log)
  if(len(pdb_combined.unique_file_names) == 0):
    raise Sorry("No coordinate file given.")
  #
  # Replace any CRYST1 record with one for the symmetry selected above.
  raw_recs = flex.std_string()
  for rec in pdb_combined.raw_records:
    if(rec.upper().count("CRYST1")==0):
      raw_recs.append(rec)
  raw_recs.append(iotbx.pdb.format_cryst1_record(
    crystal_symmetry = crystal_symmetry))
  #
  pdb_in = iotbx.pdb.input(source_info = None, lines = raw_recs)
  model = mmtbx.model.manager(model_input = pdb_in)
  d_min = min(f_obss[0].d_min(), f_obss[1].d_min())
  model.setup_scattering_dictionaries(
    scattering_table = params.scattering_table,
    d_min            = d_min)
  xray_structure = model.get_xray_structure()
  hierarchy = model.get_hierarchy()
  #
  omit_sel = flex.bool(hierarchy.atoms_size(), False)
  if (params.advanced.omit_selection is not None):
    print("Will omit selection from phasing model:", file=log)
    print(" " + params.advanced.omit_selection, file=log)
    omit_sel = hierarchy.atom_selection_cache().selection(
      params.advanced.omit_selection)
    print("%d atoms selected for removal" % omit_sel.count(True), file=log)
  del hierarchy
  xray_structure = xray_structure.select(~omit_sel)
  if(not command_line.options.silent):
    print("*** Model summary:", file=log)
    xray_structure.show_summary(f = log)
    print(file=log)
  # Filtering below returns new arrays, so the original info objects are
  # saved here and re-attached afterwards.
  infos = [f_obss[0].info(), f_obss[1].info()]
  for i in [0,1]:
    f_obss[i] = f_obss[i].resolution_filter(
      d_min = params.high_resolution,
      d_max = params.low_resolution).set_info(infos[i])
  if(params.sigma_cutoff is not None):
    for i in [0,1]:
      if(f_obss[i].sigmas() is not None):
        sel = f_obss[i].data() > f_obss[i].sigmas()*params.sigma_cutoff
        # Each array keeps its own info (previously both received the
        # info of the first array).
        f_obss[i] = f_obss[i].select(sel).set_info(infos[i])
  for k, f_obs in enumerate(f_obss):
    if (f_obs.indices().size() == 0):
      raise Sorry("No data left in array %d (labels=%s) after filtering!" %
        (k+1, f_obs.info().label_string()))
  output_file_name = params.output_file
  if (output_file_name is None) and (params.file_name_prefix is not None):
    output_file_name = "%s_%s.mtz" % (params.file_name_prefix, params.job_id)
  output_files = compute_fo_minus_fo_map(
    data_arrays = f_obss,
    xray_structure = xray_structure,
    log = log,
    silent = command_line.options.silent,
    output_file = output_file_name,
    peak_search=params.find_peaks_holes,
    map_cutoff=params.map_cutoff,
    peak_search_params=params.peak_search,
    multiscale=params.advanced.multiscale,
    anomalous=params.advanced.anomalous).file_names
  return output_files
def __call__(self,
    atom_group,
    log,
    window_size=2,
    backbone_sample_angle=10,
    anneal=False,
    annealing_temperature=1000,
    use_chi1_sampling=False):
  """
  Rebuild a single residue into self.target_map and return the result as a
  new hierarchy.

  All work is done on copies of self.pdb_hierarchy and self.xray_structure.
  The steps are:
    1. remove sidechains of the other residues in the window,
    2. remove the central sidechain (unless GLY/ALA) and real-space refine
       the mainchain of the window,
    3. re-extend the central sidechain, keeping the original atom order,
    4. fit the sidechain (chi1 sampling or fit_residue_in_box), optionally
       followed by annealing.

  Parameters:
    atom_group: atom group to rebuild (must not be None); its residue group
      is asserted to contain exactly one atom group
    log: accepted by the interface; not referenced in this method body
    window_size: passed to building.get_window_around_residue
    backbone_sample_angle: passed to box.fit_residue_in_box
    anneal: if true, box.anneal is run after sidechain placement
    annealing_temperature: start temperature passed to box.anneal
    use_chi1_sampling: use fit_chi1_simple when the residue name is in
      rotatable_sidechain_atoms

  Returns building.atom_group_as_hierarchy(new_atom_group).

  Raises RuntimeError if the rebuilt residue does not have the same number
  of atoms as the starting residue.
  """
  import iotbx.pdb.hierarchy
  from scitbx.array_family import flex
  assert (atom_group is not None)
  # Work on copies; the restraints manager itself is only ever sub-selected.
  pdb_hierarchy = self.pdb_hierarchy.deep_copy()
  xray_structure = self.xray_structure.deep_copy_scatterers()
  geometry_restraints_manager = self.geometry_restraints_manager
  # FIXME this doesn't work - can't recover the atom_group afterwards!
  #hd_sel = xray_structure.hd_selection()
  #n_hydrogen = hd_sel.count(True)
  #if (n_hydrogen > 0):
  #  non_hd_sel = ~hd_sel
  #  pdb_hierarchy = pdb_hierarchy.select(non_hd_sel)
  #  xray_structure = xray_structure.select(non_hd_sel)
  #  geometry_restraints_manager = geometry_restraints_manager.select(
  #    non_hd_sel)
  pdb_atoms = pdb_hierarchy.atoms()
  pdb_atoms.reset_i_seq()
  # Locate the equivalent atom group in the copied hierarchy via the i_seq
  # of its first atom.
  isel = building.extract_iselection([atom_group])
  atom_group = pdb_atoms[isel[0]].parent()
  # Reference copy, used later to restore the original atom ordering.
  atom_group_start = atom_group.detached_copy()
  needs_rebuild = not building.is_stub_residue(atom_group)
  residue_group = atom_group.parent()
  assert (len(residue_group.atom_groups()) == 1)
  sel_residues = building.get_window_around_residue(
    residue=atom_group,
    window_size=window_size)
  # get rid of sidechains for surrounding residues only
  adjacent_residues = []
  for other_rg in sel_residues:
    if (other_rg != residue_group):
      adjacent_residues.append(other_rg)
  building.remove_sidechain_atoms(adjacent_residues)
  pdb_atoms = pdb_hierarchy.atoms()
  adjacent_trimmed_atom_names = pdb_atoms.extract_name()
  # i_seq values still refer to the untrimmed model here, so they select the
  # surviving atoms from the structure and restraints; only afterwards are
  # they renumbered.
  adjacent_trimmed_sel = pdb_atoms.extract_i_seq()
  xrs_adjacent_trimmed = xray_structure.select(adjacent_trimmed_sel)
  grm_adjacent_trimmed = geometry_restraints_manager.select(
    adjacent_trimmed_sel)
  pdb_atoms.reset_i_seq()
  # get rid of central sidechain and refine mainchain for entire window
  truncate = (not atom_group.resname in ["GLY", "ALA"]) # XXX PRO?
  if (truncate):
    building.remove_sidechain_atoms([atom_group])
  pdb_atoms = pdb_hierarchy.atoms()
  # Same two-stage selection as above, now relative to the trimmed model.
  all_mc_sel = pdb_atoms.extract_i_seq()
  xrs_mc = xrs_adjacent_trimmed.select(all_mc_sel)
  pdb_atoms.reset_i_seq()
  window_mc_sel = building.extract_iselection(sel_residues)
  selection = flex.bool(pdb_atoms.size(), False).set_selected(window_mc_sel,
    True)
  restraints_manager = grm_adjacent_trimmed.select(all_mc_sel)
  box = building.box_build_refine_base(
    xray_structure=xrs_mc,
    pdb_hierarchy=pdb_hierarchy,
    selection=selection,
    processed_pdb_file=None,
    target_map=self.target_map,
    geometry_restraints_manager=restraints_manager.geometry,
    d_min=self.d_min,
    out=null_out(),
    debug=True)
  box.restrain_atoms(
    selection=box.others_in_box,
    reference_sigma=0.1)
  box.real_space_refine(selection=box.selection_in_box)
  sites_new = box.update_original_coordinates()
  pdb_atoms.set_xyz(sites_new)
  # extend and replace existing residue.  this is done in such a way that
  # the original atom ordering for the central residue is preserved, which
  # allows us to use the pre-existing geometry restraints instead of
  # re-calculating them every time this function is called.
  target_atom_group = self.ideal_dict[atom_group.resname.lower()].\
    only_model().only_chain().only_residue_group().only_atom_group()
  new_atom_group_base = extend_sidechains.extend_residue(
    residue=atom_group,
    target_atom_group=target_atom_group,
    mon_lib_srv=self.mon_lib_srv)
  new_atom_group = iotbx.pdb.hierarchy.atom_group(resname=atom_group.resname)
  # Copy the extended atoms in the order of the starting residue, matched by
  # atom name.
  for atom in atom_group_start.atoms():
    for new_atom in new_atom_group_base.atoms():
      if (new_atom.name == atom.name):
        new_atom_group.append_atom(new_atom.detached_copy())
  n_atoms_new = len(new_atom_group.atoms())
  n_atoms_start = len(atom_group_start.atoms())
  if (n_atoms_new != n_atoms_start):
    raise RuntimeError(("Inconsistent atom counts for residue %s after " +
      "building (%d versus %d).") % (atom_group.id_str(), n_atoms_start,
      n_atoms_new))
  rg = atom_group.parent()
  rg.remove_atom_group(atom_group)
  rg.append_atom_group(new_atom_group)
  pdb_atoms = pdb_hierarchy.atoms()
  pdb_atoms.reset_i_seq()
  # The model must again match the "adjacent sidechains removed" state so
  # that xrs_adjacent_trimmed / grm_adjacent_trimmed can be reused.
  new_names = pdb_atoms.extract_name()
  assert new_names.all_eq(adjacent_trimmed_atom_names)
  # get new box around this residue
  residue_sel = building.extract_iselection([new_atom_group])
  selection = flex.bool(pdb_atoms.size(), False).set_selected(residue_sel,
    True)
  xrs_adjacent_trimmed.set_sites_cart(pdb_atoms.extract_xyz())
  box = building.box_build_refine_base(
    xray_structure=xrs_adjacent_trimmed,
    pdb_hierarchy=pdb_hierarchy,
    selection=selection,
    processed_pdb_file=None,
    target_map=self.target_map,
    geometry_restraints_manager=grm_adjacent_trimmed.geometry,
    d_min=self.d_min,
    out=null_out(),
    debug=True)
  # place sidechain using mmtbx.refinement.real_space.fit_residue
  if ((atom_group.resname in rotatable_sidechain_atoms) and
      (use_chi1_sampling)):
    fit_chi1_simple(
      residue=box.only_residue(),
      unit_cell=box.unit_cell_box,
      target_map=box.target_map_box,
      rotamer_eval=self.rotamer_eval)
    box.update_sites_from_pdb_atoms()
  else:
    box.fit_residue_in_box(backbone_sample_angle=backbone_sample_angle)
  if (anneal):
    box.anneal(start_temperature=annealing_temperature)
  #box.real_space_refine()
  sites_new = box.update_original_coordinates()
  pdb_hierarchy.atoms().set_xyz(sites_new)
  return building.atom_group_as_hierarchy(new_atom_group)
def __init__(self,
    pdb_hierarchy,
    xray_structure=None,
    pdb_atoms=None,
    params=None,
    out=None,
    log=None):
  """
  Find backbone hydrogen bonds in the model and derive helices and sheets
  from them.

  Parameters:
    pdb_hierarchy: model hierarchy (only_model() is used, so it is expected
      to contain a single model)
    xray_structure: optional; extracted from pdb_hierarchy when None
    pdb_atoms: optional; pdb_hierarchy.atoms() when None.  The atoms must
      already carry i_seq values (asserted).
    params: optional; master_phil.extract() when None
    out: stream for the final self.show() output (defaults to sys.stdout)
    log: stream for timing and diagnostic output (defaults to null_out())

  Results are stored as self.hbonds, self.helices and self.sheets.  On
  exit the PyMOL script stream is closed and self.log is reset to None.
  """
  t0 = time.time()
  if (out is None):
    out = sys.stdout
  if (log is None):
    log = null_out()
  if (params is None):
    params = master_phil.extract()
  if (pdb_atoms is None):
    pdb_atoms = pdb_hierarchy.atoms()
  if (xray_structure is None):
    xray_structure = pdb_hierarchy.extract_xray_structure()
  self.hbonds = []
  self.pdb_hierarchy = pdb_hierarchy
  self.pdb_atoms = pdb_atoms
  self.params = params
  self.log = log
  t1 = time.time()
  assert (not pdb_atoms.extract_i_seq().all_eq(0))
  # NOTE: unit_cell and asu_mappings are not referenced again in this method.
  unit_cell = xray_structure.unit_cell()
  pair_asu_table = xray_structure.pair_asu_table(
    distance_cutoff=params.distance_cutoff)
  asu_mappings = pair_asu_table.asu_mappings()
  self.asu_table = pair_asu_table.table()
  self.pdb_labels = []
  # first mark atoms in each residue with position in sequence
  k = 0
  for chain in pdb_hierarchy.only_model().chains():
    last_resseq = None
    for residue_group in chain.residue_groups():
      resseq = residue_group.resseq_as_int()
      if (last_resseq is not None) and ((resseq - last_resseq) > 1):
        # A None label keeps pdb_labels aligned with the skipped index.
        self.pdb_labels.append(None)
        k += 1 # extra increment to handle probable chain breaks
      last_resseq = resseq
      # Only the first atom group of each residue group is considered.
      atom_group = residue_group.atom_groups()[0]
      self.pdb_labels.append(get_pdb_fields(atom_group))
      for atom in atom_group.atoms():
        #if (atom.name.strip() in ["N","C","O"]):
        atom.tmp = k
      k += 1
  # now iterate over backbone O atoms and look for H-bonds
  # XXX this loop takes up most of the runtime
  t1 = time.time()
  if (self.params.verbosity >= 1):
    print("Time to initialize: %.3fs" % (t1 - t0), file=log)
  # Accumulated time spent in the analysis and neighbour-search steps.
  t_process = 0
  t_find = 0
  for chain in pdb_hierarchy.only_model().chains():
    if (not chain.is_protein()):
      continue
    for residue_group in chain.residue_groups():
      atom_group = residue_group.atom_groups()[0]
      ag_atoms = atom_group.atoms()
      for atom in ag_atoms:
        if (atom.name == " O "):
          tf0 = time.time()
          n_atoms = self.find_nearby_backbone_n(atom)
          t_find += time.time() - tf0
          tp0 = time.time()
          for n_atom in n_atoms:
            hbond = self.process_o_n_interaction(atom, n_atom)
            if (hbond is not None):
              self.hbonds.append(hbond)
          t_process += time.time() - tp0
          # Stop scanning this residue once its O atom has been handled.
          break
  t2 = time.time()
  if (self.params.verbosity >= 1):
    print("Time to find H-bonds: %.3f" % (t2 - t1), file=log)
    print(" local atom detection: %.3f" % t_find, file=log)
    print(" analysis: %.3f" % t_process, file=log)
  if (self.params.verbosity >= 2):
    print("All hydrogen bonds:", file=log)
    for hbond in self.hbonds:
      hbond.show(log, prefix=" ")
  # The PyMOL script stream is a real file only when a name was given.
  if (self.params.pymol_script is not None):
    self._pml = open(self.params.pymol_script, "w")
  else:
    self._pml = null_out()
  t1 = time.time()
  self.helices = self.find_helices()
  t2 = time.time()
  if (self.params.verbosity >= 1):
    print("Time to find helices: %.3f" % (t2 - t1), file=log)
  self.sheets = self.find_sheets()
  t3 = time.time()
  if (self.params.verbosity >= 1):
    print("Time to find sheets: %.3f" % (t3 - t2), file=log)
  self.show(out=out)
  self._pml.close()
  # Drop the reference to the log stream once construction is finished.
  self.log = None
def __init__(self,
    obs,
    r_free_flags,
    test_flag_value,
    phases=None,
    d_min=None,
    d_max=None,
    r_free_flags_params=None,
    merge_anomalous=False,
    log=sys.stdout,
    verbose=True):
  """
  Prepare experimental data, R-free flags and optional phases for use.

  Parameters:
    obs: observed data (required); intensities are converted with
      french_wilson_scale and the result is asserted to be amplitudes
    r_free_flags: R-free flags array, or None
    test_flag_value: value marking the test set in r_free_flags
    phases: optional array, mapped to the ASU and resolution-filtered
    d_min, d_max: resolution limits applied to obs and phases
    r_free_flags_params: parameters for generating new flags; the defaults
      from cctbx.r_free_utils are used when None
    merge_anomalous: if true, Bijvoet mates of obs are averaged
    log: output stream for warnings (must not be None)
    verbose: if true, French-Wilson output goes to log, else it is discarded

  A new test set is generated when flags are missing, uniform, or have
  symmetry incompatible with the data; incomplete flags are extended.

  Results are stored as self.f_obs, self.r_free_flags,
  self.test_flag_value, self.phases and self._generate_new (True when a new
  test set was generated).
  """
  assert (log is not None) and (obs is not None)
  if (r_free_flags_params is None):
    from cctbx.r_free_utils import generate_r_free_params_str
    r_free_flags_params = libtbx.phil.parse(
      generate_r_free_params_str).extract()
  obs_info = obs.info()
  r_free_flags_info = phases_info = None
  obs = obs.map_to_asu().merge_equivalents().array()
  obs = obs.eliminate_sys_absent(log=log)
  obs = obs.resolution_filter(d_min=d_min, d_max=d_max)
  if (obs.is_xray_intensity_array()):
    from cctbx import french_wilson
    if (verbose):
      fw_out = log
    else :
      fw_out = null_out()
    obs = french_wilson.french_wilson_scale(
      miller_array=obs,
      params=None,
      log=fw_out)
  assert (obs is not None)
  merged_obs = obs.average_bijvoet_mates()
  if (merged_obs.completeness() < 0.9):
    print((
      " WARNING: data are incomplete (%.1f%% of possible reflections "
      "measured to %.2fA). This may cause problems if you plan to use the "
      "maps for building and/or ligand fitting! ") % (
      100*merged_obs.completeness(), merged_obs.d_min()), file=log)
  # XXX this is kind of a hack (the reconstructed arrays break some of my
  # assumptions about labels)
  if (merge_anomalous):
    obs = obs.average_bijvoet_mates()
  if (r_free_flags is not None):
    r_free_flags_info = r_free_flags.info()
    # The flag convention is inferred from the test flag value.
    flag_format = "cns"
    if (test_flag_value == 0):
      flag_format = "ccp4"
    elif (test_flag_value == -1):
      flag_format = "shelx"
    if (r_free_flags.anomalous_flag()):
      r_free_flags = r_free_flags.average_bijvoet_mates()
    is_compatible_symmetry = False
    obs_pg = obs.space_group().build_derived_point_group()
    flags_pg = r_free_flags.space_group().build_derived_point_group()
    if (obs_pg.type().number() == flags_pg.type().number()):
      is_compatible_symmetry = True
    else :
      pass # TODO unit cell comparison?
    if (is_compatible_symmetry):
      r_free_flags = r_free_flags.map_to_asu().merge_equivalents().array()
      r_free_flags = r_free_flags.eliminate_sys_absent(log=log)
      if (flag_format == "cns"):
        # Convert to boolean flags carrying the symmetry of the data.
        r_free_flags = r_free_flags.customized_copy(
          crystal_symmetry=obs.crystal_symmetry(),
          data=(r_free_flags.data() == test_flag_value))
        test_flag_value = True
      obs_tmp = obs.deep_copy()
      if (obs.anomalous_flag()):
        obs_tmp = obs.average_bijvoet_mates()
      r_free_flags = r_free_flags.common_set(other=obs_tmp)
      n_r_free = r_free_flags.indices().size()
      n_obs = obs_tmp.indices().size()
      if ((test_flag_value is None) or
          (r_free_flags.data().all_eq(r_free_flags.data()[0]))):
        print(
          " WARNING: uniform R-free flags detected; a new test set will be "
          "generated, but this will bias the refinement statistics. ",
          file=log)
        r_free_flags = None
      elif (n_r_free != n_obs):
        missing_set = obs_tmp.lone_set(other=r_free_flags)
        n_missing = missing_set.indices().size()
        if (n_missing > 0):
          print((
            " WARNING: R-free flags are incomplete relative to experimental "
            "data (%d vs. %d reflections). The flags will be extended to "
            "complete the set, but we recommend supplying flags that are "
            "already generated to the maximum expected resolution. ") % (
            n_r_free, n_obs), file=log)
          if (n_missing < 20) : # FIXME
            # Too few reflections to sample from: mark all missing
            # reflections with a single constant flag value.
            if (flag_format == "cns"):
              missing_flags = missing_set.array(
                data=flex.bool(n_missing, False))
            else :
              missing_flags = missing_set.array(data=flex.int(n_missing, 1))
          else :
            # float() keeps the fraction from truncating to zero under
            # integer division.
            missing_flags = missing_set.generate_r_free_flags(
              fraction=(
                float(r_free_flags.data().count(test_flag_value)) / n_r_free),
              max_free=None,
              use_lattice_symmetry=True,
              format=flag_format)
          r_free_flags = r_free_flags.concatenate(other=missing_flags)
      if (r_free_flags is not None):
        assert (r_free_flags.indices().size() == obs_tmp.indices().size())
    else :
      print((
        " NOTE: incompatible symmetry between the data and the R-free "
        "flags: Data : %s %s Flags : %s %s A new test set will be "
        "generated. ") % (
        str(obs.space_group_info()),
        " ".join([ "%g" % x for x in obs.unit_cell().parameters() ]),
        str(r_free_flags.space_group_info()),
        " ".join(["%g" % x for x in r_free_flags.unit_cell().parameters()])),
        file=log)
      # Discard the incompatible flags so that a new test set really is
      # generated below, as the message announces.
      r_free_flags = None
  else :
    print(
      " WARNING: R-free flags not supplied. This may bias the refinement if "
      "the structures are very nearly isomorphous! ", file=log)
  self._generate_new = False
  if (r_free_flags is None):
    r_free_flags = obs.generate_r_free_flags(
      fraction=r_free_flags_params.fraction,
      max_free=r_free_flags_params.max_free,
      use_lattice_symmetry=r_free_flags_params.use_lattice_symmetry,
      use_dataman_shells=r_free_flags_params.use_dataman_shells,
      n_shells=r_free_flags_params.n_shells,
      format="ccp4")
    test_flag_value = 0
    self._generate_new = True
  if (r_free_flags.anomalous_flag()):
    r_free_flags = r_free_flags.average_bijvoet_mates()
  if (phases is not None):
    phases_info = phases.info()
    phases = phases.map_to_asu().resolution_filter(d_min=d_min, d_max=d_max)
  assert (obs.is_xray_amplitude_array())
  self.f_obs = obs.set_info(obs_info)
  self.r_free_flags = r_free_flags.set_info(r_free_flags_info)
  self.test_flag_value = test_flag_value
  self.phases = None
  if (phases is not None):
    self.phases = phases.set_info(phases_info)
def run(args):
  """
  Compare the spherical joint parameterizations by the number of function
  evaluations each refinery run needs.

  args is either empty (3 sites, 2 trials, output discarded) or holds two
  integers, n_sites and n_trials (output to sys.stdout).  For every trial a
  random set of sites and a randomly rotated copy ("wells") is generated
  and refined with each parameterization; per-type statistics and a table
  sorted by total evaluation count are written, followed by "OK" on
  standard output.
  """
  assert len(args) in [0,2], "n_sites, n_trials"
  if args:
    n_sites, n_trials = [int(arg) for arg in args]
    out = sys.stdout
  else:
    n_sites, n_trials = 3, 2
    out = null_out()
  #
  show_times_at_exit()

  class type_info(object):
    # Pairs a parameterization class with the gradient mode to use.
    def __init__(O, type, use_analytical_gradients):
      O.type = type
      O.use_analytical_gradients = use_analytical_gradients

    def __str__(O):
      return "%s(use_analytical_gradients=%s)" % (
        O.type.__name__, str(O.use_analytical_gradients))

  spherical_types = [
    type_info(euler_params, False),
    type_info(euler_params, True),
    type_info(euler_angles_xyz, False),
    type_info(euler_angles_xyz, True),
    type_info(euler_angles_zxz, False),
    type_info(euler_angles_zxz, True),
    type_info(euler_angles_yxyz, False),
    type_info(euler_angles_xyzy, False),
    type_info(inf_euler_params, False),
    type_info(inf_axis_angle, False)]
  # The string form of each type_info is the key for both accumulators.
  labels = [str(ti) for ti in spherical_types]
  nfun_accu = dict((label, flex.size_t()) for label in labels)
  n_failed = dict.fromkeys(labels, 0)
  mersenne_twister = flex.mersenne_twister(seed=0)
  for _trial in range(n_trials):
    random_coordinates = mersenne_twister.random_double(size=n_sites*3)*2-1
    sites = [matrix.col(s) for s in flex.vec3_double(random_coordinates)]
    c = center_of_mass_from_sites(sites)
    rotation = matrix.sqr(mersenne_twister.random_double_r3_rotation_matrix())
    # Targets: the sites rotated about their centre of mass.
    wells = [rotation*(s-c)+c for s in sites]
    for label, ti in zip(labels, spherical_types):
      refined = refinery(
        spherical_type_info=ti, sites=sites, wells=wells, out=out)
      nfun_accu[label].append(refined.nfun)
      if (refined.failed):
        n_failed[label] += 1
  nfun_sums = []
  annotations = []
  for label, ti in zip(labels, spherical_types):
    print(ti, file=out)
    nfuns = nfun_accu[label]
    nfuns.as_double().min_max_mean().show(out=out, prefix=" ")
    nfun_sums.append((label, flex.sum(nfuns)))
    failures = n_failed[label]
    annotations.append(("failed: %d" % failures) if failures != 0 else None)
  print(file=out)
  show_sorted_by_counts(
    label_count_pairs=nfun_sums,
    reverse=False,
    out=out,
    annotations=annotations)
  print(file=out)
  print("OK")
def __init__(self, pdb_file, output_file=None, log=None, quiet=False,
        set_se_occ=True, remove_atoms_with_zero_occupancy=False):
    """Clean up a PDB file and rewrite it if anything had to change.

    Atoms matching ``element X``, ``resname UNX`` or ``resname UNL`` are
    always dropped.  Atoms with zero occupancy are dropped only when
    ``remove_atoms_with_zero_occupancy`` is true.  When ``set_se_occ`` is
    true, SE atoms of MSE residues with occupancy exactly 1.0 are set to
    0.99.

    :param pdb_file: path of the input PDB file
    :param output_file: path to write when the model is modified; defaults
      to ``pdb_file`` (i.e. the input is overwritten)
    :param log: file-like object for messages; ``null_out()`` if None
    :param quiet: suppress the warning/info messages
    :param set_se_occ: adjust MSE selenium occupancies as described above
    :param remove_atoms_with_zero_occupancy: drop zero-occupancy atoms
    :raises Sorry: if the file contains more than one MODEL

    Sets ``self.n_unknown``, ``self.n_semet`` and ``self.n_zero_occ``.
    """
    from iotbx.file_reader import any_file
    import iotbx.pdb
    if (log is None):
        log = null_out()
    pdb_in = any_file(pdb_file, force_type="pdb")
    pdb_in.assert_file_type("pdb")
    hierarchy = pdb_in.file_object.hierarchy
    if (len(hierarchy.models()) > 1):
        raise Sorry("Multi-MODEL PDB files are not supported.")
    all_atoms = hierarchy.atoms()
    cache = hierarchy.atom_selection_cache()
    # resname UNK is now okay (with some restrictions)
    known_sel = cache.selection("not (element X or resname UNX or resname UNL)")
    semet_sel = cache.selection("element SE and resname MSE")
    zero_occ_sel = all_atoms.extract_occ() == 0
    self.n_unknown = known_sel.count(False)
    self.n_semet = semet_sel.count(True)
    self.n_zero_occ = zero_occ_sel.count(True)
    keep_sel = known_sel
    modified = False
    # Parentheses make the original operator precedence explicit.
    if ((self.n_unknown > 0) or
            ((self.n_semet > 0) and (set_se_occ)) or
            ((self.n_zero_occ > 0) and (remove_atoms_with_zero_occupancy))):
        modified = True
        if (output_file is None):
            output_file = pdb_file
    if (self.n_unknown > 0) and (not quiet):
        print("Warning: %d unknown atoms or ligands removed:" %
              self.n_unknown, file=log)
        for i_seq in (~known_sel).iselection():
            print(" %s" % all_atoms[i_seq].id_str(), file=log)
    if (self.n_zero_occ > 0):
        msg = "Warning: %d atoms with zero occupancy present in structure:"
        if (remove_atoms_with_zero_occupancy):
            msg = "Warning: %d atoms with zero occupancy removed:"
            keep_sel &= ~zero_occ_sel
        if (not quiet):
            print(msg % self.n_zero_occ, file=log)
            for i_seq in zero_occ_sel.iselection():
                print(" %s" % all_atoms[i_seq].id_str(), file=log)
    hierarchy_filtered = hierarchy.select(keep_sel)
    if (self.n_semet > 0) and (set_se_occ):
        for atom in hierarchy_filtered.atoms():
            if (atom.element == "SE") and (atom.fetch_labels().resname == "MSE"):
                if (atom.occ == 1.0):
                    if (not quiet):
                        print("Set occupancy of %s to 0.99" % atom.id_str(),
                              file=log)
                    atom.occ = 0.99 # just enough to trigger occupancy refinement
    if (modified):
        # The context manager closes the file even if a write fails.
        with open(output_file, "w") as f:
            # if the input file is actually from the PDB, we need to preserve
            # the header information for downstream code.
            print("\n".join(pdb_in.file_object.input.title_section()), file=f)
            print("\n".join(pdb_in.file_object.input.remark_section()), file=f)
            print(iotbx.pdb.format_cryst1_record(
                crystal_symmetry=pdb_in.file_object.crystal_symmetry()), file=f)
            print(hierarchy_filtered.as_pdb_string(), file=f)
def get_model(pdb_str):
    """Build a model manager from PDB-format text and add hydrogens to it.

    :param pdb_str: PDB file content as a single newline-separated string
    :returns: the result of ``mmtbx.hydrogens.add`` applied to the model
    """
    pdb_lines = pdb_str.split("\n")
    parsed_input = iotbx.pdb.input(lines=pdb_lines, source_info=None)
    manager = mmtbx.model.manager(model_input=parsed_input, log=null_out())
    return mmtbx.hydrogens.add(model=manager)
def strip_model(pdb_hierarchy=None,
        xray_structure=None,
        file_name=None,
        params=None,
        remove_waters=True,
        remove_hydrogens=True,
        remove_alt_confs=True,
        convert_semet_to_met=True,
        convert_to_isotropic=True,
        reset_occupancies=True,
        remove_ligands=False,
        reset_hetatm_flag=False,
        preserve_remarks=False,
        preserve_symmetry=True,
        add_remarks=None,
        output_file=None,
        log=None):
    """
    Utility for removing extraneous records from a model intended for use in
    molecular replacement, etc., including waters, alternate conformations,
    and other features specific to a particular dataset.

    The model is given either as ``file_name`` (then ``pdb_hierarchy`` and
    ``xray_structure`` must both be None) or as a hierarchy/structure pair,
    which is copied before modification.  When ``params`` is given, its
    attributes override the corresponding boolean keyword arguments
    (``remove_waters`` through ``reset_hetatm_flag``).

    :param add_remarks: optional list of lines written before the model
    :param preserve_remarks: also write the REMARK section of the input file
      (only available when reading from ``file_name``)
    :param preserve_symmetry: write the crystal symmetry with the model
    :param output_file: if not None, path the stripped model is written to
    :param log: file-like object for messages; ``null_out()`` if None
    :returns: tuple ``(pdb_hierarchy, xray_structure)`` after stripping
    :raises Sorry: if the hierarchy contains more than one model
    """
    if (params is not None):
        remove_waters = params.remove_waters
        remove_hydrogens = params.remove_hydrogens
        remove_alt_confs = params.remove_alt_confs
        convert_semet_to_met = params.convert_semet_to_met
        convert_to_isotropic = params.convert_to_isotropic
        reset_occupancies = params.reset_occupancies
        remove_ligands = params.remove_ligands
        reset_hetatm_flag = params.reset_hetatm_flag
    if (log is None):
        log = null_out()
    make_sub_header("Processing input model", out=log)
    remarks = None
    if (file_name is not None):
        print("Reading model from %s" % file_name, file=log)
        assert ([pdb_hierarchy, xray_structure] == [None, None])
        from iotbx import file_reader
        pdb_in = file_reader.any_file(file_name, force_type="pdb",
            raise_sorry_if_errors=True)
        pdb_in.check_file_type("pdb")
        remarks = pdb_in.file_object.input.remark_section()
        pdb_hierarchy = pdb_in.file_object.hierarchy
        xray_structure = pdb_in.file_object.xray_structure_simple()
    else:
        # XXX work with copies, not the original structure
        pdb_hierarchy = pdb_hierarchy.deep_copy()
        xray_structure = xray_structure.deep_copy_scatterers()
    pdb_hierarchy.atoms().reset_i_seq()
    if (len(pdb_hierarchy.models()) > 1):
        raise Sorry("Multiple models not supported.")
    if (remove_hydrogens):
        sele = ~(xray_structure.hd_selection())
        n_hd = sele.count(False)
        if (n_hd > 0):
            pdb_hierarchy = pdb_hierarchy.select(sele)
            xray_structure = xray_structure.select(sele)
            print(" removed %d hydrogens" % n_hd, file=log)
            pdb_hierarchy.atoms().reset_i_seq()
    if (remove_waters):
        sele = pdb_hierarchy.atom_selection_cache().selection(
            "not (resname HOH)")
        n_wat = sele.count(False)
        if (n_wat > 0):
            pdb_hierarchy = pdb_hierarchy.select(sele)
            xray_structure = xray_structure.select(sele)
            print(" removed %d waters" % n_wat, file=log)
            pdb_hierarchy.atoms().reset_i_seq()
    assert_identical_id_str = True
    if (remove_alt_confs):
        n_atoms_start = xray_structure.scatterers().size()
        pdb_hierarchy.remove_alt_confs(always_keep_one_conformer=False)
        # i_seq values still refer to the pre-removal numbering, so they can
        # be used to pick the surviving scatterers out of xray_structure.
        i_seqs = pdb_hierarchy.atoms().extract_i_seq()
        n_atoms_end = i_seqs.size()
        if (n_atoms_end != n_atoms_start):
            # start - end, so that the reported count is positive
            print(" removed %d atoms in alternate conformations" %
                  (n_atoms_start - n_atoms_end), file=log)
            assert_identical_id_str = False
        xray_structure = xray_structure.select(i_seqs)
        pdb_hierarchy.atoms().reset_i_seq()
    if (convert_semet_to_met):
        # XXX need to start from a copy here because the atom-parent
        # relationship seems to be messed up otherwise.  this is probably a
        # bug.
        pdb_hierarchy = pdb_hierarchy.deep_copy()
        pdb_hierarchy.convert_semet_to_met()
    if (convert_to_isotropic):
        xray_structure.convert_to_isotropic()
        pdb_hierarchy.adopt_xray_structure(xray_structure,
            assert_identical_id_str=assert_identical_id_str)
        print(" converted all atoms to isotropic B-factors", file=log)
    if (reset_occupancies):
        assert (remove_alt_confs)
        xray_structure.adjust_occupancy(occ_max=1.0, occ_min=1.0)
        pdb_hierarchy.adopt_xray_structure(xray_structure,
            assert_identical_id_str=assert_identical_id_str)
        print(" reset occupancy to 1.0 for all atoms", file=log)
    if (reset_hetatm_flag):
        for atom in pdb_hierarchy.atoms():
            atom.hetero = False
    if (remove_ligands):
        pdb_hierarchy.atoms().reset_i_seq()
        model = pdb_hierarchy.only_model()
        for chain in model.chains():
            if (not chain.is_protein()) and (not chain.is_na()):
                print(" removing %d ligand atoms in chain '%s'" %
                      (len(chain.atoms()), chain.id), file=log)
                model.remove_chain(chain)
        i_seqs = pdb_hierarchy.atoms().extract_i_seq()
        xray_structure = xray_structure.select(i_seqs)
        pdb_hierarchy.atoms().reset_i_seq()
    assert xray_structure.scatterers().size() == pdb_hierarchy.atoms_size()
    if (output_file is not None):
        symm = None
        if (preserve_symmetry):
            symm = xray_structure
        # The context manager closes the file even if a write fails.
        with open(output_file, "w") as f:
            if (add_remarks is not None):
                f.write("\n".join(add_remarks))
                f.write("\n")
            if (preserve_remarks) and (remarks is not None):
                f.write("\n".join(remarks))
                f.write("\n")
            f.write(pdb_hierarchy.as_pdb_string(crystal_symmetry=symm))
        print(" wrote model to %s" % output_file, file=log)
    return pdb_hierarchy, xray_structure
def run(self):
    """
    Place H atoms on ``self.model``.

    Sequence: box the model when it has no crystal symmetry or unit cell,
    optionally strip existing H/D, add the missing H via
    ``add_missing_H_atoms_at_bogus_position``, rebuild the model manager
    with restraints, drop isolated H and H that the riding-H manager did
    not parameterize, then reset ADPs/occupancies and idealize the riding H.

    ``self.n_H_initial`` is always set.  If the rebuilt model contains no
    H/D atoms the method returns early, before ``sel_lone_H``,
    ``site_labels_no_para`` and ``n_H_final`` are assigned.
    """
    model_has_bogus_cs = False

    # TODO temporary fix until the code is moved to model class
    # check if box cushion of 5 A is enough to prevent symm contacts
    symmetry = self.model.crystal_symmetry()
    if (symmetry is None) or (symmetry.unit_cell() is None):
        self.model = shift_and_box_model(model = self.model)
        model_has_bogus_cs = True

    # Count the H/D present on input; remove them if requested
    existing_hd = self.model.get_hd_selection()
    self.n_H_initial = existing_hd.count(True)
    if not self.keep_existing_H:
        self.model = self.model.select(~existing_hd)

    # Add H atoms (positions are placeholders at this stage)
    hierarchy_with_h = self.add_missing_H_atoms_at_bogus_position()
    hierarchy_with_h.sort_atoms_in_place()
    hierarchy_with_h.atoms().reset_serial()

    interp = mmtbx.model.manager.get_default_pdb_interpretation_params()
    interp.pdb_interpretation.clash_guard.nonbonded_distance_threshold = None
    interp.pdb_interpretation.use_neutron_distances = \
        self.use_neutron_distances
    interp.pdb_interpretation.proceed_with_excessive_length_bonds = True
    # Left disabled in the original:
    #   automatic_linking.link_metals = True
    #   restraints_library.cdl = False   # XXX this triggers a bug !=360
    restraint_objects = self.model.get_restraint_objects()
    self.model = mmtbx.model.manager(
        model_input = None,
        pdb_hierarchy = hierarchy_with_h,
        build_grm = True,
        stop_for_unknowns = self.stop_for_unknowns,
        crystal_symmetry = self.model.crystal_symmetry(),
        restraint_objects = restraint_objects,
        pdb_interpretation_params = interp,
        log = null_out())

    # Only keep H that have been parameterized in riding H procedure
    hd_sel = self.model.get_hd_selection()
    if hd_sel.count(True) == 0:
        return

    # Get rid of isolated H atoms, e.g. when the heavy atom is missing
    # there is no need to place the H.
    isolated_sel = self.model.isolated_atoms_selection()
    self.sel_lone_H = hd_sel & isolated_sel
    self.model = self.model.select(~self.sel_lone_H)

    # get riding H manager --> parameterize all H atoms
    hd_sel = self.model.get_hd_selection()
    self.model.setup_riding_h_manager(use_ideal_dihedral = True)
    parameterization = self.model.riding_h_manager.h_parameterization
    in_para_sel = flex.bool([bool(entry) for entry in parameterization])
    not_in_para_sel = in_para_sel.exclusive_or(hd_sel)
    unparameterized_atoms = \
        self.model.get_hierarchy().atoms().select(not_in_para_sel)
    self.site_labels_no_para = [
        atom.id_str().replace('pdb=','').replace('"','')
        for atom in unparameterized_atoms]
    #
    self.model = self.model.select(~not_in_para_sel)

    self.exclude_H_on_disulfides()

    # Reset occupancies, ADPs and idealize H atom positions
    self.model.reset_adp_for_hydrogens(scale = self.adp_scale)
    self.model.reset_occupancy_for_hydrogens_simple()
    self.model.idealize_h_riding()

    self.exclude_h_on_coordinated_S()
    #
    self.n_H_final = self.model.get_hd_selection().count(True)
def __init__ (self,
        model,
        pdb_hierarchy=None,   # keep for mmtbx.validation_summary (multiple models)
        fmodel=None,
        fmodel_neutron=None,
        sequences=None,
        flags=None,
        header_info=None,
        raw_data=None,
        unmerged_data=None,
        keep_hydrogens=True,
        nuclear=False,
        save_probe_unformatted_file=None,
        show_hydrogen_outliers=False,
        min_cc_two_fofc=0.8,
        n_bins_data=10,
        count_anomalous_pairs_separately=False,
        use_internal_variance=True,
        outliers_only=True,
        use_pdb_header_resolution_cutoffs=False,
        file_name=None,
        ligand_selection=None,
        rotamer_library="8000",
        map_params=None) :
    """Run the validation analyses selected by ``flags`` and store results.

    Every name in ``self.__slots__`` is first set to None; each analysis
    then fills in its own attribute when its flag and required inputs are
    present.

    :param model: model manager, or None if only ``pdb_hierarchy`` is given
    :param pdb_hierarchy: used only when ``model`` is None (must then be
      set)
    :param fmodel: X-ray fmodel object, enables data-based analyses
    :param fmodel_neutron: neutron fmodel object (R-factor statistics only)
    :param flags: analysis switches; ``molprobity_flags()`` when None
    :param map_params: when it names a map file, or map coefficients plus a
      label, real-space and water analyses use those maps instead of fmodel
    :param save_probe_unformatted_file: if not None, path the unformatted
      probe output is written to
    :raises Sorry: if the probe file cannot be written
    """
    assert rotamer_library == "8000", "data_version given to RotamerEval not recognized."
    for name in self.__slots__ :
        setattr(self, name, None)

    # Resolve the default before anything reads flags: the map-based branch
    # below uses flags.real_space / flags.waters and previously ran before
    # this default was applied, failing when flags was None.
    if (flags is None) :
        flags = molprobity_flags()

    # use objects from model
    self.model = model
    if (self.model is not None):
        pdb_hierarchy = self.model.get_hierarchy()
        xray_structure = self.model.get_xray_structure()
        geometry_restraints_manager = self.model.get_restraints_manager().geometry
        crystal_symmetry = self.model.crystal_symmetry()
        all_chain_proxies = self.model.all_chain_proxies
    else:
        assert (pdb_hierarchy is not None)
        xray_structure = None
        geometry_restraints_manager = None
        crystal_symmetry = None
        all_chain_proxies = None

    # very important - the i_seq attributes may be extracted later
    pdb_hierarchy.atoms().reset_i_seq()
    self.pdb_hierarchy = pdb_hierarchy
    if (xray_structure is None) :
        if (fmodel is not None) :
            xray_structure = fmodel.xray_structure
        elif (crystal_symmetry is not None) :
            xray_structure = pdb_hierarchy.extract_xray_structure(
                crystal_symmetry=crystal_symmetry)
    self.crystal_symmetry = crystal_symmetry
    if (crystal_symmetry is None) and (fmodel is not None) :
        self.crystal_symmetry = fmodel.f_obs().crystal_symmetry()

    # use maps (fmodel is not used)
    # run earlier since pdb_hierarchy gets modified
    use_maps = False
    if (map_params is not None):
        use_maps = (
            (map_params.input.maps.map_file_name) or
            ( (map_params.input.maps.map_coefficients_file_name) and
              (map_params.input.maps.map_coefficients_label) ) )
    if (use_maps):
        if (flags.real_space):
            self.real_space = experimental.real_space(
                fmodel=None,
                model=self.model,
                cc_min=min_cc_two_fofc,
                molprobity_map_params=map_params.input.maps)
        if (flags.waters):
            self.waters = waters.waters(
                pdb_hierarchy=pdb_hierarchy,
                xray_structure=xray_structure,
                fmodel=None,
                collect_all=True,
                molprobity_map_params=map_params.input.maps)

    self.header_info = header_info
    import mmtbx.model.statistics
    self.model_statistics_geometry = mmtbx.model.statistics.geometry(
        pdb_hierarchy = pdb_hierarchy,
        geometry_restraints_manager = geometry_restraints_manager,
        use_hydrogens = keep_hydrogens,
        use_nuclear = nuclear)
    self.model_statistics_geometry_result = \
        self.model_statistics_geometry.result()
    self.ramalyze = self.model_statistics_geometry_result.ramachandran.ramalyze
    self.omegalyze = self.model_statistics_geometry_result.omega.omegalyze
    self.rotalyze = self.model_statistics_geometry_result.rotamer.rotalyze
    self.cbetadev = self.model_statistics_geometry_result.c_beta.cbetadev
    self.clashes = self.model_statistics_geometry_result.clash.clashes
    if pdb_hierarchy.contains_protein() :
        self.find_missing_atoms(out=null_out())
        if (flags.nqh) :
            self.nqh_flips = clashscore.nqh_flips(
                pdb_hierarchy=pdb_hierarchy)
    if (pdb_hierarchy.contains_rna() and flags.rna and
            libtbx.env.has_module(name="suitename")) :
        if (geometry_restraints_manager is not None) :
            self.rna = rna_validate.rna_validation(
                pdb_hierarchy=pdb_hierarchy,
                geometry_restraints_manager=geometry_restraints_manager,
                outliers_only=outliers_only,
                params=None)
    if (flags.model_stats) and (xray_structure is not None) :
        self.model_stats = model_properties.model_statistics(
            pdb_hierarchy=pdb_hierarchy,
            xray_structure=xray_structure,
            all_chain_proxies=all_chain_proxies,
            ignore_hd=(not nuclear),
            ligand_selection=ligand_selection)
    if (geometry_restraints_manager is not None) and (flags.restraints) :
        assert (xray_structure is not None)
        self.restraints = restraints.combined(
            pdb_hierarchy=pdb_hierarchy,
            xray_structure=xray_structure,
            geometry_restraints_manager=geometry_restraints_manager,
            ignore_hd=(not nuclear),
            cdl=getattr(all_chain_proxies, "use_cdl", None))
    if (sequences is not None) and (flags.seq) :
        self.sequence = sequence.validation(
            pdb_hierarchy=pdb_hierarchy,
            sequences=sequences,
            log=null_out(),
            include_secondary_structure=True,
            extract_coordinates=True)

    if (fmodel is not None) :
        if (use_pdb_header_resolution_cutoffs) and (header_info is not None) :
            fmodel = fmodel.resolution_filter(
                d_min=header_info.d_min,
                d_max=header_info.d_max)
        if (flags.rfactors) :
            self.data_stats = experimental.data_statistics(fmodel,
                raw_data=raw_data,
                n_bins=n_bins_data,
                count_anomalous_pairs_separately=count_anomalous_pairs_separately)

        if (not use_maps): # if maps are used, keep previous results
            if (flags.real_space):
                self.real_space = experimental.real_space(
                    model=model,
                    fmodel=fmodel,
                    cc_min=min_cc_two_fofc)
            if (flags.waters) :
                self.waters = waters.waters(
                    pdb_hierarchy=pdb_hierarchy,
                    xray_structure=xray_structure,
                    fmodel=fmodel,
                    collect_all=True)

        if (unmerged_data is not None) :
            self.merging = experimental.merging_and_model_statistics(
                f_obs=fmodel.f_obs(),
                f_model=fmodel.f_model(),
                r_free_flags=fmodel.r_free_flags(),
                unmerged_i_obs=unmerged_data,
                anomalous=count_anomalous_pairs_separately,
                use_internal_variance=use_internal_variance,
                n_bins=n_bins_data)
        if (flags.xtriage) :
            import mmtbx.scaling.xtriage
            f_model = abs(fmodel.f_model()).set_observation_type_xray_amplitude()
            if (raw_data is not None) :
                f_model, obs = f_model.common_sets(other=raw_data)
            else :
                obs = fmodel.f_obs()
            self.xtriage = mmtbx.scaling.xtriage.xtriage_analyses(
                miller_obs=obs,
                miller_calc=f_model,
                unmerged_obs=unmerged_data, # XXX some redundancy here...
                text_out=null_out())
    if (fmodel_neutron is not None) and (flags.rfactors) :
        self.neutron_stats = experimental.data_statistics(fmodel_neutron,
            n_bins=n_bins_data,
            count_anomalous_pairs_separately=False)
    if (pdb_hierarchy.models_size() == 1) :
        self._multi_criterion = multi_criterion_view(pdb_hierarchy)

    # wilson B
    self.wilson_b = None
    if (fmodel is not None):
        self.wilson_b = fmodel.wilson_b()
    elif (fmodel_neutron is not None):
        self.wilson_b = fmodel_neutron.wilson_b()

    # validate hydrogens
    self.hydrogens = None
    if self.model is not None and self.model.has_hd():
        # import here to avoid circular import issues
        from mmtbx.hydrogens.validate_H import validate_H, validate_H_results
        hydrogens = validate_H(model, nuclear)
        hydrogens.validate_inputs()
        hydrogens.run()
        self.hydrogens = validate_H_results(hydrogens.get_results())

    # write probe file if needed (CLI and GUI)
    if (save_probe_unformatted_file is not None):
        pcm = self.clashes.probe_clashscore_manager
        try:
            with open(save_probe_unformatted_file, 'w') as f:
                f.write(pcm.probe_unformatted)
            self.clashes.probe_file = save_probe_unformatted_file
        except IOError as err:
            raise Sorry('%s could not be written correctly.\n%s' %
                        (save_probe_unformatted_file, err))
def whole_minimization(self):
    """Regularize the whole model after secondary-structure substitution.

    Steps, in order:

    1. Build boolean selections of main-chain atoms (N, CA, C, O) that lie
       in helices, in sheets, or in neither, according to
       ``self.ss_annotation``.
    2. Install secondary-structure restraints on the geometry restraints
       manager.
    3. If there is no reference map, add reference-coordinate restraints for
       the helix, sheet and remaining main-chain atoms with their own sigmas.
    4. Optionally fit side chains (``fix_rotamer_outliers``); this also
       writes ``before_rotamers.pdb`` into the current directory.
    5. Add chi-torsion restraints for SS and non-SS main-chain atoms.
    6. Shift atoms that ended up on special positions (and were not there
       originally, per ``self.original_spi``) by 0.2 along each axis.
    7. Optionally write the pre-regularization model and its .geo file.
    8. Minimize: Ramachandran-restrained without a map, map-based otherwise.
    9. Remove the reference-coordinate restraints and the non-SS chi-torsion
       restraints again.

    Side effects: modifies ``self.model`` coordinates and its restraints
    manager, sets ``self.model_before_regularization``, writes to
    ``self.log``.

    :returns: the chi-torsion proxies remaining on the geometry manager
    """
    # NOTE(review): t3/t4/t5/t9/t10/t11 and bsel are only referenced by the
    # commented-out timing prints below; they are otherwise unused.
    t3 = time()
    # pre_result_h = edited_h
    # pre_result_h.reset_i_seq_if_necessary()
    bsel = flex.bool(self.model.get_number_of_atoms(), False)
    helix_selection = flex.bool(self.model.get_number_of_atoms(), False)
    sheet_selection = flex.bool(self.model.get_number_of_atoms(), False)
    other_selection = flex.bool(self.model.get_number_of_atoms(), False)
    ss_for_tors_selection = flex.bool(self.model.get_number_of_atoms(), False)
    nonss_for_tors_selection = flex.bool(self.model.get_number_of_atoms(), False)
    # Start with every main-chain atom in other_selection; atoms belonging
    # to helices/sheets are switched off again in the loops below.
    #isel = self.model.get_atom_selection_cache().iselection("name ca")
    isel = self.model.get_atom_selection_cache().iselection("name ca or name n or name o or name c")
    other_selection.set_selected(isel, True)
    # Remembered for the restraint-count sanity check further down.
    n_main_chain_atoms = other_selection.count(True)
    isel = self.model.get_atom_selection_cache().iselection("name ca or name n or name o or name c")
    nonss_for_tors_selection.set_selected(isel, True)
    main_chain_selection_prefix = "(name ca or name n or name o or name c) %s"

    t4 = time()
    print("Preparing selections...", file=self.log)
    self.log.flush()
    # Here we are just preparing selections
    for h in self.ss_annotation.helices:
        ss_sels = h.as_atom_selections()[0]
        selstring = main_chain_selection_prefix % ss_sels
        isel = self.model.get_atom_selection_cache().iselection(selstring)
        helix_selection.set_selected(isel, True)
        other_selection.set_selected(isel, False)
        isel = self.model.get_atom_selection_cache().iselection(selstring)
        ss_for_tors_selection.set_selected(isel, True)
        nonss_for_tors_selection.set_selected(isel, False)

    for sheet in self.ss_annotation.sheets:
        for ss_sels in sheet.as_atom_selections():
            selstring = main_chain_selection_prefix % ss_sels
            isel = self.model.get_atom_selection_cache().iselection(selstring)
            sheet_selection.set_selected(isel, True)
            other_selection.set_selected(isel, False)
            isel = self.model.get_atom_selection_cache().iselection(selstring)
            ss_for_tors_selection.set_selected(isel, True)
            nonss_for_tors_selection.set_selected(isel, False)
    t5 = time()
    # print("N idealized elements: %d" % n_idealized_elements, file=self.log)
    # print("Initial checking, init : %.4f" % (t1-t0), file=self.log)
    # print("Checking SS : %.4f" % (t2-t1), file=self.log)
    # print("Changing SS : %.4f" % (t3-t2), file=self.log)
    # print("Initializing selections : %.4f" % (t4-t3), file=self.log)
    # print("Looping for selections : %.4f" % (t5-t4), file=self.log)
    # with open('idealized.pdb', 'w') as f:
    #   f.write(self.model.model_as_pdb())
    # return
    isel = self.model.get_atom_selection_cache().iselection(
        "not name ca and not name n and not name o and not name c")
    other_selection.set_selected(isel, False)
    # An atom annotated as both helix and sheet is treated as helix only,
    # so the three selections stay mutually exclusive.
    helix_sheet_intersection = helix_selection & sheet_selection
    if helix_sheet_intersection.count(True) > 0:
        sheet_selection = sheet_selection & ~helix_sheet_intersection
    assert ((helix_selection | sheet_selection) & other_selection).count(True)==0

    from mmtbx.monomer_library.pdb_interpretation import grand_master_phil_str
    params_line = grand_master_phil_str
    params_line += "secondary_structure {%s}" % secondary_structure.sec_str_master_phil_str
    # print "params_line"
    # print params_line
    # NOTE(review): `params` is not used by any live code below.
    params = iotbx.phil.parse(input_string=params_line, process_includes=True)#.extract()
    # This does not work the same way for a strange reason. Need to investigate.
    # The number of resulting hbonds is different later.
    # w_params = params.extract()
    # w_params.pdb_interpretation.secondary_structure.protein.remove_outliers = False
    # w_params.pdb_interpretation.peptide_link.ramachandran_restraints = True
    # w_params.pdb_interpretation.c_beta_restraints = True
    # w_params.pdb_interpretation.secondary_structure.enabled = True
    # params.format(python_object=w_params)
    # params.show()
    # print "="*80
    # print "="*80
    # print "="*80
    grm = self.model.get_restraints_manager()
    # Secondary-structure manager output is shown only in verbose mode.
    ssm_log = null_out()
    if self.processed_params.verbose:
        ssm_log = self.log
    ss_params = secondary_structure.sec_str_master_phil.fetch().extract()
    ss_params.secondary_structure.protein.remove_outliers=False
    ss_manager = secondary_structure.manager(
        pdb_hierarchy=self.model.get_hierarchy(),
        geometry_restraints_manager=grm.geometry,
        sec_str_from_pdb_file=self.ss_annotation,
        params=ss_params.secondary_structure,
        mon_lib_srv=None,
        verbose=-1,
        log=ssm_log)
    grm.geometry.set_secondary_structure_restraints(
        ss_manager=ss_manager,
        hierarchy=self.model.get_hierarchy(),
        log=ssm_log)
    self.model.get_hierarchy().reset_i_seq_if_necessary()
    from mmtbx.geometry_restraints import reference
    # Without a map, main-chain atoms are tethered to their current
    # positions, with separate sigmas for helix, sheet and the rest.
    if self.reference_map is None:
        if self.processed_params.verbose:
            print("Adding reference coordinate restraints...", file=self.log)
        grm.geometry.append_reference_coordinate_restraints_in_place(
            reference.add_coordinate_restraints(
                sites_cart = self.model.get_sites_cart().select(helix_selection),
                selection = helix_selection,
                sigma = self.processed_params.sigma_on_reference_helix))
        grm.geometry.append_reference_coordinate_restraints_in_place(
            reference.add_coordinate_restraints(
                sites_cart = self.model.get_sites_cart().select(sheet_selection),
                selection = sheet_selection,
                sigma = self.processed_params.sigma_on_reference_sheet))
        grm.geometry.append_reference_coordinate_restraints_in_place(
            reference.add_coordinate_restraints(
                sites_cart = self.model.get_sites_cart().select(other_selection),
                selection = other_selection,
                sigma = self.processed_params.sigma_on_reference_non_ss))

    # XXX Somewhere here we actually should check placed side-chains for
    # clashes because we used ones that were in original model and just moved
    # them to nearest allowed rotamer. The idealization may affect a lot
    # the orientation of side chain thus justifying changing rotamer on it
    # to avoid clashes.
    if self.processed_params.fix_rotamer_outliers:
        print("Fixing/checking rotamers...", file=self.log)
        # pre_result_h.write_pdb_file(file_name="before_rotamers.pdb")
        # NOTE(review): this writes a file into the current directory on
        # every call with fix_rotamer_outliers set; looks like debug output.
        br_txt = self.model.model_as_pdb()
        with open("before_rotamers.pdb", 'w') as f:
            f.write(br_txt)
        # The backbone is sampled only when a reference map is available.
        if(self.reference_map is None):
            backbone_sample=False
        else:
            backbone_sample=True
        result = mmtbx.refinement.real_space.fit_residues.run(
            pdb_hierarchy = self.model.get_hierarchy(),
            crystal_symmetry = self.model.crystal_symmetry(),
            map_data = self.reference_map,
            rotamer_manager = mmtbx.idealized_aa_residues.rotamer_manager.load(
                rotamers="favored"),
            sin_cos_table = scitbx.math.sin_cos_table(n=10000),
            backbone_sample = backbone_sample,
            mon_lib_srv = self.model.get_mon_lib_srv(),
            log = self.log)
        self.model.set_sites_cart(
            sites_cart = result.pdb_hierarchy.atoms().extract_xyz())

    if self.processed_params.verbose:
        print("Adding chi torsion restraints...", file=self.log)
    # only backbone
    grm.geometry.add_chi_torsion_restraints_in_place(
        pdb_hierarchy = self.model.get_hierarchy(),
        sites_cart = self.model.get_sites_cart().\
            select(ss_for_tors_selection),
        selection = ss_for_tors_selection,
        chi_angles_only = False,
        sigma = self.processed_params.sigma_on_torsion_ss)
    grm.geometry.add_chi_torsion_restraints_in_place(
        pdb_hierarchy = self.model.get_hierarchy(),
        sites_cart = self.model.get_sites_cart().\
            select(nonss_for_tors_selection),
        selection = nonss_for_tors_selection,
        chi_angles_only = False,
        sigma = self.processed_params.sigma_on_torsion_nonss)

    # real_h.atoms().set_xyz(pre_result_h.atoms().extract_xyz())
    #
    # Check and correct for special positions
    #
    real_h = self.model.get_hierarchy() # just a shortcut here...
    special_position_settings = crystal.special_position_settings(
        crystal_symmetry = self.model.crystal_symmetry())
    site_symmetry_table = \
        special_position_settings.site_symmetry_table(
            sites_cart = self.model.get_sites_cart(),
            unconditional_general_position_flags=(
                self.model.get_atoms().extract_occ() != 1))
    spi = site_symmetry_table.special_position_indices()
    if spi.size() > 0:
        print("Moving atoms from special positions:", file=self.log)
        for spi_i in spi:
            # Atoms that were on special positions originally are left
            # where they are.
            if spi_i not in self.original_spi:
                new_coords = (
                    real_h.atoms()[spi_i].xyz[0]+0.2,
                    real_h.atoms()[spi_i].xyz[1]+0.2,
                    real_h.atoms()[spi_i].xyz[2]+0.2)
                print(" ", real_h.atoms()[spi_i].id_str(), end=' ', file=self.log)
                print(tuple(real_h.atoms()[spi_i].xyz), "-->", new_coords, file=self.log)
                real_h.atoms()[spi_i].set_xyz(new_coords)
    # Propagate the hierarchy coordinates back into the model.
    self.model.set_sites_cart_from_hierarchy()

    self.model_before_regularization = self.model.deep_copy()

    t9 = time()

    if self.processed_params.file_name_before_regularization is not None:
        grm.geometry.pair_proxies(sites_cart=self.model.get_sites_cart())
        grm.geometry.update_ramachandran_restraints_phi_psi_targets(
            hierarchy=self.model.get_hierarchy())
        print("Outputting model before regularization %s" % self.processed_params.file_name_before_regularization, file=self.log)

        m_txt = self.model.model_as_pdb()
        g_txt = self.model.restraints_as_geo()
        with open(self.processed_params.file_name_before_regularization, 'w') as f:
            f.write(m_txt)

        # The .geo name is derived by replacing the last four characters of
        # the model file name (assumes a 3-letter extension - TODO confirm).
        geo_fname = self.processed_params.file_name_before_regularization[:-4]+'.geo'
        print("Outputting geo file for regularization %s" % geo_fname, file=self.log)
        with open(geo_fname, 'w') as f:
            f.write(g_txt)

    #testing number of restraints
    assert grm.geometry.get_n_den_proxies() == 0
    if self.reference_map is None:
        assert grm.geometry.get_n_reference_coordinate_proxies() == n_main_chain_atoms, "" +\
            "%d %d" % (grm.geometry.get_n_reference_coordinate_proxies(), n_main_chain_atoms)
    refinement_log = null_out()
    self.log.write(
        "Refining geometry of substituted secondary structure elements\n")
    self.log.write(
        " for %s macro_cycle(s).\n" % self.processed_params.n_macro)
    self.log.flush()
    if self.processed_params.verbose:
        refinement_log = self.log
    t10 = time()
    if self.reference_map is None:
        # Auto resolves to 5 macro-cycles for the map-free minimization.
        n_cycles = self.processed_params.n_macro
        if self.processed_params.n_macro == Auto:
            n_cycles=5
        minimize_wrapper_for_ramachandran(
            model = self.model,
            original_pdb_h = None,
            excl_string_selection = "",
            log = refinement_log,
            number_of_cycles = n_cycles)
    else:
        # NOTE(review): ref_xrs is assigned but never used.
        ref_xrs = self.model.crystal_symmetry()
        minimize_wrapper_with_map(
            model = self.model,
            target_map=self.reference_map,
            refine_ncs_operators=False,
            number_of_cycles=self.processed_params.n_macro,
            min_mode='simple_cycles',
            log=self.log)
    self.model.set_sites_cart_from_hierarchy()

    self.log.write(" Done\n")
    self.log.flush()
    t11 = time()
    # print("Initial checking, init : %.4f" % (t1-t0), file=self.log)
    # print("Checking SS : %.4f" % (t2-t1), file=self.log)
    # print("Initializing selections : %.4f" % (t4-t3), file=self.log)
    # print("Looping for selections : %.4f" % (t5-t4), file=self.log)
    # print("Finalizing selections : %.4f" % (t6-t5), file=self.log)
    # print("PDB interpretation : %.4f" % (t7-t6), file=self.log)
    # print("Get GRM : %.4f" % (t8-t7), file=self.log)
    # print("Adding restraints to GRM : %.4f" % (t9-t8), file=self.log)
    # print("Running GM : %.4f" % (t11-t10), file=self.log)
    # print_hbond_proxies(grm.geometry,real_h)
    # Drop the temporary tethers; only the SS chi-torsion restraints remain.
    grm.geometry.remove_reference_coordinate_restraints_in_place()
    grm.geometry.remove_chi_torsion_restraints_in_place(nonss_for_tors_selection)
    return grm.geometry.get_chi_torsion_proxies()
def exercise():
    """Regression test for ``mmtbx.command_line.mtz2map``.

    Generates fake Cd/Cl anomalous data, writes an MTZ file holding several
    sets of map coefficients, and checks the list of map files produced by
    ``mtz2map.run`` with and without a PDB file and with
    ``include_fmodel=True``.  Finally checks that a bad output directory and
    an atom selection matching nothing both raise ``Sorry``.
    """
    from mmtbx.regression.make_fake_anomalous_data import generate_cd_cl_inputs
    from mmtbx.command_line import mtz2map
    import mmtbx.utils
    from iotbx import file_reader
    from scitbx.array_family import flex
    mtz_file, pdb_file = generate_cd_cl_inputs(file_base = "tst_mmtbx_mtz2map")
    pdb_in = file_reader.any_file(pdb_file)
    hierarchy = pdb_in.file_object.hierarchy
    xrs = pdb_in.file_object.xray_structure_simple()
    mtz_in = file_reader.any_file(mtz_file)
    f_obs = mtz_in.file_server.miller_arrays[0]
    f_obs_mean = f_obs.average_bijvoet_mates()
    flags = mtz_in.file_server.miller_arrays[1]
    flags = flags.customized_copy(data=flags.data()==1)
    fmodel = mmtbx.utils.fmodel_simple(
        f_obs=f_obs,
        r_free_flags=flags,
        xray_structures=[xrs],
        scattering_table="n_gaussian",
        skip_twin_detection=True)
    assert f_obs.anomalous_flag()
    mtz_data = f_obs.as_mtz_dataset(column_root_label="F")
    #mtz_data.add_miller_array(
    #  miller_array=f_obs.average_bijvoet_mates(),
    #  column_root_label="F_mean")
    mtz_data.add_miller_array(
        miller_array=fmodel.f_model(),
        column_root_label="FMODEL")
    mtz_data.add_miller_array(
        miller_array=fmodel.f_model().average_bijvoet_mates().phases(deg=True),
        column_root_label="PHI",
        column_types="P")
    # Constant figure-of-merit column (0.95 everywhere).
    mtz_data.add_miller_array(
        miller_array=f_obs_mean.customized_copy(
            data=flex.double(f_obs_mean.data().size(), 0.95),
            sigmas=None).set_observation_type(None),
        column_root_label="FOM",
        column_types="W")
    two_fofc_map = fmodel.map_coefficients(map_type="2mFo-DFc")
    fofc_map = fmodel.map_coefficients(map_type="mFo-Dfc")
    anom_map = fmodel.map_coefficients(map_type="anom")
    mtz_data.add_miller_array(
        miller_array=two_fofc_map.average_bijvoet_mates(),
        column_root_label="2FOFCWT")
    mtz_data.add_miller_array(
        miller_array=fmodel.map_coefficients(map_type="mFo-DFc"),
        column_root_label="FOFCWT")
    mtz_data.add_miller_array(
        miller_array=fmodel.map_coefficients(map_type="anom"),
        column_root_label="ANOM")
    mtz_data.add_miller_array(flags,
        column_root_label="FreeR_flag")
    map_file = "tst_mmtbx_mtz2map_map_coeffs.mtz"
    mtz_data.mtz_object().write(map_file)
    # exercise defaults with PDB file
    file_info = mtz2map.run([pdb_file, map_file], log=null_out())
    file_info = [ (os.path.basename(fn), desc) for fn, desc in file_info ]
    assert (file_info == [
        ('tst_mmtbx_mtz2map_map_coeffs_2mFo-DFc.ccp4', 'CCP4 map'),
        ('tst_mmtbx_mtz2map_map_coeffs_mFo-DFc.ccp4', 'CCP4 map'),
        ('tst_mmtbx_mtz2map_map_coeffs_anom.ccp4', 'CCP4 map'),
        ('tst_mmtbx_mtz2map_map_coeffs_4.ccp4', 'CCP4 map')
    ])
    # without PDB file
    file_info_2 = mtz2map.run([map_file], log=null_out())
    file_info_2 = [ (os.path.basename(fn), desc) for fn, desc in file_info_2 ]
    assert file_info_2 == file_info, file_info_2
    # with FMODEL
    file_info_3 = mtz2map.run([pdb_file, map_file, "include_fmodel=True"],
        log=null_out())
    file_info_3 = [ (os.path.basename(fn), desc) for fn, desc in file_info_3 ]
    assert (file_info_3 == [
        ('tst_mmtbx_mtz2map_map_coeffs_fmodel.ccp4', 'CCP4 map'),
        ('tst_mmtbx_mtz2map_map_coeffs_2mFo-DFc.ccp4', 'CCP4 map'),
        ('tst_mmtbx_mtz2map_map_coeffs_mFo-DFc.ccp4', 'CCP4 map'),
        ('tst_mmtbx_mtz2map_map_coeffs_anom.ccp4', 'CCP4 map'),
        ('tst_mmtbx_mtz2map_map_coeffs_5.ccp4', 'CCP4 map')
    ])
    # exercise bad parameter
    try :
        file_info = mtz2map.run([pdb_file, "1yjp_mtz2map_map_coeffs.mtz",
            "output.directory=1yjp_mtz2map_map_coeffs.mtz"], log=null_out())
    except Sorry :
        pass
    else :
        raise Exception_expected
    # bad atom selection
    try :
        file_info = mtz2map.run([pdb_file, map_file, "selection=\"resname ZN\""],
            log=null_out())
    except Sorry as s :
        assert (str(s) == "No atoms found matching the specified selection.")
    else :
        # Same pattern as the other negative tests: not raising is a failure.
        raise Exception_expected
def ramalyze_parallel(hierarchy):
    """Run ``ramalyze`` on ``hierarchy`` with its output discarded.

    A module-level, single-argument wrapper around ``ramalyze`` that always
    passes ``out=null_out()``.

    :param hierarchy: the PDB hierarchy to analyze
    :returns: the ``ramalyze`` result object
    """
    silent_out = null_out()
    result = ramalyze(hierarchy, out=silent_out)
    return result
def fetch(id, data_type="pdb", format="pdb", mirror="rcsb", log=None,
    force_download=False,
    local_cache=None):
  """
  Locate and open a data file for the specified PDB ID and format, either in a
  local mirror or online.

  :param id: 4-character PDB ID (e.g. '1hbb')
  :param data_type: type of content to download: pdb, xray, fasta, or seq
    (seq takes the same download path as fasta)
  :param format: format of data: cif, pdb, or xml
  :param mirror: remote site to use, either rcsb, pdbe, pdbj or pdb-redo
  :param log: file-like object for status messages (default: null_out())
  :param force_download: if True, skip the local cache directory and the
    local mirror directories and always download
  :param local_cache: directory searched for an existing model file (only
    used when data_type is "pdb"); Auto means the current working directory
  :returns: a filehandle-like object (with read() method)
  :raises Sorry: if no PDBj URL exists for the data/format combination, or
    if a compressed download is needed and gzip cannot be imported
  :raises RuntimeError: if the server answers the request with HTTP 404
  """
  assert data_type in ["pdb", "xray", "fasta", "seq"]
  assert format in ["cif", "pdb", "xml"]
  assert mirror in ["rcsb", "pdbe", "pdbj", "pdb-redo"]
  validate_pdb_id(id)
  if (log is None):
    log = null_out()
  id = id.lower()
  if (not force_download):
    if (local_cache is not None) and (data_type == "pdb"):
      from iotbx.file_reader import guess_file_type
      if (local_cache is Auto):
        local_cache = os.getcwd()
      cache_files = os.listdir(local_cache)
      for file_name in cache_files:
        if (len(file_name) > 4):
          # cached files may be named either pdbXXXX... or XXXX...
          file_id = re.sub("^pdb", "", file_name)[0:4]
          if (file_id.lower() == id):
            if (guess_file_type(file_name) == "pdb"):
              file_name = os.path.join(local_cache, file_name)
              print("Reading from cache directory:", file=log)
              print(" " + file_name, file=log)
              f = smart_open.for_reading(file_name)
              return f
    # try local mirror for PDB and X-ray data files first, if it exists
    # each entry: (request matches, environment variable, file name pattern)
    local_mirrors = [
      ((data_type == "pdb") and (format == "pdb"),
        "PDB_MIRROR_PDB", "pdb%s.ent.gz"),
      ((data_type == "pdb") and (format == "cif"),
        "PDB_MIRROR_MMCIF", "%s.cif.gz"),
      ((data_type == "xray"),
        "PDB_MIRROR_STRUCTURE_FACTORS", "r%ssf.ent.gz"),
    ]
    for applies, env_var, name_pattern in local_mirrors:
      if (not applies) or (env_var not in os.environ):
        continue
      # mirror files are grouped in directories named by the two middle
      # characters of the ID
      subdir = os.path.join(os.environ[env_var], id[1:3])
      if (os.path.isdir(subdir)):
        file_name = os.path.join(subdir, name_pattern % id)
        if (os.path.isfile(file_name)):
          print("Reading from local mirror:", file=log)
          print(" " + file_name, file=log)
          f = smart_open.for_reading(file_name)
          return f
  # No mirror found (or out of date), default to HTTP download
  url = None
  compressed = False
  if (mirror == "rcsb"):
    url_base = 'https://files.rcsb.org/download/'
    pdb_ext = ".pdb"
    sf_prefix = ""
    sf_ext = "-sf.cif"
  elif (mirror == "pdbe"):
    url_base = "https://www.ebi.ac.uk/pdbe-srv/view/files/"
    pdb_ext = ".ent"
    sf_prefix = "r"
    sf_ext = "sf.ent"
  elif (mirror == "pdbj"):
    url_base = "ftp://ftp.pdbj.org/pub/pdb/data/structures/divided/"
    if (data_type == "pdb"):
      compressed = True
      if (format == "pdb"):
        url = url_base + "pdb/%s/pdb%s.ent.gz" % (id[1:3], id)
      elif (format == "cif"):
        url = url_base + "mmCIF/%s/%s.cif.gz" % (id[1:3], id)
    elif (data_type == "xray"):
      compressed = True
      url = url_base + "structure_factors/%s/r%ssf.ent.gz" % (id[1:3], id)
    elif (data_type in ["fasta", "seq"]):
      url = "https://pdbj.org/rest/downloadPDBfile?format=fasta&id=%s" % id
    if (url is None) and (data_type != "fasta"):
      raise Sorry(
        "Can't determine PDBj download URL for this data/format " +
        "combination.")
  elif mirror == "pdb-redo":
    url_base = "https://pdb-redo.eu/db/"
    pdb_ext = "_final.pdb"
    cif_ext = "_final.cif"
    sf_prefix = ""
    sf_ext = "_final.mtz"
    if (data_type == 'pdb'):
      if (format == 'pdb'):
        url = url_base + "{id}/{id}{format}".format(id=id, format=pdb_ext)
      elif (format == 'cif'):
        url = url_base + "{id}/{id}{format}".format(id=id, format=cif_ext)
    elif (data_type == 'xray'):
      url = url_base + "{id}/{id}{format}".format(id=id, format=sf_ext)
  # fill in the URL if the mirror-specific code above did not set one, and
  # remember what is being downloaded for the error message
  if (data_type in ["fasta", "seq"]):
    if (url is None):
      # TODO PDBe equivalent doesn't exist?
      # Seems that this url should be working:
      url = "https://www.rcsb.org/fasta/entry/%s" % id
    content = "sequence"
  elif data_type == "xray":
    if (url is None):
      url = url_base + sf_prefix + id + sf_ext
    content = "structure factors"
  else:
    if (url is None):
      if format == "pdb":
        url = url_base + id + pdb_ext
      else:
        url = url_base + id + "." + format
    content = "model"
  try:
    data = libtbx.utils.urlopen(url)
  except HTTPError as e:
    if e.getcode() == 404:
      raise RuntimeError("Couldn't download %s for %s." % (content, id))
    else:
      raise
  if (compressed):
    try:
      import gzip
    except ImportError:
      raise Sorry(
        "gzip module not available - please use an uncompressed " +
        "source of PDB data.")
    else:
      import io
      # XXX due to a bug in urllib2, we can't pass the supposedly file-like
      # object directly, so we read the data into an in-memory buffer instead.
      # GzipFile needs a binary file object, hence BytesIO: a text StringIO
      # cannot hold the downloaded bytes on Python 3.
      return gzip.GzipFile(fileobj=io.BytesIO(data.read()))
  return data
def exercise_protein():
  """
  Regression test of molprobity.run() on the phenix_regression entry 3ifk.

  First runs validation on the model alone and checks that the result
  survives a dumps/loads round trip with identical show() output, then checks
  individual statistics.  Then repeats the run with the reflection file and
  "--maps" added and checks the data-dependent statistics, and finally builds
  a validation object with a copied fmodel passed as fmodel_neutron.

  Returns without testing anything if the model file is not found.
  """
  pdb_file = libtbx.env.find_in_repositories(
    relative_path="phenix_regression/pdb/3ifk.pdb",
    test=op.isfile)
  hkl_file = libtbx.env.find_in_repositories(
    relative_path="phenix_regression/reflection_files/3ifk.mtz",
    test=op.isfile)
  if (pdb_file is None):
    print("phenix_regression not available, skipping.")
    return
  args1 = [
    pdb_file,
    "outliers_only=True",
    "output.prefix=tst_molprobity",
    "--pickle",
    "flags.xtriage=True",
  ]
  result = molprobity.run(args=args1, out=null_out()).validation
  out1 = StringIO()
  result.show(out=out1)
  # the result must be picklable and show the same text after unpickling
  result = loads(dumps(result))
  out2 = StringIO()
  result.show(out=out2)
  assert (result.nqh_flips.n_outliers == 6)
  assert (not "RNA validation" in out2.getvalue())
  assert (out2.getvalue() == out1.getvalue())
  dump("tst_molprobity.pkl", result)
  mc = result.as_multi_criterion_view()
  assert (result.neutron_stats is None)
  mpscore = result.molprobity_score()
  # percentiles
  out4 = StringIO()
  result.show_summary(out=out4, show_percentiles=True)
  assert (""" Clashscore = 49.59 (percentile: 0.2)""" in out4.getvalue())
  # misc
  assert approx_equal(result.r_work(), 0.237) # from PDB header
  assert approx_equal(result.r_free(), 0.293) # from PDB header
  assert approx_equal(result.d_min(), 2.03) # from PDB header
  assert (result.d_max_min() is None)
  assert approx_equal(result.rms_bonds(), 0.02586, 1e-5)
  assert approx_equal(result.rms_angles(), 2.35285, 1e-5)
  assert approx_equal(result.rama_favored(), 96.47059)
  assert (result.cbeta_outliers() == 10)
  assert approx_equal(result.molprobity_score(), 3.39, eps=0.01)
  summary = result.summarize()
  gui_fields = list(summary.iter_molprobity_gui_fields())
  assert (len(gui_fields) == 6)
  #result.show()
  assert (str(mc.data()[2]) == ' A 5 THR rota,cb,clash')
  # same validation through the Python API instead of the command line
  import mmtbx.validation.molprobity
  from iotbx import file_reader
  pdb_in = file_reader.any_file(pdb_file)
  model = mmtbx.model.manager(pdb_in.file_object.input)
  result = mmtbx.validation.molprobity.molprobity(model)
  out3 = StringIO()
  result.show_summary(out=out3)
  # NOTE(review): the expected-text literals in this test (this one and the
  # Clashscore / multi-criterion strings above) look whitespace-collapsed;
  # confirm the exact spacing and line breaks against real show_summary()
  # output before relying on them.
  assert """\ Ramachandran outliers = 1.76 % favored = 96.47 % Rotamer outliers = 20.00 % """ in out3.getvalue()
  # now with data
  args2 = args1 + [hkl_file, "--maps"]
  result, cmdline = molprobity.run(args=args2,
    out=null_out(),
    return_input_objects=True)
  out = StringIO()
  result.show(out=out)
  stats = result.get_statistics_for_phenix_gui()
  #print stats
  stats = result.get_polygon_statistics([
    "r_work", "r_free", "adp_mean_all", "angle_rmsd", "bond_rmsd",
    "clashscore" ])
  #print stats
  assert approx_equal(result.r_work(), 0.2291, eps=0.001)
  assert approx_equal(result.r_free(), 0.2804, eps=0.001)
  assert approx_equal(result.d_min(), 2.0302, eps=0.0001)
  assert approx_equal(result.d_max_min(), [34.546125, 2.0302], eps=0.0001)
  assert approx_equal(result.rms_bonds(), 0.02586, 1e-5)
  assert approx_equal(result.rms_angles(), 2.35285, 1e-5)
  assert approx_equal(result.rama_favored(), 96.47059)
  assert (result.cbeta_outliers() == 10)
  assert approx_equal(result.unit_cell().parameters(),
    (55.285, 58.851, 67.115, 90, 90, 90))
  assert (str(result.space_group_info()) == "P 21 21 21")
  bins = result.fmodel_statistics_by_resolution()
  assert (len(bins) == 10)
  assert approx_equal(result.atoms_to_observations_ratio(), 0.09755,
    eps=0.0001)
  assert approx_equal(result.b_iso_mean(), 31.11739)
  # "--maps" combined with output.prefix is expected to have written this file
  assert op.isfile("tst_molprobity_maps.mtz")
  bins = result.fmodel_statistics_by_resolution()
  #bins.show()
  bin_plot = result.fmodel_statistics_graph_data()
  lg = bin_plot.format_loggraph()
  # fake fmodel_neutron
  fmodel_neutron = cmdline.fmodel.deep_copy()
  result2 = mmtbx.validation.molprobity.molprobity(
    cmdline.model,
    fmodel=cmdline.fmodel,
    fmodel_neutron=fmodel_neutron,
    nuclear=True,
    keep_hydrogens=True)
  stats = result2.get_statistics_for_phenix_gui()
  assert ('R-work (neutron)' in [label for (label, stat) in stats])
def run(args=None,
    target_hierarchy=None,
    chain_hierarchy=None,
    target_file=None, # model
    chain_file=None, # query
    crystal_symmetry=None,
    max_dist=None,
    quiet=None,
    verbose=None,
    use_crystal_symmetry=None,
    chain_type=None,
    params=None,
    target_length_from_matching_chains=None,
    distance_per_site=None,
    out=sys.stdout):
  """
  Compare a query chain with a target model and report how much of the
  query lies on the target.

  For every representative atom of the query ("name P" if chain_type is RNA
  or DNA, otherwise CA) the closest representative atom of the target is
  found with get_best_match().  Query atoms whose best match is farther than
  max_dist are counted as 'far_away'.  The others are counted as 'close' and
  additionally as 'forward' or 'reverse' when consecutive query atoms are
  matched to consecutive target atoms in ascending or descending order, or
  as 'unaligned' otherwise.

  Values not passed as arguments (verbose, quiet, chain_type,
  use_crystal_symmetry, max_dist, distance_per_site,
  target_length_from_matching_chains) are taken from params; params is built
  from args with get_params() when not supplied.  Hierarchies are read from
  chain_file/target_file (or params.input_files.pdb_in) unless both are
  passed in.  target_length_from_matching_chains is resolved but not used
  further in this function.

  All messages, including the verbose SEQ1/SEQ2 lines, are written to out.

  Returns an rmsd_values object with the rmsd and count of each category and
  the attributes n_forward, n_reverse and n (number of query atoms that were
  not classified as far away).  If params.input_files.query_dir is an
  existing directory, the result of run_all() is returned instead.

  Raises Sorry if no target model is specified, or if use_crystal_symmetry is
  set but no crystal symmetry is available.
  """
  if not args: args=[]
  if not params:
    params=get_params(args,out=out)
  if params.input_files.pdb_in:
    print("Using %s as target" %(params.input_files.pdb_in[0]), file=out)
  elif chain_file or chain_hierarchy:
    pass # it is fine
  else:
    raise Sorry("Need target model (pdb_in)")
  if params.input_files.unique_target_pdb_in and params.input_files.unique_only:
    print("Using %s as target for unique chains" %(
      params.input_files.unique_target_pdb_in), file=out)
  if params.input_files.query_dir and \
      os.path.isdir(params.input_files.query_dir):
    print("\nUsing all files in %s as queries\n" %(
      params.input_files.query_dir), file=out)
    return run_all(params=params,out=out)

  # explicit arguments take precedence over params
  if verbose is None:
    verbose=params.control.verbose
  if quiet is None:
    quiet=params.control.quiet
  if chain_type is None:
    chain_type=params.crystal_info.chain_type
  if use_crystal_symmetry is None:
    use_crystal_symmetry=params.crystal_info.use_crystal_symmetry
  params.crystal_info.use_crystal_symmetry=use_crystal_symmetry
  if max_dist is None:
    max_dist=params.comparison.max_dist
  if distance_per_site is None:
    distance_per_site=params.comparison.distance_per_site
  if target_length_from_matching_chains is None:
    target_length_from_matching_chains=\
      params.comparison.target_length_from_matching_chains

  if verbose:
    local_out=out
  else:
    local_out=null_out()

  if not target_file and len(params.input_files.pdb_in)>0:
    target_file=params.input_files.pdb_in[0] # model
  if not chain_file and len(params.input_files.pdb_in)>1:
    chain_file=params.input_files.pdb_in[1] # query

  # get the hierarchies
  if not chain_hierarchy or not target_hierarchy:
    assert chain_file and target_file
    pdb_inp=get_pdb_inp(file_name=chain_file )
    if params.input_files.unique_target_pdb_in:
      target_unique_hierarchy=get_pdb_inp(
        file_name=params.input_files.unique_target_pdb_in).construct_hierarchy()
    else:
      target_unique_hierarchy=None
    if not crystal_symmetry:
      crystal_symmetry=pdb_inp.crystal_symmetry_from_cryst1()
    chain_hierarchy=pdb_inp.construct_hierarchy()
    if params.input_files.unique_only:
      print("\nUsing only unique part of query\n", file=out)
      chain_hierarchy=extract_unique_part_of_hierarchy(
        chain_hierarchy,target_ph=target_unique_hierarchy,out=local_out)
    target_pdb_inp=get_pdb_inp(file_name=target_file)
    if not crystal_symmetry or not crystal_symmetry.unit_cell():
      crystal_symmetry=target_pdb_inp.crystal_symmetry_from_cryst1()
    target_hierarchy=target_pdb_inp.construct_hierarchy()

  if params.crystal_info.use_crystal_symmetry is None: # set default
    if crystal_symmetry and crystal_symmetry.space_group() and \
        (not crystal_symmetry.space_group().type().number() in [0,1]):
      params.crystal_info.use_crystal_symmetry=True
    else:
      params.crystal_info.use_crystal_symmetry=False
      crystal_symmetry=None
  elif params.crystal_info.use_crystal_symmetry==False:
    crystal_symmetry=None

  if not crystal_symmetry or not crystal_symmetry.unit_cell():
    # substitute a dummy P1 cell so that fractionalization below still works
    crystal_symmetry=get_pdb_inp(
      text="CRYST1 1000.000 1000.000 1000.000 90.00 90.00 90.00 P 1"
      ).crystal_symmetry_from_cryst1()
    print("\nCrystal symmetry will not be used in comparison.\n", file=out)
    if use_crystal_symmetry:
      raise Sorry("Please set use_crystal_symmetry"+
        "=False (no crystal symmetry supplied)")
  else:
    print("\nCrystal symmetry will be used in comparison.\n", file=out)
    print("Space group: %s" %(crystal_symmetry.space_group().info()),
      "Unit cell: %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f \n" %(
      crystal_symmetry.unit_cell().parameters()), file=out)
    use_crystal_symmetry=True
  if not quiet:
    print("Looking for chain similarity for "+\
      "%s (%d residues) in the model %s (%d residues)" %(
      chain_file,chain_hierarchy.overall_counts().n_residues,
      target_file,target_hierarchy.overall_counts().n_residues), file=out)
    if verbose:
      print("Chain type is: %s" %(chain_type), file=out)
  if crystal_symmetry is None or crystal_symmetry.unit_cell() is None:
    raise Sorry("Need crystal symmetry in at least one input file")

  # get the CA residues
  if chain_type in ["RNA","DNA"]:
    atom_selection="name P"
    if not distance_per_site:
      distance_per_site=8.
  else:
    atom_selection="name ca and (not element Ca)"
    if not distance_per_site:
      distance_per_site=3.8
  chain_ca=apply_atom_selection(atom_selection,chain_hierarchy)
  chain_ca_lines=select_atom_lines(chain_ca)
  target_ca=apply_atom_selection(atom_selection,target_hierarchy)
  target_xyz_lines=select_atom_lines(target_ca)
  chain_xyz_cart=chain_ca.atoms().extract_xyz()
  target_xyz_cart=target_ca.atoms().extract_xyz()

  # for each xyz in chain, figure out closest atom in target and dist
  pair_list=[]
  from scitbx.array_family import flex
  chain_xyz_fract=crystal_symmetry.unit_cell().fractionalize(chain_xyz_cart)
  target_xyz_fract=crystal_symmetry.unit_cell().fractionalize(target_xyz_cart)
  far_away_match_list=[]
  far_away_match_rmsd_list=flex.double()
  if use_crystal_symmetry:
    working_crystal_symmetry=crystal_symmetry
  else:
    working_crystal_symmetry=None
  for i in range(chain_xyz_fract.size()):
    best_j=None
    best_dd=None
    distance=None
    if working_crystal_symmetry:
      info=get_best_match(
        flex.vec3_double([chain_xyz_fract[i]]),target_xyz_fract,
        crystal_symmetry=working_crystal_symmetry,
        distance_per_site=distance_per_site)
      if info:
        distance=info.dist()
    else:
      info=get_best_match(
        flex.vec3_double([chain_xyz_cart[i]]),target_xyz_cart)
      distance=info.distance
    if info:
      best_dd=distance
      best_j=info.j
    # an atom without any match (best_dd None) is not "far away"; it is kept
    # in pair_list with j=None and skipped by the classification below
    if best_dd is not None and best_dd > max_dist:
      far_away_match_list.append(i)
      far_away_match_rmsd_list.append(best_dd**2)
      if (not quiet) and verbose:
        print("%s" %(chain_ca_lines[i]), file=out)
      continue
    pair_list.append([i,best_j,best_dd])

  # classify each close pair by whether it continues a run of consecutive
  # query atoms matched to consecutive target atoms
  n_forward=0
  n_reverse=0
  forward_match_list=[]
  reverse_match_list=[]
  forward_match_rmsd_list=flex.double()
  reverse_match_rmsd_list=flex.double()
  unaligned_match_list=[]
  unaligned_match_rmsd_list=flex.double()
  close_match_rmsd_list=flex.double()
  close_match_list=[]
  last_i=None
  last_j=None
  for [i,j,dd],[next_i,next_j,next_dd] in zip(
      pair_list,pair_list[1:]+[[None,None,None]]):
    if i is None or j is None: continue
    segment_direction=None
    if last_i is None: # first time: look at the following pair
      if next_i==i+1: # starting a segment
        if next_j==j+1:
          segment_direction='forward'
        elif next_j==j-1:
          segment_direction='reverse'
    elif i==last_i+1: # continuing a segment: look at the previous pair
      if j==last_j+1:
        segment_direction='forward'
      elif j==last_j-1:
        segment_direction='reverse'
    # every pair that reaches this point counts as close
    close_match_list.append([i,j])
    close_match_rmsd_list.append(dd**2)
    if segment_direction=='forward':
      n_forward+=1
      forward_match_list.append([i,j])
      forward_match_rmsd_list.append(dd**2)
    elif segment_direction=='reverse':
      n_reverse+=1
      reverse_match_list.append([i,j])
      reverse_match_rmsd_list.append(dd**2)
    else:
      unaligned_match_list.append([i,j])
      unaligned_match_rmsd_list.append(dd**2)
    if segment_direction is None:
      last_i=None
      last_j=None
    else:
      last_i=i
      last_j=j

  if n_forward==n_reverse==0:
    direction='none'
  elif n_forward>= n_reverse:
    direction='forward'
  else:
    direction='reverse'
  if (not quiet) and verbose:
    print("%s %d %d N: %d" %(
      direction,n_forward,n_reverse,chain_xyz_fract.size()), file=out)

  # the lists hold squared distances, so rmsd is the root of their mean
  rv=rmsd_values()
  for category,squared_distances in [
      ('forward',forward_match_rmsd_list),
      ('reverse',reverse_match_rmsd_list),
      ('unaligned',unaligned_match_rmsd_list),
      ('close',close_match_rmsd_list),
      ('far_away',far_away_match_rmsd_list)]:
    if squared_distances.size():
      rv.add_rmsd(id=category,
        rmsd=squared_distances.min_max_mean().mean**0.5,
        n=squared_distances.size())

  if not quiet:
    if verbose:
      print("Total CA: %d Too far to match: %d " %(
        chain_xyz_fract.size(),len(far_away_match_list)), file=out)

    def show_matched_pairs(match_list):
      # list the query and target atom lines of each matched pair
      for i,j in match_list:
        print("ID:%d:%d RESIDUES: \n%s\n%s" %(
          i,j, chain_ca_lines[i], target_xyz_lines[j]), file=out)

    for category,summary_format,match_list in [
        ('forward',
          "\nResidues matching in forward direction: %4d RMSD: %6.2f",
          forward_match_list),
        ('reverse',
          "Residues matching in reverse direction: %4d RMSD: %6.2f",
          reverse_match_list),
        ('unaligned',
          "Residues near but not matching one-to-one:%4d RMSD: %6.2f",
          unaligned_match_list)]:
      rmsd,n=rv.get_values(id=category)
      if n:
        print(summary_format %(n,rmsd), file=out)
        if verbose:
          show_matched_pairs(match_list)

    rmsd,n=rv.get_values(id='close')
    if n:
      lines_chain_ca=[]
      lines_target_xyz=[]
      for i,j in close_match_list:
        lines_chain_ca.append(chain_ca_lines[i])
        lines_target_xyz.append(target_xyz_lines[j])
      seq_chain_ca=get_seq_from_lines(lines_chain_ca)
      seq_target_xyz=get_seq_from_lines(lines_target_xyz)
      target_chain_ids=get_chains_from_lines(lines_target_xyz)
      target_length=get_target_length(target_chain_ids=target_chain_ids,
        hierarchy=target_ca)
      rv.add_target_length(id='close',target_length=target_length)
      if verbose:
        print("SEQ1:",seq_chain_ca,len(lines_chain_ca), file=out)
        print("SEQ2:",seq_target_xyz,len(lines_target_xyz), file=out)
      match_n,match_percent=get_match_percent(seq_chain_ca,seq_target_xyz)
      rv.add_match_percent(id='close',match_percent=match_percent)
      percent_close=rv.get_close_to_target_percent('close')
      print(
        "\nAll residues near target: "+\
        "%4d RMSD: %6.2f Seq match (%%):%5.1f %% Found: %5.1f" %(
        n,rmsd,match_percent,percent_close), file=out)
      if verbose:
        show_matched_pairs(close_match_list)

    rmsd,n=rv.get_values(id='far_away')
    if n:
      print("Residues far from target: %4d RMSD: %6.2f" %(
        n,rmsd), file=out)
      if verbose:
        for i in far_away_match_list:
          print("ID:%d RESIDUES: \n%s" %(i,chain_ca_lines[i]), file=out)

  rv.n_forward=n_forward
  rv.n_reverse=n_reverse
  rv.n=len(pair_list)
  return rv
def format_miller_arrays(self, iparams):
  '''
  Read in mtz file and format to miller_arrays_out object with
  index[0] --> FP, SIGFP
  index[1] --> PHIB
  index[2] --> FOM
  index[3] --> HLA, HLB, HLC, HLD
  index[4] --> optional PHIC

  Columns are located by comparing the first label of every array in
  iparams.data with entries 0 (FP), 2 (PHIB), 3 (FOM) and 4 (HL) of the
  comma-separated iparams.column_names (entry 1 is not read here -
  presumably the SIGFP label).  If HL coefficients are found but PHIB is
  not, PHIB and FOM are computed from the HL phase integrals.  Only
  reflections present in all four arrays are kept.

  If iparams.hklrefin is given, the column iparams.column_phic of that file
  supplies PHIC; reflections without a benchmark phase (or a missing column)
  keep PHIC = 0.

  Returns (miller_arrays_out, fp_sort_index_stacks, txt_out):
  the arrays read back from the assembled MTZ object, a list of lists of
  reflection positions (sorted by descending FP, optionally Wilson-scaled,
  and cut into stacks holding iparams.percent_f_squared percent of sum(FP^2)
  each, at most iparams.n_stacks stacks), and a text table describing the
  stacks.

  Prints a message and calls sys.exit() if a required column is missing.
  '''
  #readin reflection file
  reflection_file = reflection_file_reader.any_reflection_file(iparams.data)
  col_name = iparams.column_names.split(',')
  miller_arrays = reflection_file.as_miller_arrays()
  crystal_symmetry = crystal.symmetry(
    unit_cell=miller_arrays[0].unit_cell(),
    space_group=miller_arrays[0].space_group())

  #grab all required columns
  flag_fp_found = False
  flag_phib_found = False
  flag_fom_found = False
  flag_hl_found = False
  ind_miller_array_fp = 0
  ind_miller_array_phib = 0
  ind_miller_array_fom = 0
  ind_miller_array_hl = 0
  for i, miller_array in enumerate(miller_arrays):
    #only look at first index string
    first_label = miller_array.info().label_string().split(',')[0]
    if first_label == col_name[0]:
      #grab FP, SIGFP
      flex_fp_all = miller_array.data()
      flex_sigmas_all = miller_array.sigmas()
      flag_fp_found = True
      ind_miller_array_fp = i
    elif first_label == col_name[2]:
      #grab PHIB
      flex_phib_all = miller_array.data()
      flag_phib_found = True
      ind_miller_array_phib = i
    elif first_label == col_name[3]:
      #grab FOM
      flex_fom_all = miller_array.data()
      flag_fom_found = True
      ind_miller_array_fom = i
    elif first_label == col_name[4]:
      #grab HLA,HLB,HLC,HLD
      flex_hl_all = miller_array.data()
      flag_hl_found = True
      ind_miller_array_hl = i

  if flag_hl_found and not flag_phib_found:
    #calculate PHIB and FOM from HL
    # NOTE(review): ind_miller_array_phib/fom are not updated here, so the
    # derived values are later looked up through the indices of whichever
    # arrays those positions refer to - confirm this is intended.
    miller_array_phi_fom = miller_arrays[ind_miller_array_hl].phase_integrals()
    flex_phib_all = miller_array_phi_fom.phases(deg=True).data()
    flex_fom_all = miller_array_phi_fom.amplitudes().data()
    flag_phib_found = True
    flag_fom_found = True

  if not (flag_fp_found and flag_phib_found and flag_fom_found
          and flag_hl_found):
    print("couldn't find all required columns")
    sys.exit()

  miller_indices_sel = miller_arrays[ind_miller_array_fp].indices()
  print('No. reflections for read-in miller arrays - indices:%6.0f fp:%6.0f phib:%6.0f fom:%6.0f HL:%6.0f)'%( \
    len(miller_indices_sel), len(flex_fp_all), len(flex_phib_all),
    len(flex_fom_all), len(flex_hl_all)))

  def first_positions(indices):
    # map each Miller index to the position of its first occurrence
    positions = {}
    for i_pos, hkl in enumerate(indices):
      positions.setdefault(hkl, i_pos)
    return positions

  # one lookup table per array instead of one full-array search per reflection
  fp_positions = first_positions(miller_indices_sel)
  phib_positions = first_positions(
    miller_arrays[ind_miller_array_phib].indices())
  fom_positions = first_positions(
    miller_arrays[ind_miller_array_fom].indices())
  hl_positions = first_positions(
    miller_arrays[ind_miller_array_hl].indices())

  miller_indices = flex.miller_index()
  flex_fp = flex.double()
  flex_sigmas = flex.double()
  flex_phib = flex.double()
  flex_fom = flex.double()
  flex_hl = flex.hendrickson_lattman()
  #format all miller arrays to the same length
  for miller_index in miller_indices_sel:
    i_fp = fp_positions.get(miller_index)
    i_phib = phib_positions.get(miller_index)
    i_fom = fom_positions.get(miller_index)
    i_hl = hl_positions.get(miller_index)
    if None in (i_fp, i_phib, i_fom, i_hl):
      continue # keep only reflections present in all four arrays
    miller_indices.append(miller_index)
    flex_fp.append(flex_fp_all[i_fp])
    flex_sigmas.append(flex_sigmas_all[i_fp])
    flex_phib.append(flex_phib_all[i_phib])
    flex_fom.append(flex_fom_all[i_fom])
    flex_hl.append(flex_hl_all[i_hl])

  print('No. reflections after format - indices:%6.0f fp:%6.0f phib:%6.0f fom:%6.0f HL:%6.0f)'%( \
    len(miller_indices), len(flex_fp), len(flex_phib), len(flex_fom),
    len(flex_hl)))

  # split the Hendrickson-Lattman coefficients into four separate columns
  flex_hla = flex.double()
  flex_hlb = flex.double()
  flex_hlc = flex.double()
  flex_hld = flex.double()
  for i in range(len(flex_hl)):
    data_hl_row = flex_hl[i]
    flex_hla.append(data_hl_row[0])
    flex_hlb.append(data_hl_row[1])
    flex_hlc.append(data_hl_row[2])
    flex_hld.append(data_hl_row[3])

  # Read benchmark MTZ (PHICalc) for MPE calculation
  flex_phic = flex.double([0] * len(flex_fp))
  if iparams.hklrefin is not None:
    reflection_file = reflection_file_reader.any_reflection_file(
      iparams.hklrefin)
    miller_arrays_bench = reflection_file.as_miller_arrays()
    flex_phic_raw = None
    miller_indices_phic = None
    for bench_array in miller_arrays_bench:
      #only look at first index string
      first_label = bench_array.info().label_string().split(',')[0]
      if first_label == iparams.column_phic:
        #grab PHIC
        if bench_array.is_complex_array():
          flex_phic_raw = bench_array.phases(deg=True).data()
        else:
          flex_phic_raw = bench_array.data()
        miller_indices_phic = bench_array.indices()
    if flex_phic_raw is not None:
      # put each benchmark phase at the position of its reflection so that
      # PHIC stays aligned with (and as long as) the other columns
      phic_positions = first_positions(miller_indices_phic)
      for i_refl, miller_index in enumerate(miller_indices):
        i_phic = phic_positions.get(miller_index)
        if i_phic is not None:
          flex_phic[i_refl] = flex_phic_raw[i_phic]

  #format miller_arrays_out
  miller_set = miller.set(
    crystal_symmetry=crystal_symmetry,
    indices=miller_indices,
    anomalous_flag=False)
  miller_array_out = miller_set.array(
    data=flex_fp,
    sigmas=flex_sigmas).set_observation_type_xray_amplitude()

  #check if Wilson B-factor is applied
  flex_fp_for_sort = flex_fp[:]
  if iparams.flag_apply_b_factor:
    try:
      #get wilson_plot
      from mmtbx.scaling import xtriage
      from libtbx.utils import null_out
      xtriage_args = [iparams.data, "", "", "log=tst_xtriage_1.log"]
      result = xtriage.run(args=xtriage_args, out=null_out())
      ws = result.wilson_scaling
      print('Wilson K=%6.2f B=%6.2f' % (ws.iso_p_scale, ws.iso_b_wilson))
      sin_theta_over_lambda_sq = miller_array_out.two_theta(wavelength=iparams.wavelength) \
        .sin_theta_over_lambda_sq().data()
      wilson_expect = flex.exp(-2 * ws.iso_b_wilson * sin_theta_over_lambda_sq)
      flex_fp_for_sort = wilson_expect * flex_fp
    except Exception:
      # best effort: fall back to sorting on the unscaled amplitudes
      print('Error calculating Wilson scale factors. Continue without applying B-factor.')

  mtz_dataset = miller_array_out.as_mtz_dataset(column_root_label="FP")
  for data, lbl, typ in [(flex_phib, "PHIB", "P"), (flex_fom, "FOMB", "W"),
                         (flex_hla, "HLA", "A"), (flex_hlb, "HLB", "A"),
                         (flex_hlc, "HLC", "A"), (flex_hld, "HLD", "A"),
                         (flex_phic, "PHIC", "P")]:
    mtz_dataset.add_miller_array(miller_array_out.array(data=data),
      column_root_label=lbl,
      column_types=typ)
  miller_arrays_out = mtz_dataset.mtz_object().as_miller_arrays()

  # getting sorted indices for the selected reflections in input mtz file
  # fp_sort_index: stores indices of sorted FP in descending order
  # (ascending stable sort followed by reverse, which fixes the order of ties)
  import operator
  fp_sort_index = [
    i for (i, j) in sorted(enumerate(flex_fp_for_sort),
                           key=operator.itemgetter(1))
  ]
  fp_sort_index.reverse()

  #calculate sum of fp^2 from percent_f_squared
  flex_fp_squared = flex_fp**2
  total_fp_squared = np.sum(flex_fp_squared)
  f_squared_per_stack = (iparams.percent_f_squared * total_fp_squared) / 100
  fp_sort_index_stacks = []
  sum_fp_now, i_start = (0, 0)
  for i in range(len(fp_sort_index)):
    # a stack is closed as soon as it holds the requested share of sum(FP^2)
    i_sel = fp_sort_index[i_start:i + 1]
    sum_fp_now = np.sum([flex_fp_squared[ii_sel] for ii_sel in i_sel])
    if sum_fp_now >= f_squared_per_stack:
      fp_sort_index_stacks.append(fp_sort_index[i_start:i + 1])
      i_start = i + 1
      if len(fp_sort_index_stacks) == iparams.n_stacks:
        break

  txt_out = 'stack_no sum(f_squared) %total n_refl\n'
  for i, stack in enumerate(fp_sort_index_stacks):
    sum_fp = np.sum([flex_fp_squared[ii_sel] for ii_sel in stack])
    txt_out += '%6.0f %14.2f %8.2f %6.0f\n'%(i+1, sum_fp, \
      (sum_fp/total_fp_squared)*100, len(stack))

  return miller_arrays_out, fp_sort_index_stacks, txt_out
def exercise_bond_over_symmetry_2(mon_lib_srv, ener_lib):
  """
  This test is to illustrate that bond over symmetry actually adds 2 proxies.

  A model is built from raw_records10 with no bond proxies at all, one
  simple bond proxy between atoms 0 and 1 is added with
  add_new_bond_restraints_in_place(), and the restraints manager is then
  expected to hold zero simple and two asu proxies.  The sorted bond listing,
  the bond residual sum and the gradients are checked as well.

  mon_lib_srv and ener_lib are accepted but not used in this function.
  """
  from cctbx.geometry_restraints.linking_class import linking_class
  origin_ids = linking_class()
  pdb_inp = iotbx.pdb.input(source_info=None, lines=raw_records10)
  params = mmtbx.model.manager.get_default_pdb_interpretation_params()
  params.pdb_interpretation.restraints_library.mcl = False
  model = mmtbx.model.manager(
    model_input=pdb_inp,
    log=null_out())
  model.process(pdb_interpretation_params=params, make_restraints=True)
  grm = model.get_restraints_manager().geometry
  # starting point: no bond proxies of either kind
  simple, asu = grm.get_all_bond_proxies()
  assert (simple.size(), asu.size()) == (0, 0)
  h = model.get_hierarchy()
  sites_cart = h.atoms().extract_xyz()
  site_labels = model.get_xray_structure().scatterers().extract_labels()
  pair_proxies = grm.pair_proxies(flags=None, sites_cart=sites_cart)
  out = StringIO()
  pair_proxies.bond_proxies.show_sorted(
    by_value="residual",
    sites_cart=sites_cart,
    site_labels=site_labels,
    f=out,
    prefix="")
  outtxt = out.getvalue()
  # print(outtxt)

  # add a single bond between the first two atoms
  proxy = geometry_restraints.bond_simple_proxy(
    i_seqs=(0, 1),
    distance_ideal=2.9,
    weight=400,
    origin_id=origin_ids.get_origin_id('hydrogen bonds'))
  grm.add_new_bond_restraints_in_place(
    proxies=[proxy],
    sites_cart=h.atoms().extract_xyz())
  simple, asu = grm.get_all_bond_proxies()
  # print(simple.size(), asu.size())
  # the one added proxy shows up as two asu proxies
  assert (simple.size(), asu.size()) == (0, 2)

  sites_cart = h.atoms().extract_xyz()
  site_labels = model.get_xray_structure().scatterers().extract_labels()
  pair_proxies = grm.pair_proxies(flags=None, sites_cart=sites_cart)
  out = StringIO()
  pair_proxies.bond_proxies.show_sorted(
    by_value="residual",
    sites_cart=sites_cart,
    site_labels=site_labels,
    f=out,
    prefix="")
  outtxt = out.getvalue()
  # print(outtxt)
  # NOTE(review): the expected listing below looks whitespace-collapsed
  # (column spacing and line breaks lost); confirm it against real
  # show_sorted() output before relying on it.
  assert_lines_in_text(
    outtxt, """\ bond pdb=" CA HIS A 2 " pdb=" N MET A 1 " ideal model delta sigma weight residual sym.op.
2.900 1.998 0.902 5.00e-02 4.00e+02 3.25e+02 x,y+1,z bond pdb=" N MET A 1 " pdb=" CA HIS A 2 " ideal model delta sigma weight residual sym.op. 2.900 1.998 0.902 5.00e-02 4.00e+02 3.25e+02 x,y-1,z """)

  es = grm.energies_sites(sites_cart=sites_cart, compute_gradients=True)
  out = StringIO()
  es.show(f=out)
  outtxt = out.getvalue()
  # print(outtxt)
  # do for x coordinate
  # ATOM 1 N MET A 1 9.821 1.568 5.000 1.00 66.07 N
  # ATOM 2 CA HIS A 2 9.946 12.171 5.357 1.00 66.55 C
  # calculation is from geometry_restraints/bond.h: gradient_0()
  # weight * 2 * delta_slack * d_distance_d_site_0(epsilon);
  # print("X gradient:", 400*2*0.902*(9.946-9.821)) # 90
  # Note that n=2 but residual sum is 325.349. 349 is chopped off in rounding in
  # cctbx/geometry_restraints/__init__py, def _bond_show_sorted_impl(...)
  # where %6.2e is used. in cctbx/geometry_restraints/energies.py: def show()
  # %.6g is used which is showing more numbers.
  assert_lines_in_text(outtxt, """\ bond_residual_sum (n=2): 325.349""")
  # print("Gradients:", list(es.gradients))
  # It seems that the gradients were split in half (note that the X gradient
  # computed in the comment above is 90)
  assert approx_equal(
    list(es.gradients),
    [(45.135801792665134, -708.451544937652, 128.90784991984805),
     (-45.13580179266516, 708.4515449376522, -128.90784991984813)])
def __init__(
    self,
    map_manager,
    model=None,
    target_ncs_au_model=None,
    regions_to_keep=None,
    solvent_content=None,
    resolution=None,
    sequence=None,
    molecular_mass=None,
    symmetry=None,
    chain_type='PROTEIN',
    keep_low_density=True,  # default from map_box
    box_cushion=5,
    soft_mask=True,
    mask_expand_ratio=1,
    wrapping=None,
    log=None):
  """
  Box the map (and model, if given) around the unique part of the map.

  The unique region is obtained by running segment_and_split_map on the
  map data; the bounds of the mask it returns define the box. The box is
  then applied to the model, ncs and map, and finally the mask is applied
  to the boxed map.

  Parameters:
    map_manager: iotbx.map_manager.map_manager with origin at (0, 0, 0).
    model: optional mmtbx.model.manager compatible with map_manager.
    resolution: required (must not be None).
    target_ncs_au_model, regions_to_keep, solvent_content, sequence,
    molecular_mass, symmetry, chain_type, keep_low_density,
    mask_expand_ratio: passed through to segment_and_split_map
      (target_ncs_au_model as target_model).
    box_cushion: passed to segment_and_split_map as box_buffer.
    soft_mask: passed to segment_and_split_map as
      soft_mask_extract_unique and to apply_around_unique_mask.
    wrapping: stored as the forced wrapping; when None the wrapping of
      the map_manager is used in the boxing description string.
    log: output stream; nothing is printed when None.

  Raises:
    Sorry: if segmentation returns no mask for the unique part of the map.
    AssertionError: if any of the input consistency checks fail.
  """
  self.model_can_be_outside_bounds = None  # not used but required to be set
  self._map_manager = map_manager
  self._model = model
  self._mask_data = None
  self._force_wrapping = wrapping

  # Describe how the box was chosen, falling back to the map's wrapping
  described_wrapping = wrapping
  if described_wrapping is None:
    described_wrapping = self.map_manager().wrapping()
  self.basis_for_boxing_string = 'around_unique, wrapping = %s' % (
    described_wrapping)

  # Print only if a log is supplied
  log = null_out() if log is None else log

  # Consistency checks on the inputs
  assert isinstance(map_manager, iotbx.map_manager.map_manager)
  assert self._map_manager.map_data().accessor().origin() == (0, 0, 0)
  assert resolution is not None
  if model is not None:
    assert isinstance(model, mmtbx.model.manager)
    assert map_manager.is_compatible_model(model)
  if self.map_manager().wrapping():
    # map must be entire unit cell
    assert map_manager.unit_cell_grid == map_manager.map_data().all()

  # Get crystal_symmetry
  self.crystal_symmetry = map_manager.crystal_symmetry()

  # Segment the map to find the unique part
  from cctbx.maptbx.segment_and_split_map import run as segment_and_split_map
  assert self._map_manager.map_data().origin() == (0, 0, 0)
  segmentation_keywords = dict(
    map_data=self._map_manager.map_data(),
    crystal_symmetry=self.crystal_symmetry,
    ncs_obj=self._map_manager.ncs_object(),
    target_model=target_ncs_au_model,
    write_files=False,
    auto_sharpen=False,
    add_neighbors=False,
    density_select=False,
    save_box_map_ncs_au=True,
    resolution=resolution,
    solvent_content=solvent_content,
    chain_type=chain_type,
    sequence=sequence,
    molecular_mass=molecular_mass,
    symmetry=symmetry,
    keep_low_density=keep_low_density,
    regions_to_keep=regions_to_keep,
    box_buffer=box_cushion,
    soft_mask_extract_unique=soft_mask,
    mask_expand_ratio=mask_expand_ratio,
    out=log)
  # Only the tracking data is used below; the two group objects are ignored
  _ncs_group_obj, _remainder_ncs_group_obj, tracking_data = \
    segment_and_split_map([], **segmentation_keywords)

  from scitbx.matrix import col

  if not hasattr(tracking_data, 'box_mask_ncs_au_map_data'):
    raise Sorry(" Extraction of unique part of map failed...")

  # Box bounds are taken from the mask of the unique part
  unique_mask = tracking_data.box_mask_ncs_au_map_data
  one = col((1, 1, 1))
  box_first = unique_mask.origin()
  box_last = tuple(col(unique_mask.focus()) - one)

  print("\nBounds for unique part of map: %s to %s " % (
    str(box_first), str(box_last)), file=log)

  # shift the map so it is in the same position as the box map will be in
  unique_mask.reshape(flex.grid(unique_mask.all()))
  assert col(unique_mask.all()) == col(box_last) - col(box_first) + one

  self.gridding_first = box_first
  self.gridding_last = box_last

  # Ready with gridding...set up shifts and box crystal_symmetry
  self.set_shifts_and_crystal_symmetry()

  # Apply boxing to model, ncs, and map (if available)
  self.apply_to_model_ncs_and_map()

  # Note that at this point, self._map_manager has been boxed
  assert unique_mask.all() == self._map_manager.map_data().all()
  self._mask_data = unique_mask

  # Now separately apply the mask to the boxed map
  self.apply_around_unique_mask(
    self._map_manager,
    resolution=resolution,
    soft_mask=soft_mask)