def get_inputs(args,
               log,
               master_params,
               need_map=True,
               need_model_hierarchy=True,
               need_crystal_symmetry=True):
    """
    Parse command-line arguments and assemble map/model inputs.

    Eventually, this will be centralized.

    Args:
        args: command-line arguments, handed to
            ``mmtbx.utils.process_command_line_args``.
        log: file-like object that receives progress messages.
        master_params: master parameters used to interpret ``args``.
        need_map: when true, a CCP4 map must be present in the inputs.
        need_model_hierarchy: when true, exactly one model file is required.
        need_crystal_symmetry: when true, the inputs must carry crystal
            symmetry.

    Returns:
        ``group_args`` with ``params``, ``pdb_file_name``, ``map_data``,
        ``model`` and ``crystal_symmetry``; the last three are read back
        from the ``map_and_model.input`` object.

    Raises:
        Sorry: if a required model, map or crystal symmetry is missing.
    """
    inputs = mmtbx.utils.process_command_line_args(
        args=args, master_params=master_params)
    # Model
    pdb_file_name, pdb_hierarchy = None, None
    # Must exist even when no model is requested: it is tested below.
    pdb_inp = None
    if need_model_hierarchy:
        file_names = inputs.pdb_file_names
        if len(file_names) != 1:
            raise Sorry("One model (PDB or mmCIF) required.")
        pdb_file_name = file_names[0]
        pdb_inp = iotbx.pdb.input(file_name=pdb_file_name)
        # NOTE(review): pdb_hierarchy is not used below; the call is kept to
        # preserve existing behavior.
        pdb_hierarchy = pdb_inp.construct_hierarchy()
    # Map
    map_inp = None
    if need_map:
        if inputs.ccp4_map is None:
            raise Sorry("Map file has to be given.")
        map_inp = inputs.ccp4_map
        broadcast(m="Input map:", log=log)
        map_inp.show_summary(prefix=" ")
    # Crystal symmetry
    crystal_symmetry = None
    if need_crystal_symmetry:
        crystal_symmetry = inputs.crystal_symmetry
        if crystal_symmetry is None:
            raise Sorry("No box (unit cell) info found.")
    #
    model = None
    if pdb_inp is not None:
        model = mmtbx.model.manager(model_input=pdb_inp)
        broadcast(m="Input PDB:", log=log)
        # ideally this should not be available here
        log.write("%s\n" % (pdb_file_name,))
        model.get_hierarchy().show(level_id="chain")
    # The consensus symmetry replaces whatever was taken from the inputs.
    crystal_symmetry = cl_mtriage.check_and_set_crystal_symmetry(
        models=[model], map_inps=[map_inp])
    # Crystal symmetry
    broadcast(m="Box (unit cell) info:", log=log)
    inputs.crystal_symmetry.show_summary(f=log)
    # NOTE(review): map_inp is None when need_map is False, so the call
    # below still requires a map in practice -- confirm intended usage.
    base = map_and_model.input(
        map_data=map_inp.map_data(),
        model=model,
        crystal_symmetry=crystal_symmetry,
        box=True)
    params = inputs.params.extract()
    return group_args(
        params=params,
        pdb_file_name=pdb_file_name,
        map_data=base.map_data(),
        model=base.model(),
        crystal_symmetry=base.crystal_symmetry())
def run_one(arg):
    """
    Validate one model/map entry and pickle the result.

    Any exception is caught and written, together with the job arguments,
    to the job's bug-log file instead of being propagated.

    Args:
        arg: 7-item sequence ``(bug_log, pkl_result, pdb_file, map_file,
            map_file_1, map_file_2, info_file)``. ``map_file_1`` may be
            None, in which case no half-maps are read.
    """
    try:
        (bug_log, pkl_result, pdb_file, map_file, map_file_1, map_file_2,
         info_file) = arg
        #
        map_inp = iotbx.ccp4_map.map_reader(file_name=map_file)
        map_inp_1, map_inp_2 = None, None
        if map_file_1 is not None:
            map_inp_1 = iotbx.ccp4_map.map_reader(file_name=map_file_1)
            map_inp_2 = iotbx.ccp4_map.map_reader(file_name=map_file_2)
        pdb_inp = iotbx.pdb.input(file_name=pdb_file)
        model = mmtbx.model.manager(
            model_input=pdb_inp,
            stop_for_unknowns=False,
            log=null_out())
        # Return value is not needed here; the call itself is kept.
        mmtbx.utils.check_and_set_crystal_symmetry(
            models=[model], map_inps=[map_inp, map_inp_1, map_inp_2])
        map_data_1, map_data_2 = None, None
        if map_file_1 is not None:
            map_data_1 = map_inp_1.map_data()
            map_data_2 = map_inp_2.map_data()
        base = map_and_model.input(
            map_data=map_inp.map_data(),
            map_data_1=map_data_1,
            map_data_2=map_data_2,
            model=model,
            box=True)
        #
        inf = easy_pickle.load(info_file)
        # Use the best (smallest) of the resolutions that are available.
        resolutions = [
            d for d in (inf.d_emdb, inf.d_pdb, inf.d_cif) if d is not None]
        resolution = min(resolutions)
        #
        params = validation_cryoem.master_params().extract()
        params.resolution = resolution
        params.scattering_table = "n_gaussian"
        params.mtriage.include_curves = False
        params.mtriage.include_mask = False
        o = validation_cryoem.validation(
            model=base.model(),
            map_data=base.map_data(),
            map_data_1=base.map_data_1(),
            map_data_2=base.map_data_2(),
            params=params).get_results()
        o.source_info = inf
        #
        easy_pickle.dump(file_name=pkl_result, obj=o)
    except Exception as e:
        # Deliberate best-effort: record the failing job and carry on.
        with open(bug_log, "w") as of:
            for a in arg:
                of.write("%s\n" % (a,))
            of.write("%s\n" % str(e))
def run(self):
    """
    Run the ringer sampling, then the EMRinger scoring and rolling-window
    analyses, storing the outcomes on ``self.ringer_result``,
    ``self.scoring_result`` and ``self.rolling_result``.
    """
    hierarchy = self.model.get_hierarchy()
    map_data = None
    grid_unit_cell = None

    # sanity check for map and model
    if self.map_inp is not None:
        checked = map_and_model.input(
            map_manager=self.map_inp,
            model=self.model,
            crystal_symmetry=self.cs_consensus,
            box=False)
        hierarchy = checked.model().get_hierarchy()
        map_data = checked.map_data()
        grid_unit_cell = self.map_inp.grid_unit_cell()

    hierarchy.atoms().reset_i_seq()

    ringer_run = iterate_over_residues(
        pdb_hierarchy=hierarchy,
        map_coeffs=self.miller_array,
        map_data=map_data,
        unit_cell=grid_unit_cell,
        params=self.params,
        log=self.out)
    self.ringer_result = ringer_run.results

    # Plots go next to the output base name when one is given.
    if self.params.output_base is None:
        plots_dir = 'emringer_plots'
    else:
        plots_dir = self.params.output_base + "_plots"

    import matplotlib
    matplotlib.use("Agg")

    self.scoring_result = em_scoring.main(
        file_name=self.params.output_base,
        ringer_result=self.ringer_result,
        out_dir=plots_dir,
        sampling_angle=self.params.sampling_angle,
        quiet=self.params.quiet,
        out=self.out)

    threshold = self.params.rolling_window_threshold
    self.rolling_result = em_rolling.main(
        ringer_results=self.ringer_result,
        dir_name=plots_dir,
        threshold=threshold,  # scoring.optimal_threshold,
        graph=False,
        save=not self.params.quiet,
        out=self.out)
def __init__(self,
             map_inp,
             map_inp_1=None,
             map_inp_2=None,
             pdb_inp=None,
             params=None):
    """
    Store parameters and build the combined map/model input object.

    Args:
        map_inp: map input, forwarded to ``map_and_model.input``.
        map_inp_1: optional first half-map input.
        map_inp_2: optional second half-map input.
        pdb_inp: optional model input.
        params: extracted parameters; defaults to
            ``master_params().extract()`` when None.
    """
    # Fall back to default parameters when none were supplied.
    if params is None:
        params = master_params().extract()
    self.params = params
    self.results_masked = None
    self.results_unmasked = None
    self.time_cumulative = 0
    self.caller = caller(show=self.params.show_time)
    self.base = map_and_model.input(
        map_inp=map_inp,
        map_inp_1=map_inp_1,
        map_inp_2=map_inp_2,
        pdb_inp=pdb_inp,
        box=self.params.use_box)
def __init__(self, model, fmodel, cc_min=0.8, molprobity_map_params=None):
    """
    Compute per-residue real-space correlation and collect the results.

    Args:
        model: model object; its hierarchy is used for the calculation.
        fmodel: passed to ``real_space_correlation.simple`` when no map
            file is given.
        cc_min: residues with a correlation below this value are flagged
            as outliers.
        molprobity_map_params: optional parameters; when its
            ``map_file_name`` is set the correlation is computed against
            that map via ``map_model_cc`` instead of ``fmodel``.

    Results are stored in ``self.everything``, ``self.protein``,
    ``self.other``, ``self.overall_rsc``, ``self.fsc`` and
    ``self.results``; waters (resname ``HOH``) are left out.
    """
    from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
    from mmtbx import real_space_correlation
    validation.__init__(self)

    pdb_hierarchy = model.get_hierarchy()

    # arrays for different components
    self.everything = list()
    self.protein = list()
    self.other = list()
    self.water = list()

    # redo real_space_corelation.simple to use map objects instead of
    # filenames
    self.overall_rsc = None
    rsc = None

    rsc_params = real_space_correlation.master_params().extract()
    rsc_params.detail = "residue"
    rsc_params.map_1.fill_missing_reflections = False
    rsc_params.map_2.fill_missing_reflections = False

    use_maps = False
    if molprobity_map_params is not None:
        rsc_params.map_coefficients_file_name = \
            molprobity_map_params.map_coefficients_file_name
        rsc_params.map_coefficients_label = \
            molprobity_map_params.map_coefficients_label
        if molprobity_map_params.map_file_name is not None:
            use_maps = True

    # use mmtbx/command_line/map_model_cc.py for maps
    self.fsc = None
    if use_maps:
        from iotbx import map_and_model
        from mmtbx.maps import map_model_cc
        from mmtbx.command_line.map_model_cc import get_fsc
        from iotbx.file_reader import any_file
        params = map_model_cc.master_params().extract()
        params.map_model_cc.resolution = molprobity_map_params.d_min
        map_object = any_file(
            molprobity_map_params.map_file_name).file_object

        # check that model crystal symmetry matches map crystal symmetry
        mmi = map_and_model.input(
            map_data=map_object.map_data(), model=model)

        rsc_object = map_model_cc.map_model_cc(
            mmi.map_data(),
            mmi.model().get_hierarchy(),
            mmi.crystal_symmetry(),
            params.map_model_cc)
        rsc_object.validate()
        rsc_object.run()
        rsc = rsc_object.get_results()
        self.overall_rsc = (rsc.cc_mask, rsc.cc_volume, rsc.cc_peaks)

        self.fsc = get_fsc(mmi.map_data(), mmi.model(), params.map_model_cc)
        self.fsc.atom_radius = rsc.atom_radius
        rsc = rsc.cc_per_residue
    # mmtbx/real_space_correlation.py for X-ray/neutron data and map
    # coefficients
    else:
        self.overall_rsc, rsc = real_space_correlation.simple(
            fmodel=fmodel,
            pdb_hierarchy=pdb_hierarchy,
            params=rsc_params,
            log=null_out())

    assert ((self.overall_rsc is not None) and (rsc is not None))
    for result_ in rsc:
        # The residue name is read the way each result type exposes it, so
        # the classification below works for both calculation paths.
        if use_maps:
            # new rsc calculation (mmtbx/maps/model_map_cc.py)
            resname = result_.resname
            result = residue_real_space(
                chain_id=result_.chain_id,
                resname=resname,
                resseq=result_.resseq,
                icode=result_.icode,
                altloc="",
                score=result_.cc,
                b_iso=result_.b_iso_mean,
                occupancy=result_.occ_mean,
                outlier=result_.cc < cc_min,
                xyz=result_.xyz_mean)
        else:
            # old rsc calculation (mmtbx/maps/real_space_correlation.py)
            residue = result_.residue
            resname = residue.resname
            result = residue_real_space(
                chain_id=result_.chain_id,
                resname=resname,
                resseq=residue.resseq,
                icode=residue.icode,
                altloc="",
                score=result_.cc,
                b_iso=result_.b,
                occupancy=result_.occupancy,
                fmodel=result_.map_value_1,
                two_fofc=result_.map_value_2,
                outlier=result_.cc < cc_min,
                xyz=residue.atoms().extract_xyz().mean())

        if result.is_outlier():
            self.n_outliers += 1
        # XXX unlike other validation metrics, we always save the results
        # for the real-space correlation, since these are used as the basis
        # for the multi-criterion plot in Phenix. The show() method will
        # only print outliers, however.
        if resname != 'HOH':  # water is handled by waters.py
            self.everything.append(result)
            if resname in one_letter_given_three_letter:
                self.protein.append(result)
            else:
                self.other.append(result)
    self.everything += self.water
    self.results = self.protein
def run(args, log=sys.stdout):
    """phenix.mtriage: Given map file and optionally model and half-map files compute map statistics. How to run: phenix.mtriage model_file_name=m.pdb map_file_name=m.map half_map_file_name_1=m1.map half_map_file_name_2=m2.map Optional: model_file_name=, half_map_file_name_1=, half_map_file_name_2= Feedback: [email protected] [email protected] """
    # The docstring above is printed verbatim as the program banner, so
    # maintenance notes live in comments instead.
    #
    # args: command-line arguments; an empty list only prints the banner.
    # log:  file-like object receiving all text output.
    # Returns the mtriage results object (with counts and histograms
    # attached), or None when no arguments are given.
    assert len(locals().keys()) == 2  # intentional

    def say(*parts):
        # Writes what the Python 2 statement "print >> log, a, b, ..."
        # wrote: str() of each item, space-separated, newline-terminated.
        log.write(" ".join(str(p) for p in parts) + "\n")

    say("-" * 79)
    say(run.__doc__)
    say("-" * 79)
    if len(args) == 0:
        return
    introspection.virtual_memory_info().show_if_available(
        out=null_out(), show_max=True)  # just to initialize something
    # Get inputs
    inputs = get_inputs(args=args, log=log, master_params=master_params)
    if inputs.model is not None:
        inputs.model.setup_scattering_dictionaries(
            scattering_table=inputs.params.scattering_table)
    base = map_and_model.input(
        map_data=inputs.map_data,
        map_data_1=inputs.map_data_1,
        map_data_2=inputs.map_data_2,
        model=inputs.model,
        crystal_symmetry=inputs.crystal_symmetry,
        box=True)
    #
    task_obj = mmtbx.maps.mtriage.mtriage(
        map_data=base.map_data(),
        map_data_1=base.map_data_1(),
        map_data_2=base.map_data_2(),
        xray_structure=base.xray_structure(),
        crystal_symmetry=base.crystal_symmetry(),
        params=inputs.params)
    results = task_obj.get_results()
    results.counts = base.counts()
    results.histograms = base.histograms()
    #
    # Map statistics
    #
    broadcast(m="Map statistics:", log=log)
    say("Map:")
    say(" origin: ", results.counts.origin)
    say(" last: ", results.counts.last)
    say(" focus: ", results.counts.focus)
    say(" all: ", results.counts.all)
    say(" min,max,mean:", results.counts.min_max_mean)
    say(" d_min_corner:", "%7.3f" % results.counts.d_min_corner)
    #
    say("Half-maps:")
    if inputs.map_data_1 is None:
        say(" Half-maps are not provided.")
    #
    say("Histogram(s) of map values (masked):")
    show_histogram(map_histograms=results.histograms, log=log)
    # show results
    fv = format_value
    fs = "%8.2f"
    rm = results.masked
    ru = results.unmasked
    have_both = rm is not None and ru is not None
    if have_both:
        columns = (rm, ru)
    else:
        # Only one kind of result is available: print a single column.
        columns = (rm if rm is not None else ru,)
    # (label, attribute) pairs; attribute None marks a plain text line.
    rows = (
        (" using map alone (d99) :", "d99"),
        (" using map alone (d9999) :", "d9999"),
        (" using map alone (d99999) :", "d99999"),
        (" comparing with model (d_model) :", "d_model"),
        (" b_iso_overall :", "b_iso_overall"),
        (" comparing with model (d_model_b0):", "d_model_b0"),
        (" b_iso_overall=0", None),
        (" d_fsc_model:", None),
        (" FSC(map,model map)=0 :", "d_fsc_model_0"),
        (" FSC(map,model map)=0.143 :", "d_fsc_model_0143"),
        (" FSC(map,model map)=0.5 :", "d_fsc_model_05"),
        (" d99 (half map 1) :", "d99_1"),
        (" d99 (half map 2) :", "d99_2"),
        (" FSC(half map 1,2)=0.143 (d_fsc) :", "d_fsc"),
    )
    say("Map resolution estimates: masked unmasked")
    for label, attr in rows:
        if attr is None:
            say(label)
        else:
            say(label, *[fv(fs, getattr(c, attr)) for c in columns])
    say()
    #
    if have_both:
        radius = format_value("%6.2f", results.masked.radius_smooth)
    else:
        radius = format_value("%s", str(columns[0].radius_smooth))
    say("Radius used for mask smoothing:", radius)
    say()
    #
    for result, tag in ((results.masked, "masked"),
                        (results.unmasked, "unmasked")):
        if result is None:
            continue
        # FSC_model curve
        if result.fsc_curve_model is not None:
            file_name = "%s.%s.mtriage.log" % (
                inputs.params.fsc_model_plot_file_name_prefix, tag)
            with open(file_name, "w") as of:
                for a, b in zip(result.fsc_curve_model.d_inv,
                                result.fsc_curve_model.fsc):
                    of.write("%15.9f %15.9f\n" % (a, b))
            say("FSC(model map, map) is written to %s" % file_name)
        # Mask
        if inputs.params.write_mask_file and result.mask is not None:
            say("Mask is written to %s" % inputs.params.mask_file_name)
            ccp4_map.write_ccp4_map(
                file_name=inputs.params.mask_file_name,
                unit_cell=inputs.crystal_symmetry.unit_cell(),
                space_group=inputs.crystal_symmetry.space_group(),
                map_data=result.mask,
                labels=flex.std_string(["mask"]))
        # FSC (half-maps) curve
        if result.fsc_curve is not None:
            file_name = "%s.%s.mtriage.log" % (
                inputs.params.fsc_half_maps_file_name_prefix, tag)
            with open(file_name, "w") as of:
                for a, b in zip(result.fsc_curve.fsc.d_inv,
                                result.fsc_curve.fsc.fsc):
                    of.write("%15.9f %15.9f\n" % (a, b))
            say("FSC(half map 1, half map 2) is written to %s" % file_name)
    #
    return results  # required for GUI
def run(args, out=None, verbose=True, plots_dir=None):
    """
    Command-line driver for EMRinger.

    Args:
        args: command-line arguments (model, map or map coefficients,
            options).
        out: output stream; defaults to ``sys.stdout``.
        verbose: when true, print timing information.
        plots_dir: directory for plots; defaults to
            ``<output_base>_plots`` and is created if missing.

    Returns:
        ``(results, scoring, rolling)`` unless ``params.show_gui`` is set,
        in which case the GUI is started and None is returned.

    Raises:
        Sorry: if map coefficients cannot be chosen unambiguously.

    Writes ``<output_base>.pkl`` and ``<output_base>.csv``.
    """
    t0 = time.time()
    if out is None:
        out = sys.stdout
    import iotbx.phil
    cmdline = iotbx.phil.process_command_line_with_files(
        args=args,
        master_phil=master_phil,
        pdb_file_def="model",
        reflection_file_def="map_coeffs",
        map_file_def="map_file",
        usage_string="""\
phenix.emringer model.pdb map.mrc [cif_file ...] [options]

%s
""" % __doc__)
    params = cmdline.work.extract()
    validate_params(params)
    pdb_in = cmdline.get_file(params.model)
    pdb_in.check_file_type("pdb")

    pdb_inp = iotbx.pdb.input(file_name=params.model)
    model = mmtbx.model.manager(model_input=pdb_inp)
    crystal_symmetry_model = model.crystal_symmetry()
    if crystal_symmetry_model is not None:
        crystal_symmetry_model.show_summary()

    hierarchy = model.get_hierarchy()
    map_coeffs = map_inp = None
    map_data, unit_cell = None, None
    if params.map_coeffs is not None:
        mtz_in = cmdline.get_file(params.map_coeffs)
        mtz_in.check_file_type("hkl")
        best_guess = None
        best_labels = []
        all_labels = []
        for array in mtz_in.file_server.miller_arrays:
            if array.info().label_string() == params.map_label:
                map_coeffs = array
                break
            elif params.map_label is None:
                if array.is_complex_array():
                    labels = array.info().label_string()
                    all_labels.append(labels)
                    if labels.startswith(("2FOFCWT", "2mFoDFc", "FWT")):
                        best_guess = array
                        best_labels.append(labels)
        if map_coeffs is None:
            if len(all_labels) == 0:
                raise Sorry(
                    "No valid (pre-weighted) map coefficients found in file.")
            elif best_guess is None:
                # Newline first, then indent, so each choice gets its own
                # indented line.
                raise Sorry(
                    "Couldn't automatically determine appropriate map "
                    "labels. " +
                    "Choices:\n %s" % "\n ".join(all_labels))
            elif len(best_labels) > 1:
                raise Sorry(
                    "Multiple appropriate map coefficients found in file. " +
                    "Choices:\n %s" % "\n ".join(best_labels))
            map_coeffs = best_guess
            print(" Guessing %s for input map coefficients" % best_labels[0],
                  file=out)
    else:
        ccp4_map_in = cmdline.get_file(params.map_file)
        ccp4_map_in.check_file_type("ccp4_map")
        map_inp = ccp4_map_in.file_object
        # Return value is not needed here; the call itself is kept.
        mmtbx.utils.check_and_set_crystal_symmetry(
            models=[model], map_inps=[map_inp])
        base = map_and_model.input(
            map_data=map_inp.map_data(), model=model, box=False)
        hierarchy = base.model().get_hierarchy()
        map_data = base.map_data()
        unit_cell = map_inp.grid_unit_cell()

    hierarchy.atoms().reset_i_seq()

    make_header("Iterating over residues", out=out)
    t1 = time.time()
    from mmtbx.ringer import iterate_over_residues
    results = iterate_over_residues(
        pdb_hierarchy=hierarchy,
        map_coeffs=map_coeffs,
        map_data=map_data,
        unit_cell=unit_cell,
        params=params,
        log=out).results
    t2 = time.time()
    if verbose:
        print("Time excluding I/O: %8.1fs" % (t2 - t1), file=out)
        print("Overall runtime: %8.1fs" % (t2 - t0), file=out)
    if params.output_base is None:
        pdb_base = os.path.basename(params.model)
        params.output_base = os.path.splitext(pdb_base)[0] + "_emringer"
    easy_pickle.dump("%s.pkl" % params.output_base, results)
    print("Wrote %s.pkl" % params.output_base, file=out)
    csv = "\n".join([r.format_csv() for r in results])
    # Context manager guarantees the CSV is flushed and closed.
    with open("%s.csv" % params.output_base, "w") as csv_file:
        csv_file.write(csv)
    print("Wrote %s.csv" % params.output_base, file=out)
    if plots_dir is None:
        plots_dir = params.output_base + "_plots"
    if not os.path.isdir(plots_dir):
        os.makedirs(plots_dir)
    from mmtbx.ringer import em_rolling
    from mmtbx.ringer import em_scoring
    import matplotlib
    matplotlib.use("Agg")
    make_header("Scoring results", out=out)
    scoring = em_scoring.main(
        file_name=params.output_base,
        ringer_result=results,
        out_dir=plots_dir,
        sampling_angle=params.sampling_angle,
        quiet=False,
        out=out)
    make_header("Inspecting chains", out=out)
    rolling_window_threshold = params.rolling_window_threshold
    rolling = em_rolling.main(
        ringer_results=results,
        dir_name=plots_dir,
        threshold=rolling_window_threshold,  # scoring.optimal_threshold,
        graph=False,
        save=True,
        out=out)
    scoring.show_summary(out=out)
    print("\nReferences:", file=out)

    references = """\
Barad BA, Echols N, Wang RYR, Cheng YC, DiMaio F, Adams PD, Fraser JS. (2015)
Side-chain-directed model and map validation for 3D Electron Cryomicroscopy.
Nature Methods, in press.

Lang PT, Ng HL, Fraser JS, Corn JE, Echols N, Sales M, Holton JM, Alber T.
Automated electron-density sampling reveals widespread conformational
polymorphism in proteins. Protein Sci. 2010 Jul;19(7):1420-31. PubMed PMID:
20499387"""
    print(references, file=out)
    if params.show_gui:
        run_app(results)
    else:
        return (results, scoring, rolling)
def run(args, out=None, verbose=True):
    """
    Command-line driver for ringer.

    Args:
        args: command-line arguments (model, map coefficients or map,
            options).
        out: output stream; defaults to ``sys.stdout``.
        verbose: when true, print timing information.

    Returns:
        The ringer results unless ``params.gui`` is set, in which case the
        GUI is started and None is returned.

    Raises:
        Sorry: if map coefficients cannot be chosen unambiguously.

    Writes ``<output_base>.pkl`` and ``<output_base>.csv``.
    """
    t0 = time.time()
    if out is None:
        out = sys.stdout
    from iotbx import file_reader
    import iotbx.phil
    cmdline = iotbx.phil.process_command_line_with_files(
        args=args,
        master_phil=master_phil,
        pdb_file_def="model",
        reflection_file_def="map_coeffs",
        map_file_def="map_file",
        cif_file_def="cif_file",
        usage_string="""\
mmtbx.ringer model.pdb map_coeffs.mtz [cif_file ...] [options]

%s
""" % __doc__)
    cmdline.work.show()
    params = cmdline.work.extract()
    validate_params(params)
    pdb_in = file_reader.any_file(params.model, force_type="pdb")
    pdb_in.check_file_type("pdb")

    pdb_inp = iotbx.pdb.input(file_name=params.model)
    model = mmtbx.model.manager(model_input=pdb_inp)
    crystal_symmetry_model = model.crystal_symmetry()
    if crystal_symmetry_model is not None:
        crystal_symmetry_model.show_summary()

    hierarchy = model.get_hierarchy()
    map_coeffs = map_inp = difference_map_coeffs = None
    map_data, unit_cell = None, None

    # get miller array if map coefficients are provided
    if params.map_coeffs is not None:
        mtz_in = file_reader.any_file(params.map_coeffs, force_type="hkl")
        mtz_in.check_file_type("hkl")
        best_guess = None
        best_labels = []
        all_labels = []
        for array in mtz_in.file_server.miller_arrays:
            if not array.is_complex_array():
                continue
            labels = array.info().label_string()
            if labels == params.map_label:
                map_coeffs = array
            elif labels == params.difference_map_label:
                difference_map_coeffs = array
            else:
                # Labels were not named explicitly: guess from well-known
                # column-name prefixes.
                if params.map_label is None:
                    all_labels.append(labels)
                    if labels.startswith(("2FOFCWT", "2mFoDFc", "FWT")):
                        best_guess = array
                        best_labels.append(labels)
                if params.difference_map_label is None:
                    if labels.startswith(("FOFCWT", "DELFWT")):
                        difference_map_coeffs = array
        if map_coeffs is None:
            if len(all_labels) == 0:
                raise Sorry(
                    "No valid (pre-weighted) map coefficients found in file.")
            elif best_guess is None:
                # Newline first, then indent, so each choice gets its own
                # indented line.
                raise Sorry(
                    "Couldn't automatically determine appropriate map "
                    "labels. " +
                    "Choices:\n %s" % "\n ".join(all_labels))
            elif len(best_labels) > 1:
                raise Sorry(
                    "Multiple appropriate map coefficients found in file. " +
                    "Choices:\n %s" % "\n ".join(best_labels))
            map_coeffs = best_guess
            print(" Guessing %s for input map coefficients" % best_labels[0],
                  file=out)
    # get map_inp object and do sanity checks if map is provided
    else:
        ccp4_map_in = file_reader.any_file(
            params.map_file, force_type="ccp4_map")
        ccp4_map_in.check_file_type("ccp4_map")
        map_inp = ccp4_map_in.file_object
        # Return value is not needed here; the call itself is kept.
        mmtbx.utils.check_and_set_crystal_symmetry(
            models=[model], map_inps=[map_inp])
        base = map_and_model.input(
            map_manager=map_inp, model=model, box=False)
        hierarchy = base.model().get_hierarchy()
        map_data = base.map_data()
        unit_cell = map_inp.grid_unit_cell()

    hierarchy.atoms().reset_i_seq()

    make_header("Iterating over residues", out=out)
    t1 = time.time()
    results = iterate_over_residues(
        pdb_hierarchy=hierarchy,
        map_coeffs=map_coeffs,
        difference_map_coeffs=difference_map_coeffs,
        map_data=map_data,
        unit_cell=unit_cell,
        params=params,
        log=out).results
    t2 = time.time()
    if verbose:
        print("Time excluding I/O: %8.1fs" % (t2 - t1), file=out)
        print("Overall runtime: %8.1fs" % (t2 - t0), file=out)
    if params.output_base is None:
        pdb_base = os.path.basename(params.model)
        params.output_base = os.path.splitext(pdb_base)[0] + "_ringer"
    easy_pickle.dump("%s.pkl" % params.output_base, results)
    print("Wrote %s.pkl" % params.output_base, file=out)
    csv = "\n".join([r.format_csv() for r in results])
    # Context manager guarantees the CSV is flushed and closed.
    with open("%s.csv" % params.output_base, "w") as csv_file:
        csv_file.write(csv)
    print("Wrote %s.csv" % params.output_base, file=out)
    print("\nReference:", file=out)
    print("""\
Lang PT, Ng HL, Fraser JS, Corn JE, Echols N, Sales M, Holton JM, Alber T.
Automated electron-density sampling reveals widespread conformational
polymorphism in proteins. Protein Sci. 2010 Jul;19(7):1420-31. PubMed PMID:
20499387""", file=out)
    if params.gui:
        run_app(results)
    else:
        return results
def run(self): hierarchy = self.model.get_hierarchy() map_data, grid_unit_cell = None, None #### <Begin> sanity check for map and model if self.map_inp is not None: base = map_and_model.input(map_data=self.map_inp.map_data(), model=self.model, crystal_symmetry=self.cs_consensus, box=False) hierarchy = base.model().get_hierarchy() map_data = base.map_data() grid_unit_cell = self.map_inp.grid_unit_cell() hierarchy.atoms().reset_i_seq() #### <End> sanity check for map and model # Initialize states accumulator # Pavel's original states = mmtbx.utils.states( pdb_hierarchy=self.model.get_hierarchy(), xray_structure=self.model.get_xray_structure()) states.add(sites_cart=self.model.get_xray_structure().sites_cart()) params = sa.master_params().extract( ) # because of params = sa.master_params().extract() above, core parameters need to be redefined params.start_temperature = self.params.start_temperature params.final_temperature = self.params.final_temperature params.cool_rate = self.params.cool_rate #params.MD_in_each_cycle = self.params.MD_in_each_cycle # "AttributeError: Assignment to non-existing attribute "MD_in_each_cycle" params.number_of_steps = self.params.number_of_steps max_steps_for_final_MD = '' if (self.params.max_steps_for_final_MD != None): max_steps_for_final_MD = self.params.max_steps_for_final_MD params.update_grads_shift = 0. 
params.interleave_minimization = False #Pavel will fix the error that occur when params.interleave_minimization=True #print ("params:",params) # object like <libtbx.phil.scope_extract object at 0x1146ae210> map_inp = self.map_inp user_map_weight = self.user_map_weight map_weight_multiply = self.map_weight_multiply if ( self.params.record_states == False ): # default choice to avoid > 160 GB memory issue with recording all states for L1 stalk states = None if (self.params.reoptimize_map_weight_after_each_cycle_during_final_MD == True): cycle_so_far_for_map_weight_reoptimization = 0 splited_model_name = self.model_name[:-4].split("/") model_file_name_only = splited_model_name[len(splited_model_name) - 1] #number_of_atoms_in_input_pdb = know_number_of_atoms_in_input_pdb(self.logfile, self.model_name) # tRNA : 1,563 # L1 stalk : 3,289 # Mg channel: 14,940 # number_of_atoms_in_input_pdb seems irrelevant to check_cc_after_these_cycles assignment. # but Mg channel with 10k check took 10 days! #### <begin> prepare/initialize for iteration check_cc_after_these_steps = '' # use '# of steps' not '# of iterations' if (("tst_cryo_fit2" in model_file_name_only) == True): #check_cc_after_these_steps = 1000 # tst_2 took 2 min? 
check_cc_after_these_steps = 700 # if this is too small (like 100), it may run forever # I confirmed that 500 is definitely too small to explore properly (a helix), #but this is just for test else: check_cc_after_these_steps = 10000 #check_cc_after_these_steps = 2000 #even if this value is so small like this, empty 1st_2nd_array error is avoided by following fail-proof hook number_of_MD_in_each_cycle = 1 + ( (params.start_temperature - params.final_temperature) / params.cool_rate) # same value as MD_in_each_cycle # Regardless of above assignment, re-assign check_cc_after_these_steps to avoid empty 1st_2nd_array situation check_cc_after_these_steps = check_cc_after_these_steps + params.number_of_steps * number_of_MD_in_each_cycle * 2 # reoptimize_map_weight_after_these_steps = '' # if (self.params.reoptimize_map_weight_after_each_cycle_during_final_MD == True): # if (("tst_cryo_fit2" in model_file_name_only) == True): # reoptimize_map_weight_after_these_steps = 5 # else: # reoptimize_map_weight_after_these_steps = 100 # after 123~171 cycles, full tRNA crashes (when map_weight is multiplied too crazy back then,,,) if (("tst_cryo_fit2_" in self.model_name) == True): self.params.max_steps_for_exploration = 100 #max_steps_for_final_MD = 10000 #max_steps_for_final_MD = 3000 map_weight_before_multiplication = self.params.map_weight self.params.map_weight = self.params.map_weight * map_weight_multiply #### This is the only place where map_weight_multiply is applied (other than reoptimize_map_weight_if_not_specified for final MD) best_cc_so_far = -999 # tRNA has a negative value of initial cc cc_1st_array = [] cc_2nd_array = [] result = '' total_steps_so_far_for_exploration_and_final_MD = 0 total_steps_so_far_for_cc_check = 0 # initialization #### <end> prepare/initialize for iteration grm = self.model.get_restraints_manager() pdb_hierarchy = self.model.get_hierarchy() ''' #get_stacking_proxies() takes exactly 5 arguments stacking_proxies = 
nucleic_acids.get_stacking_proxies( pdb_hierarchy = pdb_hierarchy, stacking_phil_params = self.params.secondary_structure.nucleic_acid.stacking_pair, grm=grm) #, #mon_lib_srv=self.mon_lib_srv, # AttributeError: 'cryo_fit2_class' object has no attribute 'mon_lib_sr #plane_cache=plane_cache) print(" %d stacking parallelities" % len(stacking_proxies), file=log) STOP() ''' write_this = "\nself.params.map_weight after multiplication (" + str( map_weight_multiply) + ") = " + str( round(self.params.map_weight, 1)) + "\n" print(write_this) self.logfile.write(str(write_this)) ########################### <begin> iterate until cryo_fit2 derived cc saturates for i in range( 100000000 ): # runs well with cryo_fit2.run_tests #for i in range(1000000000): # fails with cryo_fit2.run_tests with too much memory (bigger than 30 GB) write_this = "\n" + str(i + 1) + "th iteration: \n" print(write_this) self.logfile.write(str(write_this)) try: if (self.params.progress_on_screen == True): # default choice result = sa.run( params=params, xray_structure=self.model.get_xray_structure(), #restraints_manager = self.model.get_restraints_manager(), restraints_manager=grm, target_map=map_data, real_space=True, wx=self.params.map_weight, wc=1, # weight for geometry conformation states_collector=states) else: # (self.params.progress_on_screen = False): result = sa.run( params=params, xray_structure=self.model.get_xray_structure(), #restraints_manager = self.model.get_restraints_manager(), restraints_manager=grm, target_map=map_data, real_space=True, wx=self.params.map_weight, wc=1, # weight for geometry conformation states_collector=states, log=self.logfile ) # if this is commented, temp= xx dist_moved= xx angles= xx bonds= xx is shown on screen rather than cryo_fit2.log except Exception as ex: write_this = "exception message:" + str(ex) print(write_this) self.logfile.write(str(write_this)) write_this = "Failed during core map weight multiplied phenix.dynamics run.\n" print(write_this) 
self.logfile.write(str(write_this)) return self.output_dir total_steps_so_far_for_exploration_and_final_MD = total_steps_so_far_for_exploration_and_final_MD \ + int(params.number_of_steps*number_of_MD_in_each_cycle) cc_after_small_MD = calculate_overall_cc( map_data=map_data, model=self.model, resolution=self.params.resolution) write_this = "CC after this cycle (a small MD iteration): " + str( round(cc_after_small_MD, 7)) + "\n" self.logfile.write(str(write_this)) if (self.params.explore == True): if (total_steps_so_far_for_exploration_and_final_MD < self.params.max_steps_for_exploration): write_this = "\ntotal_steps_so_far_for_exploration_and_final_MD (" + str(total_steps_so_far_for_exploration_and_final_MD) + \ ") < max_steps_for_exploration (" + str(self.params.max_steps_for_exploration) + ")\n" print('%s' % (write_this)) self.logfile.write(str(write_this)) continue else: write_this = "\ntotal_steps_so_far_for_exploration_and_final_MD (" + str(total_steps_so_far_for_exploration_and_final_MD) + \ ") >= max_steps_for_exploration (" + str(self.params.max_steps_for_exploration) + ")\n" print('%s' % (write_this)) self.logfile.write(str(write_this)) break ############# All below is for final MD total_steps_so_far_for_cc_check = total_steps_so_far_for_cc_check + int( params.number_of_steps * number_of_MD_in_each_cycle) cc_improvement_threshold = '' if (("tst_cryo_fit2" in model_file_name_only) == True): #cc_improvement_threshold = 0.01 # to finish regression quickly # took 2 min for tst_2? cc_improvement_threshold = 0.05 # to finish regression quickly # took 2 min for tst_2? 
else: cc_improvement_threshold = 0.00001 # even a 0.0001 improved cc further eventually significantly if (max_steps_for_final_MD != ''): if (total_steps_so_far_for_exploration_and_final_MD >= max_steps_for_final_MD): write_this = '' if (self.params.explore == True): write_this = "\ntotal_steps_so_far_for_exploration_and_final_MD (" + str(total_steps_so_far_for_exploration_and_final_MD) + \ ") >= A specified max_steps_for_final_MD (" + str(max_steps_for_final_MD) + ")\n" else: write_this = "\ntotal steps final_MD (" + str(total_steps_so_far_for_exploration_and_final_MD) + \ ") >= A specified max_steps_for_final_MD (" + str(max_steps_for_final_MD) + ")\n" print('%s' % (write_this)) self.logfile.write(str(write_this)) break if (float(total_steps_so_far_for_cc_check) < float( check_cc_after_these_steps / 2)): cc_1st_array.append(cc_after_small_MD) else: cc_2nd_array.append(cc_after_small_MD) ''' if (self.params.reoptimize_map_weight_after_each_cycle_during_final_MD == True): if (cycle_so_far_for_map_weight_reoptimization >= reoptimize_map_weight_after_these_steps): self.params.map_weight = reoptimize_map_weight_if_not_specified(self, user_map_weight, map_inp) self.params.map_weight = self.params.map_weight * map_weight_multiply cycle_so_far_for_map_weight_reoptimization = 0 # reinitialization # I confirmed that reoptimizing map_weight_after_each_cycle did change result (cc, SS stat) significantly ''' # total_steps_so_far_for_cc_check is thought to be re-initialized in all circumstances. However, it seems not. if (total_steps_so_far_for_cc_check >= check_cc_after_these_steps): if (cc_after_small_MD > best_cc_so_far): write_this = "current_cc (" + str( cc_after_small_MD) + ") > best_cc_so_far (" + str( best_cc_so_far ) + "). 
\nTherefore, cryo_fit2 will run longer MD.\n" print('%s' % (write_this)) self.logfile.write(str(write_this)) write_this = "cc_after_small_MD - best_cc_so_far = " + str( float_to_str(cc_after_small_MD - best_cc_so_far) ) + "\n" # this "\n" is essential for screen print print('%s' % (write_this)) self.logfile.write(str(write_this)) if ( float(cc_after_small_MD - best_cc_so_far) > cc_improvement_threshold ): # without this if clause, later MD cycles that improve just tiny fractions of cc take too long time write_this = "cc_after_small_MD - best_cc_so_far > cc_improvement_threshold (" + str( float_to_str(cc_improvement_threshold) ) + "). Iterates longer.\n" print('%s' % (write_this)) self.logfile.write(str(write_this)) best_cc_so_far = cc_after_small_MD cc_1st_array = [] # reset cc_2nd_array = [] # reset total_steps_so_far_for_cc_check = 0 # reset continue else: write_this = "cc_after_small_MD - best_cc_so_far <= " + str( float_to_str(cc_improvement_threshold) ) + ". Goes to mean_array_comparison.\n" print('%s' % (write_this)) self.logfile.write(str(write_this)) else: write_this = "current_cc (" + str( cc_after_small_MD) + ") <= best_cc_so_far (" + str( best_cc_so_far) + ")\n" print('%s' % (write_this)) self.logfile.write(str(write_this)) if (np.mean(cc_2nd_array) > np.mean(cc_1st_array)): write_this = "mean of cc_2nd_array (" + str( np.mean(cc_2nd_array) ) + ") > mean of cc_1st_array (" + str( np.mean(cc_1st_array)) + ")\n" print('%s' % (write_this)) self.logfile.write(str(write_this)) write_this = "(mean of cc_2nd_array) - (mean of cc_1st_array): " + str( np.mean(cc_2nd_array) - np.mean(cc_1st_array)) + "\n" print('%s' % (write_this)) self.logfile.write(str(write_this)) if ( (np.mean(cc_2nd_array) - np.mean(cc_1st_array)) > cc_improvement_threshold ): # without this if clause, later MD cycles that improve just tiny fractions of cc take too long time cc_1st_array = [] # reset cc_2nd_array = [] # reset total_steps_so_far_for_cc_check = 0 # reset else: write_this = "cc 
values are saturated\n" print('%s' % (write_this)) self.logfile.write(str(write_this)) if (self.params.explore == True ): # no need to report cc after explore write_this = "total_steps_so_far_for_exploration_and_final_MD: " + str( total_steps_so_far_for_exploration_and_final_MD ) + "\n" else: # (self.params.explore = False): # no need to report cc after explore write_this = "total_steps for final_MD: " + str( total_steps_so_far_for_exploration_and_final_MD ) + "\n" print('%s' % (write_this)) self.logfile.write(str(write_this)) break else: #(np.mean(cc_2nd_array) <= np.mean(cc_1st_array)): write_this = "mean of cc_2nd_array (" + str( np.mean(cc_2nd_array) ) + ") <= mean of cc_1st_array (" + str( np.mean(cc_1st_array)) + ")\n" print('%s' % (write_this)) self.logfile.write(str(write_this)) write_this = "cc values are saturated\n" print('%s' % (write_this)) self.logfile.write(str(write_this)) if (self.params.explore == True ): # no need to report cc after explore write_this = "total_steps_so_far_for_exploration_and_final_MD: " + str( total_steps_so_far_for_exploration_and_final_MD ) + "\n" else: # (self.params.explore = False): # no need to report cc after explore write_this = "total_steps for final_MD: " + str( total_steps_so_far_for_exploration_and_final_MD ) + "\n" print('%s' % (write_this)) self.logfile.write(str(write_this)) break ######################### <end> iterate until cryo_fit2 derived cc saturates overall_cc_after_cryo_fit2 = calculate_overall_cc( map_data=map_data, model=self.model, resolution=self.params.resolution) write_this = "\nFinal MD of cryo_fit2 is done.\n" print('%s' % (write_this)) self.logfile.write(str(write_this)) if (self.params.explore == False ): # no need to report cc after explore ''' since this differs from CC_box, let's not use this (??) 
write_this = "\nCC_overall after cryo_fit2 (both exploration and final MD): " + str(round(overall_cc_after_cryo_fit2, 4)) + "\n" print('%s' %(write_this)) self.logfile.write(str(write_this)) #''' output_dir_w_CC = str(self.output_dir) + "_cc_" + str( round(overall_cc_after_cryo_fit2, 3)) if os.path.exists(output_dir_w_CC): shutil.rmtree(output_dir_w_CC) os.mkdir(output_dir_w_CC) if (self.params.record_states == True): all_state_file = os.path.join(output_dir_w_CC, "all_states.pdb") states.write(file_name=all_state_file) self.model.set_xray_structure(result.xray_structure) fitted_file_name = model_file_name_only + "_cryo_fit2_fitted.pdb" fitted_file_name_w_path = os.path.join(output_dir_w_CC, fitted_file_name) ##### this is essential to spit cryo_fitted2 file with open(fitted_file_name_w_path, "w") as f: f.write(self.model.model_as_pdb()) f.close() #print_this =''' ######## How to fix map origin problem in cryo_fit2 ####### ''' With 0,0,0 origin map, cryo_fit2 has no problem. However, with non-0,0,0 origin cryo-EM map, cryo_fit2 results cryo_fitted pdb model at "wrong" origin This is because probably dynamics part uses map at 0,0,0 origin. Therefore, cryo_fit2 identifies how much the map origin was moved, then update all xyz coordinates of output pdb file. In user's perspective, there is nothing to bother. All kinds of mrc files (e.g. "Regular", emdb downloaded, went through phenix.map_box, gaussian filtered by UCSF Chimera and went through relion_image_handler) work fine. ############################################################# #print (print_this,"\n") ''' try: bp_num_in_fitted_file, sp_num_in_fitted_file, H_num_in_fitted_file, E_num_in_fitted_file = \ count_bp_sp_H_E_in_fitted_file(fitted_file_name_w_path, output_dir_w_CC, self.logfile) except Exception as ex: write_this = "exception message:" + str(ex) print(write_this) self.logfile.write(str(write_this)) write_this = "(in task_obj loop) An exception occurred in cryo_fit2_run. 
\n" + \ " Maybe cryo_fit2 failed to run (\"nan\" or secondary_structure_restraint file generataion failure) for this condition:" + \ " cool_rate (" + str(round(params.cool_rate, 1)) + ")\n" + \ " number_of_steps (" + str(params.number_of_steps) + ")\n" + \ " start_temperature (" + str(params.start_temperature) + ")\n" + \ " map_weight_multiply (" + str(map_weight_multiply) + ")\n" + \ " final_temperature (" + str(params.final_temperature) + ")\n" + \ " map_weight (" + str(round(self.params.map_weight,2)) + ")\n" + \ " max_steps_for_final_MD (" + str(max_steps_for_final_MD) + ")" print(write_this) self.logfile.write(str(write_this)) if (os.path.isdir("parameters_exploration/bp_H_E_not_calculated") == False): os.mkdir("parameters_exploration/bp_H_E_not_calculated") command_string = "mv " + str( output_dir_w_CC ) + " parameters_exploration/bp_H_E_not_calculated" logfile.write(str(command_string)) libtbx.easy_run.fully_buffered( command=command_string).raise_if_errors().stdout_lines return output_dir_w_CC returned = know_how_much_map_origin_moved(str(self.map_name)) if (returned != "origin_is_all_zero" and self.params.keep_origin == True): write_this = "Restoring original xyz position for a cryo_fit2 fitted atomistic model\n" print(write_this) self.logfile.write(str(write_this)) return_to_origin_of_pdb_file(fitted_file_name_w_path, returned[0], returned[1], returned[2], returned[3]) if (("tst_cryo_fit2" in fitted_file_name_w_path) == False): calculate_RMSD(self, fitted_file_name_w_path) output_dir_final = output_dir_w_CC + "_bp_" + str(bp_num_in_fitted_file) + "_sp_" + str(sp_num_in_fitted_file) \ + "_H_" + str(H_num_in_fitted_file) + "_E_" + str(E_num_in_fitted_file) if os.path.exists(output_dir_final): shutil.rmtree(output_dir_final) mv_command_string = "mv " + output_dir_w_CC + " " + output_dir_final libtbx.easy_run.fully_buffered(mv_command_string) ############################ current_dir = os.getcwd() os.chdir(output_dir_final) command_string = "echo " + str( 
map_weight_before_multiplication ) + " >> used_map_weight_before_multiplication.txt" libtbx.easy_run.fully_buffered( command=command_string).raise_if_errors().stdout_lines os.chdir(current_dir) ############################ return output_dir_final
def __init__(self, model, fmodel, cc_min=0.8, molprobity_map_params=None):
    """Set up real-space correlation results for a model.

    Initializes the empty per-component result lists and then computes the
    overall and per-residue real-space correlation, using one of two paths:

    * if ``molprobity_map_params.map_file_name`` is given, the map file is
      read and ``mmtbx.maps.map_model_cc`` is run against it (this path also
      fills ``self.fsc``);
    * otherwise ``mmtbx.real_space_correlation.simple`` is run on ``fmodel``.

    Parameters:
      model: object providing ``get_hierarchy()`` and ``crystal_symmetry()``;
        also handed to ``iotbx.map_and_model.input`` on the map path.
      fmodel: passed through to ``real_space_correlation.simple`` on the
        non-map path.
      cc_min: not referenced in the visible part of this method.
        NOTE(review): presumably a correlation cutoff used further on --
        confirm against the rest of the method/class.
      molprobity_map_params: optional parameter object; when not None its
        ``map_coefficients_file_name``, ``map_coefficients_label``,
        ``map_file_name`` and (map path only) ``d_min`` attributes are read.

    Any exception raised while computing the correlations is propagated
    unchanged to the caller.
    """
    from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
    from mmtbx import real_space_correlation

    validation.__init__(self)

    pdb_hierarchy = model.get_hierarchy()
    # NOTE(review): crystal_symmetry and aa_codes are not used in the visible
    # part of this method; kept because later code may rely on them.
    crystal_symmetry = model.crystal_symmetry()

    # arrays for different components
    self.everything = list()
    self.protein = list()
    self.other = list()
    self.water = list()
    aa_codes = one_letter_given_three_letter.keys()

    # redo real_space_corelation.simple to use map objects instead of filenames
    self.overall_rsc = None
    rsc = None
    try:
        rsc_params = real_space_correlation.master_params().extract()
        rsc_params.detail = "residue"
        rsc_params.map_1.fill_missing_reflections = False
        rsc_params.map_2.fill_missing_reflections = False
        use_maps = False
        if (molprobity_map_params is not None):
            rsc_params.map_coefficients_file_name = \
                molprobity_map_params.map_coefficients_file_name
            rsc_params.map_coefficients_label = \
                molprobity_map_params.map_coefficients_label
            # A real-space map file switches the calculation to map_model_cc.
            if (molprobity_map_params.map_file_name is not None):
                use_maps = True

        # use mmtbx/command_line/map_model_cc.py for maps
        self.fsc = None
        if (use_maps):
            from iotbx import map_and_model
            from mmtbx.maps import map_model_cc
            from mmtbx.command_line.map_model_cc import get_fsc
            from iotbx.file_reader import any_file
            params = map_model_cc.master_params().extract()
            params.map_model_cc.resolution = molprobity_map_params.d_min
            map_object = any_file(
                molprobity_map_params.map_file_name).file_object

            # check that model crystal symmetry matches map crystal symmetry
            mmi = map_and_model.input(map_data=map_object.map_data(),
                                      model=model)

            rsc_object = map_model_cc.map_model_cc(
                mmi.map_data(), mmi.model().get_hierarchy(),
                mmi.crystal_symmetry(), params.map_model_cc)
            rsc_object.validate()
            rsc_object.run()
            rsc = rsc_object.get_results()
            self.overall_rsc = (rsc.cc_mask, rsc.cc_volume, rsc.cc_peaks)

            self.fsc = get_fsc(mmi.map_data(), mmi.model(),
                               params.map_model_cc)
            self.fsc.atom_radius = rsc.atom_radius
            # From here on rsc holds only the per-residue results.
            rsc = rsc.cc_per_residue
        # mmtbx/real_space_correlation.py for X-ray/neutron data and map
        # coefficients
        else:
            self.overall_rsc, rsc = real_space_correlation.simple(
                fmodel=fmodel,
                pdb_hierarchy=pdb_hierarchy,
                params=rsc_params,
                log=null_out())
    except Exception:
        # Was "except Exception, e:", which is a SyntaxError on Python 3.
        # The bare re-raise keeps the original propagate-unchanged behavior
        # and the try/except structure.
        raise