def __init__(self, fmodel, pdb_hierarchy, cc_min=0.8,
             molprobity_map_params=None):
  """
  Set up the result containers and run the per-residue real-space
  correlation calculation.

  fmodel, pdb_hierarchy: forwarded unchanged to
    real_space_correlation.simple().
  cc_min: not consulted by the statements in this block (presumably the
    outlier threshold - TODO confirm against the rest of the class).
  molprobity_map_params: optional object; when given, its map_file_name,
    map_coefficients_file_name and map_coefficients_label values are copied
    onto the correlation parameters.

  Any exception raised by the calculation propagates unchanged.
  """
  from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
  from mmtbx import real_space_correlation
  validation.__init__(self)
  # one container per component class
  self.everything = []
  self.protein = []
  self.other = []
  self.water = []
  aa_codes = one_letter_given_three_letter.keys()
  # redo real_space_corelation.simple to use map objects instead of filenames
  try:
    rsc_params = real_space_correlation.master_params().extract()
    rsc_params.detail = "residue"
    for map_scope_name in ("map_1", "map_2"):
      getattr(rsc_params, map_scope_name).fill_missing_reflections = False
    if molprobity_map_params is not None:
      # copy the user-supplied map settings, one attribute at a time
      for setting in ("map_file_name",
                      "map_coefficients_file_name",
                      "map_coefficients_label"):
        setattr(rsc_params, setting, getattr(molprobity_map_params, setting))
    rsc = real_space_correlation.simple(
      fmodel=fmodel,
      pdb_hierarchy=pdb_hierarchy,
      params=rsc_params,
      log=null_out())
  except Exception:
    raise
def __init__ (self, fmodel, pdb_hierarchy, cc_min=0.8,
    molprobity_map_params=None) :
  """
  Set up the result containers and run the per-residue real-space
  correlation calculation.

  fmodel, pdb_hierarchy: forwarded unchanged to
    real_space_correlation.simple().
  cc_min: not consulted by the statements in this block (presumably the
    outlier threshold - TODO confirm against the rest of the class).
  molprobity_map_params: optional object; when given, its map_file_name,
    map_coefficients_file_name and map_coefficients_label values are copied
    onto the correlation parameters.

  Raises whatever the correlation calculation raises, with the original
  traceback intact.
  """
  from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
  from mmtbx import real_space_correlation
  validation.__init__(self)
  # arrays for different components
  self.everything = list()
  self.protein = list()
  self.other = list()
  self.water = list()
  aa_codes = one_letter_given_three_letter.keys()
  # redo real_space_corelation.simple to use map objects instead of filenames
  try :
    rsc_params = real_space_correlation.master_params().extract()
    rsc_params.detail="residue"
    rsc_params.map_1.fill_missing_reflections = False
    rsc_params.map_2.fill_missing_reflections = False
    if (molprobity_map_params is not None):
      rsc_params.map_file_name = molprobity_map_params.map_file_name
      rsc_params.map_coefficients_file_name = \
        molprobity_map_params.map_coefficients_file_name
      rsc_params.map_coefficients_label = \
        molprobity_map_params.map_coefficients_label
    rsc = real_space_correlation.simple(
      fmodel=fmodel,
      pdb_hierarchy=pdb_hierarchy,
      params=rsc_params,
      log=null_out())
  except Exception :
    # Bare "raise" re-raises the active exception with its original
    # traceback; "raise e" would restart the traceback at this line and
    # hide where the failure actually happened.
    raise
def __init__ (self, fmodel, pdb_hierarchy, cc_min=0.8) :
  """
  Set up the result containers and run the per-residue real-space
  correlation calculation.

  fmodel, pdb_hierarchy: forwarded unchanged to
    real_space_correlation.simple().
  cc_min: not consulted by the statements in this block (presumably the
    outlier threshold - TODO confirm against the rest of the class).

  Raises whatever the correlation calculation raises, with the original
  exception type and traceback intact.
  """
  from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
  from mmtbx import real_space_correlation
  validation.__init__(self)
  # arrays for different components
  self.everything = list()
  self.protein = list()
  self.other = list()
  self.water = list()
  aa_codes = one_letter_given_three_letter.keys()
  try :
    rsc_params = real_space_correlation.master_params().extract()
    rsc_params.detail="residue"
    rsc_params.map_1.fill_missing_reflections = False
    rsc_params.map_2.fill_missing_reflections = False
    rsc = real_space_correlation.simple(
      fmodel=fmodel,
      pdb_hierarchy=pdb_hierarchy,
      params=rsc_params,
      log=null_out())
  except Exception :
    # A plain string cannot be raised (string exceptions are gone since
    # Python 2.6): attempting it produces a TypeError that masks the real
    # failure. Re-raise the active exception instead so callers see the
    # actual error and its traceback.
    raise
def exercise_1(): pdb_file = libtbx.env.find_in_repositories( relative_path="phenix_regression/pdb/1yjp_h.pdb", test=os.path.isfile) mtz_file = libtbx.env.find_in_repositories( relative_path="phenix_regression/reflection_files/1yjp.mtz", test=os.path.isfile) if (None in [pdb_file, mtz_file]) : print "phenix_regression not found, skipping test" return False pdb_in = file_reader.any_file(pdb_file) hierarchy = pdb_in.file_object.hierarchy hierarchy.atoms().reset_i_seq() xrs = pdb_in.file_object.xray_structure_simple() mtz_in = file_reader.any_file(mtz_file) f_obs = mtz_in.file_server.miller_arrays[0] r_free = mtz_in.file_server.miller_arrays[1] r_free = r_free.customized_copy(data=(r_free.data()==1)) fmodel = mmtbx.utils.fmodel_simple( f_obs=f_obs, r_free_flags=r_free, xray_structures=[xrs], scattering_table="n_gaussian") map_stats = real_space_correlation.map_statistics_for_fragment( fragment=hierarchy, fmodel=fmodel) assert approx_equal(map_stats.cc, 0.960, eps=0.01) edm = fmodel.electron_density_map() map1_coeffs = edm.map_coefficients("2mFo-DFc") map1 = map1_coeffs.fft_map( resolution_factor=0.25).apply_sigma_scaling().real_map() map2_coeffs = edm.map_coefficients("Fmodel") map2 = map2_coeffs.fft_map( resolution_factor=0.25).apply_sigma_scaling().real_map() xray_structure = fmodel.xray_structure map_stats2 = real_space_correlation.map_statistics_for_atom_selection( atom_selection=flex.bool(xrs.sites_cart().size(), True), map1=map1, map2=map2, xray_structure=xrs) assert approx_equal(map_stats2.cc, map_stats.cc, 0.01) # XXX other code outside cctbx depends on the current API - do not simply # change the test if this breaks! results = real_space_correlation.simple( fmodel=fmodel, pdb_hierarchy=hierarchy, log=null_out()) assert isinstance(results, list) assert isinstance(results[0], group_args) assert (results[0].n_atoms == 1) assert (results[0].id_str == " A GLY 1 N ") return True
def exercise_1(): pdb_file = libtbx.env.find_in_repositories( relative_path="phenix_regression/pdb/1yjp_h.pdb", test=os.path.isfile) mtz_file = libtbx.env.find_in_repositories( relative_path="phenix_regression/reflection_files/1yjp.mtz", test=os.path.isfile) if (None in [pdb_file, mtz_file]): print "phenix_regression not found, skipping test" return False pdb_in = file_reader.any_file(pdb_file) hierarchy = pdb_in.file_object.hierarchy hierarchy.atoms().reset_i_seq() xrs = pdb_in.file_object.xray_structure_simple() mtz_in = file_reader.any_file(mtz_file) f_obs = mtz_in.file_server.miller_arrays[0] r_free = mtz_in.file_server.miller_arrays[1] r_free = r_free.customized_copy(data=(r_free.data() == 1)) fmodel = mmtbx.utils.fmodel_simple(f_obs=f_obs, r_free_flags=r_free, xray_structures=[xrs], scattering_table="n_gaussian") map_stats = real_space_correlation.map_statistics_for_fragment( fragment=hierarchy, fmodel=fmodel) assert approx_equal(map_stats.cc, 0.960, eps=0.01) edm = fmodel.electron_density_map() map1_coeffs = edm.map_coefficients("2mFo-DFc") map1 = map1_coeffs.fft_map( resolution_factor=0.25).apply_sigma_scaling().real_map() map2_coeffs = edm.map_coefficients("Fmodel") map2 = map2_coeffs.fft_map( resolution_factor=0.25).apply_sigma_scaling().real_map() xray_structure = fmodel.xray_structure map_stats2 = real_space_correlation.map_statistics_for_atom_selection( atom_selection=flex.bool(xrs.sites_cart().size(), True), map1=map1, map2=map2, xray_structure=xrs) assert approx_equal(map_stats2.cc, map_stats.cc, 0.01) # XXX other code outside cctbx depends on the current API - do not simply # change the test if this breaks! overall_cc, results = real_space_correlation.simple( fmodel=fmodel, pdb_hierarchy=hierarchy, log=null_out()) assert isinstance(overall_cc, float) assert isinstance(results, list) assert isinstance(results[0], group_args) assert (results[0].n_atoms == 1) assert (results[0].id_str == " A GLY 1 N ") return True
def run(args,
        command_name = "mmtbx.model_vs_data",
        show_geometry_statistics = True,
        model_size_max_atoms = 80000,
        data_size_max_reflections= 1000000,
        unit_cell_max_dimension = 800.,
        return_fmodel_and_pdb = False,
        out = None,
        log = sys.stdout):
  """
  Compare a model (PDB file) against reflection data and collect the
  statistics into an mvd object.

  args: command-line style argument list. When it is empty or exactly
    ["--help"], the usage message and defaults are printed to log and the
    function returns None.
  command_name, model_size_max_atoms: accepted but not referenced in the
    body of this function.
  show_geometry_statistics: forwarded to show_geometry(); its negation is
    passed as ignore_unknown_nonbonded_energy_types when reading the model.
  data_size_max_reflections, unit_cell_max_dimension: limits enforced only
    when params.ignore_giant_models_and_datasets is set.
  return_fmodel_and_pdb: when true, the processed PDB file and the fmodel
    object are attached to the returned object.
  out: stream handed to mvd_obj.show() and to the CC* report.
  log: stream for usage text, input processing and the per-bin / map CC
    reports.

  Returns the populated mvd object.

  Raises Sorry when no reflection file, crystal symmetry or PDB file is
  found, or when the size limits above are exceeded.

  Side effects visible here: writes one "<prefix>_<map>_map_coeffs.mtz" file
  per entry in params.map, and a "<prefix>.pickle" dump of the result when
  params.dump_result_object_as_pickle is set.
  """
  import mmtbx.f_model_info
  # No arguments or an explicit help request: print usage and stop.
  if(len(args)==0) or (args == ["--help"]) :
    print >> log, msg
    defaults(log=log, silent=False)
    return
  parsed = defaults(log=log, silent=True)
  #
  mvd_obj = mvd()
  #
  processed_args = utils.process_command_line_args(args = args,
    log = log, master_params = parsed)
  params = processed_args.params.extract()
  # Mandatory inputs: reflections, crystal symmetry and at least one model.
  reflection_files = processed_args.reflection_files
  if(len(reflection_files) == 0):
    raise Sorry("No reflection file found.")
  crystal_symmetry = processed_args.crystal_symmetry
  if(crystal_symmetry is None):
    raise Sorry("No crystal symmetry found.")
  if(len(processed_args.pdb_file_names) == 0):
    raise Sorry("No PDB file found.")
  pdb_file_names = processed_args.pdb_file_names
  # Select F-obs and free-R flags, honouring user-supplied labels/resolution.
  rfs = reflection_file_server(
    crystal_symmetry = crystal_symmetry,
    reflection_files = reflection_files)
  parameters = utils.data_and_flags_master_params().extract()
  if(params.f_obs_label is not None):
    parameters.labels = params.f_obs_label
  if(params.r_free_flags_label is not None):
    parameters.r_free_flags.label = params.r_free_flags_label
  if (params.high_resolution is not None) :
    parameters.high_resolution = params.high_resolution
  # The selection log goes to a throw-away StringIO rather than to `log`.
  determine_data_and_flags_result = utils.determine_data_and_flags(
    reflection_file_server = rfs,
    parameters = parameters,
    data_parameter_scope = "refinement.input.xray_data",
    flags_parameter_scope = "refinement.input.xray_data.r_free_flags",
    data_description = "X-ray data",
    keep_going = True,
    log = StringIO())
  f_obs = determine_data_and_flags_result.f_obs
  number_of_reflections = f_obs.indices().size()
  if(params.ignore_giant_models_and_datasets and
     number_of_reflections > data_size_max_reflections):
    raise Sorry("Too many reflections: %d"%number_of_reflections)
  # Largest of the first three unit cell parameters.
  max_unit_cell_dimension = max(f_obs.unit_cell().parameters()[:3])
  if(params.ignore_giant_models_and_datasets and
     max_unit_cell_dimension > unit_cell_max_dimension):
    raise Sorry("Too large unit cell (max dimension): %s"%
      str(max_unit_cell_dimension))
  # Without free-R flags, use an all-False flag array and no test flag value.
  r_free_flags = determine_data_and_flags_result.r_free_flags
  test_flag_value = determine_data_and_flags_result.test_flag_value
  if(r_free_flags is None):
    r_free_flags=f_obs.array(data=flex.bool(f_obs.data().size(), False))
    test_flag_value=None
  # Read and process the model file(s).
  mmtbx_pdb_file = mmtbx.utils.pdb_file(
    pdb_file_names = pdb_file_names,
    cif_objects = processed_args.cif_objects,
    crystal_symmetry = crystal_symmetry,
    use_neutron_distances = (params.scattering_table=="neutron"),
    ignore_unknown_nonbonded_energy_types = not show_geometry_statistics,
    log = log)
  mmtbx_pdb_file.set_ppf(stop_if_duplicate_labels = False)
  processed_pdb_file = mmtbx_pdb_file.processed_pdb_file
  pdb_raw_records = mmtbx_pdb_file.pdb_raw_records
  pdb_inp = mmtbx_pdb_file.pdb_inp
  #
  # just to avoid going any further with bad PDB file....
  pdb_inp.xray_structures_simple()
  # Atom selections for the individual model components.
  acp = processed_pdb_file.all_chain_proxies
  atom_selections = group_args(
    all = acp.selection(string = "all"),
    macromolecule = acp.selection(string = "protein or dna or rna"),
    solvent = acp.selection(string = "water"), # XXX single_atom_residue
    ligand = acp.selection(string = "not (protein or dna or rna or water)"),
    backbone = acp.selection(string = "backbone"),
    sidechain = acp.selection(string = "sidechain"))
  # A NEUTRON experiment type in the model header overrides the
  # user-selected scattering table.
  scattering_table = params.scattering_table
  exptl_method = pdb_inp.get_experiment_type()
  if (exptl_method is not None) and ("NEUTRON" in exptl_method) :
    scattering_table = "neutron"
  xsfppf = mmtbx.utils.xray_structures_from_processed_pdb_file(
    processed_pdb_file = processed_pdb_file,
    scattering_table = scattering_table,
    d_min = f_obs.d_min())
  xray_structures = xsfppf.xray_structures
  # Disabled branch (never executed).
  if(0): #XXX normalize occupancies if all models have occ=1 so the total=1
    n_models = len(xray_structures)
    for xrs in xray_structures:
      occ = xrs.scatterers().extract_occupancies()
      occ = occ/n_models
      xrs.set_occupancies(occ)
  model_selections = xsfppf.model_selections
  mvd_obj.collect(crystal = group_args(
    uc = f_obs.unit_cell(),
    sg = f_obs.crystal_symmetry().space_group_info().symbol_and_number(),
    n_sym_op = f_obs.crystal_symmetry().space_group_info().type().group().order_z(),
    uc_vol = f_obs.unit_cell().volume()))
  #
  hierarchy = pdb_inp.construct_hierarchy()
  pdb_atoms = hierarchy.atoms()
  pdb_atoms.reset_i_seq()
  #
  # Extract TLS
  pdb_tls = None
  pdb_inp_tls = pdb_inp.extract_tls_params(hierarchy)
  # Default: header TLS info with empty selections; replaced below when TLS
  # records are present, error-free and there is exactly one model.
  pdb_tls = group_args(pdb_inp_tls = pdb_inp_tls,
                       tls_selections = [],
                       tls_selection_strings = [])
  # XXX no TLS + multiple models
  if(pdb_inp_tls.tls_present and pdb_inp_tls.error_string is None and
     len(xray_structures)==1):
    pdb_tls = mmtbx.tls.tools.extract_tls_from_pdb(
      pdb_inp_tls = pdb_inp_tls,
      all_chain_proxies = mmtbx_pdb_file.processed_pdb_file.all_chain_proxies,
      xray_structure = xsfppf.xray_structure_all)
    # Only applied when every TLS group in the header got a selection.
    if(len(pdb_tls.tls_selections)==len(pdb_inp_tls.tls_params) and
       len(pdb_inp_tls.tls_params) > 0):
      xray_structures = [utils.extract_tls_and_u_total_from_pdb(
        f_obs = f_obs,
        r_free_flags = r_free_flags,
        xray_structure = xray_structures[0], # XXX no TLS + multiple models
        tls_selections = pdb_tls.tls_selections,
        tls_groups = pdb_inp_tls.tls_params)]
  ###########################
  geometry_statistics = show_geometry(
    xray_structures = xray_structures,
    processed_pdb_file = processed_pdb_file,
    scattering_table = scattering_table,
    hierarchy = hierarchy,
    model_selections = model_selections,
    show_geometry_statistics = show_geometry_statistics,
    mvd_obj = mvd_obj,
    atom_selections = atom_selections)
  ###########################
  mp = mmtbx.masks.mask_master_params.extract()
  # Labels are captured before sorting replaces the f_obs array.
  f_obs_labels = f_obs.info().label_string()
  f_obs = f_obs.sort(reverse=True, by_value="packed_indices")
  r_free_flags = r_free_flags.sort(reverse=True, by_value="packed_indices")
  fmodel = utils.fmodel_simple(
    xray_structures = xray_structures,
    scattering_table = scattering_table,
    mask_params = mp,
    f_obs = f_obs,
    r_free_flags = r_free_flags,
    skip_twin_detection = params.skip_twin_detection)
  # Difference between the input reflection count and what fmodel retained.
  n_outl = f_obs.data().size() - fmodel.f_obs().data().size()
  mvd_obj.collect(model_vs_data = show_model_vs_data(fmodel))
  # Extract information from PDB file header and output (if any)
  pub_r_work = None
  pub_r_free = None
  pub_high = None
  pub_low = None
  pub_sigma = None
  pub_program_name = None
  pub_solv_cont = None
  pub_matthews = None
  published_results = pdb_inp.get_r_rfree_sigma(file_name=pdb_file_names[0])
  if(published_results is not None):
    pub_r_work = published_results.r_work
    pub_r_free = published_results.r_free
    pub_high = published_results.high
    pub_low = published_results.low
    pub_sigma = published_results.sigma
  pub_program_name = pdb_inp.get_program_name()
  pub_solv_cont = pdb_inp.get_solvent_content()
  pub_matthews = pdb_inp.get_matthews_coeff()
  mvd_obj.collect(pdb_header = group_args(
    program_name = pub_program_name,
    year = pdb_inp.extract_header_year(),
    r_work = pub_r_work,
    r_free = pub_r_free,
    high_resolution = pub_high,
    low_resolution = pub_low,
    sigma_cutoff = pub_sigma,
    matthews_coeff = pub_matthews,
    solvent_cont = pub_solv_cont,
    tls = pdb_tls,
    exptl_method = exptl_method))
  #
  # Recompute R-factors using published cutoffs
  # fmodel_cut stays identical to fmodel unless the published cutoffs remove
  # some (but not all) reflections.
  fmodel_cut = fmodel
  tmp_sel = flex.bool(fmodel.f_obs().data().size(), True)
  if(pub_sigma is not None and fmodel.f_obs().sigmas() is not None):
    tmp_sel &= fmodel.f_obs().data() > fmodel.f_obs().sigmas()*pub_sigma
  # Resolution cutoffs are applied only when they differ from the data
  # limits by more than 0.03.
  if(pub_high is not None and abs(pub_high-fmodel.f_obs().d_min()) > 0.03):
    tmp_sel &= fmodel.f_obs().d_spacings().data() > pub_high
  if(pub_low is not None and abs(pub_low-fmodel.f_obs().d_max_min()[0]) > 0.03):
    tmp_sel &= fmodel.f_obs().d_spacings().data() < pub_low
  if(tmp_sel.count(True) != tmp_sel.size() and tmp_sel.count(True) > 0):
    fmodel_cut = utils.fmodel_simple(
      xray_structures = xray_structures,
      scattering_table = scattering_table,
      f_obs = fmodel.f_obs().select(tmp_sel),
      r_free_flags = fmodel.r_free_flags().select(tmp_sel),
      skip_twin_detection = params.skip_twin_detection)
  mvd_obj.collect(misc = group_args(
    r_work_cutoff = fmodel_cut.r_work(),
    r_free_cutoff = fmodel_cut.r_free(),
    n_refl_cutoff = fmodel_cut.f_obs().data().size()))
  mvd_obj.collect(data = show_data(fmodel = fmodel,
                                   n_outl = n_outl,
                                   test_flag_value = test_flag_value,
                                   f_obs_labels = f_obs_labels,
                                   fmodel_cut = fmodel_cut))
  # map statistics
  if(len(xray_structures)==1): # XXX no multi-model support yet
    mvd_obj.collect(maps = maps(fmodel = fmodel, mvd_obj = mvd_obj))
  # CC* and friends
  # Computed only when unmerged data were supplied.
  cc_star_stats = None
  if (params.unmerged_data is not None) :
    import mmtbx.validation.experimental
    import mmtbx.command_line
    f_obs = fmodel.f_obs().average_bijvoet_mates()
    unmerged_i_obs = mmtbx.command_line.load_and_validate_unmerged_data(
      f_obs=f_obs,
      file_name=params.unmerged_data,
      data_labels=params.unmerged_labels,
      log=null_out())
    cc_star_stats = mmtbx.validation.experimental.merging_and_model_statistics(
      f_model=fmodel.f_model().average_bijvoet_mates(),
      f_obs=f_obs,
      r_free_flags=fmodel.r_free_flags().average_bijvoet_mates(),
      unmerged_i_obs=unmerged_i_obs,
      n_bins=params.n_bins)
  # Main report goes to `out` (not `log`).
  mvd_obj.show(log=out)
  if (cc_star_stats is not None) :
    cc_star_stats.show_model_vs_data(out=out, prefix=" ")
  if return_fmodel_and_pdb :
    mvd_obj.pdb_file = processed_pdb_file
    mvd_obj.fmodel = fmodel
  # Optional map coefficient output, one MTZ file per requested map type;
  # the file name prefix is the first reflection file's base name up to its
  # first ".".
  if(len(params.map) > 0):
    for map_name_string in params.map:
      map_type_obj = mmtbx.map_names(map_name_string = map_name_string)
      map_params = mmtbx.maps.map_and_map_coeff_master_params().fetch(
        mmtbx.maps.cast_map_coeff_params(map_type_obj)).extract()
      maps_obj = mmtbx.maps.compute_map_coefficients(fmodel = fmodel_cut,
        params = map_params.map_coefficients)
      fn = os.path.basename(processed_args.reflection_file_names[0])
      if(fn.count(".")):
        prefix = fn[:fn.index(".")]
      else:
        prefix= fn
      file_name = prefix+"_%s_map_coeffs.mtz"%map_type_obj.format()
      maps_obj.write_mtz_file(file_name = file_name)
  # statistics in bins
  if(not fmodel.twin):
    print >> log, "Statistics in resolution bins:"
    mmtbx.f_model_info.r_work_and_completeness_in_resolution_bins(
      fmodel = fmodel, out = log, prefix=" ")
  # report map cc
  if(params.comprehensive and not fmodel_cut.twin and
     fmodel_cut.xray_structure is not None):
    rsc_params = real_space_correlation.master_params().extract()
    rsc_params.scattering_table = scattering_table
    real_space_correlation.simple(
      fmodel = fmodel_cut,
      pdb_hierarchy = hierarchy,
      params = rsc_params,
      log = log,
      show_results = True)
  # Optional pickle dump; the file name joins the unique base names (text
  # before the first ".") of all model and reflection files with "_".
  if(params.dump_result_object_as_pickle):
    output_prefixes = []
    for op in processed_args.pdb_file_names+processed_args.reflection_file_names:
      op = os.path.basename(op)
      # names without a "." are used whole
      try: op = op[:op.index(".")]
      except Exception: pass
      if(not op in output_prefixes): output_prefixes.append(op)
    output_prefix = "_".join(output_prefixes)
    easy_pickle.dump("%s.pickle"%output_prefix, mvd_obj)
  return mvd_obj
def __init__(self, model, fmodel, cc_min=0.8, molprobity_map_params=None):
  """
  Compute per-residue real-space correlation and sort the results into
  component lists.

  model: object providing get_hierarchy() and crystal_symmetry().
  fmodel: forwarded to real_space_correlation.simple(); only used when no
    map file is given.
  cc_min: residues whose correlation is below this value are flagged as
    outliers.
  molprobity_map_params: optional object. Its map coefficient file name and
    label are copied onto the correlation parameters; when its map_file_name
    is not None the map-based calculation (mmtbx.maps.map_model_cc) is used
    instead, with d_min as the resolution.

  Attributes set here: everything, protein, other, water, overall_rsc, fsc,
  results (an alias of protein); n_outliers is incremented per outlier.

  Any exception raised by either calculation propagates unchanged.
  """
  from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
  from mmtbx import real_space_correlation
  validation.__init__(self)
  pdb_hierarchy = model.get_hierarchy()
  # NOTE(review): this value is not used below; the call is kept as-is.
  crystal_symmetry = model.crystal_symmetry()
  # arrays for different components
  self.everything = list()
  self.protein = list()
  self.other = list()
  self.water = list()
  # redo real_space_corelation.simple to use map objects instead of filenames
  self.overall_rsc = None
  rsc = None
  try:
    rsc_params = real_space_correlation.master_params().extract()
    rsc_params.detail = "residue"
    rsc_params.map_1.fill_missing_reflections = False
    rsc_params.map_2.fill_missing_reflections = False
    use_maps = False
    if (molprobity_map_params is not None):
      rsc_params.map_coefficients_file_name = \
        molprobity_map_params.map_coefficients_file_name
      rsc_params.map_coefficients_label = \
        molprobity_map_params.map_coefficients_label
      if (molprobity_map_params.map_file_name is not None):
        use_maps = True
    # use mmtbx/command_line/map_model_cc.py for maps
    self.fsc = None
    if (use_maps):
      from iotbx import map_and_model
      from mmtbx.maps import map_model_cc
      from mmtbx.command_line.map_model_cc import get_fsc
      from iotbx.file_reader import any_file
      params = map_model_cc.master_params().extract()
      params.map_model_cc.resolution = molprobity_map_params.d_min
      map_object = any_file(
        molprobity_map_params.map_file_name).file_object
      # check that model crystal symmetry matches map crystal symmetry
      mmi = map_and_model.input(map_data=map_object.map_data(), model=model)
      rsc_object = map_model_cc.map_model_cc(
        mmi.map_data(),
        mmi.model().get_hierarchy(),
        mmi.crystal_symmetry(),
        params.map_model_cc)
      rsc_object.validate()
      rsc_object.run()
      rsc = rsc_object.get_results()
      self.overall_rsc = (rsc.cc_mask, rsc.cc_volume, rsc.cc_peaks)
      self.fsc = get_fsc(mmi.map_data(), mmi.model(), params.map_model_cc)
      self.fsc.atom_radius = rsc.atom_radius
      # from here on rsc is the per-residue list only
      rsc = rsc.cc_per_residue
    # mmtbx/real_space_correlation.py for X-ray/neutron data and map
    # coefficients
    else:
      self.overall_rsc, rsc = real_space_correlation.simple(
        fmodel=fmodel,
        pdb_hierarchy=pdb_hierarchy,
        params=rsc_params,
        log=null_out())
  except Exception:
    raise
  else:
    assert ((self.overall_rsc is not None) and (rsc is not None))
    for result_ in rsc:
      # The residue name is read once, from the attribute layout of the
      # branch that produced the record: map-based records are read via
      # result_.resname, fmodel-based ones via result_.residue.resname.
      # The shared bookkeeping below then works for both layouts instead of
      # assuming that every record carries a .residue attribute.
      if (use_maps):  # new rsc calculation (mmtbx/maps/model_map_cc.py)
        resname = result_.resname
        result = residue_real_space(
          chain_id=result_.chain_id,
          resname=resname,
          resseq=result_.resseq,
          icode=result_.icode,
          altloc="",
          score=result_.cc,
          b_iso=result_.b_iso_mean,
          occupancy=result_.occ_mean,
          outlier=result_.cc < cc_min,
          xyz=result_.xyz_mean)
      else:  # old rsc calculation (mmtbx/maps/real_space_correlation.py)
        resname = result_.residue.resname
        result = residue_real_space(
          chain_id=result_.chain_id,
          resname=resname,
          resseq=result_.residue.resseq,
          icode=result_.residue.icode,
          altloc="",
          score=result_.cc,
          b_iso=result_.b,
          occupancy=result_.occupancy,
          fmodel=result_.map_value_1,
          two_fofc=result_.map_value_2,
          outlier=result_.cc < cc_min,
          xyz=result_.residue.atoms().extract_xyz().mean())
      if result.is_outlier():
        self.n_outliers += 1
      # XXX unlike other validation metrics, we always save the results for
      # the real-space correlation, since these are used as the basis for
      # the multi-criterion plot in Phenix. The show() method will only
      # print outliers, however.
      if (resname != 'HOH'):  # water is handled by waters.py
        self.everything.append(result)
        if resname in one_letter_given_three_letter:
          self.protein.append(result)
        else:
          self.other.append(result)
    self.everything += self.water
    self.results = self.protein
def run(args,
        command_name="mmtbx.model_vs_data",
        show_geometry_statistics=True,
        model_size_max_atoms=80000,
        data_size_max_reflections=1000000,
        unit_cell_max_dimension=800.,
        return_fmodel_and_pdb=False,
        out=None,
        log=sys.stdout):
  """
  Compare a model (PDB file) against reflection data and collect the
  statistics into an mvd object.

  args: command-line style argument list. When it is empty or exactly
    ["--help"], the usage message and defaults are printed to log and the
    function returns None.
  command_name, model_size_max_atoms: accepted but not referenced in the
    body of this function.
  show_geometry_statistics: forwarded to show_geometry(); its negation is
    passed as ignore_unknown_nonbonded_energy_types when reading the model.
  data_size_max_reflections, unit_cell_max_dimension: limits enforced only
    when params.ignore_giant_models_and_datasets is set.
  return_fmodel_and_pdb: when true, the processed PDB file and the fmodel
    object are attached to the returned object.
  out: stream handed to mvd_obj.show() and to the CC* report.
  log: stream for usage text, input processing and the per-bin / map CC
    reports.

  Returns the populated mvd object.

  Raises Sorry when no reflection file, crystal symmetry or PDB file is
  found, or when the size limits above are exceeded.

  Side effects visible here: writes one "<prefix>_<map>_map_coeffs.mtz" file
  per entry in params.map, and a "<prefix>.pickle" dump of the result when
  params.dump_result_object_as_pickle is set.
  """
  import mmtbx.f_model.f_model_info
  # No arguments or an explicit help request: print usage and stop.
  if (len(args) == 0) or (args == ["--help"]):
    print >> log, msg
    defaults(log=log, silent=False)
    return
  parsed = defaults(log=log, silent=True)
  #
  mvd_obj = mvd()
  #
  processed_args = utils.process_command_line_args(args=args,
                                                   log=log,
                                                   master_params=parsed)
  params = processed_args.params.extract()
  # Mandatory inputs: reflections, crystal symmetry and at least one model.
  reflection_files = processed_args.reflection_files
  if (len(reflection_files) == 0):
    raise Sorry("No reflection file found.")
  crystal_symmetry = processed_args.crystal_symmetry
  if (crystal_symmetry is None):
    raise Sorry("No crystal symmetry found.")
  if (len(processed_args.pdb_file_names) == 0):
    raise Sorry("No PDB file found.")
  pdb_file_names = processed_args.pdb_file_names
  # Select F-obs and free-R flags, honouring user-supplied labels/resolution.
  rfs = reflection_file_server(crystal_symmetry=crystal_symmetry,
                               reflection_files=reflection_files)
  parameters = utils.data_and_flags_master_params().extract()
  if (params.f_obs_label is not None):
    parameters.labels = params.f_obs_label
  if (params.r_free_flags_label is not None):
    parameters.r_free_flags.label = params.r_free_flags_label
  if (params.high_resolution is not None):
    parameters.high_resolution = params.high_resolution
  # The selection log goes to a throw-away StringIO rather than to `log`.
  determine_data_and_flags_result = utils.determine_data_and_flags(
    reflection_file_server=rfs,
    parameters=parameters,
    data_parameter_scope="refinement.input.xray_data",
    flags_parameter_scope="refinement.input.xray_data.r_free_flags",
    data_description="X-ray data",
    keep_going=True,
    log=StringIO())
  f_obs = determine_data_and_flags_result.f_obs
  number_of_reflections = f_obs.indices().size()
  if (params.ignore_giant_models_and_datasets and
      number_of_reflections > data_size_max_reflections):
    raise Sorry("Too many reflections: %d" % number_of_reflections)
  # Largest of the first three unit cell parameters.
  max_unit_cell_dimension = max(f_obs.unit_cell().parameters()[:3])
  if (params.ignore_giant_models_and_datasets and
      max_unit_cell_dimension > unit_cell_max_dimension):
    raise Sorry("Too large unit cell (max dimension): %s" %
                str(max_unit_cell_dimension))
  # Without free-R flags, use an all-False flag array and no test flag value.
  r_free_flags = determine_data_and_flags_result.r_free_flags
  test_flag_value = determine_data_and_flags_result.test_flag_value
  if (r_free_flags is None):
    r_free_flags = f_obs.array(data=flex.bool(f_obs.data().size(), False))
    test_flag_value = None
  # Read and process the model file(s).
  mmtbx_pdb_file = mmtbx.utils.pdb_file(
    pdb_file_names=pdb_file_names,
    cif_objects=processed_args.cif_objects,
    crystal_symmetry=crystal_symmetry,
    use_neutron_distances=(params.scattering_table == "neutron"),
    ignore_unknown_nonbonded_energy_types=not show_geometry_statistics,
    log=log)
  mmtbx_pdb_file.set_ppf(stop_if_duplicate_labels=False)
  processed_pdb_file = mmtbx_pdb_file.processed_pdb_file
  pdb_raw_records = mmtbx_pdb_file.pdb_raw_records
  pdb_inp = mmtbx_pdb_file.pdb_inp
  #
  # just to avoid going any further with bad PDB file....
  pdb_inp.xray_structures_simple()
  # Atom selections for the individual model components.
  acp = processed_pdb_file.all_chain_proxies
  atom_selections = group_args(
    all=acp.selection(string="all"),
    macromolecule=acp.selection(string="protein or dna or rna"),
    solvent=acp.selection(string="water"),  # XXX single_atom_residue
    ligand=acp.selection(string="not (protein or dna or rna or water)"),
    backbone=acp.selection(string="backbone"),
    sidechain=acp.selection(string="sidechain"))
  # A NEUTRON experiment type in the model header overrides the
  # user-selected scattering table.
  scattering_table = params.scattering_table
  exptl_method = pdb_inp.get_experiment_type()
  if (exptl_method is not None) and ("NEUTRON" in exptl_method):
    scattering_table = "neutron"
  xsfppf = mmtbx.utils.xray_structures_from_processed_pdb_file(
    processed_pdb_file=processed_pdb_file,
    scattering_table=scattering_table,
    d_min=f_obs.d_min())
  xray_structures = xsfppf.xray_structures
  # Disabled branch (never executed).
  if (0):  #XXX normalize occupancies if all models have occ=1 so the total=1
    n_models = len(xray_structures)
    for xrs in xray_structures:
      occ = xrs.scatterers().extract_occupancies()
      occ = occ / n_models
      xrs.set_occupancies(occ)
  model_selections = xsfppf.model_selections
  mvd_obj.collect(crystal=group_args(
    uc=f_obs.unit_cell(),
    sg=f_obs.crystal_symmetry().space_group_info().symbol_and_number(),
    n_sym_op=f_obs.crystal_symmetry().space_group_info().type().group(
    ).order_z(),
    uc_vol=f_obs.unit_cell().volume()))
  #
  hierarchy = pdb_inp.construct_hierarchy()
  pdb_atoms = hierarchy.atoms()
  pdb_atoms.reset_i_seq()
  #
  # Extract TLS
  pdb_tls = None
  pdb_inp_tls = pdb_inp.extract_tls_params(hierarchy)
  # Default: header TLS info with empty selections; replaced below when TLS
  # records are present, error-free and there is exactly one model.
  pdb_tls = group_args(pdb_inp_tls=pdb_inp_tls,
                       tls_selections=[],
                       tls_selection_strings=[])
  # XXX no TLS + multiple models
  if (pdb_inp_tls.tls_present and pdb_inp_tls.error_string is None and
      len(xray_structures) == 1):
    pdb_tls = mmtbx.tls.tools.extract_tls_from_pdb(
      pdb_inp_tls=pdb_inp_tls,
      all_chain_proxies=mmtbx_pdb_file.processed_pdb_file.
      all_chain_proxies,
      xray_structure=xsfppf.xray_structure_all)
    # Only applied when every TLS group in the header got a selection.
    if (len(pdb_tls.tls_selections) == len(pdb_inp_tls.tls_params) and
        len(pdb_inp_tls.tls_params) > 0):
      xray_structures = [
        utils.extract_tls_and_u_total_from_pdb(
          f_obs=f_obs,
          r_free_flags=r_free_flags,
          xray_structure=xray_structures[
            0],  # XXX no TLS + multiple models
          tls_selections=pdb_tls.tls_selections,
          tls_groups=pdb_inp_tls.tls_params)
      ]
  ###########################
  geometry_statistics = show_geometry(
    xray_structures=xray_structures,
    processed_pdb_file=processed_pdb_file,
    scattering_table=scattering_table,
    hierarchy=hierarchy,
    model_selections=model_selections,
    show_geometry_statistics=show_geometry_statistics,
    mvd_obj=mvd_obj,
    atom_selections=atom_selections)
  ###########################
  mp = mmtbx.masks.mask_master_params.extract()
  # Labels are captured before sorting replaces the f_obs array.
  f_obs_labels = f_obs.info().label_string()
  f_obs = f_obs.sort(reverse=True, by_value="packed_indices")
  r_free_flags = r_free_flags.sort(reverse=True, by_value="packed_indices")
  fmodel = utils.fmodel_simple(
    xray_structures=xray_structures,
    scattering_table=scattering_table,
    mask_params=mp,
    f_obs=f_obs,
    r_free_flags=r_free_flags,
    skip_twin_detection=params.skip_twin_detection)
  # Difference between the input reflection count and what fmodel retained.
  n_outl = f_obs.data().size() - fmodel.f_obs().data().size()
  mvd_obj.collect(model_vs_data=show_model_vs_data(fmodel))
  # Extract information from PDB file header and output (if any)
  pub_r_work = None
  pub_r_free = None
  pub_high = None
  pub_low = None
  pub_sigma = None
  pub_program_name = None
  pub_solv_cont = None
  pub_matthews = None
  published_results = pdb_inp.get_r_rfree_sigma(file_name=pdb_file_names[0])
  if (published_results is not None):
    pub_r_work = published_results.r_work
    pub_r_free = published_results.r_free
    pub_high = published_results.high
    pub_low = published_results.low
    pub_sigma = published_results.sigma
  pub_program_name = pdb_inp.get_program_name()
  pub_solv_cont = pdb_inp.get_solvent_content()
  pub_matthews = pdb_inp.get_matthews_coeff()
  mvd_obj.collect(pdb_header=group_args(program_name=pub_program_name,
                                        year=pdb_inp.extract_header_year(),
                                        r_work=pub_r_work,
                                        r_free=pub_r_free,
                                        high_resolution=pub_high,
                                        low_resolution=pub_low,
                                        sigma_cutoff=pub_sigma,
                                        matthews_coeff=pub_matthews,
                                        solvent_cont=pub_solv_cont,
                                        tls=pdb_tls,
                                        exptl_method=exptl_method))
  #
  # Recompute R-factors using published cutoffs
  # fmodel_cut stays identical to fmodel unless the published cutoffs remove
  # some (but not all) reflections.
  fmodel_cut = fmodel
  tmp_sel = flex.bool(fmodel.f_obs().data().size(), True)
  if (pub_sigma is not None and fmodel.f_obs().sigmas() is not None):
    tmp_sel &= fmodel.f_obs().data() > fmodel.f_obs().sigmas() * pub_sigma
  # Resolution cutoffs are applied only when they differ from the data
  # limits by more than 0.03.
  if (pub_high is not None and
      abs(pub_high - fmodel.f_obs().d_min()) > 0.03):
    tmp_sel &= fmodel.f_obs().d_spacings().data() > pub_high
  if (pub_low is not None and
      abs(pub_low - fmodel.f_obs().d_max_min()[0]) > 0.03):
    tmp_sel &= fmodel.f_obs().d_spacings().data() < pub_low
  if (tmp_sel.count(True) != tmp_sel.size() and tmp_sel.count(True) > 0):
    fmodel_cut = utils.fmodel_simple(
      xray_structures=xray_structures,
      scattering_table=scattering_table,
      f_obs=fmodel.f_obs().select(tmp_sel),
      r_free_flags=fmodel.r_free_flags().select(tmp_sel),
      skip_twin_detection=params.skip_twin_detection)
  mvd_obj.collect(
    misc=group_args(r_work_cutoff=fmodel_cut.r_work(),
                    r_free_cutoff=fmodel_cut.r_free(),
                    n_refl_cutoff=fmodel_cut.f_obs().data().size()))
  mvd_obj.collect(data=show_data(fmodel=fmodel,
                                 n_outl=n_outl,
                                 test_flag_value=test_flag_value,
                                 f_obs_labels=f_obs_labels,
                                 fmodel_cut=fmodel_cut))
  # CC* and friends
  # Computed only when unmerged data were supplied.
  cc_star_stats = None
  if (params.unmerged_data is not None):
    import mmtbx.validation.experimental
    import mmtbx.command_line
    f_obs = fmodel.f_obs().average_bijvoet_mates()
    unmerged_i_obs = mmtbx.command_line.load_and_validate_unmerged_data(
      f_obs=f_obs,
      file_name=params.unmerged_data,
      data_labels=params.unmerged_labels,
      log=null_out())
    cc_star_stats = mmtbx.validation.experimental.merging_and_model_statistics(
      f_model=fmodel.f_model().average_bijvoet_mates(),
      f_obs=f_obs,
      r_free_flags=fmodel.r_free_flags().average_bijvoet_mates(),
      unmerged_i_obs=unmerged_i_obs,
      n_bins=params.n_bins)
  # Main report goes to `out` (not `log`).
  mvd_obj.show(log=out)
  if (cc_star_stats is not None):
    cc_star_stats.show_model_vs_data(out=out, prefix=" ")
  if return_fmodel_and_pdb:
    mvd_obj.pdb_file = processed_pdb_file
    mvd_obj.fmodel = fmodel
  # Optional map coefficient output, one MTZ file per requested map type;
  # the file name prefix is the first reflection file's base name up to its
  # first ".".
  if (len(params.map) > 0):
    for map_name_string in params.map:
      map_type_obj = mmtbx.map_names(map_name_string=map_name_string)
      map_params = mmtbx.maps.map_and_map_coeff_master_params().fetch(
        mmtbx.maps.cast_map_coeff_params(map_type_obj)).extract()
      maps_obj = mmtbx.maps.compute_map_coefficients(
        fmodel=fmodel_cut, params=map_params.map_coefficients)
      fn = os.path.basename(processed_args.reflection_file_names[0])
      if (fn.count(".")):
        prefix = fn[:fn.index(".")]
      else:
        prefix = fn
      file_name = prefix + "_%s_map_coeffs.mtz" % map_type_obj.format()
      maps_obj.write_mtz_file(file_name=file_name)
  # statistics in bins
  if (not fmodel.twin):
    print >> log, "Statistics in resolution bins:"
    mmtbx.f_model.f_model_info.r_work_and_completeness_in_resolution_bins(
      fmodel=fmodel, out=log, prefix=" ")
  # report map cc
  if (params.comprehensive and not fmodel_cut.twin and
      fmodel_cut.xray_structure is not None):
    rsc_params = real_space_correlation.master_params().extract()
    rsc_params.scattering_table = scattering_table
    real_space_correlation.simple(fmodel=fmodel_cut,
                                  pdb_hierarchy=hierarchy,
                                  params=rsc_params,
                                  log=log,
                                  show_results=True)
  # Optional pickle dump; the file name joins the unique base names (text
  # before the first ".") of all model and reflection files with "_".
  if (params.dump_result_object_as_pickle):
    output_prefixes = []
    for op in processed_args.pdb_file_names + processed_args.reflection_file_names:
      op = os.path.basename(op)
      # names without a "." are used whole
      try:
        op = op[:op.index(".")]
      except Exception:
        pass
      if (not op in output_prefixes):
        output_prefixes.append(op)
    output_prefix = "_".join(output_prefixes)
    easy_pickle.dump("%s.pickle" % output_prefix, mvd_obj)
  return mvd_obj
def __init__(self, model, fmodel, cc_min=0.8, molprobity_map_params=None):
    """Set up real-space correlation results for a model.

    Two code paths are visible here:

    * If ``molprobity_map_params.map_file_name`` is given, the map file is
      read and ``map_model_cc`` is run against the model; ``self.fsc`` is
      filled from ``get_fsc``.
    * Otherwise ``real_space_correlation.simple`` is run with ``fmodel`` and
      the model's hierarchy.

    Parameters:
      model: object providing ``get_hierarchy()`` and ``crystal_symmetry()``;
        also passed to ``map_and_model.input`` on the map path.
      fmodel: forwarded to ``real_space_correlation.simple`` on the
        non-map path.
      cc_min: not referenced in the visible part of this method.
      molprobity_map_params: optional; the attributes read here are
        ``map_coefficients_file_name``, ``map_coefficients_label``,
        ``map_file_name`` and ``d_min``.

    Attributes set: ``everything``, ``protein``, ``other``, ``water`` (empty
    lists), ``overall_rsc`` and ``fsc``.

    Any exception raised by the correlation code propagates unchanged.
    """
    from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
    from mmtbx import real_space_correlation
    validation.__init__(self)
    pdb_hierarchy = model.get_hierarchy()
    # NOTE(review): crystal_symmetry and aa_codes are not referenced in the
    # visible part of this method -- confirm before removing them.
    crystal_symmetry = model.crystal_symmetry()

    # arrays for different components
    self.everything = list()
    self.protein = list()
    self.other = list()
    self.water = list()
    aa_codes = one_letter_given_three_letter.keys()

    # redo real_space_correlation.simple to use map objects instead of
    # filenames
    self.overall_rsc = None
    rsc = None
    try:
        rsc_params = real_space_correlation.master_params().extract()
        rsc_params.detail = "residue"
        rsc_params.map_1.fill_missing_reflections = False
        rsc_params.map_2.fill_missing_reflections = False
        use_maps = False
        if molprobity_map_params is not None:
            rsc_params.map_coefficients_file_name = \
                molprobity_map_params.map_coefficients_file_name
            rsc_params.map_coefficients_label = \
                molprobity_map_params.map_coefficients_label
            if molprobity_map_params.map_file_name is not None:
                use_maps = True

        self.fsc = None
        if use_maps:
            # use mmtbx/command_line/map_model_cc.py for maps
            from iotbx import map_and_model
            from mmtbx.maps import map_model_cc
            from mmtbx.command_line.map_model_cc import get_fsc
            from iotbx.file_reader import any_file
            params = map_model_cc.master_params().extract()
            params.map_model_cc.resolution = molprobity_map_params.d_min
            map_object = any_file(
                molprobity_map_params.map_file_name).file_object
            # check that model crystal symmetry matches map crystal symmetry
            mmi = map_and_model.input(
                map_data=map_object.map_data(), model=model)
            rsc_object = map_model_cc.map_model_cc(
                mmi.map_data(),
                mmi.model().get_hierarchy(),
                mmi.crystal_symmetry(),
                params.map_model_cc)
            rsc_object.validate()
            rsc_object.run()
            rsc = rsc_object.get_results()
            self.overall_rsc = (rsc.cc_mask, rsc.cc_volume, rsc.cc_peaks)
            self.fsc = get_fsc(
                mmi.map_data(), mmi.model(), params.map_model_cc)
            self.fsc.atom_radius = rsc.atom_radius
            # from here on only the per-residue results are kept in rsc
            rsc = rsc.cc_per_residue
        else:
            # mmtbx/real_space_correlation.py for X-ray/neutron data and map
            # coefficients
            self.overall_rsc, rsc = real_space_correlation.simple(
                fmodel=fmodel,
                pdb_hierarchy=pdb_hierarchy,
                params=rsc_params,
                log=null_out())
    except Exception:
        # Re-raise unchanged.  Written without the Python-2-only
        # "except Exception, e" form, which is a SyntaxError on Python 3
        # and bound a name that was never used.
        raise
def __init__(self, fmodel, pdb_hierarchy, crystal_symmetry=None, cc_min=0.8,
             molprobity_map_params=None):
    """Set up real-space correlation results for a PDB hierarchy.

    Two code paths are visible here:

    * If ``molprobity_map_params.map_file_name`` is given, the map file is
      read and ``map_model_cc`` is run against ``pdb_hierarchy``;
      ``self.fsc`` is filled from ``get_fsc``.
    * Otherwise ``real_space_correlation.simple`` is run with ``fmodel`` and
      ``pdb_hierarchy``.

    Parameters:
      fmodel: forwarded to ``real_space_correlation.simple`` on the
        non-map path.
      pdb_hierarchy: hierarchy used on both paths.
      crystal_symmetry: optional symmetry for the model.  On the map path it
        is replaced by the symmetry built from the map file when it is
        ``None`` or not similar to the map's symmetry.
      cc_min: not referenced in the visible part of this method.
      molprobity_map_params: optional; the attributes read here are
        ``map_coefficients_file_name``, ``map_coefficients_label``,
        ``map_file_name`` and ``d_min``.

    Attributes set: ``everything``, ``protein``, ``other``, ``water`` (empty
    lists), ``overall_rsc`` and ``fsc``.

    Any exception raised by the correlation code propagates unchanged.
    """
    from iotbx.pdb.amino_acid_codes import one_letter_given_three_letter
    from mmtbx import real_space_correlation
    validation.__init__(self)

    # arrays for different components
    self.everything = list()
    self.protein = list()
    self.other = list()
    self.water = list()
    # NOTE(review): aa_codes is not referenced in the visible part of this
    # method -- confirm before removing it.
    aa_codes = one_letter_given_three_letter.keys()

    # redo real_space_correlation.simple to use map objects instead of
    # filenames
    self.overall_rsc = None
    rsc = None
    try:
        rsc_params = real_space_correlation.master_params().extract()
        rsc_params.detail = "residue"
        rsc_params.map_1.fill_missing_reflections = False
        rsc_params.map_2.fill_missing_reflections = False
        use_maps = False
        if molprobity_map_params is not None:
            rsc_params.map_coefficients_file_name = \
                molprobity_map_params.map_coefficients_file_name
            rsc_params.map_coefficients_label = \
                molprobity_map_params.map_coefficients_label
            if molprobity_map_params.map_file_name is not None:
                use_maps = True

        self.fsc = None
        if use_maps:
            # use mmtbx/command_line/map_model_cc.py for maps
            from scitbx.array_family import flex
            import iotbx.pdb
            from mmtbx.maps import map_model_cc
            from mmtbx.command_line.map_model_cc import get_fsc
            from iotbx.file_reader import any_file
            from cctbx import crystal, sgtbx
            params = map_model_cc.master_params().extract()
            params.map_model_cc.resolution = molprobity_map_params.d_min
            map_object = any_file(
                molprobity_map_params.map_file_name).file_object

            # -----------------------------------------------------------------
            # check that model crystal symmetry matches map crystal symmetry
            # if inconsistent, map parameters take precedence
            # TODO: centralize data consistency checks prior to running
            # validation
            map_crystal_symmetry = crystal.symmetry(
                unit_cell=map_object.unit_cell(),
                space_group=sgtbx.space_group_info(
                    map_object.space_group_number).group())
            # crystal_symmetry defaults to None; test that explicitly so the
            # comparison is never attempted against None and the map
            # symmetry is used instead.
            if (crystal_symmetry is None or
                    not map_crystal_symmetry.is_similar_symmetry(
                        crystal_symmetry)):
                crystal_symmetry = map_crystal_symmetry
            # -----------------------------------------------------------------

            map_data = map_object.map_data()
            rsc_object = map_model_cc.map_model_cc(
                map_data, pdb_hierarchy, crystal_symmetry,
                params.map_model_cc)
            rsc_object.validate()
            rsc_object.run()
            rsc = rsc_object.get_results()
            self.overall_rsc = (rsc.cc_mask, rsc.cc_volume, rsc.cc_peaks)

            # pdb_hierarchy.as_pdb_input is being phased out since that
            # function just re-processes the file from text and can be lossy
            # this is a placeholder until tools get updated to use the model
            # class
            pdb_input = iotbx.pdb.input(
                source_info='pdb_hierarchy',
                lines=flex.split_lines(pdb_hierarchy.as_pdb_string()))
            # NOTE(review): relies on the name mmtbx (and mmtbx.model) being
            # importable at module level -- not visible from this block.
            model = mmtbx.model.manager(model_input=pdb_input)
            self.fsc = get_fsc(map_data, model, params.map_model_cc)
            self.fsc.atom_radius = rsc.atom_radius
            # from here on only the per-residue results are kept in rsc
            rsc = rsc.cc_per_residue
        else:
            # mmtbx/real_space_correlation.py for X-ray/neutron data and map
            # coefficients
            self.overall_rsc, rsc = real_space_correlation.simple(
                fmodel=fmodel,
                pdb_hierarchy=pdb_hierarchy,
                params=rsc_params,
                log=null_out())
    except Exception:
        # Re-raise unchanged.  Written without the Python-2-only
        # "except Exception, e" form, which is a SyntaxError on Python 3
        # and bound a name that was never used.
        raise