def __init__(self,
             xray_structure,
             pdb_hierarchy,
             f_obs,
             r_free_flags,
             rigid_body_refine=False,
             optimize_b_factors=False,
             skip_twin_detection=False,
             scattering_table="n_gaussian"):
    """Compute R-factors for a model against data, optionally after
    rigid-body refinement.

    The unit cell of ``f_obs`` is combined with the space group of
    ``xray_structure``; the structure, data and free flags are all moved to
    that combined symmetry before an fmodel object is built.

    Results are stored on the instance:
      * ``r_work_start`` / ``r_free_start`` -- R-factors of the input model.
      * ``r_work`` / ``r_free`` -- final R-factors (equal to the starting
        values unless ``rigid_body_refine`` is true).
      * ``xray_structure`` -- the structure the final R-factors refer to.

    :param xray_structure: input model as an X-ray structure object.
    :param pdb_hierarchy: hierarchy used to build the rigid-group selections.
    :param f_obs: experimental data array.
    :param r_free_flags: test-set flags array.
    :param rigid_body_refine: when true, refine one rigid group per chain.
    :param optimize_b_factors: accepted for interface compatibility; it is
        not referenced in this method.
    :param skip_twin_detection: forwarded to ``fmodel_simple``.
    :param scattering_table: forwarded to ``fmodel_simple``.
    """
    self.r_work = None
    self.r_free = None
    self.xray_structure = None
    from mmtbx.utils import fmodel_simple
    from cctbx import crystal
    combined_symmetry = crystal.symmetry(
        unit_cell=f_obs.unit_cell(),
        space_group=xray_structure.space_group())
    xray_structure = xray_structure.customized_copy(
        crystal_symmetry=combined_symmetry)
    f_obs = f_obs.customized_copy(
        crystal_symmetry=combined_symmetry).eliminate_sys_absent()
    r_free_flags = r_free_flags.customized_copy(
        crystal_symmetry=combined_symmetry).eliminate_sys_absent()
    fmodel = fmodel_simple(
        f_obs=f_obs,
        r_free_flags=r_free_flags,
        xray_structures=[xray_structure],
        skip_twin_detection=skip_twin_detection,
        scattering_table=scattering_table)
    self.r_work_start = fmodel.r_work()
    self.r_free_start = fmodel.r_free()
    if not rigid_body_refine:
        self.r_work = self.r_work_start
        self.r_free = self.r_free_start
        self.xray_structure = xray_structure
        return
    from mmtbx.refinement import rigid_body
    selection_strings = rigid_body.rigid_groups_from_pdb_chains(
        pdb_hierarchy=pdb_hierarchy,
        xray_structure=xray_structure,
        group_all_by_chain=True,
        check_for_atoms_on_special_positions=True,
        log=null_out())
    # The selection cache does not depend on the selection string, so it is
    # built once instead of once per rigid group.
    selection_cache = pdb_hierarchy.atom_selection_cache()
    selections = []
    for sele_str in selection_strings:
        sele = selection_cache.selection(sele_str)
        selections.append(sele.iselection())
    refined = rigid_body.manager(
        fmodel=fmodel,
        selections=selections,
        params=rigid_body.master_params.extract(),
        log=null_out())
    self.xray_structure = refined.fmodel.xray_structure
    self.r_work = refined.fmodel.r_work()
    self.r_free = refined.fmodel.r_free()
def __init__(self,
             xray_structure,
             pdb_hierarchy,
             f_obs,
             r_free_flags,
             rigid_body_refine=False,
             optimize_b_factors=False,
             skip_twin_detection=False,
             scattering_table="n_gaussian"):
    """Evaluate a model against data and optionally rigid-body refine it.

    The structure, ``f_obs`` and ``r_free_flags`` are first placed on a
    common symmetry (unit cell taken from ``f_obs``, space group taken from
    ``xray_structure``), then an fmodel object is created from them.

    Attributes set on the instance:
      * ``r_work_start``, ``r_free_start`` -- R-factors before refinement.
      * ``r_work``, ``r_free`` -- R-factors after rigid-body refinement, or
        the starting values when ``rigid_body_refine`` is false.
      * ``xray_structure`` -- structure matching ``r_work`` / ``r_free``.

    :param xray_structure: input model as an X-ray structure object.
    :param pdb_hierarchy: hierarchy the per-chain selections are made from.
    :param f_obs: experimental data array.
    :param r_free_flags: test-set flags array.
    :param rigid_body_refine: run rigid-body refinement with one group per
        chain when true.
    :param optimize_b_factors: not referenced in this method; kept so that
        existing callers passing it continue to work.
    :param skip_twin_detection: forwarded to ``fmodel_simple``.
    :param scattering_table: forwarded to ``fmodel_simple``.
    """
    self.r_work = None
    self.r_free = None
    self.xray_structure = None
    from mmtbx.utils import fmodel_simple
    from cctbx import crystal
    combined_symmetry = crystal.symmetry(
        unit_cell=f_obs.unit_cell(),
        space_group=xray_structure.space_group())
    xray_structure = xray_structure.customized_copy(
        crystal_symmetry=combined_symmetry)
    f_obs = f_obs.customized_copy(
        crystal_symmetry=combined_symmetry).eliminate_sys_absent()
    r_free_flags = r_free_flags.customized_copy(
        crystal_symmetry=combined_symmetry).eliminate_sys_absent()
    fmodel = fmodel_simple(f_obs=f_obs,
                           r_free_flags=r_free_flags,
                           xray_structures=[xray_structure],
                           skip_twin_detection=skip_twin_detection,
                           scattering_table=scattering_table)
    self.r_work_start = fmodel.r_work()
    self.r_free_start = fmodel.r_free()
    if not rigid_body_refine:
        self.r_work = self.r_work_start
        self.r_free = self.r_free_start
        self.xray_structure = xray_structure
    else:
        from mmtbx.refinement import rigid_body
        selection_strings = rigid_body.rigid_groups_from_pdb_chains(
            pdb_hierarchy=pdb_hierarchy,
            xray_structure=xray_structure,
            group_all_by_chain=True,
            check_for_atoms_on_special_positions=True,
            log=null_out())
        # Build the atom selection cache once; the original rebuilt it for
        # every selection string even though it is loop-invariant.
        selection_cache = pdb_hierarchy.atom_selection_cache()
        selections = [
            selection_cache.selection(sele_str).iselection()
            for sele_str in selection_strings
        ]
        refined = rigid_body.manager(
            fmodel=fmodel,
            selections=selections,
            params=rigid_body.master_params.extract(),
            log=null_out())
        self.xray_structure = refined.fmodel.xray_structure
        self.r_work = refined.fmodel.r_work()
        self.r_free = refined.fmodel.r_free()
def run(self, args, command_name, out=sys.stdout):
    """Assign per-atom probabilities to the models of an ensemble PDB file.

    Two modes are selected from ``self.params`` (the ``ensemble_probability``
    scope extracted from the command line):

    * Map mode (``assign_sigma_from_map`` set and ``ensemble_sigma_map_input``
      given): the first Miller array of the supplied MTZ file is passed to
      ``get_map_sigma`` for every model and the result is written with
      ``write_ensemble_pdb``.
    * Full analysis (otherwise): an Fcalc map is computed for every model
      and for the ensemble average; each atom gets the ratio of the two
      values returned by ``get_map_sigma`` as occupancy, limited to
      (0.001 .. 1.0), and the ensemble is written out.  Optionally the
      models are ranked by summed negative log of these values and
      R-factors are recomputed for several percentile cutoffs.

    :param args: command-line arguments (exactly one PDB file is required;
        reflection files are optional unless ``fobs_vs_fcalc_post_nll``).
    :param command_name: program name used in usage text and log header.
    :param out: not referenced in this method; logging goes to ``self.log``.
    :raises Sorry: on invalid input (number of PDB files, missing data,
        non-MTZ map input, missing resolution limit).

    Side effects visible here: sets ``self.params`` and ``self.log``,
    replaces ``sys.stderr`` with ``self.log``, and opens a
    ``*_pensemble.log`` file that is handed to the logger.
    """
    # The parsed result is not used further; the call is kept so the option
    # parser still processes ``args``.
    (iotbx_option_parser(
        usage="%s [options]" % command_name,
        description='Example: %s data.mtz data.mtz ref_model.pdb' %
        command_name).option(None,
                             "--show_defaults",
                             action="store_true",
                             help="Show list of parameters.")).process(
                                 args=args)
    processed_args = utils.process_command_line_args(
        args=args, log=sys.stdout, master_params=master_phil)
    params = processed_args.params
    if params is None:
        params = master_phil
    self.params = params.extract().ensemble_probability
    pdb_file_names = processed_args.pdb_file_names
    if len(pdb_file_names) != 1:
        raise Sorry("Only one PDB structure may be used")
    pdb_file = file_reader.any_file(pdb_file_names[0])
    # Prefix shared by every output file name: input file name without the
    # directory part and without '.pdb'.
    pdb_prefix = pdb_file_names[0].split('/')[-1].replace('.pdb', '')
    self.log = multi_out()
    self.log.register(label="stdout", file_object=sys.stdout)
    self.log.register(label="log_buffer",
                      file_object=StringIO(),
                      atexit_send_to=None)
    sys.stderr = self.log
    # The file object is handed over to self.log, so it is intentionally not
    # closed here.
    log_file = open(pdb_prefix + '_pensemble.log', "w")
    self.log.replace_stringio(old_label="log_buffer",
                              new_label="log",
                              new_file_object=log_file)
    utils.print_header(command_name, out=self.log)
    params.show(out=self.log)
    #
    f_obs = None
    r_free_flags = None
    reflection_files = processed_args.reflection_files
    if self.params.fobs_vs_fcalc_post_nll:
        if len(reflection_files) == 0:
            raise Sorry(
                "Fobs from input MTZ required for fobs_vs_fcalc_post_nll")
    if len(reflection_files) > 0:
        crystal_symmetry = processed_args.crystal_symmetry
        print('Reflection file : ',
              processed_args.reflection_file_names[0],
              file=self.log)
        utils.print_header("Model and data statistics", out=self.log)
        rfs = reflection_file_server(
            crystal_symmetry=crystal_symmetry,
            reflection_files=processed_args.reflection_files,
            log=self.log)
        parameters = extract_xtal_data.data_and_flags_master_params(
        ).extract()
        determine_data_and_flags_result = extract_xtal_data.run(
            reflection_file_server=rfs,
            parameters=parameters,
            data_parameter_scope="refinement.input.xray_data",
            flags_parameter_scope="refinement.input.xray_data.r_free_flags",
            data_description="X-ray data",
            keep_going=True,
            log=self.log)
        f_obs = determine_data_and_flags_result.f_obs
        number_of_reflections = f_obs.indices().size()
        r_free_flags = determine_data_and_flags_result.r_free_flags
        test_flag_value = determine_data_and_flags_result.test_flag_value
        if r_free_flags is None:
            # No test set available: flag every reflection as "work".
            r_free_flags = f_obs.array(
                data=flex.bool(f_obs.data().size(), False))
    # process PDB
    pdb_file.assert_file_type("pdb")
    #
    pdb_in = hierarchy.input(file_name=pdb_file.file_name)
    ens_pdb_hierarchy = pdb_in.construct_hierarchy()
    ens_pdb_hierarchy.atoms().reset_i_seq()
    ens_pdb_xrs_s = pdb_in.input.xray_structures_simple()
    number_structures = len(ens_pdb_xrs_s)
    print('Number of structure in ensemble : ',
          number_structures,
          file=self.log)
    # Calculate sigmas from input map only
    if (self.params.assign_sigma_from_map
            and self.params.ensemble_sigma_map_input is not None):
        # process MTZ
        input_file = file_reader.any_file(
            self.params.ensemble_sigma_map_input)
        if input_file.file_type == "hkl":
            if input_file.file_object.file_type() != "ccp4_mtz":
                raise Sorry("Only MTZ format accepted for map input")
            else:
                mtz_file = input_file
        else:
            raise Sorry("Only MTZ format accepted for map input")
        miller_arrays = mtz_file.file_server.miller_arrays
        map_coeffs_1 = miller_arrays[0]
        #
        xrs_list = []
        for ens_pdb_xrs in ens_pdb_xrs_s:
            # get sigma levels from ensemble fc for each structure
            xrs = get_map_sigma(ens_pdb_hierarchy=ens_pdb_hierarchy,
                                ens_pdb_xrs=ens_pdb_xrs,
                                map_coeffs_1=map_coeffs_1,
                                residue_detail=self.params.residue_detail,
                                ignore_hd=self.params.ignore_hd,
                                log=self.log)
            xrs_list.append(xrs)
        # write ensemble pdb file, occupancies as sigma level
        filename = pdb_prefix + '_vs_' + \
            self.params.ensemble_sigma_map_input.replace(
                '.mtz', '') + '_pensemble.pdb'
        write_ensemble_pdb(filename=filename,
                           xrs_list=xrs_list,
                           ens_pdb_hierarchy=ens_pdb_hierarchy)
    # Do full analysis vs Fobs
    else:
        model_map_coeffs = []
        fmodel = None
        # Get <fcalc>
        for model, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
            ens_pdb_xrs.set_occupancies(1.0)
            if model == 0:
                # If mtz not supplied get fobs from xray structure...
                # Use input Fobs for scoring against nll
                if self.params.fobs_vs_fcalc_post_nll:
                    dummy_fobs = f_obs
                else:
                    # Identity test: f_obs is an array-like object, so '=='
                    # is not a reliable way to detect None.
                    if f_obs is None:
                        if self.params.fcalc_high_resolution is None:
                            raise Sorry(
                                "Please supply high resolution limit or input mtz file."
                            )
                        dummy_dmin = self.params.fcalc_high_resolution
                        dummy_dmax = self.params.fcalc_low_resolution
                    else:
                        print(
                            'Supplied mtz used to determine high and low resolution cuttoffs',
                            file=self.log)
                        dummy_dmax, dummy_dmin = f_obs.d_max_min()
                    #
                    dummy_fobs = abs(
                        ens_pdb_xrs.structure_factors(
                            d_min=dummy_dmin).f_calc())
                    dummy_fobs.set_observation_type_xray_amplitude()
                    # If mtz supplied, free flags are over written to prevent array size error
                    r_free_flags = dummy_fobs.array(
                        data=flex.bool(dummy_fobs.data().size(), False))
                #
                fmodel = utils.fmodel_simple(
                    scattering_table="wk1995",
                    xray_structures=[ens_pdb_xrs],
                    f_obs=dummy_fobs,
                    target_name='ls',
                    bulk_solvent_and_scaling=False,
                    r_free_flags=r_free_flags)
                # Zero-valued template array; later used only through
                # ``.array(...)`` to wrap averaged data.
                f_calc_ave = fmodel.f_calc().array(
                    data=fmodel.f_calc().data() * 0).deep_copy()
                # XXX Important to ensure scale is identical for each model and <model>
                # NOTE(review): this assigns an attribute rather than calling
                # a method -- confirm that is the intended API.
                fmodel.set_scale_switch = 1.0
                f_calc_ave_total = fmodel.f_calc().data().deep_copy()
            else:
                fmodel.update_xray_structure(xray_structure=ens_pdb_xrs,
                                             update_f_calc=True,
                                             update_f_mask=False)
                f_calc_ave_total += fmodel.f_calc().data().deep_copy()
            print('Model :', model + 1, file=self.log)
            print("\nStructure vs real Fobs (no bulk solvent or scaling)",
                  file=self.log)
            print('Rwork : %5.4f ' % fmodel.r_work(), file=self.log)
            print('Rfree : %5.4f ' % fmodel.r_free(), file=self.log)
            print('K1 : %5.4f ' % fmodel.scale_k1(), file=self.log)
            fcalc_edm = fmodel.electron_density_map()
            fcalc_map_coeffs = fcalc_edm.map_coefficients(map_type='Fc')
            fcalc_mtz_dataset = fcalc_map_coeffs.as_mtz_dataset(
                column_root_label='Fc')
            if self.params.output_model_and_model_ave_mtz:
                fcalc_mtz_dataset.mtz_object().write(
                    file_name=str(model + 1) + "_Fc.mtz")
            model_map_coeffs.append(fcalc_map_coeffs.deep_copy())
        # Replace Fcalc in fmodel by the average over all models.
        fmodel.update(
            f_calc=f_calc_ave.array(f_calc_ave_total / number_structures))
        print("\nEnsemble vs real Fobs (no bulk solvent or scaling)",
              file=self.log)
        print('Rwork : %5.4f ' % fmodel.r_work(), file=self.log)
        print('Rfree : %5.4f ' % fmodel.r_free(), file=self.log)
        print('K1 : %5.4f ' % fmodel.scale_k1(), file=self.log)
        # Get <Fcalc> map
        fcalc_ave_edm = fmodel.electron_density_map()
        fcalc_ave_map_coeffs = fcalc_ave_edm.map_coefficients(
            map_type='Fc').deep_copy()
        fcalc_ave_mtz_dataset = fcalc_ave_map_coeffs.as_mtz_dataset(
            column_root_label='Fc')
        if self.params.output_model_and_model_ave_mtz:
            fcalc_ave_mtz_dataset.mtz_object().write(file_name="aveFc.mtz")
        # From here on the name refers to the FFT map, not to coefficients.
        fcalc_ave_map_coeffs = fcalc_ave_map_coeffs.fft_map()
        fcalc_ave_map_coeffs.apply_volume_scaling()
        fcalc_ave_map_data = fcalc_ave_map_coeffs.real_map_unpadded()
        fcalc_ave_map_stats = maptbx.statistics(fcalc_ave_map_data)
        print("<Fcalc> Map Stats :", file=self.log)
        fcalc_ave_map_stats.show_summary(f=self.log)
        offset = fcalc_ave_map_stats.min()
        model_neg_ll = []
        number_previous_scatters = 0
        # Run through structure list again and get probability
        xrs_list = []
        for model, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
            if self.params.verbose:
                print('\n\nModel : ', model + 1, file=self.log)
            # Get model atom sigmas vs Fcalc
            fcalc_map = model_map_coeffs[model].fft_map()
            fcalc_map.apply_volume_scaling()
            fcalc_map_data = fcalc_map.real_map_unpadded()
            fcalc_map_stats = maptbx.statistics(fcalc_map_data)
            if self.params.verbose:
                print("Fcalc map stats :", file=self.log)
                fcalc_map_stats.show_summary(f=self.log)
            xrs = get_map_sigma(
                ens_pdb_hierarchy=ens_pdb_hierarchy,
                ens_pdb_xrs=ens_pdb_xrs,
                fft_map_1=fcalc_map,
                model_i=model,
                residue_detail=self.params.residue_detail,
                ignore_hd=self.params.ignore_hd,
                number_previous_scatters=number_previous_scatters,
                log=self.log)
            fcalc_sigmas = xrs.scatterers().extract_occupancies()
            del fcalc_map
            # Get model atom sigmas vs <Fcalc>
            xrs = get_map_sigma(
                ens_pdb_hierarchy=ens_pdb_hierarchy,
                ens_pdb_xrs=ens_pdb_xrs,
                fft_map_1=fcalc_ave_map_coeffs,
                model_i=model,
                residue_detail=self.params.residue_detail,
                ignore_hd=self.params.ignore_hd,
                number_previous_scatters=number_previous_scatters,
                log=self.log)
            ### For testing other residue averaging options
            #print xrs.residue_selections
            fcalc_ave_sigmas = xrs.scatterers().extract_occupancies()
            # Probability of model given <model>
            prob = fcalc_ave_sigmas / fcalc_sigmas
            # XXX debug option
            if False:
                for n, p in enumerate(prob):
                    print(' {0:5d} {1:5.3f}'.format(n, p), file=self.log)
            # Set probabilty between 0 and 1
            # XXX Make Histogram / more stats
            prob_lss_zero = flex.bool(prob <= 0)
            prob_grt_one = flex.bool(prob > 1)
            prob.set_selected(prob_lss_zero, 0.001)
            prob.set_selected(prob_grt_one, 1.0)
            xrs.set_occupancies(prob)
            xrs_list.append(xrs)
            sum_neg_ll = sum(-flex.log(prob))
            model_neg_ll.append((sum_neg_ll, model))
            if self.params.verbose:
                print('Model probability stats :', file=self.log)
                print(prob.min_max_mean().show(), file=self.log)
                print(' Count < 0.0 : ',
                      prob_lss_zero.count(True),
                      file=self.log)
                print(' Count > 1.0 : ',
                      prob_grt_one.count(True),
                      file=self.log)
            # For averaging by residue
            number_previous_scatters += ens_pdb_xrs.sites_cart().size()
        # write ensemble pdb file, occupancies as sigma level
        write_ensemble_pdb(filename=pdb_prefix + '_pensemble.pdb',
                           xrs_list=xrs_list,
                           ens_pdb_hierarchy=ens_pdb_hierarchy)
        # XXX Test ordering models by nll
        # XXX Test removing nth percentile atoms
        if (self.params.sort_ensemble_by_nll_score
                or self.params.fobs_vs_fcalc_post_nll):
            for percentile in [1.0, 0.975, 0.95, 0.9, 0.8, 0.6, 0.2]:
                model_neg_ll = sorted(model_neg_ll)
                f_calc_ave_total_reordered = None
                print_list = []
                for i_neg_ll in model_neg_ll:
                    xrs = xrs_list[i_neg_ll[1]]
                    nll_occ = xrs.scatterers().extract_occupancies()
                    # Set q=0 nth percentile atoms
                    sorted_nll_occ = sorted(nll_occ, reverse=True)
                    number_atoms = len(sorted_nll_occ)
                    percentile_prob_cutoff = sorted_nll_occ[
                        int(number_atoms * percentile) - 1]
                    cutoff_selections = flex.bool(
                        nll_occ < percentile_prob_cutoff)
                    cutoff_nll_occ = flex.double(nll_occ.size(),
                                                 1.0).set_selected(
                                                     cutoff_selections, 0.0)
                    #XXX Debug
                    if False:
                        print('\nDebug')
                        for x in range(len(cutoff_selections)):
                            print(cutoff_selections[x], nll_occ[x],
                                  cutoff_nll_occ[x])
                        print(percentile)
                        print(percentile_prob_cutoff)
                        print(cutoff_selections.count(True))
                        print(cutoff_selections.size())
                        print(cutoff_nll_occ.count(0.0))
                        print('Count q = 1 : ', cutoff_nll_occ.count(1.0))
                        print('Count scatterers size : ',
                              cutoff_nll_occ.size())
                    xrs.set_occupancies(cutoff_nll_occ)
                    fmodel.update_xray_structure(xray_structure=xrs,
                                                 update_f_calc=True,
                                                 update_f_mask=True)
                    # 'is None' rather than '== None': after the first pass
                    # this holds an array whose '==' is not a plain boolean
                    # comparison.
                    if f_calc_ave_total_reordered is None:
                        f_calc_ave_total_reordered = fmodel.f_calc().data(
                        ).deep_copy()
                        f_mask_ave_total_reordered = fmodel.f_masks(
                        )[0].data().deep_copy()
                        cntr = 1
                    else:
                        f_calc_ave_total_reordered += fmodel.f_calc().data(
                        ).deep_copy()
                        f_mask_ave_total_reordered += fmodel.f_masks(
                        )[0].data().deep_copy()
                        cntr += 1
                    # Running average over the models processed so far.
                    fmodel.update(
                        f_calc=f_calc_ave.array(
                            f_calc_ave_total_reordered / cntr).deep_copy(),
                        f_mask=f_calc_ave.array(
                            f_mask_ave_total_reordered / cntr).deep_copy())
                    # Update solvent and scale
                    # XXX Will need to apply_back_trace on latest version
                    fmodel.set_scale_switch = 0
                    fmodel.update_all_scales()
                    # Reset occ for outout
                    xrs.set_occupancies(nll_occ)
                    # k1 updated vs Fobs
                    if self.params.fobs_vs_fcalc_post_nll:
                        print_list.append([
                            cntr, i_neg_ll[0], i_neg_ll[1],
                            fmodel.r_work(),
                            fmodel.r_free()
                        ])
                # Order models by nll and print summary
                print('\nModels ranked by nll <Fcalc> R-factors recalculated',
                      file=self.log)
                print('Percentile cutoff : {0:5.3f}'.format(percentile),
                      file=self.log)
                xrs_list_sorted_nll = []
                print(' | NLL <Rw> <Rf> Ens Model', file=self.log)
                for info in print_list:
                    print(' {0:4d} | {1:8.1f} {2:8.4f} {3:8.4f} {4:12d}'.
                          format(
                              info[0],
                              info[1],
                              info[3],
                              info[4],
                              info[2] + 1,
                          ),
                          file=self.log)
                    xrs_list_sorted_nll.append(xrs_list[info[2]])
            # Output nll ordered ensemble
            write_ensemble_pdb(filename='nll_ordered_' + pdb_prefix +
                               '_pensemble.pdb',
                               xrs_list=xrs_list_sorted_nll,
                               ens_pdb_hierarchy=ens_pdb_hierarchy)
def run(args=None, params=None, out=sys.stdout):
    """Compute merging and model statistics (CC* and related) for a dataset.

    Exactly one of ``args`` (command-line arguments) or ``params`` (an
    already extracted parameter object) must be given.

    F(model) is taken from the reflection file when a suitable complex array
    is found (by ``params.f_model_labels`` or by an "F-model"/"FMODEL" label
    prefix); otherwise it is calculated from ``params.model`` using
    ``fmodel_simple``.  The statistics object is printed to ``out`` and
    returned.

    :param args: list of command-line arguments, or None.
    :param params: extracted parameters, or None.
    :param out: stream that receives all printed output.
    :returns: the ``merging_and_model_statistics`` object.
    :raises Usage: when ``args`` is empty or contains ``--help``.
    :raises Sorry: on missing or unsuitable input.
    """
    assert [args, params].count(None) == 1
    if args is not None:
        if len(args) == 0 or "--help" in args:
            raise Usage(""" phenix.cc_star model.pdb data.mtz unmerged_data=data.hkl [n_bins=X] [options] phenix.cc_star model_refine_001.mtz unmerged_data=data.hkl [...] Implementation of the method for assessing data and model quality described in: Karplus PA & Diederichs K (2012) Science 336:1030-3. Full parameters: %s """ % master_phil.as_str(prefix=" ", attributes_level=1))
        import iotbx.phil
        cmdline = iotbx.phil.process_command_line_with_files(
            args=args,
            master_phil=master_phil,
            pdb_file_def="model",
            reflection_file_def="data")
        params = cmdline.work.extract()
    import mmtbx.command_line
    import mmtbx.validation.experimental
    from iotbx import merging_statistics
    from iotbx import file_reader
    if params.data is None:
        raise Sorry("Please specify a data file (usually MTZ format).")
    if params.unmerged_data is None:
        raise Sorry("Please specify unmerged_data file")
    hkl_in = file_reader.any_file(params.data, force_type="hkl")
    hkl_in.check_file_type("hkl")
    f_model = f_obs = r_free_flags = None
    f_models = []
    data_arrays = []
    f_model_labels = []
    if params.f_model_labels is None:
        # No label given: collect every complex array whose label looks like
        # an F(model) column.
        for array in hkl_in.file_server.miller_arrays:
            labels = array.info().label_string()
            if array.is_complex_array() and labels.startswith(
                    ("F-model", "FMODEL")):
                f_models.append(array)
                f_model_labels.append(labels)
        n_f_models = len(f_models)
        if n_f_models > 1:
            raise Sorry(
                ("Multiple F(model) arrays found:\n%s\nPlease specify the " +
                 "'labels' parameter.") % "\n".join(f_model_labels))
        elif n_f_models == 1:
            f_model = f_models[0]
            if f_model.anomalous_flag():
                info = f_model.info()
                f_model = f_model.average_bijvoet_mates().set_info(info)
            print("F(model):", file=out)
            f_model.show_summary(f=out, prefix=" ")
        else:
            # No F(model) in the file: read the experimental data instead.
            data_array = hkl_in.file_server.get_xray_data(
                file_name=params.data,
                labels=params.f_obs_labels,
                ignore_all_zeros=True,
                parameter_scope="")
            if data_array.is_xray_intensity_array():
                from cctbx import french_wilson
                f_obs = french_wilson.french_wilson_scale(
                    miller_array=data_array, out=out)
            else:
                f_obs = data_array
    else:
        for array in hkl_in.file_server.miller_arrays:
            array_labels = array.info().label_string()
            if array_labels != params.f_model_labels:
                continue
            if not array.is_complex_array():
                raise Sorry(
                    "The data in %s are not of the required type." %
                    array_labels)
            f_model = array
            break
    if f_model is not None:
        assert f_obs is None
        for array in hkl_in.file_server.miller_arrays:
            labels = array.info().label_string()
            if labels == params.f_obs_labels:
                f_obs = array
                break
        else:
            # Loop finished without a label match.
            try:
                f_obs = hkl_in.file_server.get_amplitudes(
                    file_name=params.f_obs_labels,
                    labels=None,
                    convert_to_amplitudes_if_necessary=False,
                    parameter_name="f_obs_labels",
                    parameter_scope="",
                    strict=True)
            except Sorry:
                raise Sorry(
                    "You must supply a file containing both F-obs and F-model "
                    + "if you want to use a pre-calculated F-model array.")
    assert f_obs.is_xray_amplitude_array()
    if f_obs.anomalous_flag():
        info = f_obs.info()
        f_obs = f_obs.average_bijvoet_mates().set_info(info)
    print("F(obs):", file=out)
    f_obs.show_summary(f=out, prefix=" ")
    print("", file=out)
    r_free_flags, test_flag_value = hkl_in.file_server.get_r_free_flags(
        file_name=params.data,
        label=params.r_free_flags.label,
        test_flag_value=params.r_free_flags.test_flag_value,
        disable_suitability_test=False,
        parameter_scope="")
    info = r_free_flags.info()
    # Convert the flag values into a boolean "is test reflection" array.
    is_test = r_free_flags.data() == test_flag_value
    r_free_flags = r_free_flags.customized_copy(data=is_test).set_info(info)
    if r_free_flags.anomalous_flag():
        r_free_flags = r_free_flags.average_bijvoet_mates().set_info(info)
    print("R-free flags:", file=out)
    r_free_flags.show_summary(f=out, prefix=" ")
    print("", file=out)
    unmerged_i_obs = mmtbx.command_line.load_and_validate_unmerged_data(
        f_obs=f_obs,
        file_name=params.unmerged_data,
        data_labels=params.unmerged_labels,
        log=out)
    print("Unmerged intensities:", file=out)
    unmerged_i_obs.show_summary(f=out, prefix=" ")
    print("", file=out)
    if f_model is None:
        assert f_obs is not None
        if params.model is None:
            raise Sorry(
                "A PDB file is required if F(model) is not pre-calculated.")
        make_sub_header("Calculating F(model)", out=out)
        pdb_in = file_reader.any_file(params.model, force_type="pdb")
        pdb_in.check_file_type("pdb")
        pdb_symm = pdb_in.file_object.crystal_symmetry()
        if pdb_symm is None:
            pdb_symm = f_obs
        elif f_obs.crystal_symmetry() is None:
            f_obs = f_obs.customized_copy(crystal_symmetry=pdb_symm)
        elif not pdb_symm.is_similar_symmetry(f_obs):
            mmtbx.command_line.show_symmetry_error(file1="PDB file",
                                                   file2="data file",
                                                   symm1=pdb_symm,
                                                   symm2=f_obs)
        xray_structure = pdb_in.file_object.xray_structure_simple(
            crystal_symmetry=pdb_symm)
        from mmtbx.utils import fmodel_simple
        # XXX this gets done anyway later, but they need to be consistent before
        # creating the fmodel manager
        if f_obs.anomalous_flag():
            f_obs = f_obs.average_bijvoet_mates()
        f_obs = f_obs.eliminate_sys_absent()
        f_obs, r_free_flags = f_obs.map_to_asu().common_sets(
            other=r_free_flags.map_to_asu())
        fmodel = fmodel_simple(f_obs=f_obs,
                               r_free_flags=r_free_flags,
                               xray_structures=[xray_structure],
                               skip_twin_detection=True,
                               scattering_table="n_gaussian")
        fmodel.show(log=out)
        # Use the arrays held by fmodel from here on.
        f_model = fmodel.f_model()
        f_obs = fmodel.f_obs()
        r_free_flags = fmodel.r_free_flags()
    elif f_model.anomalous_flag():
        f_model = f_model.average_bijvoet_mates()
    stats = mmtbx.validation.experimental.merging_and_model_statistics(
        f_model=f_model,
        f_obs=f_obs,
        r_free_flags=r_free_flags,
        unmerged_i_obs=unmerged_i_obs,
        n_bins=params.n_bins,
        sigma_filtering=params.sigma_filtering)
    stats.show_cc_star(out=out)
    if params.loggraph:
        stats.show_loggraph(out=out)
    print("", file=out)
    print("Reference:", file=out)
    print(" Karplus PA & Diederichs K (2012) Science 336:1030-3.", file=out)
    print("", file=out)
    return stats
def run(args,
        command_name="mmtbx.model_vs_data",
        show_geometry_statistics=True,
        model_size_max_atoms=80000,
        data_size_max_reflections=1000000,
        unit_cell_max_dimension=800.,
        return_fmodel_and_pdb=False,
        out=None,
        log=sys.stdout):
    """Compare a model (PDB file) with reflection data and collect statistics.

    Reads data and free-R flags from the reflection files in ``args``,
    builds X-ray structures from the PDB file(s), computes an fmodel, and
    collects crystal, header, data, map and (optionally) CC* information in
    an ``mvd`` object, which is shown on ``out`` and returned.

    :param args: command-line arguments; an empty list or ``["--help"]``
        prints ``msg`` and the defaults to ``log`` and returns None.
    :param command_name: not referenced in this function body.
    :param show_geometry_statistics: forwarded to ``show_geometry``; also
        controls ``ignore_unknown_nonbonded_energy_types`` (its negation).
    :param model_size_max_atoms: not referenced in this function body.
    :param data_size_max_reflections: reflection-count limit applied when
        ``params.ignore_giant_models_and_datasets`` is set.
    :param unit_cell_max_dimension: unit-cell edge limit applied under the
        same condition.
    :param return_fmodel_and_pdb: when true, attach ``pdb_file`` and
        ``fmodel`` to the returned object.
    :param out: stream passed to ``mvd_obj.show`` and the CC* summary.
    :param log: stream for messages and per-bin statistics.
    :returns: the populated ``mvd`` object (None for the help case).
    :raises Sorry: on missing input or when size limits are exceeded.
    """
    # NOTE: function-level ``import mmtbx...`` statements make ``mmtbx`` a
    # local name for the whole function, so this import has to stay ahead of
    # every ``mmtbx.<...>`` use below.
    import mmtbx.f_model_info
    if (len(args) == 0) or (args == ["--help"]):
        print(msg, file=log)
        defaults(log=log, silent=False)
        return
    parsed = defaults(log=log, silent=True)
    #
    mvd_obj = mvd()
    #
    processed_args = utils.process_command_line_args(args=args,
                                                     log=log,
                                                     master_params=parsed)
    params = processed_args.params.extract()
    #
    reflection_files = processed_args.reflection_files
    if len(reflection_files) == 0:
        raise Sorry("No reflection file found.")
    crystal_symmetry = processed_args.crystal_symmetry
    if crystal_symmetry is None:
        raise Sorry("No crystal symmetry found.")
    if len(processed_args.pdb_file_names) == 0:
        raise Sorry("No PDB file found.")
    pdb_file_names = processed_args.pdb_file_names
    #
    rfs = reflection_file_server(crystal_symmetry=crystal_symmetry,
                                 reflection_files=reflection_files)
    parameters = utils.data_and_flags_master_params().extract()
    if params.f_obs_label is not None:
        parameters.labels = params.f_obs_label
    if params.r_free_flags_label is not None:
        parameters.r_free_flags.label = params.r_free_flags_label
    if params.high_resolution is not None:
        parameters.high_resolution = params.high_resolution
    determine_data_and_flags_result = utils.determine_data_and_flags(
        reflection_file_server=rfs,
        parameters=parameters,
        data_parameter_scope="refinement.input.xray_data",
        flags_parameter_scope="refinement.input.xray_data.r_free_flags",
        data_description="X-ray data",
        keep_going=True,
        log=StringIO())
    f_obs = determine_data_and_flags_result.f_obs
    number_of_reflections = f_obs.indices().size()
    if (params.ignore_giant_models_and_datasets
            and number_of_reflections > data_size_max_reflections):
        raise Sorry("Too many reflections: %d" % number_of_reflections)
    #
    max_unit_cell_dimension = max(f_obs.unit_cell().parameters()[:3])
    if (params.ignore_giant_models_and_datasets
            and max_unit_cell_dimension > unit_cell_max_dimension):
        raise Sorry("Too large unit cell (max dimension): %s" %
                    str(max_unit_cell_dimension))
    #
    r_free_flags = determine_data_and_flags_result.r_free_flags
    test_flag_value = determine_data_and_flags_result.test_flag_value
    if r_free_flags is None:
        # No test set: mark every reflection as "work".
        r_free_flags = f_obs.array(
            data=flex.bool(f_obs.data().size(), False))
        test_flag_value = None
    #
    mmtbx_pdb_file = mmtbx.utils.pdb_file(
        pdb_file_names=pdb_file_names,
        cif_objects=processed_args.cif_objects,
        crystal_symmetry=crystal_symmetry,
        use_neutron_distances=(params.scattering_table == "neutron"),
        ignore_unknown_nonbonded_energy_types=not show_geometry_statistics,
        log=log)
    mmtbx_pdb_file.set_ppf(stop_if_duplicate_labels=False)
    processed_pdb_file = mmtbx_pdb_file.processed_pdb_file
    pdb_raw_records = mmtbx_pdb_file.pdb_raw_records
    pdb_inp = mmtbx_pdb_file.pdb_inp
    #
    # just to avoid going any further with bad PDB file....
    pdb_inp.xray_structures_simple()
    #
    acp = processed_pdb_file.all_chain_proxies
    atom_selections = group_args(
        all=acp.selection(string="all"),
        macromolecule=acp.selection(string="protein or dna or rna"),
        solvent=acp.selection(string="water"),  # XXX single_atom_residue
        ligand=acp.selection(
            string="not (protein or dna or rna or water)"),
        backbone=acp.selection(string="backbone"),
        sidechain=acp.selection(string="sidechain"))
    #
    scattering_table = params.scattering_table
    exptl_method = pdb_inp.get_experiment_type()
    if (exptl_method is not None) and ("NEUTRON" in exptl_method):
        scattering_table = "neutron"
    xsfppf = mmtbx.utils.xray_structures_from_processed_pdb_file(
        processed_pdb_file=processed_pdb_file,
        scattering_table=scattering_table,
        d_min=f_obs.d_min())
    xray_structures = xsfppf.xray_structures
    if (0):  #XXX normalize occupancies if all models have occ=1 so the total=1
        n_models = len(xray_structures)
        for xrs in xray_structures:
            occ = xrs.scatterers().extract_occupancies()
            occ = occ / n_models
            xrs.set_occupancies(occ)
    model_selections = xsfppf.model_selections
    mvd_obj.collect(crystal=group_args(
        uc=f_obs.unit_cell(),
        sg=f_obs.crystal_symmetry().space_group_info().symbol_and_number(),
        n_sym_op=f_obs.crystal_symmetry().space_group_info().type().group(
        ).order_z(),
        uc_vol=f_obs.unit_cell().volume()))
    #
    hierarchy = pdb_inp.construct_hierarchy()
    pdb_atoms = hierarchy.atoms()
    pdb_atoms.reset_i_seq()
    #
    # Extract TLS
    pdb_tls = None
    pdb_inp_tls = pdb_inp.extract_tls_params(hierarchy)
    pdb_tls = group_args(pdb_inp_tls=pdb_inp_tls,
                         tls_selections=[],
                         tls_selection_strings=[])
    # XXX no TLS + multiple models
    if (pdb_inp_tls.tls_present and pdb_inp_tls.error_string is None
            and len(xray_structures) == 1):
        pdb_tls = mmtbx.tls.tools.extract_tls_from_pdb(
            pdb_inp_tls=pdb_inp_tls,
            all_chain_proxies=mmtbx_pdb_file.processed_pdb_file.
            all_chain_proxies,
            xray_structure=xsfppf.xray_structure_all)
        if (len(pdb_tls.tls_selections) == len(pdb_inp_tls.tls_params)
                and len(pdb_inp_tls.tls_params) > 0):
            xray_structures = [
                utils.extract_tls_and_u_total_from_pdb(
                    f_obs=f_obs,
                    r_free_flags=r_free_flags,
                    xray_structure=xray_structures[
                        0],  # XXX no TLS + multiple models
                    tls_selections=pdb_tls.tls_selections,
                    tls_groups=pdb_inp_tls.tls_params)
            ]
    ###########################
    geometry_statistics = show_geometry(
        xray_structures=xray_structures,
        processed_pdb_file=processed_pdb_file,
        scattering_table=scattering_table,
        hierarchy=hierarchy,
        model_selections=model_selections,
        show_geometry_statistics=show_geometry_statistics,
        mvd_obj=mvd_obj,
        atom_selections=atom_selections)
    ###########################
    mp = mmtbx.masks.mask_master_params.extract()
    f_obs_labels = f_obs.info().label_string()
    f_obs = f_obs.sort(reverse=True, by_value="packed_indices")
    r_free_flags = r_free_flags.sort(reverse=True, by_value="packed_indices")
    fmodel = utils.fmodel_simple(
        xray_structures=xray_structures,
        scattering_table=scattering_table,
        mask_params=mp,
        f_obs=f_obs,
        r_free_flags=r_free_flags,
        skip_twin_detection=params.skip_twin_detection)
    # Number of reflections present in the input but not in fmodel.
    n_outl = f_obs.data().size() - fmodel.f_obs().data().size()
    mvd_obj.collect(model_vs_data=show_model_vs_data(fmodel))
    # Extract information from PDB file header and output (if any)
    pub_r_work = None
    pub_r_free = None
    pub_high = None
    pub_low = None
    pub_sigma = None
    pub_program_name = None
    pub_solv_cont = None
    pub_matthews = None
    published_results = pdb_inp.get_r_rfree_sigma(
        file_name=pdb_file_names[0])
    if published_results is not None:
        pub_r_work = published_results.r_work
        pub_r_free = published_results.r_free
        pub_high = published_results.high
        pub_low = published_results.low
        pub_sigma = published_results.sigma
    pub_program_name = pdb_inp.get_program_name()
    pub_solv_cont = pdb_inp.get_solvent_content()
    pub_matthews = pdb_inp.get_matthews_coeff()
    mvd_obj.collect(pdb_header=group_args(
        program_name=pub_program_name,
        year=pdb_inp.extract_header_year(),
        r_work=pub_r_work,
        r_free=pub_r_free,
        high_resolution=pub_high,
        low_resolution=pub_low,
        sigma_cutoff=pub_sigma,
        matthews_coeff=pub_matthews,
        solvent_cont=pub_solv_cont,
        tls=pdb_tls,
        exptl_method=exptl_method))
    #
    # Recompute R-factors using published cutoffs
    fmodel_cut = fmodel
    tmp_sel = flex.bool(fmodel.f_obs().data().size(), True)
    if (pub_sigma is not None and fmodel.f_obs().sigmas() is not None):
        tmp_sel &= fmodel.f_obs().data() > fmodel.f_obs().sigmas() * pub_sigma
    if (pub_high is not None
            and abs(pub_high - fmodel.f_obs().d_min()) > 0.03):
        tmp_sel &= fmodel.f_obs().d_spacings().data() > pub_high
    if (pub_low is not None
            and abs(pub_low - fmodel.f_obs().d_max_min()[0]) > 0.03):
        tmp_sel &= fmodel.f_obs().d_spacings().data() < pub_low
    # Only rebuild fmodel when the cutoffs remove some, but not all,
    # reflections.
    if (tmp_sel.count(True) != tmp_sel.size() and tmp_sel.count(True) > 0):
        fmodel_cut = utils.fmodel_simple(
            xray_structures=xray_structures,
            scattering_table=scattering_table,
            f_obs=fmodel.f_obs().select(tmp_sel),
            r_free_flags=fmodel.r_free_flags().select(tmp_sel),
            skip_twin_detection=params.skip_twin_detection)
    mvd_obj.collect(misc=group_args(
        r_work_cutoff=fmodel_cut.r_work(),
        r_free_cutoff=fmodel_cut.r_free(),
        n_refl_cutoff=fmodel_cut.f_obs().data().size()))
    mvd_obj.collect(data=show_data(fmodel=fmodel,
                                   n_outl=n_outl,
                                   test_flag_value=test_flag_value,
                                   f_obs_labels=f_obs_labels,
                                   fmodel_cut=fmodel_cut))
    # map statistics
    if len(xray_structures) == 1:  # XXX no multi-model support yet
        mvd_obj.collect(maps=maps(fmodel=fmodel, mvd_obj=mvd_obj))
    # CC* and friends
    cc_star_stats = None
    if params.unmerged_data is not None:
        import mmtbx.validation.experimental
        import mmtbx.command_line
        f_obs = fmodel.f_obs().average_bijvoet_mates()
        unmerged_i_obs = mmtbx.command_line.load_and_validate_unmerged_data(
            f_obs=f_obs,
            file_name=params.unmerged_data,
            data_labels=params.unmerged_labels,
            log=null_out())
        cc_star_stats = mmtbx.validation.experimental.merging_and_model_statistics(
            f_model=fmodel.f_model().average_bijvoet_mates(),
            f_obs=f_obs,
            r_free_flags=fmodel.r_free_flags().average_bijvoet_mates(),
            unmerged_i_obs=unmerged_i_obs,
            n_bins=params.n_bins)
    mvd_obj.show(log=out)
    if cc_star_stats is not None:
        cc_star_stats.show_model_vs_data(out=out, prefix=" ")
    if return_fmodel_and_pdb:
        mvd_obj.pdb_file = processed_pdb_file
        mvd_obj.fmodel = fmodel
    if len(params.map) > 0:
        for map_name_string in params.map:
            map_type_obj = mmtbx.map_names(map_name_string=map_name_string)
            map_params = mmtbx.maps.map_and_map_coeff_master_params().fetch(
                mmtbx.maps.cast_map_coeff_params(map_type_obj)).extract()
            maps_obj = mmtbx.maps.compute_map_coefficients(
                fmodel=fmodel_cut, params=map_params.map_coefficients)
            fn = os.path.basename(processed_args.reflection_file_names[0])
            # Everything before the first "." (the whole name if there is
            # no ".").
            prefix = fn.split(".", 1)[0]
            file_name = prefix + "_%s_map_coeffs.mtz" % map_type_obj.format()
            maps_obj.write_mtz_file(file_name=file_name)
    # statistics in bins
    if not fmodel.twin:
        print("Statistics in resolution bins:", file=log)
        mmtbx.f_model_info.r_work_and_completeness_in_resolution_bins(
            fmodel=fmodel, out=log, prefix=" ")
    # report map cc
    if (params.comprehensive and not fmodel_cut.twin
            and fmodel_cut.xray_structure is not None):
        rsc_params = real_space_correlation.master_params().extract()
        rsc_params.scattering_table = scattering_table
        real_space_correlation.simple(fmodel=fmodel_cut,
                                      pdb_hierarchy=hierarchy,
                                      params=rsc_params,
                                      log=log,
                                      show_results=True)
    #
    if params.dump_result_object_as_pickle:
        output_prefixes = []
        for op in (processed_args.pdb_file_names +
                   processed_args.reflection_file_names):
            # Base name up to the first "."; replaces the original
            # index()/except-Exception construct with the same result.
            op = os.path.basename(op).split(".", 1)[0]
            if op not in output_prefixes:
                output_prefixes.append(op)
        output_prefix = "_".join(output_prefixes)
        easy_pickle.dump("%s.pickle" % output_prefix, mvd_obj)
    return mvd_obj
def exercise():
    """Exercise ``density_sampling.screen_residue`` on the Ser fragment.

    Everything is read from and written to the current working directory:

    1. ``generate_inputs()`` is called first (presumably it writes
       ``ser_frag.pdb`` and ``ser_frag_single.pdb`` -- confirm in that helper).
    2. ``phenix.fmodel`` is run on ``ser_frag_fmodel.eff`` and must produce
       ``ser_frag.mtz``; the first two Miller arrays of that file are used as
       F-obs and R-free flags.
    3. Each residue group of each chain is screened with ``backrub=True``.
       Every conformer returned is applied to the atom coordinates and a
       detached copy of the model is stored.
    4. The residue is screened a second time with ``backrub=False``; for the
       second residue the number of conformers is printed.
    5. The stored models are written to ``ser_frag_guided_ensemble.pdb``.

    Raises ``AssertionError`` when ``phenix.fmodel`` fails, when the MTZ file
    is missing, or when the expected report row for the second residue is
    absent.
    """
    from mmtbx.building.alternate_conformations import density_sampling
    from mmtbx.utils import fmodel_simple
    from mmtbx.monomer_library import server
    from iotbx import file_reader
    import iotbx.pdb.hierarchy
    generate_inputs()
    # PHIL parameter text: one definition per line, so that each value ends
    # where the next definition begins.
    fmodel_params = """
high_resolution = 1.2
r_free_flags_fraction = 0.1
add_sigmas = True
pdb_file = ser_frag.pdb
output {
  label = F
  type = *real complex
  file_name = ser_frag.mtz
}
fmodel.k_sol = 0.3
fmodel.b_sol = 20
"""
    # Close the parameter file before the external program reads it.
    with open("ser_frag_fmodel.eff", "w") as eff_file:
        eff_file.write(fmodel_params)
    assert (easy_run.fully_buffered("phenix.fmodel ser_frag_fmodel.eff").
            raise_if_errors().return_code == 0)
    assert os.path.isfile("ser_frag.mtz")
    mtz_in = file_reader.any_file("ser_frag.mtz")
    f_obs = mtz_in.file_server.miller_arrays[0]
    flags = mtz_in.file_server.miller_arrays[1]
    # Turn the integer flag column into booleans (True where the flag is 1).
    flags = flags.customized_copy(data=(flags.data() == 1))
    mon_lib_srv = server.server()
    pdb_in = iotbx.pdb.hierarchy.input(file_name="ser_frag_single.pdb")
    hierarchy = pdb_in.hierarchy
    pdb_atoms = hierarchy.atoms()
    pdb_atoms.reset_i_seq()
    sites_cart = pdb_atoms.extract_xyz()
    xrs = pdb_in.input.xray_structure_simple()
    fmodel = fmodel_simple(
        f_obs=f_obs,
        xray_structures=[xrs],
        scattering_table="n_gaussian",
        r_free_flags=flags,
        skip_twin_detection=True)
    models = []
    prev_res = next_res = next_next_res = None
    for chain in hierarchy.only_model().chains():
        residue_groups = chain.residue_groups()
        n_rg = len(residue_groups)
        for i_res, residue_group in enumerate(residue_groups):
            # Neighbours are the first atom group of the following residues,
            # or None near the end of the chain.
            next_res = next_next_res = None
            if (i_res < (n_rg - 1)):
                next_res = residue_groups[i_res + 1].atom_groups()[0]
            if (i_res < (n_rg - 2)):
                next_next_res = residue_groups[i_res + 2].atom_groups()[0]
            atom_groups = residue_group.atom_groups()
            primary_conf = atom_groups[0]
            out = StringIO()
            confs = density_sampling.screen_residue(
                residue=primary_conf,
                prev_residue=prev_res,
                next_residue=next_res,
                next_next_residue=next_next_res,
                sites_cart=sites_cart,
                fmodel=fmodel,
                mon_lib_srv=mon_lib_srv,
                params=None,
                backrub=True,
                shear=False,
                verbose=True,
                out=out)
            prev_res = primary_conf
            if (confs is None):
                continue
            # TODO tweak density sampling to allow a backrubbed conformer
            # with a chi1 t rotamer for Ser 99
            if (i_res == 1):
                # Compare on whitespace-normalised text so that column
                # padding in the report cannot affect the check.
                report = " ".join(out.getvalue().split())
                assert ("A SER 99 20.0 None t" in report)
            for conf in confs:
                sites_new = sites_cart.set_selected(conf.sites_selection,
                                                    conf.sites_selected())
                pdb_atoms.set_xyz(sites_new)
                models.append(hierarchy.only_model().detached_copy())
            # NOTE(review): prev_res was already advanced above, so this
            # second screening passes the residue itself as prev_residue --
            # confirm that this is intended.
            confs = density_sampling.screen_residue(
                residue=primary_conf,
                prev_residue=prev_res,
                next_residue=next_res,
                next_next_residue=next_next_res,
                sites_cart=sites_cart,
                fmodel=fmodel,
                mon_lib_srv=mon_lib_srv,
                params=None,
                backrub=False,
                out=out)
            if (i_res == 1):
                print(len(confs))
    new_hierarchy = iotbx.pdb.hierarchy.root()
    for i_model, conf in enumerate(models):
        conf.id = str(i_model + 1)
        new_hierarchy.append_model(conf)
    with open("ser_frag_guided_ensemble.pdb", "w") as pdb_out:
        pdb_out.write(new_hierarchy.as_pdb_string())
def run(args=None, params=None, out=sys.stdout):
    """Compute CC* style merging and model statistics for a data set.

    Exactly one of ``args`` and ``params`` must be given.

    :param args: command-line arguments; parsed against ``master_phil`` with
      PDB files mapped to ``model`` and reflection files mapped to ``data``.
    :param params: already extracted parameter object, used instead of
      ``args``.
    :param out: stream that receives all printed output.
    :returns: the object returned by
      ``mmtbx.validation.experimental.merging_and_model_statistics``.
    :raises Usage: when ``args`` is empty or contains ``--help``.
    :raises Sorry: when the data or unmerged data file is missing, when the
      F(model) array cannot be identified unambiguously, when an explicitly
      requested F(model) array is absent or not complex, or when no model is
      available to calculate F(model).

    F(model) is taken from the reflection file when a suitable complex array
    is present; otherwise it is calculated from the PDB model with
    ``fmodel_simple``.
    """
    assert [args, params].count(None) == 1
    if args is not None:
        if (len(args) == 0) or ("--help" in args):
            raise Usage("""
phenix.cc_star model.pdb data.mtz unmerged_data=data.hkl [n_bins=X] [options]
phenix.cc_star model_refine_001.mtz unmerged_data=data.hkl [...]

Implementation of the method for assessing data and model quality described in:
  Karplus PA & Diederichs K (2012) Science 336:1030-3.

Full parameters:
%s
""" % master_phil.as_str(prefix=" ", attributes_level=1))
        import iotbx.phil
        cmdline = iotbx.phil.process_command_line_with_files(
            args=args,
            master_phil=master_phil,
            pdb_file_def="model",
            reflection_file_def="data")
        params = cmdline.work.extract()
    import mmtbx.command_line
    import mmtbx.validation.experimental
    from iotbx import merging_statistics
    from iotbx import file_reader
    if (params.data is None):
        raise Sorry("Please specify a data file (usually MTZ format).")
    if (params.unmerged_data is None):
        raise Sorry("Please specify unmerged_data file")
    hkl_in = file_reader.any_file(params.data, force_type="hkl")
    hkl_in.check_file_type("hkl")
    f_model = f_obs = r_free_flags = None
    f_models = []
    f_model_labels = []
    if (params.f_model_labels is None):
        # No labels given: look for complex arrays whose label string starts
        # with one of the known F(model) prefixes.
        for array in hkl_in.file_server.miller_arrays:
            labels = array.info().label_string()
            if (array.is_complex_array()):
                if (labels.startswith("F-model") or
                        labels.startswith("FMODEL")):
                    f_models.append(array)
                    f_model_labels.append(labels)
        if (len(f_models) > 1):
            raise Sorry(("Multiple F(model) arrays found:\n%s\nPlease " +
                         "specify the 'f_model_labels' parameter.") %
                        "\n".join(f_model_labels))
        elif (len(f_models) == 1):
            f_model = f_models[0]
            if (f_model.anomalous_flag()):
                info = f_model.info()
                f_model = f_model.average_bijvoet_mates().set_info(info)
            print >> out, "F(model):"
            f_model.show_summary(f=out, prefix=" ")
        else:
            # No F(model) in the file: read the experimental data instead;
            # F(model) is calculated from the PDB model further down.
            data_array = hkl_in.file_server.get_xray_data(
                file_name=params.data,
                labels=params.f_obs_labels,
                ignore_all_zeros=True,
                parameter_scope="")
            if (data_array.is_xray_intensity_array()):
                from cctbx import french_wilson
                f_obs = french_wilson.french_wilson_scale(
                    miller_array=data_array,
                    out=out)
            else:
                f_obs = data_array
    else:
        for array in hkl_in.file_server.miller_arrays:
            array_labels = array.info().label_string()
            if (array_labels == params.f_model_labels):
                if (array.is_complex_array()):
                    f_model = array
                    break
                else:
                    raise Sorry("The data in %s are not of the required type."
                                % array_labels)
        else:
            # Without this check both f_model and f_obs stay None and the
            # amplitude assertion below fails with an AttributeError.
            raise Sorry("No array with labels '%s' found in %s." %
                        (params.f_model_labels, params.data))
    if (f_model is not None):
        assert (f_obs is None)
        for array in hkl_in.file_server.miller_arrays:
            labels = array.info().label_string()
            if (labels == params.f_obs_labels):
                f_obs = array
                break
        else:
            # NOTE(review): file_name receives params.f_obs_labels here;
            # params.data looks like the intended value -- confirm.
            try:
                f_obs = hkl_in.file_server.get_amplitudes(
                    file_name=params.f_obs_labels,
                    labels=None,
                    convert_to_amplitudes_if_necessary=False,
                    parameter_name="f_obs_labels",
                    parameter_scope="",
                    strict=True)
            except Sorry:
                raise Sorry("You must supply a file containing both F-obs " +
                            "and F-model if you want to use a " +
                            "pre-calculated F-model array.")
    assert (f_obs.is_xray_amplitude_array())
    if (f_obs.anomalous_flag()):
        info = f_obs.info()
        f_obs = f_obs.average_bijvoet_mates().set_info(info)
    print >> out, "F(obs):"
    f_obs.show_summary(f=out, prefix=" ")
    print >> out, ""
    r_free_flags, test_flag_value = hkl_in.file_server.get_r_free_flags(
        file_name=params.data,
        label=params.r_free_flags.label,
        test_flag_value=params.r_free_flags.test_flag_value,
        disable_suitability_test=False,
        parameter_scope="")
    info = r_free_flags.info()
    # Convert the flag column to booleans: True where it equals the test
    # flag value.
    r_free_flags = r_free_flags.customized_copy(
        data=r_free_flags.data() == test_flag_value).set_info(info)
    if (r_free_flags.anomalous_flag()):
        r_free_flags = r_free_flags.average_bijvoet_mates().set_info(info)
    print >> out, "R-free flags:"
    r_free_flags.show_summary(f=out, prefix=" ")
    print >> out, ""
    unmerged_i_obs = mmtbx.command_line.load_and_validate_unmerged_data(
        f_obs=f_obs,
        file_name=params.unmerged_data,
        data_labels=params.unmerged_labels,
        log=out)
    print >> out, "Unmerged intensities:"
    unmerged_i_obs.show_summary(f=out, prefix=" ")
    print >> out, ""
    if (f_model is None):
        assert (f_obs is not None)
        if (params.model is None):
            raise Sorry(
                "A PDB file is required if F(model) is not pre-calculated.")
        make_sub_header("Calculating F(model)", out=out)
        pdb_in = file_reader.any_file(params.model, force_type="pdb")
        pdb_in.check_file_type("pdb")
        pdb_symm = pdb_in.file_object.crystal_symmetry()
        if (pdb_symm is None):
            pdb_symm = f_obs
        else:
            if (f_obs.crystal_symmetry() is None):
                f_obs = f_obs.customized_copy(crystal_symmetry=pdb_symm)
            elif (not pdb_symm.is_similar_symmetry(f_obs)):
                mmtbx.command_line.show_symmetry_error(
                    file1="PDB file",
                    file2="data file",
                    symm1=pdb_symm,
                    symm2=f_obs)
        xray_structure = pdb_in.file_object.xray_structure_simple(
            crystal_symmetry=pdb_symm)
        from mmtbx.utils import fmodel_simple
        # XXX this gets done anyway later, but they need to be consistent
        # before creating the fmodel manager
        if (f_obs.anomalous_flag()):
            f_obs = f_obs.average_bijvoet_mates()
        f_obs = f_obs.eliminate_sys_absent()
        f_obs, r_free_flags = f_obs.map_to_asu().common_sets(
            other=r_free_flags.map_to_asu())
        fmodel = fmodel_simple(
            f_obs=f_obs,
            r_free_flags=r_free_flags,
            xray_structures=[xray_structure],
            skip_twin_detection=True,
            scattering_table="n_gaussian")
        fmodel.show(log=out)
        f_model = fmodel.f_model()
        r_free_flags = f_model.customized_copy(data=fmodel.arrays.free_sel)
    else:
        if (f_model.anomalous_flag()):
            f_model = f_model.average_bijvoet_mates()
        f_model, r_free_flags = f_model.common_sets(other=r_free_flags)
    stats = mmtbx.validation.experimental.merging_and_model_statistics(
        f_model=f_model,
        f_obs=f_obs,
        r_free_flags=r_free_flags,
        unmerged_i_obs=unmerged_i_obs,
        n_bins=params.n_bins,
        sigma_filtering=params.sigma_filtering)
    stats.show_cc_star(out=out)
    if (params.loggraph):
        stats.show_loggraph(out=out)
    print >> out, ""
    print >> out, "Reference:"
    print >> out, " Karplus PA & Diederichs K (2012) Science 336:1030-3."
    print >> out, ""
    return stats
def exercise():
    """Exercise ``density_sampling.screen_residue`` with shear moves enabled.

    Everything is read from and written to the current working directory:

    1. ``generate_inputs()`` is called first (presumably it writes
       ``shear_frag.pdb`` and ``shear_frag_single.pdb`` -- confirm in that
       helper).
    2. ``phenix.fmodel`` is run on ``shear_frag_fmodel.eff`` and must produce
       ``shear_frag.mtz``; the first two Miller arrays of that file are used
       as F-obs and R-free flags.
    3. Each residue group of each chain is screened with ``backrub=True`` and
       ``shear=True``. Every conformer returned is applied to the atom
       coordinates and a detached copy of the model is stored.
    4. The stored models are written to ``shear_frag_naive_ensemble.pdb``.

    Raises ``AssertionError`` when ``phenix.fmodel`` fails, when the MTZ file
    is missing, or when the expected report row for the second residue is
    absent.
    """
    from mmtbx.building.alternate_conformations import density_sampling
    from mmtbx.utils import fmodel_simple
    from mmtbx.monomer_library import server
    from iotbx import file_reader
    import iotbx.pdb.hierarchy
    generate_inputs()
    # PHIL parameter text: one definition per line, so that each value ends
    # where the next definition begins.
    fmodel_params = """
high_resolution = 1.2
r_free_flags_fraction = 0.1
add_sigmas = True
pdb_file = shear_frag.pdb
output {
  label = F
  type = *real complex
  file_name = shear_frag.mtz
}
fmodel.k_sol = 0.3
fmodel.b_sol = 20
"""
    # Close the parameter file before the external program reads it.
    with open("shear_frag_fmodel.eff", "w") as eff_file:
        eff_file.write(fmodel_params)
    assert (easy_run.fully_buffered("phenix.fmodel shear_frag_fmodel.eff"
                                    ).raise_if_errors().return_code == 0)
    assert os.path.isfile("shear_frag.mtz")
    mtz_in = file_reader.any_file("shear_frag.mtz")
    f_obs = mtz_in.file_server.miller_arrays[0]
    flags = mtz_in.file_server.miller_arrays[1]
    # Turn the integer flag column into booleans (True where the flag is 1).
    flags = flags.customized_copy(data=(flags.data() == 1))
    mon_lib_srv = server.server()
    pdb_in = iotbx.pdb.hierarchy.input(file_name="shear_frag_single.pdb")
    hierarchy = pdb_in.hierarchy
    pdb_atoms = hierarchy.atoms()
    pdb_atoms.reset_i_seq()
    sites_cart = pdb_atoms.extract_xyz()
    xrs = pdb_in.input.xray_structure_simple()
    fmodel = fmodel_simple(
        f_obs=f_obs,
        xray_structures=[xrs],
        scattering_table="n_gaussian",
        r_free_flags=flags,
        skip_twin_detection=True)
    models = []
    prev_res = next_res = next_next_res = None
    for chain in hierarchy.only_model().chains():
        residue_groups = chain.residue_groups()
        n_rg = len(residue_groups)  # should be 4
        for i_res, residue_group in enumerate(residue_groups):
            # Neighbours are the first atom group of the following residues,
            # or None near the end of the chain.
            next_res = next_next_res = None
            if (i_res < (n_rg - 1)):
                next_res = residue_groups[i_res + 1].atom_groups()[0]
            if (i_res < (n_rg - 2)):
                next_next_res = residue_groups[i_res + 2].atom_groups()[0]
            atom_groups = residue_group.atom_groups()
            primary_conf = atom_groups[0]
            out = StringIO()
            confs = density_sampling.screen_residue(
                residue=primary_conf,
                prev_residue=prev_res,
                next_residue=next_res,
                next_next_residue=next_next_res,
                sites_cart=sites_cart,
                fmodel=fmodel,
                mon_lib_srv=mon_lib_srv,
                params=None,
                backrub=True,
                shear=True,
                verbose=True,
                out=out)
            prev_res = primary_conf
            if (confs is None):
                continue
            if (i_res == 1):
                # The bare string used to be asserted on its own, which is
                # always true. Check it against the captured report instead,
                # on whitespace-normalised text so column padding is
                # irrelevant.
                # NOTE(review): confirm this row is really what
                # screen_residue reports for the second residue.
                report = " ".join(out.getvalue().split())
                assert ("A ILE 7 None 4.0 mt" in report)
            for conf in confs:
                sites_new = sites_cart.set_selected(conf.sites_selection,
                                                    conf.sites_selected())
                pdb_atoms.set_xyz(sites_new)
                models.append(hierarchy.only_model().detached_copy())
    new_hierarchy = iotbx.pdb.hierarchy.root()
    for i_model, conf in enumerate(models):
        conf.id = str(i_model + 1)
        new_hierarchy.append_model(conf)
    with open("shear_frag_naive_ensemble.pdb", "w") as pdb_out:
        pdb_out.write(new_hierarchy.as_pdb_string())
def run(args,
        command_name="mmtbx.model_vs_data",
        show_geometry_statistics=True,
        model_size_max_atoms=80000,
        data_size_max_reflections=1000000,
        unit_cell_max_dimension=800.,
        return_fmodel_and_pdb=False,
        out=None,
        log=sys.stdout):
    """Compare a PDB model against reflection data and collect statistics.

    :param args: command-line arguments (PDB file(s), reflection file(s) and
      parameters). An empty list or ``["--help"]`` prints the help text and
      the defaults to ``log`` and returns ``None``.
    :param command_name: not referenced in this function.
    :param show_geometry_statistics: forwarded to ``show_geometry``; its
      negation is passed as ``ignore_unknown_nonbonded_energy_types`` when
      the PDB file is processed.
    :param model_size_max_atoms: not referenced in this function.
    :param data_size_max_reflections: reflection count above which ``Sorry``
      is raised when ``params.ignore_giant_models_and_datasets`` is set.
    :param unit_cell_max_dimension: largest allowed cell edge, enforced under
      the same parameter.
    :param return_fmodel_and_pdb: when true, the processed PDB file and the
      fmodel manager are attached to the result as ``pdb_file`` and
      ``fmodel``.
    :param out: stream passed to ``mvd_obj.show`` and to the CC* report.
      NOTE(review): the default is None -- confirm the callees accept that.
    :param log: stream for progress output and the resolution-bin table.
    :returns: the populated ``mvd`` result object.
    :raises Sorry: when no reflection file, crystal symmetry or PDB file is
      found, or when the data set or unit cell exceeds the limits above.

    Side effects: may write map coefficient MTZ files (one per entry in
    ``params.map``) and a pickle of the result object to the current
    directory.
    """
    import mmtbx.f_model.f_model_info
    if (len(args) == 0) or (args == ["--help"]):
        print >> log, msg
        defaults(log=log, silent=False)
        return
    parsed = defaults(log=log, silent=True)
    #
    mvd_obj = mvd()
    #
    processed_args = utils.process_command_line_args(
        args=args,
        log=log,
        master_params=parsed)
    params = processed_args.params.extract()
    #
    reflection_files = processed_args.reflection_files
    if (len(reflection_files) == 0):
        raise Sorry("No reflection file found.")
    crystal_symmetry = processed_args.crystal_symmetry
    if (crystal_symmetry is None):
        raise Sorry("No crystal symmetry found.")
    if (len(processed_args.pdb_file_names) == 0):
        raise Sorry("No PDB file found.")
    pdb_file_names = processed_args.pdb_file_names
    #
    rfs = reflection_file_server(
        crystal_symmetry=crystal_symmetry,
        reflection_files=reflection_files)
    parameters = utils.data_and_flags_master_params().extract()
    if (params.f_obs_label is not None):
        parameters.labels = params.f_obs_label
    if (params.r_free_flags_label is not None):
        parameters.r_free_flags.label = params.r_free_flags_label
    if (params.high_resolution is not None):
        parameters.high_resolution = params.high_resolution
    # The log of this step goes to a throw-away buffer.
    determine_data_and_flags_result = utils.determine_data_and_flags(
        reflection_file_server=rfs,
        parameters=parameters,
        data_parameter_scope="refinement.input.xray_data",
        flags_parameter_scope="refinement.input.xray_data.r_free_flags",
        data_description="X-ray data",
        keep_going=True,
        log=StringIO())
    f_obs = determine_data_and_flags_result.f_obs
    number_of_reflections = f_obs.indices().size()
    if (params.ignore_giant_models_and_datasets and
            number_of_reflections > data_size_max_reflections):
        raise Sorry("Too many reflections: %d" % number_of_reflections)
    #
    max_unit_cell_dimension = max(f_obs.unit_cell().parameters()[:3])
    if (params.ignore_giant_models_and_datasets and
            max_unit_cell_dimension > unit_cell_max_dimension):
        raise Sorry("Too large unit cell (max dimension): %s" %
                    str(max_unit_cell_dimension))
    #
    r_free_flags = determine_data_and_flags_result.r_free_flags
    test_flag_value = determine_data_and_flags_result.test_flag_value
    if (r_free_flags is None):
        # No flags in the file: use an all-False flag array.
        r_free_flags = f_obs.array(data=flex.bool(f_obs.data().size(), False))
        test_flag_value = None
    #
    mmtbx_pdb_file = mmtbx.utils.pdb_file(
        pdb_file_names=pdb_file_names,
        cif_objects=processed_args.cif_objects,
        crystal_symmetry=crystal_symmetry,
        use_neutron_distances=(params.scattering_table == "neutron"),
        ignore_unknown_nonbonded_energy_types=not show_geometry_statistics,
        log=log)
    mmtbx_pdb_file.set_ppf(stop_if_duplicate_labels=False)
    processed_pdb_file = mmtbx_pdb_file.processed_pdb_file
    pdb_raw_records = mmtbx_pdb_file.pdb_raw_records
    pdb_inp = mmtbx_pdb_file.pdb_inp
    #
    # just to avoid going any further with bad PDB file....
    pdb_inp.xray_structures_simple()
    #
    acp = processed_pdb_file.all_chain_proxies
    atom_selections = group_args(
        all=acp.selection(string="all"),
        macromolecule=acp.selection(string="protein or dna or rna"),
        solvent=acp.selection(string="water"),  # XXX single_atom_residue
        ligand=acp.selection(string="not (protein or dna or rna or water)"),
        backbone=acp.selection(string="backbone"),
        sidechain=acp.selection(string="sidechain"))
    #
    scattering_table = params.scattering_table
    exptl_method = pdb_inp.get_experiment_type()
    # The experiment type recorded in the PDB file overrides the parameter.
    if (exptl_method is not None) and ("NEUTRON" in exptl_method):
        scattering_table = "neutron"
    xsfppf = mmtbx.utils.xray_structures_from_processed_pdb_file(
        processed_pdb_file=processed_pdb_file,
        scattering_table=scattering_table,
        d_min=f_obs.d_min())
    xray_structures = xsfppf.xray_structures
    if (0):  #XXX normalize occupancies if all models have occ=1 so the total=1
        n_models = len(xray_structures)
        for xrs in xray_structures:
            occ = xrs.scatterers().extract_occupancies()
            occ = occ / n_models
            xrs.set_occupancies(occ)
    model_selections = xsfppf.model_selections
    mvd_obj.collect(crystal=group_args(
        uc=f_obs.unit_cell(),
        sg=f_obs.crystal_symmetry().space_group_info().symbol_and_number(),
        n_sym_op=f_obs.crystal_symmetry().space_group_info().type().group(
        ).order_z(),
        uc_vol=f_obs.unit_cell().volume()))
    #
    hierarchy = pdb_inp.construct_hierarchy()
    pdb_atoms = hierarchy.atoms()
    pdb_atoms.reset_i_seq()
    #
    # Extract TLS
    pdb_inp_tls = pdb_inp.extract_tls_params(hierarchy)
    pdb_tls = group_args(
        pdb_inp_tls=pdb_inp_tls,
        tls_selections=[],
        tls_selection_strings=[])
    # XXX no TLS + multiple models
    if (pdb_inp_tls.tls_present and pdb_inp_tls.error_string is None and
            len(xray_structures) == 1):
        pdb_tls = mmtbx.tls.tools.extract_tls_from_pdb(
            pdb_inp_tls=pdb_inp_tls,
            all_chain_proxies=mmtbx_pdb_file.processed_pdb_file.
            all_chain_proxies,
            xray_structure=xsfppf.xray_structure_all)
        if (len(pdb_tls.tls_selections) == len(pdb_inp_tls.tls_params) and
                len(pdb_inp_tls.tls_params) > 0):
            xray_structures = [
                utils.extract_tls_and_u_total_from_pdb(
                    f_obs=f_obs,
                    r_free_flags=r_free_flags,
                    xray_structure=xray_structures[0],  # XXX no TLS + multiple models
                    tls_selections=pdb_tls.tls_selections,
                    tls_groups=pdb_inp_tls.tls_params)
            ]
    ###########################
    geometry_statistics = show_geometry(
        xray_structures=xray_structures,
        processed_pdb_file=processed_pdb_file,
        scattering_table=scattering_table,
        hierarchy=hierarchy,
        model_selections=model_selections,
        show_geometry_statistics=show_geometry_statistics,
        mvd_obj=mvd_obj,
        atom_selections=atom_selections)
    ###########################
    mp = mmtbx.masks.mask_master_params.extract()
    f_obs_labels = f_obs.info().label_string()
    f_obs = f_obs.sort(reverse=True, by_value="packed_indices")
    r_free_flags = r_free_flags.sort(reverse=True, by_value="packed_indices")
    fmodel = utils.fmodel_simple(
        xray_structures=xray_structures,
        scattering_table=scattering_table,
        mask_params=mp,
        f_obs=f_obs,
        r_free_flags=r_free_flags,
        skip_twin_detection=params.skip_twin_detection)
    # Reflections present in the input but absent from the fmodel manager.
    n_outl = f_obs.data().size() - fmodel.f_obs().data().size()
    mvd_obj.collect(model_vs_data=show_model_vs_data(fmodel))
    # Extract information from PDB file header and output (if any)
    pub_r_work = None
    pub_r_free = None
    pub_high = None
    pub_low = None
    pub_sigma = None
    pub_program_name = None
    pub_solv_cont = None
    pub_matthews = None
    published_results = pdb_inp.get_r_rfree_sigma(file_name=pdb_file_names[0])
    if (published_results is not None):
        pub_r_work = published_results.r_work
        pub_r_free = published_results.r_free
        pub_high = published_results.high
        pub_low = published_results.low
        pub_sigma = published_results.sigma
    pub_program_name = pdb_inp.get_program_name()
    pub_solv_cont = pdb_inp.get_solvent_content()
    pub_matthews = pdb_inp.get_matthews_coeff()
    mvd_obj.collect(pdb_header=group_args(
        program_name=pub_program_name,
        year=pdb_inp.extract_header_year(),
        r_work=pub_r_work,
        r_free=pub_r_free,
        high_resolution=pub_high,
        low_resolution=pub_low,
        sigma_cutoff=pub_sigma,
        matthews_coeff=pub_matthews,
        solvent_cont=pub_solv_cont,
        tls=pdb_tls,
        exptl_method=exptl_method))
    #
    # Recompute R-factors using published cutoffs
    fmodel_cut = fmodel
    tmp_sel = flex.bool(fmodel.f_obs().data().size(), True)
    if (pub_sigma is not None and fmodel.f_obs().sigmas() is not None):
        tmp_sel &= fmodel.f_obs().data() > fmodel.f_obs().sigmas() * pub_sigma
    # Resolution cutoffs are applied only when they differ from the data
    # limits by more than 0.03.
    if (pub_high is not None and
            abs(pub_high - fmodel.f_obs().d_min()) > 0.03):
        tmp_sel &= fmodel.f_obs().d_spacings().data() > pub_high
    if (pub_low is not None and
            abs(pub_low - fmodel.f_obs().d_max_min()[0]) > 0.03):
        tmp_sel &= fmodel.f_obs().d_spacings().data() < pub_low
    # Rebuild the manager only when the cutoffs remove some, but not all,
    # reflections.
    if (tmp_sel.count(True) != tmp_sel.size() and tmp_sel.count(True) > 0):
        fmodel_cut = utils.fmodel_simple(
            xray_structures=xray_structures,
            scattering_table=scattering_table,
            f_obs=fmodel.f_obs().select(tmp_sel),
            r_free_flags=fmodel.r_free_flags().select(tmp_sel),
            skip_twin_detection=params.skip_twin_detection)
    mvd_obj.collect(misc=group_args(
        r_work_cutoff=fmodel_cut.r_work(),
        r_free_cutoff=fmodel_cut.r_free(),
        n_refl_cutoff=fmodel_cut.f_obs().data().size()))
    mvd_obj.collect(data=show_data(
        fmodel=fmodel,
        n_outl=n_outl,
        test_flag_value=test_flag_value,
        f_obs_labels=f_obs_labels,
        fmodel_cut=fmodel_cut))
    # CC* and friends
    cc_star_stats = None
    if (params.unmerged_data is not None):
        import mmtbx.validation.experimental
        import mmtbx.command_line
        f_obs = fmodel.f_obs().average_bijvoet_mates()
        unmerged_i_obs = mmtbx.command_line.load_and_validate_unmerged_data(
            f_obs=f_obs,
            file_name=params.unmerged_data,
            data_labels=params.unmerged_labels,
            log=null_out())
        cc_star_stats = \
            mmtbx.validation.experimental.merging_and_model_statistics(
                f_model=fmodel.f_model().average_bijvoet_mates(),
                f_obs=f_obs,
                r_free_flags=fmodel.r_free_flags().average_bijvoet_mates(),
                unmerged_i_obs=unmerged_i_obs,
                n_bins=params.n_bins)
    mvd_obj.show(log=out)
    if (cc_star_stats is not None):
        cc_star_stats.show_model_vs_data(out=out, prefix=" ")
    if return_fmodel_and_pdb:
        mvd_obj.pdb_file = processed_pdb_file
        mvd_obj.fmodel = fmodel
    if (len(params.map) > 0):
        for map_name_string in params.map:
            map_type_obj = mmtbx.map_names(map_name_string=map_name_string)
            map_params = mmtbx.maps.map_and_map_coeff_master_params().fetch(
                mmtbx.maps.cast_map_coeff_params(map_type_obj)).extract()
            maps_obj = mmtbx.maps.compute_map_coefficients(
                fmodel=fmodel_cut,
                params=map_params.map_coefficients)
            fn = os.path.basename(processed_args.reflection_file_names[0])
            # Everything before the first "." (the whole name if there is
            # none).
            prefix = fn.split(".", 1)[0]
            file_name = prefix + "_%s_map_coeffs.mtz" % map_type_obj.format()
            maps_obj.write_mtz_file(file_name=file_name)
    # statistics in bins
    if (not fmodel.twin):
        print >> log, "Statistics in resolution bins:"
        mmtbx.f_model.f_model_info.r_work_and_completeness_in_resolution_bins(
            fmodel=fmodel,
            out=log,
            prefix=" ")
    # report map cc
    if (params.comprehensive and not fmodel_cut.twin and
            fmodel_cut.xray_structure is not None):
        rsc_params = real_space_correlation.master_params().extract()
        rsc_params.scattering_table = scattering_table
        real_space_correlation.simple(
            fmodel=fmodel_cut,
            pdb_hierarchy=hierarchy,
            params=rsc_params,
            log=log,
            show_results=True)
    #
    if (params.dump_result_object_as_pickle):
        # The pickle name joins the unique base names (up to the first ".")
        # of all PDB and reflection files, in input order.
        output_prefixes = []
        for op in (processed_args.pdb_file_names +
                   processed_args.reflection_file_names):
            op = os.path.basename(op).split(".", 1)[0]
            if (op not in output_prefixes):
                output_prefixes.append(op)
        output_prefix = "_".join(output_prefixes)
        easy_pickle.dump("%s.pickle" % output_prefix, mvd_obj)
    return mvd_obj
def run(self, args, command_name, out=sys.stdout):
    """Score each model of an ensemble PDB file against the ensemble average.

    :param args: command-line arguments: exactly one PDB file, optionally a
      reflection file, plus parameters for the ``ensemble_probability``
      scope of ``master_phil``.
    :param command_name: program name used in the usage line and the log
      header.
    :param out: not referenced in this method; output goes to ``self.log``.
    :raises Sorry: when the number of PDB files is not one, when
      ``fobs_vs_fcalc_post_nll`` is set without a reflection file, when the
      sigma map input is not an MTZ file, or when neither a reflection file
      nor ``fcalc_high_resolution`` is available.

    Two modes, selected by the parameters:

    * ``assign_sigma_from_map`` with ``ensemble_sigma_map_input`` set: the
      result of ``get_map_sigma`` against the first Miller array of that MTZ
      file is written for every model.
    * otherwise: a per-model Fc map and the ensemble-averaged Fc map are
      computed, the ratio of the two ``get_map_sigma`` results (clamped as
      shown below) is stored as occupancy, and optionally the models are
      ranked by the sum of ``-log`` of that ratio.

    Side effects: sets ``self.params`` and ``self.log``, replaces
    ``sys.stderr`` with the log object, opens ``<pdb root>_pensemble.log``
    (left open because it backs ``self.log``) and writes the ensemble PDB
    file(s) and optional Fc MTZ files to the current directory.
    """
    command_line = (iotbx_option_parser(
        usage="%s [options]" % command_name,
        description='Example: %s data.mtz data.mtz ref_model.pdb'%command_name)
        .option(None, "--show_defaults",
                action="store_true",
                help="Show list of parameters.")
        ).process(args=args)
    processed_args = utils.process_command_line_args(
        args = args,
        log = sys.stdout,
        master_params = master_phil)
    params = processed_args.params
    if(params is None): params = master_phil
    self.params = params.extract().ensemble_probability
    pdb_file_names = processed_args.pdb_file_names
    if len(pdb_file_names) != 1 :
        raise Sorry("Only one PDB structure may be used")
    pdb_file = file_reader.any_file(pdb_file_names[0])
    # Base name used for every output file of this run.
    pdb_root = pdb_file_names[0].split('/')[-1].replace('.pdb','')
    self.log = multi_out()
    self.log.register(label="stdout", file_object=sys.stdout)
    self.log.register(
        label="log_buffer",
        file_object=StringIO(),
        atexit_send_to=None)
    sys.stderr = self.log
    log_file = open(pdb_root + '_pensemble.log', "w")
    self.log.replace_stringio(
        old_label="log_buffer",
        new_label="log",
        new_file_object=log_file)
    utils.print_header(command_name, out = self.log)
    params.show(out = self.log)
    #
    f_obs = None
    r_free_flags = None
    reflection_files = processed_args.reflection_files
    if self.params.fobs_vs_fcalc_post_nll:
        if len(reflection_files) == 0:
            raise Sorry("Fobs from input MTZ required for fobs_vs_fcalc_post_nll")
    if len(reflection_files) > 0:
        crystal_symmetry = processed_args.crystal_symmetry
        print >> self.log, 'Reflection file : ', processed_args.reflection_file_names[0]
        utils.print_header("Model and data statistics", out = self.log)
        rfs = reflection_file_server(
            crystal_symmetry = crystal_symmetry,
            reflection_files = processed_args.reflection_files,
            log = self.log)
        parameters = utils.data_and_flags_master_params().extract()
        determine_data_and_flags_result = utils.determine_data_and_flags(
            reflection_file_server = rfs,
            parameters = parameters,
            data_parameter_scope = "refinement.input.xray_data",
            flags_parameter_scope = "refinement.input.xray_data.r_free_flags",
            data_description = "X-ray data",
            keep_going = True,
            log = self.log)
        f_obs = determine_data_and_flags_result.f_obs
        number_of_reflections = f_obs.indices().size()
        r_free_flags = determine_data_and_flags_result.r_free_flags
        test_flag_value = determine_data_and_flags_result.test_flag_value
        if(r_free_flags is None):
            r_free_flags=f_obs.array(data=flex.bool(f_obs.data().size(), False))
    # process PDB
    pdb_file.assert_file_type("pdb")
    #
    pdb_in = hierarchy.input(file_name=pdb_file.file_name)
    ens_pdb_hierarchy = pdb_in.construct_hierarchy()
    ens_pdb_hierarchy.atoms().reset_i_seq()
    ens_pdb_xrs_s = pdb_in.input.xray_structures_simple()
    number_structures = len(ens_pdb_xrs_s)
    print >> self.log, 'Number of structure in ensemble : ', number_structures
    # Calculate sigmas from input map only
    if self.params.assign_sigma_from_map and self.params.ensemble_sigma_map_input is not None:
        # process MTZ
        input_file = file_reader.any_file(self.params.ensemble_sigma_map_input)
        if input_file.file_type == "hkl" :
            if input_file.file_object.file_type() != "ccp4_mtz" :
                raise Sorry("Only MTZ format accepted for map input")
            else:
                mtz_file = input_file
        else:
            raise Sorry("Only MTZ format accepted for map input")
        miller_arrays = mtz_file.file_server.miller_arrays
        map_coeffs_1 = miller_arrays[0]
        #
        xrs_list = []
        for ens_pdb_xrs in ens_pdb_xrs_s:
            # get sigma levels from ensemble fc for each structure
            xrs = get_map_sigma(ens_pdb_hierarchy = ens_pdb_hierarchy,
                                ens_pdb_xrs = ens_pdb_xrs,
                                map_coeffs_1 = map_coeffs_1,
                                residue_detail = self.params.residue_detail,
                                ignore_hd = self.params.ignore_hd,
                                log = self.log)
            xrs_list.append(xrs)
        # write ensemble pdb file, occupancies as sigma level
        filename = pdb_root + '_vs_' + self.params.ensemble_sigma_map_input.replace('.mtz','') + '_pensemble.pdb'
        write_ensemble_pdb(filename = filename,
                           xrs_list = xrs_list,
                           ens_pdb_hierarchy = ens_pdb_hierarchy
                           )
    # Do full analysis vs Fobs
    else:
        model_map_coeffs = []
        fmodel = None
        # Get <fcalc>
        for model, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
            ens_pdb_xrs.set_occupancies(1.0)
            if model == 0:
                # If mtz not supplied get fobs from xray structure...
                # Use input Fobs for scoring against nll
                if self.params.fobs_vs_fcalc_post_nll:
                    dummy_fobs = f_obs
                else:
                    # Identity tests: f_obs is an array object, so "==" is
                    # not a reliable way to test for None.
                    if f_obs is None:
                        if self.params.fcalc_high_resolution is None:
                            raise Sorry("Please supply high resolution limit or input mtz file.")
                        dummy_dmin = self.params.fcalc_high_resolution
                        dummy_dmax = self.params.fcalc_low_resolution
                    else:
                        print >> self.log, 'Supplied mtz used to determine high and low resolution cuttoffs'
                        dummy_dmax, dummy_dmin = f_obs.d_max_min()
                    #
                    dummy_fobs = abs(ens_pdb_xrs.structure_factors(d_min = dummy_dmin).f_calc())
                    dummy_fobs.set_observation_type_xray_amplitude()
                    # If mtz supplied, free flags are over written to prevent array size error
                    r_free_flags = dummy_fobs.array(data=flex.bool(dummy_fobs.data().size(),False))
                #
                fmodel = utils.fmodel_simple(
                    scattering_table = "wk1995",
                    xray_structures = [ens_pdb_xrs],
                    f_obs = dummy_fobs,
                    target_name = 'ls',
                    bulk_solvent_and_scaling = False,
                    r_free_flags = r_free_flags
                    )
                # Zero-valued array with the f_calc indices; reused below as
                # a template for averaged arrays.
                f_calc_ave = fmodel.f_calc().array(data = fmodel.f_calc().data()*0).deep_copy()
                # XXX Important to ensure scale is identical for each model and <model>
                # NOTE(review): this assigns an attribute rather than
                # calling a method -- confirm that is what fmodel expects.
                fmodel.set_scale_switch = 1.0
                f_calc_ave_total = fmodel.f_calc().data().deep_copy()
            else:
                fmodel.update_xray_structure(xray_structure = ens_pdb_xrs,
                                             update_f_calc = True,
                                             update_f_mask = False)
                f_calc_ave_total += fmodel.f_calc().data().deep_copy()
            print >> self.log, 'Model :', model+1
            print >> self.log, "\nStructure vs real Fobs (no bulk solvent or scaling)"
            print >> self.log, 'Rwork : %5.4f '%fmodel.r_work()
            print >> self.log, 'Rfree : %5.4f '%fmodel.r_free()
            print >> self.log, 'K1 : %5.4f '%fmodel.scale_k1()
            fcalc_edm = fmodel.electron_density_map()
            fcalc_map_coeffs = fcalc_edm.map_coefficients(map_type = 'Fc')
            fcalc_mtz_dataset = fcalc_map_coeffs.as_mtz_dataset(column_root_label ='Fc')
            if self.params.output_model_and_model_ave_mtz:
                fcalc_mtz_dataset.mtz_object().write(file_name = str(model+1)+"_Fc.mtz")
            model_map_coeffs.append(fcalc_map_coeffs.deep_copy())
        fmodel.update(f_calc = f_calc_ave.array(f_calc_ave_total / number_structures))
        print >> self.log, "\nEnsemble vs real Fobs (no bulk solvent or scaling)"
        print >> self.log, 'Rwork : %5.4f '%fmodel.r_work()
        print >> self.log, 'Rfree : %5.4f '%fmodel.r_free()
        print >> self.log, 'K1 : %5.4f '%fmodel.scale_k1()
        # Get <Fcalc> map
        fcalc_ave_edm = fmodel.electron_density_map()
        fcalc_ave_map_coeffs = fcalc_ave_edm.map_coefficients(map_type = 'Fc').deep_copy()
        fcalc_ave_mtz_dataset = fcalc_ave_map_coeffs.as_mtz_dataset(column_root_label ='Fc')
        if self.params.output_model_and_model_ave_mtz:
            fcalc_ave_mtz_dataset.mtz_object().write(file_name = "aveFc.mtz")
        # From here on the name holds the FFT map, not the coefficients.
        fcalc_ave_map_coeffs = fcalc_ave_map_coeffs.fft_map()
        fcalc_ave_map_coeffs.apply_volume_scaling()
        fcalc_ave_map_data = fcalc_ave_map_coeffs.real_map_unpadded()
        fcalc_ave_map_stats = maptbx.statistics(fcalc_ave_map_data)
        print >> self.log, "<Fcalc> Map Stats :"
        fcalc_ave_map_stats.show_summary(f = self.log)
        offset = fcalc_ave_map_stats.min()
        model_neg_ll = []
        number_previous_scatters = 0
        # Run through structure list again and get probability
        xrs_list = []
        for model, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
            if self.params.verbose:
                print >> self.log, '\n\nModel : ', model+1
            # Get model atom sigmas vs Fcalc
            fcalc_map = model_map_coeffs[model].fft_map()
            fcalc_map.apply_volume_scaling()
            fcalc_map_data = fcalc_map.real_map_unpadded()
            fcalc_map_stats = maptbx.statistics(fcalc_map_data)
            # NOTE(review): the summary is kept under the verbose switch
            # together with its heading; the original nesting could not be
            # recovered from the collapsed source -- confirm.
            if self.params.verbose:
                print >> self.log, "Fcalc map stats :"
                fcalc_map_stats.show_summary(f = self.log)
            xrs = get_map_sigma(ens_pdb_hierarchy = ens_pdb_hierarchy,
                                ens_pdb_xrs = ens_pdb_xrs,
                                fft_map_1 = fcalc_map,
                                model_i = model,
                                residue_detail = self.params.residue_detail,
                                ignore_hd = self.params.ignore_hd,
                                number_previous_scatters = number_previous_scatters,
                                log = self.log)
            fcalc_sigmas = xrs.scatterers().extract_occupancies()
            del fcalc_map
            # Get model atom sigmas vs <Fcalc>
            xrs = get_map_sigma(ens_pdb_hierarchy = ens_pdb_hierarchy,
                                ens_pdb_xrs = ens_pdb_xrs,
                                fft_map_1 = fcalc_ave_map_coeffs,
                                model_i = model,
                                residue_detail = self.params.residue_detail,
                                ignore_hd = self.params.ignore_hd,
                                number_previous_scatters = number_previous_scatters,
                                log = self.log)
            fcalc_ave_sigmas = xrs.scatterers().extract_occupancies()
            # Probability of model given <model>
            prob = fcalc_ave_sigmas / fcalc_sigmas
            # Set probabilty between 0 and 1
            # XXX Make Histogram / more stats
            # Values <= 0 become 0.001 so that the logarithm below stays
            # finite; values > 1 become 1.0.
            prob_lss_zero = flex.bool(prob <= 0)
            prob_grt_one = flex.bool(prob > 1)
            prob.set_selected(prob_lss_zero, 0.001)
            prob.set_selected(prob_grt_one, 1.0)
            xrs.set_occupancies(prob)
            xrs_list.append(xrs)
            sum_neg_ll = sum(-flex.log(prob))
            model_neg_ll.append((sum_neg_ll, model))
            if self.params.verbose:
                print >> self.log, 'Model probability stats :'
                print >> self.log, prob.min_max_mean().show()
                print >> self.log, ' Count < 0.0 : ', prob_lss_zero.count(True)
                print >> self.log, ' Count > 1.0 : ', prob_grt_one.count(True)
            # For averaging by residue
            number_previous_scatters += ens_pdb_xrs.sites_cart().size()
        # write ensemble pdb file, occupancies as sigma level
        write_ensemble_pdb(filename = pdb_root + '_pensemble.pdb',
                           xrs_list = xrs_list,
                           ens_pdb_hierarchy = ens_pdb_hierarchy
                           )
        # XXX Test ordering models by nll
        # XXX Test removing nth percentile atoms
        if self.params.sort_ensemble_by_nll_score or self.params.fobs_vs_fcalc_post_nll:
            for percentile in [1.0,0.975,0.95,0.9,0.8,0.6,0.2]:
                model_neg_ll = sorted(model_neg_ll)
                f_calc_ave_total_reordered = None
                print_list = []
                for i_neg_ll in model_neg_ll:
                    xrs = xrs_list[i_neg_ll[1]]
                    nll_occ = xrs.scatterers().extract_occupancies()
                    # Set q=0 nth percentile atoms
                    sorted_nll_occ = sorted(nll_occ, reverse=True)
                    number_atoms = len(sorted_nll_occ)
                    percentile_prob_cutoff = sorted_nll_occ[int(number_atoms * percentile)-1]
                    cutoff_selections = flex.bool(nll_occ < percentile_prob_cutoff)
                    cutoff_nll_occ = flex.double(nll_occ.size(), 1.0).set_selected(cutoff_selections, 0.0)
                    xrs.set_occupancies(cutoff_nll_occ)
                    fmodel.update_xray_structure(xray_structure = xrs,
                                                 update_f_calc = True,
                                                 update_f_mask = True)
                    # Running sums over the models processed so far.
                    if f_calc_ave_total_reordered is None:
                        f_calc_ave_total_reordered = fmodel.f_calc().data().deep_copy()
                        f_mask_ave_total_reordered = fmodel.f_masks()[0].data().deep_copy()
                        cntr = 1
                    else:
                        f_calc_ave_total_reordered += fmodel.f_calc().data().deep_copy()
                        f_mask_ave_total_reordered += fmodel.f_masks()[0].data().deep_copy()
                        cntr+=1
                    fmodel.update(f_calc = f_calc_ave.array(f_calc_ave_total_reordered / cntr).deep_copy(),
                                  f_mask = f_calc_ave.array(f_mask_ave_total_reordered / cntr).deep_copy()
                                  )
                    # Update solvent and scale
                    # XXX Will need to apply_back_trace on latest version
                    fmodel.set_scale_switch = 0
                    fmodel.update_all_scales()
                    # Reset occ for outout
                    xrs.set_occupancies(nll_occ)
                    # k1 updated vs Fobs
                    if self.params.fobs_vs_fcalc_post_nll:
                        print_list.append([cntr, i_neg_ll[0], i_neg_ll[1], fmodel.r_work(), fmodel.r_free()])
                # Order models by nll and print summary
                print >> self.log, '\nModels ranked by nll <Fcalc> R-factors recalculated'
                print >> self.log, 'Percentile cutoff : {0:5.3f}'.format(percentile)
                xrs_list_sorted_nll = []
                print >> self.log, ' | NLL <Rw> <Rf> Ens Model'
                for info in print_list:
                    print >> self.log, ' {0:4d} | {1:8.1f} {2:8.4f} {3:8.4f} {4:12d}'.format(
                        info[0],
                        info[1],
                        info[3],
                        info[4],
                        info[2]+1,
                        )
                    xrs_list_sorted_nll.append(xrs_list[info[2]])
            # Output nll ordered ensemble
            # NOTE(review): print_list is filled only when
            # fobs_vs_fcalc_post_nll is set, so with sort_ensemble_by_nll_score
            # alone this list is empty -- confirm the intended behaviour.
            write_ensemble_pdb(filename = 'nll_ordered_' + pdb_root + '_pensemble.pdb',
                               xrs_list = xrs_list_sorted_nll,
                               ens_pdb_hierarchy = ens_pdb_hierarchy
                               )