def run(args, command_name="phenix.tls"):
  """Command-line entry point for phenix.tls (input-processing stage).

  Parses options, echoes defaults/banner as requested, processes the
  command-line arguments into parameters, and validates that a crystal
  symmetry (with unit cell and space group) and at least one coordinate
  file are available before parsing the PDB input.

  :param args: list of command-line argument strings
  :param command_name: program name used in usage/help text
  :raises Sorry: on missing symmetry, missing/empty PDB input, missing
      CRYST1 record, or a model-format error
  """
  if (len(args) == 0): args = ["--help"]
  usage_fmt = "%s pdb_file [parameters: file or command line string]"
  command_line = (iotbx_option_parser(
    usage=usage_fmt % command_name,
    description=banner)
    .option("--show_defaults",
      action="store_true",
      help="Do not output to the screen (except errors).")
    .option("--silent",
      action="store_true",
      help="Suppress output to the screen.")
    ).process(args=args)
  #
  log = sys.stdout
  if (not command_line.options.silent):
    utils.print_header("TLS tools", out=log)
  if (command_line.options.show_defaults):
    master_params.show(out=log)
    print(file=log)
    return
  if (not command_line.options.silent):
    print(banner, file=log)
  #
  processed_args = utils.process_command_line_args(
    args=command_line.args, master_params=master_params, log=log)
  reflection_files = processed_args.reflection_files
  if (processed_args.crystal_symmetry is None):
    raise Sorry("No crystal symmetry found.")
  if (len(processed_args.pdb_file_names) == 0):
    raise Sorry("No PDB file found.")
  params = processed_args.params
  if (not command_line.options.silent):
    utils.print_header("Input parameters", out=log)
    params.show(out=log)
  params = params.extract()
  #
  if (processed_args.crystal_symmetry.unit_cell() is None or
      processed_args.crystal_symmetry.space_group() is None):
    raise Sorry("No CRYST1 record found.")
  # Merge all coordinate files, rejecting duplicates, before parsing.
  pdb_combined = iotbx.pdb.combine_unique_pdb_files(
    file_names=processed_args.pdb_file_names)
  pdb_combined.report_non_unique(out=log)
  if (len(pdb_combined.unique_file_names) == 0):
    raise Sorry("No coordinate file given.")
  raw_records = pdb_combined.raw_records
  try:
    pdb_inp = iotbx.pdb.input(source_info=None,
                              lines=flex.std_string(raw_records))
  except ValueError as e:
    raise Sorry("Model format (PDB or mmCIF) error:\n%s" % str(e))
def run(self, args, command_name, out=sys.stdout):
  """Ensemble-probability analysis driver.

  Reads exactly one multi-model (ensemble) PDB file, plus optional MTZ
  reflection data, then takes one of two paths:

  * if ``params.assign_sigma_from_map`` and an input map MTZ are given,
    assigns per-atom sigma levels from that map and writes them as
    occupancies to a ``*_pensemble.pdb`` file;
  * otherwise computes per-model Fcalc maps and the ensemble-average
    <Fcalc> map, converts the sigma ratio into a per-atom probability
    (clamped to (0, 1]), writes the probability-annotated ensemble, and
    optionally re-ranks models by negative log-likelihood at several
    percentile cutoffs.

  Side effects: redirects ``sys.stderr`` into a ``multi_out`` log, writes
  a ``<pdb>_pensemble.log`` file and one or more PDB/MTZ output files.

  :raises Sorry: if more than one PDB is given, required MTZ data is
      missing, or the map input is not MTZ format.
  """
  # Parsed only so --show_defaults is handled; real parameters come from
  # processed_args below.
  command_line = (iotbx_option_parser(
    usage="%s [options]" % command_name,
    description='Example: %s data.mtz data.mtz ref_model.pdb' % command_name)
    .option(None, "--show_defaults",
      action="store_true",
      help="Show list of parameters.")
    ).process(args=args)
  processed_args = utils.process_command_line_args(
    args=args, log=sys.stdout, master_params=master_phil)
  params = processed_args.params
  if (params is None): params = master_phil
  self.params = params.extract().ensemble_probability
  pdb_file_names = processed_args.pdb_file_names
  if len(pdb_file_names) != 1:
    raise Sorry("Only one PDB structure may be used")
  pdb_file = file_reader.any_file(pdb_file_names[0])
  # Tee output to stdout and a per-run log file named after the PDB.
  self.log = multi_out()
  self.log.register(label="stdout", file_object=sys.stdout)
  self.log.register(
    label="log_buffer",
    file_object=StringIO(),
    atexit_send_to=None)
  sys.stderr = self.log
  log_file = open(
    pdb_file_names[0].split('/')[-1].replace('.pdb', '') + '_pensemble.log',
    "w")
  self.log.replace_stringio(
    old_label="log_buffer",
    new_label="log",
    new_file_object=log_file)
  utils.print_header(command_name, out=self.log)
  params.show(out=self.log)
  #
  f_obs = None
  r_free_flags = None
  reflection_files = processed_args.reflection_files
  if self.params.fobs_vs_fcalc_post_nll:
    if len(reflection_files) == 0:
      raise Sorry("Fobs from input MTZ required for fobs_vs_fcalc_post_nll")
  if len(reflection_files) > 0:
    crystal_symmetry = processed_args.crystal_symmetry
    print('Reflection file : ',
          processed_args.reflection_file_names[0],
          file=self.log)
    utils.print_header("Model and data statistics", out=self.log)
    rfs = reflection_file_server(
      crystal_symmetry=crystal_symmetry,
      reflection_files=processed_args.reflection_files,
      log=self.log)
    parameters = extract_xtal_data.data_and_flags_master_params().extract()
    determine_data_and_flags_result = extract_xtal_data.run(
      reflection_file_server=rfs,
      parameters=parameters,
      data_parameter_scope="refinement.input.xray_data",
      flags_parameter_scope="refinement.input.xray_data.r_free_flags",
      data_description="X-ray data",
      keep_going=True,
      log=self.log)
    f_obs = determine_data_and_flags_result.f_obs
    r_free_flags = determine_data_and_flags_result.r_free_flags
    if (r_free_flags is None):
      # No flags in input: mark every reflection as "work".
      r_free_flags = f_obs.array(
        data=flex.bool(f_obs.data().size(), False))
  # process PDB
  pdb_file.assert_file_type("pdb")
  #
  pdb_in = hierarchy.input(file_name=pdb_file.file_name)
  ens_pdb_hierarchy = pdb_in.construct_hierarchy()
  ens_pdb_hierarchy.atoms().reset_i_seq()
  ens_pdb_xrs_s = pdb_in.input.xray_structures_simple()
  number_structures = len(ens_pdb_xrs_s)
  print('Number of structure in ensemble : ',
        number_structures, file=self.log)
  # Calculate sigmas from input map only
  if self.params.assign_sigma_from_map and self.params.ensemble_sigma_map_input is not None:
    # process MTZ
    input_file = file_reader.any_file(self.params.ensemble_sigma_map_input)
    if input_file.file_type == "hkl":
      if input_file.file_object.file_type() != "ccp4_mtz":
        raise Sorry("Only MTZ format accepted for map input")
      else:
        mtz_file = input_file
    else:
      raise Sorry("Only MTZ format accepted for map input")
    miller_arrays = mtz_file.file_server.miller_arrays
    # First Miller array in the file is taken as the map coefficients.
    map_coeffs_1 = miller_arrays[0]
    #
    xrs_list = []
    for n, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
      # get sigma levels from ensemble fc for each structure
      xrs = get_map_sigma(ens_pdb_hierarchy=ens_pdb_hierarchy,
                          ens_pdb_xrs=ens_pdb_xrs,
                          map_coeffs_1=map_coeffs_1,
                          residue_detail=self.params.residue_detail,
                          ignore_hd=self.params.ignore_hd,
                          log=self.log)
      xrs_list.append(xrs)
    # write ensemble pdb file, occupancies as sigma level
    filename = (pdb_file_names[0].split('/')[-1].replace('.pdb', '')
                + '_vs_'
                + self.params.ensemble_sigma_map_input.replace('.mtz', '')
                + '_pensemble.pdb')
    write_ensemble_pdb(filename=filename,
                       xrs_list=xrs_list,
                       ens_pdb_hierarchy=ens_pdb_hierarchy)
  # Do full analysis vs Fobs
  else:
    model_map_coeffs = []
    fmodel = None
    # Get <fcalc>: accumulate each model's Fcalc into a running sum.
    for model, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
      ens_pdb_xrs.set_occupancies(1.0)
      if model == 0:
        # If mtz not supplied get fobs from xray structure...
        # Use input Fobs for scoring against nll
        if self.params.fobs_vs_fcalc_post_nll:
          dummy_fobs = f_obs
        else:
          if f_obs is None:
            if self.params.fcalc_high_resolution is None:
              raise Sorry(
                "Please supply high resolution limit or input mtz file.")
            dummy_dmin = self.params.fcalc_high_resolution
            dummy_dmax = self.params.fcalc_low_resolution
          else:
            print(
              'Supplied mtz used to determine high and low resolution cuttoffs',
              file=self.log)
            dummy_dmax, dummy_dmin = f_obs.d_max_min()
          #
          dummy_fobs = abs(
            ens_pdb_xrs.structure_factors(d_min=dummy_dmin).f_calc())
          dummy_fobs.set_observation_type_xray_amplitude()
          # If mtz supplied, free flags are over written to prevent array size error
          r_free_flags = dummy_fobs.array(
            data=flex.bool(dummy_fobs.data().size(), False))
        #
        fmodel = utils.fmodel_simple(
          scattering_table="wk1995",
          xray_structures=[ens_pdb_xrs],
          f_obs=dummy_fobs,
          target_name='ls',
          bulk_solvent_and_scaling=False,
          r_free_flags=r_free_flags)
        # Zero-valued array reused later as a template for averages.
        f_calc_ave = fmodel.f_calc().array(
          data=fmodel.f_calc().data() * 0).deep_copy()
        # XXX Important to ensure scale is identical for each model and <model>
        fmodel.set_scale_switch = 1.0
        f_calc_ave_total = fmodel.f_calc().data().deep_copy()
      else:
        fmodel.update_xray_structure(xray_structure=ens_pdb_xrs,
                                     update_f_calc=True,
                                     update_f_mask=False)
        f_calc_ave_total += fmodel.f_calc().data().deep_copy()
      print('Model :', model + 1, file=self.log)
      print("\nStructure vs real Fobs (no bulk solvent or scaling)",
            file=self.log)
      print('Rwork : %5.4f ' % fmodel.r_work(), file=self.log)
      print('Rfree : %5.4f ' % fmodel.r_free(), file=self.log)
      print('K1 : %5.4f ' % fmodel.scale_k1(), file=self.log)
      fcalc_edm = fmodel.electron_density_map()
      fcalc_map_coeffs = fcalc_edm.map_coefficients(map_type='Fc')
      fcalc_mtz_dataset = fcalc_map_coeffs.as_mtz_dataset(
        column_root_label='Fc')
      if self.params.output_model_and_model_ave_mtz:
        fcalc_mtz_dataset.mtz_object().write(
          file_name=str(model + 1) + "_Fc.mtz")
      model_map_coeffs.append(fcalc_map_coeffs.deep_copy())
    # Replace Fcalc with the ensemble average <Fcalc>.
    fmodel.update(f_calc=f_calc_ave.array(
      f_calc_ave_total / number_structures))
    print("\nEnsemble vs real Fobs (no bulk solvent or scaling)",
          file=self.log)
    print('Rwork : %5.4f ' % fmodel.r_work(), file=self.log)
    print('Rfree : %5.4f ' % fmodel.r_free(), file=self.log)
    print('K1 : %5.4f ' % fmodel.scale_k1(), file=self.log)
    # Get <Fcalc> map
    fcalc_ave_edm = fmodel.electron_density_map()
    fcalc_ave_map_coeffs = fcalc_ave_edm.map_coefficients(
      map_type='Fc').deep_copy()
    fcalc_ave_mtz_dataset = fcalc_ave_map_coeffs.as_mtz_dataset(
      column_root_label='Fc')
    if self.params.output_model_and_model_ave_mtz:
      fcalc_ave_mtz_dataset.mtz_object().write(file_name="aveFc.mtz")
    fcalc_ave_map_coeffs = fcalc_ave_map_coeffs.fft_map()
    fcalc_ave_map_coeffs.apply_volume_scaling()
    fcalc_ave_map_data = fcalc_ave_map_coeffs.real_map_unpadded()
    fcalc_ave_map_stats = maptbx.statistics(fcalc_ave_map_data)
    print("<Fcalc> Map Stats :", file=self.log)
    fcalc_ave_map_stats.show_summary(f=self.log)
    model_neg_ll = []
    number_previous_scatters = 0
    # Run through structure list again and get probability
    xrs_list = []
    for model, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
      if self.params.verbose:
        print('\n\nModel : ', model + 1, file=self.log)
      # Get model atom sigmas vs Fcalc
      fcalc_map = model_map_coeffs[model].fft_map()
      fcalc_map.apply_volume_scaling()
      fcalc_map_data = fcalc_map.real_map_unpadded()
      fcalc_map_stats = maptbx.statistics(fcalc_map_data)
      if self.params.verbose:
        print("Fcalc map stats :", file=self.log)
        fcalc_map_stats.show_summary(f=self.log)
      xrs = get_map_sigma(
        ens_pdb_hierarchy=ens_pdb_hierarchy,
        ens_pdb_xrs=ens_pdb_xrs,
        fft_map_1=fcalc_map,
        model_i=model,
        residue_detail=self.params.residue_detail,
        ignore_hd=self.params.ignore_hd,
        number_previous_scatters=number_previous_scatters,
        log=self.log)
      fcalc_sigmas = xrs.scatterers().extract_occupancies()
      del fcalc_map
      # Get model atom sigmas vs <Fcalc>
      xrs = get_map_sigma(
        ens_pdb_hierarchy=ens_pdb_hierarchy,
        ens_pdb_xrs=ens_pdb_xrs,
        fft_map_1=fcalc_ave_map_coeffs,
        model_i=model,
        residue_detail=self.params.residue_detail,
        ignore_hd=self.params.ignore_hd,
        number_previous_scatters=number_previous_scatters,
        log=self.log)
      ### For testing other residue averaging options
      #print xrs.residue_selections
      fcalc_ave_sigmas = xrs.scatterers().extract_occupancies()
      # Probability of model given <model>
      prob = fcalc_ave_sigmas / fcalc_sigmas
      # XXX debug option
      if False:
        for n, p in enumerate(prob):
          print(' {0:5d} {1:5.3f}'.format(n, p), file=self.log)
      # Set probabilty between 0 and 1
      # XXX Make Histogram / more stats
      prob_lss_zero = flex.bool(prob <= 0)
      prob_grt_one = flex.bool(prob > 1)
      prob.set_selected(prob_lss_zero, 0.001)
      prob.set_selected(prob_grt_one, 1.0)
      xrs.set_occupancies(prob)
      xrs_list.append(xrs)
      sum_neg_ll = sum(-flex.log(prob))
      model_neg_ll.append((sum_neg_ll, model))
      if self.params.verbose:
        print('Model probability stats :', file=self.log)
        # NOTE(review): min_max_mean().show() likely prints itself and
        # returns None, making this print "None" — verify against the
        # scitbx API before changing.
        print(prob.min_max_mean().show(), file=self.log)
        print(' Count < 0.0 : ', prob_lss_zero.count(True), file=self.log)
        print(' Count > 1.0 : ', prob_grt_one.count(True), file=self.log)
      # For averaging by residue
      number_previous_scatters += ens_pdb_xrs.sites_cart().size()
    # write ensemble pdb file, occupancies as sigma level
    write_ensemble_pdb(
      filename=pdb_file_names[0].split('/')[-1].replace('.pdb', '')
               + '_pensemble.pdb',
      xrs_list=xrs_list,
      ens_pdb_hierarchy=ens_pdb_hierarchy)
    # XXX Test ordering models by nll
    # XXX Test removing nth percentile atoms
    if self.params.sort_ensemble_by_nll_score or self.params.fobs_vs_fcalc_post_nll:
      for percentile in [1.0, 0.975, 0.95, 0.9, 0.8, 0.6, 0.2]:
        model_neg_ll = sorted(model_neg_ll)
        f_calc_ave_total_reordered = None
        print_list = []
        for i_neg_ll in model_neg_ll:
          xrs = xrs_list[i_neg_ll[1]]
          nll_occ = xrs.scatterers().extract_occupancies()
          # Set q=0 nth percentile atoms
          sorted_nll_occ = sorted(nll_occ, reverse=True)
          number_atoms = len(sorted_nll_occ)
          percentile_prob_cutoff = sorted_nll_occ[
            int(number_atoms * percentile) - 1]
          cutoff_selections = flex.bool(nll_occ < percentile_prob_cutoff)
          cutoff_nll_occ = flex.double(nll_occ.size(), 1.0).set_selected(
            cutoff_selections, 0.0)
          #XXX Debug
          if False:
            print('\nDebug')
            for x in range(len(cutoff_selections)):
              print(cutoff_selections[x], nll_occ[x], cutoff_nll_occ[x])
            print(percentile)
            print(percentile_prob_cutoff)
            print(cutoff_selections.count(True))
            print(cutoff_selections.size())
            print(cutoff_nll_occ.count(0.0))
            print('Count q = 1 : ', cutoff_nll_occ.count(1.0))
            print('Count scatterers size : ', cutoff_nll_occ.size())
          xrs.set_occupancies(cutoff_nll_occ)
          fmodel.update_xray_structure(xray_structure=xrs,
                                       update_f_calc=True,
                                       update_f_mask=True)
          if f_calc_ave_total_reordered is None:
            f_calc_ave_total_reordered = fmodel.f_calc().data().deep_copy()
            f_mask_ave_total_reordered = fmodel.f_masks()[0].data().deep_copy()
            cntr = 1
          else:
            f_calc_ave_total_reordered += fmodel.f_calc().data().deep_copy()
            f_mask_ave_total_reordered += fmodel.f_masks()[0].data().deep_copy()
            cntr += 1
          fmodel.update(
            f_calc=f_calc_ave.array(
              f_calc_ave_total_reordered / cntr).deep_copy(),
            f_mask=f_calc_ave.array(
              f_mask_ave_total_reordered / cntr).deep_copy())
          # Update solvent and scale
          # XXX Will need to apply_back_trace on latest version
          fmodel.set_scale_switch = 0
          fmodel.update_all_scales()
          # Reset occ for output
          xrs.set_occupancies(nll_occ)
          # k1 updated vs Fobs
          if self.params.fobs_vs_fcalc_post_nll:
            print_list.append([
              cntr, i_neg_ll[0], i_neg_ll[1],
              fmodel.r_work(), fmodel.r_free()])
        # Order models by nll and print summary
        print('\nModels ranked by nll <Fcalc> R-factors recalculated',
              file=self.log)
        print('Percentile cutoff : {0:5.3f}'.format(percentile),
              file=self.log)
        xrs_list_sorted_nll = []
        print(' | NLL <Rw> <Rf> Ens Model', file=self.log)
        for info in print_list:
          print(' {0:4d} | {1:8.1f} {2:8.4f} {3:8.4f} {4:12d}'.format(
            info[0],
            info[1],
            info[3],
            info[4],
            info[2] + 1,
          ), file=self.log)
          xrs_list_sorted_nll.append(xrs_list[info[2]])
        # Output nll ordered ensemble
        write_ensemble_pdb(
          filename='nll_ordered_'
                   + pdb_file_names[0].split('/')[-1].replace('.pdb', '')
                   + '_pensemble.pdb',
          xrs_list=xrs_list_sorted_nll,
          ens_pdb_hierarchy=ens_pdb_hierarchy)
def run(args, command_name="phenix.tls"):
  """Command-line entry point for phenix.tls (full variant).

  Reads a PDB file (plus optional parameters), extracts TLS groups from
  the PDB header or from user-supplied selections, and optionally:
  * extract_tls: fits TLS matrices to the B-factors of the selected atom
    groups and subtracts the TLS contribution from the ADPs;
  * combine_tls: folds B_tls into B_residual.
  The (possibly modified) structure is written to an output PDB file.

  :param args: list of command-line argument strings
  :param command_name: program name used in usage/help text
  :raises Sorry: on missing symmetry/CRYST1, missing PDB input,
      conflicting TLS selection sources, or conflicting modes
  """
  if (len(args) == 0): args = ["--help"]
  usage_fmt = "%s pdb_file [parameters: file or command line string]"
  command_line = (iotbx_option_parser(
    usage=usage_fmt % command_name,
    description=banner)
    .option("--show_defaults",
      action="store_true",
      help="Do not output to the screen (except errors).")
    .option("--silent",
      action="store_true",
      help="Suppress output to the screen.")
    ).process(args=args)
  #
  log = sys.stdout
  if (not command_line.options.silent):
    utils.print_header("TLS tools", out=log)
  if (command_line.options.show_defaults):
    master_params.show(out=log)
    print(file=log)
    return
  if (not command_line.options.silent):
    print(banner, file=log)
  #
  processed_args = utils.process_command_line_args(
    args=command_line.args, master_params=master_params, log=log)
  reflection_files = processed_args.reflection_files
  if (processed_args.crystal_symmetry is None):
    raise Sorry("No crystal symmetry found.")
  if (len(processed_args.pdb_file_names) == 0):
    raise Sorry("No PDB file found.")
  params = processed_args.params
  if (not command_line.options.silent):
    utils.print_header("Input parameters", out=log)
    params.show(out=log)
  params = params.extract()
  #
  if (processed_args.crystal_symmetry.unit_cell() is None or
      processed_args.crystal_symmetry.space_group() is None):
    raise Sorry("No CRYST1 record found.")
  mmtbx_pdb_file = utils.pdb_file(
    pdb_file_names=processed_args.pdb_file_names,
    cif_objects=processed_args.cif_objects,
    crystal_symmetry=processed_args.crystal_symmetry,
    log=log)
  #
  if (not command_line.options.silent):
    utils.print_header("TLS groups from PDB file header", out=log)
  pdb_inp_tls = mmtbx.tls.tools.tls_from_pdb_inp(
    remark_3_records=mmtbx_pdb_file.pdb_inp.extract_remark_iii_records(3),
    pdb_hierarchy=mmtbx_pdb_file.pdb_inp.construct_hierarchy())
  #
  tls_groups = []
  if (pdb_inp_tls.tls_present):
    if (pdb_inp_tls.error_string is not None):
      raise Sorry(pdb_inp_tls.error_string)
    mmtbx_pdb_file.set_ppf()
    xray_structure = get_xrs_helper(
      mmtbx_pdb_file=mmtbx_pdb_file, log=log,
      silent=command_line.options.silent)
    pdb_tls = mmtbx.tls.tools.extract_tls_from_pdb(
      pdb_inp_tls=pdb_inp_tls,
      all_chain_proxies=mmtbx_pdb_file.processed_pdb_file.all_chain_proxies,
      xray_structure=xray_structure)
    tls_groups = pdb_tls.pdb_inp_tls.tls_params
  #
  tls_selections_strings = []
  #
  if (len(tls_groups) == 0 and not command_line.options.silent):
    print("No TLS groups found in PDB file header.", file=log)
  else:
    for i_seq, tls_group in enumerate(tls_groups):
      tls_selections_strings.append(tls_group.selection_string)
      if (not command_line.options.silent):
        print("TLS group %d: %s" % (i_seq + 1, tls_group.selection_string),
              file=log)
        mmtbx.tls.tools.show_tls_one_group(tlso=tls_group, out=log)
        print(file=log)
  #
  # Header-derived and parameter-derived selections are mutually exclusive.
  if (len(tls_selections_strings) > 0 and len(params.selection) > 0):
    raise Sorry(
      "Two TLS selection sources found: PDB file header and parameters.")
  if (len(params.selection) > 0):
    tls_selections_strings = params.selection
    xray_structure = get_xrs_helper(
      mmtbx_pdb_file=mmtbx_pdb_file, log=log,
      silent=command_line.options.silent)
  if ([params.combine_tls, params.extract_tls].count(True) > 1):
    raise Sorry("Cannot simultaneously pereform: combine_tls and extract_tls")
  if ([params.combine_tls, params.extract_tls].count(True) > 0):
    if (len(tls_selections_strings) == 0):
      raise Sorry("No TLS selections found.")
  #
  if (len(tls_selections_strings)):
    if (not command_line.options.silent):
      utils.print_header("TLS groups selections", out=log)
    selections = utils.get_atom_selections(
      all_chain_proxies=mmtbx_pdb_file.processed_pdb_file.all_chain_proxies,
      selection_strings=tls_selections_strings,
      xray_structure=xray_structure)
    if (not command_line.options.silent):
      print("Number of TLS groups: ", len(selections), file=log)
      print("Number of atoms: %d" % xray_structure.scatterers().size(),
            file=log)
    n_atoms_in_tls = 0
    for sel_a in selections:
      n_atoms_in_tls += sel_a.size()
    if (not command_line.options.silent):
      print("Number of atoms in TLS groups: %d" % n_atoms_in_tls, file=log)
      print(file=log)
    assert len(tls_selections_strings) == len(selections)
    if (not command_line.options.silent):
      for sel_a, sel_s in zip(selections, tls_selections_strings):
        print("Selection string:\n%s" % sel_s, file=log)
        print("selects %d atoms." % sel_a.size(), file=log)
        print(file=log)
      print("Ready-to-use in phenix.refine:\n", file=log)
      for sel_a, sel_s in zip(selections, tls_selections_strings):
        print(sel_s, file=log)
  #
  # Derive the output file name from the input PDB unless given explicitly;
  # ofn stays None (no output) when neither mode is selected.
  ofn = params.output_file_name
  if (ofn is None):
    ofn = os.path.splitext(
      os.path.basename(processed_args.pdb_file_names[0]))[0]
    if (len(processed_args.pdb_file_names) > 1):
      ofn = ofn + "_el_al"
    if (params.combine_tls):
      ofn = ofn + "_combine_tls.pdb"
    elif (params.extract_tls):
      ofn = ofn + "_extract_tls.pdb"
    else:
      ofn = None
  if (ofn is not None):
    ofo = open(ofn, "w")
  #
  if (params.extract_tls):
    utils.print_header(
      "Fit TLS matrices to B-factors of selected sets of atoms", out=log)
    tlsos = mmtbx.tls.tools.generate_tlsos(
      selections=selections,
      xray_structure=xray_structure,
      value=0.0)
    # Alternate which of T/L/S is refined each macro-pass; the schedule
    # is repeated 10 times.
    for rt, rl, rs in [[1,0,1],[1,1,1],[0,1,1],
                       [1,0,0],[0,1,0],[0,0,1],[1,1,1],
                       [0,0,1]]*10:
      tlsos = mmtbx.tls.tools.tls_from_uanisos(
        xray_structure=xray_structure,
        selections=selections,
        tlsos_initial=tlsos,
        number_of_macro_cycles=10,
        max_iterations=100,
        refine_T=rt,
        refine_L=rl,
        refine_S=rs,
        enforce_positive_definite_TL=params.enforce_positive_definite_TL,
        verbose=-1,
        out=log)
    mmtbx.tls.tools.show_tls(tlsos=tlsos, out=log)
    u_cart_from_tls = mmtbx.tls.tools.u_cart_from_tls(
      sites_cart=xray_structure.sites_cart(),
      selections=selections,
      tlsos=tlsos)
    unit_cell = xray_structure.unit_cell()
    # Subtract the TLS contribution from each scatterer's ADP.
    for i_seq, sc in enumerate(xray_structure.scatterers()):
      if (u_cart_from_tls[i_seq] != (0,0,0,0,0,0)):
        u_star_tls = adptbx.u_cart_as_u_star(
          unit_cell, tuple(u_cart_from_tls[i_seq]))
        sc.u_star = tuple(
          flex.double(sc.u_star) - flex.double(u_star_tls))
    for sel in selections:
      xray_structure.convert_to_isotropic(selection=sel)
    mmtbx.tls.tools.remark_3_tls(
      tlsos=tlsos,
      selection_strings=tls_selections_strings,
      out=ofo)
  #
  if (params.combine_tls):
    utils.print_header("Combine B_tls with B_residual", out=log)
    mmtbx.tls.tools.combine_tls_and_u_local(
      xray_structure=xray_structure,
      tls_selections=selections,
      tls_groups=tls_groups)
    print("All done.", file=log)
  #
  if (ofn is not None):
    utils.print_header("Write output PDB file %s" % ofn, out=log)
    utils.write_pdb_file(
      xray_structure=xray_structure,
      pdb_hierarchy=
        mmtbx_pdb_file.processed_pdb_file.all_chain_proxies.pdb_hierarchy,
      out=ofo)
    ofo.close()
    print("All done.", file=log)
def kinetic_energy_stats(self):
  """Print kinetic-energy statistics for non-solvent atoms.

  Uses ``self.ensemble_obj.er_data.ke_protein_running`` (per-atom running
  KE). Writes to ``self.ensemble_obj.log``:
  * summary statistics (min/max/mean/sdev/skew);
  * the (up to) 25 lowest- and 25 highest-KE atoms with their deviation
    from the mean in sdev units and cumulative frequency;
  * a KE histogram via ``self.bivariate_histogram`` (top 10% of values
    folded into the last bin's upper bound).
  No-op if ``ensemble_obj`` or its running-KE array is None.
  """
  if self.ensemble_obj is not None:
    if self.ensemble_obj.er_data.ke_protein_running is not None:
      utils.print_header(
        line="Non-solvent KE Statistics | MC : "
             + str(self.ensemble_obj.macro_cycle),
        out=self.ensemble_obj.log)
      ke_basic_stats = scitbx.math.basic_statistics(
        self.ensemble_obj.er_data.ke_protein_running)
      print(' {0:<11} {1:>12} {2:>12} {3:>12} {4:>12} {5:>12} '.format(
        '', 'min', 'max', 'mean', 'sdev', 'skew'),
        file=self.ensemble_obj.log)
      print(' KE MC {0:<5}: {1:12.3f} {2:12.3f} {3:12.3f} {4:12.3f} {5:12.3f}'.format(
        self.ensemble_obj.macro_cycle,
        ke_basic_stats.min,
        ke_basic_stats.max,
        ke_basic_stats.mean,
        ke_basic_stats.biased_standard_deviation,
        ke_basic_stats.skew), file=self.ensemble_obj.log)
      # Pair each KE value with its atom index so atoms can be ranked.
      ke_atom_number_tuple_list = []
      ke_list_histo = []
      for n, ke in enumerate(self.ensemble_obj.er_data.ke_protein_running):
        ke_atom_number_tuple_list.append((n, ke))
        ke_list_histo.append(ke)
      assert len(ke_atom_number_tuple_list) == len(
        self.ensemble_obj.er_data.ke_protein_running)
      sorted_by_ke_ke_atom_number_tuple_list = \
        sorted(ke_atom_number_tuple_list, key=lambda ke: ke[-1])
      ke_list_histo = sorted(ke_list_histo)
      # Lowest KE Atoms
      pdb_atoms = self.ensemble_obj.pdb_hierarchy().atoms()
      print("\nNon-solvent atoms lowest KE : ", file=self.ensemble_obj.log)
      print(' {0:3} : {1:>44} {2:>12} {3:>12}'.format(
        'rank', 'KE', 'dmean/sdev', '%cum freq'),
        file=self.ensemble_obj.log)
      cntr = 0
      # At most 25 atoms, and never more than half the list.
      lowest_range = min(25, int(0.5 * len(ke_atom_number_tuple_list)))
      while cntr < lowest_range:
        atom_info = pdb_atoms[
          sorted_by_ke_ke_atom_number_tuple_list[cntr][0]].fetch_labels()
        assert atom_info.i_seq == \
          sorted_by_ke_ke_atom_number_tuple_list[cntr][0]
        print(' {0:5} : {1:6} {2:6} {3:6} {4:6} {5:6} {6:9.3f} {7:12.3f} {8:12.1f}'.format(
          cntr + 1,
          sorted_by_ke_ke_atom_number_tuple_list[cntr][0],
          atom_info.name,
          atom_info.resname,
          atom_info.chain_id,
          atom_info.resseq,
          sorted_by_ke_ke_atom_number_tuple_list[cntr][1],
          (sorted_by_ke_ke_atom_number_tuple_list[cntr][1]
           - ke_basic_stats.mean)
          / ke_basic_stats.biased_standard_deviation,
          100 * (float(cntr) / float(len(ke_atom_number_tuple_list)))),
          file=self.ensemble_obj.log)
        cntr += 1
      # Highest KE Atoms
      print("\nNon-solvent atoms highest KE : ", file=self.ensemble_obj.log)
      print(' {0:3} : {1:>44} {2:>12} {3:>12}'.format(
        'rank', 'KE', 'dmean/sdev', '%cum freq'),
        file=self.ensemble_obj.log)
      cntr = len(ke_atom_number_tuple_list) \
        - min(25, int(0.5 * len(ke_atom_number_tuple_list)))
      while cntr < len(ke_atom_number_tuple_list):
        atom_info = pdb_atoms[
          sorted_by_ke_ke_atom_number_tuple_list[cntr][0]].fetch_labels()
        assert atom_info.i_seq == \
          sorted_by_ke_ke_atom_number_tuple_list[cntr][0]
        print(' {0:5} : {1:6} {2:6} {3:6} {4:6} {5:6} {6:9.3f} {7:12.3f} {8:12.1f}'.format(
          cntr + 1,
          sorted_by_ke_ke_atom_number_tuple_list[cntr][0],
          atom_info.name,
          atom_info.resname,
          atom_info.chain_id,
          atom_info.resseq,
          sorted_by_ke_ke_atom_number_tuple_list[cntr][1],
          (sorted_by_ke_ke_atom_number_tuple_list[cntr][1]
           - ke_basic_stats.mean)
          / ke_basic_stats.biased_standard_deviation,
          100 * (float(cntr) / float(len(ke_atom_number_tuple_list)))),
          file=self.ensemble_obj.log)
        cntr += 1
      #XXX Add print stats by for <ke>/residue
      # Histogram: bins computed on the lowest 90% of values, then the
      # last bin is stretched to cover the true maximum.
      bin_list, bin_range_list = self.bin_generator_equal_range(
        array=ke_list_histo[:-int(0.1 * (len(ke_list_histo)))],
        number_of_bins=50)
      bin_range_list[-1][1] = max(ke_list_histo)
      self.bivariate_histogram(
        bin_array=ke_list_histo,
        value_array=ke_list_histo,
        name='KE Histogram',
        bin_list=bin_list,
        bin_range_list=bin_range_list)
      print("|" + "-" * 77 + "|\n", file=self.ensemble_obj.log)
def ensemble_mean_geometry_stats(self, restraints_manager, xray_structure, ensemble_xray_structures, ignore_hd = True, verbose = False, out = None, return_pdb_string = False): if (out is None): out = sys.stdout if verbose: utils.print_header("Ensemble mean geometry statistics", out = out) ensemble_size = len(ensemble_xray_structures) print("Ensemble size : ", ensemble_size, file=out) # Dictionaries to store deltas ensemble_bond_deltas = {} ensemble_angle_deltas = {} ensemble_chirality_deltas = {} ensemble_planarity_deltas = {} ensemble_dihedral_deltas = {} # List to store rmsd of each model structures_bond_rmsd = flex.double() structures_angle_rmsd = flex.double() structures_chirality_rmsd = flex.double() structures_planarity_rmsd = flex.double() structures_dihedral_rmsd = flex.double() # Remove water and hd atoms from global restraints manager selection = flex.bool() for sc in xray_structure.scatterers(): if sc.label.find('HOH') > -1: selection.append(True) else: selection.append(False) if ignore_hd: hd_selection = xray_structure.hd_selection() assert hd_selection.size() == selection.size() for n in range(hd_selection.size()): if hd_selection[n] or selection[n]: selection[n] = True restraints_manager = restraints_manager.select(selection = ~selection) # Get all deltas for n, structure in enumerate(ensemble_xray_structures): if verbose: print("\nModel : ", n+1, file=out) sites_cart = structure.sites_cart() # Remove water and hd atoms from individual structures sites cart selection = flex.bool() for sc in structure.scatterers(): if sc.label.find('HOH') > -1: selection.append(True) else: selection.append(False) if ignore_hd: hd_selection = structure.hd_selection() assert hd_selection.size() == selection.size() for n in range(hd_selection.size()): if hd_selection[n] or selection[n]: selection[n] = True sites_cart = sites_cart.select(~selection) assert sites_cart is not None site_labels = None energies_sites = restraints_manager.energies_sites( sites_cart = sites_cart, 
compute_gradients = False) # Rmsd of individual model bond_rmsd = energies_sites.geometry.bond_deviations()[2] angle_rmsd = energies_sites.geometry.angle_deviations()[2] chirality_rmsd = energies_sites.geometry.chirality_deviations()[2] planarity_rmsd = energies_sites.geometry.planarity_deviations()[2] dihedral_rmsd = energies_sites.geometry.dihedral_deviations()[2] structures_bond_rmsd.append(bond_rmsd) structures_angle_rmsd.append(angle_rmsd) structures_chirality_rmsd.append(chirality_rmsd) structures_planarity_rmsd.append(planarity_rmsd) structures_dihedral_rmsd.append(dihedral_rmsd) if verbose: print(" Model RMSD", file=out) print(" bond : %.6g" % bond_rmsd, file=out) print(" angle : %.6g" % angle_rmsd, file=out) print(" chirality : %.6g" % chirality_rmsd, file=out) print(" planarity : %.6g" % planarity_rmsd, file=out) print(" dihedral : %.6g" % dihedral_rmsd, file=out) # Bond pair_proxies = restraints_manager.geometry.pair_proxies(flags=None, sites_cart=sites_cart) assert pair_proxies is not None if verbose: pair_proxies.bond_proxies.show_histogram_of_deltas( sites_cart = sites_cart, n_slots = 10, f = out) for proxy in pair_proxies.bond_proxies.simple: bond_simple_proxy = geometry_restraints.bond( sites_cart = sites_cart, proxy = proxy) if proxy.i_seqs in ensemble_bond_deltas: ensemble_bond_deltas[proxy.i_seqs][0]+=bond_simple_proxy.delta ensemble_bond_deltas[proxy.i_seqs][1]+=1 else: ensemble_bond_deltas[proxy.i_seqs] = [bond_simple_proxy.delta, 1] if verbose: print("bond simple :", proxy.i_seqs, file=out) print(" distance_ideal : %.6g" % proxy.distance_ideal, file=out) print(" distance_model : %.6g" % bond_simple_proxy.distance_model, file=out) print(" detla : %.6g" % bond_simple_proxy.delta, file=out) if (pair_proxies.bond_proxies.asu.size() > 0): asu_mappings = pair_proxies.bond_proxies.asu_mappings() for proxy in pair_proxies.bond_proxies.asu: rt_mx = asu_mappings.get_rt_mx_ji(pair=proxy) bond_asu_proxy = geometry_restraints.bond( sites_cart = sites_cart, 
asu_mappings = asu_mappings, proxy = proxy) proxy_i_seqs = (proxy.i_seq, proxy.j_seq) if proxy_i_seqs in ensemble_bond_deltas: ensemble_bond_deltas[proxy_i_seqs][0]+=bond_asu_proxy.delta ensemble_bond_deltas[proxy_i_seqs][1]+=1 else: ensemble_bond_deltas[proxy_i_seqs] = [bond_asu_proxy.delta, 1] if verbose: print("bond asu :", (proxy.i_seq, proxy.j_seq), rt_mx, file=out) print(" distance_ideal : %.6g" % proxy.distance_ideal, file=out) print(" distance_model : %.6g" % bond_asu_proxy.distance_model, file=out) print(" delta : %.6g" % bond_asu_proxy.delta, file=out) # Angle if verbose: restraints_manager.geometry.angle_proxies.show_histogram_of_deltas( sites_cart = sites_cart, n_slots = 10, f = out) for proxy in restraints_manager.geometry.angle_proxies: angle_proxy = geometry_restraints.angle( sites_cart = sites_cart, proxy = proxy) if proxy.i_seqs in ensemble_angle_deltas: ensemble_angle_deltas[proxy.i_seqs][0]+=angle_proxy.delta ensemble_angle_deltas[proxy.i_seqs][1]+=1 else: ensemble_angle_deltas[proxy.i_seqs] = [angle_proxy.delta, 1] if verbose: print("angle : ", proxy.i_seqs, file=out) print(" angle_ideal : %.6g" % proxy.angle_ideal, file=out) print(" angle_model : %.6g" % angle_proxy.angle_model, file=out) print(" delta : %.6g" % angle_proxy.delta, file=out) # Chirality if verbose: restraints_manager.geometry.chirality_proxies.show_histogram_of_deltas( sites_cart = sites_cart, n_slots = 10, f = out) for proxy in restraints_manager.geometry.chirality_proxies: chirality_proxy = geometry_restraints.chirality( sites_cart = sites_cart, proxy = proxy) if proxy.i_seqs in ensemble_chirality_deltas: ensemble_chirality_deltas[proxy.i_seqs][0]+=chirality_proxy.delta ensemble_chirality_deltas[proxy.i_seqs][1]+=1 else: ensemble_chirality_deltas[proxy.i_seqs] = [chirality_proxy.delta, 1] if verbose: print("chirality : ", proxy.i_seqs, file=out) print(" chirality_ideal : %.6g" % proxy.volume_ideal, file=out) print(" chirality_model : %.6g" % chirality_proxy.volume_model, 
file=out) print(" chirality : %.6g" % chirality_proxy.delta, file=out) # Planarity for proxy in restraints_manager.geometry.planarity_proxies: planarity_proxy = geometry_restraints.planarity( sites_cart = sites_cart, proxy = proxy) proxy_i_seqs = [] for i_seq in proxy.i_seqs: proxy_i_seqs.append(i_seq) proxy_i_seqs = tuple(proxy_i_seqs) if proxy_i_seqs in ensemble_planarity_deltas: ensemble_planarity_deltas[proxy_i_seqs][0]+=planarity_proxy.rms_deltas() ensemble_planarity_deltas[proxy_i_seqs][1]+=1 else: ensemble_planarity_deltas[proxy_i_seqs] = [planarity_proxy.rms_deltas(), 1] if verbose: print("planarity : ", proxy_i_seqs, file=out) print(" planarity rms_deltas : %.6g" % planarity_proxy.rms_deltas(), file=out) # Dihedral if verbose: restraints_manager.geometry.dihedral_proxies.show_histogram_of_deltas( sites_cart = sites_cart, n_slots = 10, f = out) for proxy in restraints_manager.geometry.dihedral_proxies: dihedral_proxy = geometry_restraints.dihedral( sites_cart = sites_cart, proxy = proxy) if proxy.i_seqs in ensemble_dihedral_deltas: ensemble_dihedral_deltas[proxy.i_seqs][0]+=dihedral_proxy.delta ensemble_dihedral_deltas[proxy.i_seqs][1]+=1 else: ensemble_dihedral_deltas[proxy.i_seqs] = [dihedral_proxy.delta, 1] if verbose: print("dihedral : ", proxy.i_seqs, file=out) print(" dihedral_ideal : %.6g" % proxy.angle_ideal, file=out) print(" periodicity : %.6g" % proxy.periodicity, file=out) print(" dihedral_model : %.6g" % dihedral_proxy.angle_model, file=out) print(" delta : %.6g" % dihedral_proxy.delta, file=out) # Calculate RMSDs for ensemble model # Bond mean_bond_delta = flex.double() for proxy, info in six.iteritems(ensemble_bond_deltas): # assert info[1] == ensemble_size if info[1]!=ensemble_size: print('skipping bond RMSD calns of ensemble %s' % info, file=out) continue mean_delta = info[0] / info[1] mean_bond_delta.append(mean_delta) bond_delta_sq = mean_bond_delta * mean_bond_delta ensemble_bond_rmsd = math.sqrt(flex.mean_default(bond_delta_sq, 0)) # 
Angle mean_angle_delta = flex.double() for proxy, info in six.iteritems(ensemble_angle_deltas): assert info[1] == ensemble_size mean_delta = info[0] / info[1] mean_angle_delta.append(mean_delta) angle_delta_sq = mean_angle_delta * mean_angle_delta ensemble_angle_rmsd = math.sqrt(flex.mean_default(angle_delta_sq, 0)) # Chirality mean_chirality_delta = flex.double() for proxy, info in six.iteritems(ensemble_chirality_deltas): assert info[1] == ensemble_size mean_delta = info[0] / info[1] mean_chirality_delta.append(mean_delta) chirality_delta_sq = mean_chirality_delta * mean_chirality_delta ensemble_chirality_rmsd = math.sqrt(flex.mean_default(chirality_delta_sq, 0)) # Planarity mean_planarity_delta = flex.double() for proxy, info in six.iteritems(ensemble_planarity_deltas): assert info[1] == ensemble_size mean_delta = info[0] / info[1] mean_planarity_delta.append(mean_delta) planarity_delta_sq = mean_planarity_delta * mean_planarity_delta ensemble_planarity_rmsd = math.sqrt(flex.mean_default(planarity_delta_sq, 0)) # Dihedral mean_dihedral_delta = flex.double() for proxy, info in six.iteritems(ensemble_dihedral_deltas): assert info[1] == ensemble_size mean_delta = info[0] / info[1] mean_dihedral_delta.append(mean_delta) dihedral_delta_sq = mean_dihedral_delta * mean_dihedral_delta ensemble_dihedral_rmsd = math.sqrt(flex.mean_default(dihedral_delta_sq, 0)) # Calculate <structure rmsd> assert ensemble_size == structures_bond_rmsd assert ensemble_size == structures_angle_rmsd assert ensemble_size == structures_chirality_rmsd assert ensemble_size == structures_planarity_rmsd assert ensemble_size == structures_dihedral_rmsd structure_bond_rmsd_mean = structures_bond_rmsd.min_max_mean().mean structure_angle_rmsd_mean = structures_angle_rmsd.min_max_mean().mean structure_chirality_rmsd_mean = structures_chirality_rmsd.min_max_mean().mean structure_planarity_rmsd_mean = structures_planarity_rmsd.min_max_mean().mean structure_dihedral_rmsd_mean = 
structures_dihedral_rmsd.min_max_mean().mean # Show summary utils.print_header("Ensemble RMSD summary", out = out) print(" RMSD (mean delta per restraint)", file=out) print(" bond : %.6g" % ensemble_bond_rmsd, file=out) print(" angle : %.6g" % ensemble_angle_rmsd, file=out) print(" chirality : %.6g" % ensemble_chirality_rmsd, file=out) print(" planarity : %.6g" % ensemble_planarity_rmsd, file=out) print(" dihedral : %.6g" % ensemble_dihedral_rmsd, file=out) print(" RMSD (mean RMSD per structure)", file=out) print(" bond : %.6g" % structure_bond_rmsd_mean, file=out) print(" angle : %.6g" % structure_angle_rmsd_mean, file=out) print(" chirality : %.6g" % structure_chirality_rmsd_mean, file=out) print(" planarity : %.6g" % structure_planarity_rmsd_mean, file=out) print(" dihedral : %.6g" % structure_dihedral_rmsd_mean, file=out) if ignore_hd: print("\n Calculated excluding H/D", file=out) else: print("\n Calculated including H/D", file=out) if return_pdb_string: ens_geo_pdb_string = "REMARK 3" ens_geo_pdb_string += "\nREMARK 3 NUMBER STRUCTURES IN ENSEMBLE : {0:5d}".format(ensemble_size) if ignore_hd: ens_geo_pdb_string += "\nREMARK 3 RMS DEVIATIONS FROM IDEAL VALUES (EXCLUDING H/D)" else: ens_geo_pdb_string += "\nREMARK 3 RMS DEVIATIONS FROM IDEAL VALUES (INCLUDING H/D)" ens_geo_pdb_string += "\nREMARK 3 RMSD (MEAN DELTA PER RESTRAINT)" ens_geo_pdb_string += "\nREMARK 3 BOND : {0:5.3f}".format(ensemble_bond_rmsd) ens_geo_pdb_string += "\nREMARK 3 ANGLE : {0:5.3f}".format(ensemble_angle_rmsd) ens_geo_pdb_string += "\nREMARK 3 CHIRALITY : {0:5.3f}".format(ensemble_chirality_rmsd) ens_geo_pdb_string += "\nREMARK 3 PLANARITY : {0:5.3f}".format(ensemble_planarity_rmsd) ens_geo_pdb_string += "\nREMARK 3 DIHEDRAL : {0:5.2f}".format(ensemble_dihedral_rmsd) ens_geo_pdb_string += "\nREMARK 3 RMSD (MEAN RMSD PER STRUCTURE)" ens_geo_pdb_string += "\nREMARK 3 BOND : {0:5.3f}".format(structure_bond_rmsd_mean) ens_geo_pdb_string += "\nREMARK 3 ANGLE : 
{0:5.3f}".format(structure_angle_rmsd_mean) ens_geo_pdb_string += "\nREMARK 3 CHIRALITY : {0:5.3f}".format(structure_chirality_rmsd_mean) ens_geo_pdb_string += "\nREMARK 3 PLANARITY : {0:5.3f}".format(structure_planarity_rmsd_mean) ens_geo_pdb_string += "\nREMARK 3 DIHEDRAL : {0:5.2f}".format(structure_dihedral_rmsd_mean) ens_geo_pdb_string += "\nREMARK 3" return ens_geo_pdb_string
def ensemble_reduction(self, rfree_tolerance = 0.0025):
  """Reduce the ensemble to the minimum number of models that still
  reproduces the final Rfree (within rfree_tolerance, as a fraction).

  Models are picked with an equal stride (div_int) through the trajectory;
  increasing strides are tried and the best acceptable one is kept.
  Side effects: updates self.ensemble_obj.fmodel_total and prunes the
  er_data.xray_structures / pdb_hierarchys / ke_pdb arrays in place.
  Progress is written to self.ensemble_obj.log.
  """
  #Reduces number of models to minimum required to reproduce Rfree
  utils.print_header("Ensemble reducer", out = self.ensemble_obj.log)
  self.ensemble_obj.show_overall(message = "Full simulation fmodel final",
                                 fmodel_running = False)
  final_rfree = self.ensemble_obj.fmodel_total.r_free()
  final_rwork = self.ensemble_obj.fmodel_total.r_work()  # kept for reference; not used below
  # XXX no b_iso - how to apply this???
  # print >> self.ensemble_obj.log, "\nApply B_iso to all model in ensemble"
  # shift_b_iso = self.ensemble_obj.fmodel_total.b_iso()
  # print >> self.ensemble_obj.log, 'Shift B_iso : {0:8.3f}'.format(shift_b_iso)
  # for x in self.ensemble_obj.er_data.xray_structures:
  #   x.shift_us(b_shift = shift_b_iso)
  total_number_xrs = len(self.ensemble_obj.er_data.xray_structures)
  print("\nReduce ensemble with equal distribution though trajectory :", file=self.ensemble_obj.log)
  print("Rfree tolerance (%) : ", rfree_tolerance * 100, file=self.ensemble_obj.log)
  print('\n {0:>12} {1:>8} {2:>8} {3:>8}'\
    .format('Num','Rwork','Rfree','k1'), file=self.ensemble_obj.log)
  target_rfree = final_rfree
  final_div = None
  # Candidate strides: keep every div_int-th model of the trajectory.
  for div_int in [1,2,3,4,5,6,7,8,9,10,12,14,16,18,20,25,30,35,40,45,50,60,70,80,90,100,200,300,400,500,600,700,800,900,1000,2000,3000,4000,5000]:
    if div_int <= total_number_xrs:
      self.fmodel_ens = self.ensemble_obj.fmodel_total.deep_copy()
      cntr = 0.0
      fcalc_total = None
      fmask_total = None
      # self.fmodel_ens.update(k_sols = self.ensemble_obj.fmodel_total.k_sols(),
      #                        b_sol  = self.ensemble_obj.fmodel_total.b_sol(),
      #                        b_cart = self.ensemble_obj.fmodel_total.b_cart() )
      for x in range(total_number_xrs):
        if x%int(div_int) == 0:
          #Apply back trace of Biso here...
          self.fmodel_ens.update_xray_structure(
            xray_structure = self.ensemble_obj.er_data.xray_structures[x],
            update_f_calc  = True,
            update_f_mask  = True,
            force_update_f_mask = True)
          if fcalc_total == None:
            # First selected model initializes the running sums.
            fcalc_total = self.fmodel_ens.f_calc().data().deep_copy()
            fmask_total = self.fmodel_ens.f_masks()[0].data().deep_copy()
            cntr = 1
          else:
            fcalc_total += self.fmodel_ens.f_calc().data().deep_copy()
            fmask_total += self.fmodel_ens.f_masks()[0].data().deep_copy()
            cntr += 1
        if x == total_number_xrs-1:
          # Average structure factors over the cntr selected models, rescale,
          # and test the resulting Rfree against the target.
          self.fmodel_ens.update(
            f_calc = self.ensemble_obj.copy_ma.array(data = (fcalc_total / cntr)),
            f_mask = self.ensemble_obj.copy_ma.array(data = (fmask_total / cntr)) )
          self.fmodel_ens.update_all_scales(
            log    = self.ensemble_obj.log,
            remove_outliers=False,
            params = self.ensemble_obj.bsp)
          if cntr < 4:
            # Fewer than 4 models: stop trying coarser strides.
            break
          print("Ens: {0:8d} {1:8.3f} {2:8.3f} {3:8.3f}"\
            .format(cntr,
                    self.fmodel_ens.r_work(),
                    self.fmodel_ens.r_free(),
                    self.fmodel_ens.scale_k1()
                    ), file=self.ensemble_obj.log)
          if self.fmodel_ens.r_free() < (target_rfree + rfree_tolerance):
            # Acceptable reduction; remember the stride and averaged arrays.
            final_div = div_int
            final_f_calc = self.ensemble_obj.copy_ma.array(data = (fcalc_total / cntr))
            final_f_mask = self.ensemble_obj.copy_ma.array(data = (fmask_total / cntr))
            if self.fmodel_ens.r_free() < target_rfree:
              target_rfree = self.fmodel_ens.r_free()
  if final_div == None:
    print("Warning pdb ensemble does not contain sufficent models and missrepresents simulation. Simulation Rfree: {0:2.3f} %".format(100*(final_rfree)), file=self.ensemble_obj.log)
  else:
    #Update fmodel_total
    self.ensemble_obj.fmodel_total.update(f_calc = final_f_calc,
                                          f_mask = final_f_mask)
    self.ensemble_obj.fmodel_total.update_all_scales(
      log    = self.ensemble_obj.log,
      remove_outliers=False,
      params = self.ensemble_obj.bsp)
    #Parse arrays for output PDB
    copy_ed_data_xray_structures = []
    copy_pdb_hierarchys = []
    copy_ed_data_ke_pdb = []
    for x in range(len(self.ensemble_obj.er_data.xray_structures)):
      if x%int(final_div) == 0:
        copy_ed_data_xray_structures.append(self.ensemble_obj.er_data.xray_structures[x])
        copy_pdb_hierarchys.append(self.ensemble_obj.er_data.pdb_hierarchys[x])
        copy_ed_data_ke_pdb.append(self.ensemble_obj.er_data.ke_pdb[x])
    self.ensemble_obj.er_data.xray_structures = copy_ed_data_xray_structures
    self.ensemble_obj.er_data.pdb_hierarchys = copy_pdb_hierarchys
    self.ensemble_obj.er_data.ke_pdb = copy_ed_data_ke_pdb
    print("Final pdb ensemble contains {0:3d} models".format(len(self.ensemble_obj.er_data.xray_structures)), file=self.ensemble_obj.log)
    assert len(self.ensemble_obj.er_data.xray_structures) == len(self.ensemble_obj.er_data.pdb_hierarchys)
    assert len(self.ensemble_obj.er_data.xray_structures) == len(self.ensemble_obj.er_data.ke_pdb)
  print("|"+"-"*77+"|\n", file=self.ensemble_obj.log)
def run(args, command_name="phenix.tls"):
  """Command-line driver for phenix.tls.

  Reads a PDB/mmCIF model, extracts TLS groups from the REMARK 3 header
  or from parameter selections, and optionally:
    * extract_tls: fits TLS matrices to the B-factors of the selected
      atoms and subtracts the TLS contribution from the ADPs;
    * combine_tls: folds B_tls back into B_residual.
  Writes the resulting model (and REMARK 3 TLS records) to a PDB file.
  Raises Sorry on missing/inconsistent inputs.
  """
  if (len(args) == 0): args = ["--help"]
  usage_fmt = "%s pdb_file [parameters: file or command line string]"
  # des_fmt kept for reference; not used below.
  des_fmt = "Example: %s model.pdb fit_tls_to.selection='%s' fit_tls_to.selection='%s'"
  command_line = (iotbx_option_parser(
    usage=usage_fmt % command_name,
    description=banner).option(
      "--show_defaults",
      action="store_true",
      help="Do not output to the screen (except errors).").option(
      "--silent",
      action="store_true",
      help="Suppress output to the screen.")).process(args=args)
  #
  log = sys.stdout
  if (not command_line.options.silent):
    utils.print_header("TLS tools", out=log)
  if (command_line.options.show_defaults):
    master_params.show(out=log)
    print(file=log)
    return
  if (not command_line.options.silent):
    print(banner, file=log)
  #
  processed_args = utils.process_command_line_args(
    args=command_line.args, master_params=master_params, log=log)
  reflection_files = processed_args.reflection_files
  if (processed_args.crystal_symmetry is None):
    raise Sorry("No crystal symmetry found.")
  if (len(processed_args.pdb_file_names) == 0):
    raise Sorry("No PDB file found.")
  params = processed_args.params
  if (not command_line.options.silent):
    utils.print_header("Input parameters", out=log)
    params.show(out=log)
  params = params.extract()
  #
  if (processed_args.crystal_symmetry.unit_cell() is None or
      processed_args.crystal_symmetry.space_group() is None):
    raise Sorry("No CRYST1 record found.")
  # Merge all unique input coordinate files into one set of records.
  pdb_combined = iotbx.pdb.combine_unique_pdb_files(
    file_names=processed_args.pdb_file_names)
  pdb_combined.report_non_unique(out=log)
  if (len(pdb_combined.unique_file_names) == 0):
    raise Sorry("No coordinate file given.")
  raw_records = pdb_combined.raw_records
  try:
    pdb_inp = iotbx.pdb.input(source_info=None,
      lines=flex.std_string(raw_records))
  except ValueError as e:
    raise Sorry("Model format (PDB or mmCIF) error:\n%s" % str(e))
  model = mmtbx.model.manager(
    model_input=pdb_inp,
    restraint_objects=processed_args.cif_objects,
    crystal_symmetry=processed_args.crystal_symmetry,
    log=log)
  if (not command_line.options.silent):
    utils.print_header("TLS groups from PDB file header", out=log)
  # Parse TLS definitions from the REMARK 3 records of the input model.
  pdb_inp_tls = mmtbx.tls.tools.tls_from_pdb_inp(
    remark_3_records=model._model_input.extract_remark_iii_records(3),
    pdb_hierarchy=model.get_hierarchy())
  #
  tls_groups = []
  if (pdb_inp_tls.tls_present):
    if (pdb_inp_tls.error_string is not None):
      raise Sorry(pdb_inp_tls.error_string)
    pdb_tls = mmtbx.tls.tools.extract_tls_from_pdb(pdb_inp_tls=pdb_inp_tls,
      model=model)
    tls_groups = pdb_tls.pdb_inp_tls.tls_params
  #
  tls_selections_strings = []
  #
  if (len(tls_groups) == 0 and not command_line.options.silent):
    print("No TLS groups found in PDB file header.", file=log)
  else:
    for i_seq, tls_group in enumerate(tls_groups):
      tls_selections_strings.append(tls_group.selection_string)
      if (not command_line.options.silent):
        print("TLS group %d: %s" % (i_seq + 1, tls_group.selection_string),
          file=log)
        mmtbx.tls.tools.show_tls_one_group(tlso=tls_group, out=log)
        print(file=log)
  #
  # Header selections and parameter selections are mutually exclusive.
  if (len(tls_selections_strings) > 0 and len(params.selection) > 0):
    raise Sorry(
      "Two TLS selection sources found: PDB file header and parameters.")
  if (len(params.selection) > 0):
    tls_selections_strings = params.selection
  if ([params.combine_tls, params.extract_tls].count(True) > 1):
    raise Sorry(
      "Cannot simultaneously pereform: combine_tls and extract_tls")
  if ([params.combine_tls, params.extract_tls].count(True) > 0):
    if (len(tls_selections_strings) == 0):
      raise Sorry("No TLS selections found.")
  #
  if (len(tls_selections_strings)):
    if (not command_line.options.silent):
      utils.print_header("TLS groups selections", out=log)
    selections = utils.get_atom_selections(
      model=model,
      selection_strings=tls_selections_strings)
    if (not command_line.options.silent):
      print("Number of TLS groups: ", len(selections), file=log)
      print("Number of atoms: %d" % model.get_number_of_atoms(), file=log)
    n_atoms_in_tls = 0
    for sel_a in selections:
      n_atoms_in_tls += sel_a.size()
    if (not command_line.options.silent):
      print("Number of atoms in TLS groups: %d" % n_atoms_in_tls, file=log)
      print(file=log)
    assert len(tls_selections_strings) == len(selections)
    if (not command_line.options.silent):
      for sel_a, sel_s in zip(selections, tls_selections_strings):
        print("Selection string:\n%s" % sel_s, file=log)
        print("selects %d atoms." % sel_a.size(), file=log)
        print(file=log)
      print("Ready-to-use in phenix.refine:\n", file=log)
      for sel_a, sel_s in zip(selections, tls_selections_strings):
        print(sel_s, file=log)
  #
  # Derive the output file name from the first input PDB when not given.
  ofn = params.output_file_name
  if (ofn is None):
    ofn = os.path.splitext(
      os.path.basename(processed_args.pdb_file_names[0]))[0]
    if (len(processed_args.pdb_file_names) > 1):
      ofn = ofn + "_el_al"
    if (params.combine_tls):
      ofn = ofn + "_combine_tls.pdb"
    elif (params.extract_tls):
      ofn = ofn + "_extract_tls.pdb"
    else:
      ofn = None
  if (ofn is not None):
    ofo = open(ofn, "w")
  #
  if (params.extract_tls):
    utils.print_header(
      "Fit TLS matrices to B-factors of selected sets of atoms", out=log)
    tlsos = mmtbx.tls.tools.generate_tlsos(
      selections=selections,
      xray_structure=model.get_xray_structure(),
      value=0.0)
    # Alternate refinement of T/L/S components over many macro-cycles.
    for rt, rl, rs in [[1, 0, 1], [1, 1, 1], [0, 1, 1], [1, 0, 0],
                       [0, 1, 0], [0, 0, 1], [1, 1, 1], [0, 0, 1]] * 10:
      tlsos = mmtbx.tls.tools.tls_from_uanisos(
        xray_structure=model.get_xray_structure(),
        selections=selections,
        tlsos_initial=tlsos,
        number_of_macro_cycles=10,
        max_iterations=100,
        refine_T=rt,
        refine_L=rl,
        refine_S=rs,
        enforce_positive_definite_TL=params.enforce_positive_definite_TL,
        verbose=-1,
        out=log)
    mmtbx.tls.tools.show_tls(tlsos=tlsos, out=log)
    # Subtract the fitted TLS contribution from each scatterer's ADP.
    u_cart_from_tls = mmtbx.tls.tools.u_cart_from_tls(
      sites_cart=model.get_sites_cart(),
      selections=selections,
      tlsos=tlsos)
    unit_cell = model.get_xray_structure().unit_cell()
    for i_seq, sc in enumerate(model.get_xray_structure().scatterers()):
      if (u_cart_from_tls[i_seq] != (0, 0, 0, 0, 0, 0)):
        u_star_tls = adptbx.u_cart_as_u_star(
          unit_cell, tuple(u_cart_from_tls[i_seq]))
        sc.u_star = tuple(
          flex.double(sc.u_star) - flex.double(u_star_tls))
    for sel in selections:
      model.get_xray_structure().convert_to_isotropic(selection=sel)
    mmtbx.tls.tools.remark_3_tls(tlsos=tlsos,
      selection_strings=tls_selections_strings, out=ofo)
  #
  if (params.combine_tls):
    utils.print_header("Combine B_tls with B_residual", out=log)
    mmtbx.tls.tools.combine_tls_and_u_local(
      xray_structure=model.get_xray_structure(),
      tls_selections=selections,
      tls_groups=tls_groups)
    print("All done.", file=log)
  #
  if (ofn is not None):
    utils.print_header("Write output PDB file %s" % ofn, out=log)
    model.set_sites_cart_from_xrs()
    pdb_str = model.model_as_pdb()
    ofo.write(pdb_str)
    ofo.close()
  print("All done.", file=log)
def run(args, command_name="phenix.fobs_minus_fobs_map", log=None):
  """Command-line driver for phenix.fobs_minus_fobs_map (Python-3 variant).

  Reads two Fobs data sets and a phasing model, filters the data by
  resolution and sigma, and computes an Fobs1-Fobs2 difference map via
  compute_fo_minus_fo_map. Returns the list of output file names.
  Raises Sorry on missing or inconsistent inputs.
  """
  if (len(args) == 0): args = ["--help"]
  examples = """Examples:
phenix.fobs_minus_fobs_map f_obs_1_file=data1.mtz f_obs_2_file=data2.sca \
f_obs_1_label=FOBS1 f_obs_2_label=FOBS2 model.pdb
phenix.fobs_minus_fobs_map f_obs_1_file=data.mtz f_obs_2_file=data.mtz \
f_obs_1_label=FOBS1 f_obs_2_label=FOBS2 phase_source=model.pdb \
high_res=2.0 sigma_cutoff=2 scattering_table=neutron"""
  command_line = (iotbx_option_parser(
    usage="%s [options]" % command_name,
    description=examples).option("--silent",
      action="store_true",
      help="Suppress output to the screen.").
    enable_symmetry_comprehensive()).process(args=args)
  #
  if (log is None):
    log = sys.stdout
  if (not command_line.options.silent):
    utils.print_header("phenix.fobs_minus_fobs_map", out=log)
    print("Command line arguments: ", file=log)
    print(args, file=log)
    print(file=log)
  #
  processed_args = utils.process_command_line_args(
    args=command_line.args,
    cmd_cs=command_line.symmetry,
    master_params=fo_minus_fo_master_params(),
    absolute_angle_tolerance=5,
    absolute_length_tolerance=1,
    log=log,
    suppress_symmetry_related_errors=True)
  working_phil = processed_args.params
  if (not command_line.options.silent):
    print("*** Parameters:", file=log)
    working_phil.show(out=log)
    print(file=log)
  params = working_phil.extract()
  consensus_symmetry = None
  if (params.ignore_non_isomorphous_unit_cells):
    # Explicit file parameters are required so each file's symmetry can be
    # checked individually against the model.
    if (None in [
        params.f_obs_1_file_name, params.f_obs_2_file_name,
        params.phase_source
    ]):
      raise Sorry(
        "The file parameters (f_obs_1_file_name, f_obs_2_file_name, " +
        "phase_source) must be specified explicitly when " +
        "ignore_non_isomorphous_unit_cells=True.")
    symm_manager = iotbx.symmetry.manager()
    pdb_in = iotbx.file_reader.any_file(params.phase_source, force_type="pdb")
    symm_manager.process_pdb_file(pdb_in)
    hkl_in_1 = iotbx.file_reader.any_file(params.f_obs_1_file_name,
      force_type="hkl")
    sg_err_1, uc_err_1 = symm_manager.process_reflections_file(hkl_in_1)
    hkl_in_2 = iotbx.file_reader.any_file(params.f_obs_2_file_name,
      force_type="hkl")
    sg_err_2, uc_err_2 = symm_manager.process_reflections_file(hkl_in_2)
    out = StringIO()
    symm_manager.show(out=out)
    if (sg_err_1) or (sg_err_2):
      # Space-group conflicts are fatal; unit-cell conflicts only warn.
      raise Sorry((
        "Incompatible space groups in input files:\n%s\nAll files " +
        "must have the same point group (and ideally the same space group). " +
        "Please note that any symmetry information in the PDB file will be " +
        "used first.") % out.getvalue())
    elif (uc_err_1) or (uc_err_2):
      libtbx.call_back(
        message="warn",
        data=("Crystal symmetry mismatch:\n%s\nCalculations will continue " +
          "using the symmetry in the PDB file (or if not available, the " +
          "first reflection file), but the maps should be treated with " +
          "extreme suspicion.") % out.getvalue())
    crystal_symmetry = symm_manager.as_symmetry_object()
  else:
    processed_args = utils.process_command_line_args(
      args=command_line.args,
      cmd_cs=command_line.symmetry,
      master_params=fo_minus_fo_master_params(),
      suppress_symmetry_related_errors=False,
      absolute_angle_tolerance=5,
      absolute_length_tolerance=1,
      log=StringIO())
    crystal_symmetry = processed_args.crystal_symmetry
  #
  pdb_file_names = processed_args.pdb_file_names
  if (len(processed_args.pdb_file_names) == 0):
    if (params.phase_source is not None):
      pdb_file_names = [params.phase_source]
    else:
      raise Sorry("No PDB file found.")
  # Extaract Fobs1, Fobs2
  f_obss = []
  if (len(processed_args.reflection_files) == 2):
    for reflection_file in processed_args.reflection_files:
      reflection_file_server = reflection_file_utils.reflection_file_server(
        crystal_symmetry=crystal_symmetry,
        force_symmetry=True,
        reflection_files=[reflection_file],
        err=null_out())
      # XXX UGLY !!!
      # Try label 1 first; on any failure fall back to label 2.
      try:
        parameters = utils.data_and_flags_master_params().extract()
        if (params.f_obs_1_label is not None):
          parameters.labels = [params.f_obs_1_label]
        determine_data_and_flags_result = utils.determine_data_and_flags(
          reflection_file_server=reflection_file_server,
          keep_going=True,
          parameters=parameters,
          log=null_out())
      except: # intentional
        parameters = utils.data_and_flags_master_params().extract()
        if (params.f_obs_2_label is not None):
          parameters.labels = [params.f_obs_2_label]
        determine_data_and_flags_result = utils.determine_data_and_flags(
          reflection_file_server=reflection_file_server,
          keep_going=True,
          parameters=parameters,
          log=null_out())
      f_obss.append(determine_data_and_flags_result.f_obs)
  else:
    if ([params.f_obs_1_file_name, params.f_obs_2_file_name].count(None) == 2):
      raise Sorry("No reflection data file found.")
    for file_name, label in zip(
        [params.f_obs_1_file_name, params.f_obs_2_file_name],
        [params.f_obs_1_label, params.f_obs_2_label]):
      reflection_file = reflection_file_reader.any_reflection_file(
        file_name=file_name, ensure_read_access=False)
      reflection_file_server = reflection_file_utils.reflection_file_server(
        crystal_symmetry=crystal_symmetry,
        force_symmetry=True,
        reflection_files=[reflection_file],
        err=null_out())
      parameters = utils.data_and_flags_master_params().extract()
      if (label is not None):
        parameters.labels = [label]
      determine_data_and_flags_result = utils.determine_data_and_flags(
        reflection_file_server=reflection_file_server,
        parameters=parameters,
        keep_going=True,
        log=null_out())
      f_obss.append(determine_data_and_flags_result.f_obs)
  if (len(f_obss) != 2):
    # NOTE(review): 'errors' is not defined in this function — confirm it
    # exists at module scope, otherwise this raises NameError, not Sorry.
    raise Sorry(" ".join(errors))
  if (not command_line.options.silent):
    for ifobs, fobs in enumerate(f_obss):
      print("*** Summary for data set %d:" % ifobs, file=log)
      fobs.show_comprehensive_summary(f=log)
      print(file=log)
  pdb_combined = combine_unique_pdb_files(file_names=pdb_file_names)
  pdb_combined.report_non_unique(out=log)
  if (len(pdb_combined.unique_file_names) == 0):
    raise Sorry("No coordinate file given.")
  #
  # Replace any CRYST1 record with one built from the chosen symmetry.
  raw_recs = flex.std_string()
  for rec in pdb_combined.raw_records:
    if (rec.upper().count("CRYST1") == 0):
      raw_recs.append(rec)
  raw_recs.append(
    iotbx.pdb.format_cryst1_record(crystal_symmetry=crystal_symmetry))
  #
  pdb_in = iotbx.pdb.input(source_info=None, lines=raw_recs)
  model = mmtbx.model.manager(model_input=pdb_in)
  d_min = min(f_obss[0].d_min(), f_obss[1].d_min())
  model.setup_scattering_dictionaries(
    scattering_table=params.scattering_table, d_min=d_min)
  xray_structure = model.get_xray_structure()
  hierarchy = model.get_hierarchy()
  #
  omit_sel = flex.bool(hierarchy.atoms_size(), False)
  if (params.advanced.omit_selection is not None):
    print("Will omit selection from phasing model:", file=log)
    print(" " + params.advanced.omit_selection, file=log)
    omit_sel = hierarchy.atom_selection_cache().selection(
      params.advanced.omit_selection)
    print("%d atoms selected for removal" % omit_sel.count(True), file=log)
  del hierarchy
  xray_structure = xray_structure.select(~omit_sel)
  if (not command_line.options.silent):
    print("*** Model summary:", file=log)
    xray_structure.show_summary(f=log)
    print(file=log)
  info0 = f_obss[0].info()
  info1 = f_obss[1].info()
  f_obss[0] = f_obss[0].resolution_filter(
    d_min=params.high_resolution,
    d_max=params.low_resolution).set_info(info0)
  f_obss[1] = f_obss[1].resolution_filter(
    d_min=params.high_resolution,
    d_max=params.low_resolution).set_info(info1)
  if (params.sigma_cutoff is not None):
    for i in [0, 1]:
      if (f_obss[i].sigmas() is not None):
        sel = f_obss[i].data() > f_obss[i].sigmas() * params.sigma_cutoff
        f_obss[i] = f_obss[i].select(sel).set_info(info0)
  for k, f_obs in enumerate(f_obss):
    if (f_obs.indices().size() == 0):
      raise Sorry(
        "No data left in array %d (labels=%s) after filtering!"
        % (k + 1, f_obs.info().label_string()))
  output_file_name = params.output_file
  if (output_file_name is None) and (params.file_name_prefix is not None):
    output_file_name = "%s_%s.mtz" % (params.file_name_prefix, params.job_id)
  output_files = compute_fo_minus_fo_map(
    data_arrays=f_obss,
    xray_structure=xray_structure,
    log=log,
    silent=command_line.options.silent,
    output_file=output_file_name,
    peak_search=params.find_peaks_holes,
    map_cutoff=params.map_cutoff,
    peak_search_params=params.peak_search,
    multiscale=params.advanced.multiscale,
    anomalous=params.advanced.anomalous).file_names
  return output_files
def run(args, command_name = "phenix.fobs_minus_fobs_map", log=None):
  """Command-line driver for phenix.fobs_minus_fobs_map (legacy variant).

  Reads two Fobs data sets and a phasing model, filters by resolution and
  sigma, and computes an Fobs1-Fobs2 difference map. Returns the list of
  output file names. Raises Sorry on missing or inconsistent inputs.

  Fix: converted Python-2 ``print >> log`` statements to the ``print()``
  function so this module parses under Python 3; all output text and all
  other behavior are unchanged.
  """
  if(len(args) == 0): args = ["--help"]
  examples = """Examples:
phenix.fobs_minus_fobs_map f_obs_1_file=data1.mtz f_obs_2_file=data2.sca \
f_obs_1_label=FOBS1 f_obs_2_label=FOBS2 model.pdb
phenix.fobs_minus_fobs_map f_obs_1_file=data.mtz f_obs_2_file=data.mtz \
f_obs_1_label=FOBS1 f_obs_2_label=FOBS2 phase_source=model.pdb \
high_res=2.0 sigma_cutoff=2 scattering_table=neutron"""
  command_line = (iotbx_option_parser(
    usage="%s [options]" % command_name,
    description=examples)
    .option("--silent",
      action="store_true",
      help="Suppress output to the screen.")
    .enable_symmetry_comprehensive()
    ).process(args=args)
  #
  if (log is None) :
    log = sys.stdout
  if(not command_line.options.silent):
    utils.print_header("phenix.fobs_minus_fobs_map", out = log)
    print("Command line arguments: ", file=log)
    print(args, file=log)
    print(file=log)
  #
  processed_args = utils.process_command_line_args(
    args=command_line.args,
    cmd_cs=command_line.symmetry,
    master_params=fo_minus_fo_master_params(),
    absolute_angle_tolerance=5,
    absolute_length_tolerance=1,
    log=log,
    suppress_symmetry_related_errors=True)
  working_phil = processed_args.params
  if(not command_line.options.silent):
    print("*** Parameters:", file=log)
    working_phil.show(out = log)
    print(file=log)
  params = working_phil.extract()
  consensus_symmetry = None
  if (params.ignore_non_isomorphous_unit_cells) :
    # Explicit file parameters are required so each file's symmetry can be
    # checked individually against the model.
    if (None in [params.f_obs_1_file_name, params.f_obs_2_file_name,
        params.phase_source]):
      raise Sorry("The file parameters (f_obs_1_file_name, f_obs_2_file_name, "+
        "phase_source) must be specified explicitly when "+
        "ignore_non_isomorphous_unit_cells=True.")
    symm_manager = iotbx.symmetry.manager()
    pdb_in = iotbx.file_reader.any_file(params.phase_source, force_type="pdb")
    symm_manager.process_pdb_file(pdb_in)
    hkl_in_1 = iotbx.file_reader.any_file(params.f_obs_1_file_name,
      force_type="hkl")
    sg_err_1, uc_err_1 = symm_manager.process_reflections_file(hkl_in_1)
    hkl_in_2 = iotbx.file_reader.any_file(params.f_obs_2_file_name,
      force_type="hkl")
    sg_err_2, uc_err_2 = symm_manager.process_reflections_file(hkl_in_2)
    out = StringIO()
    symm_manager.show(out=out)
    # Space-group conflicts are fatal; unit-cell conflicts only warn.
    if (sg_err_1) or (sg_err_2) :
      raise Sorry(("Incompatible space groups in input files:\n%s\nAll files "+
        "must have the same point group (and ideally the same space group). "+
        "Please note that any symmetry information in the PDB file will be "+
        "used first.") % out.getvalue())
    elif (uc_err_1) or (uc_err_2) :
      libtbx.call_back(message="warn",
        data=("Crystal symmetry mismatch:\n%s\nCalculations will continue "+
          "using the symmetry in the PDB file (or if not available, the "+
          "first reflection file), but the maps should be treated with "+
          "extreme suspicion.") % out.getvalue())
    crystal_symmetry = symm_manager.as_symmetry_object()
  else :
    processed_args = utils.process_command_line_args(
      args=command_line.args,
      cmd_cs=command_line.symmetry,
      master_params=fo_minus_fo_master_params(),
      absolute_angle_tolerance=5,
      absolute_length_tolerance=1,
      log=StringIO())
    crystal_symmetry = processed_args.crystal_symmetry
  #
  pdb_file_names = processed_args.pdb_file_names
  if(len(processed_args.pdb_file_names) == 0):
    if(params.phase_source is not None):
      pdb_file_names = [params.phase_source]
    else:
      raise Sorry("No PDB file found.")
  # Extaract Fobs1, Fobs2
  f_obss = []
  if(len(processed_args.reflection_files)==2):
    for reflection_file in processed_args.reflection_files:
      reflection_file_server = reflection_file_utils.reflection_file_server(
        crystal_symmetry = crystal_symmetry,
        force_symmetry   = True,
        reflection_files = [reflection_file],
        err              = null_out())
      # XXX UGLY !!!
      # Try label 1 first; on any failure fall back to label 2.
      try:
        parameters = utils.data_and_flags_master_params().extract()
        if(params.f_obs_1_label is not None):
          parameters.labels = [params.f_obs_1_label]
        determine_data_and_flags_result = utils.determine_data_and_flags(
          reflection_file_server = reflection_file_server,
          keep_going             = True,
          parameters             = parameters,
          log                    = null_out())
      except: # intentional
        parameters = utils.data_and_flags_master_params().extract()
        if(params.f_obs_2_label is not None):
          parameters.labels = [params.f_obs_2_label]
        determine_data_and_flags_result = utils.determine_data_and_flags(
          reflection_file_server = reflection_file_server,
          keep_going             = True,
          parameters             = parameters,
          log                    = null_out())
      f_obss.append(determine_data_and_flags_result.f_obs)
  else:
    if([params.f_obs_1_file_name,params.f_obs_2_file_name].count(None)==2):
      raise Sorry("No reflection data file found.")
    for file_name, label in zip(
        [params.f_obs_1_file_name,params.f_obs_2_file_name],
        [params.f_obs_1_label,params.f_obs_2_label]):
      reflection_file = reflection_file_reader.any_reflection_file(
        file_name = file_name, ensure_read_access = False)
      reflection_file_server = reflection_file_utils.reflection_file_server(
        crystal_symmetry = crystal_symmetry,
        force_symmetry   = True,
        reflection_files = [reflection_file],
        err              = null_out())
      parameters = utils.data_and_flags_master_params().extract()
      if(label is not None):
        parameters.labels = [label]
      determine_data_and_flags_result = utils.determine_data_and_flags(
        reflection_file_server = reflection_file_server,
        parameters             = parameters,
        keep_going             = True,
        log                    = null_out())
      f_obss.append(determine_data_and_flags_result.f_obs)
  if(len(f_obss)!=2):
    # NOTE(review): 'errors' is not defined in this function — confirm it
    # exists at module scope, otherwise this raises NameError, not Sorry.
    raise Sorry(" ".join(errors))
  if(not command_line.options.silent):
    for ifobs, fobs in enumerate(f_obss):
      print("*** Summary for data set %d:"%ifobs, file=log)
      fobs.show_comprehensive_summary(f = log)
      print(file=log)
  pdb_combined = combine_unique_pdb_files(file_names = pdb_file_names)
  pdb_combined.report_non_unique(out = log)
  if(len(pdb_combined.unique_file_names) == 0):
    raise Sorry("No coordinate file given.")
  #
  # Replace any CRYST1 record with one built from the chosen symmetry.
  raw_recs = flex.std_string()
  for rec in pdb_combined.raw_records:
    if(rec.upper().count("CRYST1")==0):
      raw_recs.append(rec)
  raw_recs.append(iotbx.pdb.format_cryst1_record(
    crystal_symmetry = crystal_symmetry))
  #
  pdb_in = iotbx.pdb.input(source_info = None, lines = raw_recs)
  hierarchy = pdb_in.construct_hierarchy()
  omit_sel = flex.bool(hierarchy.atoms_size(), False)
  if (params.advanced.omit_selection is not None) :
    print("Will omit selection from phasing model:", file=log)
    print(" " + params.advanced.omit_selection, file=log)
    omit_sel = hierarchy.atom_selection_cache().selection(
      params.advanced.omit_selection)
    print("%d atoms selected for removal" % omit_sel.count(True), file=log)
  xray_structure = pdb_in.xray_structure_simple()
  xray_structure = xray_structure.select(~omit_sel)
  if(not command_line.options.silent):
    print("*** Model summary:", file=log)
    xray_structure.show_summary(f = log)
    print(file=log)
  info0 = f_obss[0].info()
  info1 = f_obss[1].info()
  f_obss[0] = f_obss[0].resolution_filter(d_min = params.high_resolution,
    d_max = params.low_resolution).set_info(info0)
  f_obss[1] = f_obss[1].resolution_filter(d_min = params.high_resolution,
    d_max = params.low_resolution).set_info(info1)
  if(params.sigma_cutoff is not None):
    for i in [0,1]:
      if(f_obss[i].sigmas() is not None):
        sel = f_obss[i].data() > f_obss[i].sigmas()*params.sigma_cutoff
        f_obss[i] = f_obss[i].select(sel).set_info(info0)
  for k, f_obs in enumerate(f_obss) :
    if (f_obs.indices().size() == 0) :
      raise Sorry("No data left in array %d (labels=%s) after filtering!"
        % (k+1, f_obs.info().label_string()))
  output_file_name = params.output_file
  if (output_file_name is None) and (params.file_name_prefix is not None) :
    output_file_name = "%s_%s.mtz" % (params.file_name_prefix, params.job_id)
  output_files = compute_fo_minus_fo_map(
    data_arrays = f_obss,
    xray_structure = xray_structure,
    log = log,
    silent = command_line.options.silent,
    output_file = output_file_name,
    peak_search=params.find_peaks_holes,
    map_cutoff=params.map_cutoff,
    peak_search_params=params.peak_search,
    multiscale=params.advanced.multiscale,
    anomalous=params.advanced.anomalous).file_names
  return output_files
def ensemble_rmsf_stats(
    self,
    ensemble_hierarchys,
    transfer_b_factors=True,
    ignore_hd=True,
    max_print=10,
    verbose=False,
    out=None,
    ):
  """Compute mean/centroid hierarchies and per-residue/per-atom RMSF
  statistics for an ensemble of hierarchies.

  Stores results on self (mean_hierarchy, closest_to_mean_index,
  centroid_hierarchy, centroid_index, least_index, tempFactor,
  per_residue, per_atom). When transfer_b_factors is True, writes the
  RMSF-derived B-factors onto the centroid hierarchy's atoms and scales
  their occupancies by the ensemble size. Returns self.
  """
  if (out is None): out = sys.stdout
  if verbose:
    utils.print_header(
      "Ensemble mean and centroid geometry statistics",
      out=out)
  ensemble_size = len(ensemble_hierarchys)  # not used below
  self.mean_hierarchy = ensemble_mean_hierarchy(
    ensemble_hierarchys,
    ignore_hd=ignore_hd,
    verbose=verbose,
    )
  close_hierarchy, self.closest_to_mean_index = closest_to_mean(
    ensemble_hierarchys,
    self.mean_hierarchy,
    ignore_hd=ignore_hd,
    verbose=verbose,
    )
  self.centroid_hierarchy, least_hierarchy, self.centroid_index, self.least_index = \
    get_centroid_hierarchy(
      ensemble_hierarchys,
      ignore_hd=ignore_hd,
      verbose=verbose,
      )
  self.tempFactor, self.per_residue, self.per_atom = \
    get_rmsf_B_factor_per_residue_per_atom(
      ensemble_hierarchys,
      self.centroid_hierarchy,
      # mean_sites_cart,
      ignore_hd=ignore_hd,
      verbose=verbose,
      )
  if verbose:
    # Print at most max_print entries of each statistic.
    print('Per residue rmsf', file=out)
    for i, (key, item) in enumerate(self.per_residue.items()):
      print(' %5d : %s %0.2f' % (i, key, item), file=out)
      if i >= max_print: break
    print('B-factor', file=out)
    for i, (key, item) in enumerate(self.tempFactor.items()):
      print(' %5d : %s %7.2f' % (i, key, item[0]), file=out)
      if i >= max_print: break
    print('Per atom rmsf', file=out)
    for i, atom in enumerate(ensemble_hierarchys[0].atoms()):
      print(' %5d : %s %0.2f' % (i, atom.quote(), self.per_atom[i]), file=out)
      if i >= max_print: break
  if transfer_b_factors:
    # Scale occupancies by ensemble size and transfer RMSF-derived
    # B-factors onto the centroid model's atoms.
    # NOTE(review): assumes tempFactor iteration order matches atom order
    # in the centroid hierarchy — verify in get_rmsf_B_factor_per_residue_per_atom.
    atoms = self.centroid_hierarchy.atoms()
    occupancies = atoms.extract_occ()
    occupancies *= len(ensemble_hierarchys)
    atoms.set_occ(occupancies)
    for i, (key, item) in enumerate(self.tempFactor.items()):
      atom = atoms[i]
      atom.b = item[0]
  return self
  def kinetic_energy_stats(self):
    """
    Print kinetic-energy statistics for non-solvent atoms of the current
    ensemble-refinement run to self.ensemble_obj.log.

    Shows overall basic statistics of the running per-atom KE array, tables
    of the lowest and highest KE atoms (up to 25 each, capped at half the
    atom count), and a KE histogram.  No-op unless self.ensemble_obj and its
    er_data.ke_protein_running are set.
    """
    if self.ensemble_obj is not None:
      if self.ensemble_obj.er_data.ke_protein_running is not None:
        utils.print_header(
          line ="Non-solvent KE Statistics | MC : "+str(self.ensemble_obj.macro_cycle),
          out = self.ensemble_obj.log)
        # Overall min/max/mean/sdev/skew over the running KE array.
        ke_basic_stats = scitbx.math.basic_statistics(self.ensemble_obj.er_data.ke_protein_running)
        print >> self.ensemble_obj.log, ' {0:<11} {1:>12} {2:>12} {3:>12} {4:>12} {5:>12} '.format(
          '','min','max','mean', 'sdev', 'skew')
        print >> self.ensemble_obj.log, ' KE MC {0:<5}: {1:12.3f} {2:12.3f} {3:12.3f} {4:12.3f} {5:12.3f}'.format(
          self.ensemble_obj.macro_cycle,
          ke_basic_stats.min,
          ke_basic_stats.max,
          ke_basic_stats.mean,
          ke_basic_stats.biased_standard_deviation,
          ke_basic_stats.skew)
        # Pair each KE value with its atom index so atoms can be ranked by KE.
        ke_atom_number_tuple_list = []
        ke_list_histo = []
        for n, ke in enumerate(self.ensemble_obj.er_data.ke_protein_running):
          ke_atom_number_tuple_list.append( (n, ke) )
          ke_list_histo.append(ke)
        assert len(ke_atom_number_tuple_list) == len(self.ensemble_obj.er_data.ke_protein_running)
        # Sort (atom, ke) pairs by KE (ascending).
        sorted_by_ke_ke_atom_number_tuple_list = \
          sorted(ke_atom_number_tuple_list, key=lambda ke:ke[-1])
        ke_list_histo = sorted(ke_list_histo)
        #Lowest KE Atoms
        pdb_atoms = self.ensemble_obj.pdb_hierarchy().atoms()
        print >> self.ensemble_obj.log, "\nNon-solvent atoms lowest KE : "
        print >> self.ensemble_obj.log, \
          ' {0:3} : {1:>44} {2:>12} {3:>12}'.format(
            'rank', 'KE', 'dmean/sdev', '%cum freq')
        # NOTE(review): low_five_percent is computed but never used.
        low_five_percent = (len(ke_atom_number_tuple_list) * 0.05)
        cntr = 0
        # At most 25 rows, never more than half the atoms.
        lowest_range = min(25, int(0.5 * len(ke_atom_number_tuple_list) ) )
        while cntr < lowest_range:
          atom_info = pdb_atoms[sorted_by_ke_ke_atom_number_tuple_list[cntr][0]].fetch_labels()
          # Sanity check: hierarchy atom order matches KE array order.
          assert atom_info.i_seq == sorted_by_ke_ke_atom_number_tuple_list[cntr][0]
          print >> self.ensemble_obj.log, \
            ' {0:5} : {1:6} {2:6} {3:6} {4:6} {5:6} {6:9.3f} {7:12.3f} {8:12.1f}'.format(
              cntr+1,
              sorted_by_ke_ke_atom_number_tuple_list[cntr][0],
              atom_info.name,
              atom_info.resname,
              atom_info.chain_id,
              atom_info.resseq,
              sorted_by_ke_ke_atom_number_tuple_list[cntr][1],
              # Deviation from the mean in units of (biased) standard deviation.
              (sorted_by_ke_ke_atom_number_tuple_list[cntr][1]-ke_basic_stats.mean)\
                / ke_basic_stats.biased_standard_deviation,
              # Cumulative frequency (%) within the KE-sorted list.
              100 * (float(cntr)/float(len(ke_atom_number_tuple_list))) )
          cntr+=1
        #Highest KE Atoms
        print >> self.ensemble_obj.log, "\nNon-solvent atoms highest KE : "
        print >> self.ensemble_obj.log, \
          ' {0:3} : {1:>44} {2:>12} {3:>12}'.format(
            'rank', 'KE', 'dmean/sdev', '%cum freq')
        # Start so that the tail of the sorted list (same cap as above) is shown.
        cntr = len(ke_atom_number_tuple_list) - min(25, int(0.5 * len(ke_atom_number_tuple_list) ) )
        while cntr < len(ke_atom_number_tuple_list):
          atom_info = pdb_atoms[sorted_by_ke_ke_atom_number_tuple_list[cntr][0]].fetch_labels()
          assert atom_info.i_seq == sorted_by_ke_ke_atom_number_tuple_list[cntr][0]
          print >> self.ensemble_obj.log, \
            ' {0:5} : {1:6} {2:6} {3:6} {4:6} {5:6} {6:9.3f} {7:12.3f} {8:12.1f}'.format(
              cntr+1,
              sorted_by_ke_ke_atom_number_tuple_list[cntr][0],
              atom_info.name,
              atom_info.resname,
              atom_info.chain_id,
              atom_info.resseq,
              sorted_by_ke_ke_atom_number_tuple_list[cntr][1],
              (sorted_by_ke_ke_atom_number_tuple_list[cntr][1]-ke_basic_stats.mean)\
                / ke_basic_stats.biased_standard_deviation,
              100 * (float(cntr)/float(len(ke_atom_number_tuple_list))) )
          cntr+=1
        #XXX Add print stats by for <ke>/residue
        #Histogram
        # Bins are built excluding the top 10% of KE values, then the last
        # bin is stretched to cover the true maximum.
        bin_list, bin_range_list = self.bin_generator_equal_range(
          array = ke_list_histo[:-int(0.1 * (len(ke_list_histo) ) )],
          number_of_bins = 50)
        bin_range_list[-1][1] = max(ke_list_histo)
        self.bivariate_histogram(
          bin_array = ke_list_histo,
          value_array = ke_list_histo,
          name = 'KE Histogram',
          bin_list = bin_list,
          bin_range_list = bin_range_list)
        print >> self.ensemble_obj.log, "|"+"-"*77+"|\n"
def ensemble_mean_geometry_stats(self, restraints_manager, xray_structure, ensemble_xray_structures, ignore_hd = True, verbose = False, out = None, return_pdb_string = False): if (out is None): out = sys.stdout if verbose: utils.print_header("Ensemble mean geometry statistics", out = out) ensemble_size = len(ensemble_xray_structures) print >> out, "Ensemble size : ", ensemble_size # Dictionaries to store deltas ensemble_bond_deltas = {} ensemble_angle_deltas = {} ensemble_chirality_deltas = {} ensemble_planarity_deltas = {} ensemble_dihedral_deltas = {} # List to store rmsd of each model structures_bond_rmsd = flex.double() structures_angle_rmsd = flex.double() structures_chirality_rmsd = flex.double() structures_planarity_rmsd = flex.double() structures_dihedral_rmsd = flex.double() # Remove water and hd atoms from global restraints manager selection = flex.bool() for sc in xray_structure.scatterers(): if sc.label.find('HOH') > -1: selection.append(True) else: selection.append(False) if ignore_hd: hd_selection = xray_structure.hd_selection() assert hd_selection.size() == selection.size() for n in xrange(hd_selection.size()): if hd_selection[n] or selection[n]: selection[n] = True restraints_manager = restraints_manager.select(selection = ~selection) # Get all deltas for n, structure in enumerate(ensemble_xray_structures): if verbose: print >> out, "\nModel : ", n+1 sites_cart = structure.sites_cart() # Remove water and hd atoms from individual structures sites cart selection = flex.bool() for sc in structure.scatterers(): if sc.label.find('HOH') > -1: selection.append(True) else: selection.append(False) if ignore_hd: hd_selection = structure.hd_selection() assert hd_selection.size() == selection.size() for n in xrange(hd_selection.size()): if hd_selection[n] or selection[n]: selection[n] = True sites_cart = sites_cart.select(~selection) assert sites_cart is not None site_labels = None energies_sites = restraints_manager.energies_sites( sites_cart = sites_cart, 
compute_gradients = False) # Rmsd of individual model bond_rmsd = energies_sites.geometry.bond_deviations()[2] angle_rmsd = energies_sites.geometry.angle_deviations()[2] chirality_rmsd = energies_sites.geometry.chirality_deviations()[2] planarity_rmsd = energies_sites.geometry.planarity_deviations()[2] dihedral_rmsd = energies_sites.geometry.dihedral_deviations()[2] structures_bond_rmsd.append(bond_rmsd) structures_angle_rmsd.append(angle_rmsd) structures_chirality_rmsd.append(chirality_rmsd) structures_planarity_rmsd.append(planarity_rmsd) structures_dihedral_rmsd.append(dihedral_rmsd) if verbose: print >> out, " Model RMSD" print >> out, " bond : %.6g" % bond_rmsd print >> out, " angle : %.6g" % angle_rmsd print >> out, " chirality : %.6g" % chirality_rmsd print >> out, " planarity : %.6g" % planarity_rmsd print >> out, " dihedral : %.6g" % dihedral_rmsd # Bond pair_proxies = restraints_manager.geometry.pair_proxies(flags=None, sites_cart=sites_cart) assert pair_proxies is not None if verbose: pair_proxies.bond_proxies.show_histogram_of_deltas( sites_cart = sites_cart, n_slots = 10, f = out) for proxy in pair_proxies.bond_proxies.simple: bond_simple_proxy = geometry_restraints.bond( sites_cart = sites_cart, proxy = proxy) if proxy.i_seqs in ensemble_bond_deltas: ensemble_bond_deltas[proxy.i_seqs][0]+=bond_simple_proxy.delta ensemble_bond_deltas[proxy.i_seqs][1]+=1 else: ensemble_bond_deltas[proxy.i_seqs] = [bond_simple_proxy.delta, 1] if verbose: print >> out, "bond simple :", proxy.i_seqs print >> out, " distance_ideal : %.6g" % proxy.distance_ideal print >> out, " distance_model : %.6g" % bond_simple_proxy.distance_model print >> out, " detla : %.6g" % bond_simple_proxy.delta if (pair_proxies.bond_proxies.asu.size() > 0): asu_mappings = pair_proxies.bond_proxies.asu_mappings() for proxy in pair_proxies.bond_proxies.asu: rt_mx = asu_mappings.get_rt_mx_ji(pair=proxy) bond_asu_proxy = geometry_restraints.bond( sites_cart = sites_cart, asu_mappings = asu_mappings, 
proxy = proxy) proxy_i_seqs = (proxy.i_seq, proxy.j_seq) if proxy_i_seqs in ensemble_bond_deltas: ensemble_bond_deltas[proxy_i_seqs][0]+=bond_asu_proxy.delta ensemble_bond_deltas[proxy_i_seqs][1]+=1 else: ensemble_bond_deltas[proxy_i_seqs] = [bond_asu_proxy.delta, 1] if verbose: print >> out, "bond asu :", (proxy.i_seq, proxy.j_seq), rt_mx print >> out, " distance_ideal : %.6g" % proxy.distance_ideal print >> out, " distance_model : %.6g" % bond_asu_proxy.distance_model print >> out, " delta : %.6g" % bond_asu_proxy.delta # Angle if verbose: restraints_manager.geometry.angle_proxies.show_histogram_of_deltas( sites_cart = sites_cart, n_slots = 10, f = out) for proxy in restraints_manager.geometry.angle_proxies: angle_proxy = geometry_restraints.angle( sites_cart = sites_cart, proxy = proxy) if proxy.i_seqs in ensemble_angle_deltas: ensemble_angle_deltas[proxy.i_seqs][0]+=angle_proxy.delta ensemble_angle_deltas[proxy.i_seqs][1]+=1 else: ensemble_angle_deltas[proxy.i_seqs] = [angle_proxy.delta, 1] if verbose: print >> out, "angle : ", proxy.i_seqs print >> out, " angle_ideal : %.6g" % proxy.angle_ideal print >> out, " angle_model : %.6g" % angle_proxy.angle_model print >> out, " delta : %.6g" % angle_proxy.delta # Chirality if verbose: restraints_manager.geometry.chirality_proxies.show_histogram_of_deltas( sites_cart = sites_cart, n_slots = 10, f = out) for proxy in restraints_manager.geometry.chirality_proxies: chirality_proxy = geometry_restraints.chirality( sites_cart = sites_cart, proxy = proxy) if proxy.i_seqs in ensemble_chirality_deltas: ensemble_chirality_deltas[proxy.i_seqs][0]+=chirality_proxy.delta ensemble_chirality_deltas[proxy.i_seqs][1]+=1 else: ensemble_chirality_deltas[proxy.i_seqs] = [chirality_proxy.delta, 1] if verbose: print >> out, "chirality : ", proxy.i_seqs print >> out, " chirality_ideal : %.6g" % proxy.volume_ideal print >> out, " chirality_model : %.6g" % chirality_proxy.volume_model print >> out, " chirality : %.6g" % chirality_proxy.delta 
# Planarity for proxy in restraints_manager.geometry.planarity_proxies: planarity_proxy = geometry_restraints.planarity( sites_cart = sites_cart, proxy = proxy) proxy_i_seqs = [] for i_seq in proxy.i_seqs: proxy_i_seqs.append(i_seq) proxy_i_seqs = tuple(proxy_i_seqs) if proxy_i_seqs in ensemble_planarity_deltas: ensemble_planarity_deltas[proxy_i_seqs][0]+=planarity_proxy.rms_deltas() ensemble_planarity_deltas[proxy_i_seqs][1]+=1 else: ensemble_planarity_deltas[proxy_i_seqs] = [planarity_proxy.rms_deltas(), 1] if verbose: print >> out, "planarity : ", proxy_i_seqs print >> out, " planarity rms_deltas : %.6g" % planarity_proxy.rms_deltas() # Dihedral if verbose: restraints_manager.geometry.dihedral_proxies.show_histogram_of_deltas( sites_cart = sites_cart, n_slots = 10, f = out) for proxy in restraints_manager.geometry.dihedral_proxies: dihedral_proxy = geometry_restraints.dihedral( sites_cart = sites_cart, proxy = proxy) if proxy.i_seqs in ensemble_dihedral_deltas: ensemble_dihedral_deltas[proxy.i_seqs][0]+=dihedral_proxy.delta ensemble_dihedral_deltas[proxy.i_seqs][1]+=1 else: ensemble_dihedral_deltas[proxy.i_seqs] = [dihedral_proxy.delta, 1] if verbose: print >> out, "dihedral : ", proxy.i_seqs print >> out, " dihedral_ideal : %.6g" % proxy.angle_ideal print >> out, " periodicity : %.6g" % proxy.periodicity print >> out, " dihedral_model : %.6g" % dihedral_proxy.angle_model print >> out, " delta : %.6g" % dihedral_proxy.delta # Calculate RMSDs for ensemble model # Bond mean_bond_delta = flex.double() for proxy, info in ensemble_bond_deltas.iteritems(): assert info[1] == ensemble_size mean_delta = info[0] / info[1] mean_bond_delta.append(mean_delta) bond_delta_sq = mean_bond_delta * mean_bond_delta ensemble_bond_rmsd = math.sqrt(flex.mean_default(bond_delta_sq, 0)) # Angle mean_angle_delta = flex.double() for proxy, info in ensemble_angle_deltas.iteritems(): assert info[1] == ensemble_size mean_delta = info[0] / info[1] mean_angle_delta.append(mean_delta) 
angle_delta_sq = mean_angle_delta * mean_angle_delta ensemble_angle_rmsd = math.sqrt(flex.mean_default(angle_delta_sq, 0)) # Chirality mean_chirality_delta = flex.double() for proxy, info in ensemble_chirality_deltas.iteritems(): assert info[1] == ensemble_size mean_delta = info[0] / info[1] mean_chirality_delta.append(mean_delta) chirality_delta_sq = mean_chirality_delta * mean_chirality_delta ensemble_chirality_rmsd = math.sqrt(flex.mean_default(chirality_delta_sq, 0)) # Planarity mean_planarity_delta = flex.double() for proxy, info in ensemble_planarity_deltas.iteritems(): assert info[1] == ensemble_size mean_delta = info[0] / info[1] mean_planarity_delta.append(mean_delta) planarity_delta_sq = mean_planarity_delta * mean_planarity_delta ensemble_planarity_rmsd = math.sqrt(flex.mean_default(planarity_delta_sq, 0)) # Dihedral mean_dihedral_delta = flex.double() for proxy, info in ensemble_dihedral_deltas.iteritems(): assert info[1] == ensemble_size mean_delta = info[0] / info[1] mean_dihedral_delta.append(mean_delta) dihedral_delta_sq = mean_dihedral_delta * mean_dihedral_delta ensemble_dihedral_rmsd = math.sqrt(flex.mean_default(dihedral_delta_sq, 0)) # Calculate <structure rmsd> assert ensemble_size == structures_bond_rmsd assert ensemble_size == structures_angle_rmsd assert ensemble_size == structures_chirality_rmsd assert ensemble_size == structures_planarity_rmsd assert ensemble_size == structures_dihedral_rmsd structure_bond_rmsd_mean = structures_bond_rmsd.min_max_mean().mean structure_angle_rmsd_mean = structures_angle_rmsd.min_max_mean().mean structure_chirality_rmsd_mean = structures_chirality_rmsd.min_max_mean().mean structure_planarity_rmsd_mean = structures_planarity_rmsd.min_max_mean().mean structure_dihedral_rmsd_mean = structures_dihedral_rmsd.min_max_mean().mean # Show summary utils.print_header("Ensemble RMSD summary", out = out) print >> out, " RMSD (mean delta per restraint)" print >> out, " bond : %.6g" % ensemble_bond_rmsd print >> out, " 
angle : %.6g" % ensemble_angle_rmsd print >> out, " chirality : %.6g" % ensemble_chirality_rmsd print >> out, " planarity : %.6g" % ensemble_planarity_rmsd print >> out, " dihedral : %.6g" % ensemble_dihedral_rmsd print >> out, " RMSD (mean RMSD per structure)" print >> out, " bond : %.6g" % structure_bond_rmsd_mean print >> out, " angle : %.6g" % structure_angle_rmsd_mean print >> out, " chirality : %.6g" % structure_chirality_rmsd_mean print >> out, " planarity : %.6g" % structure_planarity_rmsd_mean print >> out, " dihedral : %.6g" % structure_dihedral_rmsd_mean if ignore_hd: print >> out, "\n Calculated excluding H/D" else: print >> out, "\n Calculated including H/D" if return_pdb_string: ens_geo_pdb_string = "REMARK 3" ens_geo_pdb_string += "\nREMARK 3 NUMBER STRUCTURES IN ENSEMBLE : {0:5d}".format(ensemble_size) if ignore_hd: ens_geo_pdb_string += "\nREMARK 3 RMS DEVIATIONS FROM IDEAL VALUES (EXCLUDING H/D)" else: ens_geo_pdb_string += "\nREMARK 3 RMS DEVIATIONS FROM IDEAL VALUES (INCLUDING H/D)" ens_geo_pdb_string += "\nREMARK 3 RMSD (MEAN DELTA PER RESTRAINT)" ens_geo_pdb_string += "\nREMARK 3 BOND : {0:5.3f}".format(ensemble_bond_rmsd) ens_geo_pdb_string += "\nREMARK 3 ANGLE : {0:5.3f}".format(ensemble_angle_rmsd) ens_geo_pdb_string += "\nREMARK 3 CHIRALITY : {0:5.3f}".format(ensemble_chirality_rmsd) ens_geo_pdb_string += "\nREMARK 3 PLANARITY : {0:5.3f}".format(ensemble_planarity_rmsd) ens_geo_pdb_string += "\nREMARK 3 DIHEDRAL : {0:5.2f}".format(ensemble_dihedral_rmsd) ens_geo_pdb_string += "\nREMARK 3 RMSD (MEAN RMSD PER STRUCTURE)" ens_geo_pdb_string += "\nREMARK 3 BOND : {0:5.3f}".format(structure_bond_rmsd_mean) ens_geo_pdb_string += "\nREMARK 3 ANGLE : {0:5.3f}".format(structure_angle_rmsd_mean) ens_geo_pdb_string += "\nREMARK 3 CHIRALITY : {0:5.3f}".format(structure_chirality_rmsd_mean) ens_geo_pdb_string += "\nREMARK 3 PLANARITY : {0:5.3f}".format(structure_planarity_rmsd_mean) ens_geo_pdb_string += "\nREMARK 3 DIHEDRAL : 
{0:5.2f}".format(structure_dihedral_rmsd_mean) ens_geo_pdb_string += "\nREMARK 3" return ens_geo_pdb_string
  def ensemble_reduction(self,
                         rfree_tolerance = 0.0025):
    """
    Reduce the ensemble to the minimum number of models that still
    reproduces the full-simulation Rfree (within rfree_tolerance).

    Tries increasingly sparse, evenly spaced subsets of the trajectory
    (every div_int-th structure), averages their f_calc/f_mask, rescales,
    and keeps the sparsest subset whose Rfree stays within tolerance of
    the full-ensemble Rfree.  On success the er_data arrays
    (xray_structures, pdb_hierarchys, ke_pdb) are pruned in place and
    fmodel_total is updated with the averaged structure factors.
    """
    #Reduces number of models to minimum required to reproduce Rfree
    utils.print_header("Ensemble reducer", out = self.ensemble_obj.log)
    self.ensemble_obj.show_overall(message = "Full simulation fmodel final",
                                   fmodel_running = False)
    final_rfree = self.ensemble_obj.fmodel_total.r_free()
    final_rwork = self.ensemble_obj.fmodel_total.r_work()

    # XXX no b_iso - how to apply this???
#    print >> self.ensemble_obj.log, "\nApply B_iso to all model in ensemble"
#    shift_b_iso  = self.ensemble_obj.fmodel_total.b_iso()
#    print >> self.ensemble_obj.log, 'Shift B_iso : {0:8.3f}'.format(shift_b_iso)
#    for x in self.ensemble_obj.er_data.xray_structures:
#      x.shift_us(b_shift = shift_b_iso)

    total_number_xrs = len(self.ensemble_obj.er_data.xray_structures)
    print >> self.ensemble_obj.log, "\nReduce ensemble with equal distribution though trajectory :"
    print >> self.ensemble_obj.log, "Rfree tolerance (%) : ", rfree_tolerance * 100
    print >> self.ensemble_obj.log, '\n {0:>12} {1:>8} {2:>8} {3:>8}'\
      .format('Num','Rwork','Rfree','k1')
    target_rfree = final_rfree
    final_div = None
    # Candidate strides (take every div_int-th model), sparsest last.
    for div_int in [1,2,3,4,5,6,7,8,9,10,12,14,16,18,20,25,30,35,40,45,50,60,
                    70,80,90,100,200,300,400,500,600,700,800,900,1000,2000,
                    3000,4000,5000]:
      if div_int <= total_number_xrs:
        self.fmodel_ens = self.ensemble_obj.fmodel_total.deep_copy()
        cntr = 0.0
        fcalc_total = None
        fmask_total = None
#        self.fmodel_ens.update(k_sols  = self.ensemble_obj.fmodel_total.k_sols(),
#                               b_sol   = self.ensemble_obj.fmodel_total.b_sol(),
#                               b_cart  = self.ensemble_obj.fmodel_total.b_cart() )
        for x in xrange(total_number_xrs):
          if x%int(div_int) == 0:
            #Apply back trace of Biso here...
            self.fmodel_ens.update_xray_structure(
              xray_structure = self.ensemble_obj.er_data.xray_structures[x],
              update_f_calc  = True,
              update_f_mask  = True,
              force_update_f_mask = True)
            # Running sums of per-model f_calc / f_mask data.
            if fcalc_total == None:
              fcalc_total = self.fmodel_ens.f_calc().data().deep_copy()
              fmask_total = self.fmodel_ens.f_masks()[0].data().deep_copy()
              cntr = 1
            else:
              fcalc_total += self.fmodel_ens.f_calc().data().deep_copy()
              fmask_total += self.fmodel_ens.f_masks()[0].data().deep_copy()
              cntr += 1
            if x == total_number_xrs-1:
              # Last model reached: score the averaged subset.
              self.fmodel_ens.update(
                f_calc = self.ensemble_obj.copy_ma.array(data = (fcalc_total / cntr)),
                f_mask = self.ensemble_obj.copy_ma.array(data = (fmask_total / cntr)) )
              self.fmodel_ens.update_all_scales(
                log    = self.ensemble_obj.log,
                remove_outliers=False,
                params = self.ensemble_obj.bsp)
              # Subsets of fewer than 4 models are not considered.
              if cntr < 4:
                break
              print >> self.ensemble_obj.log, "Ens: {0:8d} {1:8.3f} {2:8.3f} {3:8.3f}"\
                .format(cntr,
                        self.fmodel_ens.r_work(),
                        self.fmodel_ens.r_free(),
                        self.fmodel_ens.scale_k1() )
              if self.fmodel_ens.r_free() < (target_rfree + rfree_tolerance):
                # Sparser subset still acceptable; remember it.
                final_div = div_int
                final_f_calc = self.ensemble_obj.copy_ma.array(data = (fcalc_total / cntr))
                final_f_mask = self.ensemble_obj.copy_ma.array(data = (fmask_total / cntr))
                if self.fmodel_ens.r_free() < target_rfree:
                  target_rfree = self.fmodel_ens.r_free()
    if final_div == None:
      # Even the full set failed the tolerance check.
      print >> self.ensemble_obj.log, "Warning pdb ensemble does not contain sufficent models and missrepresents simulation. Simulation Rfree: {0:2.3f} %".format(100*(final_rfree))
    else:
      #Update fmodel_total
      self.ensemble_obj.fmodel_total.update(f_calc = final_f_calc,
                                            f_mask = final_f_mask)
      self.ensemble_obj.fmodel_total.update_all_scales(
        log    = self.ensemble_obj.log,
        remove_outliers=False,
        params = self.ensemble_obj.bsp)
      #Parse arrays for output PDB
      copy_ed_data_xray_structures = []
      copy_pdb_hierarchys = []
      copy_ed_data_ke_pdb = []
      for x in xrange(len(self.ensemble_obj.er_data.xray_structures)):
        if x%int(final_div) == 0:
          copy_ed_data_xray_structures.append(self.ensemble_obj.er_data.xray_structures[x])
          copy_pdb_hierarchys.append(self.ensemble_obj.er_data.pdb_hierarchys[x])
          copy_ed_data_ke_pdb.append(self.ensemble_obj.er_data.ke_pdb[x])
      self.ensemble_obj.er_data.xray_structures = copy_ed_data_xray_structures
      self.ensemble_obj.er_data.pdb_hierarchys = copy_pdb_hierarchys
      self.ensemble_obj.er_data.ke_pdb = copy_ed_data_ke_pdb
      print >> self.ensemble_obj.log, "Final pdb ensemble contains {0:3d} models".format(len(self.ensemble_obj.er_data.xray_structures))
      assert len(self.ensemble_obj.er_data.xray_structures) == len(self.ensemble_obj.er_data.pdb_hierarchys)
      assert len(self.ensemble_obj.er_data.xray_structures) == len(self.ensemble_obj.er_data.ke_pdb)
    print >> self.ensemble_obj.log, "|"+"-"*77+"|\n"
if (len(pdb_combined.unique_file_names) == 0): raise Sorry("No coordinate file given.") raw_records = pdb_combined.raw_records try: pdb_inp = iotbx.pdb.input(source_info=None, lines=flex.std_string(raw_records)) except ValueError, e: raise Sorry("Model format (PDB or mmCIF) error:\n%s" % str(e)) model = mmtbx.model.manager( model_input=pdb_inp, restraint_objects=processed_args.cif_objects, crystal_symmetry=processed_args.crystal_symmetry, log=log) if (not command_line.options.silent): utils.print_header("TLS groups from PDB file header", out=log) pdb_inp_tls = mmtbx.tls.tools.tls_from_pdb_inp( remark_3_records=model._model_input.extract_remark_iii_records(3), pdb_hierarchy=model.get_hierarchy()) # tls_groups = [] if (pdb_inp_tls.tls_present): if (pdb_inp_tls.error_string is not None): raise Sorry(pdb_inp_tls.error_string) pdb_tls = mmtbx.tls.tools.extract_tls_from_pdb(pdb_inp_tls=pdb_inp_tls, model=model) tls_groups = pdb_tls.pdb_inp_tls.tls_params # tls_selections_strings = [] # if (len(tls_groups) == 0 and not command_line.options.silent):
  def run(self, args, command_name, out=sys.stdout):
    """
    Command-line driver: score each model of a PDB ensemble by its agreement
    with the ensemble-average Fcalc map, writing per-atom probabilities as
    occupancies into a *_pensemble.pdb file.

    Workflow: parse args/phil, set up a multi_out log (also captures stderr),
    optionally read Fobs from an input MTZ, read the multi-model PDB, then
    either (a) assign sigmas from a user-supplied map, or (b) compute
    per-model and ensemble-average Fcalc maps and convert the sigma ratio
    into a per-atom probability; optionally rank models by negative
    log-likelihood and write an nll-ordered ensemble.
    """
    command_line = (iotbx_option_parser(
      usage="%s [options]" % command_name,
      description='Example: %s data.mtz data.mtz ref_model.pdb'%command_name)
      .option(None, "--show_defaults",
        action="store_true",
        help="Show list of parameters.")
      ).process(args=args)

    cif_file = None
    processed_args = utils.process_command_line_args(
      args = args,
      log = sys.stdout,
      master_params = master_phil)
    params = processed_args.params
    if(params is None): params = master_phil
    self.params = params.extract().ensemble_probability
    pdb_file_names = processed_args.pdb_file_names
    if len(pdb_file_names) != 1 :
      raise Sorry("Only one PDB structure may be used")
    pdb_file = file_reader.any_file(pdb_file_names[0])
    # Log goes to stdout and to <pdbname>_pensemble.log; stderr is captured too.
    self.log = multi_out()
    self.log.register(label="stdout", file_object=sys.stdout)
    self.log.register(
      label="log_buffer",
      file_object=StringIO(),
      atexit_send_to=None)
    sys.stderr = self.log
    log_file = open(pdb_file_names[0].split('/')[-1].replace('.pdb','') + '_pensemble.log', "w")

    self.log.replace_stringio(
      old_label="log_buffer",
      new_label="log",
      new_file_object=log_file)
    utils.print_header(command_name, out = self.log)
    params.show(out = self.log)
    #
    f_obs = None
    r_free_flags = None
    reflection_files = processed_args.reflection_files

    if self.params.fobs_vs_fcalc_post_nll:
      if len(reflection_files) == 0:
        raise Sorry("Fobs from input MTZ required for fobs_vs_fcalc_post_nll")

    if len(reflection_files) > 0:
      crystal_symmetry = processed_args.crystal_symmetry
      print >> self.log, 'Reflection file : ', processed_args.reflection_file_names[0]
      utils.print_header("Model and data statistics", out = self.log)
      rfs = reflection_file_server(
        crystal_symmetry = crystal_symmetry,
        reflection_files = processed_args.reflection_files,
        log = self.log)

      parameters = utils.data_and_flags_master_params().extract()
      determine_data_and_flags_result = utils.determine_data_and_flags(
        reflection_file_server = rfs,
        parameters = parameters,
        data_parameter_scope = "refinement.input.xray_data",
        flags_parameter_scope = "refinement.input.xray_data.r_free_flags",
        data_description = "X-ray data",
        keep_going = True,
        log = self.log)
      f_obs = determine_data_and_flags_result.f_obs
      number_of_reflections = f_obs.indices().size()
      r_free_flags = determine_data_and_flags_result.r_free_flags
      test_flag_value = determine_data_and_flags_result.test_flag_value
      # Fall back to an all-False (all working set) flag array.
      if(r_free_flags is None):
        r_free_flags=f_obs.array(data=flex.bool(f_obs.data().size(), False))

    # process PDB
    pdb_file.assert_file_type("pdb")
    #
    pdb_in = hierarchy.input(file_name=pdb_file.file_name)
    ens_pdb_hierarchy = pdb_in.construct_hierarchy()
    ens_pdb_hierarchy.atoms().reset_i_seq()
    ens_pdb_xrs_s = pdb_in.input.xray_structures_simple()
    number_structures = len(ens_pdb_xrs_s)
    print >> self.log, 'Number of structure in ensemble : ', number_structures

    # Calculate sigmas from input map only
    if self.params.assign_sigma_from_map and self.params.ensemble_sigma_map_input is not None:
      # process MTZ
      input_file = file_reader.any_file(self.params.ensemble_sigma_map_input)
      if input_file.file_type == "hkl" :
        if input_file.file_object.file_type() != "ccp4_mtz" :
          raise Sorry("Only MTZ format accepted for map input")
        else:
          mtz_file = input_file
      else:
        raise Sorry("Only MTZ format accepted for map input")
      miller_arrays = mtz_file.file_server.miller_arrays
      # assumes the first miller array holds the map coefficients — TODO confirm
      map_coeffs_1 = miller_arrays[0]
      #
      xrs_list = []
      for n, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
        # get sigma levels from ensemble fc for each structure
        xrs = get_map_sigma(ens_pdb_hierarchy = ens_pdb_hierarchy,
                            ens_pdb_xrs = ens_pdb_xrs,
                            map_coeffs_1 = map_coeffs_1,
                            residue_detail = self.params.residue_detail,
                            ignore_hd = self.params.ignore_hd,
                            log = self.log)
        xrs_list.append(xrs)
      # write ensemble pdb file, occupancies as sigma level
      filename = pdb_file_names[0].split('/')[-1].replace('.pdb','') + '_vs_' + self.params.ensemble_sigma_map_input.replace('.mtz','') + '_pensemble.pdb'
      write_ensemble_pdb(filename = filename,
                         xrs_list = xrs_list,
                         ens_pdb_hierarchy = ens_pdb_hierarchy )

    # Do full analysis vs Fobs
    else:
      model_map_coeffs = []
      fmodel = None
      # Get <fcalc>
      for model, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
        ens_pdb_xrs.set_occupancies(1.0)
        if model == 0:
          # If mtz not supplied get fobs from xray structure...
          # Use input Fobs for scoring against nll
          if self.params.fobs_vs_fcalc_post_nll:
            dummy_fobs = f_obs
          else:
            if f_obs == None:
              if self.params.fcalc_high_resolution == None:
                raise Sorry("Please supply high resolution limit or input mtz file.")
              dummy_dmin = self.params.fcalc_high_resolution
              dummy_dmax = self.params.fcalc_low_resolution
            else:
              print >> self.log, 'Supplied mtz used to determine high and low resolution cuttoffs'
              dummy_dmax, dummy_dmin = f_obs.d_max_min()
            # Synthetic Fobs from the first model's own Fcalc amplitudes.
            dummy_fobs = abs(ens_pdb_xrs.structure_factors(d_min = dummy_dmin).f_calc())
            dummy_fobs.set_observation_type_xray_amplitude()
            # If mtz supplied, free flags are over written to prevent array size error
            r_free_flags = dummy_fobs.array(data=flex.bool(dummy_fobs.data().size(),False))
          #
          fmodel = utils.fmodel_simple(
            scattering_table = "wk1995",
            xray_structures = [ens_pdb_xrs],
            f_obs = dummy_fobs,
            target_name = 'ls',
            bulk_solvent_and_scaling = False,
            r_free_flags = r_free_flags )
          # Zero template array used later to rebuild averaged f_calc arrays.
          f_calc_ave = fmodel.f_calc().array(data = fmodel.f_calc().data()*0).deep_copy()
          # XXX Important to ensure scale is identical for each model and <model>
          fmodel.set_scale_switch = 1.0
          f_calc_ave_total = fmodel.f_calc().data().deep_copy()
        else:
          fmodel.update_xray_structure(xray_structure = ens_pdb_xrs,
                                       update_f_calc = True,
                                       update_f_mask = False)
          f_calc_ave_total += fmodel.f_calc().data().deep_copy()
        print >> self.log, 'Model :', model+1
        print >> self.log, "\nStructure vs real Fobs (no bulk solvent or scaling)"
        print >> self.log, 'Rwork : %5.4f '%fmodel.r_work()
        print >> self.log, 'Rfree : %5.4f '%fmodel.r_free()
        print >> self.log, 'K1 : %5.4f '%fmodel.scale_k1()
        # Per-model Fc map coefficients, kept for the probability pass below.
        fcalc_edm = fmodel.electron_density_map()
        fcalc_map_coeffs = fcalc_edm.map_coefficients(map_type = 'Fc')
        fcalc_mtz_dataset = fcalc_map_coeffs.as_mtz_dataset(column_root_label ='Fc')
        if self.params.output_model_and_model_ave_mtz:
          fcalc_mtz_dataset.mtz_object().write(file_name = str(model+1)+"_Fc.mtz")
        model_map_coeffs.append(fcalc_map_coeffs.deep_copy())

      # Ensemble-average Fcalc.
      fmodel.update(f_calc = f_calc_ave.array(f_calc_ave_total / number_structures))
      print >> self.log, "\nEnsemble vs real Fobs (no bulk solvent or scaling)"
      print >> self.log, 'Rwork : %5.4f '%fmodel.r_work()
      print >> self.log, 'Rfree : %5.4f '%fmodel.r_free()
      print >> self.log, 'K1 : %5.4f '%fmodel.scale_k1()

      # Get <Fcalc> map
      fcalc_ave_edm = fmodel.electron_density_map()
      fcalc_ave_map_coeffs = fcalc_ave_edm.map_coefficients(map_type = 'Fc').deep_copy()
      fcalc_ave_mtz_dataset = fcalc_ave_map_coeffs.as_mtz_dataset(column_root_label ='Fc')
      if self.params.output_model_and_model_ave_mtz:
        fcalc_ave_mtz_dataset.mtz_object().write(file_name = "aveFc.mtz")
      # NOTE: fcalc_ave_map_coeffs is rebound from map coefficients to an fft map.
      fcalc_ave_map_coeffs = fcalc_ave_map_coeffs.fft_map()
      fcalc_ave_map_coeffs.apply_volume_scaling()
      fcalc_ave_map_data = fcalc_ave_map_coeffs.real_map_unpadded()
      fcalc_ave_map_stats = maptbx.statistics(fcalc_ave_map_data)
      print >> self.log, "<Fcalc> Map Stats :"
      fcalc_ave_map_stats.show_summary(f = self.log)
      # NOTE(review): offset is assigned but not used below.
      offset = fcalc_ave_map_stats.min()
      model_neg_ll = []
      number_previous_scatters = 0

      # Run through structure list again and get probability
      xrs_list = []
      for model, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
        if self.params.verbose:
          print >> self.log, '\n\nModel : ', model+1
        # Get model atom sigmas vs Fcalc
        fcalc_map = model_map_coeffs[model].fft_map()
        fcalc_map.apply_volume_scaling()
        fcalc_map_data = fcalc_map.real_map_unpadded()
        fcalc_map_stats = maptbx.statistics(fcalc_map_data)
        if self.params.verbose:
          print >> self.log, "Fcalc map stats :"
          fcalc_map_stats.show_summary(f = self.log)
        xrs = get_map_sigma(ens_pdb_hierarchy = ens_pdb_hierarchy,
                            ens_pdb_xrs = ens_pdb_xrs,
                            fft_map_1 = fcalc_map,
                            model_i = model,
                            residue_detail = self.params.residue_detail,
                            ignore_hd = self.params.ignore_hd,
                            number_previous_scatters = number_previous_scatters,
                            log = self.log)
        fcalc_sigmas = xrs.scatterers().extract_occupancies()
        del fcalc_map

        # Get model atom sigmas vs <Fcalc>
        xrs = get_map_sigma(ens_pdb_hierarchy = ens_pdb_hierarchy,
                            ens_pdb_xrs = ens_pdb_xrs,
                            fft_map_1 = fcalc_ave_map_coeffs,
                            model_i = model,
                            residue_detail = self.params.residue_detail,
                            ignore_hd = self.params.ignore_hd,
                            number_previous_scatters = number_previous_scatters,
                            log = self.log)
        ### For testing other residue averaging options
        #print xrs.residue_selections
        fcalc_ave_sigmas = xrs.scatterers().extract_occupancies()
        # Probability of model given <model>
        prob = fcalc_ave_sigmas / fcalc_sigmas
        # XXX debug option
        if False:
          for n,p in enumerate(prob):
            print >> self.log, ' {0:5d} {1:5.3f}'.format(n,p)
        # Set probabilty between 0 and 1
        # XXX Make Histogram / more stats
        prob_lss_zero = flex.bool(prob <= 0)
        prob_grt_one = flex.bool(prob > 1)
        prob.set_selected(prob_lss_zero, 0.001)
        prob.set_selected(prob_grt_one, 1.0)
        xrs.set_occupancies(prob)
        xrs_list.append(xrs)
        sum_neg_ll = sum(-flex.log(prob))
        model_neg_ll.append((sum_neg_ll, model))
        if self.params.verbose:
          print >> self.log, 'Model probability stats :'
          # NOTE(review): min_max_mean().show() prints and returns None,
          # so this line also prints "None" — likely unintended; confirm.
          print >> self.log, prob.min_max_mean().show()
          print >> self.log, ' Count < 0.0 : ', prob_lss_zero.count(True)
          print >> self.log, ' Count > 1.0 : ', prob_grt_one.count(True)
        # For averaging by residue
        number_previous_scatters += ens_pdb_xrs.sites_cart().size()

      # write ensemble pdb file, occupancies as sigma level
      write_ensemble_pdb(filename = pdb_file_names[0].split('/')[-1].replace('.pdb','') + '_pensemble.pdb',
                         xrs_list = xrs_list,
                         ens_pdb_hierarchy = ens_pdb_hierarchy )

      # XXX Test ordering models by nll
      # XXX Test removing nth percentile atoms
      if self.params.sort_ensemble_by_nll_score or self.params.fobs_vs_fcalc_post_nll:
        for percentile in [1.0,0.975,0.95,0.9,0.8,0.6,0.2]:
          model_neg_ll = sorted(model_neg_ll)
          f_calc_ave_total_reordered = None
          print_list = []
          for i_neg_ll in model_neg_ll:
            xrs = xrs_list[i_neg_ll[1]]
            nll_occ = xrs.scatterers().extract_occupancies()

            # Set q=0 nth percentile atoms
            sorted_nll_occ = sorted(nll_occ, reverse=True)
            number_atoms = len(sorted_nll_occ)
            percentile_prob_cutoff = sorted_nll_occ[int(number_atoms * percentile)-1]
            cutoff_selections = flex.bool(nll_occ < percentile_prob_cutoff)
            cutoff_nll_occ = flex.double(nll_occ.size(), 1.0).set_selected(cutoff_selections, 0.0)
            #XXX Debug
            if False:
              print '\nDebug'
              for x in xrange(len(cutoff_selections)):
                print cutoff_selections[x], nll_occ[x], cutoff_nll_occ[x]
              print percentile
              print percentile_prob_cutoff
              print cutoff_selections.count(True)
              print cutoff_selections.size()
              print cutoff_nll_occ.count(0.0)
              print 'Count q = 1 : ', cutoff_nll_occ.count(1.0)
              print 'Count scatterers size : ', cutoff_nll_occ.size()

            xrs.set_occupancies(cutoff_nll_occ)
            fmodel.update_xray_structure(xray_structure = xrs,
                                         update_f_calc = True,
                                         update_f_mask = True)
            # Running (cumulative) averages over the nll-ordered models.
            if f_calc_ave_total_reordered == None:
              f_calc_ave_total_reordered = fmodel.f_calc().data().deep_copy()
              f_mask_ave_total_reordered = fmodel.f_masks()[0].data().deep_copy()
              cntr = 1
            else:
              f_calc_ave_total_reordered += fmodel.f_calc().data().deep_copy()
              f_mask_ave_total_reordered += fmodel.f_masks()[0].data().deep_copy()
              cntr+=1
            fmodel.update(f_calc = f_calc_ave.array(f_calc_ave_total_reordered / cntr).deep_copy(),
                          f_mask = f_calc_ave.array(f_mask_ave_total_reordered / cntr).deep_copy() )

            # Update solvent and scale
            # XXX Will need to apply_back_trace on latest version
            fmodel.set_scale_switch = 0
            fmodel.update_all_scales()

            # Reset occ for outout
            xrs.set_occupancies(nll_occ)
            # k1 updated vs Fobs
            if self.params.fobs_vs_fcalc_post_nll:
              print_list.append([cntr, i_neg_ll[0], i_neg_ll[1], fmodel.r_work(), fmodel.r_free()])

          # Order models by nll and print summary
          print >> self.log, '\nModels ranked by nll <Fcalc> R-factors recalculated'
          print >> self.log, 'Percentile cutoff : {0:5.3f}'.format(percentile)
          xrs_list_sorted_nll = []
          print >> self.log, ' | NLL <Rw> <Rf> Ens Model'
          for info in print_list:
            print >> self.log, ' {0:4d} | {1:8.1f} {2:8.4f} {3:8.4f} {4:12d}'.format(
              info[0],
              info[1],
              info[3],
              info[4],
              info[2]+1,
              )
            xrs_list_sorted_nll.append(xrs_list[info[2]])

          # Output nll ordered ensemble
          write_ensemble_pdb(filename = 'nll_ordered_' + pdb_file_names[0].split('/')[-1].replace('.pdb','') + '_pensemble.pdb',
                             xrs_list = xrs_list_sorted_nll,
                             ens_pdb_hierarchy = ens_pdb_hierarchy )