# exercise(): assertion-based regression checks for libtbx.str_utils. The
# helpers imported and checked below are split_keeping_spaces,
# size_as_string_with_commas, show_string, prefix_each_line,
# prefix_each_line_suffix, show_sorted_by_counts, line_breaker, StringIO
# (including a pickle round trip), reformat_terminal_text, strip_lines,
# rstrip_lines, expandtabs_track_columns, format_value, make_header,
# string_representation, framed_output, print_message_in_box and
# make_big_header.
# NOTE(review): in this copy the statement line breaks and the runs of spaces
# inside the expected-output literals appear to have been collapsed (e.g. the
# duplicated split_keeping_spaces(s=" a") checks and the framed-box strings).
# Presumably the original literals carried significant whitespace -- restore
# the function from version control before relying on or editing it.
def exercise(): from libtbx.test_utils import show_diff, Exception_expected from six.moves import cPickle as pickle # from libtbx.str_utils import split_keeping_spaces assert split_keeping_spaces(s="") == [] assert split_keeping_spaces(s=" ") == [" "] assert split_keeping_spaces(s="a") == ["a"] assert split_keeping_spaces(s="abc") == ["abc"] assert split_keeping_spaces(s=" a") == [" ", "a"] assert split_keeping_spaces(s=" a") == [" ", "a"] assert split_keeping_spaces(s=" abc") == [" ", "abc"] assert split_keeping_spaces(s=" abc ") == [" ", "abc", " "] assert split_keeping_spaces(s=" abc ") == [" ", "abc", " "] assert split_keeping_spaces(s="a ") == ["a", " "] assert split_keeping_spaces(s="a ") == ["a", " "] assert split_keeping_spaces(s="abc ") == ["abc", " "] assert split_keeping_spaces(s="a b") == ["a", " ", "b"] assert split_keeping_spaces(s="a b") == ["a", " ", "b"] assert split_keeping_spaces(s=" a b c d ") == [ " ", "a", " ", "b", " ", "c", " ", "d", " " ] # from libtbx.str_utils import size_as_string_with_commas assert size_as_string_with_commas(0) == "0" assert size_as_string_with_commas(1) == "1" assert size_as_string_with_commas(-1) == "-1" assert size_as_string_with_commas(10) == "10" assert size_as_string_with_commas(100) == "100" assert size_as_string_with_commas(1000) == "1,000" assert size_as_string_with_commas(12345) == "12,345" assert size_as_string_with_commas(12345678) == "12,345,678" assert size_as_string_with_commas(-12345678) == "-12,345,678" # from libtbx.str_utils import show_string assert show_string("abc") == '"abc"' assert show_string("a'c") == '"a\'c"' assert show_string('a"c') == "'a\"c'" assert show_string('\'"c') == '"\'\\"c"' # from libtbx.str_utils import prefix_each_line assert prefix_each_line(prefix="^", lines_as_one_string="""\ hello world""") == """\ ^hello ^world""" # from libtbx.str_utils import prefix_each_line_suffix assert prefix_each_line_suffix(prefix="^", lines_as_one_string="""\ hello world""", suffix=" ") == """\ 
^hello ^world""" assert prefix_each_line_suffix(prefix="^", lines_as_one_string="""\ hello world""", suffix=" ", rstrip=False) == """\ ^hello%s ^world """ % " " # from libtbx.str_utils import show_sorted_by_counts from six.moves import cStringIO out = cStringIO() assert show_sorted_by_counts(label_count_pairs=[("b", 3), ("a", 3), ("c", -2)], out=out, prefix="%") assert not show_diff(out.getvalue(), """\ %"a" 3 %"b" 3 %"c" -2 """) out = cStringIO() assert show_sorted_by_counts(label_count_pairs=[("b", -3), ("a", -3), ("c", 2)], reverse=False, out=out, prefix="%", annotations=[None, "", "x"]) assert not show_diff(out.getvalue(), """\ %"a" -3 %"b" -3 %"c" 2 x """) # from libtbx.str_utils import line_breaker for string, expected_result in [ ("", [""]), ("this is", ["this is"]), ("this is a", ["this is", "a"]), ("this is a sentence", ["this is", "a", "sentence"]), ("this is a longer sentence", ["this is", "a", "longer", "sentence"]), ("this is a very long sentence indeed", ["this is", "a very", "long", "sentence", "indeed"]) ]: assert [block for block in line_breaker(string, width=7)] == expected_result # from libtbx.str_utils import StringIO out1 = cStringIO() out2 = StringIO() out3 = StringIO("Hello world!\n") print("Hello world!", file=out1) print("Hello world!", file=out2) try: print("Hello world!", file=out3) except AttributeError: pass else: raise Exception_expected out4 = pickle.loads(pickle.dumps(out2)) out5 = pickle.loads(pickle.dumps(out3)) assert out4.getvalue() == out1.getvalue() == out2.getvalue( ) == out5.getvalue() # from libtbx.str_utils import reformat_terminal_text txt1 = """ This is some terminal-formatted text which needs to be reset. """ assert (reformat_terminal_text(txt1) == "This is some terminal-formatted text which needs to be reset.") txt2 = """ This is more terminal-formatted text which needs to be reset. 
""" # from libtbx.str_utils import strip_lines, rstrip_lines lines = [" This is more ", " terminal-formatted ", " text "] assert ( strip_lines(txt2) == "\nThis is more\nterminal-formatted\ntext which needs\nto be reset.") assert ( rstrip_lines(txt2) == "\n This is more\n terminal-formatted\n text which needs\n to be reset." ) # from libtbx.str_utils import expandtabs_track_columns def check(s): es, js = expandtabs_track_columns(s=s) assert len(js) == len(s) assert es == s.expandtabs() sr = "".join([es[j] for j in js]) assert sr == s.replace("\t", " ") check("") check("\t") check("\t\t") check("\ty") check("x\ty") check("x\ty\tz") check("\txy\t\tz") check("abcdefg\txy\t\tz") check("ab defgh\txyz\t\tu") # from libtbx.str_utils import format_value assert format_value("%.4f", 1.2345678) == "1.2346" assert format_value("%.4f", None) == " None" assert format_value("%.4f", None, replace_none_with="---") == " ---" # from libtbx.str_utils import make_header out = StringIO() make_header("Header 1", out=out) assert (out.getvalue() == """ =================================== Header 1 ================================== """) out = StringIO() make_header("Header 2", out=out) assert (out.getvalue() == """ =================================== Header 2 ================================== """) # import sys from libtbx.str_utils import string_representation iset = list(range(130)) + list(range(250, 256)) for i in iset: s = chr(i) for j in iset: ss = s + chr(j) sr = string_representation(string=ss, preferred_quote="'", alternative_quote='"') if sys.hexversion < 0x03000000: assert sr == repr(ss) else: assert eval(sr) == ss from libtbx.str_utils import framed_output out = StringIO() box = framed_output(out, frame='#') print("Hello, world!", file=box) box.close() assert (out.getvalue() == """ ################# # Hello, world! 
# ################# """) out = StringIO() box = framed_output(out, frame='-', width=80, center=True, title="Refinement stats") box.write("r_free = 0.1234") box.write(" ") box.write("r_work = 0.1567") box.close() assert (out.getvalue() == """ |--------------------------------Refinement stats------------------------------| | r_free = 0.1234 r_work = 0.1567 | |------------------------------------------------------------------------------| """) out = StringIO() box = framed_output(out, frame='-', width=72, prefix=" ", title="Validation summary") print("Overall MolProbity score: 2.56", file=box) box.add_separator() print("""\ Ramachandran favored: 97.5 % outliers: 2.5 % Rotamer outliers: 5.9 % Clashscore: 10.9""", file=box) assert (out.getvalue() == "") del box assert (out.getvalue() == """ |-Validation summary---------------------------------------------------| | Overall MolProbity score: 2.56 | |----------------------------------------------------------------------| | Ramachandran favored: 97.5 % | | outliers: 2.5 % | | Rotamer outliers: 5.9 % | | Clashscore: 10.9 | |----------------------------------------------------------------------| """) from libtbx.str_utils import print_message_in_box out = StringIO() print_message_in_box( message="This is some terminal-formatted text which needs to be reset.", out=out, width=32, center=True, prefix=" ", frame='*') assert (out.getvalue() == """ ******************************** * This is some * * terminal-formatted text * * which needs to be reset. * ******************************** """) from libtbx.str_utils import make_big_header out = StringIO() make_big_header("Section title", out=out) assert (out.getvalue() == """ ################################################################################ # Section title # ################################################################################ """)
def run(args, command_name="phenix.remove_outliers"):
    """Detect outliers in X-ray data and optionally write the cleaned data.

    Each entry of ``args`` is the ``--quiet`` flag, the name of an existing
    phil parameter file, or a command-line parameter definition accepted by
    the ``master_params`` argument interpreter.  With no arguments, or with
    ``--help`` / ``--h`` / ``-h``, only the help text is printed.

    The observed data are read, mapped to the asymmetric unit, stripped of
    non-positive values, converted to amplitudes and merged over Bijvoet
    mates.  Basic Wilson, extreme-value Wilson and beamstop-shadow outlier
    tests are always run; the model-based test is run only when a model file
    is given.  If ``output.hklout`` is set, the array selected by
    ``outlier_detection.protocol`` is written to an MTZ file together with
    the matching free-R flags.  If ``output.logfile`` is set, the captured
    log (and loggraph plots, when a model was used) is written there.

    Parameters:
      args: sequence of command-line argument strings.
      command_name: program name handed to ``print_help``.

    Returns:
      None.

    Raises:
      Sorry: for an unrecognised argument, missing X-ray data, an input
        file that does not exist, an unspecified unit cell or space group,
        or an unrecognised outlier detection protocol.
    """
    if (len(args) == 0 or "--help" in args or "--h" in args or "-h" in args):
        print_help(command_name=command_name)
        return

    log = multi_out()
    plot_out = None
    if "--quiet" not in args:
        log.register(label="stdout", file_object=sys.stdout)
    # Everything sent to ``log`` is also captured in this buffer so that it
    # can be copied into the optional log file at the end of the run.
    string_buffer = StringIO()
    log.register(label="log_buffer", file_object=string_buffer)

    phil_objects = []
    argument_interpreter = master_params.command_line_argument_interpreter(
        home_scope="outlier_detection")

    for arg in args:
        command_line_params = None
        arg_is_processed = (arg == "--quiet")
        is_file = os.path.isfile(arg)
        try:
            if is_file:
                # An existing file is tried as a phil parameter file.
                command_line_params = iotbx.phil.parse(file_name=arg)
            else:
                command_line_params = argument_interpreter.process(arg=arg)
        except KeyboardInterrupt:
            raise
        except Exception:
            # Deliberate best effort: an argument that cannot be interpreted
            # is reported through the Sorry raised below.
            pass
        if command_line_params is not None:
            phil_objects.append(command_line_params)
            arg_is_processed = True

        if not arg_is_processed:
            print >> log, "##----------------------------------------------##"
            print >> log, "## Unknown file or keyword:", arg
            print >> log, "##----------------------------------------------##"
            print >> log
            raise Sorry("Unknown file or keyword: %s" % arg)

    effective_params = master_params.fetch(sources=phil_objects)
    params = effective_params.extract()
    xray_file = params.outlier_utils.input.xray_data.file_name
    model_file = params.outlier_utils.input.model.file_name

    # Check for a missing file name before touching the file system:
    # os.path.exists(None) raises TypeError, which used to hide this message.
    if xray_file is None:
        raise Sorry("Xray data not specified")
    if not os.path.exists(xray_file):
        raise Sorry("File %s can not be found" % (xray_file))
    if model_file is not None and not os.path.exists(model_file):
        raise Sorry("File %s can not be found" % (model_file))

    # Collect crystal symmetry from the reflection file, the model file (if
    # any) and the phil parameters, then combine them.
    hkl_xs = crystal_symmetry_from_any.extract_from(file_name=xray_file)
    pdb_xs = None
    if model_file is not None:
        pdb_xs = crystal_symmetry_from_any.extract_from(file_name=model_file)

    phil_xs = crystal.symmetry(
        unit_cell=params.outlier_utils.input.unit_cell,
        space_group_info=params.outlier_utils.input.space_group)

    phil_xs.show_summary()
    # NOTE(review): guard in case no symmetry could be extracted from the
    # reflection file -- confirm that extract_from can return None.
    if hkl_xs is not None:
        hkl_xs.show_summary()

    combined_xs = crystal.select_crystal_symmetry(
        None, phil_xs, [pdb_xs], [hkl_xs])

    # Inject the combined unit cell and symmetry back into the phil scope.
    params.outlier_utils.input.unit_cell = combined_xs.unit_cell()
    params.outlier_utils.input.space_group = sgtbx.space_group_info(
        group=combined_xs.space_group())

    new_params = master_params.format(python_object=params)
    new_params.show(out=log)

    if params.outlier_utils.input.unit_cell is None:
        raise Sorry("unit cell not specified")
    if params.outlier_utils.input.space_group is None:
        raise Sorry("space group not specified")
    if model_file is None:
        # Routed through ``log`` so that it honours --quiet and ends up in
        # the log file like every other message.
        print >> log, \
            "PDB file not specified. Basic wilson outlier rejections only."

    #-----------------------------------------------------------
    #
    # step 1: read in the reflection file
    #
    phil_xs = crystal.symmetry(
        unit_cell=params.outlier_utils.input.unit_cell,
        space_group_info=params.outlier_utils.input.space_group)

    xray_data_server = reflection_file_utils.reflection_file_server(
        crystal_symmetry=phil_xs,
        force_symmetry=True,
        reflection_files=[])

    miller_array = xray_data_server.get_xray_data(
        file_name=xray_file,
        labels=params.outlier_utils.input.xray_data.obs_labels,
        ignore_all_zeros=True,
        parameter_scope='outlier_utils.input.xray_data',
        parameter_name='obs_labels')

    info = miller_array.info()

    miller_array = miller_array.map_to_asu()
    miller_array = miller_array.select(miller_array.indices() != (0, 0, 0))

    # Drop the sigmas altogether when they do not make sense.
    if not miller_array.sigmas_are_sensible():
        miller_array = miller_array.customized_copy(
            data=miller_array.data(),
            sigmas=None).set_observation_type(miller_array)
    miller_array = miller_array.select(miller_array.data() > 0)
    if miller_array.sigmas() is not None:
        miller_array = miller_array.select(miller_array.sigmas() > 0)

    if miller_array.is_xray_intensity_array():
        miller_array = miller_array.f_sq_as_f()
    elif miller_array.is_complex_array():
        miller_array = abs(miller_array)

    miller_array.set_info(info)
    merged_anomalous = False
    if miller_array.anomalous_flag():
        miller_array = miller_array.average_bijvoet_mates(
            ).set_observation_type(miller_array)
        merged_anomalous = True
    miller_array = miller_array.map_to_asu()

    # Free-R flags: generate them when no labels were given, else read them.
    if params.outlier_utils.input.xray_data.free_flags is None:
        flag_params = \
            params.outlier_utils.additional_parameters.free_flag_generation
        free_flags = miller_array.generate_r_free_flags(
            fraction=flag_params.fraction,
            max_free=flag_params.max_number,
            lattice_symmetry_max_delta=flag_params.lattice_symmetry_max_delta,
            use_lattice_symmetry=flag_params.use_lattice_symmetry)
    else:
        free_flags = xray_data_server.get_xray_data(
            file_name=xray_file,
            labels=params.outlier_utils.input.xray_data.free_flags,
            ignore_all_zeros=True,
            parameter_scope='outlier_utils.input.xray_data',
            parameter_name='free_flags')

        if free_flags.anomalous_flag():
            free_flags = free_flags.average_bijvoet_mates()
            merged_anomalous = True
        free_flags = free_flags.customized_copy(
            data=flex.bool(free_flags.data() == 1))
        free_flags = free_flags.map_to_asu()
        free_flags = free_flags.common_set(miller_array)

    print >> log
    print >> log, "Summary info of observed data"
    print >> log, "============================="
    miller_array.show_summary(f=log)
    if merged_anomalous:
        print >> log, "For outlier detection purposes, the Bijvoet pairs have been merged."
    print >> log

    print >> log, "Constructing an outlier manager"
    print >> log, "==============================="
    print >> log
    outlier_manager = outlier_rejection.outlier_manager(
        miller_array, free_flags, out=log)

    detection = params.outlier_utils.outlier_detection.parameters
    # Stays None unless a model file is available (step 2).
    model_based_array = None

    basic_array = outlier_manager.basic_wilson_outliers(
        p_basic_wilson=detection.basic_wilson.level,
        return_data=True)

    extreme_array = outlier_manager.extreme_wilson_outliers(
        p_extreme_wilson=detection.extreme_wilson.level,
        return_data=True)

    beamstop_array = outlier_manager.beamstop_shadow_outliers(
        level=detection.beamstop.level,
        d_min=detection.beamstop.d_min,
        return_data=True)

    #----------------------------------------------------------------
    # Step 2: get an xray structure from the PDB file
    #
    if model_file is not None:
        model = pdb.input(file_name=model_file).xray_structure_simple(
            crystal_symmetry=phil_xs)
        print >> log, "Atomic model summary"
        print >> log, "===================="
        model.show_summary(f=log)
        print >> log

        # Make an f_model object for bulk solvent scaling.
        f_model_object = f_model.manager(
            f_obs=miller_array,
            r_free_flags=free_flags,
            xray_structure=model)
        print >> log, "Bulk solvent scaling of the data"
        print >> log, "================================"
        print >> log, "Maximum likelihood bulk solvent scaling."
        print >> log
        f_model_object.update_all_scales(log=log, remove_outliers=False)
        plot_out = StringIO()
        model_based_array = outlier_manager.model_based_outliers(
            f_model_object.f_model(),
            level=detection.model_based.level,
            return_data=True,
            plot_out=plot_out)

    # Decide what has to be written out.
    hklout = params.outlier_utils.output.hklout
    if hklout is not None:
        protocol = params.outlier_utils.outlier_detection.protocol
        if protocol == "model" and model_file is None:
            print >> log, "Model based rejections requested. No model was supplied."
            print >> log, "Switching to writing out rejections based on extreme value Wilson statistics."
            protocol = "extreme"

        print >> log
        if protocol == "basic":
            print >> log, "Non-outliers found by the basic wilson statistics"
            print >> log, "protocol will be written out."
            output_array = basic_array
        elif protocol == "extreme":
            print >> log, "Non-outliers found by the extreme value wilson statistics"
            print >> log, "protocol will be written out."
            output_array = extreme_array
        elif protocol == "model":
            print >> log, "Non-outliers found by the model based"
            print >> log, "protocol will be written out to the file:"
            print >> log, hklout
            print >> log
            output_array = model_based_array
        elif protocol == "beamstop":
            print >> log, "Outliers found for the beamstop shadow"
            print >> log, "problems detection protocol will be written to the file:"
            print >> log, hklout
            print >> log
            # Use the beamstop result; the model based array was written
            # here before, which is None when no model was supplied.
            output_array = beamstop_array
        else:
            raise Sorry("Unknown outlier detection protocol: %s" % protocol)
        new_set_of_free_flags = free_flags.common_set(output_array)

        mtz_dataset = output_array.as_mtz_dataset(column_root_label="FOBS")
        mtz_dataset = mtz_dataset.add_miller_array(
            miller_array=new_set_of_free_flags,
            column_root_label="Free_R_Flag")
        mtz_dataset.mtz_object().write(file_name=hklout)

    logfile = params.outlier_utils.output.logfile
    if logfile is not None:
        final_log = StringIO()
        print >> final_log, string_buffer.getvalue()
        print >> final_log
        if plot_out is not None:
            print >> final_log, plot_out.getvalue()
        # The context manager makes sure the handle is flushed and closed.
        with open(logfile, 'w') as outfile:
            outfile.write(final_log.getvalue())
        print >> log
        print >> log, "A logfile named %s was created." % (logfile)
        print >> log, "This logfile contains the screen output and"
        print >> log, "(possibly) some ccp4 style loggraph plots"
def run(args, command_name="phenix.remove_outliers"):
    """Detect outliers in X-ray data and optionally write the cleaned data.

    Each entry of ``args`` is the ``--quiet`` flag, the name of an existing
    phil parameter file, or a command-line parameter definition accepted by
    the ``master_params`` argument interpreter.  With no arguments, or with
    ``--help`` / ``--h`` / ``-h``, only the help text is printed.

    The observed data are read, mapped to the asymmetric unit, stripped of
    non-positive values, converted to amplitudes and merged over Bijvoet
    mates.  Basic Wilson, extreme-value Wilson and beamstop-shadow outlier
    tests are always run; the model-based test is run only when a model file
    is given.  If ``output.hklout`` is set, the array selected by
    ``outlier_detection.protocol`` is written to an MTZ file together with
    the matching free-R flags.  If ``output.logfile`` is set, the captured
    log (and loggraph plots, when a model was used) is written there.

    Parameters:
      args: sequence of command-line argument strings.
      command_name: program name handed to ``print_help``.

    Returns:
      None.

    Raises:
      Sorry: for an unrecognised argument, missing X-ray data, an input
        file that does not exist, an unspecified unit cell or space group,
        or an unrecognised outlier detection protocol.
    """
    if (len(args) == 0 or "--help" in args or "--h" in args or "-h" in args):
        print_help(command_name=command_name)
        return

    log = multi_out()
    plot_out = None
    if "--quiet" not in args:
        log.register(label="stdout", file_object=sys.stdout)
    # Everything sent to ``log`` is also captured in this buffer so that it
    # can be copied into the optional log file at the end of the run.
    string_buffer = StringIO()
    log.register(label="log_buffer", file_object=string_buffer)

    phil_objects = []
    argument_interpreter = master_params.command_line_argument_interpreter(
        home_scope="outlier_detection")

    for arg in args:
        command_line_params = None
        arg_is_processed = (arg == "--quiet")
        is_file = os.path.isfile(arg)
        try:
            if is_file:
                # An existing file is tried as a phil parameter file.
                command_line_params = iotbx.phil.parse(file_name=arg)
            else:
                command_line_params = argument_interpreter.process(arg=arg)
        except KeyboardInterrupt:
            raise
        except Exception:
            # Deliberate best effort: an argument that cannot be interpreted
            # is reported through the Sorry raised below.
            pass
        if command_line_params is not None:
            phil_objects.append(command_line_params)
            arg_is_processed = True

        if not arg_is_processed:
            print >> log, "##----------------------------------------------##"
            print >> log, "## Unknown file or keyword:", arg
            print >> log, "##----------------------------------------------##"
            print >> log
            raise Sorry("Unknown file or keyword: %s" % arg)

    effective_params = master_params.fetch(sources=phil_objects)
    params = effective_params.extract()
    xray_file = params.outlier_utils.input.xray_data.file_name
    model_file = params.outlier_utils.input.model.file_name

    # Check for a missing file name before touching the file system:
    # os.path.exists(None) raises TypeError, which used to hide this message.
    if xray_file is None:
        raise Sorry("Xray data not specified")
    if not os.path.exists(xray_file):
        raise Sorry("File %s can not be found" % (xray_file))
    if model_file is not None and not os.path.exists(model_file):
        raise Sorry("File %s can not be found" % (model_file))

    # Collect crystal symmetry from the reflection file, the model file (if
    # any) and the phil parameters, then combine them.
    hkl_xs = crystal_symmetry_from_any.extract_from(file_name=xray_file)
    pdb_xs = None
    if model_file is not None:
        pdb_xs = crystal_symmetry_from_any.extract_from(file_name=model_file)

    phil_xs = crystal.symmetry(
        unit_cell=params.outlier_utils.input.unit_cell,
        space_group_info=params.outlier_utils.input.space_group)

    phil_xs.show_summary()
    # NOTE(review): guard in case no symmetry could be extracted from the
    # reflection file -- confirm that extract_from can return None.
    if hkl_xs is not None:
        hkl_xs.show_summary()

    combined_xs = crystal.select_crystal_symmetry(
        None, phil_xs, [pdb_xs], [hkl_xs])

    # Inject the combined unit cell and symmetry back into the phil scope.
    params.outlier_utils.input.unit_cell = combined_xs.unit_cell()
    params.outlier_utils.input.space_group = sgtbx.space_group_info(
        group=combined_xs.space_group())

    new_params = master_params.format(python_object=params)
    new_params.show(out=log)

    if params.outlier_utils.input.unit_cell is None:
        raise Sorry("unit cell not specified")
    if params.outlier_utils.input.space_group is None:
        raise Sorry("space group not specified")
    if model_file is None:
        # Routed through ``log`` so that it honours --quiet and ends up in
        # the log file like every other message.
        print >> log, \
            "PDB file not specified. Basic wilson outlier rejections only."

    #-----------------------------------------------------------
    #
    # step 1: read in the reflection file
    #
    phil_xs = crystal.symmetry(
        unit_cell=params.outlier_utils.input.unit_cell,
        space_group_info=params.outlier_utils.input.space_group)

    xray_data_server = reflection_file_utils.reflection_file_server(
        crystal_symmetry=phil_xs,
        force_symmetry=True,
        reflection_files=[])

    miller_array = xray_data_server.get_xray_data(
        file_name=xray_file,
        labels=params.outlier_utils.input.xray_data.obs_labels,
        ignore_all_zeros=True,
        parameter_scope='outlier_utils.input.xray_data',
        parameter_name='obs_labels')

    info = miller_array.info()

    miller_array = miller_array.map_to_asu()
    miller_array = miller_array.select(miller_array.indices() != (0, 0, 0))

    # Drop the sigmas altogether when they do not make sense.
    if not miller_array.sigmas_are_sensible():
        miller_array = miller_array.customized_copy(
            data=miller_array.data(),
            sigmas=None).set_observation_type(miller_array)
    miller_array = miller_array.select(miller_array.data() > 0)
    if miller_array.sigmas() is not None:
        miller_array = miller_array.select(miller_array.sigmas() > 0)

    if miller_array.is_xray_intensity_array():
        miller_array = miller_array.f_sq_as_f()
    elif miller_array.is_complex_array():
        miller_array = abs(miller_array)

    miller_array.set_info(info)
    merged_anomalous = False
    if miller_array.anomalous_flag():
        miller_array = miller_array.average_bijvoet_mates(
            ).set_observation_type(miller_array)
        merged_anomalous = True
    miller_array = miller_array.map_to_asu()

    # Free-R flags: generate them when no labels were given, else read them.
    if params.outlier_utils.input.xray_data.free_flags is None:
        flag_params = \
            params.outlier_utils.additional_parameters.free_flag_generation
        free_flags = miller_array.generate_r_free_flags(
            fraction=flag_params.fraction,
            max_free=flag_params.max_number,
            lattice_symmetry_max_delta=flag_params.lattice_symmetry_max_delta,
            use_lattice_symmetry=flag_params.use_lattice_symmetry)
    else:
        free_flags = xray_data_server.get_xray_data(
            file_name=xray_file,
            labels=params.outlier_utils.input.xray_data.free_flags,
            ignore_all_zeros=True,
            parameter_scope='outlier_utils.input.xray_data',
            parameter_name='free_flags')

        if free_flags.anomalous_flag():
            free_flags = free_flags.average_bijvoet_mates()
            merged_anomalous = True
        free_flags = free_flags.customized_copy(
            data=flex.bool(free_flags.data() == 1))
        free_flags = free_flags.map_to_asu()
        free_flags = free_flags.common_set(miller_array)

    print >> log
    print >> log, "Summary info of observed data"
    print >> log, "============================="
    miller_array.show_summary(f=log)
    if merged_anomalous:
        print >> log, "For outlier detection purposes, the Bijvoet pairs have been merged."
    print >> log

    print >> log, "Constructing an outlier manager"
    print >> log, "==============================="
    print >> log
    outlier_manager = outlier_rejection.outlier_manager(
        miller_array, free_flags, out=log)

    detection = params.outlier_utils.outlier_detection.parameters
    # Stays None unless a model file is available (step 2).
    model_based_array = None

    basic_array = outlier_manager.basic_wilson_outliers(
        p_basic_wilson=detection.basic_wilson.level,
        return_data=True)

    extreme_array = outlier_manager.extreme_wilson_outliers(
        p_extreme_wilson=detection.extreme_wilson.level,
        return_data=True)

    beamstop_array = outlier_manager.beamstop_shadow_outliers(
        level=detection.beamstop.level,
        d_min=detection.beamstop.d_min,
        return_data=True)

    #----------------------------------------------------------------
    # Step 2: get an xray structure from the PDB file
    #
    if model_file is not None:
        model = pdb.input(file_name=model_file).xray_structure_simple(
            crystal_symmetry=phil_xs)
        print >> log, "Atomic model summary"
        print >> log, "===================="
        model.show_summary(f=log)
        print >> log

        # Make an f_model object for bulk solvent scaling.
        f_model_object = f_model.manager(
            f_obs=miller_array,
            r_free_flags=free_flags,
            xray_structure=model)
        print >> log, "Bulk solvent scaling of the data"
        print >> log, "================================"
        print >> log, "Maximum likelihood bulk solvent scaling."
        print >> log
        f_model_object.update_solvent_and_scale(out=log)
        plot_out = StringIO()
        model_based_array = outlier_manager.model_based_outliers(
            f_model_object.f_model(),
            level=detection.model_based.level,
            return_data=True,
            plot_out=plot_out)

    # Decide what has to be written out.
    hklout = params.outlier_utils.output.hklout
    if hklout is not None:
        protocol = params.outlier_utils.outlier_detection.protocol
        if protocol == "model" and model_file is None:
            print >> log, "Model based rejections requested. No model was supplied."
            print >> log, "Switching to writing out rejections based on extreme value Wilson statistics."
            protocol = "extreme"

        print >> log
        if protocol == "basic":
            print >> log, "Non-outliers found by the basic wilson statistics"
            print >> log, "protocol will be written out."
            output_array = basic_array
        elif protocol == "extreme":
            print >> log, "Non-outliers found by the extreme value wilson statistics"
            print >> log, "protocol will be written out."
            output_array = extreme_array
        elif protocol == "model":
            print >> log, "Non-outliers found by the model based"
            print >> log, "protocol will be written out to the file:"
            print >> log, hklout
            print >> log
            output_array = model_based_array
        elif protocol == "beamstop":
            print >> log, "Outliers found for the beamstop shadow"
            print >> log, "problems detection protocol will be written to the file:"
            print >> log, hklout
            print >> log
            # Use the beamstop result; the model based array was written
            # here before, which is None when no model was supplied.
            output_array = beamstop_array
        else:
            raise Sorry("Unknown outlier detection protocol: %s" % protocol)
        new_set_of_free_flags = free_flags.common_set(output_array)

        mtz_dataset = output_array.as_mtz_dataset(column_root_label="FOBS")
        mtz_dataset = mtz_dataset.add_miller_array(
            miller_array=new_set_of_free_flags,
            column_root_label="Free_R_Flag")
        mtz_dataset.mtz_object().write(file_name=hklout)

    logfile = params.outlier_utils.output.logfile
    if logfile is not None:
        final_log = StringIO()
        print >> final_log, string_buffer.getvalue()
        print >> final_log
        if plot_out is not None:
            print >> final_log, plot_out.getvalue()
        # The context manager makes sure the handle is flushed and closed.
        with open(logfile, 'w') as outfile:
            outfile.write(final_log.getvalue())
        print >> log
        print >> log, "A logfile named %s was created." % (logfile)
        print >> log, "This logfile contains the screen output and"
        print >> log, "(possibly) some ccp4 style loggraph plots"
def exercise():
  """Self-test for a set of ``libtbx.str_utils`` helpers.

  Each section imports one helper and checks it with plain ``assert``
  statements; nothing is returned. The function uses Python 2 constructs
  (``cPickle``, ``cStringIO``, ``print >> stream`` and list-returning
  ``range``).

  NOTE(review): many expected values below are whitespace-sensitive
  (aligned columns, framed boxes, repeated blanks). Confirm the exact run
  lengths of spaces in these literals against the helpers' real output.
  """
  from libtbx.test_utils import show_diff, Exception_expected
  import cPickle
  #
  # split_keeping_spaces: whitespace comes back as separate list items.
  from libtbx.str_utils import split_keeping_spaces
  assert split_keeping_spaces(s="") == []
  assert split_keeping_spaces(s=" ") == [" "]
  assert split_keeping_spaces(s="a") == ["a"]
  assert split_keeping_spaces(s="abc") == ["abc"]
  assert split_keeping_spaces(s=" a") == [" ", "a"]
  assert split_keeping_spaces(s=" a") == [" ", "a"]
  assert split_keeping_spaces(s=" abc") == [" ", "abc"]
  assert split_keeping_spaces(s=" abc ") == [" ", "abc", " "]
  assert split_keeping_spaces(s=" abc ") == [" ", "abc", " "]
  assert split_keeping_spaces(s="a ") == ["a", " "]
  assert split_keeping_spaces(s="a ") == ["a", " "]
  assert split_keeping_spaces(s="abc ") == ["abc", " "]
  assert split_keeping_spaces(s="a b") == ["a", " ", "b"]
  assert split_keeping_spaces(s="a b") == ["a", " ", "b"]
  assert split_keeping_spaces(s=" a b c d ") == [
    " ", "a", " ", "b", " ", "c", " ", "d", " "]
  #
  # size_as_string_with_commas: thousands separators, sign kept in front.
  from libtbx.str_utils import size_as_string_with_commas
  assert size_as_string_with_commas(0) == "0"
  assert size_as_string_with_commas(1) == "1"
  assert size_as_string_with_commas(-1) == "-1"
  assert size_as_string_with_commas(10) == "10"
  assert size_as_string_with_commas(100) == "100"
  assert size_as_string_with_commas(1000) == "1,000"
  assert size_as_string_with_commas(12345) == "12,345"
  assert size_as_string_with_commas(12345678) == "12,345,678"
  assert size_as_string_with_commas(-12345678) == "-12,345,678"
  #
  # show_string: the surrounding quote character depends on which quotes
  # the value itself contains.
  from libtbx.str_utils import show_string
  assert show_string("abc") == '"abc"'
  assert show_string("a'c") == '"a\'c"'
  assert show_string('a"c') == "'a\"c'"
  assert show_string('\'"c') == '"\'\\"c"'
  #
  # prefix_each_line: the prefix is put in front of every line.
  from libtbx.str_utils import prefix_each_line
  assert prefix_each_line(prefix="^", lines_as_one_string="""\
hello
world""") == """\
^hello
^world"""
  #
  # prefix_each_line_suffix: with the default settings the blank suffix
  # does not show up in the result; with rstrip=False it does.
  from libtbx.str_utils import prefix_each_line_suffix
  assert prefix_each_line_suffix(prefix="^", lines_as_one_string="""\
hello
world""", suffix=" ") == """\
^hello
^world"""
  # The "%s" substitution supplies the trailing blank of the first line.
  assert prefix_each_line_suffix(prefix="^", lines_as_one_string="""\
hello
world""", suffix=" ", rstrip=False) == """\
^hello%s
^world """ % " "
  #
  # show_sorted_by_counts: writes one prefixed line per (label, count)
  # pair to "out"; the return value is asserted to be truthy.
  from libtbx.str_utils import show_sorted_by_counts
  import cStringIO
  out = cStringIO.StringIO()
  assert show_sorted_by_counts(
    label_count_pairs=[("b", 3), ("a", 3), ("c", -2)],
    out=out, prefix="%")
  assert not show_diff(out.getvalue(), """\
%"a" 3
%"b" 3
%"c" -2
""")
  out = cStringIO.StringIO()
  # Same call with reverse=False and one annotation per input pair.
  assert show_sorted_by_counts(
    label_count_pairs=[("b", -3), ("a", -3), ("c", 2)], reverse=False,
    out=out, prefix="%", annotations=[None, "", "x"])
  assert not show_diff(out.getvalue(), """\
%"c" 2 x
%"a" -3
%"b" -3
""")
  #
  # line_breaker: iterating it yields the blocks of a string for width=7.
  from libtbx.str_utils import line_breaker
  for string, expected_result in [
    ("", [""]),
    ("this is", ["this is"]),
    ("this is a", ["this is", "a"]),
    ("this is a sentence", ["this is", "a", "sentence"]),
    ("this is a longer sentence", ["this is", "a", "longer", "sentence"]),
    ("this is a very long sentence indeed",
      ["this is", "a very", "long", "sentence", "indeed"])]:
    assert [block for block in line_breaker(string, width=7)]==expected_result
  #
  # libtbx StringIO: compared against cStringIO for writing and for a
  # pickle round trip.
  from libtbx.str_utils import StringIO
  out1 = cStringIO.StringIO()
  out2 = StringIO()
  out3 = StringIO("Hello world!\n")
  print >> out1, "Hello world!"
  print >> out2, "Hello world!"
  # Printing to the instance created with initial text must raise
  # AttributeError; anything else is reported as a failure.
  try :
    print >> out3, "Hello world!"
  except AttributeError :
    pass
  else :
    raise Exception_expected
  out4 = cPickle.loads(cPickle.dumps(out2))
  out5 = cPickle.loads(cPickle.dumps(out3))
  assert out4.getvalue()==out1.getvalue()==out2.getvalue()==out5.getvalue()
  #
  # reformat_terminal_text: multi-line text comes back as one line.
  from libtbx.str_utils import reformat_terminal_text
  txt1 = """
This is some
terminal-formatted
text which needs
to be reset.
"""
  assert (reformat_terminal_text(txt1) ==
          "This is some terminal-formatted text which needs to be reset.")
  txt2 = """
 This is more
 terminal-formatted
 text which needs
 to be reset.
"""
  #
  # strip_lines / rstrip_lines: applied to txt2, compared line by line.
  from libtbx.str_utils import strip_lines, rstrip_lines
  # "lines" is not referenced by any statement below.
  lines = [" This is more ", " terminal-formatted ", " text "]
  assert (strip_lines(txt2) ==
    "\nThis is more\nterminal-formatted\ntext which needs\nto be reset.")
  assert (rstrip_lines(txt2) ==
    "\n This is more\n terminal-formatted\n text which needs\n to be reset."
  )
  #
  # expandtabs_track_columns: returns the expanded string plus, for each
  # input character, its index in the expanded string.
  from libtbx.str_utils import expandtabs_track_columns
  def check(s):
    # Verify both return values for one input string.
    es,js = expandtabs_track_columns(s=s)
    assert len(js) == len(s)
    assert es == s.expandtabs()
    # Reading the expanded string at the tracked indices must give back
    # the input with each tab replaced by one blank.
    sr = "".join([es[j] for j in js])
    assert sr == s.replace("\t", " ")
  check("")
  check("\t")
  check("\t\t")
  check("\ty")
  check("x\ty")
  check("x\ty\tz")
  check("\txy\t\tz")
  check("abcdefg\txy\t\tz")
  check("ab defgh\txyz\t\tu")
  #
  # format_value: None is rendered as text instead of being formatted.
  from libtbx.str_utils import format_value
  assert format_value("%.4f", 1.2345678) == "1.2346"
  assert format_value("%.4f", None) == " None"
  assert format_value("%.4f", None, replace_none_with="---") == " ---"
  #
  # make_header: the title is written to "out" between runs of "=".
  from libtbx.str_utils import make_header
  out = StringIO()
  make_header("Header 1", out=out)
  assert (out.getvalue() == """
=================================== Header 1 ==================================

""")
  out = StringIO()
  make_header("Header 2", out=out)
  assert (out.getvalue() == """
=================================== Header 2 ==================================

""")
  #
  # string_representation: must agree with repr() for every two-character
  # string built from the code points in iset.
  from libtbx.str_utils import string_representation
  iset = range(130) + range(250,256)
  for i in iset:
    s = chr(i)
    for j in iset:
      ss = s + chr(j)
      assert string_representation(
        string=ss, preferred_quote="'", alternative_quote='"') == repr(ss)
  # framed_output: text written to the box reaches "out" inside a frame.
  from libtbx.str_utils import framed_output
  out = StringIO()
  box = framed_output(out, frame='#')
  print >> box, "Hello, world!"
  box.close()
  assert (out.getvalue() == """
#################
# Hello, world! #
#################
""")
  out = StringIO()
  # Fixed width, centred content and a title in the top border.
  box = framed_output(out, frame='-', width=80, center=True,
    title="Refinement stats")
  box.write("r_free = 0.1234")
  box.write(" ")
  box.write("r_work = 0.1567")
  box.close()
  assert (out.getvalue() == """
|--------------------------------Refinement stats------------------------------|
| r_free = 0.1234 r_work = 0.1567 |
|------------------------------------------------------------------------------|
""")
  out = StringIO()
  # Prefixed box with a separator row between two writes.
  box = framed_output(out, frame='-', width=72, prefix=" ",
    title="Validation summary")
  print >> box, "Overall MolProbity score: 2.56"
  box.add_separator()
  print >> box, """\
Ramachandran favored: 97.5 %
outliers: 2.5 %
Rotamer outliers: 5.9 %
Clashscore: 10.9"""
  # Nothing has reached "out" yet; the frame appears once the box object
  # is deleted.
  assert (out.getvalue() == "")
  del box
  assert (out.getvalue() == """
 |-Validation summary---------------------------------------------------|
 | Overall MolProbity score: 2.56 |
 |----------------------------------------------------------------------|
 | Ramachandran favored: 97.5 % |
 | outliers: 2.5 % |
 | Rotamer outliers: 5.9 % |
 | Clashscore: 10.9 |
 |----------------------------------------------------------------------|
""")
  # print_message_in_box: one call that wraps and frames a message.
  from libtbx.str_utils import print_message_in_box
  out = StringIO()
  print_message_in_box(
    message="This is some terminal-formatted text which needs to be reset.",
    out=out,
    width=32,
    center=True,
    prefix=" ",
    frame='*')
  assert (out.getvalue() == """
 ********************************
 * This is some *
 * terminal-formatted text *
 * which needs to be reset. *
 ********************************
""")
def model_based_outliers(self, f_model, level=.01, return_data=False,
                         plot_out=None):
  """Run the model-based (likelihood ratio) outlier test.

  A sigmaa estimator is built from ``self.miller_obs``, ``f_model`` and
  ``self.r_free_flags``. Its normalised observed and calculated amplitudes
  are fed to ``scaling.likelihood_ratio_outlier_test``. The standardised
  likelihood ``x`` of each reflection is turned into a p-value,
  ``1 - erf(sqrt(x / 2)) ** N`` with ``N`` the number of reflections, and
  the p-value is compared with ``level``.

  Parameters:
    f_model: calculated data, passed to the estimator as ``miller_calc``.
    level: p-value threshold; a reflection is flagged True when its
      p-value is larger than ``level``.
    return_data: when false (default) the flag array is returned; when
      true, ``self.miller_obs`` restricted to the True flags is returned.
    plot_out: forwarded unchanged to ``self.make_log_model``.

  Side effects:
    * ``self.r_free_flags`` is replaced by its inverse when it holds no
      True entry at all.
    * The estimator summary and a text report (explanation, the output of
      ``self.make_log_model`` and a 15-slot histogram of the scaled
      LL-gain) are written to ``self.out``.

  Output is produced with ``write()`` calls rather than the Python 2-only
  ``print >> stream`` statement, which raises ``TypeError`` on Python 3.
  """
  assert self.r_free_flags is not None
  if self.r_free_flags.data().count(True) == 0:
    # No reflection is flagged True: continue with the inverted flags.
    self.r_free_flags = self.r_free_flags.array(
      data=~self.r_free_flags.data())

  sigmaa_estimator = sigmaa_estimation.sigmaa_estimator(
    miller_obs=self.miller_obs,
    miller_calc=f_model,
    r_free_flags=self.r_free_flags,
    kernel_width_free_reflections=200,
    n_sampling_points=20,
    n_chebyshev_terms=13)
  sigmaa_estimator.show(out=self.out)
  sigmaa = sigmaa_estimator.sigmaa()
  obs_norm = abs(sigmaa_estimator.normalized_obs)
  calc_norm = sigmaa_estimator.normalized_calc

  f_model_outlier_object = scaling.likelihood_ratio_outlier_test(
    f_obs=obs_norm.data(),
    sigma_obs=None,
    f_calc=calc_norm.data(),
    # the data is prenormalized, all epsilons are unity
    epsilon=flex.double(calc_norm.data().size(), 1.0),
    centric=obs_norm.centric_flags().data(),
    alpha=sigmaa.data(),
    beta=1.0 - sigmaa.data() * sigmaa.data())
  # These four results are not used further down.
  # NOTE(review): presumably plain accessors; confirm they have no side
  # effects before removing the calls.
  modes = f_model_outlier_object.posterior_mode()
  lik = f_model_outlier_object.log_likelihood()
  p_lik = f_model_outlier_object.posterior_mode_log_likelihood()
  s_der = f_model_outlier_object.posterior_mode_snd_der()

  ll_gain = f_model_outlier_object.standardized_likelihood()

  # The smallest value should be 0, but numerical noise can make it come
  # out slightly negative. Everything below eps is set to eps so that the
  # square root further down is well defined.
  eps = 1.0e-10
  zeros = flex.bool(ll_gain < eps)
  # NOTE(review): p_values starts out as the very same object as ll_gain;
  # if set_selected works in place the clamp also changes ll_gain -
  # confirm that this is intended.
  p_values = ll_gain
  p_values = p_values.set_selected(zeros, eps)
  p_values = erf(flex.sqrt(p_values / 2.0))
  p_values = 1.0 - flex.pow(p_values, float(p_values.size()))

  # select on p-values, then attach each result to the observed indices
  flags = flex.bool(p_values > level)
  flags = self.miller_obs.customized_copy(data=flags)
  ll_gain = self.miller_obs.customized_copy(data=ll_gain)
  p_values = self.miller_obs.customized_copy(data=p_values)

  log_message = """

Model based outlier rejection.
------------------------------

Calculated amplitudes and estimated values of alpha and beta
are used to compute the log-likelihood of the observed amplitude.
The method is inspired by Read, Acta Cryst. (1999). D55, 1759-1764.
Outliers are rejected on the basis of the assumption that a scaled
log likelihood differnce 2(log[P(Fobs)]-log[P(Fmode)])/Q\" is distributed
according to a Chi-square distribution (Q\" is equal to the second
derivative of the log likelihood function of the mode of the
distribution).
The outlier threshold of the p-value relates to the p-value of the
extreme value distribution of the chi-square distribution.

"""

  # NOTE(review): the return values of map_to_asu() are discarded; if the
  # method returns a new array instead of working in place these three
  # calls have no effect - confirm against the miller array API.
  flags.map_to_asu()
  ll_gain.map_to_asu()
  p_values.map_to_asu()

  assert flags.indices().all_eq(self.miller_obs.indices())
  assert ll_gain.indices().all_eq(self.miller_obs.indices())
  assert p_values.indices().all_eq(self.miller_obs.indices())

  log_message = self.make_log_model(
    log_message, flags, ll_gain, p_values,
    obs_norm, calc_norm, sigmaa, plot_out)

  # Assemble the report in memory first; every write below reproduces one
  # former print statement, including its trailing newline.
  tmp_log = StringIO()
  tmp_log.write("%s\n" % (log_message,))
  # histogram of log likelihood gain values
  tmp_log.write("\n")
  tmp_log.write(
    "The histoghram of scaled (LL-gain) values is shown below.\n")
  tmp_log.write(
    " Note: scaled (LL-gain) is approximately Chi-square distributed.\n")
  tmp_log.write("\n")
  tmp_log.write(" scaled(LL-gain) Frequency\n")
  histo = flex.histogram(ll_gain.data(), 15)
  histo.show(f=tmp_log, format_cutoffs='%7.3f')

  out = self.out
  if out is None:
    # "print >> None, ..." used to fall back to standard output.
    import sys
    out = sys.stdout
  out.write(tmp_log.getvalue() + "\n")

  if not return_data:
    return flags
  assert flags.indices().all_eq(self.miller_obs.indices())
  return self.miller_obs.select(flags.data())
def model_based_outliers(self, f_model, level=0.01, return_data=False, plot_out=None):
    """Run the model-based (likelihood ratio) outlier test.

    A sigmaa estimator is built from ``self.miller_obs``, ``f_model`` and
    ``self.r_free_flags``. Its normalised observed and calculated
    amplitudes are fed to ``scaling.likelihood_ratio_outlier_test``. The
    standardised likelihood ``x`` of each reflection is turned into a
    p-value, ``1 - erf(sqrt(x / 2)) ** N`` with ``N`` the number of
    reflections, and the p-value is compared with ``level``.

    Parameters:
        f_model: calculated data, passed to the estimator as
            ``miller_calc``.
        level: p-value threshold; a reflection is flagged True when its
            p-value is larger than ``level``.
        return_data: when false (default) the flag array is returned; when
            true, ``self.miller_obs`` restricted to the True flags is
            returned.
        plot_out: forwarded unchanged to ``self.make_log_model``.

    Side effects:
        * ``self.r_free_flags`` is replaced by its inverse when it holds
          no True entry at all.
        * The estimator summary and a text report (explanation, the output
          of ``self.make_log_model`` and a 15-slot histogram of the scaled
          LL-gain) are written to ``self.out``.

    Output is produced with ``write()`` calls rather than the Python 2-only
    ``print >> stream`` statement, which raises ``TypeError`` on Python 3.
    """
    assert self.r_free_flags is not None
    if self.r_free_flags.data().count(True) == 0:
        # No reflection is flagged True: continue with the inverted flags.
        self.r_free_flags = self.r_free_flags.array(data=~self.r_free_flags.data())

    sigmaa_estimator = sigmaa_estimation.sigmaa_estimator(
        miller_obs=self.miller_obs,
        miller_calc=f_model,
        r_free_flags=self.r_free_flags,
        kernel_width_free_reflections=200,
        n_sampling_points=20,
        n_chebyshev_terms=13,
    )
    sigmaa_estimator.show(out=self.out)
    sigmaa = sigmaa_estimator.sigmaa()
    obs_norm = abs(sigmaa_estimator.normalized_obs)
    calc_norm = sigmaa_estimator.normalized_calc

    f_model_outlier_object = scaling.likelihood_ratio_outlier_test(
        f_obs=obs_norm.data(),
        sigma_obs=None,
        f_calc=calc_norm.data(),
        # the data is prenormalized, all epsilons are unity
        epsilon=flex.double(calc_norm.data().size(), 1.0),
        centric=obs_norm.centric_flags().data(),
        alpha=sigmaa.data(),
        beta=1.0 - sigmaa.data() * sigmaa.data(),
    )
    # These four results are not used further down.
    # NOTE(review): presumably plain accessors; confirm they have no side
    # effects before removing the calls.
    modes = f_model_outlier_object.posterior_mode()
    lik = f_model_outlier_object.log_likelihood()
    p_lik = f_model_outlier_object.posterior_mode_log_likelihood()
    s_der = f_model_outlier_object.posterior_mode_snd_der()

    ll_gain = f_model_outlier_object.standardized_likelihood()

    # The smallest value should be 0, but numerical noise can make it come
    # out slightly negative. Everything below eps is set to eps so that the
    # square root further down is well defined.
    eps = 1.0e-10
    zeros = flex.bool(ll_gain < eps)
    # NOTE(review): p_values starts out as the very same object as ll_gain;
    # if set_selected works in place the clamp also changes ll_gain -
    # confirm that this is intended.
    p_values = ll_gain
    p_values = p_values.set_selected(zeros, eps)
    p_values = erf(flex.sqrt(p_values / 2.0))
    p_values = 1.0 - flex.pow(p_values, float(p_values.size()))

    # select on p-values, then attach each result to the observed indices
    flags = flex.bool(p_values > level)
    flags = self.miller_obs.customized_copy(data=flags)
    ll_gain = self.miller_obs.customized_copy(data=ll_gain)
    p_values = self.miller_obs.customized_copy(data=p_values)

    log_message = """

Model based outlier rejection.
------------------------------

Calculated amplitudes and estimated values of alpha and beta
are used to compute the log-likelihood of the observed amplitude.
The method is inspired by Read, Acta Cryst. (1999). D55, 1759-1764.
Outliers are rejected on the basis of the assumption that a scaled
log likelihood differnce 2(log[P(Fobs)]-log[P(Fmode)])/Q\" is distributed
according to a Chi-square distribution (Q\" is equal to the second
derivative of the log likelihood function of the mode of the
distribution).
The outlier threshold of the p-value relates to the p-value of the
extreme value distribution of the chi-square distribution.

"""

    # NOTE(review): the return values of map_to_asu() are discarded; if the
    # method returns a new array instead of working in place these three
    # calls have no effect - confirm against the miller array API.
    flags.map_to_asu()
    ll_gain.map_to_asu()
    p_values.map_to_asu()

    assert flags.indices().all_eq(self.miller_obs.indices())
    assert ll_gain.indices().all_eq(self.miller_obs.indices())
    assert p_values.indices().all_eq(self.miller_obs.indices())

    log_message = self.make_log_model(
        log_message, flags, ll_gain, p_values, obs_norm, calc_norm, sigmaa, plot_out
    )

    # Assemble the report in memory first; every write below reproduces one
    # former print statement, including its trailing newline.
    tmp_log = StringIO()
    tmp_log.write("%s\n" % (log_message,))
    # histogram of log likelihood gain values
    tmp_log.write("\n")
    tmp_log.write("The histoghram of scaled (LL-gain) values is shown below.\n")
    tmp_log.write(" Note: scaled (LL-gain) is approximately Chi-square distributed.\n")
    tmp_log.write("\n")
    tmp_log.write(" scaled(LL-gain) Frequency\n")
    histo = flex.histogram(ll_gain.data(), 15)
    histo.show(f=tmp_log, format_cutoffs="%7.3f")

    out = self.out
    if out is None:
        # "print >> None, ..." used to fall back to standard output.
        import sys

        out = sys.stdout
    out.write(tmp_log.getvalue() + "\n")

    if not return_data:
        return flags
    assert flags.indices().all_eq(self.miller_obs.indices())
    return self.miller_obs.select(flags.data())