def run(args): if len(args) == 0: master_params.show(expert_level=0) elif ("--help" in args): print("no help available") elif ("--h" in args): print("no help available") elif ("--show_defaults" in args): master_params.show(expert_level=0) elif ("--show_defaults_all" in args): master_params.show(expert_level=10) else: log = multi_out() if (not "--quiet" in args): log.register(label="stdout", file_object=sys.stdout) string_buffer = StringIO() string_buffer_plots = StringIO() log.register(label="log_buffer", file_object=string_buffer) log_plots = StringIO() print("#phil __OFF__", file=log) print(file=log) print(date_and_time(), file=log) print(file=log) print(file=log) phil_objects = [] argument_interpreter = master_params.command_line_argument_interpreter( home_scope="scaling") reflection_file = None for arg in args: command_line_params = None arg_is_processed = False if arg == '--quiet': arg_is_processed = True ## The associated action with this keyword is implemented above if (os.path.isfile(arg)): ## is this a file name? ## Check if this is a phil file try: command_line_params = iotbx.phil.parse(file_name=arg) except KeyboardInterrupt: raise except Exception: pass if command_line_params is not None: phil_objects.append(command_line_params) arg_is_processed = True ## Check if this file is a reflection file if command_line_params is None: reflection_file = reflection_file_reader.any_reflection_file( file_name=arg, ensure_read_access=False) if (reflection_file is not None): reflection_file = arg arg_is_processed = True ## If it is not a file, it must be a phil command else: try: command_line_params = argument_interpreter.process(arg=arg) if command_line_params is not None: phil_objects.append(command_line_params) arg_is_processed = True except KeyboardInterrupt: raise except Exception: pass if not arg_is_processed: print("##----------------------------------------------##", file=log) print("## Unknown phil-file or phil-command:", arg, file=log) print("##----------------------------------------------##", file=log) print(file=log) raise Sorry("Unknown file format or phil command: %s" % arg) effective_params = master_params.fetch(sources=phil_objects) params = effective_params.extract() ## Now please read in the reflections files ## get symmetry and cell data first please ## By default, the native cell and symmetry are used ## as reference crystal_symmetry_nat = None crystal_symmetry_nat = crystal_symmetry_from_any.extract_from( file_name=params.scaling.input.xray_data.native.file_name) if params.scaling.input.xray_data.space_group is None: params.scaling.input.xray_data.space_group =\ crystal_symmetry_nat.space_group_info() print("Using symmetry of native data", file=log) if params.scaling.input.xray_data.unit_cell is None: params.scaling.input.xray_data.unit_cell =\ crystal_symmetry_nat.unit_cell() print("Using cell of native data", file=log) ## Check if a unit cell is defined if params.scaling.input.xray_data.space_group is None: raise Sorry("No space group defined") if params.scaling.input.xray_data.unit_cell is None: raise Sorry("No unit cell defined") crystal_symmetry = crystal_symmetry = crystal.symmetry( unit_cell=params.scaling.input.xray_data.unit_cell, space_group_symbol=str(params.scaling.input.xray_data.space_group)) effective_params = master_params.fetch(sources=phil_objects) new_params = master_params.format(python_object=params) print("Effective parameters", file=log) print("#phil __ON__", file=log) new_params.show(out=log, expert_level=params.scaling.input.expert_level) print("#phil __END__", file=log) print(file=log) ## define a xray data server xray_data_server = reflection_file_utils.reflection_file_server( crystal_symmetry=crystal_symmetry, force_symmetry=True, reflection_files=[]) ## Read in native data and make appropriatre selections miller_array_native = None miller_array_native = xray_data_server.get_xray_data( file_name=params.scaling.input.xray_data.native.file_name, labels=params.scaling.input.xray_data.native.labels, ignore_all_zeros=True, parameter_scope='scaling.input.SIR_scale.xray_data.native') info_native = miller_array_native.info() miller_array_native = miller_array_native.map_to_asu().select( miller_array_native.indices() != (0, 0, 0)) miller_array_native = miller_array_native.select( miller_array_native.data() > 0) ## Convert to amplitudes if (miller_array_native.is_xray_intensity_array()): miller_array_native = miller_array_native.f_sq_as_f() elif (miller_array_native.is_complex_array()): miller_array_native = abs(miller_array_native) if not miller_array_native.is_real_array(): raise Sorry("miller_array_native is not a real array") miller_array_native.set_info(info=info_native) ## Read in derivative data and make appropriate selections miller_array_derivative = None miller_array_derivative = xray_data_server.get_xray_data( file_name=params.scaling.input.xray_data.derivative.file_name, labels=params.scaling.input.xray_data.derivative.labels, ignore_all_zeros=True, parameter_scope='scaling.input.SIR_scale.xray_data.derivative') info_derivative = miller_array_derivative.info() miller_array_derivative = miller_array_derivative.map_to_asu().select( miller_array_derivative.indices() != (0, 0, 0)) miller_array_derivative = miller_array_derivative.select( miller_array_derivative.data() > 0) ## Convert to amplitudes if (miller_array_derivative.is_xray_intensity_array()): miller_array_derivative = miller_array_derivative.f_sq_as_f() elif (miller_array_derivative.is_complex_array()): miller_array_derivative = abs(miller_array_derivative) if not miller_array_derivative.is_real_array(): raise Sorry("miller_array_derivative is not a real array") miller_array_derivative.set_info(info=info_derivative) ## As this is a SIR case, we will remove any anomalous pairs if miller_array_derivative.anomalous_flag(): miller_array_derivative = miller_array_derivative.average_bijvoet_mates()\ .set_observation_type( miller_array_derivative ) if miller_array_native.anomalous_flag(): miller_array_native = miller_array_native.average_bijvoet_mates()\ .set_observation_type( miller_array_native ) ## Print info print(file=log) print("Native data", file=log) print("===========", file=log) miller_array_native.show_comprehensive_summary(f=log) print(file=log) native_pre_scale = pre_scale.pre_scaler( miller_array_native, params.scaling.input.scaling_strategy.pre_scaler_protocol, params.scaling.input.basic) miller_array_native = native_pre_scale.x1.deep_copy() del native_pre_scale print(file=log) print("Derivative data", file=log) print("===============", file=log) miller_array_derivative.show_comprehensive_summary(f=log) print(file=log) derivative_pre_scale = pre_scale.pre_scaler( miller_array_derivative, params.scaling.input.scaling_strategy.pre_scaler_protocol, params.scaling.input.basic) miller_array_derivative = derivative_pre_scale.x1.deep_copy() del derivative_pre_scale scaler = fa_estimation.combined_scaling( miller_array_native, miller_array_derivative, params.scaling.input.scaling_strategy.iso_protocol) miller_array_native = scaler.x1.deep_copy() miller_array_derivative = scaler.x2.deep_copy() del scaler print(file=log) print("Making delta f's", file=log) print("----------------", file=log) print(file=log) delta_gen = pair_analyses.delta_generator(miller_array_native, miller_array_derivative) print(file=log) print("writing mtz file", file=log) print("----------------", file=log) print(file=log) ## some assertions to make sure nothing went weerd assert miller_array_native.observation_type() is not None assert miller_array_derivative.observation_type() is not None assert delta_gen.abs_delta_f.observation_type() is not None ## Please write out the abs_delta_f array mtz_dataset = delta_gen.abs_delta_f.as_mtz_dataset( column_root_label='F' + params.scaling.input.output.outlabel) mtz_dataset.mtz_object().write( file_name=params.scaling.input.output.hklout)
def exercise(): from libtbx.test_utils import show_diff, Exception_expected from six.moves import cPickle as pickle # from libtbx.str_utils import split_keeping_spaces assert split_keeping_spaces(s="") == [] assert split_keeping_spaces(s=" ") == [" "] assert split_keeping_spaces(s="a") == ["a"] assert split_keeping_spaces(s="abc") == ["abc"] assert split_keeping_spaces(s=" a") == [" ", "a"] assert split_keeping_spaces(s=" a") == [" ", "a"] assert split_keeping_spaces(s=" abc") == [" ", "abc"] assert split_keeping_spaces(s=" abc ") == [" ", "abc", " "] assert split_keeping_spaces(s=" abc ") == [" ", "abc", " "] assert split_keeping_spaces(s="a ") == ["a", " "] assert split_keeping_spaces(s="a ") == ["a", " "] assert split_keeping_spaces(s="abc ") == ["abc", " "] assert split_keeping_spaces(s="a b") == ["a", " ", "b"] assert split_keeping_spaces(s="a b") == ["a", " ", "b"] assert split_keeping_spaces(s=" a b c d ") == [ " ", "a", " ", "b", " ", "c", " ", "d", " " ] # from libtbx.str_utils import size_as_string_with_commas assert size_as_string_with_commas(0) == "0" assert size_as_string_with_commas(1) == "1" assert size_as_string_with_commas(-1) == "-1" assert size_as_string_with_commas(10) == "10" assert size_as_string_with_commas(100) == "100" assert size_as_string_with_commas(1000) == "1,000" assert size_as_string_with_commas(12345) == "12,345" assert size_as_string_with_commas(12345678) == "12,345,678" assert size_as_string_with_commas(-12345678) == "-12,345,678" # from libtbx.str_utils import show_string assert show_string("abc") == '"abc"' assert show_string("a'c") == '"a\'c"' assert show_string('a"c') == "'a\"c'" assert show_string('\'"c') == '"\'\\"c"' # from libtbx.str_utils import prefix_each_line assert prefix_each_line(prefix="^", lines_as_one_string="""\ hello world""") == """\ ^hello ^world""" # from libtbx.str_utils import prefix_each_line_suffix assert prefix_each_line_suffix(prefix="^", lines_as_one_string="""\ hello world""", suffix=" ") == """\ ^hello ^world""" assert prefix_each_line_suffix(prefix="^", lines_as_one_string="""\ hello world""", suffix=" ", rstrip=False) == """\ ^hello%s ^world """ % " " # from libtbx.str_utils import show_sorted_by_counts from six.moves import cStringIO out = cStringIO() assert show_sorted_by_counts(label_count_pairs=[("b", 3), ("a", 3), ("c", -2)], out=out, prefix="%") assert not show_diff(out.getvalue(), """\ %"a" 3 %"b" 3 %"c" -2 """) out = cStringIO() assert show_sorted_by_counts(label_count_pairs=[("b", -3), ("a", -3), ("c", 2)], reverse=False, out=out, prefix="%", annotations=[None, "", "x"]) assert not show_diff(out.getvalue(), """\ %"a" -3 %"b" -3 %"c" 2 x """) # from libtbx.str_utils import line_breaker for string, expected_result in [ ("", [""]), ("this is", ["this is"]), ("this is a", ["this is", "a"]), ("this is a sentence", ["this is", "a", "sentence"]), ("this is a longer sentence", ["this is", "a", "longer", "sentence"]), ("this is a very long sentence indeed", ["this is", "a very", "long", "sentence", "indeed"]) ]: assert [block for block in line_breaker(string, width=7)] == expected_result # from libtbx.str_utils import StringIO out1 = cStringIO() out2 = StringIO() out3 = StringIO("Hello world!\n") print("Hello world!", file=out1) print("Hello world!", file=out2) try: print("Hello world!", file=out3) except AttributeError: pass else: raise Exception_expected out4 = pickle.loads(pickle.dumps(out2)) out5 = pickle.loads(pickle.dumps(out3)) assert out4.getvalue() == out1.getvalue() == out2.getvalue( ) == out5.getvalue() # from libtbx.str_utils import reformat_terminal_text txt1 = """ This is some terminal-formatted text which needs to be reset. """ assert (reformat_terminal_text(txt1) == "This is some terminal-formatted text which needs to be reset.") txt2 = """ This is more terminal-formatted text which needs to be reset. """ # from libtbx.str_utils import strip_lines, rstrip_lines lines = [" This is more ", " terminal-formatted ", " text "] assert ( strip_lines(txt2) == "\nThis is more\nterminal-formatted\ntext which needs\nto be reset.") assert ( rstrip_lines(txt2) == "\n This is more\n terminal-formatted\n text which needs\n to be reset." ) # from libtbx.str_utils import expandtabs_track_columns def check(s): es, js = expandtabs_track_columns(s=s) assert len(js) == len(s) assert es == s.expandtabs() sr = "".join([es[j] for j in js]) assert sr == s.replace("\t", " ") check("") check("\t") check("\t\t") check("\ty") check("x\ty") check("x\ty\tz") check("\txy\t\tz") check("abcdefg\txy\t\tz") check("ab defgh\txyz\t\tu") # from libtbx.str_utils import format_value assert format_value("%.4f", 1.2345678) == "1.2346" assert format_value("%.4f", None) == " None" assert format_value("%.4f", None, replace_none_with="---") == " ---" # from libtbx.str_utils import make_header out = StringIO() make_header("Header 1", out=out) assert (out.getvalue() == """ =================================== Header 1 ================================== """) out = StringIO() make_header("Header 2", out=out) assert (out.getvalue() == """ =================================== Header 2 ================================== """) # import sys from libtbx.str_utils import string_representation iset = list(range(130)) + list(range(250, 256)) for i in iset: s = chr(i) for j in iset: ss = s + chr(j) sr = string_representation(string=ss, preferred_quote="'", alternative_quote='"') if sys.hexversion < 0x03000000: assert sr == repr(ss) else: assert eval(sr) == ss from libtbx.str_utils import framed_output out = StringIO() box = framed_output(out, frame='#') print("Hello, world!", file=box) box.close() assert (out.getvalue() == """ ################# # Hello, world! # ################# """) out = StringIO() box = framed_output(out, frame='-', width=80, center=True, title="Refinement stats") box.write("r_free = 0.1234") box.write(" ") box.write("r_work = 0.1567") box.close() assert (out.getvalue() == """ |--------------------------------Refinement stats------------------------------| | r_free = 0.1234 r_work = 0.1567 | |------------------------------------------------------------------------------| """) out = StringIO() box = framed_output(out, frame='-', width=72, prefix=" ", title="Validation summary") print("Overall MolProbity score: 2.56", file=box) box.add_separator() print("""\ Ramachandran favored: 97.5 % outliers: 2.5 % Rotamer outliers: 5.9 % Clashscore: 10.9""", file=box) assert (out.getvalue() == "") del box assert (out.getvalue() == """ |-Validation summary---------------------------------------------------| | Overall MolProbity score: 2.56 | |----------------------------------------------------------------------| | Ramachandran favored: 97.5 % | | outliers: 2.5 % | | Rotamer outliers: 5.9 % | | Clashscore: 10.9 | |----------------------------------------------------------------------| """) from libtbx.str_utils import print_message_in_box out = StringIO() print_message_in_box( message="This is some terminal-formatted text which needs to be reset.", out=out, width=32, center=True, prefix=" ", frame='*') assert (out.getvalue() == """ ******************************** * This is some * * terminal-formatted text * * which needs to be reset. * ******************************** """) from libtbx.str_utils import make_big_header out = StringIO() make_big_header("Section title", out=out) assert (out.getvalue() == """ ################################################################################ # Section title # ################################################################################ """)
def run(args, command_name="phenix.remove_outliers"): if (len(args)==0 or "--help" in args or "--h" in args or "-h" in args): print_help(command_name=command_name) else: log = multi_out() plot_out = None if (not "--quiet" in args): log.register(label="stdout", file_object=sys.stdout) string_buffer = StringIO() string_buffer_plots = StringIO() log.register(label="log_buffer", file_object=string_buffer) phil_objects = [] argument_interpreter = master_params.command_line_argument_interpreter( home_scope="outlier_detection") for arg in args: command_line_params = None arg_is_processed = False # is it a file? if arg=="--quiet": arg_is_processed = True if (os.path.isfile(arg)): ## is this a file name? # check if it is a phil file try: command_line_params = iotbx.phil.parse(file_name=arg) if command_line_params is not None: phil_objects.append(command_line_params) arg_is_processed = True except KeyboardInterrupt: raise except Exception : pass else: try: command_line_params = argument_interpreter.process(arg=arg) if command_line_params is not None: phil_objects.append(command_line_params) arg_is_processed = True except KeyboardInterrupt: raise except Exception : pass if not arg_is_processed: print >> log, "##----------------------------------------------##" print >> log, "## Unknown file or keyword:", arg print >> log, "##----------------------------------------------##" print >> log raise Sorry("Unknown file or keyword: %s" % arg) effective_params = master_params.fetch(sources=phil_objects) params = effective_params.extract() if not os.path.exists( params.outlier_utils.input.xray_data.file_name ) : raise Sorry("File %s can not be found"%(params.outlier_utils.input.xray_data.file_name) ) if params.outlier_utils.input.model.file_name is not None: if not os.path.exists( params.outlier_utils.input.model.file_name ): raise Sorry("File %s can not be found"%(params.outlier_utils.input.model.file_name) ) # now get the unit cell from the pdb file hkl_xs = None if params.outlier_utils.input.xray_data.file_name is not None: hkl_xs = crystal_symmetry_from_any.extract_from( file_name=params.outlier_utils.input.xray_data.file_name) pdb_xs = None if params.outlier_utils.input.model.file_name is not None: pdb_xs = crystal_symmetry_from_any.extract_from( file_name=params.outlier_utils.input.model.file_name) phil_xs = crystal.symmetry( unit_cell=params.outlier_utils.input.unit_cell, space_group_info=params.outlier_utils.input.space_group ) phil_xs.show_summary() hkl_xs.show_summary() combined_xs = crystal.select_crystal_symmetry( None,phil_xs, [pdb_xs],[hkl_xs]) # inject the unit cell and symmetry in the phil scope please params.outlier_utils.input.unit_cell = combined_xs.unit_cell() params.outlier_utils.input.space_group = \ sgtbx.space_group_info( group = combined_xs.space_group() ) new_params = master_params.format(python_object=params) new_params.show(out=log) if params.outlier_utils.input.unit_cell is None: raise Sorry("unit cell not specified") if params.outlier_utils.input.space_group is None: raise Sorry("space group not specified") if params.outlier_utils.input.xray_data.file_name is None: raise Sorry("Xray data not specified") if params.outlier_utils.input.model.file_name is None: print "PDB file not specified. Basic wilson outlier rejections only." #----------------------------------------------------------- # # step 1: read in the reflection file # phil_xs = crystal.symmetry( unit_cell=params.outlier_utils.input.unit_cell, space_group_info=params.outlier_utils.input.space_group ) xray_data_server = reflection_file_utils.reflection_file_server( crystal_symmetry = phil_xs, force_symmetry = True, reflection_files=[]) miller_array = None miller_array = xray_data_server.get_xray_data( file_name = params.outlier_utils.input.xray_data.file_name, labels = params.outlier_utils.input.xray_data.obs_labels, ignore_all_zeros = True, parameter_scope = 'outlier_utils.input.xray_data', parameter_name = 'obs_labels' ) info = miller_array.info() miller_array = miller_array.map_to_asu() miller_array = miller_array.select( miller_array.indices() != (0,0,0)) #we have to check if the sigma's make any sense at all if not miller_array.sigmas_are_sensible(): miller_array = miller_array.customized_copy( data = miller_array.data(), sigmas=None).set_observation_type(miller_array) miller_array = miller_array.select( miller_array.data() > 0 ) if miller_array.sigmas() is not None: miller_array = miller_array.select( miller_array.sigmas() > 0 ) if (miller_array.is_xray_intensity_array()): miller_array = miller_array.f_sq_as_f() elif (miller_array.is_complex_array()): miller_array = abs(miller_array) miller_array.set_info(info) merged_anomalous=False if miller_array.anomalous_flag(): miller_array = miller_array.average_bijvoet_mates().set_observation_type( miller_array ) merged_anomalous=True miller_array = miller_array.map_to_asu() # get the free reflections please free_flags = None if params.outlier_utils.input.xray_data.free_flags is None: free_flags = miller_array.generate_r_free_flags( fraction=params.outlier_utils.\ additional_parameters.free_flag_generation.fraction, max_free=params.outlier_utils.\ additional_parameters.free_flag_generation.max_number, lattice_symmetry_max_delta=params.outlier_utils.\ additional_parameters.free_flag_generation.lattice_symmetry_max_delta, use_lattice_symmetry=params.outlier_utils.\ additional_parameters.free_flag_generation.use_lattice_symmetry ) else: free_flags = xray_data_server.get_xray_data( file_name = params.outlier_utils.input.xray_data.file_name, labels = params.outlier_utils.input.xray_data.free_flags, ignore_all_zeros = True, parameter_scope = 'outlier_utils.input.xray_data', parameter_name = 'free_flags' ) if free_flags.anomalous_flag(): free_flags = free_flags.average_bijvoet_mates() merged_anomalous=True free_flags = free_flags.customized_copy( data = flex.bool( free_flags.data() == 1 )) free_flags = free_flags.map_to_asu() free_flags = free_flags.common_set( miller_array ) print >> log print >> log, "Summary info of observed data" print >> log, "=============================" miller_array.show_summary(f=log) if merged_anomalous: print >> log, "For outlier detection purposes, the Bijvoet pairs have been merged." print >> log print >> log, "Constructing an outlier manager" print >> log, "===============================" print >> log outlier_manager = outlier_rejection.outlier_manager( miller_array, free_flags, out=log) basic_array = None extreme_array = None model_based_array = None basic_array = outlier_manager.basic_wilson_outliers( p_basic_wilson = params.outlier_utils.outlier_detection.\ parameters.basic_wilson.level, return_data = True) extreme_array = outlier_manager.extreme_wilson_outliers( p_extreme_wilson = params.outlier_utils.outlier_detection.parameters.\ extreme_wilson.level, return_data = True) beamstop_array = outlier_manager.beamstop_shadow_outliers( level = params.outlier_utils.outlier_detection.parameters.\ beamstop.level, d_min = params.outlier_utils.outlier_detection.parameters.\ beamstop.d_min, return_data=True) #---------------------------------------------------------------- # Step 2: get an xray structure from the PDB file # if params.outlier_utils.input.model.file_name is not None: model = pdb.input(file_name=params.outlier_utils.input.model.file_name).xray_structure_simple( crystal_symmetry=phil_xs) print >> log, "Atomic model summary" print >> log, "====================" model.show_summary(f=log) print >> log # please make an f_model object for bulk solvent scaling etc etc f_model_object = f_model.manager( f_obs = miller_array, r_free_flags = free_flags, xray_structure = model ) print >> log, "Bulk solvent scaling of the data" print >> log, "================================" print >> log, "Maximum likelihood bulk solvent scaling." print >> log f_model_object.update_solvent_and_scale(out=log) plot_out = StringIO() model_based_array = outlier_manager.model_based_outliers( f_model_object.f_model(), level=params.outlier_utils.outlier_detection.parameters.model_based.level, return_data=True, plot_out=plot_out) #check what needs to be put out please if params.outlier_utils.output.hklout is not None: if params.outlier_utils.outlier_detection.protocol == "model": if params.outlier_utils.input.model.file_name == None: print >> log, "Model based rejections requested. No model was supplied." print >> log, "Switching to writing out rejections based on extreme value Wilson statistics." params.outlier_utils.outlier_detection.protocol="extreme" output_array = None print >> log if params.outlier_utils.outlier_detection.protocol == "basic": print >> log, "Non-outliers found by the basic wilson statistics" print >> log, "protocol will be written out." output_array = basic_array new_set_of_free_flags = free_flags.common_set( basic_array ) if params.outlier_utils.outlier_detection.protocol == "extreme": print >> log, "Non-outliers found by the extreme value wilson statistics" print >> log, "protocol will be written out." output_array = extreme_array new_set_of_free_flags = free_flags.common_set( extreme_array ) if params.outlier_utils.outlier_detection.protocol == "model": print >> log, "Non-outliers found by the model based" print >> log, "protocol will be written out to the file:" print >> log, params.outlier_utils.output.hklout print >> log output_array = model_based_array new_set_of_free_flags = free_flags.common_set( model_based_array ) if params.outlier_utils.outlier_detection.protocol == "beamstop": print >> log, "Outliers found for the beamstop shadow" print >> log, "problems detection protocol will be written to the file:" print >> log, params.outlier_utils.output.hklout print >> log output_array = model_based_array new_set_of_free_flags = free_flags.common_set( model_based_array ) mtz_dataset = output_array.as_mtz_dataset( column_root_label="FOBS") mtz_dataset = mtz_dataset.add_miller_array( miller_array = new_set_of_free_flags, column_root_label = "Free_R_Flag" ) mtz_dataset.mtz_object().write( file_name=params.outlier_utils.output.hklout) if (params.outlier_utils.output.logfile is not None): final_log = StringIO() print >> final_log, string_buffer.getvalue() print >> final_log if plot_out is not None: print >> final_log, plot_out.getvalue() outfile = open( params.outlier_utils.output.logfile, 'w' ) outfile.write( final_log.getvalue() ) print >> log print >> log, "A logfile named %s was created."%( params.outlier_utils.output.logfile) print >> log, "This logfile contains the screen output and" print >> log, "(possibly) some ccp4 style loggraph plots"
def run(args, command_name="phenix.remove_outliers"): if (len(args) == 0 or "--help" in args or "--h" in args or "-h" in args): print_help(command_name=command_name) else: log = multi_out() plot_out = None if (not "--quiet" in args): log.register(label="stdout", file_object=sys.stdout) string_buffer = StringIO() string_buffer_plots = StringIO() log.register(label="log_buffer", file_object=string_buffer) phil_objects = [] argument_interpreter = master_params.command_line_argument_interpreter( home_scope="outlier_detection") for arg in args: command_line_params = None arg_is_processed = False # is it a file? if arg == "--quiet": arg_is_processed = True if (os.path.isfile(arg)): ## is this a file name? # check if it is a phil file try: command_line_params = iotbx.phil.parse(file_name=arg) if command_line_params is not None: phil_objects.append(command_line_params) arg_is_processed = True except KeyboardInterrupt: raise except Exception: pass else: try: command_line_params = argument_interpreter.process(arg=arg) if command_line_params is not None: phil_objects.append(command_line_params) arg_is_processed = True except KeyboardInterrupt: raise except Exception: pass if not arg_is_processed: print >> log, "##----------------------------------------------##" print >> log, "## Unknown file or keyword:", arg print >> log, "##----------------------------------------------##" print >> log raise Sorry("Unknown file or keyword: %s" % arg) effective_params = master_params.fetch(sources=phil_objects) params = effective_params.extract() if not os.path.exists(params.outlier_utils.input.xray_data.file_name): raise Sorry("File %s can not be found" % (params.outlier_utils.input.xray_data.file_name)) if params.outlier_utils.input.model.file_name is not None: if not os.path.exists(params.outlier_utils.input.model.file_name): raise Sorry("File %s can not be found" % (params.outlier_utils.input.model.file_name)) # now get the unit cell from the pdb file hkl_xs = None if params.outlier_utils.input.xray_data.file_name is not None: hkl_xs = crystal_symmetry_from_any.extract_from( file_name=params.outlier_utils.input.xray_data.file_name) pdb_xs = None if params.outlier_utils.input.model.file_name is not None: pdb_xs = crystal_symmetry_from_any.extract_from( file_name=params.outlier_utils.input.model.file_name) phil_xs = crystal.symmetry( unit_cell=params.outlier_utils.input.unit_cell, space_group_info=params.outlier_utils.input.space_group) phil_xs.show_summary() hkl_xs.show_summary() combined_xs = crystal.select_crystal_symmetry(None, phil_xs, [pdb_xs], [hkl_xs]) # inject the unit cell and symmetry in the phil scope please params.outlier_utils.input.unit_cell = combined_xs.unit_cell() params.outlier_utils.input.space_group = \ sgtbx.space_group_info( group = combined_xs.space_group() ) new_params = master_params.format(python_object=params) new_params.show(out=log) if params.outlier_utils.input.unit_cell is None: raise Sorry("unit cell not specified") if params.outlier_utils.input.space_group is None: raise Sorry("space group not specified") if params.outlier_utils.input.xray_data.file_name is None: raise Sorry("Xray data not specified") if params.outlier_utils.input.model.file_name is None: print "PDB file not specified. Basic wilson outlier rejections only." #----------------------------------------------------------- # # step 1: read in the reflection file # phil_xs = crystal.symmetry( unit_cell=params.outlier_utils.input.unit_cell, space_group_info=params.outlier_utils.input.space_group) xray_data_server = reflection_file_utils.reflection_file_server( crystal_symmetry=phil_xs, force_symmetry=True, reflection_files=[]) miller_array = None miller_array = xray_data_server.get_xray_data( file_name=params.outlier_utils.input.xray_data.file_name, labels=params.outlier_utils.input.xray_data.obs_labels, ignore_all_zeros=True, parameter_scope='outlier_utils.input.xray_data', parameter_name='obs_labels') info = miller_array.info() miller_array = miller_array.map_to_asu() miller_array = miller_array.select(miller_array.indices() != (0, 0, 0)) #we have to check if the sigma's make any sense at all if not miller_array.sigmas_are_sensible(): miller_array = miller_array.customized_copy( data=miller_array.data(), sigmas=None).set_observation_type(miller_array) miller_array = miller_array.select(miller_array.data() > 0) if miller_array.sigmas() is not None: miller_array = miller_array.select(miller_array.sigmas() > 0) if (miller_array.is_xray_intensity_array()): miller_array = miller_array.f_sq_as_f() elif (miller_array.is_complex_array()): miller_array = abs(miller_array) miller_array.set_info(info) merged_anomalous = False if miller_array.anomalous_flag(): miller_array = miller_array.average_bijvoet_mates( ).set_observation_type(miller_array) merged_anomalous = True miller_array = miller_array.map_to_asu() # get the free reflections please free_flags = None if params.outlier_utils.input.xray_data.free_flags is None: free_flags = miller_array.generate_r_free_flags( fraction=params.outlier_utils.\ additional_parameters.free_flag_generation.fraction, max_free=params.outlier_utils.\ additional_parameters.free_flag_generation.max_number, lattice_symmetry_max_delta=params.outlier_utils.\ additional_parameters.free_flag_generation.lattice_symmetry_max_delta, use_lattice_symmetry=params.outlier_utils.\ additional_parameters.free_flag_generation.use_lattice_symmetry ) else: free_flags = xray_data_server.get_xray_data( file_name=params.outlier_utils.input.xray_data.file_name, labels=params.outlier_utils.input.xray_data.free_flags, ignore_all_zeros=True, parameter_scope='outlier_utils.input.xray_data', parameter_name='free_flags') if free_flags.anomalous_flag(): free_flags = free_flags.average_bijvoet_mates() merged_anomalous = True free_flags = free_flags.customized_copy(data=flex.bool( free_flags.data() == 1)) free_flags = free_flags.map_to_asu() free_flags = free_flags.common_set(miller_array) print >> log print >> log, "Summary info of observed data" print >> log, "=============================" miller_array.show_summary(f=log) if merged_anomalous: print >> log, "For outlier detection purposes, the Bijvoet pairs have been merged." print >> log print >> log, "Constructing an outlier manager" print >> log, "===============================" print >> log outlier_manager = outlier_rejection.outlier_manager(miller_array, free_flags, out=log) basic_array = None extreme_array = None model_based_array = None basic_array = outlier_manager.basic_wilson_outliers( p_basic_wilson = params.outlier_utils.outlier_detection.\ parameters.basic_wilson.level, return_data = True) extreme_array = outlier_manager.extreme_wilson_outliers( p_extreme_wilson = params.outlier_utils.outlier_detection.parameters.\ extreme_wilson.level, return_data = True) beamstop_array = outlier_manager.beamstop_shadow_outliers( level = params.outlier_utils.outlier_detection.parameters.\ beamstop.level, d_min = params.outlier_utils.outlier_detection.parameters.\ beamstop.d_min, return_data=True) #---------------------------------------------------------------- # Step 2: get an xray structure from the PDB file # if params.outlier_utils.input.model.file_name is not None: model = pdb.input(file_name=params.outlier_utils.input.model. file_name).xray_structure_simple( crystal_symmetry=phil_xs) print >> log, "Atomic model summary" print >> log, "====================" model.show_summary(f=log) print >> log # please make an f_model object for bulk solvent scaling etc etc f_model_object = f_model.manager(f_obs=miller_array, r_free_flags=free_flags, xray_structure=model) print >> log, "Bulk solvent scaling of the data" print >> log, "================================" print >> log, "Maximum likelihood bulk solvent scaling." print >> log f_model_object.update_all_scales(log=log, remove_outliers=False) plot_out = StringIO() model_based_array = outlier_manager.model_based_outliers( f_model_object.f_model(), level=params.outlier_utils.outlier_detection.parameters. model_based.level, return_data=True, plot_out=plot_out) #check what needs to be put out please if params.outlier_utils.output.hklout is not None: if params.outlier_utils.outlier_detection.protocol == "model": if params.outlier_utils.input.model.file_name == None: print >> log, "Model based rejections requested. No model was supplied." print >> log, "Switching to writing out rejections based on extreme value Wilson statistics." params.outlier_utils.outlier_detection.protocol = "extreme" output_array = None print >> log if params.outlier_utils.outlier_detection.protocol == "basic": print >> log, "Non-outliers found by the basic wilson statistics" print >> log, "protocol will be written out." output_array = basic_array new_set_of_free_flags = free_flags.common_set(basic_array) if params.outlier_utils.outlier_detection.protocol == "extreme": print >> log, "Non-outliers found by the extreme value wilson statistics" print >> log, "protocol will be written out." output_array = extreme_array new_set_of_free_flags = free_flags.common_set(extreme_array) if params.outlier_utils.outlier_detection.protocol == "model": print >> log, "Non-outliers found by the model based" print >> log, "protocol will be written out to the file:" print >> log, params.outlier_utils.output.hklout print >> log output_array = model_based_array new_set_of_free_flags = free_flags.common_set( model_based_array) if params.outlier_utils.outlier_detection.protocol == "beamstop": print >> log, "Outliers found for the beamstop shadow" print >> log, "problems detection protocol will be written to the file:" print >> log, params.outlier_utils.output.hklout print >> log output_array = model_based_array new_set_of_free_flags = free_flags.common_set( model_based_array) mtz_dataset = output_array.as_mtz_dataset(column_root_label="FOBS") mtz_dataset = mtz_dataset.add_miller_array( miller_array=new_set_of_free_flags, column_root_label="Free_R_Flag") mtz_dataset.mtz_object().write( file_name=params.outlier_utils.output.hklout) if (params.outlier_utils.output.logfile is not None): final_log = StringIO() print >> final_log, string_buffer.getvalue() print >> final_log if plot_out is not None: print >> final_log, plot_out.getvalue() outfile = open(params.outlier_utils.output.logfile, 'w') outfile.write(final_log.getvalue()) print >> log print >> log, "A logfile named %s was created." % ( params.outlier_utils.output.logfile) print >> log, "This logfile contains the screen output and" print >> log, "(possibly) some ccp4 style loggraph plots"
def exercise(): from libtbx.test_utils import show_diff, Exception_expected import cPickle # from libtbx.str_utils import split_keeping_spaces assert split_keeping_spaces(s="") == [] assert split_keeping_spaces(s=" ") == [" "] assert split_keeping_spaces(s="a") == ["a"] assert split_keeping_spaces(s="abc") == ["abc"] assert split_keeping_spaces(s=" a") == [" ", "a"] assert split_keeping_spaces(s=" a") == [" ", "a"] assert split_keeping_spaces(s=" abc") == [" ", "abc"] assert split_keeping_spaces(s=" abc ") == [" ", "abc", " "] assert split_keeping_spaces(s=" abc ") == [" ", "abc", " "] assert split_keeping_spaces(s="a ") == ["a", " "] assert split_keeping_spaces(s="a ") == ["a", " "] assert split_keeping_spaces(s="abc ") == ["abc", " "] assert split_keeping_spaces(s="a b") == ["a", " ", "b"] assert split_keeping_spaces(s="a b") == ["a", " ", "b"] assert split_keeping_spaces(s=" a b c d ") == [ " ", "a", " ", "b", " ", "c", " ", "d", " "] # from libtbx.str_utils import size_as_string_with_commas assert size_as_string_with_commas(0) == "0" assert size_as_string_with_commas(1) == "1" assert size_as_string_with_commas(-1) == "-1" assert size_as_string_with_commas(10) == "10" assert size_as_string_with_commas(100) == "100" assert size_as_string_with_commas(1000) == "1,000" assert size_as_string_with_commas(12345) == "12,345" assert size_as_string_with_commas(12345678) == "12,345,678" assert size_as_string_with_commas(-12345678) == "-12,345,678" # from libtbx.str_utils import show_string assert show_string("abc") == '"abc"' assert show_string("a'c") == '"a\'c"' assert show_string('a"c') == "'a\"c'" assert show_string('\'"c') == '"\'\\"c"' # from libtbx.str_utils import prefix_each_line assert prefix_each_line(prefix="^", lines_as_one_string="""\ hello world""") == """\ ^hello ^world""" # from libtbx.str_utils import prefix_each_line_suffix assert prefix_each_line_suffix(prefix="^", lines_as_one_string="""\ hello world""", suffix=" ") == """\ ^hello ^world""" assert prefix_each_line_suffix(prefix="^", lines_as_one_string="""\ hello world""", suffix=" ", rstrip=False) == """\ ^hello%s ^world """ % " " # from libtbx.str_utils import show_sorted_by_counts import cStringIO out = cStringIO.StringIO() assert show_sorted_by_counts( label_count_pairs=[("b", 3), ("a", 3), ("c", -2)], out=out, prefix="%") assert not show_diff(out.getvalue(), """\ %"a" 3 %"b" 3 %"c" -2 """) out = cStringIO.StringIO() assert show_sorted_by_counts( label_count_pairs=[("b", -3), ("a", -3), ("c", 2)], reverse=False, out=out, prefix="%", annotations=[None, "", "x"]) assert not show_diff(out.getvalue(), """\ %"c" 2 x %"a" -3 %"b" -3 """) # from libtbx.str_utils import line_breaker for string, expected_result in [ ("", [""]), ("this is", ["this is"]), ("this is a", ["this is", "a"]), ("this is a sentence", ["this is", "a", "sentence"]), ("this is a longer sentence", ["this is", "a", "longer", "sentence"]), ("this is a very long sentence indeed", ["this is", "a very", "long", "sentence", "indeed"])]: assert [block for block in line_breaker(string, width=7)]==expected_result # from libtbx.str_utils import StringIO out1 = cStringIO.StringIO() out2 = StringIO() out3 = StringIO("Hello world!\n") print >> out1, "Hello world!" print >> out2, "Hello world!" try : print >> out3, "Hello world!" except AttributeError : pass else : raise Exception_expected out4 = cPickle.loads(cPickle.dumps(out2)) out5 = cPickle.loads(cPickle.dumps(out3)) assert out4.getvalue()==out1.getvalue()==out2.getvalue()==out5.getvalue() # from libtbx.str_utils import reformat_terminal_text txt1 = """ This is some terminal-formatted text which needs to be reset. """ assert (reformat_terminal_text(txt1) == "This is some terminal-formatted text which needs to be reset.") txt2 = """ This is more terminal-formatted text which needs to be reset. """ # from libtbx.str_utils import strip_lines, rstrip_lines lines = [" This is more ", " terminal-formatted ", " text "] assert (strip_lines(txt2) == "\nThis is more\nterminal-formatted\ntext which needs\nto be reset.") assert (rstrip_lines(txt2) == "\n This is more\n terminal-formatted\n text which needs\n to be reset." ) # from libtbx.str_utils import expandtabs_track_columns def check(s): es,js = expandtabs_track_columns(s=s) assert len(js) == len(s) assert es == s.expandtabs() sr = "".join([es[j] for j in js]) assert sr == s.replace("\t", " ") check("") check("\t") check("\t\t") check("\ty") check("x\ty") check("x\ty\tz") check("\txy\t\tz") check("abcdefg\txy\t\tz") check("ab defgh\txyz\t\tu") # from libtbx.str_utils import format_value assert format_value("%.4f", 1.2345678) == "1.2346" assert format_value("%.4f", None) == " None" assert format_value("%.4f", None, replace_none_with="---") == " ---" # from libtbx.str_utils import make_header out = StringIO() make_header("Header 1", out=out) assert (out.getvalue() == """ =================================== Header 1 ================================== """) out = StringIO() make_header("Header 2", out=out) assert (out.getvalue() == """ =================================== Header 2 ================================== """) # from libtbx.str_utils import string_representation iset = range(130) + range(250,256) for i in iset: s = chr(i) for j in iset: ss = s + chr(j) assert string_representation( string=ss, preferred_quote="'", alternative_quote='"') == repr(ss) from libtbx.str_utils import framed_output out = StringIO() box = framed_output(out, frame='#') print >> box, "Hello, world!" box.close() assert (out.getvalue() == """ ################# # Hello, world! # ################# """) out = StringIO() box = framed_output(out, frame='-', width=80, center=True, title="Refinement stats") box.write("r_free = 0.1234") box.write(" ") box.write("r_work = 0.1567") box.close() assert (out.getvalue() == """ |--------------------------------Refinement stats------------------------------| | r_free = 0.1234 r_work = 0.1567 | |------------------------------------------------------------------------------| """) out = StringIO() box = framed_output(out, frame='-', width=72, prefix=" ", title="Validation summary") print >> box, "Overall MolProbity score: 2.56" box.add_separator() print >> box, """\ Ramachandran favored: 97.5 % outliers: 2.5 % Rotamer outliers: 5.9 % Clashscore: 10.9""" assert (out.getvalue() == "") del box assert (out.getvalue() == """ |-Validation summary---------------------------------------------------| | Overall MolProbity score: 2.56 | |----------------------------------------------------------------------| | Ramachandran favored: 97.5 % | | outliers: 2.5 % | | Rotamer outliers: 5.9 % | | Clashscore: 10.9 | |----------------------------------------------------------------------| """) from libtbx.str_utils import print_message_in_box out = StringIO() print_message_in_box( message="This is some terminal-formatted text which needs to be reset.", out=out, width=32, center=True, prefix=" ", frame='*') assert (out.getvalue() == """ ******************************** * This is some * * terminal-formatted text * * which needs to be reset. * ******************************** """)
def twin_the_data_and_analyse(twin_operator, twin_fraction=0.2): out_string = StringIO() miller_array = random_data(35).map_to_asu() miller_array = miller_array.f_as_f_sq() cb_op = sgtbx.change_of_basis_op(twin_operator) miller_array_mod, miller_array_twin = miller_array.common_sets( miller_array.change_basis(cb_op).map_to_asu()) twinned_miller = miller_array_mod.customized_copy( data = (1.0-twin_fraction)*miller_array_mod.data() + twin_fraction*miller_array_twin.data(), sigmas = flex.sqrt( flex.pow( ((1.0-twin_fraction)*miller_array_mod.sigmas()),2.0)+\ flex.pow( ((twin_fraction)*miller_array_twin.sigmas()),2.0)) ) twinned_miller.set_observation_type(miller_array.observation_type()) twin_anal_object = t_a.twin_analyses(twinned_miller, out=out_string, verbose=-100) index = twin_anal_object.twin_summary.most_worrysome_twin_law assert approx_equal(twin_anal_object.twin_summary.britton_alpha[index], twin_fraction, eps=0.1) assert approx_equal(twin_anal_object.twin_law_dependent_analyses[index]. ml_murray_rust.estimated_alpha, twin_fraction, eps=0.1) ## Untwinned data standards if twin_fraction == 0: ## L-test assert approx_equal(twin_anal_object.l_test.mean_l, 0.50, eps=0.1) ## Wilson ratios assert approx_equal(twin_anal_object.twin_summary.i_ratio, 2.00, eps=0.1) ## H-test assert approx_equal( twin_anal_object.twin_law_dependent_analyses[index].h_test.mean_h, 0.50, eps=0.1) ## Perfect twin standards if twin_fraction == 0.5: assert approx_equal(twin_anal_object.l_test.mean_l, 0.375, eps=0.1) assert approx_equal(twin_anal_object.twin_summary.i_ratio, 1.50, eps=0.1) assert approx_equal( twin_anal_object.twin_law_dependent_analyses[index].h_test.mean_h, 0.00, eps=0.1) ## Just make sure we actually detect significant twinning if twin_fraction > 0.10: assert (twin_anal_object.twin_summary.maha_l > 3.0) ## The patterson origin peak should be smallish ... assert (twin_anal_object.twin_summary.patterson_p_value > 0.01) # and the brief test should be passed as well answer = t_a.twin_analyses_brief(twinned_miller, out=out_string, verbose=-100) if twin_fraction > 0.10: assert answer is True
def model_based_outliers(self, f_model, level=.01, return_data=False, plot_out=None): assert self.r_free_flags is not None if (self.r_free_flags.data().count(True) == 0): self.r_free_flags = self.r_free_flags.array( data=~self.r_free_flags.data()) sigmaa_estimator = sigmaa_estimation.sigmaa_estimator( miller_obs=self.miller_obs, miller_calc=f_model, r_free_flags=self.r_free_flags, kernel_width_free_reflections=200, n_sampling_points=20, n_chebyshev_terms=13) sigmaa_estimator.show(out=self.out) sigmaa = sigmaa_estimator.sigmaa() obs_norm = abs(sigmaa_estimator.normalized_obs) calc_norm = sigmaa_estimator.normalized_calc f_model_outlier_object = scaling.likelihood_ratio_outlier_test( f_obs=obs_norm.data(), sigma_obs=None, f_calc=calc_norm.data(), # the data is prenormalized, all epsies are unity epsilon=flex.double(calc_norm.data().size(), 1.0), centric=obs_norm.centric_flags().data(), alpha=sigmaa.data(), beta=1.0 - sigmaa.data() * sigmaa.data()) modes = f_model_outlier_object.posterior_mode() lik = f_model_outlier_object.log_likelihood() p_lik = f_model_outlier_object.posterior_mode_log_likelihood() s_der = f_model_outlier_object.posterior_mode_snd_der() ll_gain = f_model_outlier_object.standardized_likelihood() # The smallest vallue should be 0. # sometimes, due to numerical issues, it comes out # a wee bit negative. please repair that eps = 1.0e-10 zeros = flex.bool(ll_gain < eps) p_values = ll_gain p_values = p_values.set_selected(zeros, eps) p_values = erf(flex.sqrt(p_values / 2.0)) p_values = 1.0 - flex.pow(p_values, float(p_values.size())) # select on p-values flags = flex.bool(p_values > level) flags = self.miller_obs.customized_copy(data=flags) ll_gain = self.miller_obs.customized_copy(data=ll_gain) p_values = self.miller_obs.customized_copy(data=p_values) log_message = """ Model based outlier rejection. ------------------------------ Calculated amplitudes and estimated values of alpha and beta are used to compute the log-likelihood of the observed amplitude. The method is inspired by Read, Acta Cryst. (1999). D55, 1759-1764. Outliers are rejected on the basis of the assumption that a scaled log likelihood differnce 2(log[P(Fobs)]-log[P(Fmode)])/Q\" is distributed according to a Chi-square distribution (Q\" is equal to the second derivative of the log likelihood function of the mode of the distribution). The outlier threshold of the p-value relates to the p-value of the extreme value distribution of the chi-square distribution. """ flags.map_to_asu() ll_gain.map_to_asu() p_values.map_to_asu() assert flags.indices().all_eq(self.miller_obs.indices()) assert ll_gain.indices().all_eq(self.miller_obs.indices()) assert p_values.indices().all_eq(self.miller_obs.indices()) log_message = self.make_log_model(log_message, flags, ll_gain, p_values, obs_norm, calc_norm, sigmaa, plot_out) tmp_log = StringIO() print >> tmp_log, log_message # histogram of log likelihood gain values print >> tmp_log print >> tmp_log, "The histoghram of scaled (LL-gain) values is shown below." print >> tmp_log, " Note: scaled (LL-gain) is approximately Chi-square distributed." print >> tmp_log print >> tmp_log, " scaled(LL-gain) Frequency" histo = flex.histogram(ll_gain.data(), 15) histo.show(f=tmp_log, format_cutoffs='%7.3f') print >> self.out, tmp_log.getvalue() if not return_data: return flags else: assert flags.indices().all_eq(self.miller_obs.indices()) return self.miller_obs.select(flags.data())
def run(args): if len(args)==0: master_params.show(expert_level=100) elif ( "--help" in args ): print("no help available") elif ( "--h" in args ): print("no help available") elif ( "--show_defaults" in args ): master_params.show(expert_level=0) elif ( "--show_defaults_all" in args ): master_params.show(expert_level=10) else: log = multi_out() if (not "--quiet" in args): log.register(label="stdout", file_object=sys.stdout) string_buffer = StringIO() string_buffer_plots = StringIO() log.register(label="log_buffer", file_object=string_buffer) log_plots = StringIO() print("#phil __OFF__", file=log) print(file=log) print(date_and_time(), file=log) print(file=log) print(file=log) phil_objects = [] argument_interpreter = master_params.command_line_argument_interpreter( home_scope="scaling") reflection_file = None for arg in args: command_line_params = None arg_is_processed = False if arg == '--quiet': arg_is_processed = True ## The associated action with this keyword is implemented above if (os.path.isfile(arg)): ## is this a file name? ## Check if this is a phil file try: command_line_params = iotbx.phil.parse(file_name=arg) except KeyboardInterrupt: raise except Exception : pass if command_line_params is not None: phil_objects.append(command_line_params) arg_is_processed = True ## Check if this file is a reflection file if command_line_params is None: reflection_file = reflection_file_reader.any_reflection_file( file_name=arg, ensure_read_access=False) if (reflection_file is not None): reflection_file = arg arg_is_processed = True ## If it is not a file, it must be a phil command else: try: command_line_params = argument_interpreter.process(arg=arg) if command_line_params is not None: phil_objects.append(command_line_params) arg_is_processed = True except KeyboardInterrupt: raise except Exception : pass if not arg_is_processed: print("##----------------------------------------------##", file=log) print("## Unknown phil-file or phil-command:", arg, file=log) print("##----------------------------------------------##", file=log) print(file=log) raise Sorry("Unknown file format or phil command: %s" % arg) effective_params = master_params.fetch(sources=phil_objects) params = effective_params.extract() ## Now please read in the reflections files ## get symmetry and cell data first please ## By default, the native cell and symmetry are used ## as reference crystal_symmetry_nat = None print(params.scaling.input.xray_data.wavelength1.file_name) crystal_symmetry_nat = crystal_symmetry_from_any.extract_from( file_name=params.scaling.input.xray_data.wavelength1.file_name) if params.scaling.input.xray_data.space_group is None: params.scaling.input.xray_data.space_group =\ crystal_symmetry_nat.space_group_info() print("Using symmetry of native data", file=log) if params.scaling.input.xray_data.unit_cell is None: params.scaling.input.xray_data.unit_cell =\ crystal_symmetry_nat.unit_cell() print("Using cell of native data", file=log) ## Check if a unit cell is defined if params.scaling.input.xray_data.space_group is None: raise Sorry("No space group defined") if params.scaling.input.xray_data.unit_cell is None: raise Sorry("No unit cell defined") crystal_symmetry = crystal_symmetry = crystal.symmetry( unit_cell = params.scaling.input.xray_data.unit_cell, space_group_symbol = str( params.scaling.input.xray_data.space_group) ) effective_params = master_params.fetch(sources=phil_objects) new_params = master_params.format(python_object=params) print("Effective parameters", file=log) print("#phil __ON__", file=log) new_params.show(out=log,expert_level=params.scaling.input.expert_level) print("#phil __END__", file=log) print(file=log) ## define a xray data server xray_data_server = reflection_file_utils.reflection_file_server( crystal_symmetry = crystal_symmetry, force_symmetry = True, reflection_files=[]) ## Read in native data and make appropriate selections miller_array_w1 = None miller_array_w1 = xray_data_server.get_xray_data( file_name = params.scaling.input.xray_data.wavelength1.file_name, labels = params.scaling.input.xray_data.wavelength1.labels, ignore_all_zeros = True, parameter_scope = 'scaling.input.SIR_scale.xray_data.native' ) info_native = miller_array_w1.info() miller_array_w1=miller_array_w1.map_to_asu().select( miller_array_w1.indices()!=(0,0,0) ) miller_array_w1 = miller_array_w1.select( miller_array_w1.data() > 0 ) ## Convert to amplitudes if (miller_array_w1.is_xray_intensity_array()): miller_array_w1 = miller_array_w1.f_sq_as_f() elif (miller_array_w1.is_complex_array()): miller_array_w1 = abs(miller_array_w1) if not miller_array_w1.is_real_array(): raise Sorry("miller_array_native is not a real array") miller_array_w1.set_info(info = info_native) ## Read in derivative data and make appropriate selections miller_array_w2 = None miller_array_w2 = xray_data_server.get_xray_data( file_name = params.scaling.input.xray_data.wavelength2.file_name, labels = params.scaling.input.xray_data.wavelength2.labels, ignore_all_zeros = True, parameter_scope = 'scaling.input.SIR_scale.xray_data.derivative' ) info_w2 = miller_array_w2.info() miller_array_w2=miller_array_w2.map_to_asu().select( miller_array_w2.indices()!=(0,0,0) ) miller_array_w2 = miller_array_w2.select( miller_array_w2.data() > 0 ) ## Convert to amplitudes if (miller_array_w2.is_xray_intensity_array()): miller_array_w2 = miller_array_w2.f_sq_as_f() elif (miller_array_w2.is_complex_array()): miller_array_w2 = abs(miller_array_w2) if not miller_array_w2.is_real_array(): raise Sorry("miller_array_derivative is not a real array") miller_array_w2.set_info(info = info_w2) ## Make sure we have anomalous diffs in both files assert miller_array_w1.anomalous_flag() assert miller_array_w2.anomalous_flag() ## Print info print(file=log) print("Wavelength 1", file=log) print("============", file=log) miller_array_w1.show_comprehensive_summary(f=log) print(file=log) w1_pre_scale = pre_scale.pre_scaler( miller_array_w1, params.scaling.input.scaling_strategy.pre_scaler_protocol, params.scaling.input.basic) miller_array_w1 = w1_pre_scale.x1.deep_copy() del w1_pre_scale print(file=log) print("Wavelength 2", file=log) print("============", file=log) miller_array_w2.show_comprehensive_summary(f=log) print(file=log) w2_pre_scale = pre_scale.pre_scaler( miller_array_w2, params.scaling.input.scaling_strategy.pre_scaler_protocol, params.scaling.input.basic) miller_array_w2 = w2_pre_scale.x1.deep_copy() del w2_pre_scale print(file=log) print("Checking for possible reindexing schemes", file=log) print("----------------------------------------", file=log) print(file=log) print("Reindexing operator derived as described in:", file=log) print("Grosse-Kunstleve, Afonine, Sauter & Adams. (2005).", file=log) print(" IUCr Computing Commission Newsletter 5.", file=log) print(file=log) reindex_object = pair_analyses.reindexing( set_a=miller_array_w1, set_b=miller_array_w2, out=log) miller_array_w2 = reindex_object.select_and_transform() miller_array_w2.map_to_asu() print(file=log) print("Relative scaling of 2-wavelength mad data", file=log) print("-----------------------------------------", file=log) print(file=log) scaler = fa_estimation.combined_scaling( miller_array_w1, miller_array_w2, params.scaling.input.scaling_strategy.iso_protocol) miller_array_w1 = scaler.x1.deep_copy() miller_array_w2 = scaler.x2.deep_copy() del scaler print(file=log) print("Estimating f\" and f' ratios", file=log) print("----------------------------", file=log) print(file=log) # now things are scaled see if we can guestimate the ratio fdpratio = pair_analyses.f_double_prime_ratio( miller_array_w1, miller_array_w2) fpfdpratio = pair_analyses.delta_f_prime_f_double_prime_ratio( miller_array_w1, miller_array_w2) k1 = fdpratio.ratio k2 = fpfdpratio.ratio if k1 is not None: print(file=log) print(" The estimate of f\"(w1)/f\"(w2) is %3.2f"\ %(fdpratio.ratio), file=log) if k2 is not None: print(" The estimate of (f'(w1)-f'(w2))/f\"(w2) is %3.2f"\ %(fpfdpratio.ratio), file=log) print(file=log) print(" The quality of these estimates depends to a large extend", file=log) print(" on the quality of the data. If user supplied values", file=log) print(" of f\" and f' are given, they will be used instead ", file=log) print(" of the estimates.", file=log) print(file=log) if params.scaling.input.xray_data.wavelength1.f_double_prime is not None: if params.scaling.input.xray_data.wavelength2.f_double_prime is not None: k1 = (params.scaling.input.xray_data.wavelength1.f_double_prime/ params.scaling.input.xray_data.wavelength2.f_double_prime) print(" Using user specified f\" values", file=log) print(" user specified f\"(w1)/f\"(w2) is %3.2f"\ %(k1), file=log) print(file=log) if params.scaling.input.xray_data.wavelength1.f_prime is not None: if params.scaling.input.xray_data.wavelength2.f_prime is not None: if params.scaling.input.xray_data.wavelength2.f_double_prime is not None: k2 = (params.scaling.input.xray_data.wavelength1.f_prime- params.scaling.input.xray_data.wavelength2.f_prime)\ /params.scaling.input.xray_data.wavelength2.f_double_prime print(" Using user specified f\" and f' values", file=log) print(" user specified f\"(w1)/f\"(w2) is %3.2f"\ %(k2), file=log) print(file=log) fa_gen = fa_estimation.twmad_fa_driver(miller_array_w1, miller_array_w2, k1, k2, params.scaling.input.fa_estimation) print(file=log) print("writing mtz file", file=log) print("----------------", file=log) print(file=log) ## Please write out the abs_delta_f array fa = fa_gen.fa_values mtz_dataset = fa.as_mtz_dataset( column_root_label='F'+params.scaling.input.output.outlabel) mtz_dataset.mtz_object().write( file_name=params.scaling.input.output.hklout)
def model_based_outliers(self, f_model, level=0.01, return_data=False, plot_out=None): assert self.r_free_flags is not None if self.r_free_flags.data().count(True) == 0: self.r_free_flags = self.r_free_flags.array(data=~self.r_free_flags.data()) sigmaa_estimator = sigmaa_estimation.sigmaa_estimator( miller_obs=self.miller_obs, miller_calc=f_model, r_free_flags=self.r_free_flags, kernel_width_free_reflections=200, n_sampling_points=20, n_chebyshev_terms=13, ) sigmaa_estimator.show(out=self.out) sigmaa = sigmaa_estimator.sigmaa() obs_norm = abs(sigmaa_estimator.normalized_obs) calc_norm = sigmaa_estimator.normalized_calc f_model_outlier_object = scaling.likelihood_ratio_outlier_test( f_obs=obs_norm.data(), sigma_obs=None, f_calc=calc_norm.data(), # the data is prenormalized, all epsies are unity epsilon=flex.double(calc_norm.data().size(), 1.0), centric=obs_norm.centric_flags().data(), alpha=sigmaa.data(), beta=1.0 - sigmaa.data() * sigmaa.data(), ) modes = f_model_outlier_object.posterior_mode() lik = f_model_outlier_object.log_likelihood() p_lik = f_model_outlier_object.posterior_mode_log_likelihood() s_der = f_model_outlier_object.posterior_mode_snd_der() ll_gain = f_model_outlier_object.standardized_likelihood() # The smallest vallue should be 0. # sometimes, due to numerical issues, it comes out # a wee bit negative. please repair that eps = 1.0e-10 zeros = flex.bool(ll_gain < eps) p_values = ll_gain p_values = p_values.set_selected(zeros, eps) p_values = erf(flex.sqrt(p_values / 2.0)) p_values = 1.0 - flex.pow(p_values, float(p_values.size())) # select on p-values flags = flex.bool(p_values > level) flags = self.miller_obs.customized_copy(data=flags) ll_gain = self.miller_obs.customized_copy(data=ll_gain) p_values = self.miller_obs.customized_copy(data=p_values) log_message = """ Model based outlier rejection. ------------------------------ Calculated amplitudes and estimated values of alpha and beta are used to compute the log-likelihood of the observed amplitude. The method is inspired by Read, Acta Cryst. (1999). D55, 1759-1764. Outliers are rejected on the basis of the assumption that a scaled log likelihood differnce 2(log[P(Fobs)]-log[P(Fmode)])/Q\" is distributed according to a Chi-square distribution (Q\" is equal to the second derivative of the log likelihood function of the mode of the distribution). The outlier threshold of the p-value relates to the p-value of the extreme value distribution of the chi-square distribution. """ flags.map_to_asu() ll_gain.map_to_asu() p_values.map_to_asu() assert flags.indices().all_eq(self.miller_obs.indices()) assert ll_gain.indices().all_eq(self.miller_obs.indices()) assert p_values.indices().all_eq(self.miller_obs.indices()) log_message = self.make_log_model(log_message, flags, ll_gain, p_values, obs_norm, calc_norm, sigmaa, plot_out) tmp_log = StringIO() print >> tmp_log, log_message # histogram of log likelihood gain values print >> tmp_log print >> tmp_log, "The histoghram of scaled (LL-gain) values is shown below." print >> tmp_log, " Note: scaled (LL-gain) is approximately Chi-square distributed." print >> tmp_log print >> tmp_log, " scaled(LL-gain) Frequency" histo = flex.histogram(ll_gain.data(), 15) histo.show(f=tmp_log, format_cutoffs="%7.3f") print >>self.out, tmp_log.getvalue() if not return_data: return flags else: assert flags.indices().all_eq(self.miller_obs.indices()) return self.miller_obs.select(flags.data())