Ejemplo n.º 1
0
def exercise():
    from libtbx.test_utils import show_diff, Exception_expected
    from six.moves import cPickle as pickle
    #
    from libtbx.str_utils import split_keeping_spaces
    assert split_keeping_spaces(s="") == []
    assert split_keeping_spaces(s=" ") == [" "]
    assert split_keeping_spaces(s="a") == ["a"]
    assert split_keeping_spaces(s="abc") == ["abc"]
    assert split_keeping_spaces(s=" a") == [" ", "a"]
    assert split_keeping_spaces(s="  a") == ["  ", "a"]
    assert split_keeping_spaces(s="  abc") == ["  ", "abc"]
    assert split_keeping_spaces(s="  abc ") == ["  ", "abc", " "]
    assert split_keeping_spaces(s="  abc  ") == ["  ", "abc", "  "]
    assert split_keeping_spaces(s="a ") == ["a", " "]
    assert split_keeping_spaces(s="a  ") == ["a", "  "]
    assert split_keeping_spaces(s="abc  ") == ["abc", "  "]
    assert split_keeping_spaces(s="a b") == ["a", " ", "b"]
    assert split_keeping_spaces(s="a  b") == ["a", "  ", "b"]
    assert split_keeping_spaces(s="  a  b c   d ") == [
        "  ", "a", "  ", "b", " ", "c", "   ", "d", " "
    ]
    #
    from libtbx.str_utils import size_as_string_with_commas
    assert size_as_string_with_commas(0) == "0"
    assert size_as_string_with_commas(1) == "1"
    assert size_as_string_with_commas(-1) == "-1"
    assert size_as_string_with_commas(10) == "10"
    assert size_as_string_with_commas(100) == "100"
    assert size_as_string_with_commas(1000) == "1,000"
    assert size_as_string_with_commas(12345) == "12,345"
    assert size_as_string_with_commas(12345678) == "12,345,678"
    assert size_as_string_with_commas(-12345678) == "-12,345,678"
    #
    from libtbx.str_utils import show_string
    assert show_string("abc") == '"abc"'
    assert show_string("a'c") == '"a\'c"'
    assert show_string('a"c') == "'a\"c'"
    assert show_string('\'"c') == '"\'\\"c"'
    #
    from libtbx.str_utils import prefix_each_line
    assert prefix_each_line(prefix="^",
                            lines_as_one_string="""\
hello
world""") == """\
^hello
^world"""
    #
    from libtbx.str_utils import prefix_each_line_suffix
    assert prefix_each_line_suffix(prefix="^",
                                   lines_as_one_string="""\
hello
world""",
                                   suffix=" ") == """\
^hello
^world"""
    assert prefix_each_line_suffix(prefix="^",
                                   lines_as_one_string="""\
hello
world""",
                                   suffix=" ",
                                   rstrip=False) == """\
^hello%s
^world """ % " "
    #
    from libtbx.str_utils import show_sorted_by_counts
    from six.moves import cStringIO
    out = cStringIO()
    assert show_sorted_by_counts(label_count_pairs=[("b", 3), ("a", 3),
                                                    ("c", -2)],
                                 out=out,
                                 prefix="%")
    assert not show_diff(out.getvalue(), """\
%"a"  3
%"b"  3
%"c" -2
""")
    out = cStringIO()
    assert show_sorted_by_counts(label_count_pairs=[("b", -3), ("a", -3),
                                                    ("c", 2)],
                                 reverse=False,
                                 out=out,
                                 prefix="%",
                                 annotations=[None, "", "x"])
    assert not show_diff(out.getvalue(), """\
%"a" -3
%"b" -3
%"c"  2 x
""")
    #
    from libtbx.str_utils import line_breaker
    for string, expected_result in [
        ("", [""]), ("this is", ["this is"]), ("this is a", ["this is", "a"]),
        ("this is a sentence", ["this is", "a", "sentence"]),
        ("this is a longer sentence", ["this is", "a", "longer", "sentence"]),
        ("this is a very long sentence indeed",
         ["this is", "a very", "long", "sentence", "indeed"])
    ]:
        assert [block
                for block in line_breaker(string, width=7)] == expected_result
    #
    from libtbx.str_utils import StringIO
    out1 = cStringIO()
    out2 = StringIO()
    out3 = StringIO("Hello world!\n")
    print("Hello world!", file=out1)
    print("Hello world!", file=out2)
    try:
        print("Hello world!", file=out3)
    except AttributeError:
        pass
    else:
        raise Exception_expected
    out4 = pickle.loads(pickle.dumps(out2))
    out5 = pickle.loads(pickle.dumps(out3))
    assert out4.getvalue() == out1.getvalue() == out2.getvalue(
    ) == out5.getvalue()
    #
    from libtbx.str_utils import reformat_terminal_text
    txt1 = """
This is some
terminal-formatted
text which needs
to be reset.
"""
    assert (reformat_terminal_text(txt1) ==
            "This is some terminal-formatted text which needs to be reset.")
    txt2 = """
  This is more
  terminal-formatted
  text which needs
  to be reset.
"""
    #
    from libtbx.str_utils import strip_lines, rstrip_lines
    lines = ["  This is more ", "  terminal-formatted ", "  text "]
    assert (
        strip_lines(txt2) ==
        "\nThis is more\nterminal-formatted\ntext which needs\nto be reset.")
    assert (
        rstrip_lines(txt2) ==
        "\n  This is more\n  terminal-formatted\n  text which needs\n  to be reset."
    )
    #
    from libtbx.str_utils import expandtabs_track_columns

    def check(s):
        es, js = expandtabs_track_columns(s=s)
        assert len(js) == len(s)
        assert es == s.expandtabs()
        sr = "".join([es[j] for j in js])
        assert sr == s.replace("\t", " ")

    check("")
    check("\t")
    check("\t\t")
    check("\ty")
    check("x\ty")
    check("x\ty\tz")
    check("\txy\t\tz")
    check("abcdefg\txy\t\tz")
    check("ab defgh\txyz\t\tu")
    #
    from libtbx.str_utils import format_value
    assert format_value("%.4f", 1.2345678) == "1.2346"
    assert format_value("%.4f", None) == "  None"
    assert format_value("%.4f", None, replace_none_with="---") == "   ---"
    #
    from libtbx.str_utils import make_header
    out = StringIO()
    make_header("Header 1", out=out)
    assert (out.getvalue() == """
=================================== Header 1 ==================================

""")
    out = StringIO()
    make_header("Header 2", out=out)
    assert (out.getvalue() == """
=================================== Header 2 ==================================

""")
    #
    import sys
    from libtbx.str_utils import string_representation
    iset = list(range(130)) + list(range(250, 256))
    for i in iset:
        s = chr(i)
        for j in iset:
            ss = s + chr(j)
            sr = string_representation(string=ss,
                                       preferred_quote="'",
                                       alternative_quote='"')
            if sys.hexversion < 0x03000000:
                assert sr == repr(ss)
            else:
                assert eval(sr) == ss
    from libtbx.str_utils import framed_output
    out = StringIO()
    box = framed_output(out, frame='#')
    print("Hello, world!", file=box)
    box.close()
    assert (out.getvalue() == """
#################
# Hello, world! #
#################
""")
    out = StringIO()
    box = framed_output(out,
                        frame='-',
                        width=80,
                        center=True,
                        title="Refinement stats")
    box.write("r_free = 0.1234")
    box.write("  ")
    box.write("r_work = 0.1567")
    box.close()
    assert (out.getvalue() == """
|--------------------------------Refinement stats------------------------------|
|                       r_free = 0.1234  r_work = 0.1567                       |
|------------------------------------------------------------------------------|
""")
    out = StringIO()
    box = framed_output(out,
                        frame='-',
                        width=72,
                        prefix="    ",
                        title="Validation summary")
    print("Overall MolProbity score: 2.56", file=box)
    box.add_separator()
    print("""\
Ramachandran favored:  97.5 %
             outliers:  2.5 %
Rotamer outliers:       5.9 %
Clashscore:            10.9""",
          file=box)
    assert (out.getvalue() == "")
    del box
    assert (out.getvalue() == """
    |-Validation summary---------------------------------------------------|
    | Overall MolProbity score: 2.56                                       |
    |----------------------------------------------------------------------|
    | Ramachandran favored:  97.5 %                                        |
    |              outliers:  2.5 %                                        |
    | Rotamer outliers:       5.9 %                                        |
    | Clashscore:            10.9                                          |
    |----------------------------------------------------------------------|
""")
    from libtbx.str_utils import print_message_in_box
    out = StringIO()
    print_message_in_box(
        message="This is some terminal-formatted text which needs to be reset.",
        out=out,
        width=32,
        center=True,
        prefix="  ",
        frame='*')
    assert (out.getvalue() == """
  ********************************
  *         This is some         *
  *   terminal-formatted text    *
  *   which needs to be reset.   *
  ********************************
""")
    from libtbx.str_utils import make_big_header
    out = StringIO()
    make_big_header("Section title", out=out)
    assert (out.getvalue() == """
################################################################################
#                                Section title                                 #
################################################################################
""")
Ejemplo n.º 2
0
def run(args, command_name="phenix.remove_outliers"):
    if (len(args) == 0 or "--help" in args or "--h" in args or "-h" in args):
        print_help(command_name=command_name)
    else:
        log = multi_out()
        plot_out = None
        if (not "--quiet" in args):
            log.register(label="stdout", file_object=sys.stdout)
        string_buffer = StringIO()
        string_buffer_plots = StringIO()
        log.register(label="log_buffer", file_object=string_buffer)

        phil_objects = []
        argument_interpreter = master_params.command_line_argument_interpreter(
            home_scope="outlier_detection")

        for arg in args:
            command_line_params = None
            arg_is_processed = False
            # is it a file?
            if arg == "--quiet":
                arg_is_processed = True
            if (os.path.isfile(arg)):  ## is this a file name?
                # check if it is a phil file
                try:
                    command_line_params = iotbx.phil.parse(file_name=arg)
                    if command_line_params is not None:
                        phil_objects.append(command_line_params)
                        arg_is_processed = True
                except KeyboardInterrupt:
                    raise
                except Exception:
                    pass
            else:
                try:
                    command_line_params = argument_interpreter.process(arg=arg)
                    if command_line_params is not None:
                        phil_objects.append(command_line_params)
                        arg_is_processed = True
                except KeyboardInterrupt:
                    raise
                except Exception:
                    pass

            if not arg_is_processed:
                print >> log, "##----------------------------------------------##"
                print >> log, "## Unknown file or keyword:", arg
                print >> log, "##----------------------------------------------##"
                print >> log
                raise Sorry("Unknown file or keyword: %s" % arg)

        effective_params = master_params.fetch(sources=phil_objects)
        params = effective_params.extract()
        if not os.path.exists(params.outlier_utils.input.xray_data.file_name):
            raise Sorry("File %s can not be found" %
                        (params.outlier_utils.input.xray_data.file_name))
        if params.outlier_utils.input.model.file_name is not None:
            if not os.path.exists(params.outlier_utils.input.model.file_name):
                raise Sorry("File %s can not be found" %
                            (params.outlier_utils.input.model.file_name))

        # now get the unit cell from the pdb file

        hkl_xs = None
        if params.outlier_utils.input.xray_data.file_name is not None:
            hkl_xs = crystal_symmetry_from_any.extract_from(
                file_name=params.outlier_utils.input.xray_data.file_name)
        pdb_xs = None
        if params.outlier_utils.input.model.file_name is not None:
            pdb_xs = crystal_symmetry_from_any.extract_from(
                file_name=params.outlier_utils.input.model.file_name)

        phil_xs = crystal.symmetry(
            unit_cell=params.outlier_utils.input.unit_cell,
            space_group_info=params.outlier_utils.input.space_group)

        phil_xs.show_summary()
        hkl_xs.show_summary()

        combined_xs = crystal.select_crystal_symmetry(None, phil_xs, [pdb_xs],
                                                      [hkl_xs])

        # inject the unit cell and symmetry in the phil scope please
        params.outlier_utils.input.unit_cell = combined_xs.unit_cell()
        params.outlier_utils.input.space_group = \
          sgtbx.space_group_info( group = combined_xs.space_group() )

        new_params = master_params.format(python_object=params)
        new_params.show(out=log)

        if params.outlier_utils.input.unit_cell is None:
            raise Sorry("unit cell not specified")
        if params.outlier_utils.input.space_group is None:
            raise Sorry("space group not specified")
        if params.outlier_utils.input.xray_data.file_name is None:
            raise Sorry("Xray data not specified")
        if params.outlier_utils.input.model.file_name is None:
            print "PDB file not specified. Basic wilson outlier rejections only."

        #-----------------------------------------------------------
        #
        # step 1: read in the reflection file
        #
        phil_xs = crystal.symmetry(
            unit_cell=params.outlier_utils.input.unit_cell,
            space_group_info=params.outlier_utils.input.space_group)

        xray_data_server = reflection_file_utils.reflection_file_server(
            crystal_symmetry=phil_xs, force_symmetry=True, reflection_files=[])

        miller_array = None

        miller_array = xray_data_server.get_xray_data(
            file_name=params.outlier_utils.input.xray_data.file_name,
            labels=params.outlier_utils.input.xray_data.obs_labels,
            ignore_all_zeros=True,
            parameter_scope='outlier_utils.input.xray_data',
            parameter_name='obs_labels')

        info = miller_array.info()

        miller_array = miller_array.map_to_asu()

        miller_array = miller_array.select(miller_array.indices() != (0, 0, 0))

        #we have to check if the sigma's make any sense at all
        if not miller_array.sigmas_are_sensible():
            miller_array = miller_array.customized_copy(
                data=miller_array.data(),
                sigmas=None).set_observation_type(miller_array)
        miller_array = miller_array.select(miller_array.data() > 0)
        if miller_array.sigmas() is not None:
            miller_array = miller_array.select(miller_array.sigmas() > 0)

        if (miller_array.is_xray_intensity_array()):
            miller_array = miller_array.f_sq_as_f()
        elif (miller_array.is_complex_array()):
            miller_array = abs(miller_array)

        miller_array.set_info(info)
        merged_anomalous = False
        if miller_array.anomalous_flag():
            miller_array = miller_array.average_bijvoet_mates(
            ).set_observation_type(miller_array)
            merged_anomalous = True
        miller_array = miller_array.map_to_asu()

        # get the free reflections please
        free_flags = None
        if params.outlier_utils.input.xray_data.free_flags is None:
            free_flags = miller_array.generate_r_free_flags(
               fraction=params.outlier_utils.\
                 additional_parameters.free_flag_generation.fraction,
               max_free=params.outlier_utils.\
                 additional_parameters.free_flag_generation.max_number,
               lattice_symmetry_max_delta=params.outlier_utils.\
                 additional_parameters.free_flag_generation.lattice_symmetry_max_delta,
               use_lattice_symmetry=params.outlier_utils.\
                 additional_parameters.free_flag_generation.use_lattice_symmetry
              )
        else:
            free_flags = xray_data_server.get_xray_data(
                file_name=params.outlier_utils.input.xray_data.file_name,
                labels=params.outlier_utils.input.xray_data.free_flags,
                ignore_all_zeros=True,
                parameter_scope='outlier_utils.input.xray_data',
                parameter_name='free_flags')

            if free_flags.anomalous_flag():
                free_flags = free_flags.average_bijvoet_mates()
                merged_anomalous = True
            free_flags = free_flags.customized_copy(data=flex.bool(
                free_flags.data() == 1))
            free_flags = free_flags.map_to_asu()
            free_flags = free_flags.common_set(miller_array)

        print >> log
        print >> log, "Summary info of observed data"
        print >> log, "============================="
        miller_array.show_summary(f=log)
        if merged_anomalous:
            print >> log, "For outlier detection purposes, the Bijvoet pairs have been merged."
        print >> log

        print >> log, "Constructing an outlier manager"
        print >> log, "==============================="
        print >> log
        outlier_manager = outlier_rejection.outlier_manager(miller_array,
                                                            free_flags,
                                                            out=log)

        basic_array = None
        extreme_array = None
        model_based_array = None

        basic_array = outlier_manager.basic_wilson_outliers(
          p_basic_wilson = params.outlier_utils.outlier_detection.\
                           parameters.basic_wilson.level,
          return_data = True)

        extreme_array = outlier_manager.extreme_wilson_outliers(
          p_extreme_wilson = params.outlier_utils.outlier_detection.parameters.\
                             extreme_wilson.level,
          return_data = True)

        beamstop_array = outlier_manager.beamstop_shadow_outliers(
          level = params.outlier_utils.outlier_detection.parameters.\
                   beamstop.level,
          d_min = params.outlier_utils.outlier_detection.parameters.\
                   beamstop.d_min,
          return_data=True)

        #----------------------------------------------------------------
        # Step 2: get an xray structure from the PDB file
        #
        if params.outlier_utils.input.model.file_name is not None:
            model = pdb.input(file_name=params.outlier_utils.input.model.
                              file_name).xray_structure_simple(
                                  crystal_symmetry=phil_xs)
            print >> log, "Atomic model summary"
            print >> log, "===================="
            model.show_summary(f=log)
            print >> log

            # please make an f_model object for bulk solvent scaling etc etc

            f_model_object = f_model.manager(f_obs=miller_array,
                                             r_free_flags=free_flags,
                                             xray_structure=model)
            print >> log, "Bulk solvent scaling of the data"
            print >> log, "================================"
            print >> log, "Maximum likelihood bulk solvent scaling."
            print >> log
            f_model_object.update_all_scales(log=log, remove_outliers=False)
            plot_out = StringIO()
            model_based_array = outlier_manager.model_based_outliers(
                f_model_object.f_model(),
                level=params.outlier_utils.outlier_detection.parameters.
                model_based.level,
                return_data=True,
                plot_out=plot_out)
        #check what needs to be put out please
        if params.outlier_utils.output.hklout is not None:
            if params.outlier_utils.outlier_detection.protocol == "model":
                if params.outlier_utils.input.model.file_name == None:
                    print >> log, "Model based rejections requested. No model was supplied."
                    print >> log, "Switching to writing out rejections based on extreme value Wilson statistics."
                    params.outlier_utils.outlier_detection.protocol = "extreme"

            output_array = None
            print >> log
            if params.outlier_utils.outlier_detection.protocol == "basic":
                print >> log, "Non-outliers found by the basic wilson statistics"
                print >> log, "protocol will be written out."
                output_array = basic_array
                new_set_of_free_flags = free_flags.common_set(basic_array)

            if params.outlier_utils.outlier_detection.protocol == "extreme":
                print >> log, "Non-outliers found by the extreme value wilson statistics"
                print >> log, "protocol will be written out."
                output_array = extreme_array
                new_set_of_free_flags = free_flags.common_set(extreme_array)

            if params.outlier_utils.outlier_detection.protocol == "model":
                print >> log, "Non-outliers found by the model based"
                print >> log, "protocol will be written out to the file:"
                print >> log, params.outlier_utils.output.hklout
                print >> log
                output_array = model_based_array
                new_set_of_free_flags = free_flags.common_set(
                    model_based_array)

            if params.outlier_utils.outlier_detection.protocol == "beamstop":
                print >> log, "Outliers found for the beamstop shadow"
                print >> log, "problems detection protocol will be written to the file:"
                print >> log, params.outlier_utils.output.hklout
                print >> log
                output_array = model_based_array
                new_set_of_free_flags = free_flags.common_set(
                    model_based_array)

            mtz_dataset = output_array.as_mtz_dataset(column_root_label="FOBS")
            mtz_dataset = mtz_dataset.add_miller_array(
                miller_array=new_set_of_free_flags,
                column_root_label="Free_R_Flag")
            mtz_dataset.mtz_object().write(
                file_name=params.outlier_utils.output.hklout)

        if (params.outlier_utils.output.logfile is not None):
            final_log = StringIO()
            print >> final_log, string_buffer.getvalue()
            print >> final_log
            if plot_out is not None:
                print >> final_log, plot_out.getvalue()
            outfile = open(params.outlier_utils.output.logfile, 'w')
            outfile.write(final_log.getvalue())
            print >> log
            print >> log, "A logfile named %s was created." % (
                params.outlier_utils.output.logfile)
            print >> log, "This logfile contains the screen output and"
            print >> log, "(possibly) some ccp4 style loggraph plots"
Ejemplo n.º 3
0
def run(args, command_name="phenix.remove_outliers"):
  if (len(args)==0 or "--help" in args or "--h" in args or "-h" in args):
    print_help(command_name=command_name)
  else:
    log = multi_out()
    plot_out = None
    if (not "--quiet" in args):
      log.register(label="stdout", file_object=sys.stdout)
    string_buffer = StringIO()
    string_buffer_plots = StringIO()
    log.register(label="log_buffer", file_object=string_buffer)

    phil_objects = []
    argument_interpreter = master_params.command_line_argument_interpreter(
      home_scope="outlier_detection")

    for arg in args:
      command_line_params = None
      arg_is_processed = False
      # is it a file?
      if arg=="--quiet":
        arg_is_processed = True
      if (os.path.isfile(arg)): ## is this a file name?
        # check if it is a phil file
        try:
          command_line_params = iotbx.phil.parse(file_name=arg)
          if command_line_params is not None:
            phil_objects.append(command_line_params)
            arg_is_processed = True
        except KeyboardInterrupt: raise
        except Exception : pass
      else:
        try:
          command_line_params = argument_interpreter.process(arg=arg)
          if command_line_params is not None:
            phil_objects.append(command_line_params)
            arg_is_processed = True
        except KeyboardInterrupt: raise
        except Exception : pass

      if not arg_is_processed:
        print >> log, "##----------------------------------------------##"
        print >> log, "## Unknown file or keyword:", arg
        print >> log, "##----------------------------------------------##"
        print >> log
        raise Sorry("Unknown file or keyword: %s" % arg)

    effective_params = master_params.fetch(sources=phil_objects)
    params = effective_params.extract()
    if not os.path.exists( params.outlier_utils.input.xray_data.file_name ) :
      raise Sorry("File %s can not be found"%(params.outlier_utils.input.xray_data.file_name) )
    if params.outlier_utils.input.model.file_name is not None:
      if not os.path.exists( params.outlier_utils.input.model.file_name ):
        raise Sorry("File %s can not be found"%(params.outlier_utils.input.model.file_name) )



    # now get the unit cell from the pdb file

    hkl_xs = None
    if params.outlier_utils.input.xray_data.file_name is not None:
      hkl_xs = crystal_symmetry_from_any.extract_from(
        file_name=params.outlier_utils.input.xray_data.file_name)
    pdb_xs = None
    if params.outlier_utils.input.model.file_name is not None:
      pdb_xs = crystal_symmetry_from_any.extract_from(
        file_name=params.outlier_utils.input.model.file_name)

    phil_xs = crystal.symmetry(
      unit_cell=params.outlier_utils.input.unit_cell,
      space_group_info=params.outlier_utils.input.space_group  )

    phil_xs.show_summary()
    hkl_xs.show_summary()


    combined_xs = crystal.select_crystal_symmetry(
      None,phil_xs, [pdb_xs],[hkl_xs])

    # inject the unit cell and symmetry in the phil scope please
    params.outlier_utils.input.unit_cell = combined_xs.unit_cell()
    params.outlier_utils.input.space_group = \
      sgtbx.space_group_info( group = combined_xs.space_group() )

    new_params =  master_params.format(python_object=params)
    new_params.show(out=log)

    if params.outlier_utils.input.unit_cell is None:
      raise Sorry("unit cell not specified")
    if params.outlier_utils.input.space_group is None:
      raise Sorry("space group not specified")
    if params.outlier_utils.input.xray_data.file_name is None:
      raise Sorry("Xray data not specified")
    if params.outlier_utils.input.model.file_name is None:
      print "PDB file not specified. Basic wilson outlier rejections only."



    #-----------------------------------------------------------
    #
    # step 1: read in the reflection file
    #
    phil_xs = crystal.symmetry(
      unit_cell=params.outlier_utils.input.unit_cell,
      space_group_info=params.outlier_utils.input.space_group  )

    xray_data_server =  reflection_file_utils.reflection_file_server(
      crystal_symmetry = phil_xs,
      force_symmetry = True,
      reflection_files=[])

    miller_array = None

    miller_array = xray_data_server.get_xray_data(
      file_name = params.outlier_utils.input.xray_data.file_name,
      labels = params.outlier_utils.input.xray_data.obs_labels,
      ignore_all_zeros = True,
      parameter_scope = 'outlier_utils.input.xray_data',
      parameter_name = 'obs_labels'
      )

    info = miller_array.info()

    miller_array = miller_array.map_to_asu()

    miller_array = miller_array.select(
      miller_array.indices() != (0,0,0))

    #we have to check if the sigma's make any sense at all
    if not miller_array.sigmas_are_sensible():
      miller_array = miller_array.customized_copy(
        data = miller_array.data(),
        sigmas=None).set_observation_type(miller_array)
    miller_array = miller_array.select(
      miller_array.data() > 0 )
    if  miller_array.sigmas() is not None:
      miller_array = miller_array.select(
        miller_array.sigmas() > 0 )

    if (miller_array.is_xray_intensity_array()):
      miller_array = miller_array.f_sq_as_f()
    elif (miller_array.is_complex_array()):
      miller_array = abs(miller_array)

    miller_array.set_info(info)
    merged_anomalous=False
    if miller_array.anomalous_flag():
      miller_array = miller_array.average_bijvoet_mates().set_observation_type(
        miller_array )
      merged_anomalous=True
    miller_array = miller_array.map_to_asu()

    # get the free reflections please
    free_flags = None
    if params.outlier_utils.input.xray_data.free_flags is None:
      free_flags = miller_array.generate_r_free_flags(
         fraction=params.outlier_utils.\
           additional_parameters.free_flag_generation.fraction,
         max_free=params.outlier_utils.\
           additional_parameters.free_flag_generation.max_number,
         lattice_symmetry_max_delta=params.outlier_utils.\
           additional_parameters.free_flag_generation.lattice_symmetry_max_delta,
         use_lattice_symmetry=params.outlier_utils.\
           additional_parameters.free_flag_generation.use_lattice_symmetry
        )
    else:
      free_flags = xray_data_server.get_xray_data(
        file_name = params.outlier_utils.input.xray_data.file_name,
        labels = params.outlier_utils.input.xray_data.free_flags,
        ignore_all_zeros = True,
        parameter_scope = 'outlier_utils.input.xray_data',
        parameter_name = 'free_flags'
        )

      if free_flags.anomalous_flag():
        free_flags = free_flags.average_bijvoet_mates()
        merged_anomalous=True
      free_flags = free_flags.customized_copy(
        data = flex.bool( free_flags.data() == 1 ))
      free_flags = free_flags.map_to_asu()
      free_flags = free_flags.common_set( miller_array )


    print >> log
    print >> log, "Summary info of observed data"
    print >> log, "============================="
    miller_array.show_summary(f=log)
    if merged_anomalous:
      print >> log, "For outlier detection purposes, the Bijvoet pairs have been merged."
    print >> log

    print >> log, "Constructing an outlier manager"
    print >> log, "==============================="
    print >> log
    outlier_manager = outlier_rejection.outlier_manager(
      miller_array,
      free_flags,
      out=log)

    basic_array = None
    extreme_array = None
    model_based_array = None

    basic_array = outlier_manager.basic_wilson_outliers(
      p_basic_wilson = params.outlier_utils.outlier_detection.\
                       parameters.basic_wilson.level,
      return_data = True)

    extreme_array = outlier_manager.extreme_wilson_outliers(
      p_extreme_wilson = params.outlier_utils.outlier_detection.parameters.\
                         extreme_wilson.level,
      return_data = True)

    beamstop_array = outlier_manager.beamstop_shadow_outliers(
      level = params.outlier_utils.outlier_detection.parameters.\
               beamstop.level,
      d_min = params.outlier_utils.outlier_detection.parameters.\
               beamstop.d_min,
      return_data=True)



    #----------------------------------------------------------------
    # Step 2: get an xray structure from the PDB file
    #
    if params.outlier_utils.input.model.file_name is not None:
      model = pdb.input(file_name=params.outlier_utils.input.model.file_name).xray_structure_simple(
        crystal_symmetry=phil_xs)
      print >> log, "Atomic model summary"
      print >> log, "===================="
      model.show_summary(f=log)
      print >> log


      # please make an f_model object for bulk solvent scaling etc etc

      f_model_object = f_model.manager(
        f_obs = miller_array,
        r_free_flags = free_flags,
        xray_structure = model )
      print >> log, "Bulk solvent scaling of the data"
      print >> log, "================================"
      print >> log, "Maximum likelihood bulk solvent scaling."
      print >> log
      f_model_object.update_solvent_and_scale(out=log)
      plot_out = StringIO()
      model_based_array = outlier_manager.model_based_outliers(
        f_model_object.f_model(),
        level=params.outlier_utils.outlier_detection.parameters.model_based.level,
        return_data=True,
        plot_out=plot_out)
    #check what needs to be put out please
    if params.outlier_utils.output.hklout is not None:
      if params.outlier_utils.outlier_detection.protocol == "model":
        if params.outlier_utils.input.model.file_name == None:
          print >> log, "Model based rejections requested. No model was supplied."
          print >> log, "Switching to writing out rejections based on extreme value Wilson statistics."
          params.outlier_utils.outlier_detection.protocol="extreme"

      output_array = None
      print >> log
      if params.outlier_utils.outlier_detection.protocol == "basic":
        print >> log, "Non-outliers found by the basic wilson statistics"
        print >> log, "protocol will be written out."
        output_array = basic_array
        new_set_of_free_flags = free_flags.common_set( basic_array )

      if params.outlier_utils.outlier_detection.protocol == "extreme":
        print >> log, "Non-outliers found by the extreme value wilson statistics"
        print >> log, "protocol will be written out."
        output_array = extreme_array
        new_set_of_free_flags = free_flags.common_set( extreme_array )

      if params.outlier_utils.outlier_detection.protocol == "model":
        print >> log, "Non-outliers found by the model based"
        print >> log, "protocol will be written out to the file:"
        print >> log, params.outlier_utils.output.hklout
        print >> log
        output_array = model_based_array
        new_set_of_free_flags = free_flags.common_set( model_based_array )

      if params.outlier_utils.outlier_detection.protocol == "beamstop":
        print >> log, "Outliers found for the beamstop shadow"
        print >> log, "problems detection protocol will be written to the file:"
        print >> log, params.outlier_utils.output.hklout
        print >> log
        output_array = model_based_array
        new_set_of_free_flags = free_flags.common_set( model_based_array )

      mtz_dataset = output_array.as_mtz_dataset(
        column_root_label="FOBS")
      mtz_dataset = mtz_dataset.add_miller_array(
        miller_array = new_set_of_free_flags,
        column_root_label = "Free_R_Flag"
        )
      mtz_dataset.mtz_object().write(
        file_name=params.outlier_utils.output.hklout)

    if (params.outlier_utils.output.logfile is not None):
      final_log = StringIO()
      print >> final_log, string_buffer.getvalue()
      print >> final_log
      if plot_out is not None:
        print >> final_log, plot_out.getvalue()
      outfile = open( params.outlier_utils.output.logfile, 'w' )
      outfile.write( final_log.getvalue() )
      print >> log
      print >> log, "A logfile named %s was created."%(
        params.outlier_utils.output.logfile)
      print >> log, "This logfile contains the screen output and"
      print >> log, "(possibly) some ccp4 style loggraph plots"
Ejemplo n.º 4
0
def exercise():
  from libtbx.test_utils import show_diff, Exception_expected
  import cPickle
  #
  from libtbx.str_utils import split_keeping_spaces
  assert split_keeping_spaces(s="") == []
  assert split_keeping_spaces(s=" ") == [" "]
  assert split_keeping_spaces(s="a") == ["a"]
  assert split_keeping_spaces(s="abc") == ["abc"]
  assert split_keeping_spaces(s=" a") == [" ", "a"]
  assert split_keeping_spaces(s="  a") == ["  ", "a"]
  assert split_keeping_spaces(s="  abc") == ["  ", "abc"]
  assert split_keeping_spaces(s="  abc ") == ["  ", "abc", " "]
  assert split_keeping_spaces(s="  abc  ") == ["  ", "abc", "  "]
  assert split_keeping_spaces(s="a ") == ["a", " "]
  assert split_keeping_spaces(s="a  ") == ["a", "  "]
  assert split_keeping_spaces(s="abc  ") == ["abc", "  "]
  assert split_keeping_spaces(s="a b") == ["a", " ", "b"]
  assert split_keeping_spaces(s="a  b") == ["a", "  ", "b"]
  assert split_keeping_spaces(s="  a  b c   d ") == [
    "  ", "a", "  ", "b", " ", "c", "   ", "d", " "]
  #
  from libtbx.str_utils import size_as_string_with_commas
  assert size_as_string_with_commas(0) == "0"
  assert size_as_string_with_commas(1) == "1"
  assert size_as_string_with_commas(-1) == "-1"
  assert size_as_string_with_commas(10) == "10"
  assert size_as_string_with_commas(100) == "100"
  assert size_as_string_with_commas(1000) == "1,000"
  assert size_as_string_with_commas(12345) == "12,345"
  assert size_as_string_with_commas(12345678) == "12,345,678"
  assert size_as_string_with_commas(-12345678) == "-12,345,678"
  #
  from libtbx.str_utils import show_string
  assert show_string("abc") == '"abc"'
  assert show_string("a'c") == '"a\'c"'
  assert show_string('a"c') == "'a\"c'"
  assert show_string('\'"c') == '"\'\\"c"'
  #
  from libtbx.str_utils import prefix_each_line
  assert prefix_each_line(prefix="^", lines_as_one_string="""\
hello
world""") == """\
^hello
^world"""
  #
  from libtbx.str_utils import prefix_each_line_suffix
  assert prefix_each_line_suffix(prefix="^", lines_as_one_string="""\
hello
world""", suffix=" ") == """\
^hello
^world"""
  assert prefix_each_line_suffix(prefix="^", lines_as_one_string="""\
hello
world""", suffix=" ", rstrip=False) == """\
^hello%s
^world """ % " "
  #
  from libtbx.str_utils import show_sorted_by_counts
  import cStringIO
  out = cStringIO.StringIO()
  assert show_sorted_by_counts(
    label_count_pairs=[("b", 3), ("a", 3), ("c", -2)],
    out=out, prefix="%")
  assert not show_diff(out.getvalue(), """\
%"a"  3
%"b"  3
%"c" -2
""")
  out = cStringIO.StringIO()
  assert show_sorted_by_counts(
    label_count_pairs=[("b", -3), ("a", -3), ("c", 2)], reverse=False,
     out=out, prefix="%", annotations=[None, "", "x"])
  assert not show_diff(out.getvalue(), """\
%"c"  2 x
%"a" -3
%"b" -3
""")
  #
  from libtbx.str_utils import line_breaker
  for string, expected_result in [
    ("", [""]),
    ("this is", ["this is"]),
    ("this is a", ["this is", "a"]),
    ("this is a sentence", ["this is", "a", "sentence"]),
    ("this is a longer sentence", ["this is", "a", "longer", "sentence"]),
    ("this is a very long sentence indeed",
      ["this is", "a very", "long", "sentence", "indeed"])]:
    assert [block for block in line_breaker(string, width=7)]==expected_result
  #
  from libtbx.str_utils import StringIO
  out1 = cStringIO.StringIO()
  out2 = StringIO()
  out3 = StringIO("Hello world!\n")
  print >> out1, "Hello world!"
  print >> out2, "Hello world!"
  try :
      print >> out3, "Hello world!"
  except AttributeError :
    pass
  else :
    raise Exception_expected
  out4 = cPickle.loads(cPickle.dumps(out2))
  out5 = cPickle.loads(cPickle.dumps(out3))
  assert out4.getvalue()==out1.getvalue()==out2.getvalue()==out5.getvalue()
  #
  from libtbx.str_utils import reformat_terminal_text
  txt1 = """
This is some
terminal-formatted
text which needs
to be reset.
"""
  assert (reformat_terminal_text(txt1) ==
          "This is some terminal-formatted text which needs to be reset.")
  txt2 = """
  This is more
  terminal-formatted
  text which needs
  to be reset.
"""
  #
  from libtbx.str_utils import strip_lines, rstrip_lines
  lines = ["  This is more ", "  terminal-formatted ", "  text "]
  assert (strip_lines(txt2) ==
    "\nThis is more\nterminal-formatted\ntext which needs\nto be reset.")
  assert (rstrip_lines(txt2) ==
    "\n  This is more\n  terminal-formatted\n  text which needs\n  to be reset."
  )
  #
  from libtbx.str_utils import expandtabs_track_columns
  def check(s):
    es,js = expandtabs_track_columns(s=s)
    assert len(js) == len(s)
    assert es == s.expandtabs()
    sr = "".join([es[j] for j in js])
    assert sr == s.replace("\t", " ")
  check("")
  check("\t")
  check("\t\t")
  check("\ty")
  check("x\ty")
  check("x\ty\tz")
  check("\txy\t\tz")
  check("abcdefg\txy\t\tz")
  check("ab defgh\txyz\t\tu")
  #
  from libtbx.str_utils import format_value
  assert format_value("%.4f", 1.2345678) == "1.2346"
  assert format_value("%.4f", None) == "  None"
  assert format_value("%.4f", None, replace_none_with="---") == "   ---"
  #
  from libtbx.str_utils import make_header
  out = StringIO()
  make_header("Header 1", out=out)
  assert (out.getvalue() == """
=================================== Header 1 ==================================

""")
  out = StringIO()
  make_header("Header 2", out=out)
  assert (out.getvalue() == """
=================================== Header 2 ==================================

""")
  #
  from libtbx.str_utils import string_representation
  iset = range(130) + range(250,256)
  for i in iset:
    s = chr(i)
    for j in iset:
      ss = s + chr(j)
      assert string_representation(
        string=ss, preferred_quote="'", alternative_quote='"') == repr(ss)
  from libtbx.str_utils import framed_output
  out = StringIO()
  box = framed_output(out, frame='#')
  print >> box, "Hello, world!"
  box.close()
  assert (out.getvalue() == """
#################
# Hello, world! #
#################
""")
  out = StringIO()
  box = framed_output(out, frame='-', width=80, center=True,
    title="Refinement stats")
  box.write("r_free = 0.1234")
  box.write("  ")
  box.write("r_work = 0.1567")
  box.close()
  assert (out.getvalue() == """
|--------------------------------Refinement stats------------------------------|
|                       r_free = 0.1234  r_work = 0.1567                       |
|------------------------------------------------------------------------------|
""")
  out = StringIO()
  box = framed_output(out, frame='-', width=72, prefix="    ",
    title="Validation summary")
  print >> box, "Overall MolProbity score: 2.56"
  box.add_separator()
  print >> box, """\
Ramachandran favored:  97.5 %
             outliers:  2.5 %
Rotamer outliers:       5.9 %
Clashscore:            10.9"""
  assert (out.getvalue() == "")
  del box
  assert (out.getvalue() == """
    |-Validation summary---------------------------------------------------|
    | Overall MolProbity score: 2.56                                       |
    |----------------------------------------------------------------------|
    | Ramachandran favored:  97.5 %                                        |
    |              outliers:  2.5 %                                        |
    | Rotamer outliers:       5.9 %                                        |
    | Clashscore:            10.9                                          |
    |----------------------------------------------------------------------|
""")
  from libtbx.str_utils import print_message_in_box
  out = StringIO()
  print_message_in_box(
    message="This is some terminal-formatted text which needs to be reset.",
    out=out,
    width=32,
    center=True,
    prefix="  ",
    frame='*')
  assert (out.getvalue() == """
  ********************************
  *         This is some         *
  *   terminal-formatted text    *
  *   which needs to be reset.   *
  ********************************
""")
Ejemplo n.º 5
0
    def model_based_outliers(self,
                             f_model,
                             level=.01,
                             return_data=False,
                             plot_out=None):

        assert self.r_free_flags is not None
        if (self.r_free_flags.data().count(True) == 0):
            self.r_free_flags = self.r_free_flags.array(
                data=~self.r_free_flags.data())
        sigmaa_estimator = sigmaa_estimation.sigmaa_estimator(
            miller_obs=self.miller_obs,
            miller_calc=f_model,
            r_free_flags=self.r_free_flags,
            kernel_width_free_reflections=200,
            n_sampling_points=20,
            n_chebyshev_terms=13)

        sigmaa_estimator.show(out=self.out)
        sigmaa = sigmaa_estimator.sigmaa()
        obs_norm = abs(sigmaa_estimator.normalized_obs)
        calc_norm = sigmaa_estimator.normalized_calc

        f_model_outlier_object = scaling.likelihood_ratio_outlier_test(
            f_obs=obs_norm.data(),
            sigma_obs=None,
            f_calc=calc_norm.data(),
            # the data is prenormalized, all epsies are unity
            epsilon=flex.double(calc_norm.data().size(), 1.0),
            centric=obs_norm.centric_flags().data(),
            alpha=sigmaa.data(),
            beta=1.0 - sigmaa.data() * sigmaa.data())
        modes = f_model_outlier_object.posterior_mode()
        lik = f_model_outlier_object.log_likelihood()
        p_lik = f_model_outlier_object.posterior_mode_log_likelihood()
        s_der = f_model_outlier_object.posterior_mode_snd_der()

        ll_gain = f_model_outlier_object.standardized_likelihood()

        # The smallest vallue should be 0.
        # sometimes, due to numerical issues, it comes out
        # a wee bit negative. please repair that
        eps = 1.0e-10
        zeros = flex.bool(ll_gain < eps)
        p_values = ll_gain
        p_values = p_values.set_selected(zeros, eps)
        p_values = erf(flex.sqrt(p_values / 2.0))
        p_values = 1.0 - flex.pow(p_values, float(p_values.size()))

        # select on p-values
        flags = flex.bool(p_values > level)
        flags = self.miller_obs.customized_copy(data=flags)
        ll_gain = self.miller_obs.customized_copy(data=ll_gain)
        p_values = self.miller_obs.customized_copy(data=p_values)

        log_message = """

Model based outlier rejection.
------------------------------

Calculated amplitudes and estimated values of alpha and beta
are used to compute the log-likelihood of the observed amplitude.
The method is inspired by Read, Acta Cryst. (1999). D55, 1759-1764.
Outliers are rejected on the basis of the assumption that a scaled
log likelihood differnce 2(log[P(Fobs)]-log[P(Fmode)])/Q\" is distributed
according to a Chi-square distribution (Q\" is equal to the second
derivative of the log likelihood function of the mode of the
distribution).
The outlier threshold of the p-value relates to the p-value of the
extreme value distribution of the chi-square distribution.

"""

        flags.map_to_asu()
        ll_gain.map_to_asu()
        p_values.map_to_asu()

        assert flags.indices().all_eq(self.miller_obs.indices())
        assert ll_gain.indices().all_eq(self.miller_obs.indices())
        assert p_values.indices().all_eq(self.miller_obs.indices())

        log_message = self.make_log_model(log_message, flags, ll_gain,
                                          p_values, obs_norm, calc_norm,
                                          sigmaa, plot_out)
        tmp_log = StringIO()
        print >> tmp_log, log_message
        # histogram of log likelihood gain values
        print >> tmp_log
        print >> tmp_log, "The histoghram of scaled (LL-gain) values is shown below."
        print >> tmp_log, "  Note: scaled (LL-gain) is approximately Chi-square distributed."
        print >> tmp_log
        print >> tmp_log, "  scaled(LL-gain)  Frequency"
        histo = flex.histogram(ll_gain.data(), 15)
        histo.show(f=tmp_log, format_cutoffs='%7.3f')

        print >> self.out, tmp_log.getvalue()

        if not return_data:
            return flags
        else:
            assert flags.indices().all_eq(self.miller_obs.indices())
            return self.miller_obs.select(flags.data())
Ejemplo n.º 6
0
    def model_based_outliers(self, f_model, level=0.01, return_data=False, plot_out=None):

        assert self.r_free_flags is not None
        if self.r_free_flags.data().count(True) == 0:
            self.r_free_flags = self.r_free_flags.array(data=~self.r_free_flags.data())
        sigmaa_estimator = sigmaa_estimation.sigmaa_estimator(
            miller_obs=self.miller_obs,
            miller_calc=f_model,
            r_free_flags=self.r_free_flags,
            kernel_width_free_reflections=200,
            n_sampling_points=20,
            n_chebyshev_terms=13,
        )

        sigmaa_estimator.show(out=self.out)
        sigmaa = sigmaa_estimator.sigmaa()
        obs_norm = abs(sigmaa_estimator.normalized_obs)
        calc_norm = sigmaa_estimator.normalized_calc

        f_model_outlier_object = scaling.likelihood_ratio_outlier_test(
            f_obs=obs_norm.data(),
            sigma_obs=None,
            f_calc=calc_norm.data(),
            # the data is prenormalized, all epsies are unity
            epsilon=flex.double(calc_norm.data().size(), 1.0),
            centric=obs_norm.centric_flags().data(),
            alpha=sigmaa.data(),
            beta=1.0 - sigmaa.data() * sigmaa.data(),
        )
        modes = f_model_outlier_object.posterior_mode()
        lik = f_model_outlier_object.log_likelihood()
        p_lik = f_model_outlier_object.posterior_mode_log_likelihood()
        s_der = f_model_outlier_object.posterior_mode_snd_der()

        ll_gain = f_model_outlier_object.standardized_likelihood()

        # The smallest vallue should be 0.
        # sometimes, due to numerical issues, it comes out
        # a wee bit negative. please repair that
        eps = 1.0e-10
        zeros = flex.bool(ll_gain < eps)
        p_values = ll_gain
        p_values = p_values.set_selected(zeros, eps)
        p_values = erf(flex.sqrt(p_values / 2.0))
        p_values = 1.0 - flex.pow(p_values, float(p_values.size()))

        # select on p-values
        flags = flex.bool(p_values > level)
        flags = self.miller_obs.customized_copy(data=flags)
        ll_gain = self.miller_obs.customized_copy(data=ll_gain)
        p_values = self.miller_obs.customized_copy(data=p_values)

        log_message = """

Model based outlier rejection.
------------------------------

Calculated amplitudes and estimated values of alpha and beta
are used to compute the log-likelihood of the observed amplitude.
The method is inspired by Read, Acta Cryst. (1999). D55, 1759-1764.
Outliers are rejected on the basis of the assumption that a scaled
log likelihood differnce 2(log[P(Fobs)]-log[P(Fmode)])/Q\" is distributed
according to a Chi-square distribution (Q\" is equal to the second
derivative of the log likelihood function of the mode of the
distribution).
The outlier threshold of the p-value relates to the p-value of the
extreme value distribution of the chi-square distribution.

"""

        flags.map_to_asu()
        ll_gain.map_to_asu()
        p_values.map_to_asu()

        assert flags.indices().all_eq(self.miller_obs.indices())
        assert ll_gain.indices().all_eq(self.miller_obs.indices())
        assert p_values.indices().all_eq(self.miller_obs.indices())

        log_message = self.make_log_model(log_message, flags, ll_gain, p_values, obs_norm, calc_norm, sigmaa, plot_out)
        tmp_log = StringIO()
        print >> tmp_log, log_message
        # histogram of log likelihood gain values
        print >> tmp_log
        print >> tmp_log, "The histoghram of scaled (LL-gain) values is shown below."
        print >> tmp_log, "  Note: scaled (LL-gain) is approximately Chi-square distributed."
        print >> tmp_log
        print >> tmp_log, "  scaled(LL-gain)  Frequency"
        histo = flex.histogram(ll_gain.data(), 15)
        histo.show(f=tmp_log, format_cutoffs="%7.3f")

        print >>self.out, tmp_log.getvalue()

        if not return_data:
            return flags
        else:
            assert flags.indices().all_eq(self.miller_obs.indices())
            return self.miller_obs.select(flags.data())