Esempio n. 1
0
def run(args, command_name="phenix.remove_outliers"):
    if (len(args) == 0 or "--help" in args or "--h" in args or "-h" in args):
        print_help(command_name=command_name)
    else:
        log = multi_out()
        plot_out = None
        if (not "--quiet" in args):
            log.register(label="stdout", file_object=sys.stdout)
        string_buffer = StringIO()
        string_buffer_plots = StringIO()
        log.register(label="log_buffer", file_object=string_buffer)

        phil_objects = []
        argument_interpreter = master_params.command_line_argument_interpreter(
            home_scope="outlier_detection")

        for arg in args:
            command_line_params = None
            arg_is_processed = False
            # is it a file?
            if arg == "--quiet":
                arg_is_processed = True
            if (os.path.isfile(arg)):  ## is this a file name?
                # check if it is a phil file
                try:
                    command_line_params = iotbx.phil.parse(file_name=arg)
                    if command_line_params is not None:
                        phil_objects.append(command_line_params)
                        arg_is_processed = True
                except KeyboardInterrupt:
                    raise
                except Exception:
                    pass
            else:
                try:
                    command_line_params = argument_interpreter.process(arg=arg)
                    if command_line_params is not None:
                        phil_objects.append(command_line_params)
                        arg_is_processed = True
                except KeyboardInterrupt:
                    raise
                except Exception:
                    pass

            if not arg_is_processed:
                print >> log, "##----------------------------------------------##"
                print >> log, "## Unknown file or keyword:", arg
                print >> log, "##----------------------------------------------##"
                print >> log
                raise Sorry("Unknown file or keyword: %s" % arg)

        effective_params = master_params.fetch(sources=phil_objects)
        params = effective_params.extract()
        if not os.path.exists(params.outlier_utils.input.xray_data.file_name):
            raise Sorry("File %s can not be found" %
                        (params.outlier_utils.input.xray_data.file_name))
        if params.outlier_utils.input.model.file_name is not None:
            if not os.path.exists(params.outlier_utils.input.model.file_name):
                raise Sorry("File %s can not be found" %
                            (params.outlier_utils.input.model.file_name))

        # now get the unit cell from the pdb file

        hkl_xs = None
        if params.outlier_utils.input.xray_data.file_name is not None:
            hkl_xs = crystal_symmetry_from_any.extract_from(
                file_name=params.outlier_utils.input.xray_data.file_name)
        pdb_xs = None
        if params.outlier_utils.input.model.file_name is not None:
            pdb_xs = crystal_symmetry_from_any.extract_from(
                file_name=params.outlier_utils.input.model.file_name)

        phil_xs = crystal.symmetry(
            unit_cell=params.outlier_utils.input.unit_cell,
            space_group_info=params.outlier_utils.input.space_group)

        phil_xs.show_summary()
        hkl_xs.show_summary()

        combined_xs = crystal.select_crystal_symmetry(None, phil_xs, [pdb_xs],
                                                      [hkl_xs])

        # inject the unit cell and symmetry in the phil scope please
        params.outlier_utils.input.unit_cell = combined_xs.unit_cell()
        params.outlier_utils.input.space_group = \
          sgtbx.space_group_info( group = combined_xs.space_group() )

        new_params = master_params.format(python_object=params)
        new_params.show(out=log)

        if params.outlier_utils.input.unit_cell is None:
            raise Sorry("unit cell not specified")
        if params.outlier_utils.input.space_group is None:
            raise Sorry("space group not specified")
        if params.outlier_utils.input.xray_data.file_name is None:
            raise Sorry("Xray data not specified")
        if params.outlier_utils.input.model.file_name is None:
            print "PDB file not specified. Basic wilson outlier rejections only."

        #-----------------------------------------------------------
        #
        # step 1: read in the reflection file
        #
        phil_xs = crystal.symmetry(
            unit_cell=params.outlier_utils.input.unit_cell,
            space_group_info=params.outlier_utils.input.space_group)

        xray_data_server = reflection_file_utils.reflection_file_server(
            crystal_symmetry=phil_xs, force_symmetry=True, reflection_files=[])

        miller_array = None

        miller_array = xray_data_server.get_xray_data(
            file_name=params.outlier_utils.input.xray_data.file_name,
            labels=params.outlier_utils.input.xray_data.obs_labels,
            ignore_all_zeros=True,
            parameter_scope='outlier_utils.input.xray_data',
            parameter_name='obs_labels')

        info = miller_array.info()

        miller_array = miller_array.map_to_asu()

        miller_array = miller_array.select(miller_array.indices() != (0, 0, 0))

        #we have to check if the sigma's make any sense at all
        if not miller_array.sigmas_are_sensible():
            miller_array = miller_array.customized_copy(
                data=miller_array.data(),
                sigmas=None).set_observation_type(miller_array)
        miller_array = miller_array.select(miller_array.data() > 0)
        if miller_array.sigmas() is not None:
            miller_array = miller_array.select(miller_array.sigmas() > 0)

        if (miller_array.is_xray_intensity_array()):
            miller_array = miller_array.f_sq_as_f()
        elif (miller_array.is_complex_array()):
            miller_array = abs(miller_array)

        miller_array.set_info(info)
        merged_anomalous = False
        if miller_array.anomalous_flag():
            miller_array = miller_array.average_bijvoet_mates(
            ).set_observation_type(miller_array)
            merged_anomalous = True
        miller_array = miller_array.map_to_asu()

        # get the free reflections please
        free_flags = None
        if params.outlier_utils.input.xray_data.free_flags is None:
            free_flags = miller_array.generate_r_free_flags(
               fraction=params.outlier_utils.\
                 additional_parameters.free_flag_generation.fraction,
               max_free=params.outlier_utils.\
                 additional_parameters.free_flag_generation.max_number,
               lattice_symmetry_max_delta=params.outlier_utils.\
                 additional_parameters.free_flag_generation.lattice_symmetry_max_delta,
               use_lattice_symmetry=params.outlier_utils.\
                 additional_parameters.free_flag_generation.use_lattice_symmetry
              )
        else:
            free_flags = xray_data_server.get_xray_data(
                file_name=params.outlier_utils.input.xray_data.file_name,
                labels=params.outlier_utils.input.xray_data.free_flags,
                ignore_all_zeros=True,
                parameter_scope='outlier_utils.input.xray_data',
                parameter_name='free_flags')

            if free_flags.anomalous_flag():
                free_flags = free_flags.average_bijvoet_mates()
                merged_anomalous = True
            free_flags = free_flags.customized_copy(data=flex.bool(
                free_flags.data() == 1))
            free_flags = free_flags.map_to_asu()
            free_flags = free_flags.common_set(miller_array)

        print >> log
        print >> log, "Summary info of observed data"
        print >> log, "============================="
        miller_array.show_summary(f=log)
        if merged_anomalous:
            print >> log, "For outlier detection purposes, the Bijvoet pairs have been merged."
        print >> log

        print >> log, "Constructing an outlier manager"
        print >> log, "==============================="
        print >> log
        outlier_manager = outlier_rejection.outlier_manager(miller_array,
                                                            free_flags,
                                                            out=log)

        basic_array = None
        extreme_array = None
        model_based_array = None

        basic_array = outlier_manager.basic_wilson_outliers(
          p_basic_wilson = params.outlier_utils.outlier_detection.\
                           parameters.basic_wilson.level,
          return_data = True)

        extreme_array = outlier_manager.extreme_wilson_outliers(
          p_extreme_wilson = params.outlier_utils.outlier_detection.parameters.\
                             extreme_wilson.level,
          return_data = True)

        beamstop_array = outlier_manager.beamstop_shadow_outliers(
          level = params.outlier_utils.outlier_detection.parameters.\
                   beamstop.level,
          d_min = params.outlier_utils.outlier_detection.parameters.\
                   beamstop.d_min,
          return_data=True)

        #----------------------------------------------------------------
        # Step 2: get an xray structure from the PDB file
        #
        if params.outlier_utils.input.model.file_name is not None:
            model = pdb.input(file_name=params.outlier_utils.input.model.
                              file_name).xray_structure_simple(
                                  crystal_symmetry=phil_xs)
            print >> log, "Atomic model summary"
            print >> log, "===================="
            model.show_summary(f=log)
            print >> log

            # please make an f_model object for bulk solvent scaling etc etc

            f_model_object = f_model.manager(f_obs=miller_array,
                                             r_free_flags=free_flags,
                                             xray_structure=model)
            print >> log, "Bulk solvent scaling of the data"
            print >> log, "================================"
            print >> log, "Maximum likelihood bulk solvent scaling."
            print >> log
            f_model_object.update_all_scales(log=log, remove_outliers=False)
            plot_out = StringIO()
            model_based_array = outlier_manager.model_based_outliers(
                f_model_object.f_model(),
                level=params.outlier_utils.outlier_detection.parameters.
                model_based.level,
                return_data=True,
                plot_out=plot_out)
        #check what needs to be put out please
        if params.outlier_utils.output.hklout is not None:
            if params.outlier_utils.outlier_detection.protocol == "model":
                if params.outlier_utils.input.model.file_name == None:
                    print >> log, "Model based rejections requested. No model was supplied."
                    print >> log, "Switching to writing out rejections based on extreme value Wilson statistics."
                    params.outlier_utils.outlier_detection.protocol = "extreme"

            output_array = None
            print >> log
            if params.outlier_utils.outlier_detection.protocol == "basic":
                print >> log, "Non-outliers found by the basic wilson statistics"
                print >> log, "protocol will be written out."
                output_array = basic_array
                new_set_of_free_flags = free_flags.common_set(basic_array)

            if params.outlier_utils.outlier_detection.protocol == "extreme":
                print >> log, "Non-outliers found by the extreme value wilson statistics"
                print >> log, "protocol will be written out."
                output_array = extreme_array
                new_set_of_free_flags = free_flags.common_set(extreme_array)

            if params.outlier_utils.outlier_detection.protocol == "model":
                print >> log, "Non-outliers found by the model based"
                print >> log, "protocol will be written out to the file:"
                print >> log, params.outlier_utils.output.hklout
                print >> log
                output_array = model_based_array
                new_set_of_free_flags = free_flags.common_set(
                    model_based_array)

            if params.outlier_utils.outlier_detection.protocol == "beamstop":
                print >> log, "Outliers found for the beamstop shadow"
                print >> log, "problems detection protocol will be written to the file:"
                print >> log, params.outlier_utils.output.hklout
                print >> log
                output_array = model_based_array
                new_set_of_free_flags = free_flags.common_set(
                    model_based_array)

            mtz_dataset = output_array.as_mtz_dataset(column_root_label="FOBS")
            mtz_dataset = mtz_dataset.add_miller_array(
                miller_array=new_set_of_free_flags,
                column_root_label="Free_R_Flag")
            mtz_dataset.mtz_object().write(
                file_name=params.outlier_utils.output.hklout)

        if (params.outlier_utils.output.logfile is not None):
            final_log = StringIO()
            print >> final_log, string_buffer.getvalue()
            print >> final_log
            if plot_out is not None:
                print >> final_log, plot_out.getvalue()
            outfile = open(params.outlier_utils.output.logfile, 'w')
            outfile.write(final_log.getvalue())
            print >> log
            print >> log, "A logfile named %s was created." % (
                params.outlier_utils.output.logfile)
            print >> log, "This logfile contains the screen output and"
            print >> log, "(possibly) some ccp4 style loggraph plots"
  def __init__(self,
               miller_array,
               parameters,
               out=None,
               n_residues=100,
               n_bases=0):

    self.params=parameters
    self.miller_array=miller_array.deep_copy().set_observation_type(
      miller_array).merge_equivalents().array()
    self.out = out
    if self.out is None:
      self.out = sys.stdout
    if self.out == "silent":
      self.out = null_out()


    self.no_aniso_array = self.miller_array
    if self.params.aniso.action == "remove_aniso":
      # first perfom aniso scaling
      aniso_scale_and_b = absolute_scaling.ml_aniso_absolute_scaling(
        miller_array = self.miller_array,
        n_residues = n_residues,
        n_bases = n_bases)
      aniso_scale_and_b.p_scale = 0 # set the p_scale back to 0!
      aniso_scale_and_b.show(out=out)
      # now do aniso correction please
      self.aniso_p_scale = aniso_scale_and_b.p_scale
      self.aniso_u_star  = aniso_scale_and_b.u_star
      self.aniso_b_cart  = aniso_scale_and_b.b_cart
      if self.params.aniso.final_b == "eigen_min":
        b_use=aniso_scale_and_b.eigen_values[2]
      elif self.params.aniso.final_b == "eigen_mean" :
        b_use=flex.mean(aniso_scale_and_b.eigen_values)
      elif self.params.aniso.final_b == "user_b_iso":
        assert self.params.aniso.b_iso is not None
        b_use=self.params.aniso.b_iso
      else:
        b_use = 30

      b_cart_aniso_removed = [ -b_use,
                               -b_use,
                               -b_use,
                               0,
                               0,
                               0]
      u_star_aniso_removed = adptbx.u_cart_as_u_star(
        miller_array.unit_cell(),
        adptbx.b_as_u( b_cart_aniso_removed  ) )
      ## I do things in two steps, but can easely be done in 1 step
      ## just for clarity, thats all.
      self.no_aniso_array = absolute_scaling.anisotropic_correction(
        self.miller_array,0.0,aniso_scale_and_b.u_star )
      self.no_aniso_array = absolute_scaling.anisotropic_correction(
        self.no_aniso_array,0.0,u_star_aniso_removed)
      self.no_aniso_array = self.no_aniso_array.set_observation_type(
        miller_array )

    # that is done now, now we can do outlier detection if desired
    outlier_manager = outlier_rejection.outlier_manager(
      self.no_aniso_array,
      None,
      out=self.out)


    self.new_miller_array = self.no_aniso_array
    if self.params.outlier.action == "basic":
      print >> self.out, "Non-outliers found by the basic wilson statistics"
      print >> self.out, "protocol will be written out."
      basic_array = outlier_manager.basic_wilson_outliers(
        p_basic_wilson = self.params.outlier.parameters.basic_wilson.level,
        return_data = True)
      self.new_miller_array = basic_array

    if self.params.outlier.action == "extreme":
      print >> self.out, "Non-outliers found by the extreme value wilson statistics"
      print >> self.out, "protocol will be written out."
      extreme_array = outlier_manager.extreme_wilson_outliers(
      p_extreme_wilson = self.params.outlier.parameters.extreme_wilson.level,
      return_data = True)
      self.new_miller_array = extreme_array

    if self.params.outlier.action == "beamstop":
      print >> self.out, "Outliers found for the beamstop shadow"
      print >> self.out, "problems detection protocol will be written out."
      beamstop_array = outlier_manager.beamstop_shadow_outliers(
        level = self.params.outlier.parameters.beamstop.level,
        d_min = self.params.outlier.parameters.beamstop.d_min,
        return_data=True)
      self.new_miller_array = beamstop_array

    if self.params.outlier.action == "None":
      self.new_miller_array =  self.no_aniso_array



    # now we can twin or detwin the data if needed
    self.final_array = self.new_miller_array
    if self.params.symmetry.action == "twin":
      alpha = self.params.symmetry.twinning_parameters.fraction
      if (alpha is None):
        raise Sorry("Twin fraction not specified, not twinning data")
      elif not (0 <= alpha <= 0.5):
        raise Sorry("Twin fraction must be between 0 and 0.5.")
      print >> self.out
      print >> self.out, "Twinning given data"
      print >> self.out, "-------------------"
      print >> self.out
      print >> self.out, "Artifically twinning the data with fraction %3.2f" %\
        alpha

      self.final_array = self.new_miller_array.twin_data(
        twin_law = self.params.symmetry.twinning_parameters.twin_law,
        alpha=alpha).as_intensity_array()

    elif (self.params.symmetry.action == "detwin"):
      twin_law = self.params.symmetry.twinning_parameters.twin_law
      alpha = self.params.symmetry.twinning_parameters.fraction
      if (alpha is None):
        raise Sorry("Twin fraction not specified, not detwinning data")
      elif not (0 <= alpha <= 0.5):
        raise Sorry("Twin fraction must be between 0 and 0.5.")
      print >> self.out, """

Attempting to detwin data
-------------------------
Detwinning data with:
  - twin law:      %s
  - twin fraciton: %.2f

BE WARNED! DETWINNING OF DATA DOES NOT SOLVE YOUR TWINNING PROBLEM!
PREFERABLY, REFINEMENT SHOULD BE CARRIED OUT AGAINST ORIGINAL DATA
ONLY USING A TWIN SPECIFIC TARGET FUNCTION!

""" % (twin_law, alpha)
      self.final_array = self.new_miller_array.detwin_data(
        twin_law=twin_law,
        alpha=alpha).as_intensity_array()

    assert self.final_array is not None
Esempio n. 3
0
def run(args, command_name="phenix.remove_outliers"):
  if (len(args)==0 or "--help" in args or "--h" in args or "-h" in args):
    print_help(command_name=command_name)
  else:
    log = multi_out()
    plot_out = None
    if (not "--quiet" in args):
      log.register(label="stdout", file_object=sys.stdout)
    string_buffer = StringIO()
    string_buffer_plots = StringIO()
    log.register(label="log_buffer", file_object=string_buffer)

    phil_objects = []
    argument_interpreter = master_params.command_line_argument_interpreter(
      home_scope="outlier_detection")

    for arg in args:
      command_line_params = None
      arg_is_processed = False
      # is it a file?
      if arg=="--quiet":
        arg_is_processed = True
      if (os.path.isfile(arg)): ## is this a file name?
        # check if it is a phil file
        try:
          command_line_params = iotbx.phil.parse(file_name=arg)
          if command_line_params is not None:
            phil_objects.append(command_line_params)
            arg_is_processed = True
        except KeyboardInterrupt: raise
        except Exception : pass
      else:
        try:
          command_line_params = argument_interpreter.process(arg=arg)
          if command_line_params is not None:
            phil_objects.append(command_line_params)
            arg_is_processed = True
        except KeyboardInterrupt: raise
        except Exception : pass

      if not arg_is_processed:
        print >> log, "##----------------------------------------------##"
        print >> log, "## Unknown file or keyword:", arg
        print >> log, "##----------------------------------------------##"
        print >> log
        raise Sorry("Unknown file or keyword: %s" % arg)

    effective_params = master_params.fetch(sources=phil_objects)
    params = effective_params.extract()
    if not os.path.exists( params.outlier_utils.input.xray_data.file_name ) :
      raise Sorry("File %s can not be found"%(params.outlier_utils.input.xray_data.file_name) )
    if params.outlier_utils.input.model.file_name is not None:
      if not os.path.exists( params.outlier_utils.input.model.file_name ):
        raise Sorry("File %s can not be found"%(params.outlier_utils.input.model.file_name) )



    # now get the unit cell from the pdb file

    hkl_xs = None
    if params.outlier_utils.input.xray_data.file_name is not None:
      hkl_xs = crystal_symmetry_from_any.extract_from(
        file_name=params.outlier_utils.input.xray_data.file_name)
    pdb_xs = None
    if params.outlier_utils.input.model.file_name is not None:
      pdb_xs = crystal_symmetry_from_any.extract_from(
        file_name=params.outlier_utils.input.model.file_name)

    phil_xs = crystal.symmetry(
      unit_cell=params.outlier_utils.input.unit_cell,
      space_group_info=params.outlier_utils.input.space_group  )

    phil_xs.show_summary()
    hkl_xs.show_summary()


    combined_xs = crystal.select_crystal_symmetry(
      None,phil_xs, [pdb_xs],[hkl_xs])

    # inject the unit cell and symmetry in the phil scope please
    params.outlier_utils.input.unit_cell = combined_xs.unit_cell()
    params.outlier_utils.input.space_group = \
      sgtbx.space_group_info( group = combined_xs.space_group() )

    new_params =  master_params.format(python_object=params)
    new_params.show(out=log)

    if params.outlier_utils.input.unit_cell is None:
      raise Sorry("unit cell not specified")
    if params.outlier_utils.input.space_group is None:
      raise Sorry("space group not specified")
    if params.outlier_utils.input.xray_data.file_name is None:
      raise Sorry("Xray data not specified")
    if params.outlier_utils.input.model.file_name is None:
      print "PDB file not specified. Basic wilson outlier rejections only."



    #-----------------------------------------------------------
    #
    # step 1: read in the reflection file
    #
    phil_xs = crystal.symmetry(
      unit_cell=params.outlier_utils.input.unit_cell,
      space_group_info=params.outlier_utils.input.space_group  )

    xray_data_server =  reflection_file_utils.reflection_file_server(
      crystal_symmetry = phil_xs,
      force_symmetry = True,
      reflection_files=[])

    miller_array = None

    miller_array = xray_data_server.get_xray_data(
      file_name = params.outlier_utils.input.xray_data.file_name,
      labels = params.outlier_utils.input.xray_data.obs_labels,
      ignore_all_zeros = True,
      parameter_scope = 'outlier_utils.input.xray_data',
      parameter_name = 'obs_labels'
      )

    info = miller_array.info()

    miller_array = miller_array.map_to_asu()

    miller_array = miller_array.select(
      miller_array.indices() != (0,0,0))

    #we have to check if the sigma's make any sense at all
    if not miller_array.sigmas_are_sensible():
      miller_array = miller_array.customized_copy(
        data = miller_array.data(),
        sigmas=None).set_observation_type(miller_array)
    miller_array = miller_array.select(
      miller_array.data() > 0 )
    if  miller_array.sigmas() is not None:
      miller_array = miller_array.select(
        miller_array.sigmas() > 0 )

    if (miller_array.is_xray_intensity_array()):
      miller_array = miller_array.f_sq_as_f()
    elif (miller_array.is_complex_array()):
      miller_array = abs(miller_array)

    miller_array.set_info(info)
    merged_anomalous=False
    if miller_array.anomalous_flag():
      miller_array = miller_array.average_bijvoet_mates().set_observation_type(
        miller_array )
      merged_anomalous=True
    miller_array = miller_array.map_to_asu()

    # get the free reflections please
    free_flags = None
    if params.outlier_utils.input.xray_data.free_flags is None:
      free_flags = miller_array.generate_r_free_flags(
         fraction=params.outlier_utils.\
           additional_parameters.free_flag_generation.fraction,
         max_free=params.outlier_utils.\
           additional_parameters.free_flag_generation.max_number,
         lattice_symmetry_max_delta=params.outlier_utils.\
           additional_parameters.free_flag_generation.lattice_symmetry_max_delta,
         use_lattice_symmetry=params.outlier_utils.\
           additional_parameters.free_flag_generation.use_lattice_symmetry
        )
    else:
      free_flags = xray_data_server.get_xray_data(
        file_name = params.outlier_utils.input.xray_data.file_name,
        labels = params.outlier_utils.input.xray_data.free_flags,
        ignore_all_zeros = True,
        parameter_scope = 'outlier_utils.input.xray_data',
        parameter_name = 'free_flags'
        )

      if free_flags.anomalous_flag():
        free_flags = free_flags.average_bijvoet_mates()
        merged_anomalous=True
      free_flags = free_flags.customized_copy(
        data = flex.bool( free_flags.data() == 1 ))
      free_flags = free_flags.map_to_asu()
      free_flags = free_flags.common_set( miller_array )


    print >> log
    print >> log, "Summary info of observed data"
    print >> log, "============================="
    miller_array.show_summary(f=log)
    if merged_anomalous:
      print >> log, "For outlier detection purposes, the Bijvoet pairs have been merged."
    print >> log

    print >> log, "Constructing an outlier manager"
    print >> log, "==============================="
    print >> log
    outlier_manager = outlier_rejection.outlier_manager(
      miller_array,
      free_flags,
      out=log)

    basic_array = None
    extreme_array = None
    model_based_array = None

    basic_array = outlier_manager.basic_wilson_outliers(
      p_basic_wilson = params.outlier_utils.outlier_detection.\
                       parameters.basic_wilson.level,
      return_data = True)

    extreme_array = outlier_manager.extreme_wilson_outliers(
      p_extreme_wilson = params.outlier_utils.outlier_detection.parameters.\
                         extreme_wilson.level,
      return_data = True)

    beamstop_array = outlier_manager.beamstop_shadow_outliers(
      level = params.outlier_utils.outlier_detection.parameters.\
               beamstop.level,
      d_min = params.outlier_utils.outlier_detection.parameters.\
               beamstop.d_min,
      return_data=True)



    #----------------------------------------------------------------
    # Step 2: get an xray structure from the PDB file
    #
    if params.outlier_utils.input.model.file_name is not None:
      model = pdb.input(file_name=params.outlier_utils.input.model.file_name).xray_structure_simple(
        crystal_symmetry=phil_xs)
      print >> log, "Atomic model summary"
      print >> log, "===================="
      model.show_summary(f=log)
      print >> log


      # please make an f_model object for bulk solvent scaling etc etc

      f_model_object = f_model.manager(
        f_obs = miller_array,
        r_free_flags = free_flags,
        xray_structure = model )
      print >> log, "Bulk solvent scaling of the data"
      print >> log, "================================"
      print >> log, "Maximum likelihood bulk solvent scaling."
      print >> log
      f_model_object.update_solvent_and_scale(out=log)
      plot_out = StringIO()
      model_based_array = outlier_manager.model_based_outliers(
        f_model_object.f_model(),
        level=params.outlier_utils.outlier_detection.parameters.model_based.level,
        return_data=True,
        plot_out=plot_out)
    #check what needs to be put out please
    if params.outlier_utils.output.hklout is not None:
      if params.outlier_utils.outlier_detection.protocol == "model":
        if params.outlier_utils.input.model.file_name == None:
          print >> log, "Model based rejections requested. No model was supplied."
          print >> log, "Switching to writing out rejections based on extreme value Wilson statistics."
          params.outlier_utils.outlier_detection.protocol="extreme"

      output_array = None
      print >> log
      if params.outlier_utils.outlier_detection.protocol == "basic":
        print >> log, "Non-outliers found by the basic wilson statistics"
        print >> log, "protocol will be written out."
        output_array = basic_array
        new_set_of_free_flags = free_flags.common_set( basic_array )

      if params.outlier_utils.outlier_detection.protocol == "extreme":
        print >> log, "Non-outliers found by the extreme value wilson statistics"
        print >> log, "protocol will be written out."
        output_array = extreme_array
        new_set_of_free_flags = free_flags.common_set( extreme_array )

      if params.outlier_utils.outlier_detection.protocol == "model":
        print >> log, "Non-outliers found by the model based"
        print >> log, "protocol will be written out to the file:"
        print >> log, params.outlier_utils.output.hklout
        print >> log
        output_array = model_based_array
        new_set_of_free_flags = free_flags.common_set( model_based_array )

      if params.outlier_utils.outlier_detection.protocol == "beamstop":
        print >> log, "Outliers found for the beamstop shadow"
        print >> log, "problems detection protocol will be written to the file:"
        print >> log, params.outlier_utils.output.hklout
        print >> log
        output_array = model_based_array
        new_set_of_free_flags = free_flags.common_set( model_based_array )

      mtz_dataset = output_array.as_mtz_dataset(
        column_root_label="FOBS")
      mtz_dataset = mtz_dataset.add_miller_array(
        miller_array = new_set_of_free_flags,
        column_root_label = "Free_R_Flag"
        )
      mtz_dataset.mtz_object().write(
        file_name=params.outlier_utils.output.hklout)

    if (params.outlier_utils.output.logfile is not None):
      final_log = StringIO()
      print >> final_log, string_buffer.getvalue()
      print >> final_log
      if plot_out is not None:
        print >> final_log, plot_out.getvalue()
      outfile = open( params.outlier_utils.output.logfile, 'w' )
      outfile.write( final_log.getvalue() )
      print >> log
      print >> log, "A logfile named %s was created."%(
        params.outlier_utils.output.logfile)
      print >> log, "This logfile contains the screen output and"
      print >> log, "(possibly) some ccp4 style loggraph plots"
def exercise(
    d_min=3.5,
    k_sol=0.3,
    b_sol=60.0,
    b_cart=[0, 0, 0, 0, 0, 0],
    anomalous_flag=False,
    scattering_table="it1992",
    space_group_info=None,
):
    space_groups = [str(space_group_info)]
    for sg in space_groups:
        ### get random structure
        xray_structure = random_structure.xray_structure(
            space_group_info=sgtbx.space_group_info(sg),
            elements=(("O", "C", "N") * 50),
            volume_per_atom=100,
            min_distance=1.5,
            general_positions_only=True,
            random_u_iso=True,
        )
        xray_structure.scattering_type_registry(table=scattering_table)
        ### Get FOBS
        for scale in [0.0001, 1.0, 1000.0]:
            dummy = abs(xray_structure.structure_factors(d_min=d_min, anomalous_flag=anomalous_flag).f_calc())
            flags = dummy.generate_r_free_flags(fraction=0.1, max_free=99999999)
            fmodel = mmtbx.f_model.manager(
                xray_structure=xray_structure,
                r_free_flags=flags,
                target_name="ls_wunit_k1",
                f_obs=dummy,
                k_sol=k_sol,
                b_sol=b_sol,
                b_cart=b_cart,
            )

            fmodel.update_xray_structure(xray_structure=xray_structure, update_f_calc=True, update_f_mask=True)
            f_obs = abs(fmodel.f_model())
            f_obs = f_obs.array(data=f_obs.data() * scale)
            f_obs.set_observation_type(observation_type=observation_types.amplitude())
            ### look at non-model based outliers detection
            om = outlier_rejection.outlier_manager(miller_obs=f_obs, r_free_flags=flags, out="silent")
            tmp1 = om.basic_wilson_outliers()
            tmp2 = om.extreme_wilson_outliers()
            tmp3 = om.beamstop_shadow_outliers()
            # start loop over distorted models
            for error in [0.0, 0.8]:
                for fraction in [0.0, 0.5]:
                    # get distorted model
                    xrs_dc = xray_structure.deep_copy_scatterers()
                    sel = xrs_dc.random_remove_sites_selection(fraction=fraction)
                    xrs_dc = xrs_dc.select(sel)
                    xrs_dc.shake_sites_in_place(rms_difference=error)
                    xrs_dc.scattering_type_registry(table=scattering_table)
                    for k_sol in [0.50]:
                        for b_sol in [60.0]:
                            fmodel = mmtbx.f_model.manager(
                                xray_structure=xrs_dc,
                                r_free_flags=flags,
                                target_name="ls_wunit_k1",
                                f_obs=f_obs,
                                k_sol=k_sol,
                                b_sol=b_sol,
                                b_cart=b_cart,
                            )
                            a, b = fmodel.alpha_beta()
                            o_sel = om.model_based_outliers(f_model=fmodel.f_model())
                            n_out = o_sel.data().count(False)
                            assert n_out < 5
  def __init__(self,
               miller_array,
               parameters,
               out=None,
               n_residues=100,
               n_bases=0):

    self.params=parameters
    self.miller_array=miller_array.deep_copy().set_observation_type(
      miller_array).merge_equivalents().array()
    self.out = out
    if self.out is None:
      self.out = sys.stdout
    if self.out == "silent":
      self.out = null_out()


    self.no_aniso_array = self.miller_array
    if self.params.aniso.action == "remove_aniso":
      # first perfom aniso scaling
      aniso_scale_and_b = absolute_scaling.ml_aniso_absolute_scaling(
        miller_array = self.miller_array,
        n_residues = n_residues,
        n_bases = n_bases)
      aniso_scale_and_b.p_scale = 0 # set the p_scale back to 0!
      aniso_scale_and_b.show(out=out)
      # now do aniso correction please
      self.aniso_p_scale = aniso_scale_and_b.p_scale
      self.aniso_u_star  = aniso_scale_and_b.u_star
      self.aniso_b_cart  = aniso_scale_and_b.b_cart
      if self.params.aniso.final_b == "eigen_min":
        b_use=aniso_scale_and_b.eigen_values[2]
      elif self.params.aniso.final_b == "eigen_mean" :
        b_use=flex.mean(aniso_scale_and_b.eigen_values)
      elif self.params.aniso.final_b == "user_b_iso":
        assert self.params.aniso.b_iso is not None
        b_use=self.params.aniso.b_iso
      else:
        b_use = 30

      b_cart_aniso_removed = [ -b_use,
                               -b_use,
                               -b_use,
                               0,
                               0,
                               0]
      u_star_aniso_removed = adptbx.u_cart_as_u_star(
        miller_array.unit_cell(),
        adptbx.b_as_u( b_cart_aniso_removed  ) )
      ## I do things in two steps, but can easely be done in 1 step
      ## just for clarity, thats all.
      self.no_aniso_array = absolute_scaling.anisotropic_correction(
        self.miller_array,0.0,aniso_scale_and_b.u_star )
      self.no_aniso_array = absolute_scaling.anisotropic_correction(
        self.no_aniso_array,0.0,u_star_aniso_removed)
      self.no_aniso_array = self.no_aniso_array.set_observation_type(
        miller_array )

    # that is done now, now we can do outlier detection if desired
    outlier_manager = outlier_rejection.outlier_manager(
      self.no_aniso_array,
      None,
      out=self.out)


    self.new_miller_array = self.no_aniso_array
    if self.params.outlier.action == "basic":
      print >> self.out, "Non-outliers found by the basic wilson statistics"
      print >> self.out, "protocol will be written out."
      basic_array = outlier_manager.basic_wilson_outliers(
        p_basic_wilson = self.params.outlier.parameters.basic_wilson.level,
        return_data = True)
      self.new_miller_array = basic_array

    if self.params.outlier.action == "extreme":
      print >> self.out, "Non-outliers found by the extreme value wilson statistics"
      print >> self.out, "protocol will be written out."
      extreme_array = outlier_manager.extreme_wilson_outliers(
      p_extreme_wilson = self.params.outlier.parameters.extreme_wilson.level,
      return_data = True)
      self.new_miller_array = extreme_array

    if self.params.outlier.action == "beamstop":
      print >> self.out, "Outliers found for the beamstop shadow"
      print >> self.out, "problems detection protocol will be written out."
      beamstop_array = outlier_manager.beamstop_shadow_outliers(
        level = self.params.outlier.parameters.beamstop.level,
        d_min = self.params.outlier.parameters.beamstop.d_min,
        return_data=True)
      self.new_miller_array = beamstop_array

    if self.params.outlier.action == "None":
      self.new_miller_array =  self.no_aniso_array



    # now we can twin or detwin the data if needed
    self.final_array = self.new_miller_array
    if self.params.symmetry.action == "twin":
      alpha = self.params.symmetry.twinning_parameters.fraction
      if (alpha is None) :
        raise Sorry("Twin fraction not specified, not twinning data")
      elif not (0 <= alpha <= 0.5):
        raise Sorry("Twin fraction must be between 0 and 0.5.")
      print >> self.out
      print >> self.out, "Twinning given data"
      print >> self.out, "-------------------"
      print >> self.out
      print >> self.out, "Artifically twinning the data with fraction %3.2f" %\
        alpha

      self.final_array = self.new_miller_array.twin_data(
        twin_law = self.params.symmetry.twinning_parameters.twin_law,
        alpha=alpha).as_intensity_array()

    elif (self.params.symmetry.action == "detwin") :
      twin_law = self.params.symmetry.twinning_parameters.twin_law
      alpha = self.params.symmetry.twinning_parameters.fraction
      if (alpha is None) :
        raise Sorry("Twin fraction not specified, not detwinning data")
      elif not (0 <= alpha <= 0.5):
        raise Sorry("Twin fraction must be between 0 and 0.5.")
      print >> self.out, """

Attempting to detwin data
-------------------------
Detwinning data with:
  - twin law:      %s
  - twin fraciton: %.2f

BE WARNED! DETWINNING OF DATA DOES NOT SOLVE YOUR TWINNING PROBLEM!
PREFERABLY, REFINEMENT SHOULD BE CARRIED OUT AGAINST ORIGINAL DATA
ONLY USING A TWIN SPECIFIC TARGET FUNCTION!

""" % (twin_law, alpha)
      self.final_array = self.new_miller_array.detwin_data(
        twin_law=twin_law,
        alpha=alpha).as_intensity_array()

    assert self.final_array is not None
Esempio n. 6
0
def exercise(d_min            = 3.5,
             k_sol            = 0.3,
             b_sol            = 60.0,
             b_cart           = [0,0,0,0,0,0],
             anomalous_flag   = False,
             scattering_table = "it1992",
             space_group_info = None):
  space_groups = [ str(space_group_info) ]
  for sg in space_groups:
      ### get random structure
      xray_structure = random_structure.xray_structure(
                          space_group_info       = sgtbx.space_group_info(sg),
                          elements               = (("O","C","N")*50),
                          volume_per_atom        = 100,
                          min_distance           = 1.5,
                          general_positions_only = True,
                          random_u_iso           = True)
      xray_structure.scattering_type_registry(table = scattering_table)
      ### Get FOBS
      for scale in [0.0001, 1.0, 1000.0]:
          dummy = abs(xray_structure.structure_factors(
                                   d_min          = d_min,
                                   anomalous_flag = anomalous_flag).f_calc())
          flags = dummy.generate_r_free_flags(fraction = 0.1,
                                              max_free = 99999999)
          fmodel = mmtbx.f_model.manager(xray_structure   = xray_structure,
                                         r_free_flags     = flags,
                                         target_name      = "ls_wunit_k1",
                                         f_obs            = dummy,
                                         k_sol            = k_sol,
                                         b_sol            = b_sol,
                                         b_cart           = b_cart)

          fmodel.update_xray_structure(xray_structure = xray_structure,
                                       update_f_calc = True,
                                       update_f_mask = True)
          f_obs = abs(fmodel.f_model())
          f_obs = f_obs.array(data = f_obs.data()*scale)
          f_obs.set_observation_type(observation_type = observation_types.amplitude())
          ### look at non-model based outliers detection
          om = outlier_rejection.outlier_manager(miller_obs   = f_obs,
                                                 r_free_flags = flags,
                                                 out          = "silent")
          tmp1 = om.basic_wilson_outliers()
          tmp2 = om.extreme_wilson_outliers()
          tmp3 = om.beamstop_shadow_outliers()
          # start loop over distorted models
          for error in [0.0,  0.8]:
              for fraction in [0.0,0.5]:
                # get distorted model
                xrs_dc = xray_structure.deep_copy_scatterers()
                sel = xrs_dc.random_remove_sites_selection(fraction = fraction)
                xrs_dc = xrs_dc.select(sel)
                xrs_dc.shake_sites_in_place(rms_difference=error)
                xrs_dc.scattering_type_registry(table = scattering_table)
                for k_sol in [0.50,]:
                  for b_sol in [60.,]:
                    fmodel = mmtbx.f_model.manager(
                       xray_structure = xrs_dc,
                       r_free_flags   = flags,
                       target_name    = "ls_wunit_k1",
                       f_obs          = f_obs,
                       k_sol          = k_sol,
                       b_sol          = b_sol,
                       b_cart         = b_cart)
                    a,b = fmodel.alpha_beta()
                    o_sel =  om.model_based_outliers(f_model = fmodel.f_model())
                    n_out = o_sel.data().count(False)
                    assert (n_out < 5)