def __init__(self,
                 miller_obs,
                 miller_calc,
                 miller_mock,
                 n_reso_bins=25,
                 n_e_bins=20,
                 thres=3.0):
        self.miller_obs = miller_obs
        self.miller_calc = miller_calc
        self.miller_mock = miller_mock

        # work on a common set of reflections so the three arrays stay aligned
        self.miller_calc = self.miller_calc.common_set(self.miller_obs)
        self.miller_mock = self.miller_mock.common_set(self.miller_obs)

        # we need to normalise the data, both fobs and fcalc
        norma_obs_obj = absolute_scaling.kernel_normalisation(self.miller_obs,
                                                              auto_kernel=True)
        norma_calc_obj = absolute_scaling.kernel_normalisation(
            self.miller_calc, auto_kernel=True)
        norma_mock_obj = absolute_scaling.kernel_normalisation(
            self.miller_mock, auto_kernel=True)
        self.norma_obs = norma_obs_obj.normalised_miller_dev_eps.f_sq_as_f(
        )  # normalized data (divided by eps)
        self.norma_calc = norma_calc_obj.normalised_miller_dev_eps.f_sq_as_f(
        )  # as above, for calculated data
        self.norma_mock = norma_mock_obj.normalised_miller_dev_eps.f_sq_as_f(
        )  # as above, for mock data
        self.norma_obs_const = norma_obs_obj.normalizer_for_miller_array  # the divisor (no eps)
        self.norma_calc_const = norma_calc_obj.normalizer_for_miller_array  # as above
        self.norma_mock_const = norma_mock_obj.normalizer_for_miller_array  # as above

        self.thres = thres

        self.n_reso_bins = n_reso_bins
        self.n_e_bins = n_e_bins
        # first set up a binner please
        self.miller_obs.setup_binner(n_bins=self.n_reso_bins)
        self.miller_calc.use_binner_of(self.miller_obs)
        self.miller_mock.use_binner_of(self.miller_obs)
        self.norma_obs.use_binner_of(self.miller_obs)
        self.norma_calc.use_binner_of(self.miller_calc)
        self.norma_mock.use_binner_of(self.miller_mock)

        self.new_norma_obs = self.norma_obs.deep_copy().set_observation_type(
            self.norma_obs)

        self.new_obs = None
        self.swap_it()
        # we have to denormalize the data now
        self.new_obs = self.norma_obs.customized_copy(
            data=self.new_norma_obs.data() *
            self.new_norma_obs.epsilons().data().as_double() *
            flex.sqrt(self.norma_calc_const),
            sigmas=self.new_norma_obs.sigmas() *
            self.new_norma_obs.epsilons().data().as_double() *
            flex.sqrt(self.norma_calc_const)).set_observation_type(
                self.miller_obs)
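The customized_copy at the end undoes the kernel normalisation applied earlier. Factored out, the same denormalisation step (a sketch mirroring the exact expression used in this example, not a general recipe; it assumes the array carries sigmas) could be written as a small helper:

from cctbx.array_family import flex

def denormalise(e_array, normalizer_const):
    # mirrors the expression above: scale the normalised values by the epsilon
    # factors and by the square root of the kernel normaliser
    eps = e_array.epsilons().data().as_double()
    return e_array.customized_copy(
        data=e_array.data() * eps * flex.sqrt(normalizer_const),
        sigmas=e_array.sigmas() * eps * flex.sqrt(normalizer_const))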
Example #2
def test_kernel_based_normalisation():
    miller_array = random_data(35.0, d_min=2.5)
    normalizer = absolute_scaling.kernel_normalisation(miller_array,
                                                       auto_kernel=True)
    z_values = normalizer.normalised_miller.data()/\
               normalizer.normalised_miller.epsilons().data().as_double()
    z_values = flex.mean(z_values)
    assert approx_equal(1.0, z_values, eps=0.05)
    # This should raise an error rather than enter an infinite loop
    with raises(AssertionError) as e:
        absolute_scaling.kernel_normalisation(
            miller_array[:1].set_observation_type_xray_amplitude(),
            auto_kernel=True)
Example #3
  def __init__(self,
               miller_obs,
               miller_calc,
               miller_mock,
               n_reso_bins=25,
               n_e_bins = 20,
               thres=3.0):
    self.miller_obs = miller_obs
    self.miller_calc = miller_calc
    self.miller_mock = miller_mock

    # work on a common set of reflections so the three arrays stay aligned
    self.miller_calc = self.miller_calc.common_set( self.miller_obs )
    self.miller_mock = self.miller_mock.common_set( self.miller_obs )


    # we need to normalise the data, both fobs and fcalc
    norma_obs_obj = absolute_scaling.kernel_normalisation( self.miller_obs,auto_kernel=True )
    norma_calc_obj = absolute_scaling.kernel_normalisation( self.miller_calc,auto_kernel=True )
    norma_mock_obj = absolute_scaling.kernel_normalisation( self.miller_mock,auto_kernel=True )
    self.norma_obs  = norma_obs_obj.normalised_miller_dev_eps.f_sq_as_f()           # normalized data (divided by eps)
    self.norma_calc = norma_calc_obj.normalised_miller_dev_eps.f_sq_as_f()          # as above, for calculated data
    self.norma_mock = norma_mock_obj.normalised_miller_dev_eps.f_sq_as_f()          # as above, for mock data
    self.norma_obs_const =  norma_obs_obj.normalizer_for_miller_array   # the divisor (no eps)
    self.norma_calc_const = norma_calc_obj.normalizer_for_miller_array  # as above
    self.norma_mock_const = norma_mock_obj.normalizer_for_miller_array  # as above

    self.thres = thres

    self.n_reso_bins = n_reso_bins
    self.n_e_bins = n_e_bins
    # first set up a binner please
    self.miller_obs.setup_binner(n_bins = self.n_reso_bins )
    self.miller_calc.use_binner_of( self.miller_obs )
    self.miller_mock.use_binner_of( self.miller_obs )
    self.norma_obs.use_binner_of( self.miller_obs )
    self.norma_calc.use_binner_of( self.miller_calc )
    self.norma_mock.use_binner_of( self.miller_mock )

    self.new_norma_obs = self.norma_obs.deep_copy().set_observation_type( self.norma_obs )

    self.new_obs = None
    self.swap_it()
    # we have to denormalize the data now
    self.new_obs = self.norma_obs.customized_copy(
      data   = self.new_norma_obs.data()*self.new_norma_obs.epsilons().data().as_double()*flex.sqrt(self.norma_calc_const),
      sigmas = self.new_norma_obs.sigmas()*self.new_norma_obs.epsilons().data().as_double()*flex.sqrt(self.norma_calc_const)
    ).set_observation_type( self.miller_obs )
Example #4
def read_target_files(target_files, d_min, d_max, normalization, log_out):
    ret = collections.OrderedDict()
    for i, f in enumerate(target_files):
        f = iotbx.file_reader.any_file(f,
                                       force_type="hkl",
                                       raise_sorry_if_errors=True)
        arrays = f.file_server.get_miller_arrays(None)
        scores = iotbx.reflection_file_utils.get_xray_data_scores(
            arrays,
            ignore_all_zeros=True,
            prefer_anomalous=False,
            prefer_amplitudes=False)
        array = arrays[scores.index(max(scores))]

        log_out.write("# target%.3d = %s %s\n" %
                      (i, array.info(), array.d_max_min()))

        if array.anomalous_flag(): array = array.average_bijvoet_mates()
        array = array.as_intensity_array().resolution_filter(d_max=d_max,
                                                             d_min=d_min)

        if normalization == "E":
            normaliser = kernel_normalisation(array, auto_kernel=True)
            ret[f] = array.customized_copy(
                data=array.data() / normaliser.normalizer_for_miller_array,
                sigmas=array.sigmas() / normaliser.normalizer_for_miller_array
                if array.sigmas() else None)
        else:
            ret[f] = array

    return ret
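A minimal call of the helper above might look like the sketch below; the file names and resolution limits are placeholders, and note that, as written, the keys of the returned dict are the opened file objects (assumed here to expose a file_name attribute):

import sys

targets = read_target_files(["target1.sca", "target2.sca"],  # placeholder paths
                            d_min=2.0, d_max=30.0,
                            normalization="E", log_out=sys.stdout)
for fobj, array in targets.items():
    sys.stdout.write("%s: %d reflections\n" % (fobj.file_name, array.size()))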
Example #5
 def normalise_all(self):
     ## normalise all difference data please
     normalised = []
     for diff_set in self.ano_and_iso:
         tmp_norm = absolute_scaling.kernel_normalisation(diff_set,
                                                          auto_kernel=True)
         normalised.append(tmp_norm.normalised_miller.deep_copy(
         ).set_observation_type(tmp_norm.normalised_miller))
     # keep the results: rebinding the loop variable alone would discard the
     # normalised arrays
     self.ano_and_iso = normalised
Example #6
def test_kernel_based_normalisation():
  miller_array = random_data(35.0, d_min=2.5 )
  normalizer = absolute_scaling.kernel_normalisation(
    miller_array, auto_kernel=True)
  z_values = normalizer.normalised_miller.data()/\
             normalizer.normalised_miller.epsilons().data().as_double()
  z_values = flex.mean(z_values)
  assert approx_equal(1.0,z_values,eps=0.05)
Example #7
 def normalise_all(self):
   ## normalise all difference data please
   normalised = []
   for diff_set in self.ano_and_iso:
     tmp_norm = absolute_scaling.kernel_normalisation(
       diff_set,
       auto_kernel=True)
     normalised.append(tmp_norm.normalised_miller.deep_copy().set_observation_type(
       tmp_norm.normalised_miller))
   # keep the results: rebinding the loop variable alone would discard the
   # normalised arrays
   self.ano_and_iso = normalised
Example #8
def test_kernel_based_normalisation():
    miller_array = random_data(35.0, d_min=2.5)
    normalizer = absolute_scaling.kernel_normalisation(miller_array,
                                                       auto_kernel=True)
    z_values = normalizer.normalised_miller.data()/\
               normalizer.normalised_miller.epsilons().data().as_double()
    z_values = flex.mean(z_values)
    assert approx_equal(1.0, z_values, eps=0.05)
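The test excerpts above (Examples #2, #6 and #8) are not self-contained; to run them stand-alone they need roughly the following imports, where random_data is a helper from the same test module (assumed to return an amplitude miller.array) and raises is only needed for the variant that checks the AssertionError:

from pytest import raises
from libtbx.test_utils import approx_equal
from cctbx.array_family import flex
from mmtbx.scaling import absolute_scaling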
Example #9
    def __init__(self, lambda1, lambda2, k1, k2, options, out=None):
        self.out = out
        if self.out == None:
            self.out = sys.stdout

        self.options = options
        print("FA estimation", file=self.out)
        print("=============", file=self.out)

        if k1 is None:
            raise Sorry(
                "f\"(w1)/f\"(w2) ratio is not defined. Please provide f\" values upon input"
            )

        if k2 is None:
            if self.options.protocol == 'algebraic':
                raise Sorry("""
delta f' f\" ratio is not defined.
Either provide f' and f\" values upon input,
or choose a different Fa estimation protocol.
               """)

        self.options = options

        protocol = {'algebraic': False, 'cns': False, 'combine_ano': False}
        protocol[self.options.protocol] = True

        self.fa_values = None

        if protocol['algebraic']:
            print(" Using algebraic approach to estimate FA values ",
                  file=self.out)
            print(file=self.out)
            tmp = singh_ramasheshan_fa_estimate(lambda1, lambda2, k1, k2)
            self.fa_values = tmp.fa.f_sq_as_f()

        if protocol['cns']:
            print(" Using CNS approach to estimate FA values ", file=self.out)
            print(file=self.out)

            tmp = cns_fa_driver([lambda1, lambda2])
            self.fa_values = tmp.fa

        if protocol['combine_ano']:
            print(" Combining anomalous data only", file=self.out)
            print(file=self.out)

            tmp = mum_dad(lambda1, lambda2, k1)
            self.fa_values = tmp.dad

        norma = absolute_scaling.kernel_normalisation(self.fa_values,
                                                      auto_kernel=True)

        self.fa_values = norma.normalised_miller.f_sq_as_f()
Example #10
    def kernel_normalisation(intensities):
        """Kernel normalisation of the input intensities.

        Args:
          intensities (cctbx.miller.array): The intensities to be normalised.

        Returns:
          cctbx.miller.array: The normalised intensities.
        """
        normalisation = absolute_scaling.kernel_normalisation(intensities,
                                                              auto_kernel=True)
        return normalisation.normalised_miller.deep_copy().set_info(
            intensities.info())
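A quick sanity check for the helper above, along the lines of the tests elsewhere on this page (a sketch only; it assumes intensities is a cctbx.miller.array of intensities, that flex is imported, and that the helper can be called directly with the array):

normalised = kernel_normalisation(intensities)
z = normalised.data() / normalised.epsilons().data().as_double()
assert abs(flex.mean(z) - 1.0) < 0.05  # normalised intensities should average to ~1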
Example #11
  def __init__(self,
               ano,
               iso,
               options,
               out=None):
    if out == None:
      out = sys.stdout

    ## get stuff
    self.options = options
    self.iso = iso.deep_copy().map_to_asu()
    self.ano = ano.deep_copy().map_to_asu()
    ## get common sets
    self.iso, self.ano = self.iso.common_sets( self.ano )

    ## perform normalisation
    normalizer_iso = absolute_scaling.kernel_normalisation(
      self.iso, auto_kernel=True, n_term=options.number_of_terms_in_normalisation_curve)
    normalizer_ano = absolute_scaling.kernel_normalisation(
      self.ano, auto_kernel=True, n_term=options.number_of_terms_in_normalisation_curve)

    self.fa = self.iso.customized_copy(
      data = flex.sqrt( self.iso.data()*self.iso.data()\
               /normalizer_iso.normalizer_for_miller_array
               +
               self.ano.data()*self.ano.data()\
               /normalizer_ano.normalizer_for_miller_array
              ),
      sigmas = flex.sqrt( self.iso.sigmas()*self.iso.sigmas()\
               /(normalizer_iso.normalizer_for_miller_array*
                 normalizer_iso.normalizer_for_miller_array
                 )
               +
               self.ano.sigmas()*self.ano.sigmas()\
               /(normalizer_ano.normalizer_for_miller_array
                 *normalizer_ano.normalizer_for_miller_array)
              ))
Example #12
    def __init__(self, ano, iso, options, out=None):
        if out == None:
            out = sys.stdout

        ## get stuff
        self.options = options
        self.iso = iso.deep_copy().map_to_asu()
        self.ano = ano.deep_copy().map_to_asu()
        ## get common sets
        self.iso, self.ano = self.iso.common_sets(self.ano)

        ## perform normalisation
        normalizer_iso = absolute_scaling.kernel_normalisation(
            self.iso,
            auto_kernel=True,
            n_term=options.number_of_terms_in_normalisation_curve)
        normalizer_ano = absolute_scaling.kernel_normalisation(
            self.ano,
            auto_kernel=True,
            n_term=options.number_of_terms_in_normalisation_curve)

        self.fa = self.iso.customized_copy(
          data = flex.sqrt( self.iso.data()*self.iso.data()\
                   /normalizer_iso.normalizer_for_miller_array
                   +
                   self.ano.data()*self.ano.data()\
                   /normalizer_ano.normalizer_for_miller_array
                  ),
          sigmas = flex.sqrt( self.iso.sigmas()*self.iso.sigmas()\
                   /(normalizer_iso.normalizer_for_miller_array*
                     normalizer_iso.normalizer_for_miller_array
                     )
                   +
                   self.ano.sigmas()*self.ano.sigmas()\
                   /(normalizer_ano.normalizer_for_miller_array
                     *normalizer_ano.normalizer_for_miller_array)
                  ))
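In both versions above, the F_A estimate combines the normalised isomorphous and anomalous differences in quadrature,

    F_A = sqrt( dF_iso**2 / S_iso + dF_ano**2 / S_ano ),

where S_iso and S_ano are the kernel normalisers (normalizer_for_miller_array) of the two difference arrays, and the sigmas are combined with the same weights as sqrt( sig_iso**2 / S_iso**2 + sig_ano**2 / S_ano**2 ).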
Example #13
def run(params, target_files):
    assert params.normalization in ("no", "E")
    ofs = open(params.dat_out, "w")

    xac_files = util.read_path_list(params.lstin)
    targets = read_target_files(target_files, params.d_min, params.d_max,
                                params.normalization, ofs)

    cellcon = CellConstraints(targets.values()[0].space_group())

    #for i, t in enumerate(targets): ofs.write("# target%.3d = %s\n" % (i,t))
    ofs.write("# normalization = %s\n" % params.normalization)
    ofs.write("# d_min, d_max = %s, %s\n" % (params.d_min, params.d_max))
    ofs.write("file %s " % cellcon.get_label_for_free_params())
    ofs.write(" ".join(
        map(lambda x: "cc.%.3d nref.%.3d" % (x, x), xrange(len(targets)))))
    ofs.write("\n")

    for xac_file in xac_files:
        print "reading", xac_file
        xac = xds_ascii.XDS_ASCII(xac_file)
        xac.remove_rejected()
        iobs = xac.i_obs(anomalous_flag=False).merge_equivalents(
            use_internal_variance=False).array()
        ofs.write("%s %s" %
                  (xac_file, cellcon.format_free_params(iobs.unit_cell())))
        fail_flag = False
        if params.normalization == "E":
            try:
                normaliser = kernel_normalisation(iobs, auto_kernel=True)
                iobs = iobs.customized_copy(
                    data=iobs.data() / normaliser.normalizer_for_miller_array,
                    sigmas=iobs.sigmas() /
                    normaliser.normalizer_for_miller_array)
            except:
                fail_flag = True

        for i, ta in enumerate(targets.values()):
            if fail_flag:
                # normalisation failed above; write placeholder values instead
                # of referencing an undefined cc_num
                ofs.write(" % .4f %4d" % (float("nan"), 0))
            else:
                cc_num = calc_cc(iobs, ta)
                ofs.write(" % .4f %4d" % cc_num)

        ofs.write("\n")
Example #14
    def __init__(self, miller_obs, r_free_flags, out=None):
        self.out = out
        if self.out is None:
            self.out = sys.stdout
        if out == "silent":
            self.out = null_out()

        # the original miller array
        self.miller_obs = miller_obs

        if self.miller_obs.observation_type() is None:
            raise Sorry("Unknown observation type")

        # we make a working copy of the above miller array
        self.work_obs = self.miller_obs.deep_copy().set_observation_type(
            self.miller_obs)

        if not self.work_obs.is_xray_intensity_array():
            self.work_obs = self.work_obs.f_as_f_sq()

        if not self.miller_obs.is_xray_amplitude_array():
            self.miller_obs = self.miller_obs.f_sq_as_f()

        self.r_free_flags = r_free_flags

        #-----------------------
        # These calculations are needed for wilson based outlier rejection
        #
        # Normalize the data
        normalizer = absolute_scaling.kernel_normalisation(self.work_obs,
                                                           auto_kernel=True)
        self.norma_work = self.work_obs.customized_copy(
            data=normalizer.normalised_miller.data() /
            normalizer.normalised_miller.epsilons().data().as_double())
        assert (flex.min(self.norma_work.data()) >= 0)
        # split things into centric and acentric sets please
        self.centric_work = self.norma_work.select_centric(
        ).set_observation_type(self.norma_work)
        self.acentric_work = self.norma_work.select_acentric(
        ).set_observation_type(self.norma_work)
Example #15
    def __init__(self, miller_obs, r_free_flags, out=None):
        self.out = out
        if self.out is None:
            self.out = sys.stdout
        if out == "silent":
            self.out = null_out()

        # the original miller array
        self.miller_obs = miller_obs

        if self.miller_obs.observation_type() is None:
            raise Sorry("Unknown observation type")

        # we make a working copy of the above miller array
        self.work_obs = self.miller_obs.deep_copy().set_observation_type(self.miller_obs)

        if not self.work_obs.is_xray_intensity_array():
            self.work_obs = self.work_obs.f_as_f_sq()

        if not self.miller_obs.is_xray_amplitude_array():
            self.miller_obs = self.miller_obs.f_sq_as_f()

        self.r_free_flags = r_free_flags

        # -----------------------
        # These calculations are needed for wilson based outlier rejection
        #
        # Normalize the data
        normalizer = absolute_scaling.kernel_normalisation(self.work_obs, auto_kernel=True)
        self.norma_work = self.work_obs.customized_copy(
            data=normalizer.normalised_miller.data() / normalizer.normalised_miller.epsilons().data().as_double()
        )
        assert flex.min(self.norma_work.data()) >= 0
        # split things into centric and acentric sets please
        self.centric_work = self.norma_work.select_centric().set_observation_type(self.norma_work)
        self.acentric_work = self.norma_work.select_acentric().set_observation_type(self.norma_work)
Example #16
  def __init__(self,
               miller_obs,
               miller_calc,
               r_free_flags,
               ta_d,
               kernel_width_free_reflections=None,
               kernel_width_d_star_cubed=None,
               kernel_in_bin_centers=False,
               kernel_on_chebyshev_nodes=True,
               n_sampling_points=20,
               n_chebyshev_terms=10,
               use_sampling_sum_weights=False,
               make_checks_and_clean_up=True):
    assert [kernel_width_free_reflections, kernel_width_d_star_cubed].count(None) == 1

    self.miller_obs = miller_obs
    self.miller_calc = abs(miller_calc)
    self.r_free_flags = r_free_flags
    self.kernel_width_free_reflections = kernel_width_free_reflections
    self.kernel_width_d_star_cubed = kernel_width_d_star_cubed
    self.n_chebyshev_terms = n_chebyshev_terms
    self.ta_d = ta_d


    if make_checks_and_clean_up:
      self.miller_obs = self.miller_obs.map_to_asu()
      self.miller_calc = self.miller_calc.map_to_asu()
      self.r_free_flags = self.r_free_flags.map_to_asu()
      assert self.r_free_flags.indices().all_eq(
        self.miller_obs.indices() )
      self.miller_calc = self.miller_calc.common_set(
        self.miller_obs )
      assert self.r_free_flags.indices().all_eq(
        self.miller_calc.indices() )
      assert self.miller_obs.is_real_array()

      if self.miller_obs.is_xray_intensity_array():
        self.miller_obs = self.miller_obs.f_sq_as_f()
      assert self.miller_obs.observation_type() is None or \
             self.miller_obs.is_xray_amplitude_array()

    if self.miller_calc.observation_type() is None:
      self.miller_calc = self.miller_calc.set_observation_type(
        self.miller_obs)

    # get normalized data please
    self.normalized_obs_f = absolute_scaling.kernel_normalisation(
      self.miller_obs, auto_kernel=True)
    self.normalized_obs =self.normalized_obs_f.normalised_miller_dev_eps.f_sq_as_f()

    self.normalized_calc_f = absolute_scaling.kernel_normalisation(
      self.miller_calc, auto_kernel=True)
    self.normalized_calc =self.normalized_calc_f.normalised_miller_dev_eps.f_sq_as_f()

    # get the 'free data'
    self.free_norm_obs = self.normalized_obs.select( self.r_free_flags.data() )
    self.free_norm_calc= self.normalized_calc.select( self.r_free_flags.data() )

    if self.free_norm_obs.data().size() <= 0:
      raise RuntimeError("No free reflections.")

#    if (self.kernel_width_d_star_cubed is None):
#      self.kernel_width_d_star_cubed=sigmaa_estimator_kernel_width_d_star_cubed(
#        r_free_flags=self.r_free_flags,
#        kernel_width_free_reflections=self.kernel_width_free_reflections)

#    self.sigma_target_functor = ext.sigmaa_estimator(
#      e_obs     = self.free_norm_obs.data(),
#      e_calc    = self.free_norm_calc.data(),
#      centric   = self.free_norm_obs.centric_flags().data(),
#      d_star_cubed = self.free_norm_obs.d_star_cubed().data() ,
#      width=self.kernel_width_d_star_cubed)

#    d_star_cubed_overall = self.miller_obs.d_star_cubed().data()
#    self.min_h = flex.min( d_star_cubed_overall )
#    self.max_h = flex.max( d_star_cubed_overall )
#    self.h_array = None
#    if (kernel_in_bin_centers):
#      self.h_array = flex.double( xrange(1,n_sampling_points*2,2) )*(
#        self.max_h-self.min_h)/(n_sampling_points*2)+self.min_h
#    else:
#      self.min_h *= 0.99
#      self.max_h *= 1.01
#      if kernel_on_chebyshev_nodes:
#        self.h_array = chebyshev_lsq_fit.chebyshev_nodes(
#          n=n_sampling_points,
#          low=self.min_h,
#          high=self.max_h,
#          include_limits=True)
#      else:
#        self.h_array = flex.double( range(n_sampling_points) )*(
#          self.max_h-self.min_h)/float(n_sampling_points-1.0)+self.min_h

#    assert self.h_array.size() == n_sampling_points
#    self.sigmaa_array = flex.double()
#    self.sigmaa_array.reserve(self.h_array.size())
#    self.sum_weights = flex.double()
#    self.sum_weights.reserve(self.h_array.size())

#    for h in self.h_array:
#      stimator = sigmaa_point_estimator(self.sigma_target_functor, h)
#      self.sigmaa_array.append( stimator.sigmaa )
#      self.sum_weights.append(
#        self.sigma_target_functor.sum_weights(d_star_cubed=h))

#    # fit a smooth function
#    reparam_sa = -flex.log( 1.0/self.sigmaa_array -1.0 )
#    if (use_sampling_sum_weights):
#      w_obs = flex.sqrt(self.sum_weights)
#    else:
#      w_obs = None
#    fit_lsq = chebyshev_lsq_fit.chebyshev_lsq_fit(
#      n_terms=self.n_chebyshev_terms,
#      x_obs=self.h_array,
#      y_obs=reparam_sa,
#      w_obs=w_obs)

#    cheb_pol = chebyshev_polynome(
#        self.n_chebyshev_terms,
#        self.min_h,
#        self.max_h,
#        fit_lsq.coefs)
#    def reverse_reparam(values): return 1.0/(1.0 + flex.exp(-values))
#    self.sigmaa_fitted = reverse_reparam(cheb_pol.f(self.h_array))
#    self.sigmaa_miller_array = reverse_reparam(cheb_pol.f(d_star_cubed_overall))
#    assert flex.min(self.sigmaa_miller_array) >= 0
#    assert flex.max(self.sigmaa_miller_array) <= 1
#    self.sigmaa_miller_array = self.miller_obs.array(data=self.sigmaa_miller_array)

    self.alpha = None
    self.beta = None
    self.fom_array = None
    self.ta_d_miller = self.miller_obs.array(data=self.ta_d)
Example #17
def run(params, mtzfiles):
    arrays = get_arrays(mtzfiles, d_min=params.dmin, d_max=params.dmax)

    if params.take_common:
        arrays = commonalize(arrays)

    maxlen_f = max(map(lambda x: len(x[0]), arrays))

    ref_f_obs = arrays[0][1]

    scales = []
    for f, f_obs, f_model, flag in arrays:
        if ref_f_obs == f_obs: k, B = 1., 0
        else: k, B = kBdecider(ref_f_obs, f_obs).run()

        scales.append((k, B))

    if params.reference != "first":
        if params.reference == "bmin": # scale to strongest
            kref, bref = max(scales, key=lambda x:x[1])
        elif params.reference == "bmax": # scale to most weak
            kref, bref = min(scales, key=lambda x:x[1])
        elif params.reference == "bmed": # scale to most weak
            perm = range(len(scales))
            perm.sort(key=lambda i:scales[i][1])
            kref, bref = scales[perm[len(perm)//2]]
        else:
            raise RuntimeError("Never reaches here")

        print "# Set K=%.2f B=%.2f as reference" % (kref,bref)
        scales = map(lambda x: (x[0]/kref, x[1]-bref), scales) # not bref-x[1], because negated later

    print ("%"+str(maxlen_f)+"s r_work r_free cc_work.E cc_free.E sigmaa fom k B") % "filename"
    for (f, f_obs, f_model, flag), (k, B) in zip(arrays, scales):
        d_star_sq = f_obs.d_star_sq().data()
        scale = k * flex.exp(-B*d_star_sq)
        
        # Normalized
        #f_obs.setup_binner(auto_binning=True)
        #f_model.setup_binner(auto_binning=True)
        #e_obs, e_model = map(lambda x:x.quasi_normalize_structure_factors(), (f_obs, f_model))
        e_obs = absolute_scaling.kernel_normalisation(f_obs.customized_copy(data=f_obs.data()*scale, sigmas=None), auto_kernel=True)
        e_obs = e_obs.normalised_miller_dev_eps.f_sq_as_f()
        e_model = absolute_scaling.kernel_normalisation(f_model.customized_copy(data=f_model.data()*scale, sigmas=None), auto_kernel=True)
        e_model = e_model.normalised_miller_dev_eps.f_sq_as_f()

        f_obs_w, f_obs_t = f_obs.select(~flag.data()), f_obs.select(flag.data())
        f_model_w, f_model_t = f_model.select(~flag.data()), f_model.select(flag.data())

        e_obs_w, e_obs_t = e_obs.select(~flag.data()), e_obs.select(flag.data())
        e_model_w, e_model_t = e_model.select(~flag.data()), e_model.select(flag.data())

        r_work = calc_r(f_obs_w, f_model_w, scale.select(~flag.data()))
        r_free = calc_r(f_obs_t, f_model_t, scale.select(flag.data()))

        cc_work_E = calc_cc(e_obs_w, e_model_w, False)
        cc_free_E = calc_cc(e_obs_t, e_model_t, False)
        #cc_work_E2 = calc_cc(e_obs_w, e_model_w, True)
        #cc_free_E2 = calc_cc(e_obs_t, e_model_t, True)

        se = calc_sigmaa(f_obs, f_model, flag)
        sigmaa = flex.mean(se.sigmaa().data())
        fom = flex.mean(se.fom().data())

        print ("%"+str(maxlen_f)+"s %.4f %.4f % 7.4f % 7.4f %.4e %.4e %.3e %.3e") % (f, r_work, r_free, cc_work_E, cc_free_E, sigmaa, fom, k, B)
Example #18
    def do_clustering(self,
                      nproc=1,
                      b_scale=False,
                      use_normalized=False,
                      html_maker=None):
        self.clusters = {}
        prefix = os.path.join(self.wdir, "cctable")
        assert (b_scale, use_normalized).count(True) <= 1

        if len(self.arrays) < 2:
            print "WARNING: less than two data! can't do cc-based clustering"
            self.clusters[1] = [float("nan"), [0]]
            return

        # Absolute scaling using Wilson-B factor
        if b_scale:
            from mmtbx.scaling.matthews import p_vm_calculator
            from mmtbx.scaling.absolute_scaling import ml_iso_absolute_scaling

            ofs_wilson = open("%s_wilson_scales.dat" % prefix, "w")
            n_residues = p_vm_calculator(self.arrays.values()[0], 1,
                                         0).best_guess
            ofs_wilson.write("# guessed n_residues= %d\n" % n_residues)
            ofs_wilson.write("file wilsonB\n")
            for f in self.arrays:
                arr = self.arrays[f]
                iso_scale_and_b = ml_iso_absolute_scaling(arr, n_residues, 0)
                wilson_b = iso_scale_and_b.b_wilson
                ofs_wilson.write("%s %.3f\n" % (f, wilson_b))
                if wilson_b > 0:  # Ignoring data with B<0? is a bad idea.. but how..?
                    tmp = flex.exp(-2. * wilson_b *
                                   arr.unit_cell().d_star_sq(arr.indices()) /
                                   4.)
                    self.arrays[f] = arr.customized_copy(data=arr.data() * tmp,
                                                         sigmas=arr.sigmas() *
                                                         tmp)
            ofs_wilson.close()

        elif use_normalized:
            from mmtbx.scaling.absolute_scaling import kernel_normalisation
            for f in self.arrays:
                arr = self.arrays[f]
                normaliser = kernel_normalisation(arr, auto_kernel=True)
                self.arrays[f] = arr.customized_copy(
                    data=arr.data() / normaliser.normalizer_for_miller_array,
                    sigmas=arr.sigmas() /
                    normaliser.normalizer_for_miller_array)
        # Prep
        args = []
        for i in xrange(len(self.arrays) - 1):
            for j in xrange(i + 1, len(self.arrays)):
                args.append((i, j))

        # Calc all CC
        if self.use_sfdist:
            worker = lambda x: calc_sfdist(self.arrays.values()[x[0]],
                                           self.arrays.values()[x[1]])
        else:
            worker = lambda x: calc_cc(self.arrays.values()[x[0]],
                                       self.arrays.values()[x[1]])
        results = easy_mp.pool_map(fixed_func=worker,
                                   args=args,
                                   processes=nproc)

        # Check NaN and decide which data to remove
        idx_bad = {}
        nans = []
        cc_data_for_html = []
        for (i, j), (cc, nref) in zip(args, results):
            cc_data_for_html.append((i, j, cc, nref))
            if cc == cc: continue
            idx_bad[i] = idx_bad.get(i, 0) + 1
            idx_bad[j] = idx_bad.get(j, 0) + 1
            nans.append([i, j])

        if html_maker is not None:
            html_maker.add_cc_clustering_details(cc_data_for_html)

        idx_bad = idx_bad.items()
        idx_bad.sort(key=lambda x: x[1])
        remove_idxes = set()

        for idx, badcount in reversed(idx_bad):
            if len(filter(lambda x: idx in x, nans)) == 0: continue
            remove_idxes.add(idx)
            nans = filter(lambda x: idx not in x, nans)
            if len(nans) == 0: break

        use_idxes = filter(lambda x: x not in remove_idxes,
                           xrange(len(self.arrays)))

        # Make table: original index (in file list) -> new index (in matrix)
        count = 0
        org2now = collections.OrderedDict()
        for i in xrange(len(self.arrays)):
            if i in remove_idxes: continue
            org2now[i] = count
            count += 1

        if len(remove_idxes) > 0:
            open("%s_notused.lst" % prefix, "w").write("\n".join(
                map(lambda x: self.arrays.keys()[x], remove_idxes)))

        # Make matrix
        mat = numpy.zeros(shape=(len(use_idxes), len(use_idxes)))
        for (i, j), (cc, nref) in zip(args, results):
            if i in remove_idxes or j in remove_idxes: continue
            mat[org2now[j], org2now[i]] = cc

        open("%s.matrix" % prefix,
             "w").write(" ".join(map(lambda x: "%.4f" % x, mat.flatten())))

        ofs = open("%s.dat" % prefix, "w")
        ofs.write("   i    j     cc  nref\n")
        for (i, j), (cc, nref) in zip(args, results):
            ofs.write("%4d %4d %.4f %4d\n" % (i, j, cc, nref))

        open("%s_ana.R" % prefix, "w").write("""\
treeToList2 <- function(htree)
{  # stolen from $CCP4/share/blend/R/blend0.R
 groups <- list()
 itree <- dim(htree$merge)[1]
 for (i in 1:itree)
 { 
  il <- htree$merge[i,1]
  ir <- htree$merge[i,2]
  if (il < 0) lab1 <- htree$labels[-il]
  if (ir < 0) lab2 <- htree$labels[-ir]
  if (il > 0) lab1 <- groups[[il]]
  if (ir > 0) lab2 <- groups[[ir]]
  lab <- c(lab1,lab2)
  lab <- as.integer(lab)
  groups <- c(groups,list(lab))
 }
 return(groups)
}

cc<-scan("%(prefix)s.matrix")
md<-matrix(1-cc, ncol=%(ncol)d, byrow=TRUE)
hc <- hclust(as.dist(md),method="ward")
pdf("tree.pdf")
plot(hc)
dev.off()
png("tree.png",height=1000,width=1000)
plot(hc)
dev.off()

hc$labels <- c(%(hclabels)s)
groups <- treeToList2(hc)
cat("ClNumber             Nds         Clheight   IDs\\n",file="./CLUSTERS.txt")
for (i in 1:length(groups))
{
 sorted_groups <- sort(groups[[i]])
 linea <- sprintf("%%04d %%4d %%7.3f %%s\\n",
                  i,length(groups[[i]]),hc$height[i], paste(sorted_groups,collapse=" "))
 cat(linea, file="./CLUSTERS.txt", append=TRUE)
}

# reference: http://www.coppelia.io/2014/07/converting-an-r-hclust-object-into-a-d3-js-dendrogram/
library(rjson)
HCtoJSON<-function(hc){
  labels<-hc$labels
  merge<-data.frame(hc$merge)
  for (i in (1:nrow(merge))) {
    if (merge[i,1]<0 & merge[i,2]<0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(list(name=labels[-merge[i,1]]),list(name=labels[-merge[i,2]])))")))}
    else if (merge[i,1]>0 & merge[i,2]<0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(node", merge[i,1], ", list(name=labels[-merge[i,2]])))")))}
    else if (merge[i,1]<0 & merge[i,2]>0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(list(name=labels[-merge[i,1]]), node", merge[i,2],"))")))}
    else if (merge[i,1]>0 & merge[i,2]>0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(node",merge[i,1] , ", node" , merge[i,2]," ))")))}
  }
  eval(parse(text=paste0("JSON<-toJSON(node",nrow(merge), ")")))
  return(JSON)
}

JSON<-HCtoJSON(hc)
cat(JSON, file="dendro.json")

q(save="yes")
""" % dict(prefix=os.path.basename(prefix),
           ncol=len(self.arrays),
           hclabels=",".join(map(lambda x: "%d" % (x + 1), org2now.keys()))))

        call(cmd="Rscript",
             arg="%s_ana.R" % os.path.basename(prefix),
             wdir=self.wdir)

        output = open(os.path.join(self.wdir, "CLUSTERS.txt")).readlines()
        for l in output[1:]:
            sp = l.split()
            clid, clheight, ids = sp[0], sp[2], sp[3:]
            self.clusters[int(clid)] = [float(clheight), map(int, ids)]
Example #19
  def __init__(self,
               miller_obs,
               miller_calc,
               r_free_flags,
               kernel_width_free_reflections=None,
               kernel_width_d_star_cubed=None,
               kernel_in_bin_centers=False,
               kernel_on_chebyshev_nodes=True,
               n_sampling_points=20,
               n_chebyshev_terms=10,
               use_sampling_sum_weights=False,
               make_checks_and_clean_up=True):
    assert [kernel_width_free_reflections, kernel_width_d_star_cubed].count(None) == 1

    self.miller_obs = miller_obs
    self.miller_calc = abs(miller_calc)
    self.r_free_flags = r_free_flags
    self.kernel_width_free_reflections = kernel_width_free_reflections
    self.kernel_width_d_star_cubed = kernel_width_d_star_cubed
    self.n_chebyshev_terms = n_chebyshev_terms

    if make_checks_and_clean_up:
      self.miller_obs = self.miller_obs.map_to_asu()
      self.miller_calc = self.miller_calc.map_to_asu()
      self.r_free_flags = self.r_free_flags.map_to_asu()
      assert self.r_free_flags.indices().all_eq(
        self.miller_obs.indices() )
      self.miller_calc = self.miller_calc.common_set(
        self.miller_obs )
      assert self.r_free_flags.indices().all_eq(
        self.miller_calc.indices() )
      assert self.miller_obs.is_real_array()

      if self.miller_obs.is_xray_intensity_array():
        self.miller_obs = self.miller_obs.f_sq_as_f()
      assert self.miller_obs.observation_type() is None or \
             self.miller_obs.is_xray_amplitude_array()

    if self.miller_calc.observation_type() is None:
      self.miller_calc = self.miller_calc.set_observation_type(
        self.miller_obs)

    # get normalized data please
    self.normalized_obs_f = absolute_scaling.kernel_normalisation(
      self.miller_obs, auto_kernel=True)
    self.normalized_obs =self.normalized_obs_f.normalised_miller_dev_eps.f_sq_as_f()

    self.normalized_calc_f = absolute_scaling.kernel_normalisation(
      self.miller_calc, auto_kernel=True)
    self.normalized_calc =self.normalized_calc_f.normalised_miller_dev_eps.f_sq_as_f()

    # get the 'free data'

    if(self.r_free_flags.data().count(True) == 0):
      self.r_free_flags = self.r_free_flags.array(
        data = ~self.r_free_flags.data())

    self.free_norm_obs = self.normalized_obs.select( self.r_free_flags.data() )
    self.free_norm_calc= self.normalized_calc.select( self.r_free_flags.data() )

    if self.free_norm_obs.data().size() <= 0:
      raise RuntimeError("No free reflections.")

    if (self.kernel_width_d_star_cubed is None):
      self.kernel_width_d_star_cubed=sigmaa_estimator_kernel_width_d_star_cubed(
        r_free_flags=self.r_free_flags,
        kernel_width_free_reflections=self.kernel_width_free_reflections)

    self.sigma_target_functor = ext.sigmaa_estimator(
      e_obs     = self.free_norm_obs.data(),
      e_calc    = self.free_norm_calc.data(),
      centric   = self.free_norm_obs.centric_flags().data(),
      d_star_cubed = self.free_norm_obs.d_star_cubed().data() ,
      width=self.kernel_width_d_star_cubed)

    d_star_cubed_overall = self.miller_obs.d_star_cubed().data()
    self.min_h = flex.min( d_star_cubed_overall )
    self.max_h = flex.max( d_star_cubed_overall )
    self.h_array = None
    if (kernel_in_bin_centers):
      self.h_array = flex.double( range(1,n_sampling_points*2,2) )*(
        self.max_h-self.min_h)/(n_sampling_points*2)+self.min_h
    else:
      self.min_h *= 0.99
      self.max_h *= 1.01
      if kernel_on_chebyshev_nodes:
        self.h_array = chebyshev_lsq_fit.chebyshev_nodes(
          n=n_sampling_points,
          low=self.min_h,
          high=self.max_h,
          include_limits=True)
      else:
        self.h_array = flex.double( range(n_sampling_points) )*(
          self.max_h-self.min_h)/float(n_sampling_points-1.0)+self.min_h
    assert self.h_array.size() == n_sampling_points
    self.sigmaa_array = flex.double()
    self.sigmaa_array.reserve(self.h_array.size())
    self.sum_weights = flex.double()
    self.sum_weights.reserve(self.h_array.size())

    for h in self.h_array:
      stimator = sigmaa_point_estimator(self.sigma_target_functor, h)
      self.sigmaa_array.append( stimator.sigmaa )
      self.sum_weights.append(
        self.sigma_target_functor.sum_weights(d_star_cubed=h))

    # fit a smooth function
    reparam_sa = -flex.log( 1.0/self.sigmaa_array -1.0 )
    if (use_sampling_sum_weights):
      w_obs = flex.sqrt(self.sum_weights)
    else:
      w_obs = None
    fit_lsq = chebyshev_lsq_fit.chebyshev_lsq_fit(
      n_terms=self.n_chebyshev_terms,
      x_obs=self.h_array,
      y_obs=reparam_sa,
      w_obs=w_obs)

    cheb_pol = chebyshev_polynome(
        self.n_chebyshev_terms,
        self.min_h,
        self.max_h,
        fit_lsq.coefs)
    def reverse_reparam(values): return 1.0/(1.0 + flex.exp(-values))
    self.sigmaa_fitted = reverse_reparam(cheb_pol.f(self.h_array))
    self.sigmaa_miller_array = reverse_reparam(cheb_pol.f(d_star_cubed_overall))
    assert flex.min(self.sigmaa_miller_array) >= 0
    assert flex.max(self.sigmaa_miller_array) <= 1
    self.sigmaa_miller_array = self.miller_obs.array(data=self.sigmaa_miller_array)

    self.alpha = None
    self.beta = None
    self.fom_array = None
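Note that the Chebyshev fit above is carried out on a logistic reparameterisation of sigmaa: the sampled values are transformed with x = -log(1/sigmaa - 1) before fitting, and the fitted curve is mapped back with sigmaa = 1/(1 + exp(-x)) (reverse_reparam). This keeps the smoothed sigmaa values strictly between 0 and 1, which is what the final assertions check.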
Example #20
    def do_clustering(self,
                      nproc=1,
                      b_scale=False,
                      use_normalized=False,
                      cluster_method="ward",
                      distance_eqn="sqrt(1-cc)",
                      min_common_refs=3,
                      html_maker=None):
        """
        Using correlation as distance metric (for hierarchical clustering)
        https://stats.stackexchange.com/questions/165194/using-correlation-as-distance-metric-for-hierarchical-clustering

        Correlation "Distances" and Hierarchical Clustering
        http://research.stowers.org/mcm/efg/R/Visualization/cor-cluster/index.htm
        """

        self.clusters = {}
        prefix = os.path.join(self.wdir, "cctable")
        assert (b_scale, use_normalized).count(True) <= 1

        distance_eqns = {
            "sqrt(1-cc)": lambda x: numpy.sqrt(1. - x),
            "1-cc": lambda x: 1. - x,
            "sqrt(1-cc^2)": lambda x: numpy.sqrt(1. - x**2),
        }
        cc_to_distance = distance_eqns[
            distance_eqn]  # raises KeyError for unknown options
        assert cluster_method in ("single", "complete", "average", "weighted",
                                  "centroid", "median", "ward"
                                  )  # available methods in scipy

        if len(self.arrays) < 2:
            print "WARNING: less than two data! can't do cc-based clustering"
            self.clusters[1] = [float("nan"), [0]]
            return

        # Absolute scaling using Wilson-B factor
        if b_scale:
            from mmtbx.scaling.matthews import p_vm_calculator
            from mmtbx.scaling.absolute_scaling import ml_iso_absolute_scaling

            ofs_wilson = open("%s_wilson_scales.dat" % prefix, "w")
            n_residues = p_vm_calculator(self.arrays.values()[0], 1,
                                         0).best_guess
            ofs_wilson.write("# guessed n_residues= %d\n" % n_residues)
            ofs_wilson.write("file wilsonB\n")
            for f in self.arrays:
                arr = self.arrays[f]
                iso_scale_and_b = ml_iso_absolute_scaling(arr, n_residues, 0)
                wilson_b = iso_scale_and_b.b_wilson
                ofs_wilson.write("%s %.3f\n" % (f, wilson_b))
                if wilson_b > 0:  # Ignoring data with B<0? is a bad idea.. but how..?
                    tmp = flex.exp(-2. * wilson_b *
                                   arr.unit_cell().d_star_sq(arr.indices()) /
                                   4.)
                    self.arrays[f] = arr.customized_copy(data=arr.data() * tmp,
                                                         sigmas=arr.sigmas() *
                                                         tmp)
            ofs_wilson.close()

        elif use_normalized:
            from mmtbx.scaling.absolute_scaling import kernel_normalisation
            failed = {}
            for f in self.arrays:
                arr = self.arrays[f]
                try:
                    normaliser = kernel_normalisation(arr, auto_kernel=True)
                    self.arrays[f] = arr.customized_copy(
                        data=arr.data() /
                        normaliser.normalizer_for_miller_array,
                        sigmas=arr.sigmas() /
                        normaliser.normalizer_for_miller_array)
                except Exception, e:
                    failed.setdefault(e.message, []).append(f)

            if failed:
                msg = ""
                for r in failed:
                    msg += " %s\n%s\n" % (r, "\n".join(
                        map(lambda x: "  %s" % x, failed[r])))
                raise Sorry(
                    "intensity normalization failed by following reason(s):\n%s"
                    % msg)
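For reference, the distance_eqn and cluster_method options validated at the top of this method correspond to a standard scipy hierarchical clustering step. The sketch below illustrates that step only (the method itself is truncated here and may do this differently); it assumes cc_matrix is a symmetric numpy array of pairwise CCs with ones on the diagonal and uses the default "sqrt(1-cc)" distance:

import numpy
from scipy.cluster import hierarchy
from scipy.spatial.distance import squareform

dist = numpy.sqrt(1.0 - cc_matrix)   # the "sqrt(1-cc)" distance equation
numpy.fill_diagonal(dist, 0.0)       # a distance matrix needs an exactly zero diagonal
linkage = hierarchy.linkage(squareform(dist), method="ward")
clusters = hierarchy.fcluster(linkage, t=0.5, criterion="distance")  # placeholder cut height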
Example #21
class basic_analyses(object):  # XXX is this ever used?
    def __init__(self,
                 miller_array,
                 phil_object,
                 out=None,
                 out_plot=None,
                 miller_calc=None,
                 original_intensities=None,
                 completeness_as_non_anomalous=None,
                 verbose=0):
        if out is None:
            out = sys.stdout
        if verbose > 0:
            print >> out
            print >> out
            print >> out, "Matthews coefficient and Solvent content statistics"
        n_copies_solc = 1.0
        self.nres_known = False
        if (phil_object.scaling.input.asu_contents.n_residues is not None
                or phil_object.scaling.input.asu_contents.n_bases is not None):
            self.nres_known = True
            if (phil_object.scaling.input.asu_contents.sequence_file
                    is not None):
                print >> out, "  warning: ignoring sequence file"
        elif (phil_object.scaling.input.asu_contents.sequence_file
              is not None):
            print >> out, "  determining composition from sequence file %s" % \
              phil_object.scaling.input.asu_contents.sequence_file
            seq_comp = iotbx.bioinformatics.composition_from_sequence_file(
                file_name=phil_object.scaling.input.asu_contents.sequence_file,
                log=out)
            if (seq_comp is not None):
                phil_object.scaling.input.asu_contents.n_residues = seq_comp.n_residues
                phil_object.scaling.input.asu_contents.n_bases = seq_comp.n_bases
                self.nres_known = True
        matthews_results = matthews.matthews_rupp(
            crystal_symmetry=miller_array,
            n_residues=phil_object.scaling.input.asu_contents.n_residues,
            n_bases=phil_object.scaling.input.asu_contents.n_bases,
            out=out,
            verbose=1)
        phil_object.scaling.input.asu_contents.n_residues = matthews_results[0]
        phil_object.scaling.input.asu_contents.n_bases = matthews_results[1]
        n_copies_solc = matthews_results[2]
        self.matthews_results = matthews_results

        if phil_object.scaling.input.asu_contents.n_copies_per_asu is not None:
            n_copies_solc = phil_object.scaling.input.asu_contents.n_copies_per_asu
            self.defined_copies = n_copies_solc
            if verbose > 0:
                print >> out, "Number of copies per asymmetric unit provided"
                print >> out, " Will use user specified value of ", n_copies_solc
        else:
            phil_object.scaling.input.asu_contents.n_copies_per_asu = n_copies_solc
            self.guessed_copies = n_copies_solc

        # first report on I over sigma
        miller_array_new = miller_array
        self.data_strength = None
        miller_array_intensities = miller_array
        if (original_intensities is not None):
            assert original_intensities.is_xray_intensity_array()
            miller_array_intensities = original_intensities
        if miller_array_intensities.sigmas() is not None:
            data_strength = data_statistics.i_sigi_completeness_stats(
                miller_array_intensities,
                isigi_cut=phil_object.scaling.input.parameters.
                misc_twin_parameters.twin_test_cuts.isigi_cut,
                completeness_cut=phil_object.scaling.input.parameters.
                misc_twin_parameters.twin_test_cuts.completeness_cut,
                completeness_as_non_anomalous=completeness_as_non_anomalous)
            data_strength.show(out)
            self.data_strength = data_strength
            if phil_object.scaling.input.parameters.misc_twin_parameters.twin_test_cuts.high_resolution is None:
                if data_strength.resolution_cut > data_strength.resolution_at_least:
                    phil_object.scaling.input.parameters.misc_twin_parameters.twin_test_cuts.high_resolution = data_strength.resolution_at_least
                else:
                    phil_object.scaling.input.parameters.misc_twin_parameters.twin_test_cuts.high_resolution = data_strength.resolution_cut

        ## Isotropic wilson scaling
        if verbose > 0:
            print >> out
            print >> out
            print >> out, "Maximum likelihood isotropic Wilson scaling "

        n_residues = phil_object.scaling.input.asu_contents.n_residues
        n_bases = phil_object.scaling.input.asu_contents.n_bases
        if n_residues is None:
            n_residues = 0
        if n_bases is None:
            n_bases = 0
        if n_bases + n_residues == 0:
            raise Sorry("No scatterers available")
        iso_scale_and_b = absolute_scaling.ml_iso_absolute_scaling(
            miller_array=miller_array_new,
            n_residues=n_residues * miller_array.space_group().order_z() *
            n_copies_solc,
            n_bases=n_bases * miller_array.space_group().order_z() *
            n_copies_solc)
        iso_scale_and_b.show(out=out, verbose=verbose)
        self.iso_scale_and_b = iso_scale_and_b
        ## Store the b and scale values from isotropic ML scaling
        self.iso_p_scale = iso_scale_and_b.p_scale
        self.iso_b_wilson = iso_scale_and_b.b_wilson

        ## Anisotropic ml wilson scaling
        if verbose > 0:
            print >> out
            print >> out
            print >> out, "Maximum likelihood anisotropic Wilson scaling "
        aniso_scale_and_b = absolute_scaling.ml_aniso_absolute_scaling(
            miller_array=miller_array_new,
            n_residues=n_residues * miller_array.space_group().order_z() *
            n_copies_solc,
            n_bases=n_bases * miller_array.space_group().order_z() *
            n_copies_solc)
        aniso_scale_and_b.show(out=out, verbose=1)

        self.aniso_scale_and_b = aniso_scale_and_b

        try:
            b_cart = aniso_scale_and_b.b_cart
        except AttributeError, e:
            print >> out, "*** ERROR ***"
            print >> out, str(e)
            show_exception_info_if_full_testing()
            return

        self.aniso_p_scale = aniso_scale_and_b.p_scale
        self.aniso_u_star = aniso_scale_and_b.u_star
        self.aniso_b_cart = aniso_scale_and_b.b_cart
        # XXX: for GUI
        self.overall_b_cart = getattr(aniso_scale_and_b, "overall_b_cart",
                                      None)

        ## Correcting for anisotropy
        if verbose > 0:
            print >> out, "Correcting for anisotropy in the data"
            print >> out

        b_cart_observed = aniso_scale_and_b.b_cart

        b_trace_average = (b_cart_observed[0] + b_cart_observed[1] +
                           b_cart_observed[2]) / 3.0
        b_trace_min = b_cart_observed[0]
        if b_cart_observed[1] < b_trace_min: b_trace_min = b_cart_observed[1]
        if b_cart_observed[2] < b_trace_min: b_trace_min = b_cart_observed[2]

        if phil_object.scaling.input.optional.aniso.final_b == "eigen_min":
            b_use = aniso_scale_and_b.eigen_values[2]
        elif phil_object.scaling.input.optional.aniso.final_b == "eigen_mean":
            b_use = flex.mean(aniso_scale_and_b.eigen_values)
        elif phil_object.scaling.input.optional.aniso.final_b == "user_b_iso":
            assert phil_object.scaling.input.optional.aniso.b_iso is not None
            b_use = phil_object.scaling.input.optional.aniso.b_iso
        else:
            b_use = 30

        b_cart_aniso_removed = [-b_use, -b_use, -b_use, 0, 0, 0]
        u_star_aniso_removed = adptbx.u_cart_as_u_star(
            miller_array.unit_cell(), adptbx.b_as_u(b_cart_aniso_removed))
        ## I do things in two steps, but it can easily be done in one step;
        ## it is kept separate just for clarity, that's all.
        self.no_aniso_array = absolute_scaling.anisotropic_correction(
            miller_array_new, 0.0, aniso_scale_and_b.u_star)
        self.no_aniso_array = absolute_scaling.anisotropic_correction(
            self.no_aniso_array, 0.0, u_star_aniso_removed)
        self.no_aniso_array = self.no_aniso_array.set_observation_type(
            miller_array)

        ## Make normalised structure factors please

        sel_big = self.no_aniso_array.data() > 1.e+50
        self.no_aniso_array = self.no_aniso_array.array(
            data=self.no_aniso_array.data().set_selected(sel_big, 0))
        self.no_aniso_array = self.no_aniso_array.set_observation_type(
            miller_array)

        normalisation = absolute_scaling.kernel_normalisation(
            self.no_aniso_array, auto_kernel=True)
        self.normalised_miller = normalisation.normalised_miller.deep_copy()

        self.phil_object = phil_object

        ## Some basic statistics and sanity checks follow
        if verbose > 0:
            print >> out, "Some basic intensity statistics follow."
            print >> out

        basic_data_stats = data_statistics.basic_intensity_statistics(
            miller_array,
            aniso_scale_and_b.p_scale,
            aniso_scale_and_b.u_star,
            iso_scale_and_b.scat_info,
            out=out,
            out_plot=out_plot)
        self.basic_data_stats = basic_data_stats
        self.miller_array = basic_data_stats.new_miller

        #relative wilson plot
        self.rel_wilson = None
        if (miller_calc is not None) and (miller_calc.d_min() < 4.0):
            try:
                self.rel_wilson = relative_wilson.relative_wilson(
                    miller_obs=miller_array, miller_calc=miller_calc)
            except RuntimeError, e:
                print >> out, "*** Error calculating relative Wilson plot - skipping."
                print >> out, ""
Example #22
 def kernel_normalisation(self):
   normalisation = absolute_scaling.kernel_normalisation(
     self.intensities, auto_kernel=True)
   self.intensities = normalisation.normalised_miller.deep_copy().set_info(
     self.intensities.info())
Example #23
  def __init__(self,
               lambda1,
               lambda2,
               k1,
               k2,
               options,
               out=None):
    self.out=out
    if self.out==None:
      self.out=sys.stdout

    self.options = options
    print >> self.out, "FA estimation"
    print >> self.out, "============="

    if k1 is None:
      raise Sorry("f\"(w1)/f\"(w2) ratio is not defined. Please provide f\" values upon input")

    if k2 is None:
      if self.options.protocol=='algebraic':
        raise Sorry("""
delta f' f\" ratio is not defined.
Either provide f' and f\" values upon input,
or choose a different Fa estimation protocol.
               """)

    self.options = options

    protocol = {'algebraic': False,
                'cns': False,
                'combine_ano': False}
    protocol[ self.options.protocol ] = True

    self.fa_values = None

    if protocol['algebraic']:
      print >> self.out, " Using algebraic approach to estimate FA values "
      print >> self.out
      tmp = singh_ramasheshan_fa_estimate(
        lambda1,
        lambda2,
        k1,
        k2)
      self.fa_values = tmp.fa.f_sq_as_f()

    if protocol['cns']:
      print >> self.out, " Using CNS approach to estimate FA values "
      print >> self.out

      tmp = cns_fa_driver( [lambda1, lambda2] )
      self.fa_values = tmp.fa

    if protocol['combine_ano']:
      print >> self.out, " Combining anomalous data only"
      print >> self.out

      tmp = mum_dad(
        lambda1,
        lambda2,
        k1)
      self.fa_values = tmp.dad

    norma = absolute_scaling.kernel_normalisation(
      self.fa_values,
      auto_kernel=True)

    self.fa_values = norma.normalised_miller.f_sq_as_f()
Example #24
def run(args):
    import libtbx
    from libtbx import easy_pickle
    from dials.util import log
    from dials.util.options import OptionParser

    parser = OptionParser(
        #usage=usage,
        phil=phil_scope,
        read_reflections=True,
        read_datablocks=False,
        read_experiments=True,
        check_format=False,
        #epilog=help_message
    )

    params, options, args = parser.parse_args(show_diff_phil=False,
                                              return_unhandled=True)

    # Configure the logging
    log.config(params.verbosity,
               info=params.output.log,
               debug=params.output.debug_log)

    from dials.util.version import dials_version
    logger.info(dials_version())

    # Log the diff phil
    diff_phil = parser.diff_phil.as_str()
    if diff_phil != '':
        logger.info('The following parameters have been modified:\n')
        logger.info(diff_phil)

    if params.seed is not None:
        import random
        flex.set_random_seed(params.seed)
        random.seed(params.seed)

    if params.save_plot and not params.animate:
        import matplotlib
        # http://matplotlib.org/faq/howto_faq.html#generate-images-without-having-a-window-appear
        matplotlib.use('Agg')  # use a non-interactive backend

    datasets_input = []

    experiments = flatten_experiments(params.input.experiments)
    reflections = flatten_reflections(params.input.reflections)

    if len(experiments) or len(reflections):
        if len(reflections) == 1:
            reflections_input = reflections[0]
            reflections = []
            for i in range(len(experiments)):
                reflections.append(
                    reflections_input.select(reflections_input['id'] == i))

        if len(experiments) > len(reflections):
            flattened_reflections = []
            for refl in reflections:
                for i in range(0, flex.max(refl['id']) + 1):
                    sel = refl['id'] == i
                    flattened_reflections.append(refl.select(sel))
            reflections = flattened_reflections

        assert len(experiments) == len(reflections)

        for expt, refl in zip(experiments, reflections):
            crystal_symmetry = crystal.symmetry(
                unit_cell=expt.crystal.get_unit_cell(),
                space_group=expt.crystal.get_space_group())
            if 0 and 'intensity.prf.value' in refl:  # prf branch deliberately disabled
                sel = refl.get_flags(refl.flags.integrated_prf)
                assert sel.count(True) > 0
                refl = refl.select(sel)
                data = refl['intensity.prf.value']
                variances = refl['intensity.prf.variance']
            else:
                assert 'intensity.sum.value' in refl
                sel = refl.get_flags(refl.flags.integrated_sum)
                assert sel.count(True) > 0
                refl = refl.select(sel)
                data = refl['intensity.sum.value']
                variances = refl['intensity.sum.variance']
            # FIXME probably need to do some filtering of intensities similar to that
            # done in export_mtz
            miller_indices = refl['miller_index']
            assert variances.all_gt(0)
            sigmas = flex.sqrt(variances)

            miller_set = miller.set(crystal_symmetry,
                                    miller_indices,
                                    anomalous_flag=False)
            intensities = miller.array(miller_set, data=data, sigmas=sigmas)
            intensities.set_observation_type_xray_intensity()
            intensities.set_info(
                miller.array_info(source='DIALS', source_type='pickle'))
            datasets_input.append(intensities)

    files = args

    for file_name in files:

        try:
            data = easy_pickle.load(file_name)
            intensities = data['observations'][0]
            intensities.set_info(
                miller.array_info(source=file_name, source_type='pickle'))
            intensities = intensities.customized_copy(
                anomalous_flag=False).set_info(intensities.info())
            batches = None
        except Exception:
            reader = any_reflection_file(file_name)
            assert reader.file_type() == 'ccp4_mtz'

            as_miller_arrays = reader.as_miller_arrays(merge_equivalents=False)
            intensities = [
                ma for ma in as_miller_arrays
                if ma.info().labels == ['I', 'SIGI']
            ][0]
            batches = [
                ma for ma in as_miller_arrays if ma.info().labels == ['BATCH']
            ]
            if len(batches):
                batches = batches[0]
            else:
                batches = None
            mtz_object = reader.file_content()
            intensities = intensities.customized_copy(
                anomalous_flag=False,
                indices=mtz_object.extract_original_index_miller_indices(
                )).set_info(intensities.info())

        intensities.set_observation_type_xray_intensity()
        datasets_input.append(intensities)

    if len(datasets_input) == 0:
        raise Sorry('No valid reflection files provided on command line')

    datasets = []
    for intensities in datasets_input:

        if params.batch is not None:
            assert batches is not None
            bmin, bmax = params.batch
            assert bmax >= bmin
            sel = (batches.data() >= bmin) & (batches.data() <= bmax)
            assert sel.count(True) > 0
            intensities = intensities.select(sel)

        if params.min_i_mean_over_sigma_mean is not None and (
                params.d_min is libtbx.Auto or params.d_min is not None):
            from xia2.Modules import Resolutionizer
            rparams = Resolutionizer.phil_defaults.extract().resolutionizer
            rparams.nbins = 20
            resolutionizer = Resolutionizer.resolutionizer(
                intensities, None, rparams)
            i_mean_over_sigma_mean = params.min_i_mean_over_sigma_mean
            d_min = resolutionizer.resolution_i_mean_over_sigma_mean(
                i_mean_over_sigma_mean)
            if params.d_min is libtbx.Auto:
                intensities = intensities.resolution_filter(
                    d_min=d_min).set_info(intensities.info())
                if params.verbose:
                    logger.info('Selecting reflections with d > %.2f' % d_min)
            elif d_min > params.d_min:
                logger.info('Rejecting dataset %s as d_min too low (%.2f)' %
                            (file_name, d_min))
                continue
            else:
                logger.info('Estimated d_min for %s: %.2f' %
                            (file_name, d_min))
        elif params.d_min not in (None, libtbx.Auto):
            intensities = intensities.resolution_filter(
                d_min=params.d_min).set_info(intensities.info())

        if params.normalisation == 'kernel':
            from mmtbx.scaling import absolute_scaling
            normalisation = absolute_scaling.kernel_normalisation(
                intensities, auto_kernel=True)
            intensities = normalisation.normalised_miller.deep_copy()

        cb_op_to_primitive = intensities.change_of_basis_op_to_primitive_setting(
        )
        intensities = intensities.change_basis(cb_op_to_primitive)
        if params.mode == 'full' or params.space_group is not None:
            if params.space_group is not None:
                space_group_info = params.space_group.primitive_setting()
                if not space_group_info.group().is_compatible_unit_cell(
                        intensities.unit_cell()):
                    logger.info(
                        'Skipping data set - incompatible space group and unit cell: %s, %s'
                        % (space_group_info, intensities.unit_cell()))
                    continue
            else:
                space_group_info = sgtbx.space_group_info('P1')
            intensities = intensities.customized_copy(
                space_group_info=space_group_info)

        datasets.append(intensities)

    crystal_symmetries = [d.crystal_symmetry().niggli_cell() for d in datasets]
    lattice_ids = range(len(datasets))
    from xfel.clustering.cluster import Cluster
    from xfel.clustering.cluster_groups import unit_cell_info
    ucs = Cluster.from_crystal_symmetries(crystal_symmetries,
                                          lattice_ids=lattice_ids)
    threshold = 1000
    if params.save_plot:
        from matplotlib import pyplot as plt
        fig = plt.figure("Andrews-Bernstein distance dendogram",
                         figsize=(12, 8))
        ax = plt.gca()
    else:
        ax = None
    clusters, _ = ucs.ab_cluster(params.unit_cell_clustering.threshold,
                                 log=params.unit_cell_clustering.log,
                                 write_file_lists=False,
                                 schnell=False,
                                 doplot=params.save_plot,
                                 ax=ax)
    if params.save_plot:
        plt.tight_layout()
        plt.savefig('%scluster_unit_cell.png' % params.plot_prefix)
        plt.close(fig)
    logger.info(unit_cell_info(clusters))
    largest_cluster = None
    largest_cluster_lattice_ids = None
    for cluster in clusters:
        cluster_lattice_ids = [m.lattice_id for m in cluster.members]
        if largest_cluster_lattice_ids is None:
            largest_cluster_lattice_ids = cluster_lattice_ids
        elif len(cluster_lattice_ids) > len(largest_cluster_lattice_ids):
            largest_cluster_lattice_ids = cluster_lattice_ids

    dataset_selection = largest_cluster_lattice_ids
    if len(dataset_selection) < len(datasets):
        logger.info('Selecting subset of data for cosym analysis: %s' %
                    str(dataset_selection))
        datasets = [datasets[i] for i in dataset_selection]

    # determine a per-dataset change-of-basis operator so that all datasets are indexed consistently
    change_of_basis_ops = []
    for i, dataset in enumerate(datasets):
        metric_subgroups = sgtbx.lattice_symmetry.metric_subgroups(dataset,
                                                                   max_delta=5)
        subgroup = metric_subgroups.result_groups[0]
        cb_op_inp_best = subgroup['cb_op_inp_best']
        datasets[i] = dataset.change_basis(cb_op_inp_best)
        change_of_basis_ops.append(cb_op_inp_best)

    cb_op_ref_min = datasets[0].change_of_basis_op_to_niggli_cell()
    for i, dataset in enumerate(datasets):
        if params.space_group is None:
            datasets[i] = dataset.change_basis(cb_op_ref_min).customized_copy(
                space_group_info=sgtbx.space_group_info('P1'))
        else:
            datasets[i] = dataset.change_basis(cb_op_ref_min)
            datasets[i] = datasets[i].customized_copy(
                crystal_symmetry=crystal.symmetry(
                    unit_cell=datasets[i].unit_cell(),
                    space_group_info=params.space_group.primitive_setting(),
                    assert_is_compatible_unit_cell=False))
        datasets[i] = datasets[i].merge_equivalents().array()
        change_of_basis_ops[i] = cb_op_ref_min * change_of_basis_ops[i]

    result = analyse_datasets(datasets, params)

    space_groups = {}
    reindexing_ops = {}
    for dataset_id in result.reindexing_ops.iterkeys():
        if 0 in result.reindexing_ops[dataset_id]:
            cb_op = result.reindexing_ops[dataset_id][0]
            reindexing_ops.setdefault(cb_op, [])
            reindexing_ops[cb_op].append(dataset_id)
        if dataset_id in result.space_groups:
            space_groups.setdefault(result.space_groups[dataset_id], [])
            space_groups[result.space_groups[dataset_id]].append(dataset_id)

    logger.info('Space groups:')
    for sg, datasets in space_groups.iteritems():
        logger.info(str(sg.info().reference_setting()))
        logger.info(datasets)

    logger.info('Reindexing operators:')
    for cb_op, datasets in reindexing_ops.iteritems():
        logger.info(cb_op)
        logger.info(datasets)

    if (len(experiments) and len(reflections)
            and params.output.reflections is not None
            and params.output.experiments is not None):
        import copy
        from dxtbx.model import ExperimentList
        from dxtbx.serialize import dump
        reindexed_experiments = ExperimentList()
        reindexed_reflections = flex.reflection_table()
        expt_id = 0
        for cb_op, dataset_ids in reindexing_ops.iteritems():
            cb_op = sgtbx.change_of_basis_op(cb_op)
            for dataset_id in dataset_ids:
                expt = experiments[dataset_selection[dataset_id]]
                refl = reflections[dataset_selection[dataset_id]]
                reindexed_expt = copy.deepcopy(expt)
                refl_reindexed = copy.deepcopy(refl)
                cb_op_this = cb_op * change_of_basis_ops[dataset_id]
                reindexed_expt.crystal = reindexed_expt.crystal.change_basis(
                    cb_op_this)
                refl_reindexed['miller_index'] = cb_op_this.apply(
                    refl_reindexed['miller_index'])
                reindexed_experiments.append(reindexed_expt)
                refl_reindexed['id'] = flex.int(refl_reindexed.size(), expt_id)
                reindexed_reflections.extend(refl_reindexed)
                expt_id += 1

        logger.info('Saving reindexed experiments to %s' %
                    params.output.experiments)
        dump.experiment_list(reindexed_experiments, params.output.experiments)
        logger.info('Saving reindexed reflections to %s' %
                    params.output.reflections)
        reindexed_reflections.as_pickle(params.output.reflections)

    elif params.output.suffix is not None:
        for cb_op, dataset_ids in reindexing_ops.iteritems():
            cb_op = sgtbx.change_of_basis_op(cb_op)
            for dataset_id in dataset_ids:
                file_name = files[dataset_selection[dataset_id]]
                basename = os.path.basename(file_name)
                out_name = os.path.splitext(
                    basename)[0] + params.output.suffix + '_' + str(
                        dataset_selection[dataset_id]) + ".mtz"
                reader = any_reflection_file(file_name)
                assert reader.file_type() == 'ccp4_mtz'
                mtz_object = reader.file_content()
                cb_op_this = cb_op * change_of_basis_ops[dataset_id]
                if not cb_op_this.is_identity_op():
                    logger.info('reindexing %s (%s)' %
                                (file_name, cb_op_this.as_xyz()))
                    mtz_object.change_basis_in_place(cb_op_this)
                mtz_object.write(out_name)
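
# The command-line entry point for run() is not included in the snippet above;
# a conventional guard for invoking it as a script would look like this
# (a sketch, assuming the surrounding module-level imports are present):
if __name__ == '__main__':
    import sys
    run(sys.argv[1:])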
    def __init__(self,
                 miller_obs,
                 miller_calc,
                 min_d_star_sq=0.0,
                 max_d_star_sq=2.0,
                 n_points=2000,
                 level=6.0):
        assert miller_obs.indices().all_eq(miller_calc.indices())
        if (miller_obs.is_xray_amplitude_array()):
            miller_obs = miller_obs.f_as_f_sq()
        if (miller_calc.is_xray_amplitude_array()):
            miller_calc = miller_calc.f_as_f_sq()
        self.obs = miller_obs.deep_copy()
        self.calc = miller_calc.deep_copy()
        self.mind = min_d_star_sq
        self.maxd = max_d_star_sq
        self.m = n_points
        self.n = 2
        self.level = level

        norma_obs = absolute_scaling.kernel_normalisation(
            miller_array=self.obs, auto_kernel=True, n_bins=45, n_term=17)
        norma_calc = absolute_scaling.kernel_normalisation(
            miller_array=self.calc, auto_kernel=True, n_bins=45, n_term=17)

        obs_d_star_sq = norma_obs.d_star_sq_array
        calc_d_star_sq = norma_calc.d_star_sq_array
        sel_calc_obs = norma_calc.bin_selection.select(norma_obs.bin_selection)
        sel_obs_calc = norma_obs.bin_selection.select(norma_calc.bin_selection)
        sel = ((obs_d_star_sq > low_lim) & (obs_d_star_sq < high_lim) &
               (norma_obs.mean_I_array > 0))
        sel = sel.select(sel_calc_obs)

        self.obs_d_star_sq = obs_d_star_sq.select(sel)
        self.calc_d_star_sq = calc_d_star_sq.select(sel_obs_calc).select(sel)
        self.mean_obs = norma_obs.mean_I_array.select(sel)
        self.mean_calc = norma_calc.mean_I_array.select(sel_obs_calc).select(
            sel)
        self.var_obs = norma_obs.var_I_array.select(sel)
        self.var_calc = norma_calc.var_I_array.select(sel_obs_calc).select(sel)

        # make an interpolator object please
        self.interpol = scale_curves.curve_interpolator(
            self.mind, self.maxd, self.m)
        # do the interpolation
        tmp_obs_d_star_sq  , self.mean_obs,self.obs_a  , self.obs_b  = \
          self.interpol.interpolate(self.obs_d_star_sq,self.mean_obs)
        self.obs_d_star_sq , self.var_obs,self.obs_a   , self.obs_b  = \
          self.interpol.interpolate(self.obs_d_star_sq, self.var_obs)
        tmp_calc_d_star_sq , self.mean_calc,self.calc_a, self.calc_b = \
          self.interpol.interpolate(self.calc_d_star_sq,self.mean_calc)
        self.calc_d_star_sq, self.var_calc,self.calc_a , self.calc_b = \
          self.interpol.interpolate(self.calc_d_star_sq,self.var_calc)

        self.mean_ratio_engine = chebyshev_polynome(mean_coefs.size(),
                                                    low_lim - 1e-3,
                                                    high_lim + 1e-3,
                                                    mean_coefs)
        self.std_ratio_engine = chebyshev_polynome(std_coefs.size(),
                                                   low_lim - 1e-3,
                                                   high_lim + 1e-3, std_coefs)

        self.x = flex.double([0, 0])

        self.low_lim_for_scaling = 1.0 / (4.0 * 4.0)  #0.0625
        selection = (self.calc_d_star_sq > self.low_lim_for_scaling)
        if (selection.count(True) == 0):
            raise Sorry(
                "No reflections within required resolution range after " +
                "filtering.")
        self.weight_array = selection.as_double() / (2.0 * self.var_obs)
        assert (not self.weight_array.all_eq(0.0))

        self.mean = flex.double(
            [1.0 / (flex.sum(self.mean_calc) / flex.sum(self.mean_obs)), 0.0])
        self.sigmas = flex.double([0.5, 0.5])

        s = 1.0 / (flex.sum(self.weight_array * self.mean_calc) /
                   flex.sum(self.weight_array * self.mean_obs))
        b = 0.0
        self.sart_simplex = [
            flex.double([s, b]),
            flex.double([s + 0.1, b + 1.1]),
            flex.double([s - 0.1, b - 1.1])
        ]
        self.opti = simplex.simplex_opt(2, self.sart_simplex, self)

        sol = self.opti.get_solution()
        self.scale = abs(sol[0])
        self.b_value = sol[1]

        self.modify_weights()
        self.all_bad_z_scores = self.weight_array.all_eq(0.0)
        if (not self.all_bad_z_scores):
            s = 1.0 / (flex.sum(self.weight_array * self.mean_calc) /
                       flex.sum(self.weight_array * self.mean_obs))
            b = 0.0
            self.sart_simplex = [
                flex.double([s, b]),
                flex.double([s + 0.1, b + 1.1]),
                flex.double([s - 0.1, b - 1.1])
            ]
            self.opti = simplex.simplex_opt(2, self.sart_simplex, self)
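
# The constructor above passes itself as the evaluator to simplex.simplex_opt
# and reads back get_solution().  The evaluator is assumed to expose a
# target(vector) method returning the value to minimise (that method is not
# shown in the snippet).  A minimal stand-alone sketch with a purely
# illustrative quadratic target:
from scitbx.array_family import flex
from scitbx import simplex

class toy_evaluator(object):
    def __init__(self):
        # three starting vertices for a 2-parameter simplex
        start_simplex = [flex.double([1.0, 1.0]),
                         flex.double([1.1, 0.9]),
                         flex.double([0.9, 1.2])]
        self.opti = simplex.simplex_opt(2, start_simplex, self)

    def target(self, vector):
        # minimum at x = 3, y = -2
        return (vector[0] - 3.0)**2 + (vector[1] + 2.0)**2

solution = toy_evaluator().opti.get_solution()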
Beispiel #26
    def do_clustering(self, nproc=1, b_scale=False, use_normalized=False, html_maker=None):
        self.clusters = {}
        prefix = os.path.join(self.wdir, "cctable")
        assert (b_scale, use_normalized).count(True) <= 1

        if len(self.arrays) < 2:
            print "WARNING: less than two data! can't do cc-based clustering"
            self.clusters[1] = [float("nan"), [0]]
            return

        # Absolute scaling using Wilson-B factor 
        if b_scale:
            from mmtbx.scaling.matthews import p_vm_calculator
            from mmtbx.scaling.absolute_scaling import ml_iso_absolute_scaling
            
            ofs_wilson = open("%s_wilson_scales.dat"%prefix, "w")
            n_residues = p_vm_calculator(self.arrays.values()[0], 1, 0).best_guess
            ofs_wilson.write("# guessed n_residues= %d\n" % n_residues)
            ofs_wilson.write("file wilsonB\n")
            for f in self.arrays:
                arr = self.arrays[f]
                iso_scale_and_b = ml_iso_absolute_scaling(arr, n_residues, 0)
                wilson_b = iso_scale_and_b.b_wilson
                ofs_wilson.write("%s %.3f\n" % (f, wilson_b))
                if wilson_b > 0: # Ignoring data with B<0 is probably a bad idea, but it is unclear how best to handle them
                    tmp = flex.exp(-2. * wilson_b * arr.unit_cell().d_star_sq(arr.indices())/4.)
                    self.arrays[f] = arr.customized_copy(data=arr.data()*tmp,
                                                         sigmas=arr.sigmas()*tmp)
            ofs_wilson.close()

        elif use_normalized:
            from mmtbx.scaling.absolute_scaling import kernel_normalisation
            for f in self.arrays:
                arr = self.arrays[f]
                normaliser = kernel_normalisation(arr, auto_kernel=True)
                self.arrays[f] = arr.customized_copy(data=arr.data()/normaliser.normalizer_for_miller_array,
                                                     sigmas=arr.sigmas()/normaliser.normalizer_for_miller_array)
        # Prepare the list of (i, j) pairs for all-vs-all CC calculation
        args = []
        for i in xrange(len(self.arrays)-1):
            for j in xrange(i+1, len(self.arrays)):
                args.append((i,j))
           
        # Calc all CC
        worker = lambda x: calc_cc(self.arrays.values()[x[0]], self.arrays.values()[x[1]])
        results = easy_mp.pool_map(fixed_func=worker,
                                   args=args,
                                   processes=nproc)

        # Check NaN and decide which data to remove
        idx_bad = {}
        nans = []
        cc_data_for_html = []
        for (i,j), (cc,nref) in zip(args, results):
            cc_data_for_html.append((i,j,cc,nref))
            if cc==cc: continue # cc==cc is False only for NaN
            idx_bad[i] = idx_bad.get(i, 0) + 1
            idx_bad[j] = idx_bad.get(j, 0) + 1
            nans.append([i,j])

        if html_maker is not None:
            html_maker.add_cc_clustering_details(cc_data_for_html)

        idx_bad = idx_bad.items()
        idx_bad.sort(key=lambda x:x[1])
        remove_idxes = set()
        
        for idx, badcount in reversed(idx_bad):
            if len(filter(lambda x: idx in x, nans)) == 0: continue
            remove_idxes.add(idx)
            nans = filter(lambda x: idx not in x, nans)
            if len(nans) == 0: break

        use_idxes = filter(lambda x: x not in remove_idxes, xrange(len(self.arrays)))

        # Make table: original index (in file list) -> new index (in matrix)
        count = 0
        org2now = collections.OrderedDict()
        for i in xrange(len(self.arrays)):
            if i in remove_idxes: continue
            org2now[i] = count
            count += 1

        if len(remove_idxes) > 0:
            open("%s_notused.lst"%prefix, "w").write("\n".join(map(lambda x: self.arrays.keys()[x], remove_idxes)))

        # Make matrix
        mat = numpy.zeros(shape=(len(use_idxes), len(use_idxes)))
        for (i,j), (cc,nref) in zip(args, results):
            if i in remove_idxes or j in remove_idxes: continue
            mat[org2now[j], org2now[i]] = cc
            
        open("%s.matrix"%prefix, "w").write(" ".join(map(lambda x:"%.4f"%x, mat.flatten())))

        ofs = open("%s.dat"%prefix, "w")
        ofs.write("   i    j     cc  nref\n")
        for (i,j), (cc,nref) in zip(args, results):
            ofs.write("%4d %4d %.4f %4d\n" % (i,j,cc,nref))

        open("%s_ana.R"%prefix, "w").write("""\
treeToList2 <- function(htree)
{  # stolen from $CCP4/share/blend/R/blend0.R
 groups <- list()
 itree <- dim(htree$merge)[1]
 for (i in 1:itree)
 { 
  il <- htree$merge[i,1]
  ir <- htree$merge[i,2]
  if (il < 0) lab1 <- htree$labels[-il]
  if (ir < 0) lab2 <- htree$labels[-ir]
  if (il > 0) lab1 <- groups[[il]]
  if (ir > 0) lab2 <- groups[[ir]]
  lab <- c(lab1,lab2)
  lab <- as.integer(lab)
  groups <- c(groups,list(lab))
 }
 return(groups)
}

cc<-scan("%(prefix)s.matrix")
md<-matrix(1-cc, ncol=%(ncol)d, byrow=TRUE)
hc <- hclust(as.dist(md),method="ward")
pdf("tree.pdf")
plot(hc)
dev.off()
png("tree.png",height=1000,width=1000)
plot(hc)
dev.off()

hc$labels <- c(%(hclabels)s)
groups <- treeToList2(hc)
cat("ClNumber             Nds         Clheight   IDs\\n",file="./CLUSTERS.txt")
for (i in 1:length(groups))
{
 sorted_groups <- sort(groups[[i]])
 linea <- sprintf("%%04d %%4d %%7.3f %%s\\n",
                  i,length(groups[[i]]),hc$height[i], paste(sorted_groups,collapse=" "))
 cat(linea, file="./CLUSTERS.txt", append=TRUE)
}

# reference: http://www.coppelia.io/2014/07/converting-an-r-hclust-object-into-a-d3-js-dendrogram/
library(rjson)
HCtoJSON<-function(hc){
  labels<-hc$labels
  merge<-data.frame(hc$merge)
  for (i in (1:nrow(merge))) {
    if (merge[i,1]<0 & merge[i,2]<0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(list(name=labels[-merge[i,1]]),list(name=labels[-merge[i,2]])))")))}
    else if (merge[i,1]>0 & merge[i,2]<0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(node", merge[i,1], ", list(name=labels[-merge[i,2]])))")))}
    else if (merge[i,1]<0 & merge[i,2]>0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(list(name=labels[-merge[i,1]]), node", merge[i,2],"))")))}
    else if (merge[i,1]>0 & merge[i,2]>0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(node",merge[i,1] , ", node" , merge[i,2]," ))")))}
  }
  eval(parse(text=paste0("JSON<-toJSON(node",nrow(merge), ")")))
  return(JSON)
}

JSON<-HCtoJSON(hc)
cat(JSON, file="dendro.json")

q(save="yes")
""" % dict(prefix=os.path.basename(prefix),
           ncol=len(self.arrays),
           hclabels=",".join(map(lambda x: "%d"%(x+1), org2now.keys()))))

        call(cmd="Rscript", arg="%s_ana.R" % os.path.basename(prefix),
             wdir=self.wdir)

        output = open(os.path.join(self.wdir, "CLUSTERS.txt")).readlines()
        for l in output[1:]:
            sp = l.split()
            clid, clheight, ids = sp[0], sp[2], sp[3:]
            self.clusters[int(clid)] = [float(clheight), map(int,ids)]
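
# do_clustering above writes the CC matrix to disk and delegates the actual
# hierarchical clustering to an R script (hclust with Ward linkage).  As a
# rough in-Python alternative (not what the method itself does), scipy can
# cluster the same kind of matrix; the toy 'mat' below stands in for the
# one-triangle CC matrix built above, and scipy/numpy availability is assumed.
import numpy
from scipy.cluster import hierarchy
from scipy.spatial.distance import squareform

mat = numpy.zeros((3, 3))
mat[1, 0] = 0.95
mat[2, 0] = 0.20
mat[2, 1] = 0.25

full = mat + mat.T                # symmetrise; the diagonal stays at zero
dist = 1.0 - full                 # CC -> distance, as in the R script
numpy.fill_diagonal(dist, 0.0)
linkage = hierarchy.linkage(squareform(dist, checks=False), method='ward')
# each row of 'linkage' merges two clusters at a given height, analogous to
# the hc$merge / hc$height structure that the R script writes to CLUSTERS.txt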
Beispiel #27
    def _kernel_normalisation(self, miller_array, n_bins=45, n_term=17):

        return absolute_scaling.kernel_normalisation(miller_array=miller_array,
                                                     auto_kernel=True,
                                                     n_bins=n_bins,
                                                     n_term=n_term)
  def __init__(self,
      miller_obs,
      miller_calc,
      min_d_star_sq=0.0,
      max_d_star_sq=2.0,
      n_points=2000,
      level=6.0):
    assert miller_obs.indices().all_eq(miller_calc.indices())
    if (miller_obs.is_xray_amplitude_array()) :
      miller_obs = miller_obs.f_as_f_sq()
    if (miller_calc.is_xray_amplitude_array()) :
      miller_calc = miller_calc.f_as_f_sq()
    self.obs  = miller_obs.deep_copy()
    self.calc = miller_calc.deep_copy()
    self.mind = min_d_star_sq
    self.maxd = max_d_star_sq
    self.m    = n_points
    self.n    = 2
    self.level = level

    norma_obs  = absolute_scaling.kernel_normalisation(
      miller_array=self.obs,
      auto_kernel=True,
      n_bins=45,
      n_term=17)
    norma_calc = absolute_scaling.kernel_normalisation(
      miller_array=self.calc,
      auto_kernel=True,
      n_bins=45,
      n_term=17)

    obs_d_star_sq  = norma_obs.d_star_sq_array
    calc_d_star_sq = norma_calc.d_star_sq_array
    sel_calc_obs = norma_calc.bin_selection.select(norma_obs.bin_selection)
    sel_obs_calc = norma_obs.bin_selection.select(norma_calc.bin_selection)
    sel  = ((obs_d_star_sq > low_lim) & (obs_d_star_sq < high_lim) &
            (norma_obs.mean_I_array > 0))
    sel = sel.select(sel_calc_obs)

    self.obs_d_star_sq  = obs_d_star_sq.select( sel )
    self.calc_d_star_sq = calc_d_star_sq.select( sel_obs_calc ).select(sel)
    self.mean_obs       = norma_obs.mean_I_array.select(sel)
    self.mean_calc      = norma_calc.mean_I_array.select(
                            sel_obs_calc).select(sel)
    self.var_obs        = norma_obs.var_I_array.select(sel)
    self.var_calc       = norma_calc.var_I_array.select(
      sel_obs_calc).select(sel)

    # make an interpolator object please
    self.interpol = scale_curves.curve_interpolator( self.mind, self.maxd,
      self.m)
    # do the interpolation
    tmp_obs_d_star_sq  , self.mean_obs,self.obs_a  , self.obs_b  = \
      self.interpol.interpolate(self.obs_d_star_sq,self.mean_obs)
    self.obs_d_star_sq , self.var_obs,self.obs_a   , self.obs_b  = \
      self.interpol.interpolate(self.obs_d_star_sq, self.var_obs)
    tmp_calc_d_star_sq , self.mean_calc,self.calc_a, self.calc_b = \
      self.interpol.interpolate(self.calc_d_star_sq,self.mean_calc)
    self.calc_d_star_sq, self.var_calc,self.calc_a , self.calc_b = \
      self.interpol.interpolate(self.calc_d_star_sq,self.var_calc)

    self.mean_ratio_engine = chebyshev_polynome( mean_coefs.size(),
      low_lim-1e-3, high_lim+1e-3,mean_coefs)
    self.std_ratio_engine = chebyshev_polynome( std_coefs.size(),
      low_lim-1e-3, high_lim+1e-3,std_coefs)

    self.x = flex.double([0,0])

    self.low_lim_for_scaling = 1.0/(4.0*4.0) #0.0625
    selection = (self.calc_d_star_sq > self.low_lim_for_scaling)
    if (selection.count(True) == 0) :
      raise Sorry("No reflections within required resolution range after "+
        "filtering.")
    self.weight_array = selection.as_double() / (2.0 * self.var_obs)
    assert (not self.weight_array.all_eq(0.0))

    self.mean   = flex.double( [1.0/(flex.sum(self.mean_calc) /
                                flex.sum(self.mean_obs)), 0.0 ] )
    self.sigmas = flex.double( [0.5, 0.5] )

    s = 1.0/(flex.sum(self.weight_array*self.mean_calc)/
             flex.sum(self.weight_array*self.mean_obs))
    b = 0.0
    self.sart_simplex = [ flex.double([s,b]), flex.double([s+0.1,b+1.1]),
                          flex.double([s-0.1,b-1.1]) ]
    self.opti = simplex.simplex_opt( 2, self.sart_simplex, self)

    sol = self.opti.get_solution()
    self.scale   = abs(sol[0])
    self.b_value = sol[1]

    self.modify_weights()
    self.all_bad_z_scores = self.weight_array.all_eq(0.0)
    if (not self.all_bad_z_scores) :
      s = 1.0/(flex.sum(self.weight_array*self.mean_calc) /
               flex.sum(self.weight_array*self.mean_obs))
      b = 0.0
      self.sart_simplex = [ flex.double([s,b]), flex.double([s+0.1,b+1.1]),
                            flex.double([s-0.1,b-1.1]) ]
      self.opti = simplex.simplex_opt( 2, self.sart_simplex, self)