def build_scattering_library(atom_types, q_array, radii, radius_scale,
                             Explicit_H, S_factor):
    scat_lib = intensity.scattering_library(q_array)
    ener_lib = server.ener_lib()
    for at in atom_types:
        element = ener_lib.lib_atom[at].element
        if (element == ''):
            element = 'C'
            print "Warning: unexpected atom type found; using element C"
        val = xray_scattering.it1992(element, True).fetch()
        a = val.array_of_a()
        b = val.array_of_b()
        c = val.c()
        sf_result = scattering_factor(a, b, c, q_array)
        # get the displaced solvent volume
        if (at in radii):
            r = radii[at] * radius_scale
            v = math.pi * 4.0 / 3.0 * r**3.0
        else:
            v = 16.44
        dummy_sf = dummy_factor(v, q_array)

        if (not Explicit_H):
            if (at in S_factor):
                scale = S_factor[at]
                sf_result *= (scale[0] *
                              flex.exp(-scale[1] * q_array * q_array))
                dummy_sf *= (flex.exp(-1.25 * q_array * q_array))

        scat_lib.load_scattering_info(at, sf_result, dummy_sf)
    return scat_lib
Example #2
        def fvec_callable(pfh,current_values):

          G = current_values[0]
          B_factor = current_values[1]

          I_obs = observations_original_sel.data()
          sigI_obs = observations_original_sel.sigmas()
          observations_original_sel_two_theta = observations_original_sel.two_theta(wavelength=wavelength)
          two_theta = observations_original_sel_two_theta.data()
          sin_theta_over_lambda_sq = observations_original_sel_two_theta.sin_theta_over_lambda_sq().data()

          excursions = ((G * (flex.exp(-2*B_factor*sin_theta_over_lambda_sq)) * I_obs) - I_ref) / sigI_obs

          corr_now, slope_now = get_overall_correlation(G * (flex.exp(-2*B_factor*sin_theta_over_lambda_sq)) * I_obs, I_ref)

          """
          print "SCALE G=%5.3f B_factor=%5.3f J=%6.3f cc=%6.3f slope=%6.3f"% \
          (G, B_factor, sum(excursions**2), corr_now, slope_now)


          plt.scatter(I_ref,(G * (flex.exp(-2*B_factor*sin_theta_over_lambda_sq)) * I_obs),s=10, marker='x', c='r')
          plt.title('J=%6.5f CC=%6.5f Slope=%6.5f'%(sum(excursions**2), corr_now, slope_now))
          plt.xlabel('Reference intensity')
          plt.ylabel('Observed intensity (scaled)')
          plt.show()
          """

          return excursions
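The excursions above scale each observation by G * exp(-2*B*sin^2(theta)/lambda^2) before comparing with the reference. A minimal standalone sketch of that scaling term, using placeholder numbers rather than anything from the refinement class above:

from scitbx.array_family import flex

# placeholder observations, reference intensities, sigmas and sin(theta)/lambda^2 values
I_obs = flex.double([120.0, 85.0, 40.0])
I_ref = flex.double([115.0, 90.0, 38.0])
sigI_obs = flex.double([5.0, 4.0, 3.0])
sin_theta_over_lambda_sq = flex.double([0.01, 0.04, 0.09])

G, B_factor = 1.1, 12.0

# same form as the excursions above: scaled observations minus reference, weighted by sigma
scaled = G * flex.exp(-2.0 * B_factor * sin_theta_over_lambda_sq) * I_obs
excursions = (scaled - I_ref) / sigI_obs
print list(excursions)
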
Example #3
 def df(self, x):
     k, B = float(x[0]), float(x[1])
     d_star_sq = self.calc.d_star_sq().data()
     tmp = self.obs.data() - k * flex.exp(-B*d_star_sq) * self.calc.data()
     dfdk = flex.sum(-2. * tmp * flex.exp(-B*d_star_sq) * self.calc.data())
     dfdB = flex.sum(2. * tmp * k * d_star_sq * flex.exp(-B*d_star_sq) * self.calc.data())
     return numpy.array([dfdk, dfdB])
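The derivatives returned here follow from the target sum((obs - k*exp(-B*s)*calc)^2). A quick finite-difference check of those expressions, as a self-contained sketch with placeholder arrays (not taken from the class above):

import numpy
from scitbx.array_family import flex

# placeholder data, purely illustrative
obs  = flex.double([10.0, 8.0, 5.0, 3.0])
calc = flex.double([9.5, 8.2, 5.3, 2.8])
d_star_sq = flex.double([0.01, 0.05, 0.10, 0.20])

def target(k, B):
  tmp = obs - k * flex.exp(-B * d_star_sq) * calc
  return flex.sum(tmp * tmp)

def gradient(k, B):
  tmp = obs - k * flex.exp(-B * d_star_sq) * calc
  dfdk = flex.sum(-2. * tmp * flex.exp(-B * d_star_sq) * calc)
  dfdB = flex.sum(2. * tmp * k * d_star_sq * flex.exp(-B * d_star_sq) * calc)
  return numpy.array([dfdk, dfdB])

k, B, eps = 1.05, 15.0, 1.e-6
g_fd = numpy.array([(target(k + eps, B) - target(k - eps, B)) / (2 * eps),
                    (target(k, B + eps) - target(k, B - eps)) / (2 * eps)])
assert numpy.allclose(gradient(k, B), g_fd, atol=1.e-4)
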
Example #4
def sharp_map(sites_frac,
              map_coeffs,
              ss=None,
              b_sharp=None,
              b_min=-150,
              b_max=150,
              step=10):
    if (ss is None):
        ss = 1. / flex.pow2(map_coeffs.d_spacings().data()) / 4.
    from cctbx import miller
    if (b_sharp is None):
        t = -1
        map_coeffs_best = None
        b_sharp_best = None
        for b_sharp in range(b_min, b_max, step):
            map_coeffs_ = map_coeffs.deep_copy()
            sc2 = flex.exp(b_sharp * ss)
            map_coeffs_ = map_coeffs_.customized_copy(data=map_coeffs_.data() *
                                                      sc2)
            t_ = sharp_evaluation_target(sites_frac=sites_frac,
                                         map_coeffs=map_coeffs_)
            if (t_ > t):
                t = t_
                b_sharp_best = b_sharp
                map_coeffs_best = map_coeffs_.deep_copy()
        print "b_sharp:", b_sharp_best, t
    else:
        scale = flex.exp(b_sharp * ss)
        map_coeffs_best = map_coeffs.customized_copy(data=map_coeffs.data() *
                                                     scale)
        b_sharp_best = b_sharp
    return map_coeffs_best, b_sharp_best
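sharp_map multiplies each map coefficient by exp(b_sharp * ss), where ss = 1/(4*d^2), and keeps the b_sharp that maximises the evaluation target. A standalone sketch of just that scale factor, with placeholder d-spacings:

from scitbx.array_family import flex

d_spacings = flex.double([8.0, 4.0, 2.5, 2.0])   # placeholder d-spacings in Angstrom
ss = 1. / flex.pow2(d_spacings) / 4.             # same ss as in sharp_map
b_sharp = 60                                     # b_sharp > 0 up-weights the high-resolution terms
scale = flex.exp(b_sharp * ss)
for d, s in zip(d_spacings, scale):
  print "d=%.2f  scale=%.3f" % (d, s)
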
Example #5
 def aniso_ratio_p_value(self,rat):
   return -3
   coefs = flex.double( [-1.7647171873040273, -3.4427008004789115,
     -1.097150249786379, 0.17303317520973829, 0.35955513268118661,
     0.066276397961476205, -0.064575726062529232, -0.0063025873711609016,
     0.0749945566688624, 0.14803702885155121, 0.154284467861286])
   fit_e = scitbx.math.chebyshev_polynome(11,0,1.0,coefs)
   x = flex.double( range(1000) )/999.0
   start = int(rat*1000)
   norma = flex.sum(flex.exp(fit_e.f(x)))/x[1]
   x = x*(1-rat)+rat
   norma2 = flex.sum(flex.exp(fit_e.f(x)))/(x[1]-x[0])
   return -math.log(norma2/norma )
def run(xray_structure, f_map=None, map_data=None, d_fsc_model=None):
    assert [f_map, map_data].count(None) == 1
    xrs = xray_structure.deep_copy_scatterers().set_b_iso(value=0)
    if (f_map is None):
        f_map = miller.structure_factor_box_from_map(
            map=map_data, crystal_symmetry=xray_structure.crystal_symmetry())
    fc = f_map.structure_factors_from_scatterers(xray_structure=xrs).f_calc()
    d_model_b0 = run_at_b(b=0, f_map=f_map, f_calc=fc).d_min
    del xrs
    if (d_fsc_model is None):
        d_fsc_model = fc.d_min_from_fsc(other=f_map, fsc_cutoff=0).d_min
    fo = f_map.resolution_filter(d_min=d_fsc_model)
    fo, fc = fo.common_sets(fc)
    cc = -999
    b = None
    ss = 1. / flex.pow2(fc.d_spacings().data()) / 4.
    data = fc.data()
    for b_ in range(-500, 500, 5):
        sc = flex.exp(-b_ * ss)
        fc_ = fc.customized_copy(data=data * sc)
        cc_ = fo.map_correlation(other=fc_)
        if (cc_ > cc):
            cc = cc_
            b = b_
    o = run_at_b(b=b, f_map=fo, f_calc=fc)
    return group_args(d_min=o.d_min,
                      b_iso=b,
                      d_model_b0=d_model_b0,
                      d_fsc_model=d_fsc_model)
Example #8
    def calc_scales(self, params_in):
        """ Calculate an array of scales based on scale, B and wavelength using the
    equation $scale * exp(-2*B*(sin(theta)/wavelength)^2)$

    Return a scale vector for all the reflections in self, using the
    parameters defined in the array params.

    :params: a tuple of the form appropriate for the crystal symmetry, such as
    one produced by get_x0(). This method only uses params[0] (scale) and
    params[1] (B)

    :return: a list of scales for all the miller indices in self
    """
        if self.use_scales:
            scale = params_in[0]
            B = params_in[1]
            sin_sq_theta = self.miller_array.two_theta(wavelength=self.wavelength) \
              .sin_theta_over_lambda_sq().data()

            scales = scale * self.miller_array.data()
            exp_arg = flex.double(-2 * B * sin_sq_theta)
            return flex.double(flex.double(scales) * flex.exp(exp_arg))
        else:
            # vector of ones, one per reflection
            return flex.double(self.miller_array.size(), 1.0)
Example #9
def dump_R_in_bins(obs, calc, scale_B=True, log_out=sys.stdout, n_bins=20):
    #obs, calc = obs.common_sets(calc, assert_is_similar_symmetry=False)

    if scale_B:
        scale, B = kBdecider(obs, calc).run()
        d_star_sq = calc.d_star_sq().data()
        calc = calc.customized_copy(data = scale * flex.exp(-B*d_star_sq) * calc.data())

    binner = obs.setup_binner(n_bins=n_bins)
    count=0
    log_out.write("dmax - dmin: R (nref) <I1> <I2> scale\n")

    for i_bin in binner.range_used():
        tmp_obs = obs.select(binner.bin_indices() == i_bin)
        tmp_calc = calc.select(binner.bin_indices() == i_bin)

        low = binner.bin_d_range(i_bin)[0]
        high = binner.bin_d_range(i_bin)[1]

        if scale_B:
            scale = 1.
        else:
            scale = flex.sum(tmp_obs.data()*tmp_calc.data()) / flex.sum(flex.pow2(tmp_calc.data()))

        R = flex.sum(flex.abs(tmp_obs.data() - scale*tmp_calc.data())) / flex.sum(0.5 * tmp_obs.data() + 0.5 * scale*tmp_calc.data())

        log_out.write("%5.2f - %5.2f: %.5f (%d) %.1f %.1f %.3e\n" % (low, high, R, len(tmp_obs.data()),
                                                                 flex.mean(tmp_obs.data()), flex.mean(tmp_calc.data()),
                                                                 scale))

    log_out.write("Overall R = %.5f (scale=%.3e, %%comp=%.3f)\n\n" % (calc_R(obs, calc, do_scale=not scale_B) + (obs.completeness()*100.,)) )
Example #10
def ls_ff_weights(f_obs, atom, B):
    d_star_sq_data = f_obs.d_star_sq().data()
    table = wk1995(atom).fetch()
    ff = table.at_d_star_sq(d_star_sq_data) * flex.exp(
        -B / 4.0 * d_star_sq_data)
    weights = 1.0 / flex.pow2(ff)
    return weights
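ls_ff_weights weights each reflection by 1/ff^2, where ff is the tabulated form factor damped by exp(-B/4 * d*^2). A sketch of the same weighting with a made-up flat form factor in place of the wk1995 table lookup:

from scitbx.array_family import flex

d_star_sq = flex.double([0.02, 0.08, 0.16])   # placeholder 1/d^2 values
f0 = flex.double(d_star_sq.size(), 6.0)       # made-up constant form factor
B = 20.0
ff = f0 * flex.exp(-B / 4.0 * d_star_sq)
weights = 1.0 / flex.pow2(ff)
print list(weights)
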
Example #11
 def calc_full_refl(self, I_o_p_set, sin_theta_over_lambda_sq_set,
                  G, B, p_set, rs_set, flag_volume_correction=True):
   # avoid a floating-point exception
   argument = -2*B*sin_theta_over_lambda_sq_set
   if (argument<-75).count(True)>0 or (argument>75).count(True)>0:  raise ValueError("flex.exp arg out of bounds")
   I_o_full_set = I_o_p_set/(G * flex.exp(argument) * p_set)
   return I_o_full_set
def scattering_factor(a, b, c, q):
    result = q * 0
    stol = q / (math.pi * 4)
    for aa, bb in zip(a, b):
        result += aa * flex.exp(-bb * stol * stol)
    result += c
    return result
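scattering_factor evaluates the usual sum-of-Gaussians form factor f(q) = sum_i a_i*exp(-b_i*(q/4pi)^2) + c. A self-contained sketch of the same evaluation with made-up coefficients (placeholders, not real IT1992 values):

import math
from scitbx.array_family import flex

a = [2.0, 1.0]          # placeholder Gaussian amplitudes
b = [20.0, 5.0]         # placeholder Gaussian widths
c = 0.3                 # placeholder constant term
q = flex.double([0.0, 0.5, 1.0, 2.0])

stol = q / (math.pi * 4)
result = q * 0
for aa, bb in zip(a, b):
  result += aa * flex.exp(-bb * stol * stol)
result += c
print list(result)
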
Example #13
  def calc_scales(self, params_in):
    """ Calculate an array of scales based on scale, B and wavelength using the
    equation $scale * exp(-2*B*(sin(theta)/wavelength)^2)$

    Return a scale vector for all the reflections in self, using the
    parameters defined in the array params.

    :params: a tuple of the form appropriate for the crystal symmetry, such as
    one produced by get_x0(). This method only uses params[0] (scale) and
    params[1] (B)

    :return: a list of scales for all the miller indices in self
    """
    if self.use_scales:
      scale = params_in[0]
      B = params_in[1]
      sin_sq_theta = self.miller_array.two_theta(wavelength=self.wavelength) \
        .sin_theta_over_lambda_sq().data()

      scales = scale * self.miller_array.data()
      exp_arg = flex.double(-2 * B * sin_sq_theta)
      return flex.double(flex.double(scales) * flex.exp(exp_arg))
    else:
      # vector of ones, one per reflection
      return flex.double(self.miller_array.size(), 1.0)
Example #14
 def func_scale(self, params, *args):
   mean_I_r = args[0]
   mean_I_o = args[1]
   mean_stol_sq = args[2]
   G, B = params
   mean_I_o_scaled = mean_I_o/(G * flex.exp(flex.double(-2 * B * mean_stol_sq)))
   return (mean_I_r - mean_I_o_scaled)
Example #15
  def compute_cost_refine_crystal(self, I_ref, observations_original_sel, crystal_init_orientation,
            wavelength, parameters):
    G = parameters[0]
    B_factor = parameters[1]
    rotx = parameters[2]
    roty = parameters[3]
    ry = parameters[4]
    rz = parameters[5]

    I_obs = observations_original_sel.data()
    sigI_obs = observations_original_sel.sigmas()
    observations_original_sel_two_theta = observations_original_sel.two_theta(wavelength=wavelength)
    two_theta = observations_original_sel_two_theta.data()
    sin_theta_over_lambda_sq = observations_original_sel_two_theta.sin_theta_over_lambda_sq().data()

    effective_orientation = crystal_init_orientation.rotate_thru((1,0,0),rotx
           ).rotate_thru((0,1,0),roty)

    effective_a_star = sqr(effective_orientation.reciprocal_matrix())
    ph = partiality_handler(wavelength, 0)
    partiality = ph.calc_partiality_anisotropy_set(effective_a_star, observations_original_sel.indices(), ry, rz, two_theta)

    excursions = (((G * (flex.exp(-2*B_factor*sin_theta_over_lambda_sq)) * I_obs)/partiality) - I_ref) / sigI_obs

    return excursions
Example #16
def run(mtz, bs):
    # Open mtz
    mtz_file = iotbx.mtz.object(mtz)
    miller_arrays = mtz_file.as_miller_arrays()
    print "Opening", mtz

    for b in bs:
        mtz_out = os.path.splitext(os.path.basename(mtz))[0] + "_b%.2f.mtz" % b
        mtz_dataset = None
        labels = ["H", "K", "L"]
        
        for i, ar in enumerate(miller_arrays):
            fake_label = 2 * string.uppercase[i]
            for lab in guess_array_output_labels(ar):
                labels.append(lab)
            array_types = get_original_array_types(mtz_file, ar.info().labels)
            default_types = iotbx.mtz.default_column_types(ar)
            if len(default_types) == len(array_types):
                column_types = array_types
            else:
                column_types = None

            if ar.is_xray_data_array() or ar.is_complex_array():
                fac = 2. if ar.is_xray_intensity_array() else 1.
                print "Applying B=%.2f to %s" % (b*fac, ar.info())
                k = flex.exp(-ar.d_star_sq().data() * b*fac)
                ar = ar.customized_copy(data=ar.data()*k, sigmas=ar.sigmas()*k if ar.sigmas() else None)

            mtz_dataset = add_array_to_mtz_dataset(mtz_dataset, ar, fake_label,
                                                   column_types)

        # Decide labels and write mtz file
        mtz_object = mtz_dataset.mtz_object()
        invalid_chars = re.compile("[^A-Za-z0-9_\-+\(\)]")

        used = dict([ (label, 0) for label in labels ])

        for i, column in enumerate(mtz_object.columns()):
            if column.label() != labels[i] :
                label = labels[i]
                original_label = label

                assert used[label] == 0

                try:
                    column.set_label(label)
                except RuntimeError, e:
                    if ("new_label is used already" in str(e)) :
                        col_names = [ col.label() for col in mtz_object.columns() ]
                        raise RuntimeError(("Duplicate column label '%s': current labels "+
                                            "are %s; user-specified output labels are %s.") %
                                           (label, " ".join(col_names), " ".join(labels)))
                else:
                    used[original_label] += 1

        mtz_object.write(file_name=mtz_out)

        print
        print "Writing:", mtz_out
        print
Example #17
 def voigt(self, x, sig, nu):
   if nu < 0:
     nu = 0
   elif nu > 1:
     nu = 1
   f1 = nu * math.sqrt(math.log(2)/math.pi) * flex.exp(-4*math.log(2)*((x/sig)**2)) * (1/abs(sig))
   f2 = (1-nu)/(math.pi*abs(sig)*(1+(4*((x/sig)**2))))
   f3 = ((nu * math.sqrt(math.log(2)/math.pi))/abs(sig)) + ((1-nu)/(math.pi*abs(sig)))
   svx = (f1 + f2)/f3
   return svx
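voigt is a normalised pseudo-Voigt: a Gaussian and a Lorentzian of the same width mixed by nu (nu=1 gives a pure Gaussian, nu=0 a pure Lorentzian). A scalar-only sketch of the two components at a single offset, with placeholder values:

import math

x, sig, nu = 0.5, 0.8, 0.5    # placeholder offset, width and mixing parameter

gauss = nu * math.sqrt(math.log(2) / math.pi) / abs(sig) * math.exp(-4 * math.log(2) * (x / sig)**2)
lorentz = (1 - nu) / (math.pi * abs(sig) * (1 + 4 * (x / sig)**2))
norm = nu * math.sqrt(math.log(2) / math.pi) / abs(sig) + (1 - nu) / (math.pi * abs(sig))
print (gauss + lorentz) / norm
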
Example #19
def f_ordered_solvent(f, n_water_atoms_absent, bf_atoms_absent,
                      absent_atom_type):
    nsym = f.space_group().order_z()
    n_lost_w = nsym * n_water_atoms_absent
    data = f.data() * n_lost_w
    d_star_sq_data = f.d_star_sq().data()
    table = wk1995(absent_atom_type).fetch()
    ff = table.at_d_star_sq(d_star_sq_data)
    factor = ff * flex.exp(-bf_atoms_absent / 4.0 * d_star_sq_data)
    f_by_m = miller.array(miller_set=f, data=data * factor)
    return f_by_m
def get_hl(f_obs_cmpl, k_blur, b_blur):
  f_model_phases = f_obs_cmpl.phases().data()
  sin_f_model_phases = flex.sin(f_model_phases)
  cos_f_model_phases = flex.cos(f_model_phases)
  ss = 1./flex.pow2(f_obs_cmpl.d_spacings().data()) / 4.
  t = 2*k_blur * flex.exp(-b_blur*ss)
  hl_a_model = t * cos_f_model_phases
  hl_b_model = t * sin_f_model_phases
  hl_data = flex.hendrickson_lattman(a = hl_a_model, b = hl_b_model)
  hl = f_obs_cmpl.customized_copy(data = hl_data)
  return hl
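get_hl turns model phases into Hendrickson-Lattman A/B coefficients with amplitude t = 2*k_blur*exp(-b_blur*ss). A small sketch of just those arrays, using placeholder phases and d-spacings:

import math
from scitbx.array_family import flex

phases = flex.double([0.0, math.pi / 3, math.pi / 2])   # placeholder phases in radians
d_spacings = flex.double([6.0, 3.0, 2.0])               # placeholder d-spacings

ss = 1. / flex.pow2(d_spacings) / 4.
k_blur, b_blur = 1.0, 30.0
t = 2 * k_blur * flex.exp(-b_blur * ss)
hl_a = t * flex.cos(phases)
hl_b = t * flex.sin(phases)
print list(hl_a), list(hl_b)
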
def log_inv_fit(x, y, degree=5):
    """Fit the values log(1 / y(x)) then return the inverse of this fit.

    x, y should be iterables, the order of the polynomial for the transformed
    fit needs to be specified. This will be useful for e.g. Rmerge."""

    fit = curve_fitting.univariate_polynomial_fit(
        x, flex.log(1 / y), degree=degree, max_iterations=100
    )
    f = curve_fitting.univariate_polynomial(*fit.params)
    return 1 / flex.exp(f(x))
Example #22
def alpha_beta(f_dist, n_atoms_included, n_nonwater_atoms_absent,
               n_water_atoms_absent, bf_atoms_absent, final_error,
               absent_atom_type):
    nsym = f_dist.space_group().order_z()
    ss = 1. / flex.pow2(f_dist.d_spacings().data())
    n_part = nsym * n_atoms_included
    n_lost_p = nsym * n_nonwater_atoms_absent
    n_lost_w = nsym * n_water_atoms_absent
    f_dist_data = flex.abs(f_dist.data())
    a_d = flex.exp(-0.25 * ss * final_error**2 * math.pi**3)
    d_star_sq_data = f_dist.d_star_sq().data()
    assert approx_equal(ss, d_star_sq_data)
    table = wk1995(absent_atom_type).fetch()
    ff = table.at_d_star_sq(d_star_sq_data)
    factor = ff * flex.exp(-bf_atoms_absent / 4.0 * d_star_sq_data)
    b_d = ((1.-a_d*a_d)*n_part+n_lost_p+n_lost_w*(1.-f_dist_data*f_dist_data))*\
                                                                    factor*factor
    alpha = f_dist.array(data=a_d)
    beta = f_dist.array(data=b_d)
    return alpha, beta
Example #23
 def calc_full_refl(self,
                    I_o_p_set,
                    sin_theta_over_lambda_sq_set,
                    G,
                    B,
                    p_set,
                    rs_set,
                    flag_volume_correction=True):
     I_o_full_set = I_o_p_set / (
         G * flex.exp(-2 * B * sin_theta_over_lambda_sq_set) * p_set)
     return I_o_full_set
Example #24
def get_hl(f_obs_cmpl, k_blur, b_blur):
    f_model_phases = f_obs_cmpl.phases().data()
    sin_f_model_phases = flex.sin(f_model_phases)
    cos_f_model_phases = flex.cos(f_model_phases)
    ss = 1. / flex.pow2(f_obs_cmpl.d_spacings().data()) / 4.
    t = 2 * k_blur * flex.exp(-b_blur * ss)
    hl_a_model = t * cos_f_model_phases
    hl_b_model = t * sin_f_model_phases
    hl_data = flex.hendrickson_lattman(a=hl_a_model, b=hl_b_model)
    hl = f_obs_cmpl.customized_copy(data=hl_data)
    return hl
def log_fit(x, y, degree=5):
    """Fit the values log(y(x)) then return exp() to this fit.

    x, y should be iterables containing floats of the same size. The order is the order
    of polynomial to use for this fit. This will be useful for e.g. I/sigma."""

    fit = curve_fitting.univariate_polynomial_fit(
        x, flex.log(y), degree=degree, max_iterations=100
    )
    f = curve_fitting.univariate_polynomial(*fit.params)
    return flex.exp(f(x))
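log_inv_fit and log_fit both fit a polynomial to log-transformed values and map the fitted curve back through exp. A hedged usage sketch, assuming scitbx.math.curve_fitting is available as in the functions above and using synthetic, smoothly decaying data:

from scitbx.array_family import flex
from scitbx.math import curve_fitting

x = flex.double([0.1 * i for i in range(1, 21)])   # placeholder abscissa
y = flex.exp(-2.0 * x) * 50.0 + 1.0                # synthetic I/sigma-like values

# same pattern as log_fit above: fit log(y), then exponentiate the fitted curve
fit = curve_fitting.univariate_polynomial_fit(
    x, flex.log(y), degree=5, max_iterations=100)
f = curve_fitting.univariate_polynomial(*fit.params)
y_smooth = flex.exp(f(x))
print list(y_smooth)
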
Example #26
        def fvec_callable(pfh,current_values):

          rotx = current_values[0]
          roty = current_values[1]
          ry = spot_radius
          rz = spot_radius
          G = scale_factors[0]
          B_factor = scale_factors[1]


          I_obs = observations_original_sel.data()
          sigI_obs = observations_original_sel.sigmas()
          observations_original_sel_two_theta = observations_original_sel.two_theta(wavelength=wavelength)
          two_theta = observations_original_sel_two_theta.data()
          sin_theta_over_lambda_sq = observations_original_sel_two_theta.sin_theta_over_lambda_sq().data()

          effective_orientation = crystal_init_orientation.rotate_thru((1,0,0),rotx
           ).rotate_thru((0,1,0),roty)

          effective_a_star = sqr(effective_orientation.reciprocal_matrix())
          ph = partiality_handler(wavelength, 0)
          partiality = ph.calc_partiality_anisotropy_set(effective_a_star, observations_original_sel.indices(), ry, rz, two_theta)

          excursions = (((G * (flex.exp(-2*B_factor*sin_theta_over_lambda_sq)) * I_obs)/partiality) - I_ref) / sigI_obs

          corr_now, slope_now = get_overall_correlation((G * (flex.exp(-2*B_factor*sin_theta_over_lambda_sq)) * I_obs)/partiality, I_ref)

          """
          print "ROTATION G=%5.3f B_factor=%5.3f rotx=%6.5f roty=%6.5f ry=%6.5f rz=%6.5f J=%6.3f cc=%6.3f slope=%6.3f p_mean=%6.3f"% \
          (G, B_factor, rotx*180/math.pi, roty*180/math.pi, ry, rz, sum(excursions**2), corr_now, slope_now, flex.mean(partiality))


          plt.scatter(I_ref,(G * (flex.exp(-2*B_factor*sin_theta_over_lambda_sq)) * I_obs)/partiality,s=10, marker='x', c='r')
          plt.title('J=%6.5f CC=%6.5f Slope=%6.5f'%(sum(excursions**2), corr_now, slope_now))
          plt.xlabel('Reference intensity')
          plt.ylabel('Observed intensity (scaled)')
          plt.show()
          """

          return excursions
Example #27
  def compute_cost_refine_scale(self, I_ref, observations_original_sel, wavelength, parameters):
    G = parameters[0]
    B_factor = parameters[1]

    I_obs = observations_original_sel.data()
    sigI_obs = observations_original_sel.sigmas()
    observations_original_sel_two_theta = observations_original_sel.two_theta(wavelength=wavelength)
    two_theta = observations_original_sel_two_theta.data()
    sin_theta_over_lambda_sq = observations_original_sel_two_theta.sin_theta_over_lambda_sq().data()

    excursions = ((G * (flex.exp(-2*B_factor*sin_theta_over_lambda_sq)) * I_obs) - I_ref) / sigI_obs

    return excursions
def f_ordered_solvent(f,
                      n_water_atoms_absent,
                      bf_atoms_absent,
                      absent_atom_type):
  nsym = f.space_group().order_z()
  n_lost_w = nsym * n_water_atoms_absent
  data = f.data() * n_lost_w
  d_star_sq_data = f.d_star_sq().data()
  table = wk1995(absent_atom_type).fetch()
  ff = table.at_d_star_sq(d_star_sq_data)
  factor = ff * flex.exp(-bf_atoms_absent/4.0*d_star_sq_data)
  f_by_m = miller.array(miller_set = f, data = data*factor)
  return f_by_m
  def __init__(self,
               scattering_info,
               d_star_sq_array,
               p_scale=0.0,
               b_wilson=0.0,
               magic_fudge_factor=2.0):
    ## First recompute some parameters
    scattering_info.scat_data(d_star_sq_array)
    ## mean intensity
    self.mean_intensity = scattering_info.sigma_tot_sq
    self.mean_intensity = self.mean_intensity*(1.0+scattering_info.gamma_tot)
    ## I am missing a factor 2 somewhere
    self.mean_intensity/=magic_fudge_factor
    self.mean_intensity=self.mean_intensity*flex.exp(
      -d_star_sq_array*b_wilson/2.0)
    self.mean_intensity*=math.exp(-p_scale)

    ## the associated standard deviation
    self.sigma_intensity = scattering_info.gamma_tot_sigma
    self.sigma_intensity = scattering_info.sigma_tot_sq*self.sigma_intensity
    self.sigma_intensity = self.sigma_intensity*flex.exp(
      -d_star_sq_array*b_wilson/2.0)
    self.sigma_intensity*= math.exp(-p_scale)
Example #30
  def __init__(self,
               scattering_info,
               d_star_sq_array,
               p_scale=0.0,
               b_wilson=0.0,
               magic_fudge_factor=2.0):
    ## First recompute some parameters
    scattering_info.scat_data(d_star_sq_array)
    ## mean intensity
    self.mean_intensity = scattering_info.sigma_tot_sq
    self.mean_intensity = self.mean_intensity*(1.0+scattering_info.gamma_tot)
    ## I am missing a factor 2 somewhere
    self.mean_intensity/=magic_fudge_factor
    self.mean_intensity=self.mean_intensity*flex.exp(
      -d_star_sq_array*b_wilson/2.0)
    self.mean_intensity*=math.exp(-p_scale)

    ## the associated standard deviation
    self.sigma_intensity = scattering_info.gamma_tot_sigma
    self.sigma_intensity = scattering_info.sigma_tot_sq*self.sigma_intensity
    self.sigma_intensity = self.sigma_intensity*flex.exp(
      -d_star_sq_array*b_wilson/2.0)
    self.sigma_intensity*= math.exp(-p_scale)
def alpha_beta(f_dist,
               n_atoms_included,
               n_nonwater_atoms_absent,
               n_water_atoms_absent,
               bf_atoms_absent,
               final_error,
               absent_atom_type):
  nsym = f_dist.space_group().order_z()
  ss = 1./flex.pow2(f_dist.d_spacings().data())
  n_part   = nsym * n_atoms_included
  n_lost_p = nsym * n_nonwater_atoms_absent
  n_lost_w = nsym * n_water_atoms_absent
  f_dist_data = flex.abs(f_dist.data())
  a_d = flex.exp( -0.25 * ss * final_error**2 * math.pi**3 )
  d_star_sq_data = f_dist.d_star_sq().data()
  assert approx_equal(ss,d_star_sq_data)
  table = wk1995(absent_atom_type).fetch()
  ff = table.at_d_star_sq(d_star_sq_data)
  factor = ff * flex.exp(-bf_atoms_absent/4.0*d_star_sq_data)
  b_d = ((1.-a_d*a_d)*n_part+n_lost_p+n_lost_w*(1.-f_dist_data*f_dist_data))*\
                                                                  factor*factor
  alpha = f_dist.array(data = a_d)
  beta  = f_dist.array(data = b_d)
  return alpha, beta
Example #32
def sharp_map(sites_frac, map_coeffs, ss = None, b_sharp=None, b_min = -150,
              b_max = 150, step = 10):
  if(ss is None):
    ss = 1./flex.pow2(map_coeffs.d_spacings().data()) / 4.
  from cctbx import miller
  if(b_sharp is None):
    t=-1
    map_coeffs_best = None
    b_sharp_best = None
    for b_sharp in range(b_min,b_max,step):
      map_coeffs_ = map_coeffs.deep_copy()
      sc2 = flex.exp(b_sharp*ss)
      map_coeffs_ = map_coeffs_.customized_copy(data = map_coeffs_.data()*sc2)
      t_=sharp_evaluation_target(sites_frac=sites_frac, map_coeffs=map_coeffs_)
      if(t_>t):
        t=t_
        b_sharp_best = b_sharp
        map_coeffs_best = map_coeffs_.deep_copy()
    print "b_sharp:", b_sharp_best, t
  else:
    scale = flex.exp(b_sharp*ss)
    map_coeffs_best = map_coeffs.customized_copy(data=map_coeffs.data()*scale)
    b_sharp_best = b_sharp
  return map_coeffs_best, b_sharp_best
Example #33
def random_data(B_add=35,
                n_residues=585.0,
                d_min=3.5):
  unit_cell = uctbx.unit_cell( (81.0,  81.0,  61.0,  90.0,  90.0, 120.0) )
  xtal = crystal.symmetry(unit_cell, " P 3 ")
  ## In P3 I do not have to worry about centrics or reflections with different
  ## epsilons.
  miller_set = miller.build_set(
    crystal_symmetry = xtal,
    anomalous_flag = False,
    d_min = d_min)
  ## Now make an array with d_star_sq values
  d_star_sq = miller_set.d_spacings().data()
  d_star_sq = 1.0/(d_star_sq*d_star_sq)
  asu = {"H":8.0*n_residues*1.0,
         "C":5.0*n_residues*1.0,
         "N":1.5*n_residues*1.0,
         "O":1.2*n_residues*1.0}
  scat_info = absolute_scaling.scattering_information(
    asu_contents = asu,
    fraction_protein=1.0,
    fraction_nucleic=0.0)
  scat_info.scat_data(d_star_sq)
  gamma_prot = scat_info.gamma_tot
  sigma_prot = scat_info.sigma_tot_sq
  ## The number of residues is multiplied by the Z of the spacegroup
  protein_total = sigma_prot * (1.0+gamma_prot)
  ## add a B-value of 35 please
  protein_total = protein_total*flex.exp(-B_add*d_star_sq/2.0)
  ## Now that has been done,
  ## We can make random structure factors
  normalised_random_intensities = \
     random_transform.wilson_intensity_variate(protein_total.size())
  random_intensities = normalised_random_intensities*protein_total*math.exp(6)
  std_dev = random_intensities*5.0/100.0
  noise = random_transform.normal_variate(N=protein_total.size())
  noise = noise*std_dev
  random_intensities=noise+random_intensities
  ## Stuff the arrays into the miller array
  miller_array = miller.array(miller_set,
                              data=random_intensities,
                              sigmas=std_dev)
  miller_array=miller_array.set_observation_type(
    xray.observation_types.intensity())
  miller_array = miller_array.f_sq_as_f()
  return (miller_array)
Example #34
def find_b(fo, fc):
    # TODO: Need to use linear
    #tmp(f=fo)
    #fo=fo.resolution_filter(d_min=5)
    #fo, fc, = fo.common_sets(fc)
    cc = -999
    b = None
    ss = 1. / flex.pow2(fc.d_spacings().data()) / 4.
    data = fc.data()
    for b_ in range(-500, 500, 5):
        sc = flex.exp(-b_ * ss)
        fc_ = fc.customized_copy(data=data * sc)
        cc_ = fo.map_correlation(other=fc_)
        if (cc_ > cc):
            cc = cc_
            b = b_
    return b
Example #35
    def extreme_wilson_outliers(self,
                                p_extreme_wilson=1e-1,
                                return_data=False):

        n_acentric = self.acentric_work.data().size()
        n_centric = self.centric_work.data().size()

        extreme_acentric = 1.0 -  \
           flex.pow(1.0 - flex.exp(-self.acentric_work.data() ),float(n_acentric))
        extreme_centric = 1.0 - \
           flex.pow(erf(flex.sqrt(self.centric_work.data()/2.0) ),float(n_centric))

        acentric_selection = flex.bool(extreme_acentric > p_extreme_wilson)
        centric_selection = flex.bool(extreme_centric > p_extreme_wilson)
        all_flags = self.work_obs.customized_copy(
            indices=self.acentric_work.indices().concatenate(
                self.centric_work.indices()),
            data=acentric_selection.concatenate(centric_selection))
        all_p_values = self.work_obs.customized_copy(
            indices=self.acentric_work.indices().concatenate(
                self.centric_work.indices()),
            data=extreme_acentric.concatenate(extreme_centric))
        all_flags = all_flags.common_set(self.miller_obs)
        all_p_values = all_p_values.common_set(self.miller_obs)

        log_string = """
Outlier rejection based on extreme value Wilson statistics.
-----------------------------------------------------------

Reflections whose normalized intensity has an associated p-value
lower than %s are flagged as possible outliers.
The p-value is obtained using extreme value distributions of the
Wilson distribution.
    """ % (p_extreme_wilson)

        log_string = self.make_log_wilson(log_string, all_flags, all_p_values)

        print >> self.out
        print >> self.out, log_string
        print >> self.out

        if not return_data:
            return all_flags
        else:
            return self.miller_obs.select(all_flags.data())
Example #36
def random_data(B_add=35, n_residues=585.0, d_min=3.5):
    unit_cell = uctbx.unit_cell((81.0, 81.0, 61.0, 90.0, 90.0, 120.0))
    xtal = crystal.symmetry(unit_cell, " P 3 ")
    ## In P3 I do not have to worry about centrics or reflections with different
    ## epsilons.
    miller_set = miller.build_set(crystal_symmetry=xtal,
                                  anomalous_flag=False,
                                  d_min=d_min)
    ## Now make an array with d_star_sq values
    d_star_sq = miller_set.d_spacings().data()
    d_star_sq = 1.0 / (d_star_sq * d_star_sq)
    asu = {
        "H": 8.0 * n_residues * 1.0,
        "C": 5.0 * n_residues * 1.0,
        "N": 1.5 * n_residues * 1.0,
        "O": 1.2 * n_residues * 1.0
    }
    scat_info = absolute_scaling.scattering_information(asu_contents=asu,
                                                        fraction_protein=1.0,
                                                        fraction_nucleic=0.0)
    scat_info.scat_data(d_star_sq)
    gamma_prot = scat_info.gamma_tot
    sigma_prot = scat_info.sigma_tot_sq
    ## The number of residues is multiplied by the Z of the spacegroup
    protein_total = sigma_prot * (1.0 + gamma_prot)
    ## add a B-value of 35 please
    protein_total = protein_total * flex.exp(-B_add * d_star_sq / 2.0)
    ## Now that has been done,
    ## We can make random structure factors
    normalised_random_intensities = \
       random_transform.wilson_intensity_variate(protein_total.size())
    random_intensities = normalised_random_intensities * protein_total * math.exp(
        6)
    std_dev = random_intensities * 5.0 / 100.0
    noise = random_transform.normal_variate(N=protein_total.size())
    noise = noise * std_dev
    random_intensities = noise + random_intensities
    ## Stuff the arrays into the miller array
    miller_array = miller.array(miller_set,
                                data=random_intensities,
                                sigmas=std_dev)
    miller_array = miller_array.set_observation_type(
        xray.observation_types.intensity())
    miller_array = miller_array.f_sq_as_f()
    return (miller_array)
    def extreme_wilson_outliers(self, p_extreme_wilson=1e-1, return_data=False):

        n_acentric = self.acentric_work.data().size()
        n_centric = self.centric_work.data().size()

        extreme_acentric = 1.0 - flex.pow(1.0 - flex.exp(-self.acentric_work.data()), float(n_acentric))
        extreme_centric = 1.0 - flex.pow(erf(flex.sqrt(self.centric_work.data() / 2.0)), float(n_centric))

        acentric_selection = flex.bool(extreme_acentric > p_extreme_wilson)
        centric_selection = flex.bool(extreme_centric > p_extreme_wilson)
        all_flags = self.work_obs.customized_copy(
            indices=self.acentric_work.indices().concatenate(self.centric_work.indices()),
            data=acentric_selection.concatenate(centric_selection),
        )
        all_p_values = self.work_obs.customized_copy(
            indices=self.acentric_work.indices().concatenate(self.centric_work.indices()),
            data=extreme_acentric.concatenate(extreme_centric),
        )
        all_flags = all_flags.common_set(self.miller_obs)
        all_p_values = all_p_values.common_set(self.miller_obs)

        log_string = """
Outlier rejection based on extreme value Wilson statistics.
-----------------------------------------------------------

Reflections whose normalized intensity has an associated p-value
lower than %s are flagged as possible outliers.
The p-value is obtained using extreme value distributions of the
Wilson distribution.
    """ % (
            p_extreme_wilson
        )

        log_string = self.make_log_wilson(log_string, all_flags, all_p_values)

        print >>self.out
        print >>self.out, log_string
        print >>self.out

        if not return_data:
            return all_flags
        else:
            return self.miller_obs.select(all_flags.data())
    def basic_wilson_outliers(self, p_basic_wilson=1e-6, return_data=False):
        p_acentric_single = 1.0 - (1.0 - flex.exp(-self.acentric_work.data()))
        p_centric_single = 1.0 - erf(flex.sqrt(self.centric_work.data() / 2.0))

        acentric_selection = flex.bool(p_acentric_single > p_basic_wilson)
        centric_selection = flex.bool(p_centric_single > p_basic_wilson)

        # combine all in a single miller array
        all_flags = self.work_obs.customized_copy(
            indices=self.acentric_work.indices().concatenate(self.centric_work.indices()),
            data=acentric_selection.concatenate(centric_selection),
        )
        all_p_values = self.work_obs.customized_copy(
            indices=self.acentric_work.indices().concatenate(self.centric_work.indices()),
            data=p_acentric_single.concatenate(p_centric_single),
        )

        # get the order right
        all_flags = all_flags.common_set(self.miller_obs)
        all_p_values = all_p_values.common_set(self.miller_obs)

        # prepare a table with results please
        log_string = """
Outlier rejection based on basic Wilson statistics.
--------------------------------------------------

See Read, Acta Cryst. (1999). D55, 1759-1764. for details.
Reflections whose normalized intensity has an associated p-value
lower than %s are flagged as possible outliers.
    """ % (
            p_basic_wilson
        )

        log_string = self.make_log_wilson(log_string, all_flags, all_p_values)
        print >>self.out
        print >>self.out, log_string
        print >>self.out

        if not return_data:
            return all_flags
        else:
            return self.miller_obs.select(all_flags.data())
Example #39
    def basic_wilson_outliers(self, p_basic_wilson=1E-6, return_data=False):
        p_acentric_single = 1.0 - (1.0 - flex.exp(-self.acentric_work.data()))
        p_centric_single = 1.0 - erf(flex.sqrt(self.centric_work.data() / 2.0))

        acentric_selection = flex.bool(p_acentric_single > p_basic_wilson)
        centric_selection = flex.bool(p_centric_single > p_basic_wilson)

        # combine all in a single miller array
        all_flags = self.work_obs.customized_copy(
            indices=self.acentric_work.indices().concatenate(
                self.centric_work.indices()),
            data=acentric_selection.concatenate(centric_selection))
        all_p_values = self.work_obs.customized_copy(
            indices=self.acentric_work.indices().concatenate(
                self.centric_work.indices()),
            data=p_acentric_single.concatenate(p_centric_single))

        # get the order right
        all_flags = all_flags.common_set(self.miller_obs)
        all_p_values = all_p_values.common_set(self.miller_obs)

        # prepare a table with results please
        log_string = """
Outlier rejection based on basic Wilson statistics.
--------------------------------------------------

See Read, Acta Cryst. (1999). D55, 1759-1764. for details.
Reflections whose normalized intensity has an associated p-value
lower than %s are flagged as possible outliers.
    """ % (p_basic_wilson)

        log_string = self.make_log_wilson(log_string, all_flags, all_p_values)
        print >> self.out
        print >> self.out, log_string
        print >> self.out

        if not return_data:
            return all_flags
        else:
            return self.miller_obs.select(all_flags.data())
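The p-values used above come from the Wilson distribution: for an acentric reflection with normalized intensity z the survival probability is exp(-z), and for a centric one it is 1 - erf(sqrt(z/2)); reflections are kept when the p-value exceeds the cutoff. A plain-Python illustration with placeholder z values:

import math

z_values = [0.2, 1.0, 4.0, 12.0]   # placeholder normalized intensities
p_basic_wilson = 1e-6

for z in z_values:
  p_acentric = math.exp(-z)                      # equals 1 - (1 - exp(-z)) as above
  p_centric = 1.0 - math.erf(math.sqrt(z / 2.0))
  keep = p_acentric > p_basic_wilson             # flags True for non-outliers
  print z, p_acentric, p_centric, keep
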
  def __init__(self,
               miller_array,
               kernel_width=None,
               n_bins=23,
               n_term=13,
               d_star_sq_low=None,
               d_star_sq_high=None,
               auto_kernel=False,
               number_of_sorted_reflections_for_auto_kernel=50):
    ## auto_kernel is either False, True, or a specific integer
    if kernel_width is None:
      assert (auto_kernel is not False)
    if auto_kernel is not False:
      assert (kernel_width==None)
    assert miller_array.size()>0
    ## intensity arrays please
    work_array = None
    if not miller_array.is_real_array():
      raise RuntimeError("Please provide real arrays only")
      ## I might have to change this upper condition
    if miller_array.is_xray_amplitude_array():
      work_array = miller_array.f_as_f_sq()
    if miller_array.is_xray_intensity_array():
      work_array = miller_array.deep_copy()
      work_array = work_array.set_observation_type(miller_array)
    ## If the type is neither intensity nor amplitude,
    ## raise an exception
    if not miller_array.is_xray_intensity_array():
      if not miller_array.is_xray_amplitude_array():
        raise RuntimeError("Observation type unknown")
    ## declare some shorthands
    I_obs = work_array.data()
    epsilons = work_array.epsilons().data().as_double()
    d_star_sq_hkl = work_array.d_spacings().data()
    d_star_sq_hkl = 1.0/(d_star_sq_hkl*d_star_sq_hkl)
    ## Set up some limits
    if d_star_sq_low is None:
      d_star_sq_low = flex.min(d_star_sq_hkl)
    if d_star_sq_high is None:
      d_star_sq_high = flex.max(d_star_sq_hkl)
    ## A feeble attempt to determine an appropriate kernel width
    ## that seems to work reasonably well in practice
    self.kernel_width=kernel_width
    if auto_kernel is not False:
      ## get the d_star_sq_array and sort it
      sort_permut = flex.sort_permutation(d_star_sq_hkl)
      ##
      if auto_kernel==True:
        number=number_of_sorted_reflections_for_auto_kernel
      else:
        number=int(auto_kernel)
      if number > d_star_sq_hkl.size():
        number = d_star_sq_hkl.size()-1
      self.kernel_width = d_star_sq_hkl[sort_permut[number]]-d_star_sq_low
      assert self.kernel_width > 0
    ## Making the d_star_sq_array
    assert (n_bins>1) ## ensure there is more than 1 bin for interpolation
    self.d_star_sq_array = chebyshev_lsq_fit.chebyshev_nodes(
      n=n_bins,
      low=d_star_sq_low,
      high=d_star_sq_high,
      include_limits=True)

    ## Now get the average intensity please
    ##
    ## This step can be reasonably time consuming
    self.mean_I_array = scaling.kernel_normalisation(
      d_star_sq_hkl = d_star_sq_hkl,
      I_hkl = I_obs,
      epsilon = epsilons,
      d_star_sq_array = self.d_star_sq_array,
      kernel_width = self.kernel_width
      )
    self.var_I_array = scaling.kernel_normalisation(
      d_star_sq_hkl = d_star_sq_hkl,
      I_hkl = I_obs*I_obs,
      epsilon = epsilons*epsilons,
      d_star_sq_array = self.d_star_sq_array,
      kernel_width = self.kernel_width
      )
    self.var_I_array = self.var_I_array - self.mean_I_array*self.mean_I_array
    self.weight_sum = self.var_I_array = scaling.kernel_normalisation(
      d_star_sq_hkl = d_star_sq_hkl,
      I_hkl = I_obs*0.0+1.0,
      epsilon = epsilons*0.0+1.0,
      d_star_sq_array = self.d_star_sq_array,
      kernel_width = self.kernel_width
      )
    eps = 1e-16 # XXX Maybe this should be larger?
    self.bin_selection = (self.mean_I_array > eps)
    sel_pos = self.bin_selection.iselection()
    # FIXME rare bug: this crashes when the majority of the data are zero,
    # e.g. because resolution limit was set too high and F/I filled in with 0.
    # it would be good to catch such cases in advance by inspecting the binned
    # values, and raise a different error message.
    assert sel_pos.size() > 0
    if (sel_pos.size() < self.mean_I_array.size() / 2) :
      raise Sorry("Analysis could not be continued because more than half "+
        "of the data have values below 1e-16.  This usually indicates either "+
        "an inappropriately high resolution cutoff, or an error in the data "+
        "file which artificially creates a higher resolution limit.")
    self.mean_I_array = self.mean_I_array.select(sel_pos)
    self.d_star_sq_array = self.d_star_sq_array.select(sel_pos)
    self.var_I_array = flex.log( self.var_I_array.select( sel_pos ) )
    self.weight_sum = self.weight_sum.select(sel_pos)
    self.mean_I_array = flex.log( self.mean_I_array )
    ## Fit a chebyshev polynome please
    normalizer_fit_lsq = chebyshev_lsq_fit.chebyshev_lsq_fit(
      n_term,
      self.d_star_sq_array,
      self.mean_I_array )
    self.normalizer = chebyshev_polynome(
      n_term,
      d_star_sq_low,
      d_star_sq_high,
      normalizer_fit_lsq.coefs)
    var_lsq_fit = chebyshev_lsq_fit.chebyshev_lsq_fit(
      n_term,
      self.d_star_sq_array,
      self.var_I_array )
    self.var_norm = chebyshev_polynome(
      n_term,
      d_star_sq_low,
      d_star_sq_high,
      var_lsq_fit.coefs)
    ws_fit = chebyshev_lsq_fit.chebyshev_lsq_fit(
      n_term,
      self.d_star_sq_array,
      self.weight_sum )
    self.weight_sum = chebyshev_polynome(
      n_term,
      d_star_sq_low,
      d_star_sq_high,
      ws_fit.coefs)

    ## The data will now be normalised using the
    ## chebyshev polynome we have just obtained
    self.mean_I_array = flex.exp( self.mean_I_array)
    self.normalizer_for_miller_array =  flex.exp( self.normalizer.f(d_star_sq_hkl) )
    self.var_I_array = flex.exp( self.var_I_array )
    self.var_norm = flex.exp( self.var_norm.f(d_star_sq_hkl) )
    self.weight_sum = flex.exp( self.weight_sum.f(d_star_sq_hkl))
    self.normalised_miller = None
    self.normalised_miller_dev_eps = None
    if work_array.sigmas() is not None:
      self.normalised_miller = work_array.customized_copy(
        data = work_array.data()/self.normalizer_for_miller_array,
        sigmas = work_array.sigmas()/self.normalizer_for_miller_array
        ).set_observation_type(work_array)
      self.normalised_miller_dev_eps = self.normalised_miller.customized_copy(
        data = self.normalised_miller.data()/epsilons,
        sigmas = self.normalised_miller.sigmas()/epsilons)\
        .set_observation_type(work_array)
    else:
      self.normalised_miller = work_array.customized_copy(
        data = work_array.data()/self.normalizer_for_miller_array
        ).set_observation_type(work_array)
      self.normalised_miller_dev_eps = self.normalised_miller.customized_copy(
        data = self.normalised_miller.data()/epsilons)\
        .set_observation_type(work_array)
 def calc_full_refl(self, I_o_p_set, sin_theta_over_lambda_sq_set,
                  G, B, p_set, rs_set, flag_volume_correction=True):
   I_o_full_set = I_o_p_set/(G * flex.exp(-2*B*sin_theta_over_lambda_sq_set) * p_set)
   return I_o_full_set
Example #42
def ls_ff_weights(f_obs, atom, B):
  d_star_sq_data = f_obs.d_star_sq().data()
  table = wk1995(atom).fetch()
  ff = table.at_d_star_sq(d_star_sq_data) * flex.exp(-B/4.0*d_star_sq_data)
  weights = 1.0/flex.pow2(ff)
  return weights
  def do_something_clever(self,obs,sobs,calc,mock):
    # first get the sort order
    # sort on the calculated data please
    sort_order = flex.sort_permutation( calc )
    inverse_sort_order = sort_order.inverse_permutation()

    sorted_obs  = obs.select(sort_order)
    sorted_sobs = sobs.select(sort_order)
    sorted_calc = calc.select(sort_order)
    sorted_mock = mock.select(sort_order)

    log_calc = flex.log(sorted_mock)
    deltas   = flex.log(sorted_obs) - flex.log(sorted_calc)

    old_deltas = deltas.deep_copy()

    # make bins on the basis of the order
    bin_size = float(sorted_obs.size())/self.n_e_bins
    bin_size = int(bin_size) + 1
    ebin = flex.int()
    count=0
    for ii in xrange( sorted_obs.size() ):
      if ii%bin_size==0:
        count+=1
      ebin.append( count-1 )

    # the bins have been setup, now we can reorder stuff
    for ibin in xrange(self.n_e_bins):
      this_bin_selection = flex.bool( ebin == ibin )
      tmp_n = (this_bin_selection).count(True)
      permute = flex.sort_permutation( flex.random_double( tmp_n ) )

      #select and swap
      selected_deltas = deltas.select( this_bin_selection )
      selected_deltas = selected_deltas.select( permute )
      selected_sobs   = sorted_sobs.select( this_bin_selection )
      selected_sobs   = selected_sobs.select( permute )


      # sanity check: make sure the selected deltas are not wildly off,
      # a safeguard to prevent the introduction of outliers
      mean_delta = flex.mean( selected_deltas )
      std_delta  = math.sqrt( flex.mean( selected_deltas*selected_deltas ) - mean_delta*mean_delta )
      outliers = flex.bool( flex.abs(selected_deltas-mean_delta)>self.thres*std_delta )
      #print list( flex.abs(selected_deltas-mean_delta)/std_delta )
      #print list( outliers )

      if (outliers).count(True) > 0 :
        non_out_delta   = selected_deltas.select( ~outliers )
        tmp_permut      = flex.sort_permutation( flex.random_double( (~outliers).count(True)  ) )
        tmp_delta       = non_out_delta.select( tmp_permut )
        tmp_delta       = tmp_delta[0:(outliers).count(True)]
        selected_deltas = selected_deltas.set_selected( outliers.iselection(), tmp_delta )


      #set the deltas back please
      deltas = deltas.set_selected(this_bin_selection, selected_deltas)
      sorted_sobs = sorted_sobs.set_selected(this_bin_selection, selected_sobs)

    #the deltas have been swapped, apply things back please
    log_calc = log_calc + deltas
    log_calc = flex.exp(log_calc)

    #now we have to get things back in proper order again thank you
    new_fobs = log_calc.select(inverse_sort_order)
    new_sobs = sorted_sobs.select(inverse_sort_order)
    return new_fobs, new_sobs
Example #44
                print "Scale for", Is[i][0], "is", scale

    print Is[0][1].data().size()
    # Prepare plot data
    for_plot = OrderedDict() # {name: [mean, ...], ..}
    binner = Is[0][1].setup_binner(n_bins=params.nbins)#reflections_per_bin=50)
    for i_bin in binner.range_used():
        for name, I, (scale, b) in Is:
            dmax, dmin = binner.bin_d_range(i_bin)
            Isel = I.resolution_filter(d_max=dmax, d_min=dmin)

            #Isel = I.select(binner.bin_indices() == i_bin) # crash if not common
            if params.over_sigma:
                data = Isel.data() / Isel.sigmas()
            else:
                bfac = flex.exp(-b * Isel.d_star_sq().data()) if b != 0 else 1.
                data = Isel.data() *scale*bfac
            if len(data)==0:
                print "WARNING: ", name, "No data in %f .. %f" % binner.bin_d_range(i_bin)
                for_plot.setdefault(name, []).append(float("nan"))
            elif params.logscale:
                for_plot.setdefault(name, []).append(math.log(flex.mean(data))) # taking log<I>
            else:
                for_plot.setdefault(name, []).append(flex.mean(data))

    # If only two data in, calc CC.
    extra = []
    if len(Is) == 2 and params.extra.lower() != "no":
        for i_bin in binner.range_used():
            dmax, dmin = binner.bin_d_range(i_bin)
            #Isel0, Isel1 = map(lambda x:x[1].select(binner.bin_indices() == i_bin), Is)
Example #45
def run(params, xfiles):
    # read reference
    arrays = iotbx.file_reader.any_file(params.reference.file).file_server.miller_arrays
    arrays = filter(lambda ar: ar.is_xray_data_array(), arrays)
    if params.reference.label is not None:
        arrays = filter(lambda ar: ar.info().label_string() == params.reference.label, arrays)

    if len(arrays) != 1:
        print "Can't decide data to use in reference file:", params.reference.file
        print "Choose label"
        for ar in arrays: print ar.info().label_string()
        return

    refdata = arrays[0].as_intensity_array()
    refdata = refdata.resolution_filter(d_max=params.reference.d_max, d_min=params.reference.d_min)

    print "file n.common k b cc.org cc.mean cc.scaled a b c al be ga"
    
    for xf in xfiles:
        print "# Reading", xf
        try:
            xfile = DenzoXfile(xf)
        except:
            traceback.print_exc()
            continue
        a = xfile.miller_array(anomalous_flag=refdata.anomalous_flag())
        a = a.select(a.sigmas() > 0)
        a = a.resolution_filter(d_min=params.d_min, d_max=params.d_max)
        if params.sigma_cutoff is not None:
            a = a.select(a.data()/a.sigmas() >= params.sigma_cutoff)

        a = a.merge_equivalents(use_internal_variance=False).array()

        tmp, a = refdata.common_sets(a, assert_is_similar_symmetry=False)
        n_common = tmp.size()

        if n_common == 0:
            print "# No useful reflection in this file. skip."
            continue

        corr = flex.linear_correlation(tmp.data(), a.data())
        cc_org = corr.coefficient() if corr.is_well_defined() else float("nan")

        # Calc CC in resolution bin and average
        tmp.setup_binner(auto_binning=True)
        cc_bins = []
        for i_bin in tmp.binner().range_used():
            sel = tmp.binner().selection(i_bin)
            corr = flex.linear_correlation(tmp.select(sel).data(), a.select(sel).data())
            if not corr.is_well_defined(): continue
            cc_bins.append(corr.coefficient())

        cc_mean = sum(cc_bins) / float(len(cc_bins)) if len(cc_bins) > 0 else float("nan")
            
        # Determine scale and B
        k, b = kBdecider(tmp, a).run()

        bfac = flex.exp(-b * a.d_star_sq().data()) if b != 0 else 1.
        corr = flex.linear_correlation(tmp.data(), a.data() * k*bfac)
        cc_scaled = corr.coefficient() if corr.is_well_defined() else float("nan")

        print "%s %5d %.3e %.3e %.4f %.4f %.4f" % (xf, n_common, k, b, cc_org, cc_mean, cc_scaled),
        print ("%.3f "*6)%a.unit_cell().parameters()

        if params.show_plot:
            import pylab
            from matplotlib.ticker import FuncFormatter
            s3_formatter = lambda x,pos: "inf" if x == 0 else "%.2f" % (x**(-1./3))

            fig, ax1 = pylab.plt.subplots()

            plot_x = map(lambda i: tmp.binner().bin_d_range(i)[1]**(-3), tmp.binner().range_used())

            #for name, ar in (("reference", tmp), ("data", a)):
            vals = map(lambda i: flex.mean(tmp.data().select(tmp.binner().selection(i))), tmp.binner().range_used())
            pylab.plot(plot_x, vals, label="reference")

            scale = flex.sum(tmp.data()*a.data()) / flex.sum(flex.pow2(a.data()))
            print "Linear-scale=", scale
            vals = map(lambda i: scale*flex.mean(a.data().select(tmp.binner().selection(i))), tmp.binner().range_used())
            pylab.plot(plot_x, vals, label="data")
            vals = map(lambda i: flex.mean((a.data()*k*bfac).select(tmp.binner().selection(i))), tmp.binner().range_used())
            pylab.plot(plot_x, vals, label="data_scaled")

            """
            from mmtbx.scaling import absolute_scaling, relative_scaling
            ls_scaling = relative_scaling.ls_rel_scale_driver(tmp, tmp.customized_copy(data=a.data(),sigmas=a.sigmas()), use_intensities=True, scale_weight=True, use_weights=True)
            ls_scaling.show()
            vals = map(lambda i: flex.mean(ls_scaling.derivative.resolution_filter(*tmp.binner().bin_d_range(i)).data()), tmp.binner().range_used())
            pylab.plot(plot_x, vals, label="data_scaled2")
            """
            
            pylab.legend()
            pylab.xlabel('resolution (d^-3)')
            pylab.ylabel('<I>')
            pylab.setp(pylab.gca().get_legend().get_texts(), fontsize="small")
            pylab.title('Scaled with B-factors (%.2f)' % b)

            pylab.gca().xaxis.set_major_formatter(FuncFormatter(s3_formatter))

            ax2 = ax1.twinx()
            ax2.plot(plot_x, cc_bins, "black")
            ax2.set_ylabel('CC')
            pylab.show()
Example #46
def run(params, mtzfiles):
    arrays = get_arrays(mtzfiles, d_min=params.dmin, d_max=params.dmax)

    if params.take_common:
        arrays = commonalize(arrays)

    maxlen_f = max(map(lambda x: len(x[0]), arrays))

    ref_f_obs = arrays[0][1]

    scales = []
    for f, f_obs, f_model, flag in arrays:
        if ref_f_obs == f_obs: k, B = 1., 0
        else: k, B = kBdecider(ref_f_obs, f_obs).run()

        scales.append((k, B))

    if params.reference != "first":
        if params.reference == "bmin": # scale to strongest
            kref, bref = max(scales, key=lambda x:x[1])
        elif params.reference == "bmax": # scale to most weak
            kref, bref = min(scales, key=lambda x:x[1])
        elif params.reference == "bmed": # scale to most weak
            perm = range(len(scales))
            perm.sort(key=lambda i:scales[i][1])
            kref, bref = scales[perm[len(perm)//2]]
        else:
            raise "Never reaches here"

        print "# Set K=%.2f B=%.2f as reference" % (kref,bref)
        scales = map(lambda x: (x[0]/kref, x[1]-bref), scales) # not bref-x[1], because negated later

    print ("%"+str(maxlen_f)+"s r_work r_free cc_work.E cc_free.E sigmaa fom k B") % "filename"
    for (f, f_obs, f_model, flag), (k, B) in zip(arrays, scales):
        d_star_sq = f_obs.d_star_sq().data()
        scale = k * flex.exp(-B*d_star_sq)
        
        # Normalized
        #f_obs.setup_binner(auto_binning=True)
        #f_model.setup_binner(auto_binning=True)
        #e_obs, e_model = map(lambda x:x.quasi_normalize_structure_factors(), (f_obs, f_model))
        e_obs = absolute_scaling.kernel_normalisation(f_obs.customized_copy(data=f_obs.data()*scale, sigmas=None), auto_kernel=True)
        e_obs = e_obs.normalised_miller_dev_eps.f_sq_as_f()
        e_model = absolute_scaling.kernel_normalisation(f_model.customized_copy(data=f_model.data()*scale, sigmas=None), auto_kernel=True)
        e_model = e_model.normalised_miller_dev_eps.f_sq_as_f()

        f_obs_w, f_obs_t = f_obs.select(~flag.data()), f_obs.select(flag.data())
        f_model_w, f_model_t = f_model.select(~flag.data()), f_model.select(flag.data())

        e_obs_w, e_obs_t = e_obs.select(~flag.data()), e_obs.select(flag.data())
        e_model_w, e_model_t = e_model.select(~flag.data()), e_model.select(flag.data())

        r_work = calc_r(f_obs_w, f_model_w, scale.select(~flag.data()))
        r_free = calc_r(f_obs_t, f_model_t, scale.select(flag.data()))

        cc_work_E = calc_cc(e_obs_w, e_model_w, False)
        cc_free_E = calc_cc(e_obs_t, e_model_t, False)
        #cc_work_E2 = calc_cc(e_obs_w, e_model_w, True)
        #cc_free_E2 = calc_cc(e_obs_t, e_model_t, True)

        se = calc_sigmaa(f_obs, f_model, flag)
        sigmaa = flex.mean(se.sigmaa().data())
        fom = flex.mean(se.fom().data())

        print ("%"+str(maxlen_f)+"s %.4f %.4f % 7.4f % 7.4f %.4e %.4e %.3e %.3e") % (f, r_work, r_free, cc_work_E, cc_free_E, sigmaa, fom, k, B)
    def do_something_clever(self, obs, sobs, calc, mock):
        # first get the sort order
        # sort on the calculated data please
        sort_order = flex.sort_permutation(calc)
        inverse_sort_order = sort_order.inverse_permutation()

        sorted_obs = obs.select(sort_order)
        sorted_sobs = sobs.select(sort_order)
        sorted_calc = calc.select(sort_order)
        sorted_mock = mock.select(sort_order)

        log_calc = flex.log(sorted_mock)
        deltas = flex.log(sorted_obs) - flex.log(sorted_calc)

        old_deltas = deltas.deep_copy()

        # make bins on the basis of the order
        bin_size = float(sorted_obs.size()) / self.n_e_bins
        bin_size = int(bin_size) + 1
        ebin = flex.int()
        count = 0
        for ii in range(sorted_obs.size()):
            if ii % bin_size == 0:
                count += 1
            ebin.append(count - 1)

        # the bins have been setup, now we can reorder stuff
        for ibin in range(self.n_e_bins):
            this_bin_selection = flex.bool(ebin == ibin)
            tmp_n = (this_bin_selection).count(True)
            permute = flex.sort_permutation(flex.random_double(tmp_n))

            #select and swap
            selected_deltas = deltas.select(this_bin_selection)
            selected_deltas = selected_deltas.select(permute)
            selected_sobs = sorted_sobs.select(this_bin_selection)
            selected_sobs = selected_sobs.select(permute)

            # sanity check so that the selected deltas are not too weird;
            # a safeguard to prevent the introduction of outliers
            mean_delta = flex.mean(selected_deltas)
            std_delta = math.sqrt(
                flex.mean(selected_deltas * selected_deltas) -
                mean_delta * mean_delta)
            outliers = flex.bool(
                flex.abs(selected_deltas - mean_delta) > self.thres *
                std_delta)
            #print list( flex.abs(selected_deltas-mean_delta)/std_delta )
            #print list( outliers )

            if (outliers).count(True) > 0:
                non_out_delta = selected_deltas.select(~outliers)
                tmp_permut = flex.sort_permutation(
                    flex.random_double((~outliers).count(True)))
                tmp_delta = non_out_delta.select(tmp_permut)
                tmp_delta = tmp_delta[0:(outliers).count(True)]
                selected_deltas = selected_deltas.set_selected(
                    outliers.iselection(), tmp_delta)

            #set the deltas back please
            deltas = deltas.set_selected(this_bin_selection, selected_deltas)
            sorted_sobs = sorted_sobs.set_selected(this_bin_selection,
                                                   selected_sobs)

        #the deltas have been swapped, apply things back please
        log_calc = log_calc + deltas
        log_calc = flex.exp(log_calc)

        #now we have to get things back in proper order again thank you
        new_fobs = log_calc.select(inverse_sort_order)
        new_sobs = sorted_sobs.select(inverse_sort_order)
        return new_fobs, new_sobs
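# A simplified numpy sketch of the idea in do_something_clever above: sort by
# the calculated amplitudes, shuffle the log(Fobs)-log(Fcalc) residuals within
# equal-occupancy bins, and re-apply them (here to Fcalc itself; the original
# applies them to a separate "mock" array and also replaces outlier deltas).
import numpy as np

def shuffle_deltas_within_bins(obs, calc, n_bins, rng=None):
    rng = np.random.RandomState(0) if rng is None else rng
    order = np.argsort(calc)
    deltas = np.log(obs[order]) - np.log(calc[order])
    bin_size = int(len(obs) / float(n_bins)) + 1
    for start in range(0, len(obs), bin_size):
        sl = slice(start, min(start + bin_size, len(obs)))
        deltas[sl] = rng.permutation(deltas[sl])
    new_obs_sorted = np.exp(np.log(calc[order]) + deltas)
    new_obs = np.empty_like(new_obs_sorted)
    new_obs[order] = new_obs_sorted  # undo the sort
    return new_obs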
Example #48
0
    def calc_average_I_sigI(self, I, sigI, G, B, p, SE_I, sin_theta_over_lambda_sq, avg_mode, SE, iph, d_spacings):
        for i in range(len(d_spacings)):
            if d_spacings[i] < iph.d_min_partiality:
                p[i] = 1.0

        I_full = I / (G * flex.exp(-2 * B * sin_theta_over_lambda_sq) * p)
        sigI_full = sigI / (G * flex.exp(-2 * B * sin_theta_over_lambda_sq) * p)

        # filter out outliers
        if np.std(I_full) > 0:
            I_full_as_sigma = (I_full - np.mean(I_full)) / np.std(I_full)
            i_sel = flex.abs(I_full_as_sigma) <= iph.sigma_max_merge
            I_full = I_full.select(i_sel)
            sigI_full = sigI_full.select(i_sel)
            SE = SE.select(i_sel)

        # normalize the SE
        max_w = 1.0
        min_w = 0.6
        if len(SE) == 1 or ((flex.min(SE) - flex.max(SE)) == 0):
            SE_norm = flex.double([min_w + ((max_w - min_w) / 2)] * len(SE))
        else:
            m = (max_w - min_w) / (flex.min(SE) - flex.max(SE))
            b = max_w - (m * flex.min(SE))
            SE_norm = (m * SE) + b

        if avg_mode == "weighted":
            I_avg = flex.sum(SE_norm * I_full) / flex.sum(SE_norm)
            sigI_avg = flex.sum(SE_norm * sigI_full) / flex.sum(SE_norm)
        elif avg_mode == "average":
            I_avg = flex.mean(I_full)
            sigI_avg = flex.mean(sigI_full)

        # Rmeas, Rmeas_w, multiplicity
        multiplicity = len(I_full)
        if multiplicity == 1:
            r_meas_w_top = 0
            r_meas_w_btm = 0
            r_meas_top = 0
            r_meas_btm = 0
        else:
            n_obs = multiplicity
            r_meas_w_top = flex.sum(((I_full - I_avg) * SE_norm) ** 2) * math.sqrt(n_obs / (n_obs - 1.0))
            r_meas_w_btm = flex.sum((I_full * SE_norm) ** 2)
            r_meas_top = flex.sum((I_full - I_avg) ** 2) * math.sqrt(n_obs / (n_obs - 1.0))
            r_meas_btm = flex.sum((I_full) ** 2)

        # for calculation of cc1/2
        # separate the observations into two groups
        if multiplicity == 1:
            I_avg_even = 0
            I_avg_odd = 0
        else:
            i_even = range(0, len(I_full), 2)
            i_odd = range(1, len(I_full), 2)
            I_even = I_full.select(i_even)
            sigI_even = sigI_full.select(i_even)
            SE_norm_even = SE_norm.select(i_even)
            I_odd = I_full.select(i_odd)
            sigI_odd = sigI_full.select(i_odd)
            SE_norm_odd = SE_norm.select(i_odd)
            if len(i_even) > len(i_odd):
                I_odd.append(I_even[len(I_even) - 1])
                sigI_odd.append(sigI_even[len(I_even) - 1])
                SE_norm_odd.append(SE_norm_even[len(I_even) - 1])

            if avg_mode == "weighted":
                I_avg_even = flex.sum(SE_norm_even * I_even) / flex.sum(SE_norm_even)
                I_avg_odd = flex.sum(SE_norm_odd * I_odd) / flex.sum(SE_norm_odd)
            elif avg_mode == "average":
                I_avg_even = flex.mean(I_even)
                I_avg_odd = flex.mean(I_odd)

        return (
            I_avg,
            sigI_avg,
            (r_meas_w_top, r_meas_w_btm, r_meas_top, r_meas_btm, multiplicity),
            I_avg_even,
            I_avg_odd,
        )
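# A minimal numpy sketch of the merging scheme above: SE values are mapped
# linearly onto [0.6, 1.0] (smaller SE -> larger weight), the weighted mean is
# taken, and an even/odd split of the observations feeds CC1/2.
import numpy as np

def se_to_weights(se, min_w=0.6, max_w=1.0):
    if len(se) == 1 or se.min() == se.max():
        return np.full(len(se), min_w + (max_w - min_w) / 2.0)
    m = (max_w - min_w) / (se.min() - se.max())
    b = max_w - m * se.min()
    return m * se + b

def weighted_merge(i_full, se):
    w = se_to_weights(se)
    return np.sum(w * i_full) / np.sum(w)

def even_odd_means(i_full):
    # the two half-dataset averages used for CC1/2
    return i_full[0::2].mean(), i_full[1::2].mean()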
Example #49
0
    def do_clustering(self, nproc=1, b_scale=False, use_normalized=False, html_maker=None):
        self.clusters = {}
        prefix = os.path.join(self.wdir, "cctable")
        assert (b_scale, use_normalized).count(True) <= 1

        if len(self.arrays) < 2:
            print "WARNING: less than two data! can't do cc-based clustering"
            self.clusters[1] = [float("nan"), [0]]
            return

        # Absolute scaling using Wilson-B factor 
        if b_scale:
            from mmtbx.scaling.matthews import p_vm_calculator
            from mmtbx.scaling.absolute_scaling import ml_iso_absolute_scaling
            
            ofs_wilson = open("%s_wilson_scales.dat"%prefix, "w")
            n_residues = p_vm_calculator(self.arrays.values()[0], 1, 0).best_guess
            ofs_wilson.write("# guessed n_residues= %d\n" % n_residues)
            ofs_wilson.write("file wilsonB\n")
            for f in self.arrays:
                arr = self.arrays[f]
                iso_scale_and_b = ml_iso_absolute_scaling(arr, n_residues, 0)
                wilson_b = iso_scale_and_b.b_wilson
                ofs_wilson.write("%s %.3f\n" % (f, wilson_b))
                if wilson_b > 0: # Ignoring data with B<0? is a bad idea.. but how..?
                    tmp = flex.exp(-2. * wilson_b * arr.unit_cell().d_star_sq(arr.indices())/4.)
                    self.arrays[f] = arr.customized_copy(data=arr.data()*tmp,
                                                         sigmas=arr.sigmas()*tmp)
            ofs_wilson.close()

        elif use_normalized:
            from mmtbx.scaling.absolute_scaling import kernel_normalisation
            for f in self.arrays:
                arr = self.arrays[f]
                normaliser = kernel_normalisation(arr, auto_kernel=True)
                self.arrays[f] = arr.customized_copy(data=arr.data()/normaliser.normalizer_for_miller_array,
                                                     sigmas=arr.sigmas()/normaliser.normalizer_for_miller_array)
        # Prep 
        args = []
        for i in xrange(len(self.arrays)-1):
            for j in xrange(i+1, len(self.arrays)):
                args.append((i,j))
           
        # Calc all CC
        worker = lambda x: calc_cc(self.arrays.values()[x[0]], self.arrays.values()[x[1]])
        results = easy_mp.pool_map(fixed_func=worker,
                                   args=args,
                                   processes=nproc)

        # Check NaN and decide which data to remove
        idx_bad = {}
        nans = []
        cc_data_for_html = []
        for (i,j), (cc,nref) in zip(args, results):
            cc_data_for_html.append((i,j,cc,nref))
            if cc==cc: continue # skip valid CCs; only NaN pairs are recorded below
            idx_bad[i] = idx_bad.get(i, 0) + 1
            idx_bad[j] = idx_bad.get(j, 0) + 1
            nans.append([i,j])

        if html_maker is not None:
            html_maker.add_cc_clustering_details(cc_data_for_html)

        idx_bad = idx_bad.items()
        idx_bad.sort(key=lambda x:x[1])
        remove_idxes = set()
        
        for idx, badcount in reversed(idx_bad):
            if len(filter(lambda x: idx in x, nans)) == 0: continue
            remove_idxes.add(idx)
            nans = filter(lambda x: idx not in x, nans)
            if len(nans) == 0: break

        use_idxes = filter(lambda x: x not in remove_idxes, xrange(len(self.arrays)))

        # Make table: original index (in file list) -> new index (in matrix)
        count = 0
        org2now = collections.OrderedDict()
        for i in xrange(len(self.arrays)):
            if i in remove_idxes: continue
            org2now[i] = count
            count += 1

        if len(remove_idxes) > 0:
            open("%s_notused.lst"%prefix, "w").write("\n".join(map(lambda x: self.arrays.keys()[x], remove_idxes)))

        # Make matrix
        mat = numpy.zeros(shape=(len(use_idxes), len(use_idxes)))
        for (i,j), (cc,nref) in zip(args, results):
            if i in remove_idxes or j in remove_idxes: continue
            mat[org2now[j], org2now[i]] = cc
            
        open("%s.matrix"%prefix, "w").write(" ".join(map(lambda x:"%.4f"%x, mat.flatten())))

        ofs = open("%s.dat"%prefix, "w")
        ofs.write("   i    j     cc  nref\n")
        for (i,j), (cc,nref) in zip(args, results):
            ofs.write("%4d %4d %.4f %4d\n" % (i,j,cc,nref))

        open("%s_ana.R"%prefix, "w").write("""\
treeToList2 <- function(htree)
{  # stolen from $CCP4/share/blend/R/blend0.R
 groups <- list()
 itree <- dim(htree$merge)[1]
 for (i in 1:itree)
 { 
  il <- htree$merge[i,1]
  ir <- htree$merge[i,2]
  if (il < 0) lab1 <- htree$labels[-il]
  if (ir < 0) lab2 <- htree$labels[-ir]
  if (il > 0) lab1 <- groups[[il]]
  if (ir > 0) lab2 <- groups[[ir]]
  lab <- c(lab1,lab2)
  lab <- as.integer(lab)
  groups <- c(groups,list(lab))
 }
 return(groups)
}

cc<-scan("%(prefix)s.matrix")
md<-matrix(1-cc, ncol=%(ncol)d, byrow=TRUE)
hc <- hclust(as.dist(md),method="ward")
pdf("tree.pdf")
plot(hc)
dev.off()
png("tree.png",height=1000,width=1000)
plot(hc)
dev.off()

hc$labels <- c(%(hclabels)s)
groups <- treeToList2(hc)
cat("ClNumber             Nds         Clheight   IDs\\n",file="./CLUSTERS.txt")
for (i in 1:length(groups))
{
 sorted_groups <- sort(groups[[i]])
 linea <- sprintf("%%04d %%4d %%7.3f %%s\\n",
                  i,length(groups[[i]]),hc$height[i], paste(sorted_groups,collapse=" "))
 cat(linea, file="./CLUSTERS.txt", append=TRUE)
}

# reference: http://www.coppelia.io/2014/07/converting-an-r-hclust-object-into-a-d3-js-dendrogram/
library(rjson)
HCtoJSON<-function(hc){
  labels<-hc$labels
  merge<-data.frame(hc$merge)
  for (i in (1:nrow(merge))) {
    if (merge[i,1]<0 & merge[i,2]<0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(list(name=labels[-merge[i,1]]),list(name=labels[-merge[i,2]])))")))}
    else if (merge[i,1]>0 & merge[i,2]<0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(node", merge[i,1], ", list(name=labels[-merge[i,2]])))")))}
    else if (merge[i,1]<0 & merge[i,2]>0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(list(name=labels[-merge[i,1]]), node", merge[i,2],"))")))}
    else if (merge[i,1]>0 & merge[i,2]>0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(node",merge[i,1] , ", node" , merge[i,2]," ))")))}
  }
  eval(parse(text=paste0("JSON<-toJSON(node",nrow(merge), ")")))
  return(JSON)
}

JSON<-HCtoJSON(hc)
cat(JSON, file="dendro.json")

q(save="yes")
""" % dict(prefix=os.path.basename(prefix),
           ncol=len(self.arrays),
           hclabels=",".join(map(lambda x: "%d"%(x+1), org2now.keys()))))

        call(cmd="Rscript", arg="%s_ana.R" % os.path.basename(prefix),
             wdir=self.wdir)

        output = open(os.path.join(self.wdir, "CLUSTERS.txt")).readlines()
        for l in output[1:]:
            sp = l.split()
            clid, clheight, ids = sp[0], sp[2], sp[3:]
            self.clusters[int(clid)] = [float(clheight), map(int,ids)]
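# The R script above can be avoided if scipy is available; a hedged sketch of
# the equivalent clustering step, assuming the pairwise (i, j, cc) results are
# already in hand and the same 1-cc distance is wanted:
import numpy as np
from scipy.cluster import hierarchy
from scipy.spatial.distance import squareform

def cluster_from_cc(n, pairs, ccs, method="ward"):
    mat = np.zeros((n, n))
    for (i, j), cc in zip(pairs, ccs):
        mat[i, j] = mat[j, i] = 1.0 - cc  # same distance as md<-matrix(1-cc,...)
    linkage = hierarchy.linkage(squareform(mat, checks=False), method=method)
    return linkage  # cut with hierarchy.fcluster(linkage, t, criterion="distance")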
Example #50
0
    def do_clustering(self,
                      nproc=1,
                      b_scale=False,
                      use_normalized=False,
                      cluster_method="ward",
                      distance_eqn="sqrt(1-cc)",
                      min_common_refs=3,
                      html_maker=None):
        """
        Using correlation as distance metric (for hierarchical clustering)
        https://stats.stackexchange.com/questions/165194/using-correlation-as-distance-metric-for-hierarchical-clustering

        Correlation "Distances" and Hierarchical Clustering
        http://research.stowers.org/mcm/efg/R/Visualization/cor-cluster/index.htm
        """

        self.clusters = {}
        prefix = os.path.join(self.wdir, "cctable")
        assert (b_scale, use_normalized).count(True) <= 1

        distance_eqns = {
            "sqrt(1-cc)": lambda x: numpy.sqrt(1. - x),
            "1-cc": lambda x: 1. - x,
            "sqrt(1-cc^2)": lambda x: numpy.sqrt(1. - x**2),
        }
        cc_to_distance = distance_eqns[
            distance_eqn]  # raises KeyError for unknown options
        assert cluster_method in ("single", "complete", "average", "weighted",
                                  "centroid", "median", "ward"
                                  )  # available methods in scipy

        if len(self.arrays) < 2:
            print "WARNING: less than two data! can't do cc-based clustering"
            self.clusters[1] = [float("nan"), [0]]
            return

        # Absolute scaling using Wilson-B factor
        if b_scale:
            from mmtbx.scaling.matthews import p_vm_calculator
            from mmtbx.scaling.absolute_scaling import ml_iso_absolute_scaling

            ofs_wilson = open("%s_wilson_scales.dat" % prefix, "w")
            n_residues = p_vm_calculator(self.arrays.values()[0], 1,
                                         0).best_guess
            ofs_wilson.write("# guessed n_residues= %d\n" % n_residues)
            ofs_wilson.write("file wilsonB\n")
            for f in self.arrays:
                arr = self.arrays[f]
                iso_scale_and_b = ml_iso_absolute_scaling(arr, n_residues, 0)
                wilson_b = iso_scale_and_b.b_wilson
                ofs_wilson.write("%s %.3f\n" % (f, wilson_b))
                if wilson_b > 0:  # Ignoring data with B<0? is a bad idea.. but how..?
                    tmp = flex.exp(-2. * wilson_b *
                                   arr.unit_cell().d_star_sq(arr.indices()) /
                                   4.)
                    self.arrays[f] = arr.customized_copy(data=arr.data() * tmp,
                                                         sigmas=arr.sigmas() *
                                                         tmp)
            ofs_wilson.close()

        elif use_normalized:
            from mmtbx.scaling.absolute_scaling import kernel_normalisation
            failed = {}
            for f in self.arrays:
                arr = self.arrays[f]
                try:
                    normaliser = kernel_normalisation(arr, auto_kernel=True)
                    self.arrays[f] = arr.customized_copy(
                        data=arr.data() /
                        normaliser.normalizer_for_miller_array,
                        sigmas=arr.sigmas() /
                        normaliser.normalizer_for_miller_array)
                except Exception, e:
                    failed.setdefault(e.message, []).append(f)

            if failed:
                msg = ""
                for r in failed:
                    msg += " %s\n%s\n" % (r, "\n".join(
                        map(lambda x: "  %s" % x, failed[r])))
                raise Sorry(
                    "intensity normalization failed by following reason(s):\n%s"
                    % msg)
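# A small numpy check of the docstring's point about using correlation as a
# distance: for z-scored vectors of length n, the squared Euclidean distance
# equals 2*n*(1-cc), so sqrt(1-cc) behaves like a (scaled) true distance.
import numpy as np

rng = np.random.RandomState(1)
x, y = rng.standard_normal(1000), rng.standard_normal(1000)
zx, zy = (x - x.mean()) / x.std(), (y - y.mean()) / y.std()
cc = np.corrcoef(x, y)[0, 1]
print(np.isclose(np.sum((zx - zy) ** 2), 2.0 * len(x) * (1.0 - cc)))  # True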
 def reverse_reparam(values):
     return 1.0 / (1.0 + flex.exp(-values))
 def compute(self, f_calc, scale_factor=None):
     self.calculated = f_calc
     a, b, c, d, e, f = self._params
     if self._obs_part_dirty:
         # The part depending only on |F_o|^2
         if c == 0:
             q = None
         else:
             exp_args = self.observed.sin_theta_over_lambda_sq().data()
             if e != 0: k_sqr = exp_args.deep_copy()
             exp_args *= c
             exp_vals = flex.exp(exp_args)
             if c > 0:
                 q = exp_vals
             else:
                 q = 1 - exp_vals
         self._q = q
         if self.observed.sigmas() is not None:
             self._den_obs = flex.pow2(self.observed.sigmas())
             if d != 0:
                 self._den_obs += d
             if e != 0:
                 e_times_sin_theta_sq = k_sqr
                 e_times_sin_theta_sq *= e * self._wavelength
                 self._den_obs += e_times_sin_theta_sq
         else:
             self._den_obs = None
         negatives = self.observed.data() < 0
         self._p_obs = self.observed.data().deep_copy()
         self._p_obs.set_selected(negatives, 0)
         self._p_obs *= f
         #
         self._obs_part_dirty = False
     # The part depending on |F_c|^2 as well
     q = self._q
     f_c = self.calculated.data()
     p = flex.norm(f_c)
     if scale_factor is None:
         scale_factor = self.observed.scale_factor(self.calculated,
                                                   cutoff_factor=0.99)
     self.scale_factor = scale_factor
     p *= scale_factor
     p *= 1 - f
     p += self._p_obs
     den = p.deep_copy()
     den *= a * a
     der = None
     if self.computing_derivatives_wrt_f_c: der = 2 * den
     den += b
     if self.computing_derivatives_wrt_f_c: der += b
     den *= p
     if self._den_obs is not None: den += self._den_obs
     if q is None:
         w = 1 / den
     else:
         w = q / den
     if self.computing_derivatives_wrt_f_c:
         if scale_factor is not None:
             # don't modify f_c in place
             f_c = f_c * math.sqrt(scale_factor)
         der *= -flex.pow2(w)
         der *= 4. / 3
         der = der * f_c
     self.weights = w
     self.derivatives_wrt_f_c = der
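# The weight computed above has the same structure as the SHELXL weighting
# scheme.  A minimal numpy sketch of its most common special case (only a and b
# nonzero): w = 1 / (sigma(Fo^2)^2 + (a*P)^2 + b*P) with
# P = (max(Fo^2, 0) + 2*Fc^2) / 3.
import numpy as np

def shelx_like_weights(fo_sq, sig_fo_sq, fc_sq, a=0.1, b=0.0):
    # sig_fo_sq is the esd of Fo^2
    p = (np.clip(fo_sq, 0.0, None) + 2.0 * fc_sq) / 3.0
    return 1.0 / (sig_fo_sq ** 2 + (a * p) ** 2 + b * p)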
Example #53
0
 def reverse_reparam(values): return 1.0/(1.0 + flex.exp(-values))
 self.sigmaa_fitted = reverse_reparam(cheb_pol.f(self.h_array))
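# A tiny numpy illustration of the reparameterization above: the Chebyshev
# polynomial is fitted on an unbounded variable and the logistic transform
# maps it back so the fitted sigmaa stays strictly inside (0, 1).
import numpy as np

def reverse_reparam(values):
    return 1.0 / (1.0 + np.exp(-values))

print(reverse_reparam(np.array([-10.0, 0.0, 10.0])))  # ~[4.5e-05  0.5  0.99995]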
Example #54
0
 def map_coefficients(self,
                      map_type,
                      acentrics_scale = 2.0,
                      centrics_pre_scale = 1.0,
                      exclude_free_r_reflections=False,
                      fill_missing=False,
                      fill_missing_method="f_model",
                      ncs_average=False,
                      isotropize=True,
                      sharp=False,
                      post_processing_callback=None,
                      pdb_hierarchy=None, # XXX required for map_type=llg
                      merge_anomalous=None,
                      use_shelx_weight=False,
                      shelx_weight_parameter=1.5) :
   map_name_manager = mmtbx.map_names(map_name_string = map_type)
   # Special case #1: anomalous map
   if(map_name_manager.anomalous):
     if(self.anom_diff is not None):
       # Formula from page 141 in "The Bijvoet-Difference Fourier Synthesis",
       # Jeffrey Roach, METHODS IN ENZYMOLOGY, VOL. 374
       return miller.array(miller_set = self.anom_diff,
                           data       = self.anom_diff.data()/(2j))
     else: return None
   # Special case #2: anomalous residual map
   elif (map_name_manager.anomalous_residual) :
     if (self.anom_diff is not None) :
       return anomalous_residual_map_coefficients(
         fmodel=self.fmodel,
         exclude_free_r_reflections=exclude_free_r_reflections)
     else : return None
   # Special case #3: Phaser SAD LLG map
   elif (map_name_manager.phaser_sad_llg) :
     if (pdb_hierarchy is None) :
       raise RuntimeError("pdb_hierarchy must not be None when a Phaser SAD "+
         "LLG map is requested.")
     if (self.anom_diff is not None) :
       return get_phaser_sad_llg_map_coefficients(
         fmodel=self.fmodel,
         pdb_hierarchy=pdb_hierarchy)
     else :
       return None
   # Special case #4: Fcalc map
   mnm = mmtbx.map_names(map_name_string = map_type)
   if(mnm.k==0 and abs(mnm.n)==1):
     if(fill_missing):
       return self.fmodel.xray_structure.structure_factors(
         d_min = self.fmodel.f_obs().d_min()).f_calc()
     else:
       return self.fmodel.f_obs().structure_factors_from_scatterers(
         xray_structure = self.fmodel.xray_structure).f_calc()
   #
   if(self.mch is None):
     self.mch = self.fmodel.map_calculation_helper()
   ffs = fo_fc_scales(
     fmodel          = self.fmodel,
     map_type_str    = map_type,
     acentrics_scale = acentrics_scale,
     centrics_scale  = centrics_pre_scale)
   fo_scale, fc_scale = ffs.fo_scale, ffs.fc_scale
   coeffs = combine(
     fmodel                 = self.fmodel,
     map_type_str           = map_type,
     fo_scale               = fo_scale,
     fc_scale               = fc_scale,
     map_calculation_helper = self.mch,
     use_shelx_weight       = use_shelx_weight,
     shelx_weight_parameter = shelx_weight_parameter).map_coefficients()
   r_free_flags = None
   # XXX the default scale array (used for the isotropize option) needs to be
   # calculated and processed now to avoid array size errors
   scale_default = 1. / (self.fmodel.k_isotropic()*self.fmodel.k_anisotropic())
   scale_array = coeffs.customized_copy(data=scale_default)
   if (exclude_free_r_reflections) :
     if (coeffs.anomalous_flag()) :
       coeffs = coeffs.average_bijvoet_mates()
     r_free_flags = self.fmodel.r_free_flags()
     if (r_free_flags.anomalous_flag()) :
       r_free_flags = r_free_flags.average_bijvoet_mates()
       scale_array = scale_array.average_bijvoet_mates()
     coeffs = coeffs.select(~r_free_flags.data())
     scale_array = scale_array.select(~r_free_flags.data())
   scale=None
   if (ncs_average) and (post_processing_callback is not None) :
     # XXX NCS averaging done here
     assert hasattr(post_processing_callback, "__call__")
     coeffs = post_processing_callback(
       map_coeffs=coeffs,
       fmodel=self.fmodel,
       map_type=map_type)
   if(isotropize):
     if(scale is None):
       if (scale_array.anomalous_flag()) and (not coeffs.anomalous_flag()) :
         scale_array = scale_array.average_bijvoet_mates()
       scale = scale_array.data()
     coeffs = coeffs.customized_copy(data = coeffs.data()*scale)
   if(fill_missing):
     if(coeffs.anomalous_flag()):
       coeffs = coeffs.average_bijvoet_mates()
     coeffs = fill_missing_f_obs(
       coeffs = coeffs,
       fmodel = self.fmodel,
       method = fill_missing_method)
   if(sharp):
     ss = 1./flex.pow2(coeffs.d_spacings().data()) / 4.
     from cctbx import adptbx
     b = flex.mean(self.fmodel.xray_structure.extract_u_iso_or_u_equiv() *
       adptbx.u_as_b(1))/2
     k_sharp = 1./flex.exp(-ss * b)
     coeffs = coeffs.customized_copy(data = coeffs.data()*k_sharp)
   if (merge_anomalous) and (coeffs.anomalous_flag()) :
     return coeffs.average_bijvoet_mates()
   return coeffs
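# A plain-numpy sketch of the sharpening step above, assuming the coefficient
# amplitudes and d-spacings are available as arrays: multiply by exp(+B*ss)
# with ss = 1/(4*d^2), i.e. undo an average isotropic B factor.
import numpy as np

def sharpen(map_coeff_data, d_spacings, b_sharp):
    ss = 1.0 / (4.0 * d_spacings ** 2)
    k_sharp = 1.0 / np.exp(-ss * b_sharp)  # same as exp(+b_sharp * ss)
    return map_coeff_data * k_sharp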
Example #55
0
 def map_coefficients(self,
                      map_type,
                      acentrics_scale = 2.0,
                      centrics_pre_scale = 1.0,
                      exclude_free_r_reflections=False,
                      fill_missing=False,
                      fill_missing_method="f_model",
                      isotropize=True,
                      sharp=False,
                      pdb_hierarchy=None, # XXX required for map_type=llg
                      merge_anomalous=None,
                      use_shelx_weight=False,
                      shelx_weight_parameter=1.5):
   map_name_manager = mmtbx.map_names(map_name_string = map_type)
   # Special case #1: anomalous map
   if(map_name_manager.anomalous):
     if(self.anom_diff is not None):
       # Formula from page 141 in "The Bijvoet-Difference Fourier Synthesis",
       # Jeffrey Roach, METHODS IN ENZYMOLOGY, VOL. 374
       return miller.array(miller_set = self.anom_diff,
                           data       = self.anom_diff.data()/(2j))
     else: return None
   # Special case #2: anomalous residual map
   elif (map_name_manager.anomalous_residual):
     if (self.anom_diff is not None):
       return anomalous_residual_map_coefficients(
         fmodel=self.fmodel,
         exclude_free_r_reflections=exclude_free_r_reflections)
     else : return None
   # Special case #3: Phaser SAD LLG map
   elif (map_name_manager.phaser_sad_llg):
     if (pdb_hierarchy is None):
       raise RuntimeError("pdb_hierarchy must not be None when a Phaser SAD "+
         "LLG map is requested.")
     if (self.anom_diff is not None):
       return get_phaser_sad_llg_map_coefficients(
         fmodel=self.fmodel,
         pdb_hierarchy=pdb_hierarchy)
     else :
       return None
   # Special case #4: Fcalc map
   mnm = mmtbx.map_names(map_name_string = map_type)
   if(mnm.k==0 and abs(mnm.n)==1):
     if(fill_missing):
       return self.fmodel.xray_structure.structure_factors(
         d_min = self.fmodel.f_obs().d_min()).f_calc()
     else:
       return self.fmodel.f_obs().structure_factors_from_scatterers(
         xray_structure = self.fmodel.xray_structure).f_calc()
   #
   if(self.mch is None):
     self.mch = self.fmodel.map_calculation_helper()
   ffs = fo_fc_scales(
     fmodel          = self.fmodel,
     map_type_str    = map_type,
     acentrics_scale = acentrics_scale,
     centrics_scale  = centrics_pre_scale)
   fo_scale, fc_scale = ffs.fo_scale, ffs.fc_scale
   coeffs = combine(
     fmodel                 = self.fmodel,
     map_type_str           = map_type,
     fo_scale               = fo_scale,
     fc_scale               = fc_scale,
     map_calculation_helper = self.mch,
     use_shelx_weight       = use_shelx_weight,
     shelx_weight_parameter = shelx_weight_parameter).map_coefficients()
   r_free_flags = None
   # XXX the default scale array (used for the isotropize option) needs to be
   # calculated and processed now to avoid array size errors
   scale_default = 1. / (self.fmodel.k_isotropic()*self.fmodel.k_anisotropic())
   scale_array = coeffs.customized_copy(data=scale_default)
   if (exclude_free_r_reflections):
     if (coeffs.anomalous_flag()):
       coeffs = coeffs.average_bijvoet_mates()
     r_free_flags = self.fmodel.r_free_flags()
     if (r_free_flags.anomalous_flag()):
       r_free_flags = r_free_flags.average_bijvoet_mates()
       scale_array = scale_array.average_bijvoet_mates()
     coeffs = coeffs.select(~r_free_flags.data())
     scale_array = scale_array.select(~r_free_flags.data())
   scale=None
   if(isotropize):
     if(scale is None):
       if (scale_array.anomalous_flag()) and (not coeffs.anomalous_flag()):
         scale_array = scale_array.average_bijvoet_mates()
       scale = scale_array.data()
     coeffs = coeffs.customized_copy(data = coeffs.data()*scale)
   if(fill_missing):
     if(coeffs.anomalous_flag()):
       coeffs = coeffs.average_bijvoet_mates()
     coeffs = fill_missing_f_obs(
       coeffs = coeffs,
       fmodel = self.fmodel,
       method = fill_missing_method)
   if(sharp):
     ss = 1./flex.pow2(coeffs.d_spacings().data()) / 4.
     from cctbx import adptbx
     b = flex.mean(self.fmodel.xray_structure.extract_u_iso_or_u_equiv() *
       adptbx.u_as_b(1))/2
     k_sharp = 1./flex.exp(-ss * b)
     coeffs = coeffs.customized_copy(data = coeffs.data()*k_sharp)
   if (merge_anomalous) and (coeffs.anomalous_flag()):
     return coeffs.average_bijvoet_mates()
   return coeffs
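# A small complex-number check of the anomalous-map convention used in the two
# map_coefficients examples above: dividing the anomalous differences by 2j
# halves the amplitude and retards the phase by 90 degrees.
import numpy as np

anom_diff = np.array([1.0 + 0.0j, 0.0 + 2.0j])
coeffs = anom_diff / 2j
print(np.abs(coeffs))                # [ 0.5  1. ]
print(np.degrees(np.angle(coeffs)))  # [-90.   0.]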
Example #56
0
    def determine_polar(self, observations_original, iparams, pickle_filename, pres=None):
        """
    Determine polarity based on input data.
    The function still needs isomorphous reference so, if flag_polar is True,
    miller_array_iso must be supplied in input file.
    """
        if iparams.indexing_ambiguity.flag_on == False:
            return "h,k,l", 0, 0
        cc_asu = 0
        cc_rev = 0
        if iparams.indexing_ambiguity.index_basis_in is not None:
            if iparams.indexing_ambiguity.index_basis_in.endswith("mtz"):
                # use reference mtz file to determine polarity
                from iotbx import reflection_file_reader

                reflection_file_polar = reflection_file_reader.any_reflection_file(
                    iparams.indexing_ambiguity.index_basis_in
                )
                miller_arrays_polar = reflection_file_polar.as_miller_arrays()
                miller_array_polar = miller_arrays_polar[0]
                miller_array_polar = miller_array_polar.resolution_filter(
                    d_min=iparams.indexing_ambiguity.d_min, d_max=iparams.indexing_ambiguity.d_max
                )
                # for post-refinement, apply the scale factors and partiality first
                if pres is not None:
                    # observations_original = pres.observations_original.deep_copy()
                    two_theta = observations_original.two_theta(wavelength=pres.wavelength).data()
                    from mod_leastsqr import calc_partiality_anisotropy_set

                    alpha_angle = flex.double([0] * len(observations_original.indices()))
                    spot_pred_x_mm = flex.double([0] * len(observations_original.indices()))
                    spot_pred_y_mm = flex.double([0] * len(observations_original.indices()))
                    detector_distance_mm = pres.detector_distance_mm
                    partiality, dummy, dummy, dummy = calc_partiality_anisotropy_set(
                        pres.unit_cell,
                        0,
                        0,
                        observations_original.indices(),
                        pres.ry,
                        pres.rz,
                        pres.r0,
                        pres.re,
                        two_theta,
                        alpha_angle,
                        pres.wavelength,
                        pres.crystal_orientation,
                        spot_pred_x_mm,
                        spot_pred_y_mm,
                        detector_distance_mm,
                        iparams.partiality_model,
                        iparams.flag_beam_divergence,
                    )
                    # partiality = pres.partiality
                    sin_theta_over_lambda_sq = (
                        observations_original.two_theta(pres.wavelength).sin_theta_over_lambda_sq().data()
                    )
                    I_full = flex.double(
                        observations_original.data()
                        / (pres.G * flex.exp(flex.double(-2 * pres.B * sin_theta_over_lambda_sq)) * partiality)
                    )
                    sigI_full = flex.double(
                        observations_original.sigmas()
                        / (pres.G * flex.exp(flex.double(-2 * pres.B * sin_theta_over_lambda_sq)) * partiality)
                    )
                    observations_original = observations_original.customized_copy(data=I_full, sigmas=sigI_full)
                observations_asu = observations_original.map_to_asu()
                observations_rev = self.get_observations_non_polar(
                    observations_original, iparams.indexing_ambiguity.assigned_basis
                )
                matches = miller.match_multi_indices(
                    miller_indices_unique=miller_array_polar.indices(), miller_indices=observations_asu.indices()
                )
                I_ref_match = flex.double([miller_array_polar.data()[pair[0]] for pair in matches.pairs()])
                I_obs_match = flex.double([observations_asu.data()[pair[1]] for pair in matches.pairs()])
                cc_asu = flex.linear_correlation(I_ref_match, I_obs_match).coefficient()
                n_refl_asu = len(matches.pairs())
                matches = miller.match_multi_indices(
                    miller_indices_unique=miller_array_polar.indices(), miller_indices=observations_rev.indices()
                )
                I_ref_match = flex.double([miller_array_polar.data()[pair[0]] for pair in matches.pairs()])
                I_obs_match = flex.double([observations_rev.data()[pair[1]] for pair in matches.pairs()])
                cc_rev = flex.linear_correlation(I_ref_match, I_obs_match).coefficient()
                n_refl_rev = len(matches.pairs())
                polar_hkl = "h,k,l"
                if cc_rev > (cc_asu * 1.01):
                    polar_hkl = iparams.indexing_ambiguity.assigned_basis
            else:
                # use basis in the given input file
                polar_hkl = "h,k,l"
                basis_pickle = pickle.load(open(iparams.indexing_ambiguity.index_basis_in, "rb"))
                if pickle_filename in basis_pickle:
                    polar_hkl = basis_pickle[pickle_filename]
        else:
            # set default polar_hkl to h,k,l
            polar_hkl = "h,k,l"
        return polar_hkl, cc_asu, cc_rev
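# A stripped-down numpy sketch of the decision rule above, assuming the three
# intensity arrays are already matched on common reflections: keep the
# alternative basis only if its correlation beats the as-indexed one by 1%.
import numpy as np

def pick_basis(i_ref, i_asu, i_rev, alt_basis, margin=1.01):
    cc_asu = np.corrcoef(i_ref, i_asu)[0, 1]
    cc_rev = np.corrcoef(i_ref, i_rev)[0, 1]
    return (alt_basis if cc_rev > cc_asu * margin else "h,k,l"), cc_asu, cc_rev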
Example #57
0
    def do_clustering(self,
                      nproc=1,
                      b_scale=False,
                      use_normalized=False,
                      html_maker=None):
        self.clusters = {}
        prefix = os.path.join(self.wdir, "cctable")
        assert (b_scale, use_normalized).count(True) <= 1

        if len(self.arrays) < 2:
            print "WARNING: less than two data! can't do cc-based clustering"
            self.clusters[1] = [float("nan"), [0]]
            return

        # Absolute scaling using Wilson-B factor
        if b_scale:
            from mmtbx.scaling.matthews import p_vm_calculator
            from mmtbx.scaling.absolute_scaling import ml_iso_absolute_scaling

            ofs_wilson = open("%s_wilson_scales.dat" % prefix, "w")
            n_residues = p_vm_calculator(self.arrays.values()[0], 1,
                                         0).best_guess
            ofs_wilson.write("# guessed n_residues= %d\n" % n_residues)
            ofs_wilson.write("file wilsonB\n")
            for f in self.arrays:
                arr = self.arrays[f]
                iso_scale_and_b = ml_iso_absolute_scaling(arr, n_residues, 0)
                wilson_b = iso_scale_and_b.b_wilson
                ofs_wilson.write("%s %.3f\n" % (f, wilson_b))
                if wilson_b > 0:  # Ignoring data with B<0? is a bad idea.. but how..?
                    tmp = flex.exp(-2. * wilson_b *
                                   arr.unit_cell().d_star_sq(arr.indices()) /
                                   4.)
                    self.arrays[f] = arr.customized_copy(data=arr.data() * tmp,
                                                         sigmas=arr.sigmas() *
                                                         tmp)
            ofs_wilson.close()

        elif use_normalized:
            from mmtbx.scaling.absolute_scaling import kernel_normalisation
            for f in self.arrays:
                arr = self.arrays[f]
                normaliser = kernel_normalisation(arr, auto_kernel=True)
                self.arrays[f] = arr.customized_copy(
                    data=arr.data() / normaliser.normalizer_for_miller_array,
                    sigmas=arr.sigmas() /
                    normaliser.normalizer_for_miller_array)
        # Prep
        args = []
        for i in xrange(len(self.arrays) - 1):
            for j in xrange(i + 1, len(self.arrays)):
                args.append((i, j))

        # Calc all CC
        if self.use_sfdist:
            worker = lambda x: calc_sfdist(self.arrays.values()[x[0]],
                                           self.arrays.values()[x[1]])
        else:
            worker = lambda x: calc_cc(self.arrays.values()[x[0]],
                                       self.arrays.values()[x[1]])
        results = easy_mp.pool_map(fixed_func=worker,
                                   args=args,
                                   processes=nproc)

        # Check NaN and decide which data to remove
        idx_bad = {}
        nans = []
        cc_data_for_html = []
        for (i, j), (cc, nref) in zip(args, results):
            cc_data_for_html.append((i, j, cc, nref))
            if cc == cc: continue  # skip valid CCs; only NaN pairs are recorded below
            idx_bad[i] = idx_bad.get(i, 0) + 1
            idx_bad[j] = idx_bad.get(j, 0) + 1
            nans.append([i, j])

        if html_maker is not None:
            html_maker.add_cc_clustering_details(cc_data_for_html)

        idx_bad = idx_bad.items()
        idx_bad.sort(key=lambda x: x[1])
        remove_idxes = set()

        for idx, badcount in reversed(idx_bad):
            if len(filter(lambda x: idx in x, nans)) == 0: continue
            remove_idxes.add(idx)
            nans = filter(lambda x: idx not in x, nans)
            if len(nans) == 0: break

        use_idxes = filter(lambda x: x not in remove_idxes,
                           xrange(len(self.arrays)))

        # Make table: original index (in file list) -> new index (in matrix)
        count = 0
        org2now = collections.OrderedDict()
        for i in xrange(len(self.arrays)):
            if i in remove_idxes: continue
            org2now[i] = count
            count += 1

        if len(remove_idxes) > 0:
            open("%s_notused.lst" % prefix, "w").write("\n".join(
                map(lambda x: self.arrays.keys()[x], remove_idxes)))

        # Make matrix
        mat = numpy.zeros(shape=(len(use_idxes), len(use_idxes)))
        for (i, j), (cc, nref) in zip(args, results):
            if i in remove_idxes or j in remove_idxes: continue
            mat[org2now[j], org2now[i]] = cc

        open("%s.matrix" % prefix,
             "w").write(" ".join(map(lambda x: "%.4f" % x, mat.flatten())))

        ofs = open("%s.dat" % prefix, "w")
        ofs.write("   i    j     cc  nref\n")
        for (i, j), (cc, nref) in zip(args, results):
            ofs.write("%4d %4d %.4f %4d\n" % (i, j, cc, nref))

        open("%s_ana.R" % prefix, "w").write("""\
treeToList2 <- function(htree)
{  # stolen from $CCP4/share/blend/R/blend0.R
 groups <- list()
 itree <- dim(htree$merge)[1]
 for (i in 1:itree)
 { 
  il <- htree$merge[i,1]
  ir <- htree$merge[i,2]
  if (il < 0) lab1 <- htree$labels[-il]
  if (ir < 0) lab2 <- htree$labels[-ir]
  if (il > 0) lab1 <- groups[[il]]
  if (ir > 0) lab2 <- groups[[ir]]
  lab <- c(lab1,lab2)
  lab <- as.integer(lab)
  groups <- c(groups,list(lab))
 }
 return(groups)
}

cc<-scan("%(prefix)s.matrix")
md<-matrix(1-cc, ncol=%(ncol)d, byrow=TRUE)
hc <- hclust(as.dist(md),method="ward")
pdf("tree.pdf")
plot(hc)
dev.off()
png("tree.png",height=1000,width=1000)
plot(hc)
dev.off()

hc$labels <- c(%(hclabels)s)
groups <- treeToList2(hc)
cat("ClNumber             Nds         Clheight   IDs\\n",file="./CLUSTERS.txt")
for (i in 1:length(groups))
{
 sorted_groups <- sort(groups[[i]])
 linea <- sprintf("%%04d %%4d %%7.3f %%s\\n",
                  i,length(groups[[i]]),hc$height[i], paste(sorted_groups,collapse=" "))
 cat(linea, file="./CLUSTERS.txt", append=TRUE)
}

# reference: http://www.coppelia.io/2014/07/converting-an-r-hclust-object-into-a-d3-js-dendrogram/
library(rjson)
HCtoJSON<-function(hc){
  labels<-hc$labels
  merge<-data.frame(hc$merge)
  for (i in (1:nrow(merge))) {
    if (merge[i,1]<0 & merge[i,2]<0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(list(name=labels[-merge[i,1]]),list(name=labels[-merge[i,2]])))")))}
    else if (merge[i,1]>0 & merge[i,2]<0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(node", merge[i,1], ", list(name=labels[-merge[i,2]])))")))}
    else if (merge[i,1]<0 & merge[i,2]>0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(list(name=labels[-merge[i,1]]), node", merge[i,2],"))")))}
    else if (merge[i,1]>0 & merge[i,2]>0) {eval(parse(text=paste0("node", i, "<-list(name=\\"", i, "\\", children=list(node",merge[i,1] , ", node" , merge[i,2]," ))")))}
  }
  eval(parse(text=paste0("JSON<-toJSON(node",nrow(merge), ")")))
  return(JSON)
}

JSON<-HCtoJSON(hc)
cat(JSON, file="dendro.json")

q(save="yes")
""" % dict(prefix=os.path.basename(prefix),
           ncol=len(self.arrays),
           hclabels=",".join(map(lambda x: "%d" % (x + 1), org2now.keys()))))

        call(cmd="Rscript",
             arg="%s_ana.R" % os.path.basename(prefix),
             wdir=self.wdir)

        output = open(os.path.join(self.wdir, "CLUSTERS.txt")).readlines()
        for l in output[1:]:
            sp = l.split()
            clid, clheight, ids = sp[0], sp[2], sp[3:]
            self.clusters[int(clid)] = [float(clheight), map(int, ids)]