Example #1
 def __init__(self, model,  n_histogram_slots = 10, file_name=None,
     selection=None):
   self.wilson_b = model.wilson_b
   self.file_name = file_name
   self.selection = selection
   self.rms_b_iso_or_b_equiv_bonded = model.rms_b_iso_or_b_equiv_bonded()
   eps = math.pi**2*8
   solvent_selection = model.solvent_selection()
   hd_selection = model.xray_structure.hd_selection()
   m_noH_sel = ((~solvent_selection) & (~hd_selection))
   s_noH_sel = ((solvent_selection) & (~hd_selection))
   #
   xs_a     = model.xray_structure
   xs_a_noH = model.xray_structure.select(~hd_selection)
   xs_s_noH = model.xray_structure.select(s_noH_sel)
   xs_m_noH = model.xray_structure.select(m_noH_sel)
   xs_h     = model.xray_structure.select(hd_selection)
   #
   u_a     = xs_a    .extract_u_iso_or_u_equiv()
   u_a_noH = xs_a_noH.extract_u_iso_or_u_equiv()
   u_s_noH = xs_s_noH.extract_u_iso_or_u_equiv()
   u_m_noH = xs_m_noH.extract_u_iso_or_u_equiv()
   u_h     = xs_h    .extract_u_iso_or_u_equiv()
   self.b_min_a,    self.b_max_a,    self.b_mean_a    = self.mmmd(u_a,    eps)
   self.b_min_a_noH,self.b_max_a_noH,self.b_mean_a_noH= self.mmmd(u_a_noH,eps)
   self.b_min_s_noH,self.b_max_s_noH,self.b_mean_s_noH= self.mmmd(u_s_noH,eps)
   self.b_min_m_noH,self.b_max_m_noH,self.b_mean_m_noH= self.mmmd(u_m_noH,eps)
   self.b_min_h,    self.b_max_h,    self.b_mean_h    = self.mmmd(u_h,    eps)
   #
   uc = model.xray_structure.unit_cell()
   a_a     = xs_a    .scatterers().anisotropy(unit_cell =uc).select(xs_a    .use_u_aniso())
   a_a_noH = xs_a_noH.scatterers().anisotropy(unit_cell =uc).select(xs_a_noH.use_u_aniso())
   a_s_noH = xs_s_noH.scatterers().anisotropy(unit_cell =uc).select(xs_s_noH.use_u_aniso())
   a_m_noH = xs_m_noH.scatterers().anisotropy(unit_cell =uc).select(xs_m_noH.use_u_aniso())
   a_h     = xs_h    .scatterers().anisotropy(unit_cell =uc).select(xs_h    .use_u_aniso())
   #
   self.n_aniso_a     = xs_a    .use_u_aniso().count(True)
   self.n_aniso_a_noH = xs_a_noH.use_u_aniso().count(True)
   self.n_aniso_s_noH = xs_s_noH.use_u_aniso().count(True)
   self.n_aniso_m_noH = xs_m_noH.use_u_aniso().count(True)
   self.n_aniso_h     = xs_h    .use_u_aniso().count(True)
   self.n_iso_a       = xs_a    .use_u_iso().count(True)
   self.n_iso_a_noH   = xs_a_noH.use_u_iso().count(True)
   self.n_iso_s_noH   = xs_s_noH.use_u_iso().count(True)
   self.n_iso_m_noH   = xs_m_noH.use_u_iso().count(True)
   self.n_iso_h       = xs_h    .use_u_iso().count(True)
   #
   self.a_min_a,    self.a_max_a,    self.a_mean_a    = self.mmmd(a_a)
   self.a_min_a_noH,self.a_max_a_noH,self.a_mean_a_noH= self.mmmd(a_a_noH)
   self.a_min_s_noH,self.a_max_s_noH,self.a_mean_s_noH= self.mmmd(a_s_noH)
   self.a_min_m_noH,self.a_max_m_noH,self.a_mean_m_noH= self.mmmd(a_m_noH)
   self.a_min_h,    self.a_max_h,    self.a_mean_h    = self.mmmd(a_h)
   #
   self.b_a_noH_histogram = flex.histogram(data = u_a_noH * eps,
     n_slots = n_histogram_slots)
   self.b_a_noH = u_a_noH * eps # need this for phenix gui
   self.a_a_noH_histogram = flex.histogram(data = a_a_noH,
     n_slots = n_histogram_slots)
   #
   self._show_anisotropy = (xs_a.use_u_aniso()).count(True)
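The constructor above converts isotropic U values to B-factors with eps = 8*pi**2 before handing them to flex.histogram, and the slot boundaries can then be recovered from data_min() and slot_width(). A minimal sketch of that pattern, using made-up U values rather than a real model:

import math
from cctbx.array_family import flex

u_iso = flex.double([0.20, 0.25, 0.31, 0.45, 0.52])  # made-up isotropic U values (A^2)
eps = math.pi**2 * 8                                 # U -> B conversion factor
b_iso = u_iso * eps
hist = flex.histogram(data=b_iso, n_slots=10)
low_cutoff = hist.data_min()
for i, n in enumerate(hist.slots()):
    high_cutoff = hist.data_min() + hist.slot_width() * (i + 1)
    print("%7.2f - %7.2f: %d" % (low_cutoff, high_cutoff, n))
    low_cutoff = high_cutoff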
Example #3
def run(args):
    for file_name in args:
        print "File name:", file_name
        try:
            pdb_inp = iotbx.pdb.input(file_name=file_name)
        except KeyboardInterrupt:
            raise
        except Exception:
            print libtbx.utils.format_exception()
            continue  # skip files that fail to parse
        isotropic_b_factors = flex.double()
        all_eigenvalues = flex.double()
        for atom in pdb_inp.atoms():
            if (atom.uij == (-1, -1, -1, -1, -1, -1)):
                isotropic_b_factors.append(atom.b)
            else:
                all_eigenvalues.extend(
                    flex.double(adptbx.eigenvalues(atom.uij)))
        all_eigenvalues *= adptbx.u_as_b(1)
        print "Number of isotropic atoms:  ", isotropic_b_factors.size()
        print "Number of anisotropic atoms:", all_eigenvalues.size() // 3
        if (isotropic_b_factors.size() != 0):
            print "Histogram of isotropic B-factors:"
            flex.histogram(data=isotropic_b_factors,
                           n_slots=10).show(prefix="  ",
                                            format_cutoffs="%7.2f")
        if (all_eigenvalues.size() != 0):
            print "Histogram of eigenvalues of anisotropic B-factors:"
            flex.histogram(data=all_eigenvalues,
                           n_slots=10).show(prefix="  ",
                                            format_cutoffs="%7.2f")
        print
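run() treats uij == (-1, -1, -1, -1, -1, -1) as the sentinel for an isotropic atom; for anisotropic atoms it histograms the eigenvalues of uij converted to B units with adptbx.u_as_b(1). A minimal sketch of that conversion for a single, invented uij tensor:

from cctbx import adptbx
from cctbx.array_family import flex

uij = (0.25, 0.30, 0.20, 0.01, -0.02, 0.00)  # made-up anisotropic U tensor (A^2)
eigenvalues = flex.double(adptbx.eigenvalues(uij)) * adptbx.u_as_b(1)
flex.histogram(data=eigenvalues, n_slots=3).show(prefix="  ", format_cutoffs="%7.2f")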
Example #4
def run(args):
  for file_name in args:
    print "File name:", file_name
    try:
      pdb_inp = iotbx.pdb.input(file_name=file_name)
    except KeyboardInterrupt: raise
    except Exception:
      print libtbx.utils.format_exception()
      continue  # skip files that fail to parse
    isotropic_b_factors = flex.double()
    all_eigenvalues = flex.double()
    for atom in pdb_inp.atoms():
      if (atom.uij == (-1,-1,-1,-1,-1,-1)):
        isotropic_b_factors.append(atom.b)
      else:
        all_eigenvalues.extend(flex.double(adptbx.eigenvalues(atom.uij)))
    all_eigenvalues *= adptbx.u_as_b(1)
    print "Number of isotropic atoms:  ", isotropic_b_factors.size()
    print "Number of anisotropic atoms:", all_eigenvalues.size() // 3
    if (isotropic_b_factors.size() != 0):
      print "Histogram of isotropic B-factors:"
      flex.histogram(data=isotropic_b_factors, n_slots=10).show(
        prefix="  ", format_cutoffs="%7.2f")
    if (all_eigenvalues.size() != 0):
      print "Histogram of eigenvalues of anisotropic B-factors:"
      flex.histogram(data=all_eigenvalues, n_slots=10).show(
        prefix="  ", format_cutoffs="%7.2f")
    print
def exercise_03(mon_lib_srv, ener_lib, verbose=0):
  #
  # normal run with real model
  #
  pdb_file = libtbx.env.find_in_repositories(
    relative_path="phenix_regression/pdb/2ERL_noH.pdb", test=os.path.isfile)
  if (pdb_file is None):
    print("Skipping exercise_03: input file not available")
    return
  if (verbose): log = sys.stdout
  else:         log = StringIO()
  params = mmtbx.monomer_library.pdb_interpretation.master_params.extract()
  params.nonbonded_weight = 16
  processed_pdb = mmtbx.monomer_library.pdb_interpretation.process(
    mon_lib_srv = mon_lib_srv,
    params=params,
    ener_lib = ener_lib,
    file_name = pdb_file,
    log = log)
  xray_structure = processed_pdb.xray_structure()
  restraints_manager = mmtbx.restraints.manager(
    geometry=processed_pdb.geometry_restraints_manager())
  structure_ = xray_structure.deep_copy_scatterers()
  gradients_calculator=cartesian_dynamics.gradients_calculator_reciprocal_space(
    restraints_manager = restraints_manager,
    sites_cart         = xray_structure.sites_cart(),
    wc                 = 1)
  cartesian_dynamics.run(
    xray_structure = xray_structure,
    gradients_calculator = gradients_calculator,
    temperature = 300,
    n_steps = 200,
    time_step = 0.0005,
    log = log,
    verbose = 1)
  rms1 = xray_structure.rms_difference(structure_)
  rms2 = structure_.rms_difference(xray_structure)
  assert rms1 == rms2
  rms = rms1
  if(verbose):
    print("rms between structures before and after dynamics = ", rms)
  array_of_distances_between_each_atom = \
       flex.sqrt(structure_.difference_vectors_cart(xray_structure).dot())
  if(verbose):
    flex.histogram(
      data=array_of_distances_between_each_atom,
      n_slots=12).show(
        format_cutoffs="%6.4f")
  n_rms = 5.3
  selected_by_rms = (array_of_distances_between_each_atom > n_rms * rms)
  outlier_sc = xray_structure.scatterers().select(selected_by_rms)
  if (outlier_sc.size() != 0):
    print("number of rms outliers:", outlier_sc.size())
    outlier_d = array_of_distances_between_each_atom.select(selected_by_rms)
    for sc,d in zip(outlier_sc, outlier_d):
      print(sc.label, d)
    raise RuntimeError("rms outliers.")
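The outlier check at the end of exercise_03 relies on comparing a flex.double element-wise against a scalar, which yields a flex.bool that works with both count() and select(). A tiny sketch with invented displacement values:

from cctbx.array_family import flex

distances = flex.double([0.05, 0.07, 0.60, 0.04])  # made-up per-atom displacements (A)
rms = 0.08
selected_by_rms = distances > 5.3 * rms            # flex.bool
print("number of rms outliers:", selected_by_rms.count(True))
print(list(distances.select(selected_by_rms)))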
Example #6
    def report(O, plot=None, xy_prefix=None):
        from cctbx.array_family import flex
        print "Number of shots:", O.completeness_history.size() - 1
        print
        print "Histogram of counts per reflection:"
        flex.histogram(O.counts.as_double(),
                       n_slots=8).show(prefix="  ", format_cutoffs="%7.0f")
        print
        print "Observations per reflection:"
        flex.show_count_stats(counts=O.counts, prefix="  ")
        print "  Median:", int(flex.median(O.counts.as_double()) + 0.5)
        print
        sys.stdout.flush()
        if (xy_prefix is None):
            xy_prefix = ""
        elif (len(xy_prefix) != 0):
            xy_prefix = xy_prefix + "_"

        def dump_xy(name, array):
            f = open(xy_prefix + "%s.xy" % name, "w")
            for i, c in enumerate(array):
                print >> f, i, c

        dump_xy("completeness_history", O.completeness_history)
        dump_xy("min_count_history", O.min_count_history)
        if (O.use_symmetry): _ = O.i_calc.asu
        else: _ = O.i_calc.p1_anom
        _ = _.customized_copy(data=O.counts).sort(by_value="resolution")
        sym_factors = _.space_group().order_p()
        if (not O.i_calc.asu.anomalous_flag()):
            sym_factors *= 2
        sym_factors /= _.multiplicities().data()
        counts_sorted_by_resolution = _.data().as_int() * sym_factors
        dump_xy("counts_sorted_by_resolution", counts_sorted_by_resolution)
        dump_xy("d_spacings_sorted_by_resolution", _.d_spacings().data())
        if (plot == "completeness"):
            from libtbx import pyplot
            fig = pyplot.figure()
            ax = fig.add_subplot(1, 1, 1)
            _ = O.completeness_history
            nx = _.size()
            ax.plot(range(nx), _, "r-")
            ax.axis([0, nx, 0, 1])
            pyplot.show()
        elif (plot == "redundancy"):
            from libtbx import pyplot
            fig = pyplot.figure()
            ax = fig.add_subplot(1, 1, 1)
            _ = counts_sorted_by_resolution
            ax.plot(range(len(_)), _, "r-")
            ax.axis([-_.size() * 0.05, _.size() * 1.05, 0, None])
            pyplot.show()
        elif (plot is not None):
            raise RuntimeError('Unknown plot type: "%s"' % plot)
 def report(O, plot=None, xy_prefix=None):
   from cctbx.array_family import flex
   print "Number of shots:", O.completeness_history.size()-1
   print
   print "Histogram of counts per reflection:"
   flex.histogram(O.counts.as_double(), n_slots=8).show(
     prefix="  ", format_cutoffs="%7.0f")
   print
   print "Observations per reflection:"
   flex.show_count_stats(counts=O.counts, prefix="  ")
   print "  Median:", int(flex.median(O.counts.as_double())+0.5)
   print
   sys.stdout.flush()
   if (xy_prefix is None):
     xy_prefix = ""
   elif (len(xy_prefix) != 0):
     xy_prefix = xy_prefix + "_"
   def dump_xy(name, array):
     f = open(xy_prefix + "%s.xy" % name, "w")
     for i,c in enumerate(array):
       print >> f, i, c
   dump_xy("completeness_history", O.completeness_history)
   dump_xy("min_count_history", O.min_count_history)
   if (O.use_symmetry): _ = O.i_calc.asu
   else:                _ = O.i_calc.p1_anom
   _ = _.customized_copy(data=O.counts).sort(by_value="resolution")
   sym_factors = _.space_group().order_p()
   if (not O.i_calc.asu.anomalous_flag()):
     sym_factors *= 2
   sym_factors /= _.multiplicities().data()
   counts_sorted_by_resolution = _.data().as_int() * sym_factors
   dump_xy("counts_sorted_by_resolution", counts_sorted_by_resolution)
   dump_xy("d_spacings_sorted_by_resolution", _.d_spacings().data())
   if (plot == "completeness"):
     from libtbx import pyplot
     fig = pyplot.figure()
     ax = fig.add_subplot(1, 1, 1)
     _ = O.completeness_history
     nx = _.size()
     ax.plot(range(nx), _, "r-")
     ax.axis([0, nx, 0, 1])
     pyplot.show()
   elif (plot == "redundancy"):
     from libtbx import pyplot
     fig = pyplot.figure()
     ax = fig.add_subplot(1, 1, 1)
     _ = counts_sorted_by_resolution
     ax.plot(range(len(_)), _, "r-")
     ax.axis([-_.size()*0.05, _.size()*1.05, 0, None])
     pyplot.show()
   elif (plot is not None):
     raise RuntimeError('Unknown plot type: "%s"' % plot)
def exercise_03(mon_lib_srv, ener_lib, verbose=0):
    #
    # normal run with real model
    #
    pdb_file = libtbx.env.find_in_repositories(relative_path="phenix_regression/pdb/2ERL_noH.pdb", test=os.path.isfile)
    if pdb_file is None:
        print "Skipping exercise_03: input file not available"
        return
    if verbose:
        log = sys.stdout
    else:
        log = StringIO()
    params = mmtbx.monomer_library.pdb_interpretation.master_params.extract()
    params.nonbonded_weight = 16
    processed_pdb = mmtbx.monomer_library.pdb_interpretation.process(
        mon_lib_srv=mon_lib_srv, params=params, ener_lib=ener_lib, file_name=pdb_file, log=log
    )
    xray_structure = processed_pdb.xray_structure()
    restraints_manager = mmtbx.restraints.manager(geometry=processed_pdb.geometry_restraints_manager())
    structure_ = xray_structure.deep_copy_scatterers()
    gradients_calculator = cartesian_dynamics.gradients_calculator_reciprocal_space(
        restraints_manager=restraints_manager, sites_cart=xray_structure.sites_cart(), wc=1
    )
    cartesian_dynamics.run(
        xray_structure=xray_structure,
        gradients_calculator=gradients_calculator,
        temperature=300,
        n_steps=200,
        time_step=0.0005,
        log=log,
        verbose=1,
    )
    rms1 = xray_structure.rms_difference(structure_)
    rms2 = structure_.rms_difference(xray_structure)
    assert rms1 == rms2
    rms = rms1
    if verbose:
        print "rms between structures before and after dynamics = ", rms
    array_of_distances_between_each_atom = flex.sqrt(structure_.difference_vectors_cart(xray_structure).dot())
    if verbose:
        flex.histogram(data=array_of_distances_between_each_atom, n_slots=12).show(format_cutoffs="%6.4f")
    n_rms = 5.3
    selected_by_rms = array_of_distances_between_each_atom > n_rms * rms
    outlier_sc = xray_structure.scatterers().select(selected_by_rms)
    if outlier_sc.size() != 0:
        print "number of rms outliers:", outlier_sc.size()
        outlier_d = array_of_distances_between_each_atom.select(selected_by_rms)
        for sc, d in zip(outlier_sc, outlier_d):
            print sc.label, d
        raise RuntimeError("rms outliers.")
Example #9
 def density_truncation(self):
     min_fraction = self.params.density_truncation.fraction_min
     max_fraction = self.params.density_truncation.fraction_max
     if min_fraction is None and max_fraction is None: return
     if min_fraction is Auto:
         min_fraction = self.mean_protein_density - self.f000_over_v
     hist = flex.histogram(self.map.select(self.protein_iselection),
                           n_slots=10000)
     if max_fraction is not None:
         self.truncate_max = hist.get_cutoff(
             int(self.n_protein_grid_points * (1 - max_fraction)))
         truncate_max_sel = (self.map >
                             self.truncate_max) & self.protein_selection
         self.map.set_selected(truncate_max_sel, self.truncate_max)
         self.truncate_max_percent = (truncate_max_sel.count(True) /
                                      self.n_protein_grid_points) * 100
     if min_fraction is not None:
         self.truncate_min = hist.get_cutoff(
             int(self.n_protein_grid_points * (1 - min_fraction)))
         truncate_min_sel = (self.map <
                             self.truncate_min) & self.protein_selection
         self.map.set_selected(truncate_min_sel, self.truncate_min)
         self.truncate_min_percent = (truncate_min_sel.count(True) /
                                      self.n_protein_grid_points) * 100
     self.mean_protein_density = flex.mean(
         self.map.select(self.protein_iselection))
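density_truncation() turns target fractions into density thresholds with hist.get_cutoff() and then clamps the map via set_selected(). A self-contained sketch of the get_cutoff() step on made-up map values:

from cctbx.array_family import flex

map_values = flex.double([-0.2, 0.1, 0.3, 0.5, 0.8, 1.2, 1.9, 2.5])  # made-up density values
hist = flex.histogram(map_values, n_slots=10000)
max_fraction = 0.05
truncate_max = hist.get_cutoff(int(map_values.size() * (1 - max_fraction)))
truncate_max_sel = map_values > truncate_max
print("values above cutoff %.3f: %d" % (truncate_max, truncate_max_sel.count(True)))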
Example #10
    def __init__(self,
                 pdb_hierarchy,
                 xray_structure,
                 use_hydrogens=False,
                 geometry_restraints_manager=None):
        if (not use_hydrogens):
            not_hd_sel = ~xray_structure.hd_selection()
            pdb_hierarchy = pdb_hierarchy.select(not_hd_sel)
            xray_structure = xray_structure.select(not_hd_sel)
            if (geometry_restraints_manager is not None):
                geometry_restraints_manager = \
                  geometry_restraints_manager.select(not_hd_sel)
        b_isos = xray_structure.extract_u_iso_or_u_equiv() * adptbx.u_as_b(1.)
        sites_cart = xray_structure.sites_cart()
        asc = pdb_hierarchy.atom_selection_cache()

        def get_stats(sel_str, rms_bonded=False):
            sel = asc.selection(sel_str)
            xrs = xray_structure.select(sel)
            n_iso = xrs.use_u_iso().count(True)
            n_aniso = xrs.use_u_aniso().count(True)
            anisotropy = xrs.scatterers().anisotropy(unit_cell=xrs.unit_cell())
            if (sel.count(True) == 0): return None
            b_isos_selected = b_isos.select(sel)
            sites_cart_selected = sites_cart.select(sel)
            mi, ma, me = b_isos_selected.min_max_mean().as_tuple()
            rms_b_iso_bonded = None
            if (rms_bonded and geometry_restraints_manager is not None):
                grm = geometry_restraints_manager.select(sel)
                rms_b_iso_bonded = rms_b_iso_or_b_equiv_bonded(
                    geometry_restraints_manager=geometry_restraints_manager.
                    select(sel),
                    sites_cart=sites_cart_selected,
                    b_isos=b_isos_selected)
            return group_args(min=mi,
                              max=ma,
                              mean=me,
                              n_iso=n_iso,
                              n_aniso=n_aniso,
                              n_zero=(b_isos_selected < 0.01).count(True),
                              rms_b_iso_bonded=rms_b_iso_bonded)

        overall = get_stats(sel_str="all", rms_bonded=True)
        protein = get_stats(sel_str="protein", rms_bonded=True)
        nucleotide = get_stats(sel_str="nucleotide", rms_bonded=True)
        hd = get_stats(sel_str="element H or element D")
        water = get_stats(sel_str="water")
        other = get_stats(sel_str="not (water or nucleotide or protein)")
        chains = {}
        for chain in pdb_hierarchy.chains():
            chains[chain.id] = get_stats(sel_str="chain '%s'" % chain.id)
        histogram = flex.histogram(data=b_isos, n_slots=10)
        self._result = group_args(overall=overall,
                                  protein=protein,
                                  nucleotide=nucleotide,
                                  hd=hd,
                                  water=water,
                                  other=other,
                                  chains=chains,
                                  histogram=histogram)
Example #11
        def show_histogram(data, n_slots=50, out=None, prefix=""):
            if (out is None): out = sys.stdout
            print('\n' + prefix, file=out)

            # Stats
            data_basic_stats = scitbx.math.basic_statistics(data)
            print('\n  Number  : %7.4f ' % (data_basic_stats.n), file=out)
            print('  Min     : %7.4f ' % (data_basic_stats.min), file=out)
            print('  Max     : %7.4f ' % (data_basic_stats.max), file=out)
            print('  Mean    : %7.4f ' % (data_basic_stats.mean), file=out)
            print('  Stdev   : %7.4f ' %
                  (data_basic_stats.biased_standard_deviation),
                  file=out)
            print('  Skew    : %7.4f ' % (data_basic_stats.skew), file=out)
            print('  Sum     : %7.4f ' % (data_basic_stats.sum), file=out)

            # Histo
            histogram = flex.histogram(data=data, n_slots=n_slots)
            low_cutoff = histogram.data_min()
            for i, n in enumerate(histogram.slots()):
                high_cutoff = histogram.data_min() + histogram.slot_width() * (
                    i + 1)
                print("%7.3f - %7.3f: %d" % (low_cutoff, high_cutoff, n),
                      file=out)
                low_cutoff = high_cutoff
            out.flush()
            return histogram
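show_histogram() above prints scitbx.math.basic_statistics of the data before the slot-by-slot listing. A small stand-alone sketch of the same two calls on invented data:

import scitbx.math
from cctbx.array_family import flex

data = flex.double([1.2, 1.9, 2.1, 2.4, 3.3, 4.0])  # made-up values
stats = scitbx.math.basic_statistics(data)
print('  Mean  : %7.4f ' % stats.mean)
print('  Stdev : %7.4f ' % stats.biased_standard_deviation)
flex.histogram(data=data, n_slots=5).show(prefix="  ", format_cutoffs="%7.3f")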
Example #12
def estimate_d_c(Dij):
    '''Estimate the value of d_c, assuming that each cluster is Gaussian distributed in its Dij values.
    If we can find out how many of those Gaussians are present in the Dij distribution, we can estimate d_c
    from the standard deviation of the individual Gaussians.'''
    from scitbx.array_family import flex
    Dij_max = max(Dij.as_1d())
    Dij_min = min(Dij.as_1d())
    # Rounding off to closest multiple of 10
    n_slots = (int(Dij_max) // 10 + 1) * 10
    if n_slots == 10:
        return 1.0
    hist_data = flex.histogram(Dij.as_1d(), n_slots=n_slots)
    # Divide the data further into bins and see if there are dead zones with data on either side.
    # This would indicate that there are 2+ clusters.
    y = hist_data.slots()
    x = hist_data.slot_centers()
    moving_avg_bin = []
    for i in range(0, n_slots, 10):
        moving_avg_bin.append(flex.mean(flex.double(list(y[i:i + 10]))))
    # There has to be one cluster close to 0.0; take that as the reference point and find out where the next cluster is
    min_avg = min(moving_avg_bin) * 2.0
    d_c = 1.0
    for i, avg in enumerate(moving_avg_bin):
        if avg <= min_avg:
            d_c = float(i * 10.0)
            break
    return d_c
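A quick way to exercise estimate_d_c() as defined above is a synthetic, flattened Dij with two well-separated groups of values; all numbers below are invented:

from scitbx.array_family import flex

Dij = flex.double([0.5, 1.0, 1.5, 2.0] * 10 + [40.0, 41.0, 42.0, 43.0] * 10)
print("estimated d_c:", estimate_d_c(Dij))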
Example #13
def show_histogram(data, n_slots):
    hm = flex.histogram(data=data, n_slots=n_slots)
    lc_1 = hm.data_min()
    s_1 = enumerate(hm.slots())
    for (i_1, n_1) in s_1:
        hc_1 = hm.data_min() + hm.slot_width() * (i_1 + 1)
        print "%10.3f - %-10.3f : %d" % (lc_1, hc_1, n_1)
        lc_1 = hc_1
Example #14
def show_histogram(data, n_slots):
  hm = flex.histogram(data = data, n_slots = n_slots)
  lc_1 = hm.data_min()
  s_1 = enumerate(hm.slots())
  for (i_1,n_1) in s_1:
    hc_1 = hm.data_min() + hm.slot_width() * (i_1+1)
    print "%10.3f - %-10.3f : %d" % (lc_1, hc_1, n_1)
    lc_1 = hc_1
def show_histogram(data, n_slots):
  print(flex.min(data), flex.max(data), flex.mean(data))
  hm = flex.histogram(data = data, n_slots = n_slots)
  lc_1 = hm.data_min()
  s_1 = enumerate(hm.slots())
  for (i_1,n_1) in s_1:
    hc_1 = hm.data_min() + hm.slot_width() * (i_1+1)
    print("%10.3f - %-10.3f : %10.2f" % (lc_1, hc_1, float(n_1)/(data.size())*100.))
    lc_1 = hc_1
Example #16
def show_histogram(data, n_slots, log):
  from cctbx.array_family import flex
  hm = flex.histogram(data = data, n_slots = n_slots)
  lc_1 = hm.data_min()
  s_1 = enumerate(hm.slots())
  for (i_1,n_1) in s_1:
    hc_1 = hm.data_min() + hm.slot_width() * (i_1+1)
    print >> log, "%10.3f - %-10.3f : %d" % (lc_1, hc_1, n_1)
    lc_1 = hc_1
def show_histogram(data, n_slots):
  print flex.min(data), flex.max(data), flex.mean(data)
  hm = flex.histogram(data = data, n_slots = n_slots)
  lc_1 = hm.data_min()
  s_1 = enumerate(hm.slots())
  for (i_1,n_1) in s_1:
    hc_1 = hm.data_min() + hm.slot_width() * (i_1+1)
    print "%10.3f - %-10.3f : %10.2f" % (lc_1, hc_1, float(n_1)/(data.size())*100.)
    lc_1 = hc_1
Example #18
File: f_model_info.py  Project: dials/cctbx
def show_histogram(data, n_slots, log):
    from cctbx.array_family import flex
    hm = flex.histogram(data=data, n_slots=n_slots)
    lc_1 = hm.data_min()
    s_1 = enumerate(hm.slots())
    for (i_1, n_1) in s_1:
        hc_1 = hm.data_min() + hm.slot_width() * (i_1 + 1)
        print("%10.3f - %-10.3f : %d" % (lc_1, hc_1, n_1), file=log)
        lc_1 = hc_1
Example #19
def show_histogram(data, n_slots, out=None, prefix=""):
    if (out is None): out = sys.stdout
    print >> out, prefix
    histogram = flex.histogram(data=data, n_slots=n_slots)
    low_cutoff = histogram.data_min()
    for i, n in enumerate(histogram.slots()):
        high_cutoff = histogram.data_min() + histogram.slot_width() * (i + 1)
        print >> out, "%7.3f - %7.3f: %d" % (low_cutoff, high_cutoff, n)
        low_cutoff = high_cutoff
    out.flush()
    return histogram
Example #20
 def _show_each(edges):
   for edge, ref_edge, label in zip(edges, ref_edges, labels):
     h = flex.histogram(edge, n_slots=n_slots)
     smin, smax = flex.min(edge), flex.max(edge)
     stats = flex.mean_and_variance(edge)
     print >> out, "  %s edge" % label
     print >> out, "     range:     %6.2f - %.2f" % (smin, smax)
     print >> out, "     mean:      %6.2f +/- %6.2f on N = %d" % (
       stats.mean(), stats.unweighted_sample_standard_deviation(), edge.size())
     print >> out, "     reference: %6.2f" % ref_edge
     h.show(f=out, prefix="    ", format_cutoffs="%6.2f")
     print >> out, ""
Example #21
def unit_cell_histograms(crystals):
    params = [flex.double() for i in range(6)]
    for cryst in crystals:
        unit_cell = cryst.get_unit_cell().parameters()
        for i in range(6):
            params[i].append(unit_cell[i])

    histograms = []
    for i in range(6):
        histograms.append(flex.histogram(params[i], n_slots=100))

    return histograms
Example #22
def unit_cell_histograms(crystals):
  params = [flex.double() for i in range(6)]
  for cryst in crystals:
    unit_cell = cryst.get_unit_cell().parameters()
    for i in range(6):
      params[i].append(unit_cell[i])

  histograms = []
  for i in range(6):
    histograms.append(flex.histogram(params[i], n_slots=100))

  return histograms
Example #23
 def _show_each (edges) :
   for edge, ref_edge, label in zip(edges, ref_edges, labels) :
     h = flex.histogram(edge, n_slots=n_slots)
     smin, smax = flex.min(edge), flex.max(edge)
     stats = flex.mean_and_variance(edge)
     print >> out, "  %s edge" % label
     print >> out, "     range:     %6.2f - %.2f" % (smin, smax)
     print >> out, "     mean:      %6.2f +/- %6.2f on N = %d" % (
       stats.mean(), stats.unweighted_sample_standard_deviation(), edge.size())
     print >> out, "     reference: %6.2f" % ref_edge
     h.show(f=out, prefix="    ", format_cutoffs="%6.2f")
     print >> out, ""
Example #24
def shelxd_cc_hist(filename):
    """Read the logs from filename (from shelxd) scrape out CC's, compute
    histogram of all, weak, write to stdout"""
    from cctbx.array_family import flex

    all = flex.double()
    weak = flex.double()

    for record in open(filename):
        if not record.startswith(" Try"):
            continue
        a = float(record[31:36])
        w = float(record[38:43])
        all.append(a)
        weak.append(w)

    h_all = flex.histogram(all, n_slots=220, data_min=-10, data_max=100)
    h_weak = flex.histogram(weak, n_slots=220, data_min=-10, data_max=100)

    for b, a, w in zip(h_all.slot_centers(), h_all.slots(), h_weak.slots()):
        print(b, a, w)
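shelxd_cc_hist() pins the histogram range with data_min/data_max so the 220 slots are comparable between runs. The same fixed-range call on a handful of invented CC values:

from cctbx.array_family import flex

cc_all = flex.double([12.5, 13.1, 35.2, 36.0, 36.4])  # made-up CC values
h_all = flex.histogram(cc_all, n_slots=220, data_min=-10, data_max=100)
for b, a in zip(h_all.slot_centers(), h_all.slots()):
    if a:
        print(b, a)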
Example #25
def spot_count_histogram(n_spots, n_bins=20, filename='n_spots_hist.png', log=False):
  hist = flex.histogram(n_spots.as_double(), n_slots=n_bins)

  blue = '#3498db'

  from matplotlib import pyplot
  pyplot.bar(
    hist.slot_centers().as_numpy_array(),
    hist.slots().as_numpy_array(),
    width=0.75*hist.slot_width(), align='center',
    color=blue, edgecolor=blue, log=log)
  pyplot.savefig(filename)
  pyplot.clf()
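spot_count_histogram() expects a flex array of integer spot counts and writes the bar chart to a PNG. A hypothetical call to the helper above (counts and file name are made up):

from cctbx.array_family import flex

n_spots = flex.int([12, 40, 55, 61, 58, 9])  # made-up spot counts per image
spot_count_histogram(n_spots, n_bins=5, filename='n_spots_hist.png', log=True)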
Example #27
def show_histogram(data,
                   n_slots,
                   out=None,
                   prefix=""):
    if (out is None): out = sys.stdout
    print >> out, prefix
    histogram = flex.histogram(data    = data,
                               n_slots = n_slots)
    low_cutoff = histogram.data_min()
    for i,n in enumerate(histogram.slots()):
      high_cutoff = histogram.data_min() + histogram.slot_width() * (i+1)
      print >> out, "%7.3f - %7.3f: %d" % (low_cutoff, high_cutoff, n)
      low_cutoff = high_cutoff
    out.flush()
    return histogram
Example #28
 def plot_wij_histogram(self, plot_name=None):
   if self._weights is None:
     return
   wij = self.wij_matrix.as_1d()
   hist = flex.histogram(wij, n_slots=50)
   logger.debug('Histogram of Wij values:')
   hist.show(f=debug_handle)
   from matplotlib import pyplot as plt
   fig = plt.figure(figsize=(10,8))
   plt.clf()
   plt.bar(hist.slot_centers(), hist.slots(), width=hist.slot_width())
   plt.yscale('log')
   plt.xlabel(r'$w_{ij}$')
   plt.ylabel('Frequency')
   if plot_name is not None:
     plt.savefig(plot_name)
   else:
     plt.show()
Example #29
 def plot_rij_histogram(self, plot_name=None):
   rij = self.rij_matrix.as_1d()
   rij = rij.select(rij != 0)
   hist = flex.histogram(rij, data_min=-1, data_max=1, n_slots=100)
   logger.debug('Histogram of Rij values:')
   hist.show(f=debug_handle)
   from matplotlib import pyplot as plt
   fig = plt.figure(figsize=(10,8))
   plt.clf()
   plt.bar(hist.slot_centers(), hist.slots(), width=hist.slot_width())
   fontsize = 24
   plt.xlabel(r'$r_{ij}$', size=fontsize)
   plt.ylabel('Frequency', size=fontsize)
   plt.tick_params(axis='both', which='both', labelsize=fontsize)
   plt.tight_layout()
   if plot_name is not None:
     plt.savefig(plot_name, dpi=300)
   else:
     plt.show()
Example #30
def unit_cell_histograms(crystals):
    params = [flex.double() for i in range(6)]
    for cryst in crystals:
        unit_cell = cryst.get_unit_cell().parameters()
        for i in range(6):
            params[i].append(unit_cell[i])

    histograms = []
    for i in range(6):
        histograms.append(flex.histogram(params[i], n_slots=100))

    median_unit_cell = uctbx.unit_cell([flex.median(p) for p in params])
    modal_unit_cell = uctbx.unit_cell(
        [h.slot_centers()[flex.max_index(h.slots())] for h in histograms]
    )
    print("Modal unit cell: %s" % str(modal_unit_cell))
    print("Median unit cell: %s" % str(median_unit_cell))

    return histograms
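The modal unit cell above is read straight off each histogram: the slot centre with the highest count. The same trick on a single made-up cell parameter:

from cctbx.array_family import flex

a_values = flex.double([78.1, 78.3, 78.2, 78.4, 78.2, 78.3, 78.2])  # made-up a edges (A)
h = flex.histogram(a_values, n_slots=100)
modal_a = h.slot_centers()[flex.max_index(h.slots())]
print("modal a: %.2f  median a: %.2f" % (modal_a, flex.median(a_values)))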
Example #31
 def density_truncation(self):
   min_fraction = self.params.density_truncation.fraction_min
   max_fraction = self.params.density_truncation.fraction_max
   if min_fraction is None and max_fraction is None: return
   if min_fraction is Auto:
     min_fraction = self.mean_protein_density-self.f000_over_v
   hist = flex.histogram(
     self.map.select(self.protein_iselection), n_slots=10000)
   if max_fraction is not None:
     self.truncate_max = hist.get_cutoff(
       int(self.n_protein_grid_points * (1-max_fraction)))
     truncate_max_sel = (self.map > self.truncate_max) & self.protein_selection
     self.map.set_selected(truncate_max_sel, self.truncate_max)
     self.truncate_max_percent = (
       truncate_max_sel.count(True) / self.n_protein_grid_points) * 100
   if min_fraction is not None:
     self.truncate_min = hist.get_cutoff(
       int(self.n_protein_grid_points * (1-min_fraction)))
     truncate_min_sel = (self.map < self.truncate_min) & self.protein_selection
     self.map.set_selected(truncate_min_sel, self.truncate_min)
     self.truncate_min_percent = (
       truncate_min_sel.count(True) / self.n_protein_grid_points) * 100
   self.mean_protein_density = flex.mean(
     self.map.select(self.protein_iselection))
Example #32
        def show_histogram(data, n_slots=50, out=None, prefix=""):
            if out is None:
                out = sys.stdout
            print >> out, "\n" + prefix

            # Stats
            data_basic_stats = scitbx.math.basic_statistics(data)
            print >> out, "\n  Number  : %7.4f " % (data_basic_stats.n)
            print >> out, "  Min     : %7.4f " % (data_basic_stats.min)
            print >> out, "  Max     : %7.4f " % (data_basic_stats.max)
            print >> out, "  Mean    : %7.4f " % (data_basic_stats.mean)
            print >> out, "  Stdev   : %7.4f " % (data_basic_stats.biased_standard_deviation)
            print >> out, "  Skew    : %7.4f " % (data_basic_stats.skew)
            print >> out, "  Sum     : %7.4f " % (data_basic_stats.sum)

            # Histo
            histogram = flex.histogram(data=data, n_slots=n_slots)
            low_cutoff = histogram.data_min()
            for i, n in enumerate(histogram.slots()):
                high_cutoff = histogram.data_min() + histogram.slot_width() * (i + 1)
                print >> out, "%7.3f - %7.3f: %d" % (low_cutoff, high_cutoff, n)
                low_cutoff = high_cutoff
            out.flush()
            return histogram
Example #33
    def add_cells_and_files(self, cells, symm_str):
        self.cells = cells
        # Table
        table_str = ""
        for idx, xac in enumerate(cells):
            cell = cells[xac]
            table_str += "<tr>\n"
            table_str += " <td>%.4d</td><td>%s</td>" % (idx+1, xac) # idx, file
            table_str += "".join(map(lambda x: "<td>%.2f</td>"%x, cell))
            table_str += "\n</tr>\n"

        # Hist
        cellconstr = CellConstraints(sgtbx.space_group_info(symm_str).group())
        show_flags = (True, not cellconstr.is_b_equal_a(), not cellconstr.is_c_equal_a_b(),
                      not cellconstr.is_angle_constrained("alpha"),
                      not cellconstr.is_angle_constrained("beta"),
                      not cellconstr.is_angle_constrained("gamma"))
        names = ("a", "b", "c", "&alpha;", "&beta;", "&gamma;")

        hist_str = ""
        label1 = ""
        for i, (name, show) in enumerate(zip(names, show_flags)):
            tmp = ""
            if i in (0,3): tmp += "<tr>"
            if show: tmp += "<th>%s</th>" % name
            if i in (2,5): tmp += "</tr>"

            if i < 3: hist_str += tmp
            else: label1 += tmp

        hist_str += "\n<tr>\n"

        for idx, (name, show) in enumerate(zip(names, show_flags)):
            if idx==3: hist_str += "</tr>" + label1 + "<tr>"
            if not show: continue
            vals = flex.double(map(lambda x: x[idx], cells.values()))
            if len(vals) == 0: continue
            nslots = max(30, int((max(vals) - min(vals)) / 0.5))
            hist = flex.histogram(vals, n_slots=nslots)
            x_vals = map(lambda i: hist.data_min() + hist.slot_width() * (i+.5), xrange(len(hist.slots())))
            y_vals = hist.slots()
            hist_str += """
<td>
<div id="chartdiv_cell%(idx)d" style="width: 500px; height: 400px;"></div>
<script>
 var chart_cell%(idx)d = AmCharts.makeChart("chartdiv_cell%(idx)d", {
    "type": "serial",
    "theme": "none",  
    "legend": {
        "useGraphSettings": true,
        "markerSize":12,
        "valueWidth":0,
        "verticalGap":0
    },
    "dataProvider": [%(data)s],
    "valueAxes": [{
        "minorGridAlpha": 0.08,
        "minorGridEnabled": true,
        "position": "top",
        "axisAlpha":0
    }],
    "graphs": [{
        "balloonText": "[[category]]: [[value]]",
        "title": "%(name)s",
        "type": "column",
        "fillAlphas": 0.8,
        "valueField": "yval"
    }],
    "rotate": false,
    "categoryField": "xval",
    "categoryAxis": {
        "gridPosition": "start",
        "title": ""
    }
});
</script>
</td>
""" % dict(idx=idx, name=name,
           data=",".join(map(lambda x: '{"xval":%.2f,"yval":%d}'%x, zip(x_vals,y_vals)))
           )

        hist_str += "</tr>"

        self.html_inputfiles = """
<h2>Input files</h2>
%d files for merging in %s symmetry

<h3>Unit cell histogram</h3>
<table>
%s
</table>

<h3>Files</h3>
<a href="#" onClick="toggle_show('div-input-files'); return false;">Show/Hide</a>
<div id="div-input-files" style="display:none;">
<table class="cells">
<tr>
 <th>idx</th> <th>file</th> <th>a</th> <th>b</th> <th>c</th> <th>&alpha;</th> <th>&beta;</th> <th>&gamma;</th>
</tr>
%s
</table>
</div>
""" % (len(cells), symm_str, hist_str, table_str)
        self.write_html()
Example #34
  def __init__(
        self,
        pdb_hierarchy,
        restraints_manager,
        molprobity_scores=False,
        n_histogram_slots=10,
        cdl_restraints=False,
        ignore_hydrogens=False,  #only used by amber
        automatically_use_amber=True,
        ):
    super(geometry, self).__init__(
        pdb_hierarchy=pdb_hierarchy,
        molprobity_scores=molprobity_scores)
    self.cdl_restraints=cdl_restraints
    sites_cart = pdb_hierarchy.atoms().extract_xyz()
    energies_sites = \
      restraints_manager.energies_sites(
        sites_cart        = sites_cart,
        compute_gradients = False)
    if(hasattr(energies_sites, "geometry")):
      esg = energies_sites.geometry
    else: esg = energies_sites
    self.a = None
    self.b = None
    self.angle_deltas = None
    self.bond_deltas = None
    if not hasattr(esg, "angle_deviations"): return
    if automatically_use_amber and hasattr(esg, "amber"):
      self.used_amber=True
      amber_parm = restraints_manager.amber_structs.parm
      self.a, angle_deltas = esg.angle_deviations(sites_cart, amber_parm,
                                        ignore_hd=ignore_hydrogens,
                                        get_deltas=True)
      self.b, bond_deltas = esg.bond_deviations(sites_cart, amber_parm,
                                        ignore_hd=ignore_hydrogens,
                                        get_deltas=True)
      self.a_number = esg.n_angle_proxies(amber_parm,
                                          ignore_hd=ignore_hydrogens)
      self.b_number = esg.n_bond_proxies(amber_parm,
                                         ignore_hd=ignore_hydrogens)
      self.c, self.p, self.ll, self.d, self.n = None, None, None, None, None
      self.c_number=0
      self.p_number=0
      self.d_number=0

      self.bond_deltas_histogram = \
        flex.histogram(data = flex.abs(bond_deltas), n_slots = n_histogram_slots)
      self.angle_deltas_histogram = \
        flex.histogram(data = flex.abs(angle_deltas), n_slots = n_histogram_slots)
      # nonbonded_distances = esg.nonbonded_distances()
      # self.nonbonded_distances_histogram = flex.histogram(
      #   data = flex.abs(nonbonded_distances), n_slots = n_histogram_slots)
      for restraint_type in ["b", "a", "c", "p", "ll", "d", "n"] :
        for value_type in [("mean",2), ("max",1), ("min",0)] :
          name = "%s_%s" % (restraint_type, value_type[0])
          if getattr(self, restraint_type) is None:
            setattr(self, name, None)
            continue
          setattr(self, name, getattr(self, restraint_type)[value_type[1]])
      return
    self.a = esg.angle_deviations()
    self.b = esg.bond_deviations()
    self.a_number = esg.get_filtered_n_angle_proxies()
    self.b_number = esg.get_filtered_n_bond_proxies()
    self.c = esg.chirality_deviations()
    self.d = esg.dihedral_deviations()
    self.p = esg.planarity_deviations()
    self.ll = esg.parallelity_deviations()
    self.n = esg.nonbonded_deviations()
    self.c_number = esg.n_chirality_proxies
    self.d_number = esg.n_dihedral_proxies
    self.p_number = esg.n_planarity_proxies
    self.n_number = esg.n_nonbonded_proxies
    #
    for restraint_type in ["b", "a", "c", "p", "ll", "d", "n"] :
      for value_type in [("mean",2), ("max",1), ("min",0)] :
        name = "%s_%s" % (restraint_type, value_type[0])
        if getattr(self, restraint_type) is None: continue
        setattr(self, name, getattr(self, restraint_type)[value_type[1]])
    #
    if(hasattr(restraints_manager, "geometry")):
      rmg = restraints_manager.geometry
    else: rmg = restraints_manager
    self.bond_deltas = geometry_restraints.bond_deltas(
      sites_cart         = sites_cart,
      sorted_asu_proxies = rmg.pair_proxies().bond_proxies)
    self.angle_deltas = geometry_restraints.angle_deltas(
      sites_cart = sites_cart,
      proxies    = rmg.angle_proxies)
    self.nonbonded_distances = esg.nonbonded_distances()
    self.number_of_worst_clashes = (self.nonbonded_distances<0.5).count(True)
    self.bond_deltas_histogram = \
      flex.histogram(data = flex.abs(self.bond_deltas), n_slots = n_histogram_slots)
    self.angle_deltas_histogram = \
      flex.histogram(data = flex.abs(self.angle_deltas), n_slots = n_histogram_slots)
    self.nonbonded_distances_histogram = flex.histogram(
      data = flex.abs(self.nonbonded_distances), n_slots = n_histogram_slots)
    #
    assert approx_equal(
      esg.target,
      esg.angle_residual_sum+
      esg.bond_residual_sum+
      esg.chirality_residual_sum+
      esg.dihedral_residual_sum+
      esg.nonbonded_residual_sum+
      esg.planarity_residual_sum+
      esg.parallelity_residual_sum+
      esg.reference_coordinate_residual_sum+
      esg.reference_dihedral_residual_sum+
      esg.ncs_dihedral_residual_sum+
      esg.den_residual_sum+
      esg.ramachandran_residual_sum)
    del energies_sites, esg # we accumulate this object, so make it clean asap
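Both the amber branch and the standard branch above finish by histogramming the absolute bond and angle deltas. The same call in isolation, with invented deltas:

from cctbx.array_family import flex

bond_deltas = flex.double([-0.021, 0.004, 0.013, -0.007])  # made-up bond deltas (A)
flex.histogram(data=flex.abs(bond_deltas), n_slots=10).show(prefix="  ", format_cutoffs="%7.3f")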
Example #36
    elif key == 'mapped_predictions':
      print key, data[key][0][0], "(only first shown of %d)"%len(data[key][0])
    elif key == 'correction_vectors' and data[key] is not None and data[key][0] is not None:
      if data[key][0] is None:
        print key, "None"
      else:
        print key, data[key][0][0], "(only first shown)"
    elif key == "DATA":
      print key,"len=%d max=%f min=%f dimensions=%s"%(data[key].size(),flex.max(data[key]),flex.min(data[key]),str(data[key].focus()))
    elif key == "WAVELENGTH":
      print "WAVELENGTH", data[key], ", converted to eV:", 12398.4187/data[key]
    elif key == "applied_absorption_correction":
      print key, data[key]
      if doplots:
        c = data[key][0]
        hist = flex.histogram(c, n_slots=30)
        from matplotlib import pyplot as plt
        plt.scatter(hist.slot_centers(), hist.slots())
        plt.show()

        obs = data['observations'][0]
        preds = data['mapped_predictions'][0]
        p1 = preds.select(c == 1.0)
        p2 = preds.select((c != 1.0) & (c <= 1.5))
        plt.scatter(preds.parts()[1], preds.parts()[0], c='g')
        plt.scatter(p1.parts()[1], p1.parts()[0], c='b')
        plt.scatter(p2.parts()[1], p2.parts()[0], c='r')
        plt.show()

    else:
      print key, data[key]
Example #37
    def model_based_outliers(self, f_model, level=0.01, return_data=False, plot_out=None):

        assert self.r_free_flags is not None
        if self.r_free_flags.data().count(True) == 0:
            self.r_free_flags = self.r_free_flags.array(data=~self.r_free_flags.data())
        sigmaa_estimator = sigmaa_estimation.sigmaa_estimator(
            miller_obs=self.miller_obs,
            miller_calc=f_model,
            r_free_flags=self.r_free_flags,
            kernel_width_free_reflections=200,
            n_sampling_points=20,
            n_chebyshev_terms=13,
        )

        sigmaa_estimator.show(out=self.out)
        sigmaa = sigmaa_estimator.sigmaa()
        obs_norm = abs(sigmaa_estimator.normalized_obs)
        calc_norm = sigmaa_estimator.normalized_calc

        f_model_outlier_object = scaling.likelihood_ratio_outlier_test(
            f_obs=obs_norm.data(),
            sigma_obs=None,
            f_calc=calc_norm.data(),
            # the data is prenormalized, all epsies are unity
            epsilon=flex.double(calc_norm.data().size(), 1.0),
            centric=obs_norm.centric_flags().data(),
            alpha=sigmaa.data(),
            beta=1.0 - sigmaa.data() * sigmaa.data(),
        )
        modes = f_model_outlier_object.posterior_mode()
        lik = f_model_outlier_object.log_likelihood()
        p_lik = f_model_outlier_object.posterior_mode_log_likelihood()
        s_der = f_model_outlier_object.posterior_mode_snd_der()

        ll_gain = f_model_outlier_object.standardized_likelihood()

        # The smallest value should be 0. Sometimes, due to numerical
        # issues, it comes out a wee bit negative, so clamp such values
        # to a small positive eps below.
        eps = 1.0e-10
        zeros = flex.bool(ll_gain < eps)
        p_values = ll_gain
        p_values = p_values.set_selected(zeros, eps)
        p_values = erf(flex.sqrt(p_values / 2.0))
        p_values = 1.0 - flex.pow(p_values, float(p_values.size()))

        # select on p-values
        flags = flex.bool(p_values > level)
        flags = self.miller_obs.customized_copy(data=flags)
        ll_gain = self.miller_obs.customized_copy(data=ll_gain)
        p_values = self.miller_obs.customized_copy(data=p_values)

        log_message = """

Model based outlier rejection.
------------------------------

Calculated amplitudes and estimated values of alpha and beta
are used to compute the log-likelihood of the observed amplitude.
The method is inspired by Read, Acta Cryst. (1999). D55, 1759-1764.
Outliers are rejected on the basis of the assumption that a scaled
log likelihood difference 2(log[P(Fobs)]-log[P(Fmode)])/Q\" is distributed
according to a Chi-square distribution (Q\" is equal to the second
derivative of the log likelihood function of the mode of the
distribution).
The outlier threshold of the p-value relates to the p-value of the
extreme value distribution of the chi-square distribution.

"""

        flags.map_to_asu()
        ll_gain.map_to_asu()
        p_values.map_to_asu()

        assert flags.indices().all_eq(self.miller_obs.indices())
        assert ll_gain.indices().all_eq(self.miller_obs.indices())
        assert p_values.indices().all_eq(self.miller_obs.indices())

        log_message = self.make_log_model(log_message, flags, ll_gain, p_values, obs_norm, calc_norm, sigmaa, plot_out)
        tmp_log = StringIO()
        print >> tmp_log, log_message
        # histogram of log likelihood gain values
        print >> tmp_log
        print >> tmp_log, "The histoghram of scaled (LL-gain) values is shown below."
        print >> tmp_log, "  Note: scaled (LL-gain) is approximately Chi-square distributed."
        print >> tmp_log
        print >> tmp_log, "  scaled(LL-gain)  Frequency"
        histo = flex.histogram(ll_gain.data(), 15)
        histo.show(f=tmp_log, format_cutoffs="%7.3f")

        print >>self.out, tmp_log.getvalue()

        if not return_data:
            return flags
        else:
            assert flags.indices().all_eq(self.miller_obs.indices())
            return self.miller_obs.select(flags.data())
Example #38
def run(args):

    from cctbx.array_family import flex
    from dials.util.options import OptionParser
    from dials.util.options import flatten_reflections
    import libtbx.load_env

    usage = "%s [options] reflections_1.pickle reflections_2.pickle" % (
        libtbx.env.dispatcher_name)

    parser = OptionParser(usage=usage,
                          phil=phil_scope,
                          read_reflections=True,
                          epilog=help_message)

    params, options, args = parser.parse_args(show_diff_phil=True,
                                              return_unhandled=True)
    reflections = flatten_reflections(params.input.reflections)

    if flex.max(reflections[0]["id"]) > 0:
        reflections = list(reversed(reflections))
    assert flex.max(reflections[0]["id"]) == 0

    assert len(reflections) == 2
    partialities = []
    intensities = []
    sigmas = []
    ids = []
    xyz = []

    # only want fully-recorded reflections in full dataset
    # reflections[0] = reflections[0].select(reflections[0]['partiality'] > 0.99)
    print(reflections[0].size())
    # only want partial reflections in sliced dataset
    # reflections[1] = reflections[1].select(reflections[1]['partiality'] < 0.99)
    print(reflections[1].size())

    for refl in reflections:
        # sel = refl.get_flags(refl.flags.integrated_sum)
        sel = refl.get_flags(refl.flags.integrated)
        sel &= refl["intensity.sum.value"] > 0
        sel &= refl["intensity.sum.variance"] > 0
        refl = refl.select(sel)
        hkl = refl["miller_index"]
        partiality = refl["partiality"]
        intensity = refl["intensity.sum.value"]
        vari = refl["intensity.sum.variance"]
        assert vari.all_gt(0)
        sigi = flex.sqrt(vari)
        intensities.append(intensity)
        partialities.append(partiality)
        sigmas.append(sigi)
        ids.append(refl["id"])
        xyz.append(refl["xyzcal.px"])

    from annlib_ext import AnnAdaptor as ann_adaptor

    ann = ann_adaptor(xyz[0].as_double().as_1d(), 3)
    ann.query(xyz[1].as_double().as_1d())
    distances = flex.sqrt(ann.distances)
    matches = distances < 2  # pixels
    isel0 = flex.size_t(list(ann.nn.select(matches)))
    isel1 = flex.size_t(list(matches.iselection()))

    p0 = partialities[0].select(isel0)
    p1 = partialities[1].select(isel1)
    i0 = intensities[0].select(isel0)
    i1 = intensities[1].select(isel1)

    print((p0 > p1).count(True), (p0 < p1).count(True))

    h0 = flex.histogram(p0, data_min=0, data_max=1, n_slots=20)
    h1 = flex.histogram(p1, data_min=0, data_max=1, n_slots=20)
    h0.show()
    h1.show()

    from matplotlib import pyplot

    perm0 = flex.sort_permutation(p0)
    perm1 = flex.sort_permutation(p1)
    fig, axes = pyplot.subplots(nrows=2, sharex=True)
    axes[0].plot(p0.select(perm0), flex.int_range(p0.size()))
    axes[1].plot(p1.select(perm1), flex.int_range(p1.size()))
    axes[1].set_xlabel("Partiality")
    for ax in axes:
        ax.set_ylabel("Cumulative frequency")
    for ax in axes:
        ax.set_yscale("log")
    pyplot.savefig("sorted_partialities.png")
    pyplot.clf()

    blue = "#3498db"
    fig, axes = pyplot.subplots(nrows=2, sharex=True)
    axes[0].bar(
        h0.slot_centers(),
        h0.slots(),
        width=h0.slot_width(),
        align="center",
        color=blue,
        edgecolor=blue,
    )
    axes[1].bar(
        h1.slot_centers(),
        h1.slots(),
        width=h1.slot_width(),
        align="center",
        color=blue,
        edgecolor=blue,
    )
    axes[1].set_xlabel("Partiality")
    for ax in axes:
        ax.set_ylabel("Frequency")
    for ax in axes:
        ax.set_yscale("log")
    pyplot.savefig("partiality_histogram.png")
    # pyplot.show()
    pyplot.clf()

    pyplot.scatter(p0, p1, s=5, alpha=0.3, marker="+")
    pyplot.xlabel("Partiality (full)")
    pyplot.ylabel("Partiality (sliced)")
    pyplot.savefig("partiality_full_vs_sliced.png")
    pyplot.clf()

    pyplot.scatter(i0, i1, s=5, alpha=0.3, marker="+")
    pyplot.xlim(flex.min(i0), flex.max(i0))
    pyplot.ylim(flex.min(i1), flex.max(i1))
    pyplot.xlabel("Intensity (full)")
    pyplot.ylabel("Intensity (sliced)")
    pyplot.xscale("log")
    pyplot.yscale("log")
    pyplot.savefig("intensity_full_vs_sliced.png")
    pyplot.clf()

    i_ratio = i1 / i0
    p_ratio = p1 / p0
    pyplot.scatter(p_ratio, i_ratio, s=5, alpha=0.3, marker="+")
    pyplot.ylim(flex.min(i_ratio), flex.max(i_ratio))
    pyplot.yscale("log")
    pyplot.xlabel("P(full)/P(sliced)")
    pyplot.ylabel("I(full)/I(sliced)")
    pyplot.savefig("partiality_ratio_vs_intensity_ratio.png")
    pyplot.clf()
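
A minimal stand-alone sketch of the nearest-neighbour matching step used in the example above, assuming only annlib_ext and scitbx are importable; the coordinates below are synthetic, not taken from real reflection tables.

from annlib_ext import AnnAdaptor as ann_adaptor
from scitbx.array_family import flex

reference = flex.double([0, 0, 0, 10, 10, 10, 20, 20, 20])  # three xyz points, flattened
query = flex.double([0.5, 0, 0, 19.5, 20, 20])              # two xyz points, flattened
ann = ann_adaptor(reference, 3)         # build a 3-dimensional nearest-neighbour tree
ann.query(query)                        # nearest reference point for every query point
distances = flex.sqrt(ann.distances)    # AnnAdaptor reports squared distances
matches = distances < 2                 # keep pairs closer than 2 units
isel_ref = flex.size_t(list(ann.nn.select(matches)))    # indices into the reference set
isel_query = flex.size_t(list(matches.iselection()))    # indices into the query set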
Example #39
0
 def histogram(self, n_slots=10000):
     return flex.histogram(data=self.map.as_1d(), n_slots=n_slots)
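
A hedged, self-contained variant of the same idea: histogram the voxel values of a flex map directly, without the wrapping class; the map here is just random noise standing in for real map data.

from scitbx.array_family import flex

map_data = flex.random_double(16 * 16 * 16)        # stand-in for a real 3D map
map_data.reshape(flex.grid(16, 16, 16))
h = flex.histogram(data=map_data.as_1d(), n_slots=20)
h.show(format_cutoffs="%6.3f")                     # print slot boundaries and counts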
Example #40
0
 def __init__(
       self,
       pdb_hierarchy,
       restraints_manager,
       molprobity_scores=False,
       n_histogram_slots=10,
       cdl_restraints=False,
       ignore_hydrogens=False,  #only used by amber
       automatically_use_amber=True,
       ):
   super(geometry, self).__init__(
       pdb_hierarchy=pdb_hierarchy,
       molprobity_scores=molprobity_scores)
   if(restraints_manager is not None):
     self.cdl_restraints=cdl_restraints
     sites_cart = pdb_hierarchy.atoms().extract_xyz()
     energies_sites = \
       restraints_manager.energies_sites(
         sites_cart        = sites_cart,
         compute_gradients = False)
     if(hasattr(energies_sites, "geometry")):
       esg = energies_sites.geometry
     else: esg = energies_sites
     self.a = None
     self.b = None
     self.angle_deltas = None
     self.bond_deltas = None
     if not hasattr(esg, "angle_deviations"): return
     if automatically_use_amber and hasattr(esg, "amber"):
       self.used_amber=True
       amber_parm = restraints_manager.amber_structs.parm
       self.a, angle_deltas, angle_extremes = esg.angle_deviations(
         sites_cart, amber_parm,
         ignore_hd=ignore_hydrogens,
         get_deltas=True,
         get_extremes=True,
         )
       self.angle_extremes = angle_extremes
       self.b, bond_deltas, bond_extremes = esg.bond_deviations(
         sites_cart, amber_parm,
         ignore_hd=ignore_hydrogens,
         get_deltas=True,
         get_extremes=True,
         )
       self.bond_extremes = bond_extremes
       self.a_number = esg.n_angle_proxies(amber_parm,
                                           ignore_hd=ignore_hydrogens)
       self.b_number = esg.n_bond_proxies(amber_parm,
                                          ignore_hd=ignore_hydrogens)
       self.c, self.p, self.ll, self.d, self.n = None, None, None, None, None
       self.c_number=0
       self.p_number=0
       self.d_number=0
       self.bond_deltas_histogram = \
         flex.histogram(data = flex.abs(bond_deltas), n_slots = n_histogram_slots)
       self.angle_deltas_histogram = \
         flex.histogram(data = flex.abs(angle_deltas), n_slots = n_histogram_slots)
       # nonbonded_distances = esg.nonbonded_distances()
       # self.nonbonded_distances_histogram = flex.histogram(
       #   data = flex.abs(nonbonded_distances), n_slots = n_histogram_slots)
       for restraint_type in ["b", "a", "c", "p", "ll", "d", "n"] :
         for value_type in [("mean",2), ("max",1), ("min",0)] :
           name = "%s_%s" % (restraint_type, value_type[0])
           if getattr(self, restraint_type) is None:
             setattr(self, name, None)
             continue
           setattr(self, name, getattr(self, restraint_type)[value_type[1]])
       return
     self.a = esg.angle_deviations()
     self.b = esg.bond_deviations()
     self.b_z = esg.bond_deviations_z()
     self.a_z = esg.angle_deviations_z()
     self.b_w = esg.bond_deviations_weighted()
     self.a_w = esg.angle_deviations_weighted()
     self.a_number = esg.get_filtered_n_angle_proxies()
     self.b_number = esg.get_filtered_n_bond_proxies()
     self.c = esg.chirality_deviations()
     self.d = esg.dihedral_deviations()
     self.p = esg.planarity_deviations()
     self.ll = esg.parallelity_deviations()
     self.n = esg.nonbonded_deviations()
     self.c_number = esg.n_chirality_proxies
     self.d_number = esg.n_dihedral_proxies
     self.p_number = esg.n_planarity_proxies
     self.n_number = esg.n_nonbonded_proxies
     #
     for restraint_type in ["b", "a", "c", "p", "ll", "d", "n"] :
       for value_type in [("mean",2), ("max",1), ("min",0)] :
         name = "%s_%s" % (restraint_type, value_type[0])
         if getattr(self, restraint_type) is None: continue
         setattr(self, name, getattr(self, restraint_type)[value_type[1]])
     #
     if(hasattr(restraints_manager, "geometry")):
       rmg = restraints_manager.geometry
     else: rmg = restraints_manager
     bond_deltas = geometry_restraints.bond_deltas(
       sites_cart         = sites_cart,
       sorted_asu_proxies = rmg.pair_proxies().bond_proxies)
     angle_deltas = geometry_restraints.angle_deltas(
       sites_cart = sites_cart,
       proxies    = rmg.angle_proxies)
     nonbonded_distances = esg.nonbonded_distances()
     self.number_of_worst_clashes = (nonbonded_distances<0.5).count(True)
     self.bond_deltas_histogram = \
       flex.histogram(data = flex.abs(bond_deltas), n_slots = n_histogram_slots)
     self.angle_deltas_histogram = \
       flex.histogram(data = flex.abs(angle_deltas), n_slots = n_histogram_slots)
     self.nonbonded_distances_histogram = flex.histogram(
       data = flex.abs(nonbonded_distances), n_slots = n_histogram_slots)
     #
     assert approx_equal(
       esg.target,
       esg.angle_residual_sum+
       esg.bond_residual_sum+
       esg.chirality_residual_sum+
       esg.dihedral_residual_sum+
       esg.nonbonded_residual_sum+
       esg.planarity_residual_sum+
       esg.parallelity_residual_sum+
       esg.reference_coordinate_residual_sum+
       esg.reference_dihedral_residual_sum+
       esg.ncs_dihedral_residual_sum+
       esg.den_residual_sum+
       esg.ramachandran_residual_sum)
     del energies_sites, esg # we accumulate this object, so make it clean asap
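
A minimal sketch of the bond/angle-delta histogram pattern from the example above, with synthetic deltas in place of a real restraints manager; only flex is assumed.

from scitbx.array_family import flex

bond_deltas = flex.double([0.012, -0.021, 0.004, 0.033, -0.002, 0.017])   # invented, in A
angle_deltas = flex.double([1.4, -0.8, 2.1, -3.0, 0.3])                   # invented, in degrees
bond_hist = flex.histogram(data=flex.abs(bond_deltas), n_slots=10)
angle_hist = flex.histogram(data=flex.abs(angle_deltas), n_slots=10)
bond_hist.show(prefix="  bond |delta|  ", format_cutoffs="%8.4f")
angle_hist.show(prefix="  angle |delta| ", format_cutoffs="%8.2f")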
Example #41
0
            if data[key][0] is None:
                print key, "None"
            else:
                print key, data[key][0][0], "(only first shown)"
        elif key == "DATA":
            print key, "len=%d max=%f min=%f dimensions=%s" % (
                data[key].size(), flex.max(data[key]), flex.min(
                    data[key]), str(data[key].focus()))
        elif key == "WAVELENGTH":
            print "WAVELENGTH", data[
                key], ", converted to eV:", 12398.4187 / data[key]
        elif key == "fuller_kapton_absorption_correction":
            print key, data[key]
            if doplots:
                c = data[key][0]
                hist = flex.histogram(c, n_slots=30)
                from matplotlib import pyplot as plt
                plt.scatter(hist.slot_centers(), hist.slots())
                plt.show()

                obs = data['observations'][0]
                preds = data['mapped_predictions'][0]
                p1 = preds.select(c == 1.0)
                p2 = preds.select((c != 1.0) & (c <= 1.5))
                plt.scatter(preds.parts()[1], preds.parts()[0], c='g')
                plt.scatter(p1.parts()[1], p1.parts()[0], c='b')
                plt.scatter(p2.parts()[1], p2.parts()[0], c='r')
                plt.show()

        else:
            print key, data[key]
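
A self-contained sketch of the correction-factor histogram plot used in the fragment above, with fabricated correction values; flex and matplotlib are assumed to be available.

from scitbx.array_family import flex
from matplotlib import pyplot as plt

c = flex.random_double(1000) * 0.5 + 1.0      # fake absorption corrections in [1.0, 1.5)
hist = flex.histogram(c, n_slots=30)
plt.scatter(list(hist.slot_centers()), list(hist.slots()))
plt.xlabel("correction factor")
plt.ylabel("frequency")
plt.show()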
Example #42
0
 def hist(data):
   from cStringIO import StringIO
   sio = StringIO()
   flex.histogram(data=data, n_slots=10) \
     .show(f=sio, prefix="  ", format_cutoffs="%8.2f")
   return sio.getvalue().splitlines()
Example #43
0
 def histogram(self, n_slots=10000):
   return flex.histogram(data=self.map.as_1d(), n_slots=n_slots)
Example #44
0
def keywise_printout(data):
  for key in data:
    if key == 'ACTIVE_AREAS':
      print int(len(data[key])/4), "active areas, first one: ", list(data[key][0:4])
    elif key == 'observations':
      print key, data[key], "Showing unit cell/spacegroup:"
      obs = data[key][0]
      uc = obs.unit_cell()
      uc.show_parameters()
      obs.space_group().info().show_summary()
      d = uc.d(obs.indices())
      print "Number of observations:", len(obs.indices())
      print "Max resolution: %f"%flex.min(d)
      print "Mean I/sigma:", flex.mean(obs.data())/flex.mean(obs.sigmas())
      print "I/sigma > 1 count:", (obs.data()/obs.sigmas() > 1).count(True)
      print "I <= 0:", len(obs.data().select(obs.data() <= 0))

      from cctbx.crystal import symmetry
      sym = symmetry(unit_cell = uc, space_group = obs.space_group())
      mset = sym.miller_set(indices = obs.indices(), anomalous_flag=False)
      binner = mset.setup_binner(n_bins=20)
      acceptable_resolution_bins = []
      binned_avg_i_sigi = []
      for i in binner.range_used():
        d_max, d_min = binner.bin_d_range(i)
        sel = (d <= d_max) & (d > d_min)
        sel &= obs.data() > 0
        intensities = obs.data().select(sel)
        sigmas = obs.sigmas().select(sel)
        n_refls = len(intensities)
        avg_i = flex.mean(intensities) if n_refls > 0 else 0
        avg_i_sigi = flex.mean(intensities / sigmas) if n_refls > 0 else 0
        acceptable_resolution_bins.append(avg_i_sigi >= 1.0)

      acceptable_resolution_bins = [acceptable_resolution_bins[i] if False not in acceptable_resolution_bins[:i+1] else False
                                    for i in range(len(acceptable_resolution_bins))]
      best_res = None
      for i, ok in zip(binner.range_used(), acceptable_resolution_bins):
        d_max, d_min = binner.bin_d_range(i)
        if ok:
          best_res = d_min
        else:
          break
      if best_res is None:
        print "Highest resolution with I/sigI >= 1.0: None"
      else:
        print "Highest resolution with I/sigI >= 1.0: %f"%d_min

    elif key == 'mapped_predictions':
      print key, data[key][0][0], "(only first shown of %d)"%len(data[key][0])
    elif key == 'correction_vectors' and data[key] is not None and data[key][0] is not None:
      if data[key][0] is None:
        print key, "None"
      else:
        print key, data[key][0][0], "(only first shown)"
    elif key == "DATA":
      print key,"len=%d max=%f min=%f dimensions=%s"%(data[key].size(),flex.max(data[key]),flex.min(data[key]),str(data[key].focus()))
    elif key == "WAVELENGTH":
      print "WAVELENGTH", data[key], ", converted to eV:", 12398.4187/data[key]
    elif key == "fuller_kapton_absorption_correction":
      print key, data[key]
      if doplots:
        c = data[key][0]
        hist = flex.histogram(c, n_slots=30)
        from matplotlib import pyplot as plt
        plt.scatter(hist.slot_centers(), hist.slots())
        plt.show()

        obs = data['observations'][0]
        preds = data['mapped_predictions'][0]
        p1 = preds.select(c == 1.0)
        p2 = preds.select((c != 1.0) & (c <= 1.5))
        plt.scatter(preds.parts()[1], preds.parts()[0], c='g')
        plt.scatter(p1.parts()[1], p1.parts()[0], c='b')
        plt.scatter(p2.parts()[1], p2.parts()[0], c='r')
        plt.show()

    else:
      print key, data[key]
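
A minimal illustration of the resolution cut-off logic in the example above: walk the bins from low to high resolution and stop at the first bin whose mean I/sigI drops below 1.0. The binned values below are invented.

binned_avg_i_sigi = [25.0, 14.2, 8.1, 4.0, 2.2, 1.3, 0.9, 0.6]
bin_d_min = [8.0, 5.6, 4.5, 3.9, 3.5, 3.2, 3.0, 2.8]   # high-resolution edge of each bin
best_res = None
for d_min, avg_i_sigi in zip(bin_d_min, binned_avg_i_sigi):
    if avg_i_sigi >= 1.0:
        best_res = d_min     # bin still acceptable; extend the resolution limit
    else:
        break                # first failing bin ends the search
print("Highest resolution with I/sigI >= 1.0:", best_res)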
Example #45
0
    def model_based_outliers(self,
                             f_model,
                             level=.01,
                             return_data=False,
                             plot_out=None):

        assert self.r_free_flags is not None
        if (self.r_free_flags.data().count(True) == 0):
            self.r_free_flags = self.r_free_flags.array(
                data=~self.r_free_flags.data())
        sigmaa_estimator = sigmaa_estimation.sigmaa_estimator(
            miller_obs=self.miller_obs,
            miller_calc=f_model,
            r_free_flags=self.r_free_flags,
            kernel_width_free_reflections=200,
            n_sampling_points=20,
            n_chebyshev_terms=13)

        sigmaa_estimator.show(out=self.out)
        sigmaa = sigmaa_estimator.sigmaa()
        obs_norm = abs(sigmaa_estimator.normalized_obs)
        calc_norm = sigmaa_estimator.normalized_calc

        f_model_outlier_object = scaling.likelihood_ratio_outlier_test(
            f_obs=obs_norm.data(),
            sigma_obs=None,
            f_calc=calc_norm.data(),
            # the data are pre-normalized, so all epsilon values are unity
            epsilon=flex.double(calc_norm.data().size(), 1.0),
            centric=obs_norm.centric_flags().data(),
            alpha=sigmaa.data(),
            beta=1.0 - sigmaa.data() * sigmaa.data())
        modes = f_model_outlier_object.posterior_mode()
        lik = f_model_outlier_object.log_likelihood()
        p_lik = f_model_outlier_object.posterior_mode_log_likelihood()
        s_der = f_model_outlier_object.posterior_mode_snd_der()

        ll_gain = f_model_outlier_object.standardized_likelihood()

        # The smallest value should be 0, but due to numerical issues it can
        # come out slightly negative, so clamp it to a small positive epsilon.
        eps = 1.0e-10
        zeros = flex.bool(ll_gain < eps)
        p_values = ll_gain
        p_values = p_values.set_selected(zeros, eps)
        p_values = erf(flex.sqrt(p_values / 2.0))
        p_values = 1.0 - flex.pow(p_values, float(p_values.size()))

        # select on p-values
        flags = flex.bool(p_values > level)
        flags = self.miller_obs.customized_copy(data=flags)
        ll_gain = self.miller_obs.customized_copy(data=ll_gain)
        p_values = self.miller_obs.customized_copy(data=p_values)

        log_message = """

Model based outlier rejection.
------------------------------

Calculated amplitudes and estimated values of alpha and beta
are used to compute the log-likelihood of the observed amplitude.
The method is inspired by Read, Acta Cryst. (1999). D55, 1759-1764.
Outliers are rejected on the basis of the assumption that a scaled
log-likelihood difference 2(log[P(Fobs)]-log[P(Fmode)])/Q\" is distributed
according to a Chi-square distribution (Q\" is equal to the second
derivative of the log-likelihood function at the mode of the
distribution).
The outlier threshold of the p-value relates to the p-value of the
extreme value distribution of the chi-square distribution.

"""

        flags.map_to_asu()
        ll_gain.map_to_asu()
        p_values.map_to_asu()

        assert flags.indices().all_eq(self.miller_obs.indices())
        assert ll_gain.indices().all_eq(self.miller_obs.indices())
        assert p_values.indices().all_eq(self.miller_obs.indices())

        log_message = self.make_log_model(log_message, flags, ll_gain,
                                          p_values, obs_norm, calc_norm,
                                          sigmaa, plot_out)
        tmp_log = StringIO()
        print >> tmp_log, log_message
        # histogram of log likelihood gain values
        print >> tmp_log
        print >> tmp_log, "The histoghram of scaled (LL-gain) values is shown below."
        print >> tmp_log, "  Note: scaled (LL-gain) is approximately Chi-square distributed."
        print >> tmp_log
        print >> tmp_log, "  scaled(LL-gain)  Frequency"
        histo = flex.histogram(ll_gain.data(), 15)
        histo.show(f=tmp_log, format_cutoffs='%7.3f')

        print >> self.out, tmp_log.getvalue()

        if not return_data:
            return flags
        else:
            assert flags.indices().all_eq(self.miller_obs.indices())
            return self.miller_obs.select(flags.data())
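
A single-reflection illustration of the p-value recipe described in the log message above, using only the standard library; the numbers are made up.

import math

ll_gain = 4.0       # scaled LL gain, approximately chi-square distributed (1 dof)
n_refl = 10000      # number of reflections in the data set
p_single = math.erf(math.sqrt(ll_gain / 2.0))   # chi-square(1 dof) CDF at ll_gain
p_extreme = 1.0 - p_single ** n_refl            # extreme-value correction over n_refl
is_outlier = p_extreme <= 0.01                  # reject at the chosen level (here 1%)
print(p_single, p_extreme, is_outlier)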
Example #46
0
 def hist(data):
     from cStringIO import StringIO
     sio = StringIO()
     flex.histogram(data=data, n_slots=10) \
       .show(f=sio, prefix="  ", format_cutoffs="%8.2f")
     return sio.getvalue().splitlines()
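
A hedged usage sketch for the hist() helper above; the input values are invented, and Python 2 is assumed because of the cStringIO import.

from scitbx.array_family import flex

for line in hist(flex.double([0.7, 1.2, 2.2, 3.4, 5.1, 2.8, 1.9])):
    print(line)   # one formatted histogram row per line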