Ejemplo n.º 1
0
    def __init__(self, atom_group, global_b_facs_stats=None):
        """Collect summary data for the atoms of ``atom_group``.

        Stores the residue class (looked up from ``atom_group.resname``), the
        B-factors, occupancies and coordinate centroid of the group's atoms,
        plus ``basic_statistics`` summaries of the B-factors and occupancies.

        Parameters:
            atom_group: object providing ``resname`` and ``atoms()``; the
                returned atoms must support ``extract_b()``,
                ``extract_occ()`` and ``extract_xyz()``.
            global_b_facs_stats: optional object with a
                ``to_z_score(b_vals=...)`` method. When given, the B-factors
                are also converted to Z-scores and summarised; when ``None``,
                ``b_facs_z`` and ``b_facs_z_stats`` are set to ``None``.
        """

        self.atom_group = atom_group

        # Meta
        self.residue_class = iotbx.pdb.common_residue_names_get_class(
            atom_group.resname)

        # B-Factors
        self.b_facs = atom_group.atoms().extract_b()
        self.b_facs_stats = basic_statistics(self.b_facs)
        # B-Factors Z-Statistics
        if global_b_facs_stats is not None:
            self.b_facs_z = global_b_facs_stats.to_z_score(
                b_vals=atom_group.atoms().extract_b())
            # Summarise the Z-scores stored just above (the attribute is
            # ``b_facs_z``; no ``b_factors_z`` attribute exists).
            self.b_facs_z_stats = basic_statistics(self.b_facs_z)
        else:
            self.b_facs_z = None
            self.b_facs_z_stats = None

        # Occupancies
        self.occies = atom_group.atoms().extract_occ()
        self.occies_stats = basic_statistics(self.occies)

        # Coordinates
        self.centroid = atom_group.atoms().extract_xyz().mean()
Ejemplo n.º 2
0
def exercise_variate_generators():
    """Check scitbx.random variate generators against reference values.

    Covers the normal, Bernoulli, gamma and Poisson distributions: draws made
    after seeding with 0 are compared with hard-coded reference numbers, and
    the moments of large samples are compared with the expected values within
    the tolerances given in each assertion.
    """
    from scitbx.random import (variate, normal_distribution,
                               bernoulli_distribution, gamma_distribution,
                               poisson_distribution)

    n_samples = 1000000

    # Normal distribution: re-seeding must reproduce the same draws each time.
    reference_normal_draws = (
        1.21838707856, 1.732426915, 0.838038157555, -0.296895169923,
        0.246451144946, -0.635474652255, -0.0980626986425, 0.36458295417,
        0.534073780268, -0.665073136294)
    for _ in range(10):
        scitbx.random.set_random_seed(0)
        normal_gen = variate(normal_distribution())
        assert approx_equal(normal_gen(), -0.917787219374)
        assert approx_equal(normal_gen(10), reference_normal_draws)

    # Moments of a large sample from the generator of the last iteration.
    normal_stats = basic_statistics(
        flex.double(itertools.islice(normal_gen, n_samples)))
    assert approx_equal(normal_stats.mean, 0, eps=0.005)
    assert approx_equal(normal_stats.biased_variance, 1, eps=0.005)
    assert approx_equal(normal_stats.skew, 0, eps=0.005)
    assert approx_equal(normal_stats.kurtosis, 3, eps=0.005)

    # Bernoulli distribution: boolean draws, about 10% of them True.
    bernoulli_gen = variate(bernoulli_distribution(0.1))
    for flag in itertools.islice(bernoulli_gen, 10):
        assert flag in (True, False)
    bernoulli_draws = flex.bool(itertools.islice(bernoulli_gen, 10000))
    true_fraction = bernoulli_draws.count(True) / len(bernoulli_draws)
    assert approx_equal(true_fraction, 0.1, eps=0.01)

    # Boost 1.64 changes the exponential distribution to use Ziggurat algorithm
    scitbx.random.set_random_seed(0)
    gamma_gen = variate(gamma_distribution())
    if boost_version < 106400:
        expected_first = 0.79587450456577546
        expected_pair = (0.89856038848394115, 1.2559307580473893)
    else:
        expected_first = 0.864758191783
        expected_pair = (1.36660841837, 2.26740986094)
    assert approx_equal(gamma_gen(), expected_first)
    assert approx_equal(gamma_gen(2), expected_pair)
    gamma_stats = basic_statistics(
        flex.double(itertools.islice(gamma_gen, n_samples)))
    assert approx_equal(gamma_stats.mean, 1, eps=0.005)
    assert approx_equal(gamma_stats.skew, 2, eps=0.01)
    assert approx_equal(gamma_stats.biased_variance, 1, eps=0.005)

    # Gamma distribution with explicit shape and scale parameters.
    scitbx.random.set_random_seed(0)
    scaled_gamma_gen = variate(gamma_distribution(alpha=2, beta=3))
    assert approx_equal(scaled_gamma_gen(), 16.670850592722729)
    assert approx_equal(scaled_gamma_gen(2),
                        (10.03662877519449, 3.9357158398972873))
    scaled_gamma_stats = basic_statistics(
        flex.double(itertools.islice(scaled_gamma_gen, n_samples)))
    assert approx_equal(scaled_gamma_stats.mean, 6, eps=0.005)
    assert approx_equal(scaled_gamma_stats.skew, 2 / math.sqrt(2), eps=0.05)
    assert approx_equal(scaled_gamma_stats.biased_variance, 18, eps=0.05)

    # Poisson distribution: sample mean and variance should both match `mean`.
    mean = 10.0
    poisson_gen = variate(poisson_distribution(mean))
    poisson_draws = poisson_gen(n_samples).as_double()
    sample_mean = flex.mean(poisson_draws)
    sample_variance = (flex.mean(poisson_draws * poisson_draws)
                       - sample_mean * sample_mean)
    assert approx_equal(sample_mean, mean, eps=0.05)
    assert approx_equal(sample_variance, mean, eps=0.05)
Ejemplo n.º 3
0
def exercise_variate_generators():
  """Check scitbx.random variate generators against reference values.

  Covers the normal, Bernoulli, gamma and Poisson distributions: draws made
  after seeding with 0 are compared with hard-coded reference numbers, and
  the moments of large samples are compared with the expected values within
  the tolerances given in each assertion.
  """
  from scitbx.random \
       import variate, normal_distribution, bernoulli_distribution, \
              gamma_distribution, poisson_distribution
  # Normal distribution: re-seeding must reproduce the same draws each time.
  # `range` (not `xrange`) so the function also runs on Python 3.
  for i in range(10):
    scitbx.random.set_random_seed(0)
    g = variate(normal_distribution())
    assert approx_equal(g(), -1.2780081289048213)
    assert approx_equal(g(10),
      (-0.40474189234755492, -0.41845505596083288,
       -1.8825790263067721, -1.5779112018107659,
       -1.1888174422378859, -1.8619619179878537,
       -0.53946818661388318, -1.2400941724410812,
       0.64511959841907285, -0.59934120033270688))

  # Moments of a large sample from the generator of the last iteration.
  stat = basic_statistics(flex.double(itertools.islice(g, 1000000)))
  assert approx_equal(stat.mean,            0, eps=0.005)
  assert approx_equal(stat.biased_variance, 1, eps=0.005)
  assert approx_equal(stat.skew,            0, eps=0.005)
  assert approx_equal(stat.kurtosis,        3, eps=0.005)

  # Bernoulli distribution: boolean draws, about 10% of them True.
  bernoulli_seq = variate(bernoulli_distribution(0.1))
  for b in itertools.islice(bernoulli_seq, 10):
    assert b in (True, False)
  bernoulli_sample = flex.bool(itertools.islice(bernoulli_seq, 10000))
  assert approx_equal(
    bernoulli_sample.count(True)/len(bernoulli_sample),
    0.1,
    eps = 0.01)

  # Gamma distribution with default parameters.
  scitbx.random.set_random_seed(0)
  g = variate(gamma_distribution())
  assert approx_equal(g(), 0.79587450456577546)
  assert approx_equal(g(2), (0.89856038848394115, 1.2559307580473893))
  stat = basic_statistics(flex.double(itertools.islice(g, 1000000)))
  assert approx_equal(stat.mean,            1, eps=0.005)
  assert approx_equal(stat.skew,            2, eps=0.005)
  assert approx_equal(stat.biased_variance, 1, eps=0.005)
  # Gamma distribution with explicit alpha and beta.
  scitbx.random.set_random_seed(0)
  g = variate(gamma_distribution(alpha=2, beta=3))
  assert approx_equal(g(), 16.670850592722729)
  assert approx_equal(g(2), (10.03662877519449, 3.9357158398972873))
  stat = basic_statistics(flex.double(itertools.islice(g, 1000000)))
  assert approx_equal(stat.mean,            6, eps=0.005)
  assert approx_equal(stat.skew,            2/math.sqrt(2), eps=0.05)
  assert approx_equal(stat.biased_variance, 18, eps=0.05)

  # Poisson distribution: sample mean and variance should both match `mean`.
  mean = 10.0
  pv = variate(poisson_distribution(mean))
  draws = pv(1000000).as_double()
  m = flex.mean(draws)
  v = flex.mean(draws*draws) - m*m
  assert approx_equal(m,mean,eps=0.05)
  assert approx_equal(v,mean,eps=0.05)
Ejemplo n.º 4
0
def exercise_variate_generators():
    """Check scitbx.random variate generators against reference values.

    Covers the normal, Bernoulli, gamma and Poisson distributions: draws made
    after seeding with 0 are compared with hard-coded reference numbers, and
    the moments of large samples are compared with the expected values within
    the tolerances given in each assertion.
    """
    from scitbx.random \
         import variate, normal_distribution, bernoulli_distribution, \
                gamma_distribution, poisson_distribution
    # Normal distribution: re-seeding must reproduce the same draws each time.
    # `range` (not `xrange`) so the function also runs on Python 3.
    for i in range(10):
        scitbx.random.set_random_seed(0)
        g = variate(normal_distribution())
        assert approx_equal(g(), -1.2780081289048213)
        assert approx_equal(
            g(10),
            (-0.40474189234755492, -0.41845505596083288, -1.8825790263067721,
             -1.5779112018107659, -1.1888174422378859, -1.8619619179878537,
             -0.53946818661388318, -1.2400941724410812, 0.64511959841907285,
             -0.59934120033270688))

    # Moments of a large sample from the generator of the last iteration.
    stat = basic_statistics(flex.double(itertools.islice(g, 1000000)))
    assert approx_equal(stat.mean, 0, eps=0.005)
    assert approx_equal(stat.biased_variance, 1, eps=0.005)
    assert approx_equal(stat.skew, 0, eps=0.005)
    assert approx_equal(stat.kurtosis, 3, eps=0.005)

    # Bernoulli distribution: boolean draws, about 10% of them True.
    bernoulli_seq = variate(bernoulli_distribution(0.1))
    for b in itertools.islice(bernoulli_seq, 10):
        assert b in (True, False)
    bernoulli_sample = flex.bool(itertools.islice(bernoulli_seq, 10000))
    assert approx_equal(bernoulli_sample.count(True) / len(bernoulli_sample),
                        0.1,
                        eps=0.01)

    # Gamma distribution with default parameters.
    scitbx.random.set_random_seed(0)
    g = variate(gamma_distribution())
    assert approx_equal(g(), 0.79587450456577546)
    assert approx_equal(g(2), (0.89856038848394115, 1.2559307580473893))
    stat = basic_statistics(flex.double(itertools.islice(g, 1000000)))
    assert approx_equal(stat.mean, 1, eps=0.005)
    assert approx_equal(stat.skew, 2, eps=0.005)
    assert approx_equal(stat.biased_variance, 1, eps=0.005)
    # Gamma distribution with explicit alpha and beta.
    scitbx.random.set_random_seed(0)
    g = variate(gamma_distribution(alpha=2, beta=3))
    assert approx_equal(g(), 16.670850592722729)
    assert approx_equal(g(2), (10.03662877519449, 3.9357158398972873))
    stat = basic_statistics(flex.double(itertools.islice(g, 1000000)))
    assert approx_equal(stat.mean, 6, eps=0.005)
    assert approx_equal(stat.skew, 2 / math.sqrt(2), eps=0.05)
    assert approx_equal(stat.biased_variance, 18, eps=0.05)

    # Poisson distribution: sample mean and variance should both match `mean`.
    mean = 10.0
    pv = variate(poisson_distribution(mean))
    draws = pv(1000000).as_double()
    m = flex.mean(draws)
    v = flex.mean(draws * draws) - m * m
    assert approx_equal(m, mean, eps=0.05)
    assert approx_equal(v, mean, eps=0.05)
Ejemplo n.º 5
0
 def _calculate_statistics(self, observations, uncertainties):
     """Summarise one set of observations and their uncertainties.

     Returns the 5-tuple ``(stdv, sadj, skew, kurt, bimo)``: the
     bias-corrected standard deviation of the observations, the result of
     ``estimate_true_underlying_sd`` for the observations and uncertainties,
     the skew, the kurtosis, and the ratio ``(skew**2 + 1) / kurt``.
     """
     summary = basic_statistics(
         flex.double(numpy.ascontiguousarray(observations)))
     sample_sd = summary.bias_corrected_standard_deviation
     # Start the underlying-sd estimate from a small fraction of the sample sd.
     initial_sigma = sample_sd * 0.001
     adjusted_sd = estimate_true_underlying_sd(obs_vals=observations,
                                               obs_error=uncertainties,
                                               est_sigma=initial_sigma)
     skewness = summary.skew
     kurt_value = summary.kurtosis
     bimo_ratio = (skewness**2 + 1) / kurt_value
     return (sample_sd, adjusted_sd, skewness, kurt_value, bimo_ratio)
Ejemplo n.º 6
0
    def from_pdb(cls, pdb_input=None, pdb_hierarchy=None):
        """Calculate the b-factor statistics of a model.

        Exactly one of ``pdb_input`` and ``pdb_hierarchy`` must be provided
        (an ``AssertionError`` is raised otherwise). When ``pdb_input`` is
        provided, the hierarchy is built with ``construct_hierarchy()``.

        Returns ``cls(all=..., protein=..., backbone=..., sidechain=...)``,
        where each value is the ``basic_statistics`` of the B-factors
        extracted from the ``non_h``, ``protein``, ``backbone`` and
        ``sidechains`` selections of the hierarchy respectively.
        """

        assert [pdb_input, pdb_hierarchy
                ].count(None) == 1, 'Provide pdb_input OR pdb_hierarchy'
        # Test against None, matching the assertion above: a truthiness test
        # would skip construction for a supplied-but-falsy input and leave
        # pdb_hierarchy as None.
        if pdb_input is not None:
            pdb_hierarchy = pdb_input.construct_hierarchy()

        # One selection cache is shared by all four selections below.
        cache = pdb_hierarchy.atom_selection_cache()

        all_b = non_h(hierarchy=pdb_hierarchy, cache=cache,
                      copy=True).atoms().extract_b()
        protein_b = protein(hierarchy=pdb_hierarchy, cache=cache,
                            copy=True).atoms().extract_b()
        backbone_b = backbone(hierarchy=pdb_hierarchy, cache=cache,
                              copy=True).atoms().extract_b()
        sidechain_b = sidechains(hierarchy=pdb_hierarchy,
                                 cache=cache,
                                 copy=True).atoms().extract_b()

        return cls(all=basic_statistics(all_b),
                   protein=basic_statistics(protein_b),
                   backbone=basic_statistics(backbone_b),
                   sidechain=basic_statistics(sidechain_b))
Ejemplo n.º 7
0
  def run_detail(self, reflections):
    """Compute per-resolution-bin intensity statistics and log them as tables.

    Each rank accumulates its own statistics from ``reflections`` and, when
    ``params.output.log_level`` is 0, logs its own table. The per-bin sums are
    then combined across ranks, the per-bin I/sigma lists are gathered on
    rank 0, and rank 0 writes the combined table to the main log.
    """
    # The resolution binning objects are pre-created and kept in the parameters
    stats_params = self.params.statistics
    self.resolution_binner = stats_params.resolution_binner
    self.hkl_resolution_bins = stats_params.hkl_resolution_bins

    # n_bins_all() is (self.params.statistics.n_bins + 2); the extra 2 account
    # for the hkls outside of the binner resolution range
    bin_count = self.resolution_binner.n_bins_all()

    # Every rank must initialize the statistics arrays, even a rank without
    # any reflections, to enable the MPI all-rank reduction below.
    self.I_sum = flex.double(bin_count, 0.0)    # sum of the weighted mean intensities from all asu HKLs
    self.Isig_sum = flex.double(bin_count, 0.0) # sum of I/sigma from all asu HKLs
    self.Isig_list = [flex.double() for _ in range(bin_count)]
    self.n_sum = flex.int(bin_count, 0)         # number of theoretically predicted asu hkls
    self.m_sum = flex.int(bin_count, 0)         # number of observed asu hkls
    self.mm_sum = flex.int(bin_count, 0)        # number of observed asu hkls with multiplicity > 1

    # Per-rank statistics: calculate, format and (optionally) output
    self.logger.log("Calculating intensity statistics...")
    self.calculate_intensity_statistics(reflections)
    rank_table = self.build_intensity_table(
      I_sum=self.I_sum,
      Isig_sum=self.Isig_sum,
      n_sum=self.n_sum,
      m_sum=self.m_sum,
      mm_sum=self.mm_sum,
      unmerged_meanIsig=None,
      unmerged_stddevIsig=None,
      unmerged_skewIsig=None)
    if self.params.output.log_level == 0:
      self.logger.log(rank_table.get_table_text(), rank_prepend=False)

    # Accumulate the per-bin sums from all ranks
    helper = self.mpi_helper
    total_I_sum = helper.cumulative_flex(self.I_sum, flex.double)
    total_Isig_sum = helper.cumulative_flex(self.Isig_sum, flex.double)
    total_n_sum = helper.cumulative_flex(self.n_sum, flex.int)
    total_m_sum = helper.cumulative_flex(self.m_sum, flex.int)
    total_mm_sum = helper.cumulative_flex(self.mm_sum, flex.int)

    on_root = helper.rank == 0

    # Gather the I/sigma values of each bin on rank 0 and summarise them there
    mean_isig_by_bin = []
    stddev_isig_by_bin = []
    skew_isig_by_bin = []
    for bin_id in range(bin_count):
      # Every rank takes part in the gather; only rank 0 uses the result.
      gathered = helper.comm.gather(self.Isig_list[bin_id], 0)
      if not on_root:
        continue
      pooled_isig = flex.double()
      for rank_values in gathered:
        pooled_isig.extend(rank_values)
      bin_stats = basic_statistics(pooled_isig)
      mean_isig_by_bin.append(bin_stats.mean)
      stddev_isig_by_bin.append(bin_stats.bias_corrected_standard_deviation)
      skew_isig_by_bin.append(bin_stats.skew)

    # Only rank 0 formats and outputs the all-rank totals
    if not on_root:
      return
    total_table = self.build_intensity_table(
      I_sum=total_I_sum,
      Isig_sum=total_Isig_sum,
      n_sum=total_n_sum,
      m_sum=total_m_sum,
      mm_sum=total_mm_sum,
      unmerged_meanIsig=mean_isig_by_bin,
      unmerged_stddevIsig=stddev_isig_by_bin,
      unmerged_skewIsig=skew_isig_by_bin)
    self.logger.main_log(total_table.get_table_text())
    if self.last_bin_incomplete:
      self.logger.main_log("Warning: the last resolution shell is incomplete. If your data was integrated to that resolution,\nconsider using scaling.resolution_scalar=%f or lower."%self.suggested_resolution_scalar)
Ejemplo n.º 8
0
    def run(self):
        """Process the dataset"""

        dataset, dataset_map, grid, map_analyser, args, verbose = self.data

        # TODO Hardcoded check - to be removed? TODO
        assert dataset_map.is_sparse()

        # ============================================================================>
        # Prepare output objects
        # ============================================================================>
        log_strs = []
        log_file = dataset.file_manager.get_file('dataset_log')
        log = Log(log_file=log_file, verbose=False, silent=True)

        # ============================================================================>
        # Build new blob search object
        # ============================================================================>
        blob_finder = PanddaZMapAnalyser(params=args.params.z_map_analysis,
                                         grid=grid,
                                         log=log)
        print('Writing log for dataset {!s} to ...{}'.format(
            dataset.tag, log_file[log_file.index('processed'):]))

        # ============================================================================>
        # Extract the global mask object from the grid
        # ============================================================================>
        dset_total_temp = grid.global_mask().total_mask_binary().copy()

        # ============================================================================>
        # Generate symmetry masks for this dataset
        # ============================================================================>
        log.bar()
        log('Masking symetry contacts from Z-map.')
        # Generate symmetry contacts for this dataset and align to reference frame
        dataset_sym_copies = dataset.model.crystal_contacts(
            distance_cutoff=args.params.masks.outer_mask + 5,
            combine_copies=True)
        dataset_sym_copies.atoms().set_xyz(
            dataset.model.alignment.nat2ref(
                dataset_sym_copies.atoms().extract_xyz()))
        # Only need to write if writing reference frame maps
        if args.output.developer.write_reference_frame_maps:
            dataset_sym_copies.write_pdb_file(
                dataset.file_manager.get_file('symmetry_copies'))
        # Extract protein atoms from the symmetry copies
        dataset_sym_sites_cart = non_water(
            dataset_sym_copies).atoms().extract_xyz()
        # Generate symmetry contacts grid mask
        dataset_mask = GridMask(parent=grid,
                                sites_cart=dataset_sym_sites_cart,
                                max_dist=args.params.masks.outer_mask,
                                min_dist=args.params.masks.inner_mask_symmetry)
        # Combine with the total mask to generate custom mask for this dataset
        dset_total_temp.put(dataset_mask.inner_mask_indices(), 0)
        dset_total_idxs = numpy.where(dset_total_temp)[0]
        log('After masking with symmetry contacts: {} points for Z-map analysis'
            .format(len(dset_total_idxs)))
        # Write map of grid + symmetry mask
        if args.output.developer.write_reference_frame_grid_masks:
            grid.write_indices_as_map(
                indices=dset_total_idxs,
                f_name=dataset.file_manager.get_file('grid_mask'),
                origin_shift=True)

        # ============================================================================>
        # Generate custom masks for this dataset
        # ============================================================================>
        if args.params.z_map_analysis.masks.selection_string is not None:
            log.bar()
            log('Applying custom mask to the Z-map: "{}"'.format(
                args.params.z_map_analysis.masks.selection_string))
            cache = dataset.model.hierarchy.atom_selection_cache()
            custom_mask_selection = cache.selection(
                args.params.z_map_analysis.masks.selection_string)
            custom_mask_sites = dataset.model.hierarchy.select(
                custom_mask_selection).atoms().extract_xyz()
            log('Masking with {} atoms'.format(len(custom_mask_sites)))
            # Generate custom grid mask
            dataset_mask = GridMask(
                parent=grid,
                sites_cart=custom_mask_sites,
                max_dist=args.params.z_map_analysis.masks.outer_mask,
                min_dist=args.params.z_map_analysis.masks.inner_mask)
            # Combine with the total mask to generate custom mask for this dataset
            dset_total_temp *= dataset_mask.total_mask_binary()
            dset_total_idxs = numpy.where(dset_total_temp)[0]
            log('After masking with custom mask: {} points for Z-map analysis'.
                format(len(dset_total_idxs)))
            # Write out mask
            grid.write_indices_as_map(
                indices=dset_total_idxs,
                f_name=dataset.file_manager.get_file('z_map_mask'),
                origin_shift=True)

        # ============================================================================>
        #####
        # CALCULATE Z-MAPS AND LOOK FOR LARGE BLOBS
        #####
        # ============================================================================>
        # Check maps and that all maps are sparse
        # ============================================================================>
        assert dataset_map.data is not None, 'Something has gone wrong - this dataset has no loaded map'
        assert dataset_map.is_sparse(
        ) is map_analyser.statistical_maps.mean_map.is_sparse()
        assert dataset_map.is_sparse(
        ) is map_analyser.statistical_maps.medn_map.is_sparse()
        assert dataset_map.is_sparse(
        ) is map_analyser.statistical_maps.stds_map.is_sparse()
        assert dataset_map.is_sparse(
        ) is map_analyser.statistical_maps.sadj_map.is_sparse()
        # ============================================================================>
        # CALCULATE MEAN-DIFF MAPS
        # ============================================================================>
        mean_diff_map = map_analyser.calculate_z_map(map=dataset_map,
                                                     method='none')
        #        # ============================================================================>
        #        # NAIVE Z-MAP - NOT USING UNCERTAINTY ESTIMATION OR ADJUSTED STDS
        #        # ============================================================================>
        #        z_map_naive = map_analyser.calculate_z_map(map=dataset_map, method='naive')
        #        z_map_naive_normalised = z_map_naive.normalised_copy()
        # ============================================================================>
        # UNCERTAINTY Z-MAP - NOT USING ADJUSTED STDS
        # ============================================================================>
        z_map_uncty = map_analyser.calculate_z_map(
            map=dataset_map,
            uncertainty=dataset_map.meta.map_uncertainty,
            method='uncertainty')
        z_map_uncty_normalised = z_map_uncty.normalised_copy()
        # ============================================================================>
        # ADJUSTED+UNCERTAINTY Z-MAP
        # ============================================================================>
        z_map_compl = map_analyser.calculate_z_map(
            map=dataset_map,
            uncertainty=dataset_map.meta.map_uncertainty,
            method='adjusted+uncertainty')
        z_map_compl_normalised = z_map_compl.normalised_copy()

        # ============================================================================>
        # SELECT WHICH MAP TO DO THE BLOB SEARCHING ON
        # ============================================================================>
        #        if args.params.statistical_maps.z_map_type == 'naive':
        #            z_map = z_map_naive_normalised
        #            z_map_stats = basic_statistics(flex.double(z_map_naive.data))
        if args.params.statistical_maps.z_map_type == 'uncertainty':
            z_map = z_map_uncty_normalised
            z_map_stats = basic_statistics(flex.double(z_map_uncty.data))
        elif args.params.statistical_maps.z_map_type == 'adjusted+uncertainty':
            z_map = z_map_compl_normalised
            z_map_stats = basic_statistics(flex.double(z_map_compl.data))
        else:
            raise Exception('Invalid Z-map type')

        # ============================================================================>
        # RECORD Z-MAP FOR STATISTICS
        # ============================================================================>
        # Calculate statistics of z-maps
        dataset_map.meta.z_mean = z_map_stats.mean
        dataset_map.meta.z_stdv = z_map_stats.bias_corrected_standard_deviation
        dataset_map.meta.z_skew = z_map_stats.skew
        dataset_map.meta.z_kurt = z_map_stats.kurtosis
        # ============================================================================>
        z_map.meta.type = 'z-map'
        # ============================================================================>

        # ============================================================================>
        #####
        # WRITE ALL MAP DISTRIBUTIONS (THESE DON'T USE MUCH SPACE)
        #####
        # ============================================================================>
        # Sampled Map
        analyse_graphs.map_value_distribution(
            f_name=dataset.file_manager.get_file('s_map_png'),
            plot_vals=dataset_map.get_map_data(sparse=True))
        # Mean-Difference
        analyse_graphs.map_value_distribution(
            f_name=dataset.file_manager.get_file('d_mean_map_png'),
            plot_vals=mean_diff_map.get_map_data(sparse=True))
        #        # Naive Z-Map
        #        analyse_graphs.map_value_distribution(f_name      = dataset.file_manager.get_file('z_map_naive_png'),
        #                                              plot_vals   = z_map_naive.get_map_data(sparse=True),
        #                                              plot_normal = True)
        #        # Normalised Naive Z-Map
        #        analyse_graphs.map_value_distribution(f_name      = dataset.file_manager.get_file('z_map_naive_normalised_png'),
        #                                              plot_vals   = z_map_naive_normalised.get_map_data(sparse=True),
        #                                              plot_normal = True)
        # Uncertainty Z-Map
        analyse_graphs.map_value_distribution(
            f_name=dataset.file_manager.get_file('z_map_uncertainty_png'),
            plot_vals=z_map_uncty.get_map_data(sparse=True),
            plot_normal=True)
        # Normalised Uncertainty Z-Map
        analyse_graphs.map_value_distribution(
            f_name=dataset.file_manager.get_file(
                'z_map_uncertainty_normalised_png'),
            plot_vals=z_map_uncty_normalised.get_map_data(sparse=True),
            plot_normal=True)
        # Corrected Z-Map
        analyse_graphs.map_value_distribution(
            f_name=dataset.file_manager.get_file('z_map_corrected_png'),
            plot_vals=z_map_compl.get_map_data(sparse=True),
            plot_normal=True)
        # Normalised Corrected Z-Map
        analyse_graphs.map_value_distribution(
            f_name=dataset.file_manager.get_file(
                'z_map_corrected_normalised_png'),
            plot_vals=z_map_compl_normalised.get_map_data(sparse=True),
            plot_normal=True)
        # Plot Q-Q Plot of Corrected Z-Map to see how normal it is
        analyse_graphs.qq_plot_against_normal(
            f_name=dataset.file_manager.get_file('z_map_qq_plot_png'),
            plot_vals=z_map_compl_normalised.get_map_data(sparse=True))

        # ============================================================================>
        #####
        # LOOK FOR CLUSTERS OF LARGE Z-SCORES
        #####
        # ============================================================================>
        # Contour the grid at a particular Z-Value
        # ============================================================================>
        num_clusters, z_clusters = blob_finder.cluster_high_z_values(
            z_map_data=z_map.get_map_data(sparse=False),
            point_mask_idx=dset_total_idxs)
        # ============================================================================>
        # Too many points to cluster -- probably a bad dataset
        # ============================================================================>
        if num_clusters == -1:
            # This dataset is too noisy to analyse - flag!
            log_strs.append(
                'Z-Map too noisy to analyse -- not sure what has gone wrong here...'
            )
            return dataset, dataset_map.meta, log_strs

        # ============================================================================>
        #####
        # FILTER/SELECT CLUSTERS OF Z-SCORES
        #####
        # ============================================================================>
        # Filter the clusters by size and peak height
        # ============================================================================>
        if num_clusters > 0:
            num_clusters, z_clusters = blob_finder.filter_z_clusters_1(
                z_clusters=z_clusters)
            blob_finder.validate_clusters(z_clusters)
            if num_clusters == 0:
                log_strs.append('===> Minimum cluster peak/size not reached.')
        # ============================================================================>
        # Filter the clusters by distance from protein
        # ============================================================================>
        if num_clusters > 0:
            num_clusters, z_clusters = blob_finder.filter_z_clusters_2(
                z_clusters=z_clusters, dataset=dataset)
            blob_finder.validate_clusters(z_clusters)
            if num_clusters == 0:
                log_strs.append('===> Clusters too far from protein.')
        # ============================================================================>
        # Group Nearby Clusters Together
        # ============================================================================>
        if num_clusters > 0:
            num_clusters, z_clusters = blob_finder.group_clusters(
                z_clusters=z_clusters)
            blob_finder.validate_clusters(z_clusters)
        # ============================================================================>
        # Filter the clusters by symmetry equivalence
        # ============================================================================>
        if num_clusters > 0:
            num_clusters, z_clusters = blob_finder.filter_z_clusters_3(
                z_clusters=z_clusters, dataset=dataset)
            blob_finder.validate_clusters(z_clusters)

        # ============================================================================>
        #####
        # WRITE MAPS
        #####
        # ============================================================================>
        # write dataset maps in the reference frame
        # ============================================================================>
        if args.output.developer.write_reference_frame_maps:
            dataset_map.to_file(
                filename=dataset.file_manager.get_file('sampled_map'),
                space_group=grid.space_group())
            mean_diff_map.to_file(
                filename=dataset.file_manager.get_file('mean_diff_map'),
                space_group=grid.space_group())
            z_map.to_file(filename=dataset.file_manager.get_file('z_map'),
                          space_group=grid.space_group())
        # ============================================================================>
        # Write out mask of the high z-values
        # ============================================================================>
        if args.output.developer.write_reference_frame_grid_masks:
            # Write map of where the blobs are (high-Z mask)
            highz_points = []
            [highz_points.extend(list(x[0])) for x in z_clusters]
            highz_points = [map(int, v) for v in highz_points]
            highz_indices = map(grid.indexer(), list(highz_points))
            grid.write_indices_as_map(
                indices=highz_indices,
                f_name=dataset.file_manager.get_file('high_z_mask'),
                origin_shift=True)
        # ============================================================================>
        # Write different Z-Maps? (Probably only needed for testing)
        # ============================================================================>
        if args.output.developer.write_reference_frame_all_z_map_types:
            #            z_map_naive.to_file(filename=dataset.file_manager.get_file('z_map_naive'), space_group=grid.space_group())
            #            z_map_naive_normalised.to_file(filename=dataset.file_manager.get_file('z_map_naive_normalised'), space_group=grid.space_group())
            z_map_uncty.to_file(
                filename=dataset.file_manager.get_file('z_map_uncertainty'),
                space_group=grid.space_group())
            z_map_uncty_normalised.to_file(
                filename=dataset.file_manager.get_file(
                    'z_map_uncertainty_normalised'),
                space_group=grid.space_group())
            z_map_compl.to_file(
                filename=dataset.file_manager.get_file('z_map_corrected'),
                space_group=grid.space_group())
            z_map_compl_normalised.to_file(
                filename=dataset.file_manager.get_file(
                    'z_map_corrected_normalised'),
                space_group=grid.space_group())

        # ============================================================================>
        # Skip to next dataset if no clusters found
        # ============================================================================>
        if num_clusters > 0:
            log_strs.append('===> {!s} Cluster(s) found.'.format(num_clusters))
        else:
            log_strs.append('===> No Clusters found.')
            return (dataset, dataset_map.meta, log_strs)
        assert num_clusters > 0, 'NUMBER OF CLUSTERS AFTER FILTERING == 0!'

        # ============================================================================>
        # Extract the map data in non-sparse format
        # ============================================================================>
        dset_map_data = dataset_map.get_map_data(sparse=False)
        avrg_map_data = map_analyser.average_map().get_map_data(sparse=False)
        # ============================================================================>
        # Process the identified features
        # ============================================================================>
        for event_idx, (event_points, event_values) in enumerate(z_clusters):
            # Number events from 1
            event_num = event_idx + 1
            # Create a unique identifier for this event
            event_key = (dataset.tag, event_num)
            # ============================================================================>
            # Create a point cluster object
            # ============================================================================>
            point_cluster = PointCluster(id=event_key,
                                         points=event_points,
                                         values=event_values)
            # ============================================================================>
            # Estimate the background correction of the detected feature
            # ============================================================================>
            # Extract sites for this cluster and estimate the background correction for the event
            log_strs.append('----------------------------------->>>')
            log_strs.append(
                'Estimating Event {!s} Background Correction'.format(
                    event_num))
            # Generate custom grid mask for this dataset
            event_mask = GridMask(parent=grid,
                                  sites_cart=grid.grid2cart(
                                      point_cluster.points, origin_shift=True),
                                  max_dist=2.0,
                                  min_dist=0.0)
            log_strs.append(
                '=> Event sites ({!s} points) expanded to {!s} points'.format(
                    len(point_cluster.points),
                    len(event_mask.outer_mask_indices())))
            # Select masks to define regions for bdc calculation
            exp_event_idxs = flex.size_t(event_mask.outer_mask_indices())
            reference_idxs = flex.size_t(
                grid.global_mask().inner_mask_indices())
            # ============================================================================>
            # Generate BDC-estimation curve and estimate BDC
            # ============================================================================>
            event_remains, event_corrs, global_corrs = calculate_varying_bdc_correlations(
                ref_map_data=avrg_map_data,
                query_map_data=dset_map_data,
                feature_idxs=exp_event_idxs,
                reference_idxs=reference_idxs,
                min_remain=1.0 - args.params.background_correction.max_bdc,
                max_remain=1.0 - args.params.background_correction.min_bdc,
                bdc_increment=args.params.background_correction.increment,
                verbose=verbose)
            event_remain_est = calculate_maximum_series_discrepancy(
                labels=event_remains,
                series_1=global_corrs,
                series_2=event_corrs)
            analyse_graphs.write_occupancy_graph(
                f_name=dataset.file_manager.get_file('bdc_est_png').format(
                    event_num),
                x_values=event_remains,
                global_values=global_corrs,
                local_values=event_corrs)
            log_strs.append(
                '=> Event Background Correction estimated as {!s}'.format(
                    1 - event_remain_est))
            # Reporting (log is normally silenced)
            blob_finder.log('Min-Max: {} {}'.format(
                1.0 - args.params.background_correction.max_bdc,
                1.0 - args.params.background_correction.min_bdc))
            blob_finder.log('Event number: {}'.format(event_num))
            blob_finder.log('Event Remains: {}'.format(','.join(
                map(str, event_remains))))
            blob_finder.log('Event Corrs:  {}'.format(','.join(
                map(str, event_corrs))))
            blob_finder.log('Global Corrs: {}'.format(','.join(
                map(str, global_corrs))))
            # Apply multiplier if provided
            blob_finder.log('Applying multiplier to output 1-BDC: {}'.format(
                args.params.background_correction.output_multiplier))
            event_remain_est = min(
                event_remain_est *
                args.params.background_correction.output_multiplier,
                1.0 - args.params.background_correction.min_bdc)
            # ============================================================================>
            # Calculate the map correlations at the selected BDC
            # ============================================================================>
            event_map_data = calculate_bdc_subtracted_map(
                ref_map_data=avrg_map_data,
                query_map_data=dset_map_data,
                bdc=1.0 - event_remain_est)
            global_corr = numpy.corrcoef(
                event_map_data.select(reference_idxs),
                avrg_map_data.select(reference_idxs))[0, 1]
            local_corr = numpy.corrcoef(
                event_map_data.select(exp_event_idxs),
                avrg_map_data.select(exp_event_idxs))[0, 1]
            # ============================================================================>
            # Write out EVENT map (in the reference frame) and grid masks
            # ============================================================================>
            if args.output.developer.write_reference_frame_maps:
                event_map = dataset_map.new_from_template(event_map_data,
                                                          sparse=False)
                event_map.to_file(
                    filename=dataset.file_manager.get_file('event_map').format(
                        event_num, event_remain_est),
                    space_group=grid.space_group())
            if args.output.developer.write_reference_frame_grid_masks:
                grid.write_indices_as_map(
                    indices=event_mask.outer_mask_indices(),
                    f_name=dataset.file_manager.get_file('grid_mask').replace(
                        '.ccp4', '') + '-event-mask-{}.ccp4'.format(event_num))

            # ============================================================================>
            # Find the nearest atom to the event
            # ============================================================================>
            atm = find_nearest_atoms(atoms=list(
                protein(dataset.model.hierarchy).atoms_with_labels()),
                                     query=dataset.model.alignment.ref2nat(
                                         grid.grid2cart(sites_grid=[
                                             map(int, point_cluster.centroid)
                                         ],
                                                        origin_shift=True)))[0]
            log_strs.append(
                '=> Nearest Residue to event: Chain {}, Residue {} {}'.format(
                    atm.chain_id, atm.resname, atm.resid()))
            # ============================================================================>
            # Create an event object
            # ============================================================================>
            event_obj = Event(id=point_cluster.id, cluster=point_cluster)
            event_obj.info.estimated_pseudo_occupancy = event_remain_est
            event_obj.info.estimated_bdc = 1.0 - event_remain_est
            event_obj.info.global_correlation = global_corr
            event_obj.info.local_correlation = local_corr
            # ============================================================================>
            # Append to dataset handler
            # ============================================================================>
            dataset.events.append(event_obj)

        # ============================================================================>
        # Write out pymol script to load all of the maps easily
        # ============================================================================>
        pml = PythonScript()
        pml.set_normalise_maps(False)
        # Load Structures
        name = pml.load_pdb(
            f_name=dataset.file_manager.get_file('aligned_model'))
        pml.repr_as(obj=name, style='sticks')
        name = pml.load_pdb(
            f_name=dataset.file_manager.get_file('symmetry_copies'))
        pml.repr_hide(obj=name)
        # Load Sampled Map
        name = pml.load_map(
            f_name=dataset.file_manager.get_file('sampled_map'))
        mesh = pml.make_mesh(obj=name, contour_level=1.0, colour='blue')
        # Load Z-maps
        name = pml.load_map(f_name=dataset.file_manager.get_file('z_map'))
        mesh = pml.make_mesh(obj=name,
                             mesh_suffix='.plus',
                             contour_level=3.0,
                             colour='green')
        mesh = pml.make_mesh(obj=name,
                             mesh_suffix='.mins',
                             contour_level=-3.0,
                             colour='red')
        # Load Event maps
        for f in sorted(
                glob.glob(
                    dataset.file_manager.get_file('event_map').format(
                        '*', '*'))):
            name = pml.load_map(f_name=f)
            mesh = pml.make_mesh(obj=name,
                                 contour_level=float(f.split('_')[-2]),
                                 colour='hotpink')
        # Load Miscellaneous maps (e.g. masks)
        for f in sorted(
                glob.glob(
                    os.path.join(dataset.file_manager.get_dir('root'),
                                 '*mask*.ccp4'))):
            name = pml.load_map(f_name=f)
            mesh = pml.make_mesh(obj=name, contour_level=0.0, colour='grey')

        pml.write_script(f_name=dataset.file_manager.get_file('pymol_script'),
                         overwrite=True)

        return (dataset, dataset_map.meta, log_strs)
def run(args):
  """Count the reflections predicted for randomly oriented crystals.

  The command-line arguments are handed to ``Importer``; exactly one
  datablock containing exactly one imageset is required.  Arguments the
  importer does not handle are interpreted as phil parameters against
  ``master_phil_scope``, and ``unit_cell`` and ``space_group`` must be set.

  For each of ``params.n_samples`` orientations obtained from
  ``random_rotation()``, a crystal model is built and reflections are
  predicted for the imported sweep.  If ``params.d_min`` is set, only
  reflections with a d-spacing greater than ``d_min`` are counted.  Summary
  statistics, a 20-slot histogram and the raw counts are printed; when
  ``params.plot`` is true the histogram is also written to
  ``predicted_count_histogram.pdf``.

  Parameters:
    args: list of command-line arguments (file names and phil parameters).

  Raises:
    AssertionError: if the input does not hold exactly one datablock with
      one imageset, or if ``unit_cell`` / ``space_group`` are not given.
  """

  from libtbx.phil import command_line

  from dials.util.command_line import Importer
  from dials.array_family import flex
  # Parenthesised single-argument print behaves the same on Python 2 and 3.
  print(args)
  importer = Importer(args, check_format=False)
  assert len(importer.datablocks) == 1
  sweeps = importer.datablocks[0].extract_imagesets()
  assert len(sweeps) == 1
  sweep = sweeps[0]

  cmd_line = command_line.argument_interpreter(master_params=master_phil_scope)
  working_phil = cmd_line.process_and_fetch(args=importer.unhandled_arguments)
  working_phil.show()

  params = working_phil.extract()
  assert params.unit_cell is not None
  assert params.space_group is not None
  unit_cell = params.unit_cell
  space_group = params.space_group.group()

  import random
  from dxtbx.model.crystal import crystal_model
  from cctbx import crystal, miller
  from scitbx import matrix

  # Seed both generators so that the sampled orientations are reproducible.
  flex.set_random_seed(params.random_seed)
  random.seed(params.random_seed)

  crystal_symmetry = crystal.symmetry(unit_cell=unit_cell,
                                      space_group=space_group)

  # the reciprocal matrix
  B = matrix.sqr(unit_cell.fractionalization_matrix()).transpose()

  def predict_once(rotation_args):
    """Return the number of reflections predicted for one orientation.

    ``rotation_args`` is a 1-tuple holding the orientation matrix U.
    """
    from dxtbx.model.experiment.experiment_list import Experiment
    U = rotation_args[0]
    A = U * B
    # The real-space cell vectors are the rows of the inverse of A = U * B.
    direct_matrix = A.inverse()
    cryst_model = crystal_model(direct_matrix[0:3],
                                direct_matrix[3:6],
                                direct_matrix[6:9],
                                space_group=space_group)
    experiment = Experiment(imageset=sweep,
                            beam=sweep.get_beam(),
                            detector=sweep.get_detector(),
                            goniometer=sweep.get_goniometer(),
                            scan=sweep.get_scan(),
                            crystal=cryst_model)
    predicted_reflections = flex.reflection_table.from_predictions(
      experiment)
    if params.d_min is not None:
      miller_set = miller.set(
        crystal_symmetry, predicted_reflections['miller_index'],
        anomalous_flag=True)
      # Keep only the reflections inside the requested resolution limit.
      resolution_sel = miller_set.d_spacings().data() > params.d_min
      predicted_reflections = predicted_reflections.select(resolution_sel)
    return len(predicted_reflections)

  from libtbx import easy_mp
  # One single-element tuple per sample; kept separate from the ``args``
  # parameter instead of overwriting it.
  rotations = [(random_rotation(),) for _ in range(params.n_samples)]
  results = easy_mp.parallel_map(
    func=predict_once,
    iterable=rotations,
    processes=params.nproc,
    preserve_order=True,
    preserve_exception_message=True)
  n_predicted = flex.double(results)

  print("Basic statistics:")
  from scitbx.math import basic_statistics
  stats = basic_statistics(n_predicted)
  stats.show()

  print("Histogram:")
  hist = flex.histogram(n_predicted, n_slots=20)
  hist.show()

  print("Raw spot counts:")
  print(list(n_predicted))

  if params.plot:
    from matplotlib import pyplot
    from matplotlib.backends.backend_pdf import PdfPages

    pyplot.rc('font', family='serif')
    pyplot.rc('font', serif='Times New Roman')

    blue = '#2166AC'
    fig = pyplot.figure()
    ax = fig.add_subplot(1,1,1)
    ax.bar(hist.slot_centers(), hist.slots(), width=0.75*hist.slot_width(),
           color=blue, edgecolor=blue)
    ax.set_xlabel('Spot count')
    ax.set_ylabel('Frequency')
    pdf = PdfPages("predicted_count_histogram.pdf")
    try:
      pdf.savefig(fig)
    finally:
      # Always release the file handle, even if rendering fails.
      pdf.close()
def run(args):
    """Count the reflections predicted for randomly oriented crystals.

    The command-line arguments are handed to ``Importer``; exactly one
    datablock containing exactly one imageset is required.  Arguments the
    importer does not handle are interpreted as phil parameters against
    ``master_phil_scope``, and ``unit_cell`` and ``space_group`` must be set.

    For each of ``params.n_samples`` orientations obtained from
    ``random_rotation()``, a crystal model is built and reflections are
    predicted for the imported sweep.  If ``params.d_min`` is set, only
    reflections with a d-spacing greater than ``d_min`` are counted.
    Summary statistics, a 20-slot histogram and the raw counts are printed;
    when ``params.plot`` is true the histogram is also written to
    ``predicted_count_histogram.pdf``.

    Parameters:
        args: list of command-line arguments (file names and phil
            parameters).

    Raises:
        AssertionError: if the input does not hold exactly one datablock
            with one imageset, or if ``unit_cell`` / ``space_group`` are not
            given.
    """

    from libtbx.phil import command_line

    from dials.util.command_line import Importer
    from dials.array_family import flex

    print(args)
    importer = Importer(args, check_format=False)
    assert len(importer.datablocks) == 1
    sweeps = importer.datablocks[0].extract_imagesets()
    assert len(sweeps) == 1
    sweep = sweeps[0]

    cmd_line = command_line.argument_interpreter(master_params=master_phil_scope)
    working_phil = cmd_line.process_and_fetch(args=importer.unhandled_arguments)
    working_phil.show()

    params = working_phil.extract()
    assert params.unit_cell is not None
    assert params.space_group is not None
    unit_cell = params.unit_cell
    space_group = params.space_group.group()

    import random
    from dxtbx.model.crystal import crystal_model
    from cctbx import crystal, miller
    from scitbx import matrix

    # Seed both generators so that the sampled orientations are reproducible.
    flex.set_random_seed(params.random_seed)
    random.seed(params.random_seed)

    crystal_symmetry = crystal.symmetry(unit_cell=unit_cell, space_group=space_group)

    # the reciprocal matrix
    B = matrix.sqr(unit_cell.fractionalization_matrix()).transpose()

    def predict_once(rotation_args):
        """Return the number of reflections predicted for one orientation.

        ``rotation_args`` is a 1-tuple holding the orientation matrix U.
        """
        from dxtbx.model.experiment.experiment_list import Experiment

        U = rotation_args[0]
        A = U * B
        # The real-space cell vectors are the rows of the inverse of A = U * B.
        direct_matrix = A.inverse()
        cryst_model = crystal_model(
            direct_matrix[0:3],
            direct_matrix[3:6],
            direct_matrix[6:9],
            space_group=space_group,
        )
        experiment = Experiment(
            imageset=sweep,
            beam=sweep.get_beam(),
            detector=sweep.get_detector(),
            goniometer=sweep.get_goniometer(),
            scan=sweep.get_scan(),
            crystal=cryst_model,
        )
        predicted_reflections = flex.reflection_table.from_predictions(experiment)
        if params.d_min is not None:
            miller_set = miller.set(
                crystal_symmetry,
                predicted_reflections["miller_index"],
                anomalous_flag=True,
            )
            # Keep only the reflections inside the requested resolution limit.
            resolution_sel = miller_set.d_spacings().data() > params.d_min
            predicted_reflections = predicted_reflections.select(resolution_sel)
        return len(predicted_reflections)

    from libtbx import easy_mp

    # One single-element tuple per sample; kept separate from the ``args``
    # parameter instead of overwriting it.
    rotations = [(random_rotation(),) for _ in range(params.n_samples)]
    results = easy_mp.parallel_map(
        func=predict_once,
        iterable=rotations,
        processes=params.nproc,
        preserve_order=True,
        preserve_exception_message=True,
    )
    n_predicted = flex.double(results)

    print("Basic statistics:")
    from scitbx.math import basic_statistics

    stats = basic_statistics(n_predicted)
    stats.show()

    print("Histogram:")
    hist = flex.histogram(n_predicted, n_slots=20)
    hist.show()

    print("Raw spot counts:")
    print(list(n_predicted))

    if params.plot:
        from matplotlib import pyplot
        from matplotlib.backends.backend_pdf import PdfPages

        pyplot.rc("font", family="serif")
        pyplot.rc("font", serif="Times New Roman")

        blue = "#2166AC"
        fig = pyplot.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.bar(
            hist.slot_centers(),
            hist.slots(),
            width=0.75 * hist.slot_width(),
            color=blue,
            edgecolor=blue,
        )
        ax.set_xlabel("Spot count")
        ax.set_ylabel("Frequency")
        # The context manager closes the PDF even if rendering fails.
        with PdfPages("predicted_count_histogram.pdf") as pdf:
            pdf.savefig(fig)
Ejemplo n.º 11
0
def scitbx_stats(data):
    """Return ``(mean, bias-corrected standard deviation)`` of ``data``.

    Both values are read from ``scitbx.math.basic_statistics``.
    """
    from scitbx.math import basic_statistics

    summary = basic_statistics(values=data)
    mean = summary.mean
    sigma = summary.bias_corrected_standard_deviation
    return mean, sigma
Ejemplo n.º 12
0
def scitbx_stats(data):
  """Return ``(mean, bias-corrected standard deviation)`` of ``data``.

  Both values are read from ``scitbx.math.basic_statistics``.
  """
  from scitbx.math import basic_statistics
  summary = basic_statistics(values=data)
  mean = summary.mean
  sigma = summary.bias_corrected_standard_deviation
  return mean, sigma
  def add_species(self,pdb_input=None,n_copies=1,form_factor_table='WK1995'):
    """Build a ``species`` entry from ``pdb_input`` and append it to
    ``self.species``.

    The atomic coordinates are shifted so that their mean position lies at
    the origin, scattering types are obtained through
    ``pdb_interpretation.process`` and form factors are assigned from
    ``form_factor_table``.  If ``self.use_solvent`` is set, bulk-solvent and
    boundary-layer terms are added through ``solvent_model``; otherwise the
    boundary-layer scaling factors are all zero.

    Parameters:
      pdb_input: object providing ``atoms()``; it is also passed to
        ``pdb_interpretation.process`` as ``pdb_inp``.  Required, despite
        the ``None`` default.
      n_copies: number of copies of this species; must be at least 1.
      form_factor_table: table name handed to
        ``scattering_type_registry.assign_from_table``.

    Raises:
      Sorry: if ``n_copies`` is smaller than 1 or ``pdb_input`` is None.
    """
    if (n_copies <= 0):
      raise Sorry('n_copies has to be greater than or equal to 1')
    if (pdb_input is None):
      # Fail with a clear message instead of an AttributeError further down.
      raise Sorry('pdb_input has to be provided')

    # construct entry
    s = species()
    s.n_copies = n_copies
    s.pdb_input = pdb_input

    # find center and radius of sphere enclosing molecule
    atoms = s.pdb_input.atoms()
    n_atoms = len(atoms)
    x = flex.double(n_atoms)
    y = flex.double(n_atoms)
    z = flex.double(n_atoms)
    for i, atom in enumerate(atoms):
      xyz = atom.xyz
      x[i] = xyz[0]
      y[i] = xyz[1]
      z[i] = xyz[2]
    x_stats = basic_statistics(x)
    y_stats = basic_statistics(y)
    z_stats = basic_statistics(z)
    # The radius is half the diagonal of the axis-aligned bounding box.
    half_extents = flex.double( ( (x_stats.max - x_stats.min)/2.0,
                                  (y_stats.max - y_stats.min)/2.0,
                                  (z_stats.max - z_stats.min)/2.0 ) )
    s.radius = half_extents.norm()

    # center model at origin (the center used is the mean atomic position)
    center = flex.double((x_stats.mean,y_stats.mean,z_stats.mean))
    s.xyz = flex.vec3_double(n_atoms)
    for i, atom in enumerate(atoms):
      s.xyz[i] = tuple(flex.double(atom.xyz) - center)

    # determine scattering types
    mon_lib_srv = server.server()
    ener_lib = server.ener_lib()
    interpreted_pdb = pdb_interpretation.process(
      mon_lib_srv=mon_lib_srv,ener_lib=ener_lib,pdb_inp=s.pdb_input)
    chain_proxies = interpreted_pdb.all_chain_proxies
    s.scattering_types = chain_proxies.scattering_type_registry.symbols
    s.scattering_type_registry = scattering_type_registry()
    for f in s.scattering_types:
      s.scattering_type_registry.process(f)
    s.scattering_type_registry.assign_from_table(form_factor_table)
    s.n_electrons = (s.scattering_type_registry
                     .sum_of_scattering_factors_at_diffraction_angle_0())

    # apply solvent model
    if (self.use_solvent):
      sm = solvent_model()
      sm.interpreted_pdb = interpreted_pdb
      sm.xyz = s.xyz
      sm.fudge_factor = 0.6
      s.scattering_type_registry = sm.add_bulk_solvent(s.scattering_type_registry)
      s.scattering_types = sm.scattering_types
      # Set only after the bulk-solvent step, as in the original ordering.
      sm.boundary_layer_scale = 0.6
      s.boundary_layer_scaling_factors = sm.add_boundary_layer_solvent(
        s.scattering_type_registry)
    else:
      s.boundary_layer_scaling_factors = flex.double(len(s.xyz),0.0)

    # finalize entry
    self.species.append(s)