def optimize(self):
    # initialise the population please
    self.make_random_population()
    # score the population please
    self.score_population()
    converged = False
    monitor_score = flex.min( self.scores )
    self.count = 0
    while not converged:
      self.evolve()
      location = flex.min_index( self.scores )
      if self.show_progress:
        if self.count%self.show_progress_nth_cycle==0:
          # call the evaluator's custom print_status hook;
          # its signature is (min_target, mean_target, best_vector, count)
          self.evaluator.print_status(
            flex.min(self.scores),
            flex.mean(self.scores),
            self.population[ flex.min_index( self.scores ) ],
            self.count)

      self.count += 1
      if self.count%self.monitor_cycle==0:
        if (monitor_score-flex.min(self.scores) ) < self.eps:
          converged = True
        else:
          monitor_score = flex.min(self.scores)
      rd = (flex.mean(self.scores) - flex.min(self.scores) )
      rd = rd*rd/(flex.min(self.scores)*flex.min(self.scores) + self.eps )
      if ( rd < self.eps ):
        converged = True


      if self.count>=self.max_iter:
        converged = True
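This loop never touches the problem directly: everything goes through the evaluator object, which supplies the vector length and the scoring, and receives the print_status callback with the signature (min_target, mean_target, best_vector, count). A minimal sketch of a compatible evaluator (hypothetical names; the target and domain members are assumptions about what score_population and make_random_population use):

from scitbx.array_family import flex

class sphere_evaluator(object):
    # Hypothetical evaluator: minimise sum(x_i**2), minimum at the origin.
    def __init__(self, n=5):
        self.n = n                     # vector length read by the optimizer
        self.x = None                  # receives the best vector afterwards
        self.domain = [(-5, 5)] * n    # assumed search box for the random population
    def target(self, vector):
        return flex.sum(vector * vector)
    def print_status(self, min_target, mean_target, best_vector, count):
        print(count, min_target, mean_target, list(best_vector))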
Example 2
    def optimize(self):
        # initialise the population please
        self.make_random_population()
        # score the population please
        self.score_population()
        converged = False
        monitor_score = flex.min(self.scores)
        self.count = 0
        while not converged:
            self.evolve()
            location = flex.min_index(self.scores)
            if self.show_progress:
                if self.count % self.show_progress_nth_cycle == 0:
                    # call the evaluator's custom print_status hook;
                    # its signature is (min_target, mean_target, best_vector, count)
                    self.evaluator.print_status(
                        flex.min(self.scores), flex.mean(self.scores),
                        self.population[flex.min_index(self.scores)],
                        self.count)

            self.count += 1
            if self.count % self.monitor_cycle == 0:
                if (monitor_score - flex.min(self.scores)) < self.eps:
                    converged = True
                else:
                    monitor_score = flex.min(self.scores)
            rd = (flex.mean(self.scores) - flex.min(self.scores))
            rd = rd * rd / (flex.min(self.scores) * flex.min(self.scores) +
                            self.eps)
            if (rd < self.eps):
                converged = True

            if self.count >= self.max_iter:
                converged = True
Example 3
 def collect_scores(self):
     self.chi = []
     self.ent = []
     for oo in self.trials:
         c, t, a, b = oo.get_scores()
         self.chi.append(c)
         self.ent.append(t)
         print "#", self.dmax, c, t, a, b
     self.chi_index = flex.min_index(flex.double(self.chi))
     self.ent_index = flex.min_index(flex.double(self.ent))
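Every example on this page follows the same flex idiom: collect scores into a Python list, wrap it in flex.double, and let flex.min_index return the position of the smallest element. A two-line check:

from scitbx.array_family import flex

scores = flex.double([3.0, 1.0, 2.0])
assert flex.min_index(scores) == 1                         # smallest element sits at index 1
assert scores[flex.min_index(scores)] == flex.min(scores)  # consistent with flex.min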
Example 4
 def collect_scores(self):
     self.chi = []
     self.ent = []
     for oo in self.trials:
         c = oo.target(oo.solution)
         self.chi.append(c)
     self.chi_index = flex.min_index(flex.double(self.chi))
Example 5
def cross_validate_to_determine_number_of_terms(
    x_obs, y_obs, w_obs=None, min_terms=10, max_terms=25, n_free=100, n_goes=5
):
    if n_goes is None:
        if min_terms < 2:
            min_terms = 2

        free_residuals = []

        free_flags = flex.bool(x_obs.size(), True)
        free_permut = flex.random_permutation(x_obs.size())
        for ii in range(n_free):
            free_flags[free_permut[ii]] = False

        for count in range(min_terms, max_terms):
            fit = chebyshev_lsq_fit(count, x_obs, y_obs, w_obs, free_flags)
            free_residuals.append(fit.free_f)
        return flex.double(free_residuals)

    else:
        if w_obs is None:
            w_obs = flex.double(x_obs.size(), 1)
        free_resid = flex.double(max_terms - min_terms, 0)
        for jj in range(n_goes):
            free_resid += cross_validate_to_determine_number_of_terms(
                x_obs, y_obs, w_obs, min_terms=min_terms, max_terms=max_terms, n_free=n_free, n_goes=None
            )
        return min_terms + flex.min_index(free_resid)
Example 6
    def simplex(self):
        self.simplex_scores = []
        self.simplex_solutions = []
        for ii in range(self.simplex_trial):
            # make a random simplex
            self.starting_simplex = []
            for ii in range(self.n):
                self.starting_simplex.append(
                    random.random() * (flex.random_double(3) * 2 - 1.0) * 2 +
                    self.x)
            self.starting_simplex.append(self.x)

            self.optimizer = simplex.simplex_opt(dimension=self.n,
                                                 matrix=self.starting_simplex,
                                                 evaluator=self,
                                                 max_iter=50,
                                                 tolerance=1e-4)

            self.solution = self.optimizer.GetResult()
            self.score = self.target(self.solution)
            self.simplex_scores.append(self.score)
            self.simplex_solutions.append(self.solution)

        best_index = flex.min_index(flex.double(self.simplex_scores))
        self.score = self.simplex_scores[best_index]
        self.solution = self.simplex_solutions[best_index]
        if (self.translate):
            self.vector = self.solution.deep_copy()
            print("translate:", list(self.vector))
        else:
            self.angle = self.solution.deep_copy()
            print("rotate:", list(self.angle))
        self.target(self.solution)
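The restart pattern above (build a random simplex of n + 1 vertices around self.x, run simplex.simplex_opt, keep the best of several trials) can be exercised standalone. A minimal sketch against the same scitbx simplex API, with a hypothetical quadratic evaluator (only a target() method is required):

from scitbx.array_family import flex
from scitbx import simplex

class parabola(object):
    # hypothetical evaluator: minimum at (2, 2, 2)
    def target(self, v):
        return flex.sum(flex.pow2(v - 2.0))

n = 3
x0 = flex.double(n, 0.0)
starting_simplex = [x0 + (flex.random_double(n) * 2 - 1.0) for _ in range(n)]
starting_simplex.append(x0)  # n + 1 vertices in total
opt = simplex.simplex_opt(dimension=n, matrix=starting_simplex,
                          evaluator=parabola(), tolerance=1e-6)
print(list(opt.get_solution()))  # each component should approach 2.0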
Example 7
    def simplex(self):
        self.simplex_scores = []
        self.simplex_solutions = []
        for ii in range(self.simplex_trial):
            # make a random simplex
            self.starting_simplex = []
            for ii in range(self.n * self.n_refine):
                self.starting_simplex.append(
                    (flex.random_double(self.n * self.n_refine) * 2 - 1.0) *
                    1.0 + self.x)
            self.starting_simplex.append(self.x)

            self.optimizer = simplex.simplex_opt(dimension=self.n *
                                                 self.n_refine,
                                                 matrix=self.starting_simplex,
                                                 evaluator=self,
                                                 max_iter=500,
                                                 tolerance=1e-4)

            self.solution = self.optimizer.get_solution()
            self.score = self.target(self.solution)
            self.simplex_scores.append(self.score)
            self.simplex_solutions.append(self.solution)

        best_index = flex.min_index(flex.double(self.simplex_scores))
        self.solution = self.simplex_solutions[best_index]
        self.score = self.target(self.solution)
Example 8
    def simplex(self):
        self.n = self.n_params

        if (self.x is None):
            self.x = flex.double([1] + [0] * (self.n_params - 1))
        else:
            self.x = self.x.concatenate(
                flex.double([0] * (self.n_params - self.n_fst_pass)))
        self.pofr = pr_tools.pofr(self.d_max, self.n, self.prior)

        self.simplex_scores = []
        self.simplex_solutions = []
        for ii in range(self.simplex_trial):
            # make a random simplex
            self.starting_simplex = []
            for ii in range(self.n):
                self.starting_simplex.append((flex.random_double(self.n) * 2 -
                                              1.0) + self.x)
            self.starting_simplex.append(self.x)

            self.optimizer = simplex.simplex_opt(dimension=self.n,
                                                 matrix=self.starting_simplex,
                                                 evaluator=self,
                                                 tolerance=1e-4)

            self.solution = self.optimizer.get_solution()
            self.score = self.target(self.solution)
            self.simplex_scores.append(self.score)
            self.simplex_solutions.append(self.solution)

        best_index = flex.min_index(flex.double(self.simplex_scores))
        self.solution = self.simplex_solutions[best_index]
        #self.cma(m=self.solution)
        self.score = self.simplex_scores[best_index]
        self.pofr.update(self.solution)
Example 9
  def __init__(self,
               evaluator,
               population_size=50,
               f=None,
               cr=0.9,
               eps=1e-2,
               n_cross=1,
               max_iter=10000,
               monitor_cycle=200,
               out=None,
               show_progress=False,
               show_progress_nth_cycle=1,
               insert_solution_vector=None,
               dither_constant=0.4):
    self.dither=dither_constant
    self.show_progress=show_progress
    self.show_progress_nth_cycle=show_progress_nth_cycle
    self.evaluator = evaluator
    self.population_size = population_size
    self.f = f
    self.cr = cr
    self.n_cross = n_cross
    self.max_iter = max_iter
    self.monitor_cycle = monitor_cycle
    self.vector_length = evaluator.n
    self.eps = eps
    self.population = []
    self.seeded = False
    if insert_solution_vector is not None:
      assert len( insert_solution_vector )==self.vector_length
      self.seeded = insert_solution_vector
    for ii in range(self.population_size):
      self.population.append( flex.double(self.vector_length,0) )


    self.scores = flex.double(self.population_size,1000)
    self.optimize()
    self.best_score = flex.min( self.scores )
    self.best_vector = self.population[ flex.min_index( self.scores ) ]
    self.evaluator.x = self.best_vector
    if self.show_progress:
      self.evaluator.print_status(
            flex.min(self.scores),
            flex.mean(self.scores),
            self.population[ flex.min_index( self.scores ) ],
            'Final')
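Given the constructor above, driving the optimizer takes one evaluator instance; it runs optimize() from __init__ and leaves best_score and best_vector behind. A usage sketch, assuming this class is scitbx's differential_evolution_optimizer (the signature above matches; the import path is an assumption):

from scitbx.array_family import flex
from scitbx.differential_evolution import differential_evolution_optimizer

class quadratic_evaluator(object):
    # hypothetical three-parameter problem with its minimum at (1, 1, 1)
    def __init__(self):
        self.n = 3
        self.x = None
        self.domain = [(-10, 10)] * self.n  # assumed search box
    def target(self, vector):
        return flex.sum(flex.pow2(vector - 1.0))
    def print_status(self, min_target, mean_target, best_vector, count):
        print(count, min_target, mean_target, list(best_vector))

opt = differential_evolution_optimizer(quadratic_evaluator(),
                                       population_size=20, max_iter=500)
print(opt.best_score, list(opt.best_vector))  # best_vector ~ (1, 1, 1)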
Example 10
 def get_best_fit(self):
     scores = []
     for ii in range(self.n_trial):
         scores.append(self.trials[ii].score)
     best_index = flex.min_index(flex.double(scores))
     self.best_fit = self.trials[best_index]
     self.best_score = scores[best_index]
     ### update best fitting info for print out ###
     self.calc_i = self.trials[best_index].calc_i
     self.r = self.trials[best_index].r
     self.pr = self.trials[best_index].best_pr
Example 11
    def _determine_dimensions(self):
        if self.params.dimensions is Auto and self.target.dim == 2:
            self.params.dimensions = 2
        elif self.params.dimensions is Auto:
            logger.info("=" * 80)
            logger.info(
                "\nAutomatic determination of number of dimensions for analysis"
            )
            dimensions = []
            functional = []
            termination_params = copy.deepcopy(self.params.termination_params)
            termination_params.max_iterations = min(
                20, termination_params.max_iterations
            )
            for dim in range(1, self.target.dim + 1):
                logger.debug("Testing dimension: %i", dim)
                self.target.set_dimensions(dim)
                self._optimise(termination_params)
                dimensions.append(dim)
                functional.append(self.minimizer.f)

            # Find the elbow point of the curve, in the same manner as that used by
            # distl spotfinder for resolution method 1 (Zhang et al 2006).
            # See also dials/algorithms/spot_finding/per_image_analysis.py

            x = flex.double(dimensions)
            y = flex.double(functional)
            slopes = (y[-1] - y[:-1]) / (x[-1] - x[:-1])
            p_m = flex.min_index(slopes)

            x1 = matrix.col((x[p_m], y[p_m]))
            x2 = matrix.col((x[-1], y[-1]))

            gaps = flex.double()
            v = matrix.col(((x2[1] - x1[1]), -(x2[0] - x1[0]))).normalize()

            for i in range(p_m, len(x)):
                x0 = matrix.col((x[i], y[i]))
                r = x1 - x0
                g = abs(v.dot(r))
                gaps.append(g)

            p_g = flex.max_index(gaps)

            x_g = x[p_g + p_m]

            logger.info(
                dials.util.tabulate(
                    zip(dimensions, functional), headers=("Dimensions", "Functional")
                )
            )
            logger.info("Best number of dimensions: %i" % x_g)
            self.target.set_dimensions(int(x_g))
            logger.info("Using %i dimensions for analysis" % self.target.dim)
Example 12
 def get_scores(self):
     scores = []
     rg_s = []
     for fitter in self.fitters:
         rg = fitter.best_fit.pofr.get_rg()
         s = abs(self.rg - rg)
         rg_s.append(rg)
         scores.append(s)
     self.best_index = flex.min_index(flex.double(scores))
     self.dmax_best = self.d_array[self.best_index]
     self.calc_i = self.fitters[self.best_index].calc_i
     print "#DMAX=", self.dmax_best, self.d_max, self.rg, rg_s[
         self.best_index]
Example 13
 def compute_score(self, nstruct, prefix):
   new_indx = flex.int(self.she_obj.natom)
   files = []
   results = flex.double()
   pdb_files = glob.glob(prefix+"*.pdb") # including the input pdb model, avoiding no entry case
   for pdb_file_name in pdb_files:
     new_xyz = extract_xyz(pdb_file_name)
     self.she_obj.engine.update_coord(new_xyz,new_indx)
     new_i = self.she_obj.engine.I()
     s,o = she.linear_fit(new_i, self.obs.i, self.obs.s)
     chi2= flex.mean( flex.pow2( (self.obs.i-(s*new_i+o))  /self.obs.s ))
     files.append( pdb_file_name )
     results.append( chi2 )
   self.min_indx = flex.min_index( results )
   self.min_file = files[ self.min_indx ]
   self.min_score= results[ self.min_indx ]
   return files, results
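The chi2 above is the mean of the squared, sigma-weighted residuals between observed and (scaled, offset) calculated intensities; with hypothetical numbers the arithmetic is easy to follow:

from scitbx.array_family import flex

i_obs = flex.double([1.0, 2.0, 3.0])
i_fit = flex.double([1.1, 1.9, 3.2])   # stands in for s * new_i + o
sigma = flex.double([0.1, 0.1, 0.1])
chi2 = flex.mean(flex.pow2((i_obs - i_fit) / sigma))  # (1 + 1 + 4) / 3 = 2.0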
Example 14
    def _determine_dimensions(self):
        if self.params.dimensions is Auto and self.target.dim == 2:
            self.params.dimensions = 2
        elif self.params.dimensions is Auto:
            dimensions = []
            functional = []
            explained_variance = []
            explained_variance_ratio = []
            for dim in range(1, self.target.dim + 1):
                self.target.set_dimensions(dim)
                self._optimise()
                logger.info("Functional: %g" % self.minimizer.f)
                self._principal_component_analysis()
                dimensions.append(dim)
                functional.append(self.minimizer.f)
                explained_variance.append(self.explained_variance)
                explained_variance_ratio.append(self.explained_variance_ratio)

            # Find the elbow point of the curve, in the same manner as that used by
            # distl spotfinder for resolution method 1 (Zhang et al 2006).
            # See also dials/algorithms/spot_finding/per_image_analysis.py

            x = flex.double(dimensions)
            y = flex.double(functional)
            slopes = (y[-1] - y[:-1]) / (x[-1] - x[:-1])
            p_m = flex.min_index(slopes)

            x1 = matrix.col((x[p_m], y[p_m]))
            x2 = matrix.col((x[-1], y[-1]))

            gaps = flex.double()
            v = matrix.col(((x2[1] - x1[1]), -(x2[0] - x1[0]))).normalize()

            for i in range(p_m, len(x)):
                x0 = matrix.col((x[i], y[i]))
                r = x1 - x0
                g = abs(v.dot(r))
                gaps.append(g)

            p_g = flex.max_index(gaps)

            x_g = x[p_g + p_m]

            logger.info("Best number of dimensions: %i" % x_g)
            self.target.set_dimensions(int(x_g))
Example 15
  def optimize_further(self):
    self.solutions=[]
    self.scores= flex.double()
    for candi in self.candidates:
      starting_simplex = []
      for ii in range(self.dimension+1):
        starting_simplex.append(flex.random_double(self.dimension)*self.simplex_scale + candi)
      optimizer = simplex.simplex_opt(     dimension=self.dimension,
                                           matrix = starting_simplex,
                                           evaluator = self.evaluator,
                                           tolerance=self.tolerance
                                     )
      self.solutions.append( optimizer.get_solution() )
      self.scores.append( optimizer.get_score() )

    min_index = flex.min_index( self.scores )
    self.best_solution = self.solutions[ min_index ]
    self.best_score = self.scores[ min_index ]
Example 17
 def __init__(self,
              pdb_file,
              target,
              nstruct=500,
              np=50,
              max_np=100,
              prefix='prefix'):
     self.pdb_file = pdb_file
     self.obs = saxs_read_write.read_standard_ascii_qis(target)
     if (self.obs.q.size() > max_np):
         self.obs = self.reduction(
             self.obs)  # reduce number_of_point in q-array
     self.she_obj = she.she(pdb_file, self.obs.q)
     # More options are available, see line #10 for class she definition ##
     self.run_concoord(nstruct, prefix=prefix)
     self.files, self.scores = self.compute_score(nstruct, prefix=prefix)
     self.min_indx = flex.min_index(self.scores)
     self.min_file = self.files[self.min_indx]
     self.min_score = self.scores[self.min_indx]
Example 18
def cross_validate_to_determine_number_of_terms(x_obs,
                                                y_obs,
                                                w_obs=None,
                                                min_terms=10,
                                                max_terms=25,
                                                n_free=100,
                                                n_goes=5):
    if (n_goes is None):
        if (min_terms < 2):
            min_terms = 2

        free_residuals = []

        free_flags = flex.bool(x_obs.size(), True)
        free_permut = flex.random_permutation(x_obs.size())
        for ii in range(n_free):
            free_flags[free_permut[ii]] = False

        for count in range(min_terms, max_terms):
            fit = chebyshev_lsq_fit(count, x_obs, y_obs, w_obs, free_flags)
            free_residuals.append(fit.free_f)
        return (flex.double(free_residuals))

    else:
        if w_obs is None:
            w_obs = flex.double(x_obs.size(), 1)
        free_resid = flex.double(max_terms - min_terms, 0)
        for jj in range(n_goes):
            free_resid += cross_validate_to_determine_number_of_terms(
                x_obs,
                y_obs,
                w_obs,
                min_terms=min_terms,
                max_terms=max_terms,
                n_free=n_free,
                n_goes=None)
        return (min_terms + flex.min_index(free_resid))
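A usage sketch for the routine above on synthetic data; chebyshev_lsq_fit is assumed to be scitbx.math's Chebyshev least-squares fit, as in the module this snippet comes from:

from scitbx.array_family import flex

x_obs = flex.double(list(range(200))) / 199.0
y_obs = flex.exp(-3.0 * x_obs) + 0.02 * (flex.random_double(200) - 0.5)
n_terms = cross_validate_to_determine_number_of_terms(
    x_obs, y_obs, min_terms=4, max_terms=15, n_free=50, n_goes=3)
print("optimal number of Chebyshev terms:", n_terms)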
Example 19
    def get_ks_dist(self):
        self.dist_mat = []
        n_trials = len(self.fitters)
        self.saved_trials = n_trials
        for ii in range(n_trials):
            self.dist_mat.append(flex.double([0] * n_trials))

        for ii in range(n_trials):
            for jj in range(ii):
                d_cdf = self.cdfs[ii] - self.cdfs[jj]
                max_d_cdf = flex.max(flex.abs(d_cdf))
                self.dist_mat[ii][jj] = max_d_cdf
                self.dist_mat[jj][ii] = max_d_cdf

        average_mcdf = flex.double()
        for ii in range(n_trials):
            average_mcdf.append(flex.mean(self.dist_mat[ii]))

        self.best_index = flex.min_index(average_mcdf)
        self.dmax_best = self.d_array[self.best_index]
        self.mcdf_mean = average_mcdf[self.best_index]
        self.mcdf_var = flex.mean(
            flex.pow2(self.dist_mat[self.best_index] - self.mcdf_mean))
        self.mcdf_sigma = math.sqrt(self.mcdf_var)
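The pairwise distance used here is the two-sample Kolmogorov-Smirnov statistic: the maximum absolute difference between two sampled CDFs. With two toy CDFs:

from scitbx.array_family import flex

cdf_a = flex.double([0.10, 0.40, 0.80, 1.00])
cdf_b = flex.double([0.20, 0.30, 0.90, 1.00])
max_d_cdf = flex.max(flex.abs(cdf_a - cdf_b))  # = 0.10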
Example 20
def run(args):
    master_phil = iotbx.phil.parse(master_phil_str)
    processed = iotbx.phil.process_command_line(args=args,
                                                master_string=master_phil_str)
    args = processed.remaining_args
    work_params = processed.work.extract()

    x_offsets = work_params.x_offsets
    bg_range_min, bg_range_max = work_params.bg_range
    if work_params.plot_range is not None:
        x_min, x_max = work_params.plot_range
    else:
        x_min, x_max = (0, 385)

    print(bg_range_min, bg_range_max)
    if x_offsets is None:
        x_offsets = [0] * len(args)
    legend = work_params.legend
    linewidth = 2
    fontsize = 26
    xy_pairs = []
    colours = [
        "cornflowerblue", "darkmagenta", "darkgreen", "black", "red", "blue",
        "pink"
    ]
    colours[2] = "orangered"
    colours[1] = "olivedrab"
    min_background = 1e16
    #x_min, x_max = (0, 391)
    #x_min, x_max = (0, 360)
    #x_min, x_max = (200, 360)

    for i, filename in enumerate(args):
        print(filename)
        f = open(filename, 'r')  # text mode so line.startswith("#") sees str lines
        x, y = zip(*[
            line.split() for line in f.readlines() if not line.startswith("#")
        ])
        x = flex.double(flex.std_string(x))
        y = flex.double(flex.std_string(y))

        if work_params.smoothing.method is not None:
            savitzky_golay_half_window = work_params.smoothing.savitzky_golay.half_window
            savitzky_golay_degree = work_params.smoothing.savitzky_golay.degree
            fourier_cutoff = work_params.smoothing.fourier_filter_cutoff

            method = work_params.smoothing.method
            if method == "fourier_filter":
                assert work_params.smoothing.fourier_filter_cutoff is not None

            if method == "savitzky_golay":
                x, y = smoothing.savitzky_golay_filter(
                    x, y, savitzky_golay_half_window, savitzky_golay_degree)

            elif method == "fourier_filter":
                x, y = smooth_spectrum.fourier_filter(
                    x, y, cutoff_frequency=fourier_cutoff)

        x += x_offsets[i]
        y = y.select((x <= x_max) & (x > 0))
        x = x.select((x <= x_max) & (x > 0))
        bg_sel = (x > bg_range_min) & (x < bg_range_max)
        xy_pairs.append((x, y))
        min_background = min(min_background,
                             flex.mean(y.select(bg_sel)) / flex.max(y))
        y -= min_background
        print "Peak maximum at: %i" % int(x[flex.max_index(y)])
    for i, filename in enumerate(args):
        if legend is None:
            label = filename
        else:
            print(legend)
            assert len(legend) == len(args)
            label = legend[i]
        x, y = xy_pairs[i]
        if i == -1:
            x, y = interpolate(x, y)
            x, y = savitzky_golay_filter(x, y)
        #if i == 0:
        #y -= 10
        bg_sel = (x > bg_range_min) & (x < bg_range_max)
        y -= (flex.mean(y.select(bg_sel)) - min_background * flex.max(y))
        #y -= flex.min(y)
        y_min = flex.min(y.select(bg_sel))
        if i == -2:
            y += 0.2 * flex.max(y)
        print "minimum at: %i" % int(x[flex.min_index(y)]), flex.min(y)
        #print "fwhm: %.2f" %full_width_half_max(x, y)
        y /= flex.max(y)
        if len(colours) > i:
            pyplot.plot(x,
                        y,
                        label=label,
                        linewidth=linewidth,
                        color=colours[i])
        else:
            pyplot.plot(x, y, label=label, linewidth=linewidth)
    pyplot.ylabel("Intensity", fontsize=fontsize)
    pyplot.xlabel("Pixel column", fontsize=fontsize)
    if i > 0:
        # For some reason the line below causes a floating point error if we only
        # have one plot (i.e. i==0)
        legend = pyplot.legend(loc=2)
        for t in legend.get_texts():
            t.set_fontsize(fontsize)
    axes = pyplot.axes()
    for tick in axes.xaxis.get_ticklabels():
        tick.set_fontsize(20)
    for tick in axes.yaxis.get_ticklabels():
        tick.set_fontsize(20)
    pyplot.ylim(0, 1)
    pyplot.xlim(x_min, x_max)
    ax = pyplot.axes()
    #ax.xaxis.set_minor_locator(pyplot.MultipleLocator(5))
    #ax.yaxis.set_major_locator(pyplot.MultipleLocator(0.1))
    #ax.yaxis.set_minor_locator(pyplot.MultipleLocator(0.05))
    pyplot.show()
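The windowing throughout run() relies on flex boolean masks: comparisons on a flex.double return a flex.bool, masks combine with &, and select() extracts the matching elements. A small illustration of the bg_sel idiom:

from scitbx.array_family import flex

x = flex.double([1.0, 5.0, 10.0, 20.0])
y = flex.double([2.0, 4.0, 6.0, 8.0])
bg_sel = (x > 2.0) & (x < 12.0)      # flex.bool mask
print(list(y.select(bg_sel)))        # [4.0, 6.0]
print(flex.mean(y.select(bg_sel)))   # background estimate over the window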
Example 21
    def _label_clusters_first_pass(self, n_datasets, n_sym_ops):
        """First pass labelling of clusters.

        Labels points into clusters such that cluster contains exactly one copy
        of each dataset.

        Args:
          n_datasets (int): The number of datasets.
          n_sym_ops (int): The number of symmetry operations.

        Returns:
          cluster_labels (scitbx.array_family.flex.int): A label for each coordinate, labelled from
          0 .. n_sym_ops.
        """
        # initialise cluster labels: -1 signifies doesn't belong to a cluster
        cluster_labels = flex.int(self.coords.all()[0], -1)
        X_orig = self.coords.as_numpy_array()

        cluster_id = 0
        while cluster_labels.count(-1) > 0:
            dataset_ids = (flex.int_range(n_datasets * n_sym_ops) %
                           n_datasets).as_numpy_array()
            coord_ids = flex.int_range(dataset_ids.size).as_numpy_array()

            # select only those points that don't already belong to a cluster
            sel = np.where(cluster_labels == -1)
            X = X_orig[sel]
            dataset_ids = dataset_ids[sel]
            coord_ids = coord_ids[sel]

            # choose a high density point as seed for cluster
            nbrs = NearestNeighbors(n_neighbors=min(11, len(X)),
                                    algorithm="brute",
                                    metric="cosine").fit(X)
            distances, indices = nbrs.kneighbors(X)
            average_distance = flex.double(
                [dist[1:].mean() for dist in distances])
            i = flex.min_index(average_distance)

            d_id = dataset_ids[i]
            cluster = np.array([coord_ids[i]])
            cluster_dataset_ids = np.array([d_id])
            xis = np.array([X[i]])

            for j in range(n_datasets - 1):
                # select only those rows that don't correspond to a dataset already
                # present in current cluster
                sel = np.where(dataset_ids != d_id)
                X = X[sel]
                dataset_ids = dataset_ids[sel]
                coord_ids = coord_ids[sel]

                assert len(X) > 0

                # Find nearest neighbour in cosine-space to the current cluster centroid
                nbrs = NearestNeighbors(n_neighbors=min(1, len(X)),
                                        algorithm="brute",
                                        metric="cosine").fit(X)
                distances, indices = nbrs.kneighbors([xis.mean(axis=0)])
                k = indices[0][0]
                d_id = dataset_ids[k]
                cluster = np.append(cluster, coord_ids[k])
                cluster_dataset_ids = np.append(cluster_dataset_ids, d_id)
                xis = np.append(xis, [X[k]], axis=0)

            # label this cluster
            cluster_labels.set_selected(flex.size_t(cluster.tolist()),
                                        cluster_id)
            cluster_id += 1
        return cluster_labels
Example 22
def estimate_global_threshold(image, mask=None, plot=False):
    n_above_threshold = flex.size_t()
    threshold = flex.double()
    for i in range(1, 20):
        g = 1.5**i
        g = int(g)
        n_above_threshold.append((image > g).count(True))
        threshold.append(g)

    # Find the elbow point of the curve, in the same manner as that used by
    # distl spotfinder for resolution method 1 (Zhang et al 2006).
    # See also dials/algorithms/spot_finding/per_image_analysis.py

    x = threshold.as_double()
    y = n_above_threshold.as_double()
    slopes = (y[-1] - y[:-1]) / (x[-1] - x[:-1])
    p_m = flex.min_index(slopes)

    x1 = matrix.col((x[p_m], y[p_m]))
    x2 = matrix.col((x[-1], y[-1]))

    gaps = flex.double()
    v = matrix.col(((x2[1] - x1[1]), -(x2[0] - x1[0]))).normalize()

    for i in range(p_m, len(x)):
        x0 = matrix.col((x[i], y[i]))
        r = x1 - x0
        g = abs(v.dot(r))
        gaps.append(g)

    p_g = flex.max_index(gaps)

    x_g_ = x[p_g + p_m]
    y_g_ = y[p_g + p_m]

    # more conservative, choose point 2 left of the elbow point
    x_g = x[p_g + p_m - 2]
    # y_g = y[p_g + p_m - 2]

    if plot:
        from matplotlib import pyplot

        pyplot.figure(figsize=(16, 12))
        pyplot.scatter(threshold, n_above_threshold, marker="+")
        # for i in range(len(threshold)-1):
        #  pyplot.plot([threshold[i], threshold[-1]],
        #              [n_above_threshold[i], n_above_threshold[-1]])
        # for i in range(1, len(threshold)):
        #  pyplot.plot([threshold[0], threshold[i]],
        #              [n_above_threshold[0], n_above_threshold[i]])
        pyplot.plot([x_g, x_g], pyplot.ylim())
        pyplot.plot(
            [threshold[p_m], threshold[-1]],
            [n_above_threshold[p_m], n_above_threshold[-1]],
        )
        pyplot.plot([x_g_, threshold[-1]], [y_g_, n_above_threshold[-1]])
        pyplot.xlabel("Threshold")
        pyplot.ylabel("Number of pixels above threshold")
        pyplot.savefig("global_threshold.png")
        pyplot.clf()

    return x_g
Example 23
def estimate_global_threshold(image, mask=None):

  from scitbx.array_family import flex
  from scitbx import matrix

  n_above_threshold = flex.size_t()
  threshold = flex.double()
  for i in range(1, 20):
    g = 1.5**i
    g = int(g)
    n_above_threshold.append((image > g).count(True))
    threshold.append(g)

  # Find the elbow point of the curve, in the same manner as that used by
  # distl spotfinder for resolution method 1 (Zhang et al 2006).
  # See also dials/algorithms/spot_finding/per_image_analysis.py

  x = threshold.as_double()
  y = n_above_threshold.as_double()
  slopes = (y[-1] - y[:-1])/(x[-1] - x[:-1])
  p_m = flex.min_index(slopes)

  x1 = matrix.col((x[p_m], y[p_m]))
  x2 = matrix.col((x[-1], y[-1]))

  gaps = flex.double()
  v = matrix.col(((x2[1] - x1[1]), -(x2[0] - x1[0]))).normalize()

  for i in range(p_m, len(x)):
    x0 = matrix.col((x[i], y[i]))
    r = x1 - x0
    g = abs(v.dot(r))
    gaps.append(g)

  mv = flex.mean_and_variance(gaps)
  s = mv.unweighted_sample_standard_deviation()

  p_k = flex.max_index(gaps)
  g_k = gaps[p_k]
  p_g = p_k

  #x_g = x[p_g + p_m]
  #y_g = y[p_g + p_m]

  #x_g = x[p_g + p_m -1]
  #y_g = y[p_g + p_m -1]

  # more conservative, choose point 2 left of the elbow point
  x_g = x[p_g + p_m -2]
  y_g = y[p_g + p_m -2]

  #from matplotlib import pyplot
  #pyplot.scatter(threshold, n_above_threshold)
  ##for i in range(len(threshold)-1):
    ##pyplot.plot([threshold[i], threshold[-1]],
                ##[n_above_threshold[i], n_above_threshold[-1]])
  ##for i in range(1, len(threshold)):
    ##pyplot.plot([threshold[0], threshold[i]],
                ##[n_above_threshold[0], n_above_threshold[i]])
  #pyplot.plot(
    #[threshold[p_m], threshold[-1]], [n_above_threshold[p_m], n_above_threshold[-1]])
  #pyplot.plot(
    #[x_g, threshold[-1]], [y_g, n_above_threshold[-1]])
  #pyplot.show()

  return x_g
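A quick smoke test of estimate_global_threshold on a synthetic image (hypothetical values; the function only needs (image > g).count(True) to work on the image object):

from scitbx.array_family import flex

image = flex.double(100 * 100, 10.0)   # flat background of 10 counts
for k in range(0, 100 * 100, 997):
    image[k] = 5000.0                  # a sprinkling of bright pixels
image.reshape(flex.grid(100, 100))
print(estimate_global_threshold(image))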
Example 24
    def lookup(self, coefs, codes, ntop):
        global stdfile
        global outfilelog
        with open(outfilelog, "a") as filelog:
            print(self.prefix + '.sum', file=filelog)

        self.out = open(self.prefix + '.sum', 'w')
        self.coefs = []
        for c in coefs:
            self.coefs.append(c[0:self.nn_total])
        self.codes = codes
        self.ntop = ntop
        self.mean_ws = flex.sqrt(1.0 / flex.double(range(1, ntop + 1)))

        if (self.scan):

            self.rmax_list = flex.double()
            self.top_hits = []
            self.scores = []
            self.scales = []
            self.ave_scores = flex.double()
            #self.rmax_max = 3.14/self.data.q[0]
            self.rmax_max = self.rmax * 2.5
            self.rmax_min = max(self.rmax / 2.0, 1)
            with open(stdfile, "a") as log:
                print >> log, "   Search range of rmax  :   %5.2f A  ----  %5.2f A" % (
                    self.rmax_max, self.rmax_min)
            print "   Search range of rmax  :   %5.2f A  ----  %5.2f A" % (
                self.rmax_max, self.rmax_min)

            gss(self.score_at_rmax,
                self.rmax_min,
                self.rmax_max,
                eps=0.5,
                N=30,
                out=self.stdfile,
                monitor_progress=True)

            rmax_indx = flex.min_index(self.ave_scores)
            self.best_rmax = self.rmax_list[rmax_indx]
            self.best_models = self.top_hits[rmax_indx]
            with open(stdfile, "a") as log:
                print >> log, "   Best rmax found       :   %5.2f A" % self.best_rmax
            print "   Best rmax found       :   %5.2f A" % self.best_rmax

            print >> self.out, "Best Result from Golden Section Search:", self.best_rmax
            with open(stdfile, "a") as log:
                self.show_result(self.best_rmax, self.best_models,
                                 self.scores[rmax_indx], codes, self.out, log)
            #self.show_result2(self.best_rmax, self.best_models, self.scores[rmax_indx], codes,self.stdfile)
            # import threading
            # t = []
            # t.append(threading.Thread(target=self.show_result()))
            # t.append(threading.Thread(target=self.show_result2()))
            # for t1 in t:
            #   t1.setDaemon(True)
            #   t1.start()
            #   t1.join()
            self.plot_intensity(self.best_rmax,
                                self.best_models,
                                self.scores[rmax_indx],
                                self.coefs,
                                codes,
                                qmax=None,
                                scales=self.scales[rmax_indx])
            #self.plot_intensity(self.best_rmax, self.best_models, self.scores[rmax_indx], self.coefs, codes, qmax=0.5,scales=self.scales[rmax_indx])
            self.print_rmax_profile(self.rmax_list, self.ave_scores)
            with open(self.outfilelog, "a") as f:
                print(self.prefix + '.sta', file=f)
            self.summary(self.top_hits,
                         self.scores,
                         comment="----Statistics from Golden Section Scan----")

            self.local_scan(int(self.best_rmax + 0.5), local=5)

        else:
            print()
            log = open(stdfile, "a")
            print("   Not performing search in rmax. Using fixed value of rmax=%5.3f"
                  % self.rmax, file=log)
            log.close()
            print()
            self.score_at_rmax(self.rmax)
            self.plot_intensity(self.best_rmax,
                                self.best_models,
                                self.scores,
                                self.coefs,
                                codes,
                                qmax=None)

        self.out.close()
Example 25
    def seed_clustering(self):
        eps = 1e-6
        X_orig = self.coords.as_numpy_array()

        import numpy as np
        from scipy.cluster import hierarchy
        import scipy.spatial.distance as ssd
        from sklearn.neighbors import NearestNeighbors
        from sklearn import metrics

        # initialise cluster labels: -1 signifies doesn't belong to a cluster
        self.cluster_labels = flex.int(self.coords.all()[0], -1)

        cluster_id = 0
        while self.cluster_labels.count(-1) > 0:
            dataset_ids = (flex.int_range(
                len(self.datasets) * len(self.target.get_sym_ops())) %
                           len(self.datasets)).as_numpy_array()
            coord_ids = flex.int_range(dataset_ids.size).as_numpy_array()

            # select only those points that don't already belong to a cluster
            sel = np.where(self.cluster_labels == -1)
            X = X_orig[sel]
            dataset_ids = dataset_ids[sel]
            coord_ids = coord_ids[sel]

            # choose a high density point as seed for cluster
            nbrs = NearestNeighbors(n_neighbors=min(11, len(X)),
                                    algorithm='brute',
                                    metric='cosine').fit(X)
            distances, indices = nbrs.kneighbors(X)
            average_distance = flex.double(
                [dist[1:].mean() for dist in distances])
            i = flex.min_index(average_distance)

            d_id = dataset_ids[i]
            cluster = np.array([coord_ids[i]])
            cluster_dataset_ids = np.array([d_id])
            xis = np.array([X[i]])

            for j in range(len(self.datasets) - 1):
                # select only those rows that don't correspond to a dataset already
                # present in current cluster
                sel = np.where(dataset_ids != d_id)
                X = X[sel]
                dataset_ids = dataset_ids[sel]
                coord_ids = coord_ids[sel]

                assert len(X) > 0

                # Find nearest neighbour in cosine-space to the current cluster centroid
                nbrs = NearestNeighbors(n_neighbors=min(1, len(X)),
                                        algorithm='brute',
                                        metric='cosine').fit(X)
                distances, indices = nbrs.kneighbors([xis.mean(axis=0)])
                k = indices[0][0]
                d_id = dataset_ids[k]
                cluster = np.append(cluster, coord_ids[k])
                cluster_dataset_ids = np.append(cluster_dataset_ids, d_id)
                xis = np.append(xis, [X[k]], axis=0)

            # label this cluster
            self.cluster_labels.set_selected(flex.size_t(cluster.tolist()),
                                             cluster_id)
            cluster_id += 1

        if flex.max(self.cluster_labels) == 0:
            # assume single cluster
            return self.cluster_labels

        cluster_centroids = []
        X = self.coords.as_numpy_array()
        for i in set(self.cluster_labels):
            sel = self.cluster_labels == i
            cluster_centroids.append(X[(
                self.cluster_labels == i).iselection().as_numpy_array()].mean(
                    axis=0))

        # hierarchical clustering of cluster centroids, using cosine metric
        dist_mat = ssd.pdist(cluster_centroids, metric='cosine')
        linkage_matrix = hierarchy.linkage(dist_mat, method='average')

        # compare valid equal-sized clustering using silhouette scores
        # https://en.wikipedia.org/wiki/Silhouette_(clustering)
        # http://scikit-learn.org/stable/auto_examples/cluster/plot_kmeans_silhouette_analysis.html
        distances = linkage_matrix[::, 2]
        distances = np.insert(distances, 0, 0)
        silhouette_scores = flex.double()
        thresholds = flex.double()
        n_clusters = flex.size_t()
        for threshold in distances[1:]:
            cluster_labels = self.cluster_labels.deep_copy()
            labels = hierarchy.fcluster(linkage_matrix,
                                        threshold - eps,
                                        criterion='distance').tolist()
            counts = [labels.count(l) for l in set(labels)]
            if len(set(counts)) > 1:
                # only equal-sized clusters are valid
                continue

            n = len(set(labels))
            if n == 1: continue
            for i in range(len(labels)):
                cluster_labels.set_selected(self.cluster_labels == i,
                                            int(labels[i] - 1))
            silhouette_avg = metrics.silhouette_score(
                X, cluster_labels.as_numpy_array(), metric='cosine')
            # Compute the silhouette scores for each sample
            sample_silhouette_values = metrics.silhouette_samples(
                X, cluster_labels.as_numpy_array(), metric='cosine')
            silhouette_avg = sample_silhouette_values.mean()
            silhouette_scores.append(silhouette_avg)
            thresholds.append(threshold)
            n_clusters.append(n)

            count_negative = (sample_silhouette_values < 0).sum()
            logger.info('Clustering:')
            logger.info('  Number of clusters: %i' % n)
            logger.info('  Threshold score: %.3f (%.1f deg)' %
                        (threshold, math.degrees(math.acos(1 - threshold))))
            logger.info('  Silhouette score: %.3f' % silhouette_avg)
            logger.info('  -ve silhouette scores: %.1f%%' %
                        (100 * count_negative / sample_silhouette_values.size))

            if self.params.save_plot:
                plot_silhouette(sample_silhouette_values,
                                cluster_labels.as_numpy_array(),
                                file_name='%ssilhouette_%i.png' %
                                (self.params.plot_prefix, n))

        if self.params.cluster.seed.n_clusters is Auto:
            idx = flex.max_index(silhouette_scores)
        else:
            idx = flex.first_index(n_clusters,
                                   self.params.cluster.seed.n_clusters)
            if idx is None:
                raise Sorry('No valid clustering with %i clusters' %
                            self.params.cluster.seed.n_clusters)

        if (self.params.cluster.seed.n_clusters is Auto
                and silhouette_scores[idx] <
                self.params.cluster.seed.min_silhouette_score):
            # assume single cluster
            self.cluster_labels = flex.int(self.cluster_labels.size(), 0)
        else:
            threshold = thresholds[idx] - eps
            labels = hierarchy.fcluster(linkage_matrix,
                                        threshold,
                                        criterion='distance')
            cluster_labels = flex.double(self.cluster_labels.size(), -1)
            for i in range(len(labels)):
                cluster_labels.set_selected(self.cluster_labels == i,
                                            labels[i] - 1)
            self.cluster_labels = cluster_labels

        if self.params.save_plot:
            plot_matrix(1 - ssd.squareform(dist_mat),
                        linkage_matrix,
                        '%sseed_clustering_cos_angle_matrix.png' %
                        self.params.plot_prefix,
                        color_threshold=threshold)
            plot_dendrogram(linkage_matrix,
                            '%sseed_clustering_cos_angle_dendrogram.png' %
                            self.params.plot_prefix,
                            color_threshold=threshold)

        return self.cluster_labels
Example 26
    def __init__(self, datasets, params):
        self.datasets = datasets
        self.params = params

        self.input_space_group = None
        for dataset in datasets:
            if self.input_space_group is None:
                self.input_space_group = dataset.space_group()
            else:
                assert dataset.space_group() == self.input_space_group

        if self.params.dimensions is Auto:
            dimensions = None
        else:
            dimensions = self.params.dimensions
        lattice_group = None
        if self.params.lattice_group is not None:
            lattice_group = self.params.lattice_group.group()
        self.target = target.Target(
            self.datasets,
            min_pairs=self.params.min_pairs,
            lattice_group=lattice_group,
            dimensions=dimensions,
            verbose=self.params.verbose,
            weights=self.params.weights,
            nproc=self.params.nproc,
        )
        if self.params.dimensions is Auto:
            dimensions = []
            functional = []
            explained_variance = []
            explained_variance_ratio = []
            for dim in range(1, self.target.dim + 1):
                self.target.set_dimensions(dim)
                self.optimise()
                logger.info('Functional: %g' % self.minimizer.f)
                self.principal_component_analysis()
                dimensions.append(dim)
                functional.append(self.minimizer.f)
                explained_variance.append(self.explained_variance)
                explained_variance_ratio.append(self.explained_variance_ratio)

            # Find the elbow point of the curve, in the same manner as that used by
            # distl spotfinder for resolution method 1 (Zhang et al 2006).
            # See also dials/algorithms/spot_finding/per_image_analysis.py

            from scitbx import matrix
            x = flex.double(dimensions)
            y = flex.double(functional)
            slopes = (y[-1] - y[:-1]) / (x[-1] - x[:-1])
            p_m = flex.min_index(slopes)

            x1 = matrix.col((x[p_m], y[p_m]))
            x2 = matrix.col((x[-1], y[-1]))

            gaps = flex.double()
            v = matrix.col(((x2[1] - x1[1]), -(x2[0] - x1[0]))).normalize()

            for i in range(p_m, len(x)):
                x0 = matrix.col((x[i], y[i]))
                r = x1 - x0
                g = abs(v.dot(r))
                gaps.append(g)

            p_k = flex.max_index(gaps)
            g_k = gaps[p_k]
            p_g = p_k

            x_g = x[p_g + p_m]
            y_g = y[p_g + p_m]

            logger.info('Best number of dimensions: %i' % x_g)
            self.target.set_dimensions(int(x_g))

            if params.save_plot:
                from matplotlib import pyplot as plt
                fig = plt.figure(figsize=(10, 8))
                plt.clf()
                plt.plot(dimensions, functional)
                plt.plot([x_g, x_g], plt.ylim())
                plt.xlabel('Dimensions')
                plt.ylabel('Functional')
                plt.savefig('%sfunctional_vs_dimension.png' %
                            params.plot_prefix)

                plt.clf()
                for dim, expl_var in zip(dimensions, explained_variance):
                    plt.plot(range(1, dim + 1), expl_var, label='%s' % dim)
                plt.plot([x_g, x_g], plt.ylim())
                plt.xlabel('Dimension')
                plt.ylabel('Explained variance')
                plt.savefig('%sexplained_variance_vs_dimension.png' %
                            params.plot_prefix)

                plt.clf()
                for dim, expl_var_ratio in zip(dimensions,
                                               explained_variance_ratio):
                    plt.plot(range(1, dim + 1),
                             expl_var_ratio,
                             label='%s' % dim)
                plt.plot([x_g, x_g], plt.ylim())
                plt.xlabel('Dimension')
                plt.ylabel('Explained variance ratio')
                plt.savefig('%sexplained_variance_ratio_vs_dimension.png' %
                            params.plot_prefix)
                plt.close(fig)

        self.optimise()
        self.principal_component_analysis()

        self.cosine_analysis()
        self.cluster_analysis()
        if self.params.save_plot:
            self.plot()
Example 28
    def scan(self):
        fft = fftpack.complex_to_complex_2d(self.ngrid, self.ngrid)
        inversion = False
        for beta in self.beta:
            self.cc_obj.set_beta(beta)
            mm = self.cc_obj.mm_coef(0, inversion)
            if self.pad > 0:
                mm = self.cc_obj.mm_coef(self.pad, inversion)
            fft_input = mm
            scores = fft.backward(fft_input).as_1d()
            self.scores = self.scores.concatenate(-flex.norm(scores))
        self.best_indx = flex.min_index(self.scores)
        self.best_score = smath.sqrt(-self.scores[self.best_indx])

        if self.check_inversion:
            ### Inversion of the Spherical Harmonics ###
            inversion = True
            inversion_scores = flex.double()
            for beta in self.beta:
                self.cc_obj.set_beta(beta)
                mm = self.cc_obj.mm_coef(0, inversion)
                if self.pad > 0:
                    mm = self.cc_obj.mm_coef(self.pad, inversion)
                fft_input = mm.deep_copy()
                scores = fft.backward(fft_input).as_1d()
                inversion_scores = inversion_scores.concatenate(-flex.norm(scores))
            inv_best_indx = flex.min_index(inversion_scores)
            inv_best_score = smath.sqrt(-inversion_scores[inv_best_indx])

            if inv_best_score < self.best_score:
                self.scores = inversion_scores
                self.best_indx = inv_best_indx
                self.best_score = inv_best_score
                self.inversion = True
            else:
                self.inversion = False

        b = self.best_indx // (self.ngrid * self.ngrid)
        a = (self.best_indx - self.ngrid * self.ngrid * b) // self.ngrid
        g = self.best_indx - self.ngrid * self.ngrid * b - self.ngrid * a

        b = self.beta[b]
        g = smath.pi * 2.0 * (float(g) / (self.ngrid - 1))
        a = smath.pi * 2.0 * (float(a) / (self.ngrid - 1))

        self.best_ea = (a, b, g)

        self.find_top(self.topn)
        if self.refine:
            self.refined = []
            self.refined_moving_nlm = []
            self.refined_score = flex.double()
            for t in self.top_align:
                r = self.run_simplex(t)
                self.refined.append(r)
                self.refined_score.append(self.get_cc(self.target(r)))
                self.refined_moving_nlm.append(self.cc_obj.rotate_moving_obj(r[0], r[1], r[2], self.inversion))
            orders = flex.sort_permutation(self.refined_score, True)
            self.best_score = -self.refined_score[orders[0]]

            # show the refined results
            if self.show_result:
                print "refined results:"
                for ii in range(self.topn):
                    o = orders[ii]
                    o = ii  # overrides the sorted index: prints trials in their original order
                    print(ii, ":", list(self.refined[o]), ":", self.refined_score[o])
            ea = self.refined[orders[0]]
            self.best_ea = (ea[0], ea[1], ea[2])
            self.moving_nlm = self.cc_obj.rotate_moving_obj(ea[0], ea[1], ea[2], self.inversion)
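The flat-index decode above (best_indx into beta, alpha, gamma grid positions) is plain mixed-radix arithmetic over ngrid * ngrid entries per beta slice; a worked check with hypothetical numbers:

ngrid = 4
best_indx = 27
b = best_indx // (ngrid * ngrid)               # 27 // 16 = 1  (beta slice)
a = (best_indx - ngrid * ngrid * b) // ngrid   # 11 // 4  = 2  (alpha row)
g = best_indx - ngrid * ngrid * b - ngrid * a  # 27 - 16 - 8 = 3  (gamma column)
assert (b, a, g) == (1, 2, 3)
assert b * ngrid * ngrid + a * ngrid + g == best_indx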
Example 29
def optimize_origin_offset_local_scope(
    experiments,
    reflection_lists,
    solution_lists,
    amax_lists,
    mm_search_scope=4,
    wide_search_binning=1,
    plot_search_scope=False,
):
    """Local scope: find the optimal origin-offset closest to the current overall detector position
    (local minimum, simple minimization)"""

    beam = experiments[0].beam
    s0 = matrix.col(beam.get_s0())
    # construct two vectors that are perpendicular to the beam.  Gives a basis for refining beam
    axis = matrix.col((1, 0, 0))
    beamr0 = s0.cross(axis).normalize()
    beamr1 = beamr0.cross(s0).normalize()
    beamr2 = beamr1.cross(s0).normalize()

    assert approx_equal(s0.dot(beamr1), 0.0)
    assert approx_equal(s0.dot(beamr2), 0.0)
    assert approx_equal(beamr2.dot(beamr1), 0.0)
    # so the orthonormal vectors are s0, beamr1 and beamr2

    if mm_search_scope:
        plot_px_sz = experiments[0].detector[0].get_pixel_size()[0]
        plot_px_sz *= wide_search_binning
        grid = max(1, int(mm_search_scope / plot_px_sz))
        widegrid = 2 * grid + 1

        def get_experiment_score_for_coord(x, y):
            new_origin_offset = x * plot_px_sz * beamr1 + y * plot_px_sz * beamr2
            return sum(
                _get_origin_offset_score(
                    new_origin_offset,
                    solution_lists[i],
                    amax_lists[i],
                    reflection_lists[i],
                    experiment,
                ) for i, experiment in enumerate(experiments))

        scores = flex.double(
            get_experiment_score_for_coord(x, y)
            for y in range(-grid, grid + 1) for x in range(-grid, grid + 1))

        def igrid(x):
            return x - (widegrid // 2)

        idxs = [igrid(i) * plot_px_sz for i in range(widegrid)]

        # if there are several similarly high scores, then choose the closest
        # one to the current beam centre
        potential_offsets = flex.vec3_double()
        if scores.all_eq(0):
            raise Sorry("No valid scores")
        sel = scores > (0.9 * flex.max(scores))
        for i in sel.iselection():
            offset = (idxs[i % widegrid]) * beamr1 + (idxs[i //
                                                           widegrid]) * beamr2
            potential_offsets.append(offset.elems)
            # print offset.length(), scores[i]
        wide_search_offset = matrix.col(potential_offsets[flex.min_index(
            potential_offsets.norms())])

    else:
        wide_search_offset = None

    # Do a simplex minimization
    class simplex_minimizer(object):
        def __init__(self, wide_search_offset):
            self.n = 2
            self.wide_search_offset = wide_search_offset
            self.optimizer = simplex_opt(
                dimension=self.n,
                matrix=[flex.random_double(self.n) for _ in range(self.n + 1)],
                evaluator=self,
                tolerance=1e-7,
            )
            self.x = self.optimizer.get_solution()
            self.offset = self.x[0] * 0.2 * beamr1 + self.x[1] * 0.2 * beamr2
            if self.wide_search_offset is not None:
                self.offset += self.wide_search_offset

        def target(self, vector):
            trial_origin_offset = vector[0] * 0.2 * beamr1 + vector[
                1] * 0.2 * beamr2
            if self.wide_search_offset is not None:
                trial_origin_offset += self.wide_search_offset
            target = 0
            for i, experiment in enumerate(experiments):
                target -= _get_origin_offset_score(
                    trial_origin_offset,
                    solution_lists[i],
                    amax_lists[i],
                    reflection_lists[i],
                    experiment,
                )
            return target

    new_offset = simplex_minimizer(wide_search_offset).offset

    if plot_search_scope:
        plot_px_sz = experiments[0].detector[0].get_pixel_size()[0]
        grid = max(1, int(mm_search_scope / plot_px_sz))
        scores = flex.double()
        for y in range(-grid, grid + 1):
            for x in range(-grid, grid + 1):
                new_origin_offset = x * plot_px_sz * beamr1 + y * plot_px_sz * beamr2
                score = 0
                for i, experiment in enumerate(experiments):
                    score += _get_origin_offset_score(
                        new_origin_offset,
                        solution_lists[i],
                        amax_lists[i],
                        reflection_lists[i],
                        experiment,
                    )
                scores.append(score)

        def show_plot(widegrid, excursi):
            excursi.reshape(flex.grid(widegrid, widegrid))
            idx_max = flex.max_index(excursi)

            def igrid(x):
                return x - (widegrid // 2)

            idxs = [igrid(i) * plot_px_sz for i in range(widegrid)]

            from matplotlib import pyplot as plt

            plt.figure()
            CS = plt.contour(
                [igrid(i) * plot_px_sz for i in range(widegrid)],
                [igrid(i) * plot_px_sz for i in range(widegrid)],
                excursi.as_numpy_array(),
            )
            plt.clabel(CS, inline=1, fontsize=10, fmt="%6.3f")
            plt.title("Wide scope search for detector origin offset")
            plt.scatter([0.0], [0.0], color="g", marker="o")
            plt.scatter([new_offset[0]], [new_offset[1]],
                        color="r",
                        marker="*")
            plt.scatter(
                [idxs[idx_max % widegrid]],
                [idxs[idx_max // widegrid]],
                color="k",
                marker="s",
            )
            plt.gca().set_aspect("equal")
            plt.xlabel("offset (mm) along beamr1 vector")
            plt.ylabel("offset (mm) along beamr2 vector")
            plt.savefig("search_scope.png")

            # offset at the grid maximum, returned for reference
            trial_origin_offset = (idxs[idx_max % widegrid] * beamr1 +
                                   idxs[idx_max // widegrid] * beamr2)
            return trial_origin_offset

        show_plot(widegrid=2 * grid + 1, excursi=scores)

    new_experiments = copy.deepcopy(experiments)
    for expt in new_experiments:
        expt.detector = dps_extended.get_new_detector(expt.detector,
                                                      new_offset)
    return new_experiments
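
For reference, the scitbx simplex optimizer used above expects an evaluator object exposing a target(vector) method, and get_solution() returns the best vertex. A minimal, self-contained sketch, assuming a standard cctbx/scitbx installation; the parabola_evaluator class and its quadratic target are invented for illustration:

from scitbx.simplex import simplex_opt
from scitbx.array_family import flex

class parabola_evaluator(object):
    # evaluator objects only need a target(vector) method
    def target(self, vector):
        # minimum at (1, 2)
        return (vector[0] - 1.0) ** 2 + (vector[1] - 2.0) ** 2

starting_simplex = [flex.random_double(2) for _ in range(3)]  # n + 1 vertices
opt = simplex_opt(
    dimension=2,
    matrix=starting_simplex,
    evaluator=parabola_evaluator(),
    tolerance=1e-7,
)
print(list(opt.get_solution()))  # approximately [1.0, 2.0]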
Example 30
def run(args):
  master_phil = iotbx.phil.parse(master_phil_str)
  processed = iotbx.phil.process_command_line(
    args=args, master_string=master_phil_str)
  args = processed.remaining_args
  work_params = processed.work.extract()

  x_offsets = work_params.x_offsets
  bg_range_min, bg_range_max = work_params.bg_range
  if work_params.plot_range is not None:
    x_min, x_max = work_params.plot_range
  else:
    x_min, x_max = (0, 385)

  print(bg_range_min, bg_range_max)
  if x_offsets is None:
    x_offsets = [0]*len(args)
  legend = work_params.legend
  linewidth = 2
  fontsize = 26
  xy_pairs = []
  colours = ["cornflowerblue", "darkmagenta", "darkgreen", "black", "red", "blue", "pink"]
  colours[2] = "orangered"
  colours[1] = "olivedrab"
  min_background = 1e16
  #x_min, x_max = (0, 391)
  #x_min, x_max = (0, 360)
  #x_min, x_max = (200, 360)

  for i, filename in enumerate(args):
    print(filename)
    with open(filename) as f:
      x, y = zip(*[line.split() for line in f if not line.startswith("#")])
    x = flex.double(flex.std_string(x))
    y = flex.double(flex.std_string(y))

    if work_params.smoothing.method is not None:
      savitzky_golay_half_window = work_params.smoothing.savitzky_golay.half_window
      savitzky_golay_degree = work_params.smoothing.savitzky_golay.degree
      fourier_cutoff = work_params.smoothing.fourier_filter_cutoff

      method = work_params.smoothing.method
      if method == "fourier_filter":
        assert work_params.smoothing.fourier_filter_cutoff is not None

      if method == "savitzky_golay":
        x, y = smoothing.savitzky_golay_filter(
          x, y, savitzky_golay_half_window, savitzky_golay_degree)

      elif method == "fourier_filter":
        x, y = smooth_spectrum.fourier_filter(x, y, cutoff_frequency=fourier_cutoff)


    x += x_offsets[i]
    y = y.select((x <= x_max) & (x > 0))
    x = x.select((x <= x_max) & (x > 0))
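    # boolean mask selecting the background region used for baseline estimation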
    bg_sel = (x > bg_range_min) & (x < bg_range_max)
    xy_pairs.append((x,y))
    min_background = min(min_background, flex.mean(y.select(bg_sel))/flex.max(y))
    y -= min_background
    print "Peak maximum at: %i" %int(x[flex.max_index(y)])
  for i, filename in enumerate(args):
    if legend is None:
      label = filename
    else:
      print(legend)
      assert len(legend) == len(args)
      label = legend[i]
    x, y = xy_pairs[i]
    if i == -1:
      x, y = interpolate(x, y)
      x, y = savitzky_golay_filter(x, y)
    #if i == 0:
      #y -= 10
    bg_sel = (x > bg_range_min) & (x < bg_range_max)
    y -= (flex.mean(y.select(bg_sel)) - min_background*flex.max(y))
    #y -= flex.min(y)
    y_min = flex.min(y.select(bg_sel))
    if i == -2:
      y += 0.2 * flex.max(y)
    print "minimum at: %i" %int(x[flex.min_index(y)]), flex.min(y)
    #print "fwhm: %.2f" %full_width_half_max(x, y)
    y /= flex.max(y)
    if len(colours) > i:
      pyplot.plot(x, y, label=label, linewidth=linewidth, color=colours[i])
    else:
      pyplot.plot(x, y, label=label, linewidth=linewidth)
  pyplot.ylabel("Intensity", fontsize=fontsize)
  pyplot.xlabel("Pixel column", fontsize=fontsize)
  if i > 0:
    # For some reason the line below causes a floating point error if we only
    # have one plot (i.e. i==0)
    legend = pyplot.legend(loc=2)
    for t in legend.get_texts():
      t.set_fontsize(fontsize)
  axes = pyplot.gca()
  for tick in axes.xaxis.get_ticklabels():
    tick.set_fontsize(20)
  for tick in axes.yaxis.get_ticklabels():
    tick.set_fontsize(20)
  pyplot.ylim(0,1)
  pyplot.xlim(x_min, x_max)
  ax = pyplot.gca()
  #ax.xaxis.set_minor_locator(pyplot.MultipleLocator(5))
  #ax.yaxis.set_major_locator(pyplot.MultipleLocator(0.1))
  #ax.yaxis.set_minor_locator(pyplot.MultipleLocator(0.05))
  pyplot.show()
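
The boolean-mask pattern used throughout this example (build a flex.bool with comparison operators, combine with &, then select) can be checked in isolation; a minimal sketch with made-up values:

from scitbx.array_family import flex

x = flex.double([1, 2, 3, 4, 5])
y = flex.double([10, 20, 30, 40, 50])
sel = (x > 1.5) & (x <= 4.0)      # flex.bool mask
print(list(y.select(sel)))        # [20.0, 30.0, 40.0]
print(flex.mean(y.select(sel)))   # 30.0
print(flex.max_index(y))          # 4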
Example 31
  def scan( self ):
    fft = fftpack.complex_to_complex_2d( self.ngrid, self.ngrid )
    inversion = False
    for beta in self.beta:
      self.cc_obj.set_beta( beta )
      if( self.pad > 0):
        mm = self.cc_obj.mm_coef(self.pad, inversion)
      else:
        mm = self.cc_obj.mm_coef(0,inversion)
      fft_input = mm
      scores = fft.backward( fft_input ).as_1d()
      self.scores = self.scores.concatenate( -flex.norm( scores )  )
    self.best_indx = flex.min_index( self.scores )
    self.best_score = math.sqrt( -self.scores[ self.best_indx ])


    if self.check_inversion:
      ### Inversion of the Spherical Harmonics ###
      inversion = True
      inversion_scores = flex.double()
      for beta in self.beta:
        self.cc_obj.set_beta( beta )
        if( self.pad > 0):
          mm = self.cc_obj.mm_coef(self.pad, inversion)
        else:
          mm = self.cc_obj.mm_coef(0,inversion)
        fft_input = mm.deep_copy()
        scores = fft.backward( fft_input ).as_1d()
        inversion_scores = inversion_scores.concatenate( -flex.norm( scores )  )
      inv_best_indx = flex.min_index( inversion_scores )
      inv_best_score = math.sqrt(-inversion_scores[ inv_best_indx ] )

      if( inv_best_score < self.best_score ):
        self.scores = inversion_scores
        self.best_indx = inv_best_indx
        self.best_score = inv_best_score
        self.inversion =  True
      else:
        self.inversion = False



    # unravel the flat best_indx into (beta, alpha, gamma) grid indices;
    # beta varies slowest, gamma fastest
    b = self.best_indx//(self.ngrid*self.ngrid)
    a = (self.best_indx - self.ngrid*self.ngrid*b ) // self.ngrid
    g = self.best_indx - self.ngrid*self.ngrid*b - self.ngrid*a

    # map the grid indices to Euler angles: beta from the sampled list,
    # alpha and gamma on a uniform [0, 2*pi] grid
    b = self.beta[b]
    g = math.pi*2.0 *( float(g)/(self.ngrid-1) )
    a = math.pi*2.0 *( float(a)/(self.ngrid-1) )

    self.best_ea = (a, b, g )

    self.find_top( self.topn )
    if( self.refine ):
      self.refined = []
      self.refined_moving_nlm = []
      self.refined_score = flex.double()
      for t in self.top_align:
        r = self.run_simplex( t )
        self.refined.append ( r )
        self.refined_score.append( self.get_cc( self.target( r ) ) )
        self.refined_moving_nlm.append(  self.cc_obj.rotate_moving_obj( r[0],r[1], r[2], self.inversion )  )
      orders = flex.sort_permutation( self.refined_score, True )
      self.best_score = -self.refined_score[orders[0]]


      # show the refined results, best first
      if( self.show_result ):
        print("refined results:")
        for ii in range( self.topn ):
          o = orders[ii]
          print(ii, ":", list( self.refined[o] ), ":", self.refined_score[o])
      ea = self.refined[ orders[0] ]
      self.best_ea = (ea[0], ea[1], ea[2] )
      self.moving_nlm = self.cc_obj.rotate_moving_obj( ea[0], ea[1], ea[2], self.inversion )
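
The flat-index decomposition in scan() above can also be checked in isolation; a small sketch with an illustrative grid (nbeta and ngrid chosen arbitrarily):

from scitbx.array_family import flex

ngrid, nbeta = 4, 2
scores = flex.random_double(nbeta * ngrid * ngrid)
best = flex.min_index(scores)
b = best // (ngrid * ngrid)                  # slowest-varying: beta slice
a = (best - ngrid * ngrid * b) // ngrid      # middle: alpha index
g = best - ngrid * ngrid * b - ngrid * a     # fastest-varying: gamma index
assert scores[(b * ngrid + a) * ngrid + g] == flex.min(scores)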
Example 32
    def lookup(self, coefs, codes, ntop):
        self.out = open(self.prefix + '.sum', 'w')
        self.coefs = []
        for c in coefs:
            self.coefs.append(c[0:self.nlm_total])
        self.codes = codes
        self.ntop = ntop
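        # elementwise 1/sqrt(k) for k = 1..ntop, weighting the ranked top hits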
        self.mean_ws = flex.sqrt(1.0 / flex.double(range(1, ntop + 1)))

        if (self.scan):

            self.rmax_list = flex.double()
            self.top_hits = []
            self.scores = []
            self.scales = []
            self.ave_scores = flex.double()
            #self.rmax_max = 3.14/self.data.q[0]
            self.rmax_max = self.rmax * 2.5
            self.rmax_min = max(self.rmax / 2.0, 1)
            print "   Search range of rmax  :   %5.2f A  ----  %5.2f A" % (
                self.rmax_max, self.rmax_min)
            gss(self.score_at_rmax,
                self.rmax_min,
                self.rmax_max,
                eps=0.5,
                N=30,
                monitor_progress=True)
            rmax_indx = flex.min_index(self.ave_scores)
            self.best_rmax = self.rmax_list[rmax_indx]
            self.best_models = self.top_hits[rmax_indx]
            print "   Best rmax found       :   %5.2f A" % self.best_rmax

            print >> self.out, "Best Result from Golden Section Search:", self.best_rmax
            self.show_result(self.best_rmax, self.best_models,
                             self.scores[rmax_indx], codes, self.out)
            self.plot_intensity(self.best_rmax,
                                self.best_models,
                                self.scores[rmax_indx],
                                self.coefs,
                                codes,
                                qmax=None,
                                scales=self.scales[rmax_indx])
            self.print_rmax_profile(self.rmax_list, self.ave_scores)
            self.summary(self.top_hits,
                         self.scores,
                         comment="----Statistics from Golden Section Scan----")

            self.local_scan(int(self.best_rmax + 0.5), local=5)

        else:
            print()
            print("   Not performing a search in rmax; using fixed value rmax=%5.3f" % self.rmax)
            print()
            self.score_at_rmax(self.rmax)
            self.plot_intensity(self.best_rmax,
                                self.best_models,
                                self.scores,
                                self.coefs,
                                codes,
                                qmax=None)

        self.out.close()
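
The gss() call above is the library's own golden-section routine; for reference, a generic golden-section minimizer (not the library implementation, and written here without caching the probe evaluations for clarity) looks like this:

import math

def golden_section_search(f, a, b, eps=1e-5):
    """Minimize a unimodal f on [a, b] to within eps."""
    invphi = (math.sqrt(5.0) - 1.0) / 2.0  # 1/phi ~ 0.618
    c, d = b - invphi * (b - a), a + invphi * (b - a)
    while abs(b - a) > eps:
        if f(c) < f(d):      # minimum lies in [a, d]
            b, d = d, c
            c = b - invphi * (b - a)
        else:                # minimum lies in [c, b]
            a, c = c, d
            d = a + invphi * (b - a)
    return 0.5 * (a + b)

print(golden_section_search(lambda x: (x - 2.0) ** 2, 0.0, 5.0))  # ~2.0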