Ejemplo n.º 1
0
def tst_outliers_find_posterior_mode():
    """Check the posterior modes from likelihood_ratio_outlier_test.

    Two ten-reflection scenarios are run, both with fobs fixed at 3, zero
    sigmas, unit epsilon and unit beta:

    * acentric, alpha = 0.99: fcalc / mode must be 1 to within 0.05;
    * centric, alpha = 0.099: mode / fcalc must be 0.099 to within 0.001.
    """
    n_refl = 10

    def build_test(calculated, is_centric, alpha_value):
        # Every scenario gets its own freshly built input arrays.
        observed = flex.double([3] * n_refl)
        zero_sigmas = flex.double([0] * n_refl)
        unit_epsilon = flex.double([1] * n_refl)
        centric_flags = flex.bool([is_centric] * n_refl)
        unit_beta = flex.double([1] * n_refl)
        alphas = flex.double([alpha_value] * n_refl)
        return scaling.likelihood_ratio_outlier_test(
            observed, zero_sigmas, calculated, unit_epsilon, centric_flags,
            alphas, unit_beta)

    # Acentric reflections: with alpha close to 1 the posterior mode
    # should be very close to fcalc.
    acentric_fcalc = flex.double(range(n_refl)) * 10 + 10
    acentric_test = build_test(acentric_fcalc, False, 0.99)
    acentric_modes = acentric_test.posterior_mode()
    for f_calc, f_mode in zip(acentric_fcalc, acentric_modes):
        assert approx_equal(f_calc / f_mode, 1, eps=0.05)

    # Centric reflections: the mode is expected at alpha * fcalc.
    centric_fcalc = flex.double(range(n_refl)) * 100 + 100
    centric_test = build_test(centric_fcalc, True, 0.099)
    centric_modes = centric_test.posterior_mode()
    for f_calc, f_mode in zip(centric_fcalc, centric_modes):
        assert approx_equal(f_mode / f_calc, 0.099, eps=0.001)
Ejemplo n.º 2
0
def tst_outliers_compare_mode_mean():
  """Compare posterior mode and mean of likelihood_ratio_outlier_test.

  1000 reflections with fobs == fcalc == i/300 and unit epsilon are tested
  for a series of alpha values (beta = 1 - alpha**2), first with every
  centric flag False, then with the flags inverted.  For every reflection
  -1/(second derivative at the mode) must be positive.  When alpha > 0.9
  the mean and the mode must agree to within 0.1; in the second pass this
  is only required for reflections with fcalc > 1.
  """
  n_refl = 1000
  fobs  = flex.double( range(n_refl) )/300.0
  fcalc = flex.double( range(n_refl) )/300.0
  sigmas = None
  epsilon = flex.double( range(n_refl) )*0 +1.0
  centric = flex.bool( [False]*n_refl )

  # Pass 1: centric flags as constructed (all False), alpha = 0/50 .. 49/50.
  for step in xrange(50):
    alpha_value = step/50.0
    beta_value = 1.0-alpha_value*alpha_value
    alpha = flex.double( [alpha_value]*n_refl )
    beta  = flex.double( [beta_value]*n_refl )
    outlier_test = scaling.likelihood_ratio_outlier_test(
      fobs, sigmas, fcalc, epsilon, centric, alpha, beta )
    mean = outlier_test.mean_fobs()
    # std_fobs() is called but its result is not checked.
    std  = outlier_test.std_fobs()
    mode = outlier_test.posterior_mode()
    snd_der = outlier_test.posterior_mode_snd_der()
    for al, f_mean, f_mode, curvature in zip(alpha,mean,mode,snd_der):
      assert  (-1.0/curvature>0)
      if (al>0.9):
        assert approx_equal(f_mean,f_mode,eps=1e-1)

  # Pass 2: inverted centric flags; alpha = 0 is skipped.
  for step in xrange(1,50):
    alpha_value = step/50.0
    beta_value = 1.0-alpha_value*alpha_value
    alpha = flex.double( [alpha_value]*n_refl )
    beta  = flex.double( [beta_value]*n_refl )
    outlier_test = scaling.likelihood_ratio_outlier_test(
      fobs, sigmas, fcalc, epsilon, ~centric, alpha, beta )
    mean = outlier_test.mean_fobs()
    std  = outlier_test.std_fobs()
    mode = outlier_test.posterior_mode()
    snd_der = outlier_test.posterior_mode_snd_der()
    for al, fc, f_mean, f_mode, curvature in zip(
        alpha,fcalc,mean,mode,snd_der):
      assert  (-1.0/curvature>0)
      if (al>0.9) and (fc>1.0):
        assert approx_equal(f_mean,f_mode,eps=1e-1)
Ejemplo n.º 3
0
def tst_loglikelihoods():
  """Check flag_potential_outliers against the log-likelihood difference.

  1000 acentric reflections are set up with fobs = i/200, fcalc = 1, zero
  sigmas, unit epsilon, beta = 1 and alpha = 0.99.  A reflection must be
  flagged exactly when the posterior-mode log-likelihood minus the
  log-likelihood of the observation is below 2*level (level = 4.5).
  """
  n_refl = 1000
  fobs = flex.double( range(n_refl) )/200
  outlier_test = scaling.likelihood_ratio_outlier_test(
     fobs,
     flex.double( [0]*n_refl ),     # sigmas
     flex.double( [1]*n_refl ),     # fcalc
     flex.double( [1]*n_refl ),     # epsilon
     flex.bool( [False]*n_refl ),   # centric
     flex.double( [0.99]*n_refl ),  # alpha
     flex.double( [1]*n_refl ))     # beta
  obs_lik  = outlier_test.log_likelihood()
  mode_lik = outlier_test.posterior_mode_log_likelihood()
  # The mode itself is retrieved but not used in any assertion.
  mode     = outlier_test.posterior_mode()
  level    = 4.5
  threshold = 2.0*level
  flags    = outlier_test.flag_potential_outliers( threshold )
  for flag, at_mode, at_obs, _mode, _fo in zip(
      flags,mode_lik,obs_lik,mode,fobs):
    expected = at_mode-at_obs < threshold
    assert bool(flag) == expected
Ejemplo n.º 4
0
def tst_outliers_find_posterior_mode():
  """Verify posterior modes for an acentric and a centric test case.

  Both cases use ten reflections with fobs = 3, sigma = 0, epsilon = 1
  and beta = 1.  The acentric case (alpha = 0.99) requires fcalc/mode to
  be 1 within 0.05; the centric case (alpha = 0.099) requires mode/fcalc
  to be 0.099 within 0.001.
  """
  size = 10

  # Acentric: when alpha is close to 1 the mode should be very close
  # to fcalc.
  acentric_fcalc = flex.double( range(size) )*10 + 10
  acentric_test = scaling.likelihood_ratio_outlier_test(
     flex.double( [3]*size ),       # fobs
     flex.double( [0]*size ),       # sigmas
     acentric_fcalc,
     flex.double( [1]*size ),       # epsilon
     flex.bool( [False]*size ),     # centric
     flex.double( [0.99]*size ),    # alpha
     flex.double( [1]*size ))       # beta
  acentric_mode = acentric_test.posterior_mode()
  for fc, fm in zip(acentric_fcalc,acentric_mode):
    assert approx_equal(fc/fm, 1, eps=0.05)

  # Centric: same setup with larger fcalc values and a small alpha.
  centric_fcalc = flex.double( range(size) )*100 + 100
  centric_test = scaling.likelihood_ratio_outlier_test(
     flex.double( [3]*size ),       # fobs
     flex.double( [0]*size ),       # sigmas
     centric_fcalc,
     flex.double( [1]*size ),       # epsilon
     flex.bool( [True]*size ),      # centric
     flex.double( [0.099]*size ),   # alpha
     flex.double( [1]*size ))       # beta
  centric_mode = centric_test.posterior_mode()
  for fc, fm in zip(centric_fcalc,centric_mode):
    assert approx_equal(fm/fc, 0.099, eps=0.001)
Ejemplo n.º 5
0
def tst_outliers_compare_mode_mean():
    """Check that posterior mean and mode agree when alpha is large.

    Uses 1000 reflections with fobs == fcalc == i/300, no sigmas and unit
    epsilon.  alpha runs over multiples of 1/50 with beta = 1 - alpha**2,
    once with all centric flags False and once with the flags inverted
    (skipping alpha = 0).  Always asserts that -1/(second derivative at
    the mode) is positive; for alpha > 0.9 it asserts mean == mode within
    0.1 (second pass: only where fcalc > 1).
    """
    count = 1000
    fobs = flex.double(range(count)) / 300.0
    fcalc = flex.double(range(count)) / 300.0
    sigmas = None
    epsilon = flex.double(range(count)) * 0 + 1.0
    centric = flex.bool([False] * count)

    def make_test(centric_flags, index):
        # alpha = index/50 and beta = 1 - alpha**2, constant over the set.
        a_val = index / 50.0
        b_val = 1.0 - a_val * a_val
        alphas = flex.double([a_val] * count)
        betas = flex.double([b_val] * count)
        test = scaling.likelihood_ratio_outlier_test(
            fobs, sigmas, fcalc, epsilon, centric_flags, alphas, betas)
        return alphas, test

    for index in xrange(50):
        alphas, test = make_test(centric, index)
        means = test.mean_fobs()
        # Result of std_fobs() is not inspected; the call is kept as is.
        stds = test.std_fobs()
        modes = test.posterior_mode()
        curvatures = test.posterior_mode_snd_der()
        for a_i, mean_i, mode_i, curv_i in zip(alphas, means, modes,
                                               curvatures):
            assert (-1.0 / curv_i > 0)
            if a_i > 0.9:
                assert approx_equal(mean_i, mode_i, eps=1e-1)

    for index in xrange(1, 50):
        alphas, test = make_test(~centric, index)
        means = test.mean_fobs()
        stds = test.std_fobs()
        modes = test.posterior_mode()
        curvatures = test.posterior_mode_snd_der()
        for a_i, fc_i, mean_i, mode_i, curv_i in zip(alphas, fcalc, means,
                                                     modes, curvatures):
            assert (-1.0 / curv_i > 0)
            if a_i > 0.9 and fc_i > 1.0:
                assert approx_equal(mean_i, mode_i, eps=1e-1)
Ejemplo n.º 6
0
def tst_loglikelihoods():
    """Test that outlier flags match the log-likelihood gap criterion.

    Builds 1000 acentric reflections (fobs = i/200, fcalc = 1, sigma = 0,
    epsilon = 1, beta = 1, alpha = 0.99) and asserts that
    flag_potential_outliers(2*level) returns True precisely for those
    reflections whose posterior-mode log-likelihood exceeds the observed
    log-likelihood by less than 2*level, with level = 4.5.
    """
    count = 1000
    level = 4.5
    cutoff = 2.0 * level

    fobs = flex.double(range(count)) / 200
    fcalc = flex.double([1] * count)
    sigmas = flex.double([0] * count)
    epsilon = flex.double([1] * count)
    centric = flex.bool([False] * count)
    beta = flex.double([1] * count)
    alpha = flex.double([0.99] * count)

    test = scaling.likelihood_ratio_outlier_test(
        fobs, sigmas, fcalc, epsilon, centric, alpha, beta)
    log_lik_obs = test.log_likelihood()
    log_lik_mode = test.posterior_mode_log_likelihood()
    # The posterior mode is fetched but takes no part in the assertions.
    mode = test.posterior_mode()
    flags = test.flag_potential_outliers(cutoff)

    for flagged, ll_mode, ll_obs, _m, _fo in zip(flags, log_lik_mode,
                                                 log_lik_obs, mode, fobs):
        if ll_mode - ll_obs < cutoff:
            assert flagged
            continue
        assert not flagged
Ejemplo n.º 7
0
def plotit(fobs,
           sigma,
           fcalc,
           alpha,
           beta,
           epsilon,
           centric,
           out,
           limit=5.0,
           steps=1000,
           plot_title="Outlier plot"):
  """Write a likelihood summary and a loggraph plot for one reflection.

  A likelihood_ratio_outlier_test is built for the single observation and
  its input parameters, posterior mode, a Gaussian width derived from the
  second derivative at the mode, the log-likelihoods and the mean are
  printed to `out`.  The likelihood is then sampled at `steps` fobs values
  and written, together with a Gaussian centred on fobs, through
  data_plots.plot_data / data_plots.plot_data_loggraph.

  Parameters:
    fobs, sigma, fcalc, alpha, beta, epsilon, centric: scalar values for
      the reflection; when sigma is None or <= 0, fobs/30 is used instead.
    out: file-like object receiving all output.
    limit: half-width of the sampled fobs range in units of the Gaussian
      width around the mode (the range is widened to include fobs).
    steps: number of sample points.
    plot_title: title printed in the summary and used for the plot.
  """

  def outlier_test_for(observations, count):
    # Only the observed amplitudes vary; every other input is the same
    # scalar repeated `count` times.  No sigmas are passed.
    return scaling.likelihood_ratio_outlier_test(
      observations,
      None,
      flex.double( [fcalc]*count ),
      flex.double( [epsilon]*count ),
      flex.bool  ( [centric]*count ),
      flex.double( [alpha]*count ),
      flex.double( [beta]*count ) )

  single = outlier_test_for( flex.double( [fobs] ), 1 )

  print >> out
  print >> out,"#Input parameters: "
  print >> out,"#Title        : ", plot_title
  print >> out,"#F-calc       : ", fcalc
  print >> out,"#F-obs        : ", fobs
  print >> out,"#epsilon      : ", epsilon
  print >> out,"#alpha        : ", alpha
  print >> out,"#beta         : ", beta
  print >> out,"#centric      : ", centric
  mode = single.posterior_mode()[0]

  # Width of the Gaussian approximation: 1/sqrt(|second derivative|).
  stdev = math.sqrt(1.0/ math.fabs( single.posterior_mode_snd_der()[0] )  )
  print >> out,"#A Gaussian approximation of the likelihood function"
  print >> out,"#could be constructed as follows with: "
  print >> out,"# exp[-(fobs-mode)**2/(2*stdev**2)] /(sqrt(2 pi) stdev)"
  print >> out,"#with"
  print >> out,"#mode         = ", mode
  print >> out,"#stdev        = ", stdev
  print >> out
  print >> out,"#The log likelihood values for the mode and "
  print >> out,"#observed values are"
  print >> out,"#Log[P(fobs)] : ",  single.log_likelihood()[0]
  print >> out,"#Log[P(mode)] : ",  single.posterior_mode_log_likelihood()[0]
  print >> out,"#Their difference is:"
  print >> out,"#delta        : ",  single.log_likelihood()[0]-single.posterior_mode_log_likelihood()[0]
  print >> out,"#"
  mean_fobs = single.mean_fobs()
  print >> out,"#mean f_obs   : ", mean_fobs[0], "   (first moment)"

  # Sampling window: mode +/- limit*stdev, never below zero, widened by
  # 2*stdev beyond fobs when fobs falls outside of it.
  lower = mode-stdev*limit
  if lower<0:
    lower=0
  upper = mode+limit*stdev

  if fobs < lower:
    lower = fobs-2.0*stdev
    if lower<0:
      lower=0
  if fobs > upper:
    upper = fobs+2.0*stdev

  grid = flex.double( range(steps) )*(
    upper-lower)/float(steps)+lower

  sampled = outlier_test_for( grid, steps )

  likelihood = flex.exp( sampled.log_likelihood() )

  # NOTE(review): with fobs == 0 this fallback yields sigma == 0 and the
  # division below would fail - confirm whether callers can pass that.
  if (sigma is None) or (sigma <=0 ):
    sigma=fobs/30.0

  deviation = (grid - fobs)/float(sigma)
  obs_gauss = flex.exp( -deviation*deviation/2.0 ) /(
    math.sqrt(2.0*math.pi*sigma*sigma))

  # Cap the observation Gaussian at 110% of the likelihood maximum.
  ceiling = flex.max( likelihood )*1.10
  too_high = flex.bool( obs_gauss >= ceiling )
  obs_gauss = obs_gauss.set_selected( too_high, ceiling )

  loggraph = data_plots.plot_data(
    plot_title=plot_title,
    x_label = 'Fobs',
    y_label = 'P(Fobs)',
    x_data = grid,
    y_data = likelihood,
    y_legend = 'P(Fobs|Fcalc,alpha,beta)',
    comments = 'Fobs=%5.2f, sigma=%5.2f, Fcalc=%5.2f'%(fobs,sigma,fcalc) )
  loggraph.add_data(
    y_data = obs_gauss,
    y_legend = "P(Fobs|<Fobs>,sigma)"
    )
  data_plots.plot_data_loggraph(loggraph,out)
Ejemplo n.º 8
0
    def model_based_outliers(self,
                             f_model,
                             level=.01,
                             return_data=False,
                             plot_out=None):

        assert self.r_free_flags is not None
        if (self.r_free_flags.data().count(True) == 0):
            self.r_free_flags = self.r_free_flags.array(
                data=~self.r_free_flags.data())
        sigmaa_estimator = sigmaa_estimation.sigmaa_estimator(
            miller_obs=self.miller_obs,
            miller_calc=f_model,
            r_free_flags=self.r_free_flags,
            kernel_width_free_reflections=200,
            n_sampling_points=20,
            n_chebyshev_terms=13)

        sigmaa_estimator.show(out=self.out)
        sigmaa = sigmaa_estimator.sigmaa()
        obs_norm = abs(sigmaa_estimator.normalized_obs)
        calc_norm = sigmaa_estimator.normalized_calc

        f_model_outlier_object = scaling.likelihood_ratio_outlier_test(
            f_obs=obs_norm.data(),
            sigma_obs=None,
            f_calc=calc_norm.data(),
            # the data is prenormalized, all epsies are unity
            epsilon=flex.double(calc_norm.data().size(), 1.0),
            centric=obs_norm.centric_flags().data(),
            alpha=sigmaa.data(),
            beta=1.0 - sigmaa.data() * sigmaa.data())
        modes = f_model_outlier_object.posterior_mode()
        lik = f_model_outlier_object.log_likelihood()
        p_lik = f_model_outlier_object.posterior_mode_log_likelihood()
        s_der = f_model_outlier_object.posterior_mode_snd_der()

        ll_gain = f_model_outlier_object.standardized_likelihood()

        # The smallest vallue should be 0.
        # sometimes, due to numerical issues, it comes out
        # a wee bit negative. please repair that
        eps = 1.0e-10
        zeros = flex.bool(ll_gain < eps)
        p_values = ll_gain
        p_values = p_values.set_selected(zeros, eps)
        p_values = erf(flex.sqrt(p_values / 2.0))
        p_values = 1.0 - flex.pow(p_values, float(p_values.size()))

        # select on p-values
        flags = flex.bool(p_values > level)
        flags = self.miller_obs.customized_copy(data=flags)
        ll_gain = self.miller_obs.customized_copy(data=ll_gain)
        p_values = self.miller_obs.customized_copy(data=p_values)

        log_message = """

Model based outlier rejection.
------------------------------

Calculated amplitudes and estimated values of alpha and beta
are used to compute the log-likelihood of the observed amplitude.
The method is inspired by Read, Acta Cryst. (1999). D55, 1759-1764.
Outliers are rejected on the basis of the assumption that a scaled
log likelihood differnce 2(log[P(Fobs)]-log[P(Fmode)])/Q\" is distributed
according to a Chi-square distribution (Q\" is equal to the second
derivative of the log likelihood function of the mode of the
distribution).
The outlier threshold of the p-value relates to the p-value of the
extreme value distribution of the chi-square distribution.

"""

        flags.map_to_asu()
        ll_gain.map_to_asu()
        p_values.map_to_asu()

        assert flags.indices().all_eq(self.miller_obs.indices())
        assert ll_gain.indices().all_eq(self.miller_obs.indices())
        assert p_values.indices().all_eq(self.miller_obs.indices())

        log_message = self.make_log_model(log_message, flags, ll_gain,
                                          p_values, obs_norm, calc_norm,
                                          sigmaa, plot_out)
        tmp_log = StringIO()
        print >> tmp_log, log_message
        # histogram of log likelihood gain values
        print >> tmp_log
        print >> tmp_log, "The histoghram of scaled (LL-gain) values is shown below."
        print >> tmp_log, "  Note: scaled (LL-gain) is approximately Chi-square distributed."
        print >> tmp_log
        print >> tmp_log, "  scaled(LL-gain)  Frequency"
        histo = flex.histogram(ll_gain.data(), 15)
        histo.show(f=tmp_log, format_cutoffs='%7.3f')

        print >> self.out, tmp_log.getvalue()

        if not return_data:
            return flags
        else:
            assert flags.indices().all_eq(self.miller_obs.indices())
            return self.miller_obs.select(flags.data())
Ejemplo n.º 9
0
    def model_based_outliers(self, f_model, level=0.01, return_data=False, plot_out=None):

        assert self.r_free_flags is not None
        if self.r_free_flags.data().count(True) == 0:
            self.r_free_flags = self.r_free_flags.array(data=~self.r_free_flags.data())
        sigmaa_estimator = sigmaa_estimation.sigmaa_estimator(
            miller_obs=self.miller_obs,
            miller_calc=f_model,
            r_free_flags=self.r_free_flags,
            kernel_width_free_reflections=200,
            n_sampling_points=20,
            n_chebyshev_terms=13,
        )

        sigmaa_estimator.show(out=self.out)
        sigmaa = sigmaa_estimator.sigmaa()
        obs_norm = abs(sigmaa_estimator.normalized_obs)
        calc_norm = sigmaa_estimator.normalized_calc

        f_model_outlier_object = scaling.likelihood_ratio_outlier_test(
            f_obs=obs_norm.data(),
            sigma_obs=None,
            f_calc=calc_norm.data(),
            # the data is prenormalized, all epsies are unity
            epsilon=flex.double(calc_norm.data().size(), 1.0),
            centric=obs_norm.centric_flags().data(),
            alpha=sigmaa.data(),
            beta=1.0 - sigmaa.data() * sigmaa.data(),
        )
        modes = f_model_outlier_object.posterior_mode()
        lik = f_model_outlier_object.log_likelihood()
        p_lik = f_model_outlier_object.posterior_mode_log_likelihood()
        s_der = f_model_outlier_object.posterior_mode_snd_der()

        ll_gain = f_model_outlier_object.standardized_likelihood()

        # The smallest vallue should be 0.
        # sometimes, due to numerical issues, it comes out
        # a wee bit negative. please repair that
        eps = 1.0e-10
        zeros = flex.bool(ll_gain < eps)
        p_values = ll_gain
        p_values = p_values.set_selected(zeros, eps)
        p_values = erf(flex.sqrt(p_values / 2.0))
        p_values = 1.0 - flex.pow(p_values, float(p_values.size()))

        # select on p-values
        flags = flex.bool(p_values > level)
        flags = self.miller_obs.customized_copy(data=flags)
        ll_gain = self.miller_obs.customized_copy(data=ll_gain)
        p_values = self.miller_obs.customized_copy(data=p_values)

        log_message = """

Model based outlier rejection.
------------------------------

Calculated amplitudes and estimated values of alpha and beta
are used to compute the log-likelihood of the observed amplitude.
The method is inspired by Read, Acta Cryst. (1999). D55, 1759-1764.
Outliers are rejected on the basis of the assumption that a scaled
log likelihood differnce 2(log[P(Fobs)]-log[P(Fmode)])/Q\" is distributed
according to a Chi-square distribution (Q\" is equal to the second
derivative of the log likelihood function of the mode of the
distribution).
The outlier threshold of the p-value relates to the p-value of the
extreme value distribution of the chi-square distribution.

"""

        flags.map_to_asu()
        ll_gain.map_to_asu()
        p_values.map_to_asu()

        assert flags.indices().all_eq(self.miller_obs.indices())
        assert ll_gain.indices().all_eq(self.miller_obs.indices())
        assert p_values.indices().all_eq(self.miller_obs.indices())

        log_message = self.make_log_model(log_message, flags, ll_gain, p_values, obs_norm, calc_norm, sigmaa, plot_out)
        tmp_log = StringIO()
        print >> tmp_log, log_message
        # histogram of log likelihood gain values
        print >> tmp_log
        print >> tmp_log, "The histoghram of scaled (LL-gain) values is shown below."
        print >> tmp_log, "  Note: scaled (LL-gain) is approximately Chi-square distributed."
        print >> tmp_log
        print >> tmp_log, "  scaled(LL-gain)  Frequency"
        histo = flex.histogram(ll_gain.data(), 15)
        histo.show(f=tmp_log, format_cutoffs="%7.3f")

        print >>self.out, tmp_log.getvalue()

        if not return_data:
            return flags
        else:
            assert flags.indices().all_eq(self.miller_obs.indices())
            return self.miller_obs.select(flags.data())