Ejemplo n.º 1
0
def bin_size(orig_exp, ci_method, nib_lim=0.01, max_ci_diff=0.25, min_corcoef=0.3):
    """Search for the first bin-size multiple that passes both quality checks.

    For each multiplier 1, 2, ..., 100 the experiment is re-aggregated with
    ``aggregate_bins(times_bin_size=multiplier)``, confidence intervals are
    computed with ``logit.ci_for_df`` and two statistics are derived from
    the resulting data frame: the value of ``logit.get_nib_ratio`` and the
    value of ``corrcoeff``. The first multiplier for which the nib ratio is
    at most ``nib_lim`` and the correlation is strictly greater than
    ``min_corcoef`` wins.

    Args:
        orig_exp: experiment object exposing ``aggregate_bins``.
        ci_method: passed through unchanged to ``logit.ci_for_df``.
        nib_lim: inclusive upper bound on the nib ratio (presumably the
            fraction of non-informative bins -- confirm in ``logit``).
        max_ci_diff: forwarded to ``logit.ci_for_df`` as ``ci_min``.
        min_corcoef: exclusive lower bound on the ``corrcoeff`` value
            (logged as "spearmanr").

    Returns:
        The ``bin_size`` attribute of the first aggregated experiment that
        satisfies both criteria.

    Raises:
        AssertionError: if none of the 100 candidates qualifies.
    """
    # The upper bound used to be enforced by an ``assert`` inside an
    # unbounded ``itertools.count`` loop; with ``python -O`` the assert is
    # stripped and the search would never terminate. An explicit range plus
    # an explicit raise keeps the same candidates, exception type and message.
    for multiplier in range(1, 101):
        exp = orig_exp.aggregate_bins(times_bin_size=multiplier)
        df = logit.ci_for_df(exp.as_data_frame(), ci_method, ci_min=max_ci_diff)
        ratio_nib = logit.get_nib_ratio(df)
        pvar = corrcoeff(df)
        log.notice('testing bin size %d, nib ratio: %.4f, spearmanr: %.3f' % (multiplier, ratio_nib, pvar))
        if ratio_nib <= nib_lim and pvar > min_corcoef:
            return exp.bin_size
    raise AssertionError("Could not find a suitable bin size.")
Ejemplo n.º 2
0
def bin_size(orig_exp,
             ci_method,
             nib_lim=0.01,
             max_ci_diff=0.25,
             min_corcoef=0.3):
    """Search for the first bin-size multiple that passes both quality checks.

    For each multiplier 1, 2, ..., 100 the experiment is re-aggregated with
    ``aggregate_bins(times_bin_size=multiplier)``, confidence intervals are
    computed with ``logit.ci_for_df`` and two statistics are derived from
    the resulting data frame: the value of ``logit.get_nib_ratio`` and the
    value of ``corrcoeff``. The first multiplier for which the nib ratio is
    at most ``nib_lim`` and the correlation is strictly greater than
    ``min_corcoef`` wins.

    Args:
        orig_exp: experiment object exposing ``aggregate_bins``.
        ci_method: passed through unchanged to ``logit.ci_for_df``.
        nib_lim: inclusive upper bound on the nib ratio (presumably the
            fraction of non-informative bins -- confirm in ``logit``).
        max_ci_diff: forwarded to ``logit.ci_for_df`` as ``ci_min``.
        min_corcoef: exclusive lower bound on the ``corrcoeff`` value
            (logged as "spearmanr").

    Returns:
        The ``bin_size`` attribute of the first aggregated experiment that
        satisfies both criteria.

    Raises:
        AssertionError: if none of the 100 candidates qualifies.
    """
    # The upper bound used to be enforced by an ``assert`` inside an
    # unbounded ``itertools.count`` loop; with ``python -O`` the assert is
    # stripped and the search would never terminate. An explicit range plus
    # an explicit raise keeps the same candidates, exception type and message.
    for multiplier in range(1, 101):
        exp = orig_exp.aggregate_bins(times_bin_size=multiplier)
        df = logit.ci_for_df(exp.as_data_frame(),
                             ci_method,
                             ci_min=max_ci_diff)
        ratio_nib = logit.get_nib_ratio(df)
        pvar = corrcoeff(df)
        log.notice('testing bin size %d, nib ratio: %.4f, spearmanr: %.3f' %
                   (multiplier, ratio_nib, pvar))
        if ratio_nib <= nib_lim and pvar > min_corcoef:
            return exp.bin_size
    raise AssertionError("Could not find a suitable bin size.")
Ejemplo n.º 3
0
    def __adjust_bin_size_and_get_df(self, exp):
        """Settle on a bin size, aggregate ``exp`` to it and return the CI frame.

        When ``self.bin_size`` is ``None`` it is filled in from
        ``estimate.bin_size`` (and stored on the instance); otherwise the
        preset value is used. The experiment is then aggregated with
        ``aggregate_bins(new_bin_size=self.bin_size)``, confidence intervals
        are computed with ``logit.ci_for_df`` and the value returned by
        ``logit.get_nib_ratio`` is checked against ``self.nib_lim``.

        Args:
            exp: experiment object exposing ``aggregate_bins``.

        Returns:
            The data frame produced by ``logit.ci_for_df``.

        Raises:
            AssertionError: if the nib ratio exceeds ``self.nib_lim``.
        """
        if self.bin_size is None:
            self.bin_size = estimate.bin_size(exp, self.ci_method,
                                              max_ci_diff=self.ci_lim,
                                              nib_lim=self.nib_lim)
            log.notice('Optimal bin size: %d' % self.bin_size)
        else:
            log.notice('Using preset bin size of: %d' % self.bin_size)
        odf = exp.aggregate_bins(new_bin_size=self.bin_size).as_data_frame()
        df = logit.ci_for_df(odf, self.ci_method, ci_min=self.ci_lim)
        ratio_nib = logit.get_nib_ratio(df)
        # NOTE(review): this line is also emitted when the bin size was
        # auto-estimated, so "Manually specified" can be misleading; the text
        # is left unchanged here because other tooling may match on it.
        log.notice(
            'Manually specified bin size of %dKB gives %.2f%% informative bins. '
            'The required amount is %.2f%%.'
            % (self.bin_size / 1000, (1 - ratio_nib) * 100, (1 - self.nib_lim) * 100))
        # Explicit raise instead of ``assert`` so the check survives
        # ``python -O``. The bound is inclusive: the bin-size estimator accepts
        # ``ratio_nib <= nib_lim``, so a strict ``<`` here could reject a size
        # the estimator had just chosen (and could never pass for nib_lim == 0).
        # ``not (a <= b)`` is used so that a NaN ratio is still rejected.
        if not ratio_nib <= self.nib_lim:
            raise AssertionError(
                'The selected bin size results in less informative bins than '
                'what specified by the parameter '
                'required_fraction_of_informative_bins. Please try a bigger '
                'bin size or let EDD auto-estimate a bin size for you. Please '
                'consult the EDD manual for more information')
        return df
Ejemplo n.º 4
0
    def __adjust_bin_size_and_get_df(self, exp):
        """Settle on a bin size, aggregate ``exp`` to it and return the CI frame.

        When ``self.bin_size`` is ``None`` it is filled in from
        ``estimate.bin_size`` (and stored on the instance); otherwise the
        preset value is used. The experiment is then aggregated with
        ``aggregate_bins(new_bin_size=self.bin_size)``, confidence intervals
        are computed with ``logit.ci_for_df`` and the value returned by
        ``logit.get_nib_ratio`` is checked against ``self.nib_lim``.

        Args:
            exp: experiment object exposing ``aggregate_bins``.

        Returns:
            The data frame produced by ``logit.ci_for_df``.

        Raises:
            AssertionError: if the nib ratio exceeds ``self.nib_lim``.
        """
        if self.bin_size is None:
            self.bin_size = estimate.bin_size(exp,
                                              self.ci_method,
                                              max_ci_diff=self.ci_lim,
                                              nib_lim=self.nib_lim)
            log.notice('Optimal bin size: %d' % self.bin_size)
        else:
            log.notice('Using preset bin size of: %d' % self.bin_size)
        odf = exp.aggregate_bins(new_bin_size=self.bin_size).as_data_frame()
        df = logit.ci_for_df(odf, self.ci_method, ci_min=self.ci_lim)
        ratio_nib = logit.get_nib_ratio(df)
        # NOTE(review): this line is also emitted when the bin size was
        # auto-estimated, so "Manually specified" can be misleading; the text
        # is left unchanged here because other tooling may match on it.
        log.notice(
            'Manually specified bin size of %dKB gives %.2f%% informative bins. '
            'The required amount is %.2f%%.' % (self.bin_size / 1000,
                                                (1 - ratio_nib) * 100,
                                                (1 - self.nib_lim) * 100))
        # Explicit raise instead of ``assert`` so the check survives
        # ``python -O``. The bound is inclusive: the bin-size estimator accepts
        # ``ratio_nib <= nib_lim``, so a strict ``<`` here could reject a size
        # the estimator had just chosen (and could never pass for nib_lim == 0).
        # ``not (a <= b)`` is used so that a NaN ratio is still rejected.
        if not ratio_nib <= self.nib_lim:
            raise AssertionError(
                'The selected bin size results in less informative bins than '
                'what specified by the parameter '
                'required_fraction_of_informative_bins. Please try a bigger '
                'bin size or let EDD auto-estimate a bin size for you. Please '
                'consult the EDD manual for more information')
        return df