コード例 #1
0
plt.show()

# Burr distribution example: compute the first four moments and build a grid
# of 100 points between the 1st and 99th percentiles.
fig, ax = plt.subplots(1, 1)
c, d = 10.5, 4.3
mean, var, skew, kurt = burr.stats(c, d, moments='mvsk')

x = np.linspace(burr.ppf(0.01, c, d), burr.ppf(0.99, c, d), 100)

print(stats.norm.__doc__)

# Overlay a generalized-logistic pdf on the histogram of the observed data.
# ``b`` holds the parameters as (shape, loc, scale).
alpha, loc, beta = b[0], b[1], b[2]
data = ss.genlogistic.rvs(alpha, loc=loc, scale=beta, size=5000)
# ``density=True`` replaces the ``normed=True`` keyword, which Matplotlib
# no longer accepts.
myHist = plt.hist(distribution_car_price, 500, density=True)
rv = ss.genlogistic(alpha, loc, beta)
x = np.linspace(0, 500000)
h = plt.plot(x, rv.pdf(x), lw=2)

axes = plt.gca()
axes.set_xlim([0, 150000])
plt.show()

# Same histogram, this time overlaid with a non-central t pdf whose four
# parameters come from ``a``.
alpha, loc, beta = b[0], b[1], b[2]
data = ss.genlogistic.rvs(alpha, loc=loc, scale=beta, size=10000)
myHist = plt.hist(distribution_car_price, 500, density=True)
rv = ss.nct(a[0], a[1], a[2], a[3])
x = np.linspace(0, 500000)
h = plt.plot(x, rv.pdf(x), lw=2)

axes = plt.gca()
コード例 #2
0
# Evaluate the pdf on 100 evenly spaced points between the 1st and 99th
# percentiles of the distribution.
lower = genlogistic.ppf(0.01, c)
upper = genlogistic.ppf(0.99, c)
x = np.linspace(lower, upper, 100)
ax.plot(x, genlogistic.pdf(x, c), 'r-', lw=5, alpha=0.6,
        label='genlogistic pdf')

# Calling the distribution object fixes its shape, location and scale
# parameters and returns a "frozen" RV object; plot the frozen ``pdf`` too.
rv = genlogistic(c)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Round-trip check: ``cdf`` applied to ``ppf`` should recover the
# probabilities.
probabilities = [0.001, 0.5, 0.999]
vals = genlogistic.ppf(probabilities, c)
np.allclose(probabilities, genlogistic.cdf(vals, c))
# True

# Draw random variates and compare their histogram with the pdf curves.
r = genlogistic.rvs(c, size=1000)
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
コード例 #3
0
                                                       9.1193851632305201,
                                                       261.3457987967214)
# Frozen candidate distributions for driving duration. Each entry is given
# as shape parameter(s) followed by location and scale.
drivingduration_model_dict['exponweib'] = st.exponweib(
    a=2.6443841639764942,
    c=0.89242254172118096,
    loc=10.603640861374947,
    scale=40.28556311444698,
)
drivingduration_model_dict['gengamma'] = st.gengamma(
    a=4.8743515108339581,
    c=0.61806208678747043,
    loc=9.4649293818479716,
    scale=5.431576919220225,
)
drivingduration_model_dict['recipinvgauss'] = st.recipinvgauss(
    mu=0.499908918842556,
    loc=0.78319699707613699,
    scale=28.725450197674746,
)
drivingduration_model_dict['f'] = st.f(
    dfn=9.8757694313677113,
    dfd=12.347442183821462,
    loc=0.051160749890587665,
    scale=73.072591767722287,
)

# Frozen candidate distributions for car price, kept in insertion order.
# Each entry is given as shape parameter(s) followed by location and scale.
carprice_model_dict = ct.OrderedDict([
    ('nct', st.nct(df=7.3139456577106312,
                   nc=3.7415255108348946,
                   loc=-46.285705145385577,
                   scale=7917.0860181436065)),
    ('genlogistic', st.genlogistic(c=10.736440967148635,
                                   loc=3735.7049978006107,
                                   scale=10095.421377235754)),
    ('gumbel_r', st.gumbel_r(loc=26995.077239517472,
                             scale=10774.370808211244)),
    ('f', st.f(dfn=24168.523476867485,
               dfd=35.805656864712923,
               loc=-21087.314142557225,
               scale=51154.0328397044)),
    ('johnsonsu', st.johnsonsu(a=-1.7479864366935538,
                               b=1.8675670208081987,
                               loc=14796.793096897647,
                               scale=14716.575397771712)),
])
コード例 #4
0
# Frozen theoretical distributions that the sample quantiles are compared to.
dist_mielke = stats.mielke(
    parametros_mielke[0],
    parametros_mielke[1],
    loc=parametros_mielke[2],
    scale=parametros_mielke[3],
)
dist_genlogistic = stats.genlogistic(
    parametros_genlogistic[0],
    parametros_genlogistic[1],
    parametros_genlogistic[2],
)

# Left panel: QQ plot against the Mielke fit, with a 45-degree reference line.
ax1 = fig.add_subplot(1, 2, 1)
sm.qqplot(logeados, dist_mielke, line="45", ax=ax1)
ax1.set_title('mielke', size=11.0)
ax1.set_xlabel("")
ax1.set_ylabel("")

# Right panel: QQ plot against the generalized logistic fit.
ax2 = fig.add_subplot(1, 2, 2)
sm.qqplot(logeados, dist_genlogistic, line="45", ax=ax2)
ax2.set_title('genlogistic', size=11.0)
ax2.set_xlabel("")
ax2.set_ylabel("")

fig.tight_layout(pad=0.7)

# Shared x-axis caption for both panels (per-axes labels were blanked above).
fig.text(0.5, 0, 'Cuantiles teóricos', ha='center', va='center')
fig.text(0.,
         0.5,
         'Cuantiles observados',
         ha='center',
         va='center',
コード例 #5
0
ファイル: conftest.py プロジェクト: ashutoshvarma/dfit
def all_dists():
    """Return a frozen ``scipy.stats`` distribution for each supported family.

    Every distribution is frozen with ``loc=0.0`` and ``scale=1.0``; the
    shape parameters were taken from the examples in the official
    ``scipy.stats`` documentation.

    Returns:
        dict: Maps a distribution name to its frozen distribution, in the
        order of the table below (87 entries).
    """
    # Shape parameters per distribution; an empty dict means the family has
    # no shape parameters. "frechet_r" (c=1.89) and "frechet_l" (c=3.63) are
    # intentionally left out of the table.
    shape_params = {
        "alpha": {"a": 3.57},
        "anglit": {},
        "arcsine": {},
        "beta": {"a": 2.31, "b": 0.627},
        "betaprime": {"a": 5, "b": 6},
        "bradford": {"c": 0.299},
        "burr": {"c": 10.5, "d": 4.3},
        "cauchy": {},
        "chi": {"df": 78},
        "chi2": {"df": 55},
        "cosine": {},
        "dgamma": {"a": 1.1},
        "dweibull": {"c": 2.07},
        "erlang": {"a": 2},
        "expon": {},
        "exponnorm": {"K": 1.5},
        "exponweib": {"a": 2.89, "c": 1.95},
        "exponpow": {"b": 2.7},
        "f": {"dfn": 29, "dfd": 18},
        "fatiguelife": {"c": 29},
        "fisk": {"c": 3.09},
        "foldcauchy": {"c": 4.72},
        "foldnorm": {"c": 1.95},
        "genlogistic": {"c": 0.412},
        "genpareto": {"c": 0.1},
        "gennorm": {"beta": 1.3},
        "genexpon": {"a": 9.13, "b": 16.2, "c": 3.28},
        "genextreme": {"c": -0.1},
        "gausshyper": {"a": 13.8, "b": 3.12, "c": 2.51, "z": 5.18},
        "gamma": {"a": 1.99},
        "gengamma": {"a": 4.42, "c": -3.12},
        "genhalflogistic": {"c": 0.773},
        "gilbrat": {},
        "gompertz": {"c": 0.947},
        "gumbel_r": {},
        "gumbel_l": {},
        "halfcauchy": {},
        "halflogistic": {},
        "halfnorm": {},
        "halfgennorm": {"beta": 0.675},
        "hypsecant": {},
        "invgamma": {"a": 4.07},
        "invgauss": {"mu": 0.145},
        "invweibull": {"c": 10.6},
        "johnsonsb": {"a": 4.32, "b": 3.18},
        "johnsonsu": {"a": 2.55, "b": 2.25},
        "ksone": {"n": 1e03},
        "kstwobign": {},
        "laplace": {},
        "levy": {},
        "levy_l": {},
        "levy_stable": {"alpha": 0.357, "beta": -0.675},
        "logistic": {},
        "loggamma": {"c": 0.414},
        "loglaplace": {"c": 3.25},
        "lognorm": {"s": 0.954},
        "lomax": {"c": 1.88},
        "maxwell": {},
        "mielke": {"k": 10.4, "s": 3.6},
        "nakagami": {"nu": 4.97},
        "ncx2": {"df": 21, "nc": 1.06},
        "ncf": {"dfn": 27, "dfd": 27, "nc": 0.416},
        "nct": {"df": 14, "nc": 0.24},
        "norm": {},
        "pareto": {"b": 2.62},
        "pearson3": {"skew": 0.1},
        "powerlaw": {"a": 1.66},
        "powerlognorm": {"c": 2.14, "s": 0.446},
        "powernorm": {"c": 4.45},
        "rdist": {"c": 0.9},
        "reciprocal": {"a": 0.00623, "b": 1.01},
        "rayleigh": {},
        "rice": {"b": 0.775},
        "recipinvgauss": {"mu": 0.63},
        "semicircular": {},
        "t": {"df": 2.74},
        "triang": {"c": 0.158},
        "truncexpon": {"b": 4.69},
        "truncnorm": {"a": 0.1, "b": 2},
        "tukeylambda": {"lam": 3.13},
        "uniform": {},
        "vonmises": {"kappa": 3.99},
        "vonmises_line": {"kappa": 3.99},
        "wald": {},
        "weibull_min": {"c": 1.79},
        "weibull_max": {"c": 2.87},
        "wrapcauchy": {"c": 0.0311},
    }

    dists = {}
    for name, shapes in shape_params.items():
        if name == "gilbrat":
            # SciPy renamed ``gilbrat`` to ``gibrat`` and later dropped the
            # old spelling. Prefer the new name, fall back to the old one,
            # and keep the historical dictionary key for existing callers.
            family = getattr(stats, "gibrat", None)
            if family is None:
                family = stats.gilbrat
        else:
            family = getattr(stats, name)
        dists[name] = family(**shapes, loc=0.0, scale=1.0)
    return dists
コード例 #6
0
ファイル: ScoringManager.py プロジェクト: AviranLab/patteRNA
    def execute_scoring(self):
        """Run the scoring phase and write the sorted score file.

        The method proceeds in four stages:

        1. Build the scoring configuration dict from ``self.run_config`` and
           instance attributes.
        2. Unless ``no_cscores`` is set, fit one generalized logistic
           distribution per motif to the samples returned by
           ``self.sample_worker`` and store the frozen fits in
           ``self.cscore_dists``.
        3. Score the dataset batch by batch through ``self.mp_pool``.
        4. If motifs were given, sort the preliminary score file into the
           final score file and remove the preliminary one.

        Any exception raised while consuming pool results terminates the
        pool and is re-raised.
        """
        run_config = self.run_config

        def output_path(filename):
            # Place an output file inside the configured output directory.
            return os.path.join(run_config['output'], filename)

        # Compile scoring configuration parameters
        scoring_config = {
            'posteriors': run_config['posteriors'],
            'hdsl': run_config['HDSL'],
            'spp': run_config['SPP'],
            'viterbi': run_config['viterbi'],
            'suppress_nan': True,
            'fp_posteriors': output_path('posteriors.txt'),
            'fp_scores_pre': output_path('scores_pre'),
            'fp_scores': output_path('scores.txt'),
            'fp_hdsl': output_path('hdsl.txt'),
            'fp_spp': output_path('spp.txt'),
            'fp_viterbi': output_path('viterbi.txt'),
            'no_cscores': run_config['no_cscores'],
            'min_cscores': run_config['min_cscores'],
            'batch_size': run_config['batch_size'],
            'motifs': self.motifs,
            'path': run_config['path'],
            'context': run_config['context'],
            'cscore_dists': None,
            'no_vienna': self.no_vienna,
            # True only when none of the three disabling conditions holds.
            'energy': ~np.any([
                self.no_vienna,
                run_config['no_cscores'],
                not viennalib.vienna_imported,
            ]),
            'lbc': self.lbc,
            'hdsl_params': run_config['hdsl_params'],
        }

        self.pool_init()  # Initialize parallelized pool

        # Prepare score distributions for c-score normalization
        if not scoring_config['no_cscores']:
            logger.info('Sampling null sites for c-score normalization')
            clock.tick()

            self.cscore_dists = dict.fromkeys(self.motifs)

            cscore_batch = self.make_cscore_batch(scoring_config['min_cscores'])
            cscore_batch.pre_process(self.model, scoring=True)

            with tqdm(total=len(self.motifs), leave=False,
                      unit='motif') as pb_samples:
                try:
                    raw_path = scoring_config['path']
                    path = (np.array(list(raw_path), dtype=int)
                            if raw_path else None)

                    sampler = partial(self.sample_worker,
                                      path=path,
                                      batch=cscore_batch)
                    sampled = self.mp_pool.imap_unordered(sampler, self.motifs)

                    # Fit a frozen generalized logistic to each motif's samples.
                    for motif, samples in sampled:
                        fitted = genlogistic.fit(samples)
                        self.cscore_dists[motif] = genlogistic(
                            c=fitted[0], loc=fitted[1], scale=fitted[2])
                        pb_samples.update()

                    self.mp_pool.close()
                    self.mp_pool.join()

                except Exception:
                    self.mp_pool.terminate()
                    raise

            scoring_config['cscore_dists'] = self.cscore_dists
            elapsed = misclib.seconds_to_hms(clock.tock())
            logger.info(' ... done in {}'.format(elapsed))

        # Begin formal scoring phase by making batches to save on memory
        batches = self.make_batches(scoring_config['batch_size'])

        # Number of batches, used as the total of the overall progress bar
        batch_size = scoring_config['batch_size']
        n_batches = len(self.dataset.rnas) // batch_size + 1

        if self.motifs:
            header = "transcript\tstart score c-score BCE MEL Prob(motif) motif path seq\n"
            with open(scoring_config['fp_scores_pre'], 'w') as f:
                f.write(header)

        logger.info("Executing scoring")
        clock.tick()

        with tqdm(total=n_batches, leave=False, unit='batch',
                  desc='      Overall') as pbar_batches:

            # Process batches sequentially; a fresh pool is set up per batch
            for batch in batches:
                self.pool_init()
                batch.pre_process(self.model)

                with tqdm(total=len(batch.rnas), leave=False,
                          unit="transcript",
                          desc="Current batch") as pbar_transcripts:
                    try:
                        scorer = partial(self.score_worker,
                                         model=self.model,
                                         config=scoring_config)
                        jobs = self.mp_pool.imap_unordered(
                            scorer, batch.rnas.values())

                        # Results are discarded; only progress is tracked here.
                        for _ in jobs:
                            pbar_transcripts.update()
                        self.mp_pool.close()
                        self.mp_pool.join()

                    except Exception:
                        self.mp_pool.terminate()
                        raise

                batch.clear()
                pbar_batches.update()

        # Sort score file
        if self.motifs:
            scores = filelib.read_score_file(scoring_config['fp_scores_pre'])
            if scores:
                # Pick the ranking column: raw score without c-scores,
                # otherwise Prob(motif) when energy is enabled, else c-score.
                if scoring_config['no_cscores']:
                    sort_field = 'score'
                elif scoring_config['energy']:
                    sort_field = 'Prob(motif)'
                else:
                    sort_field = 'c-score'

                ranked = sorted(scores,
                                key=lambda entry: entry[sort_field],
                                reverse=True)
                filelib.write_score_file(ranked, scoring_config['fp_scores'])
                os.remove(scoring_config['fp_scores_pre'])  # Clean-up
            else:
                # Nothing to sort: the preliminary file becomes the final one.
                os.rename(scoring_config['fp_scores_pre'],
                          scoring_config['fp_scores'])

        elapsed = misclib.seconds_to_hms(clock.tock())
        logger.info(' ... done in {}'.format(elapsed))