# Exploratory plotting cells: a Burr example followed by two overlays of a
# fitted density on the car-price histogram.
# NOTE(review): `b`, `a` and `distribution_car_price` are not defined in this
# span; they are presumably fit results / data produced earlier - confirm.
plt.show()

# --- Burr distribution example ---------------------------------------------
fig, ax = plt.subplots(1, 1)
c, d = 10.5, 4.3
mean, var, skew, kurt = burr.stats(c, d, moments='mvsk')
# 100 evenly spaced points between the 1st and 99th percentiles.
x = np.linspace(burr.ppf(0.01, c, d), burr.ppf(0.99, c, d), 100)
print(stats.norm.__doc__)

# --- Car prices vs. generalized logistic density -----------------------------
alpha, loc, beta = b[0], b[1], b[2]
# NOTE(review): as written this call raises TypeError, because ``pdf``
# requires the evaluation points ``x`` (plus the distribution's shape
# arguments). The intended arguments are not visible here - supply them or
# delete the line.
pdf = nct.pdf()
data = ss.genlogistic.rvs(alpha, loc=loc, scale=beta, size=5000)
# ``normed`` was removed from ``hist`` in Matplotlib 3.1; ``density=True`` is
# the replacement and normalizes the histogram to unit area.
myHist = plt.hist(distribution_car_price, 500, density=True)
rv = ss.genlogistic(alpha, loc, beta)
x = np.linspace(0, 500000)
h = plt.plot(x, rv.pdf(x), lw=2)
axes = plt.gca()
axes.set_xlim([0, 150000])
plt.show()

# --- Car prices vs. non-central t density ------------------------------------
alpha, loc, beta = b[0], b[1], b[2]
data = ss.genlogistic.rvs(alpha, loc=loc, scale=beta, size=10000)
myHist = plt.hist(distribution_car_price, 500, density=True)
rv = ss.nct(a[0], a[1], a[2], a[3])
x = np.linspace(0, 500000)
h = plt.plot(x, rv.pdf(x), lw=2)
axes = plt.gca()
# Plot the genlogistic pdf on 100 evenly spaced points between the 1st and
# 99th percentiles. `c` (the value passed as the distribution's shape
# argument) and `ax` (the target axes) are defined outside this span.
x = np.linspace(genlogistic.ppf(0.01, c), genlogistic.ppf(0.99, c), 100)
ax.plot(x, genlogistic.pdf(x, c), 'r-', lw=5, alpha=0.6,
        label='genlogistic pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:
rv = genlogistic(c)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``: mapping probabilities through ``ppf``
# and back through ``cdf`` should reproduce them. The result of the
# comparison is not stored or asserted when this runs as a script.
vals = genlogistic.ppf([0.001, 0.5, 0.999], c)
np.allclose([0.001, 0.5, 0.999], genlogistic.cdf(vals, c))  # True

# Generate random numbers:
r = genlogistic.rvs(c, size=1000)

# And compare the histogram (normalized to a density) with the pdf curves:
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
9.1193851632305201, 261.3457987967214) drivingduration_model_dict['exponweib'] = st.exponweib(2.6443841639764942, 0.89242254172118096, 10.603640861374947, 40.28556311444698) drivingduration_model_dict['gengamma'] = st.gengamma(4.8743515108339581, 0.61806208678747043, 9.4649293818479716, 5.431576919220225) drivingduration_model_dict['recipinvgauss'] = st.recipinvgauss( 0.499908918842556, 0.78319699707613699, 28.725450197674746) drivingduration_model_dict['f'] = st.f(9.8757694313677113, 12.347442183821462, 0.051160749890587665, 73.072591767722287) carprice_model_dict = ct.OrderedDict() carprice_model_dict['nct'] = st.nct(7.3139456577106312, 3.7415255108348946, -46.285705145385577, 7917.0860181436065) carprice_model_dict['genlogistic'] = st.genlogistic(10.736440967148635, 3735.7049978006107, 10095.421377235754) carprice_model_dict['gumbel_r'] = st.gumbel_r(26995.077239517472, 10774.370808211244) carprice_model_dict['f'] = st.f(24168.523476867485, 35.805656864712923, -21087.314142557225, 51154.0328397044) carprice_model_dict['johnsonsu'] = st.johnsonsu(-1.7479864366935538, 1.8675670208081987, 14796.793096897647, 14716.575397771712)
ax1 = fig.add_subplot(1, 2, 1) sm.qqplot(logeados, stats.mielke(parametros_mielke[0], parametros_mielke[1], loc=parametros_mielke[2], scale=parametros_mielke[3]), line="45", ax=ax1) ax1.set_title('mielke', size=11.0) ax1.set_xlabel("") ax1.set_ylabel("") ax2 = fig.add_subplot(1, 2, 2) sm.qqplot(logeados, stats.genlogistic(parametros_genlogistic[0], parametros_genlogistic[1], parametros_genlogistic[2]), line="45", ax=ax2) ax2.set_title('genlogistic', size=11.0) ax2.set_xlabel("") ax2.set_ylabel("") fig.tight_layout(pad=0.7) fig.text(0.5, 0, 'Cuantiles teóricos', ha='center', va='center') fig.text(0., 0.5, 'Cuantiles observados', ha='center', va='center',
def all_dists():
    """Return frozen ``scipy.stats`` continuous distributions keyed by name.

    The shape parameters were taken from the examples in the official
    ``scipy.stats`` documentation. Every distribution is frozen with
    ``loc=0.0`` and ``scale=1.0``.

    Returns:
        dict: 87 entries mapping a distribution name to its frozen
        distribution object, in the insertion order listed below.
    """
    standard = {"loc": 0.0, "scale": 1.0}

    # SciPy 1.9 renamed ``gilbrat`` to ``gibrat`` and SciPy 1.11 removed the
    # old spelling. Resolve whichever one this SciPy provides; the dictionary
    # key stays "gilbrat" so existing callers keep working.
    gibrat = getattr(stats, "gibrat", None)
    if gibrat is None:
        gibrat = stats.gilbrat

    return {
        "alpha": stats.alpha(a=3.57, **standard),
        "anglit": stats.anglit(**standard),
        "arcsine": stats.arcsine(**standard),
        "beta": stats.beta(a=2.31, b=0.627, **standard),
        "betaprime": stats.betaprime(a=5, b=6, **standard),
        "bradford": stats.bradford(c=0.299, **standard),
        "burr": stats.burr(c=10.5, d=4.3, **standard),
        "cauchy": stats.cauchy(**standard),
        "chi": stats.chi(df=78, **standard),
        "chi2": stats.chi2(df=55, **standard),
        "cosine": stats.cosine(**standard),
        "dgamma": stats.dgamma(a=1.1, **standard),
        "dweibull": stats.dweibull(c=2.07, **standard),
        "erlang": stats.erlang(a=2, **standard),
        "expon": stats.expon(**standard),
        "exponnorm": stats.exponnorm(K=1.5, **standard),
        "exponweib": stats.exponweib(a=2.89, c=1.95, **standard),
        "exponpow": stats.exponpow(b=2.7, **standard),
        "f": stats.f(dfn=29, dfd=18, **standard),
        "fatiguelife": stats.fatiguelife(c=29, **standard),
        "fisk": stats.fisk(c=3.09, **standard),
        "foldcauchy": stats.foldcauchy(c=4.72, **standard),
        "foldnorm": stats.foldnorm(c=1.95, **standard),
        # frechet_r / frechet_l are intentionally absent: those names are no
        # longer provided by SciPy (weibull_min / weibull_max below are the
        # replacements).
        "genlogistic": stats.genlogistic(c=0.412, **standard),
        "genpareto": stats.genpareto(c=0.1, **standard),
        "gennorm": stats.gennorm(beta=1.3, **standard),
        "genexpon": stats.genexpon(a=9.13, b=16.2, c=3.28, **standard),
        "genextreme": stats.genextreme(c=-0.1, **standard),
        "gausshyper": stats.gausshyper(a=13.8, b=3.12, c=2.51, z=5.18,
                                       **standard),
        "gamma": stats.gamma(a=1.99, **standard),
        "gengamma": stats.gengamma(a=4.42, c=-3.12, **standard),
        "genhalflogistic": stats.genhalflogistic(c=0.773, **standard),
        "gilbrat": gibrat(**standard),
        "gompertz": stats.gompertz(c=0.947, **standard),
        "gumbel_r": stats.gumbel_r(**standard),
        "gumbel_l": stats.gumbel_l(**standard),
        "halfcauchy": stats.halfcauchy(**standard),
        "halflogistic": stats.halflogistic(**standard),
        "halfnorm": stats.halfnorm(**standard),
        "halfgennorm": stats.halfgennorm(beta=0.675, **standard),
        "hypsecant": stats.hypsecant(**standard),
        "invgamma": stats.invgamma(a=4.07, **standard),
        "invgauss": stats.invgauss(mu=0.145, **standard),
        "invweibull": stats.invweibull(c=10.6, **standard),
        "johnsonsb": stats.johnsonsb(a=4.32, b=3.18, **standard),
        "johnsonsu": stats.johnsonsu(a=2.55, b=2.25, **standard),
        "ksone": stats.ksone(n=1e03, **standard),
        "kstwobign": stats.kstwobign(**standard),
        "laplace": stats.laplace(**standard),
        "levy": stats.levy(**standard),
        "levy_l": stats.levy_l(**standard),
        "levy_stable": stats.levy_stable(alpha=0.357, beta=-0.675,
                                         **standard),
        "logistic": stats.logistic(**standard),
        "loggamma": stats.loggamma(c=0.414, **standard),
        "loglaplace": stats.loglaplace(c=3.25, **standard),
        "lognorm": stats.lognorm(s=0.954, **standard),
        "lomax": stats.lomax(c=1.88, **standard),
        "maxwell": stats.maxwell(**standard),
        "mielke": stats.mielke(k=10.4, s=3.6, **standard),
        "nakagami": stats.nakagami(nu=4.97, **standard),
        "ncx2": stats.ncx2(df=21, nc=1.06, **standard),
        "ncf": stats.ncf(dfn=27, dfd=27, nc=0.416, **standard),
        "nct": stats.nct(df=14, nc=0.24, **standard),
        "norm": stats.norm(**standard),
        "pareto": stats.pareto(b=2.62, **standard),
        "pearson3": stats.pearson3(skew=0.1, **standard),
        "powerlaw": stats.powerlaw(a=1.66, **standard),
        "powerlognorm": stats.powerlognorm(c=2.14, s=0.446, **standard),
        "powernorm": stats.powernorm(c=4.45, **standard),
        "rdist": stats.rdist(c=0.9, **standard),
        "reciprocal": stats.reciprocal(a=0.00623, b=1.01, **standard),
        "rayleigh": stats.rayleigh(**standard),
        "rice": stats.rice(b=0.775, **standard),
        "recipinvgauss": stats.recipinvgauss(mu=0.63, **standard),
        "semicircular": stats.semicircular(**standard),
        "t": stats.t(df=2.74, **standard),
        "triang": stats.triang(c=0.158, **standard),
        "truncexpon": stats.truncexpon(b=4.69, **standard),
        "truncnorm": stats.truncnorm(a=0.1, b=2, **standard),
        "tukeylambda": stats.tukeylambda(lam=3.13, **standard),
        "uniform": stats.uniform(**standard),
        "vonmises": stats.vonmises(kappa=3.99, **standard),
        "vonmises_line": stats.vonmises_line(kappa=3.99, **standard),
        "wald": stats.wald(**standard),
        "weibull_min": stats.weibull_min(c=1.79, **standard),
        "weibull_max": stats.weibull_max(c=2.87, **standard),
        "wrapcauchy": stats.wrapcauchy(c=0.0311, **standard),
    }
def execute_scoring(self):
    """Score the dataset batch by batch and write the final score file.

    Steps, in order:

    1. Build ``scoring_config`` from ``self.run_config`` and instance
       attributes; every output path is placed under
       ``self.run_config['output']``.
    2. Unless ``no_cscores`` is set, run ``self.sample_worker`` for every
       motif on the worker pool, fit a ``genlogistic`` distribution to each
       motif's samples and store the frozen distributions in
       ``self.cscore_dists`` (also exposed as
       ``scoring_config['cscore_dists']``).
    3. For each batch from ``self.make_batches``: re-initialize the pool,
       pre-process the batch with ``self.model`` and map
       ``self.score_worker`` over ``batch.rnas.values()``.
    4. If ``self.motifs`` is truthy, read the preliminary score file back
       and write it to ``fp_scores`` sorted in descending order, then delete
       the preliminary file. When no scores were read, the preliminary file
       is simply renamed to the final path.

    Raises:
        Exception: anything raised while consuming pool results is re-raised
            after the pool has been terminated.
    """
    # Compile scoring configuration parameters
    scoring_config = {
        'posteriors': self.run_config['posteriors'],
        'hdsl': self.run_config['HDSL'],
        'spp': self.run_config['SPP'],
        'viterbi': self.run_config['viterbi'],
        'suppress_nan': True,
        'fp_posteriors': os.path.join(self.run_config['output'],
                                      'posteriors.txt'),
        'fp_scores_pre': os.path.join(self.run_config['output'],
                                      'scores_pre'),
        'fp_scores': os.path.join(self.run_config['output'], 'scores.txt'),
        'fp_hdsl': os.path.join(self.run_config['output'], 'hdsl.txt'),
        'fp_spp': os.path.join(self.run_config['output'], 'spp.txt'),
        'fp_viterbi': os.path.join(self.run_config['output'], 'viterbi.txt'),
        'no_cscores': self.run_config['no_cscores'],
        'min_cscores': self.run_config['min_cscores'],
        'batch_size': self.run_config['batch_size'],
        'motifs': self.motifs,
        'path': self.run_config['path'],
        'context': self.run_config['context'],
        'cscore_dists': None,
        'no_vienna': self.no_vienna,
        # True only when none of the three conditions holds; `~` applied to
        # the NumPy boolean returned by np.any acts as a logical NOT.
        'energy': ~np.any([
            self.no_vienna,
            self.run_config['no_cscores'],
            not viennalib.vienna_imported
        ]),
        'lbc': self.lbc,
        'hdsl_params': self.run_config['hdsl_params']
    }
    # NOTE(review): when c-scores are disabled, nothing in this method closes
    # this pool before pool_init() is called again inside the batch loop -
    # confirm pool_init() copes with an already-open pool.
    self.pool_init()  # Initialize parallelized pool

    # Prepare score distributions for c-score normalization
    if not scoring_config['no_cscores']:
        logger.info('Sampling null sites for c-score normalization')
        clock.tick()
        # One slot per motif, filled in as worker results arrive.
        self.cscore_dists = dict.fromkeys(self.motifs)
        cscore_batch = self.make_cscore_batch(
            scoring_config['min_cscores'])
        cscore_batch.pre_process(self.model, scoring=True)
        with tqdm(total=len(self.motifs), leave=False,
                  unit='motif') as pb_samples:
            try:
                # A truthy 'path' is converted element-wise to an int array.
                if scoring_config['path']:
                    path = np.array(list(scoring_config['path']), dtype=int)
                else:
                    path = None
                worker = partial(self.sample_worker, path=path,
                                 batch=cscore_batch)
                # Results arrive in completion order, hence the (motif,
                # samples) pairs rather than positional matching.
                samples_pool = self.mp_pool.imap_unordered(
                    worker, self.motifs)
                for (motif, samples) in samples_pool:
                    # The fit result is indexed as (c, loc, scale).
                    params = genlogistic.fit(samples)
                    self.cscore_dists[motif] = genlogistic(
                        c=params[0], loc=params[1], scale=params[2])
                    pb_samples.update()
                self.mp_pool.close()
                self.mp_pool.join()
            except Exception:
                # Stop the workers before propagating the error.
                self.mp_pool.terminate()
                raise
        scoring_config['cscore_dists'] = self.cscore_dists
        logger.info(' ... done in {}'.format(
            misclib.seconds_to_hms(clock.tock())))

    # Begin formal scoring phase by making batches to save on memory
    batches = self.make_batches(scoring_config['batch_size'])
    # NOTE(review): `// batch_size + 1` is one more than the ceiling when the
    # RNA count is an exact multiple of the batch size. The value is only
    # used as the progress-bar total below - confirm against make_batches().
    n_batches = len(self.dataset.rnas) // scoring_config[
        'batch_size'] + 1  # Number of batches

    if self.motifs:
        # Start the preliminary score file with its header row.
        # NOTE(review): only the first separator in this view is a tab; the
        # remaining columns are space-separated - confirm this matches what
        # filelib.read_score_file expects.
        header = "transcript\tstart score c-score BCE MEL Prob(motif) motif path seq\n"
        with open(scoring_config['fp_scores_pre'], 'w') as f:
            f.write(header)

    logger.info("Executing scoring")
    clock.tick()

    with tqdm(total=n_batches, leave=False, unit='batch',
              desc='       Overall') as pbar_batches:
        # Process batches sequentially
        for i, batch in enumerate(batches):
            # The pool is closed and joined at the end of each batch, so a
            # fresh one is initialized per batch.
            self.pool_init()
            batch.pre_process(self.model)
            with tqdm(total=len(batch.rnas), leave=False, unit="transcript",
                      desc="Current batch") as pbar_transcripts:
                try:
                    worker = partial(self.score_worker, model=self.model,
                                     config=scoring_config)
                    jobs = self.mp_pool.imap_unordered(
                        worker, batch.rnas.values())
                    # Worker return values are discarded here; the loop only
                    # drives the progress bar.
                    for _ in jobs:
                        pbar_transcripts.update()
                    self.mp_pool.close()
                    self.mp_pool.join()
                except Exception:
                    # Stop the workers before propagating the error.
                    self.mp_pool.terminate()
                    raise
            batch.clear()
            pbar_batches.update()

    # Sort score file
    if self.motifs:
        scores = filelib.read_score_file(scoring_config['fp_scores_pre'])
        if not scores:
            # Nothing to sort: promote the preliminary file as-is.
            os.rename(scoring_config['fp_scores_pre'],
                      scoring_config['fp_scores'])
        else:
            # Descending sort; the key is 'score' when c-scores are disabled,
            # otherwise 'Prob(motif)' when the energy flag is set, otherwise
            # 'c-score'.
            if scoring_config['no_cscores']:
                filelib.write_score_file(
                    sorted(scores,
                           key=lambda score: score['score'],
                           reverse=True),
                    scoring_config['fp_scores'])
            else:
                if scoring_config['energy']:
                    filelib.write_score_file(
                        sorted(scores,
                               key=lambda score: score['Prob(motif)'],
                               reverse=True),
                        scoring_config['fp_scores'])
                else:
                    filelib.write_score_file(
                        sorted(scores,
                               key=lambda score: score['c-score'],
                               reverse=True),
                        scoring_config['fp_scores'])
            os.remove(scoring_config['fp_scores_pre'])  # Clean-up

    logger.info(' ... done in {}'.format(
        misclib.seconds_to_hms(clock.tock())))