def plot_gev(shape, loc, scale):
    # Freeze a GEV distribution; note that scipy's shape parameter c has the
    # opposite sign of the xi used in most textbooks.
    dist = ss.genextreme(c=shape, loc=loc, scale=scale)
    xs = np.sort(dist.rvs(size=100))
    ys = dist.cdf(xs)  # CDF evaluated on the sorted sample
    plt.plot(xs, ys)
    return dist
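A quick usage sketch for the helper above; it assumes the module-level aliases (ss, np, plt) that the function body relies on:

import numpy as np
import scipy.stats as ss
import matplotlib.pyplot as plt

dist = plot_gev(shape=0.0, loc=0.0, scale=1.0)  # c = 0 is the Gumbel case
print(dist.mean(), dist.std())  # moments of the frozen distribution
plt.show()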
Example No. 2
def extremeDistribution_blockMaximaGEV(x, t, t_st):
    '''Approximates the short-term extreme distribution using the block maxima
    method and the Generalized Extreme Value distribution.

    Parameters
    ----------
        x : np.array
            Independent random variable (global peaks)
        t : np.array
            Time vector corresponding to x
        t_st : float
            Short-term period

    Returns
    -------
        stextreme_dist : scipy.stats rv_frozen
            Probability distribution of the short-term extreme.
        ste_parameters : np.array, length 3
            Parameters of the short-term extreme distribution (Generalized
            Extreme Value) [shape_c, loc, scale].
        block_maxima: np.array
            Block maxima (i.e. largest peak in each block).
    '''
    block_maxima = blockMaxima(x, t, t_st)
    ste_parameters = stats.genextreme.fit(block_maxima)
    stextreme_dist = stats.genextreme(c=ste_parameters[0],
                                      loc=ste_parameters[1],
                                      scale=ste_parameters[2])
    return stextreme_dist, ste_parameters, block_maxima
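A self-contained sketch of the same block-maxima workflow with synthetic data (standing in for the module's blockMaxima helper, which is not shown here); the short-term extreme at any quantile is then just the ppf of the frozen distribution:

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
# Synthetic block maxima: the largest of 100 Gumbel-distributed peaks per block.
maxima = stats.gumbel_r.rvs(size=(500, 100), random_state=rng).max(axis=1)
c, loc, scale = stats.genextreme.fit(maxima)
dist = stats.genextreme(c=c, loc=loc, scale=scale)
print("99% short-term extreme:", dist.ppf(0.99))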
Example No. 3
    def parse_epistatic_data(self):

        if self.software.name != 'bagpipe':
            data_file = self.base_dir + os.sep + self.data_prefix + '.' + str(self.sweep_size/1000000) + '.dat'
        else:
            data_file = self.base_dir + os.sep + self.data_prefix + '_add.' + str(self.sweep_size/1000000) + '.dat'

        run_number = 1
        np_extreme_values = np.genfromtxt(data_file, skip_header=1, usecols=(1, 2, 3))
        frozen_gev = genextreme(
            self.gev_model_params.shape, loc=self.gev_model_params.location,
            scale=self.gev_model_params.scale)
        epistatic_data_list = []

        for data in np_extreme_values:
            adj_pvalues = self.get_adjusted_pvalue_scipy(data, frozen_gev)
            if run_number <= 1000:
                snp_id = 'fa0'
            else:
                snp_id = 'fa1'

            epistatic_data_list.append(exp_data.EpistaticModel(
                parameter=self.params, software=self.software,
                run_number=run_number % 1000, locus_span=self.sweep_size,
                snp_id=snp_id, locus_pvalue=data[0],
                adj_locus_pvalue=adj_pvalues[0], non_locus_pvalue=data[1],
                adj_non_locus_pvalue=adj_pvalues[1], non_chrm_pvalue=data[2],
                adj_non_chrm_pvalue=adj_pvalues[2]))
            run_number += 1

        exp_data.EpistaticModel.objects.bulk_create(epistatic_data_list)

        return 0
Example No. 5
def generate_gev_noise(c, N, Y, target_R2=0.005):
    """Draw N observations from a generalized extreme value distribution.

    The GEV distribution is described at
    https://docs.scipy.org/doc/scipy/reference/tutorial/stats/continuous_genextreme.html.

    Parameters
    ----------
    c: real
        The shape parameter for the distribution. The distribution is skewed
        left if c > 0 and skewed right if c < 0. The larger the magnitude of c,
        the higher the kurtosis of the distribution.
    N: integer
        The number of observations to draw.
    Y: vector of reals.
        The true output of the data, ie. f(X).
    target_R2: real in [0, 1]
        The target R^2, if errors were Gaussian. For example, 0.5% would be
        0.005. Default is 0.5%.
    """
    coef = (1 - target_R2) / target_R2
    scale = np.sqrt(coef * np.var(Y))
    # Center the distribution to have mean 0: the GEV mean is
    # loc + scale * (1 - gamma(1 + c)) / c for c != 0, so shift loc accordingly.
    center = -scale / c * (1 - special.gamma(1 + c))
    noise = stats.genextreme(c=c, loc=center, scale=scale).rvs(size=N)
    return noise
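A quick check of the centering used above (valid for c != 0 and c > -1, where the GEV mean exists): with loc = -scale / c * (1 - gamma(1 + c)), the sample mean of the noise is close to zero.

import numpy as np
from scipy import stats, special

c, scale = 0.2, 2.0
center = -scale / c * (1 - special.gamma(1 + c))
noise = stats.genextreme(c=c, loc=center, scale=scale).rvs(size=200_000, random_state=0)
print(noise.mean())  # approximately 0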
Example No. 6
 def testBijector(self):
     loc = 0.3
     scale = 5.
     concentration = np.array([[[-5.5], [-20], [0.], [1.]]],
                              dtype=np.float32)
     bijector = tfb.GeneralizedExtremeValueCDF(loc=loc,
                                               scale=scale,
                                               concentration=concentration,
                                               validate_args=True)
     self.assertStartsWith(bijector.name, "generalizedextremevalue")
     x = np.array([[[0.], [-3.], [0.], [4.2]]], dtype=np.float32)
     # GeneralizedExtremeValue distribution
     gev_dist = stats.genextreme(-concentration, loc=loc, scale=scale)
     y = gev_dist.cdf(x).astype(np.float32)
     self.assertAllClose(y, self.evaluate(bijector.forward(x)))
     self.assertAllClose(x, self.evaluate(bijector.inverse(y)))
     self.assertAllClose(
         np.squeeze(gev_dist.logpdf(x), axis=-1),
         self.evaluate(bijector.forward_log_det_jacobian(x, event_ndims=1)))
     self.assertAllClose(
         self.evaluate(
             -bijector.inverse_log_det_jacobian(y, event_ndims=1)),
         self.evaluate(bijector.forward_log_det_jacobian(x, event_ndims=1)),
         rtol=1e-4,
         atol=0.)
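The -concentration passed to stats.genextreme above is the key detail of these tests: TFP's concentration follows the textbook xi convention, while scipy's shape c is its negative. A standalone sketch of the correspondence:

from scipy import stats

xi = 0.25  # textbook / TFP concentration parameter
dist = stats.genextreme(c=-xi, loc=0.3, scale=5.0)  # scipy uses c = -xi
print(dist.cdf(4.2))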
Example No. 7
    def parse_additive_env_sweep(self):

        data_file = self.base_dir + os.sep + self.data_prefix + '.' + str(self.sweep_size/1000000) + '.dat'

        run_number = 1

        np_extreme_values = np.genfromtxt(data_file, skip_header=1, usecols=(1, 2, 3))

        frozen_gev = genextreme(
            self.gev_model_params.shape, loc=self.gev_model_params.location,
            scale=self.gev_model_params.scale)
        additive_models_list = []
        for data in np_extreme_values:
            adj_pvalues = self.get_adjusted_pvalue_scipy(data, frozen_gev)
            additive_models_list.append(exp_data.AdditiveEnvironmentalSweepModel(
                parameter=self.params, software=self.software,
                run_number=run_number, locus_span=self.sweep_size, locus_pvalue=data[0],
                adj_locus_pvalue=adj_pvalues[0], non_locus_pvalue=data[1],
                adj_non_locus_pvalue=adj_pvalues[1], non_chrm_pvalue=data[2],
                adj_non_chrm_pvalue=adj_pvalues[2]))
            run_number += 1

        exp_data.AdditiveEnvironmentalSweepModel.objects.bulk_create(additive_models_list)

        return 0
Example No. 8
    def _fit(self):
        # Fit can be made using Maximum Likelihood Estimation (mle) or using
        # l-moments.
        # L-moments is fast and accurate most of the time for the GEV
        # distribution.

        # MLE FIT
        # MLE estimation can be unstable if we don't provide an initial guess
        # of the parameters. Loc and scale are more or less stable, but the
        # shape can be quite unstable depending on the input data. This is why
        # we use l-moments to obtain starting values for the MLE optimization.
        # For MLE we use fmin_bfgs, as it is faster than the alternatives and,
        # given the initial guess, provides accurate results.
        if self.fit_method == 'mle':
            # Initial guess to make the fit of GEV more stable
            # To do the initial guess we are using lmoments...
            _params0 = _lmdistr.gev.lmom_fit(self.data)
            # The mle fit will start with the initial estimators obtained
            # with lmoments above
            _params = _st.genextreme.fit(self.data,
                                         _params0['c'],
                                         loc=_params0['loc'],
                                         scale=_params0['scale'],
                                         optimizer=_op.fmin_bfgs)
            self.params = OrderedDict()
            # For the shape parameter, the value reported by scipy has the
            # opposite sign to the one used by other packages in R, some
            # textbooks, Wikipedia, etc.
            self.params["shape"] = _params[0]
            self.params["location"] = _params[1]
            self.params["scale"] = _params[2]

        # L-MOMENTS FIT
        if self.fit_method == 'lmoments':
            _params = _lmdistr.gev.lmom_fit(self.data)
            self.params = OrderedDict()
            # For the shape parameter, the value reported by lmoments3 has the
            # opposite sign to the one used by other packages in R, some
            # textbooks, Wikipedia, etc.
            self.params["shape"] = _params['c']
            self.params["location"] = _params['loc']
            self.params["scale"] = _params['scale']

        # METHOD OF MOMENTS FIT
        if self.fit_method == 'mom':
            _params = _gev_momfit(self.data)
            self.params = OrderedDict()
            self.params["shape"] = _params[0]
            self.params["location"] = _params[1]
            self.params["scale"] = _params[2]

        # Estimators and a frozen distribution for the estimators
        self.c = self.params['shape']  # shape
        self.loc = self.params['location']  # location
        self.scale = self.params['scale']  # scale
        self.distr = _st.genextreme(
            self.c,  # frozen distribution
            loc=self.loc,
            scale=self.scale)
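A standalone sketch of the two-stage fit described in the comments above, assuming the lmoments3 package in the role of _lmdistr and scipy in the roles of _st and _op:

import lmoments3.distr as lmdistr
from scipy import optimize, stats

data = stats.genextreme.rvs(-0.1, loc=10.0, scale=2.0, size=500, random_state=0)
p0 = lmdistr.gev.lmom_fit(data)  # L-moments estimates as starting values
c, loc, scale = stats.genextreme.fit(
    data, p0['c'], loc=p0['loc'], scale=p0['scale'],
    optimizer=optimize.fmin_bfgs)  # refined by MLE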
Example No. 9
    def mml(self):
        if self.data is None:
            raise e.DataNotExist("Data must not be None", 25)
        mml = gev.lmom_fit(self.data)
        self.estimador = 'MML'
        self.shape = mml['c']
        self.loc = mml['loc']
        self.scale = mml['scale']
        self.dist = genextreme(c=self.shape, loc=self.loc, scale=self.scale)

        return self.shape, self.loc, self.scale
Example No. 10
    def mvs(self):
        if self.data is None:
            raise e.DataNotExist("Data must not be None", 35)
        mvs = genextreme.fit(data=self.data)
        self.estimador = 'MVS'
        self.shape = mvs[0]
        self.loc = mvs[1]
        self.scale = mvs[2]
        self.dist = genextreme(c=self.shape, loc=self.loc, scale=self.scale)

        return self.shape, self.loc, self.scale
Example No. 11
def extreme_value_prob(NPM, perc):
    n = NPM.shape[0]
    t = NPM.shape[1]
    n_perc = int(round(t * perc))
    m = np.zeros(n)
    for i in range(n):
        temp = np.abs(NPM[i, :])
        temp = np.sort(temp)
        temp = temp[t - n_perc:]
        m[i] = trim_mean(temp, 0.05)
    params = genextreme.fit(m)
    ev = genextreme(params[0], loc=params[1], scale=params[2])  # freeze all three fitted parameters
    probs = ev.cdf(m)
    return probs
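Hypothetical usage of the function above (it relies on genextreme and trim_mean being imported from scipy.stats elsewhere in the module):

import numpy as np

rng = np.random.default_rng(0)
NPM = rng.standard_normal((20, 1000))  # 20 subjects x 1000 map entries
probs = extreme_value_prob(NPM, perc=0.01)  # tail-mean of the top 1% per subject
print(probs.round(3))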
Example No. 12
    def calc_match_statistics(self,
                              oligo,
                              charges,
                              modifications,
                              ms,
                              ppm_error,
                              score_to_test,
                              random_oligo_to_test=1000):

        fr = Fragmentor()
        matcher = Matcher()
        column_headers = ['Sequence', 'Score']

        min_char = len(oligo)
        max_char = len(oligo)
        allchar = ['A', 'G', 'C', 'T', 'U']

        data_to_save = []

        for i in range(random_oligo_to_test):

            random_oligo = "".join(
                choice(allchar) for _ in range(randint(min_char, max_char)))

            fragments = fr.fragment_oligo(random_oligo)
            df_search_space = matcher.create_search_space(
                fragments, charges, modifications)
            df_results = matcher.match_oligo_fragments_pandas(
                df_search_space, ms, ppm_error)

            score = self.simple_score(df_results)

            print('Oligo: {0:<30} Score: {1:7.3f}'.format(random_oligo, score))

            data_to_save.append([random_oligo, score])

        dist_df = pd.DataFrame(data_to_save, columns=column_headers)

        extreme_fit = genextreme.fit(dist_df.Score)
        c = extreme_fit[0]
        loc = extreme_fit[1]
        scale = extreme_fit[2]
        print(("Extreme value fits c = {0}, loc = {1}, scale = {2}").format(
            c, loc, scale))

        extreme_to_plot = genextreme(c, loc, scale)
        # Right-tail p-value: probability of a random oligo scoring at least this high.
        p_value = extreme_to_plot.sf(score_to_test)
        print(("p value of score {0} = {1}").format(score_to_test, p_value))

        return dist_df, p_value, score_to_test, extreme_to_plot
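For a right-tail p-value, the survival function of the fitted null distribution is the quantity of interest; the density at a point is not a probability. A minimal illustration with a frozen GEV:

from scipy.stats import genextreme

dist = genextreme(c=-0.1, loc=10.0, scale=2.0)
score = 18.0
print(dist.sf(score))   # P(X >= score): a right-tail p-value
print(dist.pdf(score))  # density at the point, not a probability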
Example No. 13
    def testGEVLogPdf(self):
        batch_size = 6
        loc = np.array([0.] * batch_size, dtype=self._dtype)
        scale = np.array([3.] * batch_size, dtype=self._dtype)
        conc = np.array([2.] * batch_size, dtype=self._dtype)
        gev_dist = stats.genextreme(-conc, loc=loc, scale=scale)
        x = np.array([2., 3., 4., 5., 6., 7.], dtype=self._dtype)
        gev = tfd.GeneralizedExtremeValue(loc=self.make_tensor(loc),
                                          scale=self.make_tensor(scale),
                                          concentration=self.make_tensor(conc),
                                          validate_args=True)
        log_pdf = gev.log_prob(self.make_tensor(x))
        self.assertAllClose(gev_dist.logpdf(x), self.evaluate(log_pdf))

        pdf = gev.prob(x)
        self.assertAllClose(gev_dist.pdf(x), self.evaluate(pdf))
Example No. 14
    def testGEVLogPdfMultidimensional(self):
        batch_size = 6
        loc = np.array([[-2.0, -4.0, -5.0]] * batch_size, dtype=self._dtype)
        scale = np.array([1.0], dtype=self._dtype)
        conc = np.array([[0.0, 1.0, 2.0]] * batch_size, dtype=self._dtype)
        gev_dist = stats.genextreme(-conc, loc=loc, scale=scale)
        x = np.array([[2., 3., 4., 5., 6., 7.]], dtype=self._dtype).T

        gev = tfd.GeneralizedExtremeValue(loc=self.make_tensor(loc),
                                          scale=self.make_tensor(scale),
                                          concentration=self.make_tensor(conc),
                                          validate_args=True)
        log_pdf = gev.log_prob(self.make_tensor(x))
        self.assertAllClose(self.evaluate(log_pdf), gev_dist.logpdf(x))

        pdf = gev.prob(self.make_tensor(x))
        self.assertAllClose(self.evaluate(pdf), gev_dist.pdf(x))
Example No. 15
    def testGEVSample(self):
        loc = self._dtype(4.0)
        scale = self._dtype(1.0)
        conc = self._dtype(0.2)
        n = int(1e6)
        gev_dist = stats.genextreme(-conc, loc=loc, scale=scale)

        gev = tfd.GeneralizedExtremeValue(loc=self.make_tensor(loc),
                                          scale=self.make_tensor(scale),
                                          concentration=self.make_tensor(conc),
                                          validate_args=True)

        samples = gev.sample(n, seed=test_util.test_seed())
        sample_values = self.evaluate(samples)
        self.assertEqual((n, ), sample_values.shape)
        self.assertAllClose(gev_dist.mean(), sample_values.mean(), rtol=.01)
        self.assertAllClose(gev_dist.var(), sample_values.var(), rtol=.01)
Example No. 16
    def testGEVSampleMultidimensionalVar(self):
        loc = np.array([2.0, 4.0, 5.0], dtype=self._dtype)
        scale = np.array([1.0, 0.8, 0.5], dtype=self._dtype)
        conc = np.array([0.2], dtype=self._dtype)
        gev_dist = stats.genextreme(-conc, loc=loc, scale=scale)
        n = int(1e6)

        gev = tfd.GeneralizedExtremeValue(loc=self.make_tensor(loc),
                                          scale=self.make_tensor(scale),
                                          concentration=self.make_tensor(conc),
                                          validate_args=True)

        samples = gev.sample(n, seed=test_util.test_seed())
        sample_values = self.evaluate(samples)
        self.assertAllClose(gev_dist.var(),
                            sample_values.var(axis=0),
                            rtol=.03,
                            atol=0)
Example No. 17
def plot_ks_gev_gauss(data_sample, alg_name):
    data_min = min(data_sample)
    data_max = max(data_sample)
    n_points = 100
    plot_points = [(data_min + (i / n_points) * (data_max - data_min))
                   for i in range(0, n_points + 1)]

    # Estimate Gaussian:
    nrm_fit = norm.fit(data_sample)
    # Gaussian parameters from fit:
    (mu, sigma) = nrm_fit
    rv_nrm = norm(loc=mu, scale=sigma)
    # Create data from estimated Gaussian to plot:
    nrm_pdf = rv_nrm.pdf(plot_points)

    # Estimate GEV:
    gev_fit = genextreme.fit(data_sample)
    # GEV parameters from fit:
    c, loc, scale = gev_fit
    rv_gev = genextreme(c, loc=loc, scale=scale)
    # Create data from estimated GEV to plot:
    gev_pdf = rv_gev.pdf(plot_points)

    # Use Kernel-Density Estimation for comparison

    # Make a Kernel density plot:
    sns.set(color_codes=True)
    plt.figure()
    ax = sns.kdeplot(data_sample, kernel='gau', label='Kernel Density')
    #####ax.plot(plot_points, gev_pdf, label='Estimated GEV')
    ax.plot(plot_points, nrm_pdf, label='Estimated Gaussian')
    ax.legend()

    # Use title to indicate parameters found:
    plot_title = "PDF estimated from data created for " + alg_name + "\n"
    #####plot_title += "Estimated parameters for GEV: location={:.2f} scale={:.2f} c={:.2f}\n".format(loc, scale, c)
    plot_title += "Estimated parameters for Gaussian: location={:.2f} scale={:.2f}\n".format(
        mu, sigma)

    plt.title(plot_title)
    plt.xlabel("Independent Variable")
    plt.ylabel("Probability Density from " + str(len(data_sample)) + " points")
    plt.tight_layout()
    plt.draw()
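Despite its name, the function above only plots the two fits; a sketch of the matching one-sample Kolmogorov-Smirnov tests, using the locals mu, sigma, c, loc, scale computed inside the function, would be:

from scipy.stats import kstest

# Note: p-values are optimistic when the parameters were fit to the same sample.
ks_gauss = kstest(data_sample, 'norm', args=(mu, sigma))
ks_gev = kstest(data_sample, 'genextreme', args=(c, loc, scale))
print('KS vs Gaussian:', ks_gauss)
print('KS vs GEV:', ks_gev)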
Example No. 18
    def testGEVMean(self):
        loc = np.array([2.0], dtype=self._dtype)
        scale = np.array([1.5], dtype=self._dtype)
        conc = np.array([-0.9, 0.0], dtype=self._dtype)
        gev_dist = stats.genextreme(-conc, loc=loc, scale=scale)

        gev = tfd.GeneralizedExtremeValue(loc=self.make_tensor(loc),
                                          scale=self.make_tensor(scale),
                                          concentration=self.make_tensor(conc),
                                          validate_args=True)
        self.assertAllClose(self.evaluate(gev.mean()), gev_dist.mean())

        conc_with_inf_mean = np.array([2.], dtype=self._dtype)
        gev_with_inf_mean = tfd.GeneralizedExtremeValue(
            loc=self.make_tensor(loc),
            scale=self.make_tensor(scale),
            concentration=self.make_tensor(conc_with_inf_mean),
            validate_args=True)
        self.assertAllClose(self.evaluate(gev_with_inf_mean.mean()), [np.inf])
Example No. 19
    def evdplot(df,outfile):
        '''
        Distribution of escores.
        '''
        matplotlib.use('pdf')
        f, (ax1, ax2, ax3) = subplots(1,3,figsize=[10,5])
        # s score distribution
        df.escore.hist(ax=ax1, density=True, bins=50, histtype='stepfilled', alpha=0.2, label='EScore')
        c,loc,scale = stats.genextreme.fit(df.escore)
        rv = stats.genextreme(c,loc=loc,scale=scale)
        x = numpy.linspace(0,df.escore.max(),50)
        ax1.plot(x, rv.pdf(x), 'k-', lw=1, label='EVD')
        ax1.legend(loc='best')
        ax1.set_xlabel('Enrichment score')
        ax1.set_ylabel('Probability')
    
        # cumulative distribution
        df.escore.hist(ax=ax2, cumulative=True, density=True, bins=50, histtype='stepfilled', alpha=0.2, label='EScore')
        ax2.plot(x, rv.cdf(x), 'k-', lw=1, label='EVD')
        ax2.legend(loc='best')
        ax2.set_xlabel('Enrichment score')
        ax2.set_ylabel('Probability')
    
        # goodness-of-fit test
        nbin = int(round(1 + numpy.log2(df.escore.size)))
        x = numpy.linspace(0, df.escore.max(), nbin + 1)
        y = rv.cdf(x)
        counts, bin_edges = numpy.histogram(df.escore, bins=nbin)
        counts = [df.escore.size-len(df.escore.nonzero()[0])] + list(counts)
        cdf = numpy.cumsum(counts)
        cdf = cdf/float(max(cdf))
    
        kst,ksp = stats.ks_2samp(y,cdf)
        chit,chip = stats.chisquare(cdf,y)
        ax3.plot(bin_edges,y,'r-',label='EVD')
        ax3.plot(bin_edges, cdf,'b-',label='EScore')
        ax3.legend(loc='best')
        ax3.text(df.escore.max()/4,0.3,"KS test:\nstat={0:.2f},p={1:.2e}\nChiSquare test:\nstat={2:.2f},p={3:.2e}".format(kst,ksp,chit,chip))
        ax3.set_xlabel('Enrichment score')
        ax3.set_ylabel('Probability')

        savefig(outfile,format='pdf')
        return rv
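A caveat on the goodness-of-fit block above: ks_2samp and chisquare are applied to two arrays of CDF values, which is not how those tests are defined. A more conventional check (a sketch, reusing the fitted frozen rv and the stats alias from the snippet) tests the raw scores against the fitted CDF directly:

kst, ksp = stats.kstest(df.escore, rv.cdf)
print('KS stat={0:.3f}, p={1:.3e}'.format(kst, ksp))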
Example No. 20
def compute_ehalf(distribution, a, b, iterations, ranks, gev):

    odist = distribution  # the original (non-GEV) runtime distribution
    # Freeze the GEV from its (shape, loc, scale) parameter tuple
    gevdist = stats.genextreme(*gev)

    # Sample-projected runtime on 1 block is just the expected value of the GEV distribution
    runtime_sample_emma = [gevdist.moment(1) * iterations / 1000000]

    # Distribution-projected runtime based on either the original distribution or its EMMA,
    # projected to the initial number of ranks
    rank = ranks
    if rank == 0:
        runtime_original_emma = [odist.moment(1) * iterations / 1000000]
    else:
        runtime_original_emma = [emma(odist, rank) * iterations / 1000000]

    # And project everything to more ranks
    rank_list = []
    for i in range(1, 5):
        rank = ranks * (2**i)
        rank_list.append(rank)
        runtime_sample_emma.append(emma(gevdist, 2**i) * iterations / 1000000)
        runtime_original_emma.append(emma(odist, rank) * iterations / 1000000)

    # For each set of distributions and parameters, project runtimes from smallest
    # experiment and plot larger experiments versus this projection
    for frame in expdata.groupby(['workload', 'a', 'b', 'iterations']):
        exps = frame[1].sort_values(['ranks']).reset_index(drop=True)
        print('Projecting runtimes for experiment {0}'.format(frame[0]))

        fig, ax = plt.subplots()
        runtimes = []
        sizes = []
        for iter, row in exps.iterrows():
            sizes.append(row['ranks'])
            runtimes.append(row['runtime'] / 1000000)
            if iter == 0:
                project_runtimes(frame[0], row, fig, ax)
            ax.scatter(sizes, runtimes, label='Actual Runtimes')
            ax.grid()
            plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
            plt.show()
Example No. 21
def test_gev_genextreme(case):
    gev = stats.genextreme(0)
    # check ev copulas, cdf and transform against R `evt` package
    ev_tr, v1, v2, args, res0, res1, res2 = case
    y = [v1, v2]
    u = gev.cdf(y)
    res = copula_bv_ev(u, ev_tr, args=args)
    assert_allclose(res, res1, rtol=1e-13)

    ev = ExtremeValueCopula(ev_tr)
    # evaluated at using u = y
    cdf1 = ev.cdf(y, args)
    assert_allclose(cdf1, res0, rtol=1e-13)

    # evaluated at transformed u = F(y)
    cdf1 = ev.cdf(u, args)
    assert_allclose(cdf1, res1, rtol=1e-13)

    cev = CopulaDistribution([gev, gev], ev, copargs=args)
    cdfd = cev.cdf(np.array(y), args=args)
    assert_allclose(cdfd, res1, rtol=1e-13)
    pdfd = cev.pdf(np.array(y), args=args)
    assert_allclose(pdfd, res2, rtol=1e-13)
Example No. 22
def main():
    # Get DataFrame of All Experiments
    df = getAllExperiments()

    # Get Only Runs from Experiment 15
    df_filtered = df[df['Experiment'] == 15]

    # Get Specific Run - No Rabbit Workload and No Stencil and 8 ppn on 2 nodes
    df_filtered = df_filtered[df_filtered['cores'] == 8]
    df_filtered = df_filtered[df_filtered['processors'] == 16] # 2 Nodes
    df_filtered = df_filtered[df_filtered['rabbit_workload'] == 0]
    df_filtered = df_filtered[df_filtered['stencil_size'] == 0]
    print(df_filtered)

    # Get Data from Specific Run
    data = getData(df_filtered)
    print(data.head())

    # Get Data Only from Rank 0
    data_rank0 = data[data['rank'] == 0]

    # Find Shape, Location, and Scale of Max Data
    shape, loc, scale = gevfit.fit(data_rank0['workload_max_usec'])
    print("Shape: ", shape, "\tLocation: ", loc, "\tScale: ", scale)

    # Get Overall Runtime
    runtime0 = data_rank0['interval_max_usec'].sum()
    dist = stats.genextreme(shape, loc, scale)
    print("Runtime: ", runtime0, "Microseconds at Initial ", data['comm_size'].iloc[0], " Ranks")

    # Projected Runtime at k = 8 -> 128 ranks (16 nodes)
    projected = emma(dist, 8) * data['iterations'].iloc[0]
    print("K = 8: Projected Runtime: ", projected, "\tProjected Efficiency: ", runtime0 / projected)

    # Get Projected Scale of When Efficiency Reaches 50%
    eh = ehalf(runtime0, data['comm_size'].iloc[0], dist, data['iterations'].iloc[0])
    print("Expect 50% Efficiency at: ", eh, " Ranks")
Example No. 23
def makeGraphs(rawdata, title, filename, pmodel=False, chaosnoise=False):
    for i in range(len(rawdata)):
        # Plot and fit of the time-series histogram
        (mu, sigma) = norm.fit(rawdata[i][2])
        # rv_nrm = norm(loc=mu, scale=sigma)
        # Estimate GEV:
        n = 8192
        ypoints = [
            min(rawdata[i][2]) + (j / n) *
            (max(rawdata[i][2]) - min(rawdata[i][2])) for j in range(0, n + 1)
        ]
        gev_fit = genextreme.fit(rawdata[i][2])
        # GEV parameters from fit:
        c, loc, scale = gev_fit
        mean, var, skew, kurt = genextreme.stats(c, moments='mvsk')
        rv_gev = genextreme(c, loc=loc, scale=scale)
        # Create data from estimated GEV to plot:
        gev_pdf = rv_gev.pdf(ypoints)
        plt.title((title + "\nMu= {1:.3}, Sigma={2:.3}.").format(
            rawdata[i][0], mu, sigma))
        n, bins, patches = plt.hist(rawdata[i][2],
                                    60,
                                    density=1,
                                    facecolor='powderblue',
                                    alpha=0.75,
                                    label="Normalized data")
        plt.plot(np.arange(min(bins), max(bins),
                           (max(bins) - min(bins)) / len(rawdata[i][2])),
                 gev_pdf[:len(rawdata[i][2])],
                 'r-',
                 lw=5,
                 alpha=0.6,
                 label='genextreme pdf')
        plt.ylabel("Probability Density")
        plt.xlabel("Value")
        plt.legend()
        plt.savefig("PDF" + filename.format(i))
        plt.show()

        plt.figure(figsize=(20, 12))
        # Plot of the time series
        ax1 = plt.subplot(211)
        ax1.set_title(title.format(rawdata[i][0]), fontsize=18)
        if pmodel:
            ax1.plot(rawdata[i][2],
                     color="firebrick",
                     linestyle='-',
                     label="Data")
        elif chaosnoise:
            ax1.plot(rawdata[i][1],
                     rawdata[i][2],
                     color="firebrick",
                     marker='o',
                     linestyle='',
                     label="Data")
        else:
            ax1.plot(rawdata[i][1],
                     rawdata[i][2],
                     color="firebrick",
                     linestyle='-',
                     label="Data")
        # Plot and computation of the DFA
        ax2 = plt.subplot(223)
        ax2.set_title(r"Detrended Fluctuation Analysis $\alpha$={0:.3}".format(
            rawdata[i][3]),
                      fontsize=15)
        ax2.plot(rawdata[i][4],
                 rawdata[i][5],
                 marker='o',
                 linestyle='',
                 color="#12355B",
                 label="{0:.3}".format(rawdata[i][3]))
        ax2.plot(rawdata[i][4], rawdata[i][6], color="#9DACB2")
        # Plot and computation of the PSD
        ax3 = plt.subplot(224)
        ax3.set_title(r"Power Spectrum Density $\beta$={0:.3}".format(
            rawdata[i][12]),
                      fontsize=15)
        ax3.set_yscale('log')
        ax3.set_xscale('log')
        ax3.plot(rawdata[i][7],
                 rawdata[i][8],
                 '-',
                 color='deepskyblue',
                 alpha=0.7)
        ax3.plot(rawdata[i][9], rawdata[i][10], color="darkblue", alpha=0.8)
        ax3.axvline(rawdata[i][7][rawdata[i][14]],
                    color="darkblue",
                    linestyle='--')
        ax3.axvline(rawdata[i][7][rawdata[i][15]],
                    color="darkblue",
                    linestyle='--')
        ax3.plot(rawdata[i][9],
                 rawdata[i][13](rawdata[i][9], rawdata[i][11], rawdata[i][12]),
                 color="#D65108",
                 linestyle='-',
                 linewidth=3,
                 label=r'$\beta$={0:.3}'.format(rawdata[i][12]))
        ax2.set_xlabel("log(s)")
        ax2.set_ylabel("log F(s)")
        ax3.set_xlabel("Frequency (Hz)")
        ax3.set_ylabel("Power")
        ax3.legend()
        plt.savefig(filename.format(i))
        plt.show()
Example No. 24
                dist = sp.gumbel_l(X[0], X[1])
                x = np.array(bins)
                y = dist.pdf(x)
                print(y)
                plt.plot(x, y, 'k--', linewidth=2)

                X = sp.norm.fit(np.array(trace))
                print(X)
                dist = sp.norm(X[0], X[1])
                x = np.array(bins)
                y = dist.pdf(x)
                plt.plot(x, y, 'r--', linewidth=2)

                X = sp.genextreme.fit(np.array(trace))
                print(X)
                dist = sp.genextreme(X[0], X[1], X[2])
                x = np.array(bins)
                y = dist.pdf(x)
                plt.plot(x, y, 'b--', linewidth=2)
                plt.title("%s" % (t), fontsize='small')

            elif plot_idx == 3:
                n, bins, patches = plt.hist(np.array(trace),
                                            50,
                                            density=True,
                                            facecolor='green',
                                            alpha=0.75)

                X = sp.expon.fit(np.array(trace), floc=0)

                print(X)
Example No. 25
from scipy.stats import genextreme
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import expon
data = pd.read_csv("times.csv")
data_list = data['data_queue_vessels_waiting_lock'].tolist()
parrams = genextreme.fit(data_list)

fig, ax = plt.subplots(1, 1)
c1 = -0.7776
scale1 = 15.08
loc1 = 13.16

rv1 = genextreme(c=c1, scale=scale1, loc=loc1)
rv2 = genextreme(c=parrams[0], loc=parrams[1], scale=parrams[2])  # fit returns (c, loc, scale)

x1 = np.linspace(rv1.ppf(0.00001), rv1.ppf(0.99999), 100)
x2 = np.linspace(rv2.ppf(0.00001), rv2.ppf(0.99999), 100)

ax.plot(x2, rv2.pdf(x2), 'r-', lw=5, label='scipy')
ax.plot(x1, rv1.pdf(x1), 'k-', lw=2, label='matlab')
plt.show()
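One detail worth remembering in scripts like this: genextreme.fit returns (c, loc, scale) in that order, so unpacking by name avoids accidentally swapping loc and scale when freezing:

c, loc, scale = genextreme.fit(data_list)
rv2 = genextreme(c=c, loc=loc, scale=scale)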
Example No. 26
                dist = sp.gumbel_l(X[0],X[1])
                x = np.array(bins)
                y = dist.pdf(x)
                print(y)
                plt.plot(x, y,'k--',linewidth=2)

                X = sp.norm.fit(np.array(trace))
                print(X)
                dist = sp.norm(X[0],X[1])
                x = np.array(bins)
                y = dist.pdf(x)
                plt.plot(x, y,'r--',linewidth=2)

                X = sp.genextreme.fit(np.array(trace))
                print(X)
                dist = sp.genextreme(X[0],X[1],X[2])
                x = np.array(bins)
                y = dist.pdf(x)
            #plt.plot(x, y,'b--',linewidth=2)


            elif plot_idx==3:
                n, bins, patches = plt.hist(np.array(trace), 50, density=True, facecolor='green', alpha=0.75)

                #copied = np.append(np.array(trace), -np.array(trace))
                #(mu, sigma) = sp.norm.fit(copied)
                #y = mlab.normpdf( bins, mu, sigma)
                #l = plt.plot(bins, y, 'r--', linewidth=2)

                X = sp.expon.fit(np.array(trace),floc=0)
Example No. 27
def all_dists():
    # dists params were taken from the scipy.stats official
    # documentation examples
    # Total - 89
    return {
        "alpha":
        stats.alpha(a=3.57, loc=0.0, scale=1.0),
        "anglit":
        stats.anglit(loc=0.0, scale=1.0),
        "arcsine":
        stats.arcsine(loc=0.0, scale=1.0),
        "beta":
        stats.beta(a=2.31, b=0.627, loc=0.0, scale=1.0),
        "betaprime":
        stats.betaprime(a=5, b=6, loc=0.0, scale=1.0),
        "bradford":
        stats.bradford(c=0.299, loc=0.0, scale=1.0),
        "burr":
        stats.burr(c=10.5, d=4.3, loc=0.0, scale=1.0),
        "cauchy":
        stats.cauchy(loc=0.0, scale=1.0),
        "chi":
        stats.chi(df=78, loc=0.0, scale=1.0),
        "chi2":
        stats.chi2(df=55, loc=0.0, scale=1.0),
        "cosine":
        stats.cosine(loc=0.0, scale=1.0),
        "dgamma":
        stats.dgamma(a=1.1, loc=0.0, scale=1.0),
        "dweibull":
        stats.dweibull(c=2.07, loc=0.0, scale=1.0),
        "erlang":
        stats.erlang(a=2, loc=0.0, scale=1.0),
        "expon":
        stats.expon(loc=0.0, scale=1.0),
        "exponnorm":
        stats.exponnorm(K=1.5, loc=0.0, scale=1.0),
        "exponweib":
        stats.exponweib(a=2.89, c=1.95, loc=0.0, scale=1.0),
        "exponpow":
        stats.exponpow(b=2.7, loc=0.0, scale=1.0),
        "f":
        stats.f(dfn=29, dfd=18, loc=0.0, scale=1.0),
        "fatiguelife":
        stats.fatiguelife(c=29, loc=0.0, scale=1.0),
        "fisk":
        stats.fisk(c=3.09, loc=0.0, scale=1.0),
        "foldcauchy":
        stats.foldcauchy(c=4.72, loc=0.0, scale=1.0),
        "foldnorm":
        stats.foldnorm(c=1.95, loc=0.0, scale=1.0),
        # "frechet_r": stats.frechet_r(c=1.89, loc=0.0, scale=1.0),
        # "frechet_l": stats.frechet_l(c=3.63, loc=0.0, scale=1.0),
        "genlogistic":
        stats.genlogistic(c=0.412, loc=0.0, scale=1.0),
        "genpareto":
        stats.genpareto(c=0.1, loc=0.0, scale=1.0),
        "gennorm":
        stats.gennorm(beta=1.3, loc=0.0, scale=1.0),
        "genexpon":
        stats.genexpon(a=9.13, b=16.2, c=3.28, loc=0.0, scale=1.0),
        "genextreme":
        stats.genextreme(c=-0.1, loc=0.0, scale=1.0),
        "gausshyper":
        stats.gausshyper(a=13.8, b=3.12, c=2.51, z=5.18, loc=0.0, scale=1.0),
        "gamma":
        stats.gamma(a=1.99, loc=0.0, scale=1.0),
        "gengamma":
        stats.gengamma(a=4.42, c=-3.12, loc=0.0, scale=1.0),
        "genhalflogistic":
        stats.genhalflogistic(c=0.773, loc=0.0, scale=1.0),
        "gilbrat":
        stats.gilbrat(loc=0.0, scale=1.0),
        "gompertz":
        stats.gompertz(c=0.947, loc=0.0, scale=1.0),
        "gumbel_r":
        stats.gumbel_r(loc=0.0, scale=1.0),
        "gumbel_l":
        stats.gumbel_l(loc=0.0, scale=1.0),
        "halfcauchy":
        stats.halfcauchy(loc=0.0, scale=1.0),
        "halflogistic":
        stats.halflogistic(loc=0.0, scale=1.0),
        "halfnorm":
        stats.halfnorm(loc=0.0, scale=1.0),
        "halfgennorm":
        stats.halfgennorm(beta=0.675, loc=0.0, scale=1.0),
        "hypsecant":
        stats.hypsecant(loc=0.0, scale=1.0),
        "invgamma":
        stats.invgamma(a=4.07, loc=0.0, scale=1.0),
        "invgauss":
        stats.invgauss(mu=0.145, loc=0.0, scale=1.0),
        "invweibull":
        stats.invweibull(c=10.6, loc=0.0, scale=1.0),
        "johnsonsb":
        stats.johnsonsb(a=4.32, b=3.18, loc=0.0, scale=1.0),
        "johnsonsu":
        stats.johnsonsu(a=2.55, b=2.25, loc=0.0, scale=1.0),
        "ksone":
        stats.ksone(n=1e03, loc=0.0, scale=1.0),
        "kstwobign":
        stats.kstwobign(loc=0.0, scale=1.0),
        "laplace":
        stats.laplace(loc=0.0, scale=1.0),
        "levy":
        stats.levy(loc=0.0, scale=1.0),
        "levy_l":
        stats.levy_l(loc=0.0, scale=1.0),
        "levy_stable":
        stats.levy_stable(alpha=0.357, beta=-0.675, loc=0.0, scale=1.0),
        "logistic":
        stats.logistic(loc=0.0, scale=1.0),
        "loggamma":
        stats.loggamma(c=0.414, loc=0.0, scale=1.0),
        "loglaplace":
        stats.loglaplace(c=3.25, loc=0.0, scale=1.0),
        "lognorm":
        stats.lognorm(s=0.954, loc=0.0, scale=1.0),
        "lomax":
        stats.lomax(c=1.88, loc=0.0, scale=1.0),
        "maxwell":
        stats.maxwell(loc=0.0, scale=1.0),
        "mielke":
        stats.mielke(k=10.4, s=3.6, loc=0.0, scale=1.0),
        "nakagami":
        stats.nakagami(nu=4.97, loc=0.0, scale=1.0),
        "ncx2":
        stats.ncx2(df=21, nc=1.06, loc=0.0, scale=1.0),
        "ncf":
        stats.ncf(dfn=27, dfd=27, nc=0.416, loc=0.0, scale=1.0),
        "nct":
        stats.nct(df=14, nc=0.24, loc=0.0, scale=1.0),
        "norm":
        stats.norm(loc=0.0, scale=1.0),
        "pareto":
        stats.pareto(b=2.62, loc=0.0, scale=1.0),
        "pearson3":
        stats.pearson3(skew=0.1, loc=0.0, scale=1.0),
        "powerlaw":
        stats.powerlaw(a=1.66, loc=0.0, scale=1.0),
        "powerlognorm":
        stats.powerlognorm(c=2.14, s=0.446, loc=0.0, scale=1.0),
        "powernorm":
        stats.powernorm(c=4.45, loc=0.0, scale=1.0),
        "rdist":
        stats.rdist(c=0.9, loc=0.0, scale=1.0),
        "reciprocal":
        stats.reciprocal(a=0.00623, b=1.01, loc=0.0, scale=1.0),
        "rayleigh":
        stats.rayleigh(loc=0.0, scale=1.0),
        "rice":
        stats.rice(b=0.775, loc=0.0, scale=1.0),
        "recipinvgauss":
        stats.recipinvgauss(mu=0.63, loc=0.0, scale=1.0),
        "semicircular":
        stats.semicircular(loc=0.0, scale=1.0),
        "t":
        stats.t(df=2.74, loc=0.0, scale=1.0),
        "triang":
        stats.triang(c=0.158, loc=0.0, scale=1.0),
        "truncexpon":
        stats.truncexpon(b=4.69, loc=0.0, scale=1.0),
        "truncnorm":
        stats.truncnorm(a=0.1, b=2, loc=0.0, scale=1.0),
        "tukeylambda":
        stats.tukeylambda(lam=3.13, loc=0.0, scale=1.0),
        "uniform":
        stats.uniform(loc=0.0, scale=1.0),
        "vonmises":
        stats.vonmises(kappa=3.99, loc=0.0, scale=1.0),
        "vonmises_line":
        stats.vonmises_line(kappa=3.99, loc=0.0, scale=1.0),
        "wald":
        stats.wald(loc=0.0, scale=1.0),
        "weibull_min":
        stats.weibull_min(c=1.79, loc=0.0, scale=1.0),
        "weibull_max":
        stats.weibull_max(c=2.87, loc=0.0, scale=1.0),
        "wrapcauchy":
        stats.wrapcauchy(c=0.0311, loc=0.0, scale=1.0),
    }
Example No. 28
def key_distribution(num_samples):
    dist = genextreme(30.7984, 8.20449, 0.078688)
    return dist.rvs(num_samples)
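Positional arguments to genextreme are (c, loc, scale), so the call above freezes a distribution with shape c = 30.7984. Spelling out the keywords makes the mapping explicit (same numbers, shown only for clarity):

from scipy.stats import genextreme

dist = genextreme(c=30.7984, loc=8.20449, scale=0.078688)  # equivalent to the positional call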
Example No. 29
# @author: rarossi

from scipy import stats as ss
from numpy import linspace
from matplotlib import pyplot as plt
from math import log, exp
sz = 1000
mydistro = ss.gumbel_r
myparams = (0, 1)
myfunc = lambda x: -log(-log(x))
# myfunc = lambda x: -log(x)
# myfunc = lambda x: x
# myfunc = lambda x: exp(x)

sample = [mydistro.rvs(*myparams) for _ in range(sz)]
sample.sort()
emp = [(i + 0.6) / (sz + 0.4) for i in range(sz)]
dist_emp = list(map(myfunc, emp))
ge = ss.genextreme(*ss.genextreme.fit(sample))
x = linspace(min(sample), max(sample), 100)

plt.subplot(211)
plt.hist(sample, density=True, bins=20)
plt.plot(x, ge.pdf(x))

plt.subplot(212)
plt.plot(sample, dist_emp, '.')
plt.plot(sample, list(map(myfunc, ge.cdf(sample))))
Example No. 30
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import genextreme

fig, ax = plt.subplots(1, 1)

c = -0.1
mean, var, skew, kurt = genextreme.stats(c, moments='mvsk')

# Display the probability density function (``pdf``):

x = np.linspace(genextreme.ppf(0.01, c), genextreme.ppf(0.99, c), 100)
ax.plot(x, genextreme.pdf(x, c), 'r-', lw=5, alpha=0.6, label='genextreme pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:

rv = genextreme(c)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:

vals = genextreme.ppf([0.001, 0.5, 0.999], c)
np.allclose([0.001, 0.5, 0.999], genextreme.cdf(vals, c))
# True

# Generate random numbers:

r = genextreme.rvs(c, size=1000)

# And compare the histogram:

ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
Example No. 32
def fitting(dataset, frequency=200):
    reactivities = []

    # Summary instances into a table
    for seq_id in dataset.keys():
        structure_contexts, reactivity = dataset[seq_id]
        L = len(reactivity)
        assert len(
            structure_contexts
        ) == L, "Structure contexts and reactivities have different lengths"
        for i in range(L):
            reactivities.append((structure_contexts[i], reactivity[i], seq_id))
    reactivities = pd.DataFrame.from_records(reactivities)
    reactivities.columns = ["structure-context", "reactivity", "sequence-id"]
    # Only use reactivity larger than zero
    reactivities = reactivities[reactivities["reactivity"] > 0]
    # Get instances number of each structure context
    structure_contexts = reactivities["structure-context"].unique()
    n_context = structure_contexts.shape[0]
    n_instances = reactivities.groupby("structure-context").apply(
        lambda x: x.shape[0])
    statistics = pd.DataFrame(index=structure_contexts,
                              columns=["instances", "assignment"])
    statistics.loc[n_instances.index, "instances"] = n_instances.values

    print("5 mer should have 32 structure contexts")
    frequent_set = set(n_instances[n_instances > frequency].index)
    print("{} present in the input dataset".format(n_context))
    print("{} meet the frequency cutoff".format(len(frequent_set)))
    # Split structure context to frequent ones and not frequent ones
    frequent_reactivities = reactivities[
        reactivities["structure-context"].isin(frequent_set)]
    not_frequent_reactivities = reactivities[
        ~reactivities["structure-context"].isin(frequent_set)]

    ## Fit an individual generalized extreme value distribution for each frequent 5-mer instance
    ## For rare instances, assign the most similar fitted instance (the fitted model with the highest likelihood)
    # Structure context to model mapping
    modelDict = {}
    # Log-likelihood of assigning a rare structure context to each frequent one
    likelihoodsDict = {}
    # Structure contexts that do not fit the generalized extreme value distribution well
    dubious_instances = []

    for structure_context in frequent_reactivities["structure-context"].unique(
    ):
        react = reactivities[reactivities["structure-context"] ==
                             structure_context]["reactivity"].values
        shape, location, scale = genextreme.fit(react)
        if shape > 0:
            dubious_instances.append(structure_context)
            continue
        model = genextreme(shape, location, scale)
        modelDict[structure_context] = model
        likelihoodsDict[structure_context] = not_frequent_reactivities.groupby(
            "structure-context").apply(
                lambda x: np.log(model.pdf(x["reactivity"].values)).sum())

    # Summarize the likelihood of rare structure contexts into a DataFrame
    likelihoods = pd.DataFrame(likelihoodsDict)
    # Map rare structure context to most similar frequent structure context
    assignment0 = dict(likelihoods.idxmax(axis=1))

    # Map frequent structure context to list of rare structure contexts
    assignment = defaultdict(list)
    for less, more in assignment0.items():
        statistics.loc[less, "assignment"] = more
        assignment[more].append(less)

    # Refit genextreme model for frequent structure context
    for more, less in assignment.items():
        for context in less:
            modelDict[context] = modelDict[more]
        contexts = set(less)
        contexts.add(more)
        react = reactivities[reactivities["structure-context"].isin(
            contexts)]["reactivity"].values
        shape, location, scale = genextreme.fit(react)
        model = genextreme(shape, location, scale)
        for structure_context in contexts:
            modelDict[structure_context] = model
    if len(dubious_instances) > 0:
        print(
            "Fitting for the following instances generates positive shape value, which is dubious"
        )
        print(",".join(dubious_instances))
        max_ll = np.nan
        for structure_context in dubious_instances:
            for fitted_context, model in modelDict.items():
                x = reactivities[reactivities["structure-context"] ==
                                 fitted_context]["reactivity"].values
                current_ll = np.log(model.pdf(x)).sum()
                if np.isnan(max_ll):
                    max_ll = np.log(model.pdf(x)).sum()
                    max_instance = fitted_context
                else:
                    if max_ll < current_ll:
                        max_ll = current_ll
                        max_instance = fitted_context
            print("{} is assigned to {}".format(structure_context,
                                                max_instance))
            statistics.loc[structure_context, "assignment"] = max_instance
            modelDict[structure_context] = modelDict[max_instance]

    #reactivities.to_csv("reactivity-table.txt",index=False,sep="\t")

    return modelDict, statistics
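A self-contained sketch of the core assignment idea in fitting (synthetic reactivities in place of a real dataset): fit a GEV to a frequent context and score a rare context by the summed log-likelihood under that model.

import numpy as np
from scipy.stats import genextreme

rng = np.random.default_rng(0)
frequent = genextreme.rvs(-0.3, loc=1.0, scale=0.5, size=500, random_state=rng)
rare = genextreme.rvs(-0.3, loc=1.0, scale=0.5, size=20, random_state=rng)
model = genextreme(*genextreme.fit(frequent))  # frozen model for the frequent context
loglik = np.log(model.pdf(rare)).sum()  # likelihood used for the assignment step
print(loglik)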
Example No. 33
freqs, power, xdata, ydata, amp, index, powerlaw, inicio, fim = statsfuncs.psd(y)
psi, amax, amin, a0 = mfdfa.makemfdfa(y, True)
beta = 2.*alfa-1.
print("Beta=2*Alpha-1={}".format(beta))


# Plot and fit of the time-series histogram

mu, sigma = norm.fit(y)
rv_nrm = norm(loc=mu, scale=sigma)
# Estimate GEV:
gev_fit = genextreme.fit(y)
# GEV parameters from fit:
c, loc, scale = gev_fit
mean, var, skew, kurt = genextreme.stats(c, moments='mvsk')
rv_gev = genextreme(c, loc=loc, scale=scale)
# Create data from estimated GEV to plot (the excerpt does not define the
# evaluation grid, so reconstruct one spanning the data):
ypoints = np.linspace(min(y), max(y), len(y))
gev_pdf = rv_gev.pdf(ypoints)

plt.title("PDF with data from " + country + "\nmu={0:3.5}, sigma={1:3.5}"
          .format(mu, sigma))
n, bins, patches = plt.hist(y, 60, density=1, facecolor='powderblue',
                            alpha=0.75, label="Normalized data")
plt.plot(np.arange(min(bins), max(bins)+1, (max(bins) - min(bins))/len(y)),
         gev_pdf, 'r-', lw=5, alpha=0.6, label='genextreme pdf')
plt.ylabel("Probability Density")
plt.xlabel("Value")
plt.legend()
plt.savefig("PDF"+filename)
plt.show()
plt.figure(figsize=(20, 14))
Example No. 34
print((
    "Mu:{0:5.2f} Sigma:{1:5.2f} z_score:{2:5.2f} p_value_one_side: {3:15.13f} p_value_two_side: {4:15.13f} p_values {5:15.13f}"
).format(mu, sigma, z_score, p_values, p_values_2side, p_values3))

# instead use extreme value distribution

#fit to data

extreme_fit = genextreme.fit(df[1])
c = extreme_fit[0]
loc = extreme_fit[1]
scale = extreme_fit[2]

print(("Extreme value fits c = {0}, loc = {1}, scale = {2}").format(
    c, loc, scale))

ax1 = sns.distplot(df[1], fit=genextreme, kde=False)
x = np.linspace(-10, 16, 1000)

extreme_to_plot = genextreme(c, loc, scale)
ax1.plot(x, extreme_to_plot.pdf(x), 'r-', lw=2, label='pdf')

p_value = extreme_to_plot.sf(score_to_test)  # right-tail p-value via the survival function

ax1.axvline(score_to_test)

print(("p value of score {0} = {1}").format(score_to_test, p_value))

plt.show()
Example No. 35
def cipe(src_ra,
         src_dec,
         counterpart_separation,
         region_radius=0.1,
         numpoints=10000):
    counterpart_separation = counterpart_separation * u.arcsec
    region_radius = region_radius * u.degree
    tap_cap = 100000
    tap_server = TapPlus(url='https://gea.esac.esa.int/tap-server/tap',
                         verbose=False)
    catalog = 'gaiaedr3.gaia_source'

    query = "SELECT TOP " + str(tap_cap) + \
            " * FROM " + catalog + " WHERE ra BETWEEN " + \
            str(src_ra.value - region_radius.value) + \
            " AND " + str(src_ra.value + region_radius.value) + \
            " AND dec BETWEEN " + str(src_dec.value - region_radius.value) + \
            " AND " + str(src_dec.value + region_radius.value)

    search = tap_server.launch_job(query)
    results = search.get_results()
    print('Number of Gaia sources:' + str(len(results)))

    if len(results) == tap_cap:
        print('WARNING: Gaia contains too many sources in the region (>' +
              str(tap_cap) + '). Region may be too large.')

    gaia_srclist = SkyCoord(ra=results['ra'], dec=results['dec'])

    fake_srclist = SkyCoord(
        ra=src_ra + (np.random.rand(numpoints) - 0.5) * 2 * region_radius,
        dec=src_dec + (np.random.rand(numpoints) - 0.5) * 2 * region_radius)

    sep_dist = fake_srclist.match_to_catalog_sky(gaia_srclist)[1].to(
        u.arcsec).value

    fig1 = plt.figure(figsize=(6, 4))
    ax1 = fig1.add_subplot(1, 1, 1)
    ax1.hist(sep_dist,
             bins=50,
             color='#034BCA',
             edgecolor='w',
             density=True,
             label='Simulations')
    model_x = np.linspace(0, 8, 1000)
    params = st.genextreme.fit(sep_dist)
    model_y = st.genextreme(*params).pdf(model_x)
    p_less = len(sep_dist[sep_dist <= counterpart_separation.value]) / len(
        sep_dist) * 100
    ax1.plot(model_x, model_y, color='#EB24F4', label='Gumbel fit')
    ax1.axvline(0.51, color='k', linestyle='--')
    ax1.set_title(f"$P(d<{counterpart_separation.value}'')={p_less:.3}\%$",
                  fontsize=12)
    ax1.legend(fontsize=12)
    ax1.set_xlabel(r'Distance to closest random Gaia source (arcsec)',
                   fontsize=14)
    ax1.set_ylabel(r'Probability density (arcsec$^{-1}$)', fontsize=14)
    ax1.set_xlim(0, 8)
    ax1.minorticks_on()
    ax1.tick_params(axis='both', which='major', labelsize=14)
    ax1.tick_params(axis='both', which='major', length=9)
    ax1.tick_params(axis='both', which='minor', length=4.5)
    ax1.tick_params(axis='both',
                    which='both',
                    direction='in',
                    right=True,
                    top=True)

    return fig1
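The p_less fraction above is purely empirical; with the GEV already fitted, the same chance-coincidence probability can also be read off the model CDF (a sketch using the params and locals computed inside cipe):

p_model = st.genextreme(*params).cdf(counterpart_separation.value) * 100
print('Model P(d < sep) = {0:.3}%'.format(p_model))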