def q2():
    # Return the result of question 2 here.
    norm = dataframe['normal']
    ecdf = ECDF(norm)
    prob_norm = ecdf(norm.mean() + norm.std()) - ecdf(norm.mean() - norm.std())
    prob_norm = prob_norm.round(3)

    return float(prob_norm)
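The same one-standard-deviation probability can be reproduced end to end on synthetic data; the sample below (seed, size, mean and scale) is an assumption used only to make the sketch self-contained:

import numpy as np
from statsmodels.distributions.empirical_distribution import ECDF

# synthetic stand-in for dataframe['normal']
rng = np.random.default_rng(42)
sample = rng.normal(loc=20, scale=4, size=10000)

ecdf = ECDF(sample)
mean, std = sample.mean(), sample.std()

# empirical mass within one standard deviation of the mean;
# for normal data this should land near the theoretical 0.683
prob = ecdf(mean + std) - ecdf(mean - std)
print(round(float(prob), 3))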
Example No. 2
def q4():
    # Return the result of question 4 here.
    x = stars[stars['target'] == 0].mean_profile
    false_pulsar_mean_profile_standardized = (x - np.mean(x)) / np.std(x)

    ecdf = ECDF(false_pulsar_mean_profile_standardized)

    return tuple(map(lambda x: round(x, 3), ecdf(sct.norm.ppf([.8, .9, .95]))))
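A self-contained sketch of the same pattern, with standard-normal draws standing in for `false_pulsar_mean_profile_standardized` (the sample size and seed are assumptions); the ECDF values should then fall close to 0.8, 0.9 and 0.95:

import numpy as np
import scipy.stats as sct
from statsmodels.distributions.empirical_distribution import ECDF

# synthetic standardized profile in place of the pulsar data
rng = np.random.default_rng(0)
standardized = rng.standard_normal(20000)

ecdf = ECDF(standardized)
theoretical_quantiles = sct.norm.ppf([0.80, 0.90, 0.95])

# empirical CDF evaluated at the theoretical normal quantiles
print(tuple(np.round(ecdf(theoretical_quantiles), 3)))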
Example No. 3
def q2():
    # Return the result of question 2 here.
    inferior = dataframe.normal.mean() - dataframe.normal.std()
    superior = dataframe.normal.mean() + dataframe.normal.std()

    ecdf = ECDF(dataframe.normal)

    return float(round(ecdf(superior) - ecdf(inferior), 3))
Example No. 4
def sample_background(intensities, n):
    """Sample pixels from a provided sample."""
    p1 = np.percentile(intensities, 10)
    p2 = np.percentile(intensities, 90)
    samples = intensities[np.logical_and(p1 < intensities,
                                         intensities < p2)]
    ecdf = ECDF(samples)
    return ecdf.x[np.searchsorted(ecdf.y, np.random.uniform(size=n))]
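A brief usage sketch for `sample_background`, assuming it is available as a module-level function with numpy and ECDF already imported; the gamma-distributed intensities are a stand-in for real pixel data:

import numpy as np

# synthetic pixel intensities
intensities = np.random.gamma(shape=2.0, scale=30.0, size=5000)

background = sample_background(intensities, n=100)
print(background.shape, background.min(), background.max())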
Example No. 5
def estimate_empirical_cdf(X: np.ndarray, X_new: Optional[np.ndarray] = None):

    # initialize ecdf
    ecdf_f = ECDF(X)
    if X_new is None:
        return ecdf_f(X)
    else:
        return ecdf_f(X_new)
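A short usage sketch for `estimate_empirical_cdf`; the toy arrays below are assumptions, and the function itself still relies on `ECDF`, `np` and `Optional` being imported at module level:

import numpy as np

train = np.array([1.0, 2.0, 2.5, 3.0, 5.0])
new = np.array([0.5, 2.2, 4.0])

print(estimate_empirical_cdf(train))       # ECDF evaluated at the training points
print(estimate_empirical_cdf(train, new))  # ECDF fit on train, evaluated at new points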
Example No. 6
def q2():
    media_normal = dataframe['normal'].mean()
    desvio_padrao_normal = dataframe['normal'].std()
    prob = ECDF(dataframe['normal'])
    resposta = np.round(
        prob(media_normal + desvio_padrao_normal) -
        prob(media_normal - desvio_padrao_normal), 3)
    return float(resposta)
Example No. 7
def build_edf_fr_vals(data):
    """ construct empirical distribution function given data values """
    from statsmodels.distributions.empirical_distribution import ECDF
    data = data.ravel()
    cdf = ECDF(data)
    x0 = cdf.x[1:]
    y0 = cdf.y[1:]
    return x0, y0
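A quick usage sketch for `build_edf_fr_vals` on random data; the exponential sample below is an arbitrary assumption:

import numpy as np

data = np.random.exponential(scale=2.0, size=(100, 10))
x0, y0 = build_edf_fr_vals(data)

# x0 holds the sorted sample values, y0 the step heights in (0, 1];
# the leading (-inf, 0) point of the statsmodels ECDF has been dropped
print(x0[:3], y0[:3], y0[-1])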
Example No. 8
def q4():
    # Return the result of question 4 here.
    fn_ecdf= ECDF(false_pulsar_mean_profile_standardized)
    q1 = sct.norm.ppf(0.8, loc=0, scale=1)
    q2 = sct.norm.ppf(0.9, loc=0, scale=1)
    q3 = sct.norm.ppf(0.95, loc=0, scale=1)
    return (fn_ecdf(q1).round(3), fn_ecdf(q2).round(3), fn_ecdf(q3).round(3))
Example No. 9
def percentiles_computation(chunk):

    # initialize Pandas DataFrame
    percentiles_series_append = pd.DataFrame()

    # initialize chunk
    chunks_df_tmp = pd.DataFrame()
    chunks_df_tmp = chunks_df[chunk]

    for quantile_rank in pd.unique(chunks_df_tmp.loc[:, 'quantile_rank']):

        # sub-setting
        percentiles_tmp = chunks_df_tmp.loc[chunks_df_tmp['quantile_rank'] ==
                                            quantile_rank].reset_index(
                                                drop=True)

        # compute percentiles of the subset according to its size
        ecdf_values = []

        # prepare Numpy array
        numpy_array = percentiles_tmp.loc[:, ['average_watch_percentage'
                                              ]].to_numpy()
        numpy_vector = numpy_array.flatten()

        # compute percentiles (using the Empirical Cumulative Distribution Function)
        ecdf_values = ECDF(numpy_vector)

        # build Pandas Series with percentiles
        percentiles_series_tmp = pd.Series(ecdf_values(numpy_vector))
        percentiles_series = percentiles_series_tmp.round(decimals=2)

        # build Pandas Series with video_ids
        video_id_series = percentiles_tmp.loc[:, ['video_id']]

        # build Pandas Series with quantiles
        quantile_series_tmp = pd.Series(quantile_rank)
        quantile_series = quantile_series_tmp.repeat(
            repeats=percentiles_tmp.shape[0]).reset_index(drop=True)

        # concatenate across columns the two DataFrames
        video_id_percentiles = pd.concat(
            [video_id_series, quantile_series, percentiles_series],
            axis=1,
            sort=False)

        # rename columns
        video_id_percentiles = video_id_percentiles.rename(columns={
            0: 'quantiles',
            1: 'percentiles'
        },
                                                           inplace=False)

        # append the per-quantile frame (DataFrame.append was removed in pandas 2.x)
        percentiles_series_append = pd.concat(
            [percentiles_series_append, video_id_percentiles],
            ignore_index=True, sort=False)

    # function's output
    return (percentiles_series_append)
def drawCumulativeHist(h0, h1, h2, h3, fname):
    # Draw cumulative (CDF) curves.
    # The first argument is the quantitative data to plot.
    # The second argument is the number of bins.
    # The 'normed' parameter controls whether the data are normalized.
    # histtype='step' draws a step-shaped curve.
    # The 'cumulative' parameter controls whether counts are accumulated.
    # pyplot.rc('font', family='serif', serif='Times')
    pyplot.rc('text', usetex=True)
    pyplot.rc('xtick', labelsize=8)
    pyplot.rc('ytick', labelsize=8)
    pyplot.rc('axes', labelsize=8)
    # width as measured in inkscape
    width = 3.487
    height = width / 1.618
    length1 = len(h0)
    for i in range(0, length1):
        h0[i] = h0[i] / 10
    fig, ax = pyplot.subplots()
    fig.subplots_adjust(left=.15, bottom=.16, right=.99, top=.97)

    length = int(min(len(h0), len(h1), len(h2), len(h3)))
    ecdf = ECDF(h0)
    ecdf1 = ECDF(h1)
    ecdf2 = ECDF(h2)
    ecdf3 = ECDF(h3)
    x = np.linspace(min(h0), max(h0), length)
    y = ecdf(x)
    y1 = ecdf1(x)
    y2 = ecdf2(x)
    y3 = ecdf3(x)

    pyplot.step(x, y2, label="DCUM", color="blue")
    # pyplot.step(x, y1, label="LRFL", color="green", linestyle="--")
    pyplot.step(x, y3, label="DUM", color="black", linestyle=":")
    pyplot.step(x, y, label="FIFO", color="red", linestyle="-.")

    pyplot.xlabel('delays/ms')
    pyplot.ylabel('CDF')
    pyplot.xlim(0, 3000)
    pyplot.title('CDF of delays')
    pyplot.legend(loc='lower right')
    fig.set_size_inches(width, height)
    fig.savefig(fname + '.pdf')
    pyplot.show()
def CDF_estimator(data):
    # Bootstrapping 1000 times
    # Also computing theta_star on each resample, for the third part of the question
    bootstrapped_samples = []
    theta_star = []
    for i in range(0, 1000):
        temp = bootstrap(data['mag'].values)
        temp_ecdf = ECDF(temp)
        theta_star.append(temp_ecdf(4.9) - temp_ecdf(4.3))
        bootstrapped_samples.extend(temp)
    # Estimated Empirical CDF
    ecdf = ECDF(bootstrapped_samples)

    line = np.linspace(3.5, 6.5, 1000)
    ecdf_points = []
    for i in line:
        ecdf_points.append(ecdf(i))
    plt.plot(line, ecdf_points)

    # Creating the DKW confidence band: epsilon = sqrt(ln(2 / alpha) / (2 * n))
    epsilon = math.sqrt(math.log(2 / 0.05) / (2 * len(data)))
    lower_band_points = []
    upper_band_points = []
    for x in line:
        lower_band_points.append(max(ecdf(x) - epsilon, 0))
    for x in line:
        upper_band_points.append(min(ecdf(x) + epsilon, 1))
    plt.title('Red: Lower CB | Green: Upper CB')
    plt.plot(line, lower_band_points, color='red')
    plt.plot(line, upper_band_points, color='green')
    plt.show()

    # Computing 3 types of CI for F(4.9) - F(4.3)
    # Normal:
    se = standard_error(theta_star)
    theta_hat = ecdf(4.9) - ecdf(4.3)
    normal_CI = (theta_hat - 1.96 * se, theta_hat + 1.96 * se)
    print('Normal Interval:', normal_CI)
    # Percentile
    percentile_CI = (np.percentile(theta_star, 2.5), np.percentile(theta_star, 97.5))
    print('Percentile Interval:', percentile_CI)
    # Pivotal
    pivotal_CI = (2 * theta_hat - np.percentile(theta_star, 97.5),
                  2 * theta_hat - np.percentile(theta_star, 2.5))
    print('Pivotal Interval:', pivotal_CI)
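For reference, a compact standalone sketch of the DKW band used above, with the natural logarithm the inequality requires; the synthetic magnitudes and alpha = 0.05 are assumptions mirroring the code:

import math
import numpy as np
from statsmodels.distributions.empirical_distribution import ECDF

sample = np.random.normal(loc=4.6, scale=0.5, size=1000)   # stand-in for data['mag']
alpha = 0.05

ecdf = ECDF(sample)
epsilon = math.sqrt(math.log(2 / alpha) / (2 * len(sample)))

grid = np.linspace(sample.min(), sample.max(), 200)
lower = np.clip(ecdf(grid) - epsilon, 0, 1)
upper = np.clip(ecdf(grid) + epsilon, 0, 1)
print(round(epsilon, 4), lower[0], upper[-1])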
Example No. 12
def quantile_mapping(mod, obs, downscale, *args, **kwargs):
    """
    Quantile Mapping using empirical cumulative distribution function
    """
    mod_ecdf = ECDF(mod)
    p = mod_ecdf(downscale) * 100
    corr = np.percentile(obs[~np.isnan(obs)], p) - \
           np.percentile(mod[~np.isnan(mod)], p)
    return downscale + corr
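A hedged usage sketch for `quantile_mapping` on synthetic series; the biases and scales of the toy data are arbitrary assumptions:

import numpy as np

rng = np.random.default_rng(1)
obs = rng.normal(loc=10.0, scale=2.0, size=3000)        # "observed" series
mod = rng.normal(loc=12.0, scale=2.5, size=3000)        # biased "model" series
downscale = rng.normal(loc=12.0, scale=2.5, size=500)   # values to correct

corrected = quantile_mapping(mod, obs, downscale)
# after the correction the mean should sit close to obs.mean()
print(downscale.mean(), corrected.mean())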
def q2():
    # Empirical CDF of the variable:
    ecdf = ECDF(dataframe.normal)

    # Mean and standard deviation:
    average, std = dataframe.normal.mean(), dataframe.normal.std()

    # Upper cumulative area minus lower cumulative area:
    return float(round(ecdf(average + std) - ecdf(average - std), 3))
Example No. 14
def q4():
    false_pulsar_mean_profile = stars.mean_profile[stars.target == 0]
    media = false_pulsar_mean_profile.mean()
    std = false_pulsar_mean_profile.std()
    false_pulsar_mean_profile_standardized = (false_pulsar_mean_profile - media) / std
    quantis = sct.norm.ppf([0.80, 0.90, 0.95])
    cdf_model = ECDF(false_pulsar_mean_profile_standardized)
    resposta_q4 = cdf_model(quantis)
    return tuple(np.round(resposta_q4, 3))
def q2():
    # Return the result of question 2 here.
    ecdf = ECDF(dataframe.normal)
    media = dataframe.normal.mean()
    desvio = dataframe.normal.std()
    resposta = ecdf(media + desvio) - ecdf(media - desvio)
    resposta = round(resposta, 3)
    return resposta
Example No. 16
def q2():
    z_inf = dataframe['normal'].mean() - dataframe['normal'].std()
    z_sup = dataframe['normal'].mean() + dataframe['normal'].std()

    ecdf = ECDF(dataframe['normal'])

    answer = np.round(ecdf(z_sup) - ecdf(z_inf), 3)

    return float(answer)
Example No. 17
def perc(x):
    """get the percentile values (ECDF * 100)

    >>> perc(np.arange(10))
    array([ 10.,  20.,  30.,  40.,  50.,  60.,  70.,  80.,  90., 100.])
    """
    from statsmodels.distributions.empirical_distribution import ECDF
    return ECDF(x)(x) * 100
Example No. 18
File: main.py Project: vncsna/leads
def q5():
    x = df_stars['mean_profile']
    x = x[df_stars['target'] == False]
    x = (x - x.mean()) / x.std()
    false_pulsar_mean_profile_standardized = x
    normal_quantiles = sct.norm.ppf([0.25, 0.50, 0.75])
    # empirical quantiles of the standardized profile
    false_pulsar_quantiles = np.quantile(x, [0.25, 0.50, 0.75])
    return tuple(
        round(i, 3) for i in false_pulsar_quantiles - normal_quantiles)
Example No. 19
def q2():
    # Return the result of question 2 here.
    mean = dataframe['normal'].mean()
    std = dataframe['normal'].std()
    interv = [mean - std, mean + std]
    ecdf = ECDF(dataframe['normal'])
    empirico = ecdf(interv)
    result = empirico[1] - empirico[0]
    return result.round(3)
Example No. 20
def epsilon_time_from_distance(dfg_time_inner, aggregate_type, beta, distance,
                               precision, sens_time):
    delta_time_inner = []
    delta_edge = []
    delta_per_event = []
    R_ij = max(dfg_time_inner)
    r_ij = R_ij * precision
    accurate_result = 0
    # calculating the accurate result
    if aggregate_type == AggregateType.AVG:
        accurate_result = sum(dfg_time_inner) * 1.0 / len(dfg_time_inner)
    elif aggregate_type == AggregateType.SUM:
        accurate_result = sum(dfg_time_inner) * 1.0
    elif aggregate_type == AggregateType.MIN:
        accurate_result = min(dfg_time_inner) * 1.0
    elif aggregate_type == AggregateType.MAX:
        accurate_result = max(dfg_time_inner) * 1.0
    # if the exact result is zero (instantaneous times), set epsilon directly to avoid division by zero
    if accurate_result == 0:
        epsilon_time_ij = 1
    else:
        distance_ij = accurate_result * distance  # distance is a fraction between 0 and 1
        #  calculate epsilon
        epsilon_time_ij = sens_time / distance_ij * log(1 / beta)
    epsilon_time_inner = epsilon_time_ij
    # flag whether all time values are identical (constant durations)
    flag = 1
    prev = dfg_time_inner[0]
    # the ECDF depends only on dfg_time_inner, so it is built once outside the loop
    cdf = ECDF(dfg_time_inner)
    for t_k in dfg_time_inner:

        if t_k != prev:
            flag = 0
        prev = t_k

        # p_k is the empirical probability mass within r_ij of t_k
        cdf1 = calculate_cdf(cdf, t_k + r_ij)
        cdf2 = calculate_cdf(cdf, t_k - r_ij)
        p_k = cdf1 - cdf2

        # current_delta = p_k*( 1/(   (1-p_k) * exp(-R_ij * epsilon_time) +p_k) -1)
        current_delta = (p_k / (
            (1 - p_k) * exp(-R_ij * epsilon_time_ij) + p_k)) - p_k
        # eps = - log(p_k / (1.0 - p_k) * (1.0 / (current_delta + p_k) - 1.0)) / log(exp(1.0)) * (1.0 / R_ij)
        # append the deltas; the maximum delta is taken over them later
        delta_edge.append(current_delta)
        delta_per_event.append(current_delta)
        if current_delta != 0:
            delta_time_inner.append(current_delta)

    return delta_edge, delta_per_event, delta_time_inner, epsilon_time_inner
Example No. 21
    def qq_plot(self):
        ecdf = ECDF(self.values)
        observed_quantiles = sorted(self.values)
        theoretical_quantiles = [self.quantile(q=ecdf(x)) for x in observed_quantiles]

        x = np.linspace(min(self.values), max(self.values), 10)
        plt.plot(x, x, '-', color='red')
        plt.plot(observed_quantiles, theoretical_quantiles, '.', color='black')
        plt.show()
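The method above depends on a `self.quantile` implementation that is not shown here. A standalone sketch of the same ECDF-based QQ idea against a fitted normal reference (every name and parameter below is an assumption, not the original class):

import numpy as np
import scipy.stats as sct
import matplotlib.pyplot as plt
from statsmodels.distributions.empirical_distribution import ECDF

values = np.random.normal(5.0, 2.0, 500)
ecdf = ECDF(values)
observed = np.sort(values)

# clip the ECDF output away from 1.0 so the normal ppf stays finite
probs = np.clip(ecdf(observed), 1e-6, 1 - 1e-6)
theoretical = sct.norm.ppf(probs, loc=values.mean(), scale=values.std())

plt.plot(observed, observed, '-', color='red')
plt.plot(observed, theoretical, '.', color='black')
plt.show()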
Example No. 22
def q4():
    # Return the result of question 4 here.
    filtro = stars['mean_profile'][(stars['target'] == 0)]
    filtro = filtro.values
    false_pulsar_mean_profile_standardized = (filtro -
                                              filtro.mean()) / filtro.std()
    ppf = sct.norm.ppf([0.8, 0.9, 0.95])
    ecdf = ECDF(false_pulsar_mean_profile_standardized)
    return (tuple(ecdf(ppf).round(3)))
Example No. 23
    def fit_transform(self, X):
        transformed_X = []
        for col in X.T:
            ecdf = ECDF(col)
            self.ecdfs.append(ecdf)
            transformed_X.append(ecdf(col))
        transformed_X = np.array(transformed_X)
        transformed_X = transformed_X * 2 - 1
        return transformed_X.T
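The `fit_transform` above belongs to a transformer class that is not shown; a minimal self-contained wrapper written only to illustrate the column-wise ECDF scaling to (-1, 1] might look like this (the class name and constructor are assumptions):

import numpy as np
from statsmodels.distributions.empirical_distribution import ECDF

class ECDFScaler:
    """Map each feature column to (-1, 1] via its empirical CDF."""

    def __init__(self):
        self.ecdfs = []

    def fit_transform(self, X):
        transformed_X = []
        for col in X.T:
            ecdf = ECDF(col)
            self.ecdfs.append(ecdf)
            transformed_X.append(ecdf(col))
        return np.array(transformed_X).T * 2 - 1

X = np.random.rand(100, 3)
scaled = ECDFScaler().fit_transform(X)
print(scaled.min(), scaled.max())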
def plot_domain_alignment():
    '''
    all_sets = list()
    pool = Pool(6)
    for tmp_counts in pool.imap_unordered(get_domain_alignment, range(len(g_clusters))):
        if tmp_counts:
            all_sets.append(tmp_counts)
    try:
        with open(g_ca.fmt_path('datadir/domain_alignment/raw.json'),'w') as f:
            json.dump(all_sets,f)
    except:
        print('failed to save raw')
    data = list()
    means = dict()
    for i, cluster in enumerate(all_sets):
        alns, sizes, perfs = zip(*cluster.values())
        mean_aln = np.mean(alns)
        perfs = [z for z in perfs if z > 0]
        if perfs:
            mean_perf = np.mean(perfs)
        else:
            mean_perf = None
        means[i] = (mean_aln, mean_perf)
        for dom, val in cluster.items():
            aln, s, perf = val
            data.append((dom, aln - mean_aln,
                perf - mean_perf if mean_perf and perf else None))
    with open(g_ca.fmt_path('datadir/domain_alignment/deviations.json'),'w') as f:
        json.dump(data,f)
    '''
    D = DataGetter()
    #with open(D.fmt_path('datadir/domain_alignment/deviations.json'),'r') as f:
    with open(D.fmt_path('datadir/deviations.json'), 'r') as f:
        data = json.load(f)
    doms, aln_devs, perf_devs = zip(*data)
    fig, ax = plt.subplots(figsize=(6, 3.5))
    ecdf = ECDF(aln_devs)
    ax.plot(list(ecdf.x), list(ecdf.y))
    ax.set_xlabel('distance from mean alignment')
    ax.set_ylabel('CDF')
    fig.savefig(D.fmt_path('plotsdir/domain_alignment/alignment.png'))
    plt.close(fig)
    fig, ax = plt.subplots(figsize=(4.5, 4.5))
    aln_devs, perf_devs = zip(
        *[z for z in zip(aln_devs, perf_devs) if z[1] is not None])
    heatmap, x, y = np.histogram2d(aln_devs, perf_devs, bins=50)
    extent = [x[0], x[-1], y[0], y[-1]]
    pos = ax.imshow(heatmap.T,
                    extent=extent,
                    origin='lower',
                    cmap='Greys',
                    aspect='auto')
    fig.colorbar(pos)
    ax.set_xlabel('distance from mean alignment')
    ax.set_ylabel('distance from mean performance')
    fig.savefig(D.fmt_path('plotsdir/domain_alignment/align_vs_perf.png'))
    plt.close(fig)
def q2():
    # Empirical CDF
    ecdf = ECDF(dataframe.normal)

    # Mean and standard deviation
    media = dataframe.normal.mean()
    desvio = dataframe.normal.std()
    prob = ecdf(media + desvio) - ecdf(media - desvio)
    return round(prob, 3)
Example No. 26
def func_ps_level(ha_open, ha_close, ha_bar_percent_level):    
    ha_bar_size = ha_close - ha_open
    idx_positive_bar = np.where(ha_bar_size>0)[0]
    idx_negative_bar = np.where(ha_bar_size<0)[0]
    ha_bar_positive_size = ha_bar_size[idx_positive_bar]
    ha_bar_negative_size = ha_bar_size[idx_negative_bar]            
    positive = ECDF(ha_bar_positive_size)
    negative = ECDF(-ha_bar_negative_size)
    ha_positive_size, ha_positive_cdf = positive.x, positive.y    
    ha_negative_size, ha_negative_cdf = negative.x, negative.y
        
    n_level = len(ha_bar_percent_level)
    ha_ps_positive_level = np.zeros(n_level)
    ha_ps_negative_level = np.zeros(n_level)
    for i in range(n_level):
        ha_ps_positive_level[i] = ha_positive_size[np.where(ha_positive_cdf<=ha_bar_percent_level[i])[0][-1]]
        ha_ps_negative_level[i] = -ha_negative_size[np.where(ha_negative_cdf<=ha_bar_percent_level[i])[0][-1]]
    return ha_ps_positive_level, ha_ps_negative_level
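A small usage sketch for `func_ps_level` with synthetic Heikin-Ashi style open/close prices; the price process and the percent levels are assumptions:

import numpy as np

rng = np.random.default_rng(7)
ha_open = rng.normal(100.0, 1.0, 500)
ha_close = ha_open + rng.normal(0.0, 0.5, 500)
levels = np.array([0.5, 0.8, 0.95])

pos_levels, neg_levels = func_ps_level(ha_open, ha_close, levels)
print(pos_levels, neg_levels)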
Example No. 27
def q4():
    # Return the result of question 4 here.
    var = stars[stars["target"]== 0]["mean_profile"]
    false_pulsar_mean_profile_standardized =(var - var.mean())/var.std()
    ecdf_f = ECDF(false_pulsar_mean_profile_standardized)
    theor_quant = [sct.norm.ppf(x) for x in [0.8, 0.9, 0.95]]
    prob = tuple(ecdf_f(theor_quant).round(3))
    return prob
Example No. 28
def q2():
    serie = dataframe['normal']
    x_ = serie.mean()
    s = serie.std()
    interval_min = x_ - s
    interval_max = x_ + s
    ecdf = ECDF(serie)
    interval = ecdf(interval_max) - ecdf(interval_min)
    return float(round(interval, 3))
def build_edf_fr_vals(data):
    """ construct empirical distribution function given data values """

    data = data.ravel()
    cdf = ECDF(data)
    x0 = cdf.x[1:]
    y0 = cdf.y[1:]
    y0 = np.round(y0, 8)
    return x0, y0
Example No. 30
def q4():
    df_f = stars['mean_profile'][stars['target'] == False]
    false_pulsar_mean_profile_standardized = (df_f -
                                              df_f.mean()) / df_f.std(ddof=0)
    ppf = sct.norm.ppf([0.80, 0.90, 0.95])
    ecdf = ECDF(false_pulsar_mean_profile_standardized)
    return (ecdf(ppf[0]).round(decimals=3), ecdf(ppf[1]).round(decimals=3),
            ecdf(ppf[2]).round(decimals=3))