Example #1
def fit_weibull(df_speed, x, weibull_params=None):
    from numpy import exp
    from scipy.stats import weibull_min
    if not weibull_params:
        # floc=0 fixes the location at zero, so the closed-form CDF below is valid
        k_shape, _, lamb_scale = weibull_params = weibull_min.fit(df_speed, floc=0)
    else:
        k_shape, _, lamb_scale = weibull_params
    bins = x
    y_weibull = weibull_min.pdf(x, *weibull_params)
    # Expected probability mass in each bin
    density_expected_weibull = weibull_min.cdf(bins[1:], *weibull_params) - \
                               weibull_min.cdf(bins[:-1], *weibull_params)
    y_cdf_weibull = 1 - exp(-(x / lamb_scale) ** k_shape)
    return weibull_params, y_weibull, density_expected_weibull, y_cdf_weibull
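
A minimal usage sketch (not part of the original example), assuming a synthetic wind-speed sample; the data and variable names are illustrative only:

import numpy as np
from scipy.stats import weibull_min

# Synthetic wind speeds; the true shape/scale values are arbitrary
df_speed = weibull_min.rvs(2.0, loc=0, scale=8.0, size=1000, random_state=0)
x = np.linspace(0, df_speed.max(), 50)
params, y_pdf, density_expected, y_cdf = fit_weibull(df_speed, x)
print(params)  # fitted (shape, loc, scale), with loc fixed at 0
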
def fit_weibull(data, length=10):
    from collections import defaultdict
    import numpy
    from scipy.stats import weibull_min

    # integer division (//) keeps the original bin-of-10 grouping
    trans_fun = lambda x: (x - 1) // 10 + 0.5
    x = numpy.array(sorted(set(trans_fun(i) for i in numpy.arange(1, length + 1))))
    confidence_vals = [[] for _ in range(len(x))]
    nums = numpy.array(list(data.groupby('user_id').apply(len).to_dict().values()))
    nums_groupped = defaultdict(list)
    for num in nums:
        nums_groupped[(num - 1) // 10].append(num)
    nums_avg = {key: numpy.mean(values) / 10.0 for key, values in nums_groupped.items()}
    nums_trans = [nums_avg[(num - 1) // 10] for num in nums]

    fit_values = weibull_min.fit(nums_trans, floc=0)
    fit = weibull_min.pdf(x, *fit_values)
    for i, f in enumerate(fit):
        confidence_vals[i] = f

    def _aggr(r):
        return {
            'value': r,
            'confidence_interval_min': r,
            'confidence_interval_max': r,
        }
    return {
        'serie': list(map(_aggr, confidence_vals)),
        'params': list(fit_values),
    }
Example #3
def fit_weibull(df_speed, x, weibull_params=None, floc=True):
    from scipy.stats import weibull_min
    if not weibull_params:
        if floc:
            # sometimes the location needs to be fixed at zero (floc=0)
            k_shape, _, lamb_scale = weibull_params = weibull_min.fit(df_speed,
                                                                      floc=0)
        else:
            k_shape, _, lamb_scale = weibull_params = weibull_min.fit(df_speed)
    else:
        k_shape, _, lamb_scale = weibull_params
    y_weibull = weibull_min.pdf(x, *weibull_params)
    density_expected_weibull = weibull_min.cdf(
        x[1:], *weibull_params) - weibull_min.cdf(x[:-1], *weibull_params)
    y_cdf_weibull = weibull_min.cdf(x, *weibull_params)
    return weibull_params, y_weibull, density_expected_weibull, y_cdf_weibull
Example #4
def getWeibullPdf(dataset, nbins, bins):
    c = 1.5
    shape, loc, scale = weibull_min.fit(dataset, floc=0)
    x = np.linspace(min(bins), max(bins), nbins)
    print('WEI: shape=' + str(shape) + ', loc=' + str(loc) + ", scale=" +
          str(scale))
    pdf = weibull_min.pdf(x, shape, loc, scale)
    return (x, pdf)
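
A brief usage sketch for getWeibullPdf (not from the original source), assuming np and weibull_min are imported at module level as the function expects; the data are synthetic:

import numpy as np
from scipy.stats import weibull_min

dataset = weibull_min.rvs(2.0, loc=0, scale=6.0, size=300, random_state=0)
bins = np.linspace(0, dataset.max(), 21)  # histogram bin edges
x, pdf = getWeibullPdf(dataset, nbins=100, bins=bins)
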
Example #5
File: clever.py, Project: asplos2020/DRTest
def fit_and_test(rescaled_sample, sample, loc_shift, shape_rescale, optimizer,
                 c_i):
    [c, loc, scale] = weibull_min.fit(-rescaled_sample,
                                      c_i,
                                      optimizer=optimizer)
    loc = -loc_shift + loc * shape_rescale
    scale *= shape_rescale
    ks, pVal = scipy.stats.kstest(-sample, 'weibull_min', args=(c, loc, scale))
    return c, loc, scale, ks, pVal
Example #6
def weibullFit(series):
    '''Estimate Weibull distribution parameters for series (a pd.Series or np.array).'''
    # Relationship between the weibull_min parameters estimated by scipy.stats and the Weibull parameters in weibullPdf:
    # with floc=0 (i.e. loc fixed at 0), c = k and scale = lmd
    # Relationship between the weibull_min parameters estimated by scipy.stats and those of np.random.weibull:
    # with floc=0 (i.e. loc fixed at 0), c = a and scale = 1

    c, loc, scale = weibull_min.fit(series, floc=0)
    k, lmd = c, scale

    return k, lmd
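
A quick check (added here, not in the original) of the parameter relationship described in the comments above, using a synthetic sample drawn with numpy's Weibull generator:

import numpy as np
from scipy.stats import weibull_min

a = 2.0  # shape passed to np.random.weibull; its scale is implicitly 1
sample = np.random.default_rng(0).weibull(a, size=10000)
c, loc, scale = weibull_min.fit(sample, floc=0)
print(c, scale)  # c should come out close to a, and scale close to 1
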
Example #7
def fit_weibull(df_speed, x, weibull_params=None):
    from scipy.stats import weibull_min
    if not weibull_params:
        k_shape, _, lamb_scale = weibull_params = weibull_min.fit(df_speed, floc=0)
    else:
        k_shape, _, lamb_scale = weibull_params
    y_weibull = weibull_min.pdf(x, *weibull_params)
    density_expected_weibull = weibull_min.cdf(x[1:], *weibull_params) - weibull_min.cdf(x[:-1], *weibull_params)
    # y_cdf_weibull = 1 - exp(-(x / lamb_scale) ** k_shape)
    y_cdf_weibull = weibull_min.cdf(x, *weibull_params)
    return weibull_params, y_weibull, density_expected_weibull, y_cdf_weibull
Example #8
def fit_weibull_and_ecdf(df_speed, x=None):
    from numpy import exp, linspace
    import statsmodels.api as sm
    from scipy.stats import weibull_min
    max_speed = df_speed.max()
    if x is None:
        x = linspace(0, max_speed, 20)
    # Fit Weibull (floc=0 fixes the location so the closed-form CDF below is valid)
    k_shape, _, lamb_scale = weibull_params = weibull_min.fit(df_speed, floc=0)
    y_weibull = weibull_min.pdf(x, *weibull_params)
    y_cdf_weibull = 1 - exp(-(x / lamb_scale) ** k_shape)  # Weibull cdf
    # Fit ECDF
    y_ecdf = sm.distributions.ECDF(df_speed)(x)
    return x, y_weibull, y_cdf_weibull, weibull_params, y_ecdf
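
A short usage sketch (illustrative, not from the original project), assuming statsmodels is installed and a pandas Series of wind speeds:

import numpy as np
import pandas as pd
from scipy.stats import weibull_min

df_speed = pd.Series(weibull_min.rvs(2.0, loc=0, scale=8.0, size=500, random_state=1))
x, y_weibull, y_cdf_weibull, weibull_params, y_ecdf = fit_weibull_and_ecdf(df_speed)
print(np.max(np.abs(y_cdf_weibull - y_ecdf)))  # largest gap between fitted and empirical CDF
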
Example #9
def fit_distribution(data, fit_type, x_min, x_max, n_points=1000):
    # Initialization of the variables
    param, x, cdf, pdf = [-1, -1, -1, -1]

    if fit_type == 'exponweib':
        x = np.linspace(x_min, x_max, n_points)

        # Fit data to the theoretical distribution
        param = exponweib.fit(data, 1, 1, scale=2, loc=0)
        # param = exponweib.fit(data, fa=1, floc=0)
        # param = exponweib.fit(data)

        cdf = exponweib.cdf(x, param[0], param[1], param[2], param[3])
        pdf = exponweib.pdf(x, param[0], param[1], param[2], param[3])

    elif fit_type == 'lognorm':
        x = np.linspace(x_min, x_max, n_points)

        # Fit data to the theoretical distribution
        param = lognorm.fit(data, loc=0)

        cdf = lognorm.cdf(x, param[0], param[1], param[2])
        pdf = lognorm.pdf(x, param[0], param[1], param[2])

    elif fit_type == 'norm':
        x = np.linspace(x_min, x_max, n_points)

        # Fit data to the theoretical distribution
        param = norm.fit(data, loc=0)

        cdf = norm.cdf(x, param[0], param[1])
        pdf = norm.pdf(x, param[0], param[1])

    elif fit_type == 'weibull_min':
        x = np.linspace(x_min, x_max, n_points)

        # Fit data to the theoretical distribution
        param = weibull_min.fit(data, floc=0)

        cdf = weibull_min.cdf(x, param[0], param[1], param[2])
        pdf = weibull_min.pdf(x, param[0], param[1], param[2])

    return param, x, cdf, pdf
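
A minimal call of fit_distribution (added for illustration), assuming the scipy distributions it references are imported at module level and using synthetic data:

import numpy as np
from scipy.stats import exponweib, lognorm, norm, weibull_min

data = weibull_min.rvs(2.0, loc=0, scale=5.0, size=500, random_state=0)
param, x, cdf, pdf = fit_distribution(data, 'weibull_min', x_min=0, x_max=data.max())
print(param)  # (shape, loc, scale), with loc fixed at 0
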
def run_Parametric(story_id, data):
    print("[" + str(story_id) + "]Fitting Fisk")
    fisk_params = fisk.fit(data, floc=0)
    fisk_nll = fisk.nnlf(fisk_params, data)
    fisk_rvs = fisk.rvs(*fisk_params, size=data.shape[0])
    ks_fisk = ks_2samp(data, fisk_rvs)
    bic_fisk = compute_BIC(data, len(fisk_params), fisk_nll)

    print("[" + str(story_id) + "]Fitting IG")
    ig_params = invgauss.fit(data, floc=0)
    ig_nll = invgauss.nnlf(ig_params, data)
    ig_rvs = invgauss.rvs(*ig_params, size=data.shape[0])
    ks_ig = ks_2samp(data, ig_rvs)
    bic_ig = compute_BIC(data, len(ig_params), ig_nll)

    print("[" + str(story_id) + "]Fitting LN")
    ln_params = lognorm.fit(data, floc=0)
    ln_nll = lognorm.nnlf(ln_params, data)
    ln_rvs = lognorm.rvs(*ln_params, size=data.shape[0])
    ks_ln = ks_2samp(data, ln_rvs)
    bic_ln = compute_BIC(data, len(ln_params), ln_nll)

    print("[" + str(story_id) + "]Fitting Weibull")
    weib_params = weibull_min.fit(data, floc=0)
    weib_nll = weibull_min.nnlf(weib_params, data)
    weib_rvs = weibull_min.rvs(*weib_params, size=data.shape[0])
    ks_weib = ks_2samp(data, weib_rvs)
    bic_weib = compute_BIC(data, len(weib_params), weib_nll)

    print("[" + str(story_id) + "]Fitting Gamma")
    gamma_params = gamma.fit(data, floc=0)
    gamma_nll = gamma.nnlf(gamma_params, data)
    gamma_rvs = gamma.rvs(*gamma_params, size=data.shape[0])
    ks_gamma = ks_2samp(data, gamma_rvs)
    bic_gamma = compute_BIC(data, len(gamma_params), gamma_nll)

    return [
        fisk_nll, ig_nll, ln_nll, weib_nll, gamma_nll, ks_fisk, ks_ig, ks_ln,
        ks_weib, ks_gamma, bic_fisk, bic_ig, bic_ln, bic_weib, bic_gamma,
        fisk_params, ig_params, ln_params, weib_params, gamma_params
    ]
Example #11
 def calculateAndPlotHistogram(self, data, title=None):
     momentsSum: float = 0
     for i in range(1, 3):
         momentsSum += scipy.stats.moment(data,
                                          moment=i,
                                          axis=0,
                                          nan_policy='propagate')
     hist_velocity, bin_edges = np.histogram(data,
                                             density=True,
                                             bins=[i for i in range(18)])
     shape, loc, scale = weibull_min.fit(data, momentsSum, floc=0)
     x = np.linspace(0, 18, 100)
     f = plt.figure()
     plt.bar(bin_edges[:-1], hist_velocity)
     plt.plot(x, weibull_min(shape, loc, scale).pdf(x), 'r')
     plt.title(title + '\n' +
               f'Shape: {round(shape, 4)}, scale: {round(scale, 4)}')
     plt.xlabel('Wind velocity [m/s]')
     plt.grid()
     plt.show()
     self.pdf.savefig(f, bbox_inches='tight')
Example #12
def openmax_param(model,trainx,trainy):
    import pandas as pd
    import keras
    from tensorflow.keras import optimizers
    class_num = len(np.array(model.weights[-1])) # Number of Class
    if len(np.shape(trainx))!=4:
        trainx = np.expand_dims(trainx,axis=-1)
    if len(trainy[0])==1:
        from keras.utils import to_categorical
        trainy = to_categorical(trainy,class_num)
    x_predict = model.predict(trainx)
    corr_ind = np.where(np.argmax(trainy,axis=-1)==np.argmax(x_predict,axis=-1))
    ver_X_train = trainx[corr_ind]
    ver_Y_train = trainy[corr_ind] # Step1 Data classified correctly
    new_model = keras.models.Sequential(model.layers[:-1])
    new_model.compile(optimizer='adam',loss='categorical_crossentropy',
                        metrics=['accuracy'])
    logit_vector = np.array(new_model.predict(ver_X_train))
    logit_matrix = [[]]*class_num
    for i in range(len(ver_X_train)): # Save Logit Vector by its class
        idx = np.argmax(ver_Y_train[i])
        logit_matrix[idx] = logit_matrix[idx]+[logit_vector[i]]
    mean_vector = []
    for i in range(len(logit_matrix)): # Compute Mean Vector
        mean_vector.append(np.array(logit_matrix[i]).mean(axis=0))
    distance_matrix=[[]]*class_num
    for idx in range(len(logit_matrix)):
        for logit in logit_matrix[idx]: # Save the distance
            distance_ = distance(logit,mean_vector[idx])
            distance_matrix[idx] = distance_matrix[idx]+[distance_]
    for i in range(len(distance_matrix)): # Sort
        distance_matrix[i] = np.array(distance_matrix[i])
        distance_matrix[i] = np.sort(distance_matrix[i])
    hyparam=[[]]*class_num;w=[[]]*class_num
    from scipy.stats import weibull_min
    for i in range(len(distance_matrix)): # Generate Weibull Distribution
        temp = weibull_min.fit(distance_matrix[i][-20:])
        hyparam[i] = hyparam[i]+list(temp)
    return hyparam, new_model, class_num, mean_vector
def fit_weibull_loop(data):
    loop_false = (data > 0)
    loop_true = (data < 0)

    data_false = data[loop_false]
    data_true = data[loop_true]

    loop_prob = np.sum(loop_false) / float(data.shape[0])
    weib_trunc_fit = np.array([np.nan, 0.0, np.nan])

    nll_false = 0
    nll_true = 0

    if np.sum(loop_false) > 0:
        trunc_data = data_false[data_false < 1.0]
        prior = (len(data_false) - len(trunc_data)) / float(len(data_false))
        if trunc_data.shape[0] > 0:
            distribution = TruncatedWeibull_Prior
            rv_weib_false = distribution(trunc_data)
            res_weib_false = rv_weib_false.fit()
            weib_trunc_fit = np.array(
                [prior, res_weib_false.params[0], res_weib_false.params[1]])

            nll_false = np.sum(rv_weib_false.nloglikeobs(
                res_weib_false.params))

    if np.sum(loop_true) > 0:
        weib_trueloop_fit = weibull_min.fit(np.abs(data_true), floc=0.0)
        nll_true = -np.sum(
            np.log(
                weibull_min.pdf(np.abs(data_true), weib_trueloop_fit[0],
                                weib_trueloop_fit[1], weib_trueloop_fit[2]) +
                1e-200))
    else:
        weib_trueloop_fit = np.array([np.nan, 0.0, np.nan])

    nll = nll_false + nll_true

    return [loop_prob, weib_trunc_fit, weib_trueloop_fit, nll]
Example #14
def fit_weibull(series, c=0, floc=0, scale=1, title=""):
    """
    Fits a Weibull distribution, initialized with the
    given parameters, and plots the fitted distribution
    against the ground-truth data.

    Params
    ------
    series : pandas Series
        Ground-truth values.
    c, floc, scale : float
        Initial shape, fixed location, and initial scale for the fit.
    title : str
        Plot title.

    Returns
    -------
    params : dictionary
        Contains the fitted shape ('c') and scale.
    """
    # Fit distribution
    (c, loc, scale) = weibull_min.fit(series, c, floc=floc, scale=scale)
    
    # Plot
    ax = plt.figure(figsize=(12,6)).gca()
    # the histogram of the data
    # Set as many bins as days
    bins = int(series.max() - series.min() + 1)
    n, bins, patches = plt.hist(series, bins, facecolor='green', alpha=1, density=True)
    # add a 'best fit' line
    y = weibull_min.pdf(bins, c, floc, scale)
    l = plt.plot(bins, y, 'r--', linewidth=2)
    plt.xlabel("Días")
    plt.title(title)
    # Only integer days
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    
    # Store parameters
    params = {"c":c, "scale":scale}
    return params
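
A possible call of the plotting helper above (illustrative only), assuming matplotlib.pyplot as plt and MaxNLocator are imported at module level as the function expects, and using synthetic day counts:

import pandas as pd
from scipy.stats import weibull_min

days = pd.Series(weibull_min.rvs(1.3, loc=0, scale=7.0, size=300, random_state=0)).round() + 1
params = fit_weibull(days, c=1.3, title="Example")
print(params)  # {'c': fitted shape, 'scale': fitted scale}
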
def clever_t(classifier, x, target_class, nb_batches, batch_size, radius, norm, c_init=1, pool_factor=10):
    """
    Compute CLEVER score for a targeted attack. Paper link: https://arxiv.org/abs/1801.10578

    :param classifier: A trained model
    :type classifier: :class:`.Classifier`
    :param x: One input sample
    :type x: `np.ndarray`
    :param target_class: Targeted class
    :type target_class: `int`
    :param nb_batches: Number of repetitions of the estimate
    :type nb_batches: `int`
    :param batch_size: Number of random examples to sample per batch
    :type batch_size: `int`
    :param radius: Radius of the maximum perturbation
    :type radius: `float`
    :param norm: Current support: 1, 2, np.inf
    :type norm: `int`
    :param c_init: Initialization of Weibull distribution
    :type c_init: `float`
    :param pool_factor: The factor to create a pool of random samples with size pool_factor x n_s
    :type pool_factor: `int`
    :return: CLEVER score
    :rtype: `float`
    """
    # Check if the targeted class is different from the predicted class
    y_pred = classifier.predict(np.array([x]), logits=True)
    pred_class = np.argmax(y_pred, axis=1)[0]
    if target_class == pred_class:
        raise ValueError("The targeted class is the predicted class.")

    # Check if pool_factor is smaller than 1
    if pool_factor < 1:
        raise ValueError("The `pool_factor` must be at least 1.")

    # Some auxiliary vars
    grad_norm_set = []
    dim = reduce(lambda x_, y: x_ * y, x.shape, 1)
    shape = [pool_factor * batch_size]
    shape.extend(x.shape)

    # Generate a pool of samples
    rand_pool = np.reshape(random_sphere(nb_points=pool_factor * batch_size, nb_dims=dim, radius=radius, norm=norm),
                           shape)
    rand_pool += np.repeat(np.array([x]), pool_factor * batch_size, 0)
    rand_pool = rand_pool.astype(NUMPY_DTYPE)
    np.clip(rand_pool, classifier.clip_values[0], classifier.clip_values[1], out=rand_pool)

    # Change norm since q = p / (p-1)
    if norm == 1:
        norm = np.inf
    elif norm == np.inf:
        norm = 1
    elif norm != 2:
        raise ValueError("Norm {} not supported".format(norm))

    # Loop over the batches
    for _ in range(nb_batches):
        # Random generation of data points
        sample_xs = rand_pool[np.random.choice(pool_factor * batch_size, batch_size)]

        # Compute gradients
        grads = classifier.class_gradient(sample_xs, logits=True)
        if np.isnan(grads).any():
            raise Exception("The classifier returned NaN gradients.")

        grad = grads[:, pred_class] - grads[:, target_class]
        grad = np.reshape(grad, (batch_size, -1))
        grad_norm = np.max(np.linalg.norm(grad, ord=norm, axis=1))
        grad_norm_set.append(grad_norm)

    # Maximum likelihood estimation for max gradient norms
    [_, loc, _] = weibull_min.fit(-np.array(grad_norm_set), c_init, optimizer=scipy_optimizer)

    # Compute function value
    values = classifier.predict(np.array([x]), logits=True)
    value = values[:, pred_class] - values[:, target_class]

    # Compute scores
    s = np.min([-value[0] / loc, radius])

    return s

# Exponential function
def exp3(x, a, b, c):
    return a + b * np.exp(c * x)


#%% Read dataset A, B  or C.
DATASET_CHAR = 'A'
file_path = '../datasets/' + DATASET_CHAR + '.txt'
sample_hs, sample_tz, label_hs, label_tz = read_dataset(file_path)

df = pd.read_csv(file_path, sep='; ')
#%% Inspect the marginal distributions

weib_par1 = weibull_min.fit(df[df.columns[1]], loc=0)
logn_par1 = lognorm.fit(df[df.columns[1]], loc=0)

weib_par2 = weibull_min.fit(df[df.columns[2]], loc=0)
logn_par2 = lognorm.fit(df[df.columns[2]], loc=0)

#%% Goodness of fit

print(kstest(df[df.columns[1]].values, 'weibull_min', args=weib_par1))
print(kstest(df[df.columns[1]].values, 'lognorm', args=logn_par1))

print(kstest(df[df.columns[2]].values, 'weibull_min', args=weib_par2))
print(kstest(df[df.columns[2]].values, 'lognorm', args=logn_par2))

#%% Plot the distributions
#n_bins = 100
# Number of free parameters (the k in the AIC); each distribution below has two parameters to estimate
num_params = 2

# Parameter estimates for generic data
shape1, loc1, scale1 = lognorm.fit(data2, floc=0)
mu1 = np.log(scale1)
sigma1 = shape1
y1 = lognorm.pdf(data2, s=sigma1, scale=np.exp(mu1))
log_likelihood1 = np.sum(np.log(y1))
print("Lognorm loglikelihood = " + str(log_likelihood1))
aic1= -2 * log_likelihood1 + 2 * num_params
print("Lognorm AIC = " + str(aic1))

# https://stackoverflow.com/questions/33070724/determine-weibull-parameters-from-data
# Parameter estimates for generic data
shape2, loc2, scale2 = weibull_min.fit(data2, floc=0)
c = shape2
b = scale2
y2 = weibull_min.pdf(data2, c, scale=b)
log_likelihood2 = np.sum(np.log(y2))
print("Weibull loglikelihood = " + str(log_likelihood2))
aic2= -2 * log_likelihood2 + 2 * num_params
print("Weibull AIC = " + str(aic2))

# https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.invgauss.html
# the argument floc=0 ensures that the location is not treated as a free parameter
# Parameter estimates for generic data
shape3, loc3, scale3 = invgauss.fit(data2, floc=0)
mu3 = shape3
lambda3 = scale3
# fitting the data with the estimated parameters
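# The snippet ends before the inverse-Gaussian log-likelihood and AIC are
# computed; a completion following the same pattern as above (not part of the
# original code) could look like:
y3 = invgauss.pdf(data2, mu3, scale=lambda3)
log_likelihood3 = np.sum(np.log(y3))
print("Invgauss loglikelihood = " + str(log_likelihood3))
aic3 = -2 * log_likelihood3 + 2 * num_params
print("Invgauss AIC = " + str(aic3))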
Example #18
u1_50 = beta50*np.sin(phi)

theta_v_ij = np.zeros(shape=(NR_OF_BOOTSTRAP_SAMPLES, int(360/ANGLE_STEP_FOR_CI)))
theta_hs_ij = np.zeros(shape=(NR_OF_BOOTSTRAP_SAMPLES, int(360/ANGLE_STEP_FOR_CI)))


def power3(x, a, b, c):
    return a + b * x ** c

for i in range(NR_OF_BOOTSTRAP_SAMPLES):
    # Resample from the hindcast dataset to get the sample D_i.
    sample_indices = np.random.randint(dataset_d_v.size, size=nr_of_datapoints_to_draw)
    v_i = np.take(dataset_d_v, sample_indices)
    hs_i = np.take(dataset_d_hs, sample_indices)
    # Fit Weibull to Hs:
    weib_par2 = weibull_min.fit(hs_i, loc=0)
    # Find the conditional Weibull for V:
    h_min = hs_i.min()
    h_max = hs_i.max()

    h_bins = np.arange(np.floor(h_min), np.ceil(h_max), bin_size) + bin_size/2
    h_binedges = h_bins + bin_size/2

    h_ind_bin = np.digitize(hs_i, bins=h_binedges)
    unique, counts = np.unique(h_ind_bin, return_counts=True)

    ind_min_bin = unique[counts>10][0]
    ind_max_bin = unique[counts>10][-1]
    x_bins = h_bins[ind_min_bin:ind_max_bin+1]
    real_bins = np.zeros(len(x_bins))
Example #19
    7.21, 7.88, 7.47, 8.16, 8.73, 9.91, 10.11, 8.51, 8.11, 7, 5.98, 5.07, 4.66,
    4.16, 3.69, 3.3, 2.54, 1.89, 1.89, 1.95, 2.34, 2.04, 2.66, 3.3, 3.58, 3.87,
    1.99, 2.1, 2.99, 3.46, 3.26, 2.43, 1.78, 1.39, 0.93, 0.84, 1.14, 1.7, 1.7,
    1.13, 1.34, 1.34, 1.49, 1.72, 1.79, 2.01, 1.34, 0.79, 0.35, 0.93, 4.76,
    5.47, 6.11, 6.31, 5.46, 4.7, 4.47, 4.19, 3.72, 3.26, 2.75, 2.28, 1.35,
    0.88, 0.43, 0.89, 1.14, 1.56, 2.01, 1.34, 1.14, 1.05, 2.21, 3.21, 5.9,
    6.99, 6.77, 5.81, 4.48, 3.15, 2.02, 1.64, 1.54, 2.1, 2.29, 2.48, 2.56,
    2.43, 2.43, 2.28, 2.52, 3.02, 3.2, 2.88, 4.01, 5.07, 7.1, 9.31, 7.16, 8.7,
    9.87, 10.48, 9.91, 8.12, 6.79, 6.01, 5.42, 4.87, 4.48, 4.87, 5.1, 4.93,
    4.61, 4.73, 4.32, 4.19, 3.96, 3.25, 4.36, 7.09, 10.11, 12.33, 12.1, 13.34,
    13.61, 12.52, 8.9, 6.2, 5.42, 4.85, 4.48, 3.55, 2.69, 2.34, 1.54, 1.5,
    1.96, 2.19, 2.19, 2.47, 2.34, 2.12, 2.08, 2.37, 2.18, 3.37, 5.43, 6.58,
    7.35, 7.4, 6.46
]

(shape, loc, scale) = weibull_min.fit(windspeed)  # parameters fitted to the data
print((shape, loc, scale))

plt.figure(1)
plt.subplot(1, 2, 1)
plt.title("原始风速数据直方图")
plt.hist(windspeed, density=True, histtype='stepfilled', alpha=0.2)
plt.subplot(1, 2, 2)
plt.title("原始风速数据")
plt.plot(np.arange(0, len(windspeed), 1), windspeed)
# endregion

# region 3. Generate a new set of Weibull samples from the fitted parameters
windspeed_new = weibull_min.rvs(shape, loc=0, scale=scale, size=100)
plt.figure(2)
plt.subplot(1, 2, 1)
from scipy.stats import weibull_min
from scipy.stats import norm
from scipy.stats import lognorm
from matplotlib.patches import Rectangle

n, bins, patches = plt.hist(df['price'],
                            500,
                            density=1,
                            facecolor='b',
                            alpha=.75)

#Overlay distributions that might be an appropriate fit
x = np.linspace(df['price'].min(), df['price'].max(), 100)

#Weibull Distribution
shape, loc, scale = weibull_min.fit(df['price'], floc=0)
plt.plot(x, weibull_min(shape, loc, scale).pdf(x), color='g')

#Normal Distribution
loc, scale = norm.fit(df['price'])
plt.plot(x, norm(loc, scale).pdf(x), color='r')

#Lognormal Distribution
shape, loc, scale = lognorm.fit(df['price'], floc=0)
plt.plot(x, lognorm(shape, loc, scale).pdf(x), color='y')

plt.xlabel('Price')
plt.ticklabel_format(style='plain')
plt.xticks(rotation='vertical')
plt.ylabel('Probability')
plt.title('Histogram of All Prices\nAnd Some Distributions')
Example #21
plt.ylabel('IDH')
plt.scatter(digdev, idh, c=cibsec, s=cibsec * 100)
plt.show()
#%%
plt.figure()
loc, scale = expon.fit(np.divide(1, milpow))
mu_mil = expon.mean(loc=loc, scale=scale)
plt.hist(np.divide(1, milpow), density=1, label='Media=%.2f' % mu_mil)
x = np.linspace(min(np.divide(1, milpow)), max(np.divide(1, milpow)))
plt.plot(x, expon.pdf(x, loc=loc, scale=scale))
plt.title('Poder Militar')
plt.legend(loc='best')
plt.show()
#%%
plt.figure()
c, loc, scale = weibull_min.fit(cibsec)
mu_cib = weibull_min.mean(c, loc=loc, scale=scale)
plt.hist(cibsec, density=1, label='Media=%.2f' % mu_cib)
x_cib = np.linspace(min(cibsec), max(cibsec))
plt.plot(x_cib, weibull_min.pdf(x_cib, c, loc=loc, scale=scale))
plt.title('Segurança Cibernética')
plt.legend(loc='best')
plt.show()
#%%
plt.figure()
c_dsv, loc_dsv, scale_dsv = weibull_min.fit(digdev)
mu_dsv = weibull_min.mean(c_dsv, loc=loc_dsv, scale=scale_dsv)
plt.hist(digdev, density=1, label='Media=%.2f' % mu_dsv)
x_dsv = np.linspace(min(digdev), max(digdev))
plt.title('Desenvolvimento Digital')
plt.plot(x_dsv, weibull_min.pdf(x_dsv, c_dsv, loc=loc_dsv, scale=scale_dsv))
Example #22
 def from_durations2(cls, durations):
     from scipy.stats import weibull_min
     param = weibull_min.fit(durations, floc=0)
     alpha = param[0]
     beta = param[2]
     return cls(alpha, beta)
Example #23
def fit_weibull(group):
    shape, loc, scale = weibull_min.fit(group['time'].values, floc=0)
    return pd.Series({
        'shape': shape,
        'scale': scale,
    })
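
A hedged usage sketch for the group-wise fit above (not from the original source), assuming a DataFrame with a grouping column and a 'time' column; names and data are illustrative:

import pandas as pd
from scipy.stats import weibull_min

df = pd.DataFrame({
    'machine': ['a'] * 50 + ['b'] * 50,
    'time': weibull_min.rvs(1.5, loc=0, scale=100.0, size=100, random_state=0),
})
per_group = df.groupby('machine').apply(fit_weibull)
print(per_group)  # one row of fitted (shape, scale) per group
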
Example #24
File: downtime.py, Project: ribeiroale/ares
def downtime_accepted_models(D=list(), alpha=.05):
    params = list()
    params.append(uniform.fit(D))
    params.append(expon.fit(D))
    params.append(rayleigh.fit(D))
    params.append(weibull_min.fit(D))
    params.append(gamma.fit(D))
    params.append(gengamma.fit(D))
    params.append(invgamma.fit(D))
    params.append(gompertz.fit(D))
    params.append(lognorm.fit(D))
    params.append(exponweib.fit(D))

    llf_value = list()
    llf_value.append(log(product(uniform.pdf(D, *params[0]))))
    llf_value.append(log(product(expon.pdf(D, *params[1]))))
    llf_value.append(log(product(rayleigh.pdf(D, *params[2]))))
    llf_value.append(log(product(weibull_min.pdf(D, *params[3]))))
    llf_value.append(log(product(gamma.pdf(D, *params[4]))))
    llf_value.append(log(product(gengamma.pdf(D, *params[5]))))
    llf_value.append(log(product(invgamma.pdf(D, *params[6]))))
    llf_value.append(log(product(gompertz.pdf(D, *params[7]))))
    llf_value.append(log(product(lognorm.pdf(D, *params[8]))))
    llf_value.append(log(product(exponweib.pdf(D, *params[9]))))

    AIC = list()
    AIC.append(2 * len(params[0]) - 2 * llf_value[0])
    AIC.append(2 * len(params[1]) - 2 * llf_value[1])
    AIC.append(2 * len(params[2]) - 2 * llf_value[2])
    AIC.append(2 * len(params[3]) - 2 * llf_value[3])
    AIC.append(2 * len(params[4]) - 2 * llf_value[4])
    AIC.append(2 * len(params[5]) - 2 * llf_value[5])
    AIC.append(2 * len(params[6]) - 2 * llf_value[6])
    AIC.append(2 * len(params[7]) - 2 * llf_value[7])
    AIC.append(2 * len(params[8]) - 2 * llf_value[8])
    AIC.append(2 * len(params[9]) - 2 * llf_value[9])

    model = list()
    model.append(
        ["uniform", params[0],
         kstest(D, "uniform", params[0])[1], AIC[0]])
    model.append(
        ["expon", params[1],
         kstest(D, "expon", params[1])[1], AIC[1]])
    model.append(
        ["rayleigh", params[2],
         kstest(D, "rayleigh", params[2])[1], AIC[2]])
    model.append([
        "weibull_min", params[3],
        kstest(D, "weibull_min", params[3])[1], AIC[3]
    ])
    model.append(
        ["gamma", params[4],
         kstest(D, "gamma", params[4])[1], AIC[4]])
    model.append(
        ["gengamma", params[5],
         kstest(D, "gengamma", params[5])[1], AIC[5]])
    model.append(
        ["invgamma", params[6],
         kstest(D, "invgamma", params[6])[1], AIC[6]])
    model.append(
        ["gompertz", params[7],
         kstest(D, "gompertz", params[7])[1], AIC[7]])
    model.append(
        ["lognorm", params[8],
         kstest(D, "lognorm", params[8])[1], AIC[8]])
    model.append(
        ["exponweib", params[9],
         kstest(D, "exponweib", params[9])[1], AIC[9]])

    accepted_models = [i for i in model if i[2] > alpha]

    if accepted_models:
        aic_values = [i[3] for i in accepted_models]
        final_model = min(range(len(aic_values)), key=aic_values.__getitem__)
        return accepted_models, accepted_models[final_model]
    elif not accepted_models:
        aic_values = [i[3] for i in model]
        final_model = min(range(len(aic_values)), key=aic_values.__getitem__)
        return model, model[final_model]
Example #25
    def fit(s, H, T, **kwargs):
        # Create the lognormal distribution of H
        Hscale, Hloc, Hshape = lognorm.fit(H, floc=0)  # fit lognormal
        s.fln_H = lognorm(Hscale, Hloc, Hshape)  # create the distribution
        s.fln_H_fit = 'xi = %.4f, lambda = %.4f' % (Hscale, np.log(Hshape))

        # Create the Weibull distribution of H
        Hscale, Hloc, Hshape = weibull_min.fit(H, floc=0)  # fit Weibull
        s.fwei_H = weibull_min(Hscale, Hloc, Hshape)  # create the distribution
        s.fwei_H_fit = 'lambda = %.4f, alpha = %.4f' % (Hscale, Hshape)

        s.H = H
        s.T = T

        # Compute the histogram
        s.bins = 20
        s.rangeH = [0, 10]
        s.rangeT = [0, 20]

        if 'bins' in kwargs:
            s.bins = kwargs['bins']
        if 'rangeH' in kwargs:
            s.rangeH = kwargs['rangeH']
        if 'rangeT' in kwargs:
            s.rangeT = kwargs['rangeT']

        s.H_hist_y, s.H_hist_x = np.histogram(H,
                                              s.bins,
                                              density=True,
                                              range=s.rangeH)
        s.H_hist_xM = s.H_hist_x[:-1] + s.H_hist_x[0:2].mean()

        s.T_hist_y, s.T_hist_x = np.histogram(T,
                                              s.bins,
                                              density=True,
                                              range=s.rangeT)
        s.T_hist_xM = s.T_hist_x[:-1] + s.T_hist_x[0:2].mean()

        # Split T conditional on H and estimate the distribution parameters
        dft = pd.DataFrame(dict(H=H, T=T))

        ln_param = []
        wei_param = []

        for ix, aux in enumerate(s.H_hist_x):
            if ix == len(s.H_hist_x) - 1:
                break
            temp = dft[np.logical_and(dft.H > s.H_hist_x[ix],
                                      dft.H < s.H_hist_x[ix + 1])]['T'].values
            if len(temp) > 50:
                # the list contains [xi, loc, lamb, x position, conditioning Hs]
                # xi = [0], loc = [1], lamb = [2], xpos = [3]
                ln_param.append(
                    lognorm.fit(temp, floc=0) + tuple([s.H_hist_xM[ix]]))
                # the list contains [lamb, loc, alpha, conditioning Hs]
                # lambw = [0], loc = [1], alpha = [2], xpos = [3]
                wei_param.append(
                    weibull_min.fit(temp, floc=0) + tuple([s.H_hist_xM[ix]]))
            pass

        # Build the distribution-parameter functions (polynomials in Hs)
        s.Tfxi = np.poly1d(
            np.polyfit([aux[3] for aux in ln_param],
                       [aux[0] for aux in ln_param], 3))
        s.Tflamb = np.poly1d(
            np.polyfit([aux[3] for aux in ln_param],
                       [aux[2] for aux in ln_param], 3))

        s.Tflambw = np.poly1d(
            np.polyfit([aux[3] for aux in wei_param],
                       [aux[0] for aux in wei_param], 3))
        s.Tfalphaw = np.poly1d(
            np.polyfit([aux[3] for aux in wei_param],
                       [aux[2] for aux in wei_param], 3))

        s.dl = pd.DataFrame(ln_param)
        s.dw = pd.DataFrame(wei_param)

        if 'tipofH' in kwargs:
            if kwargs['tipofH'] == 'weibull':
                s.fH = s.fwei_H
                s.fHtype = 'weibull'
            if kwargs['tipofH'] == 'lognormal':
                s.fH = s.fln_H
                s.fHtype = 'lognormal'
                pass
        else:
            s.fH = s.fln_H
            s.fHtype = 'lognormal'
            pass

        if 'tipofT' in kwargs:
            if kwargs['tipofT'] == 'weibull':
                s.fT = lambda h: weibull_min(s.Tflambw(h), 0, s.Tfalphaw(h))
                s.fTtype = 'weibull'
            if kwargs['tipofT'] == 'lognormal':
                s.fT = lambda h: lognorm(s.Tfxi(h), 0, s.Tflamb(h))
                s.fTtype = 'lognormal'
        else:
            s.fT = lambda h: lognorm(s.Tfxi(h), 0, s.Tflamb(h))
            s.fTtype = 'lognormal'
Example #26
    def fit(s, H, T, **kwargs):

        # Fit lognormal and Weibull distributions to the Hs parameters

        scaleH, lH, shapeH = lognorm.fit(H, floc=0)  # fit lognormal
        s.fln_H = lognorm(scaleH, lH, shapeH)  # create the distribution
        s.fln_H_fit = '%.4f,%.4f' % (scaleH, np.log(shapeH))

        scaleH, lH, shapeH = weibull_min.fit(H, floc=0)  # fit Weibull
        s.fwei_H = weibull_min(scaleH, lH, shapeH)  # create the distribution
        s.fwei_H_fit = '%.4f,%.4f' % (scaleH, shapeH)

        # Fit lognormal and Weibull distributions to the Tp parameters

        scaleT, lT, shapeT = lognorm.fit(T, floc=0)
        s.fln_T = lognorm(scaleT, lT, shapeT)  # create the lognormal distribution of Tp
        s.fln_T_fit = '%.4f,%.4f' % (scaleT, np.log(shapeT))

        scaleT, lT, shapeT = weibull_min.fit(T, floc=0)
        s.fwei_T = weibull_min(scaleT, lT, shapeT)
        s.fwei_T_fit = '%.4f,%.4f' % (scaleT, shapeT)

        s.H = H  # store the Hs series internally
        s.T = T  # store the Tp series internally

        # Use the distribution chosen by the user
        if 'tipofH' in kwargs:
            if kwargs['tipofH'] == 'weibull':
                s.fH = s.fwei_H
                s.fHtype = 'weibull'
            if kwargs['tipofH'] == 'lognormal':
                s.fH = s.fln_H
                s.fHtype = 'lognormal'
                pass
        else:
            s.fH = s.fln_H
            s.fHtype = 'lognormal'

        if 'tipofT' in kwargs:
            if kwargs['tipofT'] == 'weibull':
                s.fT = s.fwei_T
                s.fTtype = 'weibull'
            if kwargs['tipofT'] == 'lognormal':
                s.fT = s.fln_T
                s.fTtype = 'lognormal'
        else:
            s.fT = s.fln_T
            s.fTtype = 'lognormal'

        # Automatically set the bins and ranges, or take them from kwargs.
        s.bins = 20
        s.rangeH = [0, 10]
        s.rangeT = [0, 20]

        if 'bins' in kwargs:
            s.bins = kwargs['bins']
        if 'rangeH' in kwargs:
            s.rangeH = kwargs['rangeH']
        if 'rangeT' in kwargs:
            s.rangeT = kwargs['rangeT']

        # Find rhobru, the correlation between H and T, used in the joint pdf
        s.rhobru = scipy.stats.pearsonr(H, T)[0]
        #        s.rhobru = scipy.stats.spearmanr(H,T)[0]

        # Define a standard normal distribution used to build u1 and u2 (or uh and ut)
        s.N = norm(0, 1)

        s.phi_1 = lambda u: 1 / (np.sqrt(2 * np.pi)) * np.exp(-u**2 / 2)

        s.phi_2 = lambda u1, u2, rhobru: (2 * np.pi * np.sqrt(
            1 - rhobru**2))**-1 * np.exp(
                (-2 *
                 (1 - rhobru**2))**-1 * (u1**2 + u2**2 - 2 * rhobru * u1 * u2))
        pass
def clever_t(x, classifier, target_class, n_b, n_s, r, sess, c_init=1):
    """
    Compute CLEVER score for a targeted attack. Paper link: https://arxiv.org/abs/1801.10578

    :param x: One input sample
    :type x: `np.ndarray`
    :param classifier: A trained model
    :type classifier: :class:`Classifier`
    :param target_class: Targeted class
    :type target_class: `int`
    :param n_b: Batch size
    :type n_b: `int`
    :param n_s: Number of examples per batch
    :type n_s: `int`
    :param r: Maximum perturbation
    :type r: `float`
    :param sess: The session to run graphs in
    :type sess: `tf.Session`
    :param c_init: Initialization of Weibull distribution
    :type c_init: `float`
    :return: A tuple of 3 CLEVER scores, corresponding to norms 1, 2 and np.inf
    :rtype: `tuple`
    """
    # Check if the targeted class is different from the predicted class
    y_pred = classifier.predict(np.array([x]))
    pred_class = np.argmax(y_pred, axis=1)[0]
    if target_class == pred_class:
        raise ValueError("The targeted class is the predicted class!")

    # Define placeholders for computing g gradients
    shape = [None]
    shape.extend(x.shape)
    imgs = tf.placeholder(shape=shape, dtype=tf.float32)
    pred_class_ph = tf.placeholder(dtype=tf.int32, shape=[])
    target_class_ph = tf.placeholder(dtype=tf.int32, shape=[])

    # Define tensors for g gradients
    grad_norm_1, grad_norm_2, grad_norm_8, g_x = _build_g_gradient(
        imgs, classifier, pred_class_ph, target_class_ph)

    # Some auxiliary vars
    set1, set2, set8 = [], [], []
    dim = reduce(lambda x_, y: x_ * y, x.shape, 1)
    shape = [n_s]
    shape.extend(x.shape)

    # Compute predicted class
    y_pred = classifier.predict(np.array([x]))
    pred_class = np.argmax(y_pred, axis=1)[0]

    # Loop over n_b batches
    for i in range(n_b):
        # Random generation of data points
        sample_xs0 = np.reshape(_random_sphere(m=n_s, n=dim, r=r), shape)
        sample_xs = sample_xs0 + np.repeat(np.array([x]), n_s, 0)
        np.clip(sample_xs, 0, 1, out=sample_xs)

        # Preprocess data if it is supported in the classifier
        if hasattr(classifier, 'feature_squeeze'):
            sample_xs = classifier.feature_squeeze(sample_xs)
        sample_xs = classifier._preprocess(sample_xs)

        # Compute gradients
        max_gn1, max_gn2, max_gn8 = sess.run(
            [grad_norm_1, grad_norm_2, grad_norm_8],
            feed_dict={
                imgs: sample_xs,
                pred_class_ph: pred_class,
                target_class_ph: target_class
            })
        set1.append(max_gn1)
        set2.append(max_gn2)
        set8.append(max_gn8)

    # Maximum likelihood estimation for max gradient norms
    [_, loc1, _] = weibull_min.fit(-np.array(set1),
                                   c_init,
                                   optimizer=scipy_optimizer)
    [_, loc2, _] = weibull_min.fit(-np.array(set2),
                                   c_init,
                                   optimizer=scipy_optimizer)
    [_, loc8, _] = weibull_min.fit(-np.array(set8),
                                   c_init,
                                   optimizer=scipy_optimizer)

    # Compute g_x0
    x0 = np.array([x])
    if hasattr(classifier, 'feature_squeeze'):
        x0 = classifier.feature_squeeze(x0)
    x0 = classifier._preprocess(x0)
    g_x0 = sess.run(g_x,
                    feed_dict={
                        imgs: x0,
                        pred_class_ph: pred_class,
                        target_class_ph: target_class
                    })

    # Compute scores
    # Note q = p / (p-1)
    s8 = np.min([-g_x0[0] / loc1, r])
    s2 = np.min([-g_x0[0] / loc2, r])
    s1 = np.min([-g_x0[0] / loc8, r])

    return s1, s2, s8
def clever_t(
    classifier: "CLASSIFIER_CLASS_LOSS_GRADIENTS_TYPE",
    x: np.ndarray,
    target_class: int,
    nb_batches: int,
    batch_size: int,
    radius: float,
    norm: int,
    c_init: float = 1.0,
    pool_factor: int = 10,
) -> float:
    """
    Compute CLEVER score for a targeted attack.

    | Paper link: https://arxiv.org/abs/1801.10578

    :param classifier: A trained model.
    :param x: One input sample.
    :param target_class: Targeted class.
    :param nb_batches: Number of repetitions of the estimate.
    :param batch_size: Number of random examples to sample per batch.
    :param radius: Radius of the maximum perturbation.
    :param norm: Current support: 1, 2, np.inf.
    :param c_init: Initialization of Weibull distribution.
    :param pool_factor: The factor to create a pool of random samples with size pool_factor x n_s.
    :return: CLEVER score.
    """
    # Check if the targeted class is different from the predicted class
    y_pred = classifier.predict(np.array([x]))
    pred_class = np.argmax(y_pred, axis=1)[0]
    if target_class == pred_class:
        raise ValueError("The targeted class is the predicted class.")

    # Check if pool_factor is smaller than 1
    if pool_factor < 1:
        raise ValueError("The `pool_factor` must be at least 1.")

    # Some auxiliary vars
    rand_pool_grad_set = []
    grad_norm_set = []
    dim = reduce(lambda x_, y: x_ * y, x.shape, 1)
    shape = [pool_factor * batch_size]
    shape.extend(x.shape)

    # Generate a pool of samples
    rand_pool = np.reshape(
        random_sphere(nb_points=pool_factor * batch_size,
                      nb_dims=dim,
                      radius=radius,
                      norm=norm),
        shape,
    )
    rand_pool += np.repeat(np.array([x]), pool_factor * batch_size, 0)
    rand_pool = rand_pool.astype(ART_NUMPY_DTYPE)
    if hasattr(classifier,
               "clip_values") and classifier.clip_values is not None:
        np.clip(rand_pool,
                classifier.clip_values[0],
                classifier.clip_values[1],
                out=rand_pool)

    # Change norm since q = p / (p-1)
    if norm == 1:
        norm = np.inf
    elif norm == np.inf:
        norm = 1
    elif norm != 2:
        raise ValueError("Norm {} not supported".format(norm))

    # Compute gradients for all samples in rand_pool
    for i in range(batch_size):
        rand_pool_batch = rand_pool[i * pool_factor:(i + 1) * pool_factor]

        # Compute gradients
        grad_pred_class = classifier.class_gradient(rand_pool_batch,
                                                    label=pred_class)
        grad_target_class = classifier.class_gradient(rand_pool_batch,
                                                      label=target_class)

        if np.isnan(grad_pred_class).any() or np.isnan(
                grad_target_class).any():
            raise Exception("The classifier returned NaN gradients.")

        grad = grad_pred_class - grad_target_class
        grad = np.reshape(grad, (pool_factor, -1))
        grad = np.linalg.norm(grad, ord=norm, axis=1)
        rand_pool_grad_set.extend(grad)

    rand_pool_grads = np.array(rand_pool_grad_set)

    # Loop over the batches
    for _ in range(nb_batches):
        # Random selection of gradients
        grad_norm = rand_pool_grads[np.random.choice(pool_factor * batch_size,
                                                     batch_size)]
        grad_norm = np.max(grad_norm)
        grad_norm_set.append(grad_norm)

    # Maximum likelihood estimation for max gradient norms
    [_, loc, _] = weibull_min.fit(-np.array(grad_norm_set),
                                  c_init,
                                  optimizer=scipy_optimizer)

    # Compute function value
    values = classifier.predict(np.array([x]))
    value = values[:, pred_class] - values[:, target_class]

    # Compute scores
    score = np.min([-value[0] / loc, radius])

    return score
Example #29
def get_error_set(root_dir, error_suffix, sample_list, out_prefix):
    var_list = []
    dp_0_dict = {}  # include sample with vd >= 0
    dp_1_dict = {}  # include sample with vd >= 1
    vd_dict = {}
    sample_0_dict = {}  # include sample with vd >= 0
    sample_1_dict = {}  # include sample with vd >= 1
    af_list_dict = {}
    for sample in sample_list:
        with open('{}/{}.{}'.format(root_dir, sample, error_suffix)) as f:
            for line in f:
                if re.match(r'^chr.*', line):
                    line = line.strip()
                    chrom, pos, ref, alt, dp, vd, af = line.split('\t')
                    var = '{}_{}_{}_{}'.format(chrom, pos, ref, alt)
                    var_list.append(var)
    var_list = list(set(var_list))
    for var in var_list:
        af_list_dict[var] = []
        dp_0_dict[var] = 0
        dp_1_dict[var] = 0
        vd_dict[var] = 0
        sample_0_dict[var] = 0
        sample_1_dict[var] = 0
    for sample in sample_list:
        with open('{}/{}.{}'.format(root_dir, sample, error_suffix)) as f:
            for line in f:
                if re.match(r'^chr.*', line):
                    line = line.strip()
                    chrom, pos, ref, alt, dp, vd, af = line.split('\t')
                    var = '{}_{}_{}_{}'.format(chrom, pos, ref, alt)
                    dp_0_dict[var] += int(dp)
                    vd_dict[var] += int(vd)
                    sample_0_dict[var] += 1
                    af_list_dict[var].append(af)
                    if float(af) > 0:
                        dp_1_dict[var] += int(dp)
                        sample_1_dict[var] += 1
    r = open('{}/{}.error.txt'.format(root_dir, out_prefix), 'w')
    r.write(
        'Chr\tPos\tRef\tAlt\tSample(T|0|1)\tDP(0|1)\tVD\tAF(0|1)\tAF_List\tDistribution\tMean/Shape\tSD/Scale\n'
    )
    sample = len(sample_list)
    for var in sorted(var_list):
        chrom, pos, ref, alt = var.split('_')
        err_0 = sample_0_dict[var]
        err_1 = sample_1_dict[var]
        dp_0 = dp_0_dict[var]
        dp_1 = dp_1_dict[var]
        vd = vd_dict[var]
        af_0 = vd / float(dp_0)
        af_1 = 0.0
        if dp_1 != 0:
            af_1 = vd / float(dp_1)
        af_list = ','.join(af_list_dict[var])
        float_af_list = []
        for af in af_list_dict[var]:
            float_af_list.append(float(af))
        af_max = max(float_af_list)
        gas_list = []
        wei_list = []
        for i in float_af_list:
            if i != af_max:
                gas_list.append(i)
                if i != 0.0:
                    wei_list.append(i)
        if len(float_af_list) == 1 or len(
                float_af_list) == float_af_list.count(0.0):
            r.write(
                '{}\t{}\t{}\t{}\t{}|{}|{}\t{}|{}\t{}\t{}|{}\t{}\t{}\t{}\t{}\n'.
                format(chrom, pos, ref, alt, sample, err_0, err_1, dp_0, dp_1,
                       vd, af_0, af_1, af_list, 'NA', 'NA', 'NA'))
        else:
            if len(wei_list) < 5:
                dis = "Gaussian"
                mean = np.mean(gas_list)
                sd = np.std(gas_list)
                r.write(
                    '{}\t{}\t{}\t{}\t{}|{}|{}\t{}|{}\t{}\t{}|{}\t{}\t{}\t{}\t{}\n'
                    .format(chrom, pos, ref, alt, sample, err_0, err_1, dp_0,
                            dp_1, vd, af_0, af_1, af_list, dis, mean, sd))
            else:
                dis = 'Weibull'
                shape, loc, scale = weibull_min.fit(wei_list, floc=0)
                r.write(
                    '{}\t{}\t{}\t{}\t{}|{}|{}\t{}|{}\t{}\t{}|{}\t{}\t{}\t{}\t{}\n'
                    .format(chrom, pos, ref, alt, sample, err_0, err_1, dp_0,
                            dp_1, vd, af_0, af_1, af_list, dis, shape, scale))
    r.close()
Example #30
axs[0].set_xlabel(hs_label)
axs[0].set_ylabel(steepness_label)

# Define a hs - steepness bivariate model
dist_description_hs = {
    'name': 'Weibull_Exp'
}  # Order: shape, loc, scale, shape2
dist_description_s = {'name': 'Weibull_3p'}

from scipy.stats import weibull_min
from viroconcom.distributions import WeibullDistribution
from viroconcom.distributions import ExponentiatedWeibullDistribution
from viroconcom.distributions import MultivariateDistribution
from viroconcom.params import FunctionParam

params = weibull_min.fit(steepness, floc=0.005)
my_loc = FunctionParam('poly1', 0.0015, 0.002, None)
dist_s = WeibullDistribution(shape=params[0], loc=my_loc, scale=params[2])
dist_hs = ExponentiatedWeibullDistribution()
dist_hs.fit(hs)
joint_dist = MultivariateDistribution(distributions=[dist_hs, dist_s],
                                      dependencies=[(None, None, None, None),
                                                    (None, 0, None)])

# Fit the model to the data.
#fit = Fit((hs, steepness),
#          (dist_description_hs, dist_description_s))
#joint_dist = fit.mul_var_dist

trs = [1, 50, 250]
fms = np.empty(shape=(3, 1))
Example #31
    def HisPAnalysis_Stn(Wth_obv, Setting, Stat, Stn):
        P_Threshold = Setting["P_Threshold"]
        Data = {
            "P": Wth_obv[Stn]["PP01"],  # P is the original observed data
            "Pw": Wth_obv[Stn]["PP01"]
        }
        PrepDF = pd.DataFrame(Data)

        # Prepare data for estimating precipitation occurrence (NaN values remain NaN)
        PrepDF.loc[PrepDF["Pw"] > P_Threshold, "Pw"] = 1   # Wet day = 1
        PrepDF.loc[PrepDF["Pw"] <= P_Threshold, "Pw"] = 0  # Dry day = 0
        PrepDF["Pd"] = 1 - PrepDF["Pw"]  # Dry day = 1
        # Calculate consecutive wet day
        pp = list(PrepDF["Pw"])
        PrepDF["PP"] = [pp[-1]] + pp[0:-1]
        PrepDF["PP"] = PrepDF["PP"] + PrepDF["Pw"]

        # Estimate parameters for each month
        Pw = []
        Pwd = []
        Pww = []
        Pexpon = []
        Pgamma = []
        Pweibull = []
        Plognorm = []
        for m in range(12):
            PrepDF_m = PrepDF[PrepDF.index.month == (m + 1)]

            # Precipitation occurrence
            Sum = PrepDF_m.sum()  # Default drop nan
            Nan = PrepDF_m.isnull().sum()
            TotalNum = PrepDF_m.shape[0] - Nan["P"]

            frq = PrepDF_m["PP"].value_counts()  # Default drop nan
            Pw.append(Sum["Pw"] / TotalNum)
            Pww.append(frq[2] / Sum["Pw"])
            Pwd.append(1 - frq[0] / Sum["Pd"])  # 1-Pdd

            # Precipitation amount (MLE is scipy's default "fit" method)
            # Drop all NaNs but keep every other value, whether below or above the wet/dry-day threshold.
            PrepDF_m_P = PrepDF_m[PrepDF_m["P"] > 0]["P"]
            PrepDF_m_logP = np.log(PrepDF_m_P)
            Pexpon.append(expon.fit(
                PrepDF_m_P,
                floc=0))  # returns (loc, scale); mean = loc + scale, lambda = 1/mean
            Pgamma.append(gamma.fit(PrepDF_m_P,
                                    floc=0))  # return( shape, loc, scale)
            Pweibull.append(weibull_min.fit(
                PrepDF_m_P, floc=0))  # return( shape, loc, scale)

            # Coef = weibull_min.fit(PrepDF_m_P, floc = 0)
            # x = np.linspace(min(PrepDF_m_P), max(PrepDF_m_P), 1000)
            # plt.hist(PrepDF_m_P, bins = 100,normed=True,alpha = 0.5)
            # plt.plot(x, weibull_min.pdf(x, Coef[0],Coef[1],Coef[2]))
            # plt.show()

            Plognorm.append(norm.fit(PrepDF_m_logP, loc=0,
                                     scale=1))  # return( mu, sig)

        Data = {
            "Pw": Pw,
            "Pwd": Pwd,
            "Pww": Pww,
            "exp": Pexpon,
            "gamma": Pgamma,
            "weibull": Pweibull,
            "lognorm": Plognorm
        }
        MonthlyStat = pd.DataFrame(Data,
                                   columns=Data.keys(),
                                   index=np.arange(1, 13))
        Stat[Stn]["MonthlyStat"] = MonthlyStat
        Stat[Stn]["PrepDF"] = PrepDF
        return Stat