def fit_weibull(df_speed, x, weibull_params=None):
    from numpy import exp
    from scipy.stats import weibull_min
    if not weibull_params:
        # floc=0 fixes the location at 0 so the analytic CDF below is valid
        k_shape, _, lamb_scale = weibull_params = weibull_min.fit(df_speed, floc=0)
    else:
        k_shape, _, lamb_scale = weibull_params
    bins = x
    y_weibull = weibull_min.pdf(x, *weibull_params)
    # expected probability mass per bin
    density_expected_weibull = weibull_min.cdf(bins[1:], *weibull_params) - \
        weibull_min.cdf(bins[:-1], *weibull_params)
    # Weibull CDF, equivalent to weibull_min.cdf(x, *weibull_params) when loc=0
    y_cdf_weibull = 1 - exp(-(x / lamb_scale) ** k_shape)
    return weibull_params, y_weibull, density_expected_weibull, y_cdf_weibull
def fit_weibull(data, length=10):
    from collections import defaultdict
    import numpy
    from scipy.stats import weibull_min
    # map a count to the centre of its bucket of ten (integer division keeps the
    # original Python 2 semantics)
    trans_fun = lambda x: (x - 1) // 10 + 0.5
    x = numpy.array(sorted(set(trans_fun(i) for i in numpy.arange(1, length + 1))))
    confidence_vals = [[] for i in range(len(x))]
    nums = numpy.array(list(data.groupby('user_id').apply(len).to_dict().values()))
    nums_groupped = defaultdict(list)
    for num in nums:
        nums_groupped[(num - 1) // 10].append(num)
    nums_avg = {key: numpy.mean(values) / 10.0 for (key, values) in nums_groupped.items()}
    nums_trans = [nums_avg[(num - 1) // 10] for num in nums]
    fit_values = weibull_min.fit(nums_trans, floc=0)
    fit = weibull_min.pdf(x, *fit_values)
    for i, f in enumerate(fit):
        confidence_vals[i] = f

    def _aggr(r):
        return {
            'value': r,
            'confidence_interval_min': r,
            'confidence_interval_max': r,
        }

    return {
        'serie': list(map(_aggr, confidence_vals)),
        'params': list(fit_values),
    }
def fit_weibull(df_speed, x, weibull_params=None, floc=True):
    from scipy.stats import weibull_min
    if not weibull_params:
        if floc:  # fix the location at 0 (two-parameter Weibull)
            k_shape, _, lamb_scale = weibull_params = weibull_min.fit(df_speed, floc=0)
        else:
            k_shape, _, lamb_scale = weibull_params = weibull_min.fit(df_speed)
    else:
        k_shape, _, lamb_scale = weibull_params
    y_weibull = weibull_min.pdf(x, *weibull_params)
    density_expected_weibull = weibull_min.cdf(
        x[1:], *weibull_params) - weibull_min.cdf(x[:-1], *weibull_params)
    y_cdf_weibull = weibull_min.cdf(x, *weibull_params)
    return weibull_params, y_weibull, density_expected_weibull, y_cdf_weibull
def getWeibullPdf(dataset, nbins, bins):
    # fit a two-parameter Weibull (location fixed at 0)
    shape, loc, scale = weibull_min.fit(dataset, floc=0)
    x = np.linspace(min(bins), max(bins), nbins)
    print('WEI: shape=' + str(shape) + ', loc=' + str(loc) + ', scale=' + str(scale))
    pdf = weibull_min.pdf(x, shape, loc, scale)
    return (x, pdf)
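# Hypothetical usage of getWeibullPdf above; the synthetic dataset and the histogram
# bin edges are illustrative only (np and weibull_min are assumed to be imported at
# module level, as the function requires).
import numpy as np
from scipy.stats import weibull_min

dataset = weibull_min.rvs(2.2, loc=0, scale=6.0, size=1000, random_state=4)
counts, bin_edges = np.histogram(dataset, bins=30)
x, pdf = getWeibullPdf(dataset, nbins=30, bins=bin_edges)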
def fit_and_test(rescaled_sample, sample, loc_shift, shape_rescale, optimizer, c_i): [c, loc, scale] = weibull_min.fit(-rescaled_sample, c_i, optimizer=optimizer) loc = -loc_shift + loc * shape_rescale scale *= shape_rescale ks, pVal = scipy.stats.kstest(-sample, 'weibull_min', args=(c, loc, scale)) return c, loc, scale, ks, pVal
def weibullFit(series):
    '''Estimate Weibull distribution parameters for series (pd.Series or np.array).'''
    # Relation between the scipy.stats weibull_min fit and the weibullPdf parameterisation:
    # with floc=0 (i.e. loc fixed at 0), c = k and scale = lmd.
    # Relation between the scipy.stats weibull_min fit and np.random.weibull:
    # with floc=0 (i.e. loc fixed at 0), c = a and scale = 1.
    c, loc, scale = weibull_min.fit(series, floc=0)
    k, lmd = c, scale
    return k, lmd
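# A minimal check of the parameter mapping noted above (an illustrative sketch, not
# part of the original module): samples drawn with shape k and scale lmd via numpy
# should be recovered by weibull_min.fit(..., floc=0); the true values are arbitrary.
import numpy as np
from scipy.stats import weibull_min

rng = np.random.default_rng(0)
k_true, lmd_true = 2.0, 8.0
samples = lmd_true * rng.weibull(k_true, size=5000)  # np.random.weibull draws with scale 1

k_hat, _, lmd_hat = weibull_min.fit(samples, floc=0)
print(k_hat, lmd_hat)  # should be close to (2.0, 8.0)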
def fit_weibull(df_speed, x, weibull_params=None): from scipy.stats import weibull_min if not weibull_params: k_shape, _, lamb_scale = weibull_params = weibull_min.fit(df_speed, floc=0) else: k_shape, _, lamb_scale = weibull_params y_weibull = weibull_min.pdf(x, *weibull_params) density_expected_weibull = weibull_min.cdf(x[1:], *weibull_params) - weibull_min.cdf(x[:-1], *weibull_params) # y_cdf_weibull = 1 - exp(-(x / lamb_scale) ** k_shape) y_cdf_weibull = weibull_min.cdf(x, *weibull_params) return weibull_params, y_weibull, density_expected_weibull, y_cdf_weibull
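# Hypothetical usage of the fit_weibull defined above on synthetic wind speeds; the
# sample, the evaluation grid and the parameter values are illustrative only.
import numpy as np
from scipy.stats import weibull_min

speeds = weibull_min.rvs(2.0, loc=0, scale=8.0, size=1000, random_state=1)
x = np.linspace(0, speeds.max(), 50)

params, pdf_vals, bin_probs, cdf_vals = fit_weibull(speeds, x)
print(params)           # (shape, loc ~ 0, scale)
print(bin_probs.sum())  # total probability mass across the x bins, close to 1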
def fit_weibull_and_ecdf(df_speed, x=None):
    from numpy import linspace, exp
    from scipy.stats import weibull_min
    import statsmodels.api as sm

    max_speed = df_speed.max()
    if x is None:
        x = linspace(0, max_speed, 20)
    # Fit Weibull (floc=0 fixes the location so the analytic CDF below is valid)
    k_shape, _, lamb_scale = weibull_params = weibull_min.fit(df_speed, floc=0)
    y_weibull = weibull_min.pdf(x, *weibull_params)
    y_cdf_weibull = 1 - exp(-(x / lamb_scale) ** k_shape)  # Weibull cdf
    # Fit ECDF
    y_ecdf = sm.distributions.ECDF(df_speed)(x)
    return x, y_weibull, y_cdf_weibull, weibull_params, y_ecdf
def fit_distribution(data, fit_type, x_min, x_max, n_points=1000):
    # Initialization of the variables
    param, x, cdf, pdf = [-1, -1, -1, -1]

    if fit_type == 'exponweib':
        x = np.linspace(x_min, x_max, n_points)
        # Fit data to the theoretical distribution
        param = exponweib.fit(data, 1, 1, scale=2, loc=0)
        # param = exponweib.fit(data, fa=1, floc=0)
        # param = exponweib.fit(data)
        cdf = exponweib.cdf(x, param[0], param[1], param[2], param[3])
        pdf = exponweib.pdf(x, param[0], param[1], param[2], param[3])
    elif fit_type == 'lognorm':
        x = np.linspace(x_min, x_max, n_points)
        # Fit data to the theoretical distribution
        param = lognorm.fit(data, loc=0)
        cdf = lognorm.cdf(x, param[0], param[1], param[2])
        pdf = lognorm.pdf(x, param[0], param[1], param[2])
    elif fit_type == 'norm':
        x = np.linspace(x_min, x_max, n_points)
        # Fit data to the theoretical distribution
        param = norm.fit(data, loc=0)
        cdf = norm.cdf(x, param[0], param[1])
        pdf = norm.pdf(x, param[0], param[1])
    elif fit_type == 'weibull_min':
        x = np.linspace(x_min, x_max, n_points)
        # Fit data to the theoretical distribution
        param = weibull_min.fit(data, floc=0)
        cdf = weibull_min.cdf(x, param[0], param[1], param[2])
        pdf = weibull_min.pdf(x, param[0], param[1], param[2])

    return param, x, cdf, pdf
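# Illustrative call of fit_distribution above; the synthetic sample and the import
# aliases (np plus the scipy.stats distributions) are assumptions matching the names
# the function relies on.
import numpy as np
from scipy.stats import exponweib, lognorm, norm, weibull_min

data = weibull_min.rvs(1.8, loc=0, scale=3.0, size=2000, random_state=0)
param, x, cdf, pdf = fit_distribution(data, 'weibull_min', x_min=0.0, x_max=data.max())
print(param)  # (shape, loc=0, scale)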
def run_Parametric(story_id, data):
    print("[" + str(story_id) + "]Fitting Fisk")
    fisk_params = fisk.fit(data, floc=0)
    fisk_nll = fisk.nnlf(fisk_params, data)
    fisk_rvs = fisk.rvs(*fisk_params, size=data.shape[0])
    ks_fisk = ks_2samp(data, fisk_rvs)
    bic_fisk = compute_BIC(data, len(fisk_params), fisk_nll)

    print("[" + str(story_id) + "]Fitting IG")
    ig_params = invgauss.fit(data, floc=0)
    ig_nll = invgauss.nnlf(ig_params, data)
    ig_rvs = invgauss.rvs(*ig_params, size=data.shape[0])
    ks_ig = ks_2samp(data, ig_rvs)
    bic_ig = compute_BIC(data, len(ig_params), ig_nll)

    print("[" + str(story_id) + "]Fitting LN")
    ln_params = lognorm.fit(data, floc=0)
    ln_nll = lognorm.nnlf(ln_params, data)
    ln_rvs = lognorm.rvs(*ln_params, size=data.shape[0])
    ks_ln = ks_2samp(data, ln_rvs)
    bic_ln = compute_BIC(data, len(ln_params), ln_nll)

    print("[" + str(story_id) + "]Fitting Weibull")
    weib_params = weibull_min.fit(data, floc=0)
    weib_nll = weibull_min.nnlf(weib_params, data)
    weib_rvs = weibull_min.rvs(*weib_params, size=data.shape[0])
    ks_weib = ks_2samp(data, weib_rvs)
    bic_weib = compute_BIC(data, len(weib_params), weib_nll)

    print("[" + str(story_id) + "]Fitting Gamma")
    gamma_params = gamma.fit(data, floc=0)
    gamma_nll = gamma.nnlf(gamma_params, data)
    gamma_rvs = gamma.rvs(*gamma_params, size=data.shape[0])
    ks_gamma = ks_2samp(data, gamma_rvs)
    bic_gamma = compute_BIC(data, len(gamma_params), gamma_nll)

    return [
        fisk_nll, ig_nll, ln_nll, weib_nll, gamma_nll, ks_fisk, ks_ig, ks_ln,
        ks_weib, ks_gamma, bic_fisk, bic_ig, bic_ln, bic_weib, bic_gamma,
        fisk_params, ig_params, ln_params, weib_params, gamma_params
    ]
def calculateAndPlotHistogram(self, data, title=None):
    # sum of the first two central moments, used as the initial shape guess for the fit
    momentsSum: float = 0
    for i in range(1, 3):
        momentsSum += scipy.stats.moment(data, moment=i, axis=0, nan_policy='propagate')
    hist_velocity, bin_edges = np.histogram(data, density=True, bins=[i for i in range(18)])
    shape, loc, scale = weibull_min.fit(data, momentsSum, floc=0)
    x = np.linspace(0, 18, 100)
    f = plt.figure()
    plt.bar(bin_edges[:-1], hist_velocity)
    plt.plot(x, weibull_min(shape, loc, scale).pdf(x), 'r')
    plt.title(title + '\n' + f'Shape: {round(shape, 4)}, scale: {round(scale, 4)}')
    plt.xlabel('Wind velocity [m/s]')
    plt.grid()
    plt.show()
    self.pdf.savefig(f, bbox_inches='tight')
def openmax_param(model, trainx, trainy):
    import numpy as np
    import keras
    class_num = len(np.array(model.weights[-1]))  # number of classes
    if len(np.shape(trainx)) != 4:
        trainx = np.expand_dims(trainx, axis=-1)
    if len(trainy[0]) == 1:
        from keras.utils import to_categorical
        trainy = to_categorical(trainy, class_num)
    x_predict = model.predict(trainx)
    corr_ind = np.where(np.argmax(trainy, axis=-1) == np.argmax(x_predict, axis=-1))
    ver_X_train = trainx[corr_ind]
    ver_Y_train = trainy[corr_ind]  # Step 1: keep only correctly classified data

    new_model = keras.models.Sequential(model.layers[:-1])  # drop the final layer
    new_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    logit_vector = np.array(new_model.predict(ver_X_train))

    logit_matrix = [[] for _ in range(class_num)]
    for i in range(len(ver_X_train)):  # save logit vectors by class
        idx = np.argmax(ver_Y_train[i])
        logit_matrix[idx] = logit_matrix[idx] + [logit_vector[i]]

    mean_vector = []
    for i in range(len(logit_matrix)):  # compute the per-class mean vector
        mean_vector.append(np.array(logit_matrix[i]).mean(axis=0))

    distance_matrix = [[] for _ in range(class_num)]
    for idx in range(len(logit_matrix)):
        for logit in logit_matrix[idx]:  # save the distance to the class mean
            # `distance` is assumed to be defined elsewhere in the module
            distance_ = distance(logit, mean_vector[idx])
            distance_matrix[idx] = distance_matrix[idx] + [distance_]
    for i in range(len(distance_matrix)):  # sort
        distance_matrix[i] = np.sort(np.array(distance_matrix[i]))

    hyparam = [[] for _ in range(class_num)]
    from scipy.stats import weibull_min
    for i in range(len(distance_matrix)):  # fit a Weibull to the largest distances
        temp = weibull_min.fit(distance_matrix[i][-20:])
        hyparam[i] = hyparam[i] + list(temp)
    return hyparam, new_model, class_num, mean_vector
def fit_weibull_loop(data):
    loop_false = (data > 0)
    loop_true = (data < 0)
    data_false = data[loop_false]
    data_true = data[loop_true]
    loop_prob = np.sum(loop_false) / float(data.shape[0])

    weib_trunc_fit = np.array([np.nan, 0.0, np.nan])
    nll_false = 0
    nll_true = 0
    if np.sum(loop_false) > 0:
        trunc_data = data_false[data_false < 1.0]
        prior = (len(data_false) - len(trunc_data)) / float(len(data_false))
        if trunc_data.shape[0] > 0:
            distribution = TruncatedWeibull_Prior
            rv_weib_false = distribution(trunc_data)
            res_weib_false = rv_weib_false.fit()
            weib_trunc_fit = np.array(
                [prior, res_weib_false.params[0], res_weib_false.params[1]])
            nll_false = np.sum(rv_weib_false.nloglikeobs(res_weib_false.params))

    if np.sum(loop_true) > 0:
        weib_trueloop_fit = weibull_min.fit(np.abs(data_true), floc=0.0)
        # negative log-likelihood of the fitted Weibull (small constant avoids log(0))
        nll_true = -np.sum(
            np.log(
                weibull_min.pdf(np.abs(data_true), weib_trueloop_fit[0],
                                weib_trueloop_fit[1], weib_trueloop_fit[2]) + 1e-200))
    else:
        weib_trueloop_fit = np.array([np.nan, 0.0, np.nan])

    nll = nll_false + nll_true
    return [loop_prob, weib_trunc_fit, weib_trueloop_fit, nll]
def fit_weibull(series, c=0, floc=0, scale=1, title=""):
    """
    Fits a Weibull distribution, initialized with the given parameters,
    and plots the fitted distribution against the ground-truth data.

    Parameters
    ----------
    series : pandas Series
        Series containing the ground-truth values.

    Returns
    -------
    params : dict
        Contains the fitted parameters.
    """
    # Fit distribution (location is fixed at floc)
    (c, loc, scale) = weibull_min.fit(series, c, floc=floc, scale=scale)

    # Plot
    ax = plt.figure(figsize=(12, 6)).gca()
    # Histogram of the data: use as many bins as days
    bins = int(series.max() - series.min() + 1)
    n, bins, patches = plt.hist(series, bins, facecolor='green', alpha=1, density=True)
    # Add a 'best fit' line
    y = weibull_min.pdf(bins, c, loc, scale)
    l = plt.plot(bins, y, 'r--', linewidth=2)
    plt.xlabel("Days")
    plt.title(title)
    # Only integer days
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    # Store parameters
    params = {"c": c, "scale": scale}
    return params
def clever_t(classifier, x, target_class, nb_batches, batch_size, radius, norm, c_init=1, pool_factor=10): """ Compute CLEVER score for a targeted attack. Paper link: https://arxiv.org/abs/1801.10578 :param classifier: A trained model :type classifier: :class:`.Classifier` :param x: One input sample :type x: `np.ndarray` :param target_class: Targeted class :type target_class: `int` :param nb_batches: Number of repetitions of the estimate :type nb_batches: `int` :param batch_size: Number of random examples to sample per batch :type batch_size: `int` :param radius: Radius of the maximum perturbation :type radius: `float` :param norm: Current support: 1, 2, np.inf :type norm: `int` :param c_init: Initialization of Weibull distribution :type c_init: `float` :param pool_factor: The factor to create a pool of random samples with size pool_factor x n_s :type pool_factor: `int` :return: CLEVER score :rtype: `float` """ # Check if the targeted class is different from the predicted class y_pred = classifier.predict(np.array([x]), logits=True) pred_class = np.argmax(y_pred, axis=1)[0] if target_class == pred_class: raise ValueError("The targeted class is the predicted class.") # Check if pool_factor is smaller than 1 if pool_factor < 1: raise ValueError("The `pool_factor` must be larger than 1.") # Some auxiliary vars grad_norm_set = [] dim = reduce(lambda x_, y: x_ * y, x.shape, 1) shape = [pool_factor * batch_size] shape.extend(x.shape) # Generate a pool of samples rand_pool = np.reshape(random_sphere(nb_points=pool_factor * batch_size, nb_dims=dim, radius=radius, norm=norm), shape) rand_pool += np.repeat(np.array([x]), pool_factor * batch_size, 0) rand_pool = rand_pool.astype(NUMPY_DTYPE) np.clip(rand_pool, classifier.clip_values[0], classifier.clip_values[1], out=rand_pool) # Change norm since q = p / (p-1) if norm == 1: norm = np.inf elif norm == np.inf: norm = 1 elif norm != 2: raise ValueError("Norm {} not supported".format(norm)) # Loop over the batches for _ in range(nb_batches): # Random generation of data points sample_xs = rand_pool[np.random.choice(pool_factor * batch_size, batch_size)] # Compute gradients grads = classifier.class_gradient(sample_xs, logits=True) if np.isnan(grads).any(): raise Exception("The classifier results NaN gradients.") grad = grads[:, pred_class] - grads[:, target_class] grad = np.reshape(grad, (batch_size, -1)) grad_norm = np.max(np.linalg.norm(grad, ord=norm, axis=1)) grad_norm_set.append(grad_norm) # Maximum likelihood estimation for max gradient norms [_, loc, _] = weibull_min.fit(-np.array(grad_norm_set), c_init, optimizer=scipy_optimizer) # Compute function value values = classifier.predict(np.array([x]), logits=True) value = values[:, pred_class] - values[:, target_class] # Compute scores s = np.min([-value[0] / loc, radius]) return s
# Exponential function def exp3(x, a, b, c): return a + b * np.exp(c * x) #%% Read dataset A, B or C. DATASET_CHAR = 'A' file_path = '../datasets/' + DATASET_CHAR + '.txt' sample_hs, sample_tz, label_hs, label_tz = read_dataset(file_path) df = pd.read_csv(file_path, sep='; ') #%% Inspect the marginal distributions weib_par1 = weibull_min.fit(df[df.columns[1]], loc=0) logn_par1 = lognorm.fit(df[df.columns[1]], loc=0) weib_par2 = weibull_min.fit(df[df.columns[2]], loc=0) logn_par2 = lognorm.fit(df[df.columns[2]], loc=0) #%% Goodness of fit print(kstest(df[df.columns[1]].values, 'weibull_min', args=weib_par1)) print(kstest(df[df.columns[1]].values, 'lognorm', args=logn_par1)) print(kstest(df[df.columns[2]].values, 'weibull_min', args=weib_par2)) print(kstest(df[df.columns[2]].values, 'lognorm', args=logn_par2)) #%% Plot the distributions #n_bins = 100
# Number of estimated parameters per distribution (the k in the AIC formula);
# with floc=0 each distribution has two free parameters.
num_params = 2

# Parameter estimates for generic data
shape1, loc1, scale1 = lognorm.fit(data2, floc=0)
mu1 = np.log(scale1)
sigma1 = shape1
y1 = lognorm.pdf(data2, s=sigma1, scale=np.exp(mu1))
log_likelihood1 = np.sum(np.log(y1))
print("Lognorm loglikelihood = " + str(log_likelihood1))
aic1 = -2 * log_likelihood1 + 2 * num_params
print("Lognorm AIC = " + str(aic1))

# https://stackoverflow.com/questions/33070724/determine-weibull-parameters-from-data
# Parameter estimates for generic data
shape2, loc2, scale2 = weibull_min.fit(data2, floc=0)
c = shape2
b = scale2
y2 = weibull_min.pdf(data2, c, scale=b)
log_likelihood2 = np.sum(np.log(y2))
print("Weibull loglikelihood = " + str(log_likelihood2))
aic2 = -2 * log_likelihood2 + 2 * num_params
print("Weibull AIC = " + str(aic2))

# https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.invgauss.html
# The argument floc=0 ensures that the location is not treated as a free parameter.
# Parameter estimates for generic data
shape3, loc3, scale3 = invgauss.fit(data2, floc=0)
mu3 = shape3
lambda3 = scale3
# fitting the data with the estimated parameters
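# Cross-check (a sketch reusing data2 and num_params from the block above): scipy
# exposes the negative log-likelihood directly, so the Weibull AIC can also be
# computed without evaluating the pdf by hand.
weib_params = weibull_min.fit(data2, floc=0)
log_likelihood2_alt = -weibull_min.nnlf(weib_params, data2)
aic2_alt = 2 * num_params - 2 * log_likelihood2_alt
print("Weibull AIC (via nnlf) = " + str(aic2_alt))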
u1_50 = beta50*np.sin(phi) theta_v_ij = np.zeros(shape=(NR_OF_BOOTSTRAP_SAMPLES, int(360/ANGLE_STEP_FOR_CI))) theta_hs_ij = np.zeros(shape=(NR_OF_BOOTSTRAP_SAMPLES, int(360/ANGLE_STEP_FOR_CI))) def power3(x, a, b, c): return a + b * x ** c for i in range(NR_OF_BOOTSTRAP_SAMPLES): # Resample from the hindcast dataset to get the sample D_i. sample_indices = np.random.randint(dataset_d_v.size, size=nr_of_datapoints_to_draw) v_i = np.take(dataset_d_v, sample_indices) hs_i = np.take(dataset_d_hs, sample_indices) # Fit Weibull to Hs: weib_par2 = weibull_min.fit(hs_i, loc=0) # Find the conditional Weibull for V: h_min = hs_i.min() h_max = hs_i.max() h_bins = np.arange(np.floor(h_min), np.ceil(h_max), bin_size) + bin_size/2 h_binedges = h_bins + bin_size/2 h_ind_bin = np.digitize(hs_i, bins=h_binedges) unique, counts = np.unique(h_ind_bin, return_counts=True) ind_min_bin = unique[counts>10][0] ind_max_bin = unique[counts>10][-1] x_bins = h_bins[ind_min_bin:ind_max_bin+1] real_bins = np.zeros(len(x_bins))
    7.21, 7.88, 7.47, 8.16, 8.73, 9.91, 10.11, 8.51, 8.11, 7, 5.98, 5.07, 4.66,
    4.16, 3.69, 3.3, 2.54, 1.89, 1.89, 1.95, 2.34, 2.04, 2.66, 3.3, 3.58, 3.87,
    1.99, 2.1, 2.99, 3.46, 3.26, 2.43, 1.78, 1.39, 0.93, 0.84, 1.14, 1.7, 1.7,
    1.13, 1.34, 1.34, 1.49, 1.72, 1.79, 2.01, 1.34, 0.79, 0.35, 0.93, 4.76, 5.47,
    6.11, 6.31, 5.46, 4.7, 4.47, 4.19, 3.72, 3.26, 2.75, 2.28, 1.35, 0.88, 0.43,
    0.89, 1.14, 1.56, 2.01, 1.34, 1.14, 1.05, 2.21, 3.21, 5.9, 6.99, 6.77, 5.81,
    4.48, 3.15, 2.02, 1.64, 1.54, 2.1, 2.29, 2.48, 2.56, 2.43, 2.43, 2.28, 2.52,
    3.02, 3.2, 2.88, 4.01, 5.07, 7.1, 9.31, 7.16, 8.7, 9.87, 10.48, 9.91, 8.12,
    6.79, 6.01, 5.42, 4.87, 4.48, 4.87, 5.1, 4.93, 4.61, 4.73, 4.32, 4.19, 3.96,
    3.25, 4.36, 7.09, 10.11, 12.33, 12.1, 13.34, 13.61, 12.52, 8.9, 6.2, 5.42,
    4.85, 4.48, 3.55, 2.69, 2.34, 1.54, 1.5, 1.96, 2.19, 2.19, 2.47, 2.34, 2.12,
    2.08, 2.37, 2.18, 3.37, 5.43, 6.58, 7.35, 7.4, 6.46
]

(shape, loc, scale) = weibull_min.fit(windspeed)  # parameters fitted to the data
print((shape, loc, scale))

plt.figure(1)
plt.subplot(1, 2, 1)
plt.title("Histogram of the raw wind speed data")
plt.hist(windspeed, density=True, histtype='stepfilled', alpha=0.2)
plt.subplot(1, 2, 2)
plt.title("Raw wind speed data")
plt.plot(np.arange(0, len(windspeed), 1), windspeed)
# endregion

# region 3. Generate a new set of Weibull data from the fitted parameters
windspeed_new = weibull_min.rvs(shape, loc=0, scale=scale, size=100)
plt.figure(2)
plt.subplot(1, 2, 1)
from scipy.stats import weibull_min
from scipy.stats import norm
from scipy.stats import lognorm
from matplotlib.patches import Rectangle

n, bins, patches = plt.hist(df['price'], 500, density=1, facecolor='b', alpha=.75)

# Overlay distributions that might be an appropriate fit
x = np.linspace(df['price'].min(), df['price'].max(), 100)

# Weibull distribution
shape, loc, scale = weibull_min.fit(df['price'], floc=0)
plt.plot(x, weibull_min(shape, loc, scale).pdf(x), color='g')

# Normal distribution (norm.fit returns loc and scale; there is no shape parameter)
loc, scale = norm.fit(df['price'])
plt.plot(x, norm(loc, scale).pdf(x), color='r')

# Lognormal distribution
shape, loc, scale = lognorm.fit(df['price'], floc=0)
plt.plot(x, lognorm(shape, loc, scale).pdf(x), color='y')

plt.xlabel('Price')
plt.ticklabel_format(style='plain')
plt.xticks(rotation='vertical')
plt.ylabel('Probability')
plt.title('Histogram of All Prices\nAnd Some Distributions')
plt.ylabel('HDI')
plt.scatter(digdev, idh, c=cibsec, s=cibsec * 100)
plt.show()

#%%
plt.figure()
loc, scale = expon.fit(np.divide(1, milpow))
mu_mil = expon.mean(loc=loc, scale=scale)
plt.hist(np.divide(1, milpow), density=1, label='Mean=%.2f' % mu_mil)
x = np.linspace(min(np.divide(1, milpow)), max(np.divide(1, milpow)))
plt.plot(x, expon.pdf(x, loc=loc, scale=scale))
plt.title('Military Power')
plt.legend(loc='best')
plt.show()

#%%
plt.figure()
c, loc, scale = weibull_min.fit(cibsec)
mu_cib = weibull_min.mean(c, loc=loc, scale=scale)
plt.hist(cibsec, density=1, label='Mean=%.2f' % mu_cib)
x_cib = np.linspace(min(cibsec), max(cibsec))
plt.plot(x_cib, weibull_min.pdf(x_cib, c, loc=loc, scale=scale))
plt.title('Cybersecurity')
plt.legend(loc='best')
plt.show()

#%%
plt.figure()
c_dsv, loc_dsv, scale_dsv = weibull_min.fit(digdev)
mu_dsv = weibull_min.mean(c_dsv, loc=loc_dsv, scale=scale_dsv)
plt.hist(digdev, density=1, label='Mean=%.2f' % mu_dsv)
x_dsv = np.linspace(min(digdev), max(digdev))
plt.title('Digital Development')
plt.plot(x_dsv, weibull_min.pdf(x_dsv, c_dsv, loc=loc_dsv, scale=scale_dsv))
def from_durations2(cls, durations): from scipy.stats import weibull_min param = weibull_min.fit(durations, floc=0) alpha = param[0] beta = param[2] return cls(alpha, beta)
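# Sketch of how the classmethod above might be used. The owning class is not shown
# in the snippet, so WeibullModel below is a hypothetical stand-in; alpha is the
# fitted shape and beta the fitted scale (location fixed at 0).
from scipy.stats import weibull_min

durations = weibull_min.rvs(1.7, loc=0, scale=4.0, size=300, random_state=5)
model = WeibullModel.from_durations2(durations)  # hypothetical class name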
def fit_weibull(group): shape, loc, scale = weibull_min.fit(group['time'].values, floc=0) return pd.Series({ 'shape': shape, 'scale': scale, })
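# Hypothetical usage of the per-group fit above; the column names ('group', 'time')
# and the synthetic data are illustrative only.
import numpy as np
import pandas as pd
from scipy.stats import weibull_min

df = pd.DataFrame({
    'group': np.repeat(['a', 'b'], 500),
    'time': weibull_min.rvs(1.5, loc=0, scale=10.0, size=1000, random_state=2),
})
params_by_group = df.groupby('group').apply(fit_weibull)
print(params_by_group)  # one (shape, scale) row per group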
def downtime_accepted_models(D=list(), alpha=.05): params = list() params.append(uniform.fit(D)) params.append(expon.fit(D)) params.append(rayleigh.fit(D)) params.append(weibull_min.fit(D)) params.append(gamma.fit(D)) params.append(gengamma.fit(D)) params.append(invgamma.fit(D)) params.append(gompertz.fit(D)) params.append(lognorm.fit(D)) params.append(exponweib.fit(D)) llf_value = list() llf_value.append(log(product(uniform.pdf(D, *params[0])))) llf_value.append(log(product(expon.pdf(D, *params[1])))) llf_value.append(log(product(rayleigh.pdf(D, *params[2])))) llf_value.append(log(product(weibull_min.pdf(D, *params[3])))) llf_value.append(log(product(gamma.pdf(D, *params[4])))) llf_value.append(log(product(gengamma.pdf(D, *params[5])))) llf_value.append(log(product(invgamma.pdf(D, *params[6])))) llf_value.append(log(product(gompertz.pdf(D, *params[7])))) llf_value.append(log(product(lognorm.pdf(D, *params[8])))) llf_value.append(log(product(exponweib.pdf(D, *params[9])))) AIC = list() AIC.append(2 * len(params[0]) - 2 * llf_value[0]) AIC.append(2 * len(params[1]) - 2 * llf_value[1]) AIC.append(2 * len(params[2]) - 2 * llf_value[2]) AIC.append(2 * len(params[3]) - 2 * llf_value[3]) AIC.append(2 * len(params[4]) - 2 * llf_value[4]) AIC.append(2 * len(params[5]) - 2 * llf_value[5]) AIC.append(2 * len(params[6]) - 2 * llf_value[6]) AIC.append(2 * len(params[7]) - 2 * llf_value[7]) AIC.append(2 * len(params[8]) - 2 * llf_value[8]) AIC.append(2 * len(params[9]) - 2 * llf_value[9]) model = list() model.append( ["uniform", params[0], kstest(D, "uniform", params[0])[1], AIC[0]]) model.append( ["expon", params[1], kstest(D, "expon", params[1])[1], AIC[1]]) model.append( ["rayleigh", params[2], kstest(D, "rayleigh", params[2])[1], AIC[2]]) model.append([ "weibull_min", params[3], kstest(D, "weibull_min", params[3])[1], AIC[3] ]) model.append( ["gamma", params[4], kstest(D, "gamma", params[4])[1], AIC[4]]) model.append( ["gengamma", params[5], kstest(D, "gengamma", params[5])[1], AIC[5]]) model.append( ["invgamma", params[6], kstest(D, "invgamma", params[6])[1], AIC[6]]) model.append( ["gompertz", params[7], kstest(D, "gompertz", params[7])[1], AIC[7]]) model.append( ["lognorm", params[8], kstest(D, "lognorm", params[8])[1], AIC[8]]) model.append( ["exponweib", params[9], kstest(D, "exponweib", params[9])[1], AIC[9]]) accepted_models = [i for i in model if i[2] > alpha] if accepted_models: aic_values = [i[3] for i in accepted_models] final_model = min(range(len(aic_values)), key=aic_values.__getitem__) return accepted_models, accepted_models[final_model] elif not accepted_models: aic_values = [i[3] for i in model] final_model = min(range(len(aic_values)), key=aic_values.__getitem__) return model, model[final_model]
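# Illustrative call of downtime_accepted_models above on synthetic downtime data.
# The distribution objects (uniform, expon, ..., exponweib), kstest, and numpy's
# log/product are assumed to be imported in the module, as the function requires.
from scipy.stats import weibull_min

D = weibull_min.rvs(1.3, loc=0, scale=5.0, size=200, random_state=3).tolist()
models, best = downtime_accepted_models(D, alpha=0.05)
print(best[0], best[3])  # name and AIC of the selected model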
def fit(s, H, T, **kwargs):
    # Create the lognormal distribution of H
    Hscale, Hloc, Hshape = lognorm.fit(H, floc=0)  # fit
    s.fln_H = lognorm(Hscale, Hloc, Hshape)  # create the distribution
    s.fln_H_fit = 'xi = %.4f, lambda = %.4f' % (Hscale, np.log(Hshape))

    # Create the Weibull distribution of H
    Hscale, Hloc, Hshape = weibull_min.fit(H, floc=0)  # fit
    s.fwei_H = weibull_min(Hscale, Hloc, Hshape)  # create the distribution
    s.fwei_H_fit = 'lambda = %.4f, alpha = %.4f' % (Hscale, Hshape)

    s.H = H
    s.T = T

    # Determine the histogram
    s.bins = 20
    s.rangeH = [0, 10]
    s.rangeT = [0, 20]
    if 'bins' in kwargs:
        s.bins = kwargs['bins']
    if 'rangeH' in kwargs:
        s.rangeH = kwargs['rangeH']
    if 'rangeT' in kwargs:
        s.rangeT = kwargs['rangeT']
    s.H_hist_y, s.H_hist_x = np.histogram(H, s.bins, density=True, range=s.rangeH)
    s.H_hist_xM = s.H_hist_x[:-1] + s.H_hist_x[0:2].mean()
    s.T_hist_y, s.T_hist_x = np.histogram(T, s.bins, density=True, range=s.rangeT)
    s.T_hist_xM = s.T_hist_x[:-1] + s.T_hist_x[0:2].mean()

    # Split T conditional on H and estimate the distribution parameters per H bin
    dft = pd.DataFrame(dict(H=H, T=T))
    ln_param = []
    wei_param = []
    for ix, aux in enumerate(s.H_hist_x):
        if ix == len(s.H_hist_x) - 1:
            break
        temp = dft[np.logical_and(dft.H > s.H_hist_x[ix],
                                  dft.H < s.H_hist_x[ix + 1])]['T'].values
        if len(temp) > 50:
            # the list holds [xi, loc, lamb*e, x position (conditioning Hs)]
            # xi = [0], loc = [1], lamb = [2], xpos = [3]
            ln_param.append(
                lognorm.fit(temp, floc=0) + tuple([s.H_hist_xM[ix]]))
            # the list holds [lamb, loc, alpha, conditioning Hs]
            # lambw = [0], loc = [1], alpha = [2], xpos = [3]
            wei_param.append(
                weibull_min.fit(temp, floc=0) + tuple([s.H_hist_xM[ix]]))

    # Build the functions for the conditional distribution parameters
    s.Tfxi = np.poly1d(
        np.polyfit([aux[3] for aux in ln_param],
                   [aux[0] for aux in ln_param], 3))
    s.Tflamb = np.poly1d(
        np.polyfit([aux[3] for aux in ln_param],
                   [aux[2] for aux in ln_param], 3))
    s.Tflambw = np.poly1d(
        np.polyfit([aux[3] for aux in wei_param],
                   [aux[0] for aux in wei_param], 3))
    s.Tfalphaw = np.poly1d(
        np.polyfit([aux[3] for aux in wei_param],
                   [aux[2] for aux in wei_param], 3))
    s.dl = pd.DataFrame(ln_param)
    s.dw = pd.DataFrame(wei_param)

    # Use the marginal and conditional distributions chosen by the user
    if 'tipofH' in kwargs:
        if kwargs['tipofH'] == 'weibull':
            s.fH = s.fwei_H
            s.fHtype = 'weibull'
        if kwargs['tipofH'] == 'lognormal':
            s.fH = s.fln_H
            s.fHtype = 'lognormal'
    else:
        s.fH = s.fln_H
        s.fHtype = 'lognormal'

    if 'tipofT' in kwargs:
        if kwargs['tipofT'] == 'weibull':
            s.fT = lambda h: weibull_min(s.Tflambw(h), 0, s.Tfalphaw(h))
            s.fTtype = 'weibull'
        if kwargs['tipofT'] == 'lognormal':
            s.fT = lambda h: lognorm(s.Tfxi(h), 0, s.Tflamb(h))
            s.fTtype = 'lognormal'
    else:
        s.fT = lambda h: lognorm(s.Tfxi(h), 0, s.Tflamb(h))
        s.fTtype = 'lognormal'
def fit(s, H, T, **kwargs):
    # Fit the Hs parameters for the lognormal and Weibull distributions
    scaleH, lH, shapeH = lognorm.fit(H, floc=0)  # fit
    s.fln_H = lognorm(scaleH, lH, shapeH)  # create the distribution
    s.fln_H_fit = '%.4f,%.4f' % (scaleH, np.log(shapeH))
    scaleH, lH, shapeH = weibull_min.fit(H, floc=0)  # fit
    s.fwei_H = weibull_min(scaleH, lH, shapeH)  # create the distribution
    s.fwei_H_fit = '%.4f,%.4f' % (scaleH, shapeH)

    # Fit the Tp parameters for the lognormal and Weibull distributions
    scaleT, lT, shapeT = lognorm.fit(T, floc=0)
    s.fln_T = lognorm(scaleT, lT, shapeT)  # create the lognormal distribution of Tp
    s.fln_T_fit = '%.4f,%.4f' % (scaleT, np.log(shapeT))
    scaleT, lT, shapeT = weibull_min.fit(T, floc=0)
    s.fwei_T = weibull_min(scaleT, lT, shapeT)
    s.fwei_T_fit = '%.4f,%.4f' % (scaleT, shapeT)

    s.H = H  # store the Hs series internally
    s.T = T  # store the Tp series internally

    # Use the distribution chosen by the user
    if 'tipofH' in kwargs:
        if kwargs['tipofH'] == 'weibull':
            s.fH = s.fwei_H
            s.fHtype = 'weibull'
        if kwargs['tipofH'] == 'lognormal':
            s.fH = s.fln_H
            s.fHtype = 'lognormal'
    else:
        s.fH = s.fln_H
        s.fHtype = 'lognormal'

    if 'tipofT' in kwargs:
        if kwargs['tipofT'] == 'weibull':
            s.fT = s.fwei_T
            s.fTtype = 'weibull'
        if kwargs['tipofT'] == 'lognormal':
            s.fT = s.fln_T
            s.fTtype = 'lognormal'
    else:
        s.fT = s.fln_T
        s.fTtype = 'lognormal'

    # Set bins and ranges automatically or take them from kwargs
    s.bins = 20
    s.rangeH = [0, 10]
    s.rangeT = [0, 20]
    if 'bins' in kwargs:
        s.bins = kwargs['bins']
    if 'rangeH' in kwargs:
        s.rangeH = kwargs['rangeH']
    if 'rangeT' in kwargs:
        s.rangeT = kwargs['rangeT']

    # Find rhobru, the correlation between H and T, used in the joint pdf
    s.rhobru = scipy.stats.pearsonr(H, T)[0]
    # s.rhobru = scipy.stats.spearmanr(H,T)[0]

    # Define a standard normal used to build u1 and u2 (i.e. uh and ut)
    s.N = norm(0, 1)
    s.phi_1 = lambda u: 1 / (np.sqrt(2 * np.pi)) * np.exp(-u**2 / 2)
    s.phi_2 = lambda u1, u2, rhobru: (2 * np.pi * np.sqrt(
        1 - rhobru**2))**-1 * np.exp(
            (-2 * (1 - rhobru**2))**-1 *
            (u1**2 + u2**2 - 2 * rhobru * u1 * u2))
def clever_t(x, classifier, target_class, n_b, n_s, r, sess, c_init=1): """ Compute CLEVER score for a targeted attack. Paper link: https://arxiv.org/abs/1801.10578 :param x: One input sample :type x: `np.ndarray` :param classifier: A trained model :type classifier: :class:`Classifier` :param target_class: Targeted class :type target_class: `int` :param n_b: Batch size :type n_b: `int` :param n_s: Number of examples per batch :type n_s: `int` :param r: Maximum perturbation :type r: `float` :param sess: The session to run graphs in :type sess: `tf.Session` :param c_init: Initialization of Weibull distribution :type c_init: `float` :return: A tuple of 3 CLEVER scores, corresponding to norms 1, 2 and np.inf :rtype: `tuple` """ # Check if the targeted class is different from the predicted class y_pred = classifier.predict(np.array([x])) pred_class = np.argmax(y_pred, axis=1)[0] if target_class == pred_class: raise ValueError("The targeted class is the predicted class!") # Define placeholders for computing g gradients shape = [None] shape.extend(x.shape) imgs = tf.placeholder(shape=shape, dtype=tf.float32) pred_class_ph = tf.placeholder(dtype=tf.int32, shape=[]) target_class_ph = tf.placeholder(dtype=tf.int32, shape=[]) # Define tensors for g gradients grad_norm_1, grad_norm_2, grad_norm_8, g_x = _build_g_gradient( imgs, classifier, pred_class_ph, target_class_ph) # Some auxiliary vars set1, set2, set8 = [], [], [] dim = reduce(lambda x_, y: x_ * y, x.shape, 1) shape = [n_s] shape.extend(x.shape) # Compute predicted class y_pred = classifier.predict(np.array([x])) pred_class = np.argmax(y_pred, axis=1)[0] # Loop over n_b batches for i in range(n_b): # Random generation of data points sample_xs0 = np.reshape(_random_sphere(m=n_s, n=dim, r=r), shape) sample_xs = sample_xs0 + np.repeat(np.array([x]), n_s, 0) np.clip(sample_xs, 0, 1, out=sample_xs) # Preprocess data if it is supported in the classifier if hasattr(classifier, 'feature_squeeze'): sample_xs = classifier.feature_squeeze(sample_xs) sample_xs = classifier._preprocess(sample_xs) # Compute gradients max_gn1, max_gn2, max_gn8 = sess.run( [grad_norm_1, grad_norm_2, grad_norm_8], feed_dict={ imgs: sample_xs, pred_class_ph: pred_class, target_class_ph: target_class }) set1.append(max_gn1) set2.append(max_gn2) set8.append(max_gn8) # Maximum likelihood estimation for max gradient norms [_, loc1, _] = weibull_min.fit(-np.array(set1), c_init, optimizer=scipy_optimizer) [_, loc2, _] = weibull_min.fit(-np.array(set2), c_init, optimizer=scipy_optimizer) [_, loc8, _] = weibull_min.fit(-np.array(set8), c_init, optimizer=scipy_optimizer) # Compute g_x0 x0 = np.array([x]) if hasattr(classifier, 'feature_squeeze'): x0 = classifier.feature_squeeze(x0) x0 = classifier._preprocess(x0) g_x0 = sess.run(g_x, feed_dict={ imgs: x0, pred_class_ph: pred_class, target_class_ph: target_class }) # Compute scores # Note q = p / (p-1) s8 = np.min([-g_x0[0] / loc1, r]) s2 = np.min([-g_x0[0] / loc2, r]) s1 = np.min([-g_x0[0] / loc8, r]) return s1, s2, s8
def clever_t( classifier: "CLASSIFIER_CLASS_LOSS_GRADIENTS_TYPE", x: np.ndarray, target_class: int, nb_batches: int, batch_size: int, radius: float, norm: int, c_init: float = 1.0, pool_factor: int = 10, ) -> float: """ Compute CLEVER score for a targeted attack. | Paper link: https://arxiv.org/abs/1801.10578 :param classifier: A trained model. :param x: One input sample. :param target_class: Targeted class. :param nb_batches: Number of repetitions of the estimate. :param batch_size: Number of random examples to sample per batch. :param radius: Radius of the maximum perturbation. :param norm: Current support: 1, 2, np.inf. :param c_init: Initialization of Weibull distribution. :param pool_factor: The factor to create a pool of random samples with size pool_factor x n_s. :return: CLEVER score. """ # Check if the targeted class is different from the predicted class y_pred = classifier.predict(np.array([x])) pred_class = np.argmax(y_pred, axis=1)[0] if target_class == pred_class: raise ValueError("The targeted class is the predicted class.") # Check if pool_factor is smaller than 1 if pool_factor < 1: raise ValueError("The `pool_factor` must be larger than 1.") # Some auxiliary vars rand_pool_grad_set = [] grad_norm_set = [] dim = reduce(lambda x_, y: x_ * y, x.shape, 1) shape = [pool_factor * batch_size] shape.extend(x.shape) # Generate a pool of samples rand_pool = np.reshape( random_sphere(nb_points=pool_factor * batch_size, nb_dims=dim, radius=radius, norm=norm), shape, ) rand_pool += np.repeat(np.array([x]), pool_factor * batch_size, 0) rand_pool = rand_pool.astype(ART_NUMPY_DTYPE) if hasattr(classifier, "clip_values") and classifier.clip_values is not None: np.clip(rand_pool, classifier.clip_values[0], classifier.clip_values[1], out=rand_pool) # Change norm since q = p / (p-1) if norm == 1: norm = np.inf elif norm == np.inf: norm = 1 elif norm != 2: raise ValueError("Norm {} not supported".format(norm)) # Compute gradients for all samples in rand_pool for i in range(batch_size): rand_pool_batch = rand_pool[i * pool_factor:(i + 1) * pool_factor] # Compute gradients grad_pred_class = classifier.class_gradient(rand_pool_batch, label=pred_class) grad_target_class = classifier.class_gradient(rand_pool_batch, label=target_class) if np.isnan(grad_pred_class).any() or np.isnan( grad_target_class).any(): raise Exception("The classifier results NaN gradients.") grad = grad_pred_class - grad_target_class grad = np.reshape(grad, (pool_factor, -1)) grad = np.linalg.norm(grad, ord=norm, axis=1) rand_pool_grad_set.extend(grad) rand_pool_grads = np.array(rand_pool_grad_set) # Loop over the batches for _ in range(nb_batches): # Random selection of gradients grad_norm = rand_pool_grads[np.random.choice(pool_factor * batch_size, batch_size)] grad_norm = np.max(grad_norm) grad_norm_set.append(grad_norm) # Maximum likelihood estimation for max gradient norms [_, loc, _] = weibull_min.fit(-np.array(grad_norm_set), c_init, optimizer=scipy_optimizer) # Compute function value values = classifier.predict(np.array([x])) value = values[:, pred_class] - values[:, target_class] # Compute scores score = np.min([-value[0] / loc, radius]) return score
def get_error_set(root_dir, error_suffix, sample_list, out_prefix): var_list = [] dp_0_dict = {} # include sample with vd >= 0 dp_1_dict = {} # include sample with vd >= 1 vd_dict = {} sample_0_dict = {} # include sample with vd >= 0 sample_1_dict = {} # include sample with vd >= 1 af_list_dict = {} for sample in sample_list: with open('{}/{}.{}'.format(root_dir, sample, error_suffix)) as f: for line in f: if re.match(r'^chr.*', line): line = line.strip() chrom, pos, ref, alt, dp, vd, af = line.split('\t') var = '{}_{}_{}_{}'.format(chrom, pos, ref, alt) var_list.append(var) var_list = list(set(var_list)) for var in var_list: af_list_dict[var] = [] dp_0_dict[var] = 0 dp_1_dict[var] = 0 vd_dict[var] = 0 sample_0_dict[var] = 0 sample_1_dict[var] = 0 for sample in sample_list: with open('{}/{}.{}'.format(root_dir, sample, error_suffix)) as f: for line in f: if re.match(r'^chr.*', line): line = line.strip() chrom, pos, ref, alt, dp, vd, af = line.split('\t') var = '{}_{}_{}_{}'.format(chrom, pos, ref, alt) dp_0_dict[var] += int(dp) vd_dict[var] += int(vd) sample_0_dict[var] += 1 af_list_dict[var].append(af) if float(af) > 0: dp_1_dict[var] += int(dp) sample_1_dict[var] += 1 r = open('{}/{}.error.txt'.format(root_dir, out_prefix), 'w') r.write( 'Chr\tPos\tRef\tAlt\tSample(T|0|1)\tDP(0|1)\tVD\tAF(0|1)\tAF_List\tDistribution\tMean/Shape\tSD/Scale\n' ) sample = len(sample_list) for var in sorted(var_list): chrom, pos, ref, alt = var.split('_') err_0 = sample_0_dict[var] err_1 = sample_1_dict[var] dp_0 = dp_0_dict[var] dp_1 = dp_1_dict[var] vd = vd_dict[var] af_0 = vd / float(dp_0) af_1 = 0.0 if dp_1 != 0: af_1 = vd / float(dp_1) af_list = ','.join(af_list_dict[var]) float_af_list = [] for af in af_list_dict[var]: float_af_list.append(float(af)) af_max = max(float_af_list) gas_list = [] wei_list = [] for i in float_af_list: if i != af_max: gas_list.append(i) if i != 0.0: wei_list.append(i) if len(float_af_list) == 1 or len( float_af_list) == float_af_list.count(0.0): r.write( '{}\t{}\t{}\t{}\t{}|{}|{}\t{}|{}\t{}\t{}|{}\t{}\t{}\t{}\t{}\n'. format(chrom, pos, ref, alt, sample, err_0, err_1, dp_0, dp_1, vd, af_0, af_1, af_list, 'NA', 'NA', 'NA')) else: if len(wei_list) < 5: dis = "Gaussian" mean = np.mean(gas_list) sd = np.std(gas_list) r.write( '{}\t{}\t{}\t{}\t{}|{}|{}\t{}|{}\t{}\t{}|{}\t{}\t{}\t{}\t{}\n' .format(chrom, pos, ref, alt, sample, err_0, err_1, dp_0, dp_1, vd, af_0, af_1, af_list, dis, mean, sd)) else: dis = 'Weibull' shape, loc, scale = weibull_min.fit(wei_list, floc=0) r.write( '{}\t{}\t{}\t{}\t{}|{}|{}\t{}|{}\t{}\t{}|{}\t{}\t{}\t{}\t{}\n' .format(chrom, pos, ref, alt, sample, err_0, err_1, dp_0, dp_1, vd, af_0, af_1, af_list, dis, shape, scale)) r.close()
axs[0].set_xlabel(hs_label) axs[0].set_ylabel(steepness_label) # Define a hs - steepness bivariate model dist_description_hs = { 'name': 'Weibull_Exp' } # Order: shape, loc, scale, shape2 dist_description_s = {'name': 'Weibull_3p'} from scipy.stats import weibull_min from viroconcom.distributions import WeibullDistribution from viroconcom.distributions import ExponentiatedWeibullDistribution from viroconcom.distributions import MultivariateDistribution from viroconcom.params import FunctionParam params = weibull_min.fit(steepness, floc=0.005) my_loc = FunctionParam('poly1', 0.0015, 0.002, None) dist_s = WeibullDistribution(shape=params[0], loc=my_loc, scale=params[2]) dist_hs = ExponentiatedWeibullDistribution() dist_hs.fit(hs) joint_dist = MultivariateDistribution(distributions=[dist_hs, dist_s], dependencies=[(None, None, None, None), (None, 0, None)]) # Fit the model to the data. #fit = Fit((hs, steepness), # (dist_description_hs, dist_description_s)) #joint_dist = fit.mul_var_dist trs = [1, 50, 250] fms = np.empty(shape=(3, 1))
def HisPAnalysis_Stn(Wth_obv, Setting, Stat, Stn):
    P_Threshold = Setting["P_Threshold"]
    Data = {
        "P": Wth_obv[Stn]["PP01"],  # P is the original observed data
        "Pw": Wth_obv[Stn]["PP01"]
    }
    PrepDF = pd.DataFrame(Data)

    # Prepare data for estimating precipitation occurrence (NaN values remain NaN)
    PrepDF.loc[PrepDF["Pw"] > P_Threshold, "Pw"] = 1   # Wet day = 1
    PrepDF.loc[PrepDF["Pw"] <= P_Threshold, "Pw"] = 0  # Dry day = 0
    PrepDF["Pd"] = 1 - PrepDF["Pw"]                    # Dry day = 1

    # Calculate consecutive wet days
    pp = list(PrepDF["Pw"])
    PrepDF["PP"] = [pp[-1]] + pp[0:-1]
    PrepDF["PP"] = PrepDF["PP"] + PrepDF["Pw"]

    # Estimate parameters for each month
    Pw = []
    Pwd = []
    Pww = []
    Pexpon = []
    Pgamma = []
    Pweibull = []
    Plognorm = []
    for m in range(12):
        PrepDF_m = PrepDF[PrepDF.index.month == (m + 1)]
        # Precipitation occurrence
        Sum = PrepDF_m.sum()  # drops NaN by default
        Nan = PrepDF_m.isnull().sum()
        TotalNum = PrepDF_m.shape[0] - Nan["P"]
        frq = PrepDF_m["PP"].value_counts()  # drops NaN by default
        Pw.append(Sum["Pw"] / TotalNum)
        Pww.append(frq[2] / Sum["Pw"])
        Pwd.append(1 - frq[0] / Sum["Pd"])  # 1 - Pdd

        # Precipitation amount (scipy's "fit" uses MLE by default).
        # Drop all NaN but keep every other value, whether below or above the
        # wet/dry-day threshold.
        PrepDF_m_P = PrepDF_m[PrepDF_m["P"] > 0]["P"]
        PrepDF_m_logP = np.log(PrepDF_m_P)
        Pexpon.append(expon.fit(
            PrepDF_m_P, floc=0))  # returns (loc, scale); lambda = 1/mean = 1/(scale + loc)
        Pgamma.append(gamma.fit(PrepDF_m_P, floc=0))  # returns (shape, loc, scale)
        Pweibull.append(weibull_min.fit(
            PrepDF_m_P, floc=0))  # returns (shape, loc, scale)
        # Coef = weibull_min.fit(PrepDF_m_P, floc = 0)
        # x = np.linspace(min(PrepDF_m_P), max(PrepDF_m_P), 1000)
        # plt.hist(PrepDF_m_P, bins = 100,normed=True,alpha = 0.5)
        # plt.plot(x, weibull_min.pdf(x, Coef[0],Coef[1],Coef[2]))
        # plt.show()
        Plognorm.append(norm.fit(PrepDF_m_logP, loc=0, scale=1))  # returns (mu, sig)

    Data = {
        "Pw": Pw,
        "Pwd": Pwd,
        "Pww": Pww,
        "exp": Pexpon,
        "gamma": Pgamma,
        "weibull": Pweibull,
        "lognorm": Plognorm
    }
    MonthlyStat = pd.DataFrame(Data, columns=Data.keys(), index=np.arange(1, 13))
    Stat[Stn]["MonthlyStat"] = MonthlyStat
    Stat[Stn]["PrepDF"] = PrepDF
    return Stat