import random

import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity
from tqdm import tqdm


def p_val(N, data_name, g_size, dim):
    # Estimate p-values of the normal class (label 0) against held-out test
    # data (e.g. the Sensorless data set), repeated over N trials.
    p_values = np.zeros((N, 3))
    X, y, T, yT = import_pickled_data(data_name)

    for i in tqdm(range(N)):
        # Standardize the normal-class training samples and bootstrap them.
        Sample_normal = extract_samples(X, y, 0)
        scaler = preprocessing.StandardScaler(with_mean=True,
                                              with_std=True).fit(Sample_normal)
        N_scaled = scaler.transform(Sample_normal)
        Boot_strap_normal = Boot_sample(N_scaled, 10000)

        # Scale the held-out normal-class test samples with the same scaler.
        Sample_T = extract_samples(T, yT, 0)
        T_scaled = scaler.transform(Sample_T)

        # Null distribution: distances of the bootstrap sample against
        # itself; its 99th/95th/90th percentiles serve as thresholds.
        Distances = distance_calculation_power(Boot_strap_normal,
                                               Boot_strap_normal, dim, g_size)
        c = [
            np.percentile(Distances, 99),
            np.percentile(Distances, 95),
            np.percentile(Distances, 90)
        ]
        # Fraction of test distances exceeding each threshold
        # (10000 is the bootstrap sample size).
        Distances = distance_calculation_power(Boot_strap_normal, T_scaled,
                                               dim, g_size)
        p_values[i, 0] = sum(1 for d in Distances if d > c[0]) / 10000.0
        p_values[i, 1] = sum(1 for d in Distances if d > c[1]) / 10000.0
        p_values[i, 2] = sum(1 for d in Distances if d > c[2]) / 10000.0
    print("P value -- mean, std", np.mean(p_values, axis=0),
          np.std(p_values, axis=0))
    return Sample_normal, Boot_strap_normal, c
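

# A minimal usage sketch (hedged): it assumes the helpers import_pickled_data,
# extract_samples, Boot_sample, and distance_calculation_power are defined
# elsewhere in this module, that 'Sensorless' is a valid pickled data-set
# name, and that g_size/dim values are illustrative only:
#
#     samp, boot, c = p_val(N=10, data_name='Sensorless', g_size=100, dim=48)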
def Generate_samples_each_class(N, data_name, classes):
    # Bootstrap every class of the data set and dump each resample to its
    # own CSV file. Note: the file name encodes only the class index, so
    # each outer iteration overwrites the CSVs from the previous one.
    X, y, T, yT = import_pickled_data(data_name)
    for i in tqdm(range(N)):
        for j in range(classes):
            Sample_normal = extract_samples(X, y, j)
            Boot_strap_normal = Boot_sample(Sample_normal, 10000)
            np.savetxt('Data_Boot' + str(j) + data_name + '.csv',
                       Boot_strap_normal,
                       delimiter=',')
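

# Usage sketch (hedged): the class count is data-set specific; 11 is assumed
# here for illustration. Class j is written to 'Data_Boot<j><data_name>.csv':
#
#     Generate_samples_each_class(N=1, data_name='Sensorless', classes=11)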
def classify_samples(XTrain, yTrain):
    # Fit one kernel-density estimate per class, tuning the bandwidth by
    # grid-search cross-validation.
    n_classes = int(max(yTrain) + 1)
    pdf_d = []
    for i in range(n_classes):
        temp = extract_samples(XTrain, yTrain, i)
        params = {'bandwidth': np.logspace(-1, 1, 20)}
        grid = GridSearchCV(KernelDensity(), params)
        grid.fit(temp)
        print("best bandwidth: {0}".format(grid.best_estimator_.bandwidth))
        pdf_d.append(grid.best_estimator_)
    return pdf_d
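

# Usage sketch (hedged): classify hypothetical test points 'XTest' by
# assigning each point to the class whose KDE gives the highest
# log-likelihood (KernelDensity.score_samples returns log-densities):
#
#     pdfs = classify_samples(XTrain, yTrain)
#     log_liks = np.column_stack([kde.score_samples(XTest) for kde in pdfs])
#     y_pred = np.argmax(log_liks, axis=1)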
def power(N, Boot_strap_normal, Samp_Norm, data_name, c, g_size, dim, C):
    # For a randomly chosen non-normal class (1..C), compute the fraction of
    # test distances falling below each threshold in c, repeated N times.
    power_vals = np.zeros((N, 3))
    X, y, T, yT = import_pickled_data(data_name)
    scaler = preprocessing.StandardScaler(with_mean=True,
                                          with_std=True).fit(Samp_Norm)
    for i in tqdm(range(N)):
        Rand_class = random.randint(1, C)
        Sample_test = extract_samples(T, yT, Rand_class)
        T_scaled = scaler.transform(Sample_test)

        Distances = distance_calculation_power(Boot_strap_normal, T_scaled,
                                               dim, g_size)
        power_vals[i, 0] = sum(1 for d in Distances if d < c[0]) / 10000.0
        power_vals[i, 1] = sum(1 for d in Distances if d < c[1]) / 10000.0
        power_vals[i, 2] = sum(1 for d in Distances if d < c[2]) / 10000.0

    print("Power -- mean, std:", np.mean(power_vals, axis=0),
          np.std(power_vals, axis=0))
    return power_vals
def separate_class_samples(Train, labels):
    # Split the training set into a list of per-class sample arrays.
    classes = int(np.max(labels) + 1)
    Samples = []
    for i in range(classes):
        Samples.append(extract_samples(Train, labels, i))
    return Samples
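

# Usage sketch (hedged): per_class[k] holds all rows of Train whose label
# equals k:
#
#     per_class = separate_class_samples(X, y)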