Example #1
def get_data_artificial(n_clients):
	"""
	Import the dataset via sklearn, shuffle and split train/test.
	Return training, target lists for `n_clients` and a holdout test set
	"""
	print("Loading data")
	X, y = make_classification(n_features=2, n_redundant=0, n_informative=2, random_state=1, n_clusters_per_class=1)
	X = StandardScaler().fit_transform(X)
	# add some noise
	rng = np.random.RandomState(2)
	X += 2 * rng.uniform(size=X.shape)

	# Add constant to emulate intercept
	X = np.c_[X, np.ones(X.shape[0])]

	# The features are already preprocessed
	# Shuffle
	perm = np.random.permutation(X.shape[0])
	X, y = X[perm, :], y[perm]

	# Select test at random
	test_size = round(X.shape[0]*20/100)
	test_idx = np.random.choice(X.shape[0], size=test_size, replace=False)
	train_idx = np.ones(X.shape[0], dtype=bool)
	train_idx[test_idx] = False
	X_test, y_test = X[test_idx, :], y[test_idx]
	X_train, y_train = X[train_idx, :], y[train_idx]

	# Split train among multiple clients.
	# The selection is not at random. We simulate the fact that each client
	# sees a potentially very different sample of patients.
	X, y = [], []
	step = int(X_train.shape[0] / n_clients)
	for c in range(n_clients):
		X.append(X_train[step * c: step * (c + 1), :])
		y.append(y_train[step * c: step * (c + 1)])

	return X, y, X_test, y_test
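
# A minimal usage sketch (assumes get_data_artificial above is in scope together with its
# imports: numpy as np, sklearn's make_classification and StandardScaler). Each client fits
# its own model on its local shard and is scored on the shared holdout set.
from sklearn.linear_model import LogisticRegression

n_clients = 3
X_clients, y_clients, X_test, y_test = get_data_artificial(n_clients)
for c in range(n_clients):
    clf = LogisticRegression().fit(X_clients[c], y_clients[c])
    print("client", c, "holdout accuracy:", clf.score(X_test, y_test))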
Example #2
        overall_wake_data[i][j] = wake_PCA_data[i][j]
    overall_wake_data[i][wake_PCA_data.shape[1]] = 0

for i in range(sleep_1_PCA_data.shape[0]):

    for j in range(sleep_1_PCA_data.shape[1]):
        overall_sleep_data[i][j] = sleep_1_PCA_data[i][j]
    overall_sleep_data[i][sleep_1_PCA_data.shape[1]] = 1

fig, ax = plt.subplots()
x = []
y = []
x2 = []
y2 = []
for i in range(len(overall_wake_data)):
    x.append(overall_wake_data[i][0])
    y.append(overall_wake_data[i][1])

for i in range(len(overall_sleep_data)):
    x2.append(overall_sleep_data[i][0])
    y2.append(overall_sleep_data[i][1])

# Y = np.array([0.0, 0.001, 0.003, 0.2, 0.4, 0.5, 0.7, 0.88, 0.9, 1.0])
# Y2 = np.repeat(Y,4)
# print(Y2)
# plt.ylim(0.0000,0.00030 )
ax.scatter(x, y, color='blue')
ax.scatter(x2, y2, color='red')
plt.show()

# print(overall_wake_data)
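
# An equivalent, more compact way to draw the same two scatters (a sketch assuming
# overall_wake_data and overall_sleep_data are the arrays built above, with numpy as np
# and matplotlib.pyplot as plt already imported):
wake = np.asarray(overall_wake_data)
sleep = np.asarray(overall_sleep_data)
fig, ax = plt.subplots()
ax.scatter(wake[:, 0], wake[:, 1], color='blue', label='wake')
ax.scatter(sleep[:, 0], sleep[:, 1], color='red', label='sleep')
ax.legend()
plt.show()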
Example #3
                                                    seed=seed)[0]
            writer.writerow([m, n, mix_l])
            print(m, n, mix_l)

print()
print()
print()
print()

# Read the data
data = []
with open("datos/winequality-white.csv", "r") as filestream:
    for line in filestream:
        chunks = line.split(";")
        data.append(np.array(chunks))
# We pass from string to float, and we drop the line naming the features.
data = np.array(data[1:], np.float64)
# Preprocessing
# Subtract the mean and divide by the standard deviation
data = StandardScaler(with_mean=True, with_std=True).fit_transform(data)
info_o = info.Informacion(X=data, last_feature_is_Y=True)

fmi = info_o.brute_force_MI()
fd = info_o.brute_force_Delta()
md = info_o.brute_force_MI_Delta()
md_2 = info_o.MI_Delta(arr_mi=fmi[2], arr_delta=fd[2])

print()
print("ALGORITMOS:")
print("  Brute Force MI:")
Example #4
    count = 0
    new_merge_dict = {}
    for cluster in merge_dict:
        new_merge_dict[cluster] = count
        for ele in merge_dict[cluster]:
            new_merge_dict[ele] = count
        count += 1
    print("new merge dict")

    print(new_merge_dict)

    X = []
    y = []
    for point in points_dict:
        point_obj = points_dict[point]
        X.append([point_obj.X_cor, point_obj.Y_cor])
        y.append(new_merge_dict[point_obj.cluster_id[0]])

    X = np.array(X)
    y = np.array(y)

    colors = np.array(
        list(
            islice(
                cycle([
                    '#377eb8', '#ff7f00', '#4daf4a', '#f781bf', '#a65628',
                    '#984ea3', '#999999', '#e41a1c', '#dede00'
                ]), int(y.max() + 1))))
    plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[y])
    # plt.scatter(data[:, 0], data[:, 1], s=10, color=colors[y_pred])
    index = 0
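
# An alternative to building the explicit color list above: let matplotlib map the integer
# cluster ids through a qualitative colormap (a sketch assuming X and y as constructed above).
plt.scatter(X[:, 0], X[:, 1], s=10, c=y, cmap='tab10')
plt.show()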
Example #5
def get_data_dga(n_clients):
	pd.set_option('display.max_columns', None)

	if os.path.exists('sampledga.csv'):
		dga_pd = pd.read_csv('sampledga.csv', sep=',', quotechar='"', index_col=0)
	else:
		raise Exception("Unable to find test file")

	print(dga_pd.head(10))

	print(dga_pd.tail(10))

	print(dga_pd.describe())

	print(dga_pd.info())

	dga_pd['Entropy(Domain)'] = dga_pd['domain'].apply(compute_domain_entropy)
	dga_pd['Length(Domain)'] = dga_pd['domain'].apply(len)
	dga_pd['Label'] = dga_pd['class'].apply(lambda x: 1 if x != 'legit' else 0)

	print(dga_pd.head(10))
	print(dga_pd.tail(10))

	print("-----------------")
	print("Unique class {0}".format(dga_pd["class"].unique()))

	print("Unique subclass {0}".format(dga_pd["subclass"].unique()))

	dga_X = dga_pd[['Entropy(Domain)','Length(Domain)']]
	dga_y = dga_pd[['Label']]

	y = dga_pd['Label'].values
	X = dga_X.values
	d = dga_pd['subclass'].values

	for subclass in dga_pd["subclass"].unique():
		print("subclass %s"%subclass)
		idx = np.where(d == subclass)[0]
		print(idx.shape[0])

	X = StandardScaler().fit_transform(X)

	# Add constant to emulate intercept
	X = np.c_[X, np.ones(X.shape[0])]

	# The features are already preprocessed
	# Shuffle
	perm = np.random.permutation(X.shape[0])
	X, y = X[perm, :], y[perm]
	d = d[perm]
	# Select test at random
	test_size = round(X.shape[0]*20/100)
	test_idx = np.random.choice(X.shape[0], size=test_size, replace=False)
	train_idx = np.ones(X.shape[0], dtype=bool)
	train_idx[test_idx] = False
	X_test, y_test = X[test_idx, :], y[test_idx]
	X_train, y_train = X[train_idx, :], y[train_idx]
	d_train = d[train_idx]
	r = ['alexa', 'opendns', 'cryptolocker', 'goz', 'newgoz']
	alexa_idx = np.where( d_train == 'alexa' )
	crypto_idx = np.where(d_train == 'cryptolocker')
	goz_idx = np.where(d_train == 'goz')
	newgoz_idx = np.where(d_train == 'newgoz')
	# create 3 datasets

	# one with alexa + cryptolocker
	# one with alexa + goz
	# one with alexa + newgoz
	# The selection is not at random. We simulate the fact that each client
	# sees a potentially very different sample of DNS logs.
	X, y = [], []

	for c in range(n_clients):
		if c == 0:
			X.append(np.concatenate((X_train[alexa_idx],X_train[crypto_idx]),axis=0))
			y.append(np.concatenate((y_train[alexa_idx],y_train[crypto_idx]),axis=0))
		elif c == 1:
			X.append(np.concatenate((X_train[alexa_idx],X_train[goz_idx]),axis=0))
			y.append(np.concatenate((y_train[alexa_idx],y_train[goz_idx]),axis=0))
		elif c == 2:
			X.append(np.concatenate((X_train[alexa_idx],X_train[newgoz_idx]),axis=0))
			y.append(np.concatenate((y_train[alexa_idx],y_train[newgoz_idx]),axis=0))

	return X, y, X_test, y_test
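
# compute_domain_entropy is used above but not shown; a minimal Shannon-entropy helper it is
# assumed to resemble (the name and exact behavior are assumptions, not taken from the original):
import math
from collections import Counter

def compute_domain_entropy(domain):
    counts = Counter(domain)
    total = len(domain)
    return -sum((n / total) * math.log2(n / total) for n in counts.values())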
Example #6
sleep_1_PCA_data = pca.fit_transform(x)

# print(principalComponents)

##############################################################################################################

################################################ Ploting #####################################################

fig, ax = plt.subplots()
x = []
y = []
x2 = []
y2 = []
for i in range(len(wake_PCA_data)):
    x.append(wake_PCA_data[i][0])
    y.append(wake_PCA_data[i][1])

for i in range(len(sleep_1_PCA_data)):
    x2.append(sleep_1_PCA_data[i][0])
    y2.append(sleep_1_PCA_data[i][1])

# Y = np.array([0.0, 0.001, 0.003, 0.2, 0.4, 0.5, 0.7, 0.88, 0.9, 1.0])
# Y2 = np.repeat(Y,4)
# print(Y2)
# plt.ylim(0.0000,0.00030 )
ax.scatter(x, y, color='blue')
ax.scatter(x2, y2, color='red')
plt.show()
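
# If wake_PCA_data came from a separate pca.fit_transform call (not shown here), the two
# scatters above live in different principal-component spaces. A sketch of fitting once and
# projecting both datasets with the same components (wake_x and sleep_x are assumed names for
# the raw feature matrices, not taken from the original):
from sklearn.decomposition import PCA

pca = PCA(n_components=2)
wake_PCA_data = pca.fit_transform(wake_x)
sleep_1_PCA_data = pca.transform(sleep_x)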

Example #7
# Y = []
# for x in range(0,len(X)):
#   # print X[x][1]
#   if X[x][1]>400:
#     Y.append(X[x])
# X = Y
# print X

X = X.tolist()
# print(X)
mean = np.mean(X, axis=0)
print(mean)
Xlength = len(X)
X_corrd = X
print(Xlength)
X.append([0,mean[1]])
X.append([1,mean[1]])
X.append([3,mean[1]])
X.append([4,mean[1]])
X.append([5,mean[1]])
X.append([1200,mean[1]])
X.append([1201,mean[1]])
X.append([1202,mean[1]])
X.append([1230,mean[1]])
X.append([1220,mean[1]])

print(X)
X = StandardScaler().fit_transform(X)
X[:,1] = 0.15*X[:,1]
# print 
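
# Scaling a standardized column by a constant (the 0.15 above) down-weights that feature in
# any distance-based step that follows; an equivalent, more explicit form (the weight vector
# is illustrative, not from the original):
feature_weights = np.array([1.0, 0.15])
X = StandardScaler().fit_transform(X) * feature_weights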
Example #8
def TsExtractor(labels,
                labelmap,
                func,
                mask,
                global_signal=True,
                pca=False,
                outfile="reg_timeseries.tsv",
                outlabelmap="individual_gm_labelmap.nii.gz"):

    import nibabel as nib
    import pandas as pd
    import numpy as np

    func_data = nib.load(func).get_data()
    labelmap_data = nib.load(labelmap).get_data()
    mask_data = nib.load(mask).get_data()

    labelmap_data[mask_data == 0] = 0  # background

    outlab = nib.Nifti1Image(labelmap_data, nib.load(labelmap).affine)
    nib.save(outlab, outlabelmap)

    ret = []

    if global_signal:
        indices = np.argwhere(mask_data > 0)
        X = []
        for i in indices:
            x = func_data[i[0], i[1], i[2], :]
            if np.std(x) > 0.000001:
                X.append(x.tolist())
        if len(X) == 0:
            x = np.repeat(0, func_data.shape[3])
        elif pca:
            import sklearn.decomposition as decomp
            from sklearn.preprocessing import StandardScaler
            X = StandardScaler().fit_transform(np.transpose(X))
            PCA = decomp.PCA(n_components=1, svd_solver="arpack")
            x = PCA.fit_transform(X).flatten()
        else:
            #from sklearn.preprocessing import StandardScaler
            #X = StandardScaler().fit_transform(np.transpose(X))
            x = np.mean(X, axis=0)
        ret.append(x)

    for l in range(1, len(labels) + 1):
        indices = np.argwhere(labelmap_data == l)
        X = []
        for i in indices:
            x = func_data[i[0], i[1], i[2], :]
            if np.std(x) > 0.000001:
                X.append(x.tolist())
        X = np.array(X)
        if X.shape[0] == 0:
            x = np.repeat(0, func_data.shape[3])
        elif X.shape[0] == 1:
            x = X.flatten()
        elif pca:
            import sklearn.decomposition as decomp
            from sklearn.preprocessing import StandardScaler
            X = StandardScaler().fit_transform(np.transpose(X))
            PCA = decomp.PCA(n_components=1, svd_solver="arpack")
            x = PCA.fit_transform(X).flatten()
        else:
            #from sklearn.preprocessing import StandardScaler
            #X = StandardScaler().fit_transform(np.transpose(X))
            x = np.mean(X, axis=0)
        ret.append(x)

    ret = np.transpose(np.array(ret))

    if global_signal:
        labels = ["GlobSig"] + labels

    ret = pd.DataFrame(data=ret, columns=labels)

    ret.to_csv(outfile, sep="\t", index=False)

    import os
    return os.path.join(os.getcwd(),
                        outfile), labels, os.path.join(os.getcwd(),
                                                       outlabelmap)
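
# A minimal smoke test for TsExtractor (a sketch: it assumes the function above is in scope
# and uses tiny synthetic images; note that nibabel >= 5 removed get_data(), in which case the
# calls above would need get_fdata()):
import numpy as np
import nibabel as nib

affine = np.eye(4)
nib.save(nib.Nifti1Image(np.random.rand(4, 4, 4, 10).astype(np.float32), affine), "func.nii.gz")
nib.save(nib.Nifti1Image(np.random.randint(0, 3, (4, 4, 4)).astype(np.int16), affine), "labelmap.nii.gz")
nib.save(nib.Nifti1Image(np.ones((4, 4, 4), dtype=np.int16), affine), "mask.nii.gz")

ts_path, cols, labelmap_path = TsExtractor(labels=["region_1", "region_2"],
                                           labelmap="labelmap.nii.gz",
                                           func="func.nii.gz",
                                           mask="mask.nii.gz")
print(ts_path, cols)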
Example #9
def main():
  # define timer to check how long the job runs
  t = timer.Timer()
  t.start()
  
  opts = parse_options()

  print 'Loading configuration...'
  from variables import preselection, lumi, nvar, weight
  from samples import Signal, Background
  from algorithm import analysis
  
  alg = getModel(analysis, opts.analysis)
  opts.name = opts.name + alg.modelname
  
  dataset = os.path.join(opts.dataDir,opts.dataset+'.h5')
  
  print 'Creating training and test set!'
  if (opts.analysis.lower() == 'rnn'):
    X_train, X_test, y_train, y_test, w_train, w_test, sequence = prepareSequentialTraining(dataset, Signal, Background, preselection, alg.options['collection'], alg.options['removeVar'], nvar, weight, lumi, opts.kfold, opts.trainsize, opts.testsize, opts.reproduce, multiclass=opts.multiclass)
    
  else:
    X_train, X_test, y_train, y_test, w_train, w_test = prepareTraining(dataset, Signal, Background, preselection, nvar, weight, lumi, opts.trainsize, opts.testsize, opts.reproduce, multiclass=opts.multiclass)

  if opts.kfold:
    for i in range(opts.kfold):
      print 'Summary of kfold cross-validation datasets!'
      checkDataset(y_train[i], y_test[i], w_train[i], w_test[i], multiclass=opts.multiclass)
  else:
    checkDataset(y_train, y_test, w_train, w_test, multiclass=opts.multiclass)
  
  if (opts.analysis.lower() == 'bdt'): 
    model, y_pred = trainBDT(X_train, X_test, y_train, y_test, w_train, w_test, alg.options['classifier'], alg.options['max_depth'], alg.options['n_estimators'], alg.options['learning_rate'],  alg.options['lambda'], alg.options['alpha'], alg.options['gamma'], alg.options['scale_pos_weights'], opts.reproduce)

  elif (opts.analysis.lower() == 'nn'):
      
    print 'Standardize training and test set...'
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    if opts.hyperoptimization:
        print 'Using hyperas for hyperparameter optimization'
        
        model, y_pred = trainOptNN(X_train, X_test, y_train, y_test, w_train, w_test, alg.options['layers'], 
                                        alg.options['ncycles'], alg.options['batchSize'], alg.options['dropout'], 
                                        alg.options['optimizer'], alg.options['activation'], alg.options['initializer'], alg.options['regularizer'], alg.options['classWeight'], 
                                        alg.options['learningRate'], alg.options['decay'], alg.options['momentum'], 
                                        alg.options['nesterov'], alg.options['multiclassification'], reproduce=opts.reproduce)
        #with open(os.path.join(opts.modelDir,opts.name+'_history.pkl'), 'w') as hist_pi:
            #pickle.dump(history.history, hist_pi)
        
    else:
        model, history, y_pred = trainNN(X_train, X_test, y_train, y_test, w_train, w_test, alg.options['layers'], 
                                        alg.options['ncycles'], alg.options['batchSize'], alg.options['dropout'], 
                                        alg.options['optimizer'], alg.options['activation'], alg.options['initializer'], alg.options['regularizer'], alg.options['classWeight'], 
                                        alg.options['learningRate'], alg.options['decay'], alg.options['momentum'], 
                                        alg.options['nesterov'], alg.options['multiclassification'], reproduce=opts.reproduce)
        
        with open(os.path.join(opts.modelDir,opts.name+'_history.pkl'), 'w') as hist_pi:
            pickle.dump(history.history, hist_pi)

  elif (opts.analysis.lower() == 'rnn'):
      
    if opts.kfold:
      model = []
      history = []
      y_pred = []
      scaler = []
      score = []
     
      for i in range(opts.kfold):
        print 'k-fold cross-validation! Iteration:{}'.format(i)

        if alg.options['mergeModels']:
          print 'Standardize training set...'
          scale = StandardScaler()
          _X_train = scale.fit_transform(X_train[i])
          _X_test = scale.transform(X_test[i])
        
        scaler.append(scale) 

        m, h, y_hat = trainRNN(_X_train, _X_test, y_train[i], y_test[i], w_train[i], w_test[i], sequence[i], alg.options['collection'],
                                        alg.options['unit_type'], alg.options['n_units'], alg.options['combinedDim'],
                                        alg.options['epochs'], alg.options['batchSize'], alg.options['dropout'], 
                                        alg.options['optimizer'], alg.options['activation'], alg.options['initializer'], alg.options['regularizer'], 
                                        alg.options['learningRate'], alg.options['decay'], 
                                        alg.options['momentum'], alg.options['nesterov'], alg.options['mergeModels'], 
                                        alg.options['multiclassification'], alg.options['classWeight'])
        model.append(m)
        history.append(h)
        y_pred.append(y_hat)
        score.append(m.evaluate([seq['X_test'] for seq in sequence[i]]+[X_test[i]], y_test[i]))

        with open(os.path.join(opts.modelDir,opts.name+'_kFoldCV'+str(i)+'_history.pkl'), 'w') as hist_pi:
          pickle.dump(h.history, hist_pi)

    else:
      if alg.options['mergeModels']:
          print 'Standardize training set...'
          scaler = StandardScaler()
          X_train = scaler.fit_transform(X_train)
          X_test = scaler.transform(X_test)
    
      model, history, y_pred = trainRNN(X_train, X_test, y_train, y_test, w_train, w_test, sequence, alg.options['collection'],
                                      alg.options['unit_type'], alg.options['n_units'], alg.options['combinedDim'],
                                      alg.options['epochs'], alg.options['batchSize'], alg.options['dropout'], 
                                      alg.options['optimizer'], alg.options['activation'], alg.options['initializer'], alg.options['regularizer'],
                                      alg.options['learningRate'], alg.options['decay'], 
                                      alg.options['momentum'], alg.options['nesterov'], alg.options['mergeModels'], 
                                      alg.options['multiclassification'], alg.options['classWeight'])

      with open(os.path.join(opts.modelDir,opts.name+'_history.pkl'), 'w') as hist_pi:
        pickle.dump(history.history, hist_pi)

  if opts.kfold:
    s = np.array(score, dtype=float)
    print 'Evaluating k-fold cross validation!'
    print 'Score: ', s
    print("\nMean %s: %.2f +/- %.2f" % (model[i].metrics_names[0], np.mean(s[:,0]), np.std(s[:,0])))
    print("\nMean %s: %.2f +/- %.2f" % (model[i].metrics_names[1], np.mean(s[:,1]), np.std(s[:,1])))
    for i in range(opts.kfold):
      saveModel(model[i], opts.modelDir, opts.weightDir, opts.name+'_kFoldCV'+str(i), opts.analysis)
    
      try:
        print('Saving Scaler to file...')
        joblib.dump(scaler[i], os.path.join(opts.modelDir,opts.name+'_kFoldCV'+str(i)+'_scaler.pkl'))
      except NameError:
          print('No Scaler found')
     
      saveInfos(opts.name+'_kFoldCV'+str(i), opts.analysis.lower(), opts.dataset+'_kFoldCV'+str(i), ' '.join(nvar), preselection, lumi, Signal, Background, str(alg.options), opts.trainsize, opts.testsize, opts.reproduce, opts.multiclass, ' '.join(weight))
      
      if opts.plot:
        print('Start Plotting...')
        startPlot(os.path.join('TrainedModels/models',opts.name+'_kFoldCV'+str(i)+'.h5'),save=True, multiclass=opts.multiclass)
    

  else:
    saveModel(model, opts.modelDir, opts.weightDir, opts.name, opts.analysis)
  
    try:
      print('Saving Scaler to file...')
      joblib.dump(scaler, os.path.join(opts.modelDir,opts.name+'_scaler.pkl'))
    except NameError:
        print('No Scaler found')
   
    saveInfos(opts.name, opts.analysis.lower(), opts.dataset, ' '.join(nvar), preselection, lumi, Signal, Background, str(alg.options), opts.trainsize, opts.testsize, opts.reproduce, opts.multiclass, ' '.join(weight))
    
    if opts.plot:
      print('Start Plotting...')
      startPlot(os.path.join('TrainedModels/models',opts.name+'.h5'),save=True, multiclass=opts.multiclass)
    
  # end timer and print time
  t.stop()
  t0 = t.elapsed
  t.reset()
  runtimeSummary(t0)
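
# The script persists the fitted StandardScaler with joblib; at inference time the same scaler
# would be reloaded and applied to new events before scoring (a sketch; the path and feature
# count are placeholders, not from the original):
import joblib
import numpy as np

scaler = joblib.load('TrainedModels/models/myModel_scaler.pkl')   # path assumed
X_new = np.random.rand(5, scaler.mean_.shape[0])                  # placeholder events
X_new_std = scaler.transform(X_new)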
Example #10
class ObjectRecognizer:
    def __init__(self):
        self.TestingData = []
        self.TrainingData = []
        self.TrainingLabels = []
        self.TestingLabels = []

    def Normalize(self):
        # Standardizing the features
        self.TrainingData = StandardScaler().fit_transform(self.TrainingData)
        self.TestingData = StandardScaler().fit_transform(self.TestingData)
        #for i in range(0, 2500):
        #    mean = np.mean(self.TrainingData[:, i])
        #    max = np.max(self.TrainingData[:, i])
        #    self.TrainingData[:, i] -= mean
        #    self.TrainingData[:, i] /= max

    def Read(self, train_path, test_path):
        # Reading Training Data
        for each in glob(train_path + "*"):
            take = False
            st = ""
            for ch in each:
                if ch == ' ' and st != "":
                    break
                if ch == '.':
                    break
                if take and ch != ' ':
                    st += ch
                if ch == '-':
                    take = True
            if st == "Cat":
                self.TrainingLabels.append(1)
            elif st == "Laptop":
                self.TrainingLabels.append(2)
            elif st == "Apple":
                self.TrainingLabels.append(3)
            elif st == "Car":
                self.TrainingLabels.append(4)
            elif st == "Helicopter":
                self.TrainingLabels.append(5)
            im = cv.imread(each, 0)
            im = cv.resize(im, (50, 50))
            final_data = np.reshape(im, 2500)
            self.TrainingData.append(final_data)
        self.TrainingData = np.array(self.TrainingData, dtype='float64')
        # Reading Testing Data
        count = 0
        for each in glob(test_path + "*"):
            if count % 2 == 0:
                original_path = each
            else:
                all, pos = seg.segment(original_path, each)
                stri = ""
                for i in each.split('/')[-1]:
                    if i == ' ':
                        break
                    stri += i
                # cat = 1
                # laptop = 2
                # apple = 3
                # car = 4
                # helicopter = 5
                if stri == "T1":
                    self.TestingData.append(all[0])
                    self.TestingData.append(all[1])
                    self.TestingLabels.append(1)
                    self.TestingLabels.append(2)
                elif stri == "T2":
                    self.TestingData.append(all[1])
                    self.TestingData.append(all[2])
                    self.TestingLabels.append(1)
                    self.TestingLabels.append(2)
                elif stri == "T3":
                    self.TestingData.append(all[0])
                    self.TestingData.append(all[1])
                    self.TestingLabels.append(2)
                    self.TestingLabels.append(1)
                elif stri == "T4":
                    self.TestingData.append(all[0])
                    self.TestingData.append(all[1])
                    self.TestingData.append(all[2])
                    self.TestingLabels.append(4)
                    self.TestingLabels.append(4)
                    self.TestingLabels.append(1)
                elif stri == "T5":
                    self.TestingData.append(all[0])
                    self.TestingData.append(all[1])
                    self.TestingLabels.append(2)
                    self.TestingLabels.append(3)
                elif stri == "T6":
                    self.TestingData.append(all[1])
                    self.TestingLabels.append(5)
                elif stri == "T7":
                    self.TestingData.append(all[0])
                    self.TestingData.append(all[1])
                    self.TestingData.append(all[2])
                    self.TestingLabels.append(5)
                    self.TestingLabels.append(1)
                    self.TestingLabels.append(3)
                elif stri == "T8":
                    self.TestingData.append(all[0])
                    self.TestingLabels.append(4)
                elif stri == "T9":
                    self.TestingData.append(all[1])
                    self.TestingLabels.append(3)
                elif stri == "T10":
                    self.TestingData.append(all[0])
                    self.TestingData.append(all[1])
                    self.TestingLabels.append(4)
                    self.TestingLabels.append(1)
                elif stri == "T11":
                    self.TestingData.append(all[0])
                    self.TestingLabels.append(1)
                elif stri == "T12":
                    self.TestingData.append(all[0])
                    self.TestingData.append(all[1])
                    self.TestingLabels.append(2)
                    self.TestingLabels.append(1)
                elif stri == "T13":
                    self.TestingData.append(all[0])
                    self.TestingData.append(all[1])
                    self.TestingLabels.append(5)
                    self.TestingLabels.append(4)
                elif stri == "T14":
                    self.TestingData.append(all[0])
                    self.TestingData.append(all[1])
                    self.TestingLabels.append(4)
                    self.TestingLabels.append(4)
            count += 1
        self.TestingData = np.array(self.TestingData, dtype='float64')

    def calculate_pca(self):
        pca = PCA(25)
        pca.fit(self.TrainingData)
        plt.plot(np.cumsum(pca.explained_variance_ratio_))
        plt.xlabel('number of components')
        plt.ylabel('cumulative explained variance')
        plt.show()
        self.TrainingData = pca.transform(self.TrainingData)
        self.TestingData = pca.transform(self.TestingData)

    def Run(self):
        self.Read("/Users/mac/PycharmProjects/NNProject/Training/",
                  "/Users/mac/PycharmProjects/NNProject/Testing/")
        self.Normalize()
        self.calculate_pca()
        cv.waitKey(0)
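
# Note: Normalize() above fits a separate StandardScaler on the test data, so train and test
# end up on different scales. A common alternative (a sketch, not necessarily the author's
# intent) fits the scaler on the training data only and reuses it for the test data:
class ObjectRecognizerSharedScaler(ObjectRecognizer):
    def Normalize(self):
        scaler = StandardScaler()
        self.TrainingData = scaler.fit_transform(self.TrainingData)
        self.TestingData = scaler.transform(self.TestingData)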