def main():
    """Run the SGD classifier on the module-level train/test split."""
    # The Naive Bayes path is currently disabled:
    # print("\nNAIVE BAYES CLASSIFIER...\n")
    # NB(train_data_x, train_data_y, test_data_x, test_data_y)
    print("\nSGD CLASSIFIER...\n")
    SGD(train_data_x, train_data_y, test_data_x, test_data_y)
def sgd_optimizer(model, base_lr, momentum, weight_decay):
    """Build an SGD optimizer over every parameter of *model*.

    All parameters share a single param group carrying ``weight_decay``;
    learning rate and momentum are passed straight through to SGD.
    """
    # The names from named_parameters() are not needed — keep tensors only.
    params = [param for _, param in model.named_parameters()]
    groups = [{'params': params, 'weight_decay': weight_decay}]
    return SGD(groups, lr=base_lr, momentum=momentum)
def jobman(state, channel):
    """Jobman entry point: load sequence data, build a biRNN, train with SGD.

    state: dict-like configuration (paths, PCA flag, subset, model and
           optimizer hyper-parameters).
    channel: jobman communication channel, forwarded to the main loop.
    """
    # load dataset
    _train_data = ListSequences(path=state['path'], pca=state['pca'],
                                subset=state['subset'], which='train',
                                one_hot=False, nbits=32)
    # Re-cut the raw sequences into fixed-length, possibly overlapping windows.
    train_data = _train_data.export_dense_format(
        sequence_length=state['seqlen'], overlap=state['overlap'])
    valid_data = ListSequences(path=state['path'], pca=state['pca'],
                               subset=state['subset'], which='valid',
                               one_hot=False, nbits=32)
    model = biRNN(
        nhids=state['nhids'],
        # assumes class labels are 0..max, so max+1 output units — TODO confirm
        nouts=numpy.max(train_data.data_y) + 1,
        nins=train_data.data_x.shape[-1],
        activ=TT.nnet.sigmoid,
        seed=state['seed'],
        bs=state['bs'],
        seqlen=state['seqlen'])
    algo = SGD(model, state, train_data)
    # Third positional argument (test set) is deliberately None: validation only.
    main = MainLoop(train_data, valid_data, None, model, algo, state, channel)
    main.main()
def main():
    """Run the SGD classifier and report elapsed wall-clock time."""
    # The Naive Bayes run is disabled:
    # print("\nNAIVE BAYES CLASSIFIER...\n")
    # NB(train_data_x, train_data_y, test_data_x, test_data_y)
    print("\nSGD CLASSIFIER...\n")
    SGD(train_data_x, train_data_y, test_data_x, test_data_y)
    elapsed = datetime.datetime.now() - start_time
    print("Time taken: " + str(elapsed))
def run_sgd(source, steps, learn_rate, reg_rate):
    """Run SGD matrix factorization on *source* and log the elapsed time."""
    model = SGD()
    model.get_rates(source)
    t0 = time()
    model.mf(steps, learn_rate, reg_rate)
    t1 = time()
    # NOTE(review): the log file name depends on module globals
    # dataIN / rateIN — verify they are set before this is called.
    log_name = "OUT_" + dataIN + "_" + rateIN
    with open(log_name, "a") as log:
        log.write("All:\t" + str(t1 - t0) + "\n")
def jobman(state, channel):
    """Jobman entry point: MNIST data + convMat model, trained with SGD or natSGD."""
    rng = numpy.random.RandomState(state['seed'])
    # NOTE: the config key really is spelled 'unlabled' in state.
    data = DataMNIST(state['path'], state['mbs'], state['bs'], rng,
                     state['unlabled'])
    model = convMat(state, data)
    # natSGD == 0 selects plain SGD; anything else natural-gradient SGD.
    trainer = SGD if state['natSGD'] == 0 else natSGD
    algo = trainer(model, state, data)
    MainLoop(data, model, algo, state, channel).main()
def __init__(self, options, channel): """ options: a dictionary contains all the configurations channel: jobman channel """ # Step 0. Load data print 'Loading data' data = numpy.load(options['data']) self.options = options self.channel = channel # Step 1. Construct Model print 'Constructing Model' if options['model'] == 'mlp': model = mlp(options, channel, data) elif options['model'] == 'daa': model = daa(options, channel, data) self.model = model print 'Constructing algo' # Step 2. Construct optimization technique if options['algo'] == 'natSGD_basic': algo = natSGD(options, channel, data, model) elif options['algo'] == 'natSGD_jacobi': algo = natSGD_jacobi(options, channel, data, model) elif options['algo'] == 'natSGD_ls': algo = natSGD_linesearch(options, channel, data, model) elif options['algo'] == 'natNCG': algo = natNCG(options, channel, data, model) elif options['algo'] == 'krylov': algo = KrylovDescent(options, channel, data, model) elif options['algo'] == 'hf': raise NotImplemented elif options['algo'] == 'hf_jacobi': raise NotImplemented elif options['algo'] == 'sgd': algo = SGD(options, channel, data, model) self.algo = algo self.options['validscore'] = 1e20 self.train_timing = numpy.zeros((options['loopIters'], 13), dtype='float32') self.valid_timing = numpy.zeros((options['loopIters'], 2), dtype='float32') if self.channel is not None: self.channel.save() self.start_time = time.time() self.batch_start_time = time.time()
import numpy as np
from Tensor import Tensor
from SGD import SGD
from Layer import (
    Sequential,
    Linear,
    MSELoss,
    Tanh,
    Sigmoid,
    Embedding,
    CrossEntropyLoss,
)

# Deterministic run.
np.random.seed(0)

# Toy dataset: four token indices and their class targets.
data = Tensor(np.array([1, 2, 1, 2]), autograd=True)
target = Tensor(np.array([0, 1, 0, 1]), autograd=True)

# Embedding (3 tokens -> dim 3) -> Tanh -> Linear (3 -> 4 classes).
model = Sequential([Embedding(3, 3), Tanh(), Linear(3, 4)])
criterion = CrossEntropyLoss()
optimizer = SGD(parameters=model.getParameters(), alpha=0.1)

# Ten plain SGD steps.
# NOTE(review): original formatting was collapsed; the print is assumed to
# sit inside the loop (loss reported each iteration) — confirm.
for i in range(0, 10):
    pred = model.forward(data)
    loss = criterion.forward(pred, target)
    loss.backprop()
    optimizer.step()
    print(loss)
def main():
    """Word-embedding experiment driver (Python 2).

    Loads the dataset split and sentences, reads hyper-parameters from
    conf.ini, then — depending on the [Debug] flags — trains with SGD,
    sweeps the embedding size D or the learning rate eta (sweep loops are
    currently commented out; results are reloaded from pickles), and/or
    evaluates context prediction, input prediction and analogy solving.
    """
    ds = DatasetSplit("data/datasetSplit.txt")
    print("Train size: " + str(len(ds.train)))
    print("Test size: " + str(len(ds.test)))
    trainTest = TrainTestSplit(ds, "data/datasetSentences.txt")
    config = ConfigParser.ConfigParser()
    config.read("conf.ini")
    modelParameters = ModelParameters(
        HyperParameters(config.getint('HyperParams', 'iterations'),
                        config.getint('HyperParams', 'minibatchsize'),
                        config.getint('HyperParams', 'C'),
                        config.getint('HyperParams', 'D'),
                        config.getint('HyperParams', 'K'),
                        config.getint('HyperParams', 'annealingRate'),
                        config.getfloat('HyperParams', 'eta'),
                        config.getint('HyperParams', 'seed'),
                        config.getfloat('HyperParams', 'alpha'),
                        config.getint('HyperParams', 'X')))
    # modelParameters.Init(trainTest)
    if config.getboolean('Debug', 'bTrain'):
        # Train from scratch, save the model and append results to a log.
        start = time.time()
        sgd = SGD()
        finalTestLogLiklihood = sgd.LearnParamsUsingSGD(
            trainTest, modelParameters)
        total_time = time.time() - start
        pickle_save('model.pkl', modelParameters)
        with open("output/results.txt", 'a') as output:
            output.write("Hyperparameters: " +
                         str(config.items('HyperParams')) + "\n")
            output.write("Train time: " + str(total_time / 3600) + "H " +
                         str(total_time % 3600 / 60) + "M" +
                         str(total_time % 3600 % 60) + "S" + "\n")
            output.write("Final Test Logliklihood: " +
                         str(finalTestLogLiklihood))
    else:
        # Reuse a previously trained model instead of retraining.
        #modelParameters = pickle_load('model.pkl')
        modelParameters = pickle_load('eta_2.0_model.pkl')
        #plot_logLiklihood(modelParameters.hyperParams)
    if config.getboolean('Debug', 'bVariantD'):
        # Sweep over the embedding size D. The actual sweep is commented out;
        # results are reloaded from the pickles written by an earlier run.
        total_time = []
        finalTestLogLiklihood = []
        finalTrainLogLiklihood = []
        d_vals = np.linspace(10.0, 300.0, 5, dtype=int)
        # for d in d_vals:
        #     start = time.time()
        #     modelParameters = ModelParameters(HyperParameters(
        #         config.getint('HyperParams', 'iterations'),
        #         config.getint('HyperParams', 'minibatchsize'),
        #         config.getint('HyperParams', 'C'),
        #         d,
        #         config.getint('HyperParams', 'K'),
        #         config.getint('HyperParams', 'annealingRate'),
        #         config.getfloat('HyperParams', 'eta'),
        #         config.getint('HyperParams', 'seed'),
        #         config.getfloat('HyperParams', 'alpha'),
        #         config.getint('HyperParams', 'X')))
        #     modelParameters.Init(trainTest)
        #     sgd = SGD()
        #     finalTestLogLiklihood.append(
        #         sgd.LearnParamsUsingSGD(trainTest, modelParameters))
        #     finalTrainLogLiklihood.append(
        #         loglikelihood(trainTest.train, modelParameters))
        #     total_time.append(time.time() - start)
        #     pickle_save(str(d) + "_finalTrainLogLiklihood.pkl", finalTrainLogLiklihood)
        #     pickle_save(str(d) + "_finalTestLogLiklihood.pkl", finalTestLogLiklihood)
        #     pickle_save(str(d) + "_d_total_time.pkl", total_time)
        #pickle_save("d_finalTrainLogLiklihood.pkl", finalTrainLogLiklihood)
        #pickle_save("d_finalTestLogLiklihood.pkl", finalTestLogLiklihood)
        #pickle_save("d_total_time.pkl", total_time)
        finalTrainLogLiklihood = pickle_load("d_finalTrainLogLiklihood.pkl")
        finalTestLogLiklihood = pickle_load("d_finalTestLogLiklihood.pkl")
        total_time = pickle_load("d_total_time.pkl")
        # NOTE(review): this label string was split across lines in the
        # collapsed source; a trailing line break may have been lost.
        plot_varientParam(modelParameters.hyperParams, d_vals,
                          finalTrainLogLiklihood, finalTestLogLiklihood,
                          total_time,
                          "size of the word embedding-D. ",
                          'size of the word embedding - D')
    if config.getboolean('Debug', 'bVariantLearnRate'):
        # Sweep over the learning rate eta; as above, the sweep is commented
        # out and results are reloaded from pickles.
        total_time = []
        finalTestLogLiklihood = []
        finalTrainLogLiklihood = []
        sgd = SGD()
        eta_vals = np.linspace(0.1, 2, 5)
        # for eta in eta_vals:
        #     start = time.time()
        #     modelParameters = ModelParameters(HyperParameters(
        #         config.getint('HyperParams', 'iterations'),
        #         config.getint('HyperParams', 'minibatchsize'),
        #         config.getint('HyperParams', 'C'),
        #         config.getint('HyperParams', 'D'),
        #         config.getint('HyperParams', 'K'),
        #         config.getint('HyperParams', 'annealingRate'),
        #         eta,
        #         config.getint('HyperParams', 'seed'),
        #         config.getfloat('HyperParams', 'alpha'),
        #         config.getint('HyperParams', 'X')))
        #     modelParameters.Init(trainTest)
        #     finalTestLogLiklihood.append(
        #         sgd.LearnParamsUsingSGD(trainTest, modelParameters))
        #     finalTrainLogLiklihood.append(
        #         loglikelihood(trainTest.train, modelParameters))
        #     total_time.append(time.time() - start)
        #     pickle_save("eta_" + str(eta) + "_model.pkl", modelParameters)
        #pickle_save("eta_finalTrainLogLiklihood.pkl", finalTrainLogLiklihood)
        #pickle_save("eta_finalTestLogLiklihood.pkl", finalTestLogLiklihood)
        #pickle_save("eta_total_time.pkl", total_time)
        finalTrainLogLiklihood = pickle_load("eta_finalTrainLogLiklihood.pkl")
        finalTestLogLiklihood = pickle_load("eta_finalTestLogLiklihood.pkl")
        total_time = pickle_load("eta_total_time.pkl")
        plot_varientParam(modelParameters.hyperParams, eta_vals,
                          finalTrainLogLiklihood, finalTestLogLiklihood,
                          total_time, "eta", "eta")
    if config.getboolean('Debug', 'bEvaluate'):
        # Qualitative evaluation: context prediction, a K=2 scatter plot,
        # fill-in-the-blank prediction and word analogies.
        print "Best Context words:"
        for word in ["good", "bad", "lame", "cool", "exciting"]:
            print "\t Target:" + word
            print "\t " + str(PredictContext(modelParameters, word))
        # Retrain a model with K=2 so the embeddings can be scatter-plotted.
        modelParameters = ModelParameters(
            HyperParameters(config.getint('HyperParams', 'iterations'),
                            config.getint('HyperParams', 'minibatchsize'),
                            config.getint('HyperParams', 'C'),
                            config.getint('HyperParams', 'D'),
                            2,
                            config.getint('HyperParams', 'annealingRate'),
                            config.getfloat('HyperParams', 'eta'),
                            config.getint('HyperParams', 'seed'),
                            config.getfloat('HyperParams', 'alpha'),
                            config.getint('HyperParams', 'X')))
        modelParameters.Init(trainTest)
        sgd = SGD()
        sgd.LearnParamsUsingSGD(trainTest, modelParameters)
        ScatterMatrix(modelParameters,
                      ["good", "bad", "lame", "cool", "exciting"])
        # Switch back to the saved full model for the remaining evaluations.
        #modelParameters = pickle_load('model.pkl')
        modelParameters = pickle_load('eta_2.0_model.pkl')
        print "model hyperparams:" + str(modelParameters.hyperParams)
        print "Predict input for - The movie was surprisingly __:"
        print "\t" + str(
            PredictInput(modelParameters,
                         ["The", "movie", "was", "surprisingly"]))
        print "Predict input for - __ was really disappointing:"
        print "\t" + str(
            PredictInput(modelParameters, ["was", "really", "disappointing"]))
        print "Predict input for -Knowing that she __ was the best part:"
        print "\t" + str(
            PredictInput(
                modelParameters,
                ["Knowing", "that", "she", "was", "the", "best", "part"]))
        print "Solving analogy for - man is to woman as men is to:"
        print "\t" + str(AnalogySolver(modelParameters, "man", "woman", "men"))
        print "Solving analogy for - good is to great as bad is to:"
        print "\t" + str(AnalogySolver(modelParameters, "good", "great", "bad"))
        print "Solving analogy for - warm is to cold as summer is to:"
        print "\t" + str(
            AnalogySolver(modelParameters, "warm", "cold", "summer"))
('rf', rf_clf), ('mb', mb_clf), ('svm', svm_clf), ('sgd', sgd_clf)] voting_clf = VotingClassifier( estimators=estimators, voting='hard') for clf in (log_clf,mlp_clf,rf_clf,mb_clf,svm_clf,sgd_clf,voting_clf): clf.fit(X, labels) y_pred = clf.predict(X_test) print(clf.__class__.__name__, accuracy_score(label_test, y_pred)) clf = BaggingClassifier( LogisticRegression(), n_estimators=100, max_samples=2000, bootstrap=True, n_jobs=-1, oob_score=True) ''' X = X.toarray() X_test = X_test.toarray() Y = np.array(labels) sgd = SGD() sgd.fit(X, Y) y_pred = sgd.predict(X_test) print(accuracy_score(label_test, y_pred)) end = time.time() print(end - start)
network = MutiLayerNet(input_size=x_train.shape[1], hidden_size_list= hidden_size_list, output_size=t_train.shape[1],weight_init_std=weight_init_std) return train(x_train,t_train,x_test,t_test,network,optimizer,iters_num) return train_NN import matplotlib.pyplot as plt (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True) training = Prepare_training(x_train,t_train,x_test,t_test) from SGD import SGD from Momentum import Momentum from AdaGrad import AdaGrad from Adam import Adam optimizer1 = SGD(0.01) optimizer2 = Momentum(0.01,0.9) optimizer3 = AdaGrad(0.01) optimizer4 = Adam(0.01) train_loss_list1,train_acc_list1,test_acc_list1 = training(optimizer1,[50,50,50],200,0.01) train_loss_list2,train_acc_list2,test_acc_list2 = training(optimizer2,[50,50,50],200,0.01) train_loss_list3,train_acc_list3,test_acc_list3 = training(optimizer4,[50,50,50],200,0.01) ones = np.ones(5)/5.0 plt.subplot(1,2,1) plt.plot(np.convolve(train_loss_list1,ones,'valid'),label="SGD") plt.plot(np.convolve(train_loss_list2,ones,'valid'),label="Momentum") plt.plot(np.convolve(train_loss_list3,ones,'valid'),label="Adam")
def otimizacao(U, X, tipo, metodo):
    """Optimize the control vector U with a gradient method, or just plot.

    U: 5x1 control vector; X: state passed through to trajetoria().
    tipo: 1 runs the optimization; any other value only plots U's trajectory.
    metodo: 0 = SGD, 1 = SGD with momentum, 2 = Nesterov (NAG), 3 = Adagrad.
    Stops when the objective drops below 1e-10 or after maxNGrad iterations,
    then prints and plots the best solution found.
    """
    if tipo == 1:
        #-----------------------------------------------------------
        # global configuration
        #-----------------------------------------------------------
        glob = GlobalVariables()
        maxNGrad = glob.getMaxNGrad()
        #ganhoAlpha = glob.getGanhoAlpha
        #gamma = glob.getGamma
        #global maxNGrad, ganhoAlpha, gamma
        #-----------------------------------------------------------
        # initial control variables
        #-----------------------------------------------------------
        u1 = U[0, 0]
        u2 = U[1, 0]
        u3 = U[2, 0]
        u4 = U[3, 0]
        u5 = U[4, 0]
        #-----------------------------------------------------------
        # objective value used as the stopping condition
        #-----------------------------------------------------------
        fo = 1
        #-----------------------------------------------------------
        # best values found so far
        #-----------------------------------------------------------
        fm = fo
        UM = U
        #-----------------------------------------------------------
        # auxiliary accumulators for the optimization methods
        # NOTE(review): shaped (4, 1) although U has 5 entries — confirm
        # this matches what SGDMomento/NAG/adagrad expect.
        #-----------------------------------------------------------
        vt = np.zeros((4, 1))    # momentum / NAG accumulator
        Grad = np.zeros((4, 1))  # adagrad accumulator
        [pa, pb, pc, M, ponto] = trajetoria(U, X)
        fo = funcaoObjetivo(pa, pb, pc)
        if fo < 1 * 10**(-10):
            print("Valores já otimizados")
            return
        for j in range(1, maxNGrad, 1):
            #-----------------------------------------------------------
            # stochastic gradient descent (SGD)
            #-----------------------------------------------------------
            if metodo == 0:
                U = SGD(U, X)
            #-----------------------------------------------------------
            # SGD with momentum
            #-----------------------------------------------------------
            if metodo == 1:
                [U, vt] = SGDMomento(U, X, vt)
            #-----------------------------------------------------------
            # Nesterov accelerated gradient
            #-----------------------------------------------------------
            if metodo == 2:
                [U, vt] = NAG(U, X, vt)
            #-----------------------------------------------------------
            # Adagrad
            #-----------------------------------------------------------
            if metodo == 3:
                [U, Grad] = adagrad(U, X, Grad)
            #-----------------------------------------------------------
            # clamp U to its lower/upper limits
            #-----------------------------------------------------------
            U0 = np.zeros((5, 1))
            U0 = setLimites(U)
            u1 = U0[0, 0]
            u2 = U0[1, 0]
            u3 = U0[2, 0]
            u4 = U0[3, 0]
            u5 = U0[4, 0]
            #-----------------------------------------------------------
            # rebuild the control vector U from the clamped components
            #-----------------------------------------------------------
            U = np.array([[u1], [u2], [u3], [u4], [u5]])
            #-----------------------------------------------------------
            # evaluate the objective: the optimized U is fed to the
            # trajectory computation, whose points score the objective
            #-----------------------------------------------------------
            [pa, pb, pc, M, ponto] = trajetoria(U, X)
            fo = funcaoObjetivo(pa, pb, pc)
            #-----------------------------------------------------------
            # track the best result: fm starts at 1; whenever fo < fm the
            # improvement is stored, so worse iterations are ignored
            #-----------------------------------------------------------
            if fo < fm:
                fm = fo
                UM = U
            #-----------------------------------------------------------
            # stopping condition: reached when the CoM projection on the
            # xy plane essentially coincides with the midpoint of the two
            # legs (expected during the LH phase of the walk)
            #-----------------------------------------------------------
            if fo < 1 * 10**(-10):
                break
            #-----------------------------------------------------------
            # progress report for this iteration
            #-----------------------------------------------------------
            imprimirConsole(j, [U, fo])
        #-----------------------------------------------------------
        # final report
        #-----------------------------------------------------------
        print('************************************************************')
        print('Melhor Solução: ')
        imprimirConsole(0, [UM, fm])
        print('************************************************************')
        #-----------------------------------------------------------
        # show the optimized trajectory
        #-----------------------------------------------------------
        plotarTrajetoria(UM, X)
    else:
        #-----------------------------------------------------------
        # plotting only: show the trajectory for the given U
        #-----------------------------------------------------------
        plotarTrajetoria(U, X)
        # Fragment: tail of an __init__ (header not in view) that builds the
        # dense bias for this sparse linear layer.
        self.bias = torch.Tensor(out_features)
        # Bias init: U(-1/sqrt(fan_in), 1/sqrt(fan_in)) — the same formula
        # nn.Linear uses (presumably intentional; verify).
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
        bound = 1 / math.sqrt(fan_in)
        init.uniform_(self.bias, -bound, bound)
        self.bias = Parameter(self.bias)

    def forward(self, input):
        # weight is a sparse tensor: sparse.mm(W, x^T)^T == x @ W^T + bias.
        return torch.sparse.mm(self.weight, input.T).T + self.bias


if __name__ == "__main__":
    # Smoke test: one SGD step on a 2->3 sparse layer, then a manual
    # gradient update to inspect the weight before/after.
    x = torch.tensor([[1, 2]], dtype=torch.float)
    a = SparseLinear(2, 3)
    # b = Parameter(torch.tensor([[1, 2], [-1, -2]], dtype=torch.float).to_sparse())
    # optimizer = optim.SparseAdam(a.parameters(), lr=1e-3)
    # optimizer = optim.Adam(a.parameters())
    from SGD import SGD
    optimizer = SGD(a.parameters(), lr=1e-3)
    # loss = a(x).sum()
    loss = a(x).sum()
    loss.backward()
    optimizer.step()
    print(a.weight)
    with torch.no_grad():
        # extra manual update w += grad (gradient-flow sanity check)
        a.weight.add_(a.weight.grad)
    print(a.weight)
# save wordToIndex pickle.dump( wordToIndex, open( "wordToIndex.p", "wb" ) ) ############################################# data = indices embed = Embedding(vocabSize=len(vocab), dim=512) model = RNNCell( numInputs=512, numHidden=512, numOutput=len(vocab)) criterion = CrossEntropyLoss() optimizer = SGD( parameters = model.getParameters() + embed.getParameters(), alpha = 0.05) ############################################# def generateSample(n=30, initChar = ' '): s = "" hidden = model.initHidden(batchSize = 1) input = Tensor(np.array([wordToIndex[initChar]])) for i in range(n): rnnInput = embed.forward(input) output, hidden = model.forward(input=rnnInput, hidden=hidden) output.data *= 10 tempDist = output.softmax() tempDist /= tempDist.sum() m = (tempDist > np.random.rand()).argmax()
def main():
    """CLI entry point: load data, PCA-reduce, run KNN/SVM/SGD/NN, plot summaries.

    Accepts one optional flag, ``--refit``, which forces the classical
    models to be refit instead of loaded.
    """
    # Handle refit argument
    refit = False
    if len(sys.argv) > 2:
        print("Too many arguments... check out the readme.")
        exit()
    if len(sys.argv) == 2:
        # BUG FIX: original used `string(sys.argv)[1]`, a NameError
        # (`string` is undefined); compare the argument itself.
        if sys.argv[1] != "--refit":
            print("Unrecognized argument passed... check out the readme.")
            exit()
        else:
            print("Refit is turned on!")
            refit = True

    # load in training and test data
    print("Loading Data...")
    train = DataLoader("training")
    validation = DataLoader("validation")
    test = DataLoader("test")
    # evaluate on test + validation combined
    test = np.vstack((test, validation))

    # split the data (label is the last column)
    X = train[:, :-1]
    y = train[:, -1]
    X_test = test[:, :-1]
    y_test = test[:, -1]
    print("Data Loaded.\n")

    # PCA and Scaling
    print("Reducing dimensionality of data with PCA...")
    pca = make_pipeline(StandardScaler(),
                        PCA(n_components=100, random_state=0))
    pca.fit(X, y)
    pca_model = pca.named_steps['pca']
    print("{0:.2f}% of variance explained\n".format(
        pca_model.explained_variance_ratio_.cumsum()[-1] * 100))
    X = pca.transform(X)
    X_test = pca.transform(X_test)

    # Experimentation: per-algorithm accuracy curves and wall-clock times.
    accuracies = []
    end = []
    start = []

    print("-= Begin KNN =-")
    kstart = time.time()
    accuracy, k, st, nd = KNN(X, y, X_test, y_test, refit)
    kend = time.time()
    start.append(st)
    end.append(nd)
    accuracies.append(accuracy)
    print(
        f"KNN acc:{100 * accuracy[-1,1]:.2f}%\t total time:{(kend-kstart):.2f}s\t best k:{k}\n"
    )

    print("-= Begin SVM =-")
    start.append(time.time())
    accuracy = SVM(X, y, X_test, y_test, refit)
    end.append(time.time())
    accuracies.append(accuracy)
    print(
        f"SVM acc:{100 * accuracy[-1,1]:.2f}%\t total time:{(end[-1]-start[-1]):.2f}s\n"
    )

    print("-= Begin SGD =-")
    start.append(time.time())
    accuracy = SGD(X, y, X_test, y_test, refit)
    end.append(time.time())
    accuracies.append(accuracy)
    print(
        f"SGD acc:{100 * accuracy[-1,1]:.2f}%\t total time:{(end[-1]-start[-1]):.4f}s\n"
    )

    print("-= Begin NN =-")
    start.append(time.time())
    accuracy = NN.test_model("models/nn-vgg4.h5", "vgg4")
    end.append(time.time())
    accuracies.append(accuracy)
    # BUG FIX: this report previously said "SVM acc" (copy-paste error).
    print(
        f"NN acc:{100 * accuracy[-1,1]:.2f}%\t total time:{(end[-1]-start[-1]):.2f}s\n"
    )

    # Plot Summary Visualizations
    # NOTE(review): the original had a bare `np.array(accuracies)` whose
    # result was discarded (a no-op); removed. Confirm whether an assignment
    # was intended before the plotting calls.
    fig, ax = plt.subplots()
    PlotClassAccuracy(accuracies, ax)
    fig.tight_layout()
    plt.show()

    fig, ax = plt.subplots()
    PlotAlgAccEff(accuracies, np.subtract(end, start), ax)
    fig.tight_layout()
    plt.show()
# --- continuation of a linear-regression script; xTrain/yTrain/xTest/yTest,
# wBGD, yScaler, ETestBGD/ETrainBGD etc. are defined earlier in the file ---
print("Kết quả dự đoán giá nhà của 10 examples đầu trong tập test: ",
      yPredBGD[:10])
print("E test:", ETestBGD)
print("E train:", ETrainBGD)
plot(xTrain, yTrain, xTest, yTest, wBGD, yScaler, 50,
     'Batch Gradient Descent', 'BGD.png')

''' SGD '''
from SGD import SGD
# Fit the weight vector on the training set with stochastic gradient descent.
wSGD = SGD(xTrain, yTrain, learningRate=0.00005, epsilon=1e-15,
           numberOfIterations=1000)
# Predict house prices for the test examples, inverse-scaled back to the
# original price range.
yPredSGD = linearRegressionPredict(xTest, wSGD, yScaler)
# Error of the learned weights on the test set and on the training set.
ETestSGD = mean_squared_error(yScaler.inverse_transform(yTest), yPredSGD)
ETrainSGD = mean_squared_error(yScaler.inverse_transform(yTrain),
                               linearRegressionPredict(xTrain, wSGD, yScaler))
print("Kết quả dự đoán giá nhà của 10 examples đầu trong tập test: ",
      yPredSGD[:10])
print("E test:", ETestSGD)
print("E train:", ETrainSGD)
def training(self, data, target, epochs, mini_batch_size, eta = 0.5,
             eta_schedule = ('decay',0.1), momentum = True, gamma = 0.1,
             lmbd = 0.1, tolerance = 1e-3, test_data = None,
             validation_data = None):
    """ training NN

    data shape (#samples, #features)
    target shape (#samples, #output nodes)
    eta: learning rate
    eta_schedule: (scheme, cycles) 'decay' or 'const'; if 'decay' the SGD
        time counter is advanced by `cycles` per step
    momentum, gamma: set momentum to True; gamma is the momentum strength
        (gamma = 0 is equivalent to momentum = False)
    lmbd: fraction of old weights taken into the change (regularization)
    tolerance: convergence tolerance (stored on self for the stop tests)
    test_data/validation_data: (input, output); input shape
        (#samples, #features), output shape (#samples, #output nodes)

    Trains epoch by epoch; when accuracy is tracked, the best weights seen
    are restored after training.
    """
    # Work on copies so the caller's arrays are never mutated.
    data = np.copy(data)
    target = np.copy(target)
    self.gradient = SGD(self.cost_function,
                        epochs = epochs,
                        mini_batch_size = mini_batch_size,
                        learning_rate = eta,
                        adaptive_learning_rate = eta_schedule[0],
                        momentum = momentum,
                        m0 = gamma)
    self.lmbd = lmbd
    best_accuracy = 0.0
    samples = data.shape[0]
    num_mini_batches = samples // mini_batch_size
    self.init_eta = eta
    self.tolerance = tolerance
    for self.epoch in range(epochs):
        # run minibatches
        # (note: method name really is `creat_mini_batch` in the SGD class)
        for mini_batch_data, mini_batch_target in self.gradient.creat_mini_batch(data, target, num_mini_batches):
            Neural_Network.feedforward(self, mini_batch_data.T)
            # calls backpropagation to find the new gradient
            Neural_Network.__backpropagation(self, mini_batch_data.T,
                                             mini_batch_target.T)
            # NOTE(review): original formatting was collapsed; this decay-time
            # update is assumed to be per minibatch — confirm it is not meant
            # to be per epoch.
            self.gradient.time += float(eta_schedule[1])* 1 #update time for decay
        # calculate the cost of the epoch
        Neural_Network.__epoch_output(self, data, target, name = 'train')
        if test_data != None:
            Neural_Network.__epoch_output(self, *test_data, name = 'test')
        # Checking if accuracy
        if self.has_acc == True:
            if self.accuracy > best_accuracy:
                best_accuracy = self.accuracy
                best_weights = np.copy(self.weights)
            if Neural_Network.accuracy_test(self) == True:
                break
        # Checking if MSE
        if self.cost_mse == True:
            if Neural_Network.cost_test(self) == True:
                break
    # after training set the weights to the best weights
    # NOTE(review): if accuracy never exceeds 0.0, `best_weights` is unbound
    # here and this raises NameError — worth guarding.
    if self.has_acc:
        self.weights = best_weights
    if validation_data != None:
        Neural_Network.__epoch_output(self, *validation_data,
                                      name = 'validation')