Example #1
def main():
    ##### Calling the Naive Bayes Classifier #####
    #print("\nNAIVE BAYES CLASSIFIER...\n")
    #NB(train_data_x, train_data_y, test_data_x, test_data_y)
    ##### Calling the SGD Classifier #####
    print("\nSGD CLASSIFIER...\n")
    SGD(train_data_x, train_data_y, test_data_x, test_data_y)
Example #2
def sgd_optimizer(model, base_lr, momentum, weight_decay):
    params = []
    for key, value in model.named_parameters():
        params.append(value)
    param_group = [{'params': params, 'weight_decay': weight_decay}]
    optimizer = SGD(param_group, lr=base_lr, momentum=momentum)
    return optimizer
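
For orientation, here is how an optimizer built by this helper is typically driven for one step. The model, loss, and batch below are hypothetical placeholders, not part of the original example, and this assumes the SGD used inside the helper is torch.optim.SGD.

import torch
import torch.nn as nn

model = nn.Linear(10, 2)  # hypothetical model
optimizer = sgd_optimizer(model, base_lr=0.01, momentum=0.9, weight_decay=1e-4)

x = torch.randn(8, 10)              # dummy batch
y = torch.randint(0, 2, (8,))
loss = nn.functional.cross_entropy(model(x), y)

optimizer.zero_grad()   # clear gradients from the previous step
loss.backward()         # backpropagate
optimizer.step()        # apply the SGD update with momentum and weight decay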
Example #3
def jobman(state, channel):
    # load dataset
    _train_data = ListSequences(path=state['path'],
                                pca=state['pca'],
                                subset=state['subset'],
                                which='train',
                                one_hot=False,
                                nbits=32)
    train_data = _train_data.export_dense_format(
        sequence_length=state['seqlen'],
        overlap=state['overlap'])

    valid_data = ListSequences(path=state['path'],
                               pca=state['pca'],
                               subset=state['subset'],
                               which='valid',
                               one_hot=False,
                               nbits=32)
    model = biRNN(nhids=state['nhids'],
                  nouts=numpy.max(train_data.data_y) + 1,
                  nins=train_data.data_x.shape[-1],
                  activ=TT.nnet.sigmoid,
                  seed=state['seed'],
                  bs=state['bs'],
                  seqlen=state['seqlen'])

    algo = SGD(model, state, train_data)

    main = MainLoop(train_data, valid_data, None, model, algo, state, channel)
    main.main()
Example #4

def main():
    ##### Calling the Naive Bayes Classifier #####
    #print("\nNAIVE BAYES CLASSIFIER...\n")
    #NB(train_data_x, train_data_y, test_data_x, test_data_y)
    ##### Calling the SGD Classifier #####
    print("\nSGD CLASSIFIER...\n")
    SGD(train_data_x, train_data_y, test_data_x, test_data_y)
    print("Time taken: " + str(datetime.datetime.now() - start_time))
Example #5
def run_sgd(source, steps, learn_rate, reg_rate):
    simulate = SGD()
    simulate.get_rates(source)
    start = time()
    simulate.mf(steps, learn_rate, reg_rate)
    stop = time()
    # dataIN and rateIN are module-level globals in the original script
    with open("OUT_" + dataIN + "_" + rateIN, "a") as myfile:
        myfile.write("All:\t" + str(stop - start) + "\n")
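
The mf call above appears to run SGD-based matrix factorization over the loaded ratings. The per-rating update such a routine usually performs looks like the following sketch; it is an assumption about the project's internals, not its actual code.

import numpy as np

def mf_sgd_sweep(P, Q, ratings, learn_rate, reg_rate):
    # One SGD pass over (user, item, rating) triples for r_ui ~ P[u] . Q[i].
    for u, i, r in ratings:
        pu, qi = P[u].copy(), Q[i].copy()
        err = r - pu @ qi                                  # prediction error
        P[u] += learn_rate * (err * qi - reg_rate * pu)    # regularized step
        Q[i] += learn_rate * (err * pu - reg_rate * qi)
    return P, Q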
Example #6
def jobman(state, channel):
    rng = numpy.random.RandomState(state['seed'])
    data = DataMNIST(state['path'], state['mbs'], state['bs'], rng,
                     state['unlabled'])
    model = convMat(state, data)
    if state['natSGD'] == 0:
        algo = SGD(model, state, data)
    else:
        algo = natSGD(model, state, data)

    main = MainLoop(data, model, algo, state, channel)
    main.main()
Example #7
File: main_nat.py Project: vd114/galatea
def __init__(self, options, channel):
    """
    options: a dictionary containing all the configurations
    channel: jobman channel
    """
    # Step 0. Load data
    print('Loading data')
    data = numpy.load(options['data'])
    self.options = options
    self.channel = channel
    # Step 1. Construct Model
    print('Constructing Model')
    if options['model'] == 'mlp':
        model = mlp(options, channel, data)
    elif options['model'] == 'daa':
        model = daa(options, channel, data)
    self.model = model
    print('Constructing algo')
    # Step 2. Construct optimization technique
    if options['algo'] == 'natSGD_basic':
        algo = natSGD(options, channel, data, model)
    elif options['algo'] == 'natSGD_jacobi':
        algo = natSGD_jacobi(options, channel, data, model)
    elif options['algo'] == 'natSGD_ls':
        algo = natSGD_linesearch(options, channel, data, model)
    elif options['algo'] == 'natNCG':
        algo = natNCG(options, channel, data, model)
    elif options['algo'] == 'krylov':
        algo = KrylovDescent(options, channel, data, model)
    elif options['algo'] == 'hf':
        raise NotImplementedError
    elif options['algo'] == 'hf_jacobi':
        raise NotImplementedError
    elif options['algo'] == 'sgd':
        algo = SGD(options, channel, data, model)
    self.algo = algo
    self.options['validscore'] = 1e20
    self.train_timing = numpy.zeros((options['loopIters'], 13),
                                    dtype='float32')
    self.valid_timing = numpy.zeros((options['loopIters'], 2),
                                    dtype='float32')
    if self.channel is not None:
        self.channel.save()
    self.start_time = time.time()
    self.batch_start_time = time.time()
Example #8

import numpy as np
from Tensor import Tensor
from SGD import SGD
from Layer import (
    Sequential,
    Linear,
    MSELoss,
    Tanh,
    Sigmoid,
    Embedding,
    CrossEntropyLoss,
)

np.random.seed(0)

data = Tensor(np.array([1, 2, 1, 2]), autograd=True)
target = Tensor(np.array([0, 1, 0, 1]), autograd=True)

model = Sequential([Embedding(3, 3), Tanh(), Linear(3, 4)])
criterion = CrossEntropyLoss()

optimizer = SGD(parameters=model.getParameters(), alpha=0.1)

for i in range(0, 10):
    pred = model.forward(data)
    loss = criterion.forward(pred, target)
    loss.backprop()
    optimizer.step()

    print(loss)
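
The SGD class imported above belongs to a from-scratch autograd framework; its step presumably applies the plain update p <- p - alpha * grad to every tracked parameter, along the lines of this sketch (an assumption, not the repo's actual file).

class MinimalSGD:
    def __init__(self, parameters, alpha=0.1):
        self.parameters = parameters
        self.alpha = alpha

    def step(self, zero=True):
        for p in self.parameters:
            p.data -= self.alpha * p.grad.data  # vanilla gradient step
            if zero:
                p.grad.data *= 0                # reset the gradient accumulator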
Example #9
def main():

    ds = DatasetSplit("data/datasetSplit.txt")
    print("Train size: " + str(len(ds.train)))
    print("Test size: " + str(len(ds.test)))
    trainTest = TrainTestSplit(ds, "data/datasetSentences.txt")
    # Python 3: the py2 ConfigParser module was renamed to configparser
    config = configparser.ConfigParser()
    config.read("conf.ini")

    modelParameters = ModelParameters(
        HyperParameters(config.getint('HyperParams', 'iterations'),
                        config.getint('HyperParams', 'minibatchsize'),
                        config.getint('HyperParams', 'C'),
                        config.getint('HyperParams', 'D'),
                        config.getint('HyperParams', 'K'),
                        config.getint('HyperParams', 'annealingRate'),
                        config.getfloat('HyperParams', 'eta'),
                        config.getint('HyperParams', 'seed'),
                        config.getfloat('HyperParams', 'alpha'),
                        config.getint('HyperParams', 'X')))
    # modelParameters.Init(trainTest)

    if config.getboolean('Debug', 'bTrain'):
        start = time.time()
        sgd = SGD()
        finalTestLogLiklihood = sgd.LearnParamsUsingSGD(
            trainTest, modelParameters)
        total_time = time.time() - start
        pickle_save('model.pkl', modelParameters)

        with open("output/results.txt", 'a') as output:
            output.write("Hyperparameters: " +
                         str(config.items('HyperParams')) + "\n")
            output.write("Train time: " + str(total_time // 3600) + "H " +
                         str(total_time % 3600 // 60) + "M " +
                         str(total_time % 3600 % 60) + "S" + "\n")
            output.write("Final Test Loglikelihood: " +
                         str(finalTestLogLiklihood))

    else:
        #modelParameters = pickle_load('model.pkl')
        modelParameters = pickle_load('eta_2.0_model.pkl')

    #plot_logLiklihood(modelParameters.hyperParams)

    if config.getboolean('Debug', 'bVariantD'):
        total_time = []
        finalTestLogLiklihood = []
        finalTrainLogLiklihood = []
        d_vals = np.linspace(10.0, 300.0, 5, dtype=int)
        # for d in d_vals:
        #     start = time.time()
        #     modelParameters = ModelParameters(HyperParameters(config.getint('HyperParams', 'iterations'),
        #                                                       config.getint('HyperParams', 'minibatchsize'),
        #                                                       config.getint('HyperParams', 'C'),
        #                                                       d,
        #                                                       config.getint('HyperParams', 'K'),
        #                                                       config.getint('HyperParams', 'annealingRate'),
        #                                                       config.getfloat('HyperParams', 'eta'),
        #                                                       config.getint('HyperParams', 'seed'),
        #                                                       config.getfloat('HyperParams', 'alpha'),
        #                                                       config.getint('HyperParams', 'X')))
        #     modelParameters.Init(trainTest)
        #     sgd = SGD()
        #     finalTestLogLiklihood.append(sgd.LearnParamsUsingSGD(trainTest, modelParameters))
        #     finalTrainLogLiklihood.append(loglikelihood(trainTest.train, modelParameters))
        #     total_time.append(time.time() - start)
        #     pickle_save(str(d) + "_finalTrainLogLiklihood.pkl", finalTrainLogLiklihood)
        #     pickle_save(str(d) + "_finalTestLogLiklihood.pkl", finalTestLogLiklihood)
        #     pickle_save(str(d) + "_d_total_time.pkl", total_time)

        #pickle_save("d_finalTrainLogLiklihood.pkl", finalTrainLogLiklihood)
        #pickle_save("d_finalTestLogLiklihood.pkl", finalTestLogLiklihood)
        #pickle_save("d_total_time.pkl", total_time)
        finalTrainLogLiklihood = pickle_load("d_finalTrainLogLiklihood.pkl")
        finalTestLogLiklihood = pickle_load("d_finalTestLogLiklihood.pkl")
        total_time = pickle_load("d_total_time.pkl")
        plot_varientParam(modelParameters.hyperParams, d_vals,
                          finalTrainLogLiklihood, finalTestLogLiklihood,
                          total_time, "size of the word embedding-D. ",
                          'size of the word embedding - D')

    if config.getboolean('Debug', 'bVariantLearnRate'):
        total_time = []
        finalTestLogLiklihood = []
        finalTrainLogLiklihood = []
        sgd = SGD()
        eta_vals = np.linspace(0.1, 2, 5)
        # for eta in eta_vals:
        #     start = time.time()
        #     modelParameters = ModelParameters(HyperParameters(config.getint('HyperParams', 'iterations'),
        #                                                       config.getint('HyperParams', 'minibatchsize'),
        #                                                       config.getint('HyperParams', 'C'),
        #                                                       config.getint('HyperParams', 'D'),
        #                                                       config.getint('HyperParams', 'K'),
        #                                                       config.getint('HyperParams', 'annealingRate'),
        #                                                       eta,
        #                                                       config.getint('HyperParams', 'seed'),
        #                                                       config.getfloat('HyperParams', 'alpha'),
        #                                                       config.getint('HyperParams', 'X')))
        #     modelParameters.Init(trainTest)
        #     finalTestLogLiklihood.append(sgd.LearnParamsUsingSGD(trainTest, modelParameters))
        #     finalTrainLogLiklihood.append(loglikelihood(trainTest.train, modelParameters))
        #     total_time.append(time.time() - start)
        #     pickle_save("eta_" + str(eta) + "_model.pkl", modelParameters)

        #pickle_save("eta_finalTrainLogLiklihood.pkl", finalTrainLogLiklihood)
        #pickle_save("eta_finalTestLogLiklihood.pkl", finalTestLogLiklihood)
        #pickle_save("eta_total_time.pkl", total_time)
        finalTrainLogLiklihood = pickle_load("eta_finalTrainLogLiklihood.pkl")
        finalTestLogLiklihood = pickle_load("eta_finalTestLogLiklihood.pkl")
        total_time = pickle_load("eta_total_time.pkl")
        plot_varientParam(modelParameters.hyperParams, eta_vals,
                          finalTrainLogLiklihood, finalTestLogLiklihood,
                          total_time, "eta", "eta")

    if config.getboolean('Debug', 'bEvaluate'):
        print("Best Context words:")
        for word in ["good", "bad", "lame", "cool", "exciting"]:
            print("\t Target:" + word)
            print("\t " + str(PredictContext(modelParameters, word)))

        modelParameters = ModelParameters(
            HyperParameters(config.getint('HyperParams', 'iterations'),
                            config.getint('HyperParams', 'minibatchsize'),
                            config.getint('HyperParams', 'C'),
                            config.getint('HyperParams', 'D'), 2,
                            config.getint('HyperParams', 'annealingRate'),
                            config.getfloat('HyperParams', 'eta'),
                            config.getint('HyperParams', 'seed'),
                            config.getfloat('HyperParams', 'alpha'),
                            config.getint('HyperParams', 'X')))
        modelParameters.Init(trainTest)
        sgd = SGD()
        sgd.LearnParamsUsingSGD(trainTest, modelParameters)
        ScatterMatrix(modelParameters,
                      ["good", "bad", "lame", "cool", "exciting"])

        #modelParameters = pickle_load('model.pkl')
        modelParameters = pickle_load('eta_2.0_model.pkl')

        print("model hyperparams: " + str(modelParameters.hyperParams))
        print("Predict input for - The movie was surprisingly __:")
        print("\t" + str(
            PredictInput(modelParameters,
                         ["The", "movie", "was", "surprisingly"])))
        print("Predict input for - __ was really disappointing:")
        print("\t" + str(
            PredictInput(modelParameters, ["was", "really", "disappointing"])))
        print("Predict input for - Knowing that she __ was the best part:")
        print("\t" + str(
            PredictInput(
                modelParameters,
                ["Knowing", "that", "she", "was", "the", "best", "part"])))
        print("Solving analogy for - man is to woman as men is to:")
        print("\t" + str(AnalogySolver(modelParameters, "man", "woman", "men")))
        print("Solving analogy for - good is to great as bad is to:")
        print("\t" + str(AnalogySolver(modelParameters, "good", "great", "bad")))
        print("Solving analogy for - warm is to cold as summer is to:")
        print("\t" + str(
            AnalogySolver(modelParameters, "warm", "cold", "summer")))
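
AnalogySolver above presumably performs the standard embedding arithmetic: the nearest neighbor of b - a + c. A self-contained sketch of that computation follows; embeddings here is a hypothetical {word: vector} dict, not the project's data structure.

import numpy as np

def solve_analogy(embeddings, a, b, c):
    # "a is to b as c is to ?": nearest neighbor of (b - a + c) by cosine.
    query = embeddings[b] - embeddings[a] + embeddings[c]
    query /= np.linalg.norm(query)

    def cosine(w):
        v = embeddings[w]
        return np.dot(v, query) / np.linalg.norm(v)

    candidates = (w for w in embeddings if w not in (a, b, c))
    return max(candidates, key=cosine)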
Example #10
             ('rf', rf_clf),
             ('mb', mb_clf),
             ('svm', svm_clf),
             ('sgd', sgd_clf)]

voting_clf = VotingClassifier(estimators=estimators, voting='hard')

for clf in (log_clf, mlp_clf, rf_clf, mb_clf, svm_clf, sgd_clf, voting_clf):
    clf.fit(X, labels)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(label_test, y_pred))

clf = BaggingClassifier(LogisticRegression(),
                        n_estimators=100,
                        max_samples=2000,
                        bootstrap=True,
                        n_jobs=-1,
                        oob_score=True)

X = X.toarray()
X_test = X_test.toarray()
Y = np.array(labels)
sgd = SGD()
sgd.fit(X, Y)
y_pred = sgd.predict(X_test)
print(accuracy_score(label_test, y_pred))
end = time.time()
print(end - start)
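
The SGD class instantiated at the end exposes a scikit-learn-style fit/predict pair. A minimal sketch of such a classifier, written here as SGD-trained logistic regression for binary labels (the real class may implement something else):

import numpy as np

class SGDClassifierSketch:
    def __init__(self, lr=0.01, epochs=10):
        self.lr, self.epochs = lr, epochs

    def fit(self, X, y):
        n, d = X.shape
        self.w, self.b = np.zeros(d), 0.0
        for _ in range(self.epochs):
            for i in np.random.permutation(n):
                p = 1.0 / (1.0 + np.exp(-(X[i] @ self.w + self.b)))
                g = p - y[i]                  # gradient of the log-loss
                self.w -= self.lr * g * X[i]  # stochastic update
                self.b -= self.lr * g
        return self

    def predict(self, X):
        return (X @ self.w + self.b > 0).astype(int)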
Example #11
        network = MutiLayerNet(input_size=x_train.shape[1],
                               hidden_size_list=hidden_size_list,
                               output_size=t_train.shape[1],
                               weight_init_std=weight_init_std)
        return train(x_train, t_train, x_test, t_test, network,
                     optimizer, iters_num)
    return train_NN

import matplotlib.pyplot as plt

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

training = Prepare_training(x_train, t_train, x_test, t_test)

from SGD import SGD
from Momentum import Momentum
from AdaGrad import AdaGrad
from Adam import Adam

optimizer1 = SGD(0.01)
optimizer2 = Momentum(0.01,0.9)
optimizer3 = AdaGrad(0.01)
optimizer4 = Adam(0.01)

train_loss_list1, train_acc_list1, test_acc_list1 = training(
    optimizer1, [50, 50, 50], 200, 0.01)
train_loss_list2, train_acc_list2, test_acc_list2 = training(
    optimizer2, [50, 50, 50], 200, 0.01)
# note: the third run uses Adam (optimizer4); AdaGrad (optimizer3) is unused
train_loss_list3, train_acc_list3, test_acc_list3 = training(
    optimizer4, [50, 50, 50], 200, 0.01)

ones = np.ones(5) / 5.0  # 5-point moving-average kernel for smoothing

plt.subplot(1, 2, 1)
plt.plot(np.convolve(train_loss_list1, ones, 'valid'), label="SGD")
plt.plot(np.convolve(train_loss_list2, ones, 'valid'), label="Momentum")
plt.plot(np.convolve(train_loss_list3, ones, 'valid'), label="Adam")
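
The optimizers imported above appear to share an update(params, grads) interface. For reference, a sketch of the two simplest, plain SGD and Momentum, under that assumed interface (the actual files may differ in detail):

import numpy as np

class PlainSGD:
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        for key in params:
            params[key] -= self.lr * grads[key]      # plain gradient step

class MomentumSGD:
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr, self.momentum = lr, momentum
        self.v = None

    def update(self, params, grads):
        if self.v is None:
            self.v = {k: np.zeros_like(v) for k, v in params.items()}
        for key in params:
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.v[key]               # velocity-smoothed step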
Example #12
def otimizacao(U, X, tipo, metodo):
    if tipo == 1:
        #-----------------------------------------------------------
        # global variables
        #-----------------------------------------------------------
        glob = GlobalVariables()
        maxNGrad = glob.getMaxNGrad()
        #ganhoAlpha = glob.getGanhoAlpha
        #gamma = glob.getGamma
        #global maxNGrad, ganhoAlpha, gamma
        #-----------------------------------------------------------
        # initialize the control variables
        #-----------------------------------------------------------
        u1 = U[0, 0]
        u2 = U[1, 0]
        u3 = U[2, 0]
        u4 = U[3, 0]
        u5 = U[4, 0]
        #-----------------------------------------------------------
        # start of the method
        #----------------------------------------------------------
        fo = 1  # stopping-condition value
        #-----------------------------------------------------------
        # best values so far
        #----------------------------------------------------------
        fm = fo
        UM = U
        #-----------------------------------------------------------
        # auxiliary vectors for the optimization methods
        #----------------------------------------------------------
        vt = np.zeros((4, 1))  # auxiliary for NAG (and SGD with momentum)
        Grad = np.zeros((4, 1))  # auxiliary for Adagrad

        [pa, pb, pc, M, ponto] = trajetoria(U, X)
        fo = funcaoObjetivo(pa, pb, pc)
        if fo < 1e-10:
            print("Values already optimized")
            return

        for j in range(1, maxNGrad, 1):

            #-----------------------------------------------------------
            # stochastic gradient descent (SGD)
            #----------------------------------------------------------
            if metodo == 0:
                U = SGD(U, X)

            #-----------------------------------------------------------
            # SGD with momentum
            #----------------------------------------------------------
            if metodo == 1:
                [U, vt] = SGDMomento(U, X, vt)

            #-----------------------------------------------------------
            # Nesterov accelerated gradient
            #----------------------------------------------------------
            if metodo == 2:
                [U, vt] = NAG(U, X, vt)

            #-----------------------------------------------------------
            # Adagrad
            #----------------------------------------------------------
            if metodo == 3:
                [U, Grad] = adagrad(U, X, Grad)

            #-----------------------------------------------------------
            # clamp U to its lower and upper bounds
            #----------------------------------------------------------
            U0 = np.zeros((5, 1))
            U0 = setLimites(U)
            u1 = U0[0, 0]
            u2 = U0[1, 0]
            u3 = U0[2, 0]
            u4 = U0[3, 0]
            u5 = U0[4, 0]
            #-----------------------------------------------------------
            # update the vector U (control variables)
            #----------------------------------------------------------
            U = np.array([[u1], [u2], [u3], [u4], [u5]])
            #-----------------------------------------------------------
            # evaluate the objective function: the optimizer produces the
            # values of U, which are then fed into the trajectory computation
            #----------------------------------------------------------
            [pa, pb, pc, M, ponto] = trajetoria(U, X)
            fo = funcaoObjetivo(pa, pb, pc)
            #-----------------------------------------------------------
            # track the best result: fm starts at 1, and each time fo < fm,
            # fm stores fo; values of fo larger than the previous best are
            # ignored
            #----------------------------------------------------------
            if fo < fm:
                fm = fo
                UM = U

            #-----------------------------------------------------------
            # check the stopping condition: it holds when the projection of
            # the CoM onto the xy plane practically coincides with the
            # midpoint of the two legs in that plane, which should happen
            # during the LH phase of the gait
            #----------------------------------------------------------
            if fo < 1e-10:
                break

            #-----------------------------------------------------------
            # print the iteration result to the console
            #----------------------------------------------------------
            imprimirConsole(j, [U, fo])

        #-----------------------------------------------------------
        # print the final result to the console
        #----------------------------------------------------------
        print('************************************************************')
        print('Best solution: ')
        imprimirConsole(0, [UM, fm])
        print('************************************************************')
        #-----------------------------------------------------------
        # show the trajectory
        #----------------------------------------------------------
        plotarTrajetoria(UM, X)
    else:

        #-----------------------------------------------------------
        # show the trajectory
        #----------------------------------------------------------
        plotarTrajetoria(U, X)
Example #13

        self.bias = torch.Tensor(out_features)
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
        bound = 1 / math.sqrt(fan_in)
        init.uniform_(self.bias, -bound, bound)
        self.bias = Parameter(self.bias)

    def forward(self, input):
        return torch.sparse.mm(self.weight, input.T).T + self.bias


if __name__ == "__main__":
    x = torch.tensor([[1, 2]], dtype=torch.float)
    a = SparseLinear(2, 3)
    #  b = Parameter(torch.tensor([[1, 2], [-1, -2]], dtype=torch.float).to_sparse())

    #  optimizer = optim.SparseAdam(a.parameters(), lr=1e-3)
    #  optimizer = optim.Adam(a.parameters())
    from SGD import SGD

    optimizer = SGD(a.parameters(), lr=1e-3)
    #  loss = a(x).sum()
    loss = a(x).sum()
    loss.backward()
    optimizer.step()

    print(a.weight)
    with torch.no_grad():
        a.weight.add_(a.weight.grad)
    print(a.weight)
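
The custom SGD imported in this example is presumably there because the sparse weight produces sparse gradients. A sketch of a step that tolerates both dense and sparse grads follows; this is an assumption about what SGD.py does, not its actual contents.

import torch

class SparseAwareSGD:
    def __init__(self, params, lr=1e-3):
        self.params = list(params)
        self.lr = lr

    @torch.no_grad()
    def step(self):
        for p in self.params:
            if p.grad is None:
                continue
            g = p.grad.coalesce() if p.grad.is_sparse else p.grad
            p.add_(g, alpha=-self.lr)   # works for dense and sparse grads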
Example #14
    # save wordToIndex
    pickle.dump(wordToIndex, open("wordToIndex.p", "wb"))


#############################################
data = indices

embed = Embedding(vocabSize=len(vocab), dim=512)
model = RNNCell(
    numInputs=512, 
    numHidden=512,
    numOutput=len(vocab))

criterion = CrossEntropyLoss()
optimizer = SGD(parameters=model.getParameters() + embed.getParameters(),
                alpha=0.05)

#############################################
def generateSample(n=30, initChar=' '):
    s = ""
    hidden = model.initHidden(batchSize=1)
    input = Tensor(np.array([wordToIndex[initChar]]))
    for i in range(n):
        rnnInput = embed.forward(input)
        output, hidden = model.forward(input=rnnInput, hidden=hidden)
        output.data *= 10          # sharpen the distribution before sampling
        tempDist = output.softmax()
        tempDist /= tempDist.sum()

        m = (tempDist > np.random.rand()).argmax()  # draw an index
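
The final line draws an index by comparing the (non-cumulative) distribution against a uniform sample. An exact categorical draw goes through the cumulative sum, as in this sketch; sample_index is a name introduced here, assuming tempDist.data holds the probabilities.

import numpy as np

def sample_index(probs):
    # Draw an index from a categorical distribution given by probs.
    probs = np.asarray(probs, dtype=np.float64)
    probs /= probs.sum()                 # ensure normalization
    cdf = np.cumsum(probs)               # cumulative distribution
    return int(np.searchsorted(cdf, np.random.rand()))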
Example #15
def main():
    # Handle refit argument
    refit = False
    if len(sys.argv) > 2:
        print("Too many arguments... check out the readme.")
        exit()
    if len(sys.argv) == 2:
        if sys.argv[1] != "--refit":
            print("Unrecognized argument passed... check out the readme.")
            exit()
        else:
            print("Refit is turned on!")
            refit = True

    # load in training and test data
    print("Loading Data...")
    train = DataLoader("training")
    validation = DataLoader("validation")
    test = DataLoader("test")
    test = np.vstack((test, validation))

    # split the data
    X = train[:, :-1]
    y = train[:, -1]
    X_test = test[:, :-1]
    y_test = test[:, -1]
    print("Data Loaded.\n")

    # PCA and Scaling
    print("Reducing dimensionality of data with PCA...")
    pca = make_pipeline(StandardScaler(), PCA(n_components=100,
                                              random_state=0))
    pca.fit(X, y)
    pca_model = pca.named_steps['pca']
    print("{0:.2f}% of variance explained\n".format(
        pca_model.explained_variance_ratio_.cumsum()[-1] * 100))
    X = pca.transform(X)
    X_test = pca.transform(X_test)

    # Experimentation
    accuracies = []
    end = []
    start = []

    print("-= Begin KNN =-")
    kstart = time.time()
    accuracy, k, st, nd = KNN(X, y, X_test, y_test, refit)
    kend = time.time()
    start.append(st)
    end.append(nd)
    accuracies.append(accuracy)
    print(
        f"KNN acc:{100 * accuracy[-1,1]:.2f}%\t total time:{(kend-kstart):.2f}s\t best k:{k}\n"
    )

    print("-= Begin SVM =-")
    start.append(time.time())
    accuracy = SVM(X, y, X_test, y_test, refit)
    end.append(time.time())
    accuracies.append(accuracy)
    print(
        f"SVM acc:{100 * accuracy[-1,1]:.2f}%\t total time:{(end[-1]-start[-1]):.2f}s\n"
    )

    print("-= Begin SGD =-")
    start.append(time.time())
    accuracy = SGD(X, y, X_test, y_test, refit)
    end.append(time.time())
    accuracies.append(accuracy)
    print(
        f"SGD acc:{100 * accuracy[-1,1]:.2f}%\t total time:{(end[-1]-start[-1]):.4f}s\n"
    )

    print("-= Begin NN =-")
    start.append(time.time())
    accuracy = NN.test_model("models/nn-vgg4.h5", "vgg4")
    end.append(time.time())
    accuracies.append(accuracy)
    print(
        f"NN acc:{100 * accuracy[-1,1]:.2f}%\t total time:{(end[-1]-start[-1]):.2f}s\n"
    )

    # Plot Summary Visualizations
    accuracies = np.array(accuracies)
    fig, ax = plt.subplots()
    PlotClassAccuracy(accuracies, ax)
    fig.tight_layout()
    plt.show()

    fig, ax = plt.subplots()
    PlotAlgAccEff(accuracies, np.subtract(end, start), ax)
    fig.tight_layout()
    plt.show()
Example #16

print("Predicted house prices for the first 10 test examples: ",
      yPredBGD[:10])
print("E test:", ETestBGD)
print("E train:", ETrainBGD)

plot(xTrain, yTrain, xTest, yTest, wBGD, yScaler, 50, 'Batch Gradient Descent',
     'BGD.png')
'''
    SGD
'''
from SGD import SGD
# find the weight vector and its error on the training set
wSGD = SGD(xTrain,
           yTrain,
           learningRate=0.00005,
           epsilon=1e-15,
           numberOfIterations=1000)

# predict house prices for the test examples, scaled back to the original price range
yPredSGD = linearRegressionPredict(xTest, wSGD, yScaler)

# compute the error of the learned w on the test and training sets
ETestSGD = mean_squared_error(yScaler.inverse_transform(yTest), yPredSGD)
ETrainSGD = mean_squared_error(yScaler.inverse_transform(yTrain),
                               linearRegressionPredict(xTrain, wSGD, yScaler))

print("Predicted house prices for the first 10 test examples: ",
      yPredSGD[:10])
print("E test:", ETestSGD)
print("E train:", ETrainSGD)
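
The SGD function imported from SGD above returns a linear-regression weight vector. A sketch of such a routine under the mean-squared-error objective, mirroring the call signature used here (the internals are assumed, not the project's code):

import numpy as np

def sgd_linear_regression(X, y, learningRate=5e-5, epsilon=1e-15,
                          numberOfIterations=1000):
    # SGD on MSE for linear regression, one example per step; y is (n, 1).
    n, d = X.shape
    w = np.zeros((d, 1))
    prev_loss = np.inf
    for _ in range(numberOfIterations):
        for i in np.random.permutation(n):
            xi = X[i:i + 1]                             # (1, d) row
            grad = 2.0 * xi.T @ (xi @ w - y[i:i + 1])   # MSE gradient
            w -= learningRate * grad                    # stochastic step
        loss = float(np.mean((X @ w - y) ** 2))
        if abs(prev_loss - loss) < epsilon:             # converged
            break
        prev_loss = loss
    return w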
Example #17

    def training(self, data, target, epochs, mini_batch_size,
                 eta=0.5, eta_schedule=('decay', 0.1),
                 momentum=True, gamma=0.1,
                 lmbd=0.1, tolerance=1e-3,
                 test_data=None,
                 validation_data=None):
        """
        Train the neural network.
        data shape (#samples, #features)
        target shape (#samples, #output nodes)
        eta: learning rate
        eta_schedule: (scheme, cycles); scheme is 'decay' or 'const', and for
            'decay' the time counter is advanced by cycles each epoch
        momentum, gamma: set momentum to True; gamma is the momentum strength
            (gamma=0 is equivalent to momentum=False)
        lmbd: fraction of the old weights taken into the change
        test_data/validation_data: (input, output) pairs; input shape
            (#samples, #features), output shape (#samples, #output nodes)
        """
        data = np.copy(data)
        target = np.copy(target)
        self.gradient = SGD(self.cost_function, epochs=epochs,
                            mini_batch_size=mini_batch_size,
                            learning_rate=eta,
                            adaptive_learning_rate=eta_schedule[0],
                            momentum=momentum, m0=gamma)

        self.lmbd = lmbd
        best_accuracy = 0.0
        best_weights = np.copy(self.weights)  # defined even if accuracy never improves
        samples = data.shape[0]
        num_mini_batches = samples // mini_batch_size
        self.init_eta = eta
        self.tolerance = tolerance
        for self.epoch in range(epochs):
            # run the mini-batches
            for mini_batch_data, mini_batch_target in self.gradient.creat_mini_batch(
                    data, target, num_mini_batches):
                Neural_Network.feedforward(self, mini_batch_data.T)
                # backpropagation computes the new gradient
                Neural_Network.__backpropagation(self, mini_batch_data.T,
                                                 mini_batch_target.T)

            self.gradient.time += float(eta_schedule[1])  # advance decay time

            # calculate the cost of the epoch
            Neural_Network.__epoch_output(self, data, target, name='train')
            if test_data is not None:
                Neural_Network.__epoch_output(self, *test_data, name='test')

            # track the best accuracy (classification runs)
            if self.has_acc:
                if self.accuracy > best_accuracy:
                    best_accuracy = self.accuracy
                    best_weights = np.copy(self.weights)
                if Neural_Network.accuracy_test(self):
                    break

            # stop early if the MSE has converged (regression runs)
            if self.cost_mse:
                if Neural_Network.cost_test(self):
                    break

        # after training, restore the best weights seen
        if self.has_acc:
            self.weights = best_weights

        if validation_data is not None:
            Neural_Network.__epoch_output(self, *validation_data,
                                          name='validation')
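
The SGD helper constructed at the top of training exposes a time attribute and a creat_mini_batch generator. A sketch of what such a class could look like, inferred only from the calls visible in this example (creat_mini_batch keeps the original spelling so the calls above still match):

import numpy as np

class SGDSchedule:
    # Sketch of the SGD utility used above: decayed learning rate plus
    # mini-batch splitting; interface inferred, not the actual implementation.
    def __init__(self, cost_function, epochs, mini_batch_size,
                 learning_rate, adaptive_learning_rate='const',
                 momentum=False, m0=0.1):
        self.cost_function = cost_function
        self.epochs = epochs
        self.mini_batch_size = mini_batch_size
        self.eta0 = learning_rate
        self.schedule = adaptive_learning_rate
        self.momentum, self.m0 = momentum, m0
        self.time = 0.0                      # advanced once per epoch

    def learning_rate(self):
        if self.schedule == 'decay':
            return self.eta0 / (1.0 + self.time)   # 1/t-style decay
        return self.eta0                            # 'const'

    def creat_mini_batch(self, data, target, num_mini_batches):
        idx = np.random.permutation(data.shape[0])
        for chunk in np.array_split(idx, num_mini_batches):
            yield data[chunk], target[chunk]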