Example no. 1
    def train(self, N_minibatches, learning_rate, n_epochs, decay_rate=0):
        # Ensure the number of mini-batches does not exceed the number of samples
        assert N_minibatches <= self.X.shape[0]
        # Increment the epoch counter
        self.total_epochs += n_epochs

        for epoch in range(n_epochs):
            # Split the data into N_minibatches mini-batches (a new split each epoch)
            mb = SGD.minibatch(self.X, N_minibatches)
            for i in range(N_minibatches):
                # To sample with replacement instead, replace i below with a
                # random index, e.g. k = np.random.randint(N_minibatches)
                X_mb = self.X[mb[i]]
                Y_mb = self.Y[mb[i]]
                M = X_mb.shape[0]  # Size of each minibatch (NOT constant, see SGD.minibatch)
                # Feed-Forward to compute all the activations
                self.__feed_forward(X_mb, M)
                # Back-propagate to compute the gradients
                self.__backpropogation(X_mb, Y_mb, M)

                # Update the learning rate using the chosen decay method
                lr = self.learning_rate_func(learning_rate, decay_rate)
                # Update the weights and biases using gradient descent
                for l in range(self.N_layers + 1):
                    # Change of weights
                    dw = self.weights[l] * self.momentum - lr / M * self.cost_weight_gradient[l]
                    # Change of bias
                    db = self.biases[l] * self.momentum - lr / M * self.cost_bias_gradient[l]
                    # Update weights and biases
                    self.weights[l] += dw
                    self.biases[l] += db
        return
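For comparison, here is a minimal sketch of the conventional momentum update, which accumulates a velocity from the previous step instead of scaling the current weights by the momentum factor as the loop above does. The names below are illustrative assumptions, not attributes of the class shown.

import numpy as np

def momentum_step(w, velocity, grad, lr, momentum=0.9):
    # Classical momentum SGD: keep a running velocity and apply it to the weights
    velocity = momentum * velocity - lr * grad
    return w + velocity, velocity

# usage sketch with dummy shapes
w = np.zeros((3, 2))
v = np.zeros_like(w)
g = np.ones_like(w)
w, v = momentum_step(w, v, g, lr=0.01)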
Example no. 2
    def fit(self, training_file, output_file, word_map_file='word_map.bin'):

        if not os.path.exists(word_map_file):
            tr.build_word_map(training_file, word_map_file)
        self.word_map_file = word_map_file

        self.trees = tr.load_trees(self.data_folder + training_file,
                                   self.word_map_file)

        self.num_words = len(tr.load_word_map(self.word_map_file))

        self.rntn = RNTN.RNTN(self.vect_dim, self.output_dim, self.num_words,
                              self.mini_batch_size)
        self.sgd = SGD.SGD(self.rntn, self.learning_rate, self.mini_batch_size)

        for e in range(self.optim_epochs):

            # Fit model
            # --------------------------

            start = time.time()
            print "Running epoch %d" % e

            self.sgd.optimize(self.trees)

            end = time.time()
            print "\nTime per epoch : %f" % (end - start)

            # Save model specifications
            with open(output_file, 'w+') as fid:
                pickle.dump([(i, self.values[i]) for i in self.args][1:], fid)
                pickle.dump(self.sgd.cost_list, fid)
                pickle.dump(self.rntn.stack, fid)
Example no. 3
def main(dataType):
    print(f"Solving the {dataType} problem")
    # ---------------------------------------------------------------------------------------- #
    # Data Selection Section
    # ---------------------------------------------------------------------------------------- #
    if (dataType == "Synthetic"):
        X = np.loadtxt("features.txt")
        y = np.loadtxt("labels.txt")
    else:
        data = pd.read_csv("heart.csv")
        X = np.array(data.iloc[:, 0:13])
        y = np.array(data['target'])

    X, y = shuffle(X, y, random_state=0)

    # Change the ground truths from [0,1] to [-1,1]
    y = np.where(y == 1, 1, -1)

    # Add the ones to the end for the bias
    X = np.concatenate((X, np.ones((len(X), 1))), axis=1)

    # Split the Sample space into training and test
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=42)

    # Train the classifiers (Minimize the loss function) with varying batch sizes
    batch_sizes = [1, 2, 4, 6, 8, 16, 32, 64, 128]
    sgd_accuracies = []
    obfgs_accuracies = []
    nobfgs_accuracies = []
    for size in batch_sizes:
        #print("Solving with Stochastic Gradient Descent")
        W = SGD.Minimize(loss, loss_gradient, 0.001, 50, size,
                         (X_train, y_train))
        y_pred = predict(W, X_test)
        sgd_accuracies.append(accuracy_score(y_test, y_pred))
        #print("Solving with Stochastic BFGS")
        W = OBFGS.Minimize(loss, loss_gradient, 50, size, (X_train, y_train))
        y_pred = predict(W, X_test)
        obfgs_accuracies.append(accuracy_score(y_test, y_pred))
        #print("Solving with Nesterov Stochastic BFGS")
        W = NOBFGS.Minimize(loss, loss_gradient, 50, size, (X_train, y_train))
        y_pred = predict(W, X_test)
        nobfgs_accuracies.append(accuracy_score(y_test, y_pred))

    # Plot the graphs
    plt.figure(figsize=(10, 5))
    plt.title("Accuracy per batch size")
    plt.xlabel("Batch size")
    plt.ylabel("Accuracy")
    plt.plot(batch_sizes, sgd_accuracies, label="SGD")
    plt.plot(batch_sizes, obfgs_accuracies, label="sBFGS")
    plt.plot(batch_sizes, nobfgs_accuracies, label="nsBFGS")
    plt.legend(loc=("lower right"))
    plt.show()
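The loss, loss_gradient, and predict helpers are not shown in this snippet. A plausible sketch of predict, consistent with the ±1 labels and the bias column appended to X above, is a sign-based linear classifier (an assumption about the missing helper, not the repository's actual code):

import numpy as np

def predict(W, X):
    # Linear score with the bias folded into the last column of X;
    # labels are in {-1, +1}, so classify by the sign of the score.
    return np.where(X @ W >= 0, 1, -1)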
Example no. 4
def logreg(x, y, M, init_w, n_epochs, learning_rate, momentum, lambd=None):
    """Logistic (softmax) regression for multiclass categorization. Uses momentum SGD."""

    # prob = np.exp(X @ theta)/np.sum(np.exp(X @ theta),axis=1)
    # prob.shape = [examples,classes]. Category probabilities as given
    # by the Softmax-function.

    # cost_function = -np.sum(Y * np.log(prob) + (1-Y)*np.log(1-prob))
    # scalar, full cross-entropy cost function. Note that the last term
    # is not really necessary: when using softmax there is an implicit
    # penalty for giving nonzero probabilities to false categories.
    # This version is a little more aggressive in punishing confidence in wrong
    # labels, but is far clunkier and possibly numerically unstable.

    # cost_function = -np.sum(Y * np.log(prob))
    # is the preferred cost function when using softmax. Also given as
    # Softmax_loss in CostFunctions.py

    # shorthand = prob * (Y / prob - (1-Y)/(1-prob))
    # helper matrix for vectorizing the computation of the cost_gradients
    # shape = [examples,classes]. Remove the last term if using Softmax_loss
    # as cost_function.

    # cost_gradients = X.T @ (prob * np.sum(shorthand,axis=1)) - X.T @ shorthand
    # Derivatives of cost wrt thetas, shape = [predictors,classes]

    # NOTE! The above expressions for the cost_gradients have not been
    # double-checked! Given the potential for error in their derivations
    # they should be treated as highly suspect. The simpler version for
    # Softmax_loss implemented below is, however, pretty safe.

    w = init_w
    dw = np.zeros(w.shape)

    for epoch in range(n_epochs):
        mb = SGD.minibatch(x, M)  # Split x into M minibatches
        for i in range(M):
            # Pick out a random mini-batch index
            k = np.random.randint(M)
            # compute gradient with random minibatch
            X, Y = x[mb[k]], y[mb[k]]
            # Probabilities from Softmax:
            prob = np.exp(X @ w) / np.sum(np.exp(X @ w), axis=1, keepdims=True)

            # cost_function = -np.sum(Y * np.log(prob)) gives:
            grad = X.T @ (prob - Y)

            # Add l2 penalty:
            if lambd is not None:
                grad += 2 * lambd * w

            # increment weights
            dw = momentum * dw - learning_rate * grad / X.shape[0]
            w = w + dw
    return w
    def SGDM_score_func(X_training, y_training, X_test, y_test):
        weights = SGD.SGDM(
            x=X_training,
            y=y_training,
            M=M,
            init_w=init_w,
            n_epochs=n_epochs,
            learning_rate=learning_rate,
            momentum=momentum,
            cost_gradient=cost_gradient,
            lambd=lambd,
        )

        return stat_tools.MSE(y_test, X_test @ weights)
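A quick way to sanity-check the softmax gradient X.T @ (prob - Y) used in logreg above is a finite-difference comparison on random data. This standalone sketch assumes one-hot targets and the cost -np.sum(Y * np.log(prob)):

import numpy as np

def softmax(Z):
    Z = Z - Z.max(axis=1, keepdims=True)  # numerical stability
    e = np.exp(Z)
    return e / e.sum(axis=1, keepdims=True)

def cost(w, X, Y):
    return -np.sum(Y * np.log(softmax(X @ w)))

rng = np.random.default_rng(0)
X = rng.normal(size=(20, 4))
Y = np.eye(3)[rng.integers(0, 3, size=20)]  # one-hot targets
w = rng.normal(size=(4, 3))

analytic = X.T @ (softmax(X @ w) - Y)
numeric = np.zeros_like(w)
eps = 1e-6
for i in range(w.shape[0]):
    for j in range(w.shape[1]):
        wp, wm = w.copy(), w.copy()
        wp[i, j] += eps
        wm[i, j] -= eps
        numeric[i, j] = (cost(wp, X, Y) - cost(wm, X, Y)) / (2 * eps)

print(np.max(np.abs(analytic - numeric)))  # should be tiny, roughly 1e-8 or smaller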
Example no. 6
    def fit(self, word_map_file='word_map.bin'):

        data_train_file = self.opts.training_file

        print '================\nTRAINING\n================'

        # If the word map file does not exist, create it.
        if not os.path.exists(word_map_file):
            tr.build_word_map(data_train_file, word_map_file)
        self.word_map_file = word_map_file

        # Load trees and set the RNTN.
        self.trees = tr.load_trees(self.opts.data_folder + data_train_file,
                                   self.word_map_file)
        self.num_words = len(tr.load_word_map(self.word_map_file))

        self.rntn = RNTN.RNTN(vec_dim=self.opts.vect_dim,
                              output_dim=self.opts.output_dim,
                              num_words=self.num_words,
                              mini_batch_size=self.opts.mini_batch_size)

        self.sgd = optimizer.SGD(self.rntn, self.opts.learning_rate,
                                 self.opts.mini_batch_size)

        for e in range(self.opts.optim_epochs):

            # Fit model.
            # After the training phase, model specifications
            # are in self.rntn.stack.
            # --------------------------

            start = time.time()
            print "Running epoch %d" % e

            self.sgd.optimize(self.trees)

            end = time.time()
            print "Time per epoch : %f" % (end - start)
Example no. 7
__author__ = 'computer'
import sys
import numpy as np
import SGD
import LoadData

dataDic, labelDic = LoadData.loadData("horseColicTraining.txt")
w = SGD.gd(dataDic, labelDic, alpha = 0.001, epochs = 1000)
#w = SGD.sgd(dataDic, labelDic, alpha = 0.001, epochs = 500)
dataDic, labelDic = LoadData.loadData("horseColicTest.txt")
h = np.mat(dataDic).dot(w)
cnt = 0
for i in range(len(labelDic)):
    if h[i] >= 0.5 and labelDic[i] >= 0.5:
        cnt += 1
    elif h[i] < 0.5 and labelDic[i] <= 0.5 :
        cnt += 1
print(cnt / len(labelDic))
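Note that the script above thresholds the raw linear score h at 0.5. For logistic-regression weights the more common convention is to pass the score through a sigmoid first (equivalently, threshold the raw score at 0). A hedged sketch of that evaluation, assuming w is a column of logistic-regression weights:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def accuracy(features, labels, w):
    # Probability of the positive class, then threshold at 0.5
    probs = sigmoid(np.asarray(features) @ np.asarray(w).reshape(-1, 1)).ravel()
    preds = (probs >= 0.5).astype(float)
    return float(np.mean(preds == np.asarray(labels)))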
Example no. 8
    b = np.random.rand(class_num, 1)
    """
  ### Part1: SGD ---------------------------------------
  """
    t = 100
    landa = [0.01, 0.05]
    batch_size = 1

    fig, (ax1, ax2, ax3) = plt.subplots(1, 3)

    for lr in landa:
        print("Run SGD with learning rate: {}.".format(lr))
        sgd = SGD.sgd(train_data,
                      test_data,
                      w,
                      b,
                      batch_size=batch_size,
                      lr=lr,
                      t=t,
                      gamma=gamma)
        sgd.run()

        sgd.plot_loss(ax1, label='lr: ' + str(lr))
        sgd.plot_eval_data_accuracy(ax2, label='lr: ' + str(lr))
        sgd.plot_steps_variance(ax3, label='lr: ' + str(lr))

    ax3.set_yscale('log')
    ax1.title.set_text('Loss')
    ax2.title.set_text('Accuracy')
    ax3.title.set_text('Variance')
    ax1.legend()
    ax2.legend()
'''
'''
# containers for monitoring the parameters
b2_value_list=[]
b1_value_list=[]
w1_value_list=[]
w2_value_list=[]
value={'w1':w1_value_list,'w2':w2_value_list,'b1':b1_value_list,'b2':b2_value_list}
'''  # this block is no longer used

iter_per_epoch = max(train_size / batch_size, 1)

network = simpleConvNet.SimpleConvNet()  # use the default parameters
#network=Newtwolayecrnet.newtwolayernet(input_size=784,hidden_size=50,output_size=10,batch_size=100)
# optimizer
optimizer = sgd.SGD(learning_rate)  # learning rate set to 0.002
#optimizer=momentum.Momentum(learning_rate)  # learning rate set to 0.01
#optimizer=adagrad.AdaGrad(learning_rate)
#optimizer=adam.Adam()
#optimizer=nesterov.Nesterov()

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    train_images_batch = train_images[batch_mask]
    train_lables_batch = train_lables[batch_mask]

    # compute the gradients via error backpropagation
    grad = network.gradient(train_images_batch, train_lables_batch)
    '''w1_list.append(np.mean(grad['W1']))
    b1_list.append(np.mean(grad['b1']))
    w2_list.append(np.mean(grad['W2']))
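For reference, a minimal sketch of the kind of object sgd.SGD(learning_rate) is expected to be in this style of training loop: it simply applies one gradient-descent step to every parameter in a dict. This is an assumption about the interface, not the project's actual module; the loop would then call something like optimizer.update(network.params, grad).

class SGD:
    # Plain stochastic gradient descent: params[key] -= lr * grads[key]
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        for key in params:
            params[key] -= self.lr * grads[key]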
Example no. 10
model.add(Conv2D(64, (3, 3), activation='relu'))
# Second subsampling (max-pooling) layer
model.add(MaxPooling2D(pool_size=(2, 2)))
# Dropout regularization layer
model.add(Dropout(0.25))
# Flatten the 2D feature maps into a flat vector
model.add(Flatten())
# Fully connected layer for classification
model.add(Dense(512, activation='relu'))
# Dropout regularization layer
model.add(Dropout(0.5))
# Fully connected output layer
model.add(Dense(nb_classes, activation='softmax'))

# Set the optimization parameters
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])
# Train the model
model.fit(X_train, Y_train,
              batch_size=batch_size,
              epochs=nb_epoch,
              validation_split=0.1,
              shuffle=True,
              verbose=2)

# Evaluate the trained model on the test data
scores = model.evaluate(X_test, Y_test, verbose=0)
print("Accuracy on the test data: %.2f%%" % (scores[1]*100))
Example no. 11
## result for Classification error
mean_class = [[], []]
std_dev_class = [[], []]

## train SGD and gather the result for sigma1 = 0.05
for sets in train_sets_sigma1:
    log_loss = []
    log_loss_mean = 0
    log_loss_std  = 0
    log_loss_min = 300
    log_excess = 0
    class_error = []
    class_error_mean = 0
    class_std = 0
    for train_set in sets:
        sgd = s.logSGD(train_set, test_set2_sigma1,2)
        sgd.computeLearnRate(rho_2, M_2)
        sgd.learn()
        sgd.output()
        ##calculate error and loss
        loss = sgd.log_risk_average()
        log_loss.append(loss)
        error = sgd.class_error_average()
        class_error.append(error)
        if loss < log_loss_min:
            log_loss_min = loss
    ##compute the estimate
    log_loss_mean = np.mean(log_loss)
    class_error_mean = np.mean(class_error)
    log_loss_std = np.std(log_loss)
    class_std = np.std(class_error)
best_r = 0.01
best_epoch = 25
          
print("best r:", best_r)
print("best epoch:", best_epoch)
  
###  

predictTrains = []
predictTests = []

dataAccuracy = []
testAccuracy = []

for i in range(5):
    sgd = SGD.SGD(r=best_r,epochs=best_epoch,W0=[0]*len(data[i][0]))
    sgd.fit(data[i],data_labels)
    
    predictTrains.append(sgd.predict(data[i]))
    predictTests.append(sgd.predict(testset[i]))
    
    dataAccuracy.append(Stat.F1_Score(sgd.predict(data[i]), data_labels))
    testAccuracy.append(Stat.F1_Score(sgd.predict(testset[i]), test_labels))

trainT = np.asarray(predictTrains).T.tolist()
testT = np.asarray(predictTests).T.tolist()

predictTrain = []
for i in range(len(data[0])):
    probPos = 0
    probNeg = 0
Example no. 13
                subtract_mean=mean_color,
                swap_channels=swap_channels)

# 2: Load some weights into the model.

# TODO: Set the path to the weights you want to load.
weights_path = './VGG_ILSVRC_16_layers_fc_reduced.h5'

model.load_weights(weights_path, by_name=True)

# 3: Instantiate an optimizer and the SSD loss function and compile the model.
#    If you want to follow the original Caffe implementation, use the preset SGD
#    optimizer, otherwise I'd recommend the commented-out Adam optimizer.

#adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False)

ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

model.compile(optimizer=sgd, loss=ssd_loss.compute_loss)

# 1: Instantiate two `DataGenerator` objects: One for training, one for validation.

# Optional: If you have enough memory, consider loading the images into memory for the reasons explained above.

train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)

# 2: Parse the image and label lists for the training and validation datasets. This can take a while.

# TODO: Set the paths to the datasets here.
Example no. 14
                mean = train_ch.factratebase.mean()  # Compute the mean
                std = train_ch.factratebase.std()  # Standard deviation

                train_ch = train_ch[train_ch.factratebase < mean + 3 * std]

                # NNLS_model
                columns, coef_ = models.NNLS_model(
                    train_ch, train_ch.factratebase.values)

                # CatBoost
                regr = models.CatBoost_model(train_ch[columns].values,
                                             train_ch.factratebase.values)

                # SGD
                stoch_grad_desc_weights, stoch_errors_by_iter = SGD.stochastic_gradient_descent(
                    train_ch[columns].values, train_ch.factratebase.values,
                    coef_)

                logger.info(
                    str(list(zip(columns, coef_, stoch_grad_desc_weights))))

                # Compute the forecast with the 3 models
                test_ch['Pred_CatBoost'] = regr.predict(
                    test_ch[columns].values)
                test_ch['Pred_SGD'] = models.predict(stoch_grad_desc_weights,
                                                     test_ch[columns])
                test_ch['Pred_NNLS'] = models.predict(coef_, test_ch[columns])
                test_ch['Today'] = (
                    start_predict_day +
                    datetime.timedelta(days=3)).strftime("%Y-%m-%d")
Example no. 15
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import random
import SGDUtil
import SGD

x, y = SGDUtil.make_regression(30)

plt.figure(1)
plt.plot(x, y, marker='o', linestyle='None')
# linestyle='None' removes the connecting line
plt.show()

sgd = SGD.SGD(x, y)  # assuming the SGD module exposes an SGD class
dict_result = sgd.process(epoch=250, w=np.array([30., -40.]), lr=0.2)

w_list = dict_result['w']
loss_list = dict_result['loss']
step_list = dict_result['step']

plt.figure(1)
plt.xlabel('step(epoch)')
plt.ylabel('loss')
plt.plot(step_list, loss_list, color='orange')
plt.show()

t_w0, t_w1 = w_list[len(w_list) - 1]
t_loss = loss_list[len(loss_list) - 1]
print("t_w0: ", t_w0, "t_w1: ", t_w1, "t_loss: ", t_loss)
Example no. 16
import SGD
SGD.problem_1(silent=True)
Example no. 17
## result for Classification error
mean_class = [[], []]
std_dev_class = [[], []]

## train SGD and gather the result for sigma1 = 0.05
for sets in train_sets_sigma1:
    log_loss = []
    log_loss_mean = 0
    log_loss_std  = 0
    log_loss_min = 300
    log_excess = 0
    class_error = []
    class_error_mean = 0
    class_std = 0
    for train_set in sets:
        sgd = s.logSGD(train_set, test_set1_sigma1,1)
        sgd.computeLearnRate(rho_1, M_1)
        sgd.learn()
        sgd.output()
        ##calculate error and loss
        loss = sgd.log_risk_average()
        log_loss.append(loss)
        error = sgd.class_error_average()
        class_error.append(error)
        if loss < log_loss_min:
            log_loss_min = loss
    ##compute the estimate
    log_loss_mean = np.mean(log_loss)
    class_error_mean = np.mean(class_error)
    log_loss_std = np.std(log_loss)
    class_std = np.std(class_error)
Example no. 18
print("Spliting data...")

# Split the data!
valid_ratings, train, test, valid_users, valid_items = split_data(
    ratings,
    num_items_per_user,
    num_users_per_item,
    min_num_ratings=0,
    p_test=0.1)

print("Normalizing training...")
# For every item, we normalize the values
norm_train, means = normalize(train)

print("Finding MF SGD...")
# We get the prediction for the training data given and the properties
sgd = SGD(norm_train,
          test,
          means,
          gamma=0.001,
          num_features=10,
          lambda_user=0.1,
          lambda_item=0.1,
          max_it=20)

print("Denormalizing")
# Once we have a prediction we have to denormalize the value
prediction = denormalize(predictions[-1], means)

# We create the final_submission.csv in ../data
submit(prediction, 'final_submission')
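For context, a matrix-factorization SGD of the kind the SGD(...) call above presumably runs updates one observed rating at a time, using the gamma, lambda_user and lambda_item values passed in. A generic sketch of a single update (not this repository's implementation):

import numpy as np

def mf_sgd_step(U, V, d, n, rating, gamma, lambda_user, lambda_item):
    # One SGD update for a single observed rating r[d, n] ~ U[d] . V[n]
    err = rating - U[d] @ V[n]
    u_old = U[d].copy()
    U[d] += gamma * (err * V[n] - lambda_user * U[d])
    V[n] += gamma * (err * u_old - lambda_item * V[n])
    return err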
from sklearn import cross_validation

mortality = mortality_loader.doItTogether()

cv = cross_validation.KFold(len(mortality), n_folds=10)
errors_cv = []
for i in xrange(100):
    error_cv = []
    for trainCV, testCV in cv:
        training_data = map(mortality.__getitem__, trainCV)
        test_data = map(mortality.__getitem__, testCV)
        errors_epoch, errors_batch, ws, bs = SGD.SGD(
            training_data=training_data,
            epochs=4,
            batch_size=53,
            stepsize=0.08,
            init_w=None,
            init_b=None,
            test_data=test_data,
            regular=0.99,
            decay=1)
        error_cv.append(errors_epoch[-1])
    errors_cv.append(np.mean(error_cv))

print np.nanmin(errors_cv)

# slices = [mortality[i::10] for i in xrange(10)]
#
# for j in xrange(len(slices)):
#     errors_epoch, errors_batch, ws, bs = SGD.SGD(mortality, 4, 1, 0.05, init_w=None,init_b=None, test_data=slices[j], regular=None, decay=0.98)
Example no. 20
    print(
        '----------------------------------------------------------------------------------------------'
    )
    print('choose your classification method:')
    print('1 -----> Bayes')
    print('2 -----> Random Forest')
    print('3 -----> Logistic Regression')
    print('4 -----> Boost')
    print('5 -----> KNN')
    print('6 -----> SVM')
    print('7 -----> SGD')
    print('0 -----> exit this program')
    M = input('Method number = ')
    if M == '1':
        Bayes.GBC(method, norm, selection)
    if M == '2':
        Random_Forest.RandomF(method, norm, selection)
    if M == '3':
        LogicalRegression.LogicR(method, norm, selection)
    if M == '4':
        Boost.Ada(method, norm, selection)
    if M == '5':
        KNN.KClassifier(method, norm, selection)
    if M == '6':
        SVMC.support(method, norm, selection)
    if M == '7':
        SGD.SGD(method, norm, selection)
    if M == '0':
        print('The classification progress finished.')
        break
Example no. 21
def main():
    # Generate data
    np.random.seed(0)
    n = 1000
    X = 2.0 * np.random.rand(n, 1)

    # parameters
    w1 = 3.0
    w2 = 4.5
    # noisy data
    y = w1 + w2 * X + np.random.randn(n, 1)

    X_b = np.c_[np.ones((n, 1)), X]  # add 1 to each instance
    # save data and x to files to be used later to calculate objectives and gradients
    np.savetxt('test1_data.txt', y)
    np.savetxt('test1_x.txt', X_b)

    # select the algorithm to run
    # acceptable terms: SGD, SGDmomentum, SGDnesterov, AdaGrad, RMSprop, Adam, Adamax, Adadelta, Nadam, minibatchSGD, SAG, SVRG
    alg = 'Adam'

    # initial parameter
    w10 = 2.0
    w20 = 0.5
    theta = np.array([w10, w20])
    R = objFun(theta)  # initial objective
    it = 0  # set iteration counter to 0
    maxIt = 2500  # maximum iteration
    dR = gradFun(theta)  # initial gradient
    if alg == 'SGD':
        # Stochastic Gradient Descent
        eta = 0.0025  # learning rate
        opt = sgd.SGD(obj=R,
                      grad=dR,
                      eta=eta,
                      param=theta,
                      iter=it,
                      maxiter=maxIt,
                      objFun=objFun,
                      gradFun=gradFun)  # initialize
        opt.performIter()  # perform iterations
        thetaHist = opt.getParamHist()
    elif alg == 'SGDmomentum':
        # Stochastic Gradient Descent with momentum
        eta = 0.001  # learning rate
        opt = sgd.SGD(obj=R,
                      grad=dR,
                      eta=eta,
                      param=theta,
                      iter=it,
                      maxiter=maxIt,
                      objFun=objFun,
                      gradFun=gradFun,
                      momentum=0.9)  # initialize
        opt.performIter()  # perform iterations
        thetaHist = opt.getParamHist()
    elif alg == 'SGDnesterov':
        # Stochastic Gradient Descent with Nesterov momentum
        eta = 0.001  # learning rate
        opt = sgd.SGD(obj=R,
                      grad=dR,
                      eta=eta,
                      param=theta,
                      iter=it,
                      maxiter=maxIt,
                      objFun=objFun,
                      gradFun=gradFun,
                      momentum=0.9,
                      nesterov=True)  # initialize
        opt.performIter()  # perform iterations
        thetaHist = opt.getParamHist()
    elif alg == 'AdaGrad':
        # AdaGrad
        eta = 0.25  # learning rate
        opt = sgd.AdaGrad(gradHist=0.0,
                          obj=R,
                          grad=dR,
                          eta=eta,
                          param=theta,
                          iter=it,
                          maxiter=maxIt,
                          objFun=objFun,
                          gradFun=gradFun)  # initialize
        opt.performIter()  # perform iterations
        thetaHist = opt.getParamHist()
    elif alg == 'RMSprop':
        # RMSprop
        eta = 0.9  # learning rate
        opt = sgd.RMSprop(gradHist=0.0,
                          rho=0.1,
                          obj=R,
                          grad=dR,
                          eta=eta,
                          param=theta,
                          iter=it,
                          maxiter=maxIt,
                          objFun=objFun,
                          gradFun=gradFun)  # initialize
        opt.performIter()  # perform iterations
        thetaHist = opt.getParamHist()
    elif alg == 'Adam':
        # Adam
        eta = 0.025  # learning rate
        opt = sgd.Adam(m=0.0,
                       v=0.0,
                       beta1=0.9,
                       beta2=0.999,
                       obj=R,
                       grad=dR,
                       eta=eta,
                       param=theta,
                       iter=it,
                       maxiter=maxIt,
                       objFun=objFun,
                       gradFun=gradFun)  # initialize
        opt.performIter()  # perform iterations
        thetaHist = opt.getParamHist()
    elif alg == 'Adamax':
        # Adamax
        eta = 0.025  # learning rate
        opt = sgd.Adamax(m=0.0,
                         u=0.0,
                         beta1=0.9,
                         beta2=0.999,
                         obj=R,
                         grad=dR,
                         eta=eta,
                         param=theta,
                         iter=it,
                         maxiter=maxIt,
                         objFun=objFun,
                         gradFun=gradFun)  # initialize
        opt.performIter()  # perform iterations
        thetaHist = opt.getParamHist()
    elif alg == 'Adadelta':
        # Adadelta
        eta = 1.0  # learning rate
        opt = sgd.Adadelta(gradHist=0.0,
                           updateHist=0.0,
                           rho=0.99,
                           obj=R,
                           grad=dR,
                           eta=eta,
                           param=theta,
                           iter=it,
                           maxiter=maxIt,
                           objFun=objFun,
                           gradFun=gradFun)  # initialize
        opt.performIter()  # perform iterations
        thetaHist = opt.getParamHist()
    elif alg == 'Nadam':
        # Nadam
        eta = 0.01  # learning rate
        opt = sgd.Nadam(m=0.0,
                        v=0.0,
                        beta1=0.9,
                        beta2=0.999,
                        obj=R,
                        grad=dR,
                        eta=eta,
                        param=theta,
                        iter=it,
                        maxiter=maxIt,
                        objFun=objFun,
                        gradFun=gradFun)  # initialize
        opt.performIter()  # perform iterations
        thetaHist = opt.getParamHist()
    elif alg == 'minibatchSGD':
        # mini batch stochastic gradient descent
        eta = 0.025  # learning rate
        opt = sgd.minibatchSGD(nSamples=10,
                               nTotSamples=n,
                               newGrad=0.0,
                               obj=R,
                               grad=dR,
                               eta=eta,
                               param=theta,
                               iter=it,
                               maxiter=maxIt,
                               objFun=objFun,
                               gradFun=batchGradFun)  # initialize
        opt.performIter()  # perform iterations
        thetaHist = opt.getParamHist()
    elif alg == 'SAG':
        # stochastic average gradient descent
        eta = 0.0025  # learning rate
        opt = sgd.SAG(nSamples=20,
                      nTotSamples=n,
                      obj=R,
                      grad=dR,
                      eta=eta,
                      param=theta,
                      iter=it,
                      maxiter=maxIt,
                      objFun=objFun,
                      gradFun=batchGradFun)  # initialize
        opt.performIter()  # perform iterations
        thetaHist = opt.getParamHist()
    elif alg == 'SVRG':
        # stochastic variance reduced gradient descent
        eta = 0.004
        opt = sgd.SVRG(nTotSamples=n,
                       innerIter=10,
                       outerIter=200,
                       option=1,
                       obj=R,
                       grad=dR,
                       eta=eta,
                       param=theta,
                       iter=it,
                       maxiter=maxIt,
                       objFun=objFun,
                       gradFun=batchGradFun)
        opt.performOuterIter()
        thetaHist = opt.getParamHist()
    else:
        raise ValueError(
            'No such algorithm is in the module.\n Please use one of the following options:\nSGD, SGDmomentum, SGDnesterov, AdaGrad, RMSprop, Adam, Adamax, Adadelta, Nadam, minibatchSGD, SAG, SVRG'
        )

    # Plot the results
    matplotlib.rcParams['xtick.direction'] = 'out'
    matplotlib.rcParams['ytick.direction'] = 'out'
    delta = 0.025
    w1 = np.arange(-2.0, 10.0, delta)
    w2 = np.arange(-2.0, 10.0, delta)
    Xx, Yy = np.meshgrid(w1, w2)
    nx = np.shape(Xx)
    Z = np.zeros(nx)
    for i in range(nx[0]):
        for j in range(nx[1]):
            Z[i, j] = (np.linalg.norm(y - Xx[i, j] - Yy[i, j] * X, 2))**2 / n

    plt.figure()
    levels = np.arange(0, 40, 4)
    CS = plt.contour(Xx,
                     Yy,
                     Z,
                     levels,
                     origin='lower',
                     linewidths=2,
                     extent=(-2, 10, -2, 10))
    #plt.clabel(CS, inline=1, fontsize=10)
    # Thicken one of the contour lines (levels[6]).
    zc = CS.collections[6]
    plt.setp(zc, linewidth=4)

    plt.clabel(
        CS,
        levels[1::2],  # label every second level
        inline=1,
        fmt='%1.1f',
        fontsize=10)
    im = plt.imshow(Z,
                    interpolation='bilinear',
                    origin='lower',
                    cmap=cm.Wistia,
                    extent=(-2, 10, -2, 10))

    # make a colorbar
    plt.colorbar(im, shrink=0.8, extend='both')
    plt.plot(thetaHist[0, :], thetaHist[1, :], 'r.', linewidth=6)
    titl = opt.alg + ' with a learning rate ' + str(eta)
    plt.title(titl)
    return opt
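Since the script selects alg = 'Adam', here is a minimal reference sketch of the generic Adam update rule itself, for comparison with whatever sgd.Adam implements (the module's internals are not shown here):

import numpy as np

def adam_step(theta, grad, m, v, t, eta=0.025, beta1=0.9, beta2=0.999, eps=1e-8):
    # One Adam update; t is the 1-based step counter
    m = beta1 * m + (1 - beta1) * grad          # first-moment estimate
    v = beta2 * v + (1 - beta2) * grad ** 2     # second-moment estimate
    m_hat = m / (1 - beta1 ** t)                # bias correction
    v_hat = v / (1 - beta2 ** t)
    return theta - eta * m_hat / (np.sqrt(v_hat) + eps), m, v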
Example no. 22
import random

#Author: Tom Camenzind
#Citations: Data, technique from Richard Socher's Treebank Analysis dataset / paper.

lambda_reg = config.lambda_reg
lambda_L = config.lambda_L

#Create Train, Dev,Test instances from file
temp = DataInit.getInstances(config.max_train_inst, config.max_dev_inst, config.max_test_inst)
training_instances, dev_instances, test_instances, word_index, index_word = temp
LANG_SIZE = len(word_index)


#run SGD on the data
errors, W, Ws, L, errors_avg_log, errors_total_log = SGD.runSGD(training_instances, dev_instances, LANG_SIZE, lambda_reg, lambda_L)
print "above was dev errors; below is test errors, d=%d, root_x_factor = %d " % (config.d, config.root_x_factor)
test_errors = SGD.getErrors(training_instances, test_instances, W, Ws, L)
SGD.printErrors(test_errors)


'''
Make plots of the error from SGD 
'''
import matplotlib.pyplot as plt 
def myplot(error_log, msg):
    plt.plot(error_log)
    plt.xlabel("Number of iterations")
    plt.ylabel("Error")
    plt.title(msg)
    plt.show()
Example no. 23
File: main.py Project: e6990/Python
    plt.contourf(xx1, xx2, Z, alpha = 0.5, cmap = cmap);
    plt.xlim(x1_min,x1_max);
    plt.ylim(x2_min,x2_max);
    
    # plot the class samples
    for idx,sample in enumerate(np.unique(Y)):
        plt.scatter(X[Y == sample,0],X[Y == sample,1],alpha = 0.8, color = cmap(idx),marker = markers[idx],label = idx);
    

df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',header = None);
y = df.iloc[0:100,4].values;
y = np.where(y == 'Iris-setosa',1,-1);
x = df.iloc[0:100,[0,2]].values;
positive = np.where(y == 1);
negative = np.where(y == -1);
plt.scatter(x[positive,0], x[positive,1], color = 'red', marker = 'o', label = 'setosa');
plt.scatter(x[negative,0], x[negative,1], color = 'blue', marker = '*', label = 'versicolor');
plt.xlabel('petal length');
plt.ylabel('sepal length');
plt.legend(loc='upper left');
plt.show();
npp = SGD.GradientDescent(step = 0.01,n_iter = 50);
npp.fix(x,y);
decision_regions(x,y,classifier = npp);
'''
plt.scatter(range(1,len(npp.error)+1),npp.error,color = 'green', marker = 'o');
plt.xlabel('number of sample');
plt.ylabel('error');
plt.show();
'''
Example no. 24
import SGD
SGD.problem_5(silent=True)
Example no. 25
import plot
import SGD
import configurations
import numpy as np

dots_x = np.linspace(-10, 10, 10)
dots_y = np.linspace(-30, 30, 10)

dots = np.concatenate([[dots_x, dots_y]]).transpose()

plot = plot.Plot()
sdg = SGD.StochasticGradientDecent()
print(
    sdg.grad_descent_2d(configurations.config['low'],
                        configurations.config['high'],
                        callback=plot.add_dots))
plot.plot_function()
plot.create_animation()
plot.show_animation()
# plot.save_animation('multi_dots_diff_fig_gcd')
Example no. 26
            #best_g0 = g0
            #best_C = C
            #best_sigma = sigma
            best_r = r
                                                  
            print("Best result so far >>",best_f1,epoch,r)#,g0,C,sigma)
                  
#print("best g0:", best_g0)
#print("best C:", best_C)
#print("best sigma:", best_sigma)
print("best epoch:", best_epoch)
print("best r:",best_r)
'''
###

sgd = SGD.SGD(epochs=best_epoch, W0=[0] * len(data[0]),
              r=best_r)  #,gamma0=best_g0,sigma=best_sigma,C=best_C)
sgd.fit(data, data_labels)

predictTrain = sgd.predict(data)
predictTest = sgd.predict(testset)

print("Accuracy for training set:")
print(Stat.Accuracy(predictTrain, data_labels))

print("F1 score for training set:")
print(Stat.F1_Score(predictTrain, data_labels))

print("Precision for training set:")
print(Stat.Precision(predictTrain, data_labels))

print("Recall for training set:")