def train(self, N_minibatches, learning_rate, n_epochs, decay_rate=0):
    """Run ``n_epochs`` of mini-batch gradient descent on ``self.X`` / ``self.Y``.

    Every epoch requests a fresh set of mini-batch indices from
    ``SGD.minibatch`` and, for each mini-batch, runs a feed-forward pass, a
    back-propagation pass and an in-place update of ``self.weights`` and
    ``self.biases``.

    Args:
        N_minibatches: Number of mini-batches per epoch; asserted not to
            exceed the number of rows of ``self.X``.
        learning_rate: First argument forwarded to ``self.learning_rate_func``.
        n_epochs: Number of epochs to run; also added to ``self.total_epochs``.
        decay_rate: Second argument forwarded to ``self.learning_rate_func``.
    """
    # NOTE(review): the original line breaks/indentation of this method were
    # lost; the loop nesting below is reconstructed -- confirm.
    assert N_minibatches <= self.X.shape[0]

    # Keep the running epoch counter up to date.
    self.total_epochs += n_epochs

    for _ in range(n_epochs):
        # Draw a new partition of the data for this epoch.
        batches = SGD.minibatch(self.X, N_minibatches)

        for batch_no in range(N_minibatches):
            # Sampling with replacement would instead use
            # batch_no = np.random.randint(...) here.
            rows = batches[batch_no]
            X_mb, Y_mb = self.X[rows], self.Y[rows]

            # Mini-batch sizes are not constant (see SGD.minibatch).
            M = X_mb.shape[0]

            # Forward pass for the activations, backward pass for the gradients.
            self.__feed_forward(X_mb, M)
            self.__backpropogation(X_mb, Y_mb, M)

            # Learning rate as produced by the configured schedule function.
            lr = self.learning_rate_func(learning_rate, decay_rate)
            step = lr / M

            for layer in range(self.N_layers + 1):
                # Compute both deltas before touching either parameter set.
                weight_delta = (self.weights[layer] * self.momentum
                                - step * self.cost_weight_gradient[layer])
                bias_delta = (self.biases[layer] * self.momentum
                              - step * self.cost_bias_gradient[layer])

                self.weights[layer] += weight_delta
                self.biases[layer] += bias_delta
def fit(self, training_file, output_file, word_map_file='word_map.bin'):
    """Train the RNTN with SGD and pickle the result to ``output_file``.

    Builds the word map if ``word_map_file`` does not exist, loads the
    training trees, creates ``self.rntn`` and ``self.sgd``, runs
    ``self.optim_epochs`` optimisation epochs and finally writes three
    pickled objects to ``output_file``: the argument/value pairs, the SGD
    cost list and the RNTN parameter stack.

    Args:
        training_file: Training file name; ``self.data_folder`` is prepended
            when loading the trees.
        output_file: Path of the pickle file to (over)write.
        word_map_file: Path of the word map; created when missing.
    """
    # NOTE(review): the word map is built from ``training_file`` without the
    # ``self.data_folder`` prefix that ``load_trees`` uses -- confirm intended.
    if not os.path.exists(word_map_file):
        tr.build_word_map(training_file, word_map_file)
    self.word_map_file = word_map_file

    self.trees = tr.load_trees(self.data_folder + training_file,
                               self.word_map_file)
    self.num_words = len(tr.load_word_map(self.word_map_file))
    self.rntn = RNTN.RNTN(self.vect_dim, self.output_dim, self.num_words,
                          self.mini_batch_size)
    self.sgd = SGD.SGD(self.rntn, self.learning_rate, self.mini_batch_size)

    for e in range(self.optim_epochs):
        # Fit model
        # --------------------------
        start = time.time()
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Running epoch %d" % e)
        self.sgd.optimize(self.trees)
        end = time.time()
        print("\nTime per epoch : %f" % (end - start))

    # Save model specifications.
    # NOTE(review): original indentation was lost; the save is assumed to
    # happen once, after the last epoch -- confirm.
    # pickle needs a binary file object: text mode fails on Python 3 and can
    # corrupt the stream on Windows.
    with open(output_file, 'wb') as fid:
        pickle.dump([(i, self.values[i]) for i in self.args][1:], fid)
        pickle.dump(self.sgd.cost_list, fid)
        pickle.dump(self.rntn.stack, fid)
def main(dataType):
    """Compare SGD, OBFGS and NOBFGS test accuracy over several batch sizes.

    Loads either the synthetic data set (``dataType == "Synthetic"``) or
    ``heart.csv``, maps the labels to {-1, 1}, appends a bias column, splits
    into train/test, trains each optimiser for every batch size and plots the
    resulting test accuracies.

    Args:
        dataType: ``"Synthetic"`` selects ``features.txt`` / ``labels.txt``;
            any other value selects ``heart.csv``.
    """
    print(f"Solving the {dataType} problem")

    # ---------------------------- Data selection ---------------------------- #
    if dataType == "Synthetic":
        X = np.loadtxt("features.txt")
        y = np.loadtxt("labels.txt")
    else:
        data = pd.read_csv("heart.csv")
        X = np.array(data.iloc[:, 0:13])
        y = np.array(data['target'])
    # NOTE(review): original indentation was lost; the shuffle is assumed to
    # apply to both data sets rather than only to heart.csv -- confirm.
    X, y = shuffle(X, y, random_state=0)

    # Ground truths go from {0, 1} to {-1, 1}.
    y = np.where(y == 1, 1, -1)

    # A trailing column of ones plays the role of the bias term.
    bias_column = np.ones((len(X), 1))
    X = np.concatenate((X, bias_column), axis=1)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42)
    training_data = (X_train, y_train)

    def test_accuracy(W):
        # Accuracy of the weights W on the held-out split.
        return accuracy_score(y_test, predict(W, X_test))

    # Train the classifiers (minimise the loss) with varying batch sizes.
    batch_sizes = [1, 2, 4, 6, 8, 16, 32, 64, 128]
    sgd_accuracies, obfgs_accuracies, nobfgs_accuracies = [], [], []
    for size in batch_sizes:
        # Stochastic Gradient Descent
        sgd_accuracies.append(test_accuracy(
            SGD.Minimize(loss, loss_gradient, 0.001, 50, size, training_data)))
        # Stochastic BFGS
        obfgs_accuracies.append(test_accuracy(
            OBFGS.Minimize(loss, loss_gradient, 50, size, training_data)))
        # Nesterov Stochastic BFGS
        nobfgs_accuracies.append(test_accuracy(
            NOBFGS.Minimize(loss, loss_gradient, 50, size, training_data)))

    # ------------------------------- Plotting ------------------------------- #
    plt.figure(figsize=(10, 5))
    plt.title("Accuracy per batch size")
    plt.xlabel("Batch size")
    plt.ylabel("Accuracy")
    curves = (
        (sgd_accuracies, "SGD"),
        (obfgs_accuracies, "sBFGS"),
        (nobfgs_accuracies, "nsBFGS"),
    )
    for accuracies, curve_label in curves:
        plt.plot(batch_sizes, accuracies, label=curve_label)
    plt.legend(loc="lower right")
    plt.show()
def logreg(x, y, M, init_w, n_epochs, learning_rate, momentum, lambd=None):
    """Logistic (softmax) regression for multiclass categorization.

    Uses momentum SGD. Each epoch splits ``x`` into ``M`` mini-batches via
    ``SGD.minibatch`` and performs ``M`` updates, each on a mini-batch drawn
    at random (with replacement) from that split.

    Args:
        x: Design matrix, indexed row-wise by the mini-batch index sets.
        y: Targets aligned with ``x``; subtracted from the class
            probabilities, so presumably one-hot encoded -- confirm.
        M: Number of mini-batches per epoch.
        init_w: Initial weights; not modified in place.
        n_epochs: Number of epochs.
        learning_rate: Step size; the gradient is also divided by the
            mini-batch size.
        momentum: Factor applied to the previous weight increment.
        lambd: Optional L2 penalty strength; ``None`` disables the penalty.

    Returns:
        The weights after training (``init_w`` itself when ``n_epochs`` is 0).
    """
    # Background on the cost function / gradient used below:
    #
    # prob = np.exp(X @ theta) / np.sum(np.exp(X @ theta), axis=1)
    #   prob.shape = [examples, classes]. Category probabilities as given by
    #   the softmax function.
    # cost_function = -np.sum(Y * np.log(prob) + (1 - Y) * np.log(1 - prob))
    #   Scalar, full cross-entropy cost function. The last term is not really
    #   necessary: with softmax there is an implicit penalty for giving
    #   nonzero probabilities to false categories. This version is a little
    #   more aggressive in punishing confidence in wrong labels, but is far
    #   clunkier and possibly numerically unstable.
    # cost_function = -np.sum(Y * np.log(prob))
    #   The preferred cost function when using softmax. Also given as
    #   Softmax_loss in CostFunctions.py.
    # shorthand = prob * (Y / prob - (1 - Y) / (1 - prob))
    #   Helper matrix for vectorizing the cost gradients,
    #   shape = [examples, classes]. Remove the last term if using
    #   Softmax_loss as cost function.
    # cost_gradients = X.T @ (prob * np.sum(shorthand, axis=1)) - X.T @ shorthand
    #   Derivatives of the cost wrt thetas, shape = [predictors, classes].
    # NOTE! The expressions for the full cross-entropy gradients above have
    # not been double-checked and should be treated as highly suspect. The
    # simpler Softmax_loss version implemented below is, however, pretty safe.
    w = init_w
    dw = np.zeros(w.shape)
    for epoch in range(n_epochs):
        mb = SGD.minibatch(x, M)  # Split x into M minibatches
        for i in range(M):
            # Pick out a random mini-batch index
            k = np.random.randint(M)
            X, Y = x[mb[k]], y[mb[k]]

            # Softmax probabilities. Subtracting the row-wise maximum leaves
            # the result mathematically unchanged but keeps np.exp from
            # overflowing (inf / inf -> nan) for large logits.
            logits = X @ w
            shifted = logits - np.max(logits, axis=1, keepdims=True)
            exp_shifted = np.exp(shifted)
            prob = exp_shifted / np.sum(exp_shifted, axis=1, keepdims=True)

            # cost_function = -np.sum(Y * np.log(prob)) gives:
            grad = X.T @ (prob - Y)

            # Add l2 penalty. Identity test: `!= None` is non-idiomatic and
            # turns element-wise if an array is ever passed.
            if lambd is not None:
                grad += 2 * lambd * w

            # Momentum update of the weights.
            dw = momentum * dw - learning_rate * grad / X.shape[0]
            w = w + dw
    return w
def SGDM_score_func(X_training, y_training, X_test, y_test):
    """Fit weights with ``SGD.SGDM`` on the training data; return the test MSE.

    The hyper-parameters (``M``, ``init_w``, ``n_epochs``, ``learning_rate``,
    ``momentum``, ``cost_gradient``, ``lambd``) are read from the enclosing
    scope at call time.
    """
    hyper_parameters = dict(
        M=M,
        init_w=init_w,
        n_epochs=n_epochs,
        learning_rate=learning_rate,
        momentum=momentum,
        cost_gradient=cost_gradient,
        lambd=lambd,
    )
    weights = SGD.SGDM(x=X_training, y=y_training, **hyper_parameters)

    # Linear prediction on the held-out data, scored against the targets.
    predictions = X_test @ weights
    return stat_tools.MSE(y_test, predictions)
def fit(self, word_map_file='word_map.bin'):
    """Train the RNTN on ``self.opts.training_file`` with SGD.

    Builds the word map if ``word_map_file`` does not exist, loads the
    training trees, creates ``self.rntn`` and ``self.sgd`` from ``self.opts``
    and runs ``self.opts.optim_epochs`` optimisation epochs, printing the
    wall-clock time of each.

    Args:
        word_map_file: Path of the word map; created when missing.
    """
    data_train_file = self.opts.training_file

    # Single-argument print(...) behaves the same on Python 2 and 3, whereas
    # the print statement is a SyntaxError on Python 3.
    print('================\nTRAINING\n================')

    # If the word map file does not exist, create it.
    # NOTE(review): built from the bare file name, while load_trees below
    # prepends self.opts.data_folder -- confirm intended.
    if not os.path.exists(word_map_file):
        tr.build_word_map(data_train_file, word_map_file)
    self.word_map_file = word_map_file

    # Load trees and set the RNTN.
    self.trees = tr.load_trees(self.opts.data_folder + data_train_file,
                               self.word_map_file)
    self.num_words = len(tr.load_word_map(self.word_map_file))
    self.rntn = RNTN.RNTN(vec_dim=self.opts.vect_dim,
                          output_dim=self.opts.output_dim,
                          num_words=self.num_words,
                          mini_batch_size=self.opts.mini_batch_size)
    self.sgd = optimizer.SGD(self.rntn, self.opts.learning_rate,
                             self.opts.mini_batch_size)

    for e in range(self.opts.optim_epochs):
        # Fit model.
        # After the training phase, model specifications
        # are in self.rntn.stack.
        # --------------------------
        start = time.time()
        print("Running epoch %d" % e)
        self.sgd.optimize(self.trees)
        end = time.time()
        print("Time per epoch : %f" % (end - start))
__author__ = 'computer'
import sys
import numpy as np
import SGD
import LoadData

# Fit the weights on the training file with full gradient descent.
dataDic, labelDic = LoadData.loadData("horseColicTraining.txt")
w = SGD.gd(dataDic, labelDic, alpha = 0.001, epochs = 1000)
# Stochastic alternative:
#w = SGD.sgd(dataDic, labelDic, alpha = 0.001, epochs = 500)

# Score the held-out file with the learned weights.
dataDic, labelDic = LoadData.loadData("horseColicTest.txt")
h = np.mat(dataDic).dot(w)

# A sample counts as correct when prediction and label fall on the same side
# of the 0.5 threshold.
cnt = 0
for i in range(len(labelDic)):
    if h[i] >= 0.5 and labelDic[i] >= 0.5:
        cnt += 1
    elif h[i] < 0.5 and labelDic[i] <= 0.5:
        cnt += 1

# float() forces true division so the accuracy is not truncated to 0 or 1
# when the script is run under Python 2; it is a no-op on Python 3.
print(float(cnt) / len(labelDic))
b = np.random.rand(class_num, 1) """ ### Part1: SGD --------------------------------------- """ t = 100 landa = [0.01, 0.05] batch_size = 1 fig, (ax1, ax2, ax3) = plt.subplots(1, 3) for lr in landa: print("Run SGD with learning rate: {}.".format(lr)) sgd = SGD.sgd(train_data, test_data, w, b, batch_size=batch_size, lr=lr, t=t, gamma=gamma) sgd.run() sgd.plot_loss(ax1, label='lr: ' + str(lr)) sgd.plot_eval_data_accuracy(ax2, label='lr: ' + str(lr)) sgd.plot_steps_variance(ax3, label='lr: ' + str(lr)) ax3.set_yscale('log') ax1.title.set_text('Loss') ax2.title.set_text('Accuracy') ax3.title.set_text('Variance') ax1.legend() ax2.legend()
''' ''' #存放参数监控 b2_value_list=[] b1_value_list=[] w1_value_list=[] w2_value_list=[] value={'w1':w1_value_list,'w2':w2_value_list,'b1':b1_value_list,'b2':b2_value_list} '''#现在这个没用了 iter_per_epoch = max(train_size / batch_size, 1) network = simpleConvNet.SimpleConvNet() #参数就用默认了 #network=Newtwolayecrnet.newtwolayernet(input_size=784,hidden_size=50,output_size=10,batch_size=100) #优化器 optimizer = sgd.SGD(learning_rate) #学习率设置0.002 #optimizer=momentum.Momentum(learning_rate)#学习率设置0.01 #optimizer=adagrad.AdaGrad(learning_rate) #optimizer=adam.Adam() #optimizer=nesterov.Nesterov() for i in range(iters_num): batch_mask = np.random.choice(train_size, batch_size) train_images_batch = train_images[batch_mask] train_lables_batch = train_lables[batch_mask] #通过 误差 反向传播算法求 梯度 grad = network.gradient(train_images_batch, train_lables_batch) '''w1_list.append(np.mean(grad['W1'])) b1_list.append(np.mean(grad['b1'])) w2_list.append(np.mean(grad['W2']))
+model.add(Conv2D(64, (3, 3), activation='relu')) +# Второй слой подвыборки +model.add(MaxPooling2D(pool_size=(2, 2))) +# Слой регуляризации Dropout +model.add(Dropout(0.25)) +# Слой преобразования данных из 2D представления в плоское +model.add(Flatten()) +# Полносвязный слой для классификации +model.add(Dense(512, activation='relu')) +# Слой регуляризации Dropout +model.add(Dropout(0.5)) +# Выходной полносвязный слой +model.add(Dense(nb_classes, activation='softmax')) + +# Задаем параметры оптимизации +sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True) +model.compile(loss='categorical_crossentropy', + optimizer=sgd, + metrics=['accuracy']) +# Обучаем модель +model.fit(X_train, Y_train, + batch_size=batch_size, + epochs=nb_epoch, + validation_split=0.1, + shuffle=True, + verbose=2) + +# Оцениваем качество обучения модели на тестовых данных +scores = model.evaluate(X_test, Y_test, verbose=0) +print("Точность работы на тестовых данных: %.2f%%" % (scores[1]*100))
## result for Classification error mean_class = [[], []] std_dev_class = [[], []] ## train SGD and gather the result for sigma1 = 0.05 for sets in train_sets_sigma1: log_loss = [] log_loss_mean = 0 log_loss_std = 0 log_loss_min = 300 log_excess = 0 class_error = [] class_error_mean = 0 class_std = 0 for train_set in sets: sgd = s.logSGD(train_set, test_set2_sigma1,2) sgd.computeLearnRate(rho_2, M_2) sgd.learn() sgd.output() ##calculate error and loss loss = sgd.log_risk_average() log_loss.append(loss) error = sgd.class_error_average() class_error.append(error) if(loss<log_loss_min): log_loss_min = loss ##compute the estimate log_loss_mean = np.mean(log_loss) class_error_mean = np.mean(class_error) log_loss_std = np.std(log_loss) class_std = np.std(class_error)
best_r = 0.01 best_epoch = 25 print("best r:", best_r) print("best epoch:", best_epoch) ### predictTrains = [] predictTests = [] dataAccuracy = [] testAccuracy = [] for i in range(5): sgd = SGD.SGD(r=best_r,epochs=best_epoch,W0=[0]*len(data[i][0])) sgd.fit(data[i],data_labels) predictTrains.append(sgd.predict(data[i])) predictTests.append(sgd.predict(testset[i])) dataAccuracy.append(Stat.F1_Score(sgd.predict(data[i]), data_labels)) testAccuracy.append(Stat.F1_Score(sgd.predict(testset[i]), test_labels)) trainT = np.asarray(predictTrains).T.tolist() testT = np.asarray(predictTests).T.tolist() predictTrain = [] for i in range(len(data[0])): probPos = 0 probNeg = 0
subtract_mean=mean_color, swap_channels=swap_channels) # 2: Load some weights into the model. # TODO: Set the path to the weights you want to load. weights_path = './VGG_ILSVRC_16_layers_fc_reduced.h5' model.load_weights(weights_path, by_name=True) # 3: Instantiate an optimizer and the SSD loss function and compile the model. # If you want to follow the original Caffe implementation, use the preset SGD # optimizer, otherwise I'd recommend the commented-out Adam optimizer. #adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False) ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) model.compile(optimizer=sgd, loss=ssd_loss.compute_loss) # 1: Instantiate two `DataGenerator` objects: One for training, one for validation. # Optional: If you have enough memory, consider loading the images into memory for the reasons explained above. train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None) val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None) # 2: Parse the image and label lists for the training and validation datasets. This can take a while. # TODO: Set the paths to the datasets here.
mean = train_ch.factratebase.mean() # Надем среднее std = train_ch.factratebase.std() # Стандартное отклонение train_ch = train_ch[train_ch.factratebase < mean + 3 * std] # NNLS_model columns, coef_ = models.NNLS_model( train_ch, train_ch.factratebase.values) # CatBoost regr = models.CatBoost_model(train_ch[columns].values, train_ch.factratebase.values) # SGD stoch_grad_desc_weights, stoch_errors_by_iter = SGD.stochastic_gradient_descent( train_ch[columns].values, train_ch.factratebase.values, coef_) logger.info( str(list(zip(columns, coef_, stoch_grad_desc_weights)))) # Посчитаем прогноз по 3 моделям test_ch['Pred_CatBoost'] = regr.predict( test_ch[columns].values) test_ch['Pred_SGD'] = models.predict(stoch_grad_desc_weights, test_ch[columns]) test_ch['Pred_NNLS'] = models.predict(coef_, test_ch[columns]) test_ch['Today'] = ( start_predict_day + datetime.timedelta(days=3)).strftime("%Y-%m-%d")
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import random
import SGDUtil
import SGD

# Generate the regression samples and show them as a scatter of points.
x, y = SGDUtil.make_regression(30)
plt.figure(1)
plt.plot(x, y, marker='o', linestyle='None')  # linestyle='None' removes the connecting line
plt.show()

# `import SGD` binds the module, so calling `SGD(x, y)` raised
# "TypeError: 'module' object is not callable"; the class has to be reached
# through the module.
# NOTE(review): assumes the class inside module SGD is also named SGD -- confirm.
sgd = SGD.SGD(x, y)
dict_result = sgd.process(epoch=250, w=np.array([30., -40.]), lr=0.2)
w_list = dict_result['w']
loss_list = dict_result['loss']
step_list = dict_result['step']

# Loss as a function of the training step.
plt.figure(1)
plt.xlabel('step(epoch)')
plt.ylabel('loss')
plt.plot(step_list, loss_list, color='orange')
plt.show()

# Report the last recorded weights and loss.
t_w0, t_w1 = w_list[-1]
t_loss = loss_list[-1]
print("t_w0: ", t_w0, "t_w1: ", t_w1, "t_loss: ", t_loss)
import SGD

# Entry point: run problem 1 from the SGD module.
# NOTE(review): silent=True presumably suppresses output -- confirm in SGD.
SGD.problem_1(silent=True)
## result for Classification error mean_class = [[], []] std_dev_class = [[], []] ## train SGD and gather the result for sigma1 = 0.05 for sets in train_sets_sigma1: log_loss = [] log_loss_mean = 0 log_loss_std = 0 log_loss_min = 300 log_excess = 0 class_error = [] class_error_mean = 0 class_std = 0 for train_set in sets: sgd = s.logSGD(train_set, test_set1_sigma1,1) sgd.computeLearnRate(rho_1, M_1) sgd.learn() sgd.output() ##calculate error and loss loss = sgd.log_risk_average() log_loss.append(loss) error = sgd.class_error_average() class_error.append(error) if(loss<log_loss_min): log_loss_min = loss ##compute the estimate log_loss_mean = np.mean(log_loss) class_error_mean = np.mean(class_error) log_loss_std = np.std(log_loss) class_std = np.std(class_error)
print("Spliting data...") # Split the data! valid_ratings, train, test, valid_users, valid_items = split_data( ratings, num_items_per_user, num_users_per_item, min_num_ratings=0, p_test=0.1) print("Normalizing training...") # For every item, we normalize the values norm_train, means = normalize(train) print("Finding MF SGD...") # We get the prediction for the training data given and the properties sgd = SGD(norm_train, test, means, gamma=0.001, num_features=10, lambda_user=0.1, lambda_item=0.1, max_it=20) print("Denormalizing") # Once we have a prediction we have to denormalize the value prediction = denormalize(predictions[-1], means) # We create the final_submission.csv in ../data submit(prediction, 'final_submission')
from sklearn import cross_validation mortality = mortality_loader.doItTogether() cv = cross_validation.KFold(len(mortality), n_folds=10) errors_cv = [] for i in xrange(100): error_cv = [] for trainCV, testCV in cv: training_data = map(mortality.__getitem__, trainCV) test_data = map(mortality.__getitem__, testCV) errors_epoch, errors_batch, ws, bs = SGD.SGD( training_data=training_data, epochs=4, batch_size=53, stepsize=0.08, init_w=None, init_b=None, test_data=test_data, regular=0.99, decay=1) error_cv.append(errors_epoch[-1]) errors_cv.append(np.mean(error_cv)) print np.nanmin(errors_cv) # slices = [mortality[i::10] for i in xrange(10)] # # for j in xrange(len(slices)): # errors_epoch, errors_batch, ws, bs = SGD.SGD(mortality, 4, 1, 0.05, init_w=None,init_b=None, test_data=slices[j], regular=None, decay=0.98)
print( '----------------------------------------------------------------------------------------------' ) print('choose your classification method:') print('1 -----> Bayes') print('2 -----> Random Forest') print('3 -----> Logical Regression') print('4 -----> Boost') print('5 -----> KNN') print('6 -----> SVM') print('7 -----> SGD') print('0 -----> exit this program') M = input('Method number = ') if M == '1': Bayes.GBC(method, norm, selection) if M == '2': Random_Forest.RandomF(method, norm, selection) if M == '3': LogicalRegression.LogicR(method, norm, selection) if M == '4': Boost.Ada(method, norm, selection) if M == '5': KNN.KClassifier(method, norm, selection) if M == '6': SVMC.support(method, norm, selection) if M == '7': SGD.SGD(method, norm, selection) if M == '0': print('The classification progress finished.') break
def main():
    """Fit a noisy straight line with one optimiser and plot its trajectory.

    Generates synthetic data, writes it to ``test1_data.txt`` and
    ``test1_x.txt``, runs the optimiser selected by the local ``alg`` variable
    from the ``sgd`` module and draws the parameter history on top of the
    contours of the mean squared error surface.

    Returns:
        The optimiser instance after it has been run.

    Raises:
        ValueError: If ``alg`` is not one of the supported algorithm names.
    """
    # Generate data
    np.random.seed(0)
    n = 1000
    X = 2.0 * np.random.rand(n, 1)
    # parameters
    w1, w2 = 3.0, 4.5
    # noisy data
    y = w1 + w2 * X + np.random.randn(n, 1)
    X_b = np.c_[np.ones((n, 1)), X]  # add 1 to each instance

    # save data and x to files to be used later to calculate objectives and
    # gradients
    np.savetxt('test1_data.txt', y)
    np.savetxt('test1_x.txt', X_b)

    # select the algorithm to run
    # acceptable terms: SGD, SGDmomentum, SGDnesterov, AdaGrad, RMSprop, Adam,
    # Adamax, Adadelta, Nadam, minibatchSGD, SAG, SVRG
    alg = 'Adam'

    # initial parameter
    w10, w20 = 2.0, 0.5
    theta = np.array([w10, w20])
    R = objFun(theta)  # initial objective
    it = 0  # iteration counter starts at 0
    maxIt = 2500  # maximum iteration
    dR = gradFun(theta)  # initial gradient

    # Keyword arguments that every optimiser below receives unchanged.
    shared = dict(obj=R, grad=dR, param=theta, iter=it, maxiter=maxIt,
                  objFun=objFun)

    if alg == 'SGD':  # Stochastic Gradient Descent
        eta = 0.0025  # learning rate
        opt = sgd.SGD(eta=eta, gradFun=gradFun, **shared)
    elif alg == 'SGDmomentum':  # SGD with momentum
        eta = 0.001
        opt = sgd.SGD(eta=eta, gradFun=gradFun, momentum=0.9, **shared)
    elif alg == 'SGDnesterov':  # SGD with Nesterov momentum
        eta = 0.001
        opt = sgd.SGD(eta=eta, gradFun=gradFun, momentum=0.9, nesterov=True,
                      **shared)
    elif alg == 'AdaGrad':
        eta = 0.25
        opt = sgd.AdaGrad(gradHist=0.0, eta=eta, gradFun=gradFun, **shared)
    elif alg == 'RMSprop':
        eta = 0.9
        opt = sgd.RMSprop(gradHist=0.0, rho=0.1, eta=eta, gradFun=gradFun,
                          **shared)
    elif alg == 'Adam':
        eta = 0.025
        opt = sgd.Adam(m=0.0, v=0.0, beta1=0.9, beta2=0.999, eta=eta,
                       gradFun=gradFun, **shared)
    elif alg == 'Adamax':
        eta = 0.025
        opt = sgd.Adamax(m=0.0, u=0.0, beta1=0.9, beta2=0.999, eta=eta,
                         gradFun=gradFun, **shared)
    elif alg == 'Adadelta':
        eta = 1.0
        opt = sgd.Adadelta(gradHist=0.0, updateHist=0.0, rho=0.99, eta=eta,
                           gradFun=gradFun, **shared)
    elif alg == 'Nadam':
        eta = 0.01
        opt = sgd.Nadam(m=0.0, v=0.0, beta1=0.9, beta2=0.999, eta=eta,
                        gradFun=gradFun, **shared)
    elif alg == 'minibatchSGD':  # mini batch stochastic gradient descent
        eta = 0.025
        opt = sgd.minibatchSGD(nSamples=10, nTotSamples=n, newGrad=0.0,
                               eta=eta, gradFun=batchGradFun, **shared)
    elif alg == 'SAG':  # stochastic average gradient descent
        eta = 0.0025
        opt = sgd.SAG(nSamples=20, nTotSamples=n, eta=eta,
                      gradFun=batchGradFun, **shared)
    elif alg == 'SVRG':  # stochastic variance reduced gradient descent
        eta = 0.004
        opt = sgd.SVRG(nTotSamples=n, innerIter=10, outerIter=200, option=1,
                       eta=eta, gradFun=batchGradFun, **shared)
    else:
        raise ValueError(
            'No such algorithm is in the module.\n Please use one of the following options:\nSGD, SGDmomentum, SGDnesterov, AdaGrad, RMSprop, Adam, Adamax, Adadelta, Nadam, minibatchSGD, SAG, SVRG'
        )

    # SVRG is driven through its outer loop; every other optimiser through
    # performIter().
    if alg == 'SVRG':
        opt.performOuterIter()
    else:
        opt.performIter()
    thetaHist = opt.getParamHist()

    # Plot the results
    for axis_key in ('xtick.direction', 'ytick.direction'):
        matplotlib.rcParams[axis_key] = 'out'

    delta = 0.025
    w1 = np.arange(-2.0, 10.0, delta)
    w2 = np.arange(-2.0, 10.0, delta)
    Xx, Yy = np.meshgrid(w1, w2)
    nx = np.shape(Xx)
    n_rows, n_cols = nx[0], nx[1]

    # Mean squared residual of the line (intercept Xx, slope Yy) at every
    # grid point.
    Z = np.zeros(nx)
    for i in range(n_rows):
        for j in range(n_cols):
            residual = y - Xx[i, j] - Yy[i, j] * X
            Z[i, j] = (np.linalg.norm(residual, 2))**2 / n

    plot_extent = (-2, 10, -2, 10)
    plt.figure()
    levels = np.arange(0, 40, 4)
    CS = plt.contour(Xx, Yy, Z, levels, origin='lower', linewidths=2,
                     extent=plot_extent)
    #plt.clabel(CS, inline=1, fontsize=10)

    # Draw the contour at index 6 with a thicker line.
    zc = CS.collections[6]
    plt.setp(zc, linewidth=4)

    # label every second level
    plt.clabel(CS, levels[1::2], inline=1, fmt='%1.1f', fontsize=10)

    im = plt.imshow(Z, interpolation='bilinear', origin='lower',
                    cmap=cm.Wistia, extent=plot_extent)
    # make a colorbar
    plt.colorbar(im, shrink=0.8, extend='both')

    # Overlay the sequence of parameter estimates.
    plt.plot(thetaHist[0, :], thetaHist[1, :], 'r.', linewidth=6)
    plt.title(opt.alg + ' with a learning rate ' + str(eta))
    return opt
import random
#Author: Tom Camenzind
#Citations: Data, technique from Richard Socher's Treebank Analysis dataset / paper.

lambda_reg = config.lambda_reg
lambda_L = config.lambda_L

#Create Train, Dev,Test instances from file
temp = DataInit.getInstances(config.max_train_inst, config.max_dev_inst, config.max_test_inst)
training_instances, dev_instances, test_instances, word_index, index_word = temp
LANG_SIZE = len(word_index)

#run SGD on the data
errors, W, Ws, L, errors_avg_log, errors_total_log = SGD.runSGD(training_instances, dev_instances, LANG_SIZE, lambda_reg, lambda_L)

# Single-argument print(...) behaves the same on Python 2 and 3.
print("above was dev errors; below is test errors, d=%d, root_x_factor = %d " % (config.d, config.root_x_factor))
test_errors = SGD.getErrors(training_instances, test_instances, W, Ws, L)
SGD.printErrors(test_errors)

# Make plots of the error from SGD
import matplotlib.pyplot as plt


def myplot(error_log, msg):
    """Plot ``error_log`` against its index and show the figure.

    Args:
        error_log: Sequence of error values to plot.
        msg: Title of the figure.
    """
    # Plot the argument: the original always plotted the module-level
    # ``errors_avg_log`` and silently ignored ``error_log``.
    plt.plot(error_log)
    plt.xlabel("Number of iterations")
    plt.ylabel("Error")
    plt.title(msg)
    plt.show()
plt.contourf(xx1, xx2, Z, alpha = 0.5, cmap = cmap); plt.xlim(x1_min,x1_max); plt.ylim(x2_min,x2_max); #plot the class smaple for idx,sample in enumerate(np.unique(Y)): plt.scatter(X[Y == sample,0],X[Y == sample,1],alpha = 0.8, color = cmap(idx),marker = markers[idx],label = idx); df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',header = None); y = df.iloc[0:100,4].values; y = np.where(y == 'Iris-setosa',1,-1); x = df.iloc[0:100,[0,2]].values; positive = np.where(y == 1); negative = np.where(y == -1); plt.scatter(x[positive,0], x[positive,1], color = 'red', marker = 'o', label = 'setosa'); plt.scatter(x[negative,0], x[negative,1], color = 'blue', marker = '*', label = 'versicolor'); plt.xlabel('petal length'); plt.ylabel('sepal length'); plt.legend(loc='upper left'); plt.show(); npp = SGD.GradientDescent(step = 0.01,n_iter = 50); npp.fix(x,y); decision_regions(x,y,classifier = npp); ''' plt.scatter(range(1,len(npp.error)+1),npp.error,color = 'green', marker = 'o'); plt.xlabel('number of sample'); plt.ylabel('error'); plt.show(); '''
import SGD

# Entry point: run problem 5 from the SGD module.
# NOTE(review): silent=True presumably suppresses output -- confirm in SGD.
SGD.problem_5(silent=True)
import plot
import SGD
import configurations
import numpy as np

# Ten (x, y) sample points, one per row.
dots_x = np.linspace(-10, 10, 10)
dots_y = np.linspace(-30, 30, 10)
stacked_dots = np.concatenate([[dots_x, dots_y]])
dots = stacked_dots.transpose()

# NOTE: from here on the name `plot` refers to the Plot instance, not the module.
plot = plot.Plot()
sdg = SGD.StochasticGradientDecent()

# Run the 2-D descent between the configured bounds; plot.add_dots is handed
# over as the callback.
descent_result = sdg.grad_descent_2d(
    configurations.config['low'],
    configurations.config['high'],
    callback=plot.add_dots,
)
print(descent_result)

# Draw the function, then build and display the animation.
plot.plot_function()
plot.create_animation()
plot.show_animation()
# plot.save_animation('multi_dots_diff_fig_gcd')
#best_g0 = g0 #best_C = C #best_sigma = sigma best_r = r print("Best result so far >>",best_f1,epoch,r)#,g0,C,sigma) #print("best g0:", best_g0) #print("best C:", best_C) #print("best sigma:", best_sigma) print("best epoch:", best_epoch) print("best r:",best_r) ''' ### sgd = SGD.SGD(epochs=best_epoch, W0=[0] * len(data[0]), r=best_r) #,gamma0=best_g0,sigma=best_sigma,C=best_C) sgd.fit(data, data_labels) predictTrain = sgd.predict(data) predictTest = sgd.predict(testset) print("Accuracy for training set:") print(Stat.Accuracy(predictTrain, data_labels)) print("F1 score for training set:") print(Stat.F1_Score(predictTrain, data_labels)) print("Precision for training set:") print(Stat.Precision(predictTrain, data_labels)) print("Recall for training set:")