def main(): start = time.clock() k_s = 1000 x_vals_s = sgd(10000, fsum, fsumprime, fi, fiprime, -5, 1, k_s) end = time.clock() print "Time-sgd: ", end - start #convert list to numpy array and evaluate function value x_vals_a = np.asarray(x_vals_s) f_vals_s = fsum(x_vals_a) # plot showing f(xi) for each iteration of both methods plt.plot(range(k_s), f_vals_s, 'r-') plt.xlabel('Iteration') plt.ylabel('f(xi)') plt.xticks(np.arange(0, k_s, 100)) plt.title('''f(xi) vs i of Stochastic Gradient Descent''') plt.show() data_750 = [] data_1000 = [] for i in range(30): x_vals_s = sgd(10000, fsum, fsumprime, fi, fiprime, -5, 1, 1000) data_1000.append(fsum(x_vals_s[-1])) for i in range(30): x_vals_s = sgd(10000, fsum, fsumprime, fi, fiprime, -5, 1, 750) data_750.append(fsum(x_vals_s[-1])) print 'SGD 750 iterations, mean: ', np.mean( data_750), 'variance: ', np.var(data_750) print 'SGD 1000 iterations, mean: ', np.mean( data_1000), 'variance: ', np.var(data_1000)
def main():
    """Train a two-class logistic regression on the CSV at DATA_FILEPATH.

    The ``clase`` column is used as the integer label and every other column
    as a feature.  With ``with_validation`` set (the hard-coded default) the
    last 30% of the rows are held out and ``sgd.sgd_with_validation`` is
    called; otherwise ``sgd.sgd`` is called on all rows.  The accuracy over
    the full data set is printed at the end.
    """
    df = pd.read_csv(DATA_FILEPATH)
    print(df.columns)

    # DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
    # .values yields the same ndarray on both old and new pandas.
    x_data = df.drop(["clase"], axis=1).values
    y_data = np.array(list(map(int, df["clase"])), dtype="int32")
    n_samples = x_data.shape[0]
    print(type(y_data))

    x = tensor.matrix(name="x")
    y = tensor.ivector(name="y")
    clf = lr.LogisticRegression(x, x_data.shape[1], 2)

    with_validation = True
    if with_validation:
        val_frac = 0.3
        val_samples = int(n_samples * val_frac)
        train_samples = n_samples - val_samples

        # Rows are split in file order: first block trains, the rest validates.
        x_tr, y_tr = x_data[:train_samples, :], y_data[:train_samples]
        x_tr_sh = theano.shared(x_tr, borrow=True)
        y_tr_sh = theano.shared(y_tr, borrow=True)
        x_val, y_val = (x_data[train_samples:(train_samples + val_samples), :],
                        y_data[train_samples:(train_samples + val_samples)])
        x_val_sh = theano.shared(x_val, borrow=True)
        y_val_sh = theano.shared(y_val, borrow=True)

        print("calling sgd_with_validation")
        sgd.sgd_with_validation(clf,
                                x_tr_sh, y_tr_sh, x_val_sh, y_val_sh,
                                learning_rate=0.01, reg_term=0.0001,
                                batch_size=32, n_epochs=1000,
                                max_its=10000, improv_thresh=0.01,
                                max_its_incr=4,
                                rel_val_tol=1e-3,
                                verbose=True)
    else:
        x_tr_sh = theano.shared(x_data, borrow=True)
        y_tr_sh = theano.shared(y_data, borrow=True)
        print("calling sgd")
        sgd.sgd(clf, x_tr_sh, y_tr_sh, y=y,
                learning_rate=0.01, reg_term=0.0001,
                batch_size=220, rel_tol=2e-3,
                n_epochs=256, verbose=True)

    # Accuracy is measured on every row, including the ones trained on.
    acc = theano.function([x, y], clf.score(y))
    print("accuracy: %.2f%%" % (100 * acc(x_data, y_data)))
def buildClassifier(subset, name, cs, iterations=100, dataDir='data', det=False, verbose=0): if (verbose >= 1): print 'Enter buildClassifier' if det: random.seed(1) evalC=[] bC=0.0 evbC=0.0 mfccs, mfccMatching, _, _, _ = preprocess.preprocess(subset, dataDir=dataDir, verbose=verbose) y = buildLabels(name, mfccMatching) dicL, dicV, dicT = split(mfccMatching, name, verbose=verbose) # learning data xL=[mfccs[k] for k in np.concatenate(dicL.values())] yL=[y[k] for k in np.concatenate(dicL.values())] # validation data xV=[mfccs[k] for k in np.concatenate(dicV.values())] yV=[y[k] for k in np.concatenate(dicV.values())] # testing data xT=[mfccs[k] for k in np.concatenate(dicT.values())] yT=[y[k] for k in np.concatenate(dicT.values())] for c in cs: if (verbose >= 1): print 'Processing C: ', c print 'Learning...' w = sgd.sgd(xL, yL, np.zeros(len(xL[0])+1), iterations, 1, sgd.L, 0.01, c) if (verbose >= 1): print 'Evaluating...' ev=sgd.eval(xV, yV, w[:-1], w[-1]) evalC.append(ev) if (ev > evbC): evbC=ev bC=c if (verbose >= 1): print 'Building classifier with C:', bC, '...' xL2=xL+xV yL2=yL+yV w = sgd.sgd(xL2, yL2, np.zeros(len(xL[0])+1), iterations, 1, sgd.L, 0.01, bC) if (verbose >= 1): print 'Evaluating classifier...' ev=sgd.eval(xT, yT, w[:-1], w[-1]) def f(wavFile): x = preprocess.mfcc(wavFile) tot=len(x) ok=0.0 for i in xrange(tot): ok += np.dot(w[:-1], x[i]) + w[-1] return int(ok/tot > 0) if (verbose >= 1): print 'Exit buildClassifier' return f, ev, evalC
def main():
    """Run SGD 30 times at 750 and at 1000 iterations and print statistics.

    In every repetition the 750-iteration run is executed before the
    1000-iteration run.  The mean and variance of the last column of the
    stacked results are printed for each setting.
    """
    runs_750, runs_1000 = [], []
    for _ in range(0, 30):
        runs_750.append(
            sgd(fi, fiprime, x0=-5, i_range=maxi, t=1, iteration=750))
        runs_1000.append(
            sgd(fi, fiprime, x0=-5, i_range=maxi, t=1, iteration=1000))

    # One row per repetition; the last column holds each run's final value.
    last_750 = np.matrix(runs_750)[:, -1]
    last_1000 = np.matrix(runs_1000)[:, -1]

    print("sgd complete")
    print("750 iterations and 30 times, mean = %.5f, var = %.5f" % (
        np.mean(last_750), np.var(last_750)))
    print("1000 iterations and 30 times, mean = %.5f, var = %.5f" % (
        np.mean(last_1000), np.var(last_1000)))
def word2vec_model(args, dataset):
    """Train word vectors with ``sgd`` and return the resulting matrix.

    The initial matrix has ``2 * nWords`` rows: the first ``nWords`` rows are
    uniform in ``[-0.5, 0.5) / vector_size`` and the remaining rows are zero.
    Hyper-parameters and save options are read from ``args``.  The elapsed
    training time is logged at INFO level.
    """
    tokens = dataset.tokens()
    nWords = len(tokens)
    startTime = time.time()

    random_half = (np.random.rand(nWords, args.vector_size) - 0.5) / args.vector_size
    zero_half = np.zeros((nWords, args.vector_size))
    initial_vectors = np.concatenate((random_half, zero_half), axis=0)

    def objective(vec):
        return word2vec_sgd_wrapper(
            skipgram, tokens, vec, dataset, args.window_size,
            negSamplingCostAndGradient)

    trained = sgd(
        objective, initial_vectors, args.learning_rate, args.iterations,
        None, args.use_saved, args.save_every, args.vector_path)
    # Note that normalization is not called here. This is not a bug,
    # normalizing during training loses the notion of length.
    logging.info("training took %d seconds" % (time.time() - startTime))

    # Re-stack the two halves along axis 0; rows keep their original order.
    upper = trained[:nWords, :]
    lower = trained[nWords:, :]
    return np.concatenate((upper, lower), axis=0)
def main():
    """Time SGD, gradient descent and Newton's method, then plot their paths.

    For each method the elapsed ``time.clock()`` time and ``fsum`` at the
    last returned value are printed.  The three returned sequences are drawn
    in a stacked three-row figure.
    """
    tic = time.clock()
    sgd_path = sgd(fi, fiprime, x0=-5, i_range=maxi, t=1, iteration=1000)
    toc = time.clock()
    print("SGD time: %f" % (toc - tic))
    print("fsum = %f" % (fsum(sgd_path[-1])))

    tic = time.clock()
    gd_path = GradientDescent(fsum, fsumprime, -5, epson=0.0001)
    toc = time.clock()
    print("GD time: %f" % (toc - tic))
    print("fsum = %f" % (fsum(gd_path[-1])))

    tic = time.clock()
    newton_path = NewtonMethod(fsum, fsumprime, fsumprimeprime, -5,
                               epson=0.0001)
    toc = time.clock()
    print("Newton time: %f" % (toc - tic))
    print("fsum = %f" % (fsum(newton_path[-1])))

    # Subplot codes 311, 312, 313: three rows, one column.
    panels = ((sgd_path, 'r'), (gd_path, 'b'), (newton_path, 'm'))
    for code, (path, colour) in enumerate(panels, 311):
        plt.subplot(code)
        plt.plot(path, colour)
    plt.show()
def task3():
    """Run ``sgd`` once per data set with that data set's step size and
    mini-batch setting, for at most 200 epochs each."""
    sgd_params = {
        'GMM': {'alpha': 1.0, 'mb_num': 200},
        'Peaks': {'alpha': 0.1, 'mb_num': 250},
        'SwissRoll': {'alpha': 0.08, 'mb_num': 250},
    }

    for data_set in sgd_params:
        settings = sgd_params[data_set]
        X_tr, y_tr, X_te, y_te = get_data(data_set)
        # The return value of sgd is intentionally discarded.
        sgd(X_tr, y_tr, X_te, y_te,
            max_epochs=200,
            data_set=data_set,
            **settings)
def main():
    """Run SGD for 1000 iterations and plot ``fsum`` at every returned value."""
    iterates = sgd(fi, fiprime, x0=-5, i_range=maxi, t=1, iteration=1000)
    print("sgd complete")

    print("Plotting, may take a while")
    # fsum is evaluated one iterate at a time.
    objective_values = [fsum(point) for point in iterates]

    plt.plot(objective_values)
    plt.xlabel("Number of iterations(i)")
    plt.ylabel("fsum(x_i)")
    plt.show()
def fit(self, X, qmatrix=None, learn_b=True):
    """Fit the Q-matrix, skills and bias to ``X`` and return the fit error.

    ``X`` is a 2-D array that may contain NaN.  When ``qmatrix`` is None it
    is drawn from a normal distribution centred on 0.5 so that its entries
    start near the 0 - 1 range wanted for a Q-matrix.  When ``learn_b`` is
    true the bias starts at the NaN-ignoring column means of ``X`` (0 for
    all-NaN columns), otherwise at zeros.

    Sets ``self.qmatrix``, ``self.skills``, ``self.b`` and
    ``self.prediction``; returns ``nan_rmse(X, self.prediction)``.
    """
    n_rows, n_cols = X.shape
    n_concepts = self.concepts

    # Draw order (Q-matrix first, then skills) is kept for reproducibility.
    if qmatrix is None:
        qmatrix = np.random.normal(loc=0.5, scale=0.1,
                                   size=(n_concepts, n_cols))
    skills = np.random.normal(scale=0.1, size=(n_rows, n_concepts))

    masked = np.ma.masked_array(X, np.isnan(X))
    if learn_b:
        b = np.mean(masked, axis=0).filled(0)
    else:
        b = np.zeros(n_cols)

    fitted = sgd(X, self.alpha, int(n_concepts), self.n_iters,
                 qmatrix, skills, b, learn_b)
    self.qmatrix, self.skills, self.b = fitted

    self.prediction = np.dot(self.skills, self.qmatrix) + self.b
    return nan_rmse(X, self.prediction)
def main(): start = time.clock() x_vals_s = sgd(10000, fsum, fsumprime, fi, fiprime, -5, 1, 1000) end = time.clock() print "Time-sgd: ", end - start start = time.clock() [x_vals_g, k_g] = gradientdescent(fsum, fsumprime, -5, 0.0001, 0.1, 0.6) end = time.clock() print "Time-gradient descent: ", end - start start = time.clock() [x_vals_n, k_n] = newtonsmethod(fsum, fsumprime, fsumprimeprime, -5, 0.0001, 0.1, 0.6) end = time.clock() print "Time-newton's method: ", end - start print 'SGD fsum(x*): ', fsum(x_vals_s[-1]) print 'Gradient Descent fsum(x*): ', fsum(x_vals_g[-1]) print '''Newton's method fsum(x*): ''', fsum(x_vals_n[-1])
def my_lslr(dataset, max_epochs, alpha):
    """Fit coefficients by repeatedly passing ``sgd`` over the rows of ``dataset``.

    Parameters:
        dataset: object exposing ``iloc`` and ``iterrows()`` (presumably a
            pandas DataFrame -- confirm against the caller).
        max_epochs: number of full passes over the rows.
        alpha: forwarded to ``sgd`` as its third argument.

    Returns:
        ``(coeffs, losses)`` -- the final coefficient list and one loss value
        per epoch.
    """
    # Initialize local variables
    # One coefficient per column of the first row, all starting at zero.
    coeffs = [0.0 for i in range(len(dataset.iloc[0,:]))]
    losses = []
    epochs = 0
    # Iterate over the dataset until max epochs has been reached.
    while epochs < max_epochs:
        for index, data in dataset.iterrows():
            # Run the SGD algorithm.
            coeffs, y_real, y_pred = sgd(data, coeffs, alpha, 0)
        # Record loss for this epoch.
        # NOTE(review): only the last row's y_real / y_pred are used here, and
        # both are undefined if the dataset has no rows -- confirm intent.
        losses.append(utilities.loss(y_real, y_pred))
        # Stop conditions.
        epochs += 1
        if epochs >= max_epochs:
            # Leaving here means the final epoch number is never printed.
            break
        print(epochs)
    return coeffs, losses
def train(train_x, train_y, val_x, val_y, d, hl, ol, config, lf):
    """Dispatch training to the optimiser named in ``config["optimiser"]``.

    All seven keys (``epochs``, ``learning_rate``, ``weight_decay``,
    ``init_strategy``, ``optimiser``, ``batch_size``, ``ac``) are read from
    ``config`` up front, so a missing key raises ``KeyError`` regardless of
    the optimiser chosen.  Returns whatever the selected optimiser returns,
    or ``None`` when the optimiser name is not recognised.
    """
    print("Function Invoked: train")

    epochs = config["epochs"]
    eta = config["learning_rate"]
    alpha = config["weight_decay"]
    init_strategy = config["init_strategy"]
    optimiser = config["optimiser"]
    batch_size = config["batch_size"]
    ac = config["ac"]

    # Leading positional arguments shared by every optimiser.
    common = (train_x, train_y, val_x, val_y, d, hl, ol, ac, lf, epochs, eta,
              init_strategy)

    # Full-batch style optimisers take the weight decay only.
    if optimiser == "vgd":
        return vgd.vgd(*(common + (alpha,)))
    if optimiser == "sgd":
        return sgd.sgd(*(common + (alpha,)))

    # Mini-batch optimisers with weight decay.
    if optimiser == "mgd":
        return mgd.mgd(*(common + (batch_size, alpha)))
    if optimiser == "nag":
        return nag.nag(*(common + (batch_size, alpha)))

    # Adaptive optimisers take the batch size but no weight decay.
    if optimiser == "adam":
        return adam.adam(*(common + (batch_size,)))
    if optimiser == "rmsprop":
        return rmsprop.rmsprop(*(common + (batch_size,)))
    if optimiser == "nadam":
        return nadam.nadam(*(common + (batch_size,)))

    return None
for c in [0,1]: for i in xrange(N): for j in [0,1]: x[c][i][j] = averages[c] + random.uniform(-1,1) res = (np.concatenate([x[0],x[1]]),y) return res iterations=800 eps = 0.01 eta = 1 C=1 averages = [1,3] sample=100 (xApp, yApp) = genData(averages, sample) k=np.dot w1 = sgd.sgd(xApp, yApp, np.zeros(len(xApp[0])+1),iterations,eta,sgd.L,eps,C) w,b = w1[:-1],w1[-1] # w=np.dot(alp, xApp) yPred=[np.dot(w, xApp[i])+b for i in xrange(2*sample)] # print np.multiply(yApp, yPred) def makeTitle(iterations,eta,eps): res = 'SGD with ' res = res + str(iterations) + ' iterations, ' res = res + 'eta=' + str(eta)+ ', epsilon=' + str(eps) res = res + '\n' return res
# Sweep over noise levels (delta) and learning rates.  For every pair, SGD is
# run once on the exact right-hand side b and `exp` times on noisy copies of b.
for delta in [0.05, 0.1, 0.2]:
    for learning_rate in [0.005, 0.01, 0.02]:
        print("starting: " + str(delta) + ", " + str(learning_rate))
        # Zero-mean Gaussian noise with standard deviation delta; the argument
        # is ignored.
        noise_b = lambda x: np.random.normal(0,delta)
        exp_points = int(iters/(datapoints*100))
        # Per-experiment losses for raw and averaged ("m") iterates.
        xs_exact_loss = np.zeros((exp,exp_points+1))
        xs_noisy_loss = np.zeros((exp,exp_points+1))
        xs_exactm_loss = np.zeros((exp,exp_points+1))
        xs_noisym_loss = np.zeros((exp,exp_points+1))
        # Per-experiment iterates, raw and averaged.
        xs_exact = np.zeros((exp,exp_points+1,dims))
        xs_noisy = np.zeros((exp,exp_points+1,dims))
        xs_exactm = np.zeros((exp,exp_points+1,dims))
        xs_noisym = np.zeros((exp,exp_points+1,dims))
        AT = np.transpose(A)
        # The exact-data run happens once per (delta, learning_rate) pair and
        # is copied into every experiment slot below.
        (xs_exact_n) = sgd.sgd(A, b, np.zeros((dims,)), learning_rate, iters, 21200)[1]
        for j in range(exp):
            sys.stdout.write("running: " + str(j) + '\r')
            sys.stdout.flush()
            bn = generate_noise(datapoints, noise_b)
            b2 = b + bn
            (xs_noisy_n) = sgd.sgd(A, b2, np.zeros((dims,)), learning_rate, iters, 21200)[1]
            xs_exact[j] = xs_exact_n
            xs_noisy[j] = xs_noisy_n
            for i in range(exp_points+1):
                # Average of the recorded iterates from index i//2 through i.
                xs_exactm[j,i] = np.mean(xs_exact[j,(i//2):i+1,:], axis=0)
                xs_noisym[j,i] = np.mean(xs_noisy[j,(i//2):i+1,:], axis=0)
            # Squared norm of A applied to (x - iterate), one value per
            # recorded point; x is defined outside this fragment.
            xs_exact_loss[j] = np.linalg.norm(np.dot(x-xs_exact[j],AT), axis=1)**2
            xs_noisy_loss[j] = np.linalg.norm(np.dot(x-xs_noisy[j],AT), axis=1)**2
            xs_exactm_loss[j] = np.linalg.norm(np.dot(x-xs_exactm[j],AT), axis=1)**2
            xs_noisym_loss[j] = np.linalg.norm(np.dot(x-xs_noisym[j],AT), axis=1)**2
def validate(self, epochs=200, **kwargs):
    """Train every model with every metric and record per-epoch errors.

    Builds ``self.errors_df``, a float DataFrame whose rows are indexed by
    (metric docstring, *distinguishing model properties) and whose columns
    are the epoch numbers ``1..epochs``.  For each model and each metric the
    model is re-compiled, ``sgd.sgd`` is run, and element ``[1]`` of its
    result is stored in the matching row.  Rows with fewer than two
    non-NaN values are dropped and the frame is written to
    ``../dataframes/tune.csv``.

    Parameters:
    -----------
    epochs: int
        Number of epochs. 200 by default.
    **kwargs: keyword arguments
        Passed to sgd.sgd
    """
    model_properties = [model.property_dict for model in self.models]
    # model_common is not used below.
    model_uniques, model_common = helpers.filter_dicts(model_properties)
    for unique in model_uniques:
        assert len(unique) == len(
            model_uniques[0]
        ), "All models must have the same property types"
        assert len(
            unique
        ) > 0, "Two models with the same property_dict has been sent in"

    index_parameters = helpers.listify_dict_values(model_uniques)
    parameter_names = [key for key in index_parameters]
    model_parameters = [values for _, values in index_parameters.items()]

    # Metric docstrings double as the row labels of the 'Metric' level.
    metric_texts = [metric.__doc__ for metric in self._metrics]
    errors_index = pd.MultiIndex.from_product(
        [metric_texts, *model_parameters],
        names=['Metric', *parameter_names])
    self.errors_df = pd.DataFrame(dtype=float,
                                  index=errors_index,
                                  columns=range(1, epochs + 1))

    # NOTE(review): X_train / X_validate exist only when polynomials are set,
    # yet the OLS/Ridge branch below reads them unconditionally -- confirm
    # those models are never used without polynomials.
    if self.polynomials is not None:
        X_train = linear_models.poly_design_matrix(self.polynomials,
                                                   self.data['x_train'])
        X_validate = linear_models.poly_design_matrix(
            self.polynomials, self.data['x_validate'])
    y_train, y_validate = self.data['y_train'], self.data['y_validate']

    # idx is not used below.
    idx = pd.IndexSlice
    for i, (model, model_unique) in enumerate(zip(self.models,
                                                  model_uniques)):
        # 50-character text progress bar, redrawn in place.
        print(
            f"\r |{'='*(i*50//len(self.models))}{' '*(50-i*50//len(self.models))}| {i/len(self.models):.2%}",
            end="",
            flush=True)

        # Linear models get the polynomial design matrix, others raw inputs.
        if model.name == 'OLS' or model.name == 'Ridge':
            x_train, x_validate = X_train, X_validate
        else:
            x_train, x_validate = self.data['x_train'], self.data[
                'x_validate']

        model_indexes = [model_unique[key] for key in index_parameters]

        for j, metric in enumerate(self._metrics):
            # Re-compile before each metric so every run starts fresh
            # (presumably this resets the parameters -- confirm).
            model.compile()
            errors = sgd.sgd(model,
                             x_train,
                             x_validate,
                             y_train,
                             y_validate,
                             epochs=epochs,
                             metric=metric,
                             **kwargs)[1]
            self.errors_df.loc[tuple(
                pd.IndexSlice[s]
                for s in [metric.__doc__, *model_indexes])] = errors
    print("")
    self.errors_df.dropna(thresh=2, inplace=True)
    self.errors_df.to_csv("../dataframes/tune.csv")
def main():
    """Train a two-stage convolutional network, pickle it and print accuracy.

    Data is loaded from DATA_FILEPATH as (train, cv, test) pairs.  A network
    with two conv/pool stages and one fully connected layer is built (or
    unpickled when ``load`` is set), trained with
    ``sgd.sgd_with_validation`` (``with_validation`` is hard-coded to True),
    saved to ``cnn_model.pkl`` and scored on the test split.
    """
    print("loading data...", end="", flush=True)
    data = load_data(DATA_FILEPATH)
    print(" done")
    train_set, cv_set, test_set = data
    x_tr, y_tr = train_set
    x_cv, y_cv = cv_set
    x_te, y_te = test_set
    # Disabled alternative loader, kept as a bare string (no runtime effect).
    """with open("/home/erik/db/data.pkl", "rb") as f: x, y = pickle.load(f) tr = 300 x = np.array(x, dtype="float64") y = np.array(y, dtype="int32") x_tr, y_tr = x[:tr, :], y[:tr] x_cv, y_cv = x[tr:, :], y[tr:] x_te, y_te = x_cv, y_cv"""
    print("\ttrain:", x_tr.shape, y_tr.shape)
    print("\tcv:", x_cv.shape, y_cv.shape)
    print("\ttest:", x_te.shape, y_te.shape)

    x = tensor.matrix(name="x")
    y = tensor.ivector(name="y")

    # Each stage is a (convolution params, pooling params) pair.
    layer_0_params = (
        {#conv
            "n_inp_maps": 1,
            "inp_maps_shape": (28, 28),
            #"inp_maps_shape": (48, 32),
            "n_out_maps": 5,
            #"n_out_maps": 4,
            "filter_shape": (7, 7),
        },
        {#pool
            "shape": (2, 2)
        }
    )
    layer_1_params = (
        {#conv
            #"n_in_maps": 4,
            "n_out_maps": 10,
            #"n_out_maps": 6,
            "filter_shape": (5, 5),
        },
        {#pool
            "shape": (2, 2)
        }
    )
    fully_connected_layer_params = {
        #"n_inp": 10*4*4,
        "n_hidden": 64,
        "n_out": 10
    }

    # NOTE(review): inp is built here but the constructor below receives
    # inp=x in the active code -- confirm whether inp is still needed.
    inp = x.reshape((x.shape[0], 1, 28, 28))
    #inp = x.reshape((x.shape[0], 1, 48, 32))

    load = False
    if load:
        print("loading model...")
        # Unpickling executes arbitrary code; only load files you trust.
        with open("cnn_model.pkl", "rb") as f:
            clf = pickle.load(f)
    else:
        clf = cnn.ConvolutionalNeuralNetwork(
            #inp=inp,
            inp=x,
            conv_pool_layers_params=[
                layer_0_params,
                layer_1_params],
            fully_connected_layer_params=fully_connected_layer_params)

    with_validation = True

    x_tr_sh = theano.shared(x_tr, borrow=True)
    y_tr_sh = theano.shared(y_tr, borrow=True)
    x_cv_sh = theano.shared(x_cv, borrow=True)
    y_cv_sh = theano.shared(y_cv, borrow=True)
    if with_validation:
        print("calling sgd_with_validation", flush=True)
        sgd.sgd_with_validation(clf,
            x_tr_sh, y_tr_sh, x_cv_sh, y_cv_sh,
            #learning_rate=0.003, reg_term=0.03, 95%
            learning_rate=0.003, reg_term=0.03,
            batch_size=100, n_epochs=32,
            max_its=20000, improv_thresh=0.01, max_its_incr=4,
            x=x,
            rel_val_tol=4e-3,
            val_freq="auto",
            verbose=True)
    else:
        print("calling sgd")
        # NOTE(review): this branch passes the raw arrays while the branch
        # above passes the shared variables -- confirm which sgd.sgd expects.
        sgd.sgd(clf,
            x_tr, y_tr,
            learning_rate=0.1,
            reg_term=1,
            batch_size=32,
            rel_tol=2e-3,
            n_epochs=128,
            verbose=True)

    print("saving model...")
    with open("cnn_model.pkl", "wb") as f:
        pickle.dump(clf, f)

    acc = theano.function([clf.inp, y], clf.score(y))
    te_len = x_te.shape[0]
    # Test inputs are reshaped to (samples, 1, 28, 28) before scoring.
    print("accuracy: %.2f%%" % (100*acc(
        np.reshape(x_te, (te_len, 1, 28, 28)),
        #np.reshape(x_te, (te_len, 1, 48, 32)),
        y_te)))
def main(): print "############# Load Datasets ##############" import stanfordSentimentTreebank as sst skip_unknown_words = bool(args.get("--skip")) shuffle_flag = bool(args.get("--shuffle")) datatype = args.get("--datatype") if datatype == 5: # Fine-grained 5-class n_class = 5 elif datatype == 2: # Binary 2-class n_class = 2 # print "skip_unknown_words",skip_unknown_words vocab, index2word, datasets, datasets_all_sentences, funcs = sst.load_stanfordSentimentTreebank_dataset(normalize=True, skip_unknown_words=skip_unknown_words, datatype=datatype) train_set, test_set, dev_set = datasets train_set_sentences, test_set_sentences, dev_set_sentences = datasets_all_sentences get,sentence2ids, ids2sentence = funcs # 関数を読み込み scores, sentences = zip(*train_set_sentences) sentences = [[word for word in sentence.lower().split()] for sentence in sentences] vocab_size = len(vocab) dev_unknown_count = sum([unknown_word_count for score,(ids,unknown_word_count) in dev_set]) test_unknown_count = sum([unknown_word_count for score,(ids,unknown_word_count) in test_set]) train_set = [(score, ids) for score,(ids,unknown_word_count) in train_set] test_set = [(score, ids) for score,(ids,unknown_word_count) in test_set] dev_set = [(score, ids) for score,(ids,unknown_word_count) in dev_set] print "train_size : ", len(train_set) print "dev_size : ", len(dev_set) print "test_size : ", len(test_set) print "-"*30 print "vocab_size: ", len(vocab) print "dev_unknown_words : ", dev_unknown_count print "test_unknown_words : ", test_unknown_count print args # EMB_DIM = 50 EMB_DIM = args.get("--emb_size") vocab_size = len(vocab) feat_map_n_1 = args.get("--feat_map_n_1") feat_map_n_final = args.get("--feat_map_n_final") height = 1 width1 = args.get("--width1") width2 = args.get("--width2") k_top = args.get("--k_top") n_class = n_class alpha = args.get("--alpha") n_epoch = args.get("--n_epoch") dropout_rate0 = args.get("--dropout_rate0") dropout_rate1 = args.get("--dropout_rate1") dropout_rate2 = 
args.get("--dropout_rate2") activation = args.get("--activation") learn = args.get("--learn") number_of_convolutinal_layer = 2 use_regular = bool(args.get("--use_regular")) regular_c = args.get("--regular_c") pretrain = args.get('--pretrain') if pretrain == 'word2vec': print "*Using word2vec" embeddings_W, model = pretrained_embedding.use_word2vec(sentences=sentences, index2word=index2word, emb_dim=EMB_DIM) # -0.5 ~ 0.5で初期化している elif pretrain == 'glove': print "*Using glove" embeddings_W = pretrained_embedding.use_glove(sentences=sentences, index2word=index2word, emb_dim=EMB_DIM, model_file='glove_model/glove_50_iter2900.model') else: embeddings_W = np.asarray( rng.normal(0, 0.05, size = (vocab_size, EMB_DIM)), dtype = theano.config.floatX ) embeddings_W[0,:] = 0 print np.amax(embeddings_W) print np.amin(embeddings_W) # print "*embeddings" print embeddings_W # print bool(embeddings) # input_x = [1, 3, 4, 5, 0, 22, 4, 5] print "############# Model Setting ##############" x = T.imatrix('x') length_x = T.iscalar('length_x') y = T.ivector('y') # the sentence sentiment label embeddings = WordEmbeddingLayer(rng=rng, input=x, vocab_size=vocab_size, embed_dm=EMB_DIM, embeddings=embeddings_W) def dropout(X, p=0.5): if p > 0: retain_prob = 1 - p X *= srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX) # X /= retain_prob return X # number_of_convolutinal_layer = theano.shared(number_of_convolutinal_layer) # dynamic_func = theano.function(inputs=[length_x], outputs=number_of_convolutinal_layer * length_x) # dynamic_func_test = theano.function( # inputs = [length_x], # outputs = dynamic_func(length_x), # ) # print dynamic_func(len([1,2,3])) l1 = DynamicConvFoldingPoolLayer(rng, input = dropout(embeddings.output, p=dropout_rate0), filter_shape = (feat_map_n_1, 1, height, width1), # two feature map, height: 1, width: 2, k_top = k_top, number_of_convolutinal_layer=number_of_convolutinal_layer, index_of_convolitonal_layer=1, length_x=length_x, activation = activation ) 
l1_no_dropout = DynamicConvFoldingPoolLayer(rng, input = embeddings.output, W=l1.W * (1 - dropout_rate0), b=l1.b, filter_shape = (feat_map_n_1, 1, height, width1), # two feature map, height: 1, width: 2, k_top = k_top, number_of_convolutinal_layer=number_of_convolutinal_layer, index_of_convolitonal_layer=1, length_x=length_x, activation = activation ) l2 = DynamicConvFoldingPoolLayer(rng, input = dropout(l1.output, p=dropout_rate1), filter_shape = (feat_map_n_final, feat_map_n_1, height, width2), # two feature map, height: 1, width: 2, k_top = k_top, number_of_convolutinal_layer=number_of_convolutinal_layer, index_of_convolitonal_layer=2, length_x=length_x, activation = activation ) l2_no_dropout = DynamicConvFoldingPoolLayer(rng, input = l1_no_dropout.output, W=l2.W * (1 - dropout_rate1), b=l2.b, filter_shape = (feat_map_n_final, feat_map_n_1, height, width2), # two feature map, height: 1, width: 2, k_top = k_top, number_of_convolutinal_layer=number_of_convolutinal_layer, index_of_convolitonal_layer=2, length_x=length_x, activation = activation ) # l2_output = theano.function( # inputs = [x,length_x], # outputs = l2.output, # # on_unused_input='ignore' # ) # TODO: # check the dimension # input: 1 x 1 x 6 x 4 # out = l2_output( # np.array([input_x], dtype = np.int32), # len(input_x), # ) # test = theano.function( # inputs = [x], # outputs = embeddings.output, # ) # print "--input--" # print np.array([input_x], dtype = np.int32).shape # print "--input embeddings--" # a = np.array([input_x], dtype = np.int32) # print test(a).shape # print "-- output --" # print out # print out.shape # x = T.dscalar("x") # b = T.dscalar("b") # a = 1 # f = theano.function(inputs=[x,b], outputs=b * x + a) # print f(2,2) # expected = (1, feat_map_n, EMB_DIM / 2, k) # assert out.shape == expected, "%r != %r" %(out.shape, expected) ##### Test Part Three ############### # LogisticRegressionLayer ################################# # print "############# LogisticRegressionLayer ##############" 
l_final = LogisticRegression( rng, input = dropout(l2.output.flatten(2), p=dropout_rate2), n_in = feat_map_n_final * k_top * EMB_DIM, # n_in = feat_map_n * k * EMB_DIM / 2, # we fold once, so divide by 2 n_out = n_class, # five sentiment level ) l_final_no_dropout = LogisticRegression( rng, input = l2_no_dropout.output.flatten(2), W = l_final.W * (1 - dropout_rate2), b = l_final.b, n_in = feat_map_n_final * k_top * EMB_DIM, # n_in = feat_map_n * k * EMB_DIM / 2, # we fold once, so divide by 2 n_out = n_class, # five sentiment level ) print "n_in : ", feat_map_n_final * k_top * EMB_DIM # print "n_in = %d" %(2 * 2 * math.ceil(EMB_DIM / 2.)) # p_y_given_x = theano.function( # inputs = [x, length_x], # outputs = l_final.p_y_given_x, # allow_input_downcast=True, # # mode = "DebugMode" # ) # print "p_y_given_x = " # print p_y_given_x( # np.array([input_x], dtype=np.int32), # len(input_x) # ) cost = theano.function( inputs = [x, length_x, y], outputs = l_final.nnl(y), allow_input_downcast=True, # mode = "DebugMode" ) # print "cost:\n", cost( # np.array([input_x], dtype = np.int32), # len(input_x), # np.array([1], dtype = np.int32) # ) print "############# Learning ##############" from sgd import sgd, rmsprop, adagrad, adadelta, adam from regularizer import regularize_l2 layers = [] layers.append(embeddings) layers.append(l1) layers.append(l2) layers.append(l_final) cost = l_final.nnl(y) params = [p for layer in layers for p in layer.params] param_shapes = [l.param_shapes for l in layers] param_grads = [T.grad(cost, param) for param in params] # regularizer setting regularizers = {} regularizers['c'] = regular_c # 2.0, 4.0, 15.0 regularizers['func'] = [None for _ in range(len(params))] if use_regular: regularizers_func = [] regularizers_func.append([regularize_l2(l=0.0001)]) # [embeddings] regularizers_func.append([regularize_l2(l=0.00003), None]) # [W, b] regularizers_func.append([regularize_l2(l=0.000003), None]) # [W, b] 
regularizers_func.append([regularize_l2(l=0.0001), None]) # [logreg_W, logreg_b] regularizers_func = [r_func for r in regularizers_func for r_func in r] regularizers['func'] = regularizers_func # if third conv layer: 1e-5 print embeddings.params print l1.params print l2.params print l_final.params # updates = sgd(cost, l_final.params) # RegE = 1e-4 # print param_grads if learn == "sgd": updates = sgd(cost, params, lr=0.05) elif learn == "adam": updates = adam(loss_or_grads=cost, params=params, learning_rate=alpha, regularizers=regularizers) elif learn == "adagrad": updates = adagrad(loss_or_grads=cost, params=params, learning_rate=alpha, regularizers=regularizers) elif learn == "adadelta": updates = adadelta(loss_or_grads=cost, params=params, regularizers=regularizers) elif learn == "rmsprop": updates = rmsprop(loss_or_grads=cost, params=params, learning_rate=alpha, regularizers=regularizers) train = theano.function(inputs=[x, length_x, y], outputs=cost, updates=updates, allow_input_downcast=True) # predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True) predict = theano.function( inputs = [x, length_x], outputs = T.argmax(l_final_no_dropout.p_y_given_x, axis=1), allow_input_downcast=True, # mode = "DebugMode" ) def b(x_data): return np.array(x_data, dtype=np.int32) def test(test_set): # print "############# TEST ##############" y_pred = [] test_set_y = [] # for train_x, train_y in zip(X_data, Y_data): # print test_set # Accuracy_count = 0 for test_y,test_x in test_set: test_x = b([test_x]) p = predict(test_x, len(test_x))[0] y_pred.append(p) test_set_y.append(test_y) # if test_y == p: # Accuracy_count += 1 # print "*predict :",predict(train_x, len(train_x)), train_y # Accuracy = float(Accuracy_count) / len(test_set) # print " accuracy : %f" % Accuracy, return accuracy_score(test_set_y, y_pred) # print classification_report(test_set_y, y_pred) # train_set_rand = np.ndarray(train_set) train_set_rand = train_set[:] train_cost_sum = 0.0 for epoch 
in xrange(n_epoch): print "== epoch : %d ==" % epoch if shuffle_flag: np.random.shuffle(train_set_rand) # train_set_rand = np.random.permutation(train_set) for i,x_y_set in enumerate(train_set_rand): train_y, train_x = x_y_set train_x = b([train_x]) train_y = b([train_y]) train_cost = train(train_x, len(train_x) , train_y) train_cost_sum += train_cost if i % 1000 == 0 or i == len(train_set)-1: print "i : (%d/%d)" % (i, len(train_set)) , print " (cost : %f )" % train_cost print ' cost :', train_cost_sum print ' train_set : %f' % test(train_set) print ' dev_set : %f' % test(dev_set) print ' test_set : %f' % test(test_set) '''
# Training should happen here # Initialize parameters randomly # Construct the params input_dim = 50 hidden_dim = 50 output_dim = vocabsize dimensions = [input_dim, hidden_dim, output_dim] params = np.random.randn( (input_dim + 1) * hidden_dim + (hidden_dim + 1) * output_dim, ) print(f"#params: {len(params)}") print(f"#train examples: {num_of_examples}") # run SGD params = sgd( lambda vec: lm_wrapper(in_word_index, out_word_index, num_to_word_embedding, dimensions, vec), params, LEARNING_RATE, NUM_OF_SGD_ITERATIONS, None, True, 1000) print(f"training took {time.time() - startTime} seconds") # Evaluate perplexity with dev-data perplexity = eval_neural_lm('data/lm/ptb-dev.txt') print(f"dev perplexity : {perplexity}") # Evaluate perplexity with test-data (only at test time!) if os.path.exists('data/lm/ptb-test.txt'): perplexity = eval_neural_lm('data/lm/ptb-test.txt') print(f"test perplexity : {perplexity}") else: print("test perplexity will be evaluated only at test time!")
test_bs=args.test_batch_size) # make sure to use cudnn.benchmark for second backprop cudnn.benchmark = True # get model and optimizer model = resnet(num_classes=10, depth=args.depth).cuda() print(model) model = torch.nn.DataParallel(model) print(' Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0)) criterion = nn.CrossEntropyLoss() if args.optimizer == 'sgd': optimizer = sgd(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=args.weight_decay) elif args.optimizer == 'adam': optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) elif args.optimizer == 'adamw': print( 'For AdamW, we automatically correct the weight decay term for you! If this is not what you want, please modify the code!' ) args.weight_decay = args.weight_decay / args.lr optimizer = optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) elif args.optimizer == 'adahessian': print(
# Held-out data drawn the same way as the training data: labels are the sign
# of the inner product with the true parameter vector.
A_test = np.random.randn(n, d)
y_test = np.sign(A_test.dot(x_true))

# Scale every training row by its label so the loss only needs data . par.
tmp = lil_matrix((n, n))
tmp.setdiag(y)
data = theano.shared(tmp * A)

# Objective: mean logistic loss plus an L2 penalty, built symbolically.
l2 = 1e-2
par = T.vector()
logistic_term = T.log(1 + T.exp(-T.dot(data, par))).mean()
ridge_term = l2 / 2 * (par**2).sum()
loss = logistic_term + ridge_term
func = theano.function(inputs=[par], outputs=loss)

# The gradient is evaluated on the rows selected by idx only.
idx = T.ivector()
grad = theano.function(
    inputs=[par, idx],
    outputs=T.grad(loss, wrt=par),
    givens={data: data[idx, :]},
)

print('\nBegin to run SGD:')
x = sgd(grad, 1e-3, n, d, phi=lambda k: k, func=func, max_epoch=50)
y_predict = np.sign(A_test.dot(x))
print('Test accuracy: %f' %
      (np.count_nonzero(y_test == y_predict) / n))

print('\nBegin to run SGD-mom:')
x = sgd_mom(grad, 1e-3, n, d, phi=lambda k: k, func=func, max_epoch=50)
y_predict = np.sign(A_test.dot(x))
print('Test accuracy: %f' %
      (np.count_nonzero(y_test == y_predict) / n))
# Context size C = 5 # Reset the random seed to make sure that everyone gets the same results random.seed(31415) np.random.seed(9265) startTime = time.time() wordVectors = np.concatenate( ((np.random.rand(nWords, dimVectors) - 0.5) / dimVectors, np.zeros((nWords, dimVectors))), axis=0) wordVectors = sgd( lambda vec: word2vec_sgd_wrapper(skipgram, tokens, vec, dataset, C, negSamplingLossAndGradient), wordVectors, 0.3, 40000, None, True, PRINT_EVERY=10) # Note that normalization is not called here. This is not a bug, # normalizing during training loses the notion of length. print("sanity check: cost at convergence should be around or below 10") print("training took %d seconds" % (time.time() - startTime)) # concatenate the input and output word vectors wordVectors = np.concatenate( (wordVectors[:nWords, :], wordVectors[nWords:, :]), axis=0) visualizeWords = [ "great", "cool", "brilliant", "wonderful", "well", "amazing", "worth", "sweet", "enjoyable", "boring", "bad", "dumb",
sum = 0 for i in range(0, maxi): sum = sum + fiprimeprime(x, i) return sum if __name__ == "__main__": #this is just to see the function, you don't have to use this plotting code xvals = np.arange(-10, 10, 0.01) # Grid of 0.01 spacing from -10 to 10 yvals = fsum(xvals) # Evaluate function on xvals plt.plot(xvals, yvals) # Create line plot with yvals against xvals #this is the timing code you should use start = time.clock() #my sgd code x = sgd(fi, fiprime, x0=-5, i_range=maxi, t=1, iteration=1000) end = time.clock() print "Result from sgd = %.5f" % (x[-1]) print "xval corresponding to the minimal yval = %.5f" % ( xvals[np.argmin(yvals)]) print "Time: ", end - start plt.show() #show the plot # Plot how does SGD goes # plt.plot(x) # plt.plot([0,1000],[6.4,6.4]) # plt.xlabel("Number of iterations") # plt.ylabel("x") # plt.show()
sgd_accuracy_list = [] svm_accuracy_list = [] logreg_accuracy_list = [] print("Generating accoracy values") for i in range(n_iter): print("Iteration", i) X_train, X_valid, y_train, y_valid = train_test_split(train_data_clean, targets, train_size=0.8, test_size=0.2, shuffle=True) var = sgd(max_df=0.5, reset_train_test_split=True, X_train_arg=X_train, y_train_arg=y_train, X_valid_arg=X_valid, y_valid_arg=y_valid) sgd_accuracy_list.append(var) var = svm(max_df=0.5, reset_train_test_split=True, X_train_arg=X_train, y_train_arg=y_train, X_valid_arg=X_valid, y_valid_arg=y_valid) svm_accuracy_list.append(var) var = log_reg(max_df=0.5, reset_train_test_split=True, X_train_arg=X_train,
def trainSgd(name, dic, x, C, iterations=None):
    """Fit a weight vector with ``sgd.sgd`` for the labels derived from ``name``.

    Args:
        name: Passed to ``build_labels`` together with ``dic`` to produce the
            target labels.
        dic: Passed to ``build_labels`` together with ``name``.
        x: Indexable collection of feature rows; only ``len(x[0])`` is read
            here, the rest is forwarded to ``sgd.sgd``.
        C: Forwarded unchanged as the last positional argument of ``sgd.sgd``
            (presumably a regularisation constant -- confirm against
            ``sgd.sgd``).
        iterations: Iteration count forwarded to ``sgd.sgd``; ``None`` selects
            the default of 10.

    Returns:
        Whatever ``sgd.sgd`` returns (stored as the weight vector ``w``).
    """
    y = build_labels(name, dic)
    # Identity check: ``None`` is a singleton, so ``is`` is the correct test.
    if iterations is None:
        iterations = 10
    # Start from all-zero weights with one more entry than a feature row has
    # (presumably the extra slot is a bias term -- confirm in sgd.sgd).
    initial_weights = np.zeros(len(x[0]) + 1)
    # NOTE(review): the meaning of the positional arguments 1, sgd.L and 0.01
    # is defined by sgd.sgd and is not visible from this function.
    w = sgd.sgd(x, y, initial_weights, iterations, 1, sgd.L, 0.01, C)
    return w
# Build the first network: 64 inputs, a Dense(64) layer with ReLU activations
# and a 10-unit output layer (matches the "64x64x10" label printed below).
model1.addLayer(neural_model.Input(64))
model1.addLayer(neural_model.Dense(64, activations=activations.relus))
# d_func receives three arguments and returns y - a; the third is ignored.
model1.addLayer(neural_model.Output(10, d_func=lambda a, y, _: y - a))
model1.compile()

# Build the second network: same input/output layers, but Dense(32) in between.
model2.addLayer(neural_model.Input(64))
model2.addLayer(neural_model.Dense(32, activations=activations.relus))
model2.addLayer(neural_model.Output(10, d_func=lambda a, y, _: y - a))
model2.compile()

# Train each network with identical settings and keep element [1] of the
# value returned by sgd (presumably the recorded error history -- confirm
# against sgd's return value).
errors_1 = sgd(model1, x_train, x_test, y_train, y_test,
               epochs=5000, epochs_without_progress=500,
               mini_batch_size=40, metric=metrics.accuracy)[1]
# Train model2 here: the previous code passed model1 a second time, so model2
# was never trained and errors_2 did not describe the second network.
errors_2 = sgd(model2, x_train, x_test, y_train, y_test,
               epochs=5000, epochs_without_progress=500,
               mini_batch_size=40, metric=metrics.accuracy)[1]
print("Model1: 64x64x10, ReLu activation")
def main():
    """Load the data splits, fit a multi-layer perceptron and print its accuracy.

    The data returned by ``load_data(DATA_FILEPATH)`` is unpacked into
    train / cv / test pairs of (features, labels).  A
    ``mlp.MultiLayerPerceptron`` with 64 hidden units and 10 outputs is then
    fitted either with ``sgd.sgd_with_validation`` (accuracy printed for the
    test split) or with plain ``sgd.sgd`` (accuracy printed for the training
    split), depending on the local ``with_validation`` switch.
    """
    print("loading data...", end="", flush=True)
    splits = load_data(DATA_FILEPATH)
    print(" done")

    (x_tr, y_tr), (x_cv, y_cv), (x_te, y_te) = splits

    # Report the shape of every split.
    for tag, feats, labels in (("\ttrain:", x_tr, y_tr),
                               ("\tcv:", x_cv, y_cv),
                               ("\ttest:", x_te, y_te)):
        print(tag, feats.shape, labels.shape)

    # Symbolic inputs, classifier and compiled scoring function.
    x = tensor.matrix(name="x")
    y = tensor.ivector(name="y")
    n_features = x_tr.shape[1]
    clf = mlp.MultiLayerPerceptron(x, n_inp=n_features, n_hidden=64, n_out=10)
    acc = theano.function([x, y], clf.score(y))

    with_validation = True

    # Wrap the training and cross-validation arrays as shared variables.
    x_tr_sh, y_tr_sh, x_cv_sh, y_cv_sh = [
        theano.shared(arr, borrow=True)
        for arr in (x_tr, y_tr, x_cv, y_cv)
    ]

    if not with_validation:
        print("calling sgd")
        plain_opts = {
            "learning_rate": 0.1,
            "reg_term": 1,
            "batch_size": 32,
            "n_epochs": 128,
            "rel_tol": 2e-3,
            "verbose": True,
        }
        sgd.sgd(clf, x_tr_sh, y_tr_sh, **plain_opts)
        train_score = acc(x_tr, y_tr)
        print("accuracy: %.2f%%" % (100 * train_score))
        return

    print("calling sgd_with_validation", flush=True)
    validation_opts = {
        "learning_rate": 0.01,
        "reg_term": 0.00005,
        "batch_size": 256,
        "n_epochs": 1000,
        "max_its": 5000,
        "improv_thresh": 0.01,
        "max_its_incr": 4,
        "rel_val_tol": 5e-3,
        "val_freq": "auto",
        "verbose": True,
    }
    sgd.sgd_with_validation(clf, x_tr_sh, y_tr_sh, x_cv_sh, y_cv_sh,
                            **validation_opts)
    test_score = acc(x_te, y_te)
    print("accuracy: %.2f%%" % (100 * test_score))
devFeatures = np.zeros((nDev, dimVectors)) devLabels = np.zeros((nDev,), dtype=np.int32) for i in xrange(nDev): words, devLabels[i] = devset[i] devFeatures[i, :] = getSentenceFeature(tokens, wordVectors, words) # Try our regularization parameters results = [] for regularization in REGULARIZATION: random.seed(3141) np.random.seed(59265) weights = np.random.randn(dimVectors, 5) print "Training for reg=%f" % regularization # We will do batch optimization weights = sgd(lambda weights: softmax_wrapper(trainFeatures, trainLabels, weights, regularization), weights, 3.0, 10000, PRINT_EVERY=100) # Test on train set _, _, pred = softmaxRegression(trainFeatures, trainLabels, weights) trainAccuracy = accuracy(trainLabels, pred) print "Train accuracy (%%): %f" % trainAccuracy # Test on dev set _, _, pred = softmaxRegression(devFeatures, devLabels, weights) devAccuracy = accuracy(devLabels, pred) print "Dev accuracy (%%): %f" % devAccuracy # Save the results and weights results.append({ "reg" : regularization, "weights" : weights,
from plot_tools import max_df_plot
from log_reg import log_reg
from svm import svm
from sgd import sgd
import pickle
import os
import numpy as np

# Sweep 20 evenly spaced max_df values from 0.05 to 1 through the three
# classifiers, cache the results on disk, and plot them.
max_df_list = np.linspace(0.05, 1, 20)

# The cache file name encodes how many max_df values were evaluated, so a
# sweep with a different number of points uses a different file.
cache_path = "max_df_%d.cPickle" % len(max_df_list)

if not os.path.isfile(cache_path):
    # No cached results: run every classifier once per max_df value.
    print("Generate ")
    max_df_logreg = []
    max_df_svm = []
    max_df_sgd = []
    for counter, max_df in enumerate(max_df_list):
        print("Iteration", counter)
        max_df_logreg.append(log_reg(max_df))
        max_df_svm.append(svm(max_df))
        max_df_sgd.append(sgd(max_df))
    # Use a context manager so the file is flushed and closed even if
    # pickling raises.
    with open(cache_path, 'wb') as cache_file:
        pickle.dump((max_df_list, max_df_logreg, max_df_svm, max_df_sgd),
                    cache_file)
else:
    # NOTE: pickle.load can execute arbitrary code; only load cache files
    # that were written by this script.
    with open(cache_path, 'rb') as cache_file:
        max_df_list, max_df_logreg, max_df_svm, max_df_sgd = pickle.load(
            cache_file)

max_df_plot(max_df_list, max_df_logreg, max_df_svm, max_df_sgd)