def run(): print "Loading data..." # load training data trainImages,trainLabels=dl.load_mnist_train() imDim = trainImages.shape[0] inputDim = 50 outputDim = 10 layerSizes = [16]*2 trainImages = trainImages.reshape(imDim**2,-1) pcer = pc.Preprocess() pcer.computePCA(trainImages) whitenedTrain = pcer.whiten(trainImages, inputDim) minibatch = whitenedTrain.shape[1] print "minibatch size: %d" % (minibatch) epochs = 10000 stepSize = 1e-2 nn = nnet.NNet(inputDim,outputDim,layerSizes,minibatch) nn.initParams() SGD = sgd.SGD(nn,alpha=stepSize,minibatch=minibatch) for e in range(epochs): print "Running epoch %d"%e SGD.run(whitenedTrain,trainLabels) SGD.dumptrace()
def run():
    """Run one matrix-factorisation experiment end to end."""
    # Build the parameter container and draw its sample data.
    # (The meaning of 15, 15, 5 is defined by param.parameter — presumably
    # matrix dimensions and a rank; confirm against that module.)
    data = param.parameter(15, 15, 5)
    data.get_sample()

    # Factorise via SGD for 2000 iterations.  The two constants are the
    # arguments sgd.SGD expects (presumably step size and regularisation
    # — verify against sgd.SGD's signature).
    model = sgd.SGD(0.001, 0.0001)
    model.iteration(data, 2000)

    # Output the result.
    model.re(data)
def fit(self):
    """Fit the linear-regression parameters on the mapped data.

    This is the main entry point of the LR model and runs only when
    called explicitly on the LR object.  The optimisation backend is
    selected by ``self.optimization``:

        0 = plain SGD, 1 = Adagrad, 2 = Adadelta, 3 = Adam.

    On success ``self.params`` is replaced by the backend's
    ``updated_params``; an unrecognised code leaves ``self.params``
    untouched (matching the original behaviour).
    """
    self.set_params()

    # Arguments every backend receives.  Built once instead of being
    # duplicated in each branch (the original repeated this list four
    # times, inviting drift between branches).
    common = dict(
        params=self.params,
        data=self.mapped_data,
        epoch=self.epoch,
        lr=self.lr,
        activation=2,
        cost=self.cost,
        huber_point=self.h_p,
        quantile=self.q,
    )

    # Backend-specific constructors; lambdas defer construction until we
    # know which backend was actually requested.
    backends = {
        0: lambda: sgd.SGD(**common),
        1: lambda: Adagrad.ADAGRAD(ep=self.ep, **common),
        2: lambda: Adadelta.ADADELTA(alpha=self.alpha, ep=self.ep, **common),
        3: lambda: Adam.ADAM(ep=self.ep, b1=self.b1, b2=self.b2, **common),
    }

    build = backends.get(self.optimization)
    if build is not None:
        self.params = build().updated_params
    # NOTE(review): an unknown optimization code is silently ignored here,
    # as in the original; raising ValueError would surface config errors.
def initSGD(self, lossScheme=None):
    """Create the SGD instance used to perform the fit.

    Passing ``lossScheme=None`` keeps the scheme already stored on the
    object; any other value is recorded on ``self`` before the optimizer
    is built, so ``self.lossScheme`` and the optimizer always agree.
    """
    if lossScheme is not None:
        self.lossScheme = lossScheme
    self.sgd = sgd.SGD(
        self.learningRate,
        miniBatchSize=self.miniBatchSize,
        featureMapFcn=self.featureMap,
        lamScale=self.regScale,
        arrayBatchSize=self.arrayBatchSize,
        lossScheme=self.lossScheme,
    )
def fit(self, trees, export_filename='models/RNTN.pickle', verbose=False):
    """Train the model on *trees*, checkpointing and logging every epoch.

    Each epoch optimises over the training trees, saves the model to
    *export_filename*, evaluates on both the training and test sets, and
    appends one row of statistics to ``log.csv`` (the header row is only
    written when the file is empty).  ``verbose`` is accepted but unused
    in this implementation.
    """
    import sgd

    self.word_map = tr.load_word_map()
    self.num_words = len(self.word_map)
    self.init_params()
    self.optimizer = sgd.SGD(self, self.learning_rate, self.batch_size,
                             self.optimizer_algorithm)

    test_trees = tr.load_trees('test')

    with open("log.csv", "a", newline='') as csvfile:
        writer = csv.writer(csvfile)
        header = [
            "Timestamp", "Vector size", "Learning rate", "Batch size",
            "Regularization", "Epoch", "Train cost", "Train accuracy",
            "Test cost", "Test accuracy",
        ]
        # Only a brand-new (empty) log file gets the header row.
        if csvfile.tell() == 0:
            writer.writerow(header)

        for epoch in range(self.max_epochs):
            print("Running epoch {} ...".format(epoch))
            t0 = time.time()
            self.optimizer.optimize(trees)
            print("  Time per epoch = {:.4f}".format(time.time() - t0))

            # Persist the current parameters.
            self.save(export_filename)

            # Accuracy = percentage of the confusion matrix on the diagonal.
            train_cost, train_conf = self.test(trees)
            train_accuracy = 100.0 * train_conf.trace() / train_conf.sum()
            test_cost, test_conf = self.test(test_trees)
            test_accuracy = 100.0 * test_conf.trace() / test_conf.sum()

            writer.writerow([
                datetime.now(), self.dim, self.learning_rate,
                self.batch_size, self.reg, epoch, train_cost,
                train_accuracy, test_cost, test_accuracy,
            ])
def run(): print "Loading data..." # load training data trainImages, trainLabels = data_loader.load_mnist_train() imDim = trainImages.shape[0] visibleSize = 784 # 69 hiddenSize = 500 trainImages = trainImages.reshape(imDim**2, -1) import numpy as np trainImages = trainImages - np.mean(trainImages, axis=1).reshape(-1, 1) # preprocess print "Preprocessing Data..." #prp = preprocess.Preprocess() #prp.computePCA(trainImages) # prp.plot_explained_var() #trainImages = prp.whiten(trainImages,numComponents=visibleSize) RBM = rbm.RBM(visibleSize, hiddenSize, grbm=True, sp_target=0.05, sp_weight=5) # initialize RBM parameters RBM.initParams() SGD = sgd.SGD(RBM, epochs=2, alpha=1e-5, minibatch=50) # run SGD loop print "Training..." SGD.run(trainImages) # view up to 100 learned features post training W = RBM.W #prp.unwhiten(RBM.W) vsl.view_patches(W.reshape(imDim, imDim, hiddenSize), min(hiddenSize, 100)) print "Sampling Gibbs chains..."
### to project grad vector in random direction. # if ndata == None: ndata = 1 if x.ndim == 1: x = x.reshape(1, x.size) grad = self.grad(x) u = scipy.randn(*x.shape) # u = u / scipy.sqrt( (u*u).sum(axis=1) ) # u = u / scipy.linalg.norm(u,2,axis=1) u = u / scipy.expand_dims(scipy.linalg.norm(u, 2, axis=1), 1) gradfx = u * scipy.sum((self.grad(x) * u).sum(axis=1)) return bound(gradfx) if __name__ == '__main__': alpha = scipy.array([10, 200]) center = scipy.array([2, 1]) x = scipy.matrix([3, 3]) # x = scipy.matrix(scipy.random.randint(-3,3,(4,2))) sfunc = sgd.step_size(0.9, 1) para = ParabolaDir(alpha, center) sgd = sgd.SGD(afunc=para, x0=x, sfunc=sfunc) print sgd.getSoln() for i in range(200): sgd.nsteps(1) fname = 'vid5/sgd_q1_{0:03d}'.format(i) # sgd.plot(alphaMult=0.9)
def run(args=None): usage = "usage : %prog [options]" parser = optparse.OptionParser(usage=usage) parser.add_option("--test", action="store_true", dest="test", default=False) parser.add_option("--distance", action="store_true", dest="distance", default=False) parser.add_option("--metric", dest="metric", default="cosine") # Optimizer parser.add_option("--minibatch", dest="minibatch", type="int", default=30) parser.add_option("--optimizer", dest="optimizer", type="string", default="adagrad") parser.add_option("--model", dest="model", type="string", default="rnn") parser.add_option("--epochs", dest="epochs", type="int", default=50) parser.add_option("--step", dest="step", type="float", default=1e-2) parser.add_option("--output_dim", dest="output_dim", type="int", default=0) parser.add_option("--wvec_dim", dest="wvec_dim", type="int", default=50) parser.add_option("--out_file", dest="out_file", type="string", default="models/test.bin") parser.add_option("--in_file", dest="in_file", type="string", default="models/test.bin") parser.add_option("--data", dest="data", type="string", default="train") parser.add_option("--wvec_file", dest="wvec_file", type="string", default=None) (opts, args) = parser.parse_args(args) # Testing if opts.test: test(opts.in_file, opts.data) return # Finding nearest neighbors to input words if opts.distance: distance(opts.in_file, opts.metric) return print("Loading data...") # load training data trees = load_trees() word_map = load_word_map() opts.num_words = len(word_map) if opts.output_dim == 0: opts.output_dim = len(load_label_map()) if opts.wvec_file is None: wvecs = None else: print("Loading word vectors...") wvecs = load_word_vectors(opts.wvec_dim, opts.wvec_file, word_map) model = models[opts.model] net = model(opts.wvec_dim, opts.output_dim, opts.num_words, opts.minibatch, wvecs) sgd = optimizer.SGD(net, alpha=opts.step, minibatch=opts.minibatch, optimizer=opts.optimizer) save(net, opts, sgd) for e in range(opts.epochs): start = 
time.time() print("Running epoch %d" % e) sgd.run(trees) end = time.time() print("Time per epoch : %f" % (end - start)) save(net, opts, sgd)
# --- Multi-task training driver (module-level script; imports and loader
# --- setup live outside this chunk) ---

net = models.resnet26(num_classes)
start_epoch = 0
best_acc = 0  # best test accuracy
# One slot per (statistic, epoch, task).  Rows 0:2 are filled with train
# loss/acc below; rows 2:4 are presumably test stats written elsewhere —
# TODO confirm, the writer is not visible in this chunk.
results = np.zeros((4,start_epoch+args.nb_epochs,len(args.num_classes)))
all_tasks = range(len(args.dataset))
np.random.seed(1993)  # fixed seed for reproducibility
if args.use_cuda:
    net.cuda()
    cudnn.benchmark = True
args.criterion = nn.CrossEntropyLoss()
# Only parameters left trainable are handed to the (custom) sgd.SGD.
optimizer = sgd.SGD(filter(lambda p: p.requires_grad, net.parameters()),
                    lr=args.lr, momentum=0.9, weight_decay=args.wd)
print("Start training")
for epoch in range(start_epoch, start_epoch+args.nb_epochs):
    # The schedule helper adjusts the LR and returns this epoch's tasks.
    training_tasks = utils_pytorch.adjust_learning_rate_and_learning_taks(optimizer, epoch, args)
    st_time = time.time()
    # Training and validation
    train_acc, train_loss = utils_pytorch.train(epoch, train_loaders,
                                                training_tasks, net, args,
                                                optimizer)
    test_acc, test_loss, best_acc = utils_pytorch.test(epoch,val_loaders,
                                                       all_tasks, net,
                                                       best_acc, args,
                                                       optimizer)
    # Record statistics
    for i in range(len(training_tasks)):
        current_task = training_tasks[i]
        results[0:2,epoch,current_task] = [train_loss[i],train_acc[i]]
# --- CTC speech-model training script (module level; Python 2) ---

# Optimisation hyper-parameters.
momentum = 0.95
epochs = 2
step = 1e-6
anneal = 1.1  # per-epoch learning-rate divisor
dataDir = "/scail/group/deeplearning/speech/awni/kaldi-stanford/kaldi-trunk/egs/swbd/s5b/exp/train_ctc/"
# 41 features x 15 stacked frames — presumably filterbank context windows;
# TODO confirm against the DataLoader.
inputDim = 41 * 15
rawDim = 41 * 15
outputDim = 35
maxUttLen = 1500  # maximum utterance length passed to the net
temporalLayer = 3
loader = dl.DataLoader(dataDir, rawDim, inputDim)
# NOTE(review): `layerSize` and `numLayers` are not defined in this chunk;
# they must be defined earlier in the file — verify before running.
nn = nnet.NNet(inputDim, outputDim, layerSize, numLayers, maxUttLen,
               temporalLayer=temporalLayer)
nn.initParams()
SGD = sgd.SGD(nn, maxUttLen, alpha=step, momentum=momentum)
# Only the first data file is loaded; every epoch re-trains on it.
data_dict, alis, keys, sizes = loader.loadDataFileDict(1)
# Training
for e in range(epochs):
    print "Epoch %d" % e
    SGD.run(data_dict, alis, keys, sizes)
    SGD.alpha /= anneal  # anneal the step size after each epoch
def run(args=None): usage = "usage : %prog [options]" parser = optparse.OptionParser(usage=usage) parser.add_option("--test",action="store_true",dest="test",default=False) # Optimizer parser.add_option("--minibatch",dest="minibatch",type="int",default=30) parser.add_option("--optimizer",dest="optimizer",type="string", default="adagrad") parser.add_option("--epochs",dest="epochs",type="int",default=50) parser.add_option("--step",dest="step",type="float",default=1e-2) parser.add_option("--middleDim",dest="middleDim",type="int",default=10) parser.add_option("--outputDim",dest="outputDim",type="int",default=3) parser.add_option("--wvecDim",dest="wvecDim",type="int",default=30) # for DCNN only parser.add_option("--ktop",dest="ktop",type="int",default=5) parser.add_option("--m1",dest="m1",type="int",default=10) parser.add_option("--m2",dest="m2",type="int",default=7) parser.add_option("--n1",dest="n1",type="int",default=6) parser.add_option("--n2",dest="n2",type="int",default=12) parser.add_option("--outFile",dest="outFile",type="string", default="models/test.bin") parser.add_option("--inFile",dest="inFile",type="string", default="models/test.bin") parser.add_option("--data",dest="data",type="string",default="train") parser.add_option("--model",dest="model",type="string",default="RNN") (opts,args)=parser.parse_args(args) # make this false if you dont care about your accuracies per epoch, makes things faster! evaluate_accuracy_while_training = True # Testing if opts.test: cmfile = opts.inFile + ".confusion_matrix-" + opts.data test(opts.inFile,opts.data,None,opts.model,confusion_matrix_file=cmfile,full=True) return print "Loading data..." 
train_accuracies = [] dev_accuracies = [] # load training data trees = tr.loadTrees('train') opts.numWords = len(tr.loadWordMap()) #Load word embeddings L = tr.loadWordEmbedding() if(opts.model=='RNN2'): nn = RNN2(opts.wvecDim,opts.middleDim,opts.outputDim,opts.numWords,opts.minibatch) else: raise '%s is not a valid neural network, only RNN2'%opts.model nn.initParams(L) sgd = optimizer.SGD(nn,alpha=opts.step,minibatch=opts.minibatch, optimizer=opts.optimizer) dev_trees = tr.loadTrees("dev") for e in range(opts.epochs): start = time.time() print "Running epoch %d"%e sgd.run(trees) end = time.time() print "Time per epoch : %f"%(end-start) with open(opts.outFile,'w') as fid: pickle.dump(opts,fid) pickle.dump(sgd.costt,fid) nn.toFile(fid) if evaluate_accuracy_while_training: print "testing on training set real quick" train_accuracies.append(test(opts.outFile,"train",L,opts.model,trees)) print "testing on dev set real quick" dev_accuracies.append(test(opts.outFile,"dev",L,opts.model,dev_trees)) # clear the fprop flags in trees and dev_trees for tree in trees: tr.leftTraverse(tree.root,nodeFn=tr.clearFprop) for tree in dev_trees: tr.leftTraverse(tree.root,nodeFn=tr.clearFprop) print "fprop in trees cleared" if evaluate_accuracy_while_training: # pdb.set_trace() print train_accuracies print dev_accuracies # Plot train/dev_accuracies here? plt.figure() plt.plot(range(len(train_accuracies)), train_accuracies, label='Train') plt.plot(range(len(dev_accuracies)), dev_accuracies, label='Dev') plt.xlabel("Epoch") plt.ylabel("Accuracy") plt.legend() # plot.show() plt.savefig(opts.outFile + ".accuracy_plot.png")
def run(args=None): usage = "usage : %prog [options]" parser = optparse.OptionParser(usage=usage) parser.add_option("--test", action="store_true", dest="test", default=False) # Architecture parser.add_option("--layers", dest="layers", type="string", default="100,100", help="layer1size,layer2size,...,layernsize") parser.add_option("--temporal_layer", dest="temporalLayer", type="int", default=-1) # Optimization parser.add_option("--optimizer", dest="optimizer", type="string", default="momentum") parser.add_option("--momentum", dest="momentum", type="float", default=0.9) parser.add_option("--epochs", dest="epochs", type="int", default=1) parser.add_option("--step", dest="step", type="float", default=1e-4) parser.add_option( "--anneal", dest="anneal", type="float", default=1, help="Sets (learning rate := learning rate / anneal) after each epoch." ) # Data parser.add_option( "--dataDir", dest="dataDir", type="string", default= "/scail/group/deeplearning/speech/awni/kaldi-stanford/kaldi-trunk/egs/swbd/s5b/exp/train_ctc/" ) parser.add_option("--numFiles", dest="numFiles", type="int", default=384) parser.add_option("--inputDim", dest="inputDim", type="int", default=41 * 15) parser.add_option("--rawDim", dest="rawDim", type="int", default=41 * 15) parser.add_option("--outputDim", dest="outputDim", type="int", default=34) parser.add_option("--outFile", dest="outFile", type="string", default="models/test.bin") parser.add_option("--inFile", dest="inFile", type="string", default=None) (opts, args) = parser.parse_args(args) opts.layers = [int(l) for l in opts.layers.split(',')] # Testing if opts.test: test(opts) return loader = dl.DataLoader(opts.dataDir, opts.rawDim, opts.inputDim) #NOTE at some point we need to unify the nnet and rnnet modules nn = None if opts.temporalLayer > 0: nn = rnnet.RNNet(opts.inputDim, opts.outputDim, opts.layers, opts.temporalLayer) else: nn = nnet.NNet(opts.inputDim, opts.outputDim, opts.layers) nn.initParams() # Load model if exists if opts.inFile 
is not None: with open(opts.inFile, 'r') as fid: _ = pickle.load(fid) _ = pickle.load(fid) _ = pickle.load(fid) nn.fromFile(fid) SGD = sgd.SGD(nn, alpha=opts.step, optimizer=opts.optimizer, momentum=opts.momentum) # Setup some random keys for tracing with open('randKeys.bin', 'r') as fid: traceK = pickle.load(fid) for k in traceK: nn.hist[k] = [] # write initial model to disk with open(opts.outFile, 'w') as fid: pickle.dump(opts, fid) pickle.dump(SGD.costt, fid) pickle.dump(nn.hist, fid) nn.toFile(fid) # Training import time for _ in range(opts.epochs): for i in np.random.permutation(opts.numFiles) + 1: start = time.time() data_dict, alis, keys, sizes = loader.loadDataFileDict(i) SGD.run_seq(data_dict, alis, keys, sizes) end = time.time() print "File time %f" % (end - start) # Save anneal after epoch SGD.alpha = SGD.alpha / opts.anneal with open(opts.outFile, 'w') as fid: pickle.dump(opts, fid) pickle.dump(SGD.costt, fid) pickle.dump(nn.hist, fid) nn.toFile(fid)
# --- Adapter-tuning script fragment (module level; the final loop body
# --- continues beyond this chunk) ---

rnet_imagenet.cuda()
rnet_imagenet = nn.DataParallel(rnet_imagenet)

# fix the imagenet pre-trained model
# Freeze every pre-trained parameter except the "dim_reduction" layers.
for name, param in rnet_imagenet.named_parameters():
    #if isinstance(m, nn.Conv2d):
    if "dim_reduction" not in name:
        param.requires_grad = False

# Collect the still-trainable parameters of the ImageNet branch; they get
# their own optimizer.
adapt_net_params = []
for name, param in rnet_imagenet.named_parameters():
    if param.requires_grad == True:
        adapt_net_params.append(param)
adapt_optimizer = sgd.SGD(adapt_net_params, lr=args.lr, momentum=0.9, weight_decay=0.0)

# Likewise for the main network.
net_params = []
for name, param in net.named_parameters():
    if param.requires_grad == True:
        net_params.append(param)
optimizer = sgd.SGD(net_params, lr=args.lr, momentum=0.9, weight_decay=0.0)

# 4 statistics per epoch — presumably train/test loss+acc; the writers
# are outside this chunk, TODO confirm.
results = np.zeros((4, args.nb_epochs))
best_acc = 0.0  # best test accuracy
start_epoch = 0
for epoch in range(start_epoch, start_epoch + args.nb_epochs):
    # Apply the LR schedule to both optimizers each epoch (the rest of
    # the loop body lies past this chunk).
    adjust_learning_rate_and_learning_taks(optimizer, epoch, args)
    adjust_learning_rate_and_learning_taks(adapt_optimizer, epoch, args)
def run(args=None): usage = "usage : %prog [options]" parser = optparse.OptionParser(usage=usage) parser.add_option("--test", action="store_true", dest="test", default=False) # Optimizer parser.add_option("--minibatch", dest="minibatch", type="int", default=30) parser.add_option("--optimizer", dest="optimizer", type="string", default="adagrad") parser.add_option("--epochs", dest="epochs", type="int", default=50) parser.add_option("--step", dest="step", type="float", default=1e-2) parser.add_option("--outputDim", dest="outputDim", type="int", default=5) parser.add_option("--wvecDim", dest="wvecDim", type="int", default=30) parser.add_option("--outFile", dest="outFile", type="string", default="models/test.bin") parser.add_option("--inFile", dest="inFile", type="string", default="models/test.bin") parser.add_option("--data", dest="data", type="string", default="train") (opts, args) = parser.parse_args(args) # Testing if opts.test: test(opts.inFile, opts.data) return print "Loading data..." # load training data trees = tr.loadTrees() opts.numWords = len(tr.loadWordMap()) rnn = nnet.RNN(opts.wvecDim, opts.outputDim, opts.numWords, opts.minibatch) rnn.initParams() sgd = optimizer.SGD(rnn, alpha=opts.step, minibatch=opts.minibatch, optimizer=opts.optimizer) for e in range(opts.epochs): start = time.time() print "Running epoch %d" % e sgd.run(trees) end = time.time() print "Time per epoch : %f" % (end - start) with open(opts.outFile, 'w') as fid: pickle.dump(opts, fid) pickle.dump(sgd.costt, fid) rnn.toFile(fid)
# NOTE(review): this chunk begins mid-method — the enclosing `def` (an
# error-counting helper on a classifier holding per-class weights in
# self.W_list) is not visible here, so the fragment keeps method-body
# indentation.
    for i in range(len(self.W_list)):
        result_list.append(self.predict(X_dev, index=i))
    # NOTE(review): "reslut" is a typo for "result" (harmless, worth fixing).
    reslut_array = np.array(result_list)
    # Combine the per-class predictions into class labels 1/2/3 and count
    # how many disagree with y_dev — presumably predict() returns one-hot
    # columns; TODO confirm against predict().
    return np.count_nonzero(
        np.dot(reslut_array.T, np.array([1, 2, 3])) - y_dev)


# --- Script section: train two SGD variants on the wine data ---
import pandas as pd
import sgd

df1 = pd.read_csv("wine_train.csv")
df2 = pd.read_csv("wine_test(1).csv")
# Last column is the label; everything before it is a feature.
X = df1.iloc[:, :-1].values
y = df1.iloc[:, -1].values
X_dev = df2.iloc[:, :-1].values
y_dev = df2.iloc[:, -1].values
result = []
sgd_list = []
# NOTE(review): "sdg1" is a typo for "sgd1", and the same name is reused
# for both variants, so the first fitted model is discarded after its
# error is printed.
sdg1 = sgd.SGD(SGD_op='SGD1')
sdg1.fit(X, y, X_dev, y_dev)
print('sgd1', sdg1.error[-1])
sdg1 = sgd.SGD(SGD_op='SGD2')
sdg1.fit(X, y, X_dev, y_dev)
print('sgd2', sdg1.error[-1])
# for i in range(10):
#     sgd_list.append(sgd.SGD(SGD_op='SGD1'))
#     sgd_list[-1].fit(X, y, X_dev, y_dev)
#     result.append(sgd_list[-1].error)
def run(args=None): usage = "usage : %prog [options]" parser = optparse.OptionParser(usage=usage) parser.add_option("--test", action="store_true", dest="test", default=False) # Optimizer parser.add_option("--minibatch", dest="minibatch", type="int", default=30) parser.add_option("--optimizer", dest="optimizer", type="string", default="adagrad") parser.add_option("--epochs", dest="epochs", type="int", default=50) parser.add_option("--step", dest="step", type="float", default=1e-2) parser.add_option("--init", dest="init", type="float", default=0.01) parser.add_option("--outputDim", dest="outputDim", type="int", default=5) parser.add_option("--wvecDim", dest="wvecDim", type="int", default=30) parser.add_option("--rho", dest="rho", type="float", default=1e-6) parser.add_option("--outFile", dest="outFile", type="string", default="models/test.bin") parser.add_option("--inFile", dest="inFile", type="string", default="models/test.bin") parser.add_option("--data", dest="data", type="string", default="train") parser.add_option("--model", dest="model", type="string", default="RNTN") parser.add_option("--maxTrain", dest="maxTrain", type="int", default=-1) parser.add_option("--activation", dest="acti", type="string", default="tanh") parser.add_option("--partial", action="store_true", dest="partial", default=False) parser.add_option("--w2v", dest="w2vmodel", type="string") (opts, args) = parser.parse_args(args) # make this false if you dont care about your accuracies per epoch, makes things faster! evaluate_accuracy_while_training = True # Testing if opts.test: cmfile = opts.inFile + ".confusion_matrix-" + opts.data + ".png" test(opts.inFile, opts.data, opts.model, acti=opts.acti) return print "Loading data..." 
embedding = None wordMap = None if opts.w2vmodel is not None: print "Loading pre-trained word2vec model from %s" % opts.w2vmodel w2v = models.Word2Vec.load(opts.w2vmodel) embedding, wordMap = readW2v(w2v, opts.wvecDim) train_accuracies = [] train_rootAccuracies = [] dev_accuracies = [] dev_rootAccuracies = [] # load training data trees = tr.loadTrees('train', wordMap=wordMap)[:opts.maxTrain] #train.full.15 if opts.maxTrain > -1: print "Training only on %d trees" % opts.maxTrain opts.numWords = len(tr.loadWordMap()) if opts.partial == True: print "Only partial feedback" if (opts.model == 'RNTN'): nn = RNTN(wvecDim=opts.wvecDim, outputDim=opts.outputDim, numWords=opts.numWords, mbSize=opts.minibatch, rho=opts.rho, acti=opts.acti, init=opts.init, partial=opts.partial) else: raise '%s is not a valid neural network so far only RNTN, RNN' % opts.model nn.initParams(embedding=embedding) sgd = optimizer.SGD(nn, alpha=opts.step, minibatch=opts.minibatch, optimizer=opts.optimizer) dev_trees = tr.loadTrees("dev") #dev.full.15 for e in range(opts.epochs): start = time.time() print "Running epoch %d" % e sgd.run(trees) end = time.time() print "Time per epoch : %f" % (end - start) with open(opts.outFile, 'w') as fid: pickle.dump(opts, fid) pickle.dump(sgd.costt, fid) nn.toFile(fid) if evaluate_accuracy_while_training: print "testing on training set" acc, sacc = test(opts.outFile, "train", opts.model, trees, acti=opts.acti) train_accuracies.append(acc) train_rootAccuracies.append(sacc) print "testing on dev set" dacc, dsacc = test(opts.outFile, "dev", opts.model, dev_trees, acti=opts.acti) dev_accuracies.append(dacc) dev_rootAccuracies.append(dsacc) # clear the fprop flags and dev_trees for tree in trees: tr.leftTraverse(tree.root, nodeFn=tr.clearFprop) for tree in dev_trees: tr.leftTraverse(tree.root, nodeFn=tr.clearFprop) print "fprop in trees cleared" if evaluate_accuracy_while_training: pdb.set_trace() print train_accuracies print dev_accuracies print "on sentence-level:" 
print train_rootAccuracies print dev_rootAccuracies # Plot train/dev_accuracies plt.figure() plt.plot(range(len(train_accuracies)), train_accuracies, label='Train') plt.plot(range(len(dev_accuracies)), dev_accuracies, label='Dev') plt.xlabel("Epoch") plt.ylabel("Accuracy") plt.legend() # plot.show() plt.savefig(opts.outFile + ".accuracy_plot.png") # Plot train/dev_accuracies plt.figure() plt.plot(range(len(train_rootAccuracies)), train_rootAccuracies, label='Train') plt.plot(range(len(dev_rootAccuracies)), dev_rootAccuracies, label='Dev') plt.xlabel("Epoch") plt.ylabel("Accuracy") plt.legend() # plot.show() plt.savefig(opts.outFile + ".sent.accuracy_plot.png")
def run(args=None): usage = "usage : %prog [options]" parser = optparse.OptionParser(usage=usage) parser.add_option("--test", action="store_true", dest="test", default=False) # Optimizer parser.add_option("--minibatch", dest="minibatch", type="int", default=30) parser.add_option("--optimizer", dest="optimizer", type="string", default="adagrad") parser.add_option("--epochs", dest="epochs", type="int", default=50) parser.add_option("--step", dest="step", type="float", default=1e-2) parser.add_option("--middleDim", dest="middleDim", type="int", default=10) parser.add_option("--outputDim", dest="outputDim", type="int", default=5) parser.add_option("--wvecDim", dest="wvecDim", type="int", default=30) # By @tiagokv, just to ease the first assignment test parser.add_option("--wvecDimBatch", dest="wvecDimBatch", type="string", default="") # for DCNN only parser.add_option("--ktop", dest="ktop", type="int", default=5) parser.add_option("--m1", dest="m1", type="int", default=10) parser.add_option("--m2", dest="m2", type="int", default=7) parser.add_option("--n1", dest="n1", type="int", default=6) parser.add_option("--n2", dest="n2", type="int", default=12) parser.add_option("--outFile", dest="outFile", type="string", default="models/test.bin") parser.add_option("--inFile", dest="inFile", type="string", default="models/test.bin") parser.add_option("--data", dest="data", type="string", default="train") parser.add_option("--model", dest="model", type="string", default="RNN") (opts, args) = parser.parse_args(args) # make this false if you dont care about your accuracies per epoch, makes things faster! evaluate_accuracy_while_training = True # Testing if opts.test: test(opts.inFile, opts.data, opts.model) return print "Loading data..." 
train_accuracies = [] dev_accuracies = [] # load training data trees = tr.loadTrees('train') opts.numWords = len(tr.loadWordMap()) if (opts.model == 'RNTN'): nn = RNTN(opts.wvecDim, opts.outputDim, opts.numWords, opts.minibatch) elif (opts.model == 'RNN'): nn = RNN(opts.wvecDim, opts.outputDim, opts.numWords, opts.minibatch) elif (opts.model == 'RNN2'): nn = RNN2(opts.wvecDim, opts.middleDim, opts.outputDim, opts.numWords, opts.minibatch) elif (opts.model == 'RNN3'): nn = RNN3(opts.wvecDim, opts.middleDim, opts.outputDim, opts.numWords, opts.minibatch) elif (opts.model == 'DCNN'): nn = DCNN(opts.wvecDim, opts.ktop, opts.m1, opts.m2, opts.n1, opts.n2, 0, opts.outputDim, opts.numWords, 2, opts.minibatch, rho=1e-4) trees = cnn.tree2matrix(trees) else: raise '%s is not a valid neural network so far only RNTN, RNN, RNN2, RNN3, and DCNN' % opts.model nn.initParams() sgd = optimizer.SGD(nn, alpha=opts.step, minibatch=opts.minibatch, optimizer=opts.optimizer) # assuring folder for plots exists if (os.path.isdir('plots') == False): os.makedirs('test') if (os.path.isdir('plots/' + opts.model) == False): os.makedirs('plots/' + opts.model) dev_trees = tr.loadTrees("dev") for e in range(opts.epochs): start = time.time() print "Running epoch %d" % e sgd.run(trees) end = time.time() print "Time per epoch : %f" % (end - start) with open(opts.outFile, 'w') as fid: pickle.dump(opts, fid) pickle.dump(sgd.costt, fid) nn.toFile(fid) if evaluate_accuracy_while_training: print "testing on training set real quick" train_accuracies.append( test(opts.outFile, "train", opts.model, trees)) print "testing on dev set real quick" dev_accuracies.append( test(opts.outFile, "dev", opts.model, dev_trees)) # clear the fprop flags in trees and dev_trees for tree in trees: tr.leftTraverse(tree.root, nodeFn=tr.clearFprop) for tree in dev_trees: tr.leftTraverse(tree.root, nodeFn=tr.clearFprop) print "fprop in trees cleared" if evaluate_accuracy_while_training: #pdb.set_trace() plt.figure() #Lets set up 
the plot plt.title('Accuracy in set per epochs') plt.plot(range(opts.epochs), train_accuracies, label='train') plt.plot(range(opts.epochs), dev_accuracies, label='dev') with open('dev_accu' + opts.model, 'a') as fid: fid.write( str(opts.wvecDim) + ',' + str(opts.middleDim) + ',' + str(dev_accuracies[-1]) + ';') #plt.axis([0,opts.epochs,0,1]) plt.xlabel('epochs') plt.ylabel('accuracy') plt.legend(loc=2, borderaxespad=0.) #always save with middleDim, even if it's a one-layer RNN plt.savefig('plots/' + opts.model + '/accuracy_wvec_' + str(opts.wvecDim) + '_middleDim_' + str(opts.middleDim) + ' .png') print 'image saved at %s' % os.getcwd()
# Create the network net = models.resnet26(num_classes) start_epoch = 0 best_acc = 0 # best test accuracy results = np.zeros((4, start_epoch + args.nb_epochs, len(args.num_classes))) all_tasks = list(range(len(args.dataset))) np.random.seed(1993) if args.use_cuda: net.cuda() cudnn.benchmark = True args.criterion = nn.CrossEntropyLoss() optimizer = sgd.SGD([p for p in net.parameters() if p.requires_grad], lr=args.lr, momentum=0.9, weight_decay=args.wd) print("Start training") for epoch in range(start_epoch, start_epoch + args.nb_epochs): training_tasks = utils_pytorch.adjust_learning_rate_and_learning_taks( optimizer, epoch, args) st_time = time.time() # Training and validation train_acc, train_loss = utils_pytorch.train(epoch, train_loaders, training_tasks, net, args, optimizer) test_acc, test_loss, best_acc = utils_pytorch.test(epoch, val_loaders, all_tasks, net, best_acc, args,
def run(args=None):
    """Train (or, with --test, evaluate) the temporal RNN speech net.

    Parses command-line options, sets up/reloads the experiment output
    directory and JSON config, builds the NNet plus SGD optimizer, then loops
    over epochs and shuffled data files with asynchronous prefetching,
    periodically checkpointing parameters and cost so interrupted runs can
    resume from the saved epoch/file counters.

    Args:
        args: optional argv-style list; None means use sys.argv.
    """
    usage = "usage : %prog [options]"
    parser = optparse.OptionParser(usage=usage)

    parser.add_option('--cfg_file', dest='cfg_file', default=None,
                      help='File with settings from previously trained net')
    parser.add_option("--test", action="store_true", dest="test",
                      default=False)

    # Architecture
    parser.add_option("--layerSize", dest="layerSize", type="int",
                      default=1824)
    parser.add_option("--numLayers", dest="numLayers", type="int", default=5)
    parser.add_option("--temporalLayer", dest="temporalLayer", type="int",
                      default=3)

    # Optimization
    parser.add_option("--momentum", dest="momentum", type="float",
                      default=0.95)
    parser.add_option("--epochs", dest="epochs", type="int", default=20)
    parser.add_option("--step", dest="step", type="float", default=1e-5)
    parser.add_option(
        "--anneal", dest="anneal", type="float", default=1.3,
        help="Sets (learning rate := learning rate / anneal) after each epoch."
    )
    parser.add_option(
        '--reg', dest='reg', type='float', default=0.0,
        help='lambda for L2 regularization of the weight matrices')

    # Data
    parser.add_option("--dataDir", dest="dataDir", type="string",
                      default=TRAIN_DATA_DIR['fbank'])
    parser.add_option('--alisDir', dest='alisDir', type='string',
                      default=TRAIN_ALIS_DIR)
    parser.add_option('--startFile', dest='startFile', type='int', default=1,
                      help='Start file for running testing')
    parser.add_option("--numFiles", dest="numFiles", type="int", default=384)
    parser.add_option("--inputDim", dest="inputDim", type="int",
                      default=41 * 15)
    parser.add_option("--rawDim", dest="rawDim", type="int", default=41 * 15)
    parser.add_option("--outputDim", dest="outputDim", type="int", default=35)
    parser.add_option("--maxUttLen", dest="maxUttLen", type="int",
                      default=MAX_UTT_LEN)

    # Save/Load
    parser.add_option(
        '--save_every', dest='save_every', type='int', default=10,
        help='During training, save parameters every x number of files')
    parser.add_option('--run_desc', dest='run_desc', type='string',
                      default='', help='Description of experiment run')

    (opts, args) = parser.parse_args(args)

    # Reuse a previous run's config when given, otherwise start from options.
    if opts.cfg_file:
        cfg = load_config(opts.cfg_file)
    else:
        cfg = vars(opts)

    # These config values should be updated every time
    cfg['host'] = get_hostname()
    cfg['git_rev'] = get_git_revision()
    cfg['pid'] = os.getpid()

    # Create experiment output directory
    if not opts.cfg_file:
        time_string = str(TimeString())
        output_dir = pjoin(RUN_DIR, time_string)
        cfg['output_dir'] = output_dir
        if not os.path.exists(output_dir):
            print 'Creating %s' % output_dir
            os.makedirs(output_dir)
        opts.cfg_file = pjoin(output_dir, 'cfg.json')
    else:
        output_dir = cfg['output_dir']

    cfg['output_dir'] = output_dir
    cfg['in_file'] = pjoin(output_dir, 'params.pk')
    cfg['out_file'] = pjoin(output_dir, 'params.pk')
    cfg['test'] = opts.test
    if opts.test:
        # Testing may target a different data range than the saved config.
        cfg['dataDir'] = opts.dataDir
        cfg['numFiles'] = opts.numFiles
        cfg['startFile'] = opts.startFile
    if 'reg' not in cfg:
        # Older saved configs predate the --reg option.
        cfg['reg'] = 0.0

    # Logging
    logging.basicConfig(filename=pjoin(output_dir, 'train.log'),
                        level=logging.DEBUG)
    logger = logging.getLogger()
    logger.addHandler(logging.StreamHandler())
    logger.info('Running on %s' % cfg['host'])

    # seed for debugging, turn off when stable
    np.random.seed(33)
    import random
    random.seed(33)

    if 'CUDA_DEVICE' in os.environ:
        cm.cuda_set_device(int(os.environ['CUDA_DEVICE']))
    else:
        cm.cuda_set_device(0)  # Default

    # From here on treat the merged config as the options object.
    opts = CfgStruct(**cfg)

    # Testing
    if opts.test:
        test(opts)
        return

    alisDir = opts.alisDir if opts.alisDir else opts.dataDir
    loader = dl.DataLoader(opts.dataDir, opts.rawDim, opts.inputDim, alisDir)

    nn = rnnet.NNet(opts.inputDim, opts.outputDim, opts.layerSize,
                    opts.numLayers, opts.maxUttLen,
                    temporalLayer=opts.temporalLayer, reg=opts.reg)
    nn.initParams()

    SGD = sgd.SGD(nn, opts.maxUttLen, alpha=opts.step,
                  momentum=opts.momentum)

    # Dump config
    cfg['param_count'] = nn.paramCount()
    dump_config(cfg, opts.cfg_file)

    # Training: resume from the epoch recorded by a previous run, if any.
    epoch_file = pjoin(output_dir, 'epoch')
    if os.path.exists(epoch_file):
        start_epoch = int(open(epoch_file, 'r').read()) + 1
    else:
        start_epoch = 0

    # Load model if specified
    if os.path.exists(opts.in_file):
        with open(opts.in_file, 'r') as fid:
            SGD.fromFile(fid)
            # Re-apply the annealing that the completed epochs already did.
            SGD.alpha = SGD.alpha / (opts.anneal**start_epoch)
            nn.fromFile(fid)

    num_files_file = pjoin(output_dir, 'num_files')

    for k in range(start_epoch, opts.epochs):
        perm = np.random.permutation(opts.numFiles) + 1
        loader.loadDataFileAsynch(perm[0])

        file_start = 0
        # NOTE(review): nesting of this if/else reconstructed from collapsed
        # source -- the `else` is read as belonging to `k == start_epoch`
        # (reset the file counter on every later epoch); confirm if resuming
        # behaves oddly.
        if k == start_epoch:
            if os.path.exists(num_files_file):
                file_start = int(open(num_files_file, 'r').read().strip())
                logger.info('Starting from file %d, epoch %d' %
                            (file_start, start_epoch))
        else:
            open(num_files_file, 'w').write(str(file_start))

        for i in xrange(file_start, perm.shape[0]):
            start = time.time()
            data_dict, alis, keys, sizes = loader.getDataAsynch()
            # Prefetch
            if i + 1 < perm.shape[0]:
                loader.loadDataFileAsynch(perm[i + 1])
            SGD.run(data_dict, alis, keys, sizes)
            end = time.time()
            logger.info('File time %f' % (end - start))

            # Save parameters and cost
            if (i + 1) % opts.save_every == 0:
                logger.info('Saving parameters')
                with open(opts.out_file, 'wb') as fid:
                    SGD.toFile(fid)
                    nn.toFile(fid)
                # record how many files of this epoch are already done
                open(num_files_file, 'w').write('%d' % (i + 1))
                logger.info('Done saving parameters')
                with open(pjoin(output_dir, 'last_cost'), 'w') as fid:
                    if opts.reg > 0.0:
                        # report expected cost without the L2 penalty term
                        fid.write(str(SGD.expcost[-1] - SGD.regcost[-1]))
                    else:
                        fid.write(str(SGD.expcost[-1]))

        # Save epoch completed
        open(pjoin(output_dir, 'epoch'), 'w').write(str(k))
        # Save parameters for the epoch
        with open(opts.out_file + '.epoch{0:02}'.format(k), 'wb') as fid:
            SGD.toFile(fid)
            nn.toFile(fid)

        SGD.alpha = SGD.alpha / opts.anneal

    # Run now complete, touch sentinel file
    touch_file(pjoin(output_dir, 'sentinel'))
scipy.misc.imresize(images[i, :, :] * -1 + 256, (20, 20)).flatten()) images = np.array([imgp(i) for i in img]) print(images[1]) y = np.array([1 / (i[0] + i[1]) for i in x]).reshape((-1, 1)) perm = np.random.permutation(len(images)) images = images[perm] labels = labels[perm] labels = [[(1 if (i == j) else 0) for j in range(10)] for i in labels] images_train = np.array(images[0:2000]) images_test = np.array(images[2000:2500]) labels_train = np.array(labels[0:2000]) labels_test = np.array(labels[2000:2500]) x = images_train y = labels_train optimizer = sgd.SGD() for i in range(60): print(images_train[i] - images_train[i + 1]) net = NN(x, y, [100], cat, optimizer, [relu, softmax]) for layer in net.layers: print(layer.weight.shape) net.train() ct = 0 print(net.layers[-1].bias) print(net.layers[-1].weight) print(net.layers[-2].activation) for i in range(500): lab1 = np.argmax(net.predict(images_test[i])) lab2 = np.argmax(labels_test[i]) #print(net.layers[-2].activation) #plt.imshow(images_test[i].resize(20,20))
def run(args=None): usage = "usage : %prog [options]" parser = optparse.OptionParser(usage=usage) parser.add_option("--test", action="store_true", dest="test", default=False) # Optimizer parser.add_option("--minibatch", dest="minibatch", type="int", default=30) parser.add_option("--optimizer", dest="optimizer", type="string", default="adagrad") parser.add_option("--epochs", dest="epochs", type="int", default=50) parser.add_option("--step", dest="step", type="float", default=5e-2) parser.add_option("--rho", dest="rho", type="float", default=1e-3) # Dimension parser.add_option("--wvecDim", dest="wvec_dim", type="int", default=30) parser.add_option("--memDim", dest="mem_dim", type="int", default=30) parser.add_option("--outFile", dest="out_file", type="string", default="models/test.bin") parser.add_option("--inFile", dest="in_file", type="string", default="models/test.bin") parser.add_option("--data", dest="data", type="string", default="train") parser.add_option("--model", dest="model", type="string", default="RNN") parser.add_option("--label", dest="label_method", type="string", default="rating") (opts, args) = parser.parse_args(args) evaluate_accuracy_while_training = True if opts.label_method == 'rating': label_method = tree.rating_label opts.output_dim = 5 elif opts.label_method == 'aspect': label_method = tree.aspect_label opts.output_dim = 5 elif opts.label_method == 'pair': label_method = tree.pair_label opts.output_dim = 25 else: raise '%s is not a valid labelling method.' % opts.label_method # Testing if opts.test: test(opts.in_file, opts.data, label_method, opts.model) return print "Loading data..." 
train_accuracies = [] dev_accuracies = [] # load training data trees = tree.load_trees('./data/train.json', label_method) training_word_map = tree.load_word_map() opts.num_words = len(training_word_map) tree.convert_trees(trees, training_word_map) labels = [each.label for each in trees] count = np.zeros(opts.output_dim) for label in labels: count[label] += 1 # weight = 10 / (count ** 0.1) weight = np.ones(opts.output_dim) if opts.model == 'RNTN': nn = RNTN(opts.wvec_dim, opts.output_dim, opts.num_words, opts.minibatch, rho=opts.rho) elif opts.model == 'RNN': nn = RNN(opts.wvec_dim, opts.output_dim, opts.num_words, opts.minibatch, rho=opts.rho, weight=weight) elif opts.model == 'TreeLSTM': nn = TreeLSTM(opts.wvec_dim, opts.mem_dim, opts.output_dim, opts.num_words, opts.minibatch, rho=opts.rho) elif opts.model == 'TreeTLSTM': nn = TreeTLSTM(opts.wvec_dim, opts.mem_dim, opts.output_dim, opts.num_words, opts.minibatch, rho=opts.rho) else: raise '%s is not a valid neural network so far only RNTN, RNN, RNN2, RNN3, and DCNN' % opts.model nn.init_params() sgd = optimizer.SGD(nn, alpha=opts.step, minibatch=opts.minibatch, optimizer=opts.optimizer) dev_trees = tree.load_trees('./data/dev.json', label_method) tree.convert_trees(dev_trees, training_word_map) for e in range(opts.epochs): start = time.time() print "Running epoch %d" % e sgd.run(trees, e) end = time.time() print "Time per epoch : %f" % (end - start) with open(opts.out_file, 'w') as fid: pickle.dump(opts, fid) pickle.dump(sgd.costt, fid) nn.to_file(fid) if evaluate_accuracy_while_training: # pdb.set_trace() print "testing on training set real quick" train_accuracies.append(test(opts.out_file, "train", label_method, opts.model, trees)) print "testing on dev set real quick" dev_accuracies.append(test(opts.out_file, "dev", label_method, opts.model, dev_trees)) if evaluate_accuracy_while_training: print train_accuracies print dev_accuracies plt.plot(train_accuracies, label='train') plt.plot(dev_accuracies, 
label='dev') plt.legend(loc=2) plt.axvline(x=np.argmax(dev_accuracies), linestyle='--') plt.show()
def run(): print "Loading data..." model = "RNN" trees = tr.loadTrees('train') dev_trees = tr.loadTrees('dev') wvecDimList = [5, 15, 25, 35, 45] #wvecDimList = [10,20,40] accuracy_per_wvecDim = [] epochs = 100 outFileText = "./param/%s/%s_cost_and_acc" % (model, model) f = open(outFileText, 'w') for wvecDim in wvecDimList: nn = RNN(wvecDim, 5, len(tr.loadWordMap()), 30) nn.initParams() sgd = optimizer.SGD(nn, alpha=0.01, minibatch=30, optimizer="adagrad") outFile = "./param/%s/%s_wvecDim_%d_epochs_%d_step_001.bin" % ( model, model, wvecDim, epochs) train_cost = [] train_acc = [] dev_cost = [] dev_acc = [] cost = 0 accuracy = 0 for e in range(epochs): start = time.time() sgd.run(trees) end = time.time() print "Time per epoch : %f" % (end - start) with open(outFile, 'w') as fid: hyperparam = {} hyperparam['alpha'] = 0.01 hyperparam['minibatch'] = 30 hyperparam['wvecDim'] = wvecDim pickle.dump(hyperparam, fid) nn.toFile(fid) cost, accuracy = test(nn, trees) train_cost.append(cost) train_acc.append(accuracy) cost, accuracy = test(nn, dev_trees) dev_cost.append(cost) dev_acc.append(accuracy) for tree in trees: tr.leftTraverse(tree.root, nodeFn=tr.clearFprop) for tree in dev_trees: tr.leftTraverse(tree.root, nodeFn=tr.clearFprop) print "fprop in trees cleared" plot_cost_acc( train_cost, dev_cost, "./figures/%s/%s_Cost_Figure_%d" % (model, model, wvecDim), epochs) plot_cost_acc( train_acc, dev_acc, "./figures/%s/%s_Accuracy_Figure_%d" % (model, model, wvecDim), epochs) anwser = "Cost = %f, Acc= %f" % (cost, accuracy) f.write(anwser) accuracy_per_wvecDim.append(accuracy) f.close() plt.figure(figsize=(6, 4)) plt.title(r"Accuracies and vector Dimension") plt.xlabel("vector Dimension") plt.ylabel(r"Accuracy") plt.ylim(ymin=min(accuracy_per_wvecDim) * 0.8, ymax=max(accuracy_per_wvecDim) * 1.2) plt.plot(wvecDimList, accuracy_per_wvecDim, color='b', marker='o', linestyle='-') plt.savefig("./figures/%s/%s_Accuracy_and_vectorDimsension.png" % (model, model)) plt.close()
def run(args=None): usage = "usage : %prog [options]" parser = optparse.OptionParser(usage=usage) parser.add_option("--test",action="store_true",dest="test",default=False) # Optimizer parser.add_option("--minibatch",dest="minibatch",type="int",default=30) parser.add_option("--optimizer",dest="optimizer",type="string", default="adagrad") parser.add_option("--epochs",dest="epochs",type="int",default=50) parser.add_option("--step",dest="step",type="float",default=1e-2) parser.add_option("--middleDim",dest="middleDim",type="int",default=10) parser.add_option("--outputDim",dest="outputDim",type="int",default=5) parser.add_option("--wvecDim",dest="wvecDim",type="int",default=30) parser.add_option("--outFile",dest="outFile",type="string", default="models/test.bin") parser.add_option("--inFile",dest="inFile",type="string", default="models/test.bin") parser.add_option("--data",dest="data",type="string",default="train") parser.add_option("--model",dest="model",type="string",default="RNN") (opts, args) = parser.parse_args(args) # make this false if you dont care about your accuracies per epoch, makes things faster! evaluate_accuracy_while_training = True # Testing if opts.test: test(opts.inFile, opts.data, opts.model) return print "Loading data..." 
train_accuracies = [] dev_accuracies = [] # load training data trees = tr.load_trees(TRAIN_DATA_FILE) opts.numWords = len(tr.load_word_to_index_map()) if (opts.model=='RNTN'): nn = RNTN(opts.wvecDim,opts.outputDim,opts.numWords,opts.minibatch) elif(opts.model=='RNN'): nn = RNN(opts.wvecDim,opts.outputDim,opts.numWords,opts.minibatch) elif(opts.model=='RNN2'): nn = RNN2(opts.wvecDim,opts.middleDim,opts.outputDim,opts.numWords,opts.minibatch) else: raise '%s is not a valid neural network so far only RNTN, RNN, RNN2' % opts.model nn.initParams() sgd = optimizer.SGD(nn, alpha=opts.step, minibatch=opts.minibatch, optimizer=opts.optimizer) dev_trees = tr.load_trees(DEV_DATA_FILE) for e in range(opts.epochs): start = time.time() print "Running epoch %d" % e sgd.run(trees) end = time.time() print "Time per epoch : %f" % (end-start) # save the net to the output file #f = open(opts.outFile, 'wb') #pickle.dump(opts, f, -1) #pickle.dump(sgd.costt, f, -1) #pickle.dump(nn.stack, f, -1) #np.save(f, nn.stack) #f.close() joblib.dump(opts, opts.outFile + "_opts") joblib.dump(sgd.costt, opts.outFile + "_cost") joblib.dump(nn.stack, opts.outFile + "_stack") if evaluate_accuracy_while_training: print "testing on training set..." train_accuracies.append(test(opts.outFile, "train", opts.model, trees)) print "testing on dev set..." 
dev_accuracies.append(test(opts.outFile, "dev", opts.model, dev_trees)) # clear the fprop flags in trees and dev_trees for tree in trees: tr.traverse(tree.root, func=tr.clear_fprop) for tree in dev_trees: tr.traverse(tree.root, func=tr.clear_fprop) print "fprop in trees cleared" if False: # don't do this for now #if evaluate_accuracy_while_training: #print train_accuracies #print dev_accuracies # Plot train/dev_accuracies here x = range(opts.epochs) figure(figsize=(6,4)) plot(x, train_accuracies, color='b', marker='o', linestyle='-', label="training") plot(x, dev_accuracies, color='g', marker='o', linestyle='-', label="dev") title("Accuracy vs num epochs.") xlabel("Epochs") ylabel("Accuracy") #ylim(ymin=0, ymax=max(1.1*max(train_accuracies),3*min(train_accuracies))) legend() savefig("train_dev_acc.png")
train = MNISTDataSet('train') test = MNISTDataSet('test') train.plotIdx(0) # first figure # K-Means loss = MNISTSqLoss(train.ds) x = scipy.zeros((10, 784)) grad = loss.gradEval(x, (train.ds[0][:50], train.ds[1][:50])) train.plotDigit(grad[3], nvals=True) # second figure sfunc = sgd.step_size(0.9, 1) opt = sgd.SGD(afunc=loss, x0=x, sfunc=sfunc, histsize=500, ndata=300, keepobj=False) opt.nsteps(500) ans = opt.getAvgSoln(100) result = loss.classify(ans, test.ds[0]) loss.errorInd(result) print 'Sum of parameters:', scipy.sum(result) # MultiNom multinom = MNISTMultiNom(train.ds) grad = multinom.gradEval(x, (train.ds[0][:50], train.ds[1][:50])) train.plotDigit(grad[3], nvals=True) # third figure
def run(args=None): usage = "usage : %prog [options]" parser = optparse.OptionParser(usage=usage) parser.add_option("--test", action="store_true", dest="test", default=False) # Optimizer parser.add_option("--minibatch", dest="minibatch", type="int", default=30) parser.add_option("--optimizer", dest="optimizer", type="string", default="adagrad") parser.add_option("--epochs", dest="epochs", type="int", default=50) parser.add_option("--step", dest="step", type="float", default=1e-2) parser.add_option("--middleDim", dest="middleDim", type="int", default=10) parser.add_option("--outputDim", dest="outputDim", type="int", default=5) parser.add_option("--wvecDim", dest="wvecDim", type="int", default=30) # for DCNN only parser.add_option("--ktop", dest="ktop", type="int", default=5) parser.add_option("--m1", dest="m1", type="int", default=10) parser.add_option("--m2", dest="m2", type="int", default=7) parser.add_option("--n1", dest="n1", type="int", default=6) parser.add_option("--n2", dest="n2", type="int", default=12) parser.add_option("--outFile", dest="outFile", type="string", default="models/test.bin") parser.add_option("--inFile", dest="inFile", type="string", default="models/test.bin") parser.add_option("--data", dest="data", type="string", default="train") parser.add_option("--model", dest="model", type="string", default="RNN") (opts, args) = parser.parse_args(args) # make this false if you dont care about your accuracies per epoch, makes things faster! evaluate_accuracy_while_training = True # Testing if opts.test: test(opts.inFile, opts.data, opts.model) return print "Loading data..." 
train_accuracies = [] dev_accuracies = [] # load training data trees = tr.loadTrees('train') opts.numWords = len(tr.loadWordMap()) if (opts.model == 'RNTN'): nn = RNTN(opts.wvecDim, opts.outputDim, opts.numWords, opts.minibatch) elif (opts.model == 'RNN'): nn = RNN(opts.wvecDim, opts.outputDim, opts.numWords, opts.minibatch) elif (opts.model == 'RNN2'): nn = RNN2(opts.wvecDim, opts.middleDim, opts.outputDim, opts.numWords, opts.minibatch) elif (opts.model == 'RNN3'): nn = RNN3(opts.wvecDim, opts.middleDim, opts.outputDim, opts.numWords, opts.minibatch) elif (opts.model == 'DCNN'): nn = DCNN(opts.wvecDim, opts.ktop, opts.m1, opts.m2, opts.n1, opts.n2, 0, opts.outputDim, opts.numWords, 2, opts.minibatch, rho=1e-4) trees = cnn.tree2matrix(trees) else: raise '%s is not a valid neural network so far only RNTN, RNN, RNN2, RNN3, and DCNN' % opts.model nn.initParams() sgd = optimizer.SGD(nn, alpha=opts.step, minibatch=opts.minibatch, optimizer=opts.optimizer) dev_trees = tr.loadTrees("dev") for e in range(opts.epochs): start = time.time() print "Running epoch %d" % e sgd.run(trees) end = time.time() print "Time per epoch : %f" % (end - start) with open(opts.outFile, 'w') as fid: pickle.dump(opts, fid) pickle.dump(sgd.costt, fid) nn.toFile(fid) if evaluate_accuracy_while_training: print "testing on training set real quick" train_accuracies.append( test(opts.outFile, "train", opts.model, trees)) print "testing on dev set real quick" dev_accuracies.append( test(opts.outFile, "dev", opts.model, dev_trees)) # clear the fprop flags in trees and dev_trees for tree in trees: tr.leftTraverse(tree.root, nodeFn=tr.clearFprop) for tree in dev_trees: tr.leftTraverse(tree.root, nodeFn=tr.clearFprop) print "fprop in trees cleared" if evaluate_accuracy_while_training: pdb.set_trace() print train_accuracies print dev_accuracies
import random random.seed(33) # Load model if specified with open(inFile, 'r') as fid: opts = pickle.load(fid) loader = dl.DataLoader(opts.dataDir, opts.rawDim, opts.inputDim) nn = rnnet.NNet(opts.inputDim, opts.outputDim, opts.layerSize, opts.numLayers, opts.maxUttLen, temporalLayer=opts.temporalLayer) nn.initParams() SGD = sgd.SGD(nn, opts.maxUttLen, alpha=opts.step, momentum=opts.momentum) SGD.expcost = pickle.load(fid) SGD.it = 100 nn.fromFile(fid) velocity = pickle.load(fid) for (w, b), (wv, bv) in zip(velocity, SGD.velocity): wv.copy_to_host() bv.copy_to_host() wv.numpy_array[:] = w[:] bv.numpy_array[:] = b[:] wv.copy_to_device() bv.copy_to_device() # Training pdb.set_trace() for i in np.random.permutation(opts.numFiles) + 1: