def train(model, data, words, params):
    """Train ``model`` on ``data`` with shuffled minibatches.

    For every epoch, minibatch indices are drawn with
    ``utils.get_minibatches_idx``.  For each minibatch the two elements of
    every pair get their embeddings populated from ``words``, ``getpairs``
    builds the inputs and masks, and ``model.train_function`` performs one
    update and returns the cost.  When ``utils.checkIfQuarter`` fires, and
    again at the end of each epoch, the model is optionally saved and
    evaluated.

    A ``KeyboardInterrupt`` stops training early; the elapsed wall-clock
    time is printed either way.

    Args:
        model: object providing ``train_function``; also handed to
            ``getpairs``, ``utils.saveParams`` and ``evaluate_all``.
        data: indexable collection of pairs; elements ``[0]`` and ``[1]`` of
            each pair must provide ``populate_embeddings``,
            ``unpopulate_embeddings`` and a ``representation`` attribute.
        words: passed through to ``populate_embeddings`` and
            ``evaluate_all`` (presumably the vocabulary lookup -- confirm
            against the callers).
        params: settings object; ``epochs``, ``batchsize``, ``save``,
            ``evaluate`` and ``outfile`` are read here.

    Returns:
        None.
    """
    start_time = time.time()
    counter = 0  # number of checkpoints written; used as the file suffix
    try:
        for eidx in xrange(params.epochs):
            kf = utils.get_minibatches_idx(len(data), params.batchsize,
                                           shuffle=True)
            uidx = 0
            # Bound up front so the epoch summary below cannot hit a
            # NameError when there are no minibatches.
            cost = None
            for _, train_index in kf:
                uidx += 1
                batch = [data[t] for t in train_index]
                for pair in batch:
                    pair[0].populate_embeddings(words)
                    pair[1].populate_embeddings(words)

                try:
                    (g1x, g1mask, g2x, g2mask,
                     p1x, p1mask, p2x, p2mask) = getpairs(model, batch, params)
                    cost = model.train_function(g1x, g2x, p1x, p2x,
                                                g1mask, g2mask, p1mask, p2mask)

                    # Reported only; training continues after a bad cost.
                    if np.isnan(cost) or np.isinf(cost):
                        print('NaN detected')

                    if utils.checkIfQuarter(uidx, len(kf)):
                        if params.save:
                            counter += 1
                            utils.saveParams(
                                model,
                                params.outfile + str(counter) + '.pickle')
                        if params.evaluate:
                            evaluate_all(model, words)
                            sys.stdout.flush()
                finally:
                    # Undo batch to save RAM.  Done in ``finally`` so the
                    # batch is released even when the update is interrupted
                    # or raises.
                    for pair in batch:
                        pair[0].representation = None
                        pair[1].representation = None
                        pair[0].unpopulate_embeddings()
                        pair[1].unpopulate_embeddings()

            if params.save:
                counter += 1
                utils.saveParams(model,
                                 params.outfile + str(counter) + '.pickle')
            if params.evaluate:
                evaluate_all(model, words)

            # Single-argument form prints the same text as the Python 2
            # print statement ``print 'Epoch ', n, 'Cost ', cost``.
            print('Epoch  %s Cost  %s' % (eidx + 1, cost))
    except KeyboardInterrupt:
        print("Training interupted")

    end_time = time.time()
    print("total time: %s" % (end_time - start_time))
def train(model, data, words, params):
    """Run minibatch training of ``model`` for ``params.epochs`` epochs.

    Per epoch: shuffled minibatch indices come from
    ``utils.get_minibatches_idx``; per minibatch: both sides of each pair
    are populated from ``words``, ``getpairs`` produces the four inputs with
    their masks, and ``model.train_function`` returns the cost of one
    update.  Checkpointing (``params.save``) and evaluation
    (``params.evaluate``) happen whenever ``utils.checkIfQuarter`` returns
    true and once more after each epoch.

    Training can be stopped with Ctrl-C (``KeyboardInterrupt`` is caught);
    the total elapsed time is always printed before returning.

    Args:
        model: object with a ``train_function`` method; also passed to
            ``getpairs``, ``utils.saveParams`` and ``evaluate_all``.
        data: indexable collection of pairs whose ``[0]`` and ``[1]``
            elements expose ``populate_embeddings``,
            ``unpopulate_embeddings`` and ``representation``.
        words: forwarded to ``populate_embeddings`` and ``evaluate_all``
            (presumably a word-to-index mapping -- TODO confirm).
        params: configuration; reads ``epochs``, ``batchsize``, ``save``,
            ``evaluate`` and ``outfile``.

    Returns:
        None.
    """
    start_time = time.time()
    counter = 0  # checkpoint sequence number appended to ``params.outfile``
    try:
        for eidx in xrange(params.epochs):
            kf = utils.get_minibatches_idx(len(data), params.batchsize,
                                           shuffle=True)
            uidx = 0
            # Pre-bind so an epoch with no minibatches still reaches the
            # summary print instead of raising NameError.
            cost = None
            for _, train_index in kf:
                uidx += 1
                batch = [data[t] for t in train_index]
                for item in batch:
                    item[0].populate_embeddings(words)
                    item[1].populate_embeddings(words)

                try:
                    (g1x, g1mask, g2x, g2mask,
                     p1x, p1mask, p2x, p2mask) = getpairs(model, batch, params)
                    cost = model.train_function(g1x, g2x, p1x, p2x,
                                                g1mask, g2mask, p1mask, p2mask)

                    # Only a warning: the loop keeps going after NaN/inf.
                    if np.isnan(cost) or np.isinf(cost):
                        print('NaN detected')

                    if utils.checkIfQuarter(uidx, len(kf)):
                        if params.save:
                            counter += 1
                            utils.saveParams(
                                model,
                                params.outfile + str(counter) + '.pickle')
                        if params.evaluate:
                            evaluate_all(model, words)
                            sys.stdout.flush()
                finally:
                    # Undo batch to save RAM; ``finally`` guarantees this
                    # also runs when the step above raises or is interrupted.
                    for item in batch:
                        item[0].representation = None
                        item[1].representation = None
                        item[0].unpopulate_embeddings()
                        item[1].unpopulate_embeddings()

            if params.save:
                counter += 1
                utils.saveParams(model,
                                 params.outfile + str(counter) + '.pickle')
            if params.evaluate:
                evaluate_all(model, words)

            # Same output as ``print 'Epoch ', n, 'Cost ', cost`` in
            # Python 2 (note the doubled spaces).
            print('Epoch  %s Cost  %s' % (eidx + 1, cost))
    except KeyboardInterrupt:
        print("Training interupted")

    end_time = time.time()
    print("total time: %s" % (end_time - start_time))
def train(model, data, words, params):
    """Train ``model`` on ``data`` and save its parameters after every epoch.

    Each epoch draws shuffled minibatch indices from
    ``utils.get_minibatches_idx``.  For every minibatch the two elements of
    each pair are populated from ``words``, ``utils.getpairs`` builds the
    four inputs, and ``model.train_function`` performs one update and
    returns the cost.  After each epoch the parameters are written with
    ``utils.saveParams`` to ``params.outfile + str(eidx) + '.pickle'``
    (``eidx`` is the zero-based epoch index), regardless of any save flag.

    A ``KeyboardInterrupt`` ends training early; the elapsed wall-clock
    time is printed in either case.

    Args:
        model: object providing ``train_function``; also handed to
            ``utils.getpairs`` and ``utils.saveParams``.
        data: indexable collection of pairs; elements ``[0]`` and ``[1]`` of
            each pair must provide ``populate_embeddings``,
            ``unpopulate_embeddings`` and a ``representation`` attribute.
        words: passed through to ``populate_embeddings`` (presumably the
            vocabulary lookup -- confirm against the callers).
        params: settings object; ``epochs``, ``batchsize`` and ``outfile``
            are read here.

    Returns:
        None.
    """
    # NOTE: an earlier, disabled variant held out a cross-validation fold
    # and saved only when the held-out score improved; the current behavior
    # is to save unconditionally once per epoch.
    start_time = time.time()
    try:
        for eidx in xrange(params.epochs):
            kf = utils.get_minibatches_idx(len(data), params.batchsize,
                                           shuffle=True)
            # Bound up front so the epoch summary below cannot hit a
            # NameError when there are no minibatches.
            cost = None
            for _, train_index in kf:
                batch = [data[t] for t in train_index]
                for pair in batch:
                    pair[0].populate_embeddings(words)
                    pair[1].populate_embeddings(words)

                try:
                    (g1x, g2x, p1x, p2x) = utils.getpairs(model, batch, params)
                    cost = model.train_function(g1x, g2x, p1x, p2x)

                    # Reported only; training continues after a bad cost.
                    if np.isnan(cost) or np.isinf(cost):
                        print('NaN detected')
                finally:
                    # Undo batch to save RAM.  Done in ``finally`` so the
                    # batch is released even when the update is interrupted
                    # or raises.
                    for pair in batch:
                        pair[0].representation = None
                        pair[1].representation = None
                        pair[0].unpopulate_embeddings()
                        pair[1].unpopulate_embeddings()

            utils.saveParams(model, params.outfile + str(eidx) + '.pickle')

            # Single-argument form prints the same text as the Python 2
            # print statement ``print 'Epoch ', n, 'Cost ', cost``.
            print('Epoch  %s Cost  %s' % (eidx + 1, cost))
    except KeyboardInterrupt:
        print("Training interupted")

    end_time = time.time()
    print("total time: %s" % (end_time - start_time))