def train_epochs(self, start_epoch=1):
    start_time = time.time()
    self.megabatch = []
    self.ep_loss = 0
    self.curr_idx = 0
    self.eval()
    evaluate_sts(self, self.args)
    self.train()
    try:
        for ep in range(start_epoch, self.args.epochs + 1):
            self.mb = utils.get_minibatches_idx(len(self.data), self.args.batchsize, shuffle=True)
            self.curr_idx = 0
            self.ep_loss = 0
            self.megabatch = []
            cost = 0
            counter = 0
            while cost is not None:
                cost = pairing.compute_loss_one_batch(self)
                if cost is None:
                    continue
                self.ep_loss += cost.item()
                counter += 1
                print("Epoch {0}, Counter {1}/{2}".format(ep, counter, len(self.mb)))
                if self.save_interval > 0 and counter > 0:
                    if counter % self.save_interval == 0:
                        self.eval()
                        evaluate_sts(self, self.args)
                        self.train()
                        self.save_params(ep, counter=counter)
                self.optimizer.zero_grad()
                cost.backward()
                torch.nn.utils.clip_grad_norm_(self.parameters, self.args.grad_clip)
                self.optimizer.step()
            self.eval()
            evaluate_sts(self, self.args)
            self.train()
            if self.args.save_every_epoch:
                self.save_params(ep)
            print('Epoch {0}\tCost: '.format(ep), self.ep_loss / counter)
    except KeyboardInterrupt:
        print("Training Interrupted")
    if self.args.save_final:
        self.save_params(ep)
    end_time = time.time()
    print("Total Time:", (end_time - start_time))
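# Every snippet in this collection assumes a `get_minibatches_idx` helper that is not
# shown here. Below is a minimal sketch in the style of the Theano LSTM tutorial: it
# optionally shuffles the example indices and returns (batch_number, index_array)
# pairs. This is an illustration only -- some callers unpack the pair
# (`for _, idx in kf`) while others iterate the index arrays directly, so the exact
# return value differs between the repos these functions come from.
import numpy as np

def get_minibatches_idx(n, minibatch_size, shuffle=False):
    """Split the indices 0..n-1 into minibatches of size `minibatch_size`."""
    idx_list = np.arange(n, dtype="int32")
    if shuffle:
        np.random.shuffle(idx_list)
    minibatches = []
    start = 0
    for _ in range(n // minibatch_size):
        minibatches.append(idx_list[start:start + minibatch_size])
        start += minibatch_size
    if start != n:
        # keep the remainder as a final, smaller batch
        minibatches.append(idx_list[start:])
    return list(zip(range(len(minibatches)), minibatches))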
def fit(self, X, y, sess):
    max_epochs = 20
    # Split into training and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.33, random_state=42)
    for epoch in range(max_epochs):
        start = time.time()
        train_indices = get_minibatches_idx(len(X_train), batch_size, shuffle=True)
        print("\nEpoch %d" % (epoch + 1))
        train_accs = []
        for c, it in enumerate(train_indices):
            batch_train_x = [X_train[i] for i in it]
            batch_train_y = [y_train[i] for i in it]
            feed_dict = {self.x: batch_train_x, self.y: batch_train_y, self.deterministic: False}
            _, acc = sess.run([self.train_step, self.accuracy], feed_dict)
            train_accs.append(acc)
            # print(c, len(train_indices), acc)
        print("Training accuracy: %.3f" % np.mean(train_accs))
        val_pred = self.predict(X_val, sess)
        y = np.argmax(y_val, axis=1)
        val_acc = np.mean(np.equal(val_pred, y))
        print("Val accuracy: %.3f" % val_acc)
        print("Time taken: %.3fs" % (time.time() - start))
    return
def simple_test_batch(testloader, model, config, hidden=False):
    model.eval()
    total = 0.0
    correct = 0.0
    pred_np = []
    hidden_vectors = []
    minibatches_idx = get_minibatches_idx(
        len(testloader), minibatch_size=config['simple_test_batch_size'], shuffle=False)
    for minibatch in minibatches_idx:
        inputs = torch.Tensor(
            np.array([list(testloader[x][0].cpu().numpy()) for x in minibatch]))
        targets = torch.Tensor(
            np.array([list(testloader[x][1].cpu().numpy()) for x in minibatch]))
        inputs, targets = Variable(inputs.cuda()).squeeze(1), Variable(targets.cuda()).squeeze()
        outputs = model(inputs)
        if hidden:
            hiddens = get_hidden(model, inputs)
            hidden_vectors.extend(list(hiddens.cpu().data.numpy()))
        pred_np.extend(list(outputs.cpu().data.numpy()))
        predicted = (outputs >= 0.5).long().squeeze()
        total += targets.size(0)
        correct += predicted.eq(targets.long()).sum().item()
    test_accuracy = correct / total
    pred_np = np.array(pred_np)
    if hidden:
        return test_accuracy, pred_np, np.array(hidden_vectors)
    return test_accuracy, pred_np
def simple_train_batch(trainloader, model, loss_function, optimizer, config):
    model.train()
    for epoch in range(config['epoch_num']):
        if epoch == int(config['epoch_num'] / 3):
            for g in optimizer.param_groups:
                g['lr'] = config['lr'] / 10
            print('divide current learning rate by 10')
        elif epoch == int(config['epoch_num'] * 2 / 3):
            for g in optimizer.param_groups:
                g['lr'] = config['lr'] / 100
            print('divide current learning rate by 10')
        total_loss = 0
        minibatches_idx = get_minibatches_idx(
            len(trainloader), minibatch_size=config['simple_train_batch_size'], shuffle=True)
        for minibatch in minibatches_idx:
            inputs = torch.Tensor(
                np.array([list(trainloader[x][0].cpu().numpy()) for x in minibatch]))
            targets = torch.Tensor(
                np.array([list(trainloader[x][1].cpu().numpy()) for x in minibatch]))
            inputs, targets = Variable(inputs.cuda()).squeeze(1), Variable(targets.long().cuda()).squeeze()
            optimizer.zero_grad()
            outputs = model(inputs).squeeze()
            loss = loss_function(outputs, targets)
            total_loss += loss
            loss.backward()
            optimizer.step()
        print('epoch:', epoch, 'loss:', total_loss)
def do_eval(sess, train_q, train_a, train_lab):
    train_correct = 0.0
    # number_examples = len(train_q)
    # print("valid examples:", number_examples)
    eval_loss, eval_accc, eval_counter = 0.0, 0.0, 0
    eval_true_positive, eval_false_positive, eval_true_negative, eval_false_negative = 0, 0, 0, 0
    # batch_size = 1
    weights_label = {}  # weight_label[label_index] = (number, correct)
    weights = np.ones((opt.batch_size))
    kf_train = get_minibatches_idx(len(train_q), opt.batch_size, shuffle=True)
    for _, train_index in kf_train:
        train_sents_1 = [train_q[t] for t in train_index]
        train_sents_2 = [train_a[t] for t in train_index]
        train_labels = [train_lab[t] for t in train_index]
        train_labels_array = np.array(train_labels)
        # print("train_labels", train_labels.shape)
        # train_labels = train_labels.reshape((len(train_labels), opt.category))
        train_labels = np.eye(opt.category)[train_labels_array]
        x_train_batch_1, x_train_mask_1 = prepare_data_for_emb(train_sents_1, opt)
        x_train_batch_2, x_train_mask_2 = prepare_data_for_emb(train_sents_2, opt)
        curr_eval_loss, curr_accc, logits = sess.run(
            [loss_, accuracy_, logits_],
            feed_dict={
                x_1_: x_train_batch_1,
                x_2_: x_train_batch_2,
                x_mask_1_: x_train_mask_1,
                x_mask_2_: x_train_mask_2,
                y_: train_labels,
                opt.weights_label: weights,
                keep_prob: 1.0
            })
        true_positive, false_positive, true_negative, false_negative = compute_confuse_matrix(
            logits, train_labels)  # logits: [batch_size, label_size] --> logits[0]: [label_size]
        # write_predict_error_to_file(start, file_object, logits[0], evalY[start:end][0], vocabulary_index2word, evalX1[start:end], evalX2[start:end])
        # accumulate loss and accuracy over batches here, then normalize by the batch count below
        eval_loss, eval_accc, eval_counter = eval_loss + curr_eval_loss, eval_accc + curr_accc, eval_counter + 1
        weights_label = compute_labels_weights(weights_label, logits, train_labels_array)
        eval_true_positive, eval_false_positive = eval_true_positive + true_positive, eval_false_positive + false_positive
        eval_true_negative, eval_false_negative = eval_true_negative + true_negative, eval_false_negative + false_negative
        # weights_label = compute_labels_weights(weights_label, logits, evalY[start:end])
    print("true_positive:", eval_true_positive, ";false_positive:", eval_false_positive,
          ";true_negative:", eval_true_negative, ";false_negative:", eval_false_negative)
    p = float(eval_true_positive) / float(eval_true_positive + eval_false_positive)
    r = float(eval_true_positive) / float(eval_true_positive + eval_false_negative)
    f1_score = (2 * p * r) / (p + r)
    print("eval_counter:", eval_counter, ";eval_acc:", eval_accc)
    return eval_loss / float(eval_counter), eval_accc / float(eval_counter), f1_score, p, r, weights_label
def train(self, params, train, dev, test):
    start_time = time.time()
    counter = 0
    try:
        for eidx in xrange(params.epochs):
            kf = utils.get_minibatches_idx(len(train), params.batchsize, shuffle=True)
            uidx = 0
            for _, train_index in kf:
                uidx += 1
                batch = [train[t] for t in train_index]
                vocab = self.get_word_arr(batch)
                y = self.get_y(batch)
                x, xmask = self.prepare_data(self.populate_embeddings_words(batch, vocab))
                idxs = self.get_idxs(xmask)
                if params.nntype == "charlstm" or params.nntype == "charcnn":
                    char_indices = self.populate_embeddings_characters(vocab)
                if params.nntype == "charagram":
                    char_hash = self.populate_embeddings_characters_charagram(vocab)
                if params.nntype == "charlstm":
                    c, cmask = self.prepare_data(char_indices)
                if params.nntype == "charcnn":
                    c = self.prepare_data_conv(char_indices)
                if params.nntype == "charlstm":
                    cost = self.train_function(c, cmask, x, xmask, idxs, y)
                if params.nntype == "charcnn":
                    cost = self.train_function(c, x, xmask, idxs, y)
                if params.nntype == "charagram":
                    cost = self.train_function(char_hash, x, xmask, idxs, y)
                if np.isnan(cost) or np.isinf(cost):
                    print 'NaN detected'
                # print 'Epoch ', (eidx+1), 'Update ', (uidx+1), 'Cost ', cost
            if params.save:
                counter += 1
                utils.save_params(self, params.outfile + str(counter) + '.pickle')
            if params.evaluate:
                devscore = self.evaluate(dev, params)
                testscore = self.evaluate(test, params)
                trainscore = self.evaluate(train, params)
                print "accuracy: ", devscore, testscore, trainscore
            print 'Epoch ', (eidx + 1), 'Cost ', cost
    except KeyboardInterrupt:
        print "Training interrupted"
    end_time = time.time()
    print "total time:", (end_time - start_time)
def run_model(opt, X):
    try:
        params = np.load('./param_g.npz')
        if params['Wemb'].shape == (opt.n_words, opt.embed_size):
            print('Use saved embedding.')
            opt.W_emb = params['Wemb']
        else:
            print('Emb Dimension mismatch: param_g.npz:' + str(params['Wemb'].shape) +
                  ' opt: ' + str((opt.n_words, opt.embed_size)))
            opt.fix_emb = False
    except IOError:
        print('No embedding file found.')
        opt.fix_emb = False

    with tf.device('/gpu:0'):
        x_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.sent_len])
        if opt.plot_type == 'ae':
            x_lat_ = ae(x_, opt)
        elif opt.plot_type == 'vae' or opt.plot_type == 'cyc':
            mu_, z_ = vae(x_, opt)
            x_lat_ = z_ if opt.use_z else mu_

    config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    # config.gpu_options.per_process_gpu_memory_fraction = 0.3
    np.set_printoptions(precision=3)
    np.set_printoptions(threshold=np.inf)
    saver = tf.train.Saver()

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        if opt.restore:
            try:
                t_vars = tf.trainable_variables()
                # print([var.name[:-2] for var in t_vars])
                loader = restore_from_save(t_vars, sess, opt)
            except Exception as e:
                print(e)
                print("No saving session, using random initialization")
                sess.run(tf.global_variables_initializer())
        X_emb = np.zeros([len(X), opt.z_dim], dtype='float32')
        kf = get_minibatches_idx(len(X), opt.batch_size)
        t = 0
        for _, index in kf:
            sents_b = [X[i] for i in index]
            x_b = prepare_data_for_cnn(sents_b, opt)
            x_lat = np.squeeze(sess.run(x_lat_, feed_dict={x_: x_b}))
            X_emb[t * opt.batch_size:(t + 1) * opt.batch_size] = x_lat
            if (t + 1) % 10 == 0:
                print('%d / %d' % (t + 1, len(kf)))
            t += 1
    return X_emb
def train(model, data, words, params):
    start_time = time.time()
    counter = 0
    try:
        for eidx in xrange(params.epochs):
            kf = utils.get_minibatches_idx(len(data), params.batchsize, shuffle=True)
            uidx = 0
            for _, train_index in kf:
                uidx += 1
                batch = [data[t] for t in train_index]
                for i in batch:
                    i[0].populate_embeddings(words)
                    i[1].populate_embeddings(words)
                (g1x, g1mask, g2x, g2mask, p1x, p1mask, p2x, p2mask) = getpairs(model, batch, params)
                cost = model.train_function(g1x, g2x, p1x, p2x, g1mask, g2mask, p1mask, p2mask)
                if np.isnan(cost) or np.isinf(cost):
                    print 'NaN detected'
                if utils.checkIfQuarter(uidx, len(kf)):
                    if params.save:
                        counter += 1
                        utils.saveParams(model, params.outfile + str(counter) + '.pickle')
                    if params.evaluate:
                        evaluate_all(model, words)
                        sys.stdout.flush()
                # undo batch to save RAM
                for i in batch:
                    i[0].representation = None
                    i[1].representation = None
                    i[0].unpopulate_embeddings()
                    i[1].unpopulate_embeddings()
                # print 'Epoch ', (eidx+1), 'Update ', (uidx+1), 'Cost ', cost
            if params.save:
                counter += 1
                utils.saveParams(model, params.outfile + str(counter) + '.pickle')
            if params.evaluate:
                evaluate_all(model, words)
            print 'Epoch ', (eidx + 1), 'Cost ', cost
    except KeyboardInterrupt:
        print "Training interrupted"
    end_time = time.time()
    print "total time:", (end_time - start_time)
def predict(self, X, sess):
    indices = get_minibatches_idx(len(X), batch_size, shuffle=False)
    pred = []
    for i in indices:
        batch_x = [X[j] for j in i]
        feed_dict = {self.x: batch_x, self.deterministic: True}
        pred_batch = sess.run(self.pred, feed_dict)
        pred.append(pred_batch)
    pred = np.concatenate(pred, axis=0)
    pred = np.argmax(pred, axis=1)
    pred = np.reshape(pred, (-1))
    return pred
def get_features(trainloader, model, config):
    total_features = []
    total_labels = []
    minibatches_idx = get_minibatches_idx(
        len(trainloader), minibatch_size=config['simple_test_batch_size'], shuffle=False)
    for minibatch in minibatches_idx:
        inputs = torch.Tensor(
            np.array([list(trainloader[x][0].cpu().numpy()) for x in minibatch]))
        targets = torch.Tensor(
            np.array([list(trainloader[x][1].cpu().numpy()) for x in minibatch]))
        inputs, targets = Variable(inputs.cuda()).squeeze(1), Variable(targets.cuda()).squeeze()
        features = model.get_features(inputs)
        total_features.extend(features.cpu().data.numpy().tolist())
        total_labels.extend(targets.cpu().data.numpy().tolist())
    total_features = np.array(total_features)
    total_labels = np.array(total_labels)
    print('total features', total_features.shape)
    print('total labels', total_labels.shape)
    avg_feature = np.mean(total_features, axis=0)
    # print('avg feature', np.linalg.norm(avg_feature))
    centralized_features = total_features - avg_feature
    feature_norm = np.square(np.linalg.norm(centralized_features, axis=1))
    class_features = []
    feature_norm_list = []
    for i in range(10):
        mask_index = (total_labels == i)
        mask_index = mask_index.reshape(len(mask_index), 1)
        # print('mask index', mask_index)
        if config['R'] == 'inf' and i == config['t1']:
            break
        class_features.append(
            np.sum(total_features * mask_index, axis=0) / np.sum(mask_index.reshape(-1)))
        feature_norm_list.append(
            np.sum(feature_norm * mask_index.reshape(-1)) / np.sum(mask_index.reshape(-1)))
    class_features = np.array(class_features)
    # print('original class features', class_features)
    class_features = np.array(class_features) - avg_feature
    # print('centralized class features', class_features)
    print('feature norm list', feature_norm_list)
    print('avg square feature norm', np.mean(feature_norm_list))
    return class_features
def run_epoch(sess, epoch, mode, print_freq=-1, train_writer=None):
    fetches_ = {'loss': loss_, 'accuracy': accuracy_}
    if mode == 'train':
        x, y, is_train = train, train_lab, 1
        fetches_['train_op'] = train_op_
        fetches_['summary'] = merged
    elif mode == 'val':
        assert print_freq == -1
        x, y, is_train = val, val_lab, None
    elif mode == 'test':
        assert print_freq == -1
        x, y, is_train = test, test_lab, None
    correct, acc_loss, acc_n = 0.0, 0.0, 0.0
    local_t = 0
    global_t = epoch * epoch_t  # only used in train mode
    start_time = time.time()
    kf = get_minibatches_idx(len(x), opt.batch_size, shuffle=True)
    for _, index in kf:
        local_t += 1
        global_t += 1
        sents_b = [x[i] for i in index]
        sents_b_n = add_noise(sents_b, opt)
        y_b = [y[i] for i in index]
        y_b = np.array(y_b)
        y_b = y_b.reshape((len(y_b), 1))
        x_b = prepare_data_for_cnn(sents_b_n, opt)  # Batch L
        feed_t = {x_: x_b, y_: y_b, is_train_: is_train}
        fetches = sess.run(fetches_, feed_dict=feed_t)
        batch_size = len(index)
        acc_n += batch_size
        acc_loss += fetches['loss'] * batch_size
        correct += fetches['accuracy'] * batch_size
        if print_freq > 0 and local_t % print_freq == 0:
            print("%s Iter %d: loss %.4f, acc %.4f, time %.1fs" %
                  (mode, local_t, acc_loss / acc_n, correct / acc_n, time.time() - start_time))
        if mode == 'train' and train_writer is not None:
            train_writer.add_summary(fetches['summary'], global_t)
    print("%s Epoch %d: loss %.4f, acc %.4f, time %.1fs" %
          (mode, epoch, acc_loss / acc_n, correct / acc_n, time.time() - start_time))
    return acc_loss / acc_n, correct / acc_n
def reset(self):
    """
    Resets the state of the environment, returning an initial observation.

    Outputs
    -------
    observation : the initial observation of the space.
        (Initial reward is assumed to be 0.)
    """
    t0 = time()
    # for now lets get one sample with all.
    kf = utils.get_minibatches_idx(len(self.qi), len(self.qi), shuffle=True)
    _, train_index = kf[0]  # iterate if len(kf) > 1 --> for _, train_index in kf:
    print "kf", kf, len(self.qi)
    print("Got minibatch index {}".format(time() - t0))
    qi, qi_i, qi_lst, D_gt_id, D_gt_url = self.get_samples(
        self.qi, self.dt, self.vocab, train_index, self.search.engine,
        max_words_input=self.search.max_words_input)
    current_queries = qi_lst
    self.current_queries = qi_lst
    self.D_gt_id = D_gt_id
    print('current queries are', current_queries)
    n_iterations = 1  # number of query reformulation iterations.
    if n_iterations < self.search.q_0_fixed_until:
        ones = np.ones((len(current_queries), self.search.max_words_input))
        reformulated_query = ones
        if n_iterations > 0:
            # select everything from the original query in the first iteration.
            reformulated_query = np.concatenate([ones, ones], axis=1)
    print 'reformulated_query', reformulated_query.shape
    # reformulated_query is our action!!!
    actions = reformulated_query
    state, reward, done = self.execute(actions)
    print "state", state
    print "actions", actions
    print "rew", reward
    return state
def train(self, X, training_epochs=10):
    print("\nStarting training")
    for epoch in range(training_epochs):
        avg_cost = 0.0
        train_indices = get_minibatches_idx(len(X), batch_size, shuffle=True)
        for it in train_indices:
            batch_x = [X[i] for i in it]
            _, cost = self.sess.run((self.train_step, self.cost), feed_dict={self.x: batch_x})
            avg_cost += cost / n_samples * batch_size
        print("Epoch:", '%d' % (epoch + 1), "cost=", "{:.3f}".format(avg_cost))
def predict(test_inputs, model, config):
    # test inputs: (T, D)
    model.eval()
    pred_np = []
    minibatches_idx = get_minibatches_idx(
        len(test_inputs), minibatch_size=config['test_batch_size'], shuffle=False)
    for minibatch in minibatches_idx:
        inputs = torch.Tensor(np.array([test_inputs[x] for x in minibatch]))
        inputs = Variable(inputs.cuda().squeeze())  # (B, D)
        outputs = model(inputs)
        pred_np.extend(list(np.exp(outputs.cpu().data.numpy())))  # (B, C)
    pred_np = np.array(pred_np)  # (T, C)
    return pred_np
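# `np.exp` applied to the network outputs in predict() above suggests the model ends
# in a log-softmax layer, so exponentiating recovers class probabilities. A minimal
# self-contained check of that identity (the log-softmax head is an assumption):
import numpy as np
import torch
import torch.nn.functional as F

logits = torch.randn(4, 3)                  # toy batch: 4 examples, 3 classes
log_probs = F.log_softmax(logits, dim=1)    # what such a model would output
probs = np.exp(log_probs.numpy())           # what predict() would then return
assert np.allclose(probs.sum(axis=1), 1.0)  # each row is a valid probability vector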
def simple_train_batch(trainloader, model, loss_function, optimizer, config):
    model.train()
    for epoch in range(config['epoch_num']):
        if epoch % (config['epoch_num'] // 10) == 0:
            print('current epoch: ', epoch)
        total_loss = 0
        minibatches_idx = get_minibatches_idx(
            len(trainloader), minibatch_size=config['simple_train_batch_size'], shuffle=True)
        # model.train()
        # BCE, (100, 1, 1) doesn't matter
        # MSE, (100, 1, 1) matter
        for minibatch in minibatches_idx:
            inputs = torch.Tensor(
                np.array([list(trainloader[x][0].cpu().numpy()) for x in minibatch]))
            targets = torch.Tensor(
                np.array([list(trainloader[x][1].cpu().numpy()) for x in minibatch]))
            inputs, targets = Variable(inputs.cuda()).squeeze(), Variable(targets.float().cuda()).squeeze()
            # inputs, targets = Variable(inputs.cuda()).squeeze(1), Variable(targets.float().cuda()).squeeze(1)
            # print('inputs', inputs.size())
            # print('targets', targets.size())
            if config['model'] == 'CNN_MNIST':
                inputs = inputs.unsqueeze(1)
            optimizer.zero_grad()
            # outputs = model(inputs)
            outputs = model(inputs).squeeze()
            loss = loss_function(outputs, targets)
            # print('outputs', outputs.size())
            # print('loss', loss)
            total_loss += loss
            loss.backward()
            optimizer.step()
        if epoch % (config['epoch_num'] // 10) == 0:
            print('loss', total_loss)
def evaluate(self, data, params):
    kf = utils.get_minibatches_idx(len(data), 100, shuffle=False)
    preds = []
    for _, train_index in kf:
        batch = [data[t] for t in train_index]
        vocab = self.get_word_arr(batch)
        x, xmask = self.prepare_data(self.populate_embeddings_words(batch, vocab))
        idxs = self.get_idxs(xmask)
        if params.nntype == "charlstm" or params.nntype == "charcnn":
            char_indices = self.populate_embeddings_characters(vocab)
        if params.nntype == "charagram":
            char_hash = self.populate_embeddings_characters_charagram(vocab)
        if params.nntype == "charlstm":
            c, cmask = self.prepare_data(char_indices)
        if params.nntype == "charcnn":
            c = self.prepare_data_conv(char_indices)
        if params.nntype == "charlstm":
            temp = self.scoring_function(c, cmask, x, xmask, idxs)
        if params.nntype == "charcnn":
            temp = self.scoring_function(c, x, xmask, idxs)
        if params.nntype == "charagram":
            temp = self.scoring_function(char_hash, x, xmask, idxs)
        preds.extend(temp)
    ys = []
    for i in data:
        for j in i[1]:
            y = self.tags[j]
            ys.append(y)
    return accuracy_score(ys, preds)
def train(train_data, dev_data, model, loss_function, optimizer, ner_to_ix, config):
    total_loss_list = []
    for epoch in range(config['epoch_num']):
        model.train()
        print('current epoch: ', epoch, end='\r\n')
        total_loss = 0
        minibatches_idx = get_minibatches_idx(
            len(train_data['inputs']), minibatch_size=config['train_batch_size'], shuffle=True)
        for minibatch in minibatches_idx:
            inputs = torch.Tensor(np.array([train_data['inputs'][x] for x in minibatch]))
            targets = torch.Tensor(np.array([train_data['labels'][x] for x in minibatch]))
            confidences = torch.Tensor(np.array([train_data['confidences'][x] for x in minibatch]))
            inputs, targets = Variable(inputs.cuda()).squeeze(), Variable(targets.cuda()).squeeze().long()
            confidences = Variable(confidences.cuda(), requires_grad=False).squeeze()
            # inputs: (B, d), targets: B, confidences: B
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_function(outputs, targets)
            # print('loss', loss)
            loss = torch.sum(loss * confidences)
            total_loss += loss
            loss.backward()
            optimizer.step()
        total_loss_list.append(total_loss.cpu().data.item())
        print('train loss', total_loss)
        # train_accuracy = evaluate(train_data, model, ner_to_ix, config)
        # print('train accuracy', train_accuracy)
        dev_accuracy = evaluate(dev_data, model, ner_to_ix, config)
        print('dev accuracy', dev_accuracy)
def train_batch_autoencoder(trainloader, model, loss_function, optimizer, config):
    model.train()
    for epoch in range(config['epoch_num']):
        if epoch % (config['epoch_num'] // 10) == 0:
            print('current epoch: ', epoch)
        total_loss = 0
        minibatches_idx = get_minibatches_idx(
            len(trainloader), minibatch_size=config['simple_train_batch_size'], shuffle=True)
        for minibatch in minibatches_idx:
            inputs = torch.Tensor(
                np.array([list(trainloader[x][0].cpu().numpy()) for x in minibatch]))
            targets = torch.Tensor(
                np.array([list(trainloader[x][1].cpu().numpy()) for x in minibatch]))
            inputs, targets = Variable(inputs.cuda()).squeeze(1), Variable(targets.float().cuda()).squeeze(1)
            # print(inputs.size())
            optimizer.zero_grad()
            outputs = model(inputs)
            # print(outputs.size())
            loss = loss_function(outputs, inputs)
            total_loss += loss
            loss.backward()
            optimizer.step()
        if epoch % (config['epoch_num'] // 10) == 0:
            pic = to_img(outputs.cpu().data)
            save_image(pic, '/path/to/experiments/dir/figures/autoencoders/image_{}.png'.format(epoch))
            print('loss', total_loss)
def simple_test_batch(testloader, model, config):
    model.eval()
    total = 0.0
    correct = 0.0
    minibatches_idx = get_minibatches_idx(
        len(testloader), minibatch_size=config['simple_test_batch_size'], shuffle=False)
    y_true = []
    y_pred = []
    for minibatch in minibatches_idx:
        inputs = torch.Tensor(
            np.array([list(testloader[x][0].cpu().numpy()) for x in minibatch]))
        targets = torch.Tensor(
            np.array([list(testloader[x][1].cpu().numpy()) for x in minibatch]))
        inputs, targets = Variable(inputs.cuda()).squeeze(1), Variable(targets.cuda()).squeeze()
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.long()).sum().item()
        y_true.extend(targets.cpu().data.numpy().tolist())
        y_pred.extend(predicted.cpu().data.numpy().tolist())
    test_accuracy = correct / total
    test_confusion_matrix = confusion_matrix(y_true, y_pred)
    t1 = config['t1']
    big_class_acc = np.sum([test_confusion_matrix[i, i] for i in range(t1)]) / np.sum(test_confusion_matrix[:t1])
    if t1 == 10:
        small_class_acc = None
    else:
        small_class_acc = \
            np.sum([test_confusion_matrix[i, i] for i in range(10)[t1:]]) / np.sum(test_confusion_matrix[t1:])
    return test_accuracy, big_class_acc, small_class_acc, test_confusion_matrix
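# The big-/small-class accuracies above split a 10x10 confusion matrix at index t1 and
# sum the diagonal within each block of rows. A tiny self-contained illustration with a
# made-up 3x3 matrix and t1 = 2 (toy numbers for illustration only); rows are true
# classes, columns are predictions:
import numpy as np

cm = np.array([[8, 1, 1],
               [2, 7, 1],
               [1, 1, 8]])
t1 = 2
big_class_acc = np.sum([cm[i, i] for i in range(t1)]) / np.sum(cm[:t1])       # (8 + 7) / 20 = 0.75
small_class_acc = np.sum([cm[i, i] for i in range(t1, 3)]) / np.sum(cm[t1:])  # 8 / 10 = 0.8
print(big_class_acc, small_class_acc)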
def main():
    # global n_words
    # Prepare training and testing data
    loadpath = "./data/yahoo.p"
    x = cPickle.load(open(loadpath, "rb"))
    train, val, test = x[0], x[1], x[2]
    train_lab, val_lab, test_lab = x[3], x[4], x[5]
    wordtoix, ixtoword = x[6], x[7]

    train_lab = np.array(train_lab, dtype='float32')
    val_lab = np.array(val_lab, dtype='float32')
    test_lab = np.array(test_lab, dtype='float32')

    opt = Options()
    opt.n_words = len(ixtoword)
    del x
    print(dict(opt))
    print('Total words: %d' % opt.n_words)

    if opt.part_data:
        np.random.seed(123)
        train_ind = np.random.choice(len(train), int(len(train) * opt.portion), replace=False)
        train = [train[t] for t in train_ind]
        train_lab = [train_lab[t] for t in train_ind]

    try:
        params = np.load('./param_g.npz')
        if params['Wemb'].shape == (opt.n_words, opt.embed_size):
            print('Use saved embedding.')
            opt.W_emb = params['Wemb']
        else:
            print('Emb Dimension mismatch: param_g.npz:' + str(params['Wemb'].shape) +
                  ' opt: ' + str((opt.n_words, opt.embed_size)))
            opt.fix_emb = False
    except IOError:
        print('No embedding file found.')
        opt.fix_emb = False

    with tf.device('/gpu:1'):
        x_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.maxlen])
        x_mask_ = tf.placeholder(tf.float32, shape=[opt.batch_size, opt.maxlen])
        keep_prob = tf.placeholder(tf.float32)
        y_ = tf.placeholder(tf.float32, shape=[opt.batch_size, 10])
        accuracy_, loss_, train_op, W_emb_ = emb_classifier(x_, x_mask_, y_, keep_prob, opt)
    # merged = tf.summary.merge_all()

    uidx = 0
    max_val_accuracy = 0.
    max_test_accuracy = 0.

    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1)
    config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    np.set_printoptions(precision=3)
    np.set_printoptions(threshold=np.inf)
    saver = tf.train.Saver()

    with tf.Session(config=config) as sess:
        train_writer = tf.summary.FileWriter(opt.log_path + '/train', sess.graph)
        test_writer = tf.summary.FileWriter(opt.log_path + '/test', sess.graph)
        sess.run(tf.global_variables_initializer())
        if opt.restore:
            try:
                t_vars = tf.trainable_variables()
                # print([var.name[:-2] for var in t_vars])
                save_keys = tensors_key_in_file(opt.save_path)
                # print(save_keys.keys())
                ss = set([var.name for var in t_vars]) & set([s + ":0" for s in save_keys.keys()])
                cc = {var.name: var for var in t_vars}
                # only restore variables with correct shape
                ss_right_shape = set([s for s in ss if cc[s].get_shape() == save_keys[s[:-2]]])
                loader = tf.train.Saver(var_list=[var for var in t_vars if var.name in ss_right_shape])
                loader.restore(sess, opt.save_path)
                print("Loading variables from '%s'." % opt.save_path)
                print("Loaded variables:" + str(ss))
            except:
                print("No saving session, using random initialization")
                sess.run(tf.global_variables_initializer())

        try:
            for epoch in range(opt.max_epochs):
                print("Starting epoch %d" % epoch)
                kf = get_minibatches_idx(len(train), opt.batch_size, shuffle=True)
                for _, train_index in kf:
                    uidx += 1
                    sents = [train[t] for t in train_index]
                    x_labels = [train_lab[t] for t in train_index]
                    x_labels = np.array(x_labels)
                    x_labels = x_labels.reshape((len(x_labels), 10))
                    x_batch, x_batch_mask = prepare_data_for_emb(sents, opt)
                    _, loss = sess.run([train_op, loss_],
                                       feed_dict={x_: x_batch, x_mask_: x_batch_mask,
                                                  y_: x_labels, keep_prob: opt.drop_rate})

                    if uidx % opt.valid_freq == 0:
                        train_correct = 0.0
                        kf_train = get_minibatches_idx(500, opt.batch_size, shuffle=True)
                        for _, train_index in kf_train:
                            train_sents = [train[t] for t in train_index]
                            train_labels = [train_lab[t] for t in train_index]
                            train_labels = np.array(train_labels)
                            train_labels = train_labels.reshape((len(train_labels), 10))
                            x_train_batch, x_train_batch_mask = prepare_data_for_emb(train_sents, opt)  # Batch L
                            train_accuracy = sess.run(accuracy_,
                                                      feed_dict={x_: x_train_batch,
                                                                 x_mask_: x_train_batch_mask,
                                                                 y_: train_labels, keep_prob: 1.0})
                            train_correct += train_accuracy * len(train_index)
                        train_accuracy = train_correct / 500
                        print("Iteration %d: Training loss %f " % (uidx, loss))
                        print("Train accuracy %f " % train_accuracy)

                        val_correct = 0.0
                        kf_val = get_minibatches_idx(20000, opt.batch_size, shuffle=True)
                        for _, val_index in kf_val:
                            val_sents = [val[t] for t in val_index]
                            val_labels = [val_lab[t] for t in val_index]
                            val_labels = np.array(val_labels)
                            val_labels = val_labels.reshape((len(val_labels), 10))
                            x_val_batch, x_val_batch_mask = prepare_data_for_emb(val_sents, opt)
                            val_accuracy = sess.run(accuracy_,
                                                    feed_dict={x_: x_val_batch,
                                                               x_mask_: x_val_batch_mask,
                                                               y_: val_labels, keep_prob: 1.0})
                            val_correct += val_accuracy * len(val_index)
                        val_accuracy = val_correct / 20000
                        print("Validation accuracy %f " % val_accuracy)

                        if val_accuracy > max_val_accuracy:
                            max_val_accuracy = val_accuracy
                            test_correct = 0.0
                            kf_test = get_minibatches_idx(len(test), opt.batch_size, shuffle=True)
                            for _, test_index in kf_test:
                                test_sents = [test[t] for t in test_index]
                                test_labels = [test_lab[t] for t in test_index]
                                test_labels = np.array(test_labels)
                                test_labels = test_labels.reshape((len(test_labels), 10))
                                x_test_batch, x_test_batch_mask = prepare_data_for_emb(test_sents, opt)
                                test_accuracy = sess.run(accuracy_,
                                                         feed_dict={x_: x_test_batch,
                                                                    x_mask_: x_test_batch_mask,
                                                                    y_: test_labels, keep_prob: 1.0})
                                test_correct += test_accuracy * len(test_index)
                            test_accuracy = test_correct / len(test)
                            print("Test accuracy %f " % test_accuracy)
                            max_test_accuracy = test_accuracy

                print("Epoch %d: Max Test accuracy %f" % (epoch, max_test_accuracy))
                emb = sess.run(W_emb_, feed_dict={x_: x_test_batch})
                cPickle.dump([emb], open("yahoo_emb_max_300.p", "wb"))
            print("Max Test accuracy %f " % max_test_accuracy)

        except KeyboardInterrupt:
            print('Training interrupted')
            print("Max Test accuracy %f " % max_test_accuracy)
def run_model(opt, train, val, ixtoword):
    try:
        params = np.load('./param_g.npz')
        if params['Wemb'].shape == (opt.n_words, opt.embed_size):
            print('Use saved embedding.')
            opt.W_emb = params['Wemb']
        else:
            print('Emb Dimension mismatch: param_g.npz:' + str(params['Wemb'].shape) +
                  ' opt: ' + str((opt.n_words, opt.embed_size)))
            opt.fix_emb = False
    except IOError:
        print('No embedding file found.')
        opt.fix_emb = False

    with tf.device('/gpu:0'):
        x_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.sent_len])
        x_org_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.sent_len])
        is_train_ = tf.placeholder(tf.bool, name='is_train_')
        res_, g_loss_, d_loss_, gen_op, dis_op = textGAN(x_, opt)
        merged = tf.summary.merge_all()
        # opt.is_train = False
        # res_val_, loss_val_, _ = auto_encoder(x_, x_org_, opt)
        # merged_val = tf.summary.merge_all()

    # tensorboard --logdir=run1:/tmp/tensorflow/ --port 6006
    # writer = tf.train.SummaryWriter(opt.log_path, graph=tf.get_default_graph())

    uidx = 0
    config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True,
                            graph_options=tf.GraphOptions(build_cost_model=1))
    # config = tf.ConfigProto(device_count={'GPU': 0})
    config.gpu_options.allow_growth = True
    np.set_printoptions(precision=3)
    np.set_printoptions(threshold=np.inf)
    saver = tf.train.Saver()
    run_metadata = tf.RunMetadata()

    with tf.Session(config=config) as sess:
        train_writer = tf.summary.FileWriter(opt.log_path + '/train', sess.graph)
        test_writer = tf.summary.FileWriter(opt.log_path + '/test', sess.graph)
        sess.run(tf.global_variables_initializer())
        if opt.restore:
            try:
                # pdb.set_trace()
                t_vars = tf.trainable_variables()
                # print([var.name[:-2] for var in t_vars])
                loader = restore_from_save(t_vars, sess, opt)
            except Exception as e:
                print(e)
                print("No saving session, using random initialization")
                sess.run(tf.global_variables_initializer())

        for epoch in range(opt.max_epochs):
            print("\nStarting epoch %d\n" % epoch)
            # if epoch >= 10:
            #     print("Relax embedding ")
            #     opt.fix_emb = False
            #     opt.batch_size = 2
            kf = get_minibatches_idx(len(train), opt.batch_size, shuffle=True)
            for _, train_index in kf:
                print "\rIter: %d" % uidx,
                uidx += 1
                sents = [train[t] for t in train_index]
                sents_permutated = add_noise(sents, opt)
                # sents[0] = np.random.permutation(sents[0])
                x_batch = prepare_data_for_cnn(sents_permutated, opt)  # Batch L
                if x_batch.shape[0] == opt.batch_size:
                    d_loss = 0
                    g_loss = 0
                    if profile:
                        if uidx % opt.dis_steps == 0:
                            _, d_loss = sess.run([dis_op, d_loss_], feed_dict={x_: x_batch},
                                                 options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
                                                 run_metadata=run_metadata)
                        if uidx % opt.gen_steps == 0:
                            _, g_loss = sess.run([gen_op, g_loss_], feed_dict={x_: x_batch},
                                                 options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
                                                 run_metadata=run_metadata)
                    else:
                        if uidx % opt.dis_steps == 0:
                            _, d_loss = sess.run([dis_op, d_loss_], feed_dict={x_: x_batch})
                        if uidx % opt.gen_steps == 0:
                            _, g_loss = sess.run([gen_op, g_loss_], feed_dict={x_: x_batch})

                if uidx % opt.valid_freq == 0:
                    is_train = True
                    # print('Valid Size:', len(val))
                    valid_index = np.random.choice(len(val), opt.batch_size)
                    val_sents = [val[t] for t in valid_index]
                    val_sents_permutated = add_noise(val_sents, opt)
                    x_val_batch = prepare_data_for_cnn(val_sents_permutated, opt)
                    d_loss_val = sess.run(d_loss_, feed_dict={x_: x_val_batch})
                    g_loss_val = sess.run(g_loss_, feed_dict={x_: x_val_batch})

                    res = sess.run(res_, feed_dict={x_: x_val_batch})
                    print("Validation d_loss %f, g_loss %f mean_dist %f" % (d_loss_val, g_loss_val, res['mean_dist']))
                    print "Sent:" + u' '.join([ixtoword[x] for x in res['syn_sent'][0] if x != 0]).encode('utf-8').strip()
                    print("MMD loss %f, GAN loss %f" % (res['mmd'], res['gan']))
                    np.savetxt('./text/rec_val_words.txt', res['syn_sent'], fmt='%i', delimiter=' ')
                    if opt.discrimination:
                        print("Real Prob %f Fake Prob %f" % (res['prob_r'], res['prob_f']))

                    val_set = [prepare_for_bleu(s) for s in val_sents]
                    [bleu2s, bleu3s, bleu4s] = cal_BLEU([prepare_for_bleu(s) for s in res['syn_sent']], {0: val_set})
                    print 'Val BLEU (2,3,4): ' + ' '.join([str(round(it, 3)) for it in (bleu2s, bleu3s, bleu4s)])

                    summary = sess.run(merged, feed_dict={x_: x_val_batch})
                    test_writer.add_summary(summary, uidx)

                if uidx % opt.print_freq == 0:
                    # pdb.set_trace()
                    res = sess.run(res_, feed_dict={x_: x_batch})
                    median_dis = np.sqrt(np.median([((x - y) ** 2).sum() for x in res['real_f'] for y in res['real_f']]))
                    print("Iteration %d: d_loss %f, g_loss %f, mean_dist %f, realdist median %f" %
                          (uidx, d_loss, g_loss, res['mean_dist'], median_dis))
                    np.savetxt('./text/rec_train_words.txt', res['syn_sent'], fmt='%i', delimiter=' ')
                    print "Sent:" + u' '.join([ixtoword[x] for x in res['syn_sent'][0] if x != 0]).encode('utf-8').strip()
                    summary = sess.run(merged, feed_dict={x_: x_batch})
                    train_writer.add_summary(summary, uidx)
                    # print res['x_rec'][0][0]
                    # print res['x_emb'][0][0]
                    if profile:
                        tf.contrib.tfprof.model_analyzer.print_model_analysis(
                            tf.get_default_graph(),
                            run_meta=run_metadata,
                            tfprof_options=tf.contrib.tfprof.model_analyzer.PRINT_ALL_TIMING_MEMORY)

            saver.save(sess, opt.save_path, global_step=epoch)
def run_model(opt, train, val, ixtoword):
    try:
        params = np.load('./param_g.npz')
        if params['Wemb'].shape == (opt.n_words, opt.embed_size):
            print('Use saved embedding.')
            opt.W_emb = params['Wemb']
        else:
            print('Emb Dimension mismatch: param_g.npz:' + str(params['Wemb'].shape) +
                  ' opt: ' + str((opt.n_words, opt.embed_size)))
            opt.fix_emb = False
    except IOError:
        print('No embedding file found.')
        opt.fix_emb = False

    with tf.device('/gpu:1'):
        x_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.sent_len])
        x_org_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.sent_len])
        # is_train_ = tf.placeholder(tf.bool, name='is_train_')
        res_, g_loss_, d_loss_, gen_op, dis_op = textGAN(x_, x_org_, opt)
        merged = tf.summary.merge_all()
        # opt.is_train = False
        # res_val_, loss_val_, _ = auto_encoder(x_, x_org_, opt)
        # merged_val = tf.summary.merge_all()

    # tensorboard --logdir=run1:/tmp/tensorflow/ --port 6006
    # writer = tf.train.SummaryWriter(opt.log_path, graph=tf.get_default_graph())

    uidx = 0
    config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True,
                            graph_options=tf.GraphOptions(build_cost_model=1))
    # config = tf.ConfigProto(device_count={'GPU': 0})
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    np.set_printoptions(precision=3)
    np.set_printoptions(threshold=np.inf)
    saver = tf.train.Saver()
    run_metadata = tf.RunMetadata()

    with tf.Session(config=config) as sess:
        train_writer = tf.summary.FileWriter(opt.log_path + '/train', sess.graph)
        test_writer = tf.summary.FileWriter(opt.log_path + '/test', sess.graph)
        sess.run(tf.global_variables_initializer())
        if opt.restore:
            try:
                # pdb.set_trace()
                t_vars = tf.trainable_variables()
                # print([var.name[:-2] for var in t_vars])
                loader = restore_from_save(t_vars, sess, opt)
                print('\nload successfully\n')
            except Exception as e:
                print(e)
                print("No saving session, using random initialization")
                sess.run(tf.global_variables_initializer())

        # for i in range(34):
        #     valid_index = np.random.choice(len(val), opt.batch_size)
        #     val_sents = [val[t] for t in valid_index]
        #     val_sents_permutated = add_noise(val_sents, opt)
        #     x_val_batch = prepare_data_for_cnn(val_sents_permutated, opt)
        #     x_val_batch_org = prepare_data_for_rnn(val_sents, opt)
        #     res = sess.run(res_, feed_dict={x_: x_val_batch, x_org_: x_val_batch_org})
        #     if i == 0:
        #         valid_text = res['syn_sent']
        #     else:
        #         valid_text = np.concatenate((valid_text, res['syn_sent']), 0)
        # np.savetxt('./text_news/vae_words.txt', valid_text, fmt='%i', delimiter=' ')
        # pdb.set_trace()

        for epoch in range(opt.max_epochs):
            print("Starting epoch %d" % epoch)
            # if epoch >= 10:
            #     print("Relax embedding ")
            #     opt.fix_emb = False
            #     opt.batch_size = 2
            kf = get_minibatches_idx(len(train), opt.batch_size, shuffle=True)
            for _, train_index in kf:
                uidx += 1
                sents = [train[t] for t in train_index]
                sents_permutated = add_noise(sents, opt)
                # sents[0] = np.random.permutation(sents[0])
                x_batch = prepare_data_for_cnn(sents_permutated, opt)  # Batch L
                x_batch_org = prepare_data_for_rnn(sents, opt)

                d_loss = 0
                g_loss = 0
                if profile:
                    if uidx % opt.dis_steps == 0:
                        _, d_loss = sess.run([dis_op, d_loss_],
                                             feed_dict={x_: x_batch, x_org_: x_batch_org},
                                             options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
                                             run_metadata=run_metadata)
                    if uidx % opt.gen_steps == 0:
                        _, g_loss = sess.run([gen_op, g_loss_],
                                             feed_dict={x_: x_batch, x_org_: x_batch_org},
                                             options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
                                             run_metadata=run_metadata)
                else:
                    if uidx % opt.dis_steps == 0:
                        _, d_loss = sess.run([dis_op, d_loss_],
                                             feed_dict={x_: x_batch, x_org_: x_batch_org})
                    if uidx % opt.gen_steps == 0:
                        _, g_loss = sess.run([gen_op, g_loss_],
                                             feed_dict={x_: x_batch, x_org_: x_batch_org})

                ''' validation '''
                if uidx % opt.valid_freq == 0:
                    valid_index = np.random.choice(len(val), opt.batch_size)
                    val_sents = [val[t] for t in valid_index]
                    val_sents_permutated = add_noise(val_sents, opt)
                    x_val_batch = prepare_data_for_cnn(val_sents_permutated, opt)
                    x_val_batch_org = prepare_data_for_rnn(val_sents, opt)

                    d_loss_val = sess.run(d_loss_, feed_dict={x_: x_val_batch, x_org_: x_val_batch_org})
                    g_loss_val = sess.run(g_loss_, feed_dict={x_: x_val_batch, x_org_: x_val_batch_org})

                    res = sess.run(res_, feed_dict={x_: x_val_batch, x_org_: x_val_batch_org})
                    print("Validation d_loss %f, g_loss %f mean_dist %f" % (d_loss_val, g_loss_val, res['mean_dist']))
                    print("Sent:" + u' '.join([ixtoword[x] for x in res['syn_sent'][0] if x != 0]))
                    # .encode('utf-8', 'ignore').decode("utf8").strip())
                    print("MMD loss %f, GAN loss %f" % (res['mmd'], res['gan']))
                    # np.savetxt('./text_arxiv/syn_val_words.txt', res['syn_sent'], fmt='%i', delimiter=' ')
                    if opt.discrimination:
                        print("Real Prob %f Fake Prob %f" % (res['prob_r'], res['prob_f']))

                    for i in range(4):
                        valid_index = np.random.choice(len(val), opt.batch_size)
                        val_sents = [val[t] for t in valid_index]
                        val_sents_permutated = add_noise(val_sents, opt)
                        x_val_batch = prepare_data_for_cnn(val_sents_permutated, opt)
                        x_val_batch_org = prepare_data_for_rnn(val_sents, opt)
                        res = sess.run(res_, feed_dict={x_: x_val_batch, x_org_: x_val_batch_org})
                        if i == 0:
                            valid_text = res['syn_sent']
                        else:
                            valid_text = np.concatenate((valid_text, res['syn_sent']), 0)
                    np.savetxt('./text_news/syn_val_words.txt', valid_text, fmt='%i', delimiter=' ')

                    val_set = [prepare_for_bleu(s) for s in val_sents]
                    [bleu2s, bleu3s, bleu4s] = cal_BLEU([prepare_for_bleu(s) for s in res['syn_sent']], {0: val_set})
                    print('Val BLEU (2,3,4): ' + ' '.join([str(round(it, 3)) for it in (bleu2s, bleu3s, bleu4s)]))

                    summary = sess.run(merged, feed_dict={x_: x_val_batch, x_org_: x_val_batch_org})
                    test_writer.add_summary(summary, uidx)
def main(opt):
    # global n_words
    # Prepare training and testing data
    data_path = opt.data_dir + "/" + opt.data_name
    print('loading ' + data_path)
    x = cPickle.load(open(data_path, "rb"))
    train, val, test = x[0], x[1], x[2]
    wordtoix, ixtoword = x[3], x[4]
    opt.n_words = len(ixtoword)
    print datetime.datetime.now().strftime("%I:%M%p on %B %d, %Y")
    print dict(opt)
    print('Total words: %d' % opt.n_words)

    with tf.device('/gpu:1'):
        x_1_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.sent_len])
        x_2_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.sent_len])
        y_ = tf.placeholder(tf.float32, shape=[opt.batch_size, ])
        l_temp_ = tf.placeholder(tf.float32, shape=[])
        res_, loss_, train_op = cons_disc(x_1_, x_2_, y_, opt, l_temp_)
        merged = tf.summary.merge_all()

    uidx = 0
    config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.95
    np.set_printoptions(precision=3)
    np.set_printoptions(threshold=np.inf)
    saver = tf.train.Saver()

    with tf.Session(config=config) as sess:
        train_writer = tf.summary.FileWriter(opt.log_path + '/train', sess.graph)
        test_writer = tf.summary.FileWriter(opt.log_path + '/test', sess.graph)
        sess.run(tf.global_variables_initializer())
        # feed_dict = {x_: np.zeros([opt.batch_size, opt.sent_len]), x_org_: np.zeros([opt.batch_size, opt.sent_len])}
        if opt.restore:
            print('-' * 20)
            print("Loading variables from '%s'." % opt.load_path)
            try:
                # pdb.set_trace()
                t_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)  # tf.trainable_variables()
                # print([var.name[:-2] for var in t_vars])
                save_keys = tensors_key_in_file(opt.load_path)
                ss = [var for var in t_vars if var.name[:-2] in save_keys.keys()]
                ss = [var.name for var in ss if var.get_shape() == save_keys[var.name[:-2]]]
                loader = tf.train.Saver(var_list=[var for var in t_vars if var.name in ss])
                loader.restore(sess, opt.load_path)
                print("Loaded variables:" + str(ss))
                print('-' * 20)
            except Exception as e:
                print 'Error: ' + str(e)
                exit()
                print("No saving session, using random initialization")
                sess.run(tf.global_variables_initializer())

        # train
        # if you don't want to train, set max_epochs=0
        for epoch in range(opt.max_epochs):
            print("Starting epoch %d" % epoch)
            opt.l_temp = min(opt.l_temp * opt.l_temp_factor, opt.l_temp_max)
            print("Annealing temperature " + str(opt.l_temp))
            kf = get_minibatches_idx(len(train), opt.batch_size, shuffle=True)
            for _, train_index in kf:
                uidx += 1
                sents = [train[t] for t in train_index]
                indice = [rand_pair(opt.task, opt.data_name) for _ in range(opt.batch_size)]
                if opt.task == 'L':
                    x_1 = [sents[i][idx[0]] for i, idx in enumerate(indice)]
                    x_2 = [sents[i][idx[1]] for i, idx in enumerate(indice)]
                    y_batch = [(i1 - i2) % 2 == 0 for i1, i2 in indice]
                elif opt.task == 'C':
                    batch_indice = np.concatenate([np.random.permutation(opt.batch_size / 2),
                                                   range(opt.batch_size / 2, opt.batch_size)])
                    y_batch = (range(opt.batch_size) == batch_indice)
                    rn = np.random.choice(7, size=opt.batch_size)
                    x_1 = [sents[i][idx[0]] for i, idx in enumerate(indice)]
                    x_2 = [sents[batch_indice[i]][idx[1]] for i, idx in enumerate(indice)]
                else:  # G
                    batch_indice = np.concatenate([np.random.permutation(opt.batch_size / 2),
                                                   range(opt.batch_size / 2, opt.batch_size)])
                    y_batch = (range(opt.batch_size) == batch_indice)
                    x_1 = [sents[i][idx[0]] for i, idx in enumerate(indice)]
                    x_2 = [sents[batch_indice[i]][idx[1]] for i, idx in enumerate(indice)]
                x_1_batch = prepare_data_for_cnn(x_1, opt)  # Batch L
                x_2_batch = prepare_data_for_cnn(x_2, opt)  # Batch L
                feed = {x_1_: x_1_batch, x_2_: x_2_batch, y_: np.float32(y_batch), l_temp_: opt.l_temp}
                _, loss = sess.run([train_op, loss_], feed_dict=feed)

                if uidx % opt.print_freq == 1:
                    print("Iteration %d: loss %f " % (uidx, loss))
                    res = sess.run(res_, feed_dict=feed)
                    if opt.verbose:
                        print("logits:" + str(res['logits']))
                        print("H1:" + str(res['H_1'][0]))
                        print("H2:" + str(res['H_2'][0]))
                        # print("H2:" + str(res['H_1'][0] * res['H_2'][0] - 0.5))
                    acc = sum(np.equal(res['y_pred'], y_batch)) / np.float(opt.batch_size)
                    print("Accuracy: %f" % (acc))
                    print("y_mean: %f" % (np.mean(y_batch)))
                    print("corr:" + str(res['corr']))
                    sys.stdout.flush()
                    summary = sess.run(merged, feed_dict=feed)
                    train_writer.add_summary(summary, uidx)

                if uidx % opt.valid_freq == 1:
                    acc, loss_val, y_mean, corr = 0, 0, 0, 0
                    indice = [rand_pair(opt.task, opt.data_name) for _ in range(opt.batch_size)]
                    for i in range(100):
                        valid_index = np.random.choice(len(test), opt.batch_size)
                        sents = [test[t] for t in valid_index]
                        if opt.task == 'L':
                            x_1 = [sents[i][idx[0]] for i, idx in enumerate(indice)]
                            x_2 = [sents[i][idx[1]] for i, idx in enumerate(indice)]
                            y_batch = [(i1 - i2) % 2 == 0 for i1, i2 in indice]
                        elif opt.task == 'C':
                            batch_indice = np.concatenate([np.random.permutation(opt.batch_size / 2),
                                                           range(opt.batch_size / 2, opt.batch_size)])
                            y_batch = (range(opt.batch_size) == batch_indice)
                            rn = np.random.choice(7, size=opt.batch_size)
                            x_1 = [sents[i][idx[0]] for i, idx in enumerate(indice)]
                            x_2 = [sents[batch_indice[i]][idx[1]] for i, idx in enumerate(indice)]
                        else:  # G
                            batch_indice = np.concatenate([np.random.permutation(opt.batch_size / 2),
                                                           range(opt.batch_size / 2, opt.batch_size)])
                            y_batch = (range(opt.batch_size) == batch_indice)
                            x_1 = [sents[i][idx[0]] for i, idx in enumerate(indice)]
                            x_2 = [sents[batch_indice[i]][idx[1]] for i, idx in enumerate(indice)]
                        x_1_batch = prepare_data_for_cnn(x_1, opt)  # Batch L
                        x_2_batch = prepare_data_for_cnn(x_2, opt)  # Batch L
                        feed = {x_1_: x_1_batch, x_2_: x_2_batch, y_: np.float32(y_batch), l_temp_: opt.l_temp}
                        loss_val += sess.run(loss_, feed_dict=feed)
                        res = sess.run(res_, feed_dict=feed)
                        acc += sum(np.equal(res['y_pred'], y_batch)) / np.float(opt.batch_size)
                        y_mean += np.mean(y_batch)
                        corr += res['corr']
                    loss_val = loss_val / 100.0
                    acc = acc / 100.0
                    y_mean = y_mean / 100.0
                    corr = corr / 100.0
                    print("Validation loss %.4f " % (loss_val))
                    print("Validation accuracy: %.4f" % (acc))
                    print("Validation y_mean: %.4f" % (y_mean))
                    print("Validation corr: %.4f" % (corr))
                    print("")
                    sys.stdout.flush()
                    summary = sess.run(merged, feed_dict=feed)
                    test_writer.add_summary(summary, uidx)

            saver.save(sess, opt.save_path, global_step=epoch)

        # test
        if opt.test:
            print('Testing....')
            iter_num = np.int(np.floor(len(test) / opt.batch_size)) + 1
            for i in range(iter_num):
                if i % 100 == 0:
                    print('Iter %i/%i' % (i, iter_num))
                test_index = range(i * opt.batch_size, (i + 1) * opt.batch_size)
                test_sents = [test[t % len(test)] for t in test_index]
                indice = [(0, 1), (2, 3), (4, 5), (6, 7)]
                for idx in indice:
                    x_1 = [test_sents[i][idx[0]] for i in range(opt.batch_size)]
                    x_2 = [test_sents[i][idx[1]] for i in range(opt.batch_size)]
                    y_batch = [True for i in range(opt.batch_size)]
                    x_1_batch = prepare_data_for_cnn(x_1, opt)  # Batch L
                    x_2_batch = prepare_data_for_cnn(x_2, opt)  # Batch L
                    feed = {x_1_: x_1_batch, x_2_: x_2_batch, y_: np.float32(y_batch), l_temp_: opt.l_temp}
                    res = sess.run(res_, feed_dict=feed)
                    for d in range(opt.batch_size):
                        with open(opt.log_path + '.feature.txt', "a") as myfile:
                            myfile.write(str(test_index[d]) + "\t" + str(idx[0]) + "\t" +
                                         " ".join([ixtoword[x] for x in x_1_batch[d] if x != 0]) + "\t" +
                                         " ".join(map(str, res['H_1'][d])) + "\n")
                            myfile.write(str(test_index[d]) + "\t" + str(idx[1]) + "\t" +
                                         " ".join([ixtoword[x] for x in x_2_batch[d] if x != 0]) + "\t" +
                                         " ".join(map(str, res['H_2'][d])) + "\n")
def run_epoch(sess, epoch, mode, print_freq=-1, display_sent=-1, train_writer=None):
    fetches_ = {'loss': loss_}
    if mode == 'train':
        x, is_train = train, 1
        fetches_['train_op'] = train_op_
        fetches_['summary'] = merged
    elif mode == 'val':
        assert (print_freq == -1)
        x, is_train = val, None
    elif mode == 'test':
        assert (print_freq == -1)
        x, is_train = test, None
    acc_loss, acc_n = 0.0, 0.0
    local_t = 0
    global_t = epoch * epoch_t  # only used in train mode
    start_time = time.time()
    kf = get_minibatches_idx(len(x), opt.batch_size, shuffle=True)
    for _, index in kf:
        local_t += 1
        global_t += 1
        sents_b = [x[i] for i in index]
        sents_b_n = add_noise(sents_b, opt)
        x_b_org = prepare_data_for_rnn(sents_b, opt)  # Batch L
        x_b = prepare_data_for_cnn(sents_b_n, opt)  # Batch L
        feed_t = {x_: x_b, x_org_: x_b_org, is_train_: is_train}
        fetches = sess.run(fetches_, feed_dict=feed_t)
        batch_size = len(index)
        acc_n += batch_size
        acc_loss += fetches['loss'] * batch_size
        if print_freq > 0 and local_t % print_freq == 0:
            print("%s Iter %d: loss %.4f, time %.1fs" %
                  (mode, local_t, acc_loss / acc_n, time.time() - start_time))
            sys.stdout.flush()
        if mode == 'train' and train_writer is not None:
            train_writer.add_summary(fetches['summary'], global_t)

    if display_sent > 0:
        index_d = np.random.choice(len(x), opt.batch_size, replace=False)
        sents_d = [x[i] for i in index_d]
        sents_d_n = add_noise(sents_d, opt)
        x_d_org = prepare_data_for_rnn(sents_d, opt)  # Batch L
        x_d = prepare_data_for_cnn(sents_d_n, opt)  # Batch L
        res = sess.run(res_, feed_dict={x_: x_d, x_org_: x_d_org, is_train_: is_train})
        for i in range(display_sent):
            print("%s Org: " % mode +
                  " ".join([ixtoword[ix] for ix in sents_d[i] if ix != 0 and ix != 2]))
            if mode == 'train':
                print("%s Rec(feedy): " % mode +
                      " ".join([ixtoword[ix] for ix in res['rec_sents_feed_y'][i] if ix != 0 and ix != 2]))
            print("%s Rec: " % mode +
                  " ".join([ixtoword[ix] for ix in res['rec_sents'][i] if ix != 0 and ix != 2]))

    print("%s Epoch %d: loss %.4f, time %.1fs" % (mode, epoch, acc_loss / acc_n, time.time() - start_time))
    return acc_loss / acc_n
def main(): # Prepare training and testing data opt = Options() # load data loadpath = "./data/mimic3.p" embpath = "mimic3_emb.p" opt.num_class = 50 x = cPickle.load(open(loadpath, "rb")) train, train_text, train_lab = x[0], x[1], x[2] val, val_text, val_lab = x[3], x[4], x[5] test, test_text, test_lab = x[6], x[7], x[8] wordtoix, ixtoword = x[10], x[9] del x print("load data finished") train_lab = np.array(train_lab, dtype='float32') val_lab = np.array(val_lab, dtype='float32') test_lab = np.array(test_lab, dtype='float32') opt.n_words = len(ixtoword) if opt.part_data: #np.random.seed(123) train_ind = np.random.choice(len(train), int(len(train) * opt.portion), replace=False) train = [train[t] for t in train_ind] train_lab = [train_lab[t] for t in train_ind] os.environ['CUDA_VISIBLE_DEVICES'] = str(opt.GPUID) print(dict(opt)) print('Total words: %d' % opt.n_words) try: opt.W_emb = np.array(cPickle.load(open(embpath, 'rb')), dtype='float32') opt.W_class_emb = load_class_embedding(wordtoix, opt) except IOError: print('No embedding file found.') opt.fix_emb = False with tf.device('/gpu:1'): x_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.maxlen], name='x_') x_mask_ = tf.placeholder(tf.float32, shape=[opt.batch_size, opt.maxlen], name='x_mask_') keep_prob = tf.placeholder(tf.float32, name='keep_prob') y_ = tf.placeholder(tf.float32, shape=[opt.batch_size, opt.num_class], name='y_') class_penalty_ = tf.placeholder(tf.float32, shape=()) accuracy_, loss_, train_op, W_norm_, global_step, logits_, prob_ = emb_classifier( x_, x_mask_, y_, keep_prob, opt, class_penalty_) uidx = 0 max_val_accuracy = 0. max_test_accuracy = 0. max_val_auc_mean = 0. max_test_auc_mean = 0. config = tf.ConfigProto( log_device_placement=False, allow_soft_placement=True, ) config.gpu_options.allow_growth = True np.set_printoptions(precision=3) np.set_printoptions(threshold=np.inf) saver = tf.train.Saver() with tf.Session(config=config) as sess: train_writer = tf.summary.FileWriter(opt.log_path + '/train', sess.graph) test_writer = tf.summary.FileWriter(opt.log_path + '/test', sess.graph) sess.run(tf.global_variables_initializer()) if opt.restore: try: t_vars = tf.trainable_variables() save_keys = tensors_key_in_file(opt.save_path) ss = set([var.name for var in t_vars]) & set( [s + ":0" for s in save_keys.keys()]) cc = {var.name: var for var in t_vars} # only restore variables with correct shape ss_right_shape = set( [s for s in ss if cc[s].get_shape() == save_keys[s[:-2]]]) loader = tf.train.Saver(var_list=[ var for var in t_vars if var.name in ss_right_shape ]) loader.restore(sess, opt.save_path) print("Loading variables from '%s'." 
% opt.save_path) print("Loaded variables:" + str(ss)) except: print("No saving session, using random initialization") sess.run(tf.global_variables_initializer()) try: for epoch in range(opt.max_epochs): print("Starting epoch %d" % epoch) kf = get_minibatches_idx(len(train), opt.batch_size, shuffle=True) for _, train_index in kf: uidx += 1 sents = [train[t] for t in train_index] x_labels = [train_lab[t] for t in train_index] x_labels = np.array(x_labels) x_labels = x_labels.reshape((len(x_labels), opt.num_class)) x_batch, x_batch_mask = prepare_data_for_emb(sents, opt) _, loss, step, = sess.run( [train_op, loss_, global_step], feed_dict={ x_: x_batch, x_mask_: x_batch_mask, y_: x_labels, keep_prob: opt.dropout, class_penalty_: opt.class_penalty }) if uidx % opt.valid_freq == 0: train_correct = 0.0 # sample evaluate accuaccy on 500 sample data kf_train = get_minibatches_idx(500, opt.batch_size, shuffle=True) for _, train_index in kf_train: train_sents = [train[t] for t in train_index] train_labels = [train_lab[t] for t in train_index] train_labels = np.array(train_labels) train_labels = train_labels.reshape( (len(train_labels), opt.num_class)) x_train_batch, x_train_batch_mask = prepare_data_for_emb( train_sents, opt) train_accuracy = sess.run(accuracy_, feed_dict={ x_: x_train_batch, x_mask_: x_train_batch_mask, y_: train_labels, keep_prob: 1.0, class_penalty_: 0.0 }) train_correct += train_accuracy * len(train_index) train_accuracy = train_correct / 500 print("Iteration %d: Training loss %f " % (uidx, loss)) print("Train accuracy %f " % train_accuracy) val_correct = 0.0 val_y = [] val_logits_list = [] val_prob_list = [] val_true_list = [] kf_val = get_minibatches_idx(len(val), opt.batch_size, shuffle=True) for _, val_index in kf_val: val_sents = [val[t] for t in val_index] val_labels = [val_lab[t] for t in val_index] val_labels = np.array(val_labels) val_labels = val_labels.reshape( (len(val_labels), opt.num_class)) x_val_batch, x_val_batch_mask = prepare_data_for_emb( val_sents, opt) val_accuracy, val_logits, val_probs = sess.run( [accuracy_, logits_, prob_], feed_dict={ x_: x_val_batch, x_mask_: x_val_batch_mask, y_: val_labels, keep_prob: 1.0, class_penalty_: 0.0 }) val_correct += val_accuracy * len(val_index) val_y += np.argmax(val_labels, axis=1).tolist() val_logits_list += val_logits.tolist() val_prob_list += val_probs.tolist() val_true_list += val_labels.tolist() val_accuracy = val_correct / len(val) val_logits_array = np.asarray(val_logits_list) val_prob_array = np.asarray(val_prob_list) val_true_array = np.asarray(val_true_list) val_auc_list = [] val_auc_micro = roc_auc_score(y_true=val_true_array, y_score=val_logits_array, average='micro') val_auc_macro = roc_auc_score(y_true=val_true_array, y_score=val_logits_array, average='macro') for i in range(opt.num_class): if np.max(val_true_array[:, i] > 0): val_auc = roc_auc_score( y_true=val_true_array[:, i], y_score=val_logits_array[:, i], ) val_auc_list.append(val_auc) val_auc_mean = np.mean(val_auc) # print("Validation accuracy %f " % val_accuracy) print("val auc macro %f micro %f " % (val_auc_macro, val_auc_micro)) if True: test_correct = 0.0 test_y = [] test_logits_list = [] test_prob_list = [] test_true_list = [] kf_test = get_minibatches_idx(len(test), opt.batch_size, shuffle=True) for _, test_index in kf_test: test_sents = [test[t] for t in test_index] test_labels = [test_lab[t] for t in test_index] test_labels = np.array(test_labels) test_labels = test_labels.reshape( (len(test_labels), opt.num_class)) x_test_batch, 
x_test_batch_mask = prepare_data_for_emb( test_sents, opt) test_accuracy, test_logits, test_probs = sess.run( [accuracy_, logits_, prob_], feed_dict={ x_: x_test_batch, x_mask_: x_test_batch_mask, y_: test_labels, keep_prob: 1.0, class_penalty_: 0.0 }) test_correct += test_accuracy * len(test_index) test_correct += test_accuracy * len(test_index) test_y += np.argmax(test_labels, axis=1).tolist() test_logits_list += test_logits.tolist() test_prob_list += test_probs.tolist() test_true_list += test_labels.tolist() test_accuracy = test_correct / len(test) test_logits_array = np.asarray(test_logits_list) test_prob_array = np.asarray(test_prob_list) test_true_array = np.asarray(test_true_list) test_auc_list = [] test_auc_micro = roc_auc_score( y_true=test_true_array, y_score=test_logits_array, average='micro') test_auc_macro = roc_auc_score( y_true=test_true_array, y_score=test_logits_array, average='macro') test_f1_micro = micro_f1( test_prob_array.ravel() > 0.5, test_true_array.ravel(), ) test_f1_macro = macro_f1( test_prob_array > 0.5, test_true_array, ) test_p5 = precision_at_k(test_logits_array, test_true_array, 5) for i in range(opt.num_class): if np.max(test_true_array[:, i] > 0): test_auc = roc_auc_score( y_true=test_true_array[:, i], y_score=test_logits_array[:, i], ) test_auc_list.append(test_auc) test_auc_mean = np.mean(test_auc) print("Test auc macro %f micro %f " % (test_auc_macro, test_auc_micro)) print("Test f1 macro %f micro %f " % (test_f1_macro, test_f1_micro)) print("P5 %f" % test_p5) # max_test_accuracy = test_accuracy max_test_auc_mean = test_auc_mean # print("Test accuracy %f " % test_accuracy) # max_test_accuracy = test_accuracy # print("Epoch %d: Max Test accuracy %f" % (epoch, max_test_accuracy)) print("Epoch %d: Max Test auc %f" % (epoch, max_test_auc_mean)) saver.save(sess, opt.save_path, global_step=epoch) print("Max Test accuracy %f " % max_test_accuracy) except KeyboardInterrupt: print('Training interupted') print("Max Test accuracy %f " % max_test_accuracy)
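# NOTE (hedged): in the validation/test loops above, `np.mean(val_auc)` /
# `np.mean(test_auc)` average only the last per-class AUC computed in the loop;
# averaging the accumulated lists is presumably what was intended. Minimal
# sketch of that aggregation, assuming only NumPy and scikit-learn;
# `y_true` / `y_score` are hypothetical stand-ins for *_true_array / *_logits_array.
import numpy as np
from sklearn.metrics import roc_auc_score

def per_class_auc_mean(y_true, y_score):
    """Macro AUC over classes that contain at least one positive label."""
    aucs = []
    for i in range(y_true.shape[1]):
        if y_true[:, i].max() > 0:          # skip classes with no positives
            aucs.append(roc_auc_score(y_true[:, i], y_score[:, i]))
    return float(np.mean(aucs)) if aucs else float('nan')

# usage sketch
if __name__ == '__main__':
    y_true = np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0]])
    y_score = np.array([[0.9, 0.2, 0.1], [0.3, 0.8, 0.4], [0.7, 0.1, 0.2]])
    print(per_class_auc_mean(y_true, y_score))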
def train(model, train_triples, valid_triples, test_triples, sr_index, params): rng = np.random n_entities, n_relations = model.n_entities, model.n_relations train_fn = model.train_fn(num_neg=params[NUM_NEG], lrate=params[LEARNING_RATE], marge=params[MARGE]) ranks_fn = model.ranks_fn() scores_fn = model.scores_fn() uidx = 1 best_p = None history_valid_hits = [] history_test_hits = [] history_epoch_times = [] bins = [1, 11, 21, 31, 51, 101, 1001, 10001, 20000] print("Training on {:d} triples".format(len(train_triples))) num_batches = int(math.ceil(len(train_triples) / params[BATCH_SIZE])) print("Batch size = {:d}, Number of batches = {:d}".format(params[BATCH_SIZE], num_batches)) print("The eval is being printed with number of items the bins -> %s" % bins) try: # We iterate over epochs: train_start_time = time.time() for epoch in range(params[NUM_EPOCHS]): # In each epoch, we do a full pass over the training data: epoch_start_time = time.time() for _, train_index in utils.get_minibatches_idx(len(train_triples), params[BATCH_SIZE], False): # Normalize the entity embeddings if params[IS_NORMALIZED]: model.normalize() tmb = train_triples[train_index] # generating negative examples replacing left entity tmbln_list = [rng.randint(0, n_entities, tmb.shape[0]).astype(dtype=tmb.dtype) for i in xrange(params[NUM_NEG])] # generating negative examples replacing right entity tmbrn_list = [rng.randint(0, n_entities, tmb.shape[0]).astype(dtype=tmb.dtype) for i in xrange(params[NUM_NEG])] cost = train_fn(*([tmb] + tmbln_list + tmbrn_list))[0] if np.isnan(cost) or np.isinf(cost): print('bad cost detected! Cost is ' + str(cost)) return get_best_metric(history_valid_hits) if uidx % params[DISP_FREQ] == 0: print('Epoch ', epoch, 'Iter', uidx, 'Cost ', cost) if uidx % params[VALID_FREQ] == 0: print('Epoch ', epoch, 'Iter', uidx, 'Cost ', cost) # print("Epoch {} of {} uidx {} took {:.3f}s".format( # epoch + 1, params[NUM_EPOCHS], uidx, time.time() - start_time)) if len(history_epoch_times) > 0: print (" Average epoch time - {:.3f}s".format(np.mean(history_epoch_times))) print(" Time since start - {:.3f}s".format(time.time() - train_start_time)) print(" Train Minibatch Metrics") train_hits10 = get_batch_metrics(tmb, sr_index, scores_fn, False) print('') print(" Validation data Metrics") valid_hits10 = get_batch_metrics(valid_triples, sr_index, scores_fn, True) print('') print(" Test data Metrics") test_hits10 = get_batch_metrics(test_triples, sr_index, scores_fn, True) if (best_p is None) or (len(history_valid_hits) > 0 and valid_hits10 >= np.max(history_valid_hits)): print("found best params yet") best_p = utils.get_params(model) history_valid_hits.append(valid_hits10) history_test_hits.append(test_hits10) if uidx % params[SAVE_FREQ] == 0: if best_p is None: all_params = utils.get_params(model) else: all_params = best_p utils.save(params[SAVETO_FILE], all_params) uidx += 1 history_epoch_times.append(time.time() - epoch_start_time) except KeyboardInterrupt: print("training interrupted") return model, get_best_metric(history_valid_hits), train_fn, scores_fn
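# Minimal sketch of the negative-sampling step used in the loop above: for every
# positive triple, draw `num_neg` random entity ids to corrupt the head (left)
# and the tail (right) slots. Assumes triples are an (N, 3) int array of
# (head, relation, tail); the names here are illustrative, not the repo's API.
import numpy as np

def corrupt_triples(triples, n_entities, num_neg, rng=np.random):
    heads_neg = [rng.randint(0, n_entities, len(triples)).astype(triples.dtype)
                 for _ in range(num_neg)]   # replace left entity
    tails_neg = [rng.randint(0, n_entities, len(triples)).astype(triples.dtype)
                 for _ in range(num_neg)]   # replace right entity
    return heads_neg, tails_neg

# usage sketch
if __name__ == '__main__':
    batch = np.array([[0, 1, 2], [3, 0, 4]], dtype=np.int64)
    h_neg, t_neg = corrupt_triples(batch, n_entities=10, num_neg=2)
    print(h_neg, t_neg)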
def main(): loadpath = "./data/snli.p" x = cPickle.load(open(loadpath, "rb")) train, val, test = x[0], x[1], x[2] wordtoix, ixtoword = x[4], x[5] train_q, train_a, train_lab = train[0], train[1], train[2] val_q, val_a, val_lab = val[0], val[1], val[2] test_q, test_a, test_lab = test[0], test[1], test[2] train_lab = np.array(train_lab, dtype='float32') val_lab = np.array(val_lab, dtype='float32') test_lab = np.array(test_lab, dtype='float32') opt = Options() opt.n_words = len(ixtoword) del x print(dict(opt)) print('Total words: %d' % opt.n_words) if opt.part_data: np.random.seed(123) train_ind = np.random.choice(len(train_q), int(len(train_q)*opt.portion), replace=False) train_q = [train_q[t] for t in train_ind] train_a = [train_a[t] for t in train_ind] train_lab = [train_lab[t] for t in train_ind] try: params = np.load('./data/snli_emb.p') if params[0].shape == (opt.n_words, opt.embed_size): print('Use saved embedding.') #pdb.set_trace() opt.W_emb = np.array(params[0], dtype='float32') else: print('Emb Dimension mismatch: param_g.npz:' + str(params[0].shape) + ' opt: ' + str( (opt.n_words, opt.embed_size))) opt.fix_emb = False except IOError: print('No embedding file found.') opt.fix_emb = False with tf.device('/gpu:1'): x_1_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.maxlen]) x_2_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.maxlen]) x_mask_1_ = tf.placeholder(tf.float32, shape=[opt.batch_size, opt.maxlen]) x_mask_2_ = tf.placeholder(tf.float32, shape=[opt.batch_size, opt.maxlen]) y_ = tf.placeholder(tf.float32, shape=[opt.batch_size, opt.category]) keep_prob = tf.placeholder(tf.float32) accuracy_, loss_, train_op_, W_emb_ = auto_encoder(x_1_, x_2_, x_mask_1_, x_mask_2_, y_, keep_prob, opt) merged = tf.summary.merge_all() uidx = 0 max_val_accuracy = 0. max_test_accuracy = 0. # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1) config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True) config.gpu_options.allow_growth = True np.set_printoptions(precision=3) np.set_printoptions(threshold=np.inf) saver = tf.train.Saver() with tf.Session(config=config) as sess: train_writer = tf.summary.FileWriter(opt.log_path + '/train', sess.graph) test_writer = tf.summary.FileWriter(opt.log_path + '/test', sess.graph) sess.run(tf.global_variables_initializer()) if opt.restore: try: #pdb.set_trace() t_vars = tf.trainable_variables() # print([var.name[:-2] for var in t_vars]) save_keys = tensors_key_in_file(opt.save_path) # pdb.set_trace() # print(save_keys.keys()) ss = set([var.name for var in t_vars]) & set([s + ":0" for s in save_keys.keys()]) cc = {var.name: var for var in t_vars} #pdb.set_trace() # only restore variables with correct shape ss_right_shape = set([s for s in ss if cc[s].get_shape() == save_keys[s[:-2]]]) loader = tf.train.Saver(var_list=[var for var in t_vars if var.name in ss_right_shape]) loader.restore(sess, opt.save_path) print("Loading variables from '%s'." 
% opt.save_path) print("Loaded variables:" + str(ss)) except: print("No saving session, using random initialization") sess.run(tf.global_variables_initializer()) try: for epoch in range(opt.max_epochs): print("Starting epoch %d" % epoch) kf = get_minibatches_idx(len(train_q), opt.batch_size, shuffle=True) for _, train_index in kf: uidx += 1 sents_1 = [train_q[t] for t in train_index] sents_2 = [train_a[t] for t in train_index] x_labels = [train_lab[t] for t in train_index] x_labels = np.array(x_labels) x_labels = x_labels.reshape((len(x_labels), opt.category)) x_batch_1, x_batch_mask_1 = prepare_data_for_emb(sents_1, opt) x_batch_2, x_batch_mask_2 = prepare_data_for_emb(sents_2, opt) _, loss = sess.run([train_op_, loss_], feed_dict={x_1_: x_batch_1, x_2_: x_batch_2, x_mask_1_: x_batch_mask_1, x_mask_2_: x_batch_mask_2, y_: x_labels, keep_prob: opt.dropout_ratio}) if uidx % opt.valid_freq == 0: train_correct = 0.0 kf_train = get_minibatches_idx(3070, opt.batch_size, shuffle=True) for _, train_index in kf_train: train_sents_1 = [train_q[t] for t in train_index] train_sents_2 = [train_a[t] for t in train_index] train_labels = [train_lab[t] for t in train_index] train_labels = np.array(train_labels) train_labels = train_labels.reshape((len(train_labels), opt.category)) x_train_batch_1, x_train_mask_1 = prepare_data_for_emb(train_sents_1, opt) x_train_batch_2, x_train_mask_2 = prepare_data_for_emb(train_sents_2, opt) train_accuracy = sess.run(accuracy_, feed_dict={x_1_: x_train_batch_1, x_2_: x_train_batch_2, x_mask_1_: x_train_mask_1, x_mask_2_: x_train_mask_2, y_: train_labels, keep_prob: 1.0}) train_correct += train_accuracy * len(train_index) train_accuracy = train_correct / 3070 # print("Iteration %d: Training loss %f, dis loss %f, rec loss %f" % (uidx, # loss, dis_loss, rec_loss)) print("Train accuracy %f " % train_accuracy) val_correct = 0.0 is_train = True kf_val = get_minibatches_idx(len(val_q), opt.batch_size, shuffle=True) for _, val_index in kf_val: val_sents_1 = [val_q[t] for t in val_index] val_sents_2 = [val_a[t] for t in val_index] val_labels = [val_lab[t] for t in val_index] val_labels = np.array(val_labels) val_labels = val_labels.reshape((len(val_labels), opt.category)) x_val_batch_1, x_val_mask_1 = prepare_data_for_emb(val_sents_1, opt) x_val_batch_2, x_val_mask_2 = prepare_data_for_emb(val_sents_2, opt) val_accuracy = sess.run(accuracy_, feed_dict={x_1_: x_val_batch_1, x_2_: x_val_batch_2, x_mask_1_: x_val_mask_1, x_mask_2_: x_val_mask_2, y_: val_labels, keep_prob: 1.0}) val_correct += val_accuracy * len(val_index) val_accuracy = val_correct / len(val_q) print("Validation accuracy %f " % val_accuracy) if val_accuracy > max_val_accuracy: max_val_accuracy = val_accuracy test_correct = 0.0 kf_test = get_minibatches_idx(len(test_q), opt.batch_size, shuffle=True) for _, test_index in kf_test: test_sents_1 = [test_q[t] for t in test_index] test_sents_2 = [test_a[t] for t in test_index] test_labels = [test_lab[t] for t in test_index] test_labels = np.array(test_labels) test_labels = test_labels.reshape((len(test_labels), opt.category)) x_test_batch_1, x_test_mask_1 = prepare_data_for_emb(test_sents_1, opt) x_test_batch_2, x_test_mask_2 = prepare_data_for_emb(test_sents_2, opt) test_accuracy = sess.run(accuracy_, feed_dict={x_1_: x_test_batch_1, x_2_: x_test_batch_2, x_mask_1_: x_test_mask_1, x_mask_2_: x_test_mask_2, y_: test_labels, keep_prob: 1.0}) test_correct += test_accuracy * len(test_index) test_accuracy = test_correct / len(test_q) print("Test accuracy %f " % 
test_accuracy) max_test_accuracy = test_accuracy print("Epoch %d: Max Test accuracy %f" % (epoch, max_test_accuracy)) print("Max Test accuracy %f " % max_test_accuracy) except KeyboardInterrupt: print('Training interrupted') print("Max Test accuracy %f " % max_test_accuracy)
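# Sketch of the accuracy bookkeeping used in the train/val/test loops above:
# each minibatch contributes accuracy * batch_size, and the sum is divided by
# the number of examples seen. Plain Python/NumPy; `eval_batch` is a
# hypothetical callable returning the mean accuracy of one batch.
import numpy as np

def accumulate_accuracy(batches, eval_batch):
    correct, seen = 0.0, 0
    for batch_idx in batches:
        acc = eval_batch(batch_idx)          # mean accuracy on this minibatch
        correct += acc * len(batch_idx)
        seen += len(batch_idx)
    return correct / max(seen, 1)

# usage sketch with a dummy evaluator
if __name__ == '__main__':
    batches = [np.arange(0, 50), np.arange(50, 100)]
    print(accumulate_accuracy(batches, lambda idx: 0.9))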
def main(): # Prepare training and testing data opt = Options() # load data if opt.dataset == 'Tweet': loadpath = "./data/langdetect_tweet0.7.p" embpath = "./data/langdetect_tweet_emb.p" opt.num_class = 4 opt.class_name = ['apple', 'google', 'microsoft', 'twitter'] if opt.dataset == 'N20short': loadpath = "./data/N20short.p" embpath = "./data/N20short_emb.p" opt.class_name = [ 'rec.autos', 'talk.politics.misc', 'sci.electronics', 'comp.sys.ibm.pc.hardware', 'talk.politics.guns', 'sci.med', 'rec.motorcycles', 'soc.religion.christian', 'comp.sys.mac.hardware', 'comp.graphics', 'sci.space', 'alt.atheism', 'rec.sport.baseball', 'comp.windows.x', 'talk.religion.misc', 'comp.os.ms-windows.misc', 'misc.forsale', 'talk.politics.mideast', 'sci.crypt', 'rec.sport.hockey' ] opt.num_class = len(opt.class_name) elif opt.dataset == 'agnews': loadpath = "./data/ag_news.p" embpath = "./data/ag_news_glove.p" opt.num_class = 4 opt.class_name = ['World', 'Sports', 'Business', 'Science'] elif opt.dataset == 'dbpedia': loadpath = "./data/dbpedia.p" embpath = "./data/dbpedia_glove.p" opt.num_class = 14 opt.class_name = [ 'Company', 'Educational Institution', 'Artist', 'Athlete', 'Office Holder', 'Mean Of Transportation', 'Building', 'Natural Place', 'Village', 'Animal', 'Plant', 'Album', 'Film', 'Written Work', ] elif opt.dataset == 'yelp_full': loadpath = "./data/yelp_full.p" embpath = "./data/yelp_full_glove.p" opt.num_class = 5 opt.class_name = ['worst', 'bad', 'middle', 'good', 'best'] x = cPickle.load(open(loadpath, "rb"), encoding='iso-8859-1') train, val, test = x[0], x[1], x[2] print(len(val)) train_lab, val_lab, test_lab = x[3], x[4], x[5] wordtoix, ixtoword = x[6], x[7] del x print("len of train,val,test:", len(train), len(val), len(test)) print("load data finished") train_lab = np.array(train_lab, dtype='float32') val_lab = np.array(val_lab, dtype='float32') test_lab = np.array(test_lab, dtype='float32') opt.n_words = len(ixtoword) if opt.part_data: #np.random.seed(123) train_ind = np.random.choice(len(train), int(len(train) * opt.portion), replace=False) train = [train[t] for t in train_ind] train_lab = [train_lab[t] for t in train_ind] os.environ['CUDA_VISIBLE_DEVICES'] = str(opt.GPUID) print(dict(opt)) print('Total words: %d' % opt.n_words) try: opt.W_emb = np.array(cPickle.load(open(embpath, 'rb'), encoding='iso-8859-1'), dtype='float32') opt.W_class_emb = load_class_embedding(wordtoix, opt) except IOError: print('No embedding file found.') opt.fix_emb = False with tf.device('/cpu:0'): x_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.maxlen], name='x_') x_mask_ = tf.placeholder(tf.float32, shape=[opt.batch_size, opt.maxlen], name='x_mask_') keep_prob = tf.placeholder(tf.float32, name='keep_prob') y_ = tf.placeholder(tf.float32, shape=[opt.batch_size, opt.num_class], name='y_') class_penalty_ = tf.placeholder(tf.float32, shape=()) accuracy_, loss_, train_op, W_norm_, global_step, prob_ = emb_classifier( x_, x_mask_, y_, keep_prob, opt, class_penalty_) uidx = 0 max_val_accuracy = 0. max_test_accuracy = 0. 
config = tf.ConfigProto( log_device_placement=False, allow_soft_placement=True, ) config.gpu_options.allow_growth = True np.set_printoptions(precision=3) np.set_printoptions(threshold=np.inf) saver = tf.train.Saver() with tf.Session(config=config) as sess: train_writer = tf.summary.FileWriter(opt.log_path + '/train', sess.graph) test_writer = tf.summary.FileWriter(opt.log_path + '/test', sess.graph) sess.run(tf.global_variables_initializer()) if opt.restore: try: t_vars = tf.trainable_variables() save_keys = tensors_key_in_file(opt.save_path) ss = set([var.name for var in t_vars]) & set( [s + ":0" for s in save_keys.keys()]) cc = {var.name: var for var in t_vars} # only restore variables with correct shape ss_right_shape = set( [s for s in ss if cc[s].get_shape() == save_keys[s[:-2]]]) loader = tf.train.Saver(var_list=[ var for var in t_vars if var.name in ss_right_shape ]) loader.restore(sess, opt.save_path) print("Loading variables from '%s'." % opt.save_path) print("Loaded variables:" + str(ss)) except: print("No saving session, using random initialization") sess.run(tf.global_variables_initializer()) try: for epoch in range(opt.max_epochs): print("Starting epoch %d" % epoch) kf = get_minibatches_idx(len(train), opt.batch_size, shuffle=True) for _, train_index in kf: uidx += 1 sents = [train[t] for t in train_index] x_labels = [train_lab[t] for t in train_index] # print(x_labels) x_labels = np.array(x_labels) x_labels = x_labels.reshape((len(x_labels), opt.num_class)) # print(x_labels) # exit() x_batch, x_batch_mask = prepare_data_for_emb(sents, opt) _, loss, step, = sess.run( [train_op, loss_, global_step], feed_dict={ x_: x_batch, x_mask_: x_batch_mask, y_: x_labels, keep_prob: opt.dropout, class_penalty_: opt.class_penalty }) if uidx % opt.valid_freq == 0: train_correct = 0.0 # sample evaluate accuaccy on 500 sample data kf_train = get_minibatches_idx(500, opt.batch_size, shuffle=True) for _, train_index in kf_train: train_sents = [train[t] for t in train_index] train_labels = [train_lab[t] for t in train_index] train_labels = np.array(train_labels) train_labels = train_labels.reshape( (len(train_labels), opt.num_class)) x_train_batch, x_train_batch_mask = prepare_data_for_emb( train_sents, opt) train_accuracy = sess.run(accuracy_, feed_dict={ x_: x_train_batch, x_mask_: x_train_batch_mask, y_: train_labels, keep_prob: 1.0, class_penalty_: 0.0 }) train_correct += train_accuracy * len(train_index) train_accuracy = train_correct / 500 print("Iteration %d: Training loss %f " % (uidx, loss)) print("Train accuracy %f " % train_accuracy) val_correct = 0.0 kf_val = get_minibatches_idx(len(val), opt.batch_size, shuffle=True) for _, val_index in kf_val: val_sents = [val[t] for t in val_index] val_labels = [val_lab[t] for t in val_index] val_labels = np.array(val_labels) val_labels = val_labels.reshape( (len(val_labels), opt.num_class)) x_val_batch, x_val_batch_mask = prepare_data_for_emb( val_sents, opt) val_accuracy = sess.run(accuracy_, feed_dict={ x_: x_val_batch, x_mask_: x_val_batch_mask, y_: val_labels, keep_prob: 1.0, class_penalty_: 0.0 }) val_correct += val_accuracy * len(val_index) val_accuracy = val_correct / len(val) print("Validation accuracy %f " % val_accuracy) if val_accuracy > max_val_accuracy: max_val_accuracy = val_accuracy # test_correct = 0.0 # # kf_test = get_minibatches_idx(len(test), opt.batch_size, shuffle=True) # for _, test_index in kf_test: # test_sents = [test[t] for t in test_index] # test_labels = [test_lab[t] for t in test_index] # test_labels = 
np.array(test_labels) # test_labels = test_labels.reshape((len(test_labels), opt.num_class)) # x_test_batch, x_test_batch_mask = prepare_data_for_emb(test_sents, opt) # # test_accuracy,predict_prob = sess.run([accuracy_,prob_],feed_dict={x_: x_test_batch, x_mask_: x_test_batch_mask,y_: test_labels, keep_prob: 1.0, class_penalty_: 0.0}) # print(predict_prob) # test_correct += test_accuracy * len(test_index) # # test_accuracy = test_correct / len(test) # print("Test accuracy %f " % test_accuracy) # max_test_accuracy = test_accuracy # print("Epoch %d: Max Test accuracy %f" % (epoch, max_test_accuracy)) saver.save(sess, opt.save_path, global_step=epoch) saver.save(sess, "save_model/model.ckpt") # print("Max Test accuracy %f " % max_test_accuracy) test_correct = 0.0 kf_test = get_minibatches_idx(len(test), opt.batch_size, shuffle=False) for _, test_index in kf_test: test_sents = [test[t] for t in test_index] test_labels = [test_lab[t] for t in test_index] test_labels = np.array(test_labels) test_labels = test_labels.reshape( (len(test_labels), opt.num_class)) x_test_batch, x_test_batch_mask = prepare_data_for_emb( test_sents, opt) test_accuracy, predict_prob = sess.run( [accuracy_, prob_], feed_dict={ x_: x_test_batch, x_mask_: x_test_batch_mask, y_: test_labels, keep_prob: 1.0, class_penalty_: 0.0 }) for prob in predict_prob: topnlabel_onedoc = [0] * opt.num_class for iter_topnlabel in range(opt.topnlabel): index_label = np.argwhere(prob == max(prob)) topnlabel_onedoc[index_label[0] [0]] = prob[index_label][0][0] prob[index_label] = -1 topnlabel_docwithoutlabel.append(topnlabel_onedoc) test_correct += test_accuracy * len(test_index) print(topnlabel_docwithoutlabel) test_accuracy = test_correct / len(test) print("Predict accuracy %f " % test_accuracy) max_test_accuracy = test_accuracy filename = 'test' file = open(filename, 'w') file.write(str(len(test))) file.write('\n') # print(wordtoix.get('close')) # exit() for topic_prob in topnlabel_docwithoutlabel: print(topic_prob) for prob_each_label in topic_prob: file.write(str(prob_each_label)) file.write(" ") file.write('\n') except KeyboardInterrupt: print('Training interupted') print("Max Test accuracy %f " % max_test_accuracy)
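# The top-n label extraction above repeatedly takes argwhere(prob == max(prob))
# and overwrites that entry with -1. A simpler equivalent (sketch, assuming
# `prob` is a 1-D probability vector) keeps the same output format:
# probability for the top-n labels, zero elsewhere.
import numpy as np

def topn_label_vector(prob, topn):
    out = np.zeros_like(prob)
    top_idx = np.argsort(prob)[::-1][:topn]  # indices of the n largest probs
    out[top_idx] = prob[top_idx]
    return out

# usage sketch
if __name__ == '__main__':
    prob = np.array([0.05, 0.6, 0.1, 0.25])
    print(topn_label_vector(prob, topn=2))   # -> [0.   0.6  0.   0.25]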
def run_model(opt, train, val, ixtoword): try: params = np.load('./param_g.npz') if params['Wemb'].shape == (opt.n_words, opt.embed_size): print('Use saved embedding.') opt.W_emb = params['Wemb'] else: print('Emb Dimension mismatch: param_g.npz:' + str(params['Wemb'].shape) + ' opt: ' + str( (opt.n_words, opt.embed_size))) opt.fix_emb = False except IOError: print('No embedding file found.') opt.fix_emb = False with tf.device('/gpu:1'): x_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.sent_len]) x_org_ = tf.placeholder(tf.int32, shape=[opt.batch_size, opt.sent_len]) is_train_ = tf.placeholder(tf.bool, name='is_train_') res_, loss_, train_op = auto_encoder(x_, x_org_, is_train_, opt) merged = tf.summary.merge_all() # opt.is_train = False # res_val_, loss_val_, _ = auto_encoder(x_, x_org_, opt) # merged_val = tf.summary.merge_all() #tensorboard --logdir=run1:/tmp/tensorflow/ --port 6006 #writer = tf.train.SummaryWriter(opt.log_path, graph=tf.get_default_graph()) uidx = 0 config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True, graph_options=tf.GraphOptions(build_cost_model=1)) #config = tf.ConfigProto(device_count={'GPU':0}) # config.gpu_options.per_process_gpu_memory_fraction = 0.8 config.gpu_options.allow_growth = True np.set_printoptions(precision=3) np.set_printoptions(threshold=np.inf) saver = tf.train.Saver() run_metadata = tf.RunMetadata() with tf.Session(config=config) as sess: train_writer = tf.summary.FileWriter(opt.log_path + '/train', sess.graph) test_writer = tf.summary.FileWriter(opt.log_path + '/test', sess.graph) sess.run(tf.global_variables_initializer()) if opt.restore: try: t_vars = tf.trainable_variables() #print([var.name[:-2] for var in t_vars]) loader = restore_from_save(t_vars, sess, opt) print('Load pretrain successfully') except Exception as e: print(e) print("No saving session, using random initialization") sess.run(tf.global_variables_initializer()) for epoch in range(opt.max_epochs): print("Starting epoch %d" % epoch) # if epoch >= 10: # print("Relax embedding ") # opt.fix_emb = False # opt.batch_size = 2 kf = get_minibatches_idx(len(train), opt.batch_size, shuffle=True) for _, train_index in kf: uidx += 1 sents = [train[t] for t in train_index] sents_permutated = add_noise(sents, opt) #sents[0] = np.random.permutation(sents[0]) if opt.model != 'rnn_rnn' and opt.model != 'cnn_rnn': x_batch_org = prepare_data_for_cnn(sents, opt) # Batch L else: x_batch_org = prepare_data_for_rnn(sents, opt) # Batch L if opt.model != 'rnn_rnn': x_batch = prepare_data_for_cnn(sents_permutated, opt) # Batch L else: x_batch = prepare_data_for_rnn(sents_permutated, opt, is_add_GO=False) # Batch L if profile: _, loss = sess.run( [train_op, loss_], feed_dict={ x_: x_batch, x_org_: x_batch_org, is_train_: 1 }, options=tf.RunOptions( trace_level=tf.RunOptions.FULL_TRACE), run_metadata=run_metadata) else: _, loss = sess.run([train_op, loss_], feed_dict={ x_: x_batch, x_org_: x_batch_org, is_train_: 1 }) if uidx % opt.valid_freq == 0: is_train = None valid_index = np.random.choice(len(val), opt.batch_size) val_sents = [val[t] for t in valid_index] val_sents_permutated = add_noise(val_sents, opt) if opt.model != 'rnn_rnn' and opt.model != 'cnn_rnn': x_val_batch_org = prepare_data_for_cnn(val_sents, opt) else: x_val_batch_org = prepare_data_for_rnn(val_sents, opt) if opt.model != 'rnn_rnn': x_val_batch = prepare_data_for_cnn( val_sents_permutated, opt) else: x_val_batch = prepare_data_for_rnn( val_sents_permutated, opt, is_add_GO=False) loss_val = sess.run(loss_, 
feed_dict={ x_: x_val_batch, x_org_: x_val_batch_org, is_train_: is_train }) print("Validation loss %f " % (loss_val)) res = sess.run(res_, feed_dict={ x_: x_val_batch, x_org_: x_val_batch_org, is_train_: is_train }) np.savetxt(opt.save_txt + '/rec_val_words.txt', res['rec_sents'], fmt='%i', delimiter=' ') try: print("Orig:" + u' '.join([ ixtoword[x] for x in x_val_batch_org[0] if x != 0 and x != 1 ])) #.encode('utf-8', 'ignore').strip() print("Sent:" + u' '.join([ ixtoword[x] for x in res['rec_sents'][0] if x != 0 ])) #.encode('utf-8', 'ignore').strip() except: pass if opt.discrimination: print("Real Prob %f Fake Prob %f" % (res['prob_r'], res['prob_f'])) summary = sess.run(merged, feed_dict={ x_: x_val_batch, x_org_: x_val_batch_org, is_train_: is_train }) test_writer.add_summary(summary, uidx) is_train = True if uidx % opt.print_freq == 1: #pdb.set_trace() print("Iteration %d: loss %f " % (uidx, loss)) res = sess.run(res_, feed_dict={ x_: x_batch, x_org_: x_batch_org, is_train_: 1 }) np.savetxt(opt.save_txt + '/rec_train_words.txt', res['rec_sents'], fmt='%i', delimiter=' ') try: print("Orig:" + u' '.join([ ixtoword[x] for x in x_batch_org[0] if x != 0 and x != 1 ])) #.encode('utf-8').strip() print("Sent:" + u' '.join([ ixtoword[x] for x in res['rec_sents'][0] if x != 0 ])) #.encode('utf-8').strip() except: pass summary = sess.run(merged, feed_dict={ x_: x_batch, x_org_: x_batch_org, is_train_: 1 }) train_writer.add_summary(summary, uidx) # print res['x_rec'][0][0] # print res['x_emb'][0][0] if profile: tf.contrib.tfprof.model_analyzer.print_model_analysis( tf.get_default_graph(), run_meta=run_metadata, tfprof_options=tf.contrib.tfprof.model_analyzer. PRINT_ALL_TIMING_MEMORY) saver.save(sess, opt.save_path, global_step=epoch)
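# `add_noise` is not defined in this file; the sketch below is a hypothetical
# corruption of the kind typically used for denoising text autoencoders like
# the one above: drop each token with probability `p_drop` and swap a small
# number of the remaining positions. Sentences are lists of int word ids.
import numpy as np

def add_noise_sketch(sents, p_drop=0.1, p_shuffle=0.1, rng=np.random):
    noisy = []
    for sent in sents:
        kept = [w for w in sent if rng.rand() > p_drop] or list(sent)
        kept = np.array(kept)
        if len(kept) >= 2:
            for _ in range(int(len(kept) * p_shuffle)):
                i, j = rng.randint(0, len(kept), size=2)
                kept[i], kept[j] = kept[j], kept[i]   # swap two random positions
        noisy.append(kept.tolist())
    return noisy

# usage sketch
if __name__ == '__main__':
    print(add_noise_sketch([[4, 8, 15, 16, 23, 42]]))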
def train_classifier(train, valid, test, W, n_p=10, n_words=10000, n_x=300, n_h=200, patience=10, max_epochs=50, lrate=0.001, n_train=10000, optimizer='RMSprop', batch_size=50, valid_batch_size=50, dispFreq=10, validFreq=100, saveFreq=500, eps=1e-3): """ train, valid, test : datasets W : the word embedding initialization n_words : vocabulary size n_x : word embedding dimension n_h : LSTM/GRU number of hidden units n_z : latent embedding sapce for a sentence patience : Number of epoch to wait before early stop if no progress max_epochs : The maximum number of epoch to run lrate : learning rate optimizer : methods to do optimization batch_size : batch size during training valid_batch_size : The batch size used for validation/test set dispFreq : Display to stdout the training progress every N updates validFreq : Compute the validation error after this number of update. """ options = {} options['n_p'] = n_p options['n_words'] = n_words options['n_x'] = n_x options['n_h'] = n_h options['patience'] = patience options['max_epochs'] = max_epochs options['lrate'] = lrate options['optimizer'] = optimizer options['batch_size'] = batch_size options['valid_batch_size'] = valid_batch_size options['dispFreq'] = dispFreq options['validFreq'] = validFreq #if config.method in ['SVGD', 'SVGD_KFAC']: patience = 5 logger.info('Model options {}'.format(options)) logger.info('{} train examples'.format(len(train[0]))) logger.info('{} valid examples'.format(len(valid[0]))) logger.info('{} test examples'.format(len(test[0]))) logger.info('Building model...') assert np.min(train[1]) == 0 and np.max(train[1]) == 1 n_y = np.max(train[1]) + 1 options['n_y'] = n_y params = init_params(options, W) tparams = init_tparams(params) (use_noise, x, mask, y, f_pred_prob, f_pred, cost, cache) = build_model(tparams, options) lr_theano = tensor.scalar(name='lr') ntrain_theano = tensor.scalar(name='ntrain') if config.method == 'pSGLD': f_grad_shared, f_update = pSGLD(tparams, cost, [x, mask, y], ntrain_theano, lr_theano) elif config.method == 'SGLD': f_grad_shared, f_update = SGLD(tparams, cost, [x, mask, y], ntrain_theano, lr_theano) elif config.method == 'RMSprop': f_grad_shared, f_update = RMSprop(tparams, cost, [x, mask, y], lr_theano) elif config.method == 'SVGD': f_grad_shared, f_update = SVGD(tparams, cost, [x, mask, y], ntrain_theano, lr_theano, kfac=False) elif config.method == 'SVGD_KFAC': f_grad_shared, f_update = SVGD(tparams, cost, [x, mask, y], ntrain_theano, lr_theano, kfac=True, average=True, cache=cache, eps=eps, n_p=n_p) elif config.method == 'MIXTURE_KFAC': f_grad_shared, f_update = SVGD(tparams, cost, [x, mask, y], ntrain_theano, lr_theano, kfac=True, average=False, cache=cache, eps=eps, n_p=n_p) #print 'Training model...' logger.info('Training model...') kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size) kf_test = get_minibatches_idx(len(test[0]), valid_batch_size) estop = False # early stop history_errs = [] best_train_err, best_valid_err, best_test_err = 0., 0., 0. 
bad_counter = 0 uidx = 0 # the number of update done start_time = time.time() n_average = 0 train_probs = np.zeros((len(train[0]), n_y)) valid_probs = np.zeros((len(valid[0]), n_y)) test_probs = np.zeros((len(test[0]), n_y)) try: for eidx in xrange(max_epochs): print tparams.keys() from optimizers import sqr_dist ##['Wemb', 'lstm_encoder_W', 'lstm_encoder_U', 'lstm_encoder_rev_W', 'lstm_encoder_rev_U', 'Wy'] tv = tensor.flatten(tparams['Wy'], 2) ftv = theano.function([], sqr_dist(tv, tv)) otv = ftv() print(np.min(otv), np.max(otv), np.mean(otv), np.median(otv), np.sum(otv**2) / n_p) n_samples = 0 kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True) for _, train_index in kf: uidx += 1 #use_noise.set_value(0.5) use_noise.set_value(config.dropout) y = [train[1][t] for t in train_index] x = [train[0][t] for t in train_index] x, mask, y = prepare_data(x, y) n_samples += x.shape[1] cost = f_grad_shared(x, mask, y) if config.method == 'RMSprop': f_update(lrate) elif config.method in ['SVGD', 'pSGLD', 'SGLD']: f_update(lrate, n_train) elif config.method in ['SVGD_KFAC', 'MIXTURE_KFAC']: f_update(lrate, n_train, x, mask, y) if np.isnan(cost) or np.isinf(cost): logger.info('NaN detected') estop = True break return 1., 1., 1. if np.mod(uidx, dispFreq) == 0: logger.info('Epoch {} Update {} Cost {}'.format( eidx, uidx, cost)) if np.mod(uidx, saveFreq) == 0: logger.info('Saving ...') saveto = 'results/%s.npz' % save_prefix np.savez(saveto, history_errs=history_errs) logger.info('Done ...') if np.mod(uidx, validFreq) == 0: use_noise.set_value(0.) if eidx < 1: train_err = pred_error(f_pred, prepare_data, train, kf) valid_err = pred_error(f_pred, prepare_data, valid, kf_valid) test_err = pred_error(f_pred, prepare_data, test, kf_test) history_errs.append([valid_err, test_err, train_err]) else: train_probs_curr = pred_probs(f_pred_prob, prepare_data, train, kf, options) valid_probs_curr = pred_probs(f_pred_prob, prepare_data, valid, kf_valid, options) test_probs_curr = pred_probs(f_pred_prob, prepare_data, test, kf_test, options) train_probs = (n_average * train_probs + train_probs_curr) / (n_average + 1) valid_probs = (n_average * valid_probs + valid_probs_curr) / (n_average + 1) test_probs = (n_average * test_probs + test_probs_curr) / (n_average + 1) n_average += 1 train_pred = train_probs.argmax(axis=1) valid_pred = valid_probs.argmax(axis=1) test_pred = test_probs.argmax(axis=1) train_err = (train_pred == np.array(train[1])).sum() train_err = 1. - numpy_floatX(train_err) / len( train[0]) valid_err = (valid_pred == np.array(valid[1])).sum() valid_err = 1. - numpy_floatX(valid_err) / len( valid[0]) test_err = (test_pred == np.array(test[1])).sum() test_err = 1. 
- numpy_floatX(test_err) / len(test[0]) history_errs.append([valid_err, test_err, train_err]) if (uidx == 0 or valid_err <= np.array(history_errs)[:, 0].min()): best_train_err = train_err best_valid_err = valid_err best_test_err = test_err bad_counter = 0 logger.info('Train {} Valid {} Test {}'.format( train_err, valid_err, test_err)) if (len(history_errs) > patience and valid_err >= np.array(history_errs)[:-patience, 0].min()): #valid_err >= np.array(history_errs)[:-patience,0].mean()): bad_counter += 1 #valid_err >= np.array(history_errs)[:-patience,0].mean()): if bad_counter > patience: logger.info('Early Stop!') estop = True break logger.info('Seen {} samples'.format(n_samples)) if estop: break except KeyboardInterrupt: logger.info('Training interupted') end_time = time.time() logger.info('Train {} Valid {} Test {}'.format(best_train_err, best_valid_err, best_test_err)) saveto = 'results/%s.npz' % save_prefix np.savez(saveto, train_err=best_train_err, valid_err=best_valid_err, test_err=best_test_err, history_errs=history_errs) logger.info('The code run for {} epochs, with {} sec/epochs'.format( eidx + 1, (end_time - start_time) / (1. * (eidx + 1)))) #print >> sys.stderr, ('Training took %.1fs' % # (end_time - start_time)) return best_train_err, best_valid_err, best_test_err
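# Sketch of the incremental averaging used in the validFreq branch above:
# predictive probabilities from successive evaluations (posterior samples) are
# folded into a running mean before taking the argmax. Pure NumPy illustration.
import numpy as np

def update_running_mean(mean_probs, new_probs, n_average):
    """Return the running mean after folding in one more probability matrix."""
    return (n_average * mean_probs + new_probs) / (n_average + 1.0)

# usage sketch: the running mean equals the plain mean of all samples so far
if __name__ == '__main__':
    samples = [np.random.dirichlet(np.ones(3), size=4) for _ in range(5)]
    mean_probs, n = np.zeros((4, 3)), 0
    for s in samples:
        mean_probs = update_running_mean(mean_probs, s, n)
        n += 1
    assert np.allclose(mean_probs, np.mean(samples, axis=0))
    print(mean_probs.argmax(axis=1))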
train = transform(train, w2i) dev = transform(dev, w2i) test = transform(test, w2i) def evaluate(model, dev, params): _, g1x, g1mask, g2x, g2mask = utils.get_prepare_data(dev, params.nout) golds = [score for sa, sb, score in dev] scores = model.scoring_function(g1x, g2x, g1mask, g2mask) preds = np.squeeze(scores) return pearsonr(preds, golds)[0], spearmanr(preds, golds)[0] for epoch in range(300): process_bar = pyprind.ProgPercent(len(train)) kf = utils.get_minibatches_idx(len(train), params.batchsize, shuffle=True) uidx = 0 for _, train_index in kf: uidx += 1 batch = [train[t] for t in train_index] scores, g1x, g1mask, g2x, g2mask = utils.get_prepare_data( batch, params.nout) # print scores[:2], g1x[:2], g1mask[:2], g2x[:2], g2mask[:2] cost = model.train_function(scores, g1x, g2x, g1mask, g2mask) if np.isnan(cost) or np.isinf(cost): print 'NaN detected' print 'Epoch ', (epoch + 1), 'Update ', (uidx + 1), 'Cost ', cost
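# Self-contained sketch of the correlation metrics computed by `evaluate`
# above: Pearson and Spearman correlation between predicted similarity scores
# and gold scores. Assumes SciPy; the arrays below are toy data.
import numpy as np
from scipy.stats import pearsonr, spearmanr

def correlation_metrics(preds, golds):
    preds, golds = np.asarray(preds), np.asarray(golds)
    return pearsonr(preds, golds)[0], spearmanr(preds, golds)[0]

# usage sketch
if __name__ == '__main__':
    preds = [0.1, 0.4, 0.35, 0.8]
    golds = [0.0, 0.5, 0.30, 1.0]
    print(correlation_metrics(preds, golds))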
def main(gpu, mem_frac, batch_size, alpha, gamma, omega, euler_ord, max_len, optimizer, keep_prob, learning_rate, margin, norm_type, balancing): # ========================================================================== # # =============================== Parameters =============================== # # ========================================================================== # params = { 'gpu': gpu, 'mem_frac': mem_frac, 'batch_size': batch_size, 'alpha': alpha, # threshold for euler angle 'gamma': gamma, # weight factor for twist loss 'omega': omega, # weight factor for smooth loss 'euler_ord': euler_ord, 'max_len': max_len, 'optimizer': optimizer, 'keep_prob': keep_prob, 'learning_rate': learning_rate, 'margin': margin, 'norm_type': norm_type, 'balancing': balancing } prefix = "pmnet" for k, v in params.items(): if (k != 'gpu' and k != 'mem_frac' and k != 'euler_ord'): prefix += "_" + k + "=" + str(v) # ========================================================================== # # =============================== Load Data ================================ # # ========================================================================== # data_path = "../datasets/train/" stats_path = "../data/" # Mixamo joint configuration parents = np.array([ -1, 0, 1, 2, 3, 4, 0, 6, 7, 8, 0, 10, 11, 12, 3, 14, 15, 16, 3, 18, 19, 20 ]) all_local = [] all_global = [] all_skel = [] all_names = [] t_skel = [] folders = [ f for f in listdir(data_path) if not f.startswith(".") and not f.endswith("py") and not f.endswith(".npz") ] for folder_name in folders: files = [ f for f in listdir(data_path + folder_name) if not f.startswith(".") and f.endswith("_seq.npy") ] for cfile in files: file_name = cfile[:-8] # Real joint positions positions = np.load(data_path + folder_name + "/" + file_name + "_skel.npy") # After processed (Maybe, last 4 elements are dummy values) sequence = np.load(data_path + folder_name + "/" + file_name + "_seq.npy") # Processed global positions (#frames, 4) offset = sequence[:, -8:-4] # Processed local positions (#frames, #joints, 3) sequence = np.reshape(sequence[:, :-8], [sequence.shape[0], -1, 3]) positions[:, 0, :] = sequence[:, 0, :] # root joint all_local.append(sequence) all_global.append(offset) all_skel.append(positions) all_names.append(folder_name) # Joint positions before processed train_skel = all_skel # After processed, relative position train_local = all_local train_global = all_global # T-pose (real position) for tt in train_skel: t_skel.append(tt[0:1]) # Total training samples all_frames = np.concatenate(train_local) ntotal_samples = all_frames.shape[0] ntotal_sequences = len(train_local) print("Number of sequences: " + str(ntotal_sequences)) # ========================================================================== # # ============================= Data Normalize ============================= # # ========================================================================== # # Calculate total mean and std allframes_n_skel = np.concatenate(train_local + t_skel) local_mean = allframes_n_skel.mean(axis=0)[None, :] global_mean = np.concatenate(train_global).mean(axis=0)[None, :] local_std = allframes_n_skel.std(axis=0)[None, :] global_std = np.concatenate(train_global).std(axis=0)[None, :] # Save the data stats np.save(stats_path + "mixamo_local_motion_mean.npy", local_mean) np.save(stats_path + "mixamo_local_motion_std.npy", local_std) np.save(stats_path + "mixamo_global_motion_mean.npy", global_mean) np.save(stats_path + "mixamo_global_motion_std.npy", global_std) # Normalize 
the data (whitening) n_joints = all_local[0].shape[-2] local_std[local_std == 0] = 1 for i in xrange(len(train_local)): train_local[i] = (train_local[i] - local_mean) / local_std train_global[i] = (train_global[i] - global_mean) / global_std train_skel[i] = (train_skel[i] - local_mean) / local_std # ========================================================================== # # =============================== Load Model =============================== # # ========================================================================== # models_dir = "../data/models/" + prefix logs_dir = "../data/logs/" + prefix if not exists(models_dir): makedirs(models_dir) if not exists(logs_dir): makedirs(logs_dir) gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_frac) with tf.device("/gpu:%d" % gpu): net = pmnet_model(batch_size, alpha, gamma, omega, euler_ord, n_joints, max_len, parents, keep_prob, learning_rate, optimizer, local_mean, local_std, global_mean, global_std, logs_dir, margin, norm_type, balancing) # ========================================================================== # # ================================ Training ================================ # # ========================================================================== # with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False, gpu_options=gpu_options)) as sess: sess.run(tf.global_variables_initializer()) loaded, model_name = net.load(sess, models_dir) if loaded: print("[*] Load SUCCESSFUL") iteration = int(model_name.split("-")[-1]) else: print("[!] Starting from scratch ...") iteration = 0 net.saver = tf.train.Saver(max_to_keep=10) max_iter = 15000 while iteration < max_iter: mini_batches = get_minibatches_idx(len(train_local), batch_size, shuffle=True) for _, batch_idxs in mini_batches: start_time = time.time() if len(batch_idxs) == batch_size: steps = np.repeat(max_len, batch_size) localA_batch = [] globalA_batch = [] skelA_batch = [] localB_batch = [] globalB_batch = [] skelB_batch = [] mask_batch = np.zeros((batch_size, max_len), dtype="float32") aeReg_batch = np.zeros((batch_size, 1), dtype="float32") inp_height_batch = np.zeros((batch_size, 1), dtype="float32") tgt_height_batch = np.zeros((batch_size, 1), dtype="float32") # Make minibatch for bb in xrange(batch_size): low = 0 high = train_local[batch_idxs[bb]].shape[0] - max_len if low >= high: stidx = 0 else: stidx = np.random.randint(low=low, high=high) clocalA = train_local[batch_idxs[bb]][stidx:(stidx + max_len)] mask_batch[ bb, :np.min([max_len, clocalA.shape[0]])] = 1.0 if clocalA.shape[0] < max_len: clocalA = np.concatenate( (clocalA, np.zeros((max_len - clocalA.shape[0], n_joints, 3)))) cglobalA = train_global[batch_idxs[bb]][stidx:( stidx + max_len)] if cglobalA.shape[0] < max_len: cglobalA = np.concatenate( (cglobalA, np.zeros((max_len - cglobalA.shape[0], n_joints, 3)))) cskelA = train_skel[batch_idxs[bb]][stidx:(stidx + max_len)] if cskelA.shape[0] < max_len: cskelA = np.concatenate( (cskelA, np.zeros((max_len - cskelA.shape[0], n_joints, 3)))) rnd_idx = np.random.randint(len(train_local)) cskelB = train_skel[rnd_idx][0:max_len] if cskelB.shape[0] < max_len: cskelB = np.concatenate( (cskelB, np.zeros((max_len - cskelB.shape[0], n_joints, 3)))) joints_a = cskelA[0].copy() joints_a = joints_a[None] joints_a = (joints_a * local_std) + local_mean height_a = get_height_from_skel(joints_a[0]) height_a = height_a / 100 joints_b = cskelB[0].copy() joints_b = joints_b[None] joints_b = (joints_b * local_std + local_mean) 
height_b = get_height_from_skel(joints_b[0]) height_b = height_b / 100 aeReg_on = np.random.binomial(1, p=0.5) if aeReg_on: cskelB = cskelA.copy() aeReg_batch[bb, 0] = 1 inp_height_batch[bb, 0] = height_a tgt_height_batch[bb, 0] = height_a else: aeReg_batch[bb, 0] = 0 inp_height_batch[bb, 0] = height_a tgt_height_batch[bb, 0] = height_b localA_batch.append(clocalA) globalA_batch.append(cglobalA) skelA_batch.append(cskelA) localB_batch.append(clocalA) globalB_batch.append(cglobalA) skelB_batch.append(cskelB) localA_batch = np.array(localA_batch).reshape( (batch_size, max_len, -1)) globalA_batch = np.array(globalA_batch).reshape( (batch_size, max_len, -1)) seqA_batch = np.concatenate((localA_batch, globalA_batch), axis=-1) skelA_batch = np.array(skelA_batch).reshape( (batch_size, max_len, -1)) localB_batch = np.array(localB_batch).reshape( (batch_size, max_len, -1)) globalB_batch = np.array(globalB_batch).reshape( (batch_size, max_len, -1)) seqB_batch = np.concatenate((localB_batch, globalB_batch), axis=-1) skelB_batch = np.array(skelB_batch).reshape( (batch_size, max_len, -1)) mid_time = time.time() mf, mr, mg, shape, base = net.train( sess, seqA_batch, skelA_batch, seqB_batch, skelB_batch, mask_batch, aeReg_batch, inp_height_batch, tgt_height_batch, iteration) print("step=%d/%d, time=%.2f+%.2f" % (iteration, max_iter, mid_time - start_time, time.time() - mid_time)) if np.isnan(mg) or np.isinf(mg): return if iteration >= 1000 and iteration % 5000 == 0: net.save(sess, models_dir, iteration) iteration = iteration + 1 net.save(sess, models_dir, iteration)
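# Sketch of the crop/pad/mask step used when building minibatches above: take
# a random window of `max_len` frames, zero-pad clips that are shorter, and
# record which frames are real in a float mask. NumPy only; shapes follow the
# (frames, joints, 3) convention of the local-motion arrays.
import numpy as np

def crop_pad_with_mask(clip, max_len, rng=np.random):
    high = clip.shape[0] - max_len
    start = 0 if high <= 0 else rng.randint(0, high)
    window = clip[start:start + max_len]
    mask = np.zeros(max_len, dtype='float32')
    mask[:min(max_len, window.shape[0])] = 1.0
    if window.shape[0] < max_len:            # zero-pad short clips
        pad = np.zeros((max_len - window.shape[0],) + clip.shape[1:])
        window = np.concatenate((window, pad))
    return window, mask

# usage sketch: a 45-frame clip with 22 joints, padded to 60 frames
if __name__ == '__main__':
    clip = np.random.randn(45, 22, 3)
    window, mask = crop_pad_with_mask(clip, max_len=60)
    print(window.shape, mask.sum())          # (60, 22, 3) 45.0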
def main(gpu, batch_size, alpha, beta, gamma, omega, margin, d_arch, d_rand, euler_ord, max_steps, min_steps, num_layer, gru_units, optim, norm_type, mem_frac, keep_prob, learning_rate): prefix = "Online_Retargeting_Mixamo_Cycle_Adv" for kk, vv in locals().iteritems(): if (kk != "prefix" and kk != "mem_frac" and kk != "batch_size" and kk != "min_steps" and kk != "max_steps" and kk != "gpu"): prefix += "_" + kk + "=" + str(vv) layers_units = [] for i in range(num_layer): layers_units.append(gru_units) data_path = "./datasets/train/" alllocal = [] allglobal = [] allskel = [] allnames = [] folders = [ f for f in listdir(data_path) if not f.startswith(".") and not f.endswith("py") and not f.endswith(".npz") ] for folder in folders: files = [ f for f in listdir(data_path + folder) if not f.startswith(".") and f.endswith("_seq.npy") ] for cfile in files: positions = np.load(data_path + folder + "/" + cfile[:-8] + "_skel.npy") if positions.shape[0] >= min_steps: sequence = np.load(data_path + folder + "/" + cfile[:-8] + "_seq.npy") offset = sequence[:, -8:-4] sequence = np.reshape(sequence[:, :-8], [sequence.shape[0], -1, 3]) positions[:, 0, :] = sequence[:, 0, :] alllocal.append(sequence) allglobal.append(offset) allskel.append(positions) allnames.append(folder) trainlocal = alllocal trainskel = allskel trainglobal = allglobal print("Number of examples: " + str(len(trainlocal))) tskel = [] for tt in trainskel: tskel.append(tt[0:1]) allframes_n_skel = np.concatenate(trainlocal + tskel) min_root = allframes_n_skel[:, 0:1].min(axis=0) max_root = allframes_n_skel[:, 0:1].max(axis=0) local_mean = allframes_n_skel.mean(axis=0)[None, :] global_mean = np.concatenate(trainglobal).mean(axis=0)[None, :] local_std = allframes_n_skel.std(axis=0)[None, :] global_std = np.concatenate(trainglobal).std(axis=0)[None, :] np.save(data_path[:-6] + "mixamo_local_motion_mean.npy", local_mean) np.save(data_path[:-6] + "mixamo_local_motion_std.npy", local_std) local_std[local_std == 0] = 1 np.save(data_path[:-6] + "mixamo_global_motion_mean.npy", global_mean) np.save(data_path[:-6] + "mixamo_global_motion_std.npy", global_std) n_joints = alllocal[0].shape[-2] for i in xrange(len(trainlocal)): trainlocal[i] = (trainlocal[i] - local_mean) / local_std trainglobal[i] = (trainglobal[i] - global_mean) / global_std trainskel[i] = (trainskel[i] - local_mean) / local_std models_dir = "./models/" + prefix logs_dir = "./logs/" + prefix parents = np.array([ -1, 0, 1, 2, 3, 4, 0, 6, 7, 8, 0, 10, 11, 12, 3, 14, 15, 16, 3, 18, 19, 20 ]) with tf.device("/gpu:%d" % gpu): gru = EncoderDecoderGRU(batch_size, alpha, beta, gamma, omega, euler_ord, n_joints, layers_units, max_steps, local_mean, local_std, global_mean, global_std, parents, keep_prob, logs_dir, learning_rate, optim, margin, d_arch, d_rand, norm_type) if not exists(models_dir): makedirs(models_dir) if not exists(logs_dir): makedirs(logs_dir) gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_frac) with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False, gpu_options=gpu_options)) as sess: sess.run(tf.global_variables_initializer()) loaded, model_name = gru.load(sess, models_dir) if loaded: print("[*] Load SUCCESSFUL") step = int(model_name.split("-")[-1]) else: print("[!] 
Starting from scratch ...") step = 0 total_steps = 50000 gru.saver = tf.train.Saver(max_to_keep=10) while step < total_steps: mini_batches = get_minibatches_idx(len(trainlocal), batch_size, shuffle=True) for _, batchidx in mini_batches: start_time = time.time() if len(batchidx) == batch_size: if min_steps >= max_steps: steps = np.repeat(max_steps, batch_size) else: steps = np.random.randint(low=min_steps, high=max_steps + 1, size=(batch_size, )) realLocal_batch = [] realSkel_batch = [] realGlobal_batch = [] localA_batch = [] globalA_batch = [] skelA_batch = [] localB_batch = [] globalB_batch = [] skelB_batch = [] aeReg_batch = np.zeros((batch_size, 1), dtype="float32") mask_batch = np.zeros((batch_size, max_steps), dtype="float32") for b in xrange(batch_size): low = 0 high = trainlocal[batchidx[b]].shape[0] - max_steps if low >= high: stidx = 0 else: stidx = np.random.randint(low=low, high=high) clocalA = trainlocal[batchidx[b]][stidx:stidx + max_steps] mask_batch[ b, :np.min([steps[b], clocalA.shape[0]])] = 1.0 if clocalA.shape[0] < max_steps: clocalA = np.concatenate( (clocalA, np.zeros((max_steps - clocalA.shape[0], n_joints, 3)))) cglobalA = trainglobal[batchidx[b]][stidx:stidx + max_steps] if cglobalA.shape[0] < max_steps: cglobalA = np.concatenate( (cglobalA, np.zeros((max_steps - cglobalA.shape[0], 4)))) cskelA = trainskel[batchidx[b]][stidx:stidx + max_steps] if cskelA.shape[0] < max_steps: cskelA = np.concatenate( (cskelA, np.zeros((max_steps - cskelA.shape[0], n_joints, 3)))) rnd_idx = np.random.randint(len(trainlocal)) cskelB = trainskel[rnd_idx][stidx:stidx + max_steps] if cskelB.shape[0] < max_steps: cskelB = np.concatenate( (cskelB, np.zeros((max_steps - cskelB.shape[0], n_joints, 3)))) tgtname = allnames[rnd_idx] rnd_idx = np.random.randint(len(trainlocal)) while tgtname != allnames[rnd_idx]: rnd_idx = np.random.randint(len(trainlocal)) low = 0 high = trainlocal[rnd_idx].shape[0] - max_steps if low >= high: stidx = 0 else: stidx = np.random.randint(low=low, high=high) crealLocal = trainlocal[rnd_idx][stidx:stidx + max_steps] crealGlobal = trainglobal[rnd_idx][stidx:stidx + max_steps] crealSkel = trainskel[rnd_idx][stidx:stidx + max_steps] regon = np.random.binomial(1, p=0.2) if regon: cskelB = cskelA.copy() aeReg_batch[b, 0] = 1 else: aeReg_batch[b, 0] = 0 localA_batch.append(clocalA) globalA_batch.append(cglobalA) skelA_batch.append(cskelA) localB_batch.append(clocalA) globalB_batch.append(cglobalA) skelB_batch.append(cskelB) realLocal_batch.append(crealLocal) realGlobal_batch.append(crealGlobal) realSkel_batch.append(crealSkel) localA_batch = np.array(localA_batch).reshape( (batch_size, max_steps, -1)) globalA_batch = np.array(globalA_batch).reshape( (batch_size, max_steps, -1)) seqA_batch = np.concatenate((localA_batch, globalA_batch), axis=-1) skelA_batch = np.array(skelA_batch).reshape( (batch_size, max_steps, -1)) localB_batch = np.array(localB_batch).reshape( (batch_size, max_steps, -1)) globalB_batch = np.array(globalB_batch).reshape( (batch_size, max_steps, -1)) seqB_batch = np.concatenate((localB_batch, globalB_batch), axis=-1) skelB_batch = np.array(skelB_batch).reshape( (batch_size, max_steps, -1)) realLocal_batch = np.array(realLocal_batch).reshape( (batch_size, max_steps, -1)) realGlobal_batch = np.array(realGlobal_batch).reshape( (batch_size, max_steps, -1)) realSeq_batch = np.concatenate( (realLocal_batch, realGlobal_batch), axis=-1) realSkel_batch = np.array(realSkel_batch).reshape( (batch_size, max_steps, -1)) mid_time = time.time() dlf, dlr, gl, lc = 
gru.train(sess, realSeq_batch, realSkel_batch, seqA_batch, skelA_batch, seqB_batch, skelB_batch, aeReg_batch, mask_batch, step) print( "step=%d/%d, g_loss=%.5f, d_loss=%.5f, cyc_loss=%.5f, " "time=%.2f+%.2f" % (step, total_steps, gl, dlf + dlr, lc, mid_time - start_time, time.time() - mid_time)) if np.isnan(gl) or np.isinf(gl): return if step >= 1000 and step % 1000 == 0: gru.save(sess, models_dir, step) step = step + 1 gru.save(sess, models_dir, step)
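# Sketch of the self-reconstruction regularization used when pairing clips in
# the loop above: with some probability the "target" skeleton is set to the
# input skeleton and an aeReg flag marks that row, so part of each batch is
# trained as a plain autoencoder. NumPy only; names are illustrative.
import numpy as np

def pick_target_skeleton(skel_a, skel_b, p_selfrec=0.2, rng=np.random):
    """Return (target skeleton, aeReg flag) for one training example."""
    if rng.binomial(1, p=p_selfrec):
        return skel_a.copy(), 1.0            # reconstruct the input character
    return skel_b, 0.0                       # retarget to a random character

# usage sketch
if __name__ == '__main__':
    skel_a = np.random.randn(60, 22, 3)
    skel_b = np.random.randn(60, 22, 3)
    tgt, flag = pick_target_skeleton(skel_a, skel_b)
    print(flag, np.allclose(tgt, skel_a) if flag else np.allclose(tgt, skel_b))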
def main(options): C.update(options) model = ResnetModel() x_train, y_train, x_validate, y_validate, x_test, y_test, train_size, validate_size, test_size = \ pre_process_CIFAR10_data() learning_rate = 0.1 for epoch in range(C['num_epoch']): start_time = time.time() kf = get_minibatches_idx(train_size, C['batch_size'], shuffle=True) train_loss = 0.0 train_batches = 0 for _, train_index in kf: inputs = x_train[train_index] targets = y_train[train_index] inputs, targets = prepare_CIFAR10_data(inputs, targets) loss = model.f_grad_shared(inputs, targets) model.f_update(learning_rate) train_loss += loss train_batches += 1 kf_valid = get_minibatches_idx(validate_size, C['valid_batch_size'], shuffle=False) valid_loss = 0.0 valid_accuracy = 0.0 valid_batches = 0 for _, valid_index in kf_valid: inputs = x_validate[valid_index] targets = y_validate[valid_index] inputs, targets = prepare_CIFAR10_data(inputs, targets) loss, accuracy = model.f_validate(inputs, targets) valid_loss += loss valid_accuracy += accuracy valid_batches += 1 print( '''\ Epoch {} of {} took {:.3f}s training loss: {:.6f} validation loss: {:.6f} validation accuracy: {:.2f} %'''.format( epoch, C['num_epoch'], time.time() - start_time, train_loss / train_batches, valid_loss / valid_batches, valid_accuracy / valid_batches * 100.0, ) ) if epoch + 1 == 41 or epoch + 1 == 61: learning_rate *= 0.1 print('Discount learning rate to', learning_rate) print('Saving model...', end='') np.savez('cifar10_deep_residual_model.npz', *lasagne.layers.get_all_param_values(model.network)) print('Done')
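# Sketch of the step-decay schedule used in the CIFAR-10 loop above: the
# learning rate is multiplied by 0.1 when training reaches fixed epoch
# milestones (41 and 61 in that loop). Plain Python helper, illustrative only.
def step_decay(base_lr, epoch, milestones=(41, 61), factor=0.1):
    """Learning rate for a given (1-indexed) epoch under step decay."""
    lr = base_lr
    for m in milestones:
        if epoch >= m:
            lr *= factor
    return lr

# usage sketch
if __name__ == '__main__':
    for ep in (1, 40, 41, 60, 61, 80):
        print(ep, step_decay(0.1, ep))      # 0.1 -> 0.01 at epoch 41 -> 0.001 at 61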