def eval(tag_path, corpus_path):
    """Evaluate the saved BiLSTM checkpoint on a labelled corpus and plot the running accuracy."""
    correct = 0
    total = 0
    acc_list = []
    model_name = MODEL_NAME
    embedding_dim = EMBEDDING_DIM
    hidden_dim = HIDDEN_DIM
    word_to_ix = WORD_TO_IX

    model = BiLSTM(len(word_to_ix), 5, embedding_dim, hidden_dim)
    checkpoint = torch.load(model_name)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    tag_to_ix = {'1': 0, '2': 1, '3': 2, '4': 3, '5': 4}
    sentences, tags = load_train_data(tag_path, corpus_path)
    labels = torch.tensor([[tag_to_ix[tag]] for tag in tags])

    with torch.no_grad():
        for i, sen in enumerate(tqdm(sentences)):
            inputs = prepare_sequence(sen, word_to_ix)  # renamed from `input` to avoid shadowing the builtin
            output = model(inputs)
            _, predicted = torch.max(output.data, 1)
            label = labels[i]
            total += label.size(0)
            correct += (predicted == label).sum().item()
            acc = round(100 * correct / total, 2)
            acc_list.append(acc)

    assert len(acc_list) == len(sentences)
    final_acc = acc_list[-1]

    plt.plot(list(range(len(tags))), acc_list)
    plt.xlabel('pred_num')
    plt.ylabel('accuracy / %')
    plt.show()
    return final_acc
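# For reference, a minimal sketch of what the `prepare_sequence` helper used above is assumed
# to do (map tokens to indices, unknown words to index 0, return a LongTensor). The repo's
# actual implementation may differ; the name below is deliberately distinct so it does not
# shadow the real helper.
def prepare_sequence_sketch(seq, word_to_ix):
    idxs = [word_to_ix.get(w, 0) for w in seq]  # assumption: 0 is used for out-of-vocabulary words
    return torch.tensor(idxs, dtype=torch.long)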
def get_time_to_score(tsv_path, thing, model_path):
    """Predict a score for every 2014/2015 review of `thing` and average the predictions per month."""
    time_to_count = {}
    time_to_scoresum = {}
    if thing == 'hair_dryer':
        product_id = '732252283'
    elif thing == 'microwave':
        product_id = '423421857'
    else:
        product_id = '246038397'

    with open('train_' + thing + '_word_to_ix.json', 'r') as j:
        word_to_ix = json.load(j)

    embedding_dim = EMBEDDING_DIM
    hidden_dim = HIDDEN_DIM
    model = BiLSTM(len(word_to_ix), 5, embedding_dim, hidden_dim)
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    with open(tsv_path, 'r') as f:
        reader = csv.reader(f, delimiter='\t')
        for i, r in enumerate(reader):
            if i == 0 or r[4] != product_id:  # skip the header row and other products
                continue
            month, _, year = r[14].split('/')
            if year not in {'2014', '2015'}:
                continue
            time = get_idx_by_year_month(int(year), int(month))
            if time < 8:
                continue
            # concatenate review headline and body, keep only simple characters
            sen = (r[12] + ' ' + r[13]).lower()
            sen = re.sub(r'[^A-Za-z0-9,.!]+', ' ', sen)
            inputs = prepare_sequence(sen.split(), word_to_ix)
            with torch.no_grad():
                output = model(inputs)
            _, predicted = torch.max(output.data, 1)
            pred_score = predicted.item()
            if time not in time_to_count:
                time_to_count[time] = 0
                time_to_scoresum[time] = 0.
            time_to_count[time] += 1
            time_to_scoresum[time] += pred_score

    time_to_scoremean = {}
    for time in time_to_count:
        time_to_scoremean[time] = time_to_scoresum[time] / time_to_count[time]
    print(time_to_count)
    return time_to_scoremean
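# Hedged usage sketch for get_time_to_score: the TSV path and checkpoint name below are
# placeholders, not file names taken from the repo; plotting assumes plt is already imported
# as in the evaluation code above.
if __name__ == "__main__":
    time_to_scoremean = get_time_to_score('reviews.tsv', 'hair_dryer', 'bilstm_checkpoint.pt')
    months = sorted(time_to_scoremean)
    plt.plot(months, [time_to_scoremean[m] for m in months])
    plt.xlabel('month index')
    plt.ylabel('mean predicted score')
    plt.show()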
class Trainer:
    def __init__(self, config):
        self.config = config
        self.load_data()  # load the datasets
        self.model = BiLSTM(self.config, self.vocab_size, self.word_vectors)  # initialize the model

    def load_data(self):
        self.train_dataloader = TrainData(self.config)
        self.eval_dataloader = TestData(self.config)
        train_data_path = os.path.join(self.config.BASE_DIR, self.config.train_data_path)
        self.train_inputs, self.train_labels, self.t2ix = self.train_dataloader.gen_train_data(train_data_path)
        eval_data_path = os.path.join(self.config.BASE_DIR, self.config.eval_data_path)
        self.eval_inputs, self.eval_labels, _ = self.eval_dataloader.gen_test_data(eval_data_path)
        self.vocab_size = self.train_dataloader.vocab_size
        self.word_vectors = self.train_dataloader.word_vectors

    def train(self):
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9, allow_growth=True)
        sess_config = tf.ConfigProto(log_device_placement=False,
                                     allow_soft_placement=True,
                                     gpu_options=gpu_options)
        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())  # initialize the graph variables
            current_step = 0

            # create the train/eval summary paths and writer objects
            train_summary_path = os.path.join(self.config.BASE_DIR, self.config.summary_path + "/train")
            eval_summary_path = os.path.join(self.config.BASE_DIR, self.config.summary_path + "/eval")
            self._check_directory(train_summary_path)
            self._check_directory(eval_summary_path)
            train_summary_writer = tf.summary.FileWriter(train_summary_path, sess.graph)
            eval_summary_writer = tf.summary.FileWriter(eval_summary_path, sess.graph)

            # train & eval process
            for epoch in range(self.config.epochs):
                print(f"----- Epoch {epoch + 1}/{self.config.epochs} -----")
                for batch in self.train_dataloader.next_batch(self.train_inputs,
                                                              self.train_labels,
                                                              self.config.batch_size):
                    summary, loss, predictions = self.model.train(sess, batch, self.config.keep_prob)
                    accuracy = self.model.get_metrics(sess, batch)
                    train_summary_writer.add_summary(summary, current_step)
                    print(f"! Train epoch: {epoch}, step: {current_step}, "
                          f"train loss: {loss}, accuracy: {accuracy}")

                    current_step += 1
                    if self.eval_dataloader and current_step % self.config.eval_every == 0:
                        losses = []
                        acces = []
                        for eval_batch in self.eval_dataloader.next_batch(self.eval_inputs,
                                                                          self.eval_labels,
                                                                          self.config.batch_size):
                            eval_summary, eval_loss, eval_predictions = self.model.eval(sess, eval_batch)
                            # compute metrics on the eval batch (the original used the train batch here)
                            eval_accuracy = self.model.get_metrics(sess, eval_batch)
                            eval_summary_writer.add_summary(eval_summary, current_step)
                            losses.append(eval_loss)
                            acces.append(eval_accuracy)
                        print(f"! Eval epoch: {epoch}, step: {current_step}, "
                              f"eval loss: {sum(losses) / len(losses)}, "
                              f"accuracy: {sum(acces) / len(acces)}")

                        if self.config.ckpt_model_path:
                            save_path = os.path.join(self.config.BASE_DIR, self.config.ckpt_model_path)
                            self._check_directory(save_path)
                            model_save_path = os.path.join(save_path, self.config.model_name)
                            self.model.saver.save(sess, model_save_path, global_step=current_step)

    def _check_directory(self, path):
        if not os.path.exists(path):
            os.makedirs(path)
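# Hedged usage sketch: a hypothetical entry point that wires an already-built config object
# into the Trainer above. The config class itself (with the attributes Trainer references,
# e.g. BASE_DIR, epochs, batch_size) comes from the repo and is not reproduced here.
def run_training(config):
    trainer = Trainer(config)
    trainer.train()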
# create an iterator of the correct shape and type
iter = tf.data.Iterator.from_structure(train_batches.output_types,
                                       train_batches.output_shapes)
xs, ys = iter.get_next()
train_init_opt = iter.make_initializer(train_batches)
dev_init_opt = iter.make_initializer(dev_batches)

model = BiLSTM(param)
loss, train_opt, pred_train, train_summaries, global_step, lstm_cell_fw, x_check = model.train(xs, ys)
logits_eval, probs_eval, pred_eval, ys = model.eval(xs, ys)

# variables for early stopping
dev_history = []
dev_best = 0
stop_times = 0

logging.info('# Session')
saver = tf.train.Saver(max_to_keep=model.epoch)
with tf.Session() as sess:
    ckpt = tf.train.latest_checkpoint(ckpt_path)
    if ckpt is None:
        logging.info("Initializing from scratch")
        sess.run(tf.global_variables_initializer())
class Seq_MNIST_Trainer():
    def __init__(self, trainer_params, args):
        self.args = args
        self.trainer_params = trainer_params
        random.seed(trainer_params.random_seed)
        torch.manual_seed(trainer_params.random_seed)
        if args.cuda:
            torch.cuda.manual_seed_all(trainer_params.random_seed)

        kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
        self.train_data = seq_mnist_train(trainer_params)
        self.val_data = seq_mnist_val(trainer_params)
        self.train_loader = DataLoader(self.train_data, batch_size=trainer_params.batch_size,
                                       shuffle=True, **kwargs)
        self.val_loader = DataLoader(self.val_data, batch_size=trainer_params.test_batch_size,
                                     shuffle=True, **kwargs)
        self.starting_epoch = 1
        self.prev_loss = 10000
        self.model = BiLSTM(trainer_params)
        self.criterion = wp.CTCLoss(size_average=True)
        self.labels = [i for i in range(trainer_params.num_classes - 1)]
        self.decoder = seq_mnist_decoder(labels=self.labels)

        if args.resume or args.eval or args.export:
            print("Loading model from {}".format(args.save_path))
            package = torch.load(args.save_path, map_location=lambda storage, loc: storage)
            self.model.load_state_dict(package['state_dict'])

        if args.cuda:
            torch.cuda.set_device(args.gpus)
            self.model = self.model.cuda()

        self.optimizer = optim.Adam(self.model.parameters(), lr=trainer_params.lr)

        if args.resume:
            self.optimizer.load_state_dict(package['optim_dict'])
            self.starting_epoch = package['starting_epoch']
            self.prev_loss = package['prev_loss']
            if args.cuda:
                # move optimizer state tensors onto the GPU as well
                for state in self.optimizer.state.values():
                    for k, v in state.items():
                        if torch.is_tensor(v):
                            state[k] = v.cuda()

        if args.init_bn_fc_fusion:
            if not trainer_params.prefused_bn_fc:
                self.model.batch_norm_fc.init_fusion()
                self.trainer_params.prefused_bn_fc = True
            else:
                raise Exception("BN and FC are already fused.")

    def serialize(self, model, trainer_params, optimizer, starting_epoch, prev_loss):
        package = {'state_dict': model.state_dict(),
                   'trainer_params': trainer_params,
                   'optim_dict': optimizer.state_dict(),
                   'starting_epoch': starting_epoch,
                   'prev_loss': prev_loss}
        return package

    def save_model(self, epoch, loss_value):
        print("Model saved at: {}\n".format(self.args.save_path))
        self.prev_loss = loss_value
        torch.save(self.serialize(model=self.model, trainer_params=self.trainer_params,
                                  optimizer=self.optimizer, starting_epoch=epoch + 1,
                                  prev_loss=self.prev_loss),
                   self.args.save_path)

    def train(self, epoch):
        self.model.train()
        for i, item in enumerate(self.train_loader):
            data, labels, output_len, lab_len = item
            data = Variable(data.transpose(1, 0), requires_grad=False)
            labels = Variable(labels.view(-1), requires_grad=False)
            output_len = Variable(output_len.view(-1), requires_grad=False)
            lab_len = Variable(lab_len.view(-1), requires_grad=False)
            if self.args.cuda:
                data = data.cuda()
            output = self.model(data)
            loss = self.criterion(output, labels, output_len, lab_len)
            loss_value = loss.data[0]
            print("Loss value for epoch = {}/{} and batch {}/{} is = {:.4f}".format(
                epoch, self.trainer_params.epochs,
                (i + 1) * self.trainer_params.batch_size, len(self.train_data), loss_value))
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            if self.args.cuda:
                torch.cuda.synchronize()

    def test(self, epoch=0, save_model_flag=False):
        self.model.eval()
        loss_value = 0
        for i, item in enumerate(self.val_loader):
            data, labels, output_len, lab_len = item
            data = Variable(data.transpose(1, 0), requires_grad=False)
            labels = Variable(labels.view(-1), requires_grad=False)
            output_len = Variable(output_len.view(-1), requires_grad=False)
            lab_len = Variable(lab_len.view(-1), requires_grad=False)
            if self.args.cuda:
                data = data.cuda()
            output = self.model(data)

            # decode one random sample from the batch and report its accuracy
            index = random.randint(0, self.trainer_params.test_batch_size - 1)
            label = labels[index * self.trainer_params.word_size:
                           (index + 1) * self.trainer_params.word_size].data.numpy()
            label = label - 1
            prediction = self.decoder.decode(output[:, index, :], output_len[index], lab_len[index])
            accuracy = self.decoder.hit(prediction, label)
            print("Sample Label = {}".format(self.decoder.to_string(label)))
            print("Sample Prediction = {}".format(self.decoder.to_string(prediction)))
            print("Accuracy on Sample = {:.2f}%\n\n".format(accuracy))

            loss = self.criterion(output, labels, output_len, lab_len)
            loss_value += loss.data.numpy()

        loss_value /= (len(self.val_data) // self.trainer_params.test_batch_size)
        print("Average Loss Value for Val Data is = {:.4f}\n".format(float(loss_value)))
        if loss_value < self.prev_loss and save_model_flag:
            self.save_model(epoch, loss_value)

    def eval_model(self):
        self.test()

    def train_model(self):
        for epoch in range(self.starting_epoch, self.trainer_params.epochs + 1):
            self.train(epoch)
            self.test(epoch=epoch, save_model_flag=True)
            # decay the learning rate every 20 epochs
            if epoch % 20 == 0:
                self.optimizer.param_groups[0]['lr'] = self.optimizer.param_groups[0]['lr'] * 0.98

    def export_model(self, simd_factor, pe):
        self.model.eval()
        self.model.export('r_model_fw_bw.hpp', simd_factor, pe)

    def export_image(self, idx=100):
        img, label = self.val_data.images[:, idx, :], self.val_data.labels[0][idx]
        img = img.transpose(1, 0)
        label -= 1
        label = self.decoder.to_string(label)
        from PIL import Image
        from matplotlib import cm
        im = Image.fromarray(np.uint8(cm.gist_earth(img) * 255))
        im.save('test_image.png')
        img = img.transpose(1, 0)
        img = np.reshape(img, (-1, 1))
        np.savetxt("test_image.txt", img, fmt='%.10f')
        with open('test_image_gt.txt', 'w') as f:
            f.write(label)
        print("Exported image with label = {}".format(label))
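# Hedged usage sketch: run_seq_mnist is a hypothetical entry point; `trainer_params` and
# `args` would come from the repo's own config and argument parser, so only the dispatch
# logic below is illustrative.
def run_seq_mnist(trainer_params, args):
    trainer = Seq_MNIST_Trainer(trainer_params, args)
    if args.eval:
        trainer.eval_model()
    elif args.export:
        trainer.export_model(simd_factor=1, pe=1)  # placeholder export settings
    else:
        trainer.train_model()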
def main(options):
    use_cuda = (len(options.gpuid) >= 1)
    if options.gpuid:
        cuda.set_device(options.gpuid[0])

    train, dev, test, vocab = torch.load(open(options.data_file, 'rb'), pickle_module=dill)

    batched_train, batched_train_mask, _ = utils.tensor.advanced_batchize(
        train, options.batch_size, vocab.stoi["<pad>"])
    batched_dev, batched_dev_mask, _ = utils.tensor.advanced_batchize(
        dev, options.batch_size, vocab.stoi["<pad>"])

    vocab_size = len(vocab)

    if options.load_file:
        rnnlm = torch.load(options.load_file)
    else:
        rnnlm = BiLSTM(vocab_size)

    if use_cuda:
        rnnlm.cuda()
    else:
        rnnlm.cpu()

    criterion = torch.nn.NLLLoss()
    optimizer = getattr(torch.optim, options.optimizer)(rnnlm.parameters(), options.learning_rate)

    # main training loop
    last_dev_avg_loss = float("inf")
    for epoch_i in range(options.epochs):
        logging.info("At {0}-th epoch.".format(epoch_i))
        rnnlm.train()  # switch back to train mode (validation below puts the model in eval mode)
        # srange generates a lazy sequence of shuffled range
        for i, batch_i in enumerate(utils.rand.srange(len(batched_train))):
            train_batch = Variable(batched_train[batch_i])  # of size (seq_len, batch_size)
            train_mask = Variable(batched_train_mask[batch_i])
            if use_cuda:
                train_batch = train_batch.cuda()
                train_mask = train_mask.cuda()

            sys_out_batch = rnnlm(train_batch)  # (seq_len, batch_size, vocab_size)  # TODO: substitute this with your module
            train_in_mask = train_mask.view(-1)
            train_in_mask = train_in_mask.unsqueeze(1).expand(len(train_in_mask), vocab_size)
            train_out_mask = train_mask.view(-1)
            sys_out_batch = sys_out_batch.view(-1, vocab_size)
            train_out_batch = train_batch.view(-1)
            sys_out_batch = sys_out_batch.masked_select(train_in_mask).view(-1, vocab_size)
            train_out_batch = train_out_batch.masked_select(train_out_mask)
            loss = criterion(sys_out_batch, train_out_batch)
            logging.debug("loss at batch {0}: {1}".format(i, loss.data[0]))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # validation -- this is a crude estimation because there might be some paddings at the end
        dev_loss = 0.0
        rnnlm.eval()
        for batch_i in range(len(batched_dev)):
            dev_batch = Variable(batched_dev[batch_i], volatile=True)
            dev_mask = Variable(batched_dev_mask[batch_i], volatile=True)
            if use_cuda:
                dev_batch = dev_batch.cuda()
                dev_mask = dev_mask.cuda()

            sys_out_batch = rnnlm(dev_batch)
            dev_in_mask = dev_mask.view(-1)
            dev_in_mask = dev_in_mask.unsqueeze(1).expand(len(dev_in_mask), vocab_size)
            dev_out_mask = dev_mask.view(-1)
            sys_out_batch = sys_out_batch.view(-1, vocab_size)
            dev_out_batch = dev_batch.view(-1)
            sys_out_batch = sys_out_batch.masked_select(dev_in_mask).view(-1, vocab_size)
            dev_out_batch = dev_out_batch.masked_select(dev_out_mask)
            loss = criterion(sys_out_batch, dev_out_batch)
            dev_loss += loss

        dev_avg_loss = dev_loss / len(batched_dev)
        logging.info("Average loss value per instance is {0} at the end of epoch {1}".format(
            dev_avg_loss.data[0], epoch_i))

        # if (last_dev_avg_loss - dev_avg_loss).data[0] < options.estop:
        #     logging.info("Early stopping triggered with threshold {0} "
        #                  "(previous dev loss: {1}, current: {2})".format(
        #                      options.estop, last_dev_avg_loss.data[0], dev_avg_loss.data[0]))
        #     break

        torch.save(rnnlm,
                   open(options.model_file + ".nll_{0:.2f}.epoch_{1}".format(
                       dev_avg_loss.data[0], epoch_i), 'wb'),
                   pickle_module=dill)
        last_dev_avg_loss = dev_avg_loss
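# Quick, self-contained illustration of the masking trick used above, with toy numbers and an
# assumed bool mask (the training code works on byte/bool masks from advanced_batchize):
# expanding the mask over the vocab axis and calling masked_select keeps exactly the rows of
# the non-padded positions.
import torch

logits = torch.randn(6, 4)                                   # 6 positions, vocab size 4
mask = torch.tensor([1, 1, 0, 1, 0, 1], dtype=torch.bool)    # 4 real tokens, 2 pads
kept = logits.masked_select(mask.unsqueeze(1).expand(6, 4)).view(-1, 4)
assert kept.shape == (4, 4)                                  # only the 4 unmasked positions remain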
import torch
import torchvision
from model import BiLSTM
from data import load_dataset
from config import model_name, device

if __name__ == "__main__":
    # the string to test!
    test_string = "<s> john can"

    # ########################
    # LOAD DATASET
    # ########################
    corpus, word_to_idx, idx_to_word, train_dataset = load_dataset()

    # ########################
    # TEST VARIABLES
    # ########################
    model = BiLSTM(len(corpus))
    model.load_state_dict(torch.load(model_name))
    model.eval()

    sentence = test_string.split()
    sentence = torch.tensor([[word_to_idx[w] for w in sentence]])
    s = model.sample(sentence)
    print(test_string.split() + s)
    dev_batch_sizes[fold], cfg.num_classes, index=str(fold), shuffle=False)

# create an iterator of the correct shape and type
iter = tf.data.Iterator.from_structure(test_batches.output_types,
                                       test_batches.output_shapes)
xs, ys = iter.get_next()
test_init_opt = iter.make_initializer(test_batches)
dev_init_opt = iter.make_initializer(dev_batches)

logging.info('# load Model')
model = BiLSTM(param)
logits_test, probs_test, pred_test, ys = model.eval(xs, ys)
logits_dev, probs_dev, pred_dev, ys = model.eval(xs, ys)

logging.info('# Session')
with tf.Session() as sess:
    model_path = os.path.join(cfg.result_path, cfg.version,
                              'index' + str(index) + '_models',
                              parameter_string, str(fold))
    ckpt = tf.train.latest_checkpoint(model_path)
    saver = tf.train.Saver()
    saver.restore(sess, ckpt)

    sess.run(dev_init_opt)
    # begin iterating over the dev data
    all_probs_dev, dev_labels, accuracy_dev = evaluate_data(
        'Dev', dev_labels, parameter_string, fold, all_probs_dev,