def train_bilstm(self):
    # Load data
    pre_processor = PreProcessor(file_path=TEST_DATA_PATH)
    sentences, entities = pre_processor.run()
    n_test_data = len(sentences)
    test_generator = generate_data_by_batch(
        x=sentences,
        y=entities,
        n_classes=pre_processor.n_entities + 1,
        entity_to_index=pre_processor.entity_to_index,
        batch_size=BATCH_SIZE
    )
    bilstm = BiLSTM(n_class=pre_processor.n_entities + 1)
    bilstm.load()
    # Saving a model with `model.save()` doesn't store custom loss or metric
    # functions. The model has to be stored separately as a "config" file and
    # a "weights" file and loaded from both, which makes compiling before
    # evaluating an essential step. This issue has existed since Keras 2.0:
    # https://github.com/keras-team/keras/issues/5916
    bilstm.model.compile(
        optimizer="nadam",
        loss="categorical_crossentropy",
        metrics=["accuracy", custom_f1, custom_precision, custom_recall]
    )
    bilstm.model.evaluate_generator(
        test_generator,
        steps=n_test_data // BATCH_SIZE,
        verbose=1,
    )
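# A minimal sketch of the save/load workaround described in the comment above
# (an assumption, not this project's actual persistence code: it presumes a
# plain Keras `Model` bound to `model` and reuses the same custom metric
# functions; the file names are illustrative):
from keras.models import model_from_json

with open("bilstm_config.json", "w") as f:
    f.write(model.to_json())               # "config" file: architecture only
model.save_weights("bilstm_weights.h5")    # "weights" file

with open("bilstm_config.json") as f:
    restored = model_from_json(f.read())
restored.load_weights("bilstm_weights.h5")
# Custom objects are not serialized, so re-compile before calling evaluate().
restored.compile(optimizer="nadam",
                 loss="categorical_crossentropy",
                 metrics=["accuracy", custom_f1, custom_precision, custom_recall])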
def __init__(self, trainer_params, args):
    self.args = args
    self.trainer_params = trainer_params
    random.seed(trainer_params.random_seed)
    torch.manual_seed(trainer_params.random_seed)
    if args.cuda:
        torch.cuda.manual_seed_all(trainer_params.random_seed)

    self.train_data = seq_mnist_train(trainer_params)
    self.val_data = seq_mnist_val(trainer_params)
    self.train_loader = DataLoader(self.train_data,
                                   batch_size=trainer_params.batch_size,
                                   shuffle=True,
                                   num_workers=trainer_params.num_workers)
    self.val_loader = DataLoader(self.val_data,
                                 batch_size=trainer_params.test_batch_size,
                                 shuffle=False,
                                 num_workers=trainer_params.num_workers)
    self.starting_epoch = 1
    self.prev_loss = 10000

    self.model = BiLSTM(trainer_params)
    self.criterion = wp.CTCLoss(size_average=False)
    self.labels = [i for i in range(trainer_params.num_classes - 1)]
    self.decoder = seq_mnist_decoder(labels=self.labels)
    self.optimizer = optim.Adam(self.model.parameters(), lr=trainer_params.lr)

    if args.cuda:
        torch.cuda.set_device(args.gpus)
        self.model = self.model.cuda()
        self.criterion = self.criterion.cuda()

    if args.resume or args.eval or args.export:
        print("Loading model from {}".format(args.resume))
        package = torch.load(args.resume,
                             map_location=lambda storage, loc: storage)
        self.model.load_state_dict(package['state_dict'])
        self.optimizer.load_state_dict(package['optim_dict'])
        self.starting_epoch = package['starting_epoch']
        self.prev_loss = package['prev_loss']
        if args.cuda:
            for state in self.optimizer.state.values():
                for k, v in state.items():
                    if torch.is_tensor(v):
                        state[k] = v.cuda()

    if args.init_bn_fc_fusion:
        if not trainer_params.prefused_bn_fc:
            self.model.batch_norm_fc.init_fusion()
            self.trainer_params.prefused_bn_fc = True
        else:
            raise Exception("BN and FC are already fused.")
def predict(sentence):
    sentence = sentence.split()
    model_name = BEST_NAME
    embedding_dim = EMBEDDING_DIM
    hidden_dim = HIDDEN_DIM
    word_to_ix = WORD_TO_IX
    model = BiLSTM(len(word_to_ix), 5, embedding_dim, hidden_dim)
    checkpoint = torch.load(model_name)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()  # disable dropout for inference
    inputs = prepare_sequence(sentence, word_to_ix)
    with torch.no_grad():
        output = model(inputs)
    print(output)
    _, predicted = torch.max(output.data, 1)
    print(predicted)
def model_load_test(test_df, vocab_file, embeddings_file, pretrained_file,
                    test_prediction_dir, test_prediction_name, mode,
                    num_labels=2, max_length=50, gpu_index=0, batch_size=128):
    device = torch.device(
        "cuda:{}".format(gpu_index) if torch.cuda.is_available() else "cpu")
    print(20 * "=", " Preparing for testing ", 20 * "=")
    if platform == "linux" or platform == "linux2":
        checkpoint = torch.load(pretrained_file)
    else:
        checkpoint = torch.load(pretrained_file, map_location=device)
    # Retrieve model parameters from the checkpoint.
    embeddings = load_embeddings(embeddings_file)
    print("\t* Loading test data...")
    test_data = My_Dataset(test_df, vocab_file, max_length, mode)
    test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)
    print("\t* Building model...")
    model = BiLSTM(embeddings,
                   num_labels=num_labels,
                   max_length=max_length,
                   device=device).to(device)
    model.load_state_dict(checkpoint["model"])
    print(20 * "=", " Testing BiLSTM model on device: {} ".format(device),
          20 * "=")
    batch_time, total_time, accuracy, predictions = test(model, test_loader)
    print("\n-> Average batch processing time: {:.4f}s, "
          "total test time: {:.4f}s, accuracy: {:.4f}%\n"
          .format(batch_time, total_time, (accuracy * 100)))
    test_prediction = pd.DataFrame({'prediction': predictions})
    if not os.path.exists(test_prediction_dir):
        os.makedirs(test_prediction_dir)
    test_prediction.to_csv(os.path.join(test_prediction_dir,
                                        test_prediction_name),
                           index=False)
def eval(tag_path, corpus_path):
    correct = 0
    total = 0
    acc_list = []
    model_name = MODEL_NAME
    embedding_dim = EMBEDDING_DIM
    hidden_dim = HIDDEN_DIM
    word_to_ix = WORD_TO_IX
    model = BiLSTM(len(word_to_ix), 5, embedding_dim, hidden_dim)
    checkpoint = torch.load(model_name)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    tag_to_ix = {'1': 0, '2': 1, '3': 2, '4': 3, '5': 4}
    sentences, tags = load_train_data(tag_path, corpus_path)
    labels = torch.tensor([[tag_to_ix[tag]] for tag in tags])
    with torch.no_grad():
        for i, sen in enumerate(tqdm(sentences)):
            input = prepare_sequence(sen, word_to_ix)
            output = model(input)
            _, predicted = torch.max(output.data, 1)
            label = labels[i]
            total += label.size(0)
            correct += (predicted == label).sum().item()
            acc = round(100 * correct / total, 2)
            acc_list.append(acc)
    assert len(acc_list) == len(sentences)
    final_acc = acc
    plt.plot(list(range(len(tags))), acc_list)
    plt.xlabel('pred_num')
    plt.ylabel('accuracy / %')
    plt.show()
    return final_acc
class Predictor:
    def __init__(self, config):
        self.config = config
        self.output_path = os.path.join(self.config.BASE_DIR,
                                        self.config.output_path)
        self.w2ix, self.ix2t = self.load_vocab()  # load the index dictionaries
        self.vocab_size = len(self.w2ix)
        self.sequence_length = self.config.sequence_length
        self.model = BiLSTM(self.config, self.vocab_size)
        self.load_graph()

    def load_vocab(self):
        with open(os.path.join(self.output_path, 'word_to_index.pkl'), 'rb') as fr:
            word_to_index = pickle.load(fr)
        with open(os.path.join(self.output_path, 'label_to_index.pkl'), 'rb') as fr:
            label_to_index = pickle.load(fr)
        index_to_label = {v: k for k, v in label_to_index.items()}
        return word_to_index, index_to_label

    def load_graph(self):
        self.sess = tf.Session()
        ckpt = tf.train.get_checkpoint_state(
            os.path.join(self.config.BASE_DIR, self.config.ckpt_model_path))
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print("Reloading model parameters..")
            self.model.saver.restore(self.sess, ckpt.model_checkpoint_path)
        else:
            raise ValueError("No such file: [{}]".format(
                self.config.ckpt_model_path))

    def sentence_to_ids(self, sentence):
        sentence_ids = [self.w2ix.get(token, self.w2ix.get("<UNK>"))
                        for token in sentence]
        # Truncate to sequence_length or pad with zeros; wrap in a list to
        # form a batch of size one.
        sentence_padded = [
            sentence_ids[:self.sequence_length]
            if len(sentence_ids) > self.sequence_length
            else sentence_ids + [0] * (self.sequence_length - len(sentence_ids))
        ]
        return sentence_padded

    def predict(self, sentence):
        sentence_idx = self.sentence_to_ids(sentence)
        prediction = self.model.predict(self.sess, sentence_idx).tolist()
        label = self.ix2t[prediction[0]]
        return label
def initialize_model(gpu, vocab_size, v_vec, emb_requires_grad, args):
    emb_dim = args.emb_dim
    h_dim = None
    class_num = 2
    is_gpu = gpu != -1
    if args.emb_type in ('ELMo', 'ELMoForManyLangs'):
        bilstm = BiLSTM(emb_dim, h_dim, class_num, vocab_size, is_gpu, v_vec,
                        emb_type=args.emb_type, elmo_model_dir=args.emb_path)
    else:
        # 'None' and all other embedding types use the same constructor call.
        bilstm = BiLSTM(emb_dim, h_dim, class_num, vocab_size, is_gpu, v_vec,
                        emb_type=args.emb_type)
    if is_gpu:
        bilstm = bilstm.cuda()
    for m in bilstm.modules():
        print(m.__class__.__name__)
        weights_init(m)
    if args.emb_type not in ('ELMo', 'ELMoForManyLangs', 'None'):
        for param in bilstm.word_embed.parameters():
            param.requires_grad = emb_requires_grad
    return bilstm
def main():
    X_train, Y_train, X_valid, Y_valid, timestamp, close_prices = load_data(
        'data.csv', TIME_WINDOW)
    X_train, Y_train, X_valid, Y_valid = [
        torch.from_numpy(i.astype(np.float32))
        for i in [X_train, Y_train, X_valid, Y_valid]
    ]
    model = BiLSTM(feature_num=FEATURE_NUM, time_window=TIME_WINDOW - 1)
    dataset_train = torch.utils.data.TensorDataset(X_train, Y_train)
    dataset_valid = torch.utils.data.TensorDataset(X_valid, Y_valid)
    train_dataloader = torch.utils.data.DataLoader(dataset=dataset_train,
                                                   batch_size=BATCH_SIZE,
                                                   shuffle=False)
    valid_dataloader = torch.utils.data.DataLoader(dataset=dataset_valid,
                                                   batch_size=BATCH_SIZE,
                                                   shuffle=False)
    min_loss = train(model, train_dataloader, valid_dataloader)
    print(f'Best trained model has a loss of {min_loss:.5f}.')
def initialize_model(gpu, vocab_size, v_vec, dropout_ratio, n_layers, model,
                     statistics_of_each_case_type):
    is_gpu = gpu != -1
    if model == 'Base' or model == 'FT':
        bilstm = BiLSTM(vocab_size, v_vec, dropout_ratio, n_layers, gpu=is_gpu)
    elif model == 'OneH':
        bilstm = OneHot(vocab_size, v_vec, dropout_ratio, n_layers, gpu=is_gpu)
    elif model == 'FA':
        bilstm = FeatureAugmentation(vocab_size, v_vec, dropout_ratio, n_layers,
                                     gpu=is_gpu)
    elif model == 'CPS':
        bilstm = ClassProbabilityShift(
            vocab_size, v_vec, dropout_ratio, n_layers,
            statistics_of_each_case_type=statistics_of_each_case_type,
            gpu=is_gpu)
    elif model == 'MIX':
        bilstm = Mixture(
            vocab_size, v_vec, dropout_ratio, n_layers,
            statistics_of_each_case_type=statistics_of_each_case_type,
            gpu=is_gpu)
    else:
        # Guard against an unknown model name instead of hitting an
        # UnboundLocalError below.
        raise ValueError("Unknown model type: {}".format(model))
    if is_gpu:
        bilstm = bilstm.cuda()
    for m in bilstm.modules():
        print(m.__class__.__name__)
        weights_init(m)
    return bilstm
def get_time_to_score(tsv_path, thing, model_path):
    time_to_count = {}
    time_to_scoresum = {}
    if thing == 'hair_dryer':
        product_id = '732252283'
    elif thing == 'microwave':
        product_id = '423421857'
    else:
        product_id = '246038397'
    with open('train_' + thing + '_word_to_ix.json', 'r') as j:
        word_to_ix = json.load(j)
    embedding_dim = EMBEDDING_DIM
    hidden_dim = HIDDEN_DIM
    model = BiLSTM(len(word_to_ix), 5, embedding_dim, hidden_dim)
    checkpoints = torch.load(model_path)
    model.load_state_dict(checkpoints['model_state_dict'])
    model.eval()
    with open(tsv_path, 'r') as f:
        reader = csv.reader(f, delimiter='\t')
        for i, r in enumerate(reader):
            if i == 0 or r[4] != product_id:
                continue
            month, _, year = r[14].split('/')
            if year not in {'2014', '2015'}:
                continue
            time = get_idx_by_year_month(int(year), int(month))
            if time < 8:
                continue
            sen = (r[12] + ' ' + r[13]).lower()
            sen = re.sub(r'[^A-Za-z0-9,.!]+', ' ', sen)
            input = prepare_sequence(sen.split(), word_to_ix)
            with torch.no_grad():
                output = model(input)
            _, predicted = torch.max(output.data, 1)
            pred_score = predicted.item()
            if time not in time_to_count:
                time_to_count[time] = 0
                time_to_scoresum[time] = 0.
            time_to_count[time] += 1
            time_to_scoresum[time] += pred_score
    time_to_scoremean = {}
    for time in time_to_count.keys():
        time_to_scoremean[time] = time_to_scoresum[time] / time_to_count[time]
    print(time_to_count)
    return time_to_scoremean
def main(args): print "Running BiLSTM model" print args random.seed(args.seed) trainset = [] devset = [] print >> sys.stderr, "Loading dataset.." assert(os.path.isdir(args.datapath)) word_vocab = [] for fname in sorted(os.listdir(args.datapath)): if os.path.isdir(fname): continue #if fname.endswith('train.ner.txt'): if fname.endswith('.ppi.txt'): print fname dataset, vocab = load_dataset(os.path.join(args.datapath,fname)) word_vocab += vocab trainset += dataset print >> sys.stderr, "Loaded {} instances with a vocab size of {} from {}".format(len(dataset),len(vocab),fname) print "Loaded {} instances from data set".format(len(trainset)) word_vocab = sorted(set(word_vocab)) vocab_cache = os.path.join(args.datapath,'word_vocab.ner.txt') with open(vocab_cache,'w') as f: print "Saved vocab to", vocab_cache pickle.dump(word_vocab,f) embeddings = load_embeddings(args.embeddings_path, word_vocab, 200) labels = ['B-MISC','I-MISC','O'] model_name = 'saved_model_autumn' if not os.path.exists('{}/scratch'.format(args.datapath)): os.mkdir('{}/scratch'.format(args.datapath)) if os.path.exists('{}/{}'.format(args.datapath,model_name)): os.rename('{}/{}'.format(args.datapath,model_name), '{}/{}_{}'.format(args.datapath,model_name,int(time.time()))) os.mkdir('{}/{}'.format(args.datapath,model_name)) for j in range(num_ensembles): m = BiLSTM(labels=labels, word_vocab=word_vocab, word_embeddings=embeddings, optimizer=args.optimizer, embedding_size=200, char_embedding_size=32, lstm_dim=200, num_cores=args.num_cores, embedding_factor=args.embedding_factor, learning_rate=args.learning_rate, decay_rate=args.decay_rate, dropout_keep=args.keep_prob) training_samples = random.sample(trainset,len(trainset)/2) cut = int(0.8 * len(training_samples)) X_train, y_train = zip(*training_samples[:cut]) X_dev, y_dev = zip(*training_samples[cut:]) print "Training on {}, tuning on {}".format(len(X_train),len(X_dev)) m.fit(X_train, y_train, X_dev, y_dev, num_iterations=args.num_iterations, num_it_per_ckpt=args.num_it_per_ckpt, batch_size=args.batch_size, seed=j, fb2=True) save_path = '{}/{}/model_{}'.format(args.datapath,model_name,j) m.save(save_path) print "Saved model {} to {}".format(j,save_path)
with tf.Graph().as_default():
    session = tf.Session()
    with session.as_default():
        # Define the training procedure.
        with tf.variable_scope('embedding'):
            embedding = tf.get_variable(
                'embedding',
                shape=word_embedding.shape,
                dtype=tf.float32,
                initializer=tf.constant_initializer(word_embedding),
                trainable=True)
        model = BiLSTM(FLAGS.seq_length, FLAGS.hidden_size, FLAGS.layer_num,
                       FLAGS.class_num, FLAGS.learning_rate,
                       FLAGS.l2_reg_lambda)
        train_writer = tf.summary.FileWriter(FLAGS.log_path + '/train',
                                             session.graph)
        dev_writer = tf.summary.FileWriter(FLAGS.log_path + '/dev',
                                           session.graph)
        merged = tf.summary.merge_all()
        saver = tf.train.Saver()
        session.run(tf.global_variables_initializer())
        session.run(tf.local_variables_initializer())
        # Training loop, one batch per step.
        for step in range(FLAGS.epochs_num):
    model_name, 'epochs', str(args.epochs), args.optimizer,
    'lr', str(args.lr), 'hidden', str(args.hidden), 'layers', str(args.layers)
]
model_name = '_'.join(model_name)
model_path = os.path.join(save_model_dir, model_name)
print('writer_path:', writer_path)
print('save_model_dir:', save_model_dir)
print('model_name:', model_name)

if args.crf:
    model = LSTM_CRF(args.hidden, args.layers, args.dropout)
else:
    model = BiLSTM(args.hidden, 8, args.dropout, args.layers)
criterion = nn.CrossEntropyLoss()

if args.load_model:
    model.load_state_dict(torch.load(model_path))

if args.optimizer == 'adam':
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
else:
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                momentum=args.momentum)
lr_lambda = lambda epoch: 1 / (1 + (epoch + 1) * args.lr_decay)
scheduler = LambdaLR(optimizer, lr_lambda=lr_lambda)

if use_cuda:
    model = model.cuda()

if not args.crf:
if FLAGS.mode != 'distil':
    # Build the vocabulary.
    word2idx, idx2word, vocab_path = create_vocabulary(FLAGS.vocab_size)
    create_data_ids(word2idx)
else:
    # Build the vocabulary (augmented dataset).
    word2idx, idx2word, vocab_path = create_vocabulary_distil(FLAGS.vocab_size)
    create_data_ids_distil(word2idx)

if not tf.gfile.Exists(FLAGS.model_save_dir):
    tf.gfile.MakeDirs(FLAGS.model_save_dir)

# Create the model object.
model = BiLSTM(vocab_size=FLAGS.vocab_size,
               batch_size=FLAGS.batch_size,
               embedding_size=FLAGS.num_embedding_units,
               num_hidden_size=FLAGS.num_hidden_units,
               maxlen=FLAGS.maxlen)

# Create the training driver.
solver = Solver(model=model,
                training_iter=FLAGS.train_step,
                word2idx=word2idx,
                idx2word=idx2word,
                log_dir=FLAGS.log_dir,
                model_save_dir=FLAGS.model_save_dir)

if FLAGS.mode == 'train':
    solver.train()
elif FLAGS.mode == 'test':
    solver.test()
elif FLAGS.mode == 'distil':
class Seq_MNIST_Trainer():
    def __init__(self, trainer_params, args):
        self.args = args
        self.trainer_params = trainer_params
        random.seed(trainer_params.random_seed)
        torch.manual_seed(trainer_params.random_seed)
        if args.cuda:
            torch.cuda.manual_seed_all(trainer_params.random_seed)

        kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
        self.train_data = seq_mnist_train(trainer_params)
        self.val_data = seq_mnist_val(trainer_params)
        self.train_loader = DataLoader(self.train_data,
                                       batch_size=trainer_params.batch_size,
                                       shuffle=True, **kwargs)
        self.val_loader = DataLoader(self.val_data,
                                     batch_size=trainer_params.test_batch_size,
                                     shuffle=True, **kwargs)
        self.starting_epoch = 1
        self.prev_loss = 10000

        self.model = BiLSTM(trainer_params)
        self.criterion = wp.CTCLoss(size_average=True)
        self.labels = [i for i in range(trainer_params.num_classes - 1)]
        self.decoder = seq_mnist_decoder(labels=self.labels)

        if args.resume or args.eval or args.export:
            print("Loading model from {}".format(args.save_path))
            package = torch.load(args.save_path,
                                 map_location=lambda storage, loc: storage)
            self.model.load_state_dict(package['state_dict'])

        if args.cuda:
            torch.cuda.set_device(args.gpus)
            self.model = self.model.cuda()

        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=trainer_params.lr)

        if args.resume:
            self.optimizer.load_state_dict(package['optim_dict'])
            self.starting_epoch = package['starting_epoch']
            self.prev_loss = package['prev_loss']
            if args.cuda:
                for state in self.optimizer.state.values():
                    for k, v in state.items():
                        if torch.is_tensor(v):
                            state[k] = v.cuda()

        if args.init_bn_fc_fusion:
            if not trainer_params.prefused_bn_fc:
                self.model.batch_norm_fc.init_fusion()
                self.trainer_params.prefused_bn_fc = True
            else:
                raise Exception("BN and FC are already fused.")

    def serialize(self, model, trainer_params, optimizer, starting_epoch,
                  prev_loss):
        package = {'state_dict': model.state_dict(),
                   'trainer_params': trainer_params,
                   'optim_dict': optimizer.state_dict(),
                   'starting_epoch': starting_epoch,
                   'prev_loss': prev_loss}
        return package

    def save_model(self, epoch, loss_value):
        print("Model saved at: {}\n".format(self.args.save_path))
        self.prev_loss = loss_value
        torch.save(self.serialize(model=self.model,
                                  trainer_params=self.trainer_params,
                                  optimizer=self.optimizer,
                                  starting_epoch=epoch + 1,
                                  prev_loss=self.prev_loss),
                   self.args.save_path)

    def train(self, epoch):
        self.model.train()
        for i, item in enumerate(self.train_loader):
            data, labels, output_len, lab_len = item
            data = Variable(data.transpose(1, 0), requires_grad=False)
            labels = Variable(labels.view(-1), requires_grad=False)
            output_len = Variable(output_len.view(-1), requires_grad=False)
            lab_len = Variable(lab_len.view(-1), requires_grad=False)
            if self.args.cuda:
                data = data.cuda()
            output = self.model(data)
            loss = self.criterion(output, labels, output_len, lab_len)
            loss_value = loss.data[0]
            print("Loss value for epoch = {}/{} and batch {}/{} is = {:.4f}"
                  .format(epoch, self.trainer_params.epochs,
                          (i + 1) * self.trainer_params.batch_size,
                          len(self.train_data), loss_value))
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            if self.args.cuda:
                torch.cuda.synchronize()

    def test(self, epoch=0, save_model_flag=False):
        self.model.eval()
        loss_value = 0
        for i, item in enumerate(self.val_loader):
            data, labels, output_len, lab_len = item
            data = Variable(data.transpose(1, 0), requires_grad=False)
            labels = Variable(labels.view(-1), requires_grad=False)
            output_len = Variable(output_len.view(-1), requires_grad=False)
            lab_len = Variable(lab_len.view(-1), requires_grad=False)
            if self.args.cuda:
                data = data.cuda()
            output = self.model(data)
            # Decode one random sample from the batch for a qualitative check.
            index = random.randint(0, self.trainer_params.test_batch_size - 1)
            label = labels[index * self.trainer_params.word_size:
                           (index + 1) * self.trainer_params.word_size].data.numpy()
            label = label - 1
            prediction = self.decoder.decode(output[:, index, :],
                                             output_len[index], lab_len[index])
            accuracy = self.decoder.hit(prediction, label)
            print("Sample Label = {}".format(self.decoder.to_string(label)))
            print("Sample Prediction = {}".format(
                self.decoder.to_string(prediction)))
            print("Accuracy on Sample = {:.2f}%\n\n".format(accuracy))
            loss = self.criterion(output, labels, output_len, lab_len)
            loss_value += loss.data.numpy()
        loss_value /= (len(self.val_data)
                       // self.trainer_params.test_batch_size)
        print("Average Loss Value for Val Data is = {:.4f}\n".format(
            float(loss_value)))
        if loss_value < self.prev_loss and save_model_flag:
            self.save_model(epoch, loss_value)

    def eval_model(self):
        self.test()

    def train_model(self):
        for epoch in range(self.starting_epoch,
                           self.trainer_params.epochs + 1):
            self.train(epoch)
            self.test(epoch=epoch, save_model_flag=True)
            if epoch % 20 == 0:
                # Decay the learning rate by 2% every 20 epochs.
                self.optimizer.param_groups[0]['lr'] *= 0.98

    def export_model(self, simd_factor, pe):
        self.model.eval()
        self.model.export('r_model_fw_bw.hpp', simd_factor, pe)

    def export_image(self, idx=100):
        img, label = self.val_data.images[:, idx, :], self.val_data.labels[0][idx]
        img = img.transpose(1, 0)
        label -= 1
        label = self.decoder.to_string(label)
        from PIL import Image
        from matplotlib import cm
        im = Image.fromarray(np.uint8(cm.gist_earth(img) * 255))
        im.save('test_image.png')
        img = img.transpose(1, 0)
        img = np.reshape(img, (-1, 1))
        np.savetxt("test_image.txt", img, fmt='%.10f')
        f = open('test_image_gt.txt', 'w')
        f.write(label)
        f.close()
        print("Exported image with label = {}".format(label))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', type=str, default='rnn',
        help="Available models are: 'rnn', 'cnn', 'bilstm', 'fasttext', and "
             "'distilbert'\nDefault is 'rnn'")
    parser.add_argument('--train_data_path', type=str,
                        default="./data/train_clean.csv",
                        help="Path to the training data")
    parser.add_argument('--test_data_path', type=str,
                        default="./data/dev_clean.csv",
                        help="Path to the test data")
    parser.add_argument('--seed', type=int, default=1234)
    parser.add_argument('--vectors', type=str, default='fasttext.simple.300d',
                        help="""
                        Pretrained vectors:
                        Visit https://github.com/pytorch/text/blob/9ce7986ddeb5b47d9767a5299954195a1a5f9043/torchtext/vocab.py#L146
                        for more
                        """)
    parser.add_argument('--max_vocab_size', type=int, default=750)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--bidirectional', type=bool, default=True)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--hidden_dim', type=int, default=64)
    parser.add_argument('--output_dim', type=int, default=1)
    parser.add_argument('--n_layers', type=int, default=2)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--n_epochs', type=int, default=5)
    parser.add_argument('--n_filters', type=int, default=100)
    parser.add_argument('--filter_sizes', type=list, default=[3, 4, 5])
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    ########## BILSTM ##########
    if args.model == "bilstm":
        print('\nBiLSTM')
        TEXT = Field(tokenize='spacy')
        LABEL = LabelField(dtype=torch.float)
        data_fields = [("text", TEXT), ("label", LABEL)]
        train_data = TabularDataset(args.train_data_path, format='csv',
                                    fields=data_fields, skip_header=True,
                                    csv_reader_params={'delimiter': ","})
        test_data = TabularDataset(args.test_data_path, format='csv',
                                   fields=data_fields, skip_header=True,
                                   csv_reader_params={'delimiter': ","})
        train_data, val_data = train_data.split(
            split_ratio=0.8, random_state=random.seed(args.seed))
        TEXT.build_vocab(train_data, max_size=args.max_vocab_size,
                         vectors=args.vectors, unk_init=torch.Tensor.normal_)
        LABEL.build_vocab(train_data)
        train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
            (train_data, val_data, test_data),
            batch_size=args.batch_size,
            sort_key=lambda x: len(x.text),
            device=device)
        input_dim = len(TEXT.vocab)
        embedding_dim = get_embedding_dim(args.vectors)
        pad_idx = TEXT.vocab.stoi[TEXT.pad_token]
        unk_idx = TEXT.vocab.stoi[TEXT.unk_token]
        model = BiLSTM(input_dim, embedding_dim, args.hidden_dim,
                       args.output_dim, args.n_layers, args.bidirectional,
                       args.dropout, pad_idx)
        pretrained_embeddings = TEXT.vocab.vectors
        model.embedding.weight.data.copy_(pretrained_embeddings)
        # Zero out the <unk> and <pad> embeddings.
        model.embedding.weight.data[unk_idx] = torch.zeros(embedding_dim)
        model.embedding.weight.data[pad_idx] = torch.zeros(embedding_dim)
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
        criterion = nn.BCEWithLogitsLoss()
        model.to(device)
        criterion.to(device)
        best_valid_loss = float('inf')
        print("\nTraining...")
        print("===========")
        for epoch in range(1, args.n_epochs + 1):
            start_time = time.time()
            train_loss, train_acc = train(model, train_iterator, optimizer,
                                          criterion)
            valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
            end_time = time.time()
            epoch_mins, epoch_secs = epoch_time(start_time, end_time)
            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(model.state_dict(),
                           './checkpoints/{}-model.pt'.format(args.model))
            print(f'[Epoch: {epoch:02}] | Epoch Time: {epoch_mins}m {epoch_secs}s')
            print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
            print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')
        model.load_state_dict(
            torch.load('./checkpoints/{}-model.pt'.format(args.model)))
        test_loss, test_acc = evaluate(model, test_iterator, criterion)
        print('\nEvaluating...')
        print("=============")
        print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')
        # Test Loss: 0.139, Test Acc: 95.27%

    ########## VANILLA RNN ##########
    else:
        print('\nVanilla RNN')
        TEXT = Field(tokenize='spacy')
        LABEL = LabelField(dtype=torch.float)
        data_fields = [("text", TEXT), ("label", LABEL)]
        train_data = TabularDataset(args.train_data_path, format='csv',
                                    fields=data_fields, skip_header=True,
                                    csv_reader_params={'delimiter': ","})
        test_data = TabularDataset(args.test_data_path, format='csv',
                                   fields=data_fields, skip_header=True,
                                   csv_reader_params={'delimiter': ","})
        train_data, val_data = train_data.split(
            split_ratio=0.8, random_state=random.seed(args.seed))
        TEXT.build_vocab(train_data, max_size=args.max_vocab_size,
                         vectors=args.vectors)
        LABEL.build_vocab(train_data)
        train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
            (train_data, val_data, test_data),
            batch_size=args.batch_size,
            sort_key=lambda x: len(x.text),
            device=device)
        input_dim = len(TEXT.vocab)
        embedding_dim = get_embedding_dim(args.vectors)
        model = RNN(input_dim, embedding_dim, args.hidden_dim, args.output_dim)
        pretrained_embeddings = TEXT.vocab.vectors
        model.embedding.weight.data.copy_(pretrained_embeddings)
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
        criterion = nn.BCEWithLogitsLoss()
        model.to(device)
        criterion.to(device)
        best_valid_loss = float('inf')
        print("\nTraining...")
        print("===========")
        for epoch in range(1, args.n_epochs + 1):
            start_time = time.time()
            train_loss, train_acc = train(model, train_iterator, optimizer,
                                          criterion)
            valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
            end_time = time.time()
            epoch_mins, epoch_secs = epoch_time(start_time, end_time)
            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(model.state_dict(),
                           './checkpoints/{}-model.pt'.format(args.model))
            print(f'[Epoch: {epoch:02}] | Epoch Time: {epoch_mins}m {epoch_secs}s')
            print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
            print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')
        model.load_state_dict(
            torch.load('./checkpoints/{}-model.pt'.format(args.model)))
        test_loss, test_acc = evaluate(model, test_iterator, criterion)
        print('\nEvaluating...')
        print("=============")
        print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')
        # Test Loss: 0.138, Test Acc: 95.05%
class Trainer:
    def __init__(self, config):
        self.config = config
        self.load_data()  # load the datasets
        self.model = BiLSTM(self.config, self.vocab_size,
                            self.word_vectors)  # initialize the model

    def load_data(self):
        self.train_dataloader = TrainData(self.config)
        self.eval_dataloader = TestData(self.config)
        train_data_path = os.path.join(self.config.BASE_DIR,
                                       self.config.train_data_path)
        self.train_inputs, self.train_labels, self.t2ix = \
            self.train_dataloader.gen_train_data(train_data_path)
        eval_data_path = os.path.join(self.config.BASE_DIR,
                                      self.config.eval_data_path)
        self.eval_inputs, self.eval_labels, _ = \
            self.eval_dataloader.gen_test_data(eval_data_path)
        self.vocab_size = self.train_dataloader.vocab_size
        self.word_vectors = self.train_dataloader.word_vectors

    def train(self):
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9,
                                    allow_growth=True)
        sess_config = tf.ConfigProto(log_device_placement=False,
                                     allow_soft_placement=True,
                                     gpu_options=gpu_options)
        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())  # initialize graph variables
            current_step = 0

            # Create the train/eval summary paths and writers.
            train_summary_path = os.path.join(
                self.config.BASE_DIR, self.config.summary_path + "/train")
            eval_summary_path = os.path.join(
                self.config.BASE_DIR, self.config.summary_path + "/eval")
            self._check_directory(train_summary_path)
            self._check_directory(eval_summary_path)
            train_summary_writer = tf.summary.FileWriter(train_summary_path,
                                                         sess.graph)
            eval_summary_writer = tf.summary.FileWriter(eval_summary_path,
                                                        sess.graph)

            # Train & eval process.
            for epoch in range(self.config.epochs):
                print(f"----- Epoch {epoch + 1}/{self.config.epochs} -----")
                for batch in self.train_dataloader.next_batch(
                        self.train_inputs, self.train_labels,
                        self.config.batch_size):
                    summary, loss, predictions = self.model.train(
                        sess, batch, self.config.keep_prob)
                    accuracy = self.model.get_metrics(sess, batch)
                    train_summary_writer.add_summary(summary, current_step)
                    print(f"! Train epoch: {epoch}, step: {current_step}, "
                          f"train loss: {loss}, accuracy: {accuracy}")
                    current_step += 1
                    if (self.eval_dataloader and
                            current_step % self.config.eval_every == 0):
                        losses = []
                        acces = []
                        for eval_batch in self.eval_dataloader.next_batch(
                                self.eval_inputs, self.eval_labels,
                                self.config.batch_size):
                            eval_summary, eval_loss, eval_predictions = \
                                self.model.eval(sess, eval_batch)
                            # Compute metrics on the eval batch, not the last
                            # train batch.
                            eval_accuracy = self.model.get_metrics(sess,
                                                                   eval_batch)
                            eval_summary_writer.add_summary(eval_summary,
                                                            current_step)
                            losses.append(eval_loss)
                            acces.append(eval_accuracy)
                        print(f"! Eval epoch: {epoch}, step: {current_step}, "
                              f"eval loss: {sum(losses) / len(losses)}, "
                              f"accuracy: {sum(acces) / len(acces)}")
                        if self.config.ckpt_model_path:
                            save_path = os.path.join(
                                self.config.BASE_DIR,
                                self.config.ckpt_model_path)
                            self._check_directory(save_path)
                            model_save_path = os.path.join(
                                save_path, self.config.model_name)
                            self.model.saver.save(sess, model_save_path,
                                                  global_step=current_step)

    def _check_directory(self, path):
        if not os.path.exists(path):
            os.makedirs(path)
                       shuffle=True)
dev_batches, num_dev_batches, num_dev_samples = get_batch(
    cfg.data_npy_path, cfg.filename_x_dev, cfg.filename_y_dev,
    cfg.epochs, cfg.maxlen, cfg.len_wv, cfg.batch_size[0],
    cfg.num_classes, str(fold), shuffle=False)

# Create an iterator of the correct shape and type.
iter = tf.data.Iterator.from_structure(train_batches.output_types,
                                       train_batches.output_shapes)
xs, ys = iter.get_next()
train_init_opt = iter.make_initializer(train_batches)
dev_init_opt = iter.make_initializer(dev_batches)

model = BiLSTM(param)
loss, train_opt, pred_train, train_summaries, global_step, lstm_cell_fw, x_check = \
    model.train(xs, ys)
logits_eval, probs_eval, pred_eval, ys = model.eval(xs, ys)

# Variables for early stopping.
dev_history = []
dev_best = 0
stop_times = 0
logging.info('# Session')
embeddings = gensim.models.KeyedVectors.load_word2vec_format(
    '../embeddings/german.model', binary=True)
print("Done.")

# Loop through each word in the embeddings.
for word in embeddings.vocab:
    if word.lower() in words:
        vector = embeddings.wv[word]
        word_embeddings.append(vector)
        word2Idx[word] = len(word2Idx)

word_embeddings = np.array(word_embeddings)
print(f"Found embeddings for {word_embeddings.shape[0]} of {len(words)} words.")

train_sentences = format_to_tensor(train_sentences, word2Idx, label2Idx)

model = BiLSTM(word_embeddings=torch.FloatTensor(word_embeddings),
               num_classes=len(labels))
model.train()

epochs = 50
learning_rate = 0.015
momentum = 0.9
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate,
                            momentum=momentum)


def eval():
    correct = 0
    total = 0
    for tokens, true_labels in train_sentences:
        total += len(true_labels)
def train_model(args, train_text=None, train_labels=None, eval_text=None,
                eval_labels=None, tokenizer=None):
    textattack.shared.utils.set_seed(args.random_seed)
    _make_directories(args.output_dir)
    num_gpus = torch.cuda.device_count()

    # Save logger writes to file.
    log_txt_path = os.path.join(args.output_dir, "log.txt")
    fh = logging.FileHandler(log_txt_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)
    logger.info(f"Writing logs to {log_txt_path}.")

    train_examples_len = len(train_text)
    label_set = set(train_labels)
    args.num_labels = len(label_set)
    logger.info(
        f"Loaded dataset. Found: {args.num_labels} labels: {sorted(label_set)}")

    if len(train_labels) != len(train_text):
        raise ValueError(
            f"Number of train examples ({len(train_text)}) does not match "
            f"number of labels ({len(train_labels)})")
    if len(eval_labels) != len(eval_text):
        raise ValueError(
            f"Number of test examples ({len(eval_text)}) does not match "
            f"number of labels ({len(eval_labels)})")

    if args.model == "gru":
        textattack.shared.logger.info(
            "Loading textattack model: GRUForClassification")
        model = BiGRU()
        model.to(device)
    elif args.model == "lstm":
        textattack.shared.logger.info(
            "Loading textattack model: LSTMForClassification")
        model = BiLSTM()
        model.to(device)

    # attack_class = attack_from_args(args)
    # We are adversarial training if the user specified an attack along with
    # the training args.
    # adversarial_training = (attack_class is not None) and (not args.check_robustness)

    # Multi-GPU training.
    if num_gpus > 1:
        model = torch.nn.DataParallel(model)
        logger.info("Using torch.nn.DataParallel.")
        logger.info(f"Training model across {num_gpus} GPUs")

    num_train_optimization_steps = (
        int(train_examples_len / args.batch_size / args.grad_accum_steps)
        * args.num_train_epochs)

    if args.model == "lstm" or args.model == "cnn" or args.model == "gru":
        def need_grad(x):
            return x.requires_grad
        optimizer = torch.optim.Adam(filter(need_grad, model.parameters()),
                                     lr=args.learning_rate)
        scheduler = None
    else:
        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [p for n, p in param_optimizer
                           if not any(nd in n for nd in no_decay)],
                "weight_decay": 0.01,
            },
            {
                "params": [p for n, p in param_optimizer
                           if any(nd in n for nd in no_decay)],
                "weight_decay": 0.0,
            },
        ]
        optimizer = transformers.optimization.AdamW(
            optimizer_grouped_parameters, lr=args.learning_rate)
        scheduler = transformers.optimization.get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=args.warmup_proportion,
            num_training_steps=num_train_optimization_steps,
        )

    # Start Tensorboard and log hyperparams.
    from torch.utils.tensorboard import SummaryWriter
    tb_writer = SummaryWriter(args.output_dir)

    # Use Weights & Biases, if enabled.
    if args.enable_wandb:
        global wandb
        wandb = textattack.shared.utils.LazyLoader("wandb", globals(), "wandb")
        wandb.init(sync_tensorboard=True)

    # Save original args to file.
    args_save_path = os.path.join(args.output_dir, "train_args.json")
    _save_args(args, args_save_path)
    logger.info(f"Wrote original training args to {args_save_path}.")

    tb_writer.add_hparams(
        {k: v for k, v in vars(args).items() if _is_writable_type(v)}, {})

    # Start training.
    logger.info("***** Running training *****")
    logger.info(f"\tNum examples = {train_examples_len}")
    logger.info(f"\tBatch size = {args.batch_size}")
    logger.info(f"\tMax sequence length = {args.max_length}")
    logger.info(f"\tNum steps = {num_train_optimization_steps}")
    logger.info(f"\tNum epochs = {args.num_train_epochs}")
    logger.info(f"\tLearning rate = {args.learning_rate}")

    eval_dataloader = _make_dataloader(tokenizer, eval_text, eval_labels,
                                       args.batch_size)
    train_dataloader = _make_dataloader(tokenizer, train_text, train_labels,
                                        args.batch_size)

    global_step = 0
    tr_loss = 0
    model.train()
    args.best_eval_score = 0
    args.best_eval_score_epoch = 0
    args.epochs_since_best_eval_score = 0

    def loss_backward(loss):
        if num_gpus > 1:
            loss = loss.mean()  # mean() to average on multi-gpu parallel training
        if args.grad_accum_steps > 1:
            loss = loss / args.grad_accum_steps
        loss.backward()
        return loss

    loss_fct = torch.nn.CrossEntropyLoss()

    for epoch in tqdm.trange(int(args.num_train_epochs), desc="Epoch",
                             position=0, leave=True):
        prog_bar = tqdm.tqdm(train_dataloader, desc="Iteration",
                             position=0, leave=True)
        # Use these variables to track training accuracy during classification.
        correct_predictions = 0
        total_predictions = 0
        for step, batch in enumerate(prog_bar):
            ids1, ids2, msk1, msk2, labels = batch
            labels = labels.to(device)
            ids1 = ids1.to(device)
            ids2 = ids2.to(device)
            msk1 = msk1.to(device)
            msk2 = msk2.to(device)
            logits = model(ids1, ids2, msk1, msk2)
            loss = loss_fct(logits, labels)
            pred_labels = logits.argmax(dim=-1)
            correct_predictions += (pred_labels == labels).sum().item()
            total_predictions += len(pred_labels)
            loss = loss_backward(loss)
            tr_loss += loss.item()
            if global_step % args.tb_writer_step == 0:
                tb_writer.add_scalar("loss", loss.item(), global_step)
                if scheduler is not None:
                    tb_writer.add_scalar("lr", scheduler.get_last_lr()[0],
                                         global_step)
                else:
                    tb_writer.add_scalar("lr", args.learning_rate, global_step)
            if global_step > 0:
                prog_bar.set_description(f"Loss {tr_loss/global_step}")
            if (step + 1) % args.grad_accum_steps == 0:
                optimizer.step()
                if scheduler is not None:
                    scheduler.step()
                optimizer.zero_grad()
            # Save model checkpoint to file.
            if (global_step > 0 and (args.checkpoint_steps > 0)
                    and (global_step % args.checkpoint_steps) == 0):
                _save_model_checkpoint(model, args.output_dir, global_step)
            # Increment the step counter.
            global_step += 1

        # Print training accuracy, if we're tracking it.
        if total_predictions > 0:
            train_acc = correct_predictions / total_predictions
            logger.info(f"Train accuracy: {train_acc*100}%")
            tb_writer.add_scalar("epoch_train_score", train_acc, epoch)

        # Check accuracy after each epoch, skipping args.num_clean_epochs.
        if epoch >= args.num_clean_epochs:
            eval_score = _get_eval_score(model, eval_dataloader, False)
            tb_writer.add_scalar("epoch_eval_score", eval_score, epoch)
            if args.checkpoint_every_epoch:
                _save_model_checkpoint(model, args.output_dir, global_step)
            logger.info(
                f"Eval {'pearson correlation' if args.do_regression else 'accuracy'}: "
                f"{eval_score*100}%")
            if eval_score > args.best_eval_score:
                args.best_eval_score = eval_score
                args.best_eval_score_epoch = epoch
                args.epochs_since_best_eval_score = 0
                _save_model(model, args.output_dir, args.weights_name,
                            args.config_name)
                logger.info(f"Best acc found. Saved model to {args.output_dir}.")
                _save_args(args, args_save_path)
                logger.info(f"Saved updated args to {args_save_path}")
            else:
                args.epochs_since_best_eval_score += 1
                if (args.early_stopping_epochs > 0) and (
                        args.epochs_since_best_eval_score
                        > args.early_stopping_epochs):
                    logger.info(
                        f"Stopping early since it's been "
                        f"{args.early_stopping_epochs} steps since validation "
                        f"acc increased")
                    break

        if args.check_robustness:
            samples_to_attack = list(zip(eval_text, eval_labels))
            samples_to_attack = random.sample(samples_to_attack, 1000)
            adv_attack_results = _generate_adversarial_examples(
                model_wrapper, attack_class, samples_to_attack)
            attack_types = [r.__class__.__name__ for r in adv_attack_results]
            attack_types = collections.Counter(attack_types)
            adv_acc = 1 - (attack_types["SkippedAttackResult"]
                           / len(adv_attack_results))
            total_attacks = (attack_types["SuccessfulAttackResult"]
                             + attack_types["FailedAttackResult"])
            adv_succ_rate = (attack_types["SuccessfulAttackResult"]
                             / total_attacks)
            after_attack_acc = (attack_types["FailedAttackResult"]
                                / len(adv_attack_results))
            tb_writer.add_scalar("robustness_test_acc", adv_acc, global_step)
            tb_writer.add_scalar("robustness_total_attacks", total_attacks,
                                 global_step)
            tb_writer.add_scalar("robustness_attack_succ_rate", adv_succ_rate,
                                 global_step)
            tb_writer.add_scalar("robustness_after_attack_acc",
                                 after_attack_acc, global_step)
            logger.info(f"Eval after-attack accuracy: {100*after_attack_acc}%")

    # Read the saved model and report its eval performance.
    logger.info("Finished training. Re-loading and evaluating model from disk.")
    model_wrapper = model_from_args(args, args.num_labels)
    model = model_wrapper.model
    model.load_state_dict(
        torch.load(os.path.join(args.output_dir, args.weights_name)))
    eval_score = _get_eval_score(model, eval_dataloader, args.do_regression)
    logger.info(
        f"Saved model {'pearson correlation' if args.do_regression else 'accuracy'}: "
        f"{eval_score*100}%")

    if args.save_last:
        _save_model(model, args.output_dir, args.weights_name,
                    args.config_name)

    # End of training; save the tokenizer.
    try:
        tokenizer.save_pretrained(args.output_dir)
        logger.info(f"Saved tokenizer {tokenizer} to {args.output_dir}.")
    except AttributeError:
        logger.warn(
            f"Error: could not save tokenizer {tokenizer} to {args.output_dir}.")

    # Save a little readme with model info.
    write_readme(args, args.best_eval_score, args.best_eval_score_epoch)

    _save_args(args, args_save_path)
    tb_writer.close()
    logger.info(f"Wrote final training args to {args_save_path}.")
from model import BiLSTM
from utils import batch_iter, get_data
from vocab import Vocab
from seqeval.metrics import classification_report
from torch import optim
import numpy as np
import torch

x_train, x_valid, x_test, y_train, y_valid, y_test = get_data('time_delay')
train_data = list(zip(x_train, y_train))
vocab = Vocab.from_corpus(x_train)
tag_vocab = Vocab.from_corpus(y_train)

model = BiLSTM(vocab, tag_vocab, 100, 256)
torch.cuda.set_device(0)
model.cuda()
optimizer = optim.Adam(model.parameters(), lr=0.01)

for epoch in range(3):
    for sents, labels in batch_iter(train_data, 16):
        model.zero_grad()
        loss, acc = model(sents, labels)
        print("epoch {}:".format(epoch), loss, acc)
        loss.backward()
        optimizer.step()

test_data = list(zip(x_test, y_test))
preds = []
for sent, labels in test_data:
    pred = model.predict([sent])
    preds.append(pred.tolist()[0])
preds = [[tag_vocab.id2word[i] for i in sent] for sent in preds]
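# `classification_report` is imported above but never called; a natural
# completion (an assumption about the intended evaluation, since `y_test`
# here appears to hold the gold tag sequences) would be:
print(classification_report(y_test, preds))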
def model_train_validate_test(train_df, dev_df, test_df, embeddings_file,
                              vocab_file, target_dir, mode, num_labels=2,
                              max_length=50, epochs=50, batch_size=128,
                              lr=0.0005, patience=5, max_grad_norm=10.0,
                              gpu_index=0, if_save_model=False,
                              checkpoint=None):
    device = torch.device(
        "cuda:{}".format(gpu_index) if torch.cuda.is_available() else "cpu")
    print(20 * "=", " Preparing for training ", 20 * "=")
    # Directory for saving the model.
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    # -------------------- Data loading ------------------- #
    print("\t* Loading training data...")
    train_data = My_Dataset(train_df, vocab_file, max_length, mode)
    train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
    print("\t* Loading validation data...")
    dev_data = My_Dataset(dev_df, vocab_file, max_length, mode)
    dev_loader = DataLoader(dev_data, shuffle=True, batch_size=batch_size)
    print("\t* Loading test data...")
    test_data = My_Dataset(test_df, vocab_file, max_length, mode)
    test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)
    # -------------------- Model definition ------------------- #
    print("\t* Building model...")
    if embeddings_file is not None:
        embeddings = load_embeddings(embeddings_file)
    else:
        embeddings = None
    model = BiLSTM(embeddings, num_labels=num_labels, device=device).to(device)
    total_params = sum(p.numel() for p in model.parameters())
    print(f'{total_params:,} total parameters.')
    total_trainable_params = sum(p.numel() for p in model.parameters()
                                 if p.requires_grad)
    print(f'{total_trainable_params:,} training parameters.')
    # -------------------- Preparation for training ------------------- #
    criterion = nn.CrossEntropyLoss()
    # Keep only the parameters that require gradient updates.
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    # optimizer = optim.Adadelta(parameters, params["LEARNING_RATE"])
    optimizer = torch.optim.Adam(parameters, lr=lr)
    # optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode="max",
                                                           factor=0.85,
                                                           patience=0)
    best_score = 0.0
    start_epoch = 1
    # Data for loss-curve plots.
    epochs_count = []
    train_losses = []
    valid_losses = []
    # Continue training from a checkpoint if one was given as an argument.
    if checkpoint:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint["epoch"] + 1
        best_score = checkpoint["best_score"]
        print("\t* Training will continue on existing model from epoch {}..."
              .format(start_epoch))
        model.load_state_dict(checkpoint["model"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        epochs_count = checkpoint["epochs_count"]
        train_losses = checkpoint["train_losses"]
        valid_losses = checkpoint["valid_losses"]
    # Compute loss and accuracy before starting (or resuming) training.
    _, valid_loss, valid_accuracy, _ = validate(model, dev_loader, criterion)
    print("\t* Validation loss before training: {:.4f}, accuracy: {:.4f}%"
          .format(valid_loss, (valid_accuracy * 100)))
    # -------------------- Training epochs ------------------- #
    print("\n", 20 * "=", "Training BiLSTM model on device: {}".format(device),
          20 * "=")
    patience_counter = 0
    for epoch in range(start_epoch, epochs + 1):
        epochs_count.append(epoch)
        print("* Training epoch {}:".format(epoch))
        epoch_time, epoch_loss, epoch_accuracy = train(model, train_loader,
                                                       optimizer, criterion,
                                                       epoch, max_grad_norm)
        train_losses.append(epoch_loss)
        print("-> Training time: {:.4f}s, loss = {:.4f}, accuracy: {:.4f}%"
              .format(epoch_time, epoch_loss, (epoch_accuracy * 100)))
        print("* Validation for epoch {}:".format(epoch))
        epoch_time, epoch_loss, epoch_accuracy, _ = validate(model, dev_loader,
                                                             criterion)
        valid_losses.append(epoch_loss)
        print("-> Valid. time: {:.4f}s, loss: {:.4f}, accuracy: {:.4f}%\n"
              .format(epoch_time, epoch_loss, (epoch_accuracy * 100)))
        # Update the optimizer's learning rate with the scheduler.
        scheduler.step(epoch_accuracy)
        # Early stopping on validation accuracy.
        if epoch_accuracy < best_score:
            patience_counter += 1
        else:
            best_score = epoch_accuracy
            patience_counter = 0
            if if_save_model:
                torch.save(
                    {
                        "epoch": epoch,
                        "model": model.state_dict(),
                        "best_score": best_score,
                        "epochs_count": epochs_count,
                        "train_losses": train_losses,
                        "valid_losses": valid_losses
                    },
                    os.path.join(target_dir, "best.pth.tar"))
                print("save model successfully!\n")
            print("* Test for epoch {}:".format(epoch))
            _, _, test_accuracy, predictions = validate(model, test_loader,
                                                        criterion)
            print("Test accuracy: {:.4f}%\n".format(test_accuracy * 100))
            test_prediction = pd.DataFrame({'prediction': predictions})
            test_prediction.to_csv(os.path.join(target_dir,
                                                "test_prediction.csv"),
                                   index=False)
        if patience_counter >= patience:
            print("-> Early stopping: patience limit reached, stopping...")
            break
mapping_file = ".\\dataset\\map_data.map" mapping = {} with open(mapping_file, 'rb') as f: mapping = cPickle.load(f) word_to_id = mapping['word_to_id'] tag_to_id = mapping['tag_to_id'] char_to_id = mapping['char_to_id'] word_embeds = mapping['word_embeds'] model = BiLSTM(voca_size=len(word_to_id), word_emb_dim=100, pre_word_emb=word_embeds, char_emb_dim=25, char_lstm_dim=25, char_to_ix=char_to_id, n_cap=4, cap_emb_dim=8, hidden_dim=200, tag_to_ix=tag_to_id) x = torch.load(model_path) model.load_state_dict(x()) model.eval() def test(): test_sentences = loader.load_data(test_path, zeros=False) loader.update_tag_scheme(test_sentences, 'iob')
print(str(datetime.now()), "Generating vocab")
vocab = Vocab(train_colors, min_count=min_count, add_padding=True,
              add_bos=True, add_eos=True)

embeddings = nn.Embedding(len(vocab.index2token), embedding_size,
                          padding_idx=vocab.PAD.hash)

model = BiLSTM(embeddings=embeddings,
               hidden_size=hidden_size,
               num_labels=len(vocab),
               bidirectional=bidirectional,
               num_layers=num_layers,
               color_representation_size=54)

model_id = str(int(time.time())) + "w_fourier"
save_path = os.path.join(output_path, model_id)
if not os.path.isdir(save_path):
    os.makedirs(save_path)

writer = SummaryWriter(save_path)

if cuda:
    model.cuda()

print(model)
import torch
import torchvision

from model import BiLSTM
from data import load_dataset
from config import model_name, device

if __name__ == "__main__":
    # The string to test!
    test_string = "<s> john can"

    # ########################
    # LOAD DATASET
    # ########################
    corpus, word_to_idx, idx_to_word, train_dataset = load_dataset()

    # ########################
    # TEST VARIABLES
    # ########################
    model = BiLSTM(len(corpus))
    model.load_state_dict(torch.load(model_name))
    model.eval()

    sentence = test_string.split()
    sentence = torch.tensor([[word_to_idx[w] for w in sentence]])
    s = model.sample(sentence)
    print(test_string.split() + s)
def main(options):
    use_cuda = (len(options.gpuid) >= 1)
    if options.gpuid:
        cuda.set_device(options.gpuid[0])

    train, dev, test, vocab = torch.load(open(options.data_file, 'rb'),
                                         pickle_module=dill)

    batched_train, batched_train_mask, _ = utils.tensor.advanced_batchize(
        train, options.batch_size, vocab.stoi["<pad>"])
    batched_dev, batched_dev_mask, _ = utils.tensor.advanced_batchize(
        dev, options.batch_size, vocab.stoi["<pad>"])

    vocab_size = len(vocab)

    if options.load_file:
        rnnlm = torch.load(options.load_file)
    else:
        rnnlm = BiLSTM(vocab_size)

    if use_cuda:
        rnnlm.cuda()
    else:
        rnnlm.cpu()

    criterion = torch.nn.NLLLoss()
    optimizer = eval("torch.optim." + options.optimizer)(rnnlm.parameters(),
                                                         options.learning_rate)

    # Main training loop.
    last_dev_avg_loss = float("inf")
    for epoch_i in range(options.epochs):
        logging.info("At {0}-th epoch.".format(epoch_i))
        # Re-enable training mode (the dev loop below switches to eval mode).
        rnnlm.train()
        # srange generates a lazy sequence of shuffled range.
        for i, batch_i in enumerate(utils.rand.srange(len(batched_train))):
            train_batch = Variable(batched_train[batch_i])  # (seq_len, batch_size)
            train_mask = Variable(batched_train_mask[batch_i])
            if use_cuda:
                train_batch = train_batch.cuda()
                train_mask = train_mask.cuda()

            # (seq_len, batch_size, vocab_size)
            # TODO: substitute this with your module
            sys_out_batch = rnnlm(train_batch)
            train_in_mask = train_mask.view(-1)
            train_in_mask = train_in_mask.unsqueeze(1).expand(
                len(train_in_mask), vocab_size)
            train_out_mask = train_mask.view(-1)
            sys_out_batch = sys_out_batch.view(-1, vocab_size)
            train_out_batch = train_batch.view(-1)
            sys_out_batch = sys_out_batch.masked_select(train_in_mask).view(
                -1, vocab_size)
            train_out_batch = train_out_batch.masked_select(train_out_mask)
            loss = criterion(sys_out_batch, train_out_batch)
            logging.debug("loss at batch {0}: {1}".format(i, loss.data[0]))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Validation -- this is a crude estimate because there might be some
        # padding at the end.
        dev_loss = 0.0
        rnnlm.eval()
        for batch_i in range(len(batched_dev)):
            dev_batch = Variable(batched_dev[batch_i], volatile=True)
            dev_mask = Variable(batched_dev_mask[batch_i], volatile=True)
            if use_cuda:
                dev_batch = dev_batch.cuda()
                dev_mask = dev_mask.cuda()

            sys_out_batch = rnnlm(dev_batch)
            dev_in_mask = dev_mask.view(-1)
            dev_in_mask = dev_in_mask.unsqueeze(1).expand(len(dev_in_mask),
                                                          vocab_size)
            dev_out_mask = dev_mask.view(-1)
            sys_out_batch = sys_out_batch.view(-1, vocab_size)
            dev_out_batch = dev_batch.view(-1)
            sys_out_batch = sys_out_batch.masked_select(dev_in_mask).view(
                -1, vocab_size)
            dev_out_batch = dev_out_batch.masked_select(dev_out_mask)
            loss = criterion(sys_out_batch, dev_out_batch)
            dev_loss += loss

        dev_avg_loss = dev_loss / len(batched_dev)
        logging.info(
            "Average loss value per instance is {0} at the end of epoch {1}"
            .format(dev_avg_loss.data[0], epoch_i))

        # if (last_dev_avg_loss - dev_avg_loss).data[0] < options.estop:
        #     logging.info("Early stopping triggered with threshold {0} "
        #                  "(previous dev loss: {1}, current: {2})".format(
        #                      epoch_i, last_dev_avg_loss.data[0],
        #                      dev_avg_loss.data[0]))
        #     break

        torch.save(
            rnnlm,
            open(options.model_file + ".nll_{0:.2f}.epoch_{1}".format(
                dev_avg_loss.data[0], epoch_i), 'wb'),
            pickle_module=dill)
        last_dev_avg_loss = dev_avg_loss
# ---- Build Vocabulary ------
w2v_map = data.load_map("resources/w2v_map_SQ.pkl")
w2v_map['<pad>'] = np.zeros(300)
word_to_ix = data.load_map("resources/word_to_ix_SQ.pkl")
label_to_ix = data.load_map("resources/rel_to_ix_SQ.pkl")
vocab_size = len(word_to_ix)
num_classes = len(label_to_ix)
max_sent_length = 36  # set from the paper

# ---- Define Model, Loss, Optim ------
config = args
config.d_out = num_classes
config.n_directions = 2 if config.birnn else 1
print(config)

model = BiLSTM(config)
loss_function = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=args.lr)

# ---- Test Model ------
if args.test:
    print("Test Mode: loading pre-trained model and testing on test set...")
    # model = torch.load(args.resume_snapshot,
    #                    map_location=lambda storage, location: storage.cuda(args.gpu))
    model.load_state_dict(torch.load(args.resume_snapshot))
    test_acc = evaluate_dataset_batch(test_set, max_sent_length, model,
                                      w2v_map, label_to_ix)
    print("Accuracy: {}".format(test_acc))
    sys.exit(0)

# ---- Train Model ------
start = time.time()
    return epoch_loss / len(valid_it), epoch_acc / len(valid_it)


vocab_size = len(TEXT.vocab)
emb_dim = 50
hidden_dim = 50
out_dim = 1
lr = 1e-2
nlayers = 2
bidir = True
dropout = 0.3

model = BiLSTM(vocab_size, hidden_dim, emb_dim, out_dim, bsize, nlayers,
               bidir, dropout, gpu=gpu)

n_filters = 3
filter_sizes = [3, 4, 5]
modelc = CNN(vocab_size, emb_dim, n_filters, filter_sizes, out_dim, dropout)

optimizer = optim.Adam(model.parameters())  # no need to specify LR for Adam
lossf = nn.BCEWithLogitsLoss()
ep = 5

modelatt = LSTMAttn(vocab_size, hidden_dim, emb_dim, out_dim, bsize, gpu=gpu)