def train_and_val():
    embedding_dim = 100
    hidden_dim = 100
    model_load_path = None
    best_model_save_path = 'model/model_100_best_0223.pth'
    max_score = 0
    stop_epoch = 30
    unimprove_time = 0
    val_json_path = '/home/agwave/Data/resume/val_0222.json'
    val_pdf_dir = '/home/agwave/Data/resume/val_0222/'

    training_data = get_data_from_data_txt(TRAIN_WORD_TO_TAG_PATH)
    with open('supporting_document/train_word_to_tag_0223.json', 'r') as j:
        word_to_ix = json.load(j)
    tag_to_ix = {'b-name': 0, 'i-name': 1, 'b-bir': 2, 'i-bir': 3, 'b-gend': 4, 'i-gend': 5,
                 'b-tel': 6, 'i-tel': 7, 'b-acad': 8, 'i-acad': 9, 'b-nati': 10, 'i-nati': 11,
                 'b-live': 12, 'i-live': 13, 'b-poli': 14, 'i-poli': 15, 'b-unv': 16, 'i-unv': 17,
                 'b-comp': 18, 'i-comp': 19, 'b-work': 20, 'i-work': 21, 'b-post': 22, 'i-post': 23,
                 'b-proj': 24, 'i-proj': 25, 'b-resp': 26, 'i-resp': 27, 'b-degr': 28, 'i-degr': 29,
                 'b-grti': 30, 'i-grti': 31, 'b-woti': 32, 'i-woti': 33, 'b-prti': 34, 'i-prti': 35,
                 'o': 36, '<start>': 37, '<stop>': 38}

    model = BiLSTM_CRF(len(word_to_ix), tag_to_ix, embedding_dim, hidden_dim)
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    start_epoch = 0
    if model_load_path is not None:
        print('load model...')
        checkpoint = torch.load(model_load_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch'] + 1

    preliminary_score = get_score_by_model(model, val_json_path, val_pdf_dir)
    print('preliminary score:', preliminary_score)

    for epoch in range(start_epoch, stop_epoch):
        print("---------------------")
        print("running epoch : ", epoch)
        start_time = time.time()
        for sentence, tags in tqdm(training_data):
            model.zero_grad()
            sentence_in = prepare_sequence(sentence, word_to_ix)
            targets = torch.tensor([tag_to_ix[t] for t in tags], dtype=torch.long)
            loss = model.neg_log_likelihood(sentence_in, targets)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1)
            optimizer.step()
        cur_epoch_score = get_score_by_model(model, val_json_path, val_pdf_dir)
        print('score', cur_epoch_score)
        print('running time:', time.time() - start_time)
        if cur_epoch_score > max_score:
            unimprove_time = 0
            max_score = cur_epoch_score
            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'epoch': epoch
            }, best_model_save_path)
            print('saved best model successfully.')
        else:
            break
def _main():
    data_manager = DataManager()
    vocab_size = len(data_manager.word2ix)
    model = BiLSTM_CRF(device, vocab_size, data_manager.tag2ix, EMBEDDING_DIM, HIDDEN_DIM)
    model = model.to(device)

    train_set = NerDataset(data_manager.train_sents, data_manager.train_tags)
    dev_set = NerDataset(data_manager.dev_sents, data_manager.dev_tags)
    train_loader = DataLoader(train_set, batch_size=BATCH_SZ, shuffle=True)
    dev_loader = DataLoader(dev_set, batch_size=BATCH_SZ, shuffle=True)

    optimizer = optim.Adam(model.parameters(), lr=0.01)

    # with torch.no_grad():
    #     precheck_sent = to_tensor(train_loader[0])
    #     precheck_tag = to_tensor(dataset.train_tags[0])
    #     print(precheck_tag)
    #     print(model(precheck_sent))

    for epoch in range(EPOCH_NUM):
        epoch_loss = []  # reset each epoch so the reported value is a per-epoch average
        for sents, tags, lengths in tqdm(train_loader):
            sents = sents.to(device)
            tags = tags.to(device)
            lengths = lengths.to(device)
            # print(lengths, sents.size(), tags.size())
            loss = model.neg_log_likelihood(sents, tags, lengths)
            epoch_loss.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        print(epoch, ' epoch loss: ', sum(epoch_loss) / len(epoch_loss))
        save_model(model, epoch)
        eval(model, dev_loader)
def train_all_data():
    embedding_dim = 100
    hidden_dim = 100
    stop_epoch = 1
    model_1_epoch = 'model/model_1_epoch_lr0001.pth'

    training_data = get_data_from_data_txt(DATA_PERFECT_PATH)
    word_to_ix = get_word_to_ix(training_data, min_word_freq=1)
    tag_to_ix = {'b-name': 0, 'i-name': 1, 'b-bir': 2, 'i-bir': 3, 'b-gend': 4, 'i-gend': 5,
                 'b-tel': 6, 'i-tel': 7, 'b-acad': 8, 'i-acad': 9, 'b-nati': 10, 'i-nati': 11,
                 'b-live': 12, 'i-live': 13, 'b-poli': 14, 'i-poli': 15, 'b-unv': 16, 'i-unv': 17,
                 'b-comp': 18, 'i-comp': 19, 'b-work': 20, 'i-work': 21, 'b-post': 22, 'i-post': 23,
                 'b-proj': 24, 'i-proj': 25, 'b-resp': 26, 'i-resp': 27, 'b-degr': 28, 'i-degr': 29,
                 'b-grti': 30, 'i-grti': 31, 'b-woti': 32, 'i-woti': 33, 'b-prti': 34, 'i-prti': 35,
                 'o': 36, '<start>': 37, '<stop>': 38,
                 'c-live': 39, 'c-proj': 40, 'c-woti': 41, 'c-post': 42, 'c-unv': 43,
                 'c-nati': 44, 'c-poli': 45, 'c-prti': 46, 'c-comp': 47}

    model = BiLSTM_CRF(len(word_to_ix), tag_to_ix, embedding_dim, hidden_dim)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Make sure prepare_sequence from earlier in the LSTM section is loaded
    for epoch in range(stop_epoch):
        print("---------------------")
        print("running epoch : ", epoch + 1)
        start_time = time.time()
        for sentence, tags in tqdm(training_data):
            model.zero_grad()
            sentence_in = prepare_sequence(sentence, word_to_ix)
            targets = torch.tensor([tag_to_ix[t] for t in tags], dtype=torch.long)
            loss = model.neg_log_likelihood(sentence_in, targets)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 15)
            optimizer.step()
        cur_epoch_score = get_score_by_model(model, TRAIN_JSON_PATH, TRAIN_PDF_DIR)
        print('score', cur_epoch_score)
        print('running time:', time.time() - start_time)
        print()
        if epoch == stop_epoch - 1:  # save after the final epoch (epoch is zero-based)
            torch.save({'model_state_dict': model.state_dict()}, model_1_epoch)
def train():
    """Model training."""
    train_writer = SummaryWriter(log_dir='./log/train')
    test_writer = SummaryWriter(log_dir='./log/test')

    # Step 1: model
    bilstm_crf = BiLSTM_CRF(opt.vocab_size, opt.emb_dim, opt.emb_dim // 2, opt.tag_num,
                            dropout=opt.dropout)
    if opt.load_model_path:  # optionally resume from a checkpoint
        bilstm_crf.load(opt.load_model_path)

    # Step 2: data
    rmrb_train_dataset = RmrbDataset(train=True)
    rmrb_test_dataset = RmrbDataset(train=False)
    rmrb_train_dataloader = DataLoader(rmrb_train_dataset, batch_size=64, shuffle=True)
    rmrb_test_dataloader = DataLoader(rmrb_test_dataset, batch_size=len(rmrb_test_dataset),
                                      shuffle=True)

    # Step 3: loss function and optimizer
    # loss_fn = t.nn.CrossEntropyLoss()
    lr = opt.lr
    optimizer = t.optim.Adam(params=bilstm_crf.parameters(), lr=lr,
                             weight_decay=opt.weight_decay)

    previous_loss = 1e9
    iteration = 0
    for epoch in range(opt.max_epoch):
        print('epoch {}'.format(epoch))
        for ii, (x_batch, y_batch) in enumerate(rmrb_train_dataloader):
            # compute the loss
            loss = bilstm_crf.log_likelihood(x_batch, y_batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            if ii % 20 == 0:
                # print('loss:{}'.format(loss.item()))
                train_writer.add_scalar('Loss', loss.item(), iteration)
                iteration += 1

        # decay the learning rate when the loss stops improving, and apply it to the optimizer
        if loss > previous_loss:
            lr = lr * opt.lr_decay
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        else:
            previous_loss = loss.item()

        # save a model checkpoint
        bilstm_crf.save()

        # evaluation metrics
        with t.no_grad():
            bilstm_crf.eval()  # switch the model to evaluation mode
            for x_test, y_test in rmrb_test_dataloader:
                test_loss = bilstm_crf.log_likelihood(x_test, y_test)
                test_writer.add_scalar('Loss', test_loss.item(), iteration)
                y_pre = bilstm_crf(x_test)
                print(classification_report(t.flatten(y_test), t.flatten(y_pre)))
            bilstm_crf.train()  # switch the model back to training mode
def run(word_train, label_train, word_dev, label_dev, vocab, device, kf_index=0):
    # build dataset
    train_dataset = SegDataset(word_train, label_train, vocab, config.label2id)
    dev_dataset = SegDataset(word_dev, label_dev, vocab, config.label2id)
    # build data_loader
    train_loader = DataLoader(train_dataset, batch_size=config.batch_size,
                              shuffle=True, collate_fn=train_dataset.collate_fn)
    dev_loader = DataLoader(dev_dataset, batch_size=config.batch_size,
                            shuffle=True, collate_fn=dev_dataset.collate_fn)
    # model
    model = BiLSTM_CRF(embedding_size=config.embedding_size,
                       hidden_size=config.hidden_size,
                       vocab_size=vocab.vocab_size(),
                       target_size=vocab.label_size(),
                       num_layers=config.lstm_layers,
                       lstm_drop_out=config.lstm_drop_out,
                       nn_drop_out=config.nn_drop_out)
    model.to(device)
    # optimizer
    optimizer = optim.Adam(model.parameters(), lr=config.lr, betas=config.betas)
    scheduler = StepLR(optimizer, step_size=config.lr_step, gamma=config.lr_gamma)
    # how to initialize these parameters elegantly?
    for p in model.crf.parameters():
        _ = torch.nn.init.uniform_(p, -1, 1)
    # train and test
    # train(train_loader, dev_loader, vocab, model, optimizer, scheduler, device, kf_index)
    with torch.no_grad():
        # test on the final test set
        test_loss, f1 = test(config.test_dir, vocab, device, kf_index)
    return test_loss, f1
def prepare_sequence(seq, to_ix):
    idxs = [to_ix[w] for w in seq]
    return torch.tensor(idxs, dtype=torch.long)


# map each word to an id
word_to_ix = {}
for sentence, tags in training_data:
    for word in sentence:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)

tag_to_ix = {"B": 0, "I": 1, "O": 2, START_TAG: 3, STOP_TAG: 4}

model = BiLSTM_CRF(len(word_to_ix), tag_to_ix, EMBEDDING_DIM, HIDDEN_DIM)
optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay=1e-4)

# Check predictions before training
with torch.no_grad():
    # id sequence for the first training sentence
    precheck_sent = prepare_sequence(training_data[0][0], word_to_ix)
    # id sequence for that example's tags
    precheck_tags = torch.tensor([tag_to_ix[t] for t in training_data[0][1]], dtype=torch.long)
    print('Score and predicted tag id sequence before training:')
    print(model(precheck_sent))

# Make sure prepare_sequence from earlier in the LSTM section is loaded
for epoch in range(300):  # again, normally you would NOT do 300 epochs, it is toy data
    for sentence, tags in training_data:
        # clear accumulated gradients
        model.zero_grad()
        # turn the inputs into tensors of word and tag indices
        sentence_in = prepare_sequence(sentence, word_to_ix)
        targets = torch.tensor([tag_to_ix[t] for t in tags], dtype=torch.long)
        # run the forward pass, then back-propagate and update the parameters
        loss = model.neg_log_likelihood(sentence_in, targets)
        loss.backward()
        optimizer.step()
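The tutorial-style snippet above checks predictions before training; a matching check after the loop, a minimal sketch that simply reuses the same helpers and variables, would look like this:

# Check predictions after training
with torch.no_grad():
    precheck_sent = prepare_sequence(training_data[0][0], word_to_ix)
    print('Score and predicted tag id sequence after training:')
    print(model(precheck_sent))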
    tag_to_ix=tag_to_id,
    embedding_dim=parameters['word_dim'],
    hidden_dim=parameters['word_lstm_dim'],
    use_gpu=use_gpu,
    char_to_ix=char_to_id,
    pre_word_embeds=word_embeds,
    use_crf=parameters['crf'],
    char_mode=parameters['char_mode'])
    # n_cap=4,
    # cap_embedding_dim=10)

file_name = './evaluation/saved_checkpoint_wfeats.txt'

if use_gpu:
    model.cuda()

learning_rate = 0.015
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

# print(restore_checkpoint(file_name, model, optimizer))
model, optimizer, current_epoch, other_info = restore_checkpoint(
    file_name, model, optimizer)
print("model loaded")
sys.stdout.flush()

print("begin labeling")


def label(model, datas):
    prediction = []
    for data in datas:
        words = data['str_words']
        chars2 = data['chars']
        caps = data['caps']
def train(conf):
    train_sentences = load_sentences(conf.train_file, conf.zeros)
    dev_sentences = load_sentences(conf.dev_file, conf.zeros)
    test_sentences = load_sentences(conf.test_file, conf.zeros)

    dico_chars_train = char_mapping(train_sentences, conf.lower)[0]
    dico_chars, char_to_id, id_to_char = augment_with_pretrained(
        dico_chars_train.copy(), conf.emb_file,
        list(itertools.chain.from_iterable([[w[0] for w in s] for s in test_sentences])))
    _t, tag_to_id, id_to_tag = tag_mapping(train_sentences)

    # prepare data: get a collection of lists containing indices
    train_data = prepare_dataset(train_sentences, char_to_id, tag_to_id, conf.lower)
    dev_data = prepare_dataset(dev_sentences, char_to_id, tag_to_id, conf.lower)
    test_data = prepare_dataset(test_sentences, char_to_id, tag_to_id, conf.lower)

    # load pretrained word embeddings
    all_word_embeds = {}
    for i, line in enumerate(codecs.open(conf.emb_file, 'r', 'utf-8')):
        s = line.strip().split()
        if len(s) == conf.embedding_dim + 1:
            all_word_embeds[s[0]] = np.array([float(x) for x in s[1:]])

    word_embeds_dict = np.random.uniform(-np.sqrt(0.06), np.sqrt(0.06),
                                         (len(char_to_id), conf.embedding_dim))
    for w in char_to_id:
        if w in all_word_embeds:
            word_embeds_dict[char_to_id[w]] = all_word_embeds[w]
        elif w.lower() in all_word_embeds:
            word_embeds_dict[char_to_id[w]] = all_word_embeds[w.lower()]
    print('Loaded %i pretrained embeddings.' % len(all_word_embeds))

    train_manager = BatchManager(train_data, conf.batch_size)
    model = BiLSTM_CRF(conf, tag_to_id, char_to_id, word_embeds_dict)
    optimizer = torch.optim.SGD(model.parameters(), lr=conf.learning_rate, weight_decay=1e-4)

    epochs = conf.epochs
    best_dev_f1 = 0
    for epoch in range(1, epochs + 1):
        print(f'train on epoch {epoch}')
        j = 1
        for batch in train_manager.iter_batch(shuffle=True):
            batch_loss = 0.0
            sentences = batch[1]
            tags = batch[-1]
            for i, index in enumerate(np.random.permutation(len(sentences))):
                model.zero_grad()
                sentence_in = sentences[index]
                tags_in = tags[index]
                loss = model.neg_log_likelihood(sentence_in, tags_in)
                loss.backward()
                optimizer.step()
                batch_loss += loss.item()
            print(f'[batch {j}, batch size: {conf.batch_size}] loss on this batch: {batch_loss}')
            j = j + 1

        print(f'Begin validating on the dev dataset for epoch {epoch} ...')
        dev_results = get_predictions(model, dev_data, id_to_tag)
        dev_f1 = evaluate_ner(dev_results, conf)
        if dev_f1 > best_dev_f1:
            best_dev_f1 = dev_f1  # keep track of the best dev F1 so far
            torch.save(model, conf.model_file)
            print('model saved successfully.')

        test_results = get_predictions(model, test_data, id_to_tag)
        test_f1 = evaluate_ner(test_results, conf)
        print(f'[epoch {epoch}] f1 on the test dataset: {test_f1:.3f}')
    embedding_dim=parameters['word_dim'],
    hidden_dim=parameters['word_lstm_dim'],
    use_gpu=use_gpu,
    pre_word_embeds=word_embeds,
    use_crf=True,  # parameters['crf']
    semroles_embedding_dim=2000,
)

if parameters['reload']:
    model = torch.load(model_name)
if use_gpu:
    model.cuda()

learning_rate = 0.015
# keep only the parameters that require gradients
parameters_alg = filter(lambda p: p.requires_grad, model.parameters())
optimizer = torch.optim.SGD(parameters_alg, lr=learning_rate, momentum=0.9)

losses = []
loss = 0.0
best_dev_F = -1.0
best_test_F = -1.0
best_train_F = -1.0
best_dev_Acc = -1.0
best_test_Acc = -1.0
best_train_Acc = -1.0
best_dev_Acc_post = -1.0
best_test_Acc_post = -1.0
best_train_Acc_post = -1.0
    use_gpu=use_gpu,
    char_to_ix=char_to_id,
    pre_word_embeds=word_embeds,
    use_crf=parameters['crf'],
    char_mode=parameters['char_mode'],
    char_embedding_dim=parameters['char_dim'],
    char_lstm_dim=parameters['char_lstm_dim'],
    alpha=parameters['alpha'])
    # n_cap=4,
    # cap_embedding_dim=10)

if use_gpu:
    model.cuda()

learning_rate = 0.015
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

losses = []
best_dev_F = -1.0
best_test_F = -1.0
best_train_F = -1.0
all_F = [[0, 0, 0]]
plot_every = 10
eval_every = 20
sample_count = 0
best_idx = 0

if parameters['reload']:
    print('loading model:', parameters['reload'])
    checkpoint = torch.load(models_path + parameters['reload'])
    # model.load_state_dict(checkpoint)
    char_mode=char_mode,
    char_embedding_dim=char_dim,
    char_lstm_dim=char_lstm_dim,
    char_lstm_bidirect=char_lstm_bidirect,
    char_cnn_win=char_cnn_win,
    char_cnn_output=char_cnn_dim,
    char_to_id=char_to_id,
    use_gpu=use_gpu,
    dropout=dropout,
    use_crf=use_crf,
)
print(model)

# count the trainable parameters and list their names and shapes
p_count = 0
for parameter in model.parameters():
    if parameter.requires_grad:
        p_count += 1

name_count = 0
param_list = []
for param_name, param in model.named_parameters():
    if param.requires_grad:
        name_count += 1
        print(param_name, " ", param.size())
        param_list.append(param_name)
print("p_count:{0},name_count:{1}".format(p_count, name_count))

log = str(model)
with open(os.path.join(logs_path, "{0}.important.log".format(name)),
"rb")) print('word vocab', len(word_vocab)) print('char vocab', len(char_vocab)) print('pos vocab', len(pos_vocab)) print('tag vocab', len(tag_vocab)) schema = get_schemas(source_path) # model train_device = torch.device(device if torch.cuda.is_available() else "cpu") model = BiLSTM_CRF(char_init_embed=(len(char_vocab), char_embed_dim), word_init_embed=(len(word_vocab), word_embed_dim), pos_init_embed=(len(pos_vocab), pos_embed_dim), spo_embed_dim=len(schema), sentence_length=seq_len, hidden_size=hidden_dim, num_classes=len(tag_vocab), dropout=dropout, id2words=tag_vocab.idx2word, encoding_type=encoding_type, weight=weight) model.to(train_device) optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay) # train writer = SummaryWriter(log_dir=log_path) train() writer.close()
def my_train():
    os.makedirs(f"model_result", exist_ok=True)
    torch.manual_seed(1)
    device = torch.device('cuda')
    data_dir = f"data/{DATASET}/processed"

    # load the data
    train_data = NERDataset(os.path.join(data_dir, "train.pkl"))
    test_data = NERDataset(os.path.join(data_dir, "test.pkl"))
    dev_data = NERDataset(os.path.join(data_dir, "dev.pkl"))
    word_to_idx = load_obj(os.path.join(data_dir, "word_to_idx.pkl"))
    tag_to_idx = load_obj(os.path.join(data_dir, "tag_to_idx.pkl"))
    idx_to_tag = {n: m for m, n in tag_to_idx.items()}

    train_loader = DataLoader(
        train_data,
        batch_size=BATCH_SIZE,
        collate_fn=BatchPadding(),
        shuffle=True,
        num_workers=2,
        pin_memory=True,
    )
    dev_loader = DataLoader(
        dev_data,
        batch_size=BATCH_SIZE,
        collate_fn=BatchPadding(),
        shuffle=True,
        num_workers=2,
        pin_memory=True,
    )
    test_loader = DataLoader(
        test_data,
        batch_size=BATCH_SIZE,
        collate_fn=BatchPadding(),
        shuffle=True,
        num_workers=2,
        pin_memory=True,
    )

    # build the model
    model = BiLSTM_CRF(len(word_to_idx), len(tag_to_idx), EMBEDDING_DIM, HIDDEN_DIM,
                       DROPOUT).to(device)
    print(model)
    optimizer = optim.Adam(model.parameters(), lr=LEARN_RATE)

    print("\nStart training")
    f1_max = 0
    cur_patience = 0  # early-stopping patience, used to avoid overfitting
    for epoch in range(EPOCHS):
        model.train()
        for i, (seqs, tags, masks) in enumerate(train_loader, 1):
            optimizer.zero_grad()
            loss = model.loss(seqs.to(device), tags.to(device), masks.to(device))
            loss.backward()
            optimizer.step()
            if i % LOG_INTERVAL == 0:
                print("epoch {}: {:.0f}%\t\tLoss: {:.6f}".format(
                    epoch, 100.0 * i / len(train_loader), loss.item()))

        dev_precision, dev_recall, dev_f1 = evaluate(model, dev_loader, idx_to_tag)
        test_precision, test_recall, test_f1 = evaluate(model, test_loader, idx_to_tag)
        print(f"\ndev\tprecision: {dev_precision}, recall: {dev_recall}, f1: {dev_f1}")
        print(f"test\tprecision: {test_precision}, recall: {test_recall}, f1: {test_f1}\n")
        torch.save(model.state_dict(), f"model_result/{epoch}.pt")

        if dev_f1 > f1_max:  # track the best dev F1 to detect overfitting
            f1_max = dev_f1
            cur_patience = 0
            if dev_f1 > 0.9 and test_f1 > 0.9:
                break
        else:
            cur_patience += 1
            if cur_patience >= PATIENCE:  # stop after several epochs without improving on the best F1
                break
    print("Best dev F1: ", f1_max)
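The loaders above (and in the next fragment) rely on a BatchPadding collate function that is not shown. A minimal sketch of such a collate_fn: the class name matches the call above, but the body, the padding index 0, and the mask convention are assumptions rather than the original code.

import torch
from torch.nn.utils.rnn import pad_sequence

class BatchPadding:
    """Pad variable-length (seq, tags) pairs to the batch maximum and build a mask."""
    def __call__(self, batch):
        # batch: list of (word_id_tensor, tag_id_tensor) pairs of varying length
        batch = sorted(batch, key=lambda item: len(item[0]), reverse=True)
        seqs, tags = zip(*batch)
        seqs = pad_sequence(seqs, batch_first=True, padding_value=0)
        tags = pad_sequence(tags, batch_first=True, padding_value=0)
        masks = seqs.ne(0)  # True for real tokens, False for padding (assumes pad id 0)
        return seqs, tags, masks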
)
test_loader = DataLoader(
    test_data,
    batch_size=args.batch_size,
    collate_fn=BatchPadding(),
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

# Model
model = BiLSTM_CRF(
    len(word_to_ix), len(tag_to_ix), args.embed_dim, args.hidden_dim, args.dropout
).to(device)
print(model)
optimizer = optim.Adam(model.parameters(), lr=args.lr)

print("Training...")
best_dev_f1 = 0
bad_count = 0
for epoch in range(args.epochs):
    model.train()
    for i, (seqs, tags, masks) in enumerate(train_loader, 1):
        optimizer.zero_grad()
        loss = model.loss(seqs.to(device), tags.to(device), masks.to(device))
        loss.backward()
        optimizer.step()
        if i % args.log_interval == 0:
            print(
                "Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                    epoch + 1,
    # hidden_size=parameters['word_lstm_size'],
    # use_gpu=use_gpu,
    # char_to_ix=char_to_id,
    # pre_word_embeds=word_embeds,
    # crf=parameters['crf'],
    # char_mode=parameters['char_mode'])
    # n_cap=4,
    # cap_embedding_size=10)

if parameters['reload']:
    model.load_state_dict(torch.load(model_name))
if use_gpu:
    model.cuda()

learning_rate = args.lr
# args.lr_method = "adadelta"  # "momentum" / "adadelta" / "adagrad" / "sgd"
if args.lr_method == "sgd":
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
elif args.lr_method == "adadelta":
    optimizer = torch.optim.Adadelta(model.parameters(), learning_rate, rho=0.95, eps=1e-06)

losses = []
loss = 0.0
best_dev_F = -1.0
best_test_F = -1.0
best_train_F = -1.0
all_F = [[0, 0, 0]]
plot_every = 500
eval_every = 5000
    tag_to_ix=tag_to_id,
    embedding_dim=parameters['word_dim'],
    hidden_dim=parameters['word_lstm_dim'],
    use_gpu=use_gpu,
    pre_word_embeds=word_embeds,
    use_crf=True,  # parameters['crf']
    semroles_embedding_dim=2000,
)

if parameters['reload']:
    model = torch.load(model_name)
if use_gpu:
    model.cuda()

learning_rate = 0.015
# keep only the parameters that require gradients
parameters_alg = filter(lambda p: p.requires_grad, model.parameters())
optimizer = torch.optim.SGD(parameters_alg, lr=learning_rate, momentum=0.9)

losses = []
loss = 0.0
best_dev_F = -1.0
best_test_F = -1.0
best_train_F = -1.0
best_dev_Acc = -1.0
best_test_Acc = -1.0
best_train_Acc = -1.0
best_dev_Acc_post = -1.0
best_test_Acc_post = -1.0
train_dataset, eval_dataset = torch.utils.data.random_split(dataset, (80000, 10000))
train_dataloader = DataLoader(train_dataset,
                              batch_size=Config.batch_size,
                              shuffle=True,
                              num_workers=1,
                              drop_last=False)

model = BiLSTM_CRF(len(char2idx), len(Config.tagert2idx), Config.embedding_dim, Config.hidden_dim)
# use the GPU by default
if Config.use_gpu:
    model = model.to('cuda')

optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)

best_score = 0
for epoch in range(Config.epochs):
    model.train()
    total_loss = 0
    for batch_sentence, batch_label, batch_length in train_dataloader:
        model.zero_grad()
        batch_sentence, batch_label, batch_length, _ = sort_batch_data(
            batch_sentence, batch_label, batch_length)
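sort_batch_data is called above but not defined in the fragment. A minimal sketch, under the assumption that it sorts a padded batch by descending sequence length (as pack_padded_sequence requires) and returns the permutation indices as the fourth value; the original implementation may differ:

import torch

def sort_batch_data(batch_sentence, batch_label, batch_length):
    # sort by descending length so the batch can be packed for the LSTM
    sorted_length, perm_idx = batch_length.sort(descending=True)
    return batch_sentence[perm_idx], batch_label[perm_idx], sorted_length, perm_idx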
# with open('./data/wvmodel.pkl', 'rb') as inp:
#     wvmodel = pickle.load(inp)
#     print('wvmodel loaded!')
#
# weight = torch.zeros(args.vocab_size, args.embedding_size)
# for i in range(len(wvmodel.index2word)):
#     try:
#         index = word_to_idx[wvmodel.index2word[i]]
#     except:
#         continue
#     weight[index, :] = torch.from_numpy(wvmodel.get_vector(
#         idx_to_word[word_to_idx[wvmodel.index2word[i]]]))

model = BiLSTM_CRF(args, label2idx, weight, device).to(device)
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                       lr=args.lr, weight_decay=args.weight_decay)

best_f1 = 0.0
print('training on ', device)
for epoch in range(1):
    model.train()
    train_loss_sum = 0.0
    steps = 0
    for batch in train_iter:
        X, y = batch.TEXT, batch.LABEL
        X, y = X.to(device).long(), y.to(device).long()
        loss = model.neg_log_likelihood(X, y)
        optimizer.zero_grad()
        loss.backward()
    cPickle.dump(mappings, f)

# Model load
model = BiLSTM_CRF(word_to_ix=word_to_id,
                   ix_to_word=id_to_word,
                   tag_to_ix=tag_to_id,
                   char_to_ix=char_to_id,
                   mor_to_ix=mor_to_id,
                   embedding_dim=parameters['word_dim'],
                   hidden_dim=parameters['word_lstm_dim'],
                   char_lstm_dim=parameters['char_lstm_dim'],
                   char_dim=parameters['char_dim'],
                   pre_word_embeds=word_embeds,
                   pre_char_embeds=char_embeds,
                   use_gpu=parameters['use_gpu'],
                   use_crf=parameters['crf'],
                   use_elmo=parameters['use_elmo'],
                   elmo_option=parameters['elmo_option'],
                   elmo_weight=parameters['elmo_weight'])

if parameters['reload']:
    model.load_state_dict(torch.load(model_name))
if use_gpu:
    model.cuda()

learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

losses = []
loss = 0.0
best_test = -np.inf
best_dev = -np.inf
best_dev_F = -1.0
best_test_F = -1.0
best_train_F = -1.0
best_epoch = 0
best_dev_epoch = 0
all_F = [[0, 0, 0]]
plot_every = 50
eval_every = 350
count = 0
test_list = []
dev_list = []
    batch_size=args.batch_size, collate_fn=collate_fn, shuffle=True)
valid_loader = torch.utils.data.DataLoader(dataset=valid_data, batch_size=args.batch_size,
                                           collate_fn=collate_fn, shuffle=False)
test_loader = torch.utils.data.DataLoader(dataset=test_data, batch_size=args.batch_size,
                                          collate_fn=collate_fn, shuffle=False)

weight_matrix = get_weight(wvmodel, len(word2id), args.embedding_size)
print('weight_matrix', weight_matrix.size())
model = BiLSTM_CRF(len(word2id), label2id, args.embedding_size, weight_matrix,
                   args.hidden_size).cuda()

if os.path.exists(args.param_path):
    print('loading params')
    # pdb.set_trace()
    model.load_state_dict(torch.load(args.param_path))
    optim = torch.optim.Adam(model.parameters(), args.learning_rate)
    criterion = torch.nn.CrossEntropyLoss()
    train(args, train_loader, valid_loader, model, optim, criterion)
    end_loss, end_f1 = evaluate_accuracy(model, test_loader)
    print("====================>test loss: %.4f, test f1 : %.4f" % (end_loss, end_f1))
else:
    print('test begin')
    with open(args.test_path, 'r', encoding='utf-8') as ftest_text:
        test_textlines = [line.strip().lower().split(' ') for line in ftest_text.readlines()]
    test_textlines = [[word2id[word] if word in word2id else unk for word in line]
                      for line in test_textlines]
    test_textlines = [torch.Tensor(line).long() for line in test_textlines]
    weight_matrix = get_weight(wvmodel, len(word2id), args.embedding_size)
import torch
import torch.optim as optim

from dataset import Dataset
from model import BiLSTM_CRF

# torch.set_default_tensor_type('torch.cuda.FloatTensor')

epochs = 100
dataset = Dataset()
train_loader = dataset.get_train_loader(1)
model = BiLSTM_CRF(dataset.get_vocab_size(), dataset.get_label_index_dict(), 128, 128)
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

model.train()
for epoch in range(epochs):
    for step, batch in enumerate(train_loader):
        sentence_in, targets = batch.line, batch.label
        sentence_in = sentence_in.permute([1, 0]).reshape(-1).contiguous()
        targets = targets.permute([1, 0]).reshape(-1).contiguous()

        model.zero_grad()
        loss = model.neg_log_likelihood(sentence_in.squeeze(-1), targets.squeeze(-1)) / len(sentence_in)
        loss.backward()
        optimizer.step()
        print("{}-{}: {:.5f}".format(epoch, step, loss.item()))
    use_crf=parameters['crf'],
    char_mode=parameters['char_mode'],
    # n_cap=4,
    # cap_embedding_dim=10
)

if parameters['reload']:
    model.load_state_dict(torch.load(model_name))

if use_gpu:
    GPU_id = gpu_id
    print("GPU ID = ", GPU_id)
    torch.cuda.set_device(GPU_id)
    model.cuda()

learning_rate = parameters["LR"]
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.8)

t = time.time()
train_model(model, step_lr_scheduler, optimizer, train_data, dev_data, test_data)
print("total time in training: ", time.time() - t)

try:
    os.remove(parameters["sorted_entity_list_file_name"])
except Exception as e:
    pass