Example #1
def train_and_val():
    embedding_dim = 100
    hidden_dim = 100
    model_load_path = None
    best_model_save_path = 'model/model_100_best_0223.pth'
    max_score = 0
    stop_epoch = 30
    unimprove_time = 0
    val_json_path = '/home/agwave/Data/resume/val_0222.json'
    val_pdf_dir = '/home/agwave/Data/resume/val_0222/'

    training_data = get_data_from_data_txt(TRAIN_WORD_TO_TAG_PATH)
    with open('supporting_document/train_word_to_tag_0223.json', 'r') as j:
        word_to_ix = json.load(j)
    tag_to_ix = {'b-name': 0, 'i-name': 1, 'b-bir': 2, 'i-bir': 3, 'b-gend': 4, 'i-gend': 5,
                 'b-tel': 6, 'i-tel': 7, 'b-acad': 8, 'i-acad': 9, 'b-nati': 10, 'i-nati': 11,
                 'b-live': 12, 'i-live': 13, 'b-poli': 14, 'i-poli': 15, 'b-unv': 16, 'i-unv': 17,
                 'b-comp': 18, 'i-comp': 19, 'b-work': 20, 'i-work': 21, 'b-post': 22, 'i-post': 23,
                 'b-proj': 24, 'i-proj': 25, 'b-resp': 26, 'i-resp': 27, 'b-degr': 28, 'i-degr': 29,
                 'b-grti': 30, 'i-grti': 31, 'b-woti': 32, 'i-woti': 33, 'b-prti': 34, 'i-prti': 35,
                 'o': 36, '<start>': 37, '<stop>': 38}
    model = BiLSTM_CRF(len(word_to_ix), tag_to_ix, embedding_dim, hidden_dim)
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    start_epoch = 0
    if model_load_path is not None:
        print('load model...')
        checkpoint = torch.load(model_load_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch'] + 1
    preliminary_score = get_score_by_model(model, val_json_path, val_pdf_dir)
    print('preliminary score:', preliminary_score)

    for epoch in range(start_epoch, stop_epoch):
        print("---------------------")
        print("running epoch : ", epoch)
        start_time = time.time()
        for sentence, tags in tqdm(training_data):
            model.zero_grad()
            sentence_in = prepare_sequence(sentence, word_to_ix)
            targets = torch.tensor([tag_to_ix[t] for t in tags], dtype=torch.long)
            loss = model.neg_log_likelihood(sentence_in, targets)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1)
            optimizer.step()
        cur_epoch_score = get_score_by_model(model, val_json_path, val_pdf_dir)
        print('score', cur_epoch_score)
        print('running time:', time.time() - start_time)
        if cur_epoch_score > max_score:
            unimprove_time = 0
            max_score = cur_epoch_score
            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'epoch': epoch
            }, best_model_save_path)
            print('save best model successfully.')
        else:
            # stop as soon as the validation score fails to improve
            break
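The training loop above calls a prepare_sequence helper that is not shown in this excerpt. A minimal sketch, assuming it mirrors the standard PyTorch BiLSTM-CRF tutorial helper; the <unk> fallback index is an assumption:

import torch

def prepare_sequence(seq, to_ix, unk_token='<unk>'):
    # Map each token to its vocabulary index; fall back to an assumed <unk>
    # entry (or index 0) for out-of-vocabulary tokens.
    idxs = [to_ix.get(w, to_ix.get(unk_token, 0)) for w in seq]
    return torch.tensor(idxs, dtype=torch.long)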
Example #2
def main(args):
    labels = [
        'O', 'B-LOC', 'B-ORG', 'B-T', 'I-LOC', 'I-PER', 'B-PER', 'I-ORG', 'I-T'
    ]
    # labels = ['O', 'I-PER', 'B-PER', 'I-LOC', 'I-ORG', 'B-ORG', 'B-LOC']
    args.num_labels = len(labels)

    tokenizer = None
    word2id = None
    if args.model == 'bert':
        is_BERT = True
        # use 'bert-base-chinese' model
        pretrained_model_name = 'bert-base-chinese'
        tokenizer = BertTokenizer.from_pretrained(pretrained_model_name)
        config = BertConfig.from_pretrained(
            pretrained_model_name,
            num_labels=args.num_labels,
            hidden_dropout_prob=args.hidden_dropout_prob)
        model = BERTforNER_CRF.from_pretrained(pretrained_model_name,
                                               config=config,
                                               use_crf=args.crf)
    else:
        is_BERT = False
        word2id = json.load(open(args.word2id_file, "r", encoding="utf8"))
        model = BiLSTM_CRF(len(word2id), args.embedding_dim, args.hidden_dim,
                           args.num_labels, args.hidden_dropout_prob, args.crf)

    framework = Framework(args)

    if args.mode == "train":
        print("loading training dataset...")
        train_dataset = NERDataset(file_path=args.train_file,
                                   labels=labels,
                                   word2id=word2id,
                                   tokenizer=tokenizer,
                                   max_len=args.max_len,
                                   is_BERT=is_BERT)

        print("loading dev datasets...")
        dev_dataset = NERDataset(file_path=args.dev_file,
                                 labels=labels,
                                 word2id=word2id,
                                 tokenizer=tokenizer,
                                 max_len=args.max_len,
                                 is_BERT=is_BERT)

        framework.train(train_dataset, dev_dataset, model, labels)

    print("\Testing ...")
    print("loading dev datasets...")
    test_dataset = NERDataset(file_path=args.test_file,
                              labels=labels,
                              word2id=word2id,
                              tokenizer=tokenizer,
                              max_len=args.max_len,
                              is_BERT=is_BERT)

    model.load_state_dict(torch.load(args.save_model))
    framework.test(test_dataset, model, labels)
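main(args) expects a populated argparse namespace; the sketch below reconstructs one from the attributes the function reads. All option names and defaults here are illustrative assumptions, not the repository's actual CLI, and Framework(args) likely reads further fields (learning rate, batch size, output paths) that are not shown:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--model', default='bert', choices=['bert', 'bilstm'])
parser.add_argument('--mode', default='train', choices=['train', 'test'])
parser.add_argument('--crf', action='store_true', help='add a CRF layer on top')
parser.add_argument('--word2id_file', default='data/word2id.json')
parser.add_argument('--embedding_dim', type=int, default=128)
parser.add_argument('--hidden_dim', type=int, default=256)
parser.add_argument('--hidden_dropout_prob', type=float, default=0.1)
parser.add_argument('--max_len', type=int, default=128)
parser.add_argument('--train_file', default='data/train.txt')
parser.add_argument('--dev_file', default='data/dev.txt')
parser.add_argument('--test_file', default='data/test.txt')
parser.add_argument('--save_model', default='checkpoints/best_model.pt')

if __name__ == '__main__':
    main(parser.parse_args())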
Example #3
parser.add_argument('--char_lstm_hidden_size', type=int, default=25, help='character-level LSTM hidden dim')
parser.add_argument('--word_embed_size', type=int, default=200, help='word embedding dim')
parser.add_argument('--input_embed_size', type=int, default=250, help='LSTM input embedding dim')
parser.add_argument('--hidden_size', type=int, default=250, help='decoder LSTM hidden dim')
parser.add_argument('--add_dropout', type=int, default=1, help='whether to apply dropout to input_embed')
parser.add_argument('--device', type=str, default='cuda:2', help='train device')
args = parser.parse_args(args=[])

idx_to_tag = ['B-ORG', 'O', 'B-MISC', 'B-PER', 'I-PER', 'B-LOC', 'I-ORG', 'I-MISC', 'I-LOC', 'STOP', 'START']
tag_to_idx = {tag: i for i, tag in enumerate(idx_to_tag)}  # assumed inverse mapping used by the model below
# get the data iterators
seq, char_, train_iter, test_iter, val_iter = get_data_iter()
START = 'START'
STOP = 'STOP'
device = tc.device('cuda:2')
net = BiLSTM_CRF(tag_to_idx, seq.vocab, char_.vocab, args)
net.load_state_dict(tc.load(args.save_path))
net = net.to(device)

# testing
def test_(net, data_iter, device, idx_to_tag):
    loss_sum, acc_sum, n = 0.0, 0.0, 0
    seq_pred = []
    net.eval()  # switch to evaluation mode
    for batch_data in data_iter:  
        sentence = (batch_data.Seq).to(device)
        char_ = (batch_data.Char_).to(device)
        char_len = (batch_data.Char_len).to(device)
        tag_seq = net(sentence, char_, char_len)
        seq_pred.append(tag_seq)
        n += sentence.shape[1]
        if n % 200 == 0:
Example #4
import os

import torch

from model import BiLSTM_CRF
from preprocess import load_obj


def get_tag(model, sentence, idx_to_tag):
    sentence = sentence.unsqueeze(1)
    mask = sentence.ne(0)
    best_tag_ids = model.decode(sentence, mask)
    tags = [idx_to_tag[idx] for idx in best_tag_ids[0]]
    return tags


if __name__ == '__main__':
    print(TEST_SENTENCE)
    data_dir = 'data/chinese/processed'
    word_to_idx = load_obj(os.path.join(data_dir, 'word_to_idx.pkl'))
    tag_to_idx = load_obj(os.path.join(data_dir, 'tag_to_idx.pkl'))

    idx_to_tag = {v: k for k, v in tag_to_idx.items()}

    model = BiLSTM_CRF(len(word_to_idx), len(tag_to_idx), 100, 200, 0.1)
    model.load_state_dict(
        torch.load(CUR_MODEL, map_location=torch.device('cuda')))
    model.eval()

    processed_sen = [i.split('/')[0] for i in TEST_SENTENCE.split()]
    sentence = torch.LongTensor(
        [word_to_idx.get(w, word_to_idx[UNK]) for w in processed_sen])
    best_tags = get_tag(model, sentence, idx_to_tag)
    print(' '.join(best_tags))
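load_obj from preprocess is not shown in this excerpt; a minimal sketch, assuming it simply unpickles the mapping saved during preprocessing:

import pickle

def load_obj(path):
    # Assumed implementation: read back a Python object (here, a dict) with pickle.
    with open(path, 'rb') as f:
        return pickle.load(f)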
Example #5
        'mor_to_id': mor_to_id,
        'word_embeds': word_embeds,
        }

        cPickle.dump(mappings, f)

#Model Load
model = BiLSTM_CRF(word_to_ix=word_to_id, ix_to_word=id_to_word, tag_to_ix=tag_to_id, char_to_ix = char_to_id, mor_to_ix = mor_to_id,
    embedding_dim=parameters['word_dim'], hidden_dim=parameters['word_lstm_dim'], char_lstm_dim=parameters['char_lstm_dim'],
    char_dim = parameters['char_dim'], pre_word_embeds=word_embeds,
    pre_char_embeds = char_embeds, use_gpu=parameters['use_gpu'], use_crf=parameters['crf'], use_elmo=parameters['use_elmo'],
    elmo_option = parameters['elmo_option'], elmo_weight = parameters['elmo_weight'])


if parameters['reload']:
    model.load_state_dict(torch.load(model_name))
if use_gpu:
    model.cuda()
learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
losses = []
loss = 0.0
best_test = -np.inf
best_dev = -np.inf
best_dev_F = -1.0
best_test_F = -1.0
best_train_F = -1.0
best_epoch = 0
best_dev_epoch = 0
all_F = [[0, 0, 0]]
plot_every = 50
Example #6
print("p_count:{0},name_count:{1}".format(p_count, name_count))

log = str(model)
with open(os.path.join(logs_path, "{0}.important.log".format(name)),
          "a") as fout:
    fout.write(log)
    fout.write('\n')
    for param in param_list:
        fout.write(param)
        fout.write('\n')
    fout.flush()

if reload:
    last_saved_model = torch.load(model_name, map_location=device_name)
    model.load_state_dict(last_saved_model.state_dict())
    model.use_gpu = use_gpu
if use_gpu:
    model = model.to(device)

# Perf: Adam < AdaDelta < SGD
if optimizer_choice == OptimizationMethod.SGDWithDecreasingLR:
    learning_rate = 0.02
    learning_momentum = 0.9
    print("learning_rate:{0}".format(learning_rate))
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                       model.parameters()),
                                lr=learning_rate,
                                momentum=learning_momentum)

elif optimizer_choice == OptimizationMethod.Adam:
Example #7
            tag_batch.append([tag_to_ix[t] for t in data[1]])
        loss = model.neg_log(
            sen_batch, tag_batch,
            torch.tensor(mask[len(batch_data) - 1 - i],
                         dtype=torch.long).cuda())
        loss.backward()
        print(loss)
        optimizer.step()
    torch.save(model.state_dict(), './params5.pkl')
print(crf.transitions.data)
'''
model.load_state_dict(torch.load('./params3.pkl'))
print(crf.transitions.data)
with codecs.open('./newtrain.txt', encoding='UTF-8') as f:
    train = f.readlines()
    for i in range(20):
        if i % 2 != 0:
            sen = train[i].strip(line_end)
            sen = get_idxseq(sen, word_to_ix)
            print(model(torch.tensor(sen, dtype=torch.long).cuda())[0])
'''

with codecs.open("./test.txt", 'r', encoding='UTF-8') as f:
    test_data = f.readlines()

for i in range(len(test_data)):
    test_data[i] = test_data[i].strip(line_end)

pre_res = []
for i in range(len(test_data)):
    sentence = test_data[i]
Example #8
    mask = sentence.ne(0)
    best_tag_ids = model.decode(sentence, mask)
    tags = [ix_to_tag[idx] for idx in best_tag_ids[0]]
    return tags


if __name__ == "__main__":

    if args.sentence is None:
        raise ValueError("Please input an sentence")
    if args.model is None:
        raise ValueError("Please specify model file path")

    data_dir = "data/msra/processed"
    word_to_ix = load_obj(os.path.join(data_dir, "word_to_ix.pkl"))
    tag_to_ix = load_obj(os.path.join(data_dir, "tag_to_ix.pkl"))

    ix_to_tag = {v: k for k, v in tag_to_ix.items()}

    # Load trained model
    model = BiLSTM_CRF(len(word_to_ix), len(tag_to_ix), 100, 200, 0.1)
    model.load_state_dict(
        torch.load(args.model, map_location=torch.device("cpu")))
    model.eval()

    # Predict
    sentence = torch.LongTensor(
        [word_to_ix.get(w, word_to_ix[UNK]) for w in args.sentence])
    best_tags = tagging(model, sentence, ix_to_tag)
    print(" ".join(best_tags))
Example #9
def eval_one_sample():
    sample = list(get_str_from_pdf(SAMPLE_PDF_FILE))

    with open('supporting_document/word_to_ix_add_unk_0219.json') as j:
        word_to_ix = json.load(j)

    tag_to_ix = {
        'b-name': 0,
        'i-name': 1,
        'b-bir': 2,
        'i-bir': 3,
        'b-gend': 4,
        'i-gend': 5,
        'b-tel': 6,
        'i-tel': 7,
        'b-acad': 8,
        'i-acad': 9,
        'b-nati': 10,
        'i-nati': 11,
        'b-live': 12,
        'i-live': 13,
        'b-poli': 14,
        'i-poli': 15,
        'b-unv': 16,
        'i-unv': 17,
        'b-comp': 18,
        'i-comp': 19,
        'b-work': 20,
        'i-work': 21,
        'b-post': 22,
        'i-post': 23,
        'b-proj': 24,
        'i-proj': 25,
        'b-resp': 26,
        'i-resp': 27,
        'b-degr': 28,
        'i-degr': 29,
        'b-grti': 30,
        'i-grti': 31,
        'b-woti': 32,
        'i-woti': 33,
        'b-prti': 34,
        'i-prti': 35,
        'o': 36,
        '<start>': 37,
        '<stop>': 38,
        'c-live': 39,
        'c-proj': 40,
        'c-woti': 41,
        'c-post': 42,
        'c-unv': 43,
        'c-nati': 44,
        'c-poli': 45,
        'c-prti': 46,
        'c-comp': 47
    }

    # map tag indices back to tag strings
    ix_to_tag = {v: k for k, v in tag_to_ix.items()}

    model = BiLSTM_CRF(len(word_to_ix), tag_to_ix, EMBEDDING_DIM, HIDDEN_DIM)

    checkpoint = torch.load('model_100_all_data_0226.pth')
    model.load_state_dict(checkpoint['model_state_dict'])
    with torch.no_grad():
        precheck_sent = prepare_sequence(sample, word_to_ix)
        score, ix = model(precheck_sent)
    print(score)
    predict = []
    for i in ix:
        predict.append(ix_to_tag[i])
    for i in range(len(ix)):
        print(sample[i], predict[i])
Example #10
def get_score_from_model_path(model_path,
                              tag_file,
                              pdf_root_dir,
                              pred_json_dir=None):
    path = os.listdir(pdf_root_dir)
    with open('supporting_document/train_word_to_tag_0223.json') as j:
        word_to_ix = json.load(j)
    tag_to_ix = {
        'b-name': 0,
        'i-name': 1,
        'b-bir': 2,
        'i-bir': 3,
        'b-gend': 4,
        'i-gend': 5,
        'b-tel': 6,
        'i-tel': 7,
        'b-acad': 8,
        'i-acad': 9,
        'b-nati': 10,
        'i-nati': 11,
        'b-live': 12,
        'i-live': 13,
        'b-poli': 14,
        'i-poli': 15,
        'b-unv': 16,
        'i-unv': 17,
        'b-comp': 18,
        'i-comp': 19,
        'b-work': 20,
        'i-work': 21,
        'b-post': 22,
        'i-post': 23,
        'b-proj': 24,
        'i-proj': 25,
        'b-resp': 26,
        'i-resp': 27,
        'b-degr': 28,
        'i-degr': 29,
        'b-grti': 30,
        'i-grti': 31,
        'b-woti': 32,
        'i-woti': 33,
        'b-prti': 34,
        'i-prti': 35,
        'o': 36,
        '<start>': 37,
        '<stop>': 38
    }
    ix_to_tag = {v: k for k, v in tag_to_ix.items()}

    model = BiLSTM_CRF(len(word_to_ix), tag_to_ix, EMBEDDING_DIM, HIDDEN_DIM)
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['model_state_dict'])

    pred_pdf_info = {}
    print('predicting...')
    for p in tqdm(path):
        if p.endswith('.pdf'):
            file_name = p[:-4]
            try:
                content = get_str_from_pdf(os.path.join(pdf_root_dir, p))
                char_list = list(content)
                with torch.no_grad():
                    precheck_sent = prepare_sequence(char_list, word_to_ix)
                    _, ix = model(precheck_sent)
                info = write_info_by_ix(ix, content, ix_to_tag)
                pred_pdf_info[file_name] = info
            except Exception as e:
                if file_name not in pred_pdf_info:
                    pred_pdf_info[file_name] = {}
                print(e)
    print('predict OK!')
    if pred_json_dir is not None:
        pred_json_name = os.path.splitext(os.path.basename(model_path))[0] + '.json'
        pred_json_path = os.path.join(pred_json_dir, pred_json_name)
        with open(pred_json_path, 'w') as j:
            json.dump(pred_pdf_info, j, ensure_ascii=False)

    with open(tag_file, 'r') as j:
        label_pdf_info = json.load(j)
    score = get_score_by_label_pred(label_pdf_info, pred_pdf_info)
    return score
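write_info_by_ix is not part of this excerpt. As an illustration of the step it presumably performs, the sketch below does a generic BIO grouping over the predicted tag indices; the exact output structure of the original function is unknown:

def extract_spans(ix, content, ix_to_tag):
    # Group consecutive b-/i- tags of the same field into (field, text) spans.
    spans, cur_field, cur_chars = [], None, []
    for i, tag_ix in enumerate(ix):
        tag = ix_to_tag[tag_ix]
        if tag.startswith('b-'):
            if cur_field is not None:
                spans.append((cur_field, ''.join(cur_chars)))
            cur_field, cur_chars = tag[2:], [content[i]]
        elif tag.startswith('i-') and cur_field == tag[2:]:
            cur_chars.append(content[i])
        else:
            if cur_field is not None:
                spans.append((cur_field, ''.join(cur_chars)))
            cur_field, cur_chars = None, []
    if cur_field is not None:
        spans.append((cur_field, ''.join(cur_chars)))
    return spans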
Example #11
                   use_gpu=use_gpu,
                   char_to_ix=char_to_id,
                   pre_word_embeds=word_embeds,
                   use_crf=parameters['crf'],
                   char_mode=parameters['char_mode'],
                   char_embedding_dim=parameters['char_dim'],
                   char_lstm_dim=parameters['char_lstm_dim'],
                   alpha=parameters['alpha'])
                   # n_cap=4,
                   # cap_embedding_dim=10)
        
if parameters['reload']:
    print('loading model')
    checkpoint = torch.load(models_path+parameters['reload'])
    #model.load_state_dict(checkpoint['state_dict'])
    model.load_state_dict(checkpoint)
    
if use_gpu:
    model.cuda()

learning_rate = 0.015
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
losses = []
best_dev_F = -1.0
best_test_F = -1.0
best_train_F = -1.0
all_F = [[0, 0, 0]]
plot_every = 10
eval_every = 20
count = 0
Example #12
def main():
    parser = argparse.ArgumentParser()
    # parameters
    parser.add_argument("--epoch",
                        default=100,
                        type=int,
                        help="the number of epoches needed to train")
    parser.add_argument("--lr",
                        default=1e-3,
                        type=float,
                        help="the learning rate")
    parser.add_argument("--train_data_path",
                        default='data/train.tsv',
                        type=str,
                        help="train dataset path")
    parser.add_argument("--dev_data_path",
                        default=None,
                        type=str,
                        help="dev dataset path")
    parser.add_argument("--test_data_path",
                        default='data/test.tsv',
                        type=str,
                        help="test dataset path")
    parser.add_argument("--train_batch_size",
                        default=128,
                        type=int,
                        help="the batch size")
    parser.add_argument("--dev_batch_size",
                        default=64,
                        type=int,
                        help="the batch size")
    parser.add_argument("--test_batch_size",
                        default=64,
                        type=int,
                        help="the batch size")
    parser.add_argument("--embedding_path",
                        default='data/sgns.renmin.bigram-char',
                        type=str,
                        help="pre-trained word embeddings path")
    parser.add_argument("--embedding_size",
                        default=300,
                        type=int,
                        help="the word embedding size")
    parser.add_argument("--hidden_size",
                        default=512,
                        type=int,
                        help="the hidden size")
    parser.add_argument("--fine_tuning",
                        default=True,
                        type=bool,
                        help="whether fine-tune word embeddings")
    parser.add_argument("--early_stopping",
                        default=15,
                        type=int,
                        help="Tolerance for early stopping (# of epochs).")
    parser.add_argument("--load_model",
                        default='results/20_Model_best.pt',
                        help="load pretrained model for testing")
    args = parser.parse_args()

    if not args.train_data_path:
        logger.info("please input train dataset path")
        exit()
    if not (args.dev_data_path or args.test_data_path):
        logger.info("please input dev or test dataset path")
        exit()

    TEXT, LABEL, vocab_size, word_embeddings, train_iter, dev_iter, test_iter, tag_dict = \
                dataset.load_dataset(args.train_data_path, args.dev_data_path, \
                 args.test_data_path, args.embedding_path, args.train_batch_size, \
                                        args.dev_batch_size, args.test_batch_size)

    idx_tag = {}
    for tag in tag_dict:
        idx_tag[tag_dict[tag]] = tag

    model = BiLSTM_CRF(args.embedding_size, args.hidden_size, vocab_size,
                       tag_dict, word_embeddings)
    if torch.cuda.is_available():
        model = model.cuda()

    # cost_test = []
    # start = time.perf_counter()
    # train_dev_size = len(train_iter)
    # train_size = int(train_dev_size*0.9)
    train_data, dev_data = dataset.train_dev_split(train_iter, 0.9)
    # for batch in train_data:
    #     print(batch)
    #     exit()

    # train_data = lambda: islice(train_iter,0,train_size)
    # dev_data = lambda: islice(train_iter,train_size,train_dev_size)
    # train_data = islice(train_iter,0,train_size)
    # dev_data = islice(train_iter,train_size,train_dev_size)
    if args.load_model:
        model.load_state_dict(torch.load(args.load_model, map_location='cpu'))
        # p, r, f1, eval_loss, all_assess = eval_model(model, dev_data, idx_tag)
        # logger.info('Eval Loss:%.4f, Eval P:%.4f, Eval R:%.4f, Eval F1:%.4f', \
        #                             eval_loss, p, r, f1)
        p, r, f1, eval_loss, all_assess = eval_model(model, test_iter, idx_tag)
        logger.info('LOC Test P:%.4f, Test R:%.4f, Test F1:%.4f', \
                all_assess['LOC']['P'], all_assess['LOC']['R'], all_assess['LOC']['F'])
        logger.info('PER Test P:%.4f, Test R:%.4f, Test F1:%.4f', \
                all_assess['PER']['P'], all_assess['PER']['R'], all_assess['PER']['F'])
        logger.info('ORG Test P:%.4f, Test R:%.4f, Test F1:%.4f', \
                all_assess['ORG']['P'], all_assess['ORG']['R'], all_assess['ORG']['F'])
        logger.info('Micro_AVG Test P:%.4f, Test R:%.4f, Test F1:%.4f', \
                                    p, r, f1)
        return

    best_score = 0.0
    for epoch in range(args.epoch):
        # train_data_ = copy.deepcopy(train_data)
        # dev_data_ = copy.deepcopy(dev_data)
        # train_model(model, train_data_, dev_data_, epoch, args.lr, idx_tag)
        train_loss, p, r, f1, eval_loss = train_model(model, train_data,
                                                      dev_data, epoch, args.lr,
                                                      idx_tag)

        logger.info('Epoch:%d, Training Loss:%.4f', epoch, train_loss)
        logger.info('Epoch:%d, Eval Loss:%.4f, Eval P:%.4f, Eval R:%.4f, Eval F1:%.4f', \
                                    epoch, eval_loss, p, r, f1)
        # p, r, f1, eval_loss, all_assess = eval_model(model,  test_iter, idx_tag)
        # logger.info('Test Loss:%.4f, Test P:%.4f, Test R:%.4f, Test F1:%.4f', \
        #                             eval_loss, p, r, f1)
        if f1 > best_score:
            best_score = f1
            torch.save(
                model.state_dict(),
                'results/%d_%s_%s.pt' % (epoch, 'Model', str(best_score)))
Example #13
    words = vectorizer.token_vocab._token_to_idx.keys()
    embeddings = make_embedding_matrix(glove_filepath=args.glove_filepath,
                                       words=words)
    print("Using pre-trained embeddings")
else:
    print("Not using pre-trained embeddings")
    embeddings = None

model = BiLSTM_CRF(vectorizer.token_vocab,
                   vectorizer.tag_vocab,
                   args.batch_size,
                   dropout=args.dropout,
                   embedding_dim=args.embedding_dim,
                   hidden_dim=args.hidden_dim)
if args.reload_from_files and os.path.exists(args.model_state_file):
    model.load_state_dict(torch.load(args.model_state_file))
    print("Reloaded model")
else:
    print("New model")

model = model.to(args.device)
# note: this loop re-initializes every parameter, so it overwrites any weights
# reloaded from args.model_state_file above
for name, param in model.named_parameters():
    if 'weight' in name:
        nn.init.xavier_normal_(param.data)
    else:
        nn.init.constant_(param.data, 0)
optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
                                                 mode='min',
                                                 factor=0.5,
                                                 patience=1)
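make_embedding_matrix is defined elsewhere in that project; a minimal sketch, assuming a whitespace-separated GloVe text file and random initialization for words without a pre-trained vector:

import numpy as np

def make_embedding_matrix(glove_filepath, words):
    # Load pre-trained vectors, then build a matrix aligned with `words`,
    # randomly initialising entries for out-of-vocabulary words.
    glove = {}
    with open(glove_filepath, encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip().split(' ')
            glove[parts[0]] = np.asarray(parts[1:], dtype='float32')
    dim = len(next(iter(glove.values())))
    matrix = np.random.normal(scale=0.1, size=(len(words), dim)).astype('float32')
    for i, w in enumerate(words):
        if w in glove:
            matrix[i] = glove[w]
    return matrix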
Example #14
    train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=args.batch_size,
                                               collate_fn=collate_fn, shuffle=True)
    valid_loader = torch.utils.data.DataLoader(dataset=valid_data, batch_size=args.batch_size,
                                               collate_fn=collate_fn, shuffle=False)
    test_loader = torch.utils.data.DataLoader(dataset=test_data, batch_size=args.batch_size,
                                              collate_fn=collate_fn, shuffle=False)

    weight_matrix = get_weight(wvmodel, len(word2id), args.embedding_size)
    print('weight_matrix', weight_matrix.size())
    model = BiLSTM_CRF(len(word2id), label2id, args.embedding_size, weight_matrix, args.hidden_size).cuda()
    
    if os.path.exists(args.param_path):
        print('loading params')
        # pdb.set_trace()
        model.load_state_dict(torch.load(args.param_path))

    optim = torch.optim.Adam(model.parameters(), args.learning_rate)
    criterion = torch.nn.CrossEntropyLoss()

    train(args, train_loader,valid_loader, model, optim, criterion)
    end_loss, end_f1 = evaluate_accuracy(model, test_loader)
    print("====================>test loss: %.4f, test f1 : %.4f"%(end_loss, end_f1))
else:
    print('test begin')
    with open(args.test_path, 'r', encoding='utf-8') as ftest_text:
        test_textlines = [line.strip().lower().split(' ') for line in ftest_text.readlines()]

        test_textlines = [[word2id[word] if word in word2id else unk for word in line] for line in test_textlines]

        test_textlines = [torch.Tensor(line).long() for line in test_textlines]
Example #15
    sentences_sort = sentences[idx_sort]
    _, idx_unsort = idx_sort.sort(0, descending=False)

    return sentences_sort, lengths_sort, idx_unsort


char2idx = pickle.load(open('char2idx.pkl', 'rb'))
data = pickle.load(open('predict_data.pkl', 'rb'))

predict_data = PredData(data, char2idx)
dataloader = DataLoader(predict_data, batch_size=32, drop_last=False)

model = BiLSTM_CRF(len(char2idx), len(Config.tagert2idx), Config.embedding_dim,
                   Config.hidden_dim)

model.load_state_dict(torch.load('model_best.pth'))
if Config.use_gpu:
    model.to('cuda')
model.eval()

predict_result = []
with torch.no_grad():
    for batch_sentences, batch_lengths in dataloader:
        sentences, lengths, idx_unsort = sort_batch_data(
            batch_sentences, batch_lengths)
        if Config.use_gpu:
            sentences = sentences.cuda()
        pred = model(sentences, lengths)
        pred = pred[idx_unsort]
        pred = pred.cpu().numpy()
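Only the tail of sort_batch_data appears at the top of this excerpt. A minimal sketch of the full helper, assuming it sorts a padded batch by descending length (as pack_padded_sequence expects) and returns the permutation needed to restore the original order:

def sort_batch_data(sentences, lengths):
    # Sort by descending length and remember how to undo the sort;
    # `lengths` is assumed to be a 1-D LongTensor of sequence lengths.
    lengths_sort, idx_sort = lengths.sort(0, descending=True)
    sentences_sort = sentences[idx_sort]
    _, idx_unsort = idx_sort.sort(0, descending=False)
    return sentences_sort, lengths_sort, idx_unsort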
Example #16
losses = []
best_dev_F = -1.0
best_test_F = -1.0
best_train_F = -1.0
all_F = [[0, 0, 0]]
plot_every = 10
eval_every = 20
sample_count = 0

best_idx = 0

if parameters['reload']:
    print('loading model:', parameters['reload'])
    checkpoint = torch.load(models_path + parameters['reload'])
    #model.load_state_dict(checkpoint)
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    adjust_learning_rate(optimizer, lr=learning_rate)

sys.stdout.flush()

from conlleval import evaluate

model.train(True)
ratio = 0.0
if parameters['adv']:
    ratio = 0.5

Example #17
                                    seqid2text(preds[i, :l], ix_to_lab))
                        f1 = f1_score(true_labels, pred_labels)
                        if (f1 > best_f1):
                            torch.save(model.state_dict(),
                                       "models/model-27-02-20")
                            best_f1 = f1

                        print("Accuracy: {:.4f}".format(
                            accuracy_score(true_labels, pred_labels)))
                        print("F1 score: {:.4f}".format(f1))
                        print(classification_report(true_labels, pred_labels))
                        model.train(True)
    if args.do_test:
        with torch.no_grad():
            print("Evaluation on test set")
            model.load_state_dict(
                torch.load("models/model-27-02-20", map_location=device))
            model.eval()
            true_labels = []
            pred_labels = []
            word_sents = []
            for batch in test_data_loader:
                sents, labs, lens = batch
                sents = pad_sequence(sents, batch_first=True).to(device)
                labs = pad_sequence(labs, batch_first=True).to(device)
                lens = torch.tensor(lens).to(device)
                lens, idx = torch.sort(lens, descending=True)
                sents = sents[idx]
                labs = labs[idx]
                score, preds = model(sents, lens)
                for i, l in enumerate(lens):
                    true_labels.append(seqid2text(labs[i, :l], ix_to_lab))