# Training driver (lexicon/"gaz"-augmented variant).
# FIX: the original chunk had all statements fused onto one physical line,
# which is a SyntaxError in Python; line/indent structure is restored from
# the `if status == 'train':` markers present in the original text.
# Echo the resolved configuration so runs are reproducible from logs.
print("Seg: ", seg)
print("Train file:", train_file)
print("Dev file:", dev_file)
print("Test file:", test_file)
print("Raw file:", raw_file)
print("Char emb:", char_emb)
print("Bichar emb:", bichar_emb)
print("Gaz file:", gaz_file)
if status == 'train':
    print("Model saved to:", save_model_dir)
sys.stdout.flush()  # force the banner out before long-running training

if status == 'train':
    data = Data()
    data.HP_gpu = gpu
    data.HP_use_char = False       # no char-CNN/LSTM features in this variant
    data.HP_batch_size = 10
    data.use_bigram = False
    data.gaz_dropout = 0.5
    data.norm_gaz_emb = False
    data.HP_fix_gaz_emb = False    # gazetteer embeddings stay trainable
    # Build alphabets + gazetteer tries over all three splits, then convert
    # each split into model-ready instances.
    data_initialization(data, gaz_file, train_file, dev_file, test_file)
    data.generate_instance_with_gaz(train_file, 'train')
    data.generate_instance_with_gaz(dev_file, 'dev')
    data.generate_instance_with_gaz(test_file, 'test')
    # Load pretrained embeddings (char, bichar, gazetteer) before training.
    data.build_word_pretrain_emb(char_emb)
    data.build_biword_pretrain_emb(bichar_emb)
    data.build_gaz_pretrain_emb(gaz_file)
    train(data, save_model_dir, dset_dir, seg)
# Training driver (second variant: batch_size 1, 3-arg train(), plus a
# 'test' branch).
# FIX 1: the original chunk had all statements fused onto one physical line
# (SyntaxError); structure restored from the `if/elif status` markers.
# FIX 2: Python-2 `print` statements converted to `print(...)` calls for
# consistency with the Python-3-style code elsewhere in this file; all
# printed strings are preserved byte-for-byte.
print("Seg: ", seg)
print("Train file:", train_file)
print("Dev file:", dev_file)
print("Test file:", test_file)
print("Raw file:", raw_file)
print("Char emb:", char_emb)
print("Bichar emb:", bichar_emb)
print("Gaz file:", gaz_file)
if status == 'train':
    print("Model saved to:", save_model_dir)
sys.stdout.flush()  # flush the config banner before training starts

if status == 'train':
    data = Data()
    data.HP_gpu = gpu
    data.HP_use_char = False      # char-level features disabled here
    data.HP_batch_size = 1        # NOTE: this variant trains one sentence at a time
    data.use_bigram = False
    data.gaz_dropout = 0.5
    data.norm_gaz_emb = False
    data.HP_fix_gaz_emb = False
    # Build alphabets/gazetteer over all splits, then instance-ify each split.
    data_initialization(data, gaz_file, train_file, dev_file, test_file)
    data.generate_instance_with_gaz(train_file, 'train')
    data.generate_instance_with_gaz(dev_file, 'dev')
    data.generate_instance_with_gaz(test_file, 'test')
    data.build_word_pretrain_emb(char_emb)
    data.build_biword_pretrain_emb(bichar_emb)
    data.build_gaz_pretrain_emb(gaz_file)
    train(data, save_model_dir, seg)
elif status == 'test':
    # Reload the Data settings saved at training time before decoding.
    data = load_data_setting(dset_dir)
# NOTE(review): this chunk copies command-line args onto a Data config object
# (first run: updating a reloaded Data; after the bare `else:`: configuring a
# fresh Data() — the matching `if` branch is outside this view). The trailing
# `data_initialization(data, gaz_file, train_file, dev_file,` call is cut off
# mid-argument-list at the chunk boundary — presumably it continues with
# `test_file)`; confirm against the full file before editing. All statements
# are also fused onto one physical line (invalid as-is); left byte-identical
# here because the surrounding truncated structure cannot be reconstructed
# safely from this view.
data.HP_batch_size = args.batch_size data.HP_iteration = args.num_iter data.label_comment = args.labelcomment data.result_file = args.resultfile data.HP_lr = args.lr data.use_bigram = args.use_biword data.HP_use_char = args.use_char data.HP_hidden_dim = args.hidden_dim data.HP_dropout = args.drop data.HP_use_count = args.use_count data.model_type = args.model_type data.use_bert = args.use_bert else: data = Data() data.HP_gpu = gpu data.HP_use_char = args.use_char data.HP_batch_size = args.batch_size data.HP_num_layer = args.num_layer data.HP_iteration = args.num_iter data.use_bigram = args.use_biword data.HP_dropout = args.drop data.norm_gaz_emb = False data.HP_fix_gaz_emb = False data.label_comment = args.labelcomment data.result_file = args.resultfile data.HP_lr = args.lr data.HP_hidden_dim = args.hidden_dim data.HP_use_count = args.use_count data.model_type = args.model_type data.use_bert = args.use_bert data_initialization(data, gaz_file, train_file, dev_file,
# NOTE(review): embedding-file selection + training driver for a non-gaz
# variant (uses `generate_instance`, not `generate_instance_with_gaz`, and a
# 4-arg data_initialization). The chunk starts mid-conditional — the bare
# `else: emb_file = None` has no visible `if`, so the branch that picks the
# "joint4" embedding path is outside this view; do not edit without the full
# file. Uses Python-2 `print` statements, inconsistent with the py3-style
# code elsewhere in this file. Statements are fused onto one physical line
# (invalid as-is); left byte-identical because the truncated leading
# conditional cannot be reconstructed safely.
# emb_file = "../data/gigaword_chn.all.a2b.uni.ite50.vec" emb_file = "../data/joint4.all.b10c1.2h.iter17.mchar" ### catner else: emb_file = None char_emb_file = args.charemb.lower() print "Char Embedding:", char_emb_file if char_emb_file == "rich": char_emb_file = "../data/joint4.all.b10c1.2h.iter17.mchar" ### catner elif char_emb_file == "normal": char_emb_file = "../data/gigaword_chn.all.a2b.uni.ite50.vec" ### catner data = Data() data.number_normalized = True data_initialization(data, train_file, dev_file, test_file) data.HP_gpu = gpu data.HP_use_char = True data.HP_batch_size = 10 ## catner data.HP_lr = 0.015 # data.char_features = "CNN" data.generate_instance(train_file,'train') data.generate_instance(dev_file,'dev') data.generate_instance(test_file,'test') if emb_file: print "load word emb file... norm:", data.norm_word_emb data.build_word_pretrain_emb(emb_file) if char_emb_file != "none": print "load char emb file... norm:", data.norm_char_emb data.build_char_pretrain_emb(char_emb_file) train(data, save_model_dir, seg) elif status == 'test': data = load_data_setting(dset_dir)
# -*- coding: utf-8 -*- # @Author: Jie # @Date: 2017-06-15 14:11:08 # @Last Modified by: Jie Yang, Contact: [email protected] # @Last Modified time: 2018-07-06 11:08:27 import time import sys import argparse import random import copy import torch import gc import pickle as pickle import torch.autograd as autograd import torch.nn as nn import torch.nn.functional as F import torch.optim as optim import numpy as np from utils.metric import get_ner_fmeasure from model.bilstmcrf import BiLSTM_CRF as SeqModel from utils.data import Data seed_num = 100 random.seed(seed_num) torch.manual_seed(seed_num) np.random.seed(seed_num) def data_initialization(data, gaz_file, train_file, dev_file, test_file): data.build_alphabet(train_file) data.build_alphabet(dev_file) data.build_alphabet(test_file)
# NOTE(review): this chunk is the file header, import block, RNG seeding
# (random / torch / numpy all seeded with 100 for reproducibility), and the
# start of data_initialization(), which builds the char/label alphabets from
# all three data splits. The `gaz_file` parameter is never used in the
# visible body — presumably the function continues past this view with
# gazetteer building (e.g. build_gaz_file / build_gaz_alphabet / a final
# fix_alphabet); confirm against the full file before editing. Statements
# are fused onto one physical line (invalid as-is); left byte-identical
# because the function body appears truncated at the chunk boundary.
# NOTE(review): `import pickle as pickle` is a redundant alias — harmless,
# but plain `import pickle` would do.