# Training-script setup: read the parsed command-line options, load the
# training/validation data through helper.getTrain, and pull the individual
# feature arrays out of the returned containers.
args = parser.parse_args()
train_path = args.train_path
save_path = args.save_path
val_path = args.val_path
num_epochs = args.epoch
gpu_config = "/gpu:" + str(args.gpu)
# gpu_config = "/cpu:0"
num_steps = 200  # must stay consistent with the value used at test time

# get trainData and devData
start_time = time.time()
print "preparing train and validation data"
train_data, val_data = helper.getTrain(train_path=train_path, val_path=val_path, seq_max_len=num_steps)

# Features for the training data; each entry is looked up by string key.
X_train = train_data['char']
X_left_train = train_data['left']
X_right_train = train_data['right']
X_pos_train = train_data['pos']
X_lpos_train = train_data['lpos']
X_rpos_train = train_data['rpos']
X_rel_train = train_data['rel']
X_dis_train = train_data['dis']
y_train = train_data['label']

# Features for the validation (dev) data.
X_val = val_data['char']
type=int) args = parser.parse_args() train_path = args.train_path save_path = args.save_path val_path = args.val_path num_epochs = args.epoch emb_path = args.char_emb gpu_config = "/gpu:" + str(args.gpu) num_steps = 200 # it must consist with the test start_time = time.time() print "preparing train and validation data" X_train, y_train, X_val, y_val = helper.getTrain(train_path=train_path, val_path=val_path, seq_max_len=num_steps) char2id, id2char = helper.loadMap("char2id") label2id, id2label = helper.loadMap("label2id") num_chars = len(id2char.keys()) num_classes = len(id2label.keys()) if emb_path != None: embedding_matrix = helper.getEmbedding(emb_path) else: embedding_matrix = None print "building model" config = tf.ConfigProto(allow_soft_placement=True) with tf.Session(config=config) as sess: with tf.device(gpu_config): initializer = tf.random_uniform_initializer(-0.1, 0.1)
# Evaluation-script fragment: collect label ids from the test file, load the
# test set through helper.getTrain, open the output file, and start building
# the model.
y_true = []
with open(test_path, 'r') as file_in:
    tmp = []
    for line in file_in:
        # A line holding only a newline closes the current sequence. A final
        # sequence that is not followed by such a line is never appended.
        if line == "\n":
            # if len(tmp) <= 200:
            y_true.append(tmp)
            tmp = []
        else:
            # The second whitespace-separated field is looked up in label2id.
            split_tmp = line.strip().split()
            tmp.append(label2id[split_tmp[1]])
y_true = helper.padding(y_true, 200)

# NOTE(review): y_true is rebound by this unpacking, so the list built from
# the file above is discarded here.
X, y_true, X_test_source_val, y_test_source_val = helper.getTrain(
    dict_path=dict_path, train_path=test_path, val_path="./test.txt",
    seq_max_len=num_steps, is_shuffle=False)

# NOTE(review): opened without a `with`; no close is visible in this excerpt.
file_out = open(output_path, 'w')

print "building model"
config = tf.ConfigProto(allow_soft_placement=True)
with tf.Session(config=config) as sess:
    with tf.device(gpu_config):
        # initializer = tf.random_uniform_initializer(-0.1, 0.1)
        with tf.variable_scope("model"):
            model = BILSTM_CRF(num_chars=num_chars, num_classes=num_classes, num_steps=num_steps, embedding_matrix=embedding_matrix, is_training=False)
parser.add_argument("-g","--gpu", help="the id of gpu, the default is 0", default=0, type=int) args = parser.parse_args() train_path = args.train_path save_path = args.save_path val_path = args.val_path num_epochs = args.epoch emb_path = args.char_emb # gpu_config = "/gpu:"+str(args.gpu) gpu_config = "/cpu:0" num_steps = 200 # it must consist with the test start_time = time.time() print "preparing train and validation data" X_train, y_train, X_val, y_val = helper.getTrain(train_path=train_path, val_path=val_path, seq_max_len=num_steps) char2id, id2char = helper.loadMap("char2id") label2id, id2label = helper.loadMap("label2id") num_chars = len(id2char.keys()) num_classes = len(id2label.keys()) if emb_path != None: embedding_matrix = helper.getEmbedding(emb_path) else: embedding_matrix = None print "building model" config = tf.ConfigProto(allow_soft_placement=True) with tf.Session(config=config) as sess: with tf.device(gpu_config): initializer = tf.random_uniform_initializer(-0.1, 0.1) with tf.variable_scope("model", reuse=None, initializer=initializer):
# NOTE(review): the next line is the tail of a function whose header lies outside this excerpt; it is left exactly as found because its nesting cannot be confirmed from here.
if presicion_loc + recall_loc != 0: f_loc = 2 * presicion_loc * recall_loc / (presicion_loc + recall_loc) if all_pre_org != 0 and all_true_org != 0: presicion_org = true_org * 1.0 / all_pre_org recall_org = true_org * 1.0 / all_true_org if presicion_org + recall_org != 0: f_org = 2 * presicion_org * recall_org / (presicion_org + recall_org) if all_pre_per != 0 and all_true_per != 0: presicion_per = true_per * 1.0 / all_pre_per recall_per = true_per * 1.0 / all_true_per if presicion_per + recall_per != 0: f_per = 2 * presicion_per * recall_per / (presicion_per + recall_per) return presicion_loc, recall_loc, f_loc, presicion_org, recall_org, f_org, presicion_per, recall_per, f_per


if __name__ == "__main__":
    # Script entry point. The first two command-line arguments are taken as
    # the training file and the save path; the unpacking raises ValueError
    # when fewer than two are given.
    train_file, save_path = sys.argv[1:3]
    char2id, id2char, label2id, id2label = helper.buildMap(train_file)
    # print char2id
    # sys.exit(0)
    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-0.1, 0.1)
        # The model is constructed inside the "model" variable scope with the
        # uniform initializer as the scope default.
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            ner_model = NerModel(len(char2id), True)
        # NOTE(review): the initializer op runs before Train(...) is
        # constructed; confirm Train creates no further variables.
        tf.global_variables_initializer().run()
        train = Train(ner_model, train_file, 1, 500, 100)
        # train_file is passed twice, for both positional arguments.
        X_train, y_train, X_val, y_val = helper.getTrain(train_file, train_file)
        train.train(session, save_path, X_train, y_train, X_val, y_val)
# Validate the requested model type before doing any work.
model_name = args.model
if model_name not in ("cnn", "lstm"):
    print("model name invalid! Please use -m cnn or -m lstm in command")
    # Exit with a non-zero status so callers can detect the failure; a bare
    # sys.exit() would report success (status 0).
    sys.exit(1)

start_time = time.time()
seq_len = args.seq_len

print("Preparing train and validation data...")
train = helper.loadFile(args.train_path)
dev = helper.loadFile(args.dev_path)
map_dir = "token_label_id_mapping"
X_train, y_train, X_val, y_val, feat2id, id2label = helper.getTrain(train, dev, map_dir, seq_len)

# Model dimensions are taken from the sizes of the returned maps.
num_chars = len(feat2id)
num_classes = len(id2label)
save_path = args.save_path
# emb_path = "word_emb_matrix_100d"

print("Building model...")
config = tf.ConfigProto(allow_soft_placement=True)
config.gpu_options.allow_growth = True
gpu_config = "/gpu:0"  # device is hard-coded in this script
with tf.Session(config=config) as sess:
    with tf.device(gpu_config):
        initializer = tf.random_normal_initializer(stddev=0.1)
# Setup fragment for a script that loads separate "source" and "target"
# training sets plus a test set, all through helper.getTrain.
train_target_path = args.train_target_path
dict_path = args.dict_path
save_path = args.save_path
test_source_path = args.test_source_path
val_path = args.val_path  # not passed to any call visible in this excerpt
num_epochs = args.epoch
emb_path = args.char_emb
gpu_config = "/gpu:" + str(args.gpu)
num_steps = 200  # must stay consistent with the value used at test time

start_time = time.time()
print "preparing train and validation data"
# Called for its side effect only; the return value is not kept.
helper.get_dict(dict_path)

# All three loads pass test_source_path as val_path.
# train_source_path is defined before this excerpt.
X_train_source, y_train_source, X_val_source, y_val_source = helper.getTrain(
    dict_path=dict_path, train_path=train_source_path, val_path=test_source_path, seq_max_len=num_steps)
X_train_target, y_train_target, X_val_target, y_val_target = helper.getTrain(
    dict_path=dict_path, train_path=train_target_path, val_path=test_source_path, seq_max_len=num_steps)
# The test set is loaded with is_shuffle=False.
X_test_source, y_test_source, X_test_source_val, y_test_source_val = helper.getTrain(
    dict_path=dict_path, train_path=test_source_path, val_path=test_source_path, seq_max_len=num_steps, is_shuffle=False)

char2id, id2char = helper.loadMap("char2id")
label2id, id2label = helper.loadMap("label2id")
# Graph inputs: integer placeholders with an unspecified first dimension.
x1 = tf.placeholder(tf.int32, [None, n_steps])
y = tf.placeholder(tf.int32, [None, 2])

# Define weights
weights = {'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))}
biases = {'out': tf.Variable(tf.random_normal([n_classes]))}

# Hard-coded run configuration.
train_path = 'train.2'
char2id_file = 'char2id'
label2id_file = 'label2id'
save_path = './'
emb_dim = '100'  # NOTE(review): stored as a string; confirm consumers expect str

X_train, y_train, X_val, y_val = helper.getTrain(
    train_path=train_path, val_path=None, seq_max_len=n_steps,
    char2id_file=char2id_file, label2id_file=label2id_file)

# Shuffle the training examples and labels with one shared permutation so
# that each example stays paired with its label.
sh_index = np.arange(len(X_train))
np.random.shuffle(sh_index)
X_train = X_train[sh_index]
y_train = y_train[sh_index]

char2id, id2char = helper.loadMap(char2id_file)
label2id, id2label = helper.loadMap(label2id_file)
num_chars = len(id2char)  # vocabulary size
num_classes = len(id2label)  # number of label classes

# emb_path is fixed to None here, so the branch below does not run and
# embedding_matrix is not bound in this excerpt. The identity test is the
# PEP 8 form for comparing against None.
emb_path = None
if emb_path is not None:
    embedding_matrix = helper.getEmbedding(emb_path, char2id_file)
    # print len([_ for _ in np.sum(embedding_matrix,axis=1) if _ != 0])