Esempio n. 1
0
# Parse the command line (`parser` is built above this excerpt).
args = parser.parse_args()

# Copy the parsed options into module-level settings.
train_path = args.train_path
save_path = args.save_path
val_path = args.val_path
num_epochs = args.epoch
# Device string built from args.gpu, e.g. "/gpu:0" when args.gpu is 0.
gpu_config = "/gpu:" + str(args.gpu)
# CPU alternative, currently disabled: gpu_config = "/cpu:0"
num_steps = 200  # passed to getTrain as seq_max_len; must be consistent with the value used at test time

# Load the training and validation (dev) data.
start_time = time.time()  # start timestamp; where it is read lies outside this excerpt
print "preparing train and validation data"
train_data, val_data = helper.getTrain(train_path=train_path,
                                       val_path=val_path,
                                       seq_max_len=num_steps)

# Features of the training data, looked up by key in the object returned by getTrain.
# NOTE(review): what each key holds (left/right context, POS, relation, distance?)
# is inferred from the key names only - confirm against helper.getTrain.
X_train = train_data['char']
X_left_train = train_data['left']
X_right_train = train_data['right']
X_pos_train = train_data['pos']
X_lpos_train = train_data['lpos']
X_rpos_train = train_data['rpos']
X_rel_train = train_data['rel']
X_dis_train = train_data['dis']
y_train = train_data['label']

# Features of the validation (dev) data.
X_val = val_data['char']
Esempio n. 2
0
                    type=int)

# Parse the command line (`parser` is configured above this excerpt).
args = parser.parse_args()

# Run settings taken from the parsed arguments.
train_path = args.train_path
save_path = args.save_path
val_path = args.val_path
num_epochs = args.epoch
emb_path = args.char_emb
gpu_config = "/gpu:" + str(args.gpu)
num_steps = 200  # passed to getTrain as seq_max_len; must be consistent with the value used at test time

start_time = time.time()
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print("preparing train and validation data")
X_train, y_train, X_val, y_val = helper.getTrain(train_path=train_path,
                                                 val_path=val_path,
                                                 seq_max_len=num_steps)
# Load the character and label id maps by name.
# NOTE(review): presumably these maps are produced by getTrain above - confirm.
char2id, id2char = helper.loadMap("char2id")
label2id, id2label = helper.loadMap("label2id")
# len() of a mapping already counts its keys; no need to materialise .keys().
num_chars = len(id2char)
num_classes = len(id2label)
# None is a singleton: compare by identity (PEP 8) rather than with `!=`.
if emb_path is not None:
    embedding_matrix = helper.getEmbedding(emb_path)
else:
    embedding_matrix = None

# Open the TensorFlow session in which the model is built (the body continues
# past the end of this excerpt).
print "building model"
# allow_soft_placement=True lets TensorFlow fall back to another device when an
# op cannot be placed on the device requested below.
config = tf.ConfigProto(allow_soft_placement=True)
with tf.Session(config=config) as sess:
    # Pin ops created in this block to the device chosen on the command line.
    with tf.device(gpu_config):
        # Uniform initializer over [-0.1, 0.1]; its consumer is not visible here.
        initializer = tf.random_uniform_initializer(-0.1, 0.1)
Esempio n. 3
0
# Collect the gold label ids of every sentence in the test file.
# Each non-blank line carries the label as its second whitespace-separated
# field; a blank line closes the current sentence.
# NOTE(review): y_true is reassigned by the helper.getTrain call further down,
# so this pass may be redundant - confirm before relying on it.
y_true = []
with open(test_path, 'r') as file_in:
    tmp = []
    for line in file_in:
        fields = line.split()
        if not fields:
            # Blank or whitespace-only separator (also covers "\r\n" line
            # endings, which previously fell through and raised IndexError).
            y_true.append(tmp)
            tmp = []
        else:
            tmp.append(label2id[fields[1]])
    # Keep the final sentence when the file does not end with a blank line;
    # it used to be dropped silently.
    if tmp:
        y_true.append(tmp)
y_true = helper.padding(y_true, 200)

# Load the test file through the training loader, with shuffling disabled.
# Note that this rebinds y_true, replacing the value computed above.
# NOTE(review): val_path is the literal "./test.txt", not a command-line
# option - confirm this is intended.
X, y_true, X_test_source_val, y_test_source_val = helper.getTrain(
    dict_path=dict_path,
    train_path=test_path,
    val_path="./test.txt",
    seq_max_len=num_steps,
    is_shuffle=False)

# Output file for the results.
# NOTE(review): opened without a `with` block and no close() is visible in this
# excerpt - confirm it is closed later.
file_out = open(output_path, 'w')
print "building model"
# allow_soft_placement=True lets TensorFlow fall back to another device when an
# op cannot be placed on the requested one.
config = tf.ConfigProto(allow_soft_placement=True)
with tf.Session(config=config) as sess:
    with tf.device(gpu_config):
        # (disabled) initializer = tf.random_uniform_initializer(-0.1, 0.1)
        with tf.variable_scope("model"):
            # Build the tagger with is_training=False.
            model = BILSTM_CRF(num_chars=num_chars,
                               num_classes=num_classes,
                               num_steps=num_steps,
                               embedding_matrix=embedding_matrix,
                               is_training=False)
parser.add_argument("-g", "--gpu", help="the id of gpu, the default is 0", default=0, type=int)

# Parse the command line; the earlier parser options are defined above this excerpt.
args = parser.parse_args()

# Run settings taken from the parsed arguments.
train_path = args.train_path
save_path = args.save_path
val_path = args.val_path
num_epochs = args.epoch
emb_path = args.char_emb
# The --gpu option is parsed but currently unused: the device is pinned to the CPU.
# To honour the option again use: gpu_config = "/gpu:" + str(args.gpu)
gpu_config = "/cpu:0"
num_steps = 200  # passed to getTrain as seq_max_len; must be consistent with the value used at test time

start_time = time.time()
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print("preparing train and validation data")
X_train, y_train, X_val, y_val = helper.getTrain(train_path=train_path, val_path=val_path, seq_max_len=num_steps)
# Load the character and label id maps by name.
# NOTE(review): presumably these maps are produced by getTrain above - confirm.
char2id, id2char = helper.loadMap("char2id")
label2id, id2label = helper.loadMap("label2id")
# len() of a mapping already counts its keys; no need to materialise .keys().
num_chars = len(id2char)
num_classes = len(id2label)
# None is a singleton: compare by identity (PEP 8) rather than with `!=`.
if emb_path is not None:
	embedding_matrix = helper.getEmbedding(emb_path)
else:
	embedding_matrix = None

# Open the TensorFlow session and variable scope in which the model is built
# (the scope body lies past the end of this excerpt).
print "building model"
# allow_soft_placement=True lets TensorFlow fall back to another device when an
# op cannot be placed on the requested one.
config = tf.ConfigProto(allow_soft_placement=True)
with tf.Session(config=config) as sess:
	with tf.device(gpu_config):
		# Uniform initializer over [-0.1, 0.1], installed as the default for the "model" scope.
		initializer = tf.random_uniform_initializer(-0.1, 0.1)
		with tf.variable_scope("model", reuse=None, initializer=initializer):
Esempio n. 5
0
            if presicion_loc + recall_loc != 0: 
                f_loc = 2 * presicion_loc * recall_loc / (presicion_loc + recall_loc) 
 
        if all_pre_org != 0 and all_true_org != 0: 
            presicion_org = true_org * 1.0 / all_pre_org 
            recall_org = true_org * 1.0 / all_true_org 
            if presicion_org + recall_org != 0: 
                f_org = 2 * presicion_org * recall_org / (presicion_org + recall_org) 
 
        if all_pre_per != 0 and all_true_per != 0: 
            presicion_per = true_per * 1.0 / all_pre_per 
            recall_per = true_per * 1.0 / all_true_per 
            if presicion_per + recall_per != 0: 
                f_per = 2 * presicion_per * recall_per / (presicion_per + recall_per) 
 
        return presicion_loc, recall_loc, f_loc, presicion_org, recall_org, f_org, presicion_per, recall_per, f_per          

if __name__ == "__main__":
    # Script entry point: build the id maps, construct the model and train it.
    # Expects two positional arguments: the training file and the save path.
    if len(sys.argv) < 3:
        # Fail with a usage hint instead of an opaque tuple-unpacking ValueError.
        sys.stderr.write("usage: %s <train_file> <save_path>\n" % sys.argv[0])
        sys.exit(1)
    train_file, save_path = sys.argv[1:3]
    char2id, id2char, label2id, id2label = helper.buildMap(train_file)
    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-0.1, 0.1)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            ner_model = NerModel(len(char2id), True)
        # .run() uses the default session, which the enclosing `with` installs.
        tf.global_variables_initializer().run()
        # NOTE(review): the meaning of the positional values 1, 500, 100 is
        # defined by Train's signature, which is not visible here - confirm.
        train = Train(ner_model, train_file, 1, 500, 100)
        # The training file is passed as both arguments, so the validation
        # arrays come from the training data as well.
        X_train, y_train, X_val, y_val = helper.getTrain(train_file, train_file)
        train.train(session, save_path, X_train, y_train, X_val, y_val)
Esempio n. 6
0
model_name = args.model

# Only the "cnn" and "lstm" models are accepted. An invalid choice is an error,
# so exit with a non-zero status (a bare sys.exit() reports success).
if model_name not in ("cnn", "lstm"):
    print("model name invalid! Please use -m cnn or -m lstm in command")
    sys.exit(1)

start_time = time.time()
seq_len = args.seq_len

print("Preparing train and validation data...")

# Load the raw train and dev files, then convert them with getTrain.
train = helper.loadFile(args.train_path)
dev = helper.loadFile(args.dev_path)
map_dir = "token_label_id_mapping"  # passed to getTrain as its third argument

X_train, y_train, X_val, y_val, feat2id, id2label = helper.getTrain(train, dev, map_dir, seq_len)
num_chars = len(feat2id)
num_classes = len(id2label)

save_path = args.save_path
# Disabled setting: emb_path = "word_emb_matrix_100d"

# Open the TensorFlow session in which the model is built (the body continues
# past the end of this excerpt).
print("Building model...")
# allow_soft_placement=True lets TensorFlow fall back to another device when an
# op cannot be placed on the requested one.
config = tf.ConfigProto(allow_soft_placement=True)
# Grow GPU memory use on demand instead of reserving it all up front.
config.gpu_options.allow_growth = True
gpu_config = "/gpu:0"  # device is fixed here, not read from the command line

with tf.Session(config=config) as sess:
    with tf.device(gpu_config): 
        # Normal initializer with stddev 0.1; its consumer is not visible here.
        initializer = tf.random_normal_initializer(stddev=0.1)
        
Esempio n. 7
0
train_target_path = args.train_target_path
dict_path = args.dict_path
save_path = args.save_path
test_source_path = args.test_source_path
val_path = args.val_path
num_epochs = args.epoch
emb_path = args.char_emb
gpu_config = "/gpu:" + str(args.gpu)
num_steps = 200  # it must consist with the test

start_time = time.time()
print "preparing train and validation data"
helper.get_dict(dict_path)
X_train_source, y_train_source, X_val_source, y_val_source = helper.getTrain(
    dict_path=dict_path,
    train_path=train_source_path,
    val_path=test_source_path,
    seq_max_len=num_steps)
X_train_target, y_train_target, X_val_target, y_val_target = helper.getTrain(
    dict_path=dict_path,
    train_path=train_target_path,
    val_path=test_source_path,
    seq_max_len=num_steps)
X_test_source, y_test_source, X_test_source_val, y_test_source_val = helper.getTrain(
    dict_path=dict_path,
    train_path=test_source_path,
    val_path=test_source_path,
    seq_max_len=num_steps,
    is_shuffle=False)
char2id, id2char = helper.loadMap("char2id")
label2id, id2label = helper.loadMap("label2id")
Esempio n. 8
0
# Graph inputs. The leading None leaves the first dimension unspecified.
# x1: int32 with n_steps columns per row.
x1 = tf.placeholder(tf.int32, [None, n_steps])
# y: int32 with 2 columns per row.
# NOTE(review): the width is hard-coded to 2 rather than n_classes - confirm they agree.
y = tf.placeholder(tf.int32, [None, 2])

# Output-layer parameters, drawn from a normal distribution.
# Weight matrix: n_hidden rows by n_classes columns.
weights = {'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))}

# Bias vector: one entry per class.
biases = {'out': tf.Variable(tf.random_normal([n_classes]))}

# Fixed inputs for this run; nothing here is read from the command line.
train_path = 'train.2'
char2id_file = 'char2id'
label2id_file = 'label2id'
save_path = './'
emb_dim = '100'  # NOTE(review): kept as a string, as in the original - confirm how it is consumed
# No validation file is supplied (val_path=None).
X_train, y_train, X_val, y_val = helper.getTrain(train_path=train_path,
                                                 val_path=None,
                                                 seq_max_len=n_steps,
                                                 char2id_file=char2id_file,
                                                 label2id_file=label2id_file)
# Shuffle inputs and labels with one shared index permutation so that each
# input stays paired with its label.
sh_index = np.arange(len(X_train))
np.random.shuffle(sh_index)
X_train = X_train[sh_index]
y_train = y_train[sh_index]

char2id, id2char = helper.loadMap(char2id_file)
label2id, id2label = helper.loadMap(label2id_file)
# len() of a mapping already counts its keys; no need to materialise .keys().
num_chars = len(id2char)  # vocabulary size
num_classes = len(id2label)  # number of tagging classes
emb_path = None  # no embedding file is configured, so the branch below is skipped
# None is a singleton: compare by identity (PEP 8) rather than with `!=`.
if emb_path is not None:
    embedding_matrix = helper.getEmbedding(emb_path, char2id_file)
    # print len([_ for _ in np.sum(embedding_matrix,axis=1) if _ != 0])