def load_data():
    """Build the training inputs and labels.

    Pipeline: ``read_cut_es`` -> ``data_2id`` on the ``q1_es_cut`` and
    ``q2_es_cut`` columns -> ``add_hum_feats`` with ``config.train_feats``
    -> ``get_X_Y_from_df`` with ``config.data_augment``.

    Intermediate results are printed for inspection.

    Returns:
        The ``(x_train, y_train)`` pair produced by ``get_X_Y_from_df``.
    """
    print('load data')
    data = read_cut_es()  # word segmentation ("cut word")
    print(data)
    # Presumably maps the two cut-text columns to ids ("2id") -- confirm
    # against data_2id.
    data = data_2id(data, ['q1_es_cut', 'q2_es_cut'])
    print(data)
    # Generate the extra features and add them to the data.  Called exactly
    # once: repeating the identical call would only redo the same work.
    data = add_hum_feats(data, config.train_feats)
    x_train, y_train = get_X_Y_from_df(data, config.data_augment)
    print(len(x_train[2]))
    return x_train, y_train
def load_data():
    """Load ``config.origin_csv`` and return it with ids and features added.

    The file is passed through ``read_cut``, ``data_2id`` and
    ``add_hum_feats`` (using ``config.train_featdires``), in that order.

    Returns:
        Whatever ``add_hum_feats`` returns for the processed data.
    """
    source_csv = config.origin_csv
    print('load data')

    # Stage 1: word segmentation ("cut word").
    segmented = read_cut(source_csv)

    # Stage 2: "2id" conversion.
    indexed = data_2id(segmented)

    # Stage 3: generate the features and add them.
    with_feats = add_hum_feats(indexed, config.train_featdires)
    return with_feats
def load_data():
    """Load the test data with ids and features added.

    Runs ``read_cut_test``, then ``data_2id`` on the ``q1_es_cut`` and
    ``q2_es_cut`` columns, then ``add_hum_feats`` with ``config.test_feats``.

    Returns:
        Whatever ``add_hum_feats`` returns for the processed data.
    """
    print('load data')

    # Word segmentation ("cut word").
    segmented = read_cut_test()

    # "2id" conversion of the two cut-text columns.
    text_columns = ['q1_es_cut', 'q2_es_cut']
    indexed = data_2id(segmented, text_columns)

    # Generate the features and add them.
    return add_hum_feats(indexed, config.test_feats)
def load_data():
    """Build training inputs and labels from ``config.origin_csv``.

    The file goes through ``read_cut``, ``data_2id`` and ``add_hum_feats``
    (using ``config.train_feats``); the result is split into inputs and
    labels by ``get_X_Y_from_df`` with ``config.data_augment``.

    Returns:
        The ``(x_train, y_train)`` pair produced by ``get_X_Y_from_df``.
    """
    source_csv = config.origin_csv
    print('load data')

    segmented = read_cut(source_csv)  # word segmentation ("cut word")
    indexed = data_2id(segmented)  # "2id" conversion
    # Generate the features and add them.
    with_feats = add_hum_feats(indexed, config.train_feats)

    xy_pair = get_X_Y_from_df(with_feats, config.data_augment)
    x_train, y_train = xy_pair
    # Debug output: length of the third input component.
    print(len(x_train[2]))
    return x_train, y_train
def load_data():
    """Build train and dev inputs/labels.

    Runs ``read_cut_es``, ``data_2id`` (on ``q1_es_cut`` / ``q2_es_cut``) and
    ``add_hum_feats`` (with ``config.train_feats``), splits the result with
    ``train_test``, and converts each part with ``get_X_Y_from_df``.  The
    train part uses ``config.data_augment``; the dev part always passes
    ``False``.

    Intermediate results and the shapes of the first input component are
    printed.

    Returns:
        ``(x_train, y_train, x_dev, y_dev)``.
    """
    print('load data')

    frame = read_cut_es()  # word segmentation ("cut word")
    print(frame)

    text_columns = ['q1_es_cut', 'q2_es_cut']
    frame = data_2id(frame, text_columns)  # "2id" conversion
    print(frame)

    # Generate the features and add them.
    frame = add_hum_feats(frame, config.train_feats)

    train_part, dev_part = train_test(frame)

    x_train, y_train = get_X_Y_from_df(train_part, config.data_augment)
    print(x_train)

    x_dev, y_dev = get_X_Y_from_df(dev_part, False)

    print('train shape', x_train[0].shape)
    print('dev shape', x_dev[0].shape)
    return x_train, y_train, x_dev, y_dev
def main(model_path):
    """Predict on the test data with a saved model and write a submission.

    The output file is ``submit/<name>.txt`` where ``<name>`` is the part of
    ``model_path`` after its last ``/``.  The ``submit`` directory is created
    if it does not exist, so a missing directory cannot fail the run after
    the prediction has already been computed.

    Args:
        model_path: Path of the saved model passed to ``load_model``.
    """
    import os

    out_path = 'submit/{0}.txt'.format(model_path.split('/')[-1])

    # Make sure the output directory exists before doing any work.
    out_dir = os.path.dirname(out_path)
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    print('load data')
    data = read_cut_test()  # word segmentation ("cut word")
    data = data_2id(data, ['q1_es_cut', 'q2_es_cut'])  # "2id" conversion
    # Generate the features and add them.
    data = add_hum_feats(data, config.test_feats)
    X, _ = get_X_Y_from_df(data, False)
    if config.feats == []:
        # No extra features configured: keep only the first two inputs.
        X = X[:2]

    print('load model and predict')
    model = load_model(model_path, custom_objects={"softmax": softmax})
    test_pred = model.predict(X, batch_size=config.batch_size)
    print(test_pred)

    # Column 1 of the prediction is written out as the label
    # (presumably the positive-class score -- confirm against the model).
    data['label'] = test_pred[:, 1]
    data['label'].to_csv(
        out_path,
        index=False,
        header=None,
    )
def load_data(in_path):
    """Load ``in_path`` and return it with ids and features added.

    Runs ``cut_word`` on ``in_path``, then ``data_2id``, then
    ``add_hum_feats`` with ``config.test_featdires``.

    Args:
        in_path: Input location handed to ``cut_word``.

    Returns:
        Whatever ``add_hum_feats`` returns for the processed data.
    """
    print('load data')
    segmented = cut_word(in_path)
    indexed = data_2id(segmented)  # "2id" conversion
    # Generate the features and add them.
    with_feats = add_hum_feats(indexed, config.test_featdires)
    return with_feats