Beispiel #1
0
def load_data():
    """Prepare train/dev inputs and labels from the ``read_cut_es`` data.

    Steps, in order: ``read_cut_es`` -> ``data_2id`` on the ``q1_es_cut``
    and ``q2_es_cut`` columns -> ``add_hum_feats`` with
    ``config.train_feats`` -> ``train_test`` split -> ``get_X_Y_from_df``
    on each part.  Intermediate results are printed for inspection.

    Returns:
        The tuple ``(x_train, y_train, x_dev, y_dev)``.  The training part
        is extracted with ``config.data_augment`` as the second argument,
        the dev part with ``False``.
    """
    text_columns = ['q1_es_cut', 'q2_es_cut']

    print('load data')
    frame = read_cut_es()  # cut into words
    print(frame)
    frame = data_2id(frame, text_columns)  # map to ids
    print(frame)
    # Generate the configured features and add them to the data.
    frame = add_hum_feats(frame, config.train_feats)

    train_part, dev_part = train_test(frame)

    x_train, y_train = get_X_Y_from_df(train_part, config.data_augment)
    print(x_train)
    x_dev, y_dev = get_X_Y_from_df(dev_part, False)

    # Only the first input of each split is reported.
    print('train shape', x_train[0].shape)
    print('dev shape', x_dev[0].shape)
    return x_train, y_train, x_dev, y_dev
Beispiel #2
0
def train(cv, model_name):
    """Train the model named ``model_name``, either cross-validated or single-split.

    The data is read via ``read_cut`` from ``config.origin_csv`` /
    ``config.train_data_cut_hdf``, passed through ``data2id`` and then
    resampled with ``sample(frac=1, random_state=18)`` (presumably a
    deterministic shuffle of a pandas DataFrame -- confirm against the
    helpers).

    Args:
        cv: Truthy to hand all data to ``model.make_train_cv_data`` with
            5 folds; falsy to split with ``train_test`` and call
            ``model.single_train``.
        model_name: Name passed to ``get_model``.
    """
    frame = data2id(read_cut(config.origin_csv, config.train_data_cut_hdf))
    frame = frame.sample(frac=1, random_state=18)

    learner = get_model(model_name)

    if not cv:
        # Single run: fit on the train part, evaluate on the dev part.
        train_part, dev_part = train_test(frame)
        x_train, y_train = get_X_Y_from_df(train_part)
        x_dev, y_dev = get_X_Y_from_df(dev_part)
        learner.single_train([x_train, y_train, x_dev, y_dev])
        return

    # Cross-validation: the model receives all data plus the fold count.
    kfolds = 5
    x_all, y_all = get_X_Y_from_df(frame)
    learner.make_train_cv_data([x_all, y_all], kfolds)
Beispiel #3
0
def load_data():
    """Load the data at ``config.origin_csv`` and return model inputs and labels.

    Steps, in order: ``read_cut`` -> ``data_2id`` -> ``add_hum_feats`` with
    ``config.train_feats`` -> ``get_X_Y_from_df`` with
    ``config.data_augment``.

    Returns:
        The ``(x_train, y_train)`` pair produced by ``get_X_Y_from_df``.
    """
    source_csv = config.origin_csv
    print('load data')

    frame = read_cut(source_csv)  # cut into words
    frame = data_2id(frame)  # map to ids
    # Generate the configured features and add them to the data.
    frame = add_hum_feats(frame, config.train_feats)

    inputs, labels = get_X_Y_from_df(frame, config.data_augment)
    # Report the length of the third input.
    print(len(inputs[2]))

    return inputs, labels
Beispiel #4
0
def submit(in_path, out_path, model_name, cv=False):
    """Predict labels for the data at ``in_path`` and write a submission file.

    Args:
        in_path: Passed to ``load_data``.
        out_path: Destination of the tab-separated, header-less file with
            the ``id`` and ``label`` columns.
        model_name: Passed to the prediction helper.
        cv: Falsy to predict with ``single_submit``; truthy to predict with
            ``make_test_cv_data`` using ``kfolds=5``.
    """
    frame = load_data(in_path)
    features, _ = get_X_Y_from_df(frame, False, False)

    print('load model and predict')
    if cv:
        scores = make_test_cv_data(features, model_name, kfolds=5)
    else:
        scores = single_submit(features, model_name)

    print('save submit file')
    # Threshold each prediction at 0.5 to obtain a 0/1 label.
    frame['label'] = [int(score > 0.5) for score in scores]
    submission = frame[['id', 'label']]
    submission.to_csv(out_path, index=False, header=None, sep='\t')
Beispiel #5
0
def load_data():
    """Load the ``read_cut_es`` data and return model inputs and labels.

    Steps, in order: ``read_cut_es`` -> ``data_2id`` on the ``q1_es_cut``
    and ``q2_es_cut`` columns -> ``add_hum_feats`` with
    ``config.train_feats`` -> ``get_X_Y_from_df`` with
    ``config.data_augment``.  Intermediate results are printed for
    inspection.

    Returns:
        The ``(x_train, y_train)`` pair produced by ``get_X_Y_from_df``.
    """
    print('load data')
    data = read_cut_es()  # cut into words
    print(data)
    data = data_2id(data, ['q1_es_cut', 'q2_es_cut'])  # map to ids
    print(data)
    # Generate the configured features and add them to the data. This must
    # run exactly once: a second pass would recompute (and possibly
    # duplicate) the same features.
    data = add_hum_feats(data, config.train_feats)

    x_train, y_train = get_X_Y_from_df(data, config.data_augment)
    # Report the length of the third input.
    print(len(x_train[2]))

    return x_train, y_train
Beispiel #6
0
def submit_inteface(in_path, out_path, model_name, cv=False):
    """Predict a label per row of the test data and write it to ``out_path``.

    The data is read via ``read_cut`` from ``in_path`` /
    ``config.test_data_cut_hdf`` and passed through ``data2id``; missing
    values in its ``label`` column are filled with 0 before the inputs are
    extracted.

    Args:
        in_path: Input path passed to ``read_cut``.
        out_path: Destination of the tab-separated, header-less file
            containing only the ``label`` column.
        model_name: Name passed to ``get_model``.
        cv: Falsy to call ``model.single_predict``; truthy to call
            ``model.make_test_cv_data``.
    """
    frame = data2id(read_cut(in_path, config.test_data_cut_hdf))
    frame.label = frame.label.fillna(0)

    features, labels = get_X_Y_from_df(frame)
    model_input = [features, labels]
    predictor = get_model(model_name)

    print('load model and predict')
    predict = predictor.make_test_cv_data if cv else predictor.single_predict
    raw_pred = predict(model_input)

    class_scores = np.squeeze(raw_pred)
    # The written label is the arg-max position along axis 1, shifted by one.
    frame['label'] = np.argmax(class_scores, axis=1) + 1
    frame[['label']].to_csv(out_path, index=False, header=None, sep='\t')
Beispiel #7
0
def do_cv_test():
    """Run cross-validated test prediction with the ``cnn`` model and save it.

    Uses 5 epochs and 5 folds.  The predictions returned by
    ``make_test_cv_data`` are stored in the ``label`` column of the loaded
    data and written, without index or header, to
    ``submit/cnn_<timestamp>.txt``.
    """
    model_name, epoch_nums, kfolds = 'cnn', 5, 5
    # The timestamp is taken before loading so the name reflects the start.
    out_path = 'submit/{0}_{1}.txt'.format(model_name, time.time())

    frame = load_data()
    features, _ = get_X_Y_from_df(frame, False)
    # With no extra features configured only the first two inputs are used.
    features = features[:2] if config.feats == [] else features

    frame['label'] = make_test_cv_data(features, model_name, epoch_nums, kfolds)
    frame['label'].to_csv(out_path, index=False, header=None)
Beispiel #8
0
def main(model_path):
    """Predict on the test data with a saved model and write the scores.

    The output file is ``submit/<last path component of model_path>.txt``.
    It holds column 1 of the model's predictions, one value per row,
    without index or header.

    Args:
        model_path: Path of the saved model, loaded via ``load_model`` with
            ``softmax`` registered as a custom object.
    """
    model_file = model_path.split('/')[-1]
    out_path = 'submit/{0}.txt'.format(model_file)

    print('load data')
    frame = read_cut_test()  # cut into words
    frame = data_2id(frame, ['q1_es_cut', 'q2_es_cut'])  # map to ids
    # Generate the configured test features and add them to the data.
    frame = add_hum_feats(frame, config.test_feats)

    features, _ = get_X_Y_from_df(frame, False)
    # With no extra features configured only the first two inputs are used.
    features = features[:2] if config.feats == [] else features

    print('load model and predict')
    predictor = load_model(model_path, custom_objects={"softmax": softmax})
    scores = predictor.predict(features, batch_size=config.batch_size)
    print(scores)

    frame['label'] = scores[:, 1]
    frame['label'].to_csv(out_path, index=False, header=None)