Ejemplo n.º 1
0
     test_ids,
     "OnWN") ]

# Combine the three feature groups with ``+`` into the feature set that is
# passed to every reader call below.
feats = takelab_feats + takelab_lsa_feats + subsem_best_feats

scores = []

# Stack what read_train_data and read_test_data return into a single
# training set (rows via vstack, targets via hstack).
X_sts12_train, y_sts12_train = read_train_data(train_ids, feats)
X_sts12_test, y_sts12_test = read_test_data(test_ids, feats)
X_train = vstack([X_sts12_train, X_sts12_test])
y_train = hstack([y_sts12_train, y_sts12_test])

# Read the system input for each id in sts13.test_ids and join the pieces.
test_input = [read_system_input(test_input_fnames[sts13_test_id]) for sts13_test_id in sts13.test_ids]
test_input = concatenate(test_input)

X_sts13, y_sts13 = sts13.read_test_data(sts13.test_ids, feats)

# Split features, targets and system input into a first half (``*_val``) and
# a second half (``*_held``).  Floor division (``//``) keeps every split point
# an integer: under true division (Python 3, or
# ``from __future__ import division``) ``/`` yields a float, which is rejected
# as a slice index.
X_sts13_val = X_sts13[0:X_sts13.shape[0] // 2, :]
X_sts13_held = X_sts13[X_sts13.shape[0] // 2:, :]

y_sts_val = y_sts13[0:len(y_sts13) // 2]
y_sts_held = y_sts13[len(y_sts13) // 2:]

test_input_val = test_input[0:len(test_input) // 2]
test_input_held = test_input[len(test_input) // 2:]

n_train = len(y_train)
n_test = len(y_sts_val)

param_grid = LARGE_PARAM_GRID
Ejemplo n.º 2
0
                             n_estimators=100,
                             max_features=0.8,
                             max_samples=0.8)

# Output directory is named after the group and approach; create it on
# first use.
out_dir = "STS-en-{}-{}".format(GROUP, APPROACH)
if not os.path.exists(out_dir):
    os.mkdir(out_dir)

# Paths of the score files written by the loop, in id_pairs order.
filenames = []

for id_tuple in id_pairs:
    sts12_train_id, sts12_test_id, sts13_test_id, sts14_test_id = id_tuple

    # Training set for this id tuple: the three reader results stacked
    # together (features row-wise, targets end to end).
    X_sts12_train, y_sts12_train = ntnu_sts12.read_train_data(
        sts12_train_id, feats
    )
    X_sts12_test, y_sts12_test = ntnu_sts12.read_test_data(
        sts12_test_id, feats
    )
    X_sts13_test, y_sts13_test = sts13.read_test_data(sts13_test_id, feats)
    X_train = np.vstack((X_sts12_train, X_sts12_test, X_sts13_test))
    y_train = np.hstack((y_sts12_train, y_sts12_test, y_sts13_test))

    # Refit the regressor for this id tuple, then predict the blind test data.
    regressor.fit(X_train, y_train)

    X_test = read_blind_test_data(sts14_test_id, feats)
    y_test = regressor.predict(X_test)

    # postprocess receives the system input together with the predictions;
    # its return value is not used here.
    test_input = read_system_input(test_input_fnames[sts14_test_id])
    postprocess(test_input, y_test)

    # Write the predictions and remember where they went.
    fname = "{}/STS-en.output.{}.txt".format(out_dir, sts14_test_id)
    write_scores(fname, y_test)
    filenames.append(fname)
Ejemplo n.º 3
0
# Feature set: the three feature groups combined with ``+``.
feats = takelab_feats + takelab_lsa_feats + subsem_best_feats

# Learning algorithm: a bagging ensemble whose base estimator is an SVR
# constructed with no arguments.
regressor = BaggingRegressor(
    SVR(),
    verbose=1,
    n_jobs=3,
    n_estimators=100,
    max_features=0.8,
    max_samples=0.8,
)

# Output directory is named after the group and approach; create it if it
# is not there yet.
out_dir = "STS-en-{}-{}".format(GROUP, APPROACH)
if not os.path.exists(out_dir):
    os.mkdir(out_dir)

# Collects the path of every score file written below.
filenames = []

for id_tuple in id_pairs:
    sts12_train_id, sts12_test_id, sts13_test_id, sts14_test_id = id_tuple

    # Build the training set from the three reader results.
    X_sts12_train, y_sts12_train = ntnu_sts12.read_train_data(
        sts12_train_id, feats
    )
    X_sts12_test, y_sts12_test = ntnu_sts12.read_test_data(
        sts12_test_id, feats
    )
    X_sts13_test, y_sts13_test = sts13.read_test_data(sts13_test_id, feats)
    X_train = np.vstack((X_sts12_train, X_sts12_test, X_sts13_test))
    y_train = np.hstack((y_sts12_train, y_sts12_test, y_sts13_test))

    # Fit on the combined data, then predict the blind test data.
    regressor.fit(X_train, y_train)

    X_test = read_blind_test_data(sts14_test_id, feats)
    y_test = regressor.predict(X_test)

    # postprocess is called for its effect only; its return value is unused.
    test_input = read_system_input(test_input_fnames[sts14_test_id])
    postprocess(test_input, y_test)

    # One score file per id tuple, named after the last id in the tuple.
    fname = "{}/STS-en.output.{}.txt".format(out_dir, sts14_test_id)
    write_scores(fname, y_test)
    filenames.append(fname)
Ejemplo n.º 4
0
# Feature set handed to every reader call below: the three feature groups
# combined with ``+``.
feats = takelab_feats + takelab_lsa_feats + subsem_best_feats

scores = []

# Training set: the results of read_train_data and read_test_data stacked
# together (features with vstack, targets with hstack).
X_sts12_train, y_sts12_train = read_train_data(train_ids, feats)
X_sts12_test, y_sts12_test = read_test_data(test_ids, feats)
X_train = vstack([X_sts12_train, X_sts12_test])
y_train = hstack([y_sts12_train, y_sts12_test])

# System input for each id in sts13.test_ids, joined into one sequence.
test_input = [
    read_system_input(test_input_fnames[sts13_test_id])
    for sts13_test_id in sts13.test_ids
]
test_input = concatenate(test_input)

X_sts13, y_sts13 = sts13.read_test_data(sts13.test_ids, feats)

# Halve features, targets and system input: the first half gets the ``_val``
# suffix, the second half ``_held``.  The split points use floor division
# because a slice index must be an integer; ``/`` produces a float under
# true division (Python 3, or ``from __future__ import division``) and the
# slice then fails.
X_sts13_val = X_sts13[0:X_sts13.shape[0] // 2, :]
X_sts13_held = X_sts13[X_sts13.shape[0] // 2:, :]

y_sts_val = y_sts13[0:len(y_sts13) // 2]
y_sts_held = y_sts13[len(y_sts13) // 2:]

test_input_val = test_input[0:len(test_input) // 2]
test_input_held = test_input[len(test_input) // 2:]

n_train = len(y_train)
n_test = len(y_sts_val)

param_grid = LARGE_PARAM_GRID