Esempio n. 1
0
regressor = LinearRegression()
# Used with default setting here. However, in the real DKPro system, its setting
# were probably optmized by a CV gridsearch on the training data


# TODO: this approach is brain dead, because it keeps reading features from files

print "{:64s}\t".format("Features:"),

print "\t".join(["{:>16s}".format(p[1]) for p in id_pairs])


for feat in feats:
    print "{:64s}\t".format(feat),
    
    for train_id, test_id in id_pairs:
        train_feat, train_scores = read_train_data(train_id, [feat])
        regressor.fit(train_feat, train_scores)
        
        test_feat, test_scores = read_test_data(test_id, [feat])
        sys_scores = regressor.predict(test_feat)
        
        sys_input = read_system_input(test_input_fnames[test_id])
        postprocess(sys_input,  sys_scores)
        
        if isinstance(train_id, tuple):
            train_id = "+".join(train_id)
        
        print "{:16.2f}\t".format(correlation(sys_scores, test_scores)),
    print
    
Esempio n. 2
0
def score_stub(true, pred):
    """Return the correlation between `true` and the postprocessed `pred`.

    `pred` is first passed through `postprocess` together with the
    module-level `test_input_val`; the value that call returns is then
    compared against `true` with `correlation`.
    """
    adjusted_pred = postprocess(test_input_val, pred)
    return correlation(true, adjusted_pred)
Esempio n. 3
0
# Build the submission: train on the combined STS 2012 train+test data,
# predict scores for each STS 2013 test set, write one output file per test
# set plus a description file, and pack everything into a zip archive.
filenames = []

for sts12_train_id, sts12_test_id, sts13_test_id in id_pairs:
    # combine 2012 training and test data
    X_sts12_train, y_sts12_train = read_train_data(sts12_train_id, feats)
    X_sts12_test, y_sts12_test = read_test_data(sts12_test_id, feats)
    X_train = np.vstack([X_sts12_train, X_sts12_test])
    y_train = np.hstack([y_sts12_train, y_sts12_test])

    regressor.fit(X_train, y_train)

    X_test = read_blind_test_data(sts13_test_id, feats)
    y_test = regressor.predict(X_test)

    # NOTE(review): the return value of postprocess is ignored, so this
    # presumably adjusts y_test in place -- confirm against its definition.
    test_input = read_system_input(test_input_fnames[sts13_test_id])
    postprocess(test_input, y_test)

    fname = "{}/STScore.output.{}.txt".format(out_dir, sts13_test_id)
    write_scores(fname, y_test)
    filenames.append(fname)

descr_fname = "{}/STScore-{}-{}.description.txt".format(
    out_dir, GROUP, APPROACH)
# Close the file explicitly so the description is flushed to disk before
# zip reads it.
with open(descr_fname, "w") as descr_file:
    descr_file.write(DESCRIPTION)
filenames.append(descr_fname)

zipfile = "STScore-{}-{}.zip".format(GROUP, APPROACH)

# Pass an argument list instead of a shell string, so that paths containing
# spaces or shell metacharacters are handed to zip unchanged.
call(["zip", "-rv", zipfile] + filenames)

# Kept for backward compatibility: as before, the module-level name ends up
# holding the space-separated string of all archived files.
filenames = " ".join(filenames)
Esempio n. 4
0
# Build the submission: train on the combined STS 2012 train+test and STS
# 2013 test data, predict scores for each STS 2014 test set, write one output
# file per test set plus a description file, and pack everything into a zip
# archive.
# NOTE(review): `filenames` is appended to below but is not initialised in
# this section; presumably it is an (empty) list defined earlier -- confirm.
for sts12_train_id, sts12_test_id, sts13_test_id, sts14_test_id in id_pairs:
    # combine 2012, 2013 training and test data
    X_sts12_train, y_sts12_train = ntnu_sts12.read_train_data(sts12_train_id, feats)
    X_sts12_test, y_sts12_test = ntnu_sts12.read_test_data(sts12_test_id, feats)
    X_sts13_test, y_sts13_test = sts13.read_test_data(sts13_test_id, feats)
    X_train = np.vstack([X_sts12_train, X_sts12_test, X_sts13_test])
    y_train = np.hstack([y_sts12_train, y_sts12_test, y_sts13_test])

    regressor.fit(X_train, y_train)

    X_test = read_blind_test_data(sts14_test_id, feats)
    y_test = regressor.predict(X_test)

    # NOTE(review): the return value of postprocess is ignored, so this
    # presumably adjusts y_test in place -- confirm against its definition.
    test_input = read_system_input(test_input_fnames[sts14_test_id])
    postprocess(test_input, y_test)

    fname = "{}/STS-en.output.{}.txt".format(out_dir, sts14_test_id)
    write_scores(fname, y_test)
    filenames.append(fname)

descr_fname = "{}/STS-en-{}-{}.description.txt".format(out_dir, GROUP, APPROACH)
# Close the file explicitly so the description is flushed to disk before
# zip reads it.
with open(descr_fname, "w") as descr_file:
    descr_file.write(DESCRIPTION)
filenames.append(descr_fname)

zipfile = "STS-en-{}-{}.zip".format(GROUP, APPROACH)

# Pass an argument list instead of a shell string, so that paths containing
# spaces or shell metacharacters are handed to zip unchanged.
call(["zip", "-rv", zipfile] + filenames)

# Kept for backward compatibility: as before, the module-level name ends up
# holding the space-separated string of all archived files.
filenames = " ".join(filenames)
Esempio n. 5
0
def score_stub(true, pred):
    """Score `pred` against `true` after postprocessing.

    The predictions are run through `postprocess` (with the module-level
    `test_input_val` as its first argument) and the result is handed to
    `correlation` together with `true`; that correlation is returned.
    """
    processed = postprocess(test_input_val, pred)
    score = correlation(true, processed)
    return score