test_ids, "OnWN") ] feats = takelab_feats + takelab_lsa_feats + subsem_best_feats scores = [] X_sts12_train, y_sts12_train = read_train_data(train_ids, feats) X_sts12_test, y_sts12_test = read_test_data(test_ids, feats) X_train = vstack([X_sts12_train, X_sts12_test]) y_train = hstack([y_sts12_train, y_sts12_test]) test_input = [read_system_input(test_input_fnames[sts13_test_id]) for sts13_test_id in sts13.test_ids] test_input = concatenate(test_input) X_sts13, y_sts13 = sts13.read_test_data(sts13.test_ids, feats) X_sts13_val = X_sts13[0:X_sts13.shape[0]/2, :] X_sts13_held = X_sts13[X_sts13.shape[0]/2:, :] y_sts_val = y_sts13[0:len(y_sts13)/2] y_sts_held = y_sts13[len(y_sts13)/2:] test_input_val = test_input[0:len(test_input)/2] test_input_held = test_input[len(test_input)/2:] n_train = len(y_train) n_test = len(y_sts_val) param_grid = LARGE_PARAM_GRID
# NOTE(review): this first line is the tail of a regressor construction
# whose opening falls outside this chunk (presumably
# BaggingRegressor(SVR(), ...) as in the sibling script) — confirm.
n_estimators=100, max_features=0.8, max_samples=0.8)

# Output directory named after the team and run id.
out_dir = "STS-en-{}-{}".format(GROUP, APPROACH)

if not os.path.exists(out_dir):
    os.mkdir(out_dir)

# Collects one output filename per STS14 test set.
filenames = []

# Train one model per (2012 train, 2012 test, 2013 test, 2014 test)
# dataset tuple and score the corresponding blind 2014 test set.
for sts12_train_id, sts12_test_id, sts13_test_id, sts14_test_id in id_pairs:
    # combine 2012, 2013 training and test data
    X_sts12_train, y_sts12_train = ntnu_sts12.read_train_data(
        sts12_train_id, feats)
    X_sts12_test, y_sts12_test = ntnu_sts12.read_test_data(
        sts12_test_id, feats)
    X_sts13_test, y_sts13_test = sts13.read_test_data(sts13_test_id, feats)
    X_train = np.vstack([X_sts12_train, X_sts12_test, X_sts13_test])
    y_train = np.hstack([y_sts12_train, y_sts12_test, y_sts13_test])

    # Fit on the combined data, then predict on the blind 2014 set.
    regressor.fit(X_train, y_train)
    X_test = read_blind_test_data(sts14_test_id, feats)
    y_test = regressor.predict(X_test)

    # Post-process the predictions against the raw system input before
    # writing them out.
    test_input = read_system_input(test_input_fnames[sts14_test_id])
    postprocess(test_input, y_test)

    fname = "{}/STS-en.output.{}.txt".format(out_dir, sts14_test_id)
    write_scores(fname, y_test)
    filenames.append(fname)
feats = takelab_feats + takelab_lsa_feats + subsem_best_feats # learning algorithm in default setting regressor = BaggingRegressor(SVR(), verbose=1, n_jobs=3, n_estimators=100, max_features=0.8, max_samples=0.8) out_dir = "STS-en-{}-{}".format(GROUP, APPROACH) if not os.path.exists(out_dir): os.mkdir(out_dir) filenames = [] for sts12_train_id, sts12_test_id, sts13_test_id, sts14_test_id in id_pairs: # combine 2012, 2013 training and test data X_sts12_train, y_sts12_train = ntnu_sts12.read_train_data(sts12_train_id, feats) X_sts12_test, y_sts12_test = ntnu_sts12.read_test_data(sts12_test_id, feats) X_sts13_test, y_sts13_test = sts13.read_test_data(sts13_test_id, feats) X_train = np.vstack([X_sts12_train, X_sts12_test, X_sts13_test]) y_train = np.hstack([y_sts12_train, y_sts12_test, y_sts13_test]) regressor.fit(X_train, y_train) X_test = read_blind_test_data(sts14_test_id, feats) y_test = regressor.predict(X_test) test_input = read_system_input(test_input_fnames[sts14_test_id]) postprocess(test_input, y_test) fname = "{}/STS-en.output.{}.txt".format(out_dir, sts14_test_id) write_scores(fname, y_test) filenames.append(fname)
# Feature set: Takelab baseline + Takelab LSA + best subsemantic features.
feats = takelab_feats + takelab_lsa_feats + subsem_best_feats

scores = []

# Training data: the STS12 train and test portions stacked together.
X_sts12_train, y_sts12_train = read_train_data(train_ids, feats)
X_sts12_test, y_sts12_test = read_test_data(test_ids, feats)
X_train = vstack([X_sts12_train, X_sts12_test])
y_train = hstack([y_sts12_train, y_sts12_test])

# Raw system input for every STS13 test set, concatenated in the same
# id order that sts13.read_test_data uses below.
test_input = [
    read_system_input(test_input_fnames[sts13_test_id])
    for sts13_test_id in sts13.test_ids
]
test_input = concatenate(test_input)

X_sts13, y_sts13 = sts13.read_test_data(sts13.test_ids, feats)

# Split the STS13 material 50/50 into a validation half and a held-out
# half.  BUG FIX: use floor division ("//") so the slice indices are
# ints; plain "/" yields floats under Python 3, and float indices raise
# TypeError.  On ints, "//" is identical to the old Python 2 behavior.
X_sts13_val = X_sts13[:X_sts13.shape[0] // 2, :]
X_sts13_held = X_sts13[X_sts13.shape[0] // 2:, :]
y_sts_val = y_sts13[:len(y_sts13) // 2]
y_sts_held = y_sts13[len(y_sts13) // 2:]
test_input_val = test_input[:len(test_input) // 2]
test_input_held = test_input[len(test_input) // 2:]

n_train = len(y_train)
n_test = len(y_sts_val)

param_grid = LARGE_PARAM_GRID