# Record array receiving the headline scores: one record per row of X_hline,
# laid out according to y_dtype.
y_hline = np.recarray((X_hline.shape[0],), dtype=y_dtype)

# Results table, created empty and grown by one record per evaluation.
# NOTE(review): both id fields are fixed-width 16-byte strings, so longer ids
# would be cut off when stored -- confirm that 16 is wide enough.
results = np.recarray(
    (0,),
    dtype=[
        ("train_id", "S16"),
        ("test_id", "S16"),
        ("min_diff", "f"),
        ("max_diff", "f"),
        ("samples", "i"),
        ("score", "f"),
    ],
)

# Train each regressor on the training set stored under the same id.
for train_id in regressors:
    rgr = regressors[train_id]
    rgr.fit(X_train[train_id], y_train[train_id])

# Compute and report the initial score of every test set, using the regressor
# that test_id2train_id maps it to.
for test_id, train_id in test_id2train_id.items():
    rgr = regressors[train_id]
    sys_scores = rgr.predict(X_test[test_id])
    # The return value is discarded, so postprocess() presumably adjusts
    # sys_scores in place -- TODO confirm.
    postprocess(sys_input[test_id], sys_scores)
    r = correlation(sys_scores, y_test[test_id])
    # Number of training samples. Must be looked up before train_id is
    # replaced by its display label below, because X_train is keyed by the
    # original id.
    n = X_train[train_id].shape[0]
    results.resize(results.size + 1)
    # A tuple id is flattened into a single "a+b" label for storage/printing.
    if isinstance(train_id, tuple):
        train_id = "+".join(train_id)
    # min_diff and max_diff are recorded as 0 for this initial run.
    # NOTE(review): the id fields of `results` are 16 bytes wide while the
    # report below pads ids to 32 columns; ids longer than 16 characters
    # would be truncated in the table -- confirm.
    results[-1] = (train_id, test_id, 0, 0, n, r)
    # Single-argument print(...) produces the same output under Python 2 and
    # is also valid Python 3, unlike the bare print statement.
    print("{:32s} {:32s} {:>8d} {:8.4f}".format(train_id, test_id, n, r))

# Score the headlines with every regressor listed in hline_regressors and
# store the predictions in the y_hline field named after that regressor's id.
for train_id in hline_regressors:
    # TODO: full postprocessing
    scores = regressors[train_id].predict(X_hline)
    # For now the predictions are only clamped to the closed range [0, 5].
    scores = np.clip(scores, 0.0, 5.0)
    y_hline[train_id] = scores
Exemple #2
0
                                ("min_diff", "f"), 
                                ("max_diff", "f"),
                                ("samples", "i"),
                                ("iteration", "i"),
                                ("score", "f")])    

         
# Train each regressor on the training set stored under the same id.
for train_id in regressors:
    rgr = regressors[train_id]
    rgr.fit(X_train[train_id], y_train[train_id])
    
# Compute and report the initial score of every test set, using the regressor
# that test_id2train_id maps it to.
for test_id, train_id in test_id2train_id.items():
    rgr = regressors[train_id]
    sys_scores = rgr.predict(X_test[test_id])
    # The return value is discarded, so postprocess() presumably adjusts
    # sys_scores in place -- TODO confirm.
    postprocess(sys_input[test_id], sys_scores)
    r = correlation(sys_scores, y_test[test_id])
    # Number of training samples. Must be looked up before train_id is
    # replaced by its display label below, because X_train is keyed by the
    # original id.
    n = X_train[train_id].shape[0]
    results.resize(results.size + 1)
    # A tuple id is flattened into a single "a+b" label for storage/printing.
    if isinstance(train_id, tuple):
        train_id = "+".join(train_id)
    # Initial run: min_diff, max_diff and iteration are all recorded as 0.
    results[-1] = (train_id, test_id, 0, 0, n, 0, r)
    # Single-argument print(...) produces the same output under Python 2 and
    # is also valid Python 3, unlike the bare print statement.
    print("{:32s} {:32s} {:>8d} {:8.4f}".format(train_id, test_id, n, r))
   
# Score the headlines with every regressor listed in hline_regressors and
# store the predictions in the y_hline field named after that regressor's id.
for train_id in hline_regressors:
    # TODO: full postprocessing
    scores = regressors[train_id].predict(X_hline)
    # For now the predictions are only clamped to the closed range [0, 5].
    scores = np.clip(scores, 0.0, 5.0)
    y_hline[train_id] = scores