def to_libfm(examples, libfm_filename):
    """Write ``examples`` to ``libfm_filename``, one text line per example.

    Every line is ``"<label> <features> <user feature>\\n"`` where

    * ``<label>`` is ``model.label(example)`` rendered with ``%d``,
    * ``<features>`` is ``sp_vector(x_i)``, ``x_i`` being
      ``model.represent(example)`` without its last component,
    * ``<user feature>`` is ``"<len(x_i) + example['user']>:1"``.

    Args:
        examples: Iterable of examples. Each one is indexed with
            ``example['user']`` and passed to ``model.represent`` and
            ``model.label``.
        libfm_filename: Path of the output file. It is opened in ``'w'``
            mode, so any existing content is replaced.

    The file is managed by a ``with`` block so the handle is released even
    when building or writing a line raises part-way through.
    """
    with open(libfm_filename, 'w') as libfm_file:
        for example in examples:
            # The last component of the representation is left out.
            # NOTE(review): presumably because the user is re-encoded as its
            # own feature below -- confirm against model.represent.
            x_i = model.represent(example)[:-1]
            y_i = model.label(example)
            user_id = example['user']
            # The user's feature index is offset by the number of kept
            # features; assumes example['user'] is an integer (``%d``).
            user_id_feature = "%d:1" % (len(x_i) + user_id)
            libfm_file.write(
                "%d %s %s\n" % (y_i, sp_vector(x_i), user_id_feature))
def to_libfm(examples, libfm_filename):
    """Dump ``examples`` as text lines into the file ``libfm_filename``.

    For each example one line is written:
    ``"%d %s %s\\n" % (label, sp_vector(features), user_feature)`` with

    * ``label`` = ``model.label(example)``,
    * ``features`` = ``model.represent(example)[:-1]``,
    * ``user_feature`` = ``"%d:1" % (len(features) + example['user'])``.

    Args:
        examples: Iterable of examples accepted by ``model.represent`` and
            ``model.label``; each must also provide ``example['user']``.
        libfm_filename: Destination path, opened with mode ``'w'``
            (existing content is overwritten).

    A context manager owns the file handle, so it is closed on every exit
    path, including when an exception is raised while processing an example.
    """
    with open(libfm_filename, 'w') as out:
        for example in examples:
            # All components of the representation except the final one.
            # NOTE(review): why the final one is dropped is not visible
            # here -- verify against model.represent.
            x_i = model.represent(example)[:-1]
            y_i = model.label(example)
            user_id = example['user']
            # Index of the user's feature: number of kept features plus the
            # user id (assumed to be an integer, given the ``%d`` format).
            user_id_feature = "%d:1" % (len(x_i) + user_id)
            line = "%d %s %s\n" % (y_i, sp_vector(x_i), user_id_feature)
            out.write(line)
def get_test_examples():
    """Return ``(test_X, test_y)``, loading and caching them on first use.

    The module-level globals ``test_X`` and ``test_y`` act as the cache.
    While ``test_X`` is falsy the test examples are loaded from
    ``data/test_40k_10k.pkl`` and converted with ``model.represent`` /
    ``model.label``; afterwards the stored lists are returned directly.

    Returns:
        A ``(test_X, test_y)`` tuple of the cached lists.

    The progress messages use the single-argument ``print(...)`` form, which
    prints the same text on Python 2 and also parses on Python 3.
    """
    global test_X
    global test_y

    # NOTE(review): an empty result is also falsy, so it would be rebuilt on
    # every call; relies on test_X being initialised at module level.
    if not test_X:
        print("Loading test examples")
        _log_time()
        test_examples = music.load_examples("data/test_40k_10k.pkl")
        _print_time_diff()

        print("Obtaining X and y values")
        # Features and labels are built in two separately timed passes.
        _log_time()
        test_X = [model.represent(example) for example in test_examples]
        _print_time_diff()
        _log_time()
        test_y = [model.label(example) for example in test_examples]
        _print_time_diff()

    return test_X, test_y
# Module-level data preparation. Each stage is bracketed by _log_time() /
# _print_time_diff() (presumably a start-timer / print-elapsed pair -- see
# their definitions) and followed by a separator line.
# The single-argument print(...) form prints the same text on Python 2 and
# also parses on Python 3.
print("Readying testing data")
_log_time()
music.ready_testing_data()
_print_time_diff()
_draw_separator()

print("Start loading examples")
_log_time()
examples = music.load_examples("data/train.pkl")
_print_time_diff()
_draw_separator()

print("Obtaining all x and y values")
# Features and labels are derived in two separately timed passes.
_log_time()
all_X = [model.represent(example) for example in examples]
_print_time_diff()
_log_time()
all_y = [model.label(example) for example in examples]
_print_time_diff()
_draw_separator()


def print_consolidated_scores(scores):
    """Print the mean and spread of ``scores`` between two separator lines.

    Args:
        scores: Object exposing ``mean()`` and ``std()`` (for example a
            NumPy array of cross-validation scores).
    """
    _draw_separator(".", 5)
    # NOTE(review): the "+/-" value is half of one standard deviation;
    # confirm that ``/ 2`` (rather than ``* 2``) is intended.
    print("%0.6f (+/- %0.6f)" % (scores.mean(), scores.std() / 2))
    _draw_separator("`", 5)


def _get_mean_score(scores):
    """Return ``scores.mean()`` formatted with six decimal places."""
    mean_score = scores.mean()
    return "%0.6f" % mean_score
scores = cross_val_score(knr, X, y, scoring='neg_mean_squared_error', cv=3) return scores if __name__ == "__main__": import music train_examples = music.load_examples('data/train.pkl') # poly = PolynomialNetworkRegressor(degreex=3, n_components=2, tol=1e-3, warm_start=True, random_state=0) fm = pylibfm.FM(num_iter=10, verbose=True, task="regression", initial_learning_rate=0.001, learning_rate_schedule="optimal") v = DictVectorizer() X = np.asarray([model.represent(example) for example in train_examples]) y = np.asarray([model.label(example) for example in train_examples]) # fm.fit(sparse.csr_matrix(X), y) # svr_rbf.fit(X, y) # pca = PCA(n_components=100) # pca.fit(X) # X_fit = pca.transform(X) # print "pca done" # xs = [x[0] for x in X_fit] # ys = [x[1] for x in X_fit] # plt.scatter(xs, ys) # plt.show() # print v.fit_transform(X) # print X_fitM y_np = np.asarray(y) plt.hist(y_np, bins=np.arange(y_np.min(), y_np.max() + 1))