def main(ratings_components=300, features_components=300, print_scores=False):
    """Cross-validate the BookEncoder model and return the mean fold scores.

    Book features and the per-fold ratings matrix are each reduced with
    ``reduce_matrix``, joined with ``get_reduced_joint``, and used to train
    a fresh ``BookEncoder`` on every fold produced by ``ColumnwiseKFold``.

    Args:
        ratings_components: ``n_components`` passed to ``reduce_matrix`` for
            each fold's ratings matrix.
        features_components: ``n_components`` passed to ``reduce_matrix`` for
            the book-feature matrix.
        print_scores: When true, print the per-fold scores and the final
            averaged scores via ``print_evaluation``.

    Returns:
        A length-2 NumPy array holding the two values returned by
        ``encoder.test``, averaged over all folds.
    """
    np.random.seed(42)

    data_path = '../data/goodbooks-10k/'
    book_features = get_book_features(get_book_dataframe(data_path))
    reduced_item_features, _, _ = reduce_matrix(
        book_features, n_components=features_components)

    goodreads_path = '../data/goodbooks-10k/ratings.csv'
    amazon_path = '../data/amazon/ratings_amazon.csv'
    spr = get_ratings(goodreads_path, amazon_path, min_amazon_items=6)

    n_folds = 5
    scores = np.zeros((n_folds, 2))
    kf = ColumnwiseKFold(n_folds, random_seed=30)
    for i, (X, (user_indices, item_indices)) in enumerate(kf.split(spr)):
        # Only the third return value of reduce_matrix is used; its transpose
        # is treated as the per-item representation of the ratings.
        _, _, rating_VT = reduce_matrix(X, n_components=ratings_components)
        reduced_item_ratings = rating_VT.T
        items = get_reduced_joint(reduced_item_ratings, reduced_item_features)

        tf.reset_default_graph()
        # The graph-level seed belongs to the default graph, so it has to be
        # set again after every reset; seeding once before the loop is lost
        # as soon as reset_default_graph() replaces the graph.
        tf.set_random_seed(1984)
        encoder = BookEncoder(user_input_dim=10000,
                              book_input_dim=items.shape[1],
                              user_hidden=150,
                              book_hidden=150)
        with tf.Session() as sess:
            encoder.initialize(sess)
            encoder.train(sess, X, items)
            scores[i, :] = encoder.test(
                sess, spr, X, items, user_indices, item_indices)

        if print_scores:
            print_evaluation(scores[i, 0], scores[i, 1])

    # Collapse the (n_folds, 2) table into one mean per score column.
    scores = np.mean(scores, axis=0)
    if print_scores:
        print('{0:d}-Fold Scores:'.format(n_folds))
        print_evaluation(scores[0], scores[1])

    return scores
def main(ratings_components=100, features_components=100, print_scores=False):
    """Cross-validate the cosine-similarity recommender and return mean scores.

    Book features and the per-fold ratings matrix are each reduced with
    ``reduce_matrix``, joined with ``get_reduced_joint``, and turned into an
    item-item similarity matrix that ``evaluate`` scores on each fold
    produced by ``ColumnwiseKFold``.

    Args:
        ratings_components: ``n_components`` passed to ``reduce_matrix`` for
            each fold's ratings matrix.
        features_components: ``n_components`` passed to ``reduce_matrix`` for
            the book-feature matrix.
        print_scores: When true, print the per-fold scores and the final
            averaged scores via ``print_evaluation``.

    Returns:
        A length-2 NumPy array holding the two values returned by
        ``evaluate``, averaged over all folds.
    """
    data_path = '../../goodbooks-10k/'
    book_features = get_book_features(get_book_dataframe(data_path))
    reduced_item_features, _, _ = reduce_matrix(
        book_features, n_components=features_components)

    goodreads_path = data_path + 'ratings.csv'
    amazon_path = data_path + 'ratings_amazon.csv'
    spr = get_ratings(goodreads_path, amazon_path, min_amazon_items=6)

    n_folds = 5
    scores = np.zeros((n_folds, 2))
    kf = ColumnwiseKFold(n_folds, random_seed=30)
    for i, (X, (user_indices, item_indices)) in enumerate(kf.split(spr)):
        # Only the third return value of reduce_matrix is used; its transpose
        # is treated as the per-item representation of the ratings.
        _, _, rating_VT = reduce_matrix(X, n_components=ratings_components)
        reduced_item_ratings = rating_VT.T
        items = get_reduced_joint(reduced_item_ratings, reduced_item_features)

        # Shift and halve the similarity so values that lie in [-1, 1]
        # end up in [0, 1].
        sim = (cosine_similarity(items) + 1) / 2
        scores[i, :] = evaluate(spr, X, sim, user_indices, item_indices)

        if print_scores:
            print_evaluation(scores[i, 0], scores[i, 1])

    # Collapse the (n_folds, 2) table into one mean per score column.
    scores = np.mean(scores, axis=0)
    if print_scores:
        # The header needs the fold count substituted; without .format() the
        # raw '{0:d}' placeholder would be printed.
        print('{0:d}-Fold Scores:'.format(n_folds))
        print_evaluation(scores[0], scores[1])

    return scores