Esempio n. 1
0
def main(ratings_components=300, features_components=300, print_scores=False):
    np.random.seed(42)
    tf.set_random_seed(1984)
    data_path = '../data/goodbooks-10k/'
    book_features = get_book_features(get_book_dataframe(data_path))
    reduced_item_features, _, _ = reduce_matrix(book_features, n_components=features_components)

    goodreads_path = '../data/goodbooks-10k/ratings.csv'
    amazon_path = '../data/amazon/ratings_amazon.csv'
    spr = get_ratings(goodreads_path, amazon_path, min_amazon_items=6)

    n_folds = 5
    scores = np.zeros((n_folds, 2))
    kf = ColumnwiseKFold(n_folds, random_seed=30)
    for i, (X, (user_indices, item_indices)) in enumerate(kf.split(spr)):
        _, _, rating_VT = reduce_matrix(X, n_components=ratings_components)
        reduced_item_ratings = rating_VT.T
        items = get_reduced_joint(reduced_item_ratings, reduced_item_features)
        tf.reset_default_graph()
        encoder = BookEncoder(user_input_dim=10000, book_input_dim=items.shape[1], user_hidden=150, book_hidden=150)
        with tf.Session() as sess:
            encoder.initialize(sess)
            encoder.train(sess, X, items)
            scores[i, :] = encoder.test(sess, spr, X, items, user_indices, item_indices)
            if print_scores:
                print_evaluation(scores[i, 0], scores[i, 1])

    scores = np.mean(scores, axis=0)
    if print_scores:
        print('{0:d}-Fold Scores:'.format(n_folds))
        print_evaluation(scores[0], scores[1])

    return scores
Esempio n. 2
0
def main(ratings_components=100, features_components=100, print_scores=False):
    #data_path = '../data/goodbooks-10k/'
    data_path = '../../goodbooks-10k/'
    book_features = get_book_features(get_book_dataframe(data_path))
    reduced_item_features, _, _ = reduce_matrix(
        book_features, n_components=features_components)

    goodreads_path = data_path + 'ratings.csv'
    amazon_path = data_path + 'ratings_amazon.csv'
    spr = get_ratings(goodreads_path, amazon_path, min_amazon_items=6)

    n_folds = 5
    scores = np.zeros((n_folds, 2))
    kf = ColumnwiseKFold(n_folds, random_seed=30)
    for i, (X, (user_incides, item_indices)) in enumerate(kf.split(spr)):
        _, _, rating_VT = reduce_matrix(X, n_components=ratings_components)
        reduced_item_ratings = rating_VT.T
        items = get_reduced_joint(reduced_item_ratings, reduced_item_features)
        sim = (cosine_similarity(items) + 1) / 2
        scores[i, :] = evaluate(spr, X, sim, user_incides, item_indices)
        if print_scores:
            print_evaluation(scores[i, 0], scores[i, 1])

    scores = np.mean(scores, axis=0)
    if print_scores:
        print('{0:d}-Fold Scores:')
        print_evaluation(scores[0], scores[1])

    return scores