Ejemplo n.º 1
0
    def test_simple_similarity(self):
        """Check the shape and selected entries of the built similarity matrix.

        The matrix is built from ``self.ratings`` with
        ``ItemSimilarityMatrixBuilder(0)`` and ``save=False``. It must have one
        row and one column per distinct ``movie_id``, the WONDER_WOMAN/AVENGERS
        entry must be -1, and every checked diagonal entry must be 1.
        """
        builder = ItemSimilarityMatrixBuilder(0)

        no_items = len(set(self.ratings['movie_id']))
        cor = builder.build(ratings=self.ratings, save=False)
        self.assertIsNotNone(cor)
        self.assertEqual(
            cor.shape[0], no_items,
            "Expected correlations matrix to have a row for each item")
        self.assertEqual(
            cor.shape[1], no_items,
            "Expected correlations matrix to have a column for each item")

        # The failure message now names the pair and value actually asserted.
        self.assertEqual(
            cor[WONDER_WOMAN][AVENGERS], -1,
            "Expected Wonder Woman and Avengers to have similarity -1")

        # Every item must be perfectly similar to itself.
        for movie in (AVENGERS, STAR_WARS, WONDER_WOMAN, WOLVERINE):
            self.assertEqual(
                cor[movie][movie], 1,
                "Expected items to be similar to themselves similarity 1")
    def test_overlap(self):
        """Build with arguments ``(1, -1)`` and expect nine non-zero entries.

        The second value returned by ``build`` is not needed by this test.
        """
        similarity_builder = ItemSimilarityMatrixBuilder(1, -1)
        matrix, _movies = similarity_builder.build(save=False,
                                                   ratings=self.ratings)

        self.assertIsNotNone(matrix)

        nonzero_entries = matrix.count_nonzero()
        self.assertEqual(nonzero_entries, 9)
    def test_min_ratings(self):
        """Build with a threshold of 2 and verify matrix size and two entries.

        The result of ``toarray()`` is wrapped in a DataFrame labelled with the
        values of the second object returned by ``build`` so that entries can
        be looked up by movie label.
        """
        similarity_builder = ItemSimilarityMatrixBuilder(2)
        matrix, movie_index = similarity_builder.build(save=False,
                                                       ratings=self.ratings)

        frame = pd.DataFrame(
            matrix.toarray(),
            columns=movie_index.values(),
            index=movie_index.values(),
        )

        row_message = "Expected correlations matrix to have a row for each item"
        column_message = "Expected correlations matrix to have a column for each item"
        self.assertEqual(matrix.shape[0], 4, row_message)
        self.assertEqual(matrix.shape[1], 4, column_message)

        self.assertAlmostEqual(frame[WONDER_WOMAN][AVENGERS],
                               0.71066905451870177)
        self.assertAlmostEqual(frame[AVENGERS][AVENGERS], 1)
Ejemplo n.º 4
0
    def test_save_similarities(self):
        """Build the matrix and check the ``Similarity`` rows it leaves behind.

        ``build`` is called without a ``save`` argument; the test then expects
        four ``Similarity`` rows, the first being AVENGERS -> WOLVERINE with a
        similarity of 0.5.

        NOTE(review): another method with this same name follows in this file;
        if both live in the same class the later definition replaces this one
        and this test never runs -- confirm and rename if so.
        """
        builder = ItemSimilarityMatrixBuilder(0)

        cor = builder.build(ratings=self.ratings)

        self.assertIsNotNone(cor)

        # Check the row count first so that an empty table fails with a clear
        # assertion instead of an IndexError on the lookup below.
        self.assertEqual(Similarity.objects.count(), 4)

        av_log = Similarity.objects.all()[0]
        self.assertEqual(av_log.source, AVENGERS)
        self.assertEqual(av_log.target, WOLVERINE)
        # Compare as floats with a tolerance, matching the sibling test, rather
        # than relying on exact equality of a computed value.
        self.assertAlmostEqual(float(av_log.similarity), 0.5)
    def test_save_similarities(self):
        """Build with arguments ``(0, 0.1)`` and inspect the stored rows.

        Expects exactly two ``Similarity`` rows, the first of which links
        WONDER_WOMAN to AVENGERS with a similarity of roughly 0.7107.
        """
        similarity_builder = ItemSimilarityMatrixBuilder(0, 0.1)
        matrix = similarity_builder.build(ratings=self.ratings)
        self.assertIsNotNone(matrix)

        stored = Similarity.objects.all()
        first_row = stored[0]

        stored_count = Similarity.objects.count()
        self.assertEqual(stored_count, 2)
        self.assertEqual(first_row.source, WONDER_WOMAN)
        self.assertEqual(first_row.target, AVENGERS)

        first_similarity = float(first_row.similarity)
        self.assertAlmostEqual(first_similarity, 0.71066905451870177)
    def test_simple_similarity(self):
        """Build with a threshold of 0 and verify size, one pair and diagonals.

        The matrix must have one row and one column per distinct ``movie_id``
        in ``self.ratings``; each checked movie must have similarity 1 with
        itself.
        """
        similarity_builder = ItemSimilarityMatrixBuilder(0)

        expected_size = len(set(self.ratings['movie_id']))
        matrix, movie_index = similarity_builder.build(save=False,
                                                       ratings=self.ratings)
        frame = pd.DataFrame(
            matrix.toarray(),
            columns=movie_index.values(),
            index=movie_index.values(),
        )
        self.assertIsNotNone(frame)

        row_message = "Expected correlations matrix to have a row for each item"
        column_message = "Expected correlations matrix to have a column for each item"
        self.assertEqual(frame.shape[0], expected_size, row_message)
        self.assertEqual(frame.shape[1], expected_size, column_message)

        self.assertAlmostEqual(frame[WONDER_WOMAN][AVENGERS],
                               0.71066905451870177)

        # (movie, expected self-similarity) pairs, in the original check order.
        diagonal_expectations = (
            (AVENGERS, 1),
            (STAR_WARS, 1),
            (WONDER_WOMAN, 1.0),
            (WOLVERINE, 1),
        )
        for movie, expected in diagonal_expectations:
            self.assertAlmostEqual(frame[movie][movie], expected)
Ejemplo n.º 7
0
def evaluate_cf_recommender():
    """Sweep the value logged as ``K`` and append evaluation metrics to a CSV.

    For each ``k`` in ``np.arange(0, 20, 2)`` a fresh ``NeighborhoodBasedRecs``
    and ``ItemSimilarityMatrixBuilder`` are handed to an ``EvaluationRunner``
    together with ``k``. The ``ar``, ``map`` and ``mae`` entries of the result
    are written, with the parameters used, as one line of a timestamped
    ``*-cf.csv`` file in the current working directory.
    """
    min_number_of_ratings = 5
    min_overlap = 5
    min_sim = 0.1
    # Does not depend on the swept value, so compute it once up front.
    min_rank = min_number_of_ratings / 2

    timestr = time.strftime("%Y%m%d-%H%M%S")
    file_name = '{}-cf.csv'.format(timestr)

    # buffering=1 requests line buffering for the text-mode log file.
    with open(file_name, 'a', 1) as logfile:
        logfile.write(
            "ar, map, mae, min_overlap, min_sim, K, min_num_of_ratings, min_rank\n"
        )

        for k in np.arange(0, 20, 2):
            recommender = NeighborhoodBasedRecs()
            er = EvaluationRunner(
                0, ItemSimilarityMatrixBuilder(min_overlap, min_sim=min_sim),
                recommender, k)

            result = er.calculate(min_number_of_ratings,
                                  min_rank,
                                  number_test_users=-1)

            # Named ``mean_avg_precision`` to avoid shadowing the builtin
            # ``map``; the CSV column is still called "map".
            mean_avg_precision = result['map']
            mae = result['mae']
            ar = result['ar']
            logfile.write("{}, {}, {}, {}, {}, {}, {}, {}\n".format(
                ar, mean_avg_precision, mae, min_overlap, min_sim, k,
                min_number_of_ratings, min_rank))
            logfile.flush()
Ejemplo n.º 8
0
    def test_split_data(self):
        """Split 20 ratings from three users and check both partition sizes.

        ``split_data`` is called with ``2``, the ratings frame, ``[1, 2]`` and
        ``[3]``. The test expects 4 rows in the returned ``test`` frame and the
        remaining 16 rows in ``train``.
        """
        ratings = pd.DataFrame(
            [
                [1, STAR_WARS, 9, '2013-10-12 23:21:27+00:00'],
                [1, WONDER_WOMAN, 10, '2014-10-12 23:22:27+00:00'],
                [1, AVENGERS, 10, '2015-11-12 23:20:27+00:00'],
                [1, WOLVERINE, 8, '2015-08-12 23:20:27+00:00'],
                [1, PIRATES_OF, 10, '2015-10-12 22:20:27+00:00'],
                [1, HARRY, 10, '2015-10-12 23:21:27+00:00'],
                [1, CAPTAIN_AMERICA, 10, '2014-10-12 23:20:27+00:00'],
                [1, ALIEN, 6, '2015-10-12 23:22:27+00:00'],
                [1, JACQUES, 6, '2015-10-12 11:20:27+00:00'],
                [2, STAR_WARS, 10, '2013-10-12 23:20:27+00:00'],
                [2, WONDER_WOMAN, 10, '2014-10-12 23:20:27+00:00'],
                [2, AVENGERS, 9, '2016-10-12 23:20:27+00:00'],
                [2, PIRATES_OF, 6, '2010-10-12 23:20:27+00:00'],
                [2, CAPTAIN_AMERICA, 10, '2005-10-12 23:20:27+00:00'],
                [2, DR_STRANGELOVE, 10, '2015-01-12 23:20:27+00:00'],
                [3, STAR_WARS, 9, '2013-10-12 20:20:27+00:00'],
                [3, AVENGERS, 10, '2015-10-12 10:20:27+00:00'],
                [3, PIRATES_OF, 9, '2013-03-12 23:20:27+00:00'],
                [3, HARRY, 8, '2016-10-13 23:20:27+00:00'],
                [3, DR_STRANGELOVE, 10, '2016-09-12 23:20:27+00:00'],
            ],
            columns=['user_id', 'movie_id', 'rating', 'rating_timestamp'])
        er = EvaluationRunner(5, ItemSimilarityMatrixBuilder(1, min_sim=0.0),
                              NeighborhoodBasedRecs())

        test, train = er.split_data(2, ratings, [1, 2], [3])
        self.assertIsNotNone(test)
        # Was ``assertTrue(test.shape[0], 4)``, which treats 4 as the failure
        # message and passes for any non-empty frame. 20 ratings minus the 16
        # expected in ``train`` leaves 4 for ``test``.
        self.assertEqual(test.shape[0], 4)
        self.assertEqual(train.shape[0], 16)
Ejemplo n.º 9
0
    def train(self, ratings=None, train_feature_recs=False):
        """Fit the linear blending weights and persist them to disk.

        Args:
            ratings: Passed to ``ItemSimilarityMatrixBuilder().build`` when
                ``train_feature_recs`` is true; otherwise unused.
            train_feature_recs: When true, rebuild the item-similarity data and
                the LDA model before fitting the regression.

        Returns:
            dict with the fitted coefficients for ``cb1``, ``cb2``, ``cf1`` and
            ``cf2`` plus the ``intercept``. The same dict is pickled to
            ``self.save_path + 'fwls_parameters.data'``.
        """

        if train_feature_recs:
            ItemSimilarityMatrixBuilder().build(ratings)
            LdaModel.build()

        # NOTE(review): if ``linear_model`` is scikit-learn's, the ``normalize``
        # argument was deprecated in 1.0 and removed in 1.2 -- confirm the
        # pinned version before upgrading.
        regr = linear_model.LinearRegression(fit_intercept=True,
                                             n_jobs=-1,
                                             normalize=True)

        regr.fit(self.train_data[['cb1', 'cb2', 'cf1', 'cf2']],
                 self.train_data['rating'])
        self.logger.info(regr.coef_)

        # Coefficients come back in the same order as the feature columns above.
        result = {
            'cb1': regr.coef_[0],
            'cb2': regr.coef_[1],
            'cf1': regr.coef_[2],
            'cf2': regr.coef_[3],
            'intercept': regr.intercept_
        }
        self.logger.debug(result)
        # Log one sample row for debugging. Guarded so that a training set with
        # fewer than 101 rows no longer aborts with IndexError before the
        # parameters are saved.
        if len(self.train_data) > 100:
            self.logger.debug(self.train_data.iloc[100])
        ensure_dir(self.save_path)
        with open(self.save_path + 'fwls_parameters.data', 'wb') as ub_file:
            pickle.dump(result, ub_file)
        return result
Ejemplo n.º 10
0
    def test_min_ratings(self):
        """Build with a threshold of 2 and expect a 2x2 similarity matrix.

        Besides the shape, the WONDER_WOMAN/AVENGERS entry must be -1 and the
        AVENGERS diagonal entry must be 1.
        """
        builder = ItemSimilarityMatrixBuilder(2)

        cor = builder.build(ratings=self.ratings, save=False)
        self.assertIsNotNone(cor)
        self.assertEqual(
            cor.shape[0], 2,
            "Expected correlations matrix to have a row for each item")
        self.assertEqual(
            cor.shape[1], 2,
            "Expected correlations matrix to have a column for each item")

        # The failure message now names the pair and value actually asserted.
        self.assertEqual(
            cor[WONDER_WOMAN][AVENGERS], -1,
            "Expected Wonder Woman and Avengers to have similarity -1")
        self.assertEqual(
            cor[AVENGERS][AVENGERS], 1,
            "Expected items to be similar to themselves similarity 1")
Ejemplo n.º 11
0
def evaluate_cf_recommender():
    """Sweep ``min_overlap`` and append metrics and coverage to a CSV file.

    For each ``min_overlap`` in ``np.arange(0, 20, 2)`` a similarity builder
    with that overlap is evaluated through ``EvaluationRunner``. The ``rak``,
    ``pak`` and ``mae`` results, plus the user and movie coverage of the
    recommender, are written as one line of a timestamped
    ``*-min_overlap_item_similarity.csv`` file in the current directory.
    """
    min_number_of_ratings = 20
    min_sim = 0.1
    K = 20
    # Does not depend on the swept value, so compute it once up front.
    min_rank = min_number_of_ratings / 2

    timestr = time.strftime("%Y%m%d-%H%M%S")
    file_name = '{}-min_overlap_item_similarity.csv'.format(timestr)

    # buffering=1 requests line buffering for the text-mode log file.
    with open(file_name, 'a', 1) as logfile:
        logfile.write(
            "rak, pak, mae, min_overlap, min_sim, K, min_num_of_ratings, min_rank, user_coverage, "
            "movie_coverage\n")

        for min_overlap in np.arange(0, 20, 2):
            # Build the similarity builder inside the loop. Previously it was
            # created once with min_overlap=5, so every row was evaluated with
            # the same overlap while a different one was logged.
            builder = ItemSimilarityMatrixBuilder(min_overlap, min_sim=min_sim)
            recommender = NeighborhoodBasedRecs()
            er = EvaluationRunner(0, builder, recommender, K)
            # Run the baseline recommender:
            # er = EvaluationRunner(3, None, PopularityBasedRecs(), K)

            result = er.calculate(min_number_of_ratings,
                                  min_rank,
                                  number_test_users=-1)

            user_coverage, movie_coverage = RecommenderCoverage(
                recommender).calculate_coverage()
            pak = result['pak']
            mae = result['mae']
            rak = result['rak']
            logfile.write("{}, {}, {}, {}, {}, {}, {}, {}, {}, {}\n".format(
                rak, pak, mae, min_overlap, min_sim, K, min_number_of_ratings,
                min_rank, user_coverage, movie_coverage))
            logfile.flush()
Ejemplo n.º 12
0
# Script entry point: sweep ``min_number_of_ratings`` over np.arange(5, 30, 10)
# and append one CSV line of evaluation results per value to a timestamped
# ``*-min_number_of_ratings_training.csv`` file in the current directory.
if __name__ == '__main__':
    min_number_of_ratings = 30
    min_overlap = 25
    min_sim = 0
    K = 25  #redo
    min_rank = 5

    timestr = time.strftime("%Y%m%d-%H%M%S")
    file_name = '{}-min_number_of_ratings_training.csv'.format(timestr)

    # buffering=1 requests line buffering for the text-mode log file.
    with open(file_name, 'a', 1) as logfile:
        logfile.write(
            "pak, mae, min_overlap, min_sim, K, min_num_of_ratings, min_rank\n"
        )

        for min_number_of_ratings in np.arange(5, 30, 10):
            # Both derived thresholds follow the swept value.
            min_rank = min_number_of_ratings / 2
            min_overlap = min_number_of_ratings - min_rank
            er = EvaluationRunner(
                3, ItemSimilarityMatrixBuilder(min_overlap, min_sim=min_sim),
                NeighborhoodBasedRecs(), K)
            result = er.calculate(min_number_of_ratings,
                                  min_rank,
                                  number_test_users=1000)
            pak = result['pak']
            mae = result['mae']
            # The format string has seven fields to match the seven header
            # columns. A trailing ``datetime.now()`` argument used to be passed
            # here but was silently ignored by ``str.format``, so it has been
            # dropped; the written output is unchanged.
            logfile.write("{}, {}, {}, {}, {}, {}, {} \n".format(
                pak, mae, min_overlap, min_sim, K, min_number_of_ratings,
                min_rank))
Ejemplo n.º 13
0
            len(self.all_users)))
        for user in self.all_users:
            user_id = str(user['user_id'])
            recset = self.cf.recommend_items(user_id)
            if recset:
                self.users_with_recs.append(user)
                for rec in recset:
                    self.items_in_rec[rec[0]] += 1
                print('found recs for {}'.format(user_id))

        print('writing cf coverage to file.')
        json.dump(self.items_in_rec, open('cf_coverage.json', 'w'))

        no_movies = Movie.objects.all().count()
        no_movies_in_rec = len(self.items_in_rec.items())

        print("{} {} {}".format(no_movies, no_movies_in_rec,
                                float(no_movies / no_movies_in_rec)))
        return no_movies_in_rec / no_movies


# Script entry point: run the legacy precision-at-K calculation.
if __name__ == '__main__':
    # print("Calculating coverage...")
    # CFCoverage().calculate_coverage()

    print("Calculating Precision at K")

    # Construct the collaborators first (same order as argument evaluation),
    # then hand them to the precision calculator.
    cf_recommender = NeighborhoodBasedRecs()
    similarity_builder = ItemSimilarityMatrixBuilder()
    pak = PrecissionAtK(5, cf_recommender, similarity_builder)

    pak.calculate_old()
Ejemplo n.º 14
0
    def test(self):
        """Run the evaluation on a ten-user in-memory rating set and print it.

        The movie identifiers are defined locally as plain strings. There are
        no assertions yet; the result of ``calculate_using_ratings`` is only
        printed.
        """
        runner = EvaluationRunner(5,
                                  ItemSimilarityMatrixBuilder(1, min_sim=0.0),
                                  NeighborhoodBasedRecs())

        # Local movie identifiers used in the rating rows below.
        STAR_WARS = 'star wars'
        WONDER_WOMAN = 'wonder woman'
        AVENGERS = 'avengers'
        WOLVERINE = 'logan'
        PIRATES_OF = 'pirates of the caribbien'
        HARRY = 'harry potter I'
        CAPTAIN_AMERICA = 'captain america'
        ALIEN = 'alien'
        DR_STRANGELOVE = 'doctor strangelove'
        JACQUES = 'jacques'

        column_names = ['user_id', 'movie_id', 'rating', 'rating_timestamp']
        rating_rows = [
            # user 1
            [1, STAR_WARS, 9, '2013-10-12 23:21:27+00:00'],
            [1, WONDER_WOMAN, 10, '2014-10-12 23:22:27+00:00'],
            [1, AVENGERS, 10, '2015-11-12 23:20:27+00:00'],
            [1, WOLVERINE, 8, '2015-08-12 23:20:27+00:00'],
            [1, PIRATES_OF, 10, '2015-10-12 22:20:27+00:00'],
            [1, HARRY, 10, '2015-10-12 23:21:27+00:00'],
            [1, CAPTAIN_AMERICA, 10, '2014-10-12 23:20:27+00:00'],
            [1, ALIEN, 6, '2015-10-12 23:22:27+00:00'],
            [1, JACQUES, 6, '2015-10-12 11:20:27+00:00'],
            # user 2
            [2, STAR_WARS, 10, '2013-10-12 23:20:27+00:00'],
            [2, WONDER_WOMAN, 10, '2014-10-12 23:20:27+00:00'],
            [2, AVENGERS, 9, '2016-10-12 23:20:27+00:00'],
            [2, PIRATES_OF, 6, '2010-10-12 23:20:27+00:00'],
            [2, CAPTAIN_AMERICA, 10, '2005-10-12 23:20:27+00:00'],
            [2, DR_STRANGELOVE, 10, '2015-01-12 23:20:27+00:00'],
            # user 3
            [3, STAR_WARS, 9, '2013-10-12 20:20:27+00:00'],
            [3, AVENGERS, 10, '2015-10-12 10:20:27+00:00'],
            [3, PIRATES_OF, 9, '2013-03-12 23:20:27+00:00'],
            [3, HARRY, 8, '2016-10-13 23:20:27+00:00'],
            [3, DR_STRANGELOVE, 10, '2016-09-12 23:20:27+00:00'],
            # user 4
            [4, STAR_WARS, 8, '2013-10-12 23:20:27+00:00'],
            [4, WONDER_WOMAN, 8, '2014-10-12 23:20:27+00:00'],
            [4, AVENGERS, 9, '2015-10-12 23:20:27+00:00'],
            [4, PIRATES_OF, 5, '2013-10-12 23:20:27+00:00'],
            [4, HARRY, 6, '2014-10-12 23:20:27+00:00'],
            [4, ALIEN, 8, '2015-10-12 23:20:27+00:00'],
            [4, DR_STRANGELOVE, 9, '2015-10-12 23:20:27+00:00'],
            # user 5
            [5, STAR_WARS, 6, '2013-10-12 23:20:27+00:00'],
            [5, AVENGERS, 6, '2014-10-12 23:20:27+00:00'],
            [5, WOLVERINE, 8, '2015-10-12 23:20:27+00:00'],
            [5, PIRATES_OF, 2, '2016-10-12 23:20:27+00:00'],
            [5, HARRY, 10, '2016-10-12 23:20:27+00:00'],
            [5, CAPTAIN_AMERICA, 6, '2016-10-12 23:20:27+00:00'],
            [5, ALIEN, 4, '2016-10-12 23:20:27+00:00'],
            [5, DR_STRANGELOVE, 8, '2016-10-12 23:20:27+00:00'],
            [5, JACQUES, 10, '2016-10-12 23:20:27+00:00'],
            # user 6
            [6, STAR_WARS, 8, '2013-10-12 23:20:27+00:00'],
            [6, WONDER_WOMAN, 8, '2014-10-12 23:20:27+00:00'],
            [6, AVENGERS, 8, '2014-10-12 23:20:27+00:00'],
            [6, WOLVERINE, 8, '2015-10-12 23:20:27+00:00'],
            [6, PIRATES_OF, 6, '2016-10-12 23:20:27+00:00'],
            [6, HARRY, 10, '2016-10-12 23:20:27+00:00'],
            [6, JACQUES, 8, '2016-10-12 23:20:27+00:00'],
            # user 7
            [7, AVENGERS, 10, '2014-10-12 23:20:27+00:00'],
            [7, PIRATES_OF, 3, '2016-10-12 23:20:27+00:00'],
            [7, HARRY, 1, '2016-10-12 23:20:27+00:00'],
            [7, ALIEN, 8, '2016-10-12 23:20:27+00:00'],
            [7, DR_STRANGELOVE, 10, '2016-10-12 23:20:27+00:00'],
            # user 8
            [8, STAR_WARS, 9, '2013-10-12 23:20:27+00:00'],
            [8, WONDER_WOMAN, 7, '2014-10-12 23:20:27+00:00'],
            [8, AVENGERS, 7, '2014-10-12 23:20:27+00:00'],
            [8, WOLVERINE, 7, '2015-10-12 23:20:27+00:00'],
            [8, PIRATES_OF, 8, '2016-10-12 23:20:27+00:00'],
            [8, HARRY, 8, '2016-10-12 23:20:27+00:00'],
            [8, ALIEN, 8, '2016-10-12 23:20:27+00:00'],
            [8, DR_STRANGELOVE, 8, '2016-10-12 23:20:27+00:00'],
            [8, JACQUES, 10, '2016-10-12 23:20:27+00:00'],
            # user 9
            [9, WONDER_WOMAN, 7, '2014-10-12 23:20:27+00:00'],
            [9, AVENGERS, 8, '2014-10-12 23:20:27+00:00'],
            [9, WOLVERINE, 8, '2015-10-12 23:20:27+00:00'],
            [9, PIRATES_OF, 7, '2016-10-12 23:20:27+00:00'],
            [9, HARRY, 8, '2016-10-12 23:20:27+00:00'],
            [9, CAPTAIN_AMERICA, 10, '2016-10-12 23:20:27+00:00'],
            [9, DR_STRANGELOVE, 10, '2016-10-12 23:20:27+00:00'],
            [9, JACQUES, 7, '2016-10-12 23:20:27+00:00'],
            # user 10
            [10, AVENGERS, 7, '2014-10-12 23:20:27+00:00'],
            [10, HARRY, 10, '2016-10-12 23:20:27+00:00'],
            [10, CAPTAIN_AMERICA, 6, '2016-10-12 23:20:27+00:00'],
            [10, DR_STRANGELOVE, 8, '2016-10-12 23:20:27+00:00'],
        ]
        ratings = pd.DataFrame(rating_rows, columns=column_names)

        outcome = runner.calculate_using_ratings(ratings,
                                                 min_rank=5,
                                                 min_number_of_ratings=4)

        # Nothing is asserted about the outcome yet; it is only displayed.
        print(outcome)
Ejemplo n.º 15
0
    def test(self):
        """Evaluate on a ten-user rating set and bound the resulting metrics.

        ``calculate_using_ratings`` is run with ``min_number_of_ratings=4`` and
        ``min_rank=5``. The test requires ``mae`` below 1.7 and both ``pak``
        and ``rak`` below 0.7.
        """
        er = EvaluationRunner(5, ItemSimilarityMatrixBuilder(1, min_sim=0.0),
                              NeighborhoodBasedRecs())

        ratings = pd.DataFrame(
            [
                [1, STAR_WARS, 9, '2013-10-12 23:21:27+00:00'],
                [1, WONDER_WOMAN, 10, '2014-10-12 23:22:27+00:00'],
                [1, AVENGERS, 10, '2015-11-12 23:20:27+00:00'],
                [1, WOLVERINE, 8, '2015-08-12 23:20:27+00:00'],
                [1, PIRATES_OF, 10, '2015-10-12 22:20:27+00:00'],
                [1, HARRY, 10, '2015-10-12 23:21:27+00:00'],
                [1, CAPTAIN_AMERICA, 10, '2014-10-12 23:20:27+00:00'],
                [1, ALIEN, 6, '2015-10-12 23:22:27+00:00'],
                [1, JACQUES, 6, '2015-10-12 11:20:27+00:00'],
                [2, STAR_WARS, 10, '2013-10-12 23:20:27+00:00'],
                [2, WONDER_WOMAN, 10, '2014-10-12 23:20:27+00:00'],
                [2, AVENGERS, 9, '2016-10-12 23:20:27+00:00'],
                [2, PIRATES_OF, 6, '2010-10-12 23:20:27+00:00'],
                [2, CAPTAIN_AMERICA, 10, '2005-10-12 23:20:27+00:00'],
                [2, DR_STRANGELOVE, 10, '2015-01-12 23:20:27+00:00'],
                [3, STAR_WARS, 9, '2013-10-12 20:20:27+00:00'],
                [3, AVENGERS, 10, '2015-10-12 10:20:27+00:00'],
                [3, PIRATES_OF, 9, '2013-03-12 23:20:27+00:00'],
                [3, HARRY, 8, '2016-10-13 23:20:27+00:00'],
                [3, DR_STRANGELOVE, 10, '2016-09-12 23:20:27+00:00'],
                [4, STAR_WARS, 8, '2013-10-12 23:20:27+00:00'],
                [4, WONDER_WOMAN, 8, '2014-10-12 23:20:27+00:00'],
                [4, AVENGERS, 9, '2015-10-12 23:20:27+00:00'],
                [4, PIRATES_OF, 5, '2013-10-12 23:20:27+00:00'],
                [4, HARRY, 6, '2014-10-12 23:20:27+00:00'],
                [4, ALIEN, 8, '2015-10-12 23:20:27+00:00'],
                [4, DR_STRANGELOVE, 9, '2015-10-12 23:20:27+00:00'],
                [5, STAR_WARS, 6, '2013-10-12 23:20:27+00:00'],
                [5, AVENGERS, 6, '2014-10-12 23:20:27+00:00'],
                [5, WOLVERINE, 8, '2015-10-12 23:20:27+00:00'],
                [5, PIRATES_OF, 2, '2016-10-12 23:20:27+00:00'],
                [5, HARRY, 10, '2016-10-12 23:20:27+00:00'],
                [5, CAPTAIN_AMERICA, 6, '2016-10-12 23:20:27+00:00'],
                [5, ALIEN, 4, '2016-10-12 23:20:27+00:00'],
                [5, DR_STRANGELOVE, 8, '2016-10-12 23:20:27+00:00'],
                [5, JACQUES, 10, '2016-10-12 23:20:27+00:00'],
                [6, STAR_WARS, 8, '2013-10-12 23:20:27+00:00'],
                [6, WONDER_WOMAN, 8, '2014-10-12 23:20:27+00:00'],
                [6, AVENGERS, 8, '2014-10-12 23:20:27+00:00'],
                [6, WOLVERINE, 8, '2015-10-12 23:20:27+00:00'],
                [6, PIRATES_OF, 6, '2016-10-12 23:20:27+00:00'],
                [6, HARRY, 10, '2016-10-12 23:20:27+00:00'],
                [6, JACQUES, 8, '2016-10-12 23:20:27+00:00'],
                [7, AVENGERS, 10, '2014-10-12 23:20:27+00:00'],
                [7, PIRATES_OF, 3, '2016-10-12 23:20:27+00:00'],
                [7, HARRY, 1, '2016-10-12 23:20:27+00:00'],
                [7, ALIEN, 8, '2016-10-12 23:20:27+00:00'],
                [7, DR_STRANGELOVE, 10, '2016-10-12 23:20:27+00:00'],
                [8, STAR_WARS, 9, '2013-10-12 23:20:27+00:00'],
                [8, WONDER_WOMAN, 7, '2014-10-12 23:20:27+00:00'],
                [8, AVENGERS, 7, '2014-10-12 23:20:27+00:00'],
                [8, WOLVERINE, 7, '2015-10-12 23:20:27+00:00'],
                [8, PIRATES_OF, 8, '2016-10-12 23:20:27+00:00'],
                [8, HARRY, 8, '2016-10-12 23:20:27+00:00'],
                [8, ALIEN, 8, '2016-10-12 23:20:27+00:00'],
                [8, DR_STRANGELOVE, 8, '2016-10-12 23:20:27+00:00'],
                [8, JACQUES, 10, '2016-10-12 23:20:27+00:00'],
                [9, WONDER_WOMAN, 7, '2014-10-12 23:20:27+00:00'],
                [9, AVENGERS, 8, '2014-10-12 23:20:27+00:00'],
                [9, WOLVERINE, 8, '2015-10-12 23:20:27+00:00'],
                [9, PIRATES_OF, 7, '2016-10-12 23:20:27+00:00'],
                [9, HARRY, 8, '2016-10-12 23:20:27+00:00'],
                [9, CAPTAIN_AMERICA, 10, '2016-10-12 23:20:27+00:00'],
                [9, DR_STRANGELOVE, 10, '2016-10-12 23:20:27+00:00'],
                [9, JACQUES, 7, '2016-10-12 23:20:27+00:00'],
                [10, AVENGERS, 7, '2014-10-12 23:20:27+00:00'],
                [10, HARRY, 10, '2016-10-12 23:20:27+00:00'],
                [10, CAPTAIN_AMERICA, 6, '2016-10-12 23:20:27+00:00'],
                [10, DR_STRANGELOVE, 8, '2016-10-12 23:20:27+00:00'],
            ],
            columns=['user_id', 'movie_id', 'rating', 'rating_timestamp'])

        result = er.calculate_using_ratings(ratings,
                                            min_number_of_ratings=4,
                                            min_rank=5)

        # Thresholds are built from string literals: Decimal(1.7) would carry
        # the binary-float approximation of 1.7 rather than exactly 1.7.
        self.assertLess(result['mae'], decimal.Decimal('1.7'))
        self.assertLess(result['pak'], decimal.Decimal('0.7'))
        self.assertLess(result['rak'], decimal.Decimal('0.7'))
        print(result)
Ejemplo n.º 16
0
    def setUp(self):
        """Create the shared rating fixture and build similarities from it.

        ``self.ratings`` holds the ratings of ten users; afterwards the
        similarity builder is run on that frame with ``save=True``.
        """
        column_names = ['user_id', 'movie_id', 'rating', 'rating_timestamp']
        rating_rows = [
            # user 1
            [1, STAR_WARS, 9, '2013-10-12 23:21:27+00:00'],
            [1, WONDER_WOMAN, 10, '2014-10-12 23:22:27+00:00'],
            [1, AVENGERS, 10, '2015-11-12 23:20:27+00:00'],
            [1, WOLVERINE, 8, '2015-08-12 23:20:27+00:00'],
            [1, PIRATES_OF, 10, '2015-10-12 22:20:27+00:00'],
            [1, HARRY, 10, '2015-10-12 23:21:27+00:00'],
            [1, CAPTAIN_AMERICA, 10, '2014-10-12 23:20:27+00:00'],
            [1, ALIEN, 6, '2015-10-12 23:22:27+00:00'],
            [1, JACQUES, 6, '2015-10-12 11:20:27+00:00'],
            # user 2
            [2, STAR_WARS, 10, '2013-10-12 23:20:27+00:00'],
            [2, WONDER_WOMAN, 10, '2014-10-12 23:20:27+00:00'],
            [2, AVENGERS, 9, '2016-10-12 23:20:27+00:00'],
            [2, PIRATES_OF, 6, '2010-10-12 23:20:27+00:00'],
            [2, CAPTAIN_AMERICA, 10, '2005-10-12 23:20:27+00:00'],
            [2, DR_STRANGELOVE, 10, '2015-01-12 23:20:27+00:00'],
            # user 3
            [3, STAR_WARS, 9, '2013-10-12 20:20:27+00:00'],
            [3, AVENGERS, 10, '2015-10-12 10:20:27+00:00'],
            [3, PIRATES_OF, 9, '2013-03-12 23:20:27+00:00'],
            [3, HARRY, 8, '2016-10-13 23:20:27+00:00'],
            [3, DR_STRANGELOVE, 10, '2016-09-12 23:20:27+00:00'],
            # user 4
            [4, STAR_WARS, 8, '2013-10-12 23:20:27+00:00'],
            [4, WONDER_WOMAN, 8, '2014-10-12 23:20:27+00:00'],
            [4, AVENGERS, 9, '2015-10-12 23:20:27+00:00'],
            [4, PIRATES_OF, 5, '2013-10-12 23:20:27+00:00'],
            [4, HARRY, 6, '2014-10-12 23:20:27+00:00'],
            [4, ALIEN, 8, '2015-10-12 23:20:27+00:00'],
            [4, DR_STRANGELOVE, 9, '2015-10-12 23:20:27+00:00'],
            # user 5
            [5, STAR_WARS, 6, '2013-10-12 23:20:27+00:00'],
            [5, AVENGERS, 6, '2014-10-12 23:20:27+00:00'],
            [5, WOLVERINE, 8, '2015-10-12 23:20:27+00:00'],
            [5, PIRATES_OF, 2, '2016-10-12 23:20:27+00:00'],
            [5, HARRY, 10, '2016-10-12 23:20:27+00:00'],
            [5, CAPTAIN_AMERICA, 6, '2016-10-12 23:20:27+00:00'],
            [5, ALIEN, 4, '2016-10-12 23:20:27+00:00'],
            [5, DR_STRANGELOVE, 8, '2016-10-12 23:20:27+00:00'],
            [5, JACQUES, 10, '2016-10-12 23:20:27+00:00'],
            # user 6
            [6, STAR_WARS, 8, '2013-10-12 23:20:27+00:00'],
            [6, WONDER_WOMAN, 8, '2014-10-12 23:20:27+00:00'],
            [6, AVENGERS, 8, '2014-10-12 23:20:27+00:00'],
            [6, WOLVERINE, 8, '2015-10-12 23:20:27+00:00'],
            [6, PIRATES_OF, 6, '2016-10-12 23:20:27+00:00'],
            [6, HARRY, 10, '2016-10-12 23:20:27+00:00'],
            [6, JACQUES, 8, '2016-10-12 23:20:27+00:00'],
            # user 7
            [7, AVENGERS, 10, '2014-10-12 23:20:27+00:00'],
            [7, PIRATES_OF, 3, '2016-10-12 23:20:27+00:00'],
            [7, HARRY, 1, '2016-10-12 23:20:27+00:00'],
            [7, ALIEN, 8, '2016-10-12 23:20:27+00:00'],
            [7, DR_STRANGELOVE, 10, '2016-10-12 23:20:27+00:00'],
            # user 8
            [8, STAR_WARS, 9, '2013-10-12 23:20:27+00:00'],
            [8, WONDER_WOMAN, 7, '2014-10-12 23:20:27+00:00'],
            [8, AVENGERS, 7, '2014-10-12 23:20:27+00:00'],
            [8, WOLVERINE, 7, '2015-10-12 23:20:27+00:00'],
            [8, PIRATES_OF, 8, '2016-10-12 23:20:27+00:00'],
            [8, HARRY, 8, '2016-10-12 23:20:27+00:00'],
            [8, ALIEN, 8, '2016-10-12 23:20:27+00:00'],
            [8, DR_STRANGELOVE, 8, '2016-10-12 23:20:27+00:00'],
            [8, JACQUES, 10, '2016-10-12 23:20:27+00:00'],
            # user 9
            [9, WONDER_WOMAN, 7, '2014-10-12 23:20:27+00:00'],
            [9, AVENGERS, 8, '2014-10-12 23:20:27+00:00'],
            [9, WOLVERINE, 8, '2015-10-12 23:20:27+00:00'],
            [9, PIRATES_OF, 7, '2016-10-12 23:20:27+00:00'],
            [9, HARRY, 8, '2016-10-12 23:20:27+00:00'],
            [9, CAPTAIN_AMERICA, 10, '2016-10-12 23:20:27+00:00'],
            [9, DR_STRANGELOVE, 10, '2016-10-12 23:20:27+00:00'],
            [9, JACQUES, 7, '2016-10-12 23:20:27+00:00'],
            # user 10
            [10, AVENGERS, 7, '2014-10-12 23:20:27+00:00'],
            [10, ALIEN, 10, '2016-10-12 23:20:27+00:00'],
            [10, CAPTAIN_AMERICA, 6, '2016-10-12 23:20:27+00:00'],
            [10, DR_STRANGELOVE, 8, '2016-10-12 23:20:27+00:00'],
        ]
        self.ratings = pd.DataFrame(rating_rows, columns=column_names)

        # Run the builder on the fixture with saving switched on.
        similarity_builder = ItemSimilarityMatrixBuilder(0, min_sim=0)
        similarity_builder.build(self.ratings, save=True)
Ejemplo n.º 17
0
            ) as rank
        from    analytics_rating as rating1
        where    rank < 3"""

        columns = ['user_id', 'movie_id', 'rating', 'type']
        rating_data = data_helper.get_data_frame(sql, columns)

        print('found {} ratings'.format(rating_data.count()))
        return rating_data


if __name__ == '__main__':
    TEST = False

    if TEST:
        er = EvaluationRunner(5, ItemSimilarityMatrixBuilder(2),
                              NeighborhoodBasedRecs())
        ratings = pd.DataFrame(
            [
                [1, '11', 5, '2013-10-12 23:20:27+00:00'],
                [1, '12', 3, '2014-10-12 23:20:27+00:00'],
                [1, '14', 2, '2015-10-12 23:20:27+00:00'],
                [2, '11', 4, '2013-10-12 23:20:27+00:00'],
                [2, '12', 3, '2014-10-12 23:20:27+00:00'],
                [2, '13', 4, '2015-10-12 23:20:27+00:00'],
                [3, '11', 5, '2013-10-12 23:20:27+00:00'],
                [3, '12', 2, '2014-10-12 23:20:27+00:00'],
                [3, '13', 5, '2015-10-12 23:20:27+00:00'],
                [3, '14', 2, '2016-10-12 23:20:27+00:00'],
                [4, '11', 3, '2013-10-12 23:20:27+00:00'],
                [4, '12', 5, '2014-10-12 23:20:27+00:00'],