def test_top_n(self):
        """Smoke-test ``recommend_items_by_ratings`` with three explicit ratings.

        Only verifies that the call returns something other than ``None``.
        """
        recommender = NeighborhoodBasedRecs()
        known_ratings = [
            {'movie_id': AVENGERS, 'rating': 7},
            {'movie_id': ALIEN, 'rating': 10},
            {'movie_id': CAPTAIN_AMERICA, 'rating': 6},
        ]

        # The leading 10 is passed positionally -- presumably the user id;
        # confirm against NeighborhoodBasedRecs.recommend_items_by_ratings.
        recs = recommender.recommend_items_by_ratings(10, known_ratings)
        self.assertIsNotNone(recs)
예제 #2
0
class FeatureWeightedLinearStacking(base_recommender):
    """Hybrid recommender that linearly blends two underlying recommenders.

    Predictions from a content-based recommender (``cb``) and a
    neighbourhood-based recommender (``cf``) are combined using four fixed
    weights, each paired with one of the feature functions ``fun1``/``fun2``.
    """

    def __init__(self):
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()

        # Fixed blend weights: wcb* multiply the content-based prediction,
        # wcf* the neighbourhood-based one (see ``prediction``).
        self.wcb1 = Decimal(0.65221204)
        self.wcb2 = Decimal(-0.14638855)
        self.wcf1 = Decimal(-0.0062952)
        self.wcf2 = Decimal(0.09139193)

    def fun1(self):
        """Constant feature function; always returns ``Decimal(1.0)``."""
        return Decimal(1.0)

    def fun2(self, user_id):
        """Return ``Decimal(1.0)`` if the user has more than 3 ratings, else 0.

        Issues one ``COUNT`` query against ``Rating`` per call.
        """
        count = Rating.objects.filter(user_id=user_id).count()
        if count > 3.0:
            return Decimal(1.0)
        return Decimal(0.0)

    def recommend_items(self, user_id, num=6):
        """Return the ``num`` best blended recommendations for ``user_id``.

        Each sub-recommender is asked for ``num * 5`` candidates. A candidate
        proposed by only one of them gets the other score via that
        recommender's ``predict_score``.

        Returns:
            A list of ``(movie_id, {'prediction': score})`` tuples ordered by
            descending blended score.
        """
        cb_recs = self.cb.recommend_items(user_id, num * 5)
        cf_recs = self.cf.recommend_items(user_id, num * 5)

        combined_recs = dict()
        for rec in cb_recs:
            combined_recs[rec[0]] = {'cb': rec[1]['prediction']}

        for rec in cf_recs:
            combined_recs.setdefault(rec[0], {})['cf'] = rec[1]['prediction']

        fwls_preds = dict()
        for key, recs in combined_recs.items():
            # Fill in whichever score the candidate is still missing.
            if 'cb' not in recs:
                recs['cb'] = self.cb.predict_score(user_id, key)
            if 'cf' not in recs:
                recs['cf'] = self.cf.predict_score(user_id, key)
            pred = self.prediction(recs['cb'], recs['cf'], user_id)
            fwls_preds[key] = {'prediction': pred}

        sorted_items = sorted(
            fwls_preds.items(),
            key=lambda item: -float(item[1]['prediction']))[:num]
        return sorted_items

    def predict_score(self, user_id, item_id):
        """Return the blended predicted score of ``item_id`` for ``user_id``."""
        p_cb = self.cb.predict_score(user_id, item_id)
        p_cf = self.cf.predict_score(user_id, item_id)

        # The blended value used to be computed and then discarded, so every
        # caller received None.
        return self.prediction(p_cb, p_cf, user_id)

    def prediction(self, p_cb, p_cf, user_id):
        """Blend a content-based and a neighbourhood-based prediction.

        Args:
            p_cb: prediction from the content-based recommender.
            p_cf: prediction from the neighbourhood-based recommender.
            user_id: user the prediction is for (drives ``fun2``).
        """
        # Evaluate each feature function once; fun2 hits the database.
        f1 = self.fun1()
        f2 = self.fun2(user_id)
        p = (self.wcb1 * f1 * p_cb +
             self.wcb2 * f2 * p_cb +
             self.wcf1 * f1 * p_cf +
             self.wcf2 * f2 * p_cf)
        return p
예제 #3
0
    def test_top_n(self):
        """Smoke-test ``recommend_items_by_ratings`` with three explicit ratings.

        Only verifies that the call returns something other than ``None``.
        """
        recommender = NeighborhoodBasedRecs()
        known_ratings = [
            {'movie_id': AVENGERS, 'rating': 7},
            {'movie_id': ALIEN, 'rating': 10},
            {'movie_id': CAPTAIN_AMERICA, 'rating': 6},
        ]

        # The leading 10 is passed positionally -- presumably the user id;
        # confirm against NeighborhoodBasedRecs.recommend_items_by_ratings.
        recs = recommender.recommend_items_by_ratings(10, known_ratings)
        self.assertIsNotNone(recs)
예제 #4
0
    def __init__(self):
        """Create the two sub-recommenders and set four fixed Decimal constants."""
        # One content-based and one neighbourhood-based recommender instance.
        self.cb, self.cf = ContentBasedRecs(), NeighborhoodBasedRecs()

        # Presumably the blend weights for the cb / cf predictions -- confirm
        # against the method that consumes them.
        self.wcb1, self.wcb2 = Decimal(0.65221204), Decimal(-0.14638855)
        self.wcf1, self.wcf2 = Decimal(-0.0062952), Decimal(0.09139193)
예제 #5
0
 def __init__(self, data_size=1000):
     """Set up logger, empty data slots and the recommender instances.

     Args:
         data_size: stored on the instance as ``self.data_size``.
     """
     self.logger = logging.getLogger('FWLS')

     # Data holders start empty; nothing in this constructor fills them.
     self.train_data = self.test_data = self.rating_count = None

     self.cb = ContentBasedRecs()
     self.cf = NeighborhoodBasedRecs()
     self.fwls = FeatureWeightedLinearStacking()

     self.data_size = data_size
예제 #6
0
    def test_predicting_score(self):
        """The predicted DR_STRANGELOVE score must lie strictly within 1 of 8."""
        # Ratings handed to the recommender as a movie-id -> rating mapping.
        given_ratings = {
            AVENGERS: 10,
            ALIEN: 10,
            CAPTAIN_AMERICA: 7,
        }

        recommender = NeighborhoodBasedRecs()
        score = recommender.predict_score_by_ratings(DR_STRANGELOVE,
                                                     given_ratings)

        deviation = abs(8 - score)
        self.assertTrue(deviation < 1)
예제 #7
0
class CFCoverage(object):
    """Measures how many distinct movies the CF recommender ever recommends."""

    def __init__(self):
        self.all_users = Rating.objects.all().values('user_id').distinct()
        self.cf = NeighborhoodBasedRecs()
        # rec[0] of every recommendation -> number of times it was recommended.
        self.items_in_rec = defaultdict(int)
        self.users_with_recs = []

    def calculate_coverage(self):
        """Collect recommendations for every user and return the coverage.

        Side effects: prints progress, appends to ``users_with_recs``,
        updates ``items_in_rec`` and writes it to ``cf_coverage.json`` in the
        current working directory.

        Returns:
            Recommended-item count divided by the total movie count, or
            ``0.0`` when there are no movies at all.
        """
        print('calculating coverage for all users ({} in total)'.format(
            len(self.all_users)))
        for user in self.all_users:
            user_id = str(user['user_id'])
            recset = self.cf.recommend_items(user_id)
            if recset:
                self.users_with_recs.append(user)
                for rec in recset:
                    self.items_in_rec[rec[0]] += 1
                print('found recs for {}'.format(user_id))

        print('writing cf coverage to file.')
        # Context manager guarantees the file is flushed and closed.
        with open('cf_coverage.json', 'w') as coverage_file:
            json.dump(self.items_in_rec, coverage_file)

        no_movies = Movie.objects.all().count()
        no_movies_in_rec = len(self.items_in_rec)

        # Guard both divisions: with no recommendations (or an empty movie
        # table) the original raised ZeroDivisionError after writing the file.
        if no_movies_in_rec:
            movies_per_recommended = float(no_movies / no_movies_in_rec)
        else:
            movies_per_recommended = float('inf')
        print("{} {} {}".format(no_movies, no_movies_in_rec,
                                movies_per_recommended))

        if not no_movies:
            return 0.0
        return no_movies_in_rec / no_movies
예제 #8
0
    def test_split_data(self):
        """``split_data`` on 20 ratings must yield 4 test rows and 16 train rows."""
        ratings = pd.DataFrame(
            [
                [1, STAR_WARS, 9, '2013-10-12 23:21:27+00:00'],
                [1, WONDER_WOMAN, 10, '2014-10-12 23:22:27+00:00'],
                [1, AVENGERS, 10, '2015-11-12 23:20:27+00:00'],
                [1, WOLVERINE, 8, '2015-08-12 23:20:27+00:00'],
                [1, PIRATES_OF, 10, '2015-10-12 22:20:27+00:00'],
                [1, HARRY, 10, '2015-10-12 23:21:27+00:00'],
                [1, CAPTAIN_AMERICA, 10, '2014-10-12 23:20:27+00:00'],
                [1, ALIEN, 6, '2015-10-12 23:22:27+00:00'],
                [1, JACQUES, 6, '2015-10-12 11:20:27+00:00'],
                [2, STAR_WARS, 10, '2013-10-12 23:20:27+00:00'],
                [2, WONDER_WOMAN, 10, '2014-10-12 23:20:27+00:00'],
                [2, AVENGERS, 9, '2016-10-12 23:20:27+00:00'],
                [2, PIRATES_OF, 6, '2010-10-12 23:20:27+00:00'],
                [2, CAPTAIN_AMERICA, 10, '2005-10-12 23:20:27+00:00'],
                [2, DR_STRANGELOVE, 10, '2015-01-12 23:20:27+00:00'],
                [3, STAR_WARS, 9, '2013-10-12 20:20:27+00:00'],
                [3, AVENGERS, 10, '2015-10-12 10:20:27+00:00'],
                [3, PIRATES_OF, 9, '2013-03-12 23:20:27+00:00'],
                [3, HARRY, 8, '2016-10-13 23:20:27+00:00'],
                [3, DR_STRANGELOVE, 10, '2016-09-12 23:20:27+00:00'],
            ],
            columns=['user_id', 'movie_id', 'rating', 'rating_timestamp'])
        er = EvaluationRunner(5, ItemSimilarityMatrixBuilder(1, min_sim=0.0),
                              NeighborhoodBasedRecs())

        test, train = er.split_data(2, ratings, [1, 2], [3])
        self.assertIsNotNone(test)
        # assertTrue(x, 4) used 4 as the failure *message*, so the size of the
        # test split was never actually verified.
        self.assertEqual(test.shape[0], 4)
        self.assertEqual(train.shape[0], 16)
예제 #9
0
def evaluate_cf_recommender():
    """Evaluate the neighbourhood recommender for K = 0, 2, ..., 18.

    For every K a fresh recommender and ``EvaluationRunner`` are created and
    one CSV line with the resulting ``ar``/``map``/``mae`` values plus the
    parameters used is appended to ``<timestamp>-cf.csv`` in the current
    working directory.
    """
    min_number_of_ratings = 5
    min_overlap = 5
    min_sim = 0.1
    # Does not depend on K, so compute it once instead of on every iteration.
    min_rank = min_number_of_ratings / 2

    timestr = time.strftime("%Y%m%d-%H%M%S")
    file_name = '{}-cf.csv'.format(timestr)

    # Buffering of 1 selects line buffering for the text-mode log file.
    with open(file_name, 'a', 1) as logfile:
        logfile.write(
            "ar, map, mae, min_overlap, min_sim, K, min_num_of_ratings, min_rank\n"
        )

        for k in np.arange(0, 20, 2):
            recommender = NeighborhoodBasedRecs()
            er = EvaluationRunner(
                0, ItemSimilarityMatrixBuilder(min_overlap, min_sim=min_sim),
                recommender, k)

            result = er.calculate(min_number_of_ratings,
                                  min_rank,
                                  number_test_users=-1)

            # Local renamed from ``map`` to avoid shadowing the builtin.
            mean_ap = result['map']
            mae = result['mae']
            ar = result['ar']
            logfile.write("{}, {}, {}, {}, {}, {}, {}, {}\n".format(
                ar, mean_ap, mae, min_overlap, min_sim, k,
                min_number_of_ratings, min_rank))
            logfile.flush()
예제 #10
0
class FWLSCalculator(object):
    """Builds a training frame and fits the linear weights for FWLS.

    Typical order of calls, judging by the column dependencies:
    ``get_real_training_data`` -> ``calculate_predictions_for_training_data``
    -> ``calculate_feature_functions_for_training_data`` -> ``train``.
    """

    def __init__(self, data_size=1000):
        self.logger = logging.getLogger('FWLS')
        self.train_data = self.test_data = self.rating_count = None
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()
        self.fwls = FeatureWeightedLinearStacking()
        self.data_size = data_size

    def get_real_training_data(self):
        """Load up to ``data_size`` ratings and split them 80/20 into train/test."""
        fields = ['user_id', 'movie_id', 'rating', 'type']
        ratings_data = Rating.objects.all().values(*fields)[:self.data_size]
        frame = pd.DataFrame.from_records(ratings_data, columns=fields)
        split = train_test_split(frame, test_size=0.2)
        self.train_data, self.test_data = split
        self.logger.debug("training data loaded {}".format(len(ratings_data)))

    def calculate_predictions_for_training_data(self):
        """Add ``cb`` and ``cf`` columns with each recommender's predicted score."""
        self.logger.debug("[BEGIN] getting predictions")

        # Same row-wise computation for both recommenders, 'cb' first.
        for column, recommender in (('cb', self.cb), ('cf', self.cf)):
            self.train_data[column] = self.train_data.apply(
                lambda row, rec=recommender: rec.predict_score(
                    row['user_id'], row['movie_id']),
                axis=1)

        self.logger.debug("[END] getting predictions")

    def calculate_feature_functions_for_training_data(self):
        """Add ``cb1``, ``cb2``, ``cf1``, ``cf2``: predictions times fun1/fun2."""
        self.logger.debug("[BEGIN] calculating functions")

        frame = self.train_data
        for source in ('cb', 'cf'):
            frame[source + '1'] = frame.apply(
                lambda row, src=source: row[src] * self.fwls.fun1(), axis=1)
            frame[source + '2'] = frame.apply(
                lambda row, src=source: row[src] * self.fwls.fun2(
                    row['user_id']),
                axis=1)

        self.logger.debug("[END] calculating functions")

    def train(self):
        """Fit a linear regression of ``rating`` on the four feature columns.

        Returns:
            The fitted coefficients (``regr.coef_``), which are also logged.
        """
        regr = linear_model.LinearRegression()
        features = self.train_data[['cb1', 'cb2', 'cf1', 'cf2']]
        target = self.train_data['rating']

        regr.fit(features, target)
        self.logger.info(regr.coef_)
        return regr.coef_
def recs_cf(request, user_id):
    """Return neighbourhood-based recommendations for ``user_id`` as JSON.

    The ``min_sim`` query parameter (default ``0.1``) is passed through to
    ``NeighborhoodBasedRecs`` unchanged.
    """
    min_sim_param = request.GET.get('min_sim', 0.1)
    recommender = NeighborhoodBasedRecs(min_sim=min_sim_param)
    recommendations = recommender.recommend_items(user_id)

    return JsonResponse({'user_id': user_id, 'data': recommendations},
                        safe=False)
 def __init__(self, save_path, data_size=1000):
     """Store the configuration and create logger and recommender instances.

     Args:
         save_path: stored on the instance as ``self.save_path``.
         data_size: stored on the instance as ``self.data_size``.
     """
     self.save_path = save_path
     self.logger = logging.getLogger('FWLS')

     # Data holders start empty; nothing in this constructor fills them.
     self.train_data = self.test_data = self.rating_count = None

     self.cb = ContentBasedRecs()
     self.cf = NeighborhoodBasedRecs()
     self.fwls = FeatureWeightedLinearStacking()

     self.data_size = data_size
예제 #13
0
def main():
    """Parse the recommender name from the command line and build coverage.

    The single positional argument ``rec`` selects the recommender. Only
    ``neighborhood_based_recommender`` is recognised; any other value leaves
    the recommender as ``None``, which is passed on unchanged.
    """
    parser = argparse.ArgumentParser()
    # The value is compared with a recommender *name* below, so it must be
    # parsed as a string. With type=int the name was rejected by argparse and
    # an integer could never match, so the recommender was never selected.
    parser.add_argument("rec", type=str, help="the recommender to use")
    args = parser.parse_args()

    rec = None
    if args.rec == "neighborhood_based_recommender":
        rec = NeighborhoodBasedRecs()

    RecommenderCoverage(rec)
예제 #14
0
def recs_cf(request, user_id, num=6):
    """Return up to ``num`` neighbourhood-based recommendations as JSON.

    Args:
        request: Django request; the optional ``min_sim`` query parameter
            (default ``0.1``) is forwarded to ``NeighborhoodBasedRecs``.
        user_id: user to recommend for.
        num: number of recommendations requested from the recommender.
    """
    min_sim = request.GET.get('min_sim', 0.1)
    sorted_items = NeighborhoodBasedRecs(min_sim=min_sim).recommend_items(user_id, num)

    # ``{sorted_items}`` was a set literal around the result, which raises
    # TypeError (unhashable) when the recommender returns a list.
    print("cf sorted_items is: {}".format(sorted_items))
    data = {
        'user_id': user_id,
        'data': sorted_items
    }

    return JsonResponse(data, safe=False)
def item_news_feed(request, user_id, page_number):
    """Return one page (5 items) of recommended item ids with their ratings.

    When fewer than 10 recommendations are available the response is
    ``{'NumberPages': 0, 'Data': None}``.

    Args:
        request: Django request; the optional ``min_sim`` query parameter
            (default ``0.1``) is forwarded to ``NeighborhoodBasedRecs``.
        user_id: user to recommend for.
        page_number: page to return, passed to ``Paginator.page``.
    """
    min_sim = request.GET.get('min_sim', 0.1)
    sorted_items = NeighborhoodBasedRecs(
        min_sim=min_sim).recommend_items(user_id)
    # len() works for lists and querysets alike; list.count() without an
    # argument raises TypeError.
    if len(sorted_items) < 10:
        response = {'NumberPages': 0, 'Data': None}
        return JsonResponse(response, safe=False)

    number_item_per_page = 5
    paginator = Paginator(sorted_items, number_item_per_page)
    number_pages = paginator.num_pages

    page_items = paginator.page(page_number).object_list

    data = []
    for item in page_items:
        item_id = item[0]  # renamed from ``id`` to avoid shadowing the builtin
        data.append({'Id': item_id, 'Rating': calculate_rating(item_id)})

    response = {'NumberPages': number_pages, 'Data': data}
    return JsonResponse(response, safe=False)
예제 #16
0
class FWLSCalculator(object):
    """Early helper for preparing FWLS training data and fitting an OLS model.

    NOTE(review): several methods below cannot run as written; see the notes
    on ``__init__``, ``calculate_feature_functions_for_training_data`` and
    ``train``.
    """

    def __init__(self):
        # NOTE(review): this instance attribute has the same name as the
        # ``train`` method defined below, so on an instance ``obj.train``
        # resolves to this value rather than to the method.
        self.train = None
        self.test = None
        self.rating_count = None
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()

    def get_real_training_data(self):
        """Load all ratings from the database and split them 80/20."""
        columns = ['user_id', 'movie_id', 'rating', 'type']
        ratings_data = Rating.objects.all().values(*columns)
        df = pd.DataFrame.from_records(ratings_data, columns=columns)
        self.train, self.test = train_test_split(df, test_size=0.2)

    def get_training_data(self):
        """Replace ``self.train`` with a hard-coded 4-row frame and return it.

        Also stores per-user row counts in ``self.rating_count``.
        """
        print('load data')

        # NOTE(review): mixing str and float in one np.array presumably makes
        # every cell (including 'rating') a string -- confirm.
        data = np.array([['1', '2', 3.6], ['1', '3', 5.0], ['1', '4', 5.0],
                         ['2', '2', 3.0]])
        self.train = pd.DataFrame(data,
                                  columns=['user_id', 'movie_id', 'rating'])
        self.rating_count = self.train.groupby('user_id').count().reset_index()
        return self.train

    def calculate_predictions_for_training_data(self):
        """Add ``cb`` and ``cf`` columns with each recommender's predicted score."""
        self.train['cb'] = self.train.apply(lambda data: self.cb.predict_score(
            data['user_id'], data['movie_id']),
                                            axis=1)
        self.train['cf'] = self.train.apply(lambda data: self.cf.predict_score(
            data['user_id'], data['movie_id']),
                                            axis=1)
        return None

    def calculate_feature_functions_for_training_data(self):
        """Add ``cb1``, ``cb2``, ``cf1`` and ``cf2`` feature columns.

        NOTE(review): ``self.func1`` / ``self.func2`` are not defined in this
        class as shown. The ``cb1`` and ``cf1`` calls also omit ``axis=1``,
        unlike the ``cb2`` / ``cf2`` calls, so they are applied per column
        (pandas default) -- verify.
        """
        self.train['cb1'] = self.train.apply(
            lambda data: data.cb * self.func1())
        self.train['cb2'] = self.train.apply(
            lambda data: data.cb * self.func2(data['user_id']), axis=1)

        self.train['cf1'] = self.train.apply(
            lambda data: data.cf * self.func1())
        self.train['cf2'] = self.train.apply(
            lambda data: data.cf * self.func2(data['user_id']), axis=1)

        return None

    def train(self):
        """Fit ``rating ~ cb1+cb2+cf1+cf2`` by OLS and print the result.

        NOTE(review): reads ``fwls.train`` -- a name from outside this class
        -- rather than ``self.train``; confirm which object is intended.
        """
        result = sm.ols(formula="rating ~ cb1+cb2+cf1+cf2",
                        data=fwls.train).fit()
        print(result)
def news_feed(request, user_id, longitude, latitude, page_number):
    """Return one page (5 items) of recommended foods as JSON.

    Recommendations whose food row, or whose restaurant owner's
    ``Userdetail`` row, cannot be found in the ``sql_db`` database are
    skipped.

    Args:
        request: Django request; the optional ``min_sim`` query parameter
            (default ``0.1``) is forwarded to ``NeighborhoodBasedRecs``.
        user_id: user to recommend for.
        longitude: caller's longitude, converted with ``float``.
        latitude: caller's latitude, converted with ``float``.
        page_number: page to return, passed to ``Paginator.page``.
    """
    min_sim = request.GET.get('min_sim', 0.1)
    sorted_items = NeighborhoodBasedRecs(
        min_sim=min_sim).recommend_items(user_id)

    print('DEBUG sorted items len {}'.format(len(sorted_items)))

    number_item_per_page = 5
    paginator = Paginator(sorted_items, number_item_per_page)
    number_pages = paginator.num_pages

    page_items = paginator.page(page_number).object_list
    data = []

    # Take the clock time AND the weekday from the same Asia/Saigon instant.
    # The weekday used to come from the server's local date, which can be a
    # different day around midnight when the server runs in another timezone.
    tz = pytz.timezone('Asia/Saigon')
    now = datetime.datetime.now(tz)
    current_time = now.time()
    week_day = now.weekday() + 1

    for item in page_items:
        food_id = item[0]
        food = Food.objects.using('sql_db').filter(id=food_id).first()
        if food is None:
            continue

        menu = food.menuid
        restaurant = menu.restaurantid
        user = Userdetail.objects.using('sql_db').filter(
            userid=restaurant.userid).first()
        if user is None:
            continue

        rating = calculate_rating(food_id)
        distance = calculate_distance(restaurant.long, restaurant.lat,
                                      float(longitude), float(latitude))
        status = bool(
            restaurant.opentime <= current_time <= restaurant.closetime
            and restaurant.openfromday <= week_day <= restaurant.opentoday)

        # NOTE(review): 'Address', 'Status' and 'Distance' send fixed
        # placeholder strings, while the computed ``status`` and ``distance``
        # are unused -- confirm whether the real values should be returned.
        value = {
            'Id': food.id,
            'Name': food.name,
            'Address': "status",
            'FoodImage': food.defaultimage,
            'Rating': rating,
            'Status': "status",
            'Distance': "distance"
        }
        data.append(value)

    response = {'NumberPages': number_pages, 'Data': data}
    return JsonResponse(response, safe=False)
예제 #18
0
def evaluate_cf_recommender():
    """Evaluate the neighbourhood recommender for min_overlap = 0, 2, ..., 18.

    For each value a similarity-matrix builder, a recommender and an
    ``EvaluationRunner`` are created. One CSV line with the metrics, the
    parameters and the user/movie coverage is appended to
    ``<timestamp>-min_overlap_item_similarity.csv`` in the current working
    directory.
    """
    min_number_of_ratings = 20
    min_sim = 0.1
    K = 20
    # Independent of the swept parameter, so computed once.
    min_rank = min_number_of_ratings / 2

    timestr = time.strftime("%Y%m%d-%H%M%S")
    file_name = '{}-min_overlap_item_similarity.csv'.format(timestr)

    # Buffering of 1 selects line buffering for the text-mode log file.
    with open(file_name, 'a', 1) as logfile:
        logfile.write(
            "rak, pak, mae, min_overlap, min_sim, K, min_num_of_ratings, min_rank, user_coverage, "
            "movie_coverage\n")

        for min_overlap in np.arange(0, 20, 2):
            # Build the similarity builder per iteration. It used to be
            # created once before the loop with a fixed overlap, so every run
            # evaluated the same configuration while logging a different
            # min_overlap.
            builder = ItemSimilarityMatrixBuilder(min_overlap, min_sim=min_sim)
            recommender = NeighborhoodBasedRecs()
            er = EvaluationRunner(0, builder, recommender, K)
            # Run the baseline recommender:
            # er = EvaluationRunner(3, None, PopularityBasedRecs(), K)

            result = er.calculate(min_number_of_ratings,
                                  min_rank,
                                  number_test_users=-1)

            user_coverage, movie_coverage = RecommenderCoverage(
                recommender).calculate_coverage()
            pak = result['pak']
            mae = result['mae']
            rak = result['rak']
            logfile.write("{}, {}, {}, {}, {}, {}, {}, {}, {}, {}\n".format(
                rak, pak, mae, min_overlap, min_sim, K, min_number_of_ratings,
                min_rank, user_coverage, movie_coverage))
            logfile.flush()
예제 #19
0
if __name__ == '__main__':
    # Sweep min_number_of_ratings over 5, 15, 25 and append one CSV line per
    # run to <timestamp>-min_number_of_ratings_training.csv.
    min_number_of_ratings = 30
    min_overlap = 25
    min_sim = 0
    K = 25  #redo
    min_rank = 5

    timestr = time.strftime("%Y%m%d-%H%M%S")
    file_name = '{}-min_number_of_ratings_training.csv'.format(timestr)

    # Buffering of 1 selects line buffering for the text-mode log file.
    with open(file_name, 'a', 1) as logfile:
        logfile.write(
            "pak, mae, min_overlap, min_sim, K, min_num_of_ratings, min_rank\n"
        )

        for min_number_of_ratings in np.arange(5, 30, 10):
            # Both thresholds are derived from the swept value.
            min_rank = min_number_of_ratings / 2
            min_overlap = min_number_of_ratings - min_rank
            er = EvaluationRunner(
                3, ItemSimilarityMatrixBuilder(min_overlap, min_sim=min_sim),
                NeighborhoodBasedRecs(), K)
            result = er.calculate(min_number_of_ratings,
                                  min_rank,
                                  number_test_users=1000)
            pak = result['pak']
            mae = result['mae']
            # Exactly one argument per placeholder/header column. The former
            # eighth argument (``datetime.now()``) had no placeholder and was
            # silently discarded by str.format.
            logfile.write("{}, {}, {}, {}, {}, {}, {} \n".format(
                pak, mae, min_overlap, min_sim, K, min_number_of_ratings,
                min_rank))
예제 #20
0
        from    analytics_rating as rating1
        where    rank < 3"""

        columns = ['user_id', 'movie_id', 'rating', 'type']
        rating_data = data_helper.get_data_frame(sql, columns)

        print('found {} ratings'.format(rating_data.count()))
        return rating_data


if __name__ == '__main__':
    TEST = False

    if TEST:
        er = EvaluationRunner(5, ItemSimilarityMatrixBuilder(2),
                              NeighborhoodBasedRecs())
        ratings = pd.DataFrame(
            [
                [1, '11', 5, '2013-10-12 23:20:27+00:00'],
                [1, '12', 3, '2014-10-12 23:20:27+00:00'],
                [1, '14', 2, '2015-10-12 23:20:27+00:00'],
                [2, '11', 4, '2013-10-12 23:20:27+00:00'],
                [2, '12', 3, '2014-10-12 23:20:27+00:00'],
                [2, '13', 4, '2015-10-12 23:20:27+00:00'],
                [3, '11', 5, '2013-10-12 23:20:27+00:00'],
                [3, '12', 2, '2014-10-12 23:20:27+00:00'],
                [3, '13', 5, '2015-10-12 23:20:27+00:00'],
                [3, '14', 2, '2016-10-12 23:20:27+00:00'],
                [4, '11', 3, '2013-10-12 23:20:27+00:00'],
                [4, '12', 5, '2014-10-12 23:20:27+00:00'],
                [4, '13', 3, '2015-10-12 23:20:27+00:00'],
예제 #21
0
                        help="run evaluation on rank rec",
                        action="store_true")

    args = parser.parse_args()

    print(args.fwls)
    k = 10
    cov = None
    if args.fwls:
        logger.debug("evaluating coverage of fwls")
        cov = RecommenderCoverage(FeatureWeightedLinearStacking)
        cov.calculate_coverage(K=k, recName='fwls{}'.format(k))

    if args.cf:
        logger.debug("evaluating coverage of cf")
        cov = RecommenderCoverage(NeighborhoodBasedRecs())
        cov.calculate_coverage(K=k, recName='cf{}'.format(k))

    if args.cb:
        logger.debug("evaluating coverage of cb")
        cov = RecommenderCoverage(ContentBasedRecs())
        cov.calculate_coverage(K=k, recName='cb{}'.format(k))

    if args.ltr:
        logger.debug("evaluating coverage of ltr")
        cov = RecommenderCoverage(BPRRecs())
        cov.calculate_coverage(K=k, recName='bpr{}'.format(k))

    if args.funk:
        logger.debug("evaluating coverage of funk")
        cov = RecommenderCoverage(FunkSVDRecs())
예제 #22
0
 def __init__(self):
     """Start with empty data slots and create the two recommender instances."""
     # Data holders start empty; nothing in this constructor fills them.
     self.train = self.test = self.rating_count = None

     self.cb = ContentBasedRecs()
     self.cf = NeighborhoodBasedRecs()
예제 #23
0
 def __init__(self):
     """Prepare the distinct-user query, the recommender and empty tallies."""
     # One {'user_id': ...} dict per distinct user id found in Rating.
     user_rows = Rating.objects.all().values('user_id')
     self.all_users = user_rows.distinct()

     self.cf = NeighborhoodBasedRecs()

     # Integer counters that default to 0, and an initially empty user list.
     self.items_in_rec = defaultdict(int)
     self.users_with_recs = list()
    def test_predicting_score(self):
        """The predicted DR_STRANGELOVE score must lie strictly within 1 of 8."""
        # Ratings handed to the recommender as a movie-id -> rating mapping.
        given_ratings = {
            AVENGERS: 10,
            ALIEN: 10,
            CAPTAIN_AMERICA: 7,
        }

        recommender = NeighborhoodBasedRecs()
        score = recommender.predict_score_by_ratings(DR_STRANGELOVE,
                                                     given_ratings)

        deviation = abs(8 - score)
        self.assertTrue(deviation < 1)
예제 #25
0
            len(self.all_users)))
        for user in self.all_users:
            user_id = str(user['user_id'])
            recset = self.cf.recommend_items(user_id)
            if recset:
                self.users_with_recs.append(user)
                for rec in recset:
                    self.items_in_rec[rec[0]] += 1
                print('found recs for {}'.format(user_id))

        print('writing cf coverage to file.')
        json.dump(self.items_in_rec, open('cf_coverage.json', 'w'))

        no_movies = Movie.objects.all().count()
        no_movies_in_rec = len(self.items_in_rec.items())

        print("{} {} {}".format(no_movies, no_movies_in_rec,
                                float(no_movies / no_movies_in_rec)))
        return no_movies_in_rec / no_movies


if __name__ == '__main__':
    # The coverage run is currently disabled:
    # print("Calculating coverage...")
    # CFCoverage().calculate_coverage()

    print("Calculating Precision at K")

    # Arguments are created in the same order as they are passed.
    recommender = NeighborhoodBasedRecs()
    similarity_builder = ItemSimilarityMatrixBuilder()
    pak = PrecissionAtK(5, recommender, similarity_builder)

    pak.calculate_old()
예제 #26
0
class FeatureWeightedLinearStacking(base_recommender):
    """Hybrid recommender that linearly blends two underlying recommenders.

    Predictions from a content-based recommender (``cb``) and a
    neighbourhood-based recommender (``cf``) are combined using four weights,
    each paired with one of the feature functions ``fun1``/``fun2``, plus an
    intercept. Weights default to fixed constants and can be replaced from a
    pickled parameter file with ``set_save_path``.
    """

    def __init__(self):
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()

        # Default blend weights: wcb* multiply the content-based prediction,
        # wcf* the neighbourhood-based one (see ``prediction``).
        self.wcb1 = Decimal(0.65221204)
        self.wcb2 = Decimal(-0.14638855)
        self.wcf1 = Decimal(-0.0062952)
        self.wcf2 = Decimal(0.09139193)
        self.intercept = Decimal(0)

    def fun1(self):
        """Constant feature function; always returns ``Decimal(1.0)``."""
        return Decimal(1.0)

    def fun2(self, user_id):
        """Return ``Decimal(1.0)`` if the user has more than 3 ratings, else 0.

        Issues one ``COUNT`` query against ``Rating`` per call.
        """
        count = Rating.objects.filter(user_id=user_id).count()
        if count > 3.0:
            return Decimal(1.0)
        return Decimal(0.0)

    def set_save_path(self, save_path):
        """Load weights and intercept from ``save_path + 'fwls_parameters.data'``.

        The file must be a pickle of a mapping with the keys ``cb1``, ``cb2``,
        ``cf1``, ``cf2`` and ``intercept``.

        Raises:
            KeyError: if one of those keys is missing from the file.
        """
        # SECURITY: pickle.load executes arbitrary code from the file; only
        # point this at parameter files produced by a trusted training run.
        with open(save_path + 'fwls_parameters.data', 'rb') as ub_file:
            parameters = pickle.load(ub_file)
            self.wcb1 = Decimal(parameters['cb1'])
            self.wcb2 = Decimal(parameters['cb2'])
            # Was read from 'cb1', which silently overwrote the first
            # collaborative-filtering weight with the content-based one.
            self.wcf1 = Decimal(parameters['cf1'])
            self.wcf2 = Decimal(parameters['cf2'])
            self.intercept = Decimal(parameters['intercept'])

    def recommend_items_by_ratings(self, user_id, active_user_items, num=6):
        """Return the ``num`` best blended recommendations from given ratings.

        Each sub-recommender is asked for ``num * 5`` candidates based on
        ``active_user_items``; the candidates are then merged.
        """
        cb_recs = self.cb.recommend_items_by_ratings(user_id,
                                                     active_user_items,
                                                     num * 5)
        cf_recs = self.cf.recommend_items_by_ratings(user_id,
                                                     active_user_items,
                                                     num * 5)

        return self.merge_predictions(user_id, cb_recs, cf_recs, num)

    def recommend_items(self, user_id, num=6):
        """Return the ``num`` best blended recommendations for ``user_id``."""
        cb_recs = self.cb.recommend_items(user_id, num * 5)
        cf_recs = self.cf.recommend_items(user_id, num * 5)

        return self.merge_predictions(user_id, cb_recs, cf_recs, num)

    def merge_predictions(self, user_id, cb_recs, cf_recs, num):
        """Blend two candidate lists and return the top ``num`` items.

        A candidate proposed by only one recommender gets the other score via
        that recommender's ``predict_score``.

        Returns:
            A list of ``(movie_id, {'prediction': score})`` tuples ordered by
            descending blended score.
        """
        combined_recs = dict()
        for rec in cb_recs:
            combined_recs[rec[0]] = {'cb': rec[1]['prediction']}

        for rec in cf_recs:
            combined_recs.setdefault(rec[0], {})['cf'] = rec[1]['prediction']

        fwls_preds = dict()
        for key, recs in combined_recs.items():
            # Fill in whichever score the candidate is still missing.
            if 'cb' not in recs:
                recs['cb'] = self.cb.predict_score(user_id, key)
            if 'cf' not in recs:
                recs['cf'] = self.cf.predict_score(user_id, key)
            pred = self.prediction(recs['cb'], recs['cf'], user_id)
            fwls_preds[key] = {'prediction': pred}

        sorted_items = sorted(
            fwls_preds.items(),
            key=lambda item: -float(item[1]['prediction']))[:num]
        return sorted_items

    def predict_score(self, user_id, item_id):
        """Return the blended predicted score of ``item_id`` for ``user_id``."""
        p_cb = self.cb.predict_score(user_id, item_id)
        p_cf = self.cf.predict_score(user_id, item_id)

        # The blended value used to be computed and then discarded, so every
        # caller received None.
        return self.prediction(p_cb, p_cf, user_id)

    def prediction(self, p_cb, p_cf, user_id):
        """Blend a content-based and a neighbourhood-based prediction.

        Args:
            p_cb: prediction from the content-based recommender.
            p_cf: prediction from the neighbourhood-based recommender.
            user_id: user the prediction is for (drives ``fun2``).

        Returns:
            The weighted sum of the four feature terms plus the intercept.
        """
        # Evaluate each feature function once; fun2 hits the database.
        f1 = self.fun1()
        f2 = self.fun2(user_id)
        p = (self.wcb1 * f1 * p_cb +
             self.wcb2 * f2 * p_cb +
             self.wcf1 * f1 * p_cf +
             self.wcf2 * f2 * p_cf)
        return p + self.intercept
예제 #27
0
    def test(self):
        """Run a full evaluation on 70 hand-written ratings from ten users.

        The resulting ``mae``, ``pak`` and ``rak`` values must each stay below
        a fixed upper bound.
        """
        column_names = ['user_id', 'movie_id', 'rating', 'rating_timestamp']
        rating_rows = [
            [1, STAR_WARS, 9, '2013-10-12 23:21:27+00:00'],
            [1, WONDER_WOMAN, 10, '2014-10-12 23:22:27+00:00'],
            [1, AVENGERS, 10, '2015-11-12 23:20:27+00:00'],
            [1, WOLVERINE, 8, '2015-08-12 23:20:27+00:00'],
            [1, PIRATES_OF, 10, '2015-10-12 22:20:27+00:00'],
            [1, HARRY, 10, '2015-10-12 23:21:27+00:00'],
            [1, CAPTAIN_AMERICA, 10, '2014-10-12 23:20:27+00:00'],
            [1, ALIEN, 6, '2015-10-12 23:22:27+00:00'],
            [1, JACQUES, 6, '2015-10-12 11:20:27+00:00'],
            [2, STAR_WARS, 10, '2013-10-12 23:20:27+00:00'],
            [2, WONDER_WOMAN, 10, '2014-10-12 23:20:27+00:00'],
            [2, AVENGERS, 9, '2016-10-12 23:20:27+00:00'],
            [2, PIRATES_OF, 6, '2010-10-12 23:20:27+00:00'],
            [2, CAPTAIN_AMERICA, 10, '2005-10-12 23:20:27+00:00'],
            [2, DR_STRANGELOVE, 10, '2015-01-12 23:20:27+00:00'],
            [3, STAR_WARS, 9, '2013-10-12 20:20:27+00:00'],
            [3, AVENGERS, 10, '2015-10-12 10:20:27+00:00'],
            [3, PIRATES_OF, 9, '2013-03-12 23:20:27+00:00'],
            [3, HARRY, 8, '2016-10-13 23:20:27+00:00'],
            [3, DR_STRANGELOVE, 10, '2016-09-12 23:20:27+00:00'],
            [4, STAR_WARS, 8, '2013-10-12 23:20:27+00:00'],
            [4, WONDER_WOMAN, 8, '2014-10-12 23:20:27+00:00'],
            [4, AVENGERS, 9, '2015-10-12 23:20:27+00:00'],
            [4, PIRATES_OF, 5, '2013-10-12 23:20:27+00:00'],
            [4, HARRY, 6, '2014-10-12 23:20:27+00:00'],
            [4, ALIEN, 8, '2015-10-12 23:20:27+00:00'],
            [4, DR_STRANGELOVE, 9, '2015-10-12 23:20:27+00:00'],
            [5, STAR_WARS, 6, '2013-10-12 23:20:27+00:00'],
            [5, AVENGERS, 6, '2014-10-12 23:20:27+00:00'],
            [5, WOLVERINE, 8, '2015-10-12 23:20:27+00:00'],
            [5, PIRATES_OF, 2, '2016-10-12 23:20:27+00:00'],
            [5, HARRY, 10, '2016-10-12 23:20:27+00:00'],
            [5, CAPTAIN_AMERICA, 6, '2016-10-12 23:20:27+00:00'],
            [5, ALIEN, 4, '2016-10-12 23:20:27+00:00'],
            [5, DR_STRANGELOVE, 8, '2016-10-12 23:20:27+00:00'],
            [5, JACQUES, 10, '2016-10-12 23:20:27+00:00'],
            [6, STAR_WARS, 8, '2013-10-12 23:20:27+00:00'],
            [6, WONDER_WOMAN, 8, '2014-10-12 23:20:27+00:00'],
            [6, AVENGERS, 8, '2014-10-12 23:20:27+00:00'],
            [6, WOLVERINE, 8, '2015-10-12 23:20:27+00:00'],
            [6, PIRATES_OF, 6, '2016-10-12 23:20:27+00:00'],
            [6, HARRY, 10, '2016-10-12 23:20:27+00:00'],
            [6, JACQUES, 8, '2016-10-12 23:20:27+00:00'],
            [7, AVENGERS, 10, '2014-10-12 23:20:27+00:00'],
            [7, PIRATES_OF, 3, '2016-10-12 23:20:27+00:00'],
            [7, HARRY, 1, '2016-10-12 23:20:27+00:00'],
            [7, ALIEN, 8, '2016-10-12 23:20:27+00:00'],
            [7, DR_STRANGELOVE, 10, '2016-10-12 23:20:27+00:00'],
            [8, STAR_WARS, 9, '2013-10-12 23:20:27+00:00'],
            [8, WONDER_WOMAN, 7, '2014-10-12 23:20:27+00:00'],
            [8, AVENGERS, 7, '2014-10-12 23:20:27+00:00'],
            [8, WOLVERINE, 7, '2015-10-12 23:20:27+00:00'],
            [8, PIRATES_OF, 8, '2016-10-12 23:20:27+00:00'],
            [8, HARRY, 8, '2016-10-12 23:20:27+00:00'],
            [8, ALIEN, 8, '2016-10-12 23:20:27+00:00'],
            [8, DR_STRANGELOVE, 8, '2016-10-12 23:20:27+00:00'],
            [8, JACQUES, 10, '2016-10-12 23:20:27+00:00'],
            [9, WONDER_WOMAN, 7, '2014-10-12 23:20:27+00:00'],
            [9, AVENGERS, 8, '2014-10-12 23:20:27+00:00'],
            [9, WOLVERINE, 8, '2015-10-12 23:20:27+00:00'],
            [9, PIRATES_OF, 7, '2016-10-12 23:20:27+00:00'],
            [9, HARRY, 8, '2016-10-12 23:20:27+00:00'],
            [9, CAPTAIN_AMERICA, 10, '2016-10-12 23:20:27+00:00'],
            [9, DR_STRANGELOVE, 10, '2016-10-12 23:20:27+00:00'],
            [9, JACQUES, 7, '2016-10-12 23:20:27+00:00'],
            [10, AVENGERS, 7, '2014-10-12 23:20:27+00:00'],
            [10, HARRY, 10, '2016-10-12 23:20:27+00:00'],
            [10, CAPTAIN_AMERICA, 6, '2016-10-12 23:20:27+00:00'],
            [10, DR_STRANGELOVE, 8, '2016-10-12 23:20:27+00:00'],
        ]
        ratings = pd.DataFrame(rating_rows, columns=column_names)

        builder = ItemSimilarityMatrixBuilder(1, min_sim=0.0)
        er = EvaluationRunner(5, builder, NeighborhoodBasedRecs())

        result = er.calculate_using_ratings(ratings,
                                            min_number_of_ratings=4,
                                            min_rank=5)

        # Upper bounds only; the exact metric values are not pinned down.
        upper_bounds = (('mae', decimal.Decimal(1.7)),
                        ('pak', decimal.Decimal(0.7)),
                        ('rak', decimal.Decimal(0.7)))
        for metric, bound in upper_bounds:
            self.assertLess(result[metric], bound)
        print(result)
class FWLSCalculator(object):
    """Fit the linear blend weights for feature-weighted linear stacking.

    The calculator asks a content-based and a neighbourhood-based
    recommender for a predicted score per (user, movie) rating, multiplies
    each prediction by the two FWLS feature functions, and regresses the
    four resulting columns (``cb1``, ``cb2``, ``cf1``, ``cf2``) against the
    real rating. The fitted coefficients and intercept are pickled to
    ``save_path + 'fwls_parameters.data'``.
    """

    def __init__(self, save_path, data_size=1000):
        """Create a calculator.

        Args:
            save_path: Prefix the parameter file name is appended to.
            data_size: Maximum number of ratings to load or sample.
        """
        self.save_path = save_path
        self.logger = logging.getLogger('FWLS')
        self.train_data = None
        self.test_data = None
        self.rating_count = None
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()
        self.fwls = FeatureWeightedLinearStacking()
        self.data_size = data_size

    def get_real_training_data(self):
        """Load up to ``data_size`` ratings and split them 80/20.

        Populates ``self.train_data`` and ``self.test_data``.
        """
        columns = ['user_id', 'movie_id', 'rating', 'type']
        ratings_data = Rating.objects.all().values(*columns)[:self.data_size]
        df = pd.DataFrame.from_records(ratings_data, columns=columns)
        self.train_data, self.test_data = train_test_split(df, test_size=0.2)
        self.logger.debug("training data loaded {}".format(len(ratings_data)))

    def calculate_predictions_for_training_data(self):
        """Add ``cb`` and ``cf`` prediction columns to ``self.train_data``.

        Each column holds the respective recommender's ``predict_score``
        for the row's ``user_id`` / ``movie_id`` pair.
        """
        self.logger.debug("[BEGIN] getting predictions")

        self.train_data['cb'] = self.train_data.apply(
            lambda data: self.cb.predict_score(data['user_id'],
                                               data['movie_id']),
            axis=1)

        self.train_data['cf'] = self.train_data.apply(
            lambda data: self.cf.predict_score(data['user_id'],
                                               data['movie_id']),
            axis=1)

        self.logger.debug("[END] getting predictions")
        return None

    def calculate_feature_functions_for_training_data(self):
        """Add the four regression features to ``self.train_data``.

        ``cb1``/``cf1`` are the predictions times ``fun1()``; ``cb2``/``cf2``
        are the predictions times ``fun2(user_id)``. Requires the ``cb`` and
        ``cf`` columns to be present already.
        """
        self.logger.debug("[BEGIN] calculating functions")
        self.train_data['cb1'] = self.train_data.apply(
            lambda data: data['cb'] * self.fwls.fun1(), axis=1)
        self.train_data['cb2'] = self.train_data.apply(
            lambda data: data['cb'] * self.fwls.fun2(data['user_id']), axis=1)

        self.train_data['cf1'] = self.train_data.apply(
            lambda data: data['cf'] * self.fwls.fun1(), axis=1)
        self.train_data['cf2'] = self.train_data.apply(
            lambda data: data['cf'] * self.fwls.fun2(data['user_id']), axis=1)

        self.logger.debug("[END] calculating functions")
        return None

    def build(self, train_data=None, params=None):
        """Prepare the training frame and fit the blend weights.

        Args:
            train_data: Optional DataFrame with ``user_id``, ``movie_id`` and
                ``rating`` columns. When omitted, ratings are loaded through
                ``get_real_training_data``.
            params: Optional dict; ``save_path`` and ``data_sample`` override
                the corresponding attributes when present.

        Returns:
            The dict produced by ``train``.
        """
        if params:
            # Missing keys keep the current values instead of raising.
            self.save_path = params.get('save_path', self.save_path)
            self.data_size = params.get('data_sample', self.data_size)

        if train_data is not None:
            self.train_data = train_data
            if self.data_size > 0:
                # sample() raises when asked for more rows than exist, so a
                # small frame is used in full rather than failing.
                sample_size = min(self.data_size, len(self.train_data))
                self.train_data = self.train_data.sample(sample_size)
                self.logger.debug("training sample of size {}".format(self.train_data.shape[0]))
        else:
            self.get_real_training_data()

        self.calculate_predictions_for_training_data()
        self.calculate_feature_functions_for_training_data()

        return self.train()

    def train(self, ratings=None, train_feature_recs=False):
        """Fit the linear regression and persist its parameters.

        Args:
            ratings: Passed to ``ItemSimilarityMatrixBuilder().build`` when
                ``train_feature_recs`` is true; otherwise unused.
            train_feature_recs: When true, rebuild the item-similarity and
                LDA models before fitting.

        Returns:
            Dict with keys ``cb1``, ``cb2``, ``cf1``, ``cf2`` and
            ``intercept``.
        """
        if train_feature_recs:
            ItemSimilarityMatrixBuilder().build(ratings)
            LdaModel.build()

        # NOTE(review): `normalize` was removed from LinearRegression in
        # scikit-learn 1.2 -- confirm the pinned version still accepts it.
        regr = linear_model.LinearRegression(fit_intercept=True,
                                             n_jobs=-1,
                                             normalize=True)

        regr.fit(self.train_data[['cb1', 'cb2', 'cf1', 'cf2']], self.train_data['rating'])
        self.logger.info(regr.coef_)

        result = {'cb1': regr.coef_[0],
                  'cb2': regr.coef_[1],
                  'cf1': regr.coef_[2],
                  'cf2': regr.coef_[3],
                  'intercept': regr.intercept_}
        self.logger.debug(result)

        # Log one sample row for inspection; clamp the position so training
        # sets with fewer than 101 rows do not raise IndexError.
        sample_pos = min(100, len(self.train_data) - 1)
        if sample_pos >= 0:
            self.logger.debug(self.train_data.iloc[sample_pos])

        ensure_dir(self.save_path)
        with open(self.save_path + 'fwls_parameters.data', 'wb') as out_file:
            pickle.dump(result, out_file)
        return result
예제 #29
0
class FWLSCalculator(object):
    """Fit the linear blend weights for feature-weighted linear stacking.

    Predictions from a content-based and a neighbourhood-based recommender
    are multiplied by the two FWLS feature functions, and the four resulting
    columns (``cb1``, ``cb2``, ``cf1``, ``cf2``) are regressed against the
    real rating. The coefficients are pickled to
    ``save_path + 'fwls_parameters.data'``.
    """

    def __init__(self, save_path, data_size=1000):
        """Create a calculator.

        Args:
            save_path: Prefix the parameter file name is appended to.
            data_size: Maximum number of ratings loaded from the database.
        """
        self.save_path = save_path
        self.logger = logging.getLogger('FWLS')
        self.train_data = None
        self.test_data = None
        self.rating_count = None
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()
        self.fwls = FeatureWeightedLinearStacking()
        self.data_size = data_size

    def get_real_training_data(self):
        """Load up to ``data_size`` ratings and split them 80/20.

        Populates ``self.train_data`` and ``self.test_data``.
        """
        columns = ['user_id', 'movie_id', 'rating', 'type']
        ratings_data = Rating.objects.all().values(*columns)[:self.data_size]
        df = pd.DataFrame.from_records(ratings_data, columns=columns)
        self.train_data, self.test_data = train_test_split(df, test_size=0.2)
        self.logger.debug("training data loaded {}".format(len(ratings_data)))

    def calculate_predictions_for_training_data(self):
        """Add ``cb`` and ``cf`` prediction columns to ``self.train_data``."""
        self.logger.debug("[BEGIN] getting predictions")

        self.train_data['cb'] = self.train_data.apply(
            lambda data: self.cb.predict_score(data['user_id'], data['movie_id']),
            axis=1)
        self.train_data['cf'] = self.train_data.apply(
            lambda data: self.cf.predict_score(data['user_id'], data['movie_id']),
            axis=1)

        self.logger.debug("[END] getting predictions")
        return None

    def calculate_feature_functions_for_training_data(self):
        """Add the four regression features to ``self.train_data``.

        ``cb1``/``cf1`` are the predictions times ``fun1()``; ``cb2``/``cf2``
        are the predictions times ``fun2(user_id)``. Requires the ``cb`` and
        ``cf`` columns to be present already.
        """
        self.logger.debug("[BEGIN] calculating functions")
        self.train_data['cb1'] = self.train_data.apply(
            lambda data: data['cb'] * self.fwls.fun1(), axis=1)
        self.train_data['cb2'] = self.train_data.apply(
            lambda data: data['cb'] * self.fwls.fun2(data['user_id']), axis=1)

        self.train_data['cf1'] = self.train_data.apply(
            lambda data: data['cf'] * self.fwls.fun1(), axis=1)
        self.train_data['cf2'] = self.train_data.apply(
            lambda data: data['cf'] * self.fwls.fun2(data['user_id']), axis=1)

        self.logger.debug("[END] calculating functions")
        return None

    def build(self, train_data=None, params=None):
        """Prepare the training frame and fit the blend weights.

        Args:
            train_data: Optional DataFrame with ``user_id``, ``movie_id`` and
                ``rating`` columns. When omitted, ratings are loaded through
                ``get_real_training_data``.
            params: Optional dict; its ``save_path`` entry replaces
                ``self.save_path``.

        Returns:
            The dict produced by ``train``.
        """
        if params:
            self.save_path = params['save_path']

        if train_data is None:
            # Keep the frame the loader just stored; assigning the (None)
            # argument afterwards would discard it.
            self.get_real_training_data()
        else:
            self.train_data = train_data

        self.calculate_predictions_for_training_data()
        self.calculate_feature_functions_for_training_data()

        return self.train()

    def train(self, ratings=None, train_feature_recs=False):
        """Fit the linear regression and persist its coefficients.

        Args:
            ratings: Passed to ``ItemSimilarityMatrixBuilder().build`` when
                ``train_feature_recs`` is true; otherwise unused.
            train_feature_recs: When true, rebuild the item-similarity and
                LDA models before fitting.

        Returns:
            Dict with keys ``cb1``, ``cb2``, ``cf1`` and ``cf2``.
        """
        if train_feature_recs:
            ItemSimilarityMatrixBuilder().build(ratings)
            LdaModel.build()

        regr = linear_model.LinearRegression()

        regr.fit(self.train_data[['cb1', 'cb2', 'cf1', 'cf2']], self.train_data['rating'])
        self.logger.info(regr.coef_)

        result = {
            'cb1': regr.coef_[0],
            'cb2': regr.coef_[1],
            'cf1': regr.coef_[2],
            'cf2': regr.coef_[3],
        }

        ensure_dir(self.save_path)
        with open(self.save_path + 'fwls_parameters.data', 'wb') as out_file:
            pickle.dump(result, out_file)
        return result
예제 #30
0
    def test(self):
        """Run the evaluation on a small hand-made ratings table.

        Builds an EvaluationRunner from an item-similarity builder and the
        neighbourhood recommender, feeds it ratings from ten users over ten
        titles, and prints whatever the runner returns. Nothing is asserted.
        """
        similarity_builder = ItemSimilarityMatrixBuilder(1, min_sim=0.0)
        recommender = NeighborhoodBasedRecs()
        runner = EvaluationRunner(5, similarity_builder, recommender)

        # Movie identifiers used in the rating rows below.
        STAR_WARS = 'star wars'
        WONDER_WOMAN = 'wonder woman'
        AVENGERS = 'avengers'
        WOLVERINE = 'logan'
        PIRATES_OF = 'pirates of the caribbien'
        HARRY = 'harry potter I'
        CAPTAIN_AMERICA = 'captain america'
        ALIEN = 'alien'
        DR_STRANGELOVE = 'doctor strangelove'
        JACQUES = 'jacques'

        column_names = ['user_id', 'movie_id', 'rating', 'rating_timestamp']

        # One row per rating, grouped by user.
        rows = [
            # user 1
            [1, STAR_WARS, 9, '2013-10-12 23:21:27+00:00'],
            [1, WONDER_WOMAN, 10, '2014-10-12 23:22:27+00:00'],
            [1, AVENGERS, 10, '2015-11-12 23:20:27+00:00'],
            [1, WOLVERINE, 8, '2015-08-12 23:20:27+00:00'],
            [1, PIRATES_OF, 10, '2015-10-12 22:20:27+00:00'],
            [1, HARRY, 10, '2015-10-12 23:21:27+00:00'],
            [1, CAPTAIN_AMERICA, 10, '2014-10-12 23:20:27+00:00'],
            [1, ALIEN, 6, '2015-10-12 23:22:27+00:00'],
            [1, JACQUES, 6, '2015-10-12 11:20:27+00:00'],
            # user 2
            [2, STAR_WARS, 10, '2013-10-12 23:20:27+00:00'],
            [2, WONDER_WOMAN, 10, '2014-10-12 23:20:27+00:00'],
            [2, AVENGERS, 9, '2016-10-12 23:20:27+00:00'],
            [2, PIRATES_OF, 6, '2010-10-12 23:20:27+00:00'],
            [2, CAPTAIN_AMERICA, 10, '2005-10-12 23:20:27+00:00'],
            [2, DR_STRANGELOVE, 10, '2015-01-12 23:20:27+00:00'],
            # user 3
            [3, STAR_WARS, 9, '2013-10-12 20:20:27+00:00'],
            [3, AVENGERS, 10, '2015-10-12 10:20:27+00:00'],
            [3, PIRATES_OF, 9, '2013-03-12 23:20:27+00:00'],
            [3, HARRY, 8, '2016-10-13 23:20:27+00:00'],
            [3, DR_STRANGELOVE, 10, '2016-09-12 23:20:27+00:00'],
            # user 4
            [4, STAR_WARS, 8, '2013-10-12 23:20:27+00:00'],
            [4, WONDER_WOMAN, 8, '2014-10-12 23:20:27+00:00'],
            [4, AVENGERS, 9, '2015-10-12 23:20:27+00:00'],
            [4, PIRATES_OF, 5, '2013-10-12 23:20:27+00:00'],
            [4, HARRY, 6, '2014-10-12 23:20:27+00:00'],
            [4, ALIEN, 8, '2015-10-12 23:20:27+00:00'],
            [4, DR_STRANGELOVE, 9, '2015-10-12 23:20:27+00:00'],
            # user 5
            [5, STAR_WARS, 6, '2013-10-12 23:20:27+00:00'],
            [5, AVENGERS, 6, '2014-10-12 23:20:27+00:00'],
            [5, WOLVERINE, 8, '2015-10-12 23:20:27+00:00'],
            [5, PIRATES_OF, 2, '2016-10-12 23:20:27+00:00'],
            [5, HARRY, 10, '2016-10-12 23:20:27+00:00'],
            [5, CAPTAIN_AMERICA, 6, '2016-10-12 23:20:27+00:00'],
            [5, ALIEN, 4, '2016-10-12 23:20:27+00:00'],
            [5, DR_STRANGELOVE, 8, '2016-10-12 23:20:27+00:00'],
            [5, JACQUES, 10, '2016-10-12 23:20:27+00:00'],
            # user 6
            [6, STAR_WARS, 8, '2013-10-12 23:20:27+00:00'],
            [6, WONDER_WOMAN, 8, '2014-10-12 23:20:27+00:00'],
            [6, AVENGERS, 8, '2014-10-12 23:20:27+00:00'],
            [6, WOLVERINE, 8, '2015-10-12 23:20:27+00:00'],
            [6, PIRATES_OF, 6, '2016-10-12 23:20:27+00:00'],
            [6, HARRY, 10, '2016-10-12 23:20:27+00:00'],
            [6, JACQUES, 8, '2016-10-12 23:20:27+00:00'],
            # user 7
            [7, AVENGERS, 10, '2014-10-12 23:20:27+00:00'],
            [7, PIRATES_OF, 3, '2016-10-12 23:20:27+00:00'],
            [7, HARRY, 1, '2016-10-12 23:20:27+00:00'],
            [7, ALIEN, 8, '2016-10-12 23:20:27+00:00'],
            [7, DR_STRANGELOVE, 10, '2016-10-12 23:20:27+00:00'],
            # user 8
            [8, STAR_WARS, 9, '2013-10-12 23:20:27+00:00'],
            [8, WONDER_WOMAN, 7, '2014-10-12 23:20:27+00:00'],
            [8, AVENGERS, 7, '2014-10-12 23:20:27+00:00'],
            [8, WOLVERINE, 7, '2015-10-12 23:20:27+00:00'],
            [8, PIRATES_OF, 8, '2016-10-12 23:20:27+00:00'],
            [8, HARRY, 8, '2016-10-12 23:20:27+00:00'],
            [8, ALIEN, 8, '2016-10-12 23:20:27+00:00'],
            [8, DR_STRANGELOVE, 8, '2016-10-12 23:20:27+00:00'],
            [8, JACQUES, 10, '2016-10-12 23:20:27+00:00'],
            # user 9
            [9, WONDER_WOMAN, 7, '2014-10-12 23:20:27+00:00'],
            [9, AVENGERS, 8, '2014-10-12 23:20:27+00:00'],
            [9, WOLVERINE, 8, '2015-10-12 23:20:27+00:00'],
            [9, PIRATES_OF, 7, '2016-10-12 23:20:27+00:00'],
            [9, HARRY, 8, '2016-10-12 23:20:27+00:00'],
            [9, CAPTAIN_AMERICA, 10, '2016-10-12 23:20:27+00:00'],
            [9, DR_STRANGELOVE, 10, '2016-10-12 23:20:27+00:00'],
            [9, JACQUES, 7, '2016-10-12 23:20:27+00:00'],
            # user 10
            [10, AVENGERS, 7, '2014-10-12 23:20:27+00:00'],
            [10, HARRY, 10, '2016-10-12 23:20:27+00:00'],
            [10, CAPTAIN_AMERICA, 6, '2016-10-12 23:20:27+00:00'],
            [10, DR_STRANGELOVE, 8, '2016-10-12 23:20:27+00:00'],
        ]

        ratings_frame = pd.DataFrame(rows, columns=column_names)

        outcome = runner.calculate_using_ratings(ratings_frame,
                                                 min_number_of_ratings=4,
                                                 min_rank=5)

        # TODO: decide what to assert on the outcome; for now it is only printed.
        print(outcome)