Example No. 1
# Standard and third-party imports needed by this example; the project-specific
# classes (ContentBasedRecs, NeighborhoodBasedRecs, FeatureWeightedLinearStacking)
# and the Rating model are assumed to come from the surrounding project.
import logging

import pandas as pd
from sklearn import linear_model
from sklearn.model_selection import train_test_split


class FWLSCalculator(object):
    def __init__(self, data_size=1000):
        self.logger = logging.getLogger('FWLS')
        self.train_data = None
        self.test_data = None
        self.rating_count = None
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()
        self.fwls = FeatureWeightedLinearStacking()
        self.data_size = data_size

    def get_real_training_data(self):
        columns = ['user_id', 'movie_id', 'rating', 'type']
        ratings_data = Rating.objects.all().values(*columns)[:self.data_size]
        df = pd.DataFrame.from_records(ratings_data, columns=columns)
        self.train_data, self.test_data = train_test_split(df, test_size=0.2)
        self.logger.debug("training data loaded {}".format(len(ratings_data)))

    def calculate_predictions_for_training_data(self):
        self.logger.debug("[BEGIN] getting predictions")

        self.train_data['cb'] = self.train_data.apply(
            lambda data: self.cb.predict_score(data['user_id'],
                                               data['movie_id']),
            axis=1)
        self.train_data['cf'] = self.train_data.apply(
            lambda data: self.cf.predict_score(data['user_id'],
                                               data['movie_id']),
            axis=1)

        self.logger.debug("[END] getting predictions")
        return None

    def calculate_feature_functions_for_training_data(self):
        self.logger.debug("[BEGIN] calculating functions")
        self.train_data['cb1'] = self.train_data.apply(
            lambda data: data['cb'] * self.fwls.fun1(), axis=1)
        self.train_data['cb2'] = self.train_data.apply(
            lambda data: data['cb'] * self.fwls.fun2(data['user_id']), axis=1)

        self.train_data['cf1'] = self.train_data.apply(
            lambda data: data['cf'] * self.fwls.fun1(), axis=1)
        self.train_data['cf2'] = self.train_data.apply(
            lambda data: data['cf'] * self.fwls.fun2(data['user_id']), axis=1)

        self.logger.debug("[END] calculating functions")
        return None

    def train(self):
        #model = sm.ols(formula="rating ~ cb1+cb2+cf1+cf2", data=self.train_data[['rating', 'cb1','cb2','cf1','cf2']])
        #results = model.fit()
        #self.logger.info(results.summary())
        #self.logger.info(results.params)
        regr = linear_model.LinearRegression()

        regr.fit(self.train_data[['cb1', 'cb2', 'cf1', 'cf2']],
                 self.train_data['rating'])
        self.logger.info(regr.coef_)
        return regr.coef_
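A minimal usage sketch for the class above, assuming the project modules it depends on (ContentBasedRecs, NeighborhoodBasedRecs, FeatureWeightedLinearStacking, the Rating model) are importable; the call order follows the methods shown in the example.

# Minimal usage sketch (assumes the surrounding project is on the path)
calc = FWLSCalculator(data_size=1000)
calc.get_real_training_data()                          # load and split Rating data
calc.calculate_predictions_for_training_data()         # adds 'cb' and 'cf' columns
calc.calculate_feature_functions_for_training_data()   # adds 'cb1', 'cb2', 'cf1', 'cf2'
coefficients = calc.train()                            # fitted regression coefficients
print(coefficients)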
Example No. 2
    def test_recFWLS(self):
        save_path = './models/fwls/'
        builder = FWLSCalculator(save_path)
        train_data = self.get_training_data()
        builder.train_data = train_data
        parameters = builder.train()
        assert(parameters is not None)

        rec = FeatureWeightedLinearStacking()
        rec.set_save_path(save_path)
        assert(rec.wcb1 == parameters['cb1'])
        assert(rec.wcb2 == parameters['cb2'])
        assert(rec.wcf1 == parameters['cf1'])
        assert(rec.wcf2 == parameters['cf2'])
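The helper get_training_data() used by this test is not part of the example. A hypothetical stand-in that yields a frame with the columns train() consumes could look like this (names and values are illustrative only):

# Hypothetical stand-in for get_training_data(); purely illustrative
import numpy as np
import pandas as pd

def get_training_data(n=100):
    rng = np.random.default_rng(42)
    return pd.DataFrame({
        'rating': rng.integers(1, 6, n),
        'cb1': rng.random(n), 'cb2': rng.random(n),
        'cf1': rng.random(n), 'cf2': rng.random(n),
    })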
Example No. 4
def recs_fwls(request, user_id, num=6):
    sorted_items = FeatureWeightedLinearStacking().recommend_items(user_id, num)

    data = {
        'user_id': user_id,
        'data': sorted_items
    }
    return JsonResponse(data, safe=False)
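A hypothetical urls.py entry for exposing this view; the route and name below are assumptions for illustration, not taken from the source project.

# Hypothetical URL wiring for the view above (route and name are illustrative)
from django.urls import path

urlpatterns = [
    path('rec/fwls/<int:user_id>/', recs_fwls, name='recs_fwls'),
]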
Example No. 6
def evaluate_fwls_recommender():
    save_path = './models/fwls/'
    min_number_of_ratings = 10
    min_overlap = 3
    min_sim = 0.1
    K = 5
    min_rank = 5
    number_test_users = 1000
    timestr = time.strftime("%Y%m%d-%H%M%S")
    file_name = '{}-fwls.csv'.format(timestr)

    with open(file_name, 'a', 1) as logfile:
        logfile.write(
            "ar, map, mae, min_overlap, min_sim, K, min_num_of_ratings, min_rank, data_sample\n"
        )

        # FWLSCalculator, as defined in these examples, takes (save_path, data_size);
        # the sample size is passed later through params, so only the save path is given here
        builder = FWLSCalculator(save_path)

        for data_sample in np.arange(500, 5000, 200):

            min_rank = min_number_of_ratings / 2
            recommender = FeatureWeightedLinearStacking()
            er = EvaluationRunner(0,
                                  builder,
                                  recommender,
                                  K,
                                  params={
                                      'save_path': save_path,
                                      'data_sample': data_sample
                                  })

            result = er.calculate(min_number_of_ratings, min_rank,
                                  number_test_users)

            map = result['map']
            mae = result['mae']
            ar = result['ar']
            logfile.write("{}, {}, {}, {}, {}, {}, {}, {}, {}\n".format(
                ar, map, mae, min_overlap, min_sim, K, min_number_of_ratings,
                min_rank, data_sample))
            logfile.flush()
Example No. 7
def evaluate_fwls_recommender(coverage=False):
    save_path = './models/fwls/'
    min_number_of_ratings = 10
    min_overlap = 3
    min_sim = 0.1
    K = 5
    min_rank = 5

    timestr = time.strftime("%Y%m%d-%H%M%S")
    file_name = '{}-fwls.csv'.format(timestr)

    with open(file_name, 'a', 1) as logfile:
        logfile.write(
            "ar, map, mae, min_overlap, min_sim, K, min_num_of_ratings, min_rank, user_coverage, "
            "movie_coverage\n")

        # FWLSCalculator, as defined in these examples, takes (save_path, data_size=1000),
        # so only the save path is passed here
        builder = FWLSCalculator(save_path)

        for k in np.arange(2, 20, 2):
            min_rank = min_number_of_ratings / 2
            recommender = FeatureWeightedLinearStacking()
            er = EvaluationRunner(0,
                                  builder,
                                  recommender,
                                  k,
                                  params={'save_path': save_path})

            result = er.calculate(min_number_of_ratings, min_rank)
            user_coverage, movie_coverage = 0, 0
            if coverage:
                user_coverage, movie_coverage = RecommenderCoverage(
                    recommender).calculate_coverage(k)

            builder = None  # pass the builder only on the first iteration; later runs get builder=None
            map = result['map']
            mae = result['mae']
            ar = result['ar']
            logfile.write("{}, {}, {}, {}, {}, {}, {}, {}, {}, {}\n".format(
                ar, map, mae, min_overlap, min_sim, k, min_number_of_ratings,
                min_rank, user_coverage, movie_coverage))
            logfile.flush()
class FWLSCalculator(object):
    def __init__(self, save_path, data_size=1000):
        self.save_path = save_path
        self.logger = logging.getLogger('FWLS')
        self.train_data = None
        self.test_data = None
        self.rating_count = None
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()
        self.fwls = FeatureWeightedLinearStacking()
        self.data_size = data_size

    def get_real_training_data(self):
        columns = ['user_id', 'movie_id', 'rating', 'type']
        ratings_data = Rating.objects.all().values(*columns)[:self.data_size]
        df = pd.DataFrame.from_records(ratings_data, columns=columns)
        self.train_data, self.test_data = train_test_split(df, test_size=0.2)
        self.logger.debug("training data loaded {}".format(len(ratings_data)))

    def calculate_predictions_for_training_data(self):
        self.logger.debug("[BEGIN] getting predictions")

        self.train_data['cb'] = self.train_data.apply(lambda data:
                                                      self.cb.predict_score(data['user_id'],
                                                                            data['movie_id']), axis=1)

        self.train_data['cf'] = self.train_data.apply(lambda data:
                                                      self.cf.predict_score(data['user_id'],
                                                                            data['movie_id']), axis=1)

        self.logger.debug("[END] getting predictions")
        return None

    def calculate_feature_functions_for_training_data(self):
        self.logger.debug("[BEGIN] calculating functions")
        self.train_data['cb1'] = self.train_data.apply(lambda data:
                                                       data['cb'] * self.fwls.fun1(), axis=1)
        self.train_data['cb2'] = self.train_data.apply(lambda data:
                                                       data['cb'] * self.fwls.fun2(data['user_id']), axis=1)

        self.train_data['cf1'] = self.train_data.apply(lambda data:
                                                       data['cf'] * self.fwls.fun1(), axis=1)
        self.train_data['cf2'] = self.train_data.apply(lambda data:
                                                       data['cf'] * self.fwls.fun2(data['user_id']), axis=1)

        self.logger.debug("[END] calculating functions")
        return None

    def build(self, train_data=None, params=None):

        if params:
            self.save_path = params['save_path']
            self.data_size = params['data_sample']

        if train_data is not None:
            self.train_data = train_data
            if self.data_size > 0:
                self.train_data = self.train_data.sample(self.data_size)
                self.logger.debug("training sample of size {}".format(self.train_data.shape[0]))
        else:
            self.get_real_training_data()

        self.calculate_predictions_for_training_data()
        self.calculate_feature_functions_for_training_data()

        return self.train()

    def train(self, ratings=None, train_feature_recs=False):

        if train_feature_recs:
            ItemSimilarityMatrixBuilder().build(ratings)
            LdaModel.build()

        # note: the `normalize` argument was removed in scikit-learn 1.2;
        # on newer versions, scale the features beforehand instead
        regr = linear_model.LinearRegression(fit_intercept=True,
                                             n_jobs=-1,
                                             normalize=True)

        regr.fit(self.train_data[['cb1', 'cb2', 'cf1', 'cf2']], self.train_data['rating'])
        self.logger.info(regr.coef_)

        result = {'cb1': regr.coef_[0],
                  'cb2': regr.coef_[1],
                  'cf1': regr.coef_[2],
                  'cf2': regr.coef_[3],
                  'intercept': regr.intercept_}
        self.logger.debug(result)
        self.logger.debug(self.train_data.iloc[100])
        ensure_dir(self.save_path)
        with open(self.save_path + 'fwls_parameters.data', 'wb') as ub_file:
            pickle.dump(result, ub_file)
        return result
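Since train() pickles the coefficient dictionary to fwls_parameters.data under save_path, the stored weights can be read back directly; a minimal sketch, assuming the same save path as above:

# Minimal sketch: load the parameters written by train() above
import pickle

with open('./models/fwls/fwls_parameters.data', 'rb') as f:
    params = pickle.load(f)
print(params['cb1'], params['cb2'], params['cf1'], params['cf2'], params['intercept'])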
Example No. 9
class FWLSCalculator(object):

    def __init__(self, save_path, data_size=1000):
        self.save_path = save_path
        self.logger = logging.getLogger('FWLS')
        self.train_data = None
        self.test_data = None
        self.rating_count = None
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()
        self.fwls = FeatureWeightedLinearStacking()
        self.data_size = data_size

    def get_real_training_data(self):
        columns = ['user_id', 'movie_id', 'rating', 'type']
        ratings_data = Rating.objects.all().values(*columns)[:self.data_size]
        df = pd.DataFrame.from_records(ratings_data, columns=columns)
        self.train_data, self.test_data = train_test_split(df, test_size=0.2)
        self.logger.debug("training data loaded {}".format(len(ratings_data)))

    def calculate_predictions_for_training_data(self):
        self.logger.debug("[BEGIN] getting predictions")

        self.train_data['cb'] = self.train_data.apply(
            lambda data: self.cb.predict_score(data['user_id'],
                                               data['movie_id']),
            axis=1)
        self.train_data['cf'] = self.train_data.apply(
            lambda data: self.cf.predict_score(data['user_id'],
                                               data['movie_id']),
            axis=1)

        self.logger.debug("[END] getting predictions")
        return None

    def calculate_feature_functions_for_training_data(self):
        self.logger.debug("[BEGIN] calculating functions")
        self.train_data['cb1'] = self.train_data.apply(
            lambda data: data['cb'] * self.fwls.fun1(), axis=1)
        self.train_data['cb2'] = self.train_data.apply(
            lambda data: data['cb'] * self.fwls.fun2(data['user_id']), axis=1)

        self.train_data['cf1'] = self.train_data.apply(
            lambda data: data['cf'] * self.fwls.fun1(), axis=1)
        self.train_data['cf2'] = self.train_data.apply(
            lambda data: data['cf'] * self.fwls.fun2(data['user_id']), axis=1)

        self.logger.debug("[END] calculating functions")
        return None

    def build(self, train_data=None, params=None):

        if params:
            self.save_path = params['save_path']

        # use the supplied training data if there is any; otherwise load it from the database
        if train_data is not None:
            self.train_data = train_data
        else:
            self.get_real_training_data()
        self.calculate_predictions_for_training_data()
        self.calculate_feature_functions_for_training_data()

        return self.train()

    def train(self, ratings=None, train_feature_recs=False):

        if train_feature_recs:
            ItemSimilarityMatrixBuilder().build(ratings)
            LdaModel.build()

        regr = linear_model.LinearRegression()

        regr.fit(self.train_data[['cb1','cb2','cf1','cf2']], self.train_data['rating'])
        self.logger.info(regr.coef_)

        result = {'cb1': regr.coef_[0],
                  'cb2': regr.coef_[1],
                  'cf1': regr.coef_[2],
                  'cf2': regr.coef_[3]}

        ensure_dir(self.save_path)
        with open(self.save_path + 'fwls_parameters.data', 'wb') as ub_file:
            pickle.dump(result, ub_file)
        return result