class FeatureWeightedLinearStacking(base_recommender):
    """Hybrid recommender blending content-based (CB) and neighborhood
    collaborative-filtering (CF) scores via feature-weighted linear
    stacking.

    The blend is ``sum_i w_i * f_i(user) * p_i`` where the feature
    functions ``fun1``/``fun2`` gate each base prediction. The weights
    below are hard-coded from an earlier training run.
    """

    def __init__(self):
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()

        # Pre-trained stacking weights (Decimal to match the base
        # recommenders' Decimal scores).
        self.wcb1 = Decimal(0.65221204)
        self.wcb2 = Decimal(-0.14638855)
        self.wcf1 = Decimal(-0.0062952)
        self.wcf2 = Decimal(0.09139193)

    def fun1(self):
        """Constant feature function: always 1 (plain weighted term)."""
        return Decimal(1.0)

    def fun2(self, user_id):
        """Indicator feature: 1 if the user has rated more than 3 items,
        else 0. Queries the Rating table on every call."""
        count = Rating.objects.filter(user_id=user_id).count()
        if count > 3.0:
            return Decimal(1.0)
        return Decimal(0.0)

    def recommend_items(self, user_id, num=6):
        """Return the top ``num`` (item_id, {'prediction': score}) pairs.

        Over-fetches ``num * 5`` candidates from each base recommender,
        then fills in the missing score for items only one of them
        returned, blends, and sorts descending by prediction.
        """
        cb_recs = self.cb.recommend_items(user_id, num * 5)
        cf_recs = self.cf.recommend_items(user_id, num * 5)

        combined_recs = dict()
        for rec in cb_recs:
            movie_id = rec[0]
            pred = rec[1]['prediction']
            combined_recs[movie_id] = {'cb': pred}

        for rec in cf_recs:
            movie_id = rec[0]
            pred = rec[1]['prediction']
            if movie_id in combined_recs:
                combined_recs[movie_id]['cf'] = pred
            else:
                combined_recs[movie_id] = {'cf': pred}

        fwls_preds = dict()
        for key, recs in combined_recs.items():
            # A candidate seen by only one recommender still needs the
            # other base score before blending.
            if 'cb' not in recs:
                recs['cb'] = self.cb.predict_score(user_id, key)
            if 'cf' not in recs:
                recs['cf'] = self.cf.predict_score(user_id, key)

            pred = self.prediction(recs['cb'], recs['cf'], user_id)
            fwls_preds[key] = {'prediction': pred}

        sorted_items = sorted(
            fwls_preds.items(),
            key=lambda item: -float(item[1]['prediction']))[:num]
        return sorted_items

    def predict_score(self, user_id, item_id):
        """Predict a single blended score for (user, item).

        BUG FIX: the original computed the blended prediction but never
        returned it, so this method always yielded ``None``.
        """
        p_cb = self.cb.predict_score(user_id, item_id)
        p_cf = self.cf.predict_score(user_id, item_id)
        return self.prediction(p_cb, p_cf, user_id)

    def prediction(self, p_cb, p_cf, user_id):
        """Feature-weighted linear combination of the two base scores."""
        p = (self.wcb1 * self.fun1() * p_cb +
             self.wcb2 * self.fun2(user_id) * p_cb +
             self.wcf1 * self.fun1() * p_cf +
             self.wcf2 * self.fun2(user_id) * p_cf)
        return p
class FWLSCalculator(object):
    """Learns the FWLS blending weights by linear regression over a
    sample of the ratings table."""

    def __init__(self, data_size=1000):
        self.logger = logging.getLogger('FWLS')
        self.train_data = None
        self.test_data = None
        self.rating_count = None
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()
        self.fwls = FeatureWeightedLinearStacking()
        self.data_size = data_size

    def get_real_training_data(self):
        """Load up to ``data_size`` ratings and split them 80/20 into
        train and test DataFrames."""
        columns = ['user_id', 'movie_id', 'rating', 'type']
        ratings_data = Rating.objects.all().values(*columns)[:self.data_size]
        df = pd.DataFrame.from_records(ratings_data, columns=columns)
        self.train_data, self.test_data = train_test_split(df, test_size=0.2)
        self.logger.debug("training data loaded {}".format(len(ratings_data)))

    def calculate_predictions_for_training_data(self):
        """Attach the two base recommender scores as 'cb'/'cf' columns."""
        self.logger.debug("[BEGIN] getting predictions")

        def cb_score(row):
            return self.cb.predict_score(row['user_id'], row['movie_id'])

        def cf_score(row):
            return self.cf.predict_score(row['user_id'], row['movie_id'])

        self.train_data['cb'] = self.train_data.apply(cb_score, axis=1)
        self.train_data['cf'] = self.train_data.apply(cf_score, axis=1)
        self.logger.debug("[END] getting predictions")
        return None

    def calculate_feature_functions_for_training_data(self):
        """Multiply each base score by the two FWLS feature functions,
        producing the four regression features cb1/cb2/cf1/cf2."""
        self.logger.debug("[BEGIN] calculating functions")
        frame = self.train_data
        frame['cb1'] = frame.apply(
            lambda row: row['cb'] * self.fwls.fun1(), axis=1)
        frame['cb2'] = frame.apply(
            lambda row: row['cb'] * self.fwls.fun2(row['user_id']), axis=1)
        frame['cf1'] = frame.apply(
            lambda row: row['cf'] * self.fwls.fun1(), axis=1)
        frame['cf2'] = frame.apply(
            lambda row: row['cf'] * self.fwls.fun2(row['user_id']), axis=1)
        self.logger.debug("[END] calculating functions")
        return None

    def train(self):
        """Fit rating ~ cb1+cb2+cf1+cf2 by least squares and return the
        coefficient array."""
        # A statsmodels OLS fit (rating ~ cb1+cb2+cf1+cf2) was used here
        # previously; scikit-learn's LinearRegression replaces it.
        regr = linear_model.LinearRegression()
        features = self.train_data[['cb1', 'cb2', 'cf1', 'cf2']]
        regr.fit(features, self.train_data['rating'])
        self.logger.info(regr.coef_)
        return regr.coef_
class FWLSCalculator(object):
    """Early draft of the FWLS weight calculator.

    Fixes over the original:
    * the training DataFrame was stored as ``self.train``, which
      shadowed the ``train()`` method and made it uncallable — renamed
      to ``self.train_data`` / ``self.test_data``, matching the other
      FWLSCalculator variants in this file;
    * the feature functions were called as ``self.func1``/``self.func2``,
      which do not exist on this class — they live on
      FeatureWeightedLinearStacking, now held in ``self.fwls``;
    * two ``DataFrame.apply`` calls were missing ``axis=1`` and would
      have applied column-wise;
    * ``train()`` referenced an undefined global ``fwls``.
    """

    def __init__(self):
        self.train_data = None
        self.test_data = None
        self.rating_count = None
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()
        # Supplies fun1/fun2 used to build the stacking features.
        self.fwls = FeatureWeightedLinearStacking()

    def get_real_training_data(self):
        """Load all ratings and split them 80/20 into train/test."""
        columns = ['user_id', 'movie_id', 'rating', 'type']
        ratings_data = Rating.objects.all().values(*columns)
        df = pd.DataFrame.from_records(ratings_data, columns=columns)
        self.train_data, self.test_data = train_test_split(df, test_size=0.2)

    def get_training_data(self):
        """Load a tiny hard-coded sample — useful as a smoke test."""
        print('load data')
        data = np.array([['1', '2', 3.6],
                         ['1', '3', 5.0],
                         ['1', '4', 5.0],
                         ['2', '2', 3.0]])
        self.train_data = pd.DataFrame(data,
                                       columns=['user_id', 'movie_id', 'rating'])
        self.rating_count = self.train_data.groupby('user_id').count().reset_index()
        return self.train_data

    def calculate_predictions_for_training_data(self):
        """Attach the two base recommender scores as 'cb'/'cf' columns."""
        self.train_data['cb'] = self.train_data.apply(
            lambda data: self.cb.predict_score(data['user_id'],
                                               data['movie_id']), axis=1)
        self.train_data['cf'] = self.train_data.apply(
            lambda data: self.cf.predict_score(data['user_id'],
                                               data['movie_id']), axis=1)
        return None

    def calculate_feature_functions_for_training_data(self):
        """Multiply each base score by the two FWLS feature functions."""
        self.train_data['cb1'] = self.train_data.apply(
            lambda data: data.cb * self.fwls.fun1(), axis=1)
        self.train_data['cb2'] = self.train_data.apply(
            lambda data: data.cb * self.fwls.fun2(data['user_id']), axis=1)
        self.train_data['cf1'] = self.train_data.apply(
            lambda data: data.cf * self.fwls.fun1(), axis=1)
        self.train_data['cf2'] = self.train_data.apply(
            lambda data: data.cf * self.fwls.fun2(data['user_id']), axis=1)
        return None

    def train(self):
        """Fit rating ~ cb1+cb2+cf1+cf2 by OLS and print the result."""
        result = sm.ols(formula="rating ~ cb1+cb2+cf1+cf2",
                        data=self.train_data).fit()
        print(result)
class FWLSCalculator(object):
    """Fits the FWLS blending weights on a sample of ratings and
    pickles the resulting parameters to ``save_path``."""

    def __init__(self, save_path, data_size=1000):
        self.save_path = save_path
        self.logger = logging.getLogger('FWLS')
        self.train_data = None
        self.test_data = None
        self.rating_count = None
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()
        self.fwls = FeatureWeightedLinearStacking()
        self.data_size = data_size

    def get_real_training_data(self):
        """Load up to ``data_size`` ratings and split 80/20."""
        columns = ['user_id', 'movie_id', 'rating', 'type']
        ratings_data = Rating.objects.all().values(*columns)[:self.data_size]
        df = pd.DataFrame.from_records(ratings_data, columns=columns)
        self.train_data, self.test_data = train_test_split(df, test_size=0.2)
        self.logger.debug("training data loaded {}".format(len(ratings_data)))

    def calculate_predictions_for_training_data(self):
        """Attach the two base recommender scores as 'cb'/'cf' columns."""
        self.logger.debug("[BEGIN] getting predictions")
        self.train_data['cb'] = self.train_data.apply(
            lambda row: self.cb.predict_score(row['user_id'],
                                              row['movie_id']), axis=1)
        self.train_data['cf'] = self.train_data.apply(
            lambda row: self.cf.predict_score(row['user_id'],
                                              row['movie_id']), axis=1)
        self.logger.debug("[END] getting predictions")
        return None

    def calculate_feature_functions_for_training_data(self):
        """Build the four regression features cb1/cb2/cf1/cf2 by gating
        each base score with the FWLS feature functions."""
        self.logger.debug("[BEGIN] calculating functions")
        frame = self.train_data
        frame['cb1'] = frame.apply(
            lambda row: row['cb'] * self.fwls.fun1(), axis=1)
        frame['cb2'] = frame.apply(
            lambda row: row['cb'] * self.fwls.fun2(row['user_id']), axis=1)
        frame['cf1'] = frame.apply(
            lambda row: row['cf'] * self.fwls.fun1(), axis=1)
        frame['cf2'] = frame.apply(
            lambda row: row['cf'] * self.fwls.fun2(row['user_id']), axis=1)
        self.logger.debug("[END] calculating functions")
        return None

    def build(self, train_data=None, params=None):
        """Prepare the training set (caller-supplied or loaded from the
        database), derive features, and fit the weights."""
        if params:
            self.save_path = params['save_path']
            self.data_size = params['data_sample']

        if train_data is not None:
            self.train_data = train_data
            if self.data_size > 0:
                # Cap the caller-supplied frame at data_size rows.
                self.train_data = self.train_data.sample(self.data_size)
                self.logger.debug("training sample of size {}".format(
                    self.train_data.shape[0]))
        else:
            self.get_real_training_data()

        self.calculate_predictions_for_training_data()
        self.calculate_feature_functions_for_training_data()
        return self.train()

    def train(self, ratings=None, train_feature_recs=False):
        """Fit the regression and pickle its parameters.

        Optionally retrains the underlying feature recommenders first.
        """
        if train_feature_recs:
            ItemSimilarityMatrixBuilder().build(ratings)
            LdaModel.build()

        # NOTE(review): ``normalize=`` was removed from scikit-learn's
        # LinearRegression in 1.2 — confirm the pinned sklearn version.
        regr = linear_model.LinearRegression(fit_intercept=True,
                                             n_jobs=-1,
                                             normalize=True)
        regr.fit(self.train_data[['cb1', 'cb2', 'cf1', 'cf2']],
                 self.train_data['rating'])
        self.logger.info(regr.coef_)

        result = dict(zip(['cb1', 'cb2', 'cf1', 'cf2'], regr.coef_))
        result['intercept'] = regr.intercept_
        self.logger.debug(result)
        self.logger.debug(self.train_data.iloc[100])

        ensure_dir(self.save_path)
        with open(self.save_path + 'fwls_parameters.data', 'wb') as ub_file:
            pickle.dump(result, ub_file)
        return result
class FWLSCalculator(object):
    """Fits the FWLS blending weights on a sample of ratings and
    pickles the resulting parameters to ``save_path``."""

    def __init__(self, save_path, data_size=1000):
        self.save_path = save_path
        self.logger = logging.getLogger('FWLS')
        self.train_data = None
        self.test_data = None
        self.rating_count = None
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()
        self.fwls = FeatureWeightedLinearStacking()
        self.data_size = data_size

    def get_real_training_data(self):
        """Load up to ``data_size`` ratings and split 80/20 into
        train and test DataFrames."""
        columns = ['user_id', 'movie_id', 'rating', 'type']
        ratings_data = Rating.objects.all().values(*columns)[:self.data_size]
        df = pd.DataFrame.from_records(ratings_data, columns=columns)
        self.train_data, self.test_data = train_test_split(df, test_size=0.2)
        self.logger.debug("training data loaded {}".format(len(ratings_data)))

    def calculate_predictions_for_training_data(self):
        """Attach the two base recommender scores as 'cb'/'cf' columns."""
        self.logger.debug("[BEGIN] getting predictions")
        self.train_data['cb'] = self.train_data.apply(
            lambda data: self.cb.predict_score(data['user_id'],
                                               data['movie_id']), axis=1)
        self.train_data['cf'] = self.train_data.apply(
            lambda data: self.cf.predict_score(data['user_id'],
                                               data['movie_id']), axis=1)
        self.logger.debug("[END] getting predictions")
        return None

    def calculate_feature_functions_for_training_data(self):
        """Build the four regression features cb1/cb2/cf1/cf2."""
        self.logger.debug("[BEGIN] calculating functions")
        self.train_data['cb1'] = self.train_data.apply(
            lambda data: data['cb'] * self.fwls.fun1(), axis=1)
        self.train_data['cb2'] = self.train_data.apply(
            lambda data: data['cb'] * self.fwls.fun2(data['user_id']), axis=1)
        self.train_data['cf1'] = self.train_data.apply(
            lambda data: data['cf'] * self.fwls.fun1(), axis=1)
        self.train_data['cf2'] = self.train_data.apply(
            lambda data: data['cf'] * self.fwls.fun2(data['user_id']), axis=1)
        self.logger.debug("[END] calculating functions")
        return None

    def build(self, train_data=None, params=None):
        """Prepare the training set, derive features, and fit weights.

        BUG FIX: the original executed ``self.train_data = train_data``
        unconditionally AFTER ``get_real_training_data()``, so whenever
        no DataFrame was passed the freshly loaded data was wiped back
        to ``None``. The assignment now happens only when a caller
        actually supplies training data.
        """
        if params:
            self.save_path = params['save_path']

        if train_data is None:
            self.get_real_training_data()
        else:
            self.train_data = train_data

        self.calculate_predictions_for_training_data()
        self.calculate_feature_functions_for_training_data()
        return self.train()

    def train(self, ratings=None, train_feature_recs=False):
        """Fit rating ~ cb1+cb2+cf1+cf2 and pickle the coefficients.

        Optionally retrains the underlying feature recommenders first.
        """
        if train_feature_recs:
            ItemSimilarityMatrixBuilder().build(ratings)
            LdaModel.build()

        regr = linear_model.LinearRegression()
        regr.fit(self.train_data[['cb1', 'cb2', 'cf1', 'cf2']],
                 self.train_data['rating'])
        self.logger.info(regr.coef_)

        result = {'cb1': regr.coef_[0],
                  'cb2': regr.coef_[1],
                  'cf1': regr.coef_[2],
                  'cf2': regr.coef_[3]}

        ensure_dir(self.save_path)
        with open(self.save_path + 'fwls_parameters.data', 'wb') as ub_file:
            pickle.dump(result, ub_file)
        return result
class FeatureWeightedLinearStacking(base_recommender):
    """Hybrid recommender blending content-based (CB) and neighborhood
    collaborative-filtering (CF) scores via feature-weighted linear
    stacking.

    Weights default to values from an earlier training run and can be
    replaced with pickled parameters via ``set_save_path``.
    """

    def __init__(self):
        self.cb = ContentBasedRecs()
        self.cf = NeighborhoodBasedRecs()

        # Default stacking weights (overridden by set_save_path).
        self.wcb1 = Decimal(0.65221204)
        self.wcb2 = Decimal(-0.14638855)
        self.wcf1 = Decimal(-0.0062952)
        self.wcf2 = Decimal(0.09139193)
        self.intercept = Decimal(0)

    def fun1(self):
        """Constant feature function: always 1."""
        return Decimal(1.0)

    def fun2(self, user_id):
        """Indicator feature: 1 if the user has rated more than 3 items,
        else 0. Queries the Rating table on every call."""
        count = Rating.objects.filter(user_id=user_id).count()
        if count > 3.0:
            return Decimal(1.0)
        return Decimal(0.0)

    def set_save_path(self, save_path):
        """Load pickled regression parameters from ``save_path``.

        BUG FIX: ``wcf1`` was loaded from ``parameters['cb1']`` (a
        copy-paste error that silently reused the CB weight); it now
        reads ``parameters['cf1']``.
        """
        with open(save_path + 'fwls_parameters.data', 'rb') as ub_file:
            parameters = pickle.load(ub_file)
            self.wcb1 = Decimal(parameters['cb1'])
            self.wcb2 = Decimal(parameters['cb2'])
            self.wcf1 = Decimal(parameters['cf1'])
            self.wcf2 = Decimal(parameters['cf2'])
            self.intercept = Decimal(parameters['intercept'])

    def recommend_items_by_ratings(self, user_id, active_user_items, num=6):
        """Recommend from explicit ratings supplied by the caller."""
        cb_recs = self.cb.recommend_items_by_ratings(user_id,
                                                     active_user_items,
                                                     num * 5)
        cf_recs = self.cf.recommend_items_by_ratings(user_id,
                                                     active_user_items,
                                                     num * 5)
        return self.merge_predictions(user_id, cb_recs, cf_recs, num)

    def recommend_items(self, user_id, num=6):
        """Recommend from stored ratings for ``user_id``."""
        cb_recs = self.cb.recommend_items(user_id, num * 5)
        cf_recs = self.cf.recommend_items(user_id, num * 5)
        return self.merge_predictions(user_id, cb_recs, cf_recs, num)

    def merge_predictions(self, user_id, cb_recs, cf_recs, num):
        """Blend the two candidate lists and return the top ``num``
        (item_id, {'prediction': score}) pairs sorted descending."""
        combined_recs = dict()
        for rec in cb_recs:
            movie_id = rec[0]
            pred = rec[1]['prediction']
            combined_recs[movie_id] = {'cb': pred}

        for rec in cf_recs:
            movie_id = rec[0]
            pred = rec[1]['prediction']
            if movie_id in combined_recs:
                combined_recs[movie_id]['cf'] = pred
            else:
                combined_recs[movie_id] = {'cf': pred}

        fwls_preds = dict()
        for key, recs in combined_recs.items():
            # A candidate seen by only one recommender still needs the
            # other base score before blending.
            if 'cb' not in recs:
                recs['cb'] = self.cb.predict_score(user_id, key)
            if 'cf' not in recs:
                recs['cf'] = self.cf.predict_score(user_id, key)

            pred = self.prediction(recs['cb'], recs['cf'], user_id)
            fwls_preds[key] = {'prediction': pred}

        sorted_items = sorted(
            fwls_preds.items(),
            key=lambda item: -float(item[1]['prediction']))[:num]
        return sorted_items

    def predict_score(self, user_id, item_id):
        """Predict a single blended score for (user, item).

        BUG FIX: the original computed the blended prediction but never
        returned it, so this method always yielded ``None``.
        """
        p_cb = self.cb.predict_score(user_id, item_id)
        p_cf = self.cf.predict_score(user_id, item_id)
        return self.prediction(p_cb, p_cf, user_id)

    def prediction(self, p_cb, p_cf, user_id):
        """Feature-weighted linear combination of the two base scores,
        plus the fitted intercept."""
        p = (self.wcb1 * self.fun1() * p_cb +
             self.wcb2 * self.fun2(user_id) * p_cb +
             self.wcf1 * self.fun1() * p_cf +
             self.wcf2 * self.fun2(user_id) * p_cf)
        return p + self.intercept