def config(self, elems):
    """Assemble the personal-popularity model and its delayed learner.

    A ``rs.PersonalPopularityModel`` is paired with its updater inside a
    ``rs.SimpleLearner``; that learner is then wrapped by a
    ``rs.LearnerPeriodicDelayedWrapper``.

    Parameters
    ----------
    elems : object
        Not used by this method.

    Returns
    -------
    dict
        ``'config'`` (the result of ``self.parameter_defaults`` for the
        experiment-level settings), ``'model'`` and ``'learner'`` (the
        periodic delayed wrapper).
    """
    config = self.parameter_defaults(
        top_k=100,
        min_time=0,
        seed=0,
        out_file=None,
        filters=[],
        loggers=[],
    )

    model = rs.PersonalPopularityModel()
    updater = rs.PersonalPopularityModelUpdater()
    updater.set_model(model)

    simple_learner = rs.SimpleLearner()
    simple_learner.add_simple_updater(updater)
    simple_learner.set_model(model)

    # The wrapper, not the simple learner, is what gets returned.
    learner = rs.LearnerPeriodicDelayedWrapper(
        **self.parameter_defaults(period=86400, delay=86400))
    learner.set_wrapped_learner(simple_learner)

    return {'config': config, 'model': model, 'learner': learner}
def config(self, elems):
    """Assemble a popularity model updated by a time-frame updater.

    A ``rs.PopularityModel`` is updated by a
    ``rs.PopularityTimeFrameModelUpdater`` (parameter ``tau``, default
    86400) through a ``rs.SimpleLearner``.

    Parameters
    ----------
    elems : object
        Not used by this method.

    Returns
    -------
    dict
        ``'config'`` (the result of ``self.parameter_defaults`` for the
        experiment-level settings), ``'model'`` and ``'learner'``.
    """
    config = self.parameter_defaults(
        top_k=100,
        min_time=0,
        seed=0,
        out_file=None,
        filters=[],
        loggers=[],
    )

    model = rs.PopularityModel()
    updater = rs.PopularityTimeFrameModelUpdater(
        **self.parameter_defaults(tau=86400))
    updater.set_model(model)

    learner = rs.SimpleLearner()
    learner.add_simple_updater(updater)
    learner.set_model(model)

    return {'config': config, 'model': model, 'learner': learner}
def _config(self, top_k, seed):
    """Build the FM model, its updater and a negative sample generator.

    When ``user_attributes`` / ``item_attributes`` are given (not ``None``),
    each value is passed to ``rs.FileSparseAttributeContainer.load_from_file``
    and replaced by the loaded container before the model is constructed.

    Parameters
    ----------
    top_k, seed
        Not used by this method.

    Returns
    -------
    tuple
        ``(model, [negative_sample_generator], [])``.
    """
    fm_kwargs = self.parameter_defaults(
        begin_min=-0.01,
        begin_max=0.01,
        dimension=10,
        user_attributes=None,
        item_attributes=None,
    )

    # Same treatment for both attribute sources, users first.
    for attribute_key in ('user_attributes', 'item_attributes'):
        attribute_file = fm_kwargs[attribute_key]
        if attribute_file is None:
            continue
        attribute_container = rs.FileSparseAttributeContainer()
        attribute_container.load_from_file(attribute_file)
        fm_kwargs[attribute_key] = attribute_container

    fm_model = rs.FmModel(**fm_kwargs)

    updater_kwargs = self.parameter_defaults(learning_rate=0.05)
    fm_updater = rs.FmModelUpdater(**updater_kwargs)
    fm_updater.set_model(fm_model)

    sampler_kwargs = self.parameter_defaults(
        negative_rate=0.0,
        initialize_all=False,
        seed=67439852,
        filter_repeats=False,
    )
    sampler = rs.UniformNegativeSampleGenerator(**sampler_kwargs)
    sampler.add_updater(fm_updater)

    return (fm_model, [sampler], [])
def config(self, elems):
    """Assemble the transition-probability model and its simple learner.

    Parameters
    ----------
    elems : object
        Not used by this method.

    Returns
    -------
    dict
        ``'config'`` (the result of ``self.parameter_defaults`` for the
        experiment-level settings), ``'model'`` and ``'learner'``.
    """
    config = self.parameter_defaults(
        top_k=100,
        min_time=0,
        seed=0,
        out_file=None,
        filters=[],
        loggers=[],
    )

    model = rs.TransitionProbabilityModel()
    updater = rs.TransitionProbabilityModelUpdater(
        **self.parameter_defaults(
            filter_freq_updates=False,
            mode_="normal",
            label_transition_mode_=False,
            label_file_name_="",
        ))
    updater.set_model(model)

    learner = rs.SimpleLearner()
    learner.add_simple_updater(updater)
    learner.set_model(model)

    # NOTE(review): the previous revision built an unused local
    # ``filters = [model]`` here and never returned or registered it.
    # It may have been intended to register the model as a filter;
    # confirm before wiring it in.
    return {'config': config, 'model': model, 'learner': learner}
def config(self, elems):
    """Assemble the nearest-neighbor model and its simple learner.

    Parameters
    ----------
    elems : object
        Not used by this method.

    Returns
    -------
    dict
        ``'config'`` (the result of ``self.parameter_defaults`` for the
        experiment-level settings), ``'model'`` and ``'learner'``.
    """
    config = self.parameter_defaults(
        top_k=100,
        min_time=0,
        seed=0,
        out_file=None,
        filters=[],
        loggers=[],
    )

    model = rs.NearestNeighborModel(
        **self.parameter_defaults(
            gamma=0.8,
            norm="num",
            direction="forward",
            gamma_threshold=0,
            num_of_neighbors=10,
        ))
    updater = rs.NearestNeighborModelUpdater(
        **self.parameter_defaults(
            compute_similarity_period=86400,
            period_mode="time-based",
        ))
    updater.set_model(model)

    learner = rs.SimpleLearner()
    learner.add_simple_updater(updater)
    learner.set_model(model)

    # NOTE(review): the previous revision built an unused local
    # ``filters = [model]`` here and never returned or registered it.
    # It may have been intended to register the model as a filter;
    # confirm before wiring it in.
    return {'config': config, 'model': model, 'learner': learner}
def _config(self, top_k, seed):
    """Build a popularity model with a time-frame updater.

    Parameters
    ----------
    top_k, seed
        Not used by this method.

    Returns
    -------
    tuple
        ``(model, updater, [], [])``.
    """
    popularity_model = rs.PopularityModel()

    updater_kwargs = self.parameter_defaults(tau=86400)
    time_frame_updater = rs.PopularityTimeFrameModelUpdater(**updater_kwargs)
    time_frame_updater.set_model(popularity_model)

    return (popularity_model, time_frame_updater, [], [])
def _config(self, top_k, seed):
    """Build the transition-probability model and its updater.

    Parameters
    ----------
    top_k, seed
        Not used by this method.

    Returns
    -------
    tuple
        ``(model, updater, [model], [])``; the model itself is the only
        element of the third entry.
    """
    transition_model = rs.TransitionProbabilityModel()

    updater_kwargs = self.parameter_defaults(
        filter_freq_updates=False,
        mode_="normal",
        label_transition_mode_=False,
        label_file_name_="",
    )
    transition_updater = rs.TransitionProbabilityModelUpdater(**updater_kwargs)
    transition_updater.set_model(transition_model)

    return (transition_model, transition_updater, [transition_model], [])
def _config(self, top_k, seed):
    """Build a popularity model exposed through a label whitelist adapter.

    The ``ag.PopularityModel`` is wrapped by an
    ``ag.WhitelistFilter2ModelAdapter`` whose whitelist filter is an
    ``ag.LabelFilter``; the adapter is what gets returned as the model.

    Parameters
    ----------
    top_k, seed
        Not used by this method.

    Returns
    -------
    tuple
        ``(adapter, [updater, label_filter], [])``.
    """
    popularity_model = ag.PopularityModel()
    popularity_updater = ag.PopularityModelUpdater()
    popularity_updater.set_model(popularity_model)

    filter_kwargs = self.parameter_defaults(label_file_name="")
    whitelist = ag.LabelFilter(**filter_kwargs)

    model_adapter = ag.WhitelistFilter2ModelAdapter()
    model_adapter.set_model(popularity_model)
    model_adapter.set_whitelist_filter(whitelist)

    # Note: do not forget to add the filter as an updater.
    return (model_adapter, [popularity_updater, whitelist], [])
def _fit(self, recommender_data, users, items, matrix):
    """Build a popularity model and an offline iterating learner for it.

    Parameters
    ----------
    recommender_data
        Handed to the learner via ``set_recommender_data``.
    users, items, matrix
        Not used by this method.

    Returns
    -------
    tuple
        ``(model, learner)``.
    """
    popularity_model = rs.PopularityModel()
    popularity_updater = rs.PopularityModelUpdater()
    popularity_updater.set_model(popularity_model)

    learner_kwargs = self.parameter_defaults(seed=67439852)
    offline_learner = rs.OfflineIteratingLearner(**learner_kwargs)
    offline_learner.set_model(popularity_model)
    offline_learner.add_simple_updater(popularity_updater)
    offline_learner.set_recommender_data(recommender_data)

    return (popularity_model, offline_learner)
def config(self, elems):
    """Assemble a factor model, its gradient learner and a prediction logger.

    Besides the model/learner pair, an ``rs.OnlinePredictor`` fed by a
    ``rs.PredictionCreatorGlobal`` (filtered through a
    ``rs.FactorModelFilter``) is appended to ``config['loggers']``.

    Parameters
    ----------
    elems : object
        Not used by this method.

    Returns
    -------
    dict
        ``'config'``, ``'model'`` and ``'learner'``.
    """
    experiment_config = self.parameter_defaults(
        top_k=100,
        min_time=0,
        seed=0,
        out_file=None,
        filters=[],
        loggers=[],
    )

    model_kwargs = self.parameter_defaults(
        begin_min=-0.01,
        begin_max=0.01,
        dimension=10,
        initialize_all=False,
    )
    factor_model = rs.FactorModel(**model_kwargs)

    updater_kwargs = self.parameter_defaults(
        learning_rate=0.05,
        regularization_rate=0.0,
    )
    gradient_updater = rs.FactorModelGradientUpdater(**updater_kwargs)
    gradient_updater.set_model(factor_model)

    gradient_learner = rs.ImplicitGradientLearner()
    gradient_learner.add_gradient_updater(gradient_updater)
    gradient_learner.set_model(factor_model)

    sampler_kwargs = self.parameter_defaults(
        negative_rate=0.0,
        initialize_all=False,
        seed=0,
    )
    sampler = rs.UniformNegativeSampleGenerator(**sampler_kwargs)
    gradient_learner.set_negative_sample_generator(sampler)

    mse_objective = rs.ObjectiveMSE()
    point_wise_computer = rs.GradientComputerPointWise()
    point_wise_computer.set_objective(mse_objective)
    point_wise_computer.set_model(factor_model)
    gradient_learner.set_gradient_computer(point_wise_computer)

    model_filter = rs.FactorModelFilter()
    model_filter.set_model(factor_model)

    # ``initial_threshold=1000`` was present but commented out in the
    # original parameter list; it is still not passed.
    creator_kwargs = self.parameter_defaults(
        top_k=10000,
        lookback=0,
    )
    toplist_creator = rs.PredictionCreatorGlobal(**creator_kwargs)
    toplist_creator.set_model(factor_model)
    toplist_creator.set_filter(model_filter)

    predictor_kwargs = self.parameter_defaults(
        min_time=0,
        time_frame=86400,
        file_name="",
    )
    predictor = rs.OnlinePredictor(**predictor_kwargs)
    predictor.set_prediction_creator(toplist_creator)
    experiment_config['loggers'].append(predictor)

    return {
        'config': experiment_config,
        'model': factor_model,
        'learner': gradient_learner,
    }
def _fit(self, recommender_data, users, items, matrix):
    """Build a popularity model trained through an online-learner wrapper.

    The wrapper is created with fixed settings (``seed=0``,
    ``number_of_iterations=0``, ``shuffle=False``) that are not routed
    through ``self.parameter_defaults``.

    Parameters
    ----------
    recommender_data, users, items, matrix
        Not used by this method.

    Returns
    -------
    tuple
        ``(model, learner)``.
    """
    popularity_model = rs.PopularityModel()
    popularity_updater = rs.PopularityModelUpdater()
    popularity_updater.set_model(popularity_model)

    wrapper_settings = {
        'seed': 0,
        'number_of_iterations': 0,
        'shuffle': False,
    }
    wrapper = rs.OfflineIteratingOnlineLearnerWrapper(**wrapper_settings)
    wrapper.add_updater(popularity_updater)

    return (popularity_model, wrapper)
def _config(self, top_k, seed):
    """Build the nearest-neighbor model and its updater.

    Parameters
    ----------
    top_k, seed
        Not used by this method.

    Returns
    -------
    tuple
        ``(model, updater, [])``.
    """
    model_kwargs = self.parameter_defaults(
        gamma=0.8,
        norm="num",
        direction="forward",
        gamma_threshold=0,
        num_of_neighbors=10,
    )
    neighbor_model = rs.NearestNeighborModel(**model_kwargs)

    updater_kwargs = self.parameter_defaults(
        compute_similarity_period=86400,
        period_mode="time-based",
    )
    neighbor_updater = rs.NearestNeighborModelUpdater(**updater_kwargs)
    neighbor_updater.set_model(neighbor_model)

    return (neighbor_model, neighbor_updater, [])
def recommend(self, users=None, k=100, exclude_known=True):
    """Give toplist recommendations for users.

    Parameters
    ----------
    users : list
        List of users to give recommendation for. Duplicates are dropped.
        When ``None``, ``self.users`` is used instead.
    k : int
        Size of toplists
    exclude_known : bool
        Whether to exclude (user,item) pairs in the train dataset from the
        toplists. Passed on as ``lookback=1`` (exclude) or ``lookback=0``.

    Returns
    -------
    pandas.DataFrame
        DataFrame of recommendations, with columns **user**, **item** and
        **rank**, sorted by user and then rank.
    """
    rs.collect()

    model_filter = rs.DummyModelFilter()
    model_filter.set_items(self.items)
    model_filter.set_users(self.users)

    lookback = 1 if exclude_known else 0
    toplist_creator = rs.PredictionCreatorPersonalized(
        top_k=k, lookback=lookback)
    toplist_creator.set_filter(model_filter)
    toplist_creator.set_train_matrix(self.matrix)
    toplist_creator.set_model(self.model)

    ranker = rs.OfflineRankingComputer(top_k=k)
    ranker.set_items(self.items)
    if users is not None:
        distinct_users = pd.Series(users).unique().tolist()
        ranker.set_users(rs.VectorInt(distinct_users))
    else:
        ranker.set_users(self.users)
    ranker.set_toplist_creator(toplist_creator)

    collected = rs.get_and_clean()
    # NOTE(review): ``rs.initialize_all(collected)`` is commented out in the
    # original and therefore not called here; confirm that is intended.
    for component in collected:
        rs.run_self_test(component)

    ranking = ranker.compute()
    frame = pd.DataFrame({
        'user': ranking.users,
        'item': ranking.items,
        'rank': ranking.ranks
    })
    ordered = frame.sort_values(['user', 'rank'])
    return ordered[['user', 'item', 'rank']]
def readFactorModel(file, dimensions):
    """Read a binary factor model into a pair of pandas DataFrames.

    Intended for models saved by online experiments.

    Parameters
    ----------
    file
        Passed unchanged to ``ag.FactorModelReader.read``.
    dimensions : int
        Passed to the reader; also determines the factor column labels,
        ``1 .. dimensions``.

    Returns
    -------
    tuple
        ``(user_df, item_df)``, indexed by **user** and **item**
        respectively, one row per entry of the loaded factor lists.
    """
    def factors_to_frame(entries, index_name):
        # One pass over the entries: the ids become the index, the factor
        # vectors become the rows.
        ids = []
        vectors = []
        for entry in entries:
            ids.append(entry.entity)
            vectors.append(entry.factors)
        frame = pd.DataFrame.from_records(
            vectors, columns=range(1, dimensions + 1))
        frame[index_name] = ids
        return frame.set_index(index_name)

    loaded = ag.FactorModelReader().read(file, dimensions)
    user_df = factors_to_frame(loaded.user_factors, 'user')
    item_df = factors_to_frame(loaded.item_factors, 'item')
    return (user_df, item_df)
def _fit(self, recommender_data, users, items, matrix):
    """Build an eigen factor model and its ALS learner.

    Parameters
    ----------
    recommender_data, users, items, matrix
        Not used by this method.

    Returns
    -------
    tuple
        ``(model, learner)``.
    """
    model_kwargs = self.parameter_defaults(
        begin_min=-0.01,
        begin_max=0.01,
        dimension=10,
        seed=67439852,
    )
    eigen_model = rs.EigenFactorModel(**model_kwargs)

    learner_kwargs = self.parameter_defaults(
        number_of_iterations=3,
        regularization_lambda=0.0001,
        alpha=40,
        implicit=1,
        clear_before_fit=1,
    )
    als_learner = rs.OfflineEigenFactorModelALSLearner(**learner_kwargs)
    als_learner.set_model(eigen_model)

    return (eigen_model, als_learner)
def test_init(self):
    """Construct an ``AvailabilityFilter`` from a small three-row table.

    No assertions are made: the filter is only constructed, and a
    ``rs.RecDat`` with ``time = 21`` is prepared but never passed to it.
    """
    df = pd.DataFrame({
        'time': [30, 20, 10],
        'item': [3, 2, 1],
        'duration': [10, 10, 10]
    })
    a = AvailabilityFilter(df)
    # TODO: needs experiment environment
    r = rs.RecDat()
    r.time = 21
def fit(self, X, y=None, columns=None):
    """Fit the model to a dataset.

    Parameters
    ----------
    X : pandas.DataFrame
        The input data, must contain the columns **user** and **item**. May
        contain the **score** column as well. ``X`` is not modified: when a
        **score** column has to be added, it is added to a copy.
    y : pandas.Series or list
        The target values. If not set (and X doesn't contain the score
        column), it is assumed to be constant 1 (implicit recommendation).
    columns : dict
        Optionally the mapping of the input DataFrame's columns' names to
        the expected ones. ``None`` (the default) is treated as an empty
        mapping.

    Raises
    ------
    ValueError
        If ``y`` is given and ``X`` already has a **score** column.
    """
    if columns is None:
        columns = {}

    # Resolve the score column before rs.collect() so that the ValueError
    # below is raised before rs.collect() has been called.
    if y is None:
        if 'score' in X:
            data = X
        else:
            # Copy first: the caller's frame must not gain a column.
            data = X.copy()
            data['score'] = np.ones(len(X))
    elif 'score' in X:
        raise ValueError("y and score column both provided")
    else:
        data = X.copy()
        data['score'] = y

    rs.collect()
    recommender_data = DataframeData(data, columns=columns)

    matrix = recommender_data.get_full_matrix()
    users = rs.VectorInt([])
    items = rs.VectorInt([])
    recommender_data.get_users_into(users)
    recommender_data.get_items_into(items)

    (model, learner) = self._fit(recommender_data, users, items, matrix)

    created_objects = rs.get_and_clean()
    rs.initialize_all(created_objects)
    for i in created_objects:
        rs.run_self_test(i)
    self.check_unused_parameters()

    learner.fit(recommender_data)

    # Everything recommend()/predict() read later is stored on the instance.
    self.objects = created_objects
    self.model = model
    self.items = items
    self.users = users
    self.matrix = matrix
    self.recommender_data = recommender_data
def test_init(self):
    """Build a ``prs.AvailabilityFilter`` and run it on one record.

    The record is an ``rs.RecDat`` with ``time = 21``. No assertions are
    made; the test only checks that construction and ``run`` do not raise.
    """
    availability = pd.DataFrame({
        'time': [30, 20, 10],
        'item': [3, 2, 1],
        'duration': [10, 10, 10],
    })
    availability_filter = prs.AvailabilityFilter(availability)

    record = rs.RecDat()
    record.time = 21
    availability_filter.run(record)
def _config(self, top_k, seed):
    """Build a factor model trained by an implicit gradient learner.

    The learner is given the gradient updater, a uniform negative sample
    generator and a point-wise MSE gradient computer.

    Parameters
    ----------
    top_k, seed
        Not used by this method.

    Returns
    -------
    tuple
        ``(model, learner, [])``.
    """
    model_kwargs = self.parameter_defaults(
        begin_min=-0.01,
        begin_max=0.01,
        dimension=10,
        initialize_all=False,
    )
    factor_model = rs.FactorModel(**model_kwargs)

    updater_kwargs = self.parameter_defaults(
        learning_rate=0.05,
        regularization_rate=0.0,
    )
    gradient_updater = rs.FactorModelGradientUpdater(**updater_kwargs)
    gradient_updater.set_model(factor_model)

    gradient_learner = rs.ImplicitGradientLearner()
    gradient_learner.add_gradient_updater(gradient_updater)
    gradient_learner.set_model(factor_model)

    sampler_kwargs = self.parameter_defaults(
        negative_rate=0.0,
        initialize_all=False,
        seed=67439852,
        filter_repeats=False,
    )
    sampler = rs.UniformNegativeSampleGenerator(**sampler_kwargs)
    gradient_learner.set_negative_sample_generator(sampler)

    mse_objective = rs.ObjectiveMSE()
    point_wise_computer = rs.GradientComputerPointWise()
    point_wise_computer.set_objective(mse_objective)
    point_wise_computer.set_model(factor_model)
    gradient_learner.set_gradient_computer(point_wise_computer)

    return (factor_model, gradient_learner, [])
def _fit(self, recommender_data, users, items, matrix):
    """Build a factor model and an offline iterating implicit learner.

    The model and the learner both read the ``seed`` parameter (default
    254938879) through ``self.parameter_defaults``.

    Parameters
    ----------
    recommender_data
        Handed to the learner via ``set_recommender_data``.
    users
        Not used by this method.
    items
        Handed to the negative sample generator via ``set_items``.
    matrix
        Handed to the negative sample generator via ``set_train_matrix``.

    Returns
    -------
    tuple
        ``(model, learner)``.
    """
    model_kwargs = self.parameter_defaults(
        begin_min=-0.01,
        begin_max=0.01,
        dimension=10,
        initialize_all=False,
        seed=254938879,
    )
    factor_model = rs.FactorModel(**model_kwargs)

    updater_kwargs = self.parameter_defaults(
        learning_rate=0.05,
        regularization_rate=0.0,
    )
    gradient_updater = rs.FactorModelGradientUpdater(**updater_kwargs)
    gradient_updater.set_model(factor_model)

    sampler_kwargs = self.parameter_defaults(negative_rate=0)
    sampler = rs.UniformNegativeSampleGenerator(**sampler_kwargs)
    sampler.set_train_matrix(matrix)
    sampler.set_items(items)

    mse_objective = rs.ObjectiveMSE()
    point_wise_computer = rs.GradientComputerPointWise()
    point_wise_computer.set_objective(mse_objective)
    point_wise_computer.set_model(factor_model)

    learner_kwargs = self.parameter_defaults(
        seed=254938879,
        number_of_iterations=9,
    )
    offline_learner = rs.OfflineIteratingImplicitLearner(**learner_kwargs)
    offline_learner.set_gradient_computer(point_wise_computer)
    offline_learner.set_negative_sample_generator(sampler)
    offline_learner.set_model(factor_model)
    offline_learner.set_recommender_data(recommender_data)
    offline_learner.add_gradient_updater(gradient_updater)

    return (factor_model, offline_learner)
def _config(self, top_k, seed):
    """Build the asymmetric factor model and its updater chain.

    The negative sample generator drives a point-wise MSE gradient
    computer, which in turn drives the gradient updater. A separate
    ``rs.AsymmetricFactorModelUpdater`` is returned alongside the generator.

    Parameters
    ----------
    top_k, seed
        Not used by this method.

    Returns
    -------
    tuple
        ``(model, [negative_sample_generator, simple_updater], [])``.
    """
    model_kwargs = self.parameter_defaults(
        begin_min=-0.01,
        begin_max=0.01,
        dimension=10,
        use_sigmoid=False,
        norm_type="exponential",
        gamma=0.8,
        initialize_all=False,
    )
    asymmetric_model = rs.AsymmetricFactorModel(**model_kwargs)

    updater_kwargs = self.parameter_defaults(
        learning_rate=0.05,
        cumulative_item_updates=False,
    )
    grad_updater = rs.AsymmetricFactorModelGradientUpdater(**updater_kwargs)
    grad_updater.set_model(asymmetric_model)

    plain_updater = rs.AsymmetricFactorModelUpdater()
    plain_updater.set_model(asymmetric_model)

    mse_objective = rs.ObjectiveMSE()
    point_wise_computer = rs.GradientComputerPointWise()
    point_wise_computer.set_objective(mse_objective)
    point_wise_computer.set_model(asymmetric_model)
    point_wise_computer.add_gradient_updater(grad_updater)

    sampler_kwargs = self.parameter_defaults(
        negative_rate=20,
        initialize_all=False,
        seed=928357823,
    )
    sampler = rs.UniformNegativeSampleGenerator(**sampler_kwargs)
    sampler.add_updater(point_wise_computer)

    return (asymmetric_model, [sampler, plain_updater], [])
def _fit(self, recommender_data, users, items, matrix):
    """Build a factor model trained through an online-learner wrapper.

    The model's ``seed`` default comes from the ``factor_seed`` parameter
    (default 67439852) and the learner's from ``learner_seed`` (default
    254938879); both are read with ``self.parameter_default``.

    Parameters
    ----------
    recommender_data, users
        Not used by this method.
    items
        Handed to the negative sample generator via ``set_items``.
    matrix
        Handed to the negative sample generator via ``set_train_matrix``.

    Returns
    -------
    tuple
        ``(model, learner)``.
    """
    factor_seed = self.parameter_default('factor_seed', 67439852)
    model_kwargs = self.parameter_defaults(
        begin_min=-0.01,
        begin_max=0.01,
        dimension=10,
        initialize_all=False,
        seed=factor_seed,
    )
    factor_model = rs.FactorModel(**model_kwargs)

    updater_kwargs = self.parameter_defaults(
        learning_rate=0.05,
        regularization_rate=0.0,
    )
    gradient_updater = rs.FactorModelGradientUpdater(**updater_kwargs)
    gradient_updater.set_model(factor_model)

    mse_objective = rs.ObjectiveMSE()
    point_wise_computer = rs.GradientComputerPointWise()
    point_wise_computer.set_objective(mse_objective)
    point_wise_computer.set_model(factor_model)
    point_wise_computer.add_gradient_updater(gradient_updater)

    sampler_kwargs = self.parameter_defaults(
        negative_rate=0,
        initialize_all=False,
        max_item=-1,
    )
    sampler = rs.UniformNegativeSampleGenerator(**sampler_kwargs)
    sampler.set_train_matrix(matrix)
    sampler.set_items(items)
    sampler.add_updater(point_wise_computer)

    learner_seed = self.parameter_default('learner_seed', 254938879)
    wrapper_kwargs = self.parameter_defaults(
        seed=learner_seed,
        number_of_iterations=9,
        shuffle=True,
    )
    wrapper = rs.OfflineIteratingOnlineLearnerWrapper(**wrapper_kwargs)
    wrapper.add_iterate_updater(sampler)

    return (factor_model, wrapper)
def _fit(self, recommender_data, users, items, matrix):
    """Build a nearest-neighbor model trained through an online-learner wrapper.

    Only ``num_of_neighbors`` (default 10) is read from the parameters;
    every other setting is a fixed literal.

    Parameters
    ----------
    recommender_data, users, items, matrix
        Not used by this method.

    Returns
    -------
    tuple
        ``(model, learner)``.
    """
    neighbor_count = self.parameter_default("num_of_neighbors", 10)
    neighbor_model = rs.NearestNeighborModel(
        gamma=1,
        norm="off",
        direction="both",
        gamma_threshold=0,
        num_of_neighbors=neighbor_count,
    )

    neighbor_updater = rs.NearestNeighborModelUpdater(period_mode="off")
    neighbor_updater.set_model(neighbor_model)

    wrapper = rs.OfflineIteratingOnlineLearnerWrapper(
        seed=254938879,
        number_of_iterations=0,
        shuffle=False,
    )
    wrapper.add_updater(neighbor_updater)

    return (neighbor_model, wrapper)
def config(self, elems):
    """Assemble the SVD++ model and its implicit gradient learner.

    The learner receives both a gradient updater and a simple updater for
    the model, plus a uniform negative sample generator and a point-wise
    MSE gradient computer.

    Parameters
    ----------
    elems : object
        Not used by this method.

    Returns
    -------
    dict
        ``'config'``, ``'model'`` and ``'learner'``.
    """
    experiment_config = self.parameter_defaults(
        top_k=100,
        min_time=0,
        seed=0,
        out_file=None,
        filters=[],
        loggers=[],
    )

    model_kwargs = self.parameter_defaults(
        begin_min=-0.01,
        begin_max=0.01,
        dimension=10,
        use_sigmoid=False,
        norm_type="exponential",
        gamma=0.8,
        user_vector_weight=0.5,
        history_weight=0.5,
    )
    svdpp_model = rs.SvdppModel(**model_kwargs)

    updater_kwargs = self.parameter_defaults(
        learning_rate=0.05,
        cumulative_item_updates=False,
    )
    grad_updater = rs.SvdppModelGradientUpdater(**updater_kwargs)
    grad_updater.set_model(svdpp_model)

    plain_updater = rs.SvdppModelUpdater()
    plain_updater.set_model(svdpp_model)

    gradient_learner = rs.ImplicitGradientLearner()
    gradient_learner.add_gradient_updater(grad_updater)
    gradient_learner.add_simple_updater(plain_updater)
    gradient_learner.set_model(svdpp_model)

    sampler_kwargs = self.parameter_defaults(
        negative_rate=20,
        initialize_all=False,
        seed=928357823,
    )
    sampler = rs.UniformNegativeSampleGenerator(**sampler_kwargs)
    gradient_learner.set_negative_sample_generator(sampler)

    mse_objective = rs.ObjectiveMSE()
    point_wise_computer = rs.GradientComputerPointWise()
    point_wise_computer.set_objective(mse_objective)
    point_wise_computer.set_model(svdpp_model)
    gradient_learner.set_gradient_computer(point_wise_computer)

    return {
        'config': experiment_config,
        'model': svdpp_model,
        'learner': gradient_learner,
    }
def _config(self, top_k, seed):
    """Build a factor model driven by a negative sample generator.

    The generator (``negative_rate`` default 100) feeds a point-wise MSE
    gradient computer, which feeds the model's gradient updater. No
    experiment-level config dict is built here.

    Parameters
    ----------
    top_k, seed
        Not used by this method.

    Returns
    -------
    tuple
        ``(model, [negative_sample_generator], [])``.
    """
    model_kwargs = self.parameter_defaults(
        begin_min=-0.01,
        begin_max=0.01,
        dimension=10,
        initialize_all=False,
    )
    factor_model = rs.FactorModel(**model_kwargs)

    updater_kwargs = self.parameter_defaults(
        learning_rate=0.05,
        regularization_rate=0.0,
    )
    gradient_updater = rs.FactorModelGradientUpdater(**updater_kwargs)
    gradient_updater.set_model(factor_model)

    mse_objective = rs.ObjectiveMSE()
    point_wise_computer = rs.GradientComputerPointWise()
    point_wise_computer.set_objective(mse_objective)
    point_wise_computer.set_model(factor_model)
    point_wise_computer.add_gradient_updater(gradient_updater)

    sampler_kwargs = self.parameter_defaults(
        negative_rate=100,
        initialize_all=False,
        seed=67439852,
        filter_repeats=False,
    )
    sampler = rs.UniformNegativeSampleGenerator(**sampler_kwargs)
    sampler.add_updater(point_wise_computer)

    return (factor_model, [sampler], [])
def _config(self, top_k, seed):
    """Build a factor model with a periodically retrained offline learner.

    The learner class depends on the ``timeframe_length`` parameter:
    ``0`` (the default) selects ``rs.OfflineImplicitGradientLearner`` and
    the parameter is not passed to it; any other value selects
    ``rs.PeriodicTimeframeImplicitGradientLearner``, which receives it.

    Parameters
    ----------
    top_k, seed
        Not used by this method.

    Returns
    -------
    tuple
        ``(model, learner, [], [])``.
    """
    model_kwargs = self.parameter_defaults(
        begin_min=-0.01,
        begin_max=0.01,
        dimension=10,
        initialize_all=False,
    )
    factor_model = rs.FactorModel(**model_kwargs)

    updater_kwargs = self.parameter_defaults(
        learning_rate=0.05,
        regularization_rate=0.0,
    )
    gradient_updater = rs.FactorModelGradientUpdater(**updater_kwargs)
    gradient_updater.set_model(factor_model)

    learner_kwargs = self.parameter_defaults(
        number_of_iterations=3,
        start_time=-1,
        period_length=86400,
        write_model=False,
        read_model=False,
        clear_model=False,
        learn=True,
        base_out_file_name="",
        base_in_file_name="",
        timeframe_length=0,
    )
    if learner_kwargs['timeframe_length'] != 0:
        batch_learner = rs.PeriodicTimeframeImplicitGradientLearner(
            **learner_kwargs)
    else:
        # The plain offline learner is constructed without this keyword.
        learner_kwargs.pop('timeframe_length', None)
        batch_learner = rs.OfflineImplicitGradientLearner(**learner_kwargs)
    batch_learner.set_model(factor_model)
    batch_learner.add_gradient_updater(gradient_updater)

    sampler_kwargs = self.parameter_defaults(
        negative_rate=0,
        initialize_all=False,
        seed=67439852,
        filter_repeats=False,
    )
    sampler = rs.UniformNegativeSampleGenerator(**sampler_kwargs)
    batch_learner.set_negative_sample_generator(sampler)

    mse_objective = rs.ObjectiveMSE()
    point_wise_computer = rs.GradientComputerPointWise()
    point_wise_computer.set_objective(mse_objective)
    point_wise_computer.set_model(factor_model)
    batch_learner.set_gradient_computer(point_wise_computer)

    return (factor_model, batch_learner, [], [])
def predict(self, X):
    """Predict the target values on X.

    Uses an ``rs.MassPredictor`` bound to ``self.model``.

    Parameters
    ----------
    X : pandas.DataFrame
        The input data, must contain the columns **user** and **item**.

    Returns
    -------
    list
        List of predictions
    """
    mass_predictor = rs.MassPredictor()
    mass_predictor.set_model(self.model)

    user_ids = X['user'].tolist()
    item_ids = X['item'].tolist()
    return mass_predictor.predict(user_ids, item_ids)
def _config(self, top_k, seed):
    """Build a combined model made of a popularity and a factor model.

    The popularity model gets its own updater. The factor model is
    trained through a negative sample generator that feeds a point-wise
    MSE gradient computer, which feeds the factor gradient updater.

    Parameters
    ----------
    top_k, seed
        Not used by this method.

    Returns
    -------
    tuple
        ``(combined_model, [pop_updater, negative_sample_generator], [], [])``.
    """
    # NOTE(review): ``los_file_name`` looks like a typo for
    # ``log_file_name``; kept as-is, confirm against CombinedModel's
    # accepted parameters before renaming.
    combined_kwargs = self.parameter_defaults(
        los_file_name="my_log_file",
        log_frequency=100000,
        use_user_weights=False,
    )
    combined_model = rs.CombinedModel(**combined_kwargs)

    popularity_model = rs.PopularityModel()
    combined_model.add_model(popularity_model)
    popularity_updater = rs.PopularityModelUpdater()
    popularity_updater.set_model(popularity_model)

    factor_kwargs = self.parameter_defaults(
        begin_min=-0.01,
        begin_max=0.01,
        dimension=10,
        initialize_all=False,
    )
    factor_part = rs.FactorModel(**factor_kwargs)
    combined_model.add_model(factor_part)

    updater_kwargs = self.parameter_defaults(
        learning_rate=0.05,
        regularization_rate=0.0,
    )
    factor_grad_updater = rs.FactorModelGradientUpdater(**updater_kwargs)
    factor_grad_updater.set_model(factor_part)

    mse_objective = rs.ObjectiveMSE()
    point_wise_computer = rs.GradientComputerPointWise()
    point_wise_computer.set_objective(mse_objective)
    point_wise_computer.set_model(factor_part)
    point_wise_computer.add_gradient_updater(factor_grad_updater)

    sampler_kwargs = self.parameter_defaults(
        negative_rate=10,
        initialize_all=False,
        seed=67439852,
        filter_repeats=False,
    )
    sampler = rs.UniformNegativeSampleGenerator(**sampler_kwargs)
    sampler.add_updater(point_wise_computer)

    return (combined_model, [popularity_updater, sampler], [], [])
def config(self, elems):
    """Assemble a factor model and an offline implicit gradient learner.

    Nothing from ``elems`` is wired in: the calls that would pass
    ``elems['recommender_data_iterator']`` to the learner and
    ``elems['train_matrix']`` / ``elems['items']`` to the negative sample
    generator are commented out in the original and remain disabled.

    Parameters
    ----------
    elems : object
        Not used by this method.

    Returns
    -------
    dict
        ``'config'``, ``'model'`` and ``'learner'``.
    """
    experiment_config = self.parameter_defaults(
        top_k=100,
        min_time=0,
        loggers=[],
    )

    model_kwargs = self.parameter_defaults(
        begin_min=-0.01,
        begin_max=0.01,
        dimension=10,
        initialize_all=False,
    )
    factor_model = rs.FactorModel(**model_kwargs)

    updater_kwargs = self.parameter_defaults(
        learning_rate=0.05,
        regularization_rate=0.0,
    )
    gradient_updater = rs.FactorModelGradientUpdater(**updater_kwargs)
    gradient_updater.set_model(factor_model)

    learner_kwargs = self.parameter_defaults(
        number_of_iterations=3,
        start_time=-1,
        period_length=86400,
        write_model=False,
        read_model=False,
        clear_model=False,
        learn=True,
        base_out_file_name="",
        base_in_file_name="",
    )
    offline_learner = rs.OfflineImplicitGradientLearner(**learner_kwargs)
    offline_learner.set_model(factor_model)
    offline_learner.add_gradient_updater(gradient_updater)

    sampler_kwargs = self.parameter_defaults(
        negative_rate=0,
        initialize_all=False,
    )
    sampler = rs.UniformNegativeSampleGenerator(**sampler_kwargs)
    offline_learner.set_negative_sample_generator(sampler)

    mse_objective = rs.ObjectiveMSE()
    point_wise_computer = rs.GradientComputerPointWise()
    point_wise_computer.set_objective(mse_objective)
    point_wise_computer.set_model(factor_model)
    offline_learner.set_gradient_computer(point_wise_computer)

    return {
        'config': experiment_config,
        'model': factor_model,
        'learner': offline_learner,
    }
def _fit(self, recommender_data, users, items, matrix):
    """Build an SVD++ model trained through an online-learner wrapper.

    The simple updater is registered on the wrapper with
    ``add_early_updater``; the negative sample generator (which drives the
    gradient computer and gradient updater) with ``add_iterate_updater``.

    Parameters
    ----------
    recommender_data, users
        Not used by this method.
    items
        Handed to the negative sample generator via ``set_items``.
    matrix
        Handed to the negative sample generator via ``set_train_matrix``.

    Returns
    -------
    tuple
        ``(model, learner)``.
    """
    # These three are looked up individually first, then passed on as the
    # defaults of the same-named parameters, exactly as in the original.
    begin_min = self.parameter_default("begin_min", -0.01)
    begin_max = self.parameter_default("begin_max", 0.01)
    dimension = self.parameter_default("dimension", 10)
    model_kwargs = self.parameter_defaults(
        begin_min=begin_min,
        begin_max=begin_max,
        dimension=dimension,
        use_sigmoid=False,
        norm_type="constant",
        gamma=1,
        user_vector_weight=0.5,
        history_weight=0.5,
    )
    svdpp_model = rs.SvdppModel(**model_kwargs)

    updater_kwargs = self.parameter_defaults(
        learning_rate=0.05,
        cumulative_item_updates=False,
    )
    grad_updater = rs.SvdppModelGradientUpdater(**updater_kwargs)
    grad_updater.set_model(svdpp_model)

    plain_updater = rs.SvdppModelUpdater()
    plain_updater.set_model(svdpp_model)

    mse_objective = rs.ObjectiveMSE()
    point_wise_computer = rs.GradientComputerPointWise()
    point_wise_computer.set_objective(mse_objective)
    point_wise_computer.set_model(svdpp_model)
    point_wise_computer.add_gradient_updater(grad_updater)

    sampler_kwargs = self.parameter_defaults(
        negative_rate=9,
        initialize_all=False,
        max_item=-1,
    )
    sampler = rs.UniformNegativeSampleGenerator(**sampler_kwargs)
    sampler.set_train_matrix(matrix)
    sampler.set_items(items)
    sampler.add_updater(point_wise_computer)

    wrapper_kwargs = self.parameter_defaults(
        seed=254938879,
        number_of_iterations=20,
        shuffle=True,
    )
    wrapper = rs.OfflineIteratingOnlineLearnerWrapper(**wrapper_kwargs)
    wrapper.add_early_updater(plain_updater)
    wrapper.add_iterate_updater(sampler)

    return (svdpp_model, wrapper)