def produce_agent_stats(
        env,
        std_env_args,
        agent: Agent,
        num_products: int,
        num_organic_users_to_train: int,
        num_users_to_train: int,
        num_users_to_score: int,
        random_seed: int,
        agent_class,
        agent_configs,
        agent_name: str,
        with_cache: bool,
):
    stat_epochs = 1
    stat_epochs_new_random_seed = True

    training_data_samples = tuple([num_users_to_train])
    testing_data_samples = num_users_to_score

    time_start = datetime.datetime.now()

    agent_stats = gather_agent_stats(
        env,
        std_env_args,
        {
            'agent': agent,
        },
        {
            **build_agent_init(agent_name, agent_class, {
                **agent_configs,
                'num_products': num_products,
            }),
        },
        training_data_samples,
        testing_data_samples,
        stat_epochs,
        stat_epochs_new_random_seed,
        num_organic_users_to_train,
        with_cache
    )

    q0_025 = []
    q0_500 = []
    q0_975 = []
    for agent_name in agent_stats[AgentStats.AGENTS]:
        agent_values = agent_stats[AgentStats.AGENTS][agent_name]
        q0_025.append(agent_values[AgentStats.Q0_025][0])
        q0_500.append(agent_values[AgentStats.Q0_500][0])
        q0_975.append(agent_values[AgentStats.Q0_975][0])

    time_end = datetime.datetime.now()
    seconds = (time_end - time_start).total_seconds()

    return pd.DataFrame({
        'q0.025': q0_025,
        'q0.500': q0_500,
        'q0.975': q0_975,
        'time': [seconds],
    })
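# A minimal local sketch of how produce_agent_stats might be invoked; the
# RandomAgent baseline and the concrete argument values below are illustrative
# assumptions, not taken from the surrounding code.
if __name__ == "__main__":
    import gym
    from recogym import env_1_args, Configuration
    from recogym.agents import RandomAgent, random_args

    env = gym.make('reco-gym-v1')
    stats = produce_agent_stats(
        env,
        {**env_1_args, 'num_products': 10},
        agent=RandomAgent(Configuration({**random_args, 'num_products': 10})),
        num_products=10,
        num_organic_users_to_train=100,
        num_users_to_train=1000,
        num_users_to_score=1000,
        random_seed=42,
        agent_class=RandomAgent,
        agent_configs=random_args,
        agent_name='RandomAgent',
        with_cache=False,
    )
    print(stats)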
        features = self.feature_provider.features(observation)
        a_ps_psa_dict = self.model.act(observation, features)

        # Update previous feature set for next online learning session
        self.previous_features = features
        self.previous_action = a_ps_psa_dict['a']

        return {
            't': observation.context().time(),
            'u': observation.context().user(),
            **a_ps_psa_dict,
        }


global_stats = []

agent = build_agent_init("PirateAgent", PirateAgent, {**pirate_agent_args})

###
# Test & train locally; this is not used when submitting but helps for local debugging
#
if __name__ == "__main__":
    import gym
    from recogym import env_1_args
    from recogym.bench_agents import test_agent

    num_products = 10
    num_users = 300

    pirate_agent = PirateAgent(
        Configuration({
        X = X[mask]
        A = A[mask]
        y = y[mask]
        pss = pss[mask]

        n_clicks = np.sum(deltas)

        # Explicitly build one-hot matrix for actions
        # NOTE: A_one_hot is built here but not used by the multinomial fit below
        A_one_hot = np.zeros((n_clicks, P))
        A_one_hot[np.arange(n_clicks), A] = 1

        # Train a model
        model = LogisticRegression(
            solver='lbfgs',
            multi_class='multinomial'
        ).fit(X, A, sample_weight=1 / pss)

        return (
            LogisticRegression_SKLearnFeaturesProvider(self.config),
            LogisticRegression_SKLearnModel(self.config, model)
        )


class LogisticRegression_SKLearnAgent(ModelBasedAgent):
    """
    Scikit-Learn-based logistic regression Agent.
    """

    def __init__(self, config=Configuration(logistic_regression_sklearn_args)):
        super(LogisticRegression_SKLearnAgent, self).__init__(
            config,
            LogisticRegression_SKLearnModelBuilder(config)
        )


agent = build_agent_init(
    'LogisticRegression_SKLearnAgent',
    LogisticRegression_SKLearnAgent,
    {**logistic_regression_sklearn_args}
)
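# The sample_weight=1/pss call above reweights each logged example by the
# inverse of the logging policy's propensity (inverse propensity scoring), so
# the multinomial model corrects for the logging policy's action distribution.
# A tiny self-contained sketch of the same idea on toy data (illustrative
# values only):
if __name__ == "__main__":
    import numpy as np
    from sklearn.linear_model import LogisticRegression

    rng = np.random.default_rng(0)
    X_toy = rng.normal(size=(200, 5))          # user features
    a_toy = rng.integers(0, 3, size=200)       # logged actions
    pss_toy = rng.uniform(0.2, 0.8, size=200)  # logging propensities

    ips_model = LogisticRegression(multi_class='multinomial', solver='lbfgs')
    ips_model.fit(X_toy, a_toy, sample_weight=1 / pss_toy)
    print(ips_model.predict_proba(X_toy[:2]))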
                training_matrix[(i, j)] = X[i][j // P]

        training_matrix = training_matrix.tocsr()

        model = DecisionTreeClassifier().fit(training_matrix, y)

        return (DecisionTreeFeaturesProvider(self.config),
                DecisionTreeModel(self.config, model))


class DecisionTreeAgent(ModelBasedAgent):
    def __init__(self, config=Configuration(decision_tree_args)):
        super(DecisionTreeAgent, self).__init__(config, DecisionTreeModelBuilder(config))


agent = build_agent_init("DecisionTreeAgent", DecisionTreeAgent, {**decision_tree_args})
from recogym import build_agent_init
from recogym.agents import PyTorchMLRAgent, pytorch_mlr_args

pytorch_mlr_args['n_epochs'] = 30
pytorch_mlr_args['learning_rate'] = 0.01
pytorch_mlr_args['logIPS'] = True

agent = build_agent_init('PyTorchMLRAgent', PyTorchMLRAgent, {**pytorch_mlr_args})
def competition_score(
        num_products: int,
        num_organic_users_to_train: int,
        num_users_to_train: int,
        num_users_to_score: int,
        random_seed: int,
        latent_factor: int,
        num_flips: int,
        log_epsilon: float,
        sigma_omega: float,
        agent_class,
        agent_configs,
        agent_name: str,
        with_cache: bool,
):
    training_data_samples = tuple([num_users_to_train])
    testing_data_samples = num_users_to_score
    stat_epochs = 1
    stat_epochs_new_random_seed = True

    std_env_args = {
        **env_1_args,
        'random_seed': random_seed,
        'num_products': num_products,
        'K': latent_factor,
        'sigma_omega': sigma_omega,
        'number_of_flips': num_flips,
    }

    env = gym.make('reco-gym-v1')

    # Start timing before training/evaluation so the reported time covers
    # gather_agent_stats as well (matching produce_agent_stats above).
    time_start = datetime.datetime.now()

    agent_stats = gather_agent_stats(
        env,
        std_env_args,
        {
            'agent': OrganicUserEventCounterAgent(Configuration({
                **organic_user_count_args,
                **std_env_args,
                'select_randomly': True,
                'epsilon': log_epsilon,
                'num_products': num_products,
            })),
        },
        {
            **build_agent_init(agent_name, agent_class, {
                **agent_configs,
                'num_products': num_products,
            }),
        },
        training_data_samples,
        testing_data_samples,
        stat_epochs,
        stat_epochs_new_random_seed,
        num_organic_users_to_train,
        with_cache
    )

    q0_025 = []
    q0_500 = []
    q0_975 = []
    for agent_name in agent_stats[AgentStats.AGENTS]:
        agent_values = agent_stats[AgentStats.AGENTS][agent_name]
        q0_025.append(agent_values[AgentStats.Q0_025][0])
        q0_500.append(agent_values[AgentStats.Q0_500][0])
        q0_975.append(agent_values[AgentStats.Q0_975][0])

    time_end = datetime.datetime.now()
    seconds = (time_end - time_start).total_seconds()

    return pd.DataFrame({
        'q0.025': q0_025,
        'q0.500': q0_500,
        'q0.975': q0_975,
        'time': [seconds],
    })
from recogym import build_agent_init
from recogym.agents import BanditCount, bandit_count_args

agent = build_agent_init('BanditCount', BanditCount, {**bandit_count_args})
import numpy as np

from recogym import build_agent_init
from recogym.agents import LogregPolyAgent, logreg_poly_args

agent = build_agent_init(
    'LikelihoodWithTime',
    LogregPolyAgent,
    {
        **logreg_poly_args,
        'weight_history_function': lambda t: np.exp(-t),
    }
)
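# The lambda above down-weights older history exponentially: an event that is
# t time steps old contributes with weight exp(-t). A quick illustration of
# the decay schedule (how LogregPolyAgent consumes these weights internally is
# not shown here):
if __name__ == "__main__":
    for t in (0, 1, 2, 5):
        print(t, np.exp(-t))  # 1.0, ~0.37, ~0.14, ~0.0067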
from recogym import build_agent_init
from recogym.agents import RandomAgent, random_args

agent = build_agent_init('RandomAgent', RandomAgent, {**random_args})
    def __init__(self, config=Configuration(keras_nn_args)):
        self.previous_features = None
        self.previous_action = None
        self.model = None
        super(KerasNNAgent, self).__init__(config, KerasNNModelBuilder(config))

    def act(self, observation, reward, done):
        if self.model is None:
            self.feature_provider, self.model = self.model_builder.build()

        # Only retrain online when the previous action led to a click
        if self.config.online_training and reward == 1:
            self.model.train_online(self.previous_features, self.previous_action, reward)

        self.feature_provider.observe(observation)
        features = self.feature_provider.features(observation)
        action_dict = self.model.act(observation, features)

        self.previous_features = features
        self.previous_action = action_dict

        return {
            "t": observation.context().time(),
            "u": observation.context().user(),
            **action_dict,
        }


agent = build_agent_init('KerasNNAgent', KerasNNAgent, {**keras_nn_args})
    def __init__(self, config, product_encodings, metric='cosine'):
        super(ClosestProductModel, self).__init__(config)
        self.product_encodings = product_encodings
        self.metric = metric

    def act(self, observation, features):
        dists = cdist(
            features.reshape(1, -1),
            self.product_encodings,
            self.metric,
        )
        return {
            **super().act(observation, features),
            'a': np.argmin(dists),
            'ps': 1,
        }


class TestAgent(ModelBasedAgent):
    def __init__(self, config):
        super(TestAgent, self).__init__(config, MFModelBuilder(config))


test_agent_args = {
    'num_products': 10,
    'num_latent_factors': 20,
    'random_seed': np.random.randint(2**31 - 1),
}

agent = build_agent_init("MFAgent", TestAgent, test_agent_args)
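# ClosestProductModel recommends the product whose latent encoding is nearest
# (under the configured metric) to the current user features. A self-contained
# sketch of the same nearest-neighbour step with toy encodings (illustrative
# shapes and values only):
if __name__ == "__main__":
    import numpy as np
    from scipy.spatial.distance import cdist

    rng = np.random.default_rng(0)
    encodings = rng.normal(size=(10, 20))  # 10 products, 20 latent factors
    user_vec = rng.normal(size=20)

    dists = cdist(user_vec.reshape(1, -1), encodings, 'cosine')
    print('closest product:', int(np.argmin(dists)))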
        '''

        # Train a model
        model = SVC(
            C=1.0,
            kernel='sigmoid',
            coef0=0.0,
            probability=True,
            tol=1e-3,
            class_weight=None,
            max_iter=-1,
            random_state=42,
        ).fit(training_matrix, y)
        print('Model: {}'.format(model))

        return (SimpleAgentFeaturesProvider(self.config),
                SimpleAgentModel(self.config, model))


class SimpleAgent(ModelBasedAgent):
    """
    Scikit-Learn-based SVM Agent.
    """

    def __init__(self, config=Configuration(model_sklearn_args)):
        super(SimpleAgent, self).__init__(config, SimpleAgentModelBuilder(config))


agent = build_agent_init('SimpleAgent', SimpleAgent, {**model_sklearn_args})
        Specifies how multi-class classification problems are handled.
        Supported are 'one_vs_rest' and 'one_vs_one'. In 'one_vs_rest',
        one binary Gaussian process classifier is fitted for each class,
        which is trained to separate this class from the rest. In
        'one_vs_one', one binary Gaussian process classifier is fitted for
        each pair of classes, which is trained to separate these two
        classes. The predictions of these binary predictors are combined
        into multi-class predictions. Note that 'one_vs_one' does not
        support predicting probability estimates. Default is 'one_vs_rest'.
        '''

        # Train a model
        model = GaussianProcessClassifier().fit(training_matrix, y)

        return (GaussianProcessAgentFeaturesProvider(self.config),
                GaussianProcessAgentModel(self.config, model))


class GaussianProcessAgent(ModelBasedAgent):
    """
    Scikit-Learn-based Gaussian Process Agent.
    """

    def __init__(self, config=Configuration(model_sklearn_args)):
        super(GaussianProcessAgent, self).__init__(config, GaussianProcessAgentModelBuilder(config))


agent = build_agent_init('GaussianProcessAgent', GaussianProcessAgent, {**model_sklearn_args})
        # Build model first if not yet done
        if self.model is None:
            assert self.feature_provider is None
            self.feature_provider, self.model = self.model_builder.build()

        # Now that we have the reward, train based on previous features and reward we got for our action
        if self.config.online_training and reward is not None:
            self.model.train_online(self.previous_features, self.previous_action, reward)

        # Update the feature provider with this new observation
        self.feature_provider.observe(observation)

        # Get the new features
        features = self.feature_provider.features(observation)
        a_ps_psa_dict = self.model.act(observation, features)

        # Update previous feature set for next online learning session
        self.previous_features = features
        self.previous_action = a_ps_psa_dict["a"]

        return {
            "t": observation.context().time(),
            "u": observation.context().user(),
            **a_ps_psa_dict,
        }


global_stats = []

agent = build_agent_init("MultinomialNaiveBayes", TestAgent, {**test_agent_args})
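# The act() pattern above (lazy build, then train_online on the previous
# step's features/action once the reward arrives) assumes the wrapped model
# exposes a train_online method. A minimal sketch of such a model using
# sklearn's partial_fit -- the OnlineClickModel class and its internals are
# assumptions for illustration, not the original model code:
if __name__ == "__main__":
    import numpy as np
    from sklearn.linear_model import SGDClassifier

    class OnlineClickModel:
        def __init__(self):
            self.clf = SGDClassifier(loss='log_loss')
            self.classes = np.array([0, 1])

        def train_online(self, features, action, reward):
            # One incremental step on the (features, action) pair,
            # labelled by whether it was clicked.
            x = np.append(features, action).reshape(1, -1)
            self.clf.partial_fit(x, [int(reward)], classes=self.classes)

    model = OnlineClickModel()
    model.train_online(np.ones(5), action=2, reward=1)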
        if self.model is None:
            self.feature_provider, self.model = self.model_builder.build()

        self.feature_provider.observe(observation)
        features = self.feature_provider.features(observation)

        if self.config.online_training and reward is not None:
            print("Going into online training")
            self.model.train_online(self.previous_features, self.previous_action, reward)

        action_dict = self.model.act(observation, features)

        self.previous_features = features
        self.feature_count += 1
        self.previous_action = action_dict["a"]

        return {
            "t": observation.context().time(),
            "u": observation.context().user(),
            **action_dict,
        }


agent = build_agent_init("MLPAgent", MLPAgent, {**mlp_args})
    def act(self, observation, features):
        # Choose the item the user has already seen most often organically
        features = features.ravel()
        action = np.argmax(features)
        ps_all = np.zeros_like(features)
        ps_all[action] = 1.0
        return {
            **super().act(observation, features),
            **{
                'a': action,
                'ps': 1.0,
                'ps-a': ps_all,
            },
        }


class PersonalOrganicModelBuilder(ModelBuilder):
    def build(self):
        return (PersonalOrganicFeaturesProvider(self.config),
                PersonalOrganicModel(self.config))


class TestAgent(ModelBasedAgent):
    """
    Agent that performs the action the user has organically seen most often already
    """

    def __init__(self, config=Configuration(test_agent_args)):
        super(TestAgent, self).__init__(config, PersonalOrganicModelBuilder(config))


agent = build_agent_init('PersonalOrganicAgent', TestAgent, {**test_agent_args})
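# PersonalOrganicModel is a deterministic policy: the propensity of the chosen
# action is 1 and 'ps-a' is the corresponding one-hot vector over products.
# A tiny illustration with made-up organic view counts:
if __name__ == "__main__":
    import numpy as np

    views = np.array([3, 0, 7, 1])   # organic views per product (toy data)
    action = np.argmax(views)        # recommend product 2
    ps_all = np.zeros_like(views, dtype=float)
    ps_all[action] = 1.0
    print(action, ps_all)            # 2 [0. 0. 1. 0.]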
            max_iter=10000)
        self.reset_count = -1
        self.reset_online()

    def train(self, features, actions, clicks, pss):
        print("[Train LogRegModel]")
        self.model.fit(features, actions, sample_weight=1 / pss)

    def act(self, observation, features):
        # X is a vector of organic counts
        # features = features.reshape((1, len(features)))
        predictions = self.model.predict_proba(features)

        # Take the action we think is most likely to yield a click
        action = np.argmax(predictions)
        ps_all = np.zeros(self.config.num_products)
        ps_all[action] = 1.0
        return {
            **super().act(observation, features),
            **{
                'a': action,
                'ps': 1.0,
                'ps-a': ps_all,
            },
        }


# Offline training only
agent = build_agent_init("AE_LogReg_Agent_Offline", TestAgent, test_agent_args)
from recogym import build_agent_init
from recogym.agents import LogregMulticlassIpsAgent, logreg_multiclass_ips_args

agent = build_agent_init('Contextual Bandit', LogregMulticlassIpsAgent, {
    **logreg_multiclass_ips_args,
})
from recogym import build_agent_init
from recogym.agents import LogregPolyAgent, logreg_poly_args

agent = build_agent_init('likelihood', LogregPolyAgent, {
    **logreg_poly_args,
})
import pandas as pd
import numpy as np

from recogym import Configuration, build_agent_init
from recogym.agents import OrganicUserEventCounterAgent, organic_user_count_args

dump_agent_args = {
    'agent': build_agent_init(
        'OrganicUserCount',
        OrganicUserEventCounterAgent,
        {**organic_user_count_args}
    )
}


class DumpAgent(OrganicUserEventCounterAgent):
    """
    Dump Agent

    This is the Agent that dumps all its `train' and `act' calls.
    It is used mostly for debugging purposes.
    """

    def __init__(self, config=Configuration(dump_agent_args)):
        super(DumpAgent, self).__init__(config)
        self.previous_action = None
        self.data = {
            'case': [],
            't': [],
            'u': [],
from recogym import build_agent_init
from recogym.agents import BanditMFSquare, bandit_mf_square_args

agent = build_agent_init('BanditMFsquare', BanditMFSquare, {**bandit_mf_square_args})
        self.previous_action = None
        self.model = None
        super(MNBAgent, self).__init__(
            config,
            MNBBuilder(config)
        )

    def act(self, observation, reward, done):
        if self.model is None:
            self.feature_provider, self.model = self.model_builder.build()

        if self.config.online_training and reward == 1:
            self.model.train_online(self.previous_features, self.previous_action, reward)

        self.feature_provider.observe(observation)
        features = self.feature_provider.features(observation)
        action_dict = self.model.act(observation, features)

        self.previous_features = features
        self.previous_action = action_dict

        return {
            "t": observation.context().time(),
            "u": observation.context().user(),
            **action_dict,
        }


agent = build_agent_init('MNBAgent', MNBAgent, {**mnb_args})
from recogym import build_agent_init
from recogym.agents.organic_user_count import organic_user_count_args, OrganicUserEventCounterAgent

agent = build_agent_init('OrganicUserCount', OrganicUserEventCounterAgent, {**organic_user_count_args})
        # Explicitly build one-hot matrix for actions
        A_one_hot = np.zeros((N, P))
        A_one_hot[np.arange(N), A] = 1

        # TODO - this really doesn't scale, maybe sparsify?
        training_matrix = []
        for x, a in zip(X, A_one_hot):
            training_matrix.append(np.kron(x, a))
        training_matrix = np.asarray(training_matrix)

        # Train a model
        model = MultinomialNB().fit(training_matrix, y)

        return (MultinomialNaiveBayesFeaturesProvider(self.config),
                MultinomialNaiveBayesModel(self.config, model))


class MultinomialNaiveBayesAgent(ModelBasedAgent):
    """
    Scikit-Learn-based Multinomial Naive Bayes Agent.
    """

    def __init__(self, config=Configuration(multinomial_naive_bayes_args)):
        super(MultinomialNaiveBayesAgent, self).__init__(config, MultinomialNaiveBayesModelBuilder(config))


agent = build_agent_init(
    'MultinomialNaiveBayesAgent',
    MultinomialNaiveBayesAgent,
    {**multinomial_naive_bayes_args}
)
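# Re the TODO above: each kron(x, one_hot(a)) row has at most P non-zeros, so
# the training matrix can be built sparsely instead of densely. A sketch using
# scipy.sparse -- illustrative toy data, with variable names mirroring the
# builder above, not the original code:
if __name__ == "__main__":
    import numpy as np
    from scipy.sparse import csr_matrix

    N, P = 5, 4
    rng = np.random.default_rng(0)
    X_demo = rng.integers(0, 3, size=(N, P)).astype(float)  # organic counts
    A_demo = rng.integers(0, P, size=N)                     # logged actions

    # Row i holds X_demo[i, k] at column k * P + A_demo[i], matching
    # np.kron(x, one_hot(a)) without materialising the dense matrix.
    rows = np.repeat(np.arange(N), P)
    cols = (np.arange(P)[None, :] * P + A_demo[:, None]).ravel()
    sparse_training = csr_matrix((X_demo.ravel(), (rows, cols)), shape=(N, P * P))

    dense_check = np.array([np.kron(x, np.eye(P)[a]) for x, a in zip(X_demo, A_demo)])
    assert np.allclose(sparse_training.toarray(), dense_check)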
        # Build model first if not yet done
        if self.model is None:
            assert self.feature_provider is None
            self.feature_provider, self.model = self.model_builder.build()

        # Now that we have the reward, train based on previous features and reward we got for our action
        if self.config.online_training and reward is not None:
            self.model.update_data(self.previous_features, self.previous_action, reward)

        # Update the feature provider with this new observation
        self.feature_provider.observe(observation)

        # Get the new features
        features = self.feature_provider.features(observation)
        a_ps_psa_dict = self.model.act(observation, features)

        # Update previous feature set for next online learning session
        self.previous_features = features
        self.previous_action = a_ps_psa_dict["a"]

        return {
            "t": observation.context().time(),
            "u": observation.context().user(),
            **a_ps_psa_dict,
        }


global_stats = []

agent = build_agent_init("MultinomialNBAgent", TestAgent, {**test_agent_args})
        # Build model first if not yet done
        if self.model is None:
            assert self.feature_provider is None
            self.feature_provider, self.model = self.model_builder.build()

        # Now that we have the reward, train based on previous features and reward we got for our action
        if self.config.online_training and reward is not None:
            self.model.update_data(
                self.previous_features,
                self.previous_action,
                reward
            )

        # Update the feature provider with this new observation
        self.feature_provider.observe(observation)

        # Get the new features
        features = self.feature_provider.features(observation)
        a_ps_psa_dict = self.model.act(observation, features)

        # Update previous feature set for next online learning session
        self.previous_features = features
        self.previous_action = a_ps_psa_dict["a"]

        return {
            "t": observation.context().time(),
            "u": observation.context().user(),
            **a_ps_psa_dict,
        }


agent = build_agent_init('LogisticRegression_SKLearnAgent_Egreedy', TestAgent, test_agent_args)
        if self.model is None:
            assert self.feature_provider is None
            self.feature_provider, self.model = self.model_builder.build()

        # Now that we have the reward, train based on previous features and reward we got for our action
        if self.config.online_training and reward is not None:
            self.model.train_online(
                self.previous_features,
                self.previous_action,
                reward
            )

        # Update the feature provider with this new observation
        self.feature_provider.observe(observation)

        # Get the new features
        features = self.feature_provider.features(observation)
        a_ps_psa_dict = self.model.act(observation, features)

        # Update previous feature set for next online learning session
        self.previous_features = features
        self.previous_action = a_ps_psa_dict["a"]

        return {
            "t": observation.context().time(),
            "u": observation.context().user(),
            **a_ps_psa_dict,
        }


global_stats = []

agent = build_agent_init("CrossConfirmationMNB", TestAgent, {**test_agent_args})
from recogym import build_agent_init
from recogym.agents import OrganicCount, organic_count_args

agent = build_agent_init('OrganicCount', OrganicCount, {**organic_count_args})
        self.previous_features = None
        self.previous_action = None
        self.feature_count = 0

    def act(self, observation, reward, done):
        if self.model is None:
            self.feature_provider, self.model = self.model_builder.build()

        self.feature_provider.observe(observation)
        features = self.feature_provider.features(observation)

        if self.config.online_training and reward is not None:
            self.model.train_online(self.previous_features, self.previous_action, reward)

        action_dict = self.model.act(observation, features)

        self.previous_features = features
        self.feature_count += 1
        self.previous_action = action_dict["a"]

        return {
            "t": observation.context().time(),
            "u": observation.context().user(),
            **action_dict,
        }


global_stats = []

agent = build_agent_init("MLPAgent", TestAgent, {**test_agent_args})
        features = self.feature_provider.features(observation)
        a_ps_psa_dict = self.model.act(observation, features)

        # Update previous feature set for next online learning session
        self.previous_features = features
        self.previous_action = a_ps_psa_dict["a"]

        return {
            "t": observation.context().time(),
            "u": observation.context().user(),
            **a_ps_psa_dict,
        }


global_stats = []

agent = build_agent_init("LightGBMAgent", TestAgent, {**test_agent_args})

if __name__ == "__main__":
    import gym
    from recogym import env_1_args
    from recogym.bench_agents import test_agent

    num_products = 10
    num_offline_users = 20
    num_online_users = 200

    agent = TestAgent(
        Configuration({
            "random_seed": np.random.randint(2**31 - 1),
            "num_products": num_products,
            "fallback_threshold": 0.00,