Example #1
def produce_agent_stats(
    env,
    std_env_args,
    agent: Agent,
    num_products: int,
    num_organic_users_to_train: int,
    num_users_to_train: int,
    num_users_to_score: int,
    random_seed: int,
    agent_class,
    agent_configs,
    agent_name: str,
    with_cache: bool,
):
    stat_epochs = 1
    stat_epochs_new_random_seed = True
    training_data_samples = tuple([num_users_to_train])
    testing_data_samples = num_users_to_score

    time_start = datetime.datetime.now()
    agent_stats = gather_agent_stats(env, std_env_args, {
        'agent': agent,
    }, {
        **build_agent_init(agent_name, agent_class, {
            **agent_configs,
            'num_products': num_products,
        }),
    }, training_data_samples, testing_data_samples, stat_epochs,
                                     stat_epochs_new_random_seed,
                                     num_organic_users_to_train, with_cache)

    # Collect the 2.5%, 50% and 97.5% click-through-rate quantiles reported
    # for each evaluated agent
    q0_025 = []
    q0_500 = []
    q0_975 = []
    for agent_name in agent_stats[AgentStats.AGENTS]:
        agent_values = agent_stats[AgentStats.AGENTS][agent_name]
        q0_025.append(agent_values[AgentStats.Q0_025][0])
        q0_500.append(agent_values[AgentStats.Q0_500][0])
        q0_975.append(agent_values[AgentStats.Q0_975][0])

    time_end = datetime.datetime.now()
    seconds = (time_end - time_start).total_seconds()

    return pd.DataFrame({
        'q0.025': q0_025,
        'q0.500': q0_500,
        'q0.975': q0_975,
        # A single agent is evaluated here, so every column has length one
        'time': [seconds],
    })
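Example #2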
        features = self.feature_provider.features(observation)
        a_ps_psa_dict = self.model.act(observation, features)

        # Update previous feature set for next online learning session
        self.previous_features = features
        self.previous_action = a_ps_psa_dict['a']

        return {
            't': observation.context().time(),
            'u': observation.context().user(),
            **a_ps_psa_dict,
        }


global_stats = []
agent = build_agent_init("PirateAgent", PirateAgent, {**pirate_agent_args})

###
# Test & train locally; this is not used when submitting, but it helps with local debugging.
#

if __name__ == "__main__":
    import gym
    from recogym import env_1_args
    from recogym.bench_agents import test_agent

    num_products = 10
    num_users = 300

    pirate_agent = PirateAgent(
        Configuration({
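Example #3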
        X = X[mask]
        A = A[mask]
        y = y[mask]
        pss = pss[mask]
        
        n_clicks = np.sum(deltas)

        # Explicitly build a one-hot matrix for the actions
        # (note: the multinomial fit below uses the integer action labels A
        # directly, so A_one_hot is unused in this fragment)
        A_one_hot = np.zeros((n_clicks, P))
        A_one_hot[np.arange(n_clicks), A] = 1

        # Train a model
        model = LogisticRegression(solver='lbfgs', multi_class='multinomial').fit(X, A, sample_weight=1 / pss)

        return (
            LogisticRegression_SKLearnFeaturesProvider(self.config),
            LogisticRegression_SKLearnModel(self.config, model)
        )

class LogisticRegression_SKLearnAgent(ModelBasedAgent):
    """
    Scikit-Learn-based logistic regression Agent.
    """
    def __init__(self, config=Configuration(logistic_regression_sklearn_args)):
        super(LogisticRegression_SKLearnAgent, self).__init__(
            config,
            LogisticRegression_SKLearnModelBuilder(config)
        )

agent = build_agent_init('LogisticRegression_SKLearnAgent', LogisticRegression_SKLearnAgent, {**logistic_regression_sklearn_args})
Example #4
                    training_matrix[(i, j)] = X[i][j // P]

        training_matrix = training_matrix.tocsr()

        # for row in training_matrix:
        #     print(row)

        # print("Dense Training matrix\n", training_matrix)
        # print("A value from dense matrix ", training_matrix[0][55])

        # print("Sparse Training matrix\n", training_matrix)

        model = DecisionTreeClassifier().fit(training_matrix, y)

        # print("# of classes: ", model.n_classes_)
        # print("Classes: ", model.classes_)
        # exit(1)

        return (DecisionTreeFeaturesProvider(self.config),
                DecisionTreeModel(self.config, model))


class DecisionTreeAgent(ModelBasedAgent):
    def __init__(self, config=Configuration(decision_tree_args)):
        super(DecisionTreeAgent,
              self).__init__(config, DecisionTreeModelBuilder(config))


agent = build_agent_init("DecisionTreeAgent", DecisionTreeAgent,
                         {**decision_tree_args})
Example #5
from recogym import build_agent_init
from recogym.agents import PyTorchMLRAgent, pytorch_mlr_args

pytorch_mlr_args['n_epochs'] = 30
pytorch_mlr_args['learning_rate'] = 0.01
pytorch_mlr_args['logIPS'] = True

agent = build_agent_init('PyTorchMLRAgent', PyTorchMLRAgent,
                         {**pytorch_mlr_args})
Example #6
def competition_score(
    num_products: int,
    num_organic_users_to_train: int,
    num_users_to_train: int,
    num_users_to_score: int,
    random_seed: int,
    latent_factor: int,
    num_flips: int,
    log_epsilon: float,
    sigma_omega: float,
    agent_class,
    agent_configs,
    agent_name: str,
    with_cache: bool,
):
    training_data_samples = tuple([num_users_to_train])
    testing_data_samples = num_users_to_score
    stat_epochs = 1
    stat_epochs_new_random_seed = True

    std_env_args = {
        **env_1_args, 'random_seed': random_seed,
        'num_products': num_products,
        'K': latent_factor,
        'sigma_omega': sigma_omega,
        'number_of_flips': num_flips
    }

    env = gym.make('reco-gym-v1')

    # Start timing before gathering stats so that training and scoring are
    # included in the reported time
    time_start = datetime.datetime.now()
    agent_stats = gather_agent_stats(
        env, std_env_args, {
            'agent':
            OrganicUserEventCounterAgent(
                Configuration({
                    **organic_user_count_args,
                    **std_env_args,
                    'select_randomly': True,
                    'epsilon': log_epsilon,
                    'num_products': num_products,
                })),
        }, {
            **build_agent_init(agent_name, agent_class, {
                **agent_configs,
                'num_products': num_products,
            }),
        }, training_data_samples, testing_data_samples, stat_epochs,
        stat_epochs_new_random_seed, num_organic_users_to_train, with_cache)

    q0_025 = []
    q0_500 = []
    q0_975 = []
    for agent_name in agent_stats[AgentStats.AGENTS]:
        agent_values = agent_stats[AgentStats.AGENTS][agent_name]
        q0_025.append(agent_values[AgentStats.Q0_025][0])
        q0_500.append(agent_values[AgentStats.Q0_500][0])
        q0_975.append(agent_values[AgentStats.Q0_975][0])

    time_end = datetime.datetime.now()
    seconds = (time_end - time_start).total_seconds()

    return pd.DataFrame({
        'q0.025': q0_025,
        'q0.500': q0_500,
        'q0.975': q0_975,
        'time': [seconds],
    })
Example #7
from recogym import build_agent_init
from recogym.agents import BanditCount, bandit_count_args

agent = build_agent_init('BanditCount', BanditCount, {**bandit_count_args})
Example #8
import numpy as np

from recogym import build_agent_init
from recogym.agents import LogregPolyAgent, logreg_poly_args

# Down-weight older events exponentially when fitting the likelihood model
agent = build_agent_init(
    'LikelihoodWithTime', LogregPolyAgent, {
        **logreg_poly_args, 'weight_history_function': lambda t: np.exp(-t)
    })
Example #9
from recogym import build_agent_init
from recogym.agents import RandomAgent, random_args

agent = build_agent_init('RandomAgent', RandomAgent, {**random_args})
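Example #10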
    def __init__(self, config=Configuration(keras_nn_args)):
        self.previous_features = None
        self.previous_action = None
        self.model = None
        super(KerasNNAgent, self).__init__(config, KerasNNModelBuilder(config))

    def act(self, observation, reward, done):
        if self.model is None:
            self.feature_provider, self.model = self.model_builder.build()

        if self.config.online_training and reward == 1:
            self.model.train_online(self.previous_features,
                                    self.previous_action, reward)

        self.feature_provider.observe(observation)

        features = self.feature_provider.features(observation)
        action_dict = self.model.act(observation, features)

        self.previous_features = features
        self.previous_action = action_dict

        return {
            "t": observation.context().time(),
            "u": observation.context().user(),
            **action_dict
        }


agent = build_agent_init('KerasNNAgent', KerasNNAgent, {**keras_nn_args})
Example #11
    def __init__(self, config, product_encodings, metric='cosine'):
        super(ClosestProductModel, self).__init__(config)
        self.product_encodings = product_encodings
        self.metric = metric

    def act(self, observation, features):
        dists = cdist(
            features.reshape(1, -1),
            self.product_encodings,
            self.metric,
        )
        return {
            **super().act(observation, features),
            'a': np.argmin(dists),
            'ps': 1,
        }


class TestAgent(ModelBasedAgent):
    def __init__(self, config):
        super(TestAgent, self).__init__(config, MFModelBuilder(config))


test_agent_args = {
    'num_products': 10,
    'num_latent_factors': 20,
    'random_seed': np.random.randint(2**31 - 1),
}

agent = build_agent_init("MFAgent", TestAgent, test_agent_args)
        '''

        # Train a model
        model = SVC(C=1.0,
                    kernel='sigmoid',
                    coef0=0.0,
                    probability=True,
                    tol=1e-3,
                    class_weight=None,
                    max_iter=-1,
                    random_state=42) \
            .fit(training_matrix, y)

        print('Model: {}'.format(model))

        return (SimpleAgentFeaturesProvider(self.config),
                SimpleAgentModel(self.config, model))


class SimpleAgent(ModelBasedAgent):
    """
    Scikit-Learn-based SVM Agent.
    """
    def __init__(self, config=Configuration(model_sklearn_args)):
        super(SimpleAgent, self).__init__(config,
                                          SimpleAgentModelBuilder(config))


agent = build_agent_init('SimpleAgent', SimpleAgent, {**model_sklearn_args})
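Example #13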
                Specifies how multi-class classification problems are handled. 
                Supported are “one_vs_rest” and “one_vs_one”. In “one_vs_rest”, 
                one binary Gaussian process classifier is fitted for each class, 
                which is trained to separate this class from the rest. 
                In “one_vs_one”, one binary Gaussian process classifier is fitted for each pair of classes, 
                which is trained to separate these two classes. 
                The predictions of these binary predictors are combined into multi-class predictions. 
                Note that “one_vs_one” does not support predicting probability estimates.
                Default is "one_vs_rest"

        '''

        # Train a model
        model = GaussianProcessClassifier().fit(training_matrix, y)
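        # As the parameter description above notes, a pairwise variant could be
        # chosen instead (a sketch, not part of the original example; recall
        # that 'one_vs_one' does not support predicting probabilities):
        # model = GaussianProcessClassifier(multi_class='one_vs_one').fit(training_matrix, y)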

        return (GaussianProcessAgentFeaturesProvider(self.config),
                GaussianProcessAgentModel(self.config, model))


class GaussianProcessAgent(ModelBasedAgent):
    """
    Scikit-Learn-based Gaussian process Agent.
    """
    def __init__(self, config=Configuration(model_sklearn_args)):
        super(GaussianProcessAgent,
              self).__init__(config, GaussianProcessAgentModelBuilder(config))


agent = build_agent_init('GaussianProcessAgent', GaussianProcessAgent,
                         {**model_sklearn_args})
Example #14
        # Build model first if not yet done
        if self.model is None:
            assert self.feature_provider is None
            self.feature_provider, self.model = self.model_builder.build()

        # Now that we have the reward, train based on previous features and reward we got for our action
        if self.config.online_training and reward is not None:
            self.model.train_online(self.previous_features,
                                    self.previous_action, reward)

        # Update the feature provider with this new observation
        self.feature_provider.observe(observation)

        # Get the new features
        features = self.feature_provider.features(observation)
        a_ps_psa_dict = self.model.act(observation, features)

        # Update previous feature set for next online learning session
        self.previous_features = features
        self.previous_action = a_ps_psa_dict["a"]

        return {
            "t": observation.context().time(),
            "u": observation.context().user(),
            **a_ps_psa_dict,
        }


global_stats = []
agent = build_agent_init("MultinomialNaiveBayes", TestAgent,
                         {**test_agent_args})
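Example #15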
        if self.model is None:
            self.feature_provider, self.model = self.model_builder.build()

        self.feature_provider.observe(observation)
        features = self.feature_provider.features(observation)

        if self.config.online_training and reward is not None:
            print("Going into online training")
            self.model.train_online(self.previous_features, self.previous_action, reward)

        action_dict = self.model.act(observation, features)

        self.previous_features = features
        self.feature_count += 1
        self.previous_action = action_dict["a"]

        return {
            "t": observation.context().time(),
            "u": observation.context().user(),
            **action_dict
        }


agent = build_agent_init("MLPAgent", MLPAgent, {**mlp_args})






            def act(self, observation, features):
                # Choose the item the user has already seen most often organically
                features = features.ravel()
                action = np.argmax(features)
                ps_all = np.zeros_like(features)
                ps_all[action] = 1.0
                return {
                    **super().act(observation, features),
                    **{
                        'a': action,
                        'ps': 1.0,
                        'ps-a': ps_all,
                    },
                }

        return (PersonalOrganicFeaturesProvider(self.config),
                PersonalOrganicModel(self.config))


class TestAgent(ModelBasedAgent):
    """
    Agent that performs the action the user has already seen most often organically.
    """
    def __init__(self, config=Configuration(test_agent_args)):
        super(TestAgent, self).__init__(config,
                                        PersonalOrganicModelBuilder(config))


agent = build_agent_init('PersonalOrganicAgent', TestAgent,
                         {**test_agent_args})
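Example #17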
                                        max_iter=10000)
        self.reset_count = -1
        self.reset_online()

    def train(self, features, actions, clicks, pss):
        print("[Train LogRegModel]")
        self.model.fit(features, actions, sample_weight=1 / pss)

    def act(self, observation, features):
        # features is a vector of organic counts; scikit-learn expects a 2-D
        # array, so uncomment the reshape below if the provider yields a flat vector
        # features = features.reshape((1, len(features)))
        predictions = self.model.predict_proba(features)

        # Take the one you think is most likely to give a click
        action = np.argmax(predictions)
        ps_all = np.zeros(self.config.num_products)
        ps_all[action] = 1.0

        return {
            **super().act(observation, features),
            **{
                'a': action,
                'ps': 1.0,
                'ps-a': ps_all,
            },
        }


# only offline training
agent = build_agent_init("AE_LogReg_Agent_Offline", TestAgent, test_agent_args)
Example #18
from recogym import build_agent_init
from recogym.agents import LogregMulticlassIpsAgent, logreg_multiclass_ips_args

agent = build_agent_init('Contextual Bandit', LogregMulticlassIpsAgent, {
    **logreg_multiclass_ips_args,
})
Example #19
from recogym import build_agent_init
from recogym.agents import LogregPolyAgent, logreg_poly_args

agent = build_agent_init('likelihood', LogregPolyAgent, {
    **logreg_poly_args,
})
Example #20
import pandas as pd
import numpy as np

from recogym import Configuration, build_agent_init
from recogym.agents import OrganicUserEventCounterAgent, organic_user_count_args

dump_agent_args = {
    'agent': build_agent_init(
        'OrganicUserCount',
        OrganicUserEventCounterAgent,
        {**organic_user_count_args}
    )
}


class DumpAgent(OrganicUserEventCounterAgent):
    """
    Dump Agent

    This is the Agent that dumps all its `train` and `act` calls.
    It is used mostly for debugging purposes.
    """

    def __init__(self, config=Configuration(dump_agent_args)):
        super(DumpAgent, self).__init__(config)
        self.previous_action = None

        self.data = {
            'case': [],
            't': [],
            'u': [],
Example #21
from recogym import build_agent_init
from recogym.agents import BanditMFSquare, bandit_mf_square_args

agent = build_agent_init('BanditMFsquare', BanditMFSquare, {**bandit_mf_square_args})
Example #22
        self.previous_action = None
        self.model = None
        super(MNBAgent, self).__init__(
            config,
            MNBBuilder(config)
        )

    def act(self, observation, reward, done):
        if self.model is None:
            self.feature_provider, self.model = self.model_builder.build()

        if self.config.online_training and reward == 1:
            self.model.train_online(self.previous_features, self.previous_action, reward)

        self.feature_provider.observe(observation)

        features = self.feature_provider.features(observation)
        action_dict = self.model.act(observation, features)

        self.previous_features = features
        self.previous_action = action_dict

        return {
            "t": observation.context().time(),
            "u": observation.context().user(),
            **action_dict
        }


agent = build_agent_init('MNBAgent', MNBAgent,
                         {**mnb_args})
Example #23
from recogym import build_agent_init
from recogym.agents.organic_user_count import organic_user_count_args, OrganicUserEventCounterAgent

agent = build_agent_init('OrganicUserCount', OrganicUserEventCounterAgent,
                         {**organic_user_count_args})
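Example #24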
        # Explicitly build one-hot matrix for actions
        A_one_hot = np.zeros((N, P))
        A_one_hot[np.arange(N), A] = 1

        # TODO - this really doesn't scale, maybe sparsify?
        training_matrix = []
        for x, a in zip(X, A_one_hot):
            training_matrix.append(np.kron(x, a))
        training_matrix = np.asarray(training_matrix)
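        # A sparse alternative for the TODO above (a sketch, assuming SciPy is
        # installed): keep each Kronecker product sparse instead of building a
        # dense N x (P*P) array; MultinomialNB accepts CSR input.
        # from scipy import sparse
        # training_matrix = sparse.vstack(
        #     [sparse.kron(sparse.csr_matrix(x), sparse.csr_matrix(a), format='csr')
        #      for x, a in zip(X, A_one_hot)]
        # )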

        # Train a model
        model = MultinomialNB().fit(training_matrix, y)

        return (MultinomialNaiveBayesFeaturesProvider(self.config),
                MultinomialNaiveBayesModel(self.config, model))


class MultinomialNaiveBayesAgent(ModelBasedAgent):
    """
    Scikit-Learn-based multinomial naive Bayes Agent.
    """
    def __init__(self, config=Configuration(multinomial_naive_bayes_args)):
        super(MultinomialNaiveBayesAgent,
              self).__init__(config, MultinomialNaiveBayesModelBuilder(config))


agent = build_agent_init('MultinomialNaiveBayesAgent',
                         MultinomialNaiveBayesAgent,
                         {**multinomial_naive_bayes_args})
Example #25
        # Build model first if not yet done
        if self.model is None:
            assert self.feature_provider is None
            self.feature_provider, self.model = self.model_builder.build()

        # Now that we have the reward, train based on previous features and reward we got for our action
        if self.config.online_training and reward is not None:
            self.model.update_data(self.previous_features,
                                   self.previous_action, reward)

        # Update the feature provider with this new observation
        self.feature_provider.observe(observation)

        # Get the new features
        features = self.feature_provider.features(observation)
        a_ps_psa_dict = self.model.act(observation, features)

        # Update previous feature set for next online learning session
        self.previous_features = features
        self.previous_action = a_ps_psa_dict["a"]

        return {
            "t": observation.context().time(),
            "u": observation.context().user(),
            **a_ps_psa_dict,
        }


global_stats = []
agent = build_agent_init("MultinomialNBAgent", TestAgent, {**test_agent_args})
        # Build model first if not yet done
        if self.model is None:
            assert self.feature_provider is None
            self.feature_provider, self.model = self.model_builder.build()

        # Now that we have the reward, train based on previous features and reward we got for our action
        if self.config.online_training and reward is not None:
            self.model.update_data(
                self.previous_features, self.previous_action, reward
            )

        # Update the feature provider with this new observation
        self.feature_provider.observe(observation)

        # Get the new features
        features = self.feature_provider.features(observation)
        a_ps_psa_dict = self.model.act(observation, features)

        # Update previous feature set for next online learning session
        self.previous_features = features
        self.previous_action = a_ps_psa_dict["a"]

        return {
            "t": observation.context().time(),
            "u": observation.context().user(),
            **a_ps_psa_dict,
        }


agent = build_agent_init('LogisticRegression_SKLearnAgent_Egreedy', TestAgent, test_agent_args)
Example #27
        if self.model is None:
            assert self.feature_provider is None
            self.feature_provider, self.model = self.model_builder.build()

        # Now that we have the reward, train based on previous features and reward we got for our action
        if self.config.online_training and reward is not None:
            self.model.train_online(
                self.previous_features, self.previous_action, reward
            )

        # Update the feature provider with this new observation
        self.feature_provider.observe(observation)

        # Get the new features
        features = self.feature_provider.features(observation)
        a_ps_psa_dict = self.model.act(observation, features)

        # Update previous feature set for next online learning session
        self.previous_features = features
        self.previous_action = a_ps_psa_dict["a"]

        return {
            "t": observation.context().time(),
            "u": observation.context().user(),
            **a_ps_psa_dict,
        }


global_stats = []
agent = build_agent_init("CrossConfirmationMNB", TestAgent, {**test_agent_args})
Example #28
from recogym import build_agent_init
from recogym.agents import OrganicCount, organic_count_args

agent = build_agent_init('OrganicCount', OrganicCount, {**organic_count_args})
Example #29
        self.previous_features = None
        self.previous_action = None
        self.feature_count = 0

    def act(self, observation, reward, done):
        if self.model is None:
            self.feature_provider, self.model = self.model_builder.build()

        self.feature_provider.observe(observation)
        features = self.feature_provider.features(observation)

        if self.config.online_training and reward is not None:
            self.model.train_online(self.previous_features,
                                    self.previous_action, reward)

        action_dict = self.model.act(observation, features)

        self.previous_features = features
        self.feature_count += 1
        self.previous_action = action_dict["a"]

        return {
            "t": observation.context().time(),
            "u": observation.context().user(),
            **action_dict
        }


global_stats = []
agent = build_agent_init("MLPAgent", TestAgent, {**test_agent_args})
Example #30
        features = self.feature_provider.features(observation)
        a_ps_psa_dict = self.model.act(observation, features)

        # Update previous feature set for next online learning session
        self.previous_features = features
        self.previous_action = a_ps_psa_dict["a"]

        return {
            "t": observation.context().time(),
            "u": observation.context().user(),
            **a_ps_psa_dict,
        }


global_stats = []
agent = build_agent_init("LightGBMAgent", TestAgent, {**test_agent_args})

if __name__ == "__main__":
    import gym
    from recogym import env_1_args
    from recogym.bench_agents import test_agent

    num_products = 10
    num_offline_users = 20
    num_online_users = 200

    agent = TestAgent(
        Configuration({
            "random_seed": np.random.randint(2**31 - 1),
            "num_products": num_products,
            "fallback_threshold": 0.00,