Example 1
def get_cartpole_agent(agent_name, cartpole_device):
    """
    Build a new agent for the specified cartpole device.

    It would probably make more sense to pass agent_parameters
    as a parameter to this function.

    Parameters
    ----------
    agent_name: str
        an identifier this function recognizes: "a2c" or "ppo"
    cartpole_device:
        an object that exposes the Tensorforce environment as `cartpole_env`

    Returns
    -------
        a tensorforce Agent and the agent_parameters dict used to create it
    """
    if agent_name == "a2c":
        agent_parameters = dict(
            agent=agent_name,
            batch_size=11,
            variable_noise=0.1,
            l2_regularization=0.05,  # does this help with catastrophic forgetting?
            horizon=10,  # 10 is good, 1 is bad, 5 is bad, 20 is ok, 15 is bad
            summarizer=dict(
                directory="data/summaries/" + agent_name,
                # list of labels, or 'all'
                labels=[
                    "graph", "entropy", "kl-divergence", "losses", "rewards"
                ],
                frequency=10,  # store values every 10 timesteps
            ),
        )
        agent = Agent.create(
            # agent="a2c",
            environment=cartpole_device.cartpole_env,
            # the cartpole environment will supply argument max_episode_timesteps
            # max_episode_timesteps=max_turns,
            **agent_parameters,
        )
    elif agent_name == "ppo":
        agent_parameters = dict(
            batch_size=10,
            variable_noise=0.1,
        )
        agent = Agent.create(
            agent="ppo",
            environment=cartpole_device.cartpole_env,
            **agent_parameters,
        )
    else:
        raise ValueError(f"agent_name '{agent_name}' is not recognized")

    return agent, agent_parameters
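
For context, a minimal usage sketch of the function above; it assumes a hypothetical make_cartpole_device() helper that returns an object exposing the Tensorforce environment as `cartpole_env` (as the docstring describes), and the episode count is illustrative:

from tensorforce.execution import Runner

device = make_cartpole_device()  # hypothetical helper, not part of the example above
agent, agent_parameters = get_cartpole_agent("ppo", device)

runner = Runner(agent=agent, environment=device.cartpole_env)
runner.run(num_episodes=100)
agent.save(directory="saved_models")
agent.close()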
Example 2
    def __init__(self, agent_spec, agent_additional_parameters, agents_count):
        self.agents = []
        first_agent = Agent.from_spec(spec=agent_spec,
                                      kwargs=agent_additional_parameters)
        self.agents.append(first_agent)
        self.model = first_agent.model
        self.stop = False
        for _ in range(agents_count - 1):
            agent = Agent.from_spec(spec=agent_spec,
                                    kwargs=agent_additional_parameters)
            agent.model.close()
            agent.model = self.model
            self.agents.append(agent)
Example 3
    def __init__(self, in_dim, n_action, rl, train):
        super().__init__()
        self.make_in_port('observation', in_dim)
        self.make_in_port('reward', 1)
        self.make_in_port('done', 1)
        self.make_out_port('action', 1)
        self.make_in_port('token_in', 1)
        self.make_out_port('token_out', 1)
        self.n_action = n_action  # number of action choices
        self.results['action'] = np.array([np.random.randint(n_action)])
        self.model = None
        self.env_type = "MotorEnv"
        self.token = 0
        self.prev_actions = 0
        self.init = True
        self.in_dim = in_dim
        self.rl = rl
        if rl:
            self.env = Environment.create(
                environment=MotorComponent.MotorEnv,
                max_episode_timesteps=train["episode_count"] * train["max_steps"],
                n_action=n_action,
                obs_dim=in_dim,
                parent=self)
            self.env.reset()
            self.agent = Agent.create(agent=train['rl_agent'],
                                      environment=self.env)
Example 4
def main():

    bad_seeds_environment = Environment.create(environment=BadSeeds03,
                                               seed_count=10,
                                               bad_seed_count=3,
                                               max_episode_length=100)

    agent = Agent.create(
        agent="a2c",
        batch_size=100,  # this seems to help a2c
        horizon=20,  # does this help a2c?
        exploration=0.01,  # tried without this at first
        l2_regularization=0.1,
        entropy_regularization=0.2,
        variable_noise=0.05,
        environment=bad_seeds_environment,
        summarizer=dict(
            directory="training_data/agent_01_env_03/summaries",
            # list of labels, or 'all'
            labels=["graph", "entropy", "kl-divergence", "losses", "rewards"],
            frequency=100,  # store values every 100 timesteps
        ),
    )

    runner = Runner(agent=agent, environment=bad_seeds_environment)
    runner.run(num_episodes=100000)
    agent.save(directory="saved_models")
Example 5
    def agent(self, agent: any):
        self._agent = Agent.create(agent=agent,
                                   environment=self._tensorforce_environment)

        self._runner = Runner(agent=self._agent,
                              environment=self._tensorforce_environment,
                              save_best_agent=self._save_best_agent)
Example 6
    def __init__(self, agent, environments):
        if not util.is_iterable(x=environments):
            raise TensorforceError.type(name='parallel-runner',
                                        argument='environments',
                                        value=environments)
        elif len(environments) == 0:
            raise TensorforceError.value(name='parallel-runner',
                                         argument='environments',
                                         value=environments)

        if not isinstance(agent, Agent):
            agent = Agent.from_spec(spec=agent,
                                    states=environments[0].states(),
                                    actions=environments[0].actions(),
                                    parallel_interactions=len(environments))

        if len(environments) > agent.parallel_interactions:
            raise TensorforceError(message="Too many environments.")

        self.agent = agent
        self.environments = tuple(environments)

        self.agent.initialize()
        self.global_episode = self.agent.episode
        self.global_timestep = self.agent.timestep
        self.episode_rewards = list()
        self.episode_timesteps = list()
        self.episode_times = list()
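
A hedged usage sketch for the constructor above, mirroring the pattern of Example 16; the gym level, environment count, and batch size are illustrative, and the runner class name ParallelRunner is an assumption taken from the error messages:

from tensorforce.agents import Agent
from tensorforce.environments import Environment

environments = [Environment.create(environment='gym', level='CartPole-v1')
                for _ in range(4)]
agent = Agent.create(agent='ppo', environment=environments[0], batch_size=10,
                     parallel_interactions=len(environments))
runner = ParallelRunner(agent=agent, environments=environments)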
Example 7
    def train_implementation(self, train_context: easyagents.core.PpoTrainContext):
        """Tensorforce Ppo Implementation of the train loop.

            The implementation follows https://github.com/tensorforce/tensorforce/blob/master/examples/quickstart.py
        """
        tc = train_context
        train_env = self._create_env()
        network = self._create_network_specification()

        self.log_api('Agent.create', f'(agent="ppo", environment=..., ' +
                     f'network={network}, ' +
                     f'learning_rate={tc.learning_rate}, ' +
                     f'batch_size={tc.num_episodes_per_iteration}, ' +
                     f'optimization_steps={tc.num_epochs_per_iteration}, ' +
                     f'discount={tc.reward_discount_gamma})')
        self._agent = Agent.create(
            agent='ppo',
            environment=train_env,
            network=network,
            learning_rate=tc.learning_rate,
            batch_size=tc.num_episodes_per_iteration,
            optimization_steps=tc.num_epochs_per_iteration,
            discount=tc.reward_discount_gamma,
        )
        self._train_with_runner(train_env, tc)
Example 8
    def __init__(self,
                 agent,
                 environment,
                 evaluation_environment=None,
                 save_best_agent=False):
        # save_best overwrites saver...
        self.is_environment_external = isinstance(environment, Environment)
        self.environment = Environment.create(environment=environment)

        self.is_eval_environment_external = isinstance(evaluation_environment,
                                                       Environment)
        if evaluation_environment is None:
            self.evaluation_environment = None
        else:
            self.evaluation_environment = Environment.create(
                environment=evaluation_environment)

        self.save_best_agent = save_best_agent
        self.is_agent_external = isinstance(agent, Agent)
        kwargs = dict()
        if self.save_best_agent is True:
            # Disable periodic saving
            assert not self.is_agent_external
            kwargs = dict(saver=dict(seconds=None, steps=None))
        self.agent = Agent.create(agent=agent,
                                  environment=self.environment,
                                  **kwargs)

        # self.global_episodes = self.agent.episodes
        # self.global_timesteps = self.agent.timesteps
        # self.global_updates = self.agent.updates
        self.episode_rewards = list()
        self.episode_timesteps = list()
        self.episode_seconds = list()
        self.episode_agent_seconds = list()
Example 9
    def agent(self, agent_spec: any):
        self._agent = Agent.create(agent=agent_spec,
                                   environment=self._environment)

        self._runner = Runner(agent=self._agent,
                              environment=self._environment,
                              save_best_agent=self._save_best_agent)
Example 10
    def __init__(self,
                 environment: TradingEnvironment,
                 agent_spec: Dict = None,
                 network_spec: Dict = None,
                 **kwargs):
        """
        Arguments:
            environment: A `TradingEnvironment` instance for the agent to trade within.
            agent_spec: A specification dictionary for the `Tensorforce` agent.
            network_spec: A specification dictionary for the `Tensorforce` agent's model network.
            kwargs (optional): Optional keyword arguments to adjust the strategy.
        """
        self._environment = environment

        self._max_episode_timesteps = kwargs.get('max_episode_timesteps', None)

        if agent_spec and network_spec:
            self._agent_spec = agent_spec
            self._network_spec = network_spec

            self._agent = Agent.from_spec(spec=agent_spec,
                                          kwargs=dict(
                                              network=network_spec,
                                              states=environment.states,
                                              actions=environment.actions))

            self._runner = Runner(agent=self._agent, environment=environment)
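
As a hedged illustration of the two specification dictionaries this constructor expects, roughly in the style of the older Agent.from_spec API; every key and value below is an assumption for illustration, not taken from the example:

agent_spec = {
    "type": "ppo_agent",
    "step_optimizer": {"type": "adam", "learning_rate": 1e-4},
    "discount": 0.99,
}
network_spec = [
    {"type": "dense", "size": 64, "activation": "relu"},
    {"type": "dense", "size": 32, "activation": "relu"},
]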
Example 11
def build(agent_spec, actor, env):
    agent = Agent.from_spec(spec=agent_spec,
                            kwargs=dict(states=env.states,
                                        actions=env.actions,
                                        network=actor))
    runner = Runner(agent=agent, environment=env, repeat_actions=1)
    return runner, agent
Example 12
    def prepare(self,
                environment=None,
                timestep_range=None,
                states=None,
                actions=None,
                exclude_bool_action=False,
                exclude_int_action=False,
                exclude_float_action=False,
                exclude_bounded_action=False,
                require_observe=False,
                require_all=False,
                **agent):
        """
        Generic unit-test preparation.
        """
        Layer.layers = None

        if environment is None:
            if states is None:
                states = deepcopy(self.__class__.states)

            if actions is None:
                actions = deepcopy(self.__class__.actions)
                if exclude_bool_action or self.__class__.exclude_bool_action:
                    actions.pop('bool_action')
                if exclude_int_action or self.__class__.exclude_int_action:
                    actions.pop('int_action')
                if exclude_float_action or self.__class__.exclude_float_action:
                    actions.pop('float_action')
                if exclude_bounded_action or self.__class__.exclude_bounded_action:
                    actions.pop('bounded_action')

            if timestep_range is None:
                timestep_range = self.__class__.timestep_range

            environment = UnittestEnvironment(states=states,
                                              actions=actions,
                                              timestep_range=timestep_range)

        elif timestep_range is not None:
            raise TensorforceError.unexpected()

        environment = Environment.create(environment=environment)

        for key, value in self.__class__.agent.items():
            if key not in agent:
                agent[key] = value

        if self.__class__.require_all or require_all:
            config = None
        elif self.__class__.require_observe or require_observe:
            config = dict(api_functions=['reset', 'act', 'observe'])
        else:
            config = dict(api_functions=['reset', 'act'])

        agent = Agent.create(agent=agent,
                             environment=environment,
                             config=config)

        return agent, environment
Example 13
def set_up():
    tensorflow_settings()
    env = Environment.create(environment=CartSeed01,
                             seed_count=10,
                             bad_seed_count=3,
                             max_count=20)

    agent = Agent.create(
        agent="a2c",
        batch_size=10000,
        horizon=50,
        discount=0.97,
        l2_regularization=0.1,
        variable_noise=0.5,
        environment=env,
        summarizer=dict(
            directory="training_data/a2c_cartseed/summaries",
            labels="all",
            frequency=10,
        ),
        # saver=dict(
        #     directory='saved_models/agent_04_env_04_1000/checkpoints',
        #     frequency=600  # save checkpoint every 600 seconds (10 minutes)
        # ),
    )
    return env, agent
Example 14
    def __init__(
        self, agent, environment, max_episode_timesteps=None, evaluation_environment=None,
        save_best_agent=None
    ):
        self.is_environment_external = isinstance(environment, Environment)
        self.environment = Environment.create(
            environment=environment, max_episode_timesteps=max_episode_timesteps
        )

        if evaluation_environment is None:
            self.evaluation_environment = None
        else:
            self.is_eval_environment_external = isinstance(evaluation_environment, Environment)
            self.evaluation_environment = Environment.create(
                environment=evaluation_environment, max_episode_timesteps=max_episode_timesteps
            )
            assert self.evaluation_environment.states() == self.environment.states()
            assert self.evaluation_environment.actions() == self.environment.actions()

        self.is_agent_external = isinstance(agent, Agent)
        self.agent = Agent.create(agent=agent, environment=self.environment)
        self.save_best_agent = save_best_agent

        self.episode_rewards = list()
        self.episode_timesteps = list()
        self.episode_seconds = list()
        self.episode_agent_seconds = list()
Example 15
def main():
    tensorflow_settings()
    bad_seeds_environment = Environment.create(
        environment=BadSeeds02,
        seed_count=10,
        bad_seed_count=3,
        history_block=2,
        max_episode_timesteps=100,
    )

    agent = Agent.create(
        agent="random",
        environment=bad_seeds_environment,
        summarizer=dict(
            directory="training_data/agent_random_env_02/summaries",
            labels="all",
            frequency=100,  # store values every 100 timesteps
        ),
    )

    runner = Runner(agent=agent, environment=bad_seeds_environment)
    runner.run(num_episodes=10000)

    bad_seeds_environment.close()
    agent.close()
Example 16
def base_test(env):
    batch_size = 24

    agent = Agent.create(
        agent='ppo',
        environment=env[0],
        batch_size=batch_size,
        learning_rate=1e-3,
        network=actor_network,
        discount=1.0,
        entropy_regularization=None,
        critic_network=critic_network,
        critic_optimizer=dict(optimizer='adam',
                              multi_step=10,
                              learning_rate=1e-3),
        max_episode_timesteps=n_step,
        parallel_interactions=n_env
        # saver=dict(directory=os.path.join(os.getcwd(), 'saver_data'), frequency=30)
    )

    agent.initialize()

    # Initialize the runner
    runner = ParallelRunner(agent=agent, environments=env)

    # Start the runner
    runner.run(num_episodes=48)
    runner.close()
Example 17
def runEnv():
    environment = Environment.create(
        environment=CustomEnvironment, max_episode_timesteps=500
    )
    agent = Agent.create(agent='a2c', environment=environment, batch_size=10, learning_rate=1e-3)

    # Train for 2000 episodes
    for _ in range(2000):
        states = environment.reset()
        terminal = False
        while CustomEnvironment.extraCounter != 100:
            actions = agent.act(states=states)
            # print(actions)
            # print(states)
            # execute() returns (states, terminal, reward)
            states, terminal, reward = environment.execute(actions=actions)
            agent.observe(terminal=terminal, reward=reward)

    # Evaluate for 1000 episodes
    sum_rewards = 0.0
    for _ in range(1000):
        states = environment.reset()
        internals = agent.initial_internals()
        terminal = False
        while CustomEnvironment.extraCounter != 100:
            actions, internals = agent.act(states=states, internals=internals, independent=True)
            states, terminal, reward = environment.execute(actions=actions)
            sum_rewards += reward

    # print('Mean episode reward:', sum_rewards / 100)
    # print(CustomEnvironment.firstCount, ",", CustomEnvironment.secondCount, ",", CustomEnvironment.thirdCount)
    print(CustomEnvironment.sum)

    # Close agent and environment
    agent.close()
    environment.close()
Example 18
def main():

    bad_seeds_environment = Environment.create(
        environment=BadSeeds03, seed_count=10, bad_seed_count=3, max_episode_length=100
    )

    agent = Agent.create(
        agent="a2c",
        batch_size=100,
        horizon=100,     # changed from 20 to 100 for agent_03
        exploration=0.05,  # changed from 0.01 to 0.05 for agent_03
        l2_regularization=0.2,  # changed from 0.1 to 0.2 for agent_03
        #entropy_regularization=0.2,  # turned off for agent_03
        variable_noise=0.1,  # changed from 0.05 to 0.1 for agent_03
        environment=bad_seeds_environment,
        summarizer=dict(
            directory="training_data/agent_03_env_03/summaries",
            # list of labels, or 'all'
            labels=["graph", "entropy", "kl-divergence", "losses", "rewards"],
            frequency=100,  # store values every 100 timesteps
        ),
        saver=dict(
            directory='saved_models/agent_03_env_03/checkpoints',
            frequency=600  # save checkpoint every 600 seconds (10 minutes)
        ),
    )

    runner = Runner(agent=agent, environment=bad_seeds_environment)
    for _ in range(10):
        runner.run(num_episodes=10000)
        runner.run(num_episodes=1000, evaluation=True)

    bad_seeds_environment.close()
    agent.close()
Example 19
def main():
    # Create an OpenAI-Gym environment
    environment = Environment.create(environment='gym', level='CartPole-v1')

    # Create a DQN agent
    agent = Agent.create(
        agent='dqn',
        environment=environment,
        # memory=100,
        # # Optimization
        # batch_size=10, update_frequency=2, learning_rate=1e-3,
        summarizer=dict(
            directory='data/summaries',
            # list of labels, or 'all'
            labels=['graph', 'entropy', 'kl-divergence', 'losses', 'rewards'],
            frequency=100  # store values every 100 timesteps
            # (infrequent update summaries every update; other configurations possible)
        ),
        recorder=None)

    # Initialize the runner
    runner = Runner(agent=agent, environment=environment)

    # Start the runner
    runner.run(num_episodes=10000)
    runner.close()
Example 20
def set_up():
    tensorflow_settings()
    bad_seeds_environment = Environment.create(
        environment=BadSeeds02,
        seed_count=10,
        bad_seed_count=3,
        history_block=2,
        max_episode_timesteps=500,
    )

    agent = Agent.create(
        agent="dqn",
        network=[
            dict(type='flatten'),
            dict(type='dense', size=32, activation='tanh'),
            dict(type='dense', size=32, activation='tanh')
        ],
        environment=bad_seeds_environment,
        batch_size=256,
        memory=int(10**7),
        exploration=0.15,
        summarizer=dict(
            directory="training_data/agent_02_env_02/summaries",
            labels="all",
            frequency=100  # store values every 100 timesteps
        ))

    return bad_seeds_environment, agent
Example 21
    def __init__(self, env=None, device=None):
        self.env = env
        if self.env.saver.model_file_name == "":
            try:
                self.env.saver.model_file_name = self.env.model_name + "_" + self.env.dataDirectory.replace("/", "")
            except:
                self.env.saver.model_file_name = self.env.model_name + "_" + self.env.dataDirectory.replace("/", "")
            if not os.path.exists(self.env.saver.model_directory + "/model"):
                os.mkdir(self.env.saver.model_directory + "/model")
            self.env.saver.model_file_path = self.env.saver.model_directory + "/model/" + self.env.saver.model_file_name

        self.agent = Agents.from_spec(
            self.env.settings['agent'],
            kwargs=dict(
                states=self.env.states,
                actions=dict(type='int', num_actions=self.env.actions),
                network=self.env.settings['network'],
                device=device
            )
        )

        try:
            self.agent.restore_model(self.env.saver.model_directory+"/model")
        except:
            pass
Example 22
    def train_implementation(self,
                             train_context: easyagents.core.StepsTrainContext):
        """Tensorforce Dqn Implementation of the train loop.

            The implementation follows https://github.com/tensorforce/tensorforce/blob/master/examples/quickstart.py
        """
        tc = train_context
        train_env = self._create_env()
        network = self._create_network_specification()

        agent_type = 'dqn'
        self.log_api(
            'Agent.create', f'(agent="{agent_type}", ' +
            f'network={network}, ' + f'memory={tc.max_steps_in_buffer}, ' +
            f'start_updating={tc.num_steps_buffer_preload}, ' +
            f'learning_rate={tc.learning_rate}, ' +
            f'batch_size={tc.num_steps_sampled_from_buffer}, ' +
            f'update_frequency={tc.num_steps_per_iteration}, ' +
            f'discount={tc.reward_discount_gamma})')
        self._agent = Agent.create(
            agent=agent_type,
            environment=train_env,
            network=network,
            memory=tc.max_steps_in_buffer,
            start_updating=tc.num_steps_buffer_preload,
            learning_rate=tc.learning_rate,
            batch_size=tc.num_steps_sampled_from_buffer,
            update_frequency=tc.num_steps_per_iteration,
            discount=tc.reward_discount_gamma,
        )
        self._train_with_runner(train_env, tc)
Example 23
    def __init__(self,
                 environment: 'TradingEnvironment',
                 agent_spec: any,
                 save_best_agent: bool = False,
                 **kwargs):
        """
        Arguments:
            environment: A `TradingEnvironment` instance for the agent to trade within.
            agent_spec: A `Tensorforce` agent or agent specification.
            save_best_agent (optional): Whether the runner should automatically save the best agent.
            kwargs (optional): Optional keyword arguments to adjust the strategy.
        """
        self._max_episode_timesteps = kwargs.get('max_episode_timesteps',
                                                 False)

        self._environment = Environment.create(
            environment='gym',
            level=environment,
            max_episode_timesteps=self._max_episode_timesteps)

        self._agent = Agent.create(agent=agent_spec,
                                   environment=self._environment)

        self._runner = Runner(agent=self._agent,
                              environment=self._environment,
                              save_best_agent=save_best_agent)
Example 24
def set_up():
    tensorflow_settings()
    bad_seeds_environment = Environment.create(
        environment=BadSeedsSkinny,
        seed_count=10,
        bad_seed_count=3,
        history_block=2,
        max_episode_timesteps=100,
    )

    agent = Agent.create(
        agent="a2c",
        network=[
            dict(type='flatten'),
            dict(type='dense', size=32, activation='relu'),
            dict(type='dense', size=32, activation='relu')
        ],
        batch_size=10000,  # changed for 04 but was this a mistake? no
        horizon=50,  # changed from 100 to 50 for agent_04
        discount=0.97,  # new for agent_04
        #exploration=0.05,  # turned off for agent_04 - turn on for 05?
        l2_regularization=0.1,
        #entropy_regularization=0.2,  # turned off for agent_03
        variable_noise=0.5,  # changed from 0.1 to 0.5 for agent_04
        environment=bad_seeds_environment,
        summarizer=dict(
            directory="training_data/a2c_dense_skinny/summaries",
            # list of labels, or 'all'
            labels="all",
            frequency=100,  # store values every 100 timesteps
        ),
    )

    return bad_seeds_environment, agent
Example 25
    def __init__(self, environment: 'TradingEnvironment', agent_spec: any,
                 **kwargs):
        """
        Arguments:
            environment: A `TradingEnvironment` instance for the agent to trade within.
            agent_spec: A `Tensorforce` agent or agent specification.
            save_best_agent (optional): Whether the runner should automatically save the best agent.
            kwargs (optional): Optional keyword arguments to adjust the strategy.
        """
        self._max_episode_timesteps = kwargs.get('max_episode_timesteps',
                                                 False)
        self._save_best_agent = kwargs.get('save_best_agent', False)

        self._environment = Environment.create(
            environment='gym',
            level=environment,
            max_episode_timesteps=self._max_episode_timesteps)

        self._agent = Agent.create(
            agent=agent_spec,
            environment=self._environment,
            summarizer=dict(
                directory='data/summaries',
                labels=['graph', 'losses',
                        'rewards'],  # list of labels, or 'all'
                frequency=100  # store values every 100 timesteps
                # (infrequent update summaries every update; other configurations possible)
            ),
        )

        self._runner = Runner(agent=self._agent,
                              environment=self._environment,
                              save_best_agent=self._save_best_agent)
Example 26
    def restore_agent(self, path: str, model_path: str = None):
        """Deserialize the strategy's learning agent from a file.

        Arguments:
            path: The `str` path of the file the agent specification is stored in.
                The `.json` file extension will be automatically appended if not provided.
            model_path (optional): The `str` path of the file or directory the agent checkpoint is stored in.
                If not provided, the `model_path` will default to `{path_without_dot_json}/agents`.
        """
        path_with_ext = path if path.endswith('.json') else f'{path}.json'

        with open(path_with_ext) as json_file:
            spec = json.load(json_file)

            self._agent_spec = spec['agent']
            self._network_spec = spec['network']

        self._agent = Agent.from_spec(spec=self._agent_spec,
                                      kwargs=dict(
                                          network=self._network_spec,
                                          states=self._environment.states,
                                          actions=self._environment.actions))

        path_without_ext = path_with_ext.replace('.json', '')
        model_path = model_path or f'{path_without_ext}/agent'

        self._agent.restore_model(file=model_path)

        self._runner = Runner(agent=self._agent, environment=self._environment)
Example 27
    def test_quickstart(self):
        self.start_tests(name='quickstart')

        # ====================

        # Create an OpenAI-Gym environment
        environment = Environment.create(environment='gym',
                                         level='CartPole-v1')

        # Create a PPO agent
        agent = Agent.create(
            agent='ppo',
            environment=environment,
            # Automatically configured network
            network='auto',
            # Optimization
            batch_size=10,
            update_frequency=2,
            learning_rate=1e-3,
            subsampling_fraction=0.2,
            optimization_steps=5,
            # Reward estimation
            likelihood_ratio_clipping=0.2,
            discount=0.99,
            estimate_terminal=False,
            # Critic
            critic_network='auto',
            critic_optimizer=dict(optimizer='adam',
                                  multi_step=10,
                                  learning_rate=1e-3),
            # Preprocessing
            preprocessing=None,
            # Exploration
            exploration=0.0,
            variable_noise=0.0,
            # Regularization
            l2_regularization=0.0,
            entropy_regularization=0.0,
            # TensorFlow etc
            name='agent',
            device=None,
            parallel_interactions=1,
            seed=None,
            execution=None,
            saver=None,
            summarizer=None,
            recorder=None)

        # Initialize the runner
        runner = Runner(agent=agent, environment=environment)

        # Start the runner
        runner.run(num_episodes=50, use_tqdm=False)
        runner.close()

        # ====================

        self.finished_test()
Example 28
def run_no_runner(environment, nplayers):
    #with open("rl-regenwormen/agent.json", 'r') as fp:
    #    agent = json.load(fp=fp)

    agents = [
        Agent.create(agent='ppo',
                     batch_size=100,
                     learning_rate=1e-3,
                     exploration=0.2,
                     environment=environment,
                     summarizer=dict(directory='summaries', summaries='all'))
        for i in range(nplayers)
    ]

    print("starting training...")
    i = 10000000
    bar = Bar('Training', max=i)
    rewards = {i: 0 for i in range(nplayers)}
    rewards_total = {i: [] for i in range(nplayers)}
    for episode in range(30000):
        for agent in agents:
            agent.reset()
        states = environment.reset()
        terminal = False
        while not terminal:
            try:
                agent = agents[environment.current_player]
                current_player = environment.current_player
                actions = agent.act(states=states)
                #print(actions)
                states, terminal, reward = environment.execute(actions=actions)
                rewards[environment.current_player] += reward
                rewards_total[environment.current_player] += [reward]
                rewards_total[environment.current_player] = rewards_total[
                    environment.current_player][-300:]
                end_of_roll = environment.current_player != current_player
                agent.observe(terminal=end_of_roll, reward=reward)
                if terminal:
                    for agent2 in agents:
                        if agent2 != agent:
                            actions = agent2.act(states=states)
                            states, terminal, reward = environment.execute(
                                actions=actions)
                            agent2.observe(terminal=True, reward=reward)
            except:
                print(f"ENV {environment.state}")
                print(f"ACT {actions}")
                print(states)
                raise
        names = ["lola", "henry de muis", "pykel", "flo"]
        print({
            names[k]:
            (int(v * 100) / 100, int(np.mean(rewards_total[k]) * 100) / 100)
            for k, v in rewards.items()
        })
        rewards = {i: 0 for i in range(nplayers)}
        bar.next()
    bar.finish()
Example 29
    def __init__(self, agent_type, network, action_type, preprocessor_type,
                 reward, tag):
        rf = reward_functions.__dict__[reward]
        super().__init__("-".join(
            [agent_type, network, action_type, reward, tag]))
        agent_spec = create_spec(action_type, agent_type, network)
        self._tf_agent = Agent.from_spec(agent_spec, {})
        self.action_translator = get_action_translator(action_type)
        self.preprocessor = get_observation_preprocessor(preprocessor_type)
Example 30
    def restore_agent(self, directory: str, filename: str = None):
        """Deserialize the strategy's learning agent from a file.

        Arguments:
            directory: The `str` path of the directory the agent checkpoint is stored in.
            filename (optional): The `str` path of the file the agent specification is stored in.
                The `.json` file extension will be automatically appended if not provided.
        """
        self._agent = Agent.load(directory, filename=filename)

        self._runner = Runner(agent=self._agent, environment=self._environment)
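
Together with the agent.save(directory=...) call from Example 4, the snippet above completes a simple save/restore round trip; a hedged sketch follows, where the directory and filename are illustrative:

agent.save(directory="saved_models", filename="agent")  # write a checkpoint
restored_agent = Agent.load(directory="saved_models", filename="agent")  # restore it later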