Code Example #1
def main():
    bad_seeds_environment, agent = set_up()
    runner = Runner(agent=agent, environment=bad_seeds_environment)
    runner.run(num_episodes=10000)
    agent.save(directory="saved_models")
    bad_seeds_environment.close()
    agent.close()
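
The set_up() helper is not shown in this listing; the examples assume it returns an (environment, agent) pair. A minimal sketch of such a helper, using the Tensorforce 2.x Environment.create/Agent.create API that appears in Examples #17 and #20 (the CartPole environment and the hyperparameters are stand-ins, not the original code):

from tensorforce import Agent, Environment

def set_up():
    # Hypothetical stand-in for the helper used above; any environment/agent
    # pair returned in this order would work with main().
    environment = Environment.create(
        environment="gym", level="CartPole-v1", max_episode_timesteps=500
    )
    agent = Agent.create(agent="a2c", environment=environment, batch_size=10)
    return environment, agent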
Code Example #2
def main(
        time_limit=None,
        scoring="default",
        batch_size=16,
        gpu_idx=0,
        env_version=1,
        seed_count=9,
        max_count=10,
        out_path=None,
        num_episodes=int(3 * 10**3),
):
    env, agent = set_up(
        time_limit=time_limit,
        scoring=scoring,
        batch_size=batch_size,
        gpu_idx=gpu_idx,
        env_version=env_version,
        seed_count=seed_count,
        max_count=max_count,
        out_path=out_path,
    )

    runner = Runner(agent=agent, environment=env)
    runner.run(num_episodes=num_episodes)
    if out_path is None:
        out_path = Path()
    else:
        out_path = Path(out_path).expanduser()
    agent.save(directory=str(out_path / "saved_models"))
    agent.close()
    env.close()
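
An invocation sketch (argument values are hypothetical): train on GPU 1 and write everything under a custom directory, after which the model lands in <out_path>/saved_models; Example #27's load_agent() is the matching restore path.

main(gpu_idx=1, out_path="~/experiments/bad_seeds", num_episodes=5000)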
Code Example #3
    def test_lstm(self):
        passed = 0

        for _ in xrange(5):
            environment = MinimalTest(definition=False)
            config = Configuration(batch_size=8,
                                   learning_rate=0.001,
                                   states=environment.states,
                                   actions=environment.actions,
                                   network=layered_network_builder([
                                       dict(type='dense', size=32),
                                       dict(type='dense', size=32),
                                       dict(type='lstm')
                                   ]))
            agent = VPGAgent(config=config)
            runner = Runner(agent=agent, environment=environment)

            def episode_finished(r):
                return r.episode < 100 or not all(
                    x >= 1.0 for x in r.episode_rewards[-100:])

            runner.run(episodes=1000, episode_finished=episode_finished)
            print('VPG agent (LSTM): ' + str(runner.episode))

            if runner.episode < 1000:
                passed += 1

        print('VPG agent (LSTM) passed = {}'.format(passed))
        self.assertTrue(passed >= 4)
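
A pattern that recurs in most of the test excerpts below: in the Tensorforce 0.x API, Runner.run() invokes episode_finished(runner) after each episode and stops the run as soon as the callback returns False. The callbacks therefore return True ("keep going") until a trailing window of rewards shows the task is solved. (The reward_threshold name used in later examples is a module-level constant of the original test files, not shown in these excerpts.) An equivalent, more explicit form of the callback above, assuming that 0.x behavior:

def episode_finished(r):
    # Keep running until the last 100 episode rewards are all >= 1.0,
    # then return False so Runner.run() stops early.
    solved = r.episode >= 100 and all(
        x >= 1.0 for x in r.episode_rewards[-100:])
    return not solved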
Code Example #4
def main():
    env, agent = set_up()
    runner = Runner(agent=agent, environment=env)
    runner.run(num_episodes=10000)
    agent.save(directory="saved_models")
    agent.close()
    env.close()
Code Example #5
    def test_save_restore(self):
        environment_spec = {"float": ()}
        environment = create_environment(environment_spec)
        network_spec = [dict(type='dense', size=32)]
        agent = create_agent(environment, network_spec)
        runner = Runner(agent=agent, environment=environment)

        runner.run(episodes=100)
        model_values = agent.model.session.run(
            agent.model.get_variables(include_submodules=True,
                                      include_nontrainable=False))
        save_path = agent.model.save(directory=self._tmp_dir_path + "/model")
        print("Saved at: %s" % (save_path, ))
        runner.close()

        agent = create_agent(environment, network_spec)
        agent.model.restore(directory="", file=save_path)
        restored_model_values = agent.model.session.run(
            agent.model.get_variables(include_submodules=True,
                                      include_nontrainable=False))
        assert len(model_values) == len(restored_model_values)
        assert all([
            np.array_equal(v1, v2)
            for v1, v2 in zip(model_values, restored_model_values)
        ])

        agent.close()
Code Example #6
    def train_and_test(self, agent, early_stop=-1, n_tests=15):
        n_train = TIMESTEPS // n_tests
        i = 0
        runner = Runner(agent=agent, environment=self)

        try:
            while i <= n_tests:
                self.use_dataset(Mode.TRAIN)
                runner.run(timesteps=n_train, max_episode_timesteps=n_train)
                self.use_dataset(Mode.TEST)
                self.run_deterministic(runner, print_results=True)
                if early_stop > 0:
                    advantages = np.array(
                        self.acc.episode.advantages[-early_stop:])
                    if i >= early_stop and np.all(advantages > 0):
                        i = n_tests
                i += 1
        except KeyboardInterrupt:
            # Lets us kill training with Ctrl-C and skip straight to the final test. This is useful in case you're
            # keeping an eye on terminal and see "there! right there, stop you found it!" (where early_stop & n_tests
            # are the more methodical approaches)
            pass

        # On last "how would it have done IRL?" run, without getting in the way (no killing on repeats, 0-balance)
        print('Running no-kill test-set')
        self.use_dataset(Mode.TEST, no_kill=True)
        self.run_deterministic(runner, print_results=True)
Code Example #7
    def test_discrete(self):
        passed = 0

        for _ in xrange(5):
            environment = MinimalTest(definition=False)
            config = Configuration(batch_size=8,
                                   learning_rate=0.0005,
                                   memory_capacity=800,
                                   first_update=80,
                                   target_update_frequency=20,
                                   memory=dict(type='replay',
                                               random_sampling=True),
                                   states=environment.states,
                                   actions=environment.actions,
                                   network=layered_network_builder([
                                       dict(type='dense', size=32),
                                       dict(type='dense', size=32)
                                   ]))
            agent = CategoricalDQNAgent(config=config)
            runner = Runner(agent=agent, environment=environment)

            def episode_finished(r):
                return r.episode < 100 or not all(
                    x / l >= reward_threshold for x, l in zip(
                        r.episode_rewards[-100:], r.episode_lengths[-100:]))

            runner.run(episodes=1000, episode_finished=episode_finished)
            print('Categorical DQN agent: ' + str(runner.episode))
            if runner.episode < 1000:
                passed += 1

        print('Categorical DQN agent passed = {}'.format(passed))
        self.assertTrue(passed >= 4)
Code Example #8
    def test_replay(self):
        environment = MinimalTest(definition=[(False, (1, 2))])
        config = Configuration(batch_size=8,
                               learning_rate=0.001,
                               memory_capacity=50,
                               memory=dict(type='replay',
                                           random_sampling=True),
                               first_update=20,
                               target_update_frequency=10,
                               states=environment.states,
                               actions=environment.actions,
                               network=layered_network_builder([
                                   dict(type='dense', size=32),
                                   dict(type='dense', size=32)
                               ]))
        agent = DQNAgent(config=config)
        runner = Runner(agent=agent, environment=environment)

        def episode_finished(r):
            return r.episode < 100 or not all(
                x / l >= reward_threshold for x, l in zip(
                    r.episode_rewards[-100:], r.episode_lengths[-100:]))

        runner.run(episodes=1000, episode_finished=episode_finished)
        print('Replay memory DQN: ' + str(runner.episode))
Code Example #9
    def test_continuous(self):
        passed = 0

        for _ in xrange(5):
            environment = MinimalTest(definition=True)
            config = Configuration(
                batch_size=8,
                states=environment.states,
                actions=environment.actions,
                network=layered_network_builder([
                    dict(type='dense', size=32, activation='tanh'),
                    dict(type='dense', size=32, activation='tanh')
                ])
            )
            agent = TRPOAgent(config=config)
            runner = Runner(agent=agent, environment=environment)

            def episode_finished(r):
                return r.episode < 100 or not all(x / l >= reward_threshold for x, l in zip(r.episode_rewards[-100:],
                                                                                            r.episode_lengths[-100:]))

            runner.run(episodes=1000, episode_finished=episode_finished)
            print('TRPO agent (continuous): ' + str(runner.episode))

            if runner.episode < 1000:
                passed += 1

        print('TRPO agent (continuous) passed = {}'.format(passed))
        self.assertTrue(passed >= 4)
Code Example #10
File: test_vpg_agent.py  Project: ddfan/tensorforce
    def test_discrete_baseline(self):
        passed = 0

        for _ in xrange(5):
            environment = MinimalTest(definition=False)
            config = Configuration(batch_size=8,
                                   learning_rate=0.001,
                                   states=environment.states,
                                   actions=environment.actions,
                                   baseline=dict(type="mlp",
                                                 sizes=[32, 32],
                                                 epochs=5,
                                                 update_batch_size=8,
                                                 learning_rate=0.01),
                                   network=layered_network_builder([
                                       dict(type='dense', size=32),
                                       dict(type='dense', size=32)
                                   ]))
            agent = VPGAgent(config=config)
            runner = Runner(agent=agent, environment=environment)

            def episode_finished(r):
                return r.episode < 100 or not all(x / l >= 0.9 for x, l in zip(
                    r.episode_rewards[-100:], r.episode_lengths[-100:]))

            runner.run(episodes=1500, episode_finished=episode_finished)
            print('VPG agent (discrete): ' + str(runner.episode))

            if runner.episode < 1500:
                passed += 1

        print('VPG agent (discrete) passed = {}'.format(passed))
        self.assertTrue(passed >= 4)
Code Example #11
    def test_discrete(self):
        passed = 0

        for _ in xrange(5):
            environment = MinimalTest(definition=False)
            config = Configuration(batch_size=8,
                                   keep_last=True,
                                   learning_rate=0.001,
                                   states=environment.states,
                                   actions=environment.actions,
                                   network=layered_network_builder([
                                       dict(type='dense', size=32),
                                       dict(type='dense', size=32)
                                   ]))
            agent = DQNNstepAgent(config=config)
            runner = Runner(agent=agent, environment=environment)

            def episode_finished(r):
                return r.episode < 100 or not all(x / l >= 0.9 for x, l in zip(
                    r.episode_rewards[-100:], r.episode_lengths[-100:]))

            runner.run(episodes=1000, episode_finished=episode_finished)
            print('DQN Nstep agent: ' + str(runner.episode))
            if runner.episode < 1000:
                passed += 1

        print('DQN Nstep agent passed = {}'.format(passed))
        self.assertTrue(passed >= 4)
Code Example #12
    def __init__(self,
                 environment: 'TradingEnvironment',
                 agent_spec: Any,  # typing.Any; requires "from typing import Any"
                 save_best_agent: bool = False,
                 **kwargs):
        """
        Arguments:
            environment: A `TradingEnvironment` instance for the agent to trade within.
            agent_spec: A `Tensorforce` agent or agent specification.
            save_best_agent (optional): Whether the runner should automatically save the best agent.
            kwargs (optional): Optional keyword arguments to adjust the strategy.
        """
        self._max_episode_timesteps = kwargs.get('max_episode_timesteps',
                                                 False)

        self._environment = Environment.create(
            environment='gym',
            level=environment,
            max_episode_timesteps=self._max_episode_timesteps)

        self._agent = Agent.create(agent=agent_spec,
                                   environment=self._environment)

        self._runner = Runner(agent=self._agent,
                              environment=self._environment,
                              save_best_agent=save_best_agent)
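
A usage sketch for this constructor, assuming the enclosing class is a trading strategy along the lines of TensorTrade's TensorforceTradingStrategy (the class name and all values below are illustrative assumptions):

strategy = TensorforceTradingStrategy(
    environment=trading_environment,              # an existing TradingEnvironment
    agent_spec=dict(agent="ppo", batch_size=10),  # any Tensorforce agent spec
    save_best_agent=True,
    max_episode_timesteps=500,                    # picked up via **kwargs
)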
Code Example #13
    def test_multi(self):
        passed = 0

        def network_builder(inputs):
            layer = layers['dense']
            state0 = layer(x=layer(x=inputs['state0'], size=32), size=32)
            state1 = layer(x=layer(x=inputs['state1'], size=32), size=32)
            return state0 * state1

        for _ in xrange(5):
            environment = MinimalTest(definition=[True, (True, 2)])
            config = Configuration(batch_size=16,
                                   learning_rate=0.00025,
                                   exploration=dict(type='ornstein_uhlenbeck'),
                                   memory_capacity=800,
                                   first_update=80,
                                   target_update_frequency=20,
                                   states=environment.states,
                                   actions=environment.actions,
                                   network=network_builder)
            agent = NAFAgent(config=config)
            runner = Runner(agent=agent, environment=environment)

            def episode_finished(r):
                return r.episode < 20 or not all(
                    x >= 1.0 for x in r.episode_rewards[-20:])

            runner.run(episodes=10000, episode_finished=episode_finished)
            print('NAF agent (multi-state/action): ' + str(runner.episode))
            if runner.episode < 10000:
                passed += 1

        print('NAF agent (multi-state/action) passed = {}'.format(passed))
        self.assertTrue(passed >= 0)  # trivially true, so this test cannot fail on pass count
Code Example #14
    def test_naf_agent(self):

        passed = 0
        for _ in xrange(5):
            environment = MinimalTest(definition=True)
            config = Configuration(batch_size=8,
                                   learning_rate=0.001,
                                   exploration=dict(type='ornstein_uhlenbeck'),
                                   memory_capacity=800,
                                   first_update=80,
                                   target_update_frequency=20,
                                   states=environment.states,
                                   actions=environment.actions,
                                   network=layered_network_builder([
                                       dict(type='dense', size=32),
                                       dict(type='dense', size=32)
                                   ]))
            agent = NAFAgent(config=config)
            runner = Runner(agent=agent, environment=environment)

            def episode_finished(r):
                return r.episode < 100 or not all(
                    x >= 1.0 for x in r.episode_rewards[-100:])

            runner.run(episodes=1000, episode_finished=episode_finished)
            print('NAF agent: ' + str(runner.episode))
            if runner.episode < 1000:
                passed += 1

        print('NAF agent passed = {}'.format(passed))
        self.assertTrue(passed >= 4)
Code Example #15
def main():
    parser = argparse.ArgumentParser(description="Train an IBM agent")
    parser.add_argument("--render",
                        default=False,
                        action='store_true',
                        help="Whether to render or not. Defaults to False.")
    args = parser.parse_args()

    for n_simple in [3]:  #[1, 2, 3]:

        agent, environment = make_agent_env(1, n_simple, args.render)
        agent = restore_agent(agent)

        # Run
        runner = Runner(agent=agent, environment=environment)
        while True:
            runner.run(episodes=100, max_episode_timesteps=2000)
            ave_reward = np.mean(runner.episode_rewards)
            print("Average reward: %f with %d SimpleAgents" %
                  (ave_reward, n_simple))

            directory = os.path.join(os.getcwd(), "log", "agent")
            runner.agent.save_model(directory=directory)

            if ave_reward > 0 and n_simple < 3:
                break
            if ave_reward > 0.9:
                break

        try:
            runner.close()
        except AttributeError:
            pass
Code Example #16
    def test_runner_evaluation(self):
        states = dict(type='float', shape=(1,))

        actions = dict(type='int', shape=(), num_values=3)

        agent, environment = self.prepare(name='runner-evaluation', states=states, actions=actions)

        runner = Runner(agent=agent, environment=environment)

        self.num_evaluations = 0
        evaluation_frequency = 3
        max_evaluation_timesteps = 2
        num_evaluation_iterations = 2

        def evaluation_callback(r):
            self.num_evaluations += 1
            self.assertEqual(r.episode, self.num_evaluations * evaluation_frequency)
            self.assertEqual(len(r.evaluation_timesteps), num_evaluation_iterations)
            for num_timesteps in r.evaluation_timesteps:
                self.assertLessEqual(num_timesteps, max_evaluation_timesteps)

        runner.run(
            num_episodes=10, evaluation_callback=evaluation_callback,
            evaluation_frequency=evaluation_frequency,
            max_evaluation_timesteps=max_evaluation_timesteps,
            num_evaluation_iterations=num_evaluation_iterations
        )

        runner.close()
        sys.stdout.flush()
        self.assertTrue(expr=True)
Code Example #17
def main():
    tensorflow_settings()
    bad_seeds_environment = Environment.create(
        environment=BadSeeds02,
        seed_count=10,
        bad_seed_count=3,
        history_block=2,
        max_episode_timesteps=100,
    )

    agent = Agent.create(
        agent="random",
        environment=bad_seeds_environment,
        summarizer=dict(
            directory="training_data/agent_random_env_02/summaries",
            labels="all",
            frequency=100,  # store values every 100 timesteps
        ),
    )

    runner = Runner(agent=agent, environment=bad_seeds_environment)
    runner.run(num_episodes=10000)

    bad_seeds_environment.close()
    agent.close()
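
The summaries written by this agent can be inspected by pointing TensorBoard at the configured directory:

tensorboard --logdir training_data/agent_random_env_02/summaries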
Code Example #18
    def test_discrete(self):
        passed = 0

        for _ in xrange(5):
            environment = MinimalTest(continuous=False)
            config = Configuration(batch_size=8,
                                   learning_rate=0.001,
                                   memory_capacity=800,
                                   first_update=80,
                                   repeat_update=4,
                                   target_update_frequency=20,
                                   states=environment.states,
                                   actions=environment.actions,
                                   network=layered_network_builder(
                                       [dict(type='dense', size=32)]))
            agent = DQNAgent(config=config)
            runner = Runner(agent=agent, environment=environment)

            def episode_finished(r):
                return r.episode < 100 or not all(
                    x >= 1.0 for x in r.episode_rewards[-100:])

            runner.run(episodes=5000, episode_finished=episode_finished)
            print('DQN Agent: ' + str(runner.episode))
            if runner.episode < 5000:
                passed += 1
                print('passed')
            else:
                print('failed')

        print('DQN Agent passed = {}'.format(passed))
        self.assertTrue(passed >= 4)
Code Example #19
    def test_multi(self):
        passed = 0

        def network_builder(inputs, **kwargs):
            layer = layers['dense']
            state0 = layer(x=layer(x=inputs['state0'], size=32), size=32)
            state1 = layer(x=layer(x=inputs['state1'], size=32), size=32)
            return state0 * state1

        for _ in xrange(5):
            environment = MinimalTest(definition=[False, (False, 2)])
            config = Configuration(
                batch_size=8,
                learning_rate=0.001,
                memory_capacity=800,
                first_update=80,
                target_update_frequency=20,
                states=environment.states,
                actions=environment.actions,
                network=network_builder
            )
            agent = CategoricalDQNAgent(config=config)
            runner = Runner(agent=agent, environment=environment)

            def episode_finished(r):
                return r.episode < 15 or not all(
                    x / l >= reward_threshold for x, l in zip(
                        r.episode_rewards[-15:], r.episode_lengths[-15:]))

            runner.run(episodes=2000, episode_finished=episode_finished)
            print('Categorical DQN agent (multi-state/action): ' + str(runner.episode))
            if runner.episode < 2000:
                passed += 1

        print('Categorical DQN agent (multi-state/action) passed = {}'.format(passed))
        self.assertTrue(passed >= 2)
Code Example #20
def main():

    bad_seeds_environment = Environment.create(
        environment=BadSeeds03, seed_count=10, bad_seed_count=3, max_episode_length=100
    )

    agent = Agent.create(
        agent="a2c",
        batch_size=100,
        horizon=100,     # changed from 20 to 100 for agent_03
        exploration=0.05,  # changed from 0.01 to 0.05 for agent_03
        l2_regularization=0.2,  # changed from 0.1 to 0.2 for agent_03
        #entropy_regularization=0.2,  # turned off for agent_03
        variable_noise=0.1,  # changed from 0.05 to 0.1 for agent_03
        environment=bad_seeds_environment,
        summarizer=dict(
            directory="training_data/agent_03_env_03/summaries",
            # list of labels, or 'all'
            labels=["graph", "entropy", "kl-divergence", "losses", "rewards"],
            frequency=100,  # store values every 100 timesteps
        ),
        saver=dict(
            directory='saved_models/agent_03_env_03/checkpoints',
            frequency=600  # save checkpoint every 600 seconds (10 minutes)
        ),
    )

    runner = Runner(agent=agent, environment=bad_seeds_environment)
    for _ in range(10):
        runner.run(num_episodes=10000)
        runner.run(num_episodes=1000, evaluation=True)

    bad_seeds_environment.close()
    agent.close()
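
Note on the loop above: in the Tensorforce 2.x Runner, run(..., evaluation=True) executes episodes in evaluation mode, acting without training updates, so each iteration alternates 10,000 training episodes with 1,000 evaluation-only episodes.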
Code Example #21
    def test_discrete(self):
        passed = 0

        # TRPO can occasionally have numerical issues so we allow for 1 in 5 to fail on Travis
        for _ in xrange(5):
            environment = MinimalTest(continuous=False)
            config = Configuration(batch_size=8,
                                   learning_rate=0.0001,
                                   cg_iterations=20,
                                   cg_damping=0.001,
                                   line_search_steps=20,
                                   max_kl_divergence=0.05,
                                   states=environment.states,
                                   actions=environment.actions,
                                   network=layered_network_builder(
                                       [dict(type='dense', size=32)]))
            agent = TRPOAgent(config=config)
            runner = Runner(agent=agent, environment=environment)

            def episode_finished(r):
                return r.episode < 100 or not all(
                    x >= 1.0 for x in r.episode_rewards[-100:])

            runner.run(episodes=2000, episode_finished=episode_finished)
            print('TRPO Agent (discrete): ' + str(runner.episode))

            if runner.episode < 2000:
                passed += 1

        print('TRPO discrete agent passed = {}'.format(passed))
        self.assertTrue(passed >= 4)
Code Example #22
    def __init__(self,
                 environment: TradingEnvironment,
                 agent_spec: Dict = None,
                 network_spec: Dict = None,
                 **kwargs):
        """
        Arguments:
            environment: A `TradingEnvironment` instance for the agent to trade within.
            agent_spec: A specification dictionary for the `Tensorforce` agent.
            network_spec: A specification dictionary for the `Tensorforce` agent's model network.
            kwargs (optional): Optional keyword arguments to adjust the strategy.
        """
        self._environment = environment

        self._max_episode_timesteps = kwargs.get('max_episode_timesteps', None)

        if agent_spec and network_spec:
            self._agent_spec = agent_spec
            self._network_spec = network_spec

            self._agent = Agent.from_spec(spec=agent_spec,
                                          kwargs=dict(
                                              network=network_spec,
                                              states=environment.states,
                                              actions=environment.actions))

            self._runner = Runner(agent=self._agent, environment=environment)
Code Example #23
    def train_and_test(self, agent, n_steps, n_tests, early_stop):
        test_acc = self.acc.tests
        n_steps = n_steps * 10000
        test_acc.n_tests = n_tests
        test_acc.i = 0
        timesteps_each = n_steps // n_tests
        runner = Runner(agent=agent, environment=self)

        try:
            while test_acc.i <= n_tests:
                self.use_dataset(Mode.TRAIN)
                # max_episode_timesteps not required, since we kill on (cash|value)<0 or max_repeats
                runner.run(timesteps=timesteps_each)
                self.use_dataset(Mode.TEST)
                self.run_deterministic(runner, print_results=True)
                if early_stop > 0:
                    sharpes = np.array(self.acc.episode.sharpes[-early_stop:])
                    if test_acc.i >= early_stop and np.all(sharpes > 0):
                        test_acc.i = n_tests
                test_acc.i += 1
        except KeyboardInterrupt:
            # Lets us kill training with Ctrl-C and skip straight to the final test. This is useful in case you're
            # keeping an eye on terminal and see "there! right there, stop you found it!" (where early_stop & n_steps
            # are the more methodical approaches)
            pass

        # On last "how would it have done IRL?" run, without getting in the way (no killing on repeats, 0-balance)
        print('Running no-kill test-set')
        self.use_dataset(Mode.TEST, full_set=True)
        self.run_deterministic(runner, print_results=True)
Code Example #24
    def restore_agent(self, path: str, model_path: str = None):
        """Deserialize the strategy's learning agent from a file.

        Arguments:
            path: The `str` path of the file the agent specification is stored in.
                The `.json` file extension will be automatically appended if not provided.
            model_path (optional): The `str` path of the file or directory the agent checkpoint is stored in.
                If not provided, the `model_path` will default to `{path_without_dot_json}/agents`.
        """
        path_with_ext = path if path.endswith('.json') else f'{path}.json'

        with open(path_with_ext) as json_file:
            spec = json.load(json_file)

            # json.load() returns a plain dict, so read the entries by key
            self._agent_spec = spec['agent']
            self._network_spec = spec['network']

        self._agent = Agent.from_spec(spec=self._agent_spec,
                                      kwargs=dict(
                                          network=self._network_spec,
                                          states=self._environment.states,
                                          actions=self._environment.actions))

        path_without_ext = path_with_ext.replace('.json', '')
        model_path = model_path or f'{path_without_ext}/agent'

        self._agent.restore_model(file=model_path)

        self._runner = Runner(agent=self._agent, environment=self._environment)
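
For this to succeed, the JSON file must carry top-level "agent" and "network" entries. A sketch of writing a compatible spec file (structure inferred from the keys read above; the contents are illustrative only):

import json

spec = {
    "agent": {"type": "ppo_agent", "batch_size": 10},  # illustrative agent spec
    "network": [
        {"type": "dense", "size": 32},
        {"type": "dense", "size": 32},
    ],
}
with open("my_strategy.json", "w") as f:
    json.dump(spec, f)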
Code Example #25
    def test_continuous(self):
        passed = 0

        for _ in xrange(5):
            environment = MinimalTest(continuous=True)
            config = Configuration(batch_size=8,
                                   cg_iterations=20,
                                   cg_damping=0.001,
                                   line_search_steps=20,
                                   max_kl_divergence=0.05,
                                   states=environment.states,
                                   actions=environment.actions,
                                   network=layered_network_builder(
                                       [dict(type='dense', size=32)]))
            agent = TRPOAgent(config=config)
            runner = Runner(agent=agent, environment=environment)

            def episode_finished(r):
                return r.episode < 100 or not all(
                    x >= 1.0 for x in r.episode_rewards[-100:])

            runner.run(episodes=10000, episode_finished=episode_finished)
            print('TRPO Agent (continuous): ' + str(runner.episode))

            if runner.episode < 10000:
                passed += 1
                print('passed')
            else:
                print('failed')

        print('TRPO continuous agent passed = {}'.format(passed))
        self.assertTrue(passed >= 4)
Code Example #26
    def test_continuous(self):
        passed = 0

        for _ in xrange(5):
            environment = MinimalTest(definition=True)
            config = Configuration(
                batch_size=20,
                entropy_penalty=0.01,
                loss_clipping=0.1,
                epochs=10,
                optimizer_batch_size=10,
                learning_rate=0.0005,
                states=environment.states,
                actions=environment.actions,
                network=layered_network_builder([
                    dict(type='dense', size=32),
                    dict(type='dense', size=32)
                ])
            )
            agent = PPOAgent(config=config)
            runner = Runner(agent=agent, environment=environment)

            def episode_finished(r):
                return r.episode < 100 or not all(x / l >= reward_threshold for x, l in zip(r.episode_rewards[-100:],
                                                                                            r.episode_lengths[-100:]))

            runner.run(episodes=2000, episode_finished=episode_finished)
            print('PPO agent (continuous): ' + str(runner.episode))

            if runner.episode < 2000:
                passed += 1

        print('PPO agent (continuous) passed = {}'.format(passed))
        self.assertTrue(passed >= 4)
Code Example #27
def load_agent(
    time_limit=None,
    scoring="default",
    batch_size=16,
    gpu_idx=0,
    env_version=1,
    seed_count=9,
    max_count=10,
    out_path=None,
):
    env, agent = set_up(
        time_limit=time_limit,
        scoring=scoring,
        batch_size=batch_size,
        gpu_idx=gpu_idx,
        env_version=env_version,
        seed_count=seed_count,
        max_count=max_count,
        out_path=out_path,
    )
    if out_path is None:
        out_path = Path()
    else:
        out_path = Path(out_path).expanduser()
    agent.restore(directory=str(out_path / "saved_models"))
    runner = Runner(agent=agent, environment=env)
    runner.run(num_episodes=20)
    return agent
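
This helper is the restore counterpart of Example #2's main(): it rebuilds the same environment/agent pair, loads the weights saved under <out_path>/saved_models via agent.restore(), and runs 20 episodes as a quick sanity check before returning the agent.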
Code Example #28
    def test_multi(self):
        passed = 0

        def network_builder(inputs, **kwargs):
            layer = layers['dense']
            state0 = layer(x=layer(x=inputs['state0'], size=32), size=32)
            state1 = layer(x=layer(x=inputs['state1'], size=32), size=32)
            state2 = layer(x=layer(x=inputs['state2'], size=32), size=32)

            return state0 * state1 * state2

        for _ in xrange(5):
            environment = MinimalTest(
                definition=[False, (False, 2), (True, 2)])
            config = Configuration(batch_size=8,
                                   learning_rate=0.001,
                                   states=environment.states,
                                   actions=environment.actions,
                                   network=network_builder)
            agent = VPGAgent(config=config)
            runner = Runner(agent=agent, environment=environment)

            def episode_finished(r):
                return r.episode < 100 or not all(
                    x / l >= reward_threshold for x, l in zip(
                        r.episode_rewards[-100:], r.episode_lengths[-100:]))

            runner.run(episodes=4000, episode_finished=episode_finished)
            print('VPG agent (multi-state/action): ' + str(runner.episode))
            if runner.episode < 4000:
                passed += 1

        print('VPG agent (multi-state/action) passed = {}'.format(passed))
        self.assertTrue(passed >= 4)
Code Example #29
    def test_beta(self):
        passed = 0

        for _ in xrange(5):
            environment = MinimalTest(definition=True)
            actions = environment.actions
            actions['min_value'] = -0.5
            actions['max_value'] = 1.5

            config = Configuration(batch_size=8,
                                   learning_rate=0.01,
                                   states=environment.states,
                                   actions=actions,
                                   network=layered_network_builder([
                                       dict(type='dense', size=32),
                                       dict(type='dense', size=32)
                                   ]))
            agent = VPGAgent(config=config)
            runner = Runner(agent=agent, environment=environment)

            def episode_finished(r):
                return r.episode < 100 or not all(x / l >= 0.9 for x, l in zip(
                    r.episode_rewards[-100:], r.episode_lengths[-100:]))

            runner.run(episodes=1500, episode_finished=episode_finished)
            print('VPG agent (beta): ' + str(runner.episode))
            if runner.episode < 1500:
                passed += 1

        print('VPG agent (beta) passed = {}'.format(passed))
        self.assertTrue(passed >= 4)
Code Example #30
File: run.py  Project: schlesingerphilipp/trader
def train(config, network_spec=None):
    data_provider = DataProvider(config.db)
    env = StockEnvironment(data_provider, config, 0)
    if config.overwrite_agent:
        agent = overwrite_agent(env, network_spec, config)
    else:
        agent = load_agent(config, env, network_spec)

    mlflow.log_param("agent", "tensorforce.agents.DQNAgent")
    for key in config.agent_specs:
        mlflow.log_param(key, config.agent_specs[key])

    runner = Runner(agent=agent, environment=env)
    offset = 20000
    num_episodes = 20
    step = 0
    while data_provider.has_data_key(offset + config.max_step_per_episode):
        runner.run(num_episodes=num_episodes)
        offset = offset + config.max_step_per_episode
        env.offset = offset
        agent.save(config.agent_dir, config.agent_name)
        if step % 10 == 0:
            evaluate(config, data_provider,
                     offset - config.max_step_per_episode, agent)
        step += 1
    return agent, env