Ejemplo n.º 1
0
    def test_dqfd_agent(self):
        """Pretrain a DQFD agent on scripted demonstrations, then train it.

        Five independent trials are run on a discrete ``MinimalTest``
        environment.  A trial counts as passed when ``runner.episode`` is
        below 1000 once ``runner.run`` returns; the test requires at least
        four passing trials.
        """
        successes = 0

        for _ in xrange(5):
            environment = MinimalTest(continuous=False)
            layer_specs = [
                dict(type='dense', size=32, l2_regularization=0.0001),
            ]
            config = Configuration(
                batch_size=16,
                learning_rate=0.001,
                memory_capacity=800,
                first_update=80,
                repeat_update=4,
                target_update_frequency=20,
                demo_memory_capacity=100,
                demo_sampling_ratio=0.1,
                states=environment.states,
                actions=environment.actions,
                network=layered_network_builder(layers_config=layer_specs),
            )
            agent = DQFDAgent(config=config)

            # Collect 50 demonstration steps that always take action 1.
            # Starting with terminal=True forces a reset before the first step.
            demonstrations = []
            terminal = True
            for _step in xrange(50):
                if terminal:
                    environment.reset()
                action = 1
                state, reward, terminal = environment.execute(action=action)
                demonstrations.append({
                    'state': state,
                    'action': action,
                    'reward': reward,
                    'terminal': terminal,
                    'internal': [],
                })

            agent.import_demonstrations(demonstrations)
            agent.pretrain(steps=1000)

            # Regular training after pretraining.
            runner = Runner(agent=agent, environment=environment)

            def episode_finished(r):
                # True during the first 100 episodes, and afterwards for as
                # long as any of the last 100 episode rewards is not >= 1.0.
                # Presumably a False return value stops the runner -- confirm
                # against Runner.run.
                if r.episode < 100:
                    return True
                recent_rewards = r.episode_rewards[-100:]
                return not all(x >= 1.0 for x in recent_rewards)

            runner.run(episodes=1000, episode_finished=episode_finished)
            print('DQFD Agent: ' + str(runner.episode))
            if runner.episode < 1000:
                successes += 1

        print('DQFD Agent passed = {}'.format(successes))
        self.assertTrue(successes >= 4)
    def test_multi(self):
        """Run the DQFD pretrain-then-train check with three states/actions.

        The network combines three named state inputs, and every
        demonstration uses a fixed composite action.  Five trials are run; a
        trial passes when ``runner.episode`` is below 1000 after
        ``runner.run`` returns, and at least four must pass.
        """
        successes = 0

        def network_builder(inputs):
            # Two stacked dense layers of size 32 per state input; the three
            # resulting outputs are multiplied together.
            dense = layers['dense']
            towers = []
            for key in ('state0', 'state1', 'state2'):
                hidden = dense(x=inputs[key], size=32)
                towers.append(dense(x=hidden, size=32))
            return towers[0] * towers[1] * towers[2]

        for _ in xrange(5):
            environment = MinimalTest(
                definition=[False, (False, 2), (False, (1, 2))]
            )
            config = Configuration(
                batch_size=8,
                learning_rate=0.001,
                memory_capacity=800,
                first_update=80,
                target_update_frequency=20,
                demo_memory_capacity=100,
                demo_sampling_ratio=0.2,
                states=environment.states,
                actions=environment.actions,
                network=network_builder,
            )
            agent = DQFDAgent(config=config)

            # Collect 50 demonstration steps using the same composite action.
            # Starting with terminal=True forces a reset before the first step.
            demonstrations = []
            terminal = True
            for _step in xrange(50):
                if terminal:
                    environment.reset()
                action = {
                    'action0': 1,
                    'action1': (1, 1),
                    'action2': ((1, 1), ),
                }
                state, reward, terminal = environment.execute(action=action)
                demonstrations.append({
                    'state': state,
                    'action': action,
                    'reward': reward,
                    'terminal': terminal,
                    'internal': [],
                })

            agent.import_demonstrations(demonstrations)
            agent.pretrain(steps=1000)

            # Regular training after pretraining.
            runner = Runner(agent=agent, environment=environment)

            def episode_finished(r):
                # True during the first 50 episodes, and afterwards for as
                # long as any of the last 50 episode rewards is not >= 1.0.
                if r.episode < 50:
                    return True
                recent_rewards = r.episode_rewards[-50:]
                return not all(x >= 1.0 for x in recent_rewards)

            runner.run(episodes=1000, episode_finished=episode_finished)
            print('DQFD agent (multi-state/action): ' + str(runner.episode))
            if runner.episode < 1000:
                successes += 1

        print('DQFD agent (multi-state/action) passed = {}'.format(successes))
        self.assertTrue(successes >= 4)
Ejemplo n.º 3
0
    def test_discrete(self):
        """Run the DQFD pretrain-then-train check on a discrete MinimalTest.

        Five trials are run with a two-layer dense network and a replay
        memory.  A trial passes when ``runner.episode`` is below 1000 after
        ``runner.run`` returns; at least four trials must pass.
        """
        successes = 0

        for _ in xrange(5):
            environment = MinimalTest(definition=False)
            config = Configuration(
                batch_size=8,
                learning_rate=0.001,
                memory_capacity=800,
                first_update=80,
                target_update_frequency=20,
                demo_memory_capacity=100,
                demo_sampling_ratio=0.2,
                memory={'type': 'replay', 'random_sampling': True},
                states=environment.states,
                actions=environment.actions,
                network=layered_network_builder([
                    {'type': 'dense', 'size': 32},
                    {'type': 'dense', 'size': 32},
                ]),
            )
            agent = DQFDAgent(config=config)

            # Collect 50 demonstration steps that always take action 1.
            # Starting with terminal=True forces a reset before the first step.
            demonstrations = []
            terminal = True
            for _step in xrange(50):
                if terminal:
                    environment.reset()
                action = 1
                state, reward, terminal = environment.execute(action=action)
                demonstrations.append({
                    'state': state,
                    'action': action,
                    'reward': reward,
                    'terminal': terminal,
                    'internal': [],
                })

            agent.import_demonstrations(demonstrations)
            agent.pretrain(steps=1000)

            # Regular training after pretraining.
            runner = Runner(agent=agent, environment=environment)

            def episode_finished(r):
                # True during the first 100 episodes, and afterwards for as
                # long as any of the last 100 episodes has a reward-per-step
                # that is not >= reward_threshold (a name defined outside
                # this method).
                if r.episode < 100:
                    return True
                recent = zip(r.episode_rewards[-100:],
                             r.episode_lengths[-100:])
                return not all(
                    total / length >= reward_threshold
                    for total, length in recent)

            runner.run(episodes=1000, episode_finished=episode_finished)
            print('DQFD agent: ' + str(runner.episode))
            if runner.episode < 1000:
                successes += 1

        print('DQFD agent passed = {}'.format(successes))
        self.assertTrue(successes >= 4)
Ejemplo n.º 4
0
    def test_dqfd_agent(self):
        """Verify that DQFD pretraining on demonstrations leads to fast learning.

        Each of five trials builds a fresh agent, feeds it 50 demonstration
        steps that always choose action 1, pretrains for 1000 steps and then
        trains through a ``Runner``.  A trial is counted when
        ``runner.episode`` ends below 1000; four counted trials are needed.
        """
        num_passed = 0

        for _ in xrange(5):
            environment = MinimalTest(continuous=False)
            network = layered_network_builder(layers_config=[
                {'type': 'dense', 'size': 32, 'l2_regularization': 0.0001},
            ])
            config = Configuration(
                batch_size=16,
                learning_rate=0.001,
                memory_capacity=800,
                first_update=80,
                repeat_update=4,
                target_update_frequency=20,
                demo_memory_capacity=100,
                demo_sampling_ratio=0.1,
                states=environment.states,
                actions=environment.actions,
                network=network,
            )
            agent = DQFDAgent(config=config)

            # Demonstration phase: terminal starts out True so that the
            # environment is reset before the very first step.
            demonstrations = []
            terminal = True
            for _step in xrange(50):
                if terminal:
                    environment.reset()
                action = 1
                state, reward, terminal = environment.execute(action=action)
                record = dict(
                    state=state,
                    action=action,
                    reward=reward,
                    terminal=terminal,
                    internal=[],
                )
                demonstrations.append(record)

            agent.import_demonstrations(demonstrations)
            agent.pretrain(steps=1000)

            # Training phase.
            runner = Runner(agent=agent, environment=environment)

            def episode_finished(r):
                # Returns True until the 100 most recent episode rewards are
                # all >= 1.0 (and always during the first 100 episodes).
                if r.episode < 100:
                    return True
                window = r.episode_rewards[-100:]
                return not all(x >= 1.0 for x in window)

            runner.run(episodes=1000, episode_finished=episode_finished)
            print('DQFD Agent: ' + str(runner.episode))
            if runner.episode < 1000:
                num_passed += 1

        print('DQFD Agent passed = {}'.format(num_passed))
        self.assertTrue(num_passed >= 4)
Ejemplo n.º 5
0
def get_dqfd_agent(environment, bootstrap, *args, **kwargs):
    """Build a DQFD agent and either pretrain it from demos or restore it.

    The network specification is read from ``config/cnn_network.json``.

    Args:
        environment: Object providing the ``states`` and ``actions``
            specifications passed to ``DQFDAgent``.
        bootstrap: If truthy, import every demo yielded by ``load_demos()``,
            pretrain for 15000 steps and save the model.  Otherwise restore
            a previously saved model.
        *args: Accepted for call compatibility; unused.
        **kwargs: Accepted for call compatibility; unused.

    Returns:
        The constructed ``DQFDAgent``.
    """
    with open('config/cnn_network.json', 'r') as infile:
        network = json.load(infile)

    agent = DQFDAgent(
        states=environment.states,
        actions=environment.actions,
        network=network,
        memory={
            "type": "replay",
            "capacity": 32000,
            "include_next_states": True,
        },
        saver={
            "directory": "checkpoint/dqfd",
            "seconds": 1800,
        },
    )

    if not bootstrap:
        agent.model.restore()
        return agent

    internals = agent.current_internals

    for demo in load_demos():
        # Every step is rewarded with 1 and marked non-terminal; the final
        # step of the demo is overridden below.
        demonstrations = [
            {
                "states": state,
                "internals": internals,
                "actions": move,
                "terminal": False,
                "reward": 1,
            }
            for state, move in zip(demo['states'], demo['moves'])
        ]

        if not demonstrations:
            # A demo without any (state, move) pair has no final step to
            # mark as terminal; indexing [-1] would raise IndexError.
            continue

        # The last recorded step ends the episode and gets reward -1.
        demonstrations[-1]['terminal'] = True
        demonstrations[-1]['reward'] = -1

        agent.import_demonstrations(demonstrations)

    print("Pretraining agent network")
    agent.pretrain(steps=15000)

    print("Saving trained network")
    agent.model.save()

    return agent
Ejemplo n.º 6
0
 def createDQFDAgent(states, actions, rewards, terminals):
   """Create a DQFD agent and pretrain it on the given demonstration data.

   The agent's state/action specification comes from ``env`` and the layer
   sizes from ``networkFirstLayer`` / ``networkLastLayer``; all three are
   names defined outside this function.

   Args:
     states: Demonstration states, passed on under the ``states`` key.
     actions: Demonstration actions, passed on under the ``actions`` key.
     rewards: Demonstration rewards, passed on under the ``reward`` key.
     terminals: Demonstration terminal flags, passed on under the
       ``terminal`` key.

   Returns:
     The pretrained ``DQFDAgent``.
   """
   agent = DQFDAgent(
     states = env.states,
     actions = env.actions,
     network=[
       dict(type='dense', size=networkFirstLayer),
       dict(type='dense', size=int((networkFirstLayer*networkLastLayer)**0.5)), # geometric average of first and last
       dict(type='dense', size=networkLastLayer),
     ],
     optimizer=dict(type='adam', learning_rate=1e-4),
     target_sync_frequency=10,
   )
   # Use the function's own parameters: the previous code read the
   # undefined names `terminal` and `reward`, leaving `terminals` and
   # `rewards` unused.
   demonstrations = dict(
     states = states,
     #internals = internals,
     actions = actions,
     terminal = terminals,
     reward = rewards
   )
   agent.import_demonstrations(demonstrations = demonstrations)
   agent.pretrain(steps = 24 * 10)
   return agent
Ejemplo n.º 7
0
    def test_dqfd_agent(self):
        """Pretrain a DQFD agent from demo data and check that it converges.

        A toy two-state environment is simulated inline: action 0 yields
        state ``(1, 0)`` with reward 0.0, any other action yields state
        ``(0, 1)`` with reward 1.0; no step is ever terminal.  After filling
        the demo memory with 50 steps and pretraining, the agent is trained
        for up to 1000 steps.  The test passes as soon as the last 100
        rewards are all 1.0 and fails if that never happens.
        """
        config = {
            "expert_sampling_ratio": 0.01,
            "supervised_weight": 0.5,
            "expert_margin": 1,
            'batch_size': 8,
            'state_shape': (2, ),
            'actions': 2,
            'action_shape': (),
            'update_rate': 1,
            'update_repeat': 4,
            'min_replay_size': 20,
            'memory_capacity': 20,
            "exploration": "epsilon_decay",
            "exploration_param": {
                "epsilon": 0,
                "epsilon_final": 0,
                "epsilon_states": 0
            },
            'target_network_update_rate': 1.0,
            'use_target_network': True,
            "alpha": 0.00004,
            "gamma": 1,
            "tau": 1.0
        }

        tf.reset_default_graph()

        config = create_config(config)
        network_builder = NeuralNetwork. \
            layered_network(layers=[{'type': 'dense',
                                     'num_outputs': 16,
                                     'weights_regularizer': 'tensorflow.contrib.layers.python.layers.regularizers.l2_regularizer',
                                     'weights_regularizer_kwargs': {
                                         'scale': 0.01
                                     }
                                     }, {'type': 'linear', 'num_outputs': 2}])
        agent = DQFDAgent(config=config, network_builder=network_builder)

        def step(action):
            # Toy environment transition: returns (state, reward, terminal).
            if action == 0:
                return (1, 0), 0.0, False
            return (0, 1), 1.0, False

        state = (1, 0)
        # Ring buffer holding the rewards of the 100 most recent steps.
        rewards = [0.0] * 100

        # First: add to demo memory
        for _ in xrange(50):
            action = agent.get_action(state=state)
            state, reward, terminal = step(action)
            agent.add_demo_observation(state=state,
                                       action=action,
                                       reward=reward,
                                       terminal=terminal)

        # Pre-train from demo data
        agent.pre_train(10000)

        # If pretraining worked, we should not need much more training
        for n in xrange(1000):
            action = agent.get_action(state=state)
            state, reward, terminal = step(action)

            agent.add_observation(state=state,
                                  action=action,
                                  reward=reward,
                                  terminal=terminal)
            rewards[n % 100] = reward

            if sum(rewards) == 100.0:
                print('Passed after steps = {:d}'.format(n))

                return
            print('sum = {:f}'.format(sum(rewards)))

        # Previously the method simply returned here, so the test passed
        # even when the agent never converged.
        raise AssertionError(
            'DQFD agent did not collect 100 consecutive rewards of 1.0 '
            'within 1000 steps')
Ejemplo n.º 8
0
    def test_dqfd_agent(self):
        """Fill the demo memory step by step, pretrain, then train via Runner.

        Five trials are run on a discrete ``MinimalTest`` environment.  In
        each one, 50 steps that always execute action 1 are added as demo
        observations before ``pre_train`` and regular training.  A trial
        passes when ``runner.episode`` is below 1000 after ``runner.run``
        returns; at least four trials must pass.
        """
        successes = 0

        for _ in xrange(5):
            environment = MinimalTest(continuous=False)

            layer_specs = [
                dict(type='dense', size=32, l2_regularization=0.0001),
            ]
            config = Configuration(
                batch_size=8,
                memory_capacity=100,
                first_update=20,
                repeat_update=4,
                target_update_frequency=1,
                discount=1,
                learning_rate=0.001,
                expert_sampling_ratio=0.1,
                supervised_weight=1,
                expert_margin=1,
                states=environment.states,
                actions=environment.actions,
                network=layered_network_builder(layers_config=layer_specs),
            )

            agent = DQFDAgent(config=config)

            # First: generate some data to add to demo memory
            state = environment.reset()
            agent.reset()

            for _step in xrange(50):
                # The agent is still queried on every step, but its choice
                # is discarded in favour of the correct action.
                agent.act(state=state)
                action = 1

                state, step_reward, terminal = environment.execute(
                    action=action)

                agent.add_demo_observation(
                    state=state,
                    action=action,
                    reward=step_reward,
                    terminal=terminal,
                )

                if terminal:
                    state = environment.reset()
                    agent.reset()

            # Pre-train from demo data
            agent.pre_train(10000)

            runner = Runner(agent=agent, environment=environment)

            def episode_finished(r):
                # True during the first 100 episodes, and afterwards for as
                # long as any of the last 100 episode rewards is not >= 1.0.
                if r.episode < 100:
                    return True
                recent_rewards = r.episode_rewards[-100:]
                return not all(x >= 1.0 for x in recent_rewards)

            # Fewer than in DQN test
            runner.run(episodes=1000, episode_finished=episode_finished)
            if runner.episode < 1000:
                successes += 1
                print('passed')
            else:
                print('failed')

        print('DQFD Agent passed = {}'.format(successes))
        self.assertTrue(successes >= 4)