Example #1
def exp_double_duel():
    # gym, models, policy and Agent are assumed to be imported at module level.
    import matplotlib.pyplot as plt
    eps = 1000  # number of training episodes

    env = gym.make('CartPole-v0')
    env.seed(19)

    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n


    log_dir = './logs/prova_pole'

    net = models.DenseDQN(log_dir=log_dir, action_size=action_size, state_size=state_size, layer_size=(24, 24),
                          lr=0.001)

    a = Agent(game=env, net=net, log_dir=log_dir,
              pol=policy.AnnealedPolicy(inner_policy=policy.EpsPolicy(1.0, other_pol=policy.GreedyPolicy()),
                                        attr='eps', value_max=1.0, value_min=0.1,
                                        value_test=0.5, nb_steps=500))

    r = a.learn(eps, False, 10, verbose=False)
    plt.plot(range(eps), r, label='DQN')

    net = models.DuelDenseDQN(log_dir=log_dir, action_size=action_size, state_size=state_size, layer_size=(24, 24),
                              lr=0.001, layer_size_val=(4, 4))
    env.seed(19)
    a = Agent(game=env, net=net, log_dir=log_dir,
              pol=policy.AnnealedPolicy(inner_policy=policy.EpsPolicy(1.0, other_pol=policy.GreedyPolicy()),
                                        attr='eps', value_max=1.0, value_min=0.1,
                                        value_test=0.5, nb_steps=500))

    r = a.learn(eps, False, 10, verbose=False)
    plt.plot(range(eps), r, label='Duel DQN 4 4')

    for i in [50, 100, 200, 300, 500, 750, 1000, 2000, 3000]:

        net = models.DuelDenseDQN(log_dir=log_dir, action_size=action_size, state_size=state_size, layer_size=(24, 24),
                                  lr=0.001, layer_size_val=(4, 4))
        n = models.DoubleDQNWrapper(network=net, update_time=i)

        a = Agent(game=env, net=n, log_dir=log_dir,
                  pol=policy.AnnealedPolicy(inner_policy=policy.EpsPolicy(1.0, other_pol=policy.GreedyPolicy()),
                                            attr='eps', value_max=1.0, value_min=0.1,
                                            value_test=0.5, nb_steps=500))

        r = a.learn(eps, False, 10, verbose=False)

        plt.plot(range(eps), r, label='Double Duel DQN 4 4 '+str(i))

    plt.legend()
    plt.savefig('exp_double_duel.png')
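The same annealed ε-greedy policy is rebuilt verbatim in every experiment below. A small helper like the following (hypothetical, not part of the library) would keep the sweeps shorter; it only repackages the exact AnnealedPolicy call used above:

def make_annealed_policy(nb_steps=500, value_min=0.1):
    # Linearly anneal epsilon from 1.0 down to value_min over nb_steps steps,
    # delegating to a greedy policy otherwise; same call as in the examples.
    return policy.AnnealedPolicy(
        inner_policy=policy.EpsPolicy(1.0, other_pol=policy.GreedyPolicy()),
        attr='eps', value_max=1.0, value_min=value_min,
        value_test=0.5, nb_steps=nb_steps)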
Example #2
def exp_ddqn():
    import matplotlib.pyplot as plt
    eps = 1000

    env = gym.make('CartPole-v0')
    env.seed(19)

    pol = policy.AnnealedPolicy(inner_policy=policy.EpsPolicy(1.0, other_pol=policy.GreedyPolicy()), attr='eps',
                                value_max=1.0, value_min=0.1, value_test=0.5, nb_steps=500)

    log_dir = './logs/prova_pole'+pol.name

    n = models.DenseDQN(log_dir=log_dir, action_size=env.action_space.n, state_size=env.observation_space.shape[0],
                        layer_size=(24, 24), lr=0.001)

    a = Agent(game=env, net=n, log_dir=log_dir, pol=pol)

    r = a.learn(eps, False, 10, verbose=False)
    plt.plot(range(eps), r, label='DQN')

    for i in [50, 100, 200, 300, 500, 750, 1000, 2000, 3000]:
        pol = policy.AnnealedPolicy(inner_policy=policy.EpsPolicy(1.0, other_pol=policy.GreedyPolicy()), attr='eps',
                                    value_max=1.0, value_min=0.1, value_test=0.5, nb_steps=500)

        log_dir = './logs/prova_pole'+pol.name

        n = models.DoubleDQNWrapper(network=models.DenseDQN(log_dir=log_dir, action_size=env.action_space.n,
                                    state_size=env.observation_space.shape[0], layer_size=(24, 24), lr=0.001),
                                    update_time=i)

        a = Agent(game=env, net=n, log_dir=log_dir, pol=pol)

        r = a.learn(eps, False, 10, verbose=False)

        plt.plot(range(eps), r, label='Update time: {}'.format(i))

    plt.legend()
    plt.savefig('exp_ddqn.png')
Example #3
    def __init__(self, config=None):

        if config is None:
            config = {}
        # build the wrapped Atari environment from the configured game id
        self.env = wrap_dqn(gym.make(config.get('game', 'PongNoFrameskip-v4')))
        self.action_size = self.env.action_space.n

        self.to_vis = config.get('visualize', False)
        self.verbose = config.get('verbose', True)
        self.backup = config.get('backup', 25)
        self.episodes = config.get('episodes', 300)

        self.depth = config.get('depth', 4)
        self.state_size = config.get('space', (84, 84))
        self.model = None
        self._target_model = None

        self.prioritized = config.get('prioritized', False)

        if self.prioritized:
            self.memory = PrioritizedMemory(
                max_len=config.get('mem_size', 100000))
        else:
            self.memory = SimpleMemory(max_len=config.get('mem_size', 100000))

        if config.get('duel', False):
            self.model = self._duel_conv()
        else:
            self.model = self._conv()

        self.model.compile(Adam(lr=config.get('lr', 1e-4)), loss=huber_loss)

        # optionally keep a frozen target network, re-synced every update_time steps
        if config.get('target', True):
            self._target_model = clone_model(self.model)
            self._target_model.set_weights(self.model.get_weights())
            self._time = 0
            self.update_time = config.get('target_update', 1000)

        self.env._max_episode_steps = None  # lift gym's TimeLimit cap on episode length
        self.batch_size = config.get('batch', 32 * 3)
        self.to_observe = config.get('to_observe', 10000)

        self.log_dir = config['log_dir']
        if not os.path.exists(self.log_dir):
            os.makedirs(self.log_dir)
        plot_model(self.model,
                   to_file=os.path.join(self.log_dir, 'model.png'),
                   show_shapes=True)

        attr = {
            'batch size': self.batch_size,
            'to observe': self.to_observe,
            'depth': self.depth
        }

        self.results = {'info': attr}

        load_prev = config.get('load', False)

        self.gamma = None
        pol = None

        if 'pol' in config:
            if config['pol'] == 'random':
                pol = policy.RandomPolicy()
            elif config['pol'] == 'eps':
                pol = policy.EpsPolicy(config.get('pol_eps', 0.1))

        self.pol = pol

        if load_prev:
            # resume from the most recent numbered checkpoint directory, if any
            path = sorted([
                int(x) for x in os.listdir(self.log_dir)
                if os.path.isdir(os.path.join(self.log_dir, x))
            ])
            if len(path) != 0:
                load_prev = self.load(os.path.join(self.log_dir,
                                                   str(path[-1])))

        if self.pol is None:
            self.pol = policy.AnnealedPolicy(
                inner_policy=policy.EpsPolicy(1.0,
                                              other_pol=policy.GreedyPolicy()),
                attr='eps',
                value_max=1.0,
                value_min=config.get('ex_min', 0.02),
                value_test=0.5,
                nb_steps=config.get('ex_steps', 100000))
        if self.gamma is None:
            self.gamma = policy.EpsPolicy(float(config.get('gamma',
                                                           0.99))).get_value
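A minimal construction sketch for the config-driven constructor above. The class name ImageAgent is an assumption (it is imported under that name in the next example), and only keys that __init__ actually reads are used:

# Hypothetical usage of the config-driven constructor above.
config = {
    'game': 'PongNoFrameskip-v4',   # Atari id passed through wrap_dqn
    'episodes': 300,
    'duel': True,                   # dueling convolutional head instead of plain conv
    'prioritized': False,           # SimpleMemory instead of PrioritizedMemory
    'target': True,                 # keep a target network
    'target_update': 1000,          # sync interval for the target network
    'lr': 1e-4,
    'log_dir': './logs/pong_duel',
    'load': False,                  # do not resume from a previous checkpoint
}
agent = ImageAgent(config)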
Example #4
import sys
sys.path.append('..')
import policy
from dqn.agent_with_depth_less_memory import ImageAgent as ia_less
from dqn.models_with_depth import DenseDQN, DoubleDQNWrapper, ConvDQM, ConvDDQN

n = ConvDQM(action_size=6, state_size=(84, 84), depth=4, lr=1e-4)

n = DoubleDQNWrapper(n, 10000)

# n = DenseDQN(action_size=3, state_size=6, depth=4, lr=0.001, layer_size=(64, 64))
pol = policy.AnnealedPolicy(inner_policy=policy.EpsPolicy(
    1.0, other_pol=policy.GreedyPolicy()),
                            attr='eps',
                            value_max=1.0,
                            value_min=0.02,
                            value_test=0.5,
                            nb_steps=100000)

agent = ia_less(pol=pol,
                network=n,
                to_observe=10000,
                max_len_memory=100000,
                log_dir='../pong/good_wrappers_DDQN_32x3-8/',
                load_prev=True,
                gamma=0.99)

# agent = ram_less(pol=pol, network=n, to_observe=50000, max_len_memory=1000000,
#                  log_dir='../logs/pong/ram/depth2_huber_DQN/', load_prev=False)
Example #5
def exp_duel():
    import matplotlib.pyplot as plt
    eps = 1000

    env = gym.make('CartPole-v0')
    env.seed(19)

    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n

    pol = policy.AnnealedPolicy(inner_policy=policy.EpsPolicy(1.0, other_pol=policy.GreedyPolicy()), attr='eps',
                                value_max=1.0, value_min=0.1, value_test=0.5, nb_steps=500)

    log_dir = './logs/prova_pole' + pol.name

    net = models.DenseDQN(log_dir=log_dir, action_size=action_size, state_size=state_size, layer_size=(24, 24),
                          lr=0.001)

    a = Agent(game=env, net=net, log_dir=log_dir, pol=pol)

    r = a.learn(eps, False, 10, verbose=False)
    print(r[-1])
    plt.plot(range(eps), r, label='DQN')

    pol = policy.AnnealedPolicy(inner_policy=policy.EpsPolicy(1.0, other_pol=policy.GreedyPolicy()), attr='eps',
                                value_max=1.0, value_min=0.1, value_test=0.5, nb_steps=500)

    net = models.DuelDenseDQN(log_dir=log_dir, action_size=action_size, state_size=state_size, layer_size=(24, 24),
                              lr=0.001, layer_size_val=(12, 12))
    env.seed(19)
    a = Agent(game=env, net=net, log_dir=log_dir, pol=pol)

    r = a.learn(eps, False, 10, verbose=False)
    print(r[-1])
    plt.plot(range(eps), r, label='Duel DQN 12 12')

    pol = policy.AnnealedPolicy(inner_policy=policy.EpsPolicy(1.0, other_pol=policy.GreedyPolicy()), attr='eps',
                                value_max=1.0, value_min=0.1, value_test=0.5, nb_steps=500)

    net = models.DuelDenseDQN(log_dir=log_dir, action_size=action_size, state_size=state_size, layer_size=(24, 24),
                              lr=0.001, layer_size_val=(8, 8))
    env.seed(19)
    a = Agent(game=env, net=net, log_dir=log_dir, pol=pol)

    r = a.learn(eps, False, 10, verbose=False)
    print(r[-1])
    plt.plot(range(eps), r, label='Duel DQN 8 8')

    pol = policy.AnnealedPolicy(inner_policy=policy.EpsPolicy(1.0, other_pol=policy.GreedyPolicy()), attr='eps',
                                value_max=1.0, value_min=0.1, value_test=0.5, nb_steps=500)

    net = models.DuelDenseDQN(log_dir=log_dir, action_size=action_size, state_size=state_size, layer_size=(24, 24),
                              lr=0.001, layer_size_val=(4, 4))

    a = Agent(game=env, net=net, log_dir=log_dir, pol=pol)

    r = a.learn(eps, False, 10, verbose=False)
    plt.plot(range(eps), r, label='Duel DQN 4 4')
    print(r[-1])

    pol = policy.AnnealedPolicy(inner_policy=policy.EpsPolicy(1.0, other_pol=policy.GreedyPolicy()), attr='eps',
                                value_max=1.0, value_min=0.1, value_test=0.5, nb_steps=500)

    net = models.DuelDenseDQN(log_dir=log_dir, action_size=action_size, state_size=state_size, layer_size=(24, 24),
                              lr=0.001, layer_size_val=(24, 24))

    a = Agent(game=env, net=net, log_dir=log_dir, pol=pol)

    r = a.learn(eps, False, 10, verbose=False)
    plt.plot(range(eps), r, label='Duel DQN 24 24')
    print(r[-1])
    plt.legend()
    plt.savefig('exp_duel.png')
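If these experiment functions live in a single script, a plain entry point is enough to reproduce the three CartPole comparisons; this is a sketch, not part of the original examples:

if __name__ == '__main__':
    # each sweep writes its own PNG in the working directory
    exp_ddqn()
    exp_duel()
    exp_double_duel()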