Example #1
def train(args):
    # Set up the environment, build the model, and create the DQN agent;
    # then run the agent's fit method.
    env = gym.make(args.env)
    num_actions = env.action_space.n

    network_model, q_values_func = create_model(
        args.stack_frames, (args.cropped_size, args.cropped_size),
        num_actions,
        model_name='q_network')
    preprocessor = PreprocessorSequence(args.stack_frames,
                                        (args.cropped_size, args.cropped_size),
                                        num_actions)
    memory = ReplayMemory(args.memsize, args.stack_frames)
    policy = {
        'init': UniformRandomPolicy(num_actions),
        'train': GreedyEpsilonPolicy(num_actions),
        'test': GreedyPolicy(),
    }

    print("Generate Model...")
    dqn_agent = DQNAgent(network_model, q_values_func, preprocessor, memory,
                         policy, args.gamma, args.target_update_freq,
                         args.num_burn_in, args.train_freq, args.batch_size,
                         args.output)

    print("Compile Model...")
    dqn_agent.compile(optimizer=Adam(lr=args.learning_rate),
                      loss_func=mean_huber_loss)

    print("Fit Model...")
    sys.stdout.flush()

    dqn_agent.fit(env, args.num_iterations, args.max_episode_length)
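
For context, a minimal driver that could feed this train() function is sketched below. The flag names mirror the attributes the example reads from args (env, stack_frames, cropped_size, memsize, gamma, target_update_freq, num_burn_in, train_freq, batch_size, output, learning_rate, num_iterations, max_episode_length); the actual dqn_atari.py script and its defaults may differ, so treat the values here as placeholders.

# Hypothetical argument parser for the train() example above; flag names
# follow the attributes read from `args`, defaults are assumptions.
import argparse

def build_parser():
    parser = argparse.ArgumentParser(description='Train a DQN agent on Atari')
    parser.add_argument('--env', default='SpaceInvaders-v0')
    parser.add_argument('--stack_frames', type=int, default=4)
    parser.add_argument('--cropped_size', type=int, default=84)
    parser.add_argument('--memsize', type=int, default=1000000)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--target_update_freq', type=int, default=10000)
    parser.add_argument('--num_burn_in', type=int, default=50000)
    parser.add_argument('--train_freq', type=int, default=4)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--learning_rate', type=float, default=0.0001)
    parser.add_argument('--num_iterations', type=int, default=5000000)
    parser.add_argument('--max_episode_length', type=int, default=100000)
    parser.add_argument('--output', default='atari-v0')
    return parser

if __name__ == '__main__':
    # Calls the train() defined in the example above.
    train(build_parser().parse_args())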
Example #2
def test(args):
    if not os.path.isfile(args.model_path):
        print("The model path: {} doesn't exist in the system.".format(
            args.model_path))
        print(
            "Hints: python dqn_atari.py --mode test --model_path Path_to_your_model_weigths"
        )
        return

    # Set up the environment, build the model, and create the DQN agent;
    # then load the trained weights and run evaluation.
    env = gym.make(args.env)
    num_actions = env.action_space.n
    network_model, q_values_func = create_model(
        args.stack_frames, (args.cropped_size, args.cropped_size),
        num_actions,
        model_name='q_network')

    rewards = []
    lens = []
    tries = 0
    while True:
        env = gym.make(args.env)
        env = wrappers.Monitor(env, 'videos', force=True)

        #network_model.load_weights(args.output + '/model_weights_%d.h5' % m)

        preprocessor = PreprocessorSequence(
            args.stack_frames, (args.cropped_size, args.cropped_size),
            num_actions)
        memory = ReplayMemory(args.memsize, args.stack_frames)
        policy = {
            'init': UniformRandomPolicy(num_actions),
            'train': GreedyEpsilonPolicy(num_actions),
            'test': GreedyPolicy(),
        }

        dqn_agent = DQNAgent(network_model, q_values_func, preprocessor,
                             memory, policy, args.gamma,
                             args.target_update_freq, args.num_burn_in,
                             args.train_freq, args.batch_size, args.output)

        dqn_agent.load_weights(args.model_path)

        cumulative_reward, std, average_episode_length = dqn_agent.evaluate(
            env, 1, None)
        tries += 1

        # Sometimes the model is not very stable; keep retrying until a run
        # clears the reward threshold, or give up after 100 tries.
        if tries > 100 or cumulative_reward > 350:
            break

        print('average reward = %f, std = %f, average_epis_length = %d' %
              (cumulative_reward, std, average_episode_length))
        rewards.append(cumulative_reward)
        lens.append(average_episode_length)
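
The loop above collects per-try results in rewards and lens, but the snippet ends before summarizing them. A small sketch of how those lists could be reported afterwards is shown below; it assumes NumPy is available, and note that the run that triggers the break is not appended by the loop as written.

# Sketch: summarize the recorded evaluation runs (assumes NumPy).
import numpy as np

if rewards:
    print('recorded %d runs: mean reward = %.2f (std %.2f), '
          'mean episode length = %.1f'
          % (len(rewards), np.mean(rewards), np.std(rewards), np.mean(lens)))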
Example #3
def train(args):
    # need to start a new session
    env = gym.make(args.env)
    num_actions = env.action_space.n
    if args.modeltype == 'dueling_dqn':
        network_model, q_values_func = create_model(
            args.stack_frames, (args.cropped_size, args.cropped_size),
            num_actions,
            args.dueling_type,
            model_name='dueling_q_network')
    else:
        network_model, q_values_func = create_model(
            args.stack_frames, (args.cropped_size, args.cropped_size),
            num_actions,
            args.dueling_type,
            model_name='q_network')
    preprocessor = PreprocessorSequence(args.stack_frames,
                                        (args.cropped_size, args.cropped_size),
                                        num_actions)
    memory = ReplayMemory(args.memsize, args.stack_frames)
    policy = {
        'init': UniformRandomPolicy(num_actions),
        'train': GreedyEpsilonPolicy(num_actions),
        'test': GreedyPolicy(),
    }

    print("Generate Model...")
    if args.modeltype == 'double_dqn':
        ddqn_agent = DQNAgent(network_model, q_values_func, preprocessor,
                              memory, policy, args.gamma,
                              args.target_update_freq, args.num_burn_in,
                              args.train_freq, args.batch_size, args.output,
                              True, False, args.dueling_type)
        dqn_agent = DQNAgent(network_model, q_values_func, preprocessor,
                             memory, policy, args.gamma,
                             args.target_update_freq, args.num_burn_in,
                             args.train_freq, args.batch_size, args.output,
                             False, False, args.dueling_type)
    elif args.modeltype == 'dueling_dqn':
        dqn_agent = DQNAgent(network_model, q_values_func, preprocessor,
                             memory, policy, args.gamma,
                             args.target_update_freq, args.num_burn_in,
                             args.train_freq, args.batch_size, args.output,
                             False, True, args.dueling_type)
    else:  # natural (vanilla) DQN
        dqn_agent = DQNAgent(network_model, q_values_func, preprocessor,
                             memory, policy, args.gamma,
                             args.target_update_freq, args.num_burn_in,
                             args.train_freq, args.batch_size, args.output,
                             False, False, args.dueling_type)

    print("Compiling Model...")
    dqn_agent.compile(optimizer=Adam(lr=args.learning_rate),
                      loss_func=mean_huber_loss)
    if args.modeltype == 'double_dqn':
        ddqn_agent.compile(optimizer=Adam(lr=args.learning_rate),
                           loss_func=mean_huber_loss)

    print("Fitting model...")
    sys.stdout.flush()
    dqn_rewards = dqn_agent.fit(env, args.num_iterations,
                                args.max_episode_length)
    if args.modeltype == 'double_dqn':
        ddqn_rewards = ddqn_agent.fit(env, args.num_iterations,
                                      args.max_episode_length)
        return ddqn_rewards
    return dqn_rewards
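
Both training examples compile the agent with mean_huber_loss, which is defined elsewhere in the project and not shown here. The sketch below is an assumption of what such a loss plausibly computes, using the standard Huber definition (quadratic within a delta-wide band, linear outside) and a TensorFlow backend; the project's actual function may differ in signature or delta.

# Sketch of a mean Huber loss, assuming TensorFlow.
import tensorflow as tf

def mean_huber_loss(y_true, y_pred, delta=1.0):
    # Quadratic for |error| <= delta, linear beyond it; this bounds the
    # gradient of large TD errors and stabilizes Q-learning updates.
    error = y_true - y_pred
    abs_error = tf.abs(error)
    quadratic = tf.minimum(abs_error, delta)
    linear = abs_error - quadratic
    return tf.reduce_mean(0.5 * tf.square(quadratic) + delta * linear)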