예제 #1
0
    config.lwf = args.lwf

    config.lwf_loss = args.lwf_loss

    config.lwf_weight = args.lwf_weight

    config.num_old_actions = int(args.num_old_actions)

    return config

def print_config(config):
    """Print every attribute of ``config`` as ``name = value`` lines.

    A ``Current config:`` header followed by a blank line is printed first,
    then one line per attribute, ordered by attribute name.

    Args:
        config: any object whose settings are exposed through ``vars()``
            (i.e. it has a ``__dict__``).
    """
    # Single-argument print(...) calls produce the same output whether print
    # is the Python 2 statement or the Python 3 function.
    print('Current config:\n')
    # Attribute names are unique, so sorting the (name, value) pairs orders
    # them by name without ever comparing the values.
    for var, val in sorted(vars(config).items()):
        print(var + ' = ' + str(val))


if __name__ == '__main__':
    # Turn the command-line arguments into a config object and echo it.
    args = parse_args()
    my_config = modify_config(args)
    print_config(my_config)

    # Everything below runs inside the TensorFlow device scope named by
    # args.gpu.
    device_name = '/gpu:' + str(args.gpu)
    with tf.device(device_name):
        # Create the gym environment and apply the DQN wrapper in one step.
        env = wrap_dqn(gym.make(my_config.env_name))

        # Build the network, prepare it for evaluation, and evaluate.
        model = NatureQN(env, my_config)
        model.initialize_eval()
        model.evaluate()

        # Recording is optional and controlled by the config.
        if my_config.record:
            model.record()
of the training is to use Tensorboard. The starter code writes summaries of different
variables.

To launch tensorboard, open a Terminal window and run 
tensorboard --logdir=results/
Then, connect remotely to
ip-address-of-the-server:6006
(6006 is the default port used by TensorBoard).
"""
if __name__ == '__main__':
    # Reference timestamp for the elapsed-time report printed at the end.
    starttime = time.time()

    # Environment pipeline: gym env -> MaxAndSkipEnv -> PreproWrapper.
    raw_env = gym.make(config.env_name)
    skipping_env = MaxAndSkipEnv(raw_env, skip=config.skip_frame)
    env = PreproWrapper(
        skipping_env,
        prepro=greyscale,
        shape=(80, 80, 1),
        overwrite_render=config.overwrite_render,
    )

    # Exploration (epsilon) schedule, parameterized by the config.
    exp_schedule = LinearExploration(
        env, config.eps_begin, config.eps_end, config.eps_nsteps
    )

    # Learning-rate schedule, parameterized by the config.
    lr_schedule = LinearSchedule(
        config.lr_begin, config.lr_end, config.lr_nsteps
    )

    # Build the model and run it with both schedules.
    model = NatureQN(env, config)
    model.run(exp_schedule, lr_schedule)

    # Report the time elapsed since the start of this block.
    elapsed = time.time() - starttime
    print('Total render time : {:.2f}'.format(elapsed))
예제 #3
0
of the training is to use Tensorboard. The starter code writes summaries of different
variables.

To launch tensorboard, open a Terminal window and run 
tensorboard --logdir=results/
Then, connect remotely to
ip-address-of-the-server:6006
(6006 is the default port used by TensorBoard).
"""
if __name__ == '__main__':
    # Environment pipeline: gym env -> MaxAndSkipEnv -> PreproWrapper.
    raw_env = gym.make(config.env_name)
    skipping_env = MaxAndSkipEnv(raw_env, skip=config.skip_frame)
    env = PreproWrapper(
        skipping_env,
        prepro=greyscale,
        shape=(80, 80, 1),
        overwrite_render=config.overwrite_render,
    )

    # Exploration (epsilon) schedule, parameterized by the config.
    exp_schedule = LinearExploration(
        env, config.eps_begin, config.eps_end, config.eps_nsteps
    )

    # Learning-rate schedule, parameterized by the config.
    lr_schedule = LinearSchedule(
        config.lr_begin, config.lr_end, config.lr_nsteps
    )

    # Build the model, copy the schedule flag from the config onto it, and
    # log the value before starting the run.
    model = NatureQN(env, config)
    model.original_schedule = config.original_schedule
    schedule_msg = 'original schedule: {}'.format(model.original_schedule)
    model.logger.info(schedule_msg)

    model.run(exp_schedule, lr_schedule)
예제 #4
0
# Wrap the existing environment with the preprocessing wrapper.
env = PreproWrapper(
    env,
    prepro=greyscale,
    shape=(80, 80, 1),
    overwrite_render=config.overwrite_render,
)

rewards = []

# Checkpoint locations for the experts. The two lists are index-aligned:
# entry i of the meta list pairs with entry i of the checkpoint list.
experts_meta_lis = [
    './core/checkpoints/q_learning/skip_connection/q5_train_atari_nature/deepdqn_weights/.meta',
    './core/checkpoints/q_learning/skip_connection/q5_train_atari_nature/resnet_weights/.meta',
    './core/checkpoints/policy_gradients/policy_network.ckpt.meta',
]
experts_chkpt_lis = [
    './core/checkpoints/q_learning/skip_connection/q5_train_atari_nature/deepdqn_weights/',
    './core/checkpoints/q_learning/skip_connection/q5_train_atari_nature/resnet_weights/',
    './core/checkpoints/policy_gradients/policy_network.ckpt',
]
experts = []

for meta_path, chkpt_path in zip(experts_meta_lis, experts_chkpt_lis):
    # Debug output: names of all nodes currently in the default graph.
    node_names = [node.name for node in tf.get_default_graph().as_graph_def().node]
    print(node_names)

    # Policy-gradient checkpoints are skipped; only the Q-network experts
    # are instantiated and loaded here.
    if "policy" in meta_path:
        continue

    # Choose the network class from the keyword embedded in the path.
    if "deepdqn" in meta_path:
        model = NatureQN(env, config)
    elif "resnet" in meta_path:
        model = ResnetQN(env, config)

    model.initialize(meta_path, chkpt_path)
    experts.append(model)

print("LOADED ALL MODELS")

for i in range(len(experts)):
    guide = experts[i]
    guide_experience = [[]]
예제 #5
0
If so, please report your hyperparameters.

You'll find the results, log and video recordings of your agent every 250k under
the corresponding file in the results folder. A good way to monitor the progress
of the training is to use Tensorboard. The starter code writes summaries of different
variables.

To launch tensorboard, open a Terminal window and run 
tensorboard --logdir=results/
Then, connect remotely to
ip-address-of-the-server:6006
(6006 is the default port used by TensorBoard).
"""
if __name__ == '__main__':
    # Environment pipeline: gym env -> MaxAndSkipEnv -> PreproWrapper.
    raw_env = gym.make(config.env_name)
    skipping_env = MaxAndSkipEnv(raw_env, skip=config.skip_frame)
    env = PreproWrapper(
        skipping_env,
        prepro=greyscale,
        shape=(80, 80, 1),
        overwrite_render=config.overwrite_render,
    )

    # Build and initialize the network, then hand it to load_model. The
    # check aborts only when load_model returns something equal to False.
    model = NatureQN(env, config)
    model.initialize()
    loaded = load_model(model)
    assert loaded != False, "Loading failed"

    # Run evaluation on the environment for a single episode.
    model.evaluate(env, 1)