def launch(args, defaults, description):
    """
    Execute a complete training run.

    The run proceeds in this order: parse the command line, configure the
    ALE emulator and load the ROM, rebuild the VAE from its pickled
    auxiliary file (restoring weights from a checkpoint when one is found),
    then build the Q-network, the agent and the experiment and run it.

    Args:
        args: forwarded unchanged to ``process_args``.
        defaults: forwarded to ``process_args``; ``BASE_ROM_PATH`` is also
            read from it to locate the ROM file.
        description: forwarded unchanged to ``process_args``.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    # Accept the ROM name with or without its '.bin' extension.
    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    # A fixed seed makes the whole run reproducible on request.
    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)

    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    # Load the VAE auxiliary file. Pickles are binary data, so the file is
    # opened in 'rb' mode; the ``with`` block closes it.
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(parameters.vae_aux_file, 'rb') as f:
        aux_data = pickle.load(f)
    params = aux_data['params']

    # No explicit device is requested here; pass e.g. "/gpu:0" instead of
    # None to pin the VAE graph to a specific device.
    with tf.device(None):

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        sess = tf.Session(config=config)

        VAE = vae.vae(params)
        VAE._create_network_()

        # Fall back to the older initializer name when the newer one is
        # not available in the installed TensorFlow.
        try:
            sess.run(tf.global_variables_initializer())
        except AttributeError:
            sess.run(tf.initialize_all_variables())

        saver = tf.train.Saver()
        chkpt = tf.train.get_checkpoint_state(parameters.vae_file)

        if chkpt and chkpt.model_checkpoint_path:
            saver.restore(sess, chkpt.model_checkpoint_path)
        else:
            # The run continues with the freshly initialised VAE weights.
            print('No checkpoint found')

    # Imported here rather than at module level, after the emulator and the
    # TensorFlow session have been set up.
    import theano
    import ale_experiment
    import ale_agent
    import q_network
    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'
    if parameters.nn_file is None:
        # The Q-network's input dimensions are (VAE.z_size, 1).
        network = q_network.DeepQLearner(
            VAE.z_size, 1, num_actions, parameters.phi_length,
            parameters.discount, parameters.learning_rate,
            parameters.rms_decay, parameters.rms_epsilon, parameters.momentum,
            parameters.clip_delta, parameters.freeze_interval,
            parameters.batch_size, parameters.network_type,
            parameters.update_rule, parameters.batch_accumulator, rng)
    else:
        # Resume from a previously pickled network; the handle is closed
        # as soon as loading finishes.
        with open(parameters.nn_file, 'rb') as handle:
            network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(
        network, parameters.epsilon_start, parameters.epsilon_min,
        parameters.epsilon_decay, parameters.replay_memory_size,
        parameters.experiment_prefix, parameters.replay_start_size,
        parameters.update_frequency, rng)

    experiment = ale_experiment.ALEExperiment(
        ale, agent, VAE.X_size[1], VAE.X_size[0], parameters.resize_method,
        parameters.epochs, parameters.steps_per_epoch,
        parameters.steps_per_test, parameters.frame_skip,
        parameters.death_ends_episode, parameters.max_start_nullops, rng, VAE,
        sess)

    experiment.run()
# Example #2
# 0
# Optimisation settings taken from the parsed command line.
tr_iters, lr_rate, dump_path = (
    commandline_params['tr_iters'],
    commandline_params['learning_rate'],
    commandline_params['dump_path'],
)

# Hyper-parameters handed to the VAE constructor.
params = {
    'batch_size': commandline_params['batch_size'],
    # presumably (height, width, channels) of one input frame -- confirm
    # against the vae module
    'X_size': [210, 160, 3],
    'z_size': 30,
    'beta': commandline_params['beta'],
}

print('---Params used---')
print(params)

# Second name bound to the very same dict object (an alias, not a copy).
params_generated = params

# Build the VAE graph and an Adam training op on its total loss.
VAE = vae.vae(params)
VAE._create_network_()

train_step = tf.train.AdamOptimizer(lr_rate).minimize(VAE.total_loss)

# Fall back to the older initializer name when the newer one is missing.
try:
    sess.run(tf.global_variables_initializer())
except AttributeError:
    sess.run(tf.initialize_all_variables())

saver = tf.train.Saver()

# Bundle of the settings and data-split values used for this run.
aux_data = {
    'params': params,
    'commandline_params': commandline_params,
    'perm_train': perm_train,
    'perm_valid': perm_valid,
    'magic_seed_number': magic_seed_number,
}
def launch(args, defaults, description):
    """
    Execute a complete training run and save the VAE afterwards.

    The run proceeds in this order: parse the command line, configure the
    ALE emulator and load the ROM, build a freshly initialised VAE from the
    parameters in its pickled auxiliary file, build the Q-network, the agent
    and the experiment, run the experiment, and finally save the TensorFlow
    session to a timestamped checkpoint directory.

    Args:
        args: forwarded unchanged to ``process_args``.
        defaults: forwarded to ``process_args``; the attributes
            ``BASE_ROM_PATH``, ``RESIZED_WIDTH``, ``RESIZED_HEIGHT``,
            ``VAE_REQ_STEPS``, ``VAE_STORAGE_SIZE`` and ``VAE_OUT_PREFIX``
            are also read from it.
        description: forwarded unchanged to ``process_args``.

    Raises:
        OSError: if an output directory cannot be created.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    # Accept the ROM name with or without its '.bin' extension.
    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    # A fixed seed makes the whole run reproducible on request.
    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        import sys
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)

    ale.loadROM(full_rom_path)

    num_actions = len(ale.getMinimalActionSet())

    # Load the VAE auxiliary file. Pickles are binary data, so the file is
    # opened in 'rb' mode; the ``with`` block closes it.
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(parameters.vae_aux_file, 'rb') as f:
        aux_data = pickle.load(f)
    params = aux_data['params']

    # No explicit device is requested here; pass e.g. "/gpu:0" instead of
    # None to pin the VAE graph to a specific device.
    with tf.device(None):

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        sess = tf.Session(config=config)

        VAE = vae.vae(params)
        VAE._create_network_()

        # Fall back to the older initializer name when the newer one is
        # not available in the installed TensorFlow.
        try:
            sess.run(tf.global_variables_initializer())
        except AttributeError:
            sess.run(tf.initialize_all_variables())

        # No checkpoint is restored here; the saver is only used to write
        # the VAE out after the experiment has run.
        saver = tf.train.Saver()

    # Imported here rather than at module level, after the emulator and the
    # TensorFlow session have been set up.
    import theano
    import ale_experiment
    import ale_agent
    import q_network
    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'
    if parameters.nn_file is None:
        network = q_network.DeepQLearner(
            defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT, num_actions,
            parameters.phi_length, parameters.discount,
            parameters.learning_rate, parameters.rms_decay,
            parameters.rms_epsilon, parameters.momentum, parameters.clip_delta,
            parameters.freeze_interval, parameters.batch_size,
            parameters.network_type, parameters.update_rule,
            parameters.batch_accumulator, rng)
    else:
        # Resume from a previously pickled network; the handle is closed
        # as soon as loading finishes.
        with open(parameters.nn_file, 'rb') as handle:
            network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(
        network, parameters.epsilon_start, parameters.epsilon_min,
        parameters.epsilon_decay, parameters.replay_memory_size,
        parameters.experiment_prefix, parameters.replay_start_size,
        parameters.update_frequency, rng)

    # Width then height, matching the dimensions given to DeepQLearner
    # above (the original passed RESIZED_WIDTH twice).
    experiment = ale_experiment.ALEExperiment(
        ale, agent, defaults.RESIZED_WIDTH, defaults.RESIZED_HEIGHT,
        parameters.resize_method, parameters.epochs,
        parameters.steps_per_epoch, parameters.steps_per_test,
        parameters.frame_skip, parameters.death_ends_episode,
        parameters.max_start_nullops, rng, VAE, sess, defaults.VAE_REQ_STEPS,
        defaults.VAE_STORAGE_SIZE)

    # The timestamp is taken (in UTC) before the run starts, so the
    # checkpoint directory is named after the launch time.
    time_str = time.strftime("%m-%d-%H-%M", time.gmtime())
    rom_name = rom.split('.')[0]
    vae_save_path = '%s/%s_beta%f_z%d' % (defaults.VAE_OUT_PREFIX,
                                          rom_name, params['beta'],
                                          params['z_size'])
    # Create the output directory up front without going through a shell:
    # an unwritable location now fails here, before the experiment runs,
    # instead of after it.
    if not os.path.isdir(vae_save_path):
        os.makedirs(vae_save_path)

    experiment.run()

    ckpt_path = '%s/%s_%s' % (vae_save_path, rom_name, time_str)
    print(ckpt_path)
    if not os.path.exists(ckpt_path):
        os.makedirs(ckpt_path)
    saver.save(sess, '%s/checkpoint.ckpt' % (ckpt_path))