import logging
import os
import sys
import pickle
import cPickle

import numpy as np
import tensorflow as tf
import ale_python_interface

import vae


def launch(args, defaults, description):
    """Execute a complete training run."""
    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX.

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)
    ale.loadROM(full_rom_path)
    num_actions = len(ale.getMinimalActionSet())

    # Load the auxiliary data saved alongside the trained VAE; it carries
    # the hyperparameters needed to rebuild the network.
    with open(parameters.vae_aux_file, 'rb') as f:
        aux_data = pickle.load(f)
    params = aux_data['params']

    with tf.device(None):  # e.g. "/gpu:0" to pin to a specific device
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)

        VAE = vae.vae(params)
        VAE._create_network_()
        try:
            sess.run(tf.global_variables_initializer())
        except AttributeError:  # older TensorFlow releases
            sess.run(tf.initialize_all_variables())
        saver = tf.train.Saver()

        # Restore the pretrained VAE weights.
        chkpt = tf.train.get_checkpoint_state(parameters.vae_file)
        if chkpt and chkpt.model_checkpoint_path:
            saver.restore(sess, chkpt.model_checkpoint_path)
        else:
            print('No checkpoint found')

    # Deferred imports: Theano and the DQN modules are only needed from
    # here on.
    import theano
    import ale_experiment
    import ale_agent
    import q_network

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    if parameters.nn_file is None:
        # The Q-network consumes the VAE latent code: an input of width
        # z_size and height 1 instead of a resized screen.
        network = q_network.DeepQLearner(VAE.z_size,
                                         1,
                                         num_actions,
                                         parameters.phi_length,
                                         parameters.discount,
                                         parameters.learning_rate,
                                         parameters.rms_decay,
                                         parameters.rms_epsilon,
                                         parameters.momentum,
                                         parameters.clip_delta,
                                         parameters.freeze_interval,
                                         parameters.batch_size,
                                         parameters.network_type,
                                         parameters.update_rule,
                                         parameters.batch_accumulator,
                                         rng)
    else:
        with open(parameters.nn_file, 'rb') as handle:
            network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(network,
                                  parameters.epsilon_start,
                                  parameters.epsilon_min,
                                  parameters.epsilon_decay,
                                  parameters.replay_memory_size,
                                  parameters.experiment_prefix,
                                  parameters.replay_start_size,
                                  parameters.update_frequency,
                                  rng)

    # X_size is [height, width, channels], so index 1 is width, 0 height.
    experiment = ale_experiment.ALEExperiment(ale, agent,
                                              VAE.X_size[1],
                                              VAE.X_size[0],
                                              parameters.resize_method,
                                              parameters.epochs,
                                              parameters.steps_per_epoch,
                                              parameters.steps_per_test,
                                              parameters.frame_skip,
                                              parameters.death_ends_episode,
                                              parameters.max_start_nullops,
                                              rng, VAE, sess)

    experiment.run()
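# A minimal sketch of how this launcher might be wired to a command-line
# entry point. The `Defaults` container and its values are hypothetical,
# for illustration only -- the real project supplies its own defaults
# module, and process_args handles the flag parsing.
if __name__ == '__main__':
    class Defaults(object):
        BASE_ROM_PATH = 'roms/'  # hypothetical ROM directory
        RESIZED_WIDTH = 84       # hypothetical resized screen size
        RESIZED_HEIGHT = 84

    launch(sys.argv[1:], Defaults, __doc__)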
# Fragment of the VAE training script: assumes `commandline_params`,
# `sess`, `perm_train`, `perm_valid`, and `magic_seed_number` are defined
# earlier, with tensorflow imported as `tf` and the vae module in scope.
tr_iters = commandline_params['tr_iters']
lr_rate = commandline_params['learning_rate']
dump_path = commandline_params['dump_path']

# VAE hyperparameters: raw 210x160 RGB Atari frames are encoded into a
# 30-dimensional latent code; beta comes from the command line.
params = {}
params['batch_size'] = commandline_params['batch_size']
params['X_size'] = [210, 160, 3]
params['z_size'] = 30
params['beta'] = commandline_params['beta']

print('---Params used---')
print(params)
params_generated = params

VAE = vae.vae(params)
VAE._create_network_()
train_step = tf.train.AdamOptimizer(lr_rate).minimize(VAE.total_loss)

try:
    sess.run(tf.global_variables_initializer())
except AttributeError:  # older TensorFlow releases
    sess.run(tf.initialize_all_variables())
saver = tf.train.Saver()

# Bundle everything needed to rebuild and evaluate this run alongside
# the checkpoint.
aux_data = {'params': params,
            'commandline_params': commandline_params,
            'perm_train': perm_train,
            'perm_valid': perm_valid,
            'magic_seed_number': magic_seed_number}
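# A minimal sketch of the training loop that would follow this setup. The
# next_batch() helper and the VAE input placeholder name `x` are
# assumptions for illustration; only train_step, VAE.total_loss, saver,
# aux_data, and dump_path come from the fragment above (pickle is assumed
# imported as in the launcher).
for it in range(tr_iters):
    batch = next_batch(params['batch_size'])       # hypothetical helper
    _, loss = sess.run([train_step, VAE.total_loss],
                       feed_dict={VAE.x: batch})   # assumed placeholder
    if it % 100 == 0:
        print('iter %d: loss %f' % (it, loss))

# Persist the weights together with the auxiliary data needed to reload
# them later (the RL launcher above reads this back via vae_aux_file).
saver.save(sess, '%s/checkpoint.ckpt' % dump_path)
with open('%s/aux_data.pkl' % dump_path, 'wb') as f:
    pickle.dump(aux_data, f)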
import logging
import os
import sys
import time
import pickle
import cPickle

import numpy as np
import tensorflow as tf
import ale_python_interface

import vae


def launch(args, defaults, description):
    """Execute a complete training run."""
    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

    if parameters.rom.endswith('.bin'):
        rom = parameters.rom
    else:
        rom = "%s.bin" % parameters.rom
    full_rom_path = os.path.join(defaults.BASE_ROM_PATH, rom)

    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    ale = ale_python_interface.ALEInterface()
    ale.setInt('random_seed', rng.randint(1000))

    if parameters.display_screen:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX.

    ale.setBool('display_screen', parameters.display_screen)
    ale.setFloat('repeat_action_probability',
                 parameters.repeat_action_probability)
    ale.loadROM(full_rom_path)
    num_actions = len(ale.getMinimalActionSet())

    # Load the auxiliary data that carries the VAE hyperparameters. Unlike
    # the launcher above, no pretrained checkpoint is restored here: this
    # variant trains the VAE during the experiment and saves it afterwards.
    with open(parameters.vae_aux_file, 'rb') as f:
        aux_data = pickle.load(f)
    params = aux_data['params']

    with tf.device(None):  # e.g. "/gpu:0" to pin to a specific device
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)

        VAE = vae.vae(params)
        VAE._create_network_()
        try:
            sess.run(tf.global_variables_initializer())
        except AttributeError:  # older TensorFlow releases
            sess.run(tf.initialize_all_variables())
        saver = tf.train.Saver()

    # Deferred imports: Theano and the DQN modules are only needed from
    # here on.
    import theano
    import ale_experiment
    import ale_agent
    import q_network

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    if parameters.nn_file is None:
        # This variant trains the Q-network on resized screens rather
        # than on the VAE latent code.
        network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                         defaults.RESIZED_HEIGHT,
                                         num_actions,
                                         parameters.phi_length,
                                         parameters.discount,
                                         parameters.learning_rate,
                                         parameters.rms_decay,
                                         parameters.rms_epsilon,
                                         parameters.momentum,
                                         parameters.clip_delta,
                                         parameters.freeze_interval,
                                         parameters.batch_size,
                                         parameters.network_type,
                                         parameters.update_rule,
                                         parameters.batch_accumulator,
                                         rng)
    else:
        with open(parameters.nn_file, 'rb') as handle:
            network = cPickle.load(handle)

    agent = ale_agent.NeuralAgent(network,
                                  parameters.epsilon_start,
                                  parameters.epsilon_min,
                                  parameters.epsilon_decay,
                                  parameters.replay_memory_size,
                                  parameters.experiment_prefix,
                                  parameters.replay_start_size,
                                  parameters.update_frequency,
                                  rng)

    experiment = ale_experiment.ALEExperiment(ale, agent,
                                              defaults.RESIZED_WIDTH,
                                              defaults.RESIZED_HEIGHT,
                                              parameters.resize_method,
                                              parameters.epochs,
                                              parameters.steps_per_epoch,
                                              parameters.steps_per_test,
                                              parameters.frame_skip,
                                              parameters.death_ends_episode,
                                              parameters.max_start_nullops,
                                              rng, VAE, sess,
                                              defaults.VAE_REQ_STEPS,
                                              defaults.VAE_STORAGE_SIZE)

    time_str = time.strftime("%m-%d-%H-%M", time.gmtime())
    vae_save_path = '%s/%s_beta%f_z%d' % (defaults.VAE_OUT_PREFIX,
                                          rom.split('.')[0],
                                          params['beta'],
                                          params['z_size'])
    if not os.path.exists(vae_save_path):
        os.makedirs(vae_save_path)

    experiment.run()

    # Save the VAE weights learned during the run.
    ckpt_path = '%s/%s_%s' % (vae_save_path, rom.split('.')[0], time_str)
    print(ckpt_path)
    if not os.path.exists(ckpt_path):
        os.makedirs(ckpt_path)
    saver.save(sess, '%s/checkpoint.ckpt' % ckpt_path)
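# A minimal sketch of reloading a checkpoint written by this launcher,
# mirroring the restore logic in the first variant of launch(). The
# function name is illustrative, not part of the project API.
def restore_vae(sess, saver, ckpt_dir):
    chkpt = tf.train.get_checkpoint_state(ckpt_dir)
    if chkpt and chkpt.model_checkpoint_path:
        saver.restore(sess, chkpt.model_checkpoint_path)
        return True
    print('No checkpoint found in %s' % ckpt_dir)
    return False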