def main(args):
    logging.debug('Configuration: {}'.format(args))

    network, env_creator = get_network_and_environment_creator(args)
    learner = PAACLearner(network, env_creator, args)

    logging.info('Starting training')
    learner.train()
    logging.info('Finished training')
def main(args): logging.debug('Configuration: {}'.format(args)) logging.getLogger("urllib3.connectionpool").setLevel(logging.WARNING) network_creator = get_network_creator(args) learner = PAACLearner(network_creator, args) setup_kill_signal_handler(learner) logging.info('Starting training') learner.train() logging.info('Finished training')
def main(args): logging.debug('Configuration: {}'.format(args)) logging.getLogger("wergzrug").setLevel(logging.WARNING) logging.getLogger("tensorflow").setLevel(logging.WARNING) network_creator, env_creator = get_network_and_environment_creator(args) learner = PAACLearner(network_creator, env_creator, args) setup_kill_signal_handler(learner) logging.info('Starting training') learner.train() logging.info('Finished training')
def main(args):
    logging.debug('Configuration: {}'.format(args))

    explo_policy = ExplorationPolicy(args)
    print('Repetition table: ' + str(explo_policy.tab_rep))

    network_creator, env_creator = get_network_and_environment_creator(args, explo_policy)
    learner = PAACLearner(network_creator, env_creator, explo_policy, args)

    setup_kill_signal_handler(learner)

    logging.info('Starting training')
    learner.train()
    logging.info('Finished training')
def main(args):
    logging.debug('Configuration: {}'.format(args))

    if args.random_seed is None:
        # Seed from the current time. RandomState comes from numpy;
        # the standard-library random module has no RandomState.
        rng = np.random.RandomState(int(time()))
        random_seed = rng.randint(1000)
        args.random_seed = random_seed

    network_creator, env_creator = get_network_and_environment_creator(args)
    learner = PAACLearner(network_creator, env_creator, args)

    setup_kill_signal_handler(learner)

    logging.info('Starting training')
    learner.train()
    logging.info('Finished training')
def main(args):
    network_creator, env_creator = get_network_and_environment_creator(args)

    utils.save_args(args, args.debugging_folder, file_name=ARGS_FILE)
    logging.info('Saved args in the {0} folder'.format(args.debugging_folder))
    logging.info(args_to_str(args))

    batch_env = ConcurrentBatchEmulator(WorkerProcess, env_creator, args.num_workers, args.num_envs)
    set_exit_handler(concurrent_emulator_handler(batch_env))
    try:
        batch_env.start_workers()
        learner = PAACLearner(network_creator, batch_env, args)
        # args to eval_network
        learner.set_eval_function(eval_network, learner.network, env_creator, 50, learner.use_rnn)
        learner.train()
    finally:
        batch_env.close()
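# The variant above relies on set_exit_handler(concurrent_emulator_handler(batch_env))
# to shut down the emulator worker processes if the process is terminated. A minimal
# sketch under the assumption that the batch emulator only needs close() called on it;
# the names mirror the calls above, but the real implementations may differ.
import signal
import sys


def concurrent_emulator_handler(batch_env):
    def handler(signum, frame):
        logging.info('Signal {} received, closing batch emulator workers.'.format(signum))
        batch_env.close()
        sys.exit(0)

    return handler


def set_exit_handler(handler):
    signal.signal(signal.SIGTERM, handler)
    signal.signal(signal.SIGINT, handler)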
episode_over = False
reward = 0.0
while not episode_over:
    if args.embedding_plot and random.random() < 0.2:
        # Occasionally encode the current frame and store frame/latent pairs
        # for the t-SNE plot generated after the episode.
        latent_var = sess.run(
            network.encoder_output,
            feed_dict={
                network.autoencoder_input_ph:
                    np.array([state[:, :, 0]]).reshape(1, 84, 84, 1)
            })
        img_database.append(state[:, :, 0])
        latent_database.append(latent_var[0])

    action = PAACLearner.choose_next_actions(network, env_creator.num_actions, [state], sess)
    state, r, episode_over = environment.next(action[0])
    reward += r

rewards.append(reward)
print(reward)

print("Mean:", np.mean(rewards), "Min:", np.min(rewards),
      "Max:", np.max(rewards), "Std:", np.std(rewards))

# Generate a plot for visualizing the latent space learnt by the autoencoder
if args.embedding_plot:
    from sklearn.manifold import TSNE
    manifold = TSNE(n_components=2)
    # Note: TSNE.fit returns the fitted estimator; fit_transform would return
    # the 2-D embedding directly.
    x_fitted = manifold.fit(np.array(latent_database))
    print("Plotting TSNE of the latent space...")
    fig, ax = plt.subplots()
    num_imgs = 30
if 'gpu' in args.device:
    config.gpu_options.allow_growth = True

with tf.Session(config=config) as sess:
    checkpoints_ = os.path.join(df, 'checkpoints')
    network.init(checkpoints_, saver, sess)

    states = np.asarray([environment.get_initial_state() for environment in environments])

    # Apply a random number of no-op actions so each test episode starts
    # from a slightly different state.
    if args.noops != 0:
        for i, environment in enumerate(environments):
            for _ in range(random.randint(0, args.noops)):
                state, _, _ = environment.next(environment.get_noop())
                states[i] = state

    episodes_over = np.zeros(args.test_count, dtype=np.bool)
    rewards = np.zeros(args.test_count, dtype=np.float32)
    while not all(episodes_over):
        actions, _, _ = PAACLearner.choose_next_actions(network, env_creator.num_actions, states, sess)
        for j, environment in enumerate(environments):
            state, r, episode_over = environment.next(actions[j])
            states[j] = state
            rewards[j] += r
            episodes_over[j] = episode_over

print('Performed {} tests for {}.'.format(args.test_count, args.game))
print('Mean: {0:.2f}'.format(np.mean(rewards)))
print('Min: {0:.2f}'.format(np.min(rewards)))
print('Max: {0:.2f}'.format(np.max(rewards)))
print('Std: {0:.2f}'.format(np.std(rewards)))
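# The test loops above and below assume that `environments`, `network`, `saver`,
# `config`, and `df` were set up earlier in the script. A plausible preamble using
# the same creator objects as the training entry points; the exact names and the
# create_environment signature are assumptions, not the repository's verbatim code.
environments = [env_creator.create_environment(i) for i in range(args.test_count)]
network = network_creator()          # build the TF1 graph before creating the Saver
saver = tf.train.Saver()
config = tf.ConfigProto()
df = args.debugging_folder           # assumed: folder containing the 'checkpoints' directory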
edr_outs = get_save_frame(edr_name)

with tf.Session(config=config) as sess:
    checkpoints_ = os.path.join(df, 'checkpoints')
    network.init(checkpoints_, saver, sess)

    states = np.asarray([environment.get_initial_state() for environment in environments])

    if args.noops != 0:
        for i, environment in enumerate(environments):
            for _ in range(random.randint(0, args.noops)):
                state, _, _ = environment.next(environment.get_noop())
                states[i] = state

    episodes_over = np.zeros(args.test_count, dtype=np.bool)
    rewards = np.zeros(args.test_count, dtype=np.float32)
    while not all(episodes_over):
        actions, edr_output, _, _, r_s, r_m, r_l = PAACLearner.choose_next_actions_with_viz(
            network, env_creator.num_actions, states, sess, True)
        # red = visualize(states, edr_output)
        # print("states shape", states.shape)
        # print("r_s", r_s.shape)
        # print("edr output shape", edr_output.shape)
        if args.edr_viz:
            edr_outs(edr_output[3, :, :, 0].reshape([84, 84]))
            # edr_outs(visualize(states, edr_output))
            # edr_outs(r_s.reshape([84, 84]))
        for j, environment in enumerate(environments):
            state, r, episode_over = environment.next(actions[j])
            states[j] = state
            rewards[j] += r
            episodes_over[j] = episode_over

print('Performed {} tests for {}.'.format(args.test_count, args.game))
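# get_save_frame(edr_name) above returns a callable that records every frame passed
# to it. A minimal sketch, assuming imageio is available and that writing an animated
# GIF is acceptable; the helper in the original repository may differ.
import imageio


def get_save_frame(name):
    writer = imageio.get_writer(name + '.gif', fps=30)

    def get_frame(frame):
        # imageio expects image data such as the 84x84 arrays passed in above.
        writer.append_data(frame)

    return get_frame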