Example #1
 def __init__(self,
              synthetic_prefs,
              max_segs,
              log_dir,
              zoom=4,
              channels=3,
              min_segments_to_test=10,
              n_pause_frames=4,
              user_response_timeout=3):
     if not synthetic_prefs:
         self.vid_q = mp.get_context('spawn').Queue()
         self.renderer = VideoRenderer(vid_queue=self.vid_q,
                                       mode=VideoRenderer.play_through_mode,
                                       zoom=zoom,
                                       channels=channels)
     else:
         self.renderer = None
     self.min_segments_to_test = min_segments_to_test
     self.synthetic_prefs = synthetic_prefs
     self.zoom = zoom
     self.seg_idx = 0
     self.segments = []
     self.channels = channels
     self.max_segs = max_segs
     self.tested_pairs = set()
     self.n_pause_frames = n_pause_frames
     self.user_response_timeout = user_response_timeout
     easy_tf_log.set_dir(log_dir)
Example #2
def run(env_defs, kargs=None):
    args, lr_args, log_dir, preprocess_wrapper = parse_args(env_defs, kargs)
    easy_tf_log.set_dir(log_dir)

    utils_tensorflow.set_random_seeds(args.seed)
    sess = tf.Session()

    envs = make_envs(args.env_id, preprocess_wrapper, args.max_n_noops,
                     args.n_workers, args.seed, args.debug, log_dir, env_defs)

    step_counter = utils.TensorFlowCounter(sess)
    update_counter = utils.TensorFlowCounter(sess)
    lr = make_lr(lr_args, step_counter.value)
    optimizer = make_optimizer(lr)

    extra_inputs_shape = None

    if 'extra_inputs_shape' in env_defs:
        extra_inputs_shape = env_defs['extra_inputs_shape']

    networks = make_networks(n_workers=args.n_workers,
                             obs_shape=envs[0].observation_space.shape,
                             n_actions=envs[0].action_space.n,
                             value_loss_coef=args.value_loss_coef,
                             entropy_bonus=args.entropy_bonus,
                             max_grad_norm=args.max_grad_norm,
                             optimizer=optimizer,
                             detailed_logs=args.detailed_logs,
                             debug=args.debug,
                             env_defs=env_defs,
                             extra_inputs_shape=extra_inputs_shape)

    global_vars = tf.trainable_variables('global')
    # Why save_relative_paths=True?
    # So that the plain-text 'checkpoint' file uses relative paths, letting us
    # restore from checkpoints created on another machine.
    saver = tf.train.Saver(global_vars,
                           max_to_keep=1,
                           save_relative_paths=True)
    if args.load_ckpt:
        print("Restoring from checkpoint '{}'...".format(args.load_ckpt),
              end='',
              flush=True)
        saver.restore(sess, args.load_ckpt)
        print("done!")
    else:
        sess.run(tf.global_variables_initializer())

    workers = make_workers(sess, envs, networks, args.n_workers, log_dir)

    worker_threads = start_worker_threads(workers, args.n_steps,
                                          args.steps_per_update, step_counter,
                                          update_counter)

    run_manager(worker_threads, sess, lr, step_counter, update_counter,
                log_dir, saver, args.manager_wake_interval_seconds,
                args.ckpt_interval_seconds)

    for env in envs:
        env.close()
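
The save_relative_paths comment above matters whenever a run's checkpoint directory is copied to another machine. A minimal sketch of such a restore, reusing sess and saver from this example (the copied_checkpoints directory name is hypothetical):

# tf.train.latest_checkpoint reads the plain-text 'checkpoint' file; because
# save_relative_paths=True, the path recorded there stays valid after the copy.
ckpt_path = tf.train.latest_checkpoint('copied_checkpoints')
if ckpt_path is not None:
    saver.restore(sess, ckpt_path)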
Example #3
    def __init__(self,
                 cluster_job_name,
                 core_network,
                 lr=1e-4,
                 cluster_dict=None,
                 batchnorm=False,
                 dropout=0.0,
                 n_preds=1,
                 log_dir=None):
        self.n_preds = n_preds
        graph, self.sess = self.init_sess(cluster_dict, cluster_job_name)
        # Why not just use soft device placement? With soft placement,
        # if we have a bug which prevents an operation being placed on the GPU
        # (e.g. we're using uint8s for operations that the GPU can't do),
        # then TensorFlow will be silent and just place the operation on a CPU.
        # Instead, we want to say: if there's a GPU present, definitely try and
        # put things on the GPU. If it fails, tell us!
        if tf.test.gpu_device_name():
            worker_device = "/job:{}/task:0/gpu:0".format(cluster_job_name)
        else:
            worker_device = "/job:{}/task:0".format(cluster_job_name)
        device_setter = tf.train.replica_device_setter(
            cluster=cluster_dict,
            ps_device="/job:ps/task:0",
            worker_device=worker_device)
        self.rps = []
        with graph.as_default():
            for pred_n in range(n_preds):
                with tf.device(device_setter):
                    with tf.variable_scope("pred_{}".format(pred_n)):
                        rp = RewardPredictorNetwork(core_network=core_network,
                                                    dropout=dropout,
                                                    batchnorm=batchnorm,
                                                    lr=lr)
                self.rps.append(rp)
            self.init_op = tf.global_variables_initializer()
            # Why save_relative_paths=True?
            # So that the plain-text 'checkpoint' file written uses relative paths,
            # which seems to be needed in order to avoid confusing saver.restore()
            # when restoring from FloydHub runs.
            self.saver = tf.train.Saver(max_to_keep=1,
                                        save_relative_paths=True)
            self.summaries = self.add_summary_ops()

        self.checkpoint_file = osp.join(log_dir,
                                        'reward_predictor_checkpoints',
                                        'reward_predictor.ckpt')
        self.train_writer = tf.summary.FileWriter(osp.join(
            log_dir, 'reward_predictor', 'train'),
                                                  flush_secs=5)
        self.test_writer = tf.summary.FileWriter(osp.join(
            log_dir, 'reward_predictor', 'test'),
                                                 flush_secs=5)

        self.n_steps = 0
        self.r_norm = RunningStat(shape=n_preds)

        misc_logs_dir = osp.join(log_dir, 'reward_predictor', 'misc')
        easy_tf_log.set_dir(misc_logs_dir)
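
The device-placement comment in this example argues for hard placement over soft placement. A short standalone sketch of the difference, using TensorFlow 1.x's allow_soft_placement option (the session names are illustrative):

import tensorflow as tf

# Soft placement: an op that cannot run on the GPU silently falls back to CPU.
soft_sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

# Hard placement (the default): the same op raises an error instead, so
# placement bugs surface early, which is what the comment above wants.
hard_sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=False))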
Example #4
    def __init__(self,
                 core_network,
                 obs_shape,
                 logger,
                 lr=1e-4,
                 batchnorm=False,
                 dropout=0.0,
                 n_preds=1,
                 log_dir=None):
        self.n_preds = n_preds
        self.obs_shape = obs_shape
        self.sess = tf.Session()
        self.logger = logger
        graph = tf.get_default_graph()
        self.rps = []
        with graph.as_default():
            for pred_n in range(n_preds):
                # Create n_preds reward predictors, each in its own
                # variable scope
                with tf.variable_scope("pred_{}".format(pred_n)):
                    rp = RewardPredictorNetwork(core_network=core_network,
                                                dropout=dropout,
                                                batchnorm=batchnorm,
                                                lr=lr,
                                                obs_shape=self.obs_shape)
                self.rps.append(rp)

            self.init_op = tf.global_variables_initializer()
            # Why save_relative_paths=True?
            # So that the plain-text 'checkpoint' file written uses relative paths,
            # which seems to be needed in order to avoid confusing saver.restore()
            # when restoring from FloydHub runs.
            self.saver = tf.train.Saver(max_to_keep=1,
                                        save_relative_paths=True)
            self.summaries = self.add_summary_ops()

        # Attempt to work around a FailedPreconditionError (uninitialized
        # variables); see https://stackoverflow.com/questions/34001922/failedpreconditionerror-attempting-to-use-uninitialized-in-tensorflow
        init_op = tf.global_variables_initializer()
        try:
            self.sess.run(init_op)
        except Exception as e:
            print(e)
        self.checkpoint_dir = osp.join(log_dir, 'reward_predictor_checkpoints')
        self.checkpoint_file = osp.join(self.checkpoint_dir,
                                        'reward_predictor.ckpt')
        self.train_writer = tf.summary.FileWriter(osp.join(
            log_dir, 'reward_predictor', 'train'),
                                                  flush_secs=5)
        self.test_writer = tf.summary.FileWriter(osp.join(
            log_dir, 'reward_predictor', 'test'),
                                                 flush_secs=5)

        self.n_steps = 0
        self.r_norm = RunningStat(shape=n_preds)

        misc_logs_dir = osp.join(log_dir, 'reward_predictor', 'misc')
        easy_tf_log.set_dir(misc_logs_dir)
Example #5
 def test_set_dir(self):
     """
     Confirm that set_dir works.
     """
     with tempfile.TemporaryDirectory() as temp_dir:
         os.chdir(temp_dir)
         easy_tf_log.set_dir('logs2')
         easy_tf_log.tflog('var', 0)
         self.assertEqual(os.listdir(), ['logs2'])
         self.assertIn('events.out.tfevents', os.listdir('logs2')[0])
Example #6
def main():
    args, lr_args, log_dir, preprocess_wrapper = parse_args()  # parse_args() is imported from params
    easy_tf_log.set_dir(log_dir)  # set the log directory for easy_tf_log

    utils_tensorflow.set_random_seeds(args.seed)  # seed the random number generators
    sess = tf.Session()  # a Session encapsulates the environment in which Operations execute and Tensors are evaluated

    envs = make_envs(args.env_id, preprocess_wrapper, args.max_n_noops, args.n_workers,
                     args.seed, args.debug, log_dir)

    step_counter = utils.TensorFlowCounter(sess)
    update_counter = utils.TensorFlowCounter(sess)
    lr = make_lr(lr_args, step_counter.value)
    optimizer = make_optimizer(lr)

    # Create the set of networks, one per worker thread
    networks = make_networks(n_workers=args.n_workers, obs_shape=envs[0].observation_space.shape,
                             n_actions=envs[0].action_space.n, value_loss_coef=args.value_loss_coef,
                             entropy_bonus=args.entropy_bonus, max_grad_norm=args.max_grad_norm,
                             optimizer=optimizer, detailed_logs=args.detailed_logs,
                             debug=args.debug)

    # Returns all variables created with trainable=True.
    # scope: (Optional.) A string. If supplied, the resulting list is filtered
    # to include only items whose name attribute matches scope using re.match.
    global_vars = tf.trainable_variables('global')


    # Why save_relative_paths=True?
    # So that the plain-text 'checkpoint' file uses relative paths, letting us
    # restore from checkpoints created on another machine.
    saver = tf.train.Saver(global_vars, max_to_keep=1, save_relative_paths=True)

    # If a checkpoint is available, restore it and resume from where we left off; otherwise start from scratch
    if args.load_ckpt:
        print("Restoring from checkpoint '{}'...".format(args.load_ckpt), end='', flush=True)
        saver.restore(sess, args.load_ckpt)  # restore (load) the session from the given checkpoint
        print("done!")
    else:
        sess.run(tf.global_variables_initializer())

    # Create the workers
    workers = make_workers(sess, envs, networks, args.n_workers, log_dir)

    # Start one thread for each worker
    worker_threads = start_worker_threads(workers, args.n_steps, args.steps_per_update,
                                          step_counter, update_counter)

    # Run the manager loop for the worker threads
    run_manager(worker_threads, sess, lr, step_counter, update_counter, log_dir, saver,
                args.manager_wake_interval_seconds, args.ckpt_interval_seconds)

    for env in envs:
        env.close()
Example #7
    def test_fork(self):
        with tempfile.TemporaryDirectory() as temp_dir:
            easy_tf_log.set_dir(temp_dir)

            def f(queue):
                easy_tf_log.tflog('foo', 0)
                queue.put(True)

            q = Queue()
            Process(target=f, args=[q], daemon=True).start()
            try:
                q.get(timeout=1.0)
            except queue.Empty:
                self.fail("Process did not return")
Example #8
 def __init__(self, synthetic_prefs, max_segs, log_dir):
     self.vid_q = Queue()
     if not synthetic_prefs:
         self.renderer = VideoRenderer(vid_queue=self.vid_q,
                                       mode=VideoRenderer.restart_on_get_mode,
                                       zoom=4)
     else:
         self.renderer = None
     self.synthetic_prefs = synthetic_prefs
     self.seg_idx = 0
     self.segments = []
     self.tested_pairs = set()  # For O(1) lookup
     self.max_segs = max_segs
     easy_tf_log.set_dir(log_dir)
Example #9
    def __init__(self, env, prefix="", log_dir=None):
        Wrapper.__init__(self, env)

        if prefix:
            self.log_prefix = prefix + ": "
        else:
            self.log_prefix = ""

        if log_dir is not None:
            easy_tf_log.set_dir(log_dir)

        self.episode_rewards = None
        self.episode_length_steps = None
        self.episode_n = -1
        self.episode_done = None
        self.log_dir = log_dir
Example #10
 def f():
     if make_reward_predictor:
         reward_predictor = make_reward_predictor('a2c', cluster_dict)
     else:
         reward_predictor = None
     misc_logs_dir = osp.join(log_dir, 'a2c_misc')
     easy_tf_log.set_dir(misc_logs_dir)
     learn(policy=policy_fn,
           env=env,
           seg_pipe=seg_pipe,
           start_policy_training_pipe=start_policy_training_pipe,
           episode_vid_queue=episode_vid_queue,
           reward_predictor=reward_predictor,
           ckpt_save_dir=ckpt_dir,
           gen_segments=gen_segments,
           **a2c_params)
Example #11
def main():
    args, lr_args, log_dir, preprocess_wrapper, ckpt_timer = parse_args()
    easy_tf_log.set_dir(log_dir)

    utils.set_random_seeds(args.seed)
    sess = tf.Session()

    envs = make_envs(args.env_id, preprocess_wrapper, args.max_n_noops,
                     args.n_workers, args.seed, args.debug, log_dir)

    step_counter = utils.GraphCounter(sess)
    update_counter = utils.GraphCounter(sess)
    lr = make_lr(lr_args, step_counter.value)
    optimizer = make_optimizer(lr)

    networks = make_networks(n_workers=args.n_workers,
                             n_actions=envs[0].action_space.n,
                             weight_inits=args.weight_inits,
                             value_loss_coef=args.value_loss_coef,
                             entropy_bonus=args.entropy_bonus,
                             max_grad_norm=args.max_grad_norm,
                             optimizer=optimizer,
                             debug=args.debug)

    # Why save_relative_paths=True?
    # So that the plain-text 'checkpoint' file written uses relative paths,
    # which seems to be needed in order to avoid confusing saver.restore()
    # when restoring from FloydHub runs.
    global_vars = tf.trainable_variables('global')
    saver = tf.train.Saver(global_vars,
                           max_to_keep=1,
                           save_relative_paths=True)
    checkpoint_dir = osp.join(log_dir, 'checkpoints')
    os.makedirs(checkpoint_dir)
    checkpoint_file = osp.join(checkpoint_dir, 'network.ckpt')

    if args.load_ckpt:
        print("Restoring from checkpoint '%s'..." % args.load_ckpt,
              end='',
              flush=True)
        saver.restore(sess, args.load_ckpt)
        print("done!")
    else:
        sess.run(tf.global_variables_initializer())

    workers = make_workers(sess=sess,
                           envs=envs,
                           networks=networks,
                           n_workers=args.n_workers,
                           log_dir=log_dir)

    worker_threads = start_workers(n_steps=args.n_steps,
                                   steps_per_update=args.steps_per_update,
                                   step_counter=step_counter,
                                   update_counter=update_counter,
                                   workers=workers)
    ckpt_timer.reset()
    step_rate = utils.RateMeasure()
    step_rate.reset(int(step_counter))
    while True:
        time.sleep(args.wake_interval_seconds)

        steps_per_second = step_rate.measure(int(step_counter))
        easy_tf_log.tflog('misc/steps_per_second', steps_per_second)
        easy_tf_log.tflog('misc/steps', int(step_counter))
        easy_tf_log.tflog('misc/updates', int(update_counter))
        easy_tf_log.tflog('misc/lr', sess.run(lr))

        alive = [t.is_alive() for t in worker_threads]

        if ckpt_timer.done() or not any(alive):
            saver.save(sess, checkpoint_file, int(step_counter))
            print("Checkpoint saved to '{}'".format(checkpoint_file))
            ckpt_timer.reset()

        if not any(alive):
            break

    for env in envs:
        env.close()
Example #12
#!/usr/bin/env python
import time

import easy_tf_log

# Logging using the global logger

# Will log to automatically-created 'logs' directory
for i in range(10):
    easy_tf_log.tflog('foo', i)
for j in range(10, 20):
    easy_tf_log.tflog('bar', j)

easy_tf_log.set_dir('logs2')

for k in range(20, 30):
    easy_tf_log.tflog('baz', k)
for l in range(5):
    easy_tf_log.tflog('qux', l, step=(10 * l))

# Logging using a Logger object

logger = easy_tf_log.Logger(log_dir='logs3')

for i in range(10):
    logger.log_key_value('quux', i)

logger.log_list_stats('quuz', [1, 2, 3, 4, 5])

logger.measure_rate('corge', 10)
time.sleep(1)
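
In easy_tf_log, measure_rate logs the rate of change between successive calls, so the demo above only produces a rate once a second call is made. A plausible completion (the derived key, e.g. 'corge_rate', is chosen by the library):

logger.measure_rate('corge', 20)  # logs roughly (20 - 10) / 1 s = 10 per second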
Example #13
tf.flags.DEFINE_integer('seed', 888, 'random seed for training sampling')
tf.flags.DEFINE_bool(
    'use_lsh', False, 'use locality-sensitive hashing '
    '(NOTE: not fully tested)')
tf.flags.DEFINE_bool('use_resnet', True, 'use resnet instead of inception')
tf.flags.DEFINE_integer('num_parts', 2, 'number of parts to use')

# Set the log directory to logs/prefix
date_time = datetime.today().strftime('%Y%m%d_%H%M%S')
prefix = date_time+'_rep_dim_'+str(FLAGS.rep_dim)+'_episode_length_'+str(FLAGS.episode_length) + \
         '_episode_width_'+str(FLAGS.episode_width)+'_batch_size_'+str(FLAGS.batch_size) + \
         '_num_episodes_'+str(FLAGS.num_episodes)+'_num_parts_'+str(FLAGS.num_parts)
tf.flags.DEFINE_string('save_dir',
                       os.path.join(CURRENT_DIR, '../checkpoints/' + prefix),
                       'directory to save model to')
easy_tf_log.set_dir(os.path.join(CURRENT_DIR, '../logs/' + prefix))


class Trainer(object):
    """Class that takes care of training, validating, and checkpointing model."""
    def __init__(self,
                 train_data,
                 valid_data,
                 input_dim=(84, 84, 3),
                 output_dim=None):
        self.train_data = train_data
        self.valid_data = valid_data
        self.input_dim = input_dim

        self.rep_dim = FLAGS.rep_dim
        self.episode_length = FLAGS.episode_length
Example #14
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon May 27 20:36:14 2019

@author: starstorms
"""

import easy_tf_log as etl

scale = 1.7

etl.set_dir('logs/1')

for i in range(10):
    etl.tflog('foo', i * scale)
for j in range(10, 20):
    etl.tflog('bar', j * scale)

etl.set_dir('logs/2')

for k in range(20, 30):
    etl.tflog('foo', k * scale)
for l in range(5):
    etl.tflog('bar', l * scale, step=(10 * l))

etl.set_dir('logs/3 extra stuff')

for k in range(20, 40):
    etl.tflog('foo', k * scale)
for l in range(15):
    etl.tflog('bar', l * scale, step=(10 * l))
Example #15
import gc
import os

from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from random import shuffle
from datetime import datetime
from collections import deque

import tensorflow as tf

import easy_tf_log
from easy_tf_log import tflog
easy_tf_log.set_dir('tboard/')

from pommerman import agents

from rnn_agent import RNN_Agent
from pomm_dataset import dataset

tf_tag = tf.saved_model.tag_constants


# Generate Data ---------------------------------------------------------------------------------------------
#------------------------------------------------------------------------------------------------------------
def generate_data(EPISODES, save_file_nm, shuffle_agents=False):
    rnn_agent = RNN_Agent()

    # Init dataset
Example #16
#%% Tensorflow / Keras

# Imports assumed by the snippets below
from datetime import datetime

import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import easy_tf_log as etl

# For specifying device to use
with tf.device('/gpu:0'): pass

# Adding new axis to array
x_train = train[..., tf.newaxis]

# Tensorboard setup
logdir="logs/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)
callbacks=[tensorboard_callback] # in model.fit()

# Easy tf log to tensorboard for scalars
etl.set_dir('logs2')
for k in range(20, 30): etl.tflog('baz', k)
# to start tensorboard put this into the terminal: tensorboard --logdir path/to/log/dir

# Plot Graphs
def plot_graphs(history, string):
    plt.plot(history.history[string])
    plt.plot(history.history['val_'+string])
    plt.xlabel("Epochs")
    plt.ylabel(string)
    plt.legend([string, 'val_'+string])
    plt.show()
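
A hypothetical usage of plot_graphs, assuming a compiled Keras model, training arrays x_train and y_train, and 'accuracy' among the tracked metrics:

history = model.fit(x_train, y_train, epochs=5, validation_split=0.1)
plot_graphs(history, 'accuracy')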

# Class for displaying progress on the end of an epoch
class DisplayCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        print('\nFinished epoch {}'.format(epoch + 1))
Example #17
def run_worker(env_id, preprocess_wrapper, seed, worker_n, n_steps_to_run,
               ckpt_timer, load_ckpt_file, render, log_dir, max_n_noops, debug,
               steps_per_update):
    utils.set_random_seeds(seed)

    mem_log = osp.join(log_dir, "worker_{}_memory.log".format(worker_n))
    memory_profiler = MemoryProfiler(pid=-1, log_path=mem_log)
    memory_profiler.start()

    worker_log_dir = osp.join(log_dir, "worker_{}".format(worker_n))
    easy_tf_log_dir = osp.join(worker_log_dir, 'easy_tf_log')
    os.makedirs(easy_tf_log_dir)
    easy_tf_log.set_dir(easy_tf_log_dir)

    server = tf.train.Server(cluster, job_name="worker", task_index=worker_n)
    sess = tf.Session(server.target)

    with tf.device("/job:worker/task:0"):
        create_network('global')
    with tf.device("/job:worker/task:%d" % worker_n):
        w = Worker(sess=sess,
                   env_id=env_id,
                   preprocess_wrapper=preprocess_wrapper,
                   worker_n=worker_n,
                   seed=seed,
                   log_dir=worker_log_dir,
                   max_n_noops=max_n_noops,
                   debug=debug)
        init_op = tf.global_variables_initializer()
        if render:
            w.render = True

    # Worker 0 initialises the global network as well as the per-worker networks
    # Other workers only initialise their own per-worker networks
    sess.run(init_op)

    if worker_n == 0:
        saver = tf.train.Saver()
        checkpoint_dir = osp.join(log_dir, 'checkpoints')
        os.makedirs(checkpoint_dir)
        checkpoint_file = osp.join(checkpoint_dir, 'network.ckpt')

    if load_ckpt_file is not None:
        print("Restoring from checkpoint '%s'..." % load_ckpt_file,
              end='',
              flush=True)
        saver.restore(sess, load_ckpt_file)
        print("done!")

    updates = 0
    steps = 0
    ckpt_timer.reset()
    while steps < n_steps_to_run:
        start_time = time.time()

        steps_ran = w.run_update(steps_per_update)
        steps += steps_ran
        updates += 1

        end_time = time.time()
        steps_per_second = steps_ran / (end_time - start_time)

        easy_tf_log.tflog('misc/steps_per_second', steps_per_second)
        easy_tf_log.tflog('misc/steps', steps)
        easy_tf_log.tflog('misc/updates', updates)

        if worker_n == 0 and ckpt_timer.done():
            saver.save(sess, checkpoint_file)
            print("Checkpoint saved to '{}'".format(checkpoint_file))
            ckpt_timer.reset()

    memory_profiler.stop()