# Environment-monitoring utilities (episode stats / video recording wrappers).
import csv
import json
import time
import tempfile
import distutils  # NOTE(review): distutils is deprecated (PEP 632) and removed in Python 3.12 — confirm it is still required
import subprocess
# --- 3rd party ---
import gym
import numpy as np
# --- my module ---
from unstable_baselines import logger
from unstable_baselines.lib import utils as ub_utils

# Module-level logger (no tag: uses the project's default logger).
LOG = logger.getLogger()

# Explicit public API of this module.
__all__ = [
    'Monitor',
    'MonitorToolChain',
    'StatsRecorder',
    'VideoRecorder',
]

# === Monitor ===
class Monitor(gym.Wrapper):
    # Gym wrapper that presumably records episode statistics/videos under
    # `root_dir` — TODO confirm against the rest of the file.
    # NOTE(review): the signature below is truncated in this chunk.
    def __init__(self, env: gym.Env, root_dir: str = './monitor',
# --- my module --- from unstable_baselines import logger from unstable_baselines.base import (SavableModel, TrainableModel) from unstable_baselines.bugs import ReLU from unstable_baselines.prob import MultiNormal from unstable_baselines.utils import (normalize, denormalize, is_image_observation, preprocess_observation, get_input_tensor_from_space) LOG = logger.getLogger('SD3') # === Buffer === class ReplayBuffer(): ''' Replay buffer ''' def __init__(self, buffer_size): self.buffer_size = buffer_size self.reset() def reset(self): self.pos = 0 self.full = False
    # Tail of an env-builder function (its `def` lies outside this chunk).
    # WarpFrame/FrameStack are presumably the standard Atari preprocessing
    # wrappers (grayscale resize + stack of the last 4 frames) — confirm.
    env = WarpFrame(env)
    env = FrameStack(env, 4)
    return env

if __name__ == '__main__':
    a = parse_args()
    # === Reset logger ===
    # Reconfigure the global logging facility from the parsed CLI arguments.
    logger.Config.use(filename=a.logging, level=a.log_level, colored=True, reset=True)
    LOG = logger.getLogger('QRDQN')
    # === Print welcome message ===
    LOG.add_row('')
    LOG.add_rows('QRDQN', fmt='{:@f:ANSI_Shadow}', align='center')
    LOG.add_line()
    LOG.add_rows('{}'.format(__copyright__))
    LOG.flush('INFO')
    # Brief pause so the banner is visible before the argument dump follows.
    time.sleep(1)
    # === Print arguments ===
    LOG.set_header('Arguments')
    LOG.add_row('Log dir', a.logdir)
    LOG.add_row('Logging path', a.logging)
    LOG.add_row('Monitor path', a.monitor_dir)
    LOG.add_row('Tensorboard path', a.tb_logdir)
    # NOTE(review): chunk truncated — the argument dump continues past here.
import numpy as np
import tensorflow as tf
# --- my module ---
from unstable_baselines import logger
from unstable_baselines.lib.base import (SavableModel, TrainableModel)
from unstable_baselines.lib.patch import ReLU
from unstable_baselines.lib.prob import (Categorical, MultiNormal)
from unstable_baselines.lib.utils import (is_image_space, preprocess_observation)
import unstable_baselines as ub

# create logger
LOG = logger.getLogger('PPO')

# === Buffers ===
# NOTE(review): the triple-quoted string below appears to comment out the
# GaeBuffer class; it is unterminated within this chunk — confirm intent.
# Its content is a string literal, so it is reproduced verbatim.
"""
class GaeBuffer():
    '''A Generalized Advantage Estimation Buffer'''
    def __init__(self, gae_lambda=1.0, gamma=0.99):
        '''GAE Buffer
        refer to: https://arxiv.org/abs/1506.02438
        Args:
            gae_lambda (float, optional): Smoothing parameter. Defaults to 1.0.
            gamma (float, optional): Discount factor. Defaults to 0.99.
        '''
        self.gae_lambda = gae_lambda
        self.gamma = gamma
import numpy as np
import tensorflow as tf
# --- my module ---
from unstable_baselines import logger
from unstable_baselines.base import (SavableModel, TrainableModel)
from unstable_baselines.bugs import ReLU  # NOTE(review): 'bugs' module presumably hosts a patched ReLU — confirm
from unstable_baselines.sche import Scheduler
from unstable_baselines.utils import (is_image_observation, preprocess_observation, get_input_tensor_from_space)

# create logger
LOG = logger.getLogger('IQN')

# === Buffers ===
class ReplayBuffer():
    '''
    Replay buffer
    '''
    def __init__(self, buffer_size):
        # Capacity: maximum number of transitions the buffer can hold.
        self.buffer_size = buffer_size
        # All mutable state lives in reset() so the buffer can be cleared.
        self.reset()

    def reset(self):
        # Next write index into the storage.
        self.pos = 0
        # Presumably flips to True once `pos` wraps at capacity — confirm.
        self.full = False
        # NOTE(review): this chunk appears truncated here — reset() may
        # continue past this point.
import numpy as np
import tensorflow as tf
# --- my module ---
from unstable_baselines import logger
from unstable_baselines.base import (SavableModel, TrainableModel)
from unstable_baselines.bugs import ReLU  # NOTE(review): 'bugs' module presumably hosts a patched ReLU — confirm
from unstable_baselines.utils import (normalize, denormalize, is_image_observation, preprocess_observation, get_input_tensor_from_space)

# create logger
LOG = logger.getLogger('TD3')

# === Buffer ===
class ReplayBuffer():
    '''
    Replay buffer
    '''
    def __init__(self, buffer_size):
        # Capacity: maximum number of transitions the buffer can hold.
        self.buffer_size = buffer_size
        # All mutable state lives in reset() so the buffer can be cleared.
        self.reset()

    def reset(self):
        # Next write index into the storage.
        self.pos = 0
        # Presumably flips to True once `pos` wraps at capacity — confirm.
        self.full = False
        # NOTE(review): this chunk appears truncated here — reset() may
        # continue past this point.
import numpy as np
import tensorflow as tf
# --- my module ---
from unstable_baselines import logger
from unstable_baselines.base import (SavableModel, TrainableModel)
from unstable_baselines.bugs import ReLU  # NOTE(review): 'bugs' module presumably hosts a patched ReLU — confirm
from unstable_baselines.sche import Scheduler
from unstable_baselines.utils import (is_image_observation, preprocess_observation, get_input_tensor_from_space)

# create logger
LOG = logger.getLogger('C51')

def calc_bins(v_min, v_max, n_atoms):
    '''Calculate canonical returns of each category
    Args:
        v_min (float): Minimum of value.
        v_max (float): Maximum of value.
        n_atoms (int): Number of categories.
    Returns:
        list: a list of canonical returns for each category
        float: bin width
    '''
    # Bin width: evenly spaced support over [v_min, v_max] with n_atoms
    # endpoints (hence n_atoms - 1 intervals).
    delta_z = (v_max - v_min) / (n_atoms - 1)
    # NOTE(review): chunk truncated — the bin-list construction and the
    # return statement lie past this point.