コード例 #1
0
import csv
import json
import time
import tempfile
import distutils
import subprocess

# --- 3rd party ---
import gym
import numpy as np

# --- my module ---
from unstable_baselines import logger
from unstable_baselines.lib import utils as ub_utils

LOG = logger.getLogger()

__all__ = [
    'Monitor',
    'MonitorToolChain',
    'StatsRecorder',
    'VideoRecorder',
]

# === Monitor ===


class Monitor(gym.Wrapper):
    def __init__(self,
                 env: gym.Env,
                 root_dir: str = './monitor',
コード例 #2
0
# --- my module ---
from unstable_baselines import logger

from unstable_baselines.base import (SavableModel,
                                    TrainableModel)
from unstable_baselines.bugs import ReLU
from unstable_baselines.prob import MultiNormal
from unstable_baselines.utils import (normalize,
                                        denormalize,
                                        is_image_observation,
                                        preprocess_observation,
                                        get_input_tensor_from_space)


LOG = logger.getLogger('SD3')

# === Buffer ===

class ReplayBuffer:
    '''Replay buffer.

    Keeps the configured size together with a position counter (`pos`)
    and a `full` flag; `reset()` puts the latter two back to their
    starting values.
    '''

    def __init__(self, buffer_size):
        '''Store the size and initialise the bookkeeping state.

        Args:
            buffer_size: Stored unchanged on `self.buffer_size`.
        '''
        self.buffer_size = buffer_size
        self.reset()

    def reset(self):
        '''Set `pos` to 0 and `full` to False.'''
        self.pos, self.full = 0, False
コード例 #3
0
ファイル: run.py プロジェクト: Ending2015a/unstable_baselines
        env = WarpFrame(env)
        env = FrameStack(env, 4)

    return env


# Script entry point: read the command-line arguments, reconfigure the
# logger, then print a banner followed by a table of the arguments.
if __name__ == '__main__':

    # `parse_args` is defined outside this excerpt; the attributes read from
    # its result below are `logging`, `log_level`, `logdir`, `monitor_dir`
    # and `tb_logdir`.
    a = parse_args()

    # === Reset logger ===
    logger.Config.use(filename=a.logging,
                      level=a.log_level,
                      colored=True,
                      reset=True)
    LOG = logger.getLogger('QRDQN')

    # === Print welcome message ===
    # NOTE(review): the add_* calls presumably queue rows that `flush`
    # then emits at the given level -- confirm against the logger module.
    LOG.add_row('')
    LOG.add_rows('QRDQN', fmt='{:@f:ANSI_Shadow}', align='center')
    LOG.add_line()
    LOG.add_rows('{}'.format(__copyright__))
    LOG.flush('INFO')
    # NOTE(review): one-second pause after the banner; the reason is not
    # stated here -- presumably to keep the banner visible before more output.
    time.sleep(1)

    # === Print arguments ===
    LOG.set_header('Arguments')
    LOG.add_row('Log dir', a.logdir)
    LOG.add_row('Logging path', a.logging)
    LOG.add_row('Monitor path', a.monitor_dir)
    LOG.add_row('Tensorboard path', a.tb_logdir)
コード例 #4
0
import numpy as np
import tensorflow as tf

# --- my module ---
from unstable_baselines import logger

from unstable_baselines.lib.base import (SavableModel, TrainableModel)
from unstable_baselines.lib.patch import ReLU
from unstable_baselines.lib.prob import (Categorical, MultiNormal)
from unstable_baselines.lib.utils import (is_image_space,
                                          preprocess_observation)
import unstable_baselines as ub

# create logger
LOG = logger.getLogger('PPO')

# === Buffers ===
"""
class GaeBuffer():
    '''A Generalized Advantage Estimation Buffer'''
    def __init__(self, gae_lambda=1.0, gamma=0.99):
        '''GAE Buffer
            refer to: https://arxiv.org/abs/1506.02438

        Args:
            gae_lambda (float, optional): Smoothing parameter. Defaults to 1.0.
            gamma (float, optional): Discount factor. Defaults to 0.99.
        '''        
        self.gae_lambda = gae_lambda
        self.gamma = gamma
コード例 #5
0
import numpy as np
import tensorflow as tf

# --- my module ---
from unstable_baselines import logger

from unstable_baselines.base import (SavableModel, TrainableModel)
from unstable_baselines.bugs import ReLU
from unstable_baselines.sche import Scheduler
from unstable_baselines.utils import (is_image_observation,
                                      preprocess_observation,
                                      get_input_tensor_from_space)

# create logger
LOG = logger.getLogger('IQN')

# === Buffers ===


class ReplayBuffer:
    '''Replay buffer.

    Attributes set by this class:
        buffer_size: The value passed to the constructor.
        pos: Integer counter, 0 after a reset.
        full: Boolean flag, False after a reset.
    '''

    def __init__(self, buffer_size):
        '''Remember `buffer_size`, then start from a freshly reset state.

        Args:
            buffer_size: Stored unchanged on `self.buffer_size`.
        '''
        self.buffer_size = buffer_size
        # Delegate the remaining initial state to reset() so construction
        # and re-initialisation share one code path.
        self.reset()

    def reset(self):
        '''Return the counter and the flag to their initial values.'''
        self.full = False
        self.pos = 0
コード例 #6
0
import numpy as np
import tensorflow as tf

# --- my module ---
from unstable_baselines import logger

from unstable_baselines.base import (SavableModel, TrainableModel)
from unstable_baselines.bugs import ReLU
from unstable_baselines.utils import (normalize, denormalize,
                                      is_image_observation,
                                      preprocess_observation,
                                      get_input_tensor_from_space)

# create logger
LOG = logger.getLogger('TD3')

# === Buffer ===


class ReplayBuffer:
    '''Replay buffer.

    Records the requested `buffer_size` and tracks two pieces of state,
    `pos` and `full`, which `reset()` re-initialises.
    '''

    def __init__(self, buffer_size):
        '''Create the buffer bookkeeping.

        Args:
            buffer_size: Stored unchanged on `self.buffer_size`.
        '''
        self.buffer_size = buffer_size
        self.reset()

    def reset(self):
        '''Re-initialise state: `pos` becomes 0 and `full` becomes False.'''
        initial_state = (('pos', 0), ('full', False))
        for attr_name, value in initial_state:
            setattr(self, attr_name, value)
コード例 #7
0
import numpy as np
import tensorflow as tf

# --- my module ---
from unstable_baselines import logger

from unstable_baselines.base import (SavableModel, TrainableModel)
from unstable_baselines.bugs import ReLU
from unstable_baselines.sche import Scheduler
from unstable_baselines.utils import (is_image_observation,
                                      preprocess_observation,
                                      get_input_tensor_from_space)

# create logger
LOG = logger.getLogger('C51')


def calc_bins(v_min, v_max, n_atoms):
    '''Calculate canonical returns of each category

    Args:
        v_min (float): Minimum of value.
        v_max (float): Maximum of value.
        n_atoms (int): Number of categories.

    Returns:
        list: a list of canonical returns for each category
        float: bin width
    '''
    delta_z = (v_max - v_min) / (n_atoms - 1)