def __init__(self, args):
    super(A3C_Learner, self).__init__()

    self.actor_id = args.actor_id
    self.game = args.game
    self.gamma = args.gamma
    self.entropy_regularisation_strength = args.entropy_regularisation_strength
    self.batch_size = args.batch_size
    self.checkpoint_interval = args.checkpoint_interval
    self.file_init_weights = args.file_init_weights
    self.name_save_file = args.name_save_file
    self.local_step = 0
    self.global_step = args.global_step
    self.barrier = args.barrier
    self.queue = args.queue
    self.max_global_steps = args.max_global_steps
    self.pipes = args.pipes
    self.thread_step_counter = 1
    self.nb_actions = args.nb_actions
    self.epsilon = args.epsilon
    self.num_actor_learners = args.num_actor_learners

    self.env = atari_environment.AtariEnvironment(args.game, visualize=args.visualize)
    self.logger = logging_utils.getLogger(__name__ + ":Process {}".format(self.actor_id))
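# A minimal, hypothetical sketch of how the shared objects wired in above
# could drive the process body. `run` is the multiprocessing.Process entry
# point; the wait-then-step pattern here is an assumption for illustration,
# not the repo's actual training loop.
def run(self):
    # Block until every actor-learner process has reached this point.
    self.barrier.wait()
    while self.global_step.value < self.max_global_steps:
        self.local_step += 1
        # global_step is a Value('i', 0) in shared memory, so guard the increment.
        with self.global_step.get_lock():
            self.global_step.value += 1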
def __init__(self, conf):
    self.name = conf['name']
    self.nb_actions = conf['nb_actions']
    self.gamma = conf['gamma']
    self.actor_id = conf.get('actor_id')
    self.entropy_regularisation_strength = conf['entropy_regularisation_strength']

    self.build_network()
    self.create_assign_op_weights()
    self.create_op_loss()

    self._tf_session = tf.Session()
    self._tf_session.run(tf.global_variables_initializer())

    name_logger = __name__
    if self.actor_id is not None:
        name_logger += ":Process {}".format(self.actor_id)
    self.logger = logging_utils.getLogger(name_logger)

    # Only the first actor-learner owns the checkpoint saver.
    if self.actor_id == 0:
        self.saver = tf.train.Saver(max_to_keep=10)

    self.writer = tf.summary.FileWriter(
        './tf_logs/Process_{}'.format(self.actor_id),
        graph=self._tf_session.graph)

    self._tf_summary_total_episode_reward = tf.placeholder(tf.float32, [])
    self._tf_summary_len_episode = tf.placeholder(tf.float32, [])
    tf.summary.scalar("total_episode_reward", self._tf_summary_total_episode_reward)
    tf.summary.scalar("len_episode", self._tf_summary_len_episode)
    self._tf_summary_op = tf.summary.merge_all()
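# Hypothetical usage sketch of this constructor. PolicyVNetwork is assumed to
# be the concrete subclass defined in policy_v_network.py; the config values
# below are illustrative, not taken from the repo.
conf = {
    'name': 'local_network_0',
    'nb_actions': 6,
    'gamma': 0.99,
    'entropy_regularisation_strength': 0.01,
    'actor_id': 0,
}
network = PolicyVNetwork(conf)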
from skimage.transform import resize
from skimage.color import rgb2gray
import numpy as np
import atari_py
import logging_utils
import matplotlib.pyplot as plt

logger = logging_utils.getLogger('emulator')

IMG_SCALE = 255.0
IMG_SIZE_X = 84
IMG_SIZE_Y = 84
NR_IMAGES = 4


def get_num_actions(rom_path, rom_name):
    game_path = atari_py.get_game_path(rom_name)
    ale = atari_py.ALEInterface()
    ale.loadROM(game_path)
    # Return the size of the minimal action set, not the set itself,
    # since callers use this value as an action count.
    return len(ale.getMinimalActionSet())


class Emulator:

    def __init__(self, rom_path, rom_name, visualize, actor_id, rseed,
                 single_life_episode=False):
        self.ale = atari_py.ALEInterface()
        # Seed each actor's emulator differently so their experience decorrelates.
        self.ale.setInt("random_seed", rseed * (actor_id + 1))
        # For fuller control on explicit action repeat (>= ALE 0.5.0)
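# Hypothetical preprocessing helper implied by the constants and skimage
# imports above (an assumption about the emulator's actual code): convert an
# RGB ALE frame to an 84x84 grayscale array in [0, 1].
def preprocess_frame(rgb_frame):
    gray = rgb2gray(rgb_frame)  # uint8 RGB -> float luminance in [0, 1]
    return resize(gray, (IMG_SIZE_X, IMG_SIZE_Y))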
import argparse
from multiprocessing import Process, Value, Barrier, Queue, Pipe
import numpy as np
from A3C_Learner import A3C_Learner
import time
import logging_utils
import atari_environment

logger = logging_utils.getLogger(__name__)


def main(args):
    logger.debug("CONFIGURATION : {}".format(args))

    # Global shared counter allocated in shared memory; 'i' = signed int.
    args.global_step = Value('i', 0)

    # Barrier used to synchronize the actor-learner processes.
    args.barrier = Barrier(args.num_actor_learners)

    # Process-safe queue used to communicate between the actor-learners.
    args.queue = Queue()

    # Number of actions available at each step of the game.
    args.nb_actions = atari_environment.get_num_actions(args.game)

    args.visualize = args.visualize != 0

    actor_learners = []
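    # A plausible continuation of main() (assumed, since the original is cut
    # off here): spawn one A3C_Learner per actor, then wait for all of them.
    for actor_id in range(args.num_actor_learners):
        args.actor_id = actor_id
        learner = A3C_Learner(args)
        actor_learners.append(learner)
        learner.start()

    for learner in actor_learners:
        learner.join()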
import numpy as np
from multiprocessing import Process
import logging_utils
import tensorflow as tf
import ctypes
import pyximport
pyximport.install()
from hogupdatemv import copy, apply_grads_mom_rmsprop, apply_grads_adam
import time
import utils

CHECKPOINT_INTERVAL = 500000

logger = logging_utils.getLogger('actor_learner')


def generate_final_epsilon():
    """Sample the lower limit that a decaying epsilon will anneal to."""
    epsilon = {'limits': [0.1, 0.01, 0.5], 'probs': [0.4, 0.3, 0.3]}
    return np.random.choice(epsilon['limits'], p=epsilon['probs'])


class ActorLearner(Process):

    def __init__(self, args):
        super(ActorLearner, self).__init__()

        self.summ_base_dir = args.summ_base_dir
        self.local_step = 0
        self.global_step = args.global_step
        self.actor_id = args.actor_id
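# Hypothetical sketch of how the sampled final epsilon is typically used in
# async deep-RL agents: anneal linearly from 1.0 down to the sampled limit.
# EPSILON_ANNEAL_STEPS is an assumed constant, not taken from the repo.
EPSILON_ANNEAL_STEPS = 1000000


def annealed_epsilon(final_epsilon, global_step):
    if global_step >= EPSILON_ANNEAL_STEPS:
        return final_epsilon
    return 1.0 - (1.0 - final_epsilon) * (global_step / float(EPSILON_ANNEAL_STEPS))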
import sys
import os
import numpy as np
from threading import Thread, Lock
import tensorflow as tf
import logging_utils
import time
from emulator import get_num_actions
import importlib
from q_network import *
from policy_v_network import *
from value_based_actor_learner import *
from policy_based_actor_learner import *
import math

logger = logging_utils.getLogger('main')


def generate_epsilon():
    """Generate the lower limit for a decaying epsilon."""
    epsilon = {'limits': [0.1, 0.01, 0.5], 'probs': [0.4, 0.3, 0.3]}
    return np.random.choice(epsilon['limits'], p=epsilon['probs'])


def check_or_create_checkpoint_dir(checkpoint_dir):
    """Create the checkpoint directory if it does not exist."""
    if not os.path.exists(checkpoint_dir):
        try:
            os.makedirs(checkpoint_dir)
        except OSError:
            # Another process may have created it first; that's fine.
            pass


def restore_vars(saver, sess, game_name, actor_learner_type,
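# The restore_vars signature above is cut off in the source. A minimal sketch
# of the usual restore pattern it likely wraps (the directory layout here is
# an assumption, not the repo's actual scheme):
#
#     checkpoint_dir = 'checkpoints/{}/{}'.format(game_name, actor_learner_type)
#     check_or_create_checkpoint_dir(checkpoint_dir)
#     latest = tf.train.latest_checkpoint(checkpoint_dir)
#     if latest is not None:
#         saver.restore(sess, latest)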
# encoding: utf-8
import abc
import os

from logging_utils import getLogger, SentinelBuilder
from ._compat import with_metaclass

logger = getLogger(__name__)
sentinel = SentinelBuilder(logger)


class Client(with_metaclass(abc.ABCMeta)):

    def is_alive(self, pid):
        try:
            with logger.context(pid=pid), sentinel('Checking pid owner liveness'):
                return self._is_alive(pid)
        except Exception:
            # Treat any failure to query the pid as "not alive".
            return False

    @abc.abstractmethod
    def _is_alive(self, pid):
        pass  # pragma: no cover

    def terminate(self, pid):
        with logger.context(pid=pid), sentinel('Terminating pid owner'):
            return self._terminate(pid)

    @abc.abstractmethod
    def _terminate(self, pid):
        pass  # pragma: no cover
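# Hypothetical concrete Client for local POSIX processes; the abstract base
# above only fixes the interface, so the os.kill-based checks here are
# illustrative, not part of the original module.
import signal


class LocalPosixClient(Client):

    def _is_alive(self, pid):
        os.kill(pid, 0)  # signal 0 probes existence; raises if pid is gone
        return True

    def _terminate(self, pid):
        os.kill(pid, signal.SIGTERM)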