def __init__(self, env, configuration=config, logger=None):
    # directory for training outputs
    if not os.path.exists(configuration.output_path):
        os.makedirs(configuration.output_path)

    # store hyper-params
    self.config = configuration
    self.logger = logger
    if logger is None:
        self.logger = get_logger(configuration.log_path)
    self.env = env

    self.action_dim = self.env.action_space[0].n
    self.observation_dim = self.env.observation_space[0].shape[0]

    # create n DDPGActorCritic objects for n agents
    agent_networks = []
    for i in range(env.n):
        agent_networks.append(
            DDPGActorCritic(i, env, configuration=self.config, logger=logger))
    self.agent_networks = agent_networks
def __init__(self, config, logger=None):
    """
    Initialize Policy Gradient Class

    Args:
        config: class with hyperparameters
        logger: logger instance from logging module
    """
    # directory for training outputs
    if not os.path.exists(config.output_path):
        os.makedirs(config.output_path)

    # store hyper-params
    self.config = config
    self.logger = logger
    if logger is None:
        self.logger = get_logger(config.log_path)

    # discrete action space or continuous action space
    self.discrete = False
    self.observation_dim = 19 + 3
    self.action_dim = 1 + int(self.config.throttle)
    self.learning_rate = self.config.learning_rate

    # build model
    self.build()

    # enable saving the model
    self.saver = tf.train.Saver()
def __init__(self, agent_idx, env, configuration, logger=None):
    self.agent_idx = agent_idx
    self.config = configuration

    # directory for training outputs
    if not os.path.exists(self.config.output_path):
        os.makedirs(self.config.output_path)

    # store hyper-params
    self.logger = logger
    if logger is None:
        self.logger = get_logger(self.config.log_path)
    self.env = env

    # action space for a given agent - is Discrete(5) for simple_spread
    # NOTE: assumes that all agents have the same action space for now
    # TODO: pass action_dim as an argument to this function, so it can vary by agent
    # TODO: for simple_spread we don't need to worry about the communication space;
    #       however, for scenarios with communication channels we will need to revisit this,
    #       as action_space seems to be a tuple of movement space and comm space
    self.action_dim = self.env.action_space[0].n

    # observation space for a given agent - is Box(18) for simple_spread
    # NOTE: assumes that all agents have the same observation space for now
    # TODO: pass observation_dim as an argument to this function, so it can vary by agent
    self.observation_dim = self.env.observation_space[0].shape[0]

    self.lr = self.config.learning_rate
def __init__(self, env, config, logger=None):
    '''
    Initialize the Policy Gradient class

    Args:
        env: an OpenAI Gym environment, or a custom environment that follows the gym interface
        config: class with hyperparameters
        logger: logger instance from logging module
    '''
    # directory where the model is saved
    if not os.path.exists(config.output_path):
        os.makedirs(config.output_path)

    self.config = config
    self.logger = logger
    if logger is None:
        self.logger = get_logger(config.log_path)
    self.env = env

    # check whether the environment's action space is discrete or continuous
    self.discrete = isinstance(env.action_space, gym.spaces.Discrete)

    # for CartPole-v0, observation_dim = 4
    # for InvertedPendulum-v2, observation_dim = 4
    self.observation_dim = self.env.observation_space.shape[0]

    # discrete case: number of actions; continuous case: dimensionality of the action vector
    # for CartPole-v0, action_dim = 2
    # for InvertedPendulum-v2, action_dim = 1
    self.action_dim = self.env.action_space.n if self.discrete else self.env.action_space.shape[0]

    self.lr = self.config.learning_rate

    # build model
    self.build()
def __init__(self, env, config, logger=None, name=None):
    """
    Initialize Q Network and env

    Args:
        config: class with hyperparameters
        logger: logger instance from logging module
    """
    self.name = name
    self.action_space = 3
    if name is None:
        raise Exception("Must supply network name")
    name = time.strftime("_%m%d_%H%M") + "/" + name

    # customise the config for this run
    config.output_path = config.output_path.format(name)
    config.model_output = config.model_output.format(name)
    config.log_path = config.log_path.format(name)
    config.plot_output = config.plot_output.format(name)
    config.record_path = config.record_path.format(name)

    # directory for training outputs
    if not os.path.exists(config.output_path):
        os.makedirs(config.output_path)

    # store hyper params
    self.config = config
    self.logger = logger
    if logger is None:
        self.logger = get_logger(config.log_path)
    self.env = env

    # build model
    self.build()
def __init__(self, env, config, logger=None, student=False):
    """
    Initialize Q Network and env

    Args:
        config: class with hyperparameters
        logger: logger instance from logging module
    """
    # directory for training outputs
    if not os.path.exists(config.output_path):
        os.makedirs(config.output_path)

    # store hyper params
    self.config = config
    self.logger = logger
    if logger is None:
        self.logger = get_logger(config.log_path)
    self.env = env
    self.student = student

    # build model
    self.build(student=student)

    self.size = sum(v.get_shape().num_elements()
                    for v in tf.trainable_variables())
    if self.student:
        self.size -= self.teachermodel.size
    self.logger.info('Num params: %d' % self.size)
def __init__(self, env, config, logger=None):
    """
    Initialize Q Network and env

    Args:
        config: class with hyperparameters
        logger: logger instance from logging module
    """
    # directory for training outputs
    if not os.path.exists(config.output_path):
        os.makedirs(config.output_path)

    # store hyper params
    self.config = config
    self.logger = logger
    if logger is None:
        self.logger = get_logger(config.log_path)
    self.env = env

    # double Q-learning configuration
    self.is_double_q = getattr(self.config, 'double_q', False)

    # flag to control whether to use the original code
    # or the version modified with respect to the schedule update
    self.original_schedule = getattr(self.config, 'original_schedule', True)

    # build model
    self.build()
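# The `double_q` flag above switches between the vanilla DQN target and the
# Double DQN target. A minimal NumPy sketch of the difference, assuming batch
# arrays `q_next` (online-network Q-values at s') and `target_q_next`
# (target-network Q-values at s'); these names are illustrative, not taken
# from the class above.
import numpy as np

def dqn_target(rewards, done_mask, target_q_next, gamma):
    # vanilla DQN: bootstrap from the max of the target network
    return rewards + (1.0 - done_mask) * gamma * np.max(target_q_next, axis=1)

def double_dqn_target(rewards, done_mask, q_next, target_q_next, gamma):
    # Double DQN: the online network selects the action, the target network evaluates it
    best_actions = np.argmax(q_next, axis=1)
    selected = target_q_next[np.arange(len(best_actions)), best_actions]
    return rewards + (1.0 - done_mask) * gamma * selected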
def __init__(self, config, logger=None):
    """
    Initialize Q Network and env

    Args:
        config: class with hyperparameters
        logger: logger instance from logging module
    """
    # directory for training outputs
    if not os.path.exists(config.output_path):
        os.makedirs(config.output_path)

    # store hyper params
    self.config = config
    self.logger = logger
    if logger is None:
        self.logger = get_logger(config.log_path)

    envs = []
    for env_name in config.env_names:
        env = gym.make(env_name)
        env = wrap_dqn(env)
        env = PreproWrapper(env, prepro=greyscale, shape=(84, 84, 1),
                            overwrite_render=config.overwrite_render)
        envs.append(env)
    self.envs = envs

    self.recon = self.config.recon

    # build model
    self.build()
def __init__(self):
    """
    Creates output directories if they don't exist and loads the vocabulary.
    Defines attributes that depend on the vocab. Look for the __init__
    comments in the class attributes.
    """
    # check that the reload directory exists
    if self.dir_reload is not None and not os.path.exists(self.dir_reload):
        print("Weights directory not found ({})".format(self.dir_reload))
        self.dir_reload = None

    # directory for training outputs
    if not os.path.exists(self.dir_output):
        os.makedirs(self.dir_output)
    if not os.path.exists(self.model_output):
        os.makedirs(self.model_output)
    if not os.path.exists(self.dir_plots):
        os.makedirs(self.dir_plots)

    # initialize the results files for answers
    with open(self.path_results, "a") as f:
        pass
    with open(self.path_results_final, "a") as f:
        pass

    self.vocab = load_vocab(self.path_vocab)
    self.vocab_size = len(self.vocab)
    self.attn_cell_config["num_proj"] = self.vocab_size
    self.id_PAD = self.vocab[PAD]
    self.id_END = self.vocab[END]

    self.logger = get_logger(self.path_log)
def __init__(self, env, config, logger=None):
    if not os.path.exists(config.output_path):
        os.makedirs(config.output_path)

    self.config = config
    self.logger = logger
    self.batch_counter = 0
    self.seed = None
    if logger is None:
        self.logger = get_logger(config.log_path)
    self.env = env

    self.discrete = isinstance(env.action_space, gym.spaces.Discrete)
    if str(config.env_name).startswith("Fourrooms"):
        self.observation_dim = 1
    else:
        self.observation_dim = self.env.observation_space.shape[0]
    self.action_dim = self.env.action_space.n if self.discrete else \
        self.env.action_space.shape[0]
    self.lr = self.config.learning_rate

    self.build()
def __init__(self, env, config, logger=None):
    """
    Initialize Policy Gradient Class

    Args:
        env: an OpenAI Gym environment
        config: class with hyperparameters
        logger: logger instance from the logging module

    You do not need to implement anything in this function. However,
    you will need to use self.discrete, self.observation_dim,
    self.action_dim, and self.lr in other methods.
    """
    # directory for training outputs
    if not os.path.exists(config.output_path):
        os.makedirs(config.output_path)

    # store hyperparameters
    self.config = config
    self.logger = logger
    if logger is None:
        self.logger = get_logger(config.log_path)
    self.env = env

    # discrete vs continuous action space
    self.discrete = isinstance(env.action_space, gym.spaces.Discrete)
    self.observation_dim = self.env.observation_space.shape[0]
    self.action_dim = (self.env.action_space.n if self.discrete
                       else self.env.action_space.shape[0])
    self.lr = self.config.learning_rate

    # build model
    self.build()
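# A minimal usage sketch for a constructor like the one above, assuming a small
# config object exposing output_path, log_path, and learning_rate; the names
# PGConfig and PG are illustrative placeholders, not taken from the snippet.
import gym

class PGConfig:
    output_path = "results/CartPole-v0/"
    log_path = output_path + "log.txt"
    learning_rate = 3e-2

env = gym.make("CartPole-v0")
# pg = PG(env, PGConfig())
# -> self.discrete is True, self.observation_dim == 4, self.action_dim == 2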
def __init__(self):
    self.lr = 5e-2
    self.controller_cells = 64
    self.batch_size = 500
    self.num_batches = 500
    self.observation_dim = 1000
    self.action_dim = 1000
    self.num_layers = 2
    self.action_buffer = []
    self.state_buffer = []
    self.logprob_buffer = []
    self._dict = {}
    self._used_dict = {}
    self.log_acc = []
    self.logger = get_logger('./log.txt')
    self.baseline = -1000.0
    self._num_used_models = []
    # self._initial_baseline =
    '''
    with open('./action_average_reward_dict.json', 'r') as f:
        self._raw_dict = json.load(f)
    temp_map = {30: 0, 60: 1, 100: 2, 144: 3}
    for key in self._raw_dict.keys():
        actions = [temp_map[int(a)] for a in key[1:-1].split(',')]
        temp = str(actions).replace(",", "")
        accuracy = float(self._raw_dict[key]) / 10000
        self._dict[temp] = accuracy
        self._used_dict[temp] = 0
    '''
    self._dict = self.build_reward_function()
    self._used_dict = np.zeros_like(self._dict)

    self.build()
def __init__(self, config: dict, dirs: dict, device):
    self.parallel = isinstance(device, list)
    self.config = config
    self.logger = get_logger(self.config["general"]["project_name"])
    self.is_debug = self.config["general"]["debug"]
    if self.is_debug:
        self.logger.info("Running in debug mode")

    if self.parallel:
        self.device = torch.device(
            f"cuda:{device[0]}" if torch.cuda.is_available() else "cpu")
        self.all_devices = device
        self.logger.info("Running experiment on multiple gpus!")
    else:
        self.device = device
        self.all_devices = [device]

    self.dirs = dirs

    if torch.cuda.is_available():
        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([
            str(dev.index if self.parallel else dev)
            for dev in self.all_devices
        ])

    if self.config["general"]["restart"]:
        self.logger.info(
            f'Resume training run with name "{self.config["general"]["project_name"]}" on device(s) {self.all_devices}'
        )
    else:
        self.logger.info(
            f'Start new training run with name "{self.config["general"]["project_name"]}" on device(s) {self.all_devices}'
        )

    ########## seed setting ##########
    torch.manual_seed(self.config["general"]["seed"])
    torch.cuda.manual_seed(self.config["general"]["seed"])
    np.random.seed(self.config["general"]["seed"])
    # random.seed(opt.seed)
    torch.backends.cudnn.deterministic = True
    rng = np.random.RandomState(self.config["general"]["seed"])

    if self.config["general"]["mode"] == "train":
        project = "visual_poking_unsupervised"
        wandb.init(
            dir=self.dirs["log"],
            project=project,
            name=self.config["general"]["project_name"],
            group=self.config["general"]["experiment"],
        )

        # log parameters
        self.logger.info("Training parameters:")
        for key in self.config:
            if key != "testing":
                self.logger.info(f"{key}: {self.config[key]}")  # print to console
                wandb.config.update({key: self.config[key]})  # update wandb config
def __init__(self):
    self.lr = 10e-2
    self.batch_size = 64
    self.controller_cells = 64
    self.num_iterations = 100
    self.observation_dim = 4
    self.action_dim_1 = 4
    self.action_dim_2 = 2
    self.action_dim_3 = 2
    self.num_layers = 3
    self.num_actions_per_layer = 3
    self.hasConstraint = True
    self.hardConstraint = True
    self.reg_weight = 1e-5
    self.reg_op = 1e-8
    self.weight_limit = 8000
    self.op_limit = 1e8
    self.temp1 = []
    self.temp2 = []
    self.action_buffer = []
    self.state_buffer = []
    self.logprob_buffer = []
    self._dict = {}
    self._used_dict = {}
    self.log_acc = []
    self.logger = get_logger('./log.txt')
    self._num_used_models = []
    self._initial_baseline = 0.05

    # with open('./unormdata.json', 'r') as f:
    with open('./normalizedata.json', 'r') as f:
        self._raw_dict = json.load(f)

    filter_nums_map = {10: 0, 50: 1, 100: 2, 200: 3}
    kernel_sizes_map = {3: 0, 5: 1}
    strides_map = {1: 0, 2: 1}
    for key in self._raw_dict.keys():
        params = key[1:-1].split(',')
        temp = []
        for i in range(9):
            if i % 3 == 0:
                temp.append(filter_nums_map[int(params[i])])
            elif i % 3 == 1:
                temp.append(kernel_sizes_map[int(params[i])])
            else:
                temp.append(strides_map[int(params[i])])
        self._dict[str(temp)] = np.mean(self._raw_dict[key])
        self._used_dict[str(temp)] = 0

    self.build()
def stats(cfg_dict):
    logger = get_logger("stats_calculation")
    cfg_dict['data']['normalize_flows'] = True
    transforms = tt.Compose(
        [tt.ToTensor(), tt.Lambda(lambda x: (x * 2.0) - 1.0)])
    datakeys = ["flow", "images"]

    dataset, _ = get_dataset(config=cfg_dict["data"])
    test_dataset = dataset(transforms, datakeys, cfg_dict["data"], train=True)

    all_frange_data = []
    for l in tqdm(range(test_dataset.data['flow_paths'].shape[-1])):
        logger.info(
            f'Calculating stats for lag of {(l+1) * cfg_dict["flow_delta"]} frames...'
        )
        in_data = [(f, i, l, test_dataset) for f, i in zip(
            test_dataset.data["flow_paths"][:, l],
            test_dataset.data["img_path"])]
        out_data = parallel_data_prefetch(process_flows,
                                          in_data[:100],
                                          n_proc=20,
                                          cpu_intensive=True,
                                          target_data_type="list")
        all_frange_data.append(out_data)

        n_error = np.count_nonzero(out_data[:, 2])
        logger.info(f"While loading the data, {n_error} errors occurred.")

    all_frange_data = np.stack(all_frange_data, axis=-1)
    assert all_frange_data.shape[-1] == test_dataset.datadict[
        'flow_paths'].shape[-1]

    with open(
            path.join(test_dataset.datapath,
                      f"{test_dataset.metafilename}.p"), "rb") as f:
        datadict = pickle.load(f)

    # assert out_data.shape[0] == len(datadict["img_path"])
    key = "flow_range"
    name_key = "frange"
    datadict.update({key: all_frange_data})

    with open(
            path.join(test_dataset.datapath,
                      f"{test_dataset.metafilename}_{name_key}.p"), "wb") as f:
        pickle.dump(datadict, f, protocol=pickle.HIGHEST_PROTOCOL)
def __init__(self, config, dir_output):
    """Defines self._config

    Args:
        config: (Config instance) class with hyper parameters, vocab and embeddings
    """
    self._config = config
    self._dir_output = dir_output
    init_dir(self._dir_output)
    self.logger = get_logger(self._dir_output + "model.log")
    tf.reset_default_graph()  # safeguard in case a previous model was defined
def __init__(self, env, config):
    name = time.strftime("%m%d_%H%M")
    config.output_path = "./elo_scores/{}/".format(name)
    print("Outputting to ", config.output_path)
    config.log_path = config.output_path + "log.log"

    self.config = config
    self.env = env

    if not os.path.exists(config.output_path):
        os.makedirs(config.output_path)
    self.logger = get_logger(
        config.log_path.format(time.strftime("_%m%d_%H%M")))
def __init__(self, env, config):
    self.config = config
    if not os.path.exists(self.config.output_path):
        os.makedirs(self.config.output_path)

    # define the critic network
    self.critic = Critic(S_DIM, A_DIM, self.config.num_units)
    # define the policy (actor) networks
    self.actor = Actor(S_DIM, A_DIM, self.config.num_units)
    self.actor_old = Actor(S_DIM, A_DIM, self.config.num_units)

    # the environment
    self.env = env

    # logger
    self.logger = get_logger(self.config.log_path)

    # tensorboard: define a summary writer
    self.writer = SummaryWriter(self.config.output_path)
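# Keeping both `actor` and `actor_old` suggests a PPO-style update, where
# `actor_old` holds the policy that collected the data and the loss clips the
# probability ratio between the two. A minimal NumPy sketch of that clipped
# surrogate objective (the function and argument names are illustrative):
import numpy as np

def ppo_clip_objective(logp_new, logp_old, advantages, clip_eps=0.2):
    # ratio pi_new(a|s) / pi_old(a|s), computed from log-probabilities
    ratio = np.exp(logp_new - logp_old)
    clipped = np.clip(ratio, 1.0 - clip_eps, 1.0 + clip_eps)
    # the objective to maximize is the element-wise minimum of both terms
    return np.mean(np.minimum(ratio * advantages, clipped * advantages))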
def __init__(self, agent_idx, env, configuration, logger=None):
    self.agent_idx = agent_idx  # the index of this agent
    self.config = configuration

    # directory for training outputs
    if not os.path.exists(self.config.output_path):
        os.makedirs(self.config.output_path)

    # store hyper-params
    self.logger = logger
    if logger is None:
        self.logger = get_logger(self.config.log_path)
    self.env = env

    # action space for a given agent - is Discrete(5) for simple_spread
    # NOTE: assumes that all agents have the same action space for now
    # TODO: pass action_dim as an argument to this function, so it can vary by agent
    # TODO: for simple_spread we don't need to worry about the communication space;
    #       however, for scenarios with communication channels we will need to revisit this,
    #       as action_space seems to be a tuple of movement space and comm space
    self.action_dim = self.env.action_space[0].n

    # observation space for a given agent - is Box(18) for simple_spread
    # NOTE: assumes that all agents have the same observation space for now
    # TODO: pass observation_dim as an argument to this function, so it can vary by agent
    self.observation_dim = self.env.observation_space[0].shape[0]

    self.lr = self.config.learning_rate

    # top-level scopes
    self.policy_approx_networks_scope = "policy_approx_networks"
    self.actor_network_scope = "actor_network"
    self.critic_network_scope = "critic_network"

    # noise to simulate the random process
    # TODO: consider making this an input parameter so we can tweak it
    self.noise = OrnsteinUhlenbeckActionNoise(
        mu=np.zeros(env.action_space[0].n),
        sigma=0.3, theta=0.15, dt=1e-2, x0=None)

    self.param_noise_stddev = tf.placeholder(tf.float32, shape=(),
                                             name='param_noise_stddev')
    self.param_noise = AdaptiveParamNoiseSpec(initial_stddev=0.6,
                                              desired_action_stddev=0.2)

    self.t = 0
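# OrnsteinUhlenbeckActionNoise above adds temporally correlated exploration
# noise to the actions. A minimal self-contained sketch of the discretized OU
# process using the same parameter names; this is a generic reimplementation
# for illustration, not the class used by the constructor above.
import numpy as np

class OUNoiseSketch:
    def __init__(self, mu, sigma=0.3, theta=0.15, dt=1e-2, x0=None):
        self.mu, self.sigma, self.theta, self.dt = mu, sigma, theta, dt
        self.x_prev = x0 if x0 is not None else np.zeros_like(mu)

    def __call__(self):
        # x_{t+1} = x_t + theta * (mu - x_t) * dt + sigma * sqrt(dt) * N(0, I)
        x = (self.x_prev
             + self.theta * (self.mu - self.x_prev) * self.dt
             + self.sigma * np.sqrt(self.dt) * np.random.normal(size=self.mu.shape))
        self.x_prev = x
        return x

# noise = OUNoiseSketch(mu=np.zeros(5))
# exploration = noise()  # one correlated noise sample per action dimension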
def __init__(self, env, config, logger=None):
    """
    Initialize Policy Gradient Class

    Args:
        env: an OpenAI Gym environment
        config: class with hyperparameters
        config.use_mask: at train time, omit velocity features from the state
        logger: logger instance from the logging module

    You do not need to implement anything in this function. However,
    you will need to use self.discrete, self.observation_dim,
    self.action_dim, and self.lr in other methods.
    """
    # directory for training outputs
    if not os.path.exists(config.output_path):
        os.makedirs(config.output_path)

    # store hyperparameters
    self.config = config
    if self.config.use_mask:
        print('Using mask...')
    self.logger = logger
    if logger is None:
        self.logger = get_logger(config.log_path)
    self.env = env

    # discrete vs continuous action space
    self.discrete = isinstance(env.action_space, gym.spaces.Discrete)
    self.observation_dim = get_obs_dims(self.config.env_name, self.config.use_mask)
    self.action_dim = (self.env.action_space.n if self.discrete
                       else self.env.action_space.shape[0])
    self.lr = self.config.learning_rate

    # for milestone: capture raw tuple embedding (s, a, r, s', done_mask)
    self.memory_dim = 6  # self.observation_dim * 2 + self.action_dim + 1 + 1
    self.replay_buffer = ReplayBuffer(self.config.memory_len + 1, 1,
                                      action_dim=self.action_dim)
    self.percolated_buffer = ReplayBuffer(self.config.percolate_len + 1, 1,
                                          action_dim=self.action_dim)

    # build model
    self.build()
def __init__(self, env, record_env, network, FLAGS, logger=None):
    # directory for training outputs
    if not os.path.exists(FLAGS.output_path):
        os.makedirs(FLAGS.output_path)

    # store hyper params
    self.FLAGS = FLAGS
    self.env = env
    self.record_env = record_env
    self.network = network
    self.summary = Summary()

    # set up logger
    if logger is None:
        self.logger = get_logger(FLAGS.log_path)
    else:
        self.logger = logger

    # create network
    self.network.build()
def __init__(self, model_0, model_1, env, config):
    self.model_0 = model_0
    self.model_1 = model_1
    self.env = env
    self.config = config

    name = time.strftime("_%m%d_%H%M")
    config.output_path = config.output_path.format(name)
    config.model_output = config.model_output.format(name)
    config.log_path = config.log_path.format(name)
    config.plot_output = config.plot_output.format(name)
    config.record_path = config.record_path.format(name)

    if not os.path.exists(config.output_path):
        os.makedirs(config.output_path)
    self.logger = get_logger(
        config.log_path.format(time.strftime("_%m%d_%H%M")))
def __init__(self, env, configuration=config, logger=None):
    # directory for training outputs
    if not os.path.exists(configuration.output_path):
        os.makedirs(configuration.output_path)

    # store hyper-params
    self.config = configuration
    self.logger = logger
    if logger is None:
        self.logger = get_logger(configuration.log_path)
    self.env = env

    # create n PG objects for n agents
    agents = []
    for i in range(self.env.n):
        agents.append(PG(i, self.env, configuration=self.config, logger=logger))
    self.agents = agents
def train():
    # s, _, loss, y = autoencoder()
    s, _, loss, y, recon_loss, KL = vae()
    train_op, grad_norm = add_optimizer_op(loss)

    if not os.path.exists(config.output_path):
        os.makedirs(config.output_path)
    logger = get_logger(config.log_path)

    train_data, eval_data = load_data()

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    for i in xrange(0, config.epoch_num):  # each epoch
        # train
        prog = Progbar(target=1 + int(len(train_data) / config.batch_size))
        step = 1
        for batch in minibatches(train_data, config.batch_size):
            loss_eval, grad_norm_eval, y_train, _, recon_loss_train, KL_train = sess.run(
                [loss, grad_norm, y, train_op, recon_loss, KL],
                feed_dict={s: batch})
            # prog.update(step, [("train loss", loss_eval), ("grad norm", grad_norm_eval)])
            prog.update(step, [("train loss", loss_eval),
                               ("grad norm", grad_norm_eval),
                               ('recon loss', recon_loss_train),
                               ('VLBO', KL_train)])
            step += 1

        plt.imshow(y_train[0, :, :, 0], cmap='Greys')
        plt.savefig('y.png')

        # eval
        # prog = Progbar(target=1 + int(len(eval_data) / config.batch_size))
        # step = 1
        # losses = []
        # for batch in minibatches(eval_data, config.batch_size):
        #     loss_eval = sess.run(loss, feed_dict={s: batch})
        #     prog.update(step, [("eval loss", loss_eval)])
        #     losses.append(loss_eval)
        #     step += 1
        # avg_loss = np.mean(losses)
        # sigma_loss = np.sqrt(np.var(losses) / len(losses))
        # print ""
        # msg = "Average loss: {:04.2f} +/- {:04.2f}".format(avg_loss, sigma_loss)
        # logger.info(msg)

    save(sess)
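# The vae() graph above returns a total loss together with its reconstruction
# and KL components. A minimal NumPy sketch of how those two terms are usually
# computed for a Gaussian encoder N(mu, sigma^2) against a standard-normal
# prior; `x`, `x_recon`, `mu`, and `log_var` are illustrative placeholders,
# not outputs of vae() itself.
import numpy as np

def vae_loss_terms(x, x_recon, mu, log_var):
    # reconstruction term: squared error summed over pixels, averaged over the batch
    recon = np.mean(np.sum((x - x_recon) ** 2, axis=tuple(range(1, x.ndim))))
    # KL(N(mu, sigma^2) || N(0, 1)), summed over latent dims, averaged over the batch
    kl = np.mean(-0.5 * np.sum(1.0 + log_var - mu ** 2 - np.exp(log_var), axis=1))
    return recon, kl, recon + kl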
def __init__(self, env, config, parent_scope, q_network_sizes,
             logger=None, student=False):
    """
    Initialize Q Network and env

    Args:
        env: environment
        config: class with hyperparameters
        parent_scope: parent scope under which this model is constructed
        logger: logger instance from logging module
    """
    # directory for training outputs
    if not os.path.exists(config.output_path):
        os.makedirs(config.output_path)

    # store hyper params
    self.config = config
    self.logger = logger
    if logger is None:
        self.logger = get_logger(self.config.log_path)
    self.env = env
    self.student = student
    self.parent_scope = parent_scope
    self.q_network_sizes = q_network_sizes

    # log the config
    self.logger.info('CONFIG VARIABLES:')
    self.logger.info(self.config.get_config())

    # build model
    self.build(student=student)

    self.size = sum(v.get_shape().num_elements()
                    for v in tf.trainable_variables(self.parent_scope))
    # subtracting the size of the teacher models is no longer necessary
    # because tf.trainable_variables is using the scope of this student model only
    # if self.student:
    #     self.size -= sum([teachermodel.size for teachermodel in self.teachermodels])
    self.logger.info('Num params: %d' % self.size)
def __init__(self, env, config, logger=None):
    """
    Initialize Q Network and env

    Args:
        config: class with hyperparameters
        logger: logger instance from logging module
    """
    # directory for training outputs
    if not os.path.exists(config.output_path):
        os.makedirs(config.output_path)

    # store hyper params
    self.config = config
    self.logger = logger
    if logger is None:
        self.logger = get_logger(config.log_path)
    self.env = env

    self.build()
def __init__(self, env, config, run=0, logger=None):
    """
    Initialize Policy Gradient Class

    Args:
        env: an OpenAI Gym environment
        config: class with hyperparameters
        logger: logger instance from the logging module
    """
    # directory for training outputs
    if not os.path.exists(config.output_path):
        os.makedirs(config.output_path)

    # store hyperparameters
    self.config = config
    self.logger = logger
    if logger is None:
        self.logger = get_logger(config.log_path)
    self.env = env

    # discrete vs continuous action space
    self.discrete = isinstance(env.action_space, gym.spaces.Discrete)
    self.observation_shape = self.env.observation_space.shape
    self.observation_dim = self.env.observation_space.shape[0]
    self.action_dim = (self.env.action_space.n if self.discrete
                       else self.env.action_space.shape[0])

    self.actor_lr = self.config.learning_rate
    self.critic_lr = self.config.critic_learning_rate
    self.tau = self.config.target_update_weight
    self.gamma = self.config.gamma

    if not self.discrete:
        self.action_high = float(self.env.action_space.high[0])
        self.action_low = float(self.env.action_space.low[0])
    self.obs_high = self.env.observation_space.high
    self.obs_low = self.env.observation_space.low

    self.agent_name = "pg"
    self.run = run
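# Storing config.target_update_weight as self.tau above points to soft
# (Polyak-averaged) target-network updates, as used by actor-critic methods
# with separate target networks. A minimal sketch, assuming the parameters are
# available as lists of NumPy arrays (names are illustrative):
import numpy as np

def soft_update(target_params, online_params, tau):
    # theta_target <- tau * theta_online + (1 - tau) * theta_target
    return [tau * w + (1.0 - tau) * wt
            for w, wt in zip(online_params, target_params)]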
def __init__(self):
    self.lr = 5e-2
    self.batch_size = 500
    self.controller_cells = 128
    self.num_iterations = 5000
    self.observation_dim = 100
    self.action_dim_1 = 1
    self.action_dim_2 = 2
    self.action_dim_3 = 2
    self.num_layers = 3
    self.num_actions_per_layer = 3
    self.hasConstraint = False
    self.hardConstraint = False
    self.reg_weight = 1e-5
    self.reg_op = 1e-8
    self.weight_limit = 8000
    self.op_limit = 1e8
    self.temp1 = []
    self.temp2 = []
    self.action_buffer = []
    self.state_buffer = []
    self.logprob_buffer = []
    self._dict = {}
    self._used_dict = {}
    self.log_acc = []
    self.logger = get_logger('./log.txt')
    self._num_used_models = []
    self._initial_baseline = 0
    self.max_filter = 100
    self._used_models = []

    with open('./norm_inter_acc.json', 'r') as f:
        self._raw_dict = json.load(f)

    self.build()
def __init__(self, env, config, current_graph, logger=None):
    """
    Initialize Q Network and env

    Args:
        config: class with hyperparameters
        logger: logger instance from logging module
    """
    # directory for training outputs
    if not os.path.exists(config.output_path):
        os.makedirs(config.output_path)

    # store hyper params
    self.config = config
    self.logger = logger
    if logger is None:
        self.logger = get_logger(config.log_path)
    self.env = env
    self.current_graph = current_graph
    self.state_shape = list([3, 3, len(self.env.state.xmap.item_class_id) + 2])

    # build model
    self.build()
def __init__(self, env, config, exp_schedule, lr_schedule, is_training_agent,
             train_from_scratch=False, reward_after_somebody_died=False,
             logger=None):
    """
    Initialize Q Network and env

    :param env: Game environment
    :param config: config (hyper-parameters) instance
    :param logger: logger instance from logging module
    :param exp_schedule: exploration strategy for epsilon
    :param lr_schedule: schedule for learning rate
    """
    super(DQNAgent, self).__init__()

    # Variables initialized in _build.
    self._states = None
    self._actions = None
    self._rewards = None
    self._next_states = None
    self._done_mask = None
    self._learning_rate = None
    self._q_values = None
    self._target_q_values = None
    self._next_q_values = None
    self._update_target_op = None
    self._loss = None
    self._train_op = None
    self._grad_norm = None

    # Variables initialized in init_agent.
    self._session = None
    self._avg_reward_placeholder = None
    self._max_reward_placeholder = None
    self._std_reward_placeholder = None
    self._avg_q_placeholder = None
    self._max_q_placeholder = None
    self._std_q_placeholder = None
    # TODO: Commented due to lack of evaluate()
    # self._eval_reward_placeholder = None
    self._merged = None
    self._file_writer = None
    self._saver = None
    self._train_replay_buffer = None
    self._train_rewards = None
    self._train_max_q_values = None
    self._train_q_values = None
    self._avg_reward = None
    self._max_reward = None
    self._std_reward = None
    self._avg_q = None
    self._max_q = None
    self._std_q = None
    # TODO: Commented due to lack of evaluate()
    # self._eval_reward = None
    self._time_step = None
    self._progress_bar = None
    self._has_episode_started = None

    # Variables initialized in act.
    self._last_action = None
    self._last_idx = None
    self._enemy_count = None

    # Directory for training outputs.
    if not os.path.exists(config.output_path):
        os.makedirs(config.output_path)

    self._logger = logger
    if logger is None:
        self._logger = get_logger(config.log_path)

    self._config = config
    self._env = env
    self._exp_schedule = exp_schedule
    self._lr_schedule = lr_schedule
    self._is_training_agent = is_training_agent
    self._train_from_scratch = train_from_scratch
    self._reward_after_somebody_died = reward_after_somebody_died
    self._total_reward = 0

    # Build model.
    self._build()
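# The agent above is driven by an exp_schedule for epsilon and an lr_schedule
# for the learning rate. A minimal sketch of the linear schedule commonly used
# for this purpose; the class name and attributes are assumptions for
# illustration, not the schedule objects passed to the constructor above.
class LinearSchedule:
    def __init__(self, begin, end, nsteps):
        self.begin, self.end, self.nsteps = begin, end, nsteps
        self.epsilon = begin

    def update(self, t):
        # interpolate linearly from begin to end over nsteps, then hold at end
        frac = min(float(t) / self.nsteps, 1.0)
        self.epsilon = self.begin + frac * (self.end - self.begin)

# eps = LinearSchedule(1.0, 0.1, 100000)
# eps.update(t)  # act randomly with probability eps.epsilon, greedily otherwise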