def initTensorboard(self):
    """Initializes the SummaryWriter for tensorboard logging.
    Link: https://pytorch.org/docs/stable/tensorboard.html
    """
    # TODO: Is it required?
    # if self.dry_run:
    #     logger.warn("Tensorboard initialization was ignored due to --dry-run argument.")
    #     return
    from torch.utils.tensorboard import SummaryWriter
    self.writer = SummaryWriter(log_dir=self.state['path_tensorboard'])
    # Put it here for global access to tensorboard!
    writers.append(self.writer)

    if self.args["tensorboard"]:
        # Run a dedicated Tensorboard server:
        from tensorboard import program
        tb = program.TensorBoard()
        tb.configure(argv=[None, '--bind_all', '--logdir', self.state['path_tensorboard']])
        url = tb.launch()
        logger.warn("Access Tensorboard through: " + str(url))
    else:
        # Replace the logging methods with no-ops so no time is wasted on logging.
        for attr in dir(self.writer):
            if attr.startswith("add_") or (attr == "flush") or (attr == "close"):
                setattr(self.writer, attr, lambda *args, **kw: None)
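# After the patching above, every logging call on the writer becomes a cheap
# no-op. A hypothetical check of that behavior:
#   session.writer.add_scalar("loss", 0.5, global_step=1)  # does nothing
#   session.writer.flush()                                  # does nothing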
def reset(self):
    if self.waiting_step:
        logger.warn('Called reset() while waiting for the step to complete')
        self.step_wait()
    for pipe in self.parent_pipes:
        pipe.send(('reset', None))
    return self._decode_obses([pipe.recv() for pipe in self.parent_pipes])
def log(self):
    # Guard against division by zero before any episode has finished.
    if self.stats["episodes"] and (self.stats["episodes"] % self.interval == 0):
        success = self.stats["episodes"] - self.stats["episodes_failure"]
        overall = self.stats["episodes"]
        success_rate = (1 - float(self.stats["episodes_failure"]) / float(self.stats["episodes"])) * 100
        logger.warn("Success rate is: {}/{} = {:4.2f}".format(success, overall, success_rate))
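# A worked example (hypothetical counts) of the computation above: with
# episodes = 50 and episodes_failure = 8,
#   success_rate = (1 - 8/50) * 100 = 84.00
# and the log line reads "Success rate is: 42/50 = 84.00".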
def __init__(self, device, **params):
    super(Policy, self).__init__(device)
    self.params = params

    # assert len(self.params["obs_space"]["dim"]) == 1, "We only support 1d observations for the SAC policy for now."
    assert self.params["act_space"]["typ"] == "Box", "We only support continuous actions in SAC policy for now."

    state_size = self.params["obs_space"]["dim"][0]
    action_size = self.params["act_space"]["dim"] if np.isscalar(self.params["act_space"]["dim"]) \
                  else self.params["act_space"]["dim"][0]
    image_repr_size = self.params["image_repr_size"]

    print("image_repr_type =", self.params["image_repr_type"])
    if self.params["image_repr_type"] == "cnn":
        logger.warn("Using CNNBlock in the policy.")
        self.model["image"] = CNNBlock(num_inputs=state_size, output_size=image_repr_size)
    elif self.params["image_repr_type"] == "coordconv":
        logger.warn("Using CoordConv in the policy.")
        self.model["image"] = CoordConvBlock(num_inputs=state_size, output_size=image_repr_size)
    else:
        logger.error("The provided 'image_repr_type' is not recognized.")
        exit()

    if "hidden_size" in self.params:
        hidden_size = self.params["hidden_size"]
        self.model["value"] = ValueNetwork(image_repr_size, hidden_size, **self.params["value_args"])
        self.model["value_target"] = deepcopy(self.model["value"])
        self.model["softq"] = SoftQNetwork(image_repr_size, action_size, hidden_size, **self.params["softq_args"])
        self.model["actor"] = ActorNetwork(image_repr_size, action_size, hidden_size, **self.params["actor_args"])
    else:
        self.model["value"] = ValueNetwork(image_repr_size, **self.params["value_args"])
        self.model["value_target"] = deepcopy(self.model["value"])
        self.model["softq"] = SoftQNetwork(image_repr_size, action_size, **self.params["softq_args"])
        self.model["actor"] = ActorNetwork(image_repr_size, action_size, **self.params["actor_args"])

    self.averager = {}
    self.averager["value"] = Averager(self.model["value"], self.model["value_target"], **self.params["average_args"])

    self.model_to_gpu()
    logger("Number of parameters: <", self.count_parameters(), '>')
def strict_update(dict_target, dict_source):
    result = copy.deepcopy(dict_target)
    for key in dict_source:
        if key not in dict_target:
            logger.warn("The provided parameter '{}' was not available in the target dictionary.".format(key))
            # continue
        result[key] = dict_source[key]
    return result
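# A minimal usage sketch (hypothetical values): keys in dict_source always
# override dict_target; keys absent from dict_target are still copied, but a
# warning is emitted so typos in overridden parameters do not fail silently.
#
#   defaults  = {"lr": 1e-3, "gamma": 0.99}
#   overrides = {"lr": 3e-4, "gama": 0.95}   # note the typo: 'gama'
#   merged = strict_update(defaults, overrides)
#   # merged == {"lr": 3e-4, "gamma": 0.99, "gama": 0.95}, plus a warning for 'gama'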
def check_stats(chunk, info):
    """This sampler function has debugging purposes and will report the mean
    and standard deviation of every key in the data chunk.
    """
    logger.warn("%s:%s[%d]: Checking stats:" % (os.path.basename(inspect.stack()[2].filename),
                                                inspect.stack()[2].function,
                                                inspect.stack()[2].lineno))
    if chunk:
        for key in chunk:
            logger.warn("{} = {:.2f} (\xB1{:.2f} 95%)".format(key,
                                                              np.nanmean(chunk[key]),
                                                              2 * np.nanstd(chunk[key])))
    return chunk
def check_shape(chunk, info):
    """This sampler function has debugging purposes and reports the shapes of
    every key in the data chunk.
    """
    logger.warn("%s:%s[%d]: Checking shapes:" % (os.path.basename(inspect.stack()[2].filename),
                                                 inspect.stack()[2].function,
                                                 inspect.stack()[2].lineno))
    if chunk:
        for key in chunk:
            logger.warn("%s %s" % (key, str(chunk[key].shape)))
    return chunk
def save_runner(self, runner, index):
    if self.dry_run:
        return
    dirname = os.path.join(self.state['path_checkpoints'], "checkpoint-" + str(index))
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    filename = os.path.join(dirname, "runner.pt")
    with open(filename, "wb") as f:
        pickle.dump(runner, f, pickle.HIGHEST_PROTOCOL)
    logger.warn('>>> Network runner saved at {}\n'.format(dirname))
def check_nan(chunk, info):
    """This sampler function has debugging purposes and will publish a warning
    message if there are NaN values in the chunk.
    """
    if chunk:
        for key in chunk:
            if np.isnan(chunk[key]).any():
                logger.warn("%s:%s[%d]: Found NaN '%s'." % (os.path.basename(inspect.stack()[2].filename),
                                                            inspect.stack()[2].function,
                                                            inspect.stack()[2].lineno,
                                                            key))
    return chunk
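# A hypothetical sketch of how these debugging samplers compose: each one takes
# a (chunk, info) pair and returns the chunk unchanged, so they can be chained
# inside a sampler pipeline for inspection without altering the data flow.
#
#   chunk = check_shape(chunk, info)   # log the shape of every key
#   chunk = check_stats(chunk, info)   # log mean/std of every key
#   chunk = check_nan(chunk, info)     # warn on NaN entries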
def load_datasets(self):
    # NOTE: Very important: the memory will not throw any exceptions if a key is
    #       added to it after loading! In other words, loading the memory can fail silently!
    filelist = []
    keyslist = []
    for root, dirs, files in os.walk(self.memroot):
        for file in files:
            filelist += [os.path.join(root, file)]
            key = os.path.splitext("/" + os.path.relpath(os.path.join(root, file), self.memroot))[0]
            keyslist += [key]
    for key, filename in zip(keyslist, filelist):
        self.buffer[key] = np.memmap(filename,
                                     mode='r+',
                                     shape=self.state['keys'][key]['shape'],
                                     dtype=self.state['keys'][key]['dtype'])
        logger.warn("[Memory] Loading from disk: '{key}' (shape:{shape}), (dtype:{dtype}).".format(
            key=key,
            dtype=self.state['keys'][key]['dtype'],
            shape=self.state['keys'][key]['shape']))
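# A worked example (hypothetical paths) of the key derivation above: a file
#   <memroot>/observations/camera.npy
# yields
#   os.path.splitext("/" + "observations/camera.npy")[0] == "/observations/camera"
# so the on-disk directory layout mirrors the hierarchical key names in the buffer.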
def create_dataset(self, key, dtype, shape):
    filename = os.path.join(self.memroot, key.lstrip("/")) + ".npy"
    # Create all intermediate directories.
    dirs = os.path.split(filename)[0]
    os.makedirs(dirs, exist_ok=True)
    # TODO: If loading a checkpoint, then use mode 'r+'.
    self.buffer[key] = np.memmap(filename, mode='w+', shape=shape, dtype=dtype)
    # Fill the records we have missed so far with NaN ...
    all_index_list = self.get_index_up_to_end()
    self.buffer[key][:, all_index_list] = self.get_appropriate_nan(dtype)
    # Give a report.
    size = dtype.itemsize * np.prod(shape) / 1024. / 1024.  # In MB
    logger.warn("[Memory] Storing on disk: '{key}' (shape:{shape}), (dtype:{dtype}), (size:{size:9.1f} MB).".format(
        key=key, shape=shape, dtype=dtype, size=size))
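# A minimal sketch (as a comment, since get_appropriate_nan is defined elsewhere
# in this class) of the missing-value sentinel used above, inferred from the
# dtype handling in the other store() implementation: floating dtypes use NaN;
# integer dtypes have no NaN, so the smallest representable value acts as the
# sentinel.
#
#   def get_appropriate_nan(self, dtype):
#       if np.issubdtype(dtype, np.floating):
#           return np.nan
#       elif np.issubdtype(dtype, np.integer):
#           return np.iinfo(dtype).min
#       raise TypeError("No NaN-like sentinel for dtype: {}".format(dtype))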
def load_snapshot(self):
    logger.warn("Loading memory from snapshot started ...")
    logger.warn("Doing nothing ...")
    # TODO: Enable if the snapshot should be moved from a remote place to the local machine.
    # self.session.load_memory_snapshot(self.memroot, name=self.params["name"])
    logger.warn("Loading memory from snapshot finished.")
    # Open all the datasets into self.buffer.
    self.load_datasets()
def load_snapshot(self):
    t = time.time()
    logger.warn("Loading memory ({}) from snapshot started ...".format(self.params["name"]))
    dirname = os.path.join(self.session.state['path_memsnapshot'], self.session.state['checkpoint_name'])
    filename = os.path.join(dirname, self.params["name"] + ".npz")
    logger.warn("Loading memory from:", filename)
    loading = np.load(filename)
    for key in loading.files:
        self.buffer[key] = loading[key]
        logger.info("[Memory] Loading from disk: '{key}' (shape:{shape}), (dtype:{dtype}).".format(
            key=key, dtype=self.buffer[key].dtype, shape=self.buffer[key].shape))
    logger.warn("Loading memory from snapshot finished in {:.2f} seconds.".format(time.time() - t))
def save_snapshot(self, index):
    t = time.time()
    logger.warn("Taking memory ({}) snapshot started ...".format(self.params["name"]))
    # NOTE: Save self.buffer under memsnapshot/checkpoint-X/<memory-name>.npz
    #       or under memsnapshot/checkpoint-X/<name>/ tree of ".npy" files.
    dirname = os.path.join(self.session.state['path_memsnapshot'], "checkpoint-" + str(index))
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    filename = os.path.join(dirname, self.params["name"])
    logger.warn("Saving memory to:", filename + ".npz")
    np.savez(filename, **self.buffer)
    logger.warn("Taking memory snapshot finished in {:.2f} seconds.".format(time.time() - t))

    # NOTE: Now that we have successfully saved the current checkpoint, we can remove old checkpoints.
    if not self.params["keep_old_checkpoints"]:
        # TODO: Do not delete old checkpoints if we are still there.
        # Go and find the memory checkpoint that we started from.
        if self.session.state["checkpoint_name"] and self.session.is_loading:
            dirname = os.path.join(self.session.state["path_memsnapshot"],
                                   self.session.state["checkpoint_name"])
            if os.path.exists(dirname):
                shutil.rmtree(dirname)
def save_snapshot(self):
    logger.warn("Taking memory snapshot started ...")
    logger.warn("Doing nothing ...")
    # TODO: Enable if taking the snapshot in different places is eligible.
    # self.session.take_memory_snapshop(memroot=self.memroot, name=self.params["name"])
    logger.warn("Taking memory snapshot finished ...")
def main(session):
    ##########################################
    ###              LOOPING               ###
    ##########################################
    # 1. Loading
    if session.is_loading:
        params = session.update_params({})
        # Summary
        logger.warn("=" * 50)
        logger.warn("Session:", params["session_name"])
        logger.warn("Message:", params["session_msg"])
        logger.warn("Command:\n\n$", params["session_cmd"], "\n")
        logger.warn("-" * 50)
        runner = session.load_runner()
        # runner.override(session.args["override"])
        # params = runner.params
    else:
        ##########################################
        ###        LOAD FRESH PARAMETERS       ###
        ##########################################
        # Import method-specific modules.
        ParamEngine = get_module(session.args["params"])
        cpanel = strict_update(ParamEngine.cpanel, session.args["cpanel"])
        params = ParamEngine.gen_params(cpanel)  # Generate params from cpanel every time.
        # Store the parameters in the session.
        params = session.update_params(params)
        session.dump_cpanel(cpanel)
        session.dump_params(params)
        # Summary
        logger.warn("=" * 50)
        logger.warn("Session:", params["session_name"])
        logger.warn("Message:", params["session_msg"])
        logger.warn("Command:\n\n$", params["session_cmd"], "\n")
        logger.warn("-" * 50)
        # logger.info("Hyper-Parameters\n\n{}".format(yaml.dump(params, indent=2)))
        logger.warn("Hyper-Parameters\n\n{}".format(json.dumps(cpanel, indent=4, sort_keys=False)))
        logger.warn("=" * 50)
        ##########################################
        Runner = get_class(params["runner"]["name"])
        runner = Runner(params)
        # If we are only creating the session, we do not even need to start the runner.
        session.save_runner(runner, 0)
        if session.is_session_only:
            logger.fatal("Session was created; exiting ...")
            return

    # 2. Initializing: it will load the state dicts if we are in loading mode.
    runner.start(session)

    # 3. Train/Enjoy/Custom loops
    if session.is_playing:
        runner.enjoy()
    elif session.is_customs:
        runner.custom()
    else:
        runner.train()
def store(self, chunk):
    """
    Store a chunk of data in the memory.

    Args:
        chunk (dict): The chunk of information in dictionary format:
            ``{"keys": array(batch_size x num_steps x *(key_shape))}``

    This function does not assume anything about the key names in the ``chunk``.
    If a key is new, it creates a new entry for it in the memory. If it already
    exists, the data will be appended under the existing key.

    This function appends the new ``chunk`` to the ``buffer`` in a key-wise manner.
    If the memory is already full, the new data will replace the oldest data, i.e. a queue.

    Tip:
        A chunk from the :class:`~digideep.environment.explorer.Explorer` consists of
        ``batch_size`` trajectories. Each trajectory includes ``n-steps + 1`` transitions.
        The last transition of the last step is always overridden, since that is a
        "half-step" and all information in that half-step would recur in the new step,
        i.e. observations or actions. So the size of the buffer (in terms of transitions)
        is always ``k x n-steps + 1``, where ``k`` is the number of chunks stored so far.
    """
    #########################################
    ### CODE FOR DEBUGGING THE INPUT DATA ###
    #########################################
    ## Tensorboard functions:
    # add_scalar(tag, scalar_value, global_step=None, walltime=None)
    # add_scalars(main_tag, tag_scalar_dict, global_step=None, walltime=None)
    # add_histogram(tag, values, global_step=None, bins='tensorflow', walltime=None, max_bins=None)
    # add_image(tag, img_tensor, global_step=None, walltime=None, dataformats='CHW')
    # add_images(tag, img_tensor, global_step=None, walltime=None, dataformats='NCHW')
    # add_figure(tag, figure, global_step=None, close=True, walltime=None)
    # add_video(tag, vid_tensor, global_step=None, fps=4, walltime=None)
    # add_audio(tag, snd_tensor, global_step=None, sample_rate=44100, walltime=None)
    # add_text(tag, text_string, global_step=None, walltime=None)
    # add_graph(model, input_to_model=None, verbose=False)
    # add_embedding(mat, metadata=None, label_img=None, global_step=None, tag='default', metadata_header=None)
    # add_pr_curve(tag, labels, predictions, global_step=None, num_thresholds=127, weights=None, walltime=None)
    # add_custom_scalars(layout)
    # add_mesh(tag, vertices, colors=None, faces=None, config_dict=None, global_step=None, walltime=None)
    # add_hparams(hparam_dict=None, metric_dict=None)

    # print(chunk.keys())
    # print("-"*40)
    # print(chunk["/observations/camera"].shape)  # Camera: (1, 2, 4, 180, 240)
    # self.session.writer.add_histogram('memory:observations/agent', chunk["/observations/agent"][0,0], self.state['frame'])
    # self.session.writer.add_scalar('memory:rewards', chunk["/rewards"][0,0], self.state['frame'])

    # dataformats: NCHW, NHWC, CHW, HWC, HW, WH
    # cam_batch = chunk["/observations/camera"][0,0]

    ## Sequence of images as stacking
    # shape = chunk["/observations/camera"][0,0].shape
    # self.session.writer.add_images(tag=self.params["name"]+"_images",
    #                                img_tensor=chunk["/observations/camera"][0,0].reshape(shape[0],1,shape[1],shape[2]),
    #                                global_step=self.state['frame'],
    #                                dataformats='NCHW')

    ## Sequence of images as channels
    # self.session.writer.add_image(tag=self.params["name"]+"_images",
    #                               img_tensor=chunk["/observations/camera"][0,0],
    #                               global_step=self.state['frame'],
    #                               dataformats='CHW')

    ## Histograms
    # self.session.writer.add_histogram('distribution centers', x + i, i)

    # cam_stacked = np.concatenate(cam_batch, axis=1)
    # new_img = Image.fromarray(cam_stacked, 'L')
    # new_img.save("/master/reports/{:04d}_gray_{}.jpg".format(self.state['frame'], self.params["name"]))
    ######################################################

    self.state['frame'] += 1

    ## Assertions
    sizes = [chunk[key].shape[0:2] for key in chunk.keys()]
    assert np.all(np.array(sizes) == sizes[0]), "All keys should have the same size (batch, samples, *)."
    # size = (batch, samples)
    size = sizes[0]
    batch_size = size[0]  # Here it indicates the number of workers.
    chunk_size_plus_overrun = size[1]
    assert chunk_size_plus_overrun == self.params["chunk_sample_len"] + self.params["overrun"], \
        "Chunk should have " + str(self.params["chunk_sample_len"] + self.params["overrun"]) + " samples."

    if self.state['batch_size']:
        assert batch_size == self.state['batch_size']
    else:
        self.state['batch_size'] = batch_size

    # self.counter += 1
    # if self.params["mode"] == "demo":
    #     pic = chunk["/observations/camera"][0,0,-1]
    #     img = Image.fromarray(pic)
    #     img = img.convert("L")
    #     img.save("/home/sharif/frames/{}_{:04d}.jpg".format(self.params["mode"], self.counter))
    #     exit()

    ## Assignments (memory)
    all_index_list = self.get_index_up_to_end()
    new_index_list = self.get_index_new_chunk()
    for key in chunk:
        if key not in self.buffer:
            ## np.empty is much faster than np.full.
            dtype = chunk[key].dtype
            self.buffer[key] = np.empty(shape=(batch_size, self.buffer_size, *chunk[key].shape[2:]), dtype=dtype)
            self.buffer[key][:, all_index_list] = self.get_appropriate_nan(dtype)
            size = getsizeof(self.buffer[key]) / 1024. / 1024.
            logger.warn("Dictionary entry [{}] added (type: {:s}, size: {:9.1f} MB)".format(key, str(dtype), size))
        # TODO: Check the shape of new data and complain if not consistent.
        self.buffer[key][:, new_index_list] = chunk[key]
        # # TODO: Fix it for integer types!
        # if np.issubdtype(dtype, np.floating):
        #     self.buffer[key][:, new_index_list] = chunk[key]
        # elif np.issubdtype(dtype, np.integer):
        #     self.buffer[key][:, new_index_list] = chunk[key]
        # else:
        #     logger.warn("The [{}] type in memory is neither integer nor floating, it is {}.".format(key, self.buffer[key].dtype))

    for key in self.buffer:
        if key not in chunk:
            self.buffer[key][:, new_index_list] = self.get_appropriate_nan(self.buffer[key].dtype)

    self.move_list_one_chunk_forward()
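# A worked example of the Tip in the docstring above (hypothetical numbers):
# with n-steps = 4 and k = 3 stored chunks, each chunk contributes 4 fresh
# transitions plus one "half-step" that the next chunk overrides, so the
# buffer holds k * n_steps + 1 = 3 * 4 + 1 = 13 transitions.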
def store(self, chunk):
    """
    Store a chunk of data in the memory.

    Args:
        chunk (dict): The chunk of information in dictionary format:
            ``{"keys": array(batch_size x num_steps x *(key_shape))}``

    This function does not assume anything about the key names in the ``chunk``.
    If a key is new, it creates a new entry for it in the memory. If it already
    exists, the data will be appended under the existing key.

    This function appends the new ``chunk`` to the ``buffer`` in a key-wise manner.
    If the memory is already full, the new data will replace the oldest data, i.e. a queue.

    Tip:
        A chunk from the :class:`~digideep.environment.explorer.Explorer` consists of
        ``batch_size`` trajectories. Each trajectory includes ``n-steps + 1`` transitions.
        The last transition of the last step is always overridden, since that is a
        "half-step" and all information in that half-step would recur in the new step,
        i.e. observations or actions. So the size of the buffer (in terms of transitions)
        is always ``k x n-steps + 1``, where ``k`` is the number of chunks stored so far.
    """
    sizes = [chunk[key].shape[0:2] for key in chunk.keys()]
    assert np.all(np.array(sizes) == sizes[0]), "All keys should have the same size (batch, samples, *)."
    assert sizes[0][1] == self.params["chunk_sample_len"] + 1, \
        "Chunk should have " + str(self.params["chunk_sample_len"] + 1) + " samples."

    # size = (batch, samples)
    size = sizes[0]
    batch_size = size[0]  # Here it indicates the number of workers.
    trans_size = size[1] - 1  # Note this -1 (related to overriding the last half-step).

    if self.full:
        # Roll the memory if it is full. Do it for all existing keys;
        # missing keys will cause no problem.
        for key in self.buffer:
            self.buffer[key] = np.roll(self.buffer[key], -trans_size, axis=1)
        self.state["i_index"] -= trans_size
        self.state["i_chunk"] -= 1

    with self.lock:
        for key in chunk:
            if key not in self.buffer:
                ## np.empty is much faster than np.full.
                # Check that batch_size is consistent with old entries.
                if self.state["n_batch"]:
                    assert self.state["n_batch"] == batch_size, \
                        "Number of batches in " + key + " is not consistent with the buffer (" \
                        + str(self.state["n_batch"]) + ")"
                else:
                    self.state["n_batch"] = batch_size

                # self.buffer[key] = np.empty(shape=(batch_size, self.buffer_size, *chunk[key].shape[2:]), dtype=np.float32)
                data_type = chunk[key].dtype
                self.buffer[key] = np.empty(shape=(batch_size, self.buffer_size, *chunk[key].shape[2:]),
                                            dtype=data_type)
                if np.issubdtype(data_type, np.floating):
                    self.buffer[key][:, 0:self.state["i_index"]] = np.nan
                elif np.issubdtype(data_type, np.integer):
                    self.buffer[key][:, 0:self.state["i_index"]] = np.iinfo(data_type).min
                size = getsizeof(self.buffer[key]) / 1024. / 1024.
                logger.warn("Dictionary entry [{}] added (type: {:s}, size: {:9.1f} MB)".format(
                    key, str(data_type), size))
            # Update memory.
            self.buffer[key][:, self.state["i_index"] - 1:self.state["i_index"] + trans_size] = chunk[key]

        for key in self.buffer:
            if key not in chunk:
                self.buffer[key][:, self.state["i_index"] - 1:self.state["i_index"] + trans_size] = np.nan

        self.state["i_index"] += trans_size
        self.state["i_chunk"] += 1

    logger.debug("Memory i_chunk: {} | i_index: {}".format(self.state["i_chunk"], self.state["i_index"]))
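# A minimal sketch (hypothetical sizes) of the rolling behavior above: when the
# buffer is full, np.roll shifts everything trans_size steps to the left along
# the sample axis, discarding the oldest transitions and freeing the tail for
# the incoming chunk, i.e. the buffer behaves as a FIFO queue.
#
#   import numpy as np
#   buf = np.arange(8).reshape(1, 8)    # one worker, 8 transitions
#   buf = np.roll(buf, -3, axis=1)      # drop the 3 oldest entries
#   # buf == [[3, 4, 5, 6, 7, 0, 1, 2]] # tail (0, 1, 2) will be overwritten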
def __init__(self, root_path):
    self.parse_arguments()
    self.state = {}

    # If '--dry-run' is specified, no reports should be generated. This is independent of whether
    # we are loading from a checkpoint or running from scratch: if dry-run is there, no reports
    # should be generated.
    self.dry_run = True if self.args["dry_run"] else False
    self.is_loading = True if self.args["load_checkpoint"] else False
    self.is_playing = True if self.args["play"] else False
    self.is_resumed = True if self.args["resume"] else False
    self.is_customs = True if self.args["custom"] else False
    self.is_session_only = True if self.args["create_session_only"] else False

    assert (self.is_loading and self.is_playing) or (self.is_loading and self.is_resumed) \
        or (self.is_loading and self.is_customs) or (not self.is_loading), \
        "--load-checkpoint argument should be used either with --play, --resume, or --custom arguments."
    assert (self.is_session_only and (not self.is_loading) and (not self.is_playing)
            and (not self.is_resumed) and (not self.is_customs)) or (not self.is_session_only), \
        "--create-session-only argument cannot be used with any of the --load-checkpoint, --play, --resume, or --custom arguments."

    # Automatically find the latest checkpoint if not specified.
    self.state['checkpoint_name'] = None
    if self.is_loading:
        if check_checkpoint(self.args["load_checkpoint"], verbose=True):
            self.state['checkpoint_name'] = os.path.split(self.args["load_checkpoint"])[1]
        elif check_session(self.args["load_checkpoint"], verbose=True):
            last_checkpoint = sorted([
                int(d.replace("checkpoint-", ""))
                for d in os.listdir(os.path.join(self.args["load_checkpoint"], "checkpoints"))
            ])[-1]
            self.args["load_checkpoint"] = os.path.join(self.args["load_checkpoint"], "checkpoints",
                                                        "checkpoint-" + str(last_checkpoint))
            self.state['checkpoint_name'] = "checkpoint-" + str(last_checkpoint)
        else:
            raise ValueError("In '--load-checkpoint path', path is neither a valid checkpoint nor a valid session.")

    # TODO: Change the path for loading the packages?
    # sys.path.insert(0, '/path/to/whatever')

    # if self.args["monitor_cpu"] or self.args["monitor_gpu"]:
    #     # Force visdom ON if "--monitor-cpu" or "--monitor-gpu" are provided.
    #     self.args["visdom"] = True

    # Root: indicates where we are right now.
    self.state['path_root'] = os.path.split(root_path)[0]

    # Session: indicates where we want our results to be stored.
    if self.is_loading and self.is_playing:
        # If we are playing a recorded checkpoint, we must save the results into the
        # `evaluations` path of that session.
        checkpoint_path = os.path.split(self.args["load_checkpoint"])[0]
        self.state['path_base_sessions'] = os.path.join(os.path.split(checkpoint_path)[0], "evaluations")
    elif self.is_loading and self.is_resumed:
        if self.args['session_name']:
            print("Warning: --session-name is ignored.")
        directory = os.path.dirname(os.path.dirname(self.args["load_checkpoint"]))
        self.state['path_base_sessions'] = os.path.split(directory)[0]
        self.args['session_name'] = os.path.split(directory)[1]
    elif self.is_loading and self.is_customs:
        # If we are doing a custom task from a checkpoint, we must save the results into
        # the `customs` path of that session.
        checkpoint_path = os.path.split(self.args["load_checkpoint"])[0]
        self.state['path_base_sessions'] = os.path.join(os.path.split(checkpoint_path)[0], "customs")
    else:
        # OK, we are not loading from a checkpoint; just create the session from scratch.
        # self.state['path_root_session'] = self.args["session_path"]
        # self.state['path_base_sessions'] = os.path.join(self.state['path_root_session'], 'digideep_sessions')
        self.state['path_base_sessions'] = self.args["session_path"]

    # 1. Creating 'path_base_sessions', e.g. '/tmp/digideep_sessions':
    try:  # TODO: and not self.dry_run:
        os.makedirs(self.state['path_base_sessions'])
        # Create an empty __init__.py in it!
    except FileExistsError:
        pass
    except Exception as ex:
        print(ex)
    try:
        with open(os.path.join(self.state['path_base_sessions'], '__init__.py'), 'w') as f:
            print("", file=f)
    except FileExistsError:
        pass
    except Exception as ex:
        print(ex)

    # 2. Create a unique 'path_session':
    if not self.dry_run:
        if self.args['session_name']:
            # If is_loading, then this line will be executed ...
            self.state['path_session'] = os.path.join(self.state['path_base_sessions'], self.args["session_name"])
            try:
                os.makedirs(self.state['path_session'])
            except Exception as ex:
                print(ex)
        else:
            self.state['path_session'] = make_unique_path_session(self.state['path_base_sessions'], prefix="session_")
    else:
        self.state['path_session'] = os.path.join(self.state['path_base_sessions'], "no_session")

    # This will be equal to args['session_name'] if that existed previously.
    self.state['session_name'] = os.path.split(self.state['path_session'])[-1]

    self.state['path_checkpoints'] = os.path.join(self.state['path_session'], 'checkpoints')
    self.state['path_memsnapshot'] = os.path.join(self.state['path_session'], 'memsnapshot')
    self.state['path_monitor'] = os.path.join(self.state['path_session'], 'monitor')
    self.state['path_videos'] = os.path.join(self.state['path_session'], 'videos')
    self.state['path_tensorboard'] = os.path.join(self.state['path_session'], 'tensorboard')

    # The hyper-parameters file is basically a snapshot of the initial parameter engine's state.
    self.state['file_cpanel'] = os.path.join(self.state['path_session'], 'cpanel.json')
    self.state['file_repeal'] = os.path.join(self.state['path_session'], 'repeal.json')
    self.state['file_params'] = os.path.join(self.state['path_session'], 'params.yaml')
    self.state['file_report'] = os.path.join(self.state['path_session'], 'report.log')
    # self.state['file_visdom'] = os.path.join(self.state['path_session'], 'visdom.log')
    self.state['file_varlog'] = os.path.join(self.state['path_session'], 'varlog.json')
    self.state['file_prolog'] = os.path.join(self.state['path_session'], 'prolog.json')
    self.state['file_monlog'] = os.path.join(self.state['path_session'], 'monlog.json')

    self.state['lock_running'] = os.path.join(self.state['path_session'], 'running.lock')
    self.state['lock_done'] = os.path.join(self.state['path_session'], 'done.lock')

    # Here, the session path has been created or it already existed.
    # Now make sure only one instance passes from this point.
    self.check_singleton_instance()
    self.check_if_done()

    # 3. Creating the rest of the paths:
    if not self.is_playing and not self.is_resumed and not self.dry_run:
        os.makedirs(self.state['path_checkpoints'])
        os.makedirs(self.state['path_memsnapshot'])
    if not self.is_resumed and not self.dry_run:
        os.makedirs(self.state['path_monitor'])

    self.initLogger()
    self.initVarlog()
    self.initProlog()
    self.initTensorboard()
    # self.initVisdom()
    # TODO: We don't need the "SaaM" when we are loading from a checkpoint.
    # if not self.is_playing:
    self.createSaaM()
    #################
    self.runMonitor()  # Monitor CPU/GPU/RAM
    self.set_device()

    # Check that the params file is valid:
    if not self.is_loading:
        try:
            get_module(self.args["params"])
        except Exception as ex:
            logger.fatal("While importing user-specified params:", ex)
            exit()

    if self.is_loading:
        logger.warn("Loading from:", self.args["load_checkpoint"])

    if not self.dry_run:
        print(':: The session will be stored in ' + self.state['path_session'])
    else:
        print(':: This session has no footprints. Use without `--dry-run` to store results.')
def step(self):
    """Runs the ``prestep`` and the actual ``env.step`` functions.
    It will also manipulate the transition data to be in the appropriate format.

    Returns:
        dict: The full transition information, including the pre-transition
        (actions, last observations, etc.) and the results of executing actions
        on the environments, i.e. rewards and infos. The format is like:
        ``{"observations":..., "masks":..., "rewards":..., "infos":..., "agents":...}``

    See Also:
        :ref:`ref-data-structure`
    """
    # We are saving old versions of observations, hidden_state, and masks.
    with KeepTime("prestep"):
        pre_transition = self.prestep()

    # TODO: For true multi-agent systems, rewards must be a dictionary as well,
    #       i.e. one reward for each agent. However, if the agents are pursuing
    #       a single goal, the reward can still be a single scalar!
    # Updating observations and masks: these two are one step old in the trajectory.
    # hidden_state is the newest.
    with KeepTime("envstep"):
        # Prepare actions
        actions = extract_keywise(pre_transition["agents"], "actions")
        # Step
        self.state["observations"], rewards, dones, infos = self.envs.step(actions)
        # Post-step
        self.state["hidden_state"] = extract_keywise(pre_transition["agents"], "hidden_state")
        self.state["masks"] = np.array([0.0 if done_ else 1.0 for done_ in dones],
                                       dtype=np.float32).reshape((-1, 1))
        # NOTE: Uncomment if you find useful information in the continuous rewards ...
        # monitor("/reward/"+self.params["mode"]+"/continuous", np.mean(rewards))

    with KeepTime("render"):
        if self.params["render"]:
            self.envs.render()
            if self.params["render_delay"] > 0:
                time.sleep(self.params["render_delay"])

    # except MujocoException as e:
    #     logger.error("We got a MuJoCo exception!")
    #     raise
    #     ## Retry??
    #     # return self.run()

    with KeepTime("poststep"):
        # TODO: Sometimes the type of observations is "dict", which it shouldn't be. Investigate the reason.
        if isinstance(self.state["observations"], (OrderedDict, dict)):
            for key in self.state["observations"]:
                if np.isnan(self.state["observations"][key]).any():
                    logger.warn('NaN caught in observations during rollout generation.',
                                'step =', self.state["steps"])
                    raise ValueError
        else:
            if np.isnan(self.state["observations"]).any():
                logger.warn('NaN caught in observations during rollout generation.',
                            'step =', self.state["steps"])
                raise ValueError
                ## Retry??
                # return self.run()

        self.state["steps"] += 1
        self.state["timesteps"] += self.params["num_workers"]
        self.monitor_timesteps()

    # TODO: Adapt with the new dict_of_lists data structure.
    with KeepTime("report_reward"):
        self.report_rewards(infos)

    transition = dict(**pre_transition, rewards=rewards, infos=infos)
    return transition
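# A small illustration (hypothetical values) of the mask convention used in
# step(): masks are 1.0 while an episode continues and 0.0 where it ended, so
# multiplying a bootstrapped return by the mask zeroes it at episode boundaries.
#
#   dones = [False, True, False]
#   masks = np.array([0.0 if d else 1.0 for d in dones], dtype=np.float32).reshape((-1, 1))
#   # masks == [[1.], [0.], [1.]]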
def print_line(chunk, info):
    logger.warn("=========================================")
    return chunk