def train(self):
    """Run the demo explorer for the configured number of epochs and cycles.

    Each cycle only calls ``self.explorer["demo"].update()`` (timed under the
    ``"/"`` key). After every epoch the epoch counter is advanced and
    ``monitor_epoch()``, ``log()`` and a garbage collection are run. The loop
    stops when ``params["runner"]["n_epochs"]`` is reached or when
    ``termination_check()`` returns a truthy value.

    ``KeyboardInterrupt`` and ``SystemExit`` are logged and swallowed;
    ``finalize()`` is always called before returning.
    """
    try:
        while True:
            # Same short-circuit as ``a < b and not check()``: the termination
            # check is only consulted while there are epochs left.
            if not (self.state["i_epoch"] < self.params["runner"]["n_epochs"]):
                break
            if self.termination_check():
                break

            self.state["i_cycle"] = 0
            while self.state["i_cycle"] < self.params["runner"]["n_cycles"]:
                with KeepTime("/"):
                    # Only the explorer is updated here; no agent update.
                    self.explorer["demo"].update()
                self.state["i_cycle"] += 1

            # End of the cycles for this epoch.
            self.state["i_epoch"] += 1
            self.monitor_epoch()
            self.log()
            gc.collect()  # Free up memory from garbage.
    except (KeyboardInterrupt, SystemExit):
        logger.fatal('Operation stopped by the user ...')
    finally:
        logger.fatal('End of operation ...')
        self.finalize()
def run_wrapper_stack(self, env, stack):
    """Wrap ``env`` with every enabled wrapper described in ``stack``.

    Args:
        env: The environment to wrap.
        stack: A sequence of wrapper descriptions. Each one is read for the
            keys ``"enabled"``, ``"name"``, ``"args"`` and, optionally,
            ``"request_for_args"``.

    Returns:
        The environment after all enabled wrappers have been applied, in the
        order they appear in ``stack``.

    The only argument a wrapper can request is ``"session_state"``; any other
    request is logged as fatal and the process exits.
    """
    for spec in stack:
        if not spec["enabled"]:
            continue

        wrapper_class = get_class(spec["name"])

        if "request_for_args" in spec:
            for rfa in spec["request_for_args"]:
                logger(" Adding argument {} to the wrapper {}".format(
                    rfa, spec["name"]))
                if rfa != "session_state":
                    logger.fatal(" Argument {} not found!".format(rfa))
                    exit()
                elif self.session:
                    # Injected only when a session is actually attached.
                    spec["args"]["session_state"] = self.session.state

        # The mode is always passed so the wrapper can adjust itself.
        env = wrapper_class(env, mode=self.mode, **spec["args"])
    return env
def enjoy(self):  # i.e. eval
    """Evaluate the current policy by running the ``"eval"`` explorer forever.

    The normalizations are synced from the ``"train"`` explorer to the
    ``"eval"`` explorer, the ``"eval"`` explorer is reset, and then cycles are
    run in an endless loop:

    .. code-block:: python

        # Do a cycle
        while not done:
            # Explore
            explorer["eval"].update()

        log()

    The loop only ends through an exception. ``KeyboardInterrupt`` and
    ``SystemExit`` are logged and swallowed; ``finalize(save=False)`` is
    always called on the way out.
    """
    # TODO: We need more elegant mechanisms to handle this import.
    import glfw
    glfw.init()

    try:
        self._sync_normalizations(source_explorer="train",
                                  target_explorer="eval")
        self.explorer["eval"].reset()

        while True:
            self.state["i_cycle"] = 0
            while self.state["i_cycle"] < self.params["runner"]["n_cycles"]:
                # Root timer and the "eval" timer wrap the explorer update.
                with KeepTime("/"), KeepTime("eval"):
                    self.explorer["eval"].update()
                self.log()
                self.state["i_cycle"] += 1
    except (KeyboardInterrupt, SystemExit):
        logger.fatal('Operation stopped by the user ...')
    finally:
        self.finalize(save=False)
def train(self):
    """Run the training loop.

    For every epoch, ``train_cycle()`` is called ``n_cycles`` times (each call
    timed under the ``"/"`` key). At the end of the epoch the counters are
    advanced and the runner tests, logs, saves and collects garbage, in that
    order. The loop stops when ``params["runner"]["n_epochs"]`` is reached or
    ``termination_check()`` returns a truthy value.

    ``KeyboardInterrupt`` and ``SystemExit`` are logged and swallowed;
    ``finalize()`` is always called before returning.

    See Also:
        :ref:`ref-how-runner-works`
    """
    try:
        while True:
            # The termination check is only consulted while epochs remain.
            if not (self.state["i_epoch"] < self.params["runner"]["n_epochs"]):
                break
            if self.termination_check():
                break

            self.state["i_cycle"] = 0
            while self.state["i_cycle"] < self.params["runner"]["n_cycles"]:
                with KeepTime("/"):
                    self.train_cycle()
                self.state["i_cycle"] += 1

            # End of the cycles for this epoch.
            self.state["i_epoch"] += 1
            self.monitor_epoch()
            self.iterations += 1

            # NOTE: We may save/test after each cycle or at intervals.
            self.test()   # 1. Perform the test
            self.log()    # 2. Log
            self.save()   # 3. Save
            gc.collect()  # Free up memory from garbage.
    except (KeyboardInterrupt, SystemExit):
        logger.fatal('Operation stopped by the user ...')
    finally:
        self.finalize()
def add(self, value, success=False):
    """Count one observation of ``value`` in the histogram.

    Args:
        value: A category key when the histogram is categoric, otherwise a
            number inside ``self.limit``.
        success: Whether the observation also counts towards the first
            counter of the entry. Always treated as true when the histogram
            is named ``"Rewards"``.

    Unknown categories and out-of-range values are logged as fatal and
    ignored. Otherwise the second counter of the matching entry is always
    incremented, and the first counter only for successes.
    """
    counts_as_success = success or self.name == "Rewards"

    if self.categoric:
        if value not in self.data:
            logger.fatal(
                "For {}, categoric value ({}) not in existing categories.".
                format(self.name, value))
            return
        slot = value
    else:
        lower = self.limit[0]
        upper = self.limit[1]
        n_bins = self.bins
        if (value < lower) or (value > upper):
            logger.fatal(
                "For {}, value not in range: {:4.2f} ({:4.2f}, {:4.2f})".
                format(self.name, value, lower, upper))
            return
        idx = int((value - lower) / (upper - lower) * n_bins)
        # A value equal to the upper bound belongs to the last bin.
        if idx == n_bins:
            idx -= 1
        slot = self.keys[idx]

    counters = self.data[slot]
    if counts_as_success:
        counters[0][0] += 1
    counters[1][0] += 1
def train(self):
    """Collect data with the ``"train"`` explorer and store it in memory.

    The ``"train"`` explorer is reset once. Then, for every cycle of every
    epoch, the explorer is updated (timed as ``"train"``) and the returned
    chunk is stored in ``self.memory["train"]`` (timed as ``"store"``); both
    steps are nested under the ``"/"`` timer. After each epoch the epoch
    counter is advanced and ``monitor_epoch()``, ``log()`` and a garbage
    collection are run. The loop stops when ``params["runner"]["n_epochs"]``
    is reached or ``termination_check()`` returns a truthy value.

    ``KeyboardInterrupt`` and ``SystemExit`` are logged and swallowed;
    ``finalize()`` is always called before returning.
    """
    try:
        self.explorer["train"].reset()

        while True:
            # The termination check is only consulted while epochs remain.
            if not (self.state["i_epoch"] < self.params["runner"]["n_epochs"]):
                break
            if self.termination_check():
                break

            self.state["i_cycle"] = 0
            while self.state["i_cycle"] < self.params["runner"]["n_cycles"]:
                with KeepTime("/"):
                    with KeepTime("train"):
                        chunk = self.explorer["train"].update()
                    with KeepTime("store"):
                        self.memory["train"].store(chunk)
                self.state["i_cycle"] += 1

            # End of the cycles for this epoch.
            self.state["i_epoch"] += 1
            self.monitor_epoch()
            self.log()
            gc.collect()  # Free up memory from garbage.
    except (KeyboardInterrupt, SystemExit):
        logger.fatal('Operation stopped by the user ...')
    finally:
        logger.fatal('End of operation ...')
        self.finalize()
def train(self):
    """Collect data with the ``"demo"`` explorer and store it in memory.

    For every cycle of every epoch, the ``"demo"`` explorer is updated (timed
    as ``"demo"``) and the returned chunk is stored in ``self.memory["demo"]``
    (timed as ``"store"``); both steps are nested under the ``"/"`` timer.
    After each epoch the epoch counter is advanced and ``monitor_epoch()``,
    ``log()`` and a garbage collection are run. The loop stops when
    ``params["runner"]["n_epochs"]`` is reached or ``termination_check()``
    returns a truthy value.

    ``KeyboardInterrupt`` and ``SystemExit`` are logged and swallowed;
    ``finalize()`` is always called before returning.
    """
    try:
        while True:
            # The termination check is only consulted while epochs remain.
            if not (self.state["i_epoch"] < self.params["runner"]["n_epochs"]):
                break
            if self.termination_check():
                break

            self.state["i_cycle"] = 0
            while self.state["i_cycle"] < self.params["runner"]["n_cycles"]:
                with KeepTime("/"):
                    with KeepTime("demo"):
                        chunk = self.explorer["demo"].update()
                    with KeepTime("store"):
                        self.memory["demo"].store(chunk)
                    # TODO: terminate once the demo memory is full, and make
                    #       sure major checkpoints still work in that case.
                self.state["i_cycle"] += 1

            # End of the cycles for this epoch.
            self.state["i_epoch"] += 1
            self.monitor_epoch()
            self.log()
            gc.collect()  # Free up memory from garbage.
    except (KeyboardInterrupt, SystemExit):
        logger.fatal('Operation stopped by the user ...')
    finally:
        logger.fatal('End of operation ...')
        self.finalize()
def finalize(self):
    """Log the "END OF SIMULATION" banner at fatal level."""
    rule = "=" * 50
    gap = "\n" * 5
    indent = " " * 15
    # The pieces are passed separately and joined by the logger via ``sep``.
    logger.fatal("\n", rule, "\n", gap, indent, "END OF SIMULATION\n",
                 gap, rule, gap, sep="")
def convert_time_to_batch_major(episode):
    """Converts a rollout to have the batch dimension in the major (first)
    dimension, instead of second dimension.

    Args:
        episode (dict): A trajectory in the form of
            ``{'key1':(num_steps,batch_size,...), 'key2':(num_steps,batch_size,...)}``.
            Every value is a sequence of per-step entries. When the first
            entry has a ``dtype`` attribute (e.g. a numpy array), that dtype
            is used for the result; otherwise (e.g. nested lists) the result
            is ``float32``.

    Returns:
        dict: A trajectory in the form of
        ``{'key1':(batch_size,num_steps,...), 'key2':(batch_size,num_steps,...)}``

    Raises:
        Exception: Any error raised while converting a key is logged together
            with the offending key and then re-raised unchanged.

    .. code-block:: python
        :caption: Example

        >>> episode = {'key1':[[[1],[2]], [[3],[4]], [[5],[6]], [[7],[8]], [[9],[10]]],
        ...            'key2':[[[1,2],[3,4]], [[5,6],[7,8]], [[9,10],[11,12]], [[13,14],[15,16]], [[17,18],[19,20]]]}
        >>> convert_time_to_batch_major(episode)
        {'key1': array([[[ 1.], [ 3.], [ 5.], [ 7.], [ 9.]],
                        [[ 2.], [ 4.], [ 6.], [ 8.], [10.]]], dtype=float32),
         'key2': array([[[ 1.,  2.], [ 5.,  6.], [ 9., 10.], [13., 14.], [17., 18.]],
                        [[ 3.,  4.], [ 7.,  8.], [11., 12.], [15., 16.], [19., 20.]]], dtype=float32)}
    """
    episode_batch = {}
    for key, steps in episode.items():
        try:
            # Plain Python sequences have no ``dtype``; fall back to float32
            # so the documented list-based example works instead of raising
            # AttributeError. Array entries keep their own dtype as before.
            entry_data_type = getattr(steps[0], "dtype", np.float32)
            # TODO: Should we copy? ``asarray`` avoids a copy when possible.
            val = np.asarray(steps, dtype=entry_data_type)
            # Make inputs batch-major instead of time-major.
            episode_batch[key] = val.swapaxes(0, 1)
        except Exception as ex:
            logger.fatal('@', key, ':', ex)
            raise
    return episode_batch
def load_runner(self):
    """Load the pickled runner from the checkpoint being loaded.

    The runner is read from ``runner.pt`` inside the directory given by
    ``self.args["load_checkpoint"]``.

    Returns:
        The unpickled runner object.

    Any error while building the path, opening the file or unpickling is
    logged as fatal and the process exits.
    """
    # If loading from a checkpoint, we must check the existence of that path
    # and whether that's a valid digideep session. Existence is checked but
    # validity is not.
    try:
        filename = os.path.join(self.args["load_checkpoint"], "runner.pt")
        logger.info("Loading runner from file:" + filename)
        # NOTE(security): unpickling can execute arbitrary code. Only load
        # checkpoints that come from a trusted source.
        # The context manager closes the file even when unpickling fails.
        with open(filename, "rb") as checkpoint_file:
            runner = pickle.load(checkpoint_file)
    except Exception as ex:
        logger.fatal("Error loading from checkpoint:", ex)
        exit()
    return runner
def termination_check(self):
    """Tell whether the run should stop.

    Returns:
        ``self.ready_for_termination``, or ``True`` when one of the
        configured limits has been hit:

        * ``params["runner"]["max_time"]`` (in hours, compared against the
          time elapsed since ``self.time_start``), or
        * ``params["runner"]["max_iter"]`` (compared against
          ``self.iterations``).

    Hitting a limit also sets ``self.save_major_checkpoint`` and logs a fatal
    message. A falsy limit disables the corresponding check.
    """
    should_stop = self.ready_for_termination

    max_hours = self.params["runner"]["max_time"]
    if max_hours:
        elapsed_seconds = time.time() - self.time_start
        if elapsed_seconds >= max_hours * 3600:
            self.save_major_checkpoint = True
            should_stop = True
            logger.fatal('Simulation maximum allowed execution time exceeded ...')

    max_iterations = self.params["runner"]["max_iter"]
    if max_iterations:
        # TODO: Should be current_epoch - initial_epoch >= max_iter: ...
        if self.iterations >= max_iterations:
            self.save_major_checkpoint = True
            should_stop = True
            logger.fatal('Simulation maximum allowed execution iterations exceeded ...')

    return should_stop
def finalize(self, save=True):
    """Wrap up the run: mark completion, save if requested, close explorers.

    Args:
        save (bool): When true and ``self.save_major_checkpoint`` is set, the
            final checkpoint is saved.

    If the epoch counter equals ``params["runner"]["n_epochs"]`` the session
    is marked as done and a major checkpoint is requested. All explorers are
    closed at the end.
    """
    logger.fatal('End of operation ...')

    # Mark session as done if we have gone through all epochs.
    all_epochs_done = (
        self.state["i_epoch"] == self.params["runner"]["n_epochs"])
    if all_epochs_done:
        self.session.mark_as_done()
        self.save_major_checkpoint = True

    if save and self.save_major_checkpoint:
        self.save_final_checkpoint()

    # Close all explorers benignly.
    for explorer in self.explorer.values():
        explorer.close()
def lazy_init(self):
    """Initialize the attributes that are not part of the object state.

    These are set up lazily so that they are initialized properly when the
    object is loaded from a checkpoint: the execution timer, the termination
    and checkpoint flags, the iteration counter, the profiler and monitor,
    and the CTRL+C counter. ``SIGINT`` is set to be ignored.
    """
    self.time_start = time.time()
    logger.fatal("Execution (max) timer started ...")

    # Both flags start cleared; they are raised on termination requests.
    self.save_major_checkpoint = self.ready_for_termination = False
    self.iterations = 0

    profiler.reset()
    monitor.reset()
    self.monitor_epoch()

    # Ignore interrupt signals (for subprocesses).
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    self.ctrl_c_count = 0
def __init__(self, session, mode, seed, **params):
    """Store the settings and make sure the environment is registered.

    Args:
        session: The session object; stored as ``self.session``.
        mode: The mode (train/test/eval); stored as ``self.mode``.
        seed: The seed; stored as ``self.seed``.
        **params: Environment parameters. The keys read here are
            ``"from_module"``, ``"from_params"``, ``"name"`` and
            ``"register_args"``.

    When ``from_module`` is set that module is imported. Otherwise, when
    ``from_params`` is set and the name is not registered yet, the
    environment is registered from ``register_args``. Any failure, or the
    name still missing from the registry afterwards, is logged as fatal and
    the process exits.
    """
    def _abort(*message):
        # Report the problem and stop the process.
        logger.fatal(*message)
        exit()

    self.mode = mode  # train/test/eval
    self.seed = seed
    self.session = session
    self.params = params

    # Won't we have several environment registrations by this?
    if params["from_module"]:
        try:
            get_module(params["from_module"])
        except Exception as ex:
            _abort("While importing user module:", ex)
    elif params["from_params"] and params["name"] not in registry.env_specs:
        try:
            registry.register(**params["register_args"])
            MakeEnvironment.registered = True
        except Exception as ex:
            _abort("While registering from parameters:", ex)

    # After all of these, check if environment is registered in the gym or not.
    if params["name"] not in registry.env_specs:
        _abort("Environment '" + params["name"] +
               "' is not registered in the gym registry.")
def space2config(S):
    """Convert a space's characteristics into a config-space dict.

    Args:
        S: A ``spaces.Discrete``, ``spaces.Box`` or ``spaces.Dict`` instance.

    Returns:
        For ``Discrete`` and ``Box``: a dict with the keys ``"typ"`` (class
        name), ``"dim"`` and ``"lim"``. For ``Dict``: an ``OrderedDict``
        mapping every sub-space key to its own converted config.

    Raises:
        NotImplementedError: For any other type (also logged as fatal).
    """
    if isinstance(S, spaces.Discrete):
        return {
            "typ": S.__class__.__name__,
            "dim": np.int32(S.n),
            # Discrete spaces do not have high/low.
            "lim": (np.nan, np.nan),
        }

    if isinstance(S, spaces.Box):
        return {
            "typ": S.__class__.__name__,
            # The whole shape is kept, so this is not limited to 1d arrays.
            "dim": S.shape,
            "lim": (S.low.tolist(), S.high.tolist()),
        }

    if isinstance(S, spaces.Dict):
        config = OrderedDict()
        for key, subspace in S.spaces.items():
            config[key] = space2config(subspace)
        return config

    logger.fatal("Unknown type for space:", type(S))
    raise NotImplementedError
def __init__(self, name, limit=None, bins=0, categories=None):
    """Create an empty histogram that is either categoric or binned.

    Exactly one of ``categories`` or ``(limit, bins)`` must be given;
    otherwise a fatal message is logged and the process exits.

    Args:
        name: Name of the histogram; stored as ``self.name``.
        limit: ``(lower, upper)`` bounds of the binned range.
        bins (int): Number of equal-width bins between the bounds.
        categories: Iterable of category keys. Defaults to an empty dict;
            a fresh one is created per instance.

    ``self.data`` maps every category (or every bin label of the form
    ``"(low,high)"``) to ``[[0], [0, Red]]``. For binned histograms
    ``self.keys`` maps the bin index to its label.
    """
    # A ``{}`` default would be one dict shared by every instance through
    # ``self.categories``; build a new one per call instead.
    if categories is None:
        categories = {}

    if categories:
        if limit or (bins != 0):
            logger.fatal(
                "Only 'categories' or '(limit,bins)' can be specified at a time."
            )
            exit()
        self.categoric = True
    else:
        if not (limit and (bins > 0)):
            logger.fatal(
                "At least one of 'categories' or '(limit,bins)' should be specified."
            )
            exit()
        self.categoric = False

    self.name = name
    self.limit = limit
    self.bins = bins
    self.categories = categories

    self.data = OrderedDict()
    if self.categoric:
        for c in categories:
            self.data[c] = [[0], [0, Red]]
    else:
        lbound = limit[0]
        ubound = limit[1]
        steps = (ubound - lbound) / bins
        self.keys = {}
        for i in range(bins):
            # Named ``bin_label`` so it does not shadow the ``name`` argument.
            bin_label = "({:4.2f},{:4.2f})".format(lbound + steps * i,
                                                   lbound + steps * (i + 1))
            self.keys[i] = bin_label
            self.data[bin_label] = [[0], [0, Red]]
def on_sigint_received(self, signalNumber, frame):
    """Handle CTRL+C: stop gracefully on the first press, at once on the second.

    Args:
        signalNumber: Signal number passed to the handler (unused).
        frame: Stack frame passed to the handler (unused).

    The first press requests termination with a major checkpoint. The second
    press cancels the checkpoint and exits with status 1. Further presses
    only increase the counter.
    """
    print("")  # To print on the next line where ^C is printed.
    self.ctrl_c_count += 1
    presses = self.ctrl_c_count

    if presses == 1:
        logger.fatal("Received CTRL+C. Will terminate process after cycle is over.")
        logger.fatal("Press CTRL+C one more time to exit without saving.")
        self.ready_for_termination = True
        self.save_major_checkpoint = True
        return

    if presses == 2:
        # NOTE: Kill all subprocesses
        logger.fatal("Received CTRL+C for the second time. Will terminate immediately.")
        self.ready_for_termination = True
        self.save_major_checkpoint = False
        sys.exit(1)
def main(session):
    """Build or load a runner for ``session`` and start the requested loop.

    When the session is loading, the stored parameters and runner are
    loaded. Otherwise fresh parameters are generated from the params module
    and the control panel, dumped to the session, and a new runner is
    created. The runner is then saved, and unless the session is
    "session only", it is started and one of ``enjoy()``, ``custom()`` or
    ``train()`` is run.

    Args:
        session: The session object driving this run.
    """
    def _log_session_header(current_params):
        # Summary block shared by the loading and the fresh-start paths.
        logger.warn("="*50)
        logger.warn("Session:", current_params["session_name"])
        logger.warn("Message:", current_params["session_msg"])
        logger.warn("Command:\n\n$", current_params["session_cmd"], "\n")
        logger.warn("-"*50)

    if not session.is_loading:
        # Load fresh parameters: import the method-specific module and
        # generate params from the cpanel every time.
        param_engine = get_module(session.args["params"])
        cpanel = strict_update(param_engine.cpanel, session.args["cpanel"])
        params = param_engine.gen_params(cpanel)

        # Storing parameters in the session.
        params = session.update_params(params)
        session.dump_cpanel(cpanel)
        session.dump_params(params)

        _log_session_header(params)
        logger.warn("Hyper-Parameters\n\n{}".format(json.dumps(cpanel, indent=4, sort_keys=False)) )
        logger.warn("="*50)

        runner_class = get_class(params["runner"]["name"])
        runner = runner_class(params)
    else:
        params = session.update_params({})
        _log_session_header(params)
        runner = session.load_runner()

    # If we are creating the session only, we do not even need to start the
    # runner.
    session.save_runner(runner, 0)
    if session.is_session_only:
        logger.fatal("Session was created; exiting ...")
        return

    # Initializing: it will load the state dicts if we are in loading mode.
    runner.start(session)

    # Train/Enjoy/Custom loops.
    if session.is_playing:
        loop = runner.enjoy
    elif session.is_customs:
        loop = runner.custom
    else:
        loop = runner.train
    loop()
def on_sigusr1_received(self, signalNumber, frame):
    """Handle SIGUSR1 by requesting termination with a major checkpoint.

    Args:
        signalNumber: Signal number passed to the handler (unused).
        frame: Stack frame passed to the handler (unused).
    """
    notice = "Received SIGUSR1 signal. Will terminate process after cycle is over."
    logger.fatal(notice)
    self.ready_for_termination = self.save_major_checkpoint = True
def __init__(self, root_path):
    """Set up the session: flags, paths, directories, loggers and monitors.

    Steps, in order:

    1. Parse the command-line arguments and derive the mode flags
       (``dry_run``, ``is_loading``, ``is_playing``, ``is_resumed``,
       ``is_customs``, ``is_session_only``) from ``self.args``.
    2. Validate the flag combinations with assertions.
    3. When loading, resolve ``--load-checkpoint`` to a concrete checkpoint.
       If a session path is given, the checkpoint with the highest number is
       selected.
    4. Choose ``path_base_sessions`` depending on the mode, create it and
       write an ``__init__.py`` into it.
    5. Choose ``path_session`` and fill ``self.state`` with all the derived
       directory, file and lock paths.
    6. Call ``check_singleton_instance()`` and ``check_if_done()``, create
       the remaining directories, and initialize the loggers, the SaaM, the
       monitor and the device.
    7. When not loading, check that the params module can be imported.

    Args:
        root_path: A path whose parent directory is stored as ``path_root``.

    Raises:
        AssertionError: On an invalid combination of command-line flags.
        ValueError: If ``--load-checkpoint`` is neither a valid checkpoint
            nor a valid session.

    The process exits if the user-specified params module cannot be imported.
    """
    self.parse_arguments()
    self.state = {}

    # If '--dry-run' is specified no reports should be generated. It is not relevant to whether
    # we are loading from a checkpoint or running from scratch. If dry-run is there no reports
    # should be generated.
    self.dry_run = True if self.args["dry_run"] else False

    self.is_loading = True if self.args["load_checkpoint"] else False
    self.is_playing = True if self.args["play"] else False
    self.is_resumed = True if self.args["resume"] else False
    self.is_customs = True if self.args["custom"] else False
    self.is_session_only = True if self.args[
        "create_session_only"] else False

    # Loading requires exactly one of the play/resume/custom modes to say what to do with the checkpoint.
    assert (self.is_loading and self.is_playing) or (self.is_loading and self.is_resumed) or (self.is_loading and self.is_customs) or (not self.is_loading), \
        "--load-checkpoint argument should be used either with --play, --resume, or --custom arguments."
    # Session-only mode excludes every other mode.
    assert (self.is_session_only and (not self.is_loading) and (not self.is_playing) and (not self.is_resumed) and (not self.is_customs)) or (not self.is_session_only), \
        "--create-session-only argument cannot be used with any of the --load-checkpoint, --play, --resume, or --custom arguments."

    # Automatically find the latest checkpoint if not specified
    self.state['checkpoint_name'] = None
    if self.is_loading:
        if check_checkpoint(self.args["load_checkpoint"], verbose=True):
            # The given path is a checkpoint itself: its last path component is the checkpoint name.
            self.state['checkpoint_name'] = os.path.split(
                self.args["load_checkpoint"])[1]
        elif check_session(self.args["load_checkpoint"], verbose=True):
            # The given path is a session: pick the checkpoint with the highest number.
            # NOTE(review): assumes every entry under "checkpoints" is named "checkpoint-<int>" - confirm.
            last_checkpoint = sorted([
                int(d.replace("checkpoint-", "")) for d in os.listdir(
                    os.path.join(self.args["load_checkpoint"], "checkpoints"))
            ])[-1]
            self.args["load_checkpoint"] = os.path.join(
                self.args["load_checkpoint"], "checkpoints",
                "checkpoint-" + str(last_checkpoint))
            self.state['checkpoint_name'] = "checkpoint-" + str(
                last_checkpoint)
        else:
            raise ValueError(
                "In '--load-checkpoint path', path is neither a valid checkpoint nor a valid session."
            )

    # TODO: Change the path for loading the packages?
    # sys.path.insert(0, '/path/to/whatever')

    # if self.args["monitor_cpu"] or self.args["monitor_gpu"]:
    #     # Force visdom ON if "--monitor-cpu" or "--monitor-gpu" are provided.
    #     self.args["visdom"] = True

    # Root: Indicates where we are right now
    self.state['path_root'] = os.path.split(root_path)[0]

    # Session: Indicates where we want our codes to be stored
    if self.is_loading and self.is_playing:
        # If we are playing a recorded checkpoint, we must save the results into the `evaluations` path
        # of that session.
        checkpoint_path = os.path.split(self.args["load_checkpoint"])[0]
        self.state['path_base_sessions'] = os.path.join(
            os.path.split(checkpoint_path)[0], "evaluations")
    elif self.is_loading and self.is_resumed:
        # When resuming, the session directory that owns the checkpoint is reused, so any
        # user-provided session name is overridden.
        if self.args['session_name']:
            print("Warning: --session-name is ignored.")
        directory = os.path.dirname(
            os.path.dirname(self.args["load_checkpoint"]))
        self.state['path_base_sessions'] = os.path.split(directory)[0]
        self.args['session_name'] = os.path.split(directory)[1]
    elif self.is_loading and self.is_customs:
        # If we are doing a custom task from a checkpoint, we must save the results into the `customs` path
        # of that session.
        checkpoint_path = os.path.split(self.args["load_checkpoint"])[0]
        self.state['path_base_sessions'] = os.path.join(
            os.path.split(checkpoint_path)[0], "customs")
    else:
        # OK, we are NOT loading from a checkpoint, just create the session from scratch.
        # self.state['path_root_session'] = self.args["session_path"]
        # self.state['path_base_sessions'] = os.path.join(self.state['path_root_session'], 'digideep_sessions')
        self.state['path_base_sessions'] = self.args["session_path"]

    # 1. Creating 'path_base_sessions', i.e. '/tmp/digideep_sessions':
    try:  # TODO: and not self.dry_run:
        os.makedirs(self.state['path_base_sessions'])
    except FileExistsError:
        # The directory already exists; nothing to do.
        pass
    except Exception as ex:
        # Best effort: other errors are printed and execution continues.
        print(ex)

    # Create an `__init__.py` in it. The file is opened in 'w' mode, so an existing one is overwritten.
    try:
        with open(
                os.path.join(self.state['path_base_sessions'],
                             '__init__.py'), 'w') as f:
            print("", file=f)
    except FileExistsError:
        pass
    except Exception as ex:
        print(ex)

    # 2. Create a unique 'path_session':
    if not self.dry_run:
        if self.args['session_name']:
            # If is_loading then this line will be executed ...
            self.state['path_session'] = os.path.join(
                self.state['path_base_sessions'],
                self.args["session_name"])
            # TODO: Make the directory
            try:
                os.makedirs(self.state['path_session'])
            except Exception as ex:
                # Includes the case where the directory already exists; only printed.
                print(ex)
        else:
            self.state['path_session'] = make_unique_path_session(
                self.state['path_base_sessions'], prefix="session_")
    else:
        # Dry runs get a fixed session path; no directory is created for it here.
        self.state['path_session'] = os.path.join(
            self.state['path_base_sessions'], "no_session")

    # This will be equal to args['session_name'] if that has existed previously.
    self.state['session_name'] = os.path.split(
        self.state['path_session'])[-1]

    # Directories inside the session.
    self.state['path_checkpoints'] = os.path.join(
        self.state['path_session'], 'checkpoints')
    self.state['path_memsnapshot'] = os.path.join(
        self.state['path_session'], 'memsnapshot')
    self.state['path_monitor'] = os.path.join(self.state['path_session'],
                                              'monitor')
    self.state['path_videos'] = os.path.join(self.state['path_session'],
                                             'videos')
    self.state['path_tensorboard'] = os.path.join(
        self.state['path_session'], 'tensorboard')

    # Files inside the session.
    # Hyper-parameters basically is a snapshot of initial parameter engine's state.
    self.state['file_cpanel'] = os.path.join(self.state['path_session'],
                                             'cpanel.json')
    self.state['file_repeal'] = os.path.join(self.state['path_session'],
                                             'repeal.json')
    self.state['file_params'] = os.path.join(self.state['path_session'],
                                             'params.yaml')

    self.state['file_report'] = os.path.join(self.state['path_session'],
                                             'report.log')
    # self.state['file_visdom'] = os.path.join(self.state['path_session'], 'visdom.log')
    self.state['file_varlog'] = os.path.join(self.state['path_session'],
                                             'varlog.json')
    self.state['file_prolog'] = os.path.join(self.state['path_session'],
                                             'prolog.json')
    self.state['file_monlog'] = os.path.join(self.state['path_session'],
                                             'monlog.json')
    # Lock files inside the session.
    self.state['lock_running'] = os.path.join(self.state['path_session'],
                                              'running.lock')
    self.state['lock_done'] = os.path.join(self.state['path_session'],
                                           'done.lock')

    # Here, the session path has been created or it existed.
    # Now make sure only one instance passes from this point.
    self.check_singleton_instance()
    self.check_if_done()

    # 3. Creating the rest of paths:
    if not self.is_playing and not self.is_resumed and not self.dry_run:
        os.makedirs(self.state['path_checkpoints'])
        os.makedirs(self.state['path_memsnapshot'])
    if not self.is_resumed and not self.dry_run:
        os.makedirs(self.state['path_monitor'])

    self.initLogger()
    self.initVarlog()
    self.initProlog()
    self.initTensorboard()
    # self.initVisdom()
    # TODO: We don't need the "SaaM" when are loading from a checkpoint.
    # if not self.is_playing:
    self.createSaaM()
    #################
    self.runMonitor()  # Monitor CPU/GPU/RAM
    self.set_device()

    # Check valid params file:
    if not self.is_loading:
        try:
            get_module(self.args["params"])
        except Exception as ex:
            logger.fatal("While importing user-specified params:", ex)
            exit()

    if self.is_loading:
        logger.warn("Loading from:", self.args["load_checkpoint"])

    if not self.dry_run:
        print(':: The session will be stored in ' +
              self.state['path_session'])
    else:
        print(
            ':: This session has no footprints. Use without `--dry-run` to store results.'
        )