Exemple #1
0
    def train(self):
        """Run the demonstration loop.

        Each epoch performs ``n_cycles`` updates of the ``"demo"`` explorer,
        then monitors the epoch, logs and triggers garbage collection. Epochs
        repeat until ``n_epochs`` is reached or ``termination_check()`` returns
        a true value. Agents are not updated in this loop. ``finalize()`` is
        always called on the way out, including after an interruption.
        """
        try:
            while True:
                # Same short-circuit order as a combined loop condition: the
                # termination check is only consulted while epochs remain.
                if not (self.state["i_epoch"] < self.params["runner"]["n_epochs"]):
                    break
                if self.termination_check():
                    break

                self.state["i_cycle"] = 0
                while self.state["i_cycle"] < self.params["runner"]["n_cycles"]:
                    with KeepTime("/"):
                        self.explorer["demo"].update()
                    self.state["i_cycle"] += 1

                # All cycles done: close the epoch.
                self.state["i_epoch"] += 1
                self.monitor_epoch()

                self.log()
                gc.collect()  # Garbage Collection

        except (KeyboardInterrupt, SystemExit):
            logger.fatal('Operation stopped by the user ...')
        finally:
            logger.fatal('End of operation ...')
            self.finalize()
Exemple #2
0
    def run_wrapper_stack(self, env, stack):
        """
        Wrap ``env`` with every enabled wrapper described in ``stack``, in order.

        Each entry of ``stack`` is a mapping with the keys ``"enabled"``,
        ``"name"`` and ``"args"``, plus an optional ``"request_for_args"`` list.
        The only requestable argument is ``"session_state"``; it is written into
        the entry's ``"args"`` when ``self.session`` is truthy. Any other
        requested argument is reported and the process exits.

        Returns the outermost wrapped environment (``env`` itself when no
        wrapper is enabled).
        """
        for spec in stack:
            if not spec["enabled"]:
                continue

            wrapper_class = get_class(spec["name"])

            if "request_for_args" in spec:
                for rfa in spec["request_for_args"]:
                    logger("  Adding argument {} to the wrapper {}".format(
                        rfa, spec["name"]))
                    # TODO: Move the "mode" to optional parameter that can be requested!
                    if rfa != "session_state":
                        logger.fatal("  Argument {} not found!".format(rfa))
                        exit()
                    elif self.session:
                        spec["args"]["session_state"] = self.session.state

            # The mode is always passed so the wrapper can adjust itself.
            env = wrapper_class(env, mode=self.mode, **spec["args"])
        return env
Exemple #3
0
    def enjoy(self):  # i.e. eval
        """Evaluate the current policy by running only the ``"eval"`` explorer in a loop.

        Normalizations are synced from the ``"train"`` explorer to the ``"eval"``
        explorer and the ``"eval"`` explorer is reset before the loop starts. The
        outer loop has no exit condition of its own: it runs until an exception
        (typically ``KeyboardInterrupt``/``SystemExit``) is raised. Logging
        happens after every cycle. ``finalize(save=False)`` is always called on
        the way out.

        .. code-block:: python

            # Repeat forever (until interrupted)
            while True:
                # Do n_cycles cycles
                for each cycle:
                    # Explore
                    explorer["eval"].update()
                    log()
        """
        # TODO: We need more elegant mechanisms to handle this import.
        # NOTE(review): glfw is initialized here and not terminated in this
        # method -- confirm whether cleanup happens elsewhere.
        import glfw
        glfw.init()

        try:
            self._sync_normalizations(source_explorer="train",
                                      target_explorer="eval")
            self.explorer["eval"].reset()
            while True:
                # Cycles
                self.state["i_cycle"] = 0
                while self.state["i_cycle"] < self.params["runner"]["n_cycles"]:
                    # "/" times the whole cycle; "eval" times the explorer update.
                    with KeepTime("/"):
                        # 1. Do Experiment
                        with KeepTime("eval"):
                            self.explorer["eval"].update()
                    # 2. Log (once per cycle)
                    self.log()
                    self.state["i_cycle"] += 1
        except (KeyboardInterrupt, SystemExit):
            logger.fatal('Operation stopped by the user ...')
        finally:
            # Evaluation never writes a final checkpoint.
            self.finalize(save=False)
Exemple #4
0
    def train(self):
        """
        Run the training loop.

        Each epoch runs ``n_cycles`` calls to ``train_cycle()``, then monitors
        the epoch, counts one iteration, and tests, logs and saves. Epochs
        repeat until ``n_epochs`` is reached or ``termination_check()`` returns
        a true value. ``finalize()`` is always called on the way out.

        See Also:
            :ref:`ref-how-runner-works`
        """
        try:
            while True:
                # The termination check is only consulted while epochs remain,
                # exactly as in a short-circuiting ``and`` condition.
                if not (self.state["i_epoch"] < self.params["runner"]["n_epochs"]):
                    break
                if self.termination_check():
                    break

                self.state["i_cycle"] = 0
                while self.state["i_cycle"] < self.params["runner"]["n_cycles"]:
                    with KeepTime("/"):
                        self.train_cycle()
                    self.state["i_cycle"] += 1

                # All cycles done: close the epoch.
                self.state["i_epoch"] += 1
                self.monitor_epoch()
                self.iterations += 1

                # NOTE: We may save/test after each cycle or at intervals.
                self.test()  # 1. Perform the test
                self.log()   # 2. Log
                self.save()  # 3. Save

                # Free up memory from garbage.
                gc.collect()

        except (KeyboardInterrupt, SystemExit):
            logger.fatal('Operation stopped by the user ...')
        finally:
            self.finalize()
Exemple #5
0
    def add(self, value, success=False):
        """Count one occurrence of ``value`` in its category or bin.

        For a categoric instance ``value`` must be an existing key of
        ``self.data``. Otherwise it must lie within ``self.limit`` (both bounds
        inclusive) and is mapped to one of ``self.bins`` equal-width bins.
        Values outside the categories/range are reported and ignored.

        ``self.data[key][1][0]`` is incremented on every accepted call, and
        ``self.data[key][0][0]`` only when ``success`` is true. An instance
        named ``"Rewards"`` always counts as a success.
        """
        if self.name == "Rewards":
            success = True

        if self.categoric:
            if value not in self.data:
                logger.fatal(
                    "For {}, categoric value ({}) not in existing categories.".
                    format(self.name, value))
                return
            key = value
        else:
            lbound, ubound = self.limit[0], self.limit[1]
            bins = self.bins

            if (value < lbound) or (value > ubound):
                logger.fatal(
                    "For {}, value not in range: {:4.2f} ({:4.2f}, {:4.2f})".
                    format(self.name, value, lbound, ubound))
                return

            i = int((value - lbound) / (ubound - lbound) * bins)
            # value == ubound lands one past the last bin; fold it back in.
            if i == bins:
                i -= 1
            key = self.keys[i]

        counters = self.data[key]
        if success:
            counters[0][0] += 1
        counters[1][0] += 1
Exemple #6
0
    def train(self):
        """Collect experience with the ``"train"`` explorer and store it in memory.

        The explorer is reset once up front. Each cycle fetches one chunk from
        the explorer and stores it in ``self.memory["train"]``. After
        ``n_cycles`` cycles the epoch is monitored and logged. Epochs repeat
        until ``n_epochs`` is reached or ``termination_check()`` returns a true
        value. ``finalize()`` is always called on the way out.
        """
        try:
            self.explorer["train"].reset()

            while True:
                # The termination check is only consulted while epochs remain.
                if not (self.state["i_epoch"] < self.params["runner"]["n_epochs"]):
                    break
                if self.termination_check():
                    break

                self.state["i_cycle"] = 0
                while self.state["i_cycle"] < self.params["runner"]["n_cycles"]:
                    # "/" times the whole cycle, "train" the explore+store part,
                    # "store" only the memory write.
                    with KeepTime("/"), KeepTime("train"):
                        chunk = self.explorer["train"].update()
                        with KeepTime("store"):
                            self.memory["train"].store(chunk)
                    self.state["i_cycle"] += 1

                # All cycles done: close the epoch.
                self.state["i_epoch"] += 1
                self.monitor_epoch()

                self.log()
                gc.collect()  # Garbage Collection

        except (KeyboardInterrupt, SystemExit):
            logger.fatal('Operation stopped by the user ...')
        finally:
            logger.fatal('End of operation ...')
            self.finalize()
Exemple #7
0
    def train(self):
        """Collect demonstrations with the ``"demo"`` explorer and store them in memory.

        Each cycle fetches one chunk from the explorer and stores it in
        ``self.memory["demo"]``. After ``n_cycles`` cycles the epoch is
        monitored and logged. Epochs repeat until ``n_epochs`` is reached or
        ``termination_check()`` returns a true value. ``finalize()`` is always
        called on the way out.
        """
        try:
            while True:
                # The termination check is only consulted while epochs remain.
                if not (self.state["i_epoch"] < self.params["runner"]["n_epochs"]):
                    break
                if self.termination_check():
                    break

                self.state["i_cycle"] = 0
                while self.state["i_cycle"] < self.params["runner"]["n_cycles"]:
                    # "/" times the whole cycle, "demo" the explore+store part,
                    # "store" only the memory write.
                    with KeepTime("/"), KeepTime("demo"):
                        chunk = self.explorer["demo"].update()
                        with KeepTime("store"):
                            self.memory["demo"].store(chunk)
                    # NOTE: stopping once the demo memory is full is not
                    # implemented here.
                    self.state["i_cycle"] += 1

                # All cycles done: close the epoch.
                self.state["i_epoch"] += 1
                self.monitor_epoch()

                self.log()
                gc.collect()  # Garbage Collection

        except (KeyboardInterrupt, SystemExit):
            logger.fatal('Operation stopped by the user ...')
        finally:
            logger.fatal('End of operation ...')
            self.finalize()
Exemple #8
0
 def finalize(self):
     """Log the "END OF SIMULATION" banner, framed by two 50-character rules."""
     rule = "=" * 50
     gap = "\n" * 5
     banner_parts = (
         "\n",
         rule,
         "\n",
         gap,
         " " * 15,
         "END OF SIMULATION\n",
         gap,
         rule,
         gap,
     )
     logger.fatal(*banner_parts, sep="")
Exemple #9
0
def convert_time_to_batch_major(episode):
    """Converts a rollout to have the batch dimension in the major (first) dimension, instead of second dimension.

    Args:
        episode (dict): A trajectory in the form of ``{'key1':(num_steps,batch_size,...), 'key2':(num_steps,batch_size,...)}``.
            Each value is a sequence of per-step entries. If the first entry
            exposes a ``dtype`` attribute (e.g. a numpy array), that dtype is
            used for the result; otherwise (e.g. nested lists) numpy infers
            the dtype.

    Returns:
        dict: A trajectory in the form of ``{'key1':(batch_size,num_steps,...), 'key2':(batch_size,num_steps,...)}``

    Raises:
        Exception: Any error raised while converting a key is logged together
            with that key and then re-raised unchanged.

    .. code-block:: python
        :caption: Example

        >>> episode = {'key1': [np.full((2, 1), t, dtype=np.float32) for t in range(5)]}
        >>> batch = convert_time_to_batch_major(episode)
        >>> batch['key1'].shape
        (2, 5, 1)
        >>> batch['key1'][0].ravel().tolist()
        [0.0, 1.0, 2.0, 3.0, 4.0]
    """
    episode_batch = {}
    for key, entries in episode.items():
        try:
            # Keep the dtype of the stored entries when they carry one; plain
            # Python sequences have no ``dtype``, so let numpy infer it then.
            entry_data_type = getattr(entries[0], "dtype", None)
            # TODO: Should we copy? ``asarray`` avoids a copy when possible.
            val = np.asarray(entries, dtype=entry_data_type)
            # make inputs batch-major instead of time-major
            episode_batch[key] = val.swapaxes(0, 1)
        except Exception as ex:
            logger.fatal('@', key, ':', ex)
            raise

    return episode_batch
Exemple #10
0
 def load_runner(self):
     """Load and return the pickled runner stored in the checkpoint directory.

     The runner is read from ``runner.pt`` inside
     ``self.args["load_checkpoint"]``. Any failure while building the path,
     opening the file or unpickling is logged and the process exits.

     Returns:
         The unpickled runner object.
     """
     # If loading from a checkpoint, we must check the existence
     # of that path and whether that's a valid digideep session.
     # NOTE(review): validity of the session is not checked in this method.
     try:
         filename = os.path.join(self.args["load_checkpoint"], "runner.pt")
         logger.info("Loading runner from file:" + filename)
         # NOTE(security): unpickling can execute arbitrary code; only load
         # checkpoints that come from a trusted source.
         # The context manager closes the file even if unpickling fails.
         with open(filename, "rb") as checkpoint_file:
             runner = pickle.load(checkpoint_file)
     except Exception as ex:
         logger.fatal("Error loading from checkpoint:", ex)
         exit()
     return runner
Exemple #11
0
 def termination_check(self):
     """Tell whether the run should stop.

     Starts from ``self.ready_for_termination`` and additionally requests
     termination when the configured ``max_time`` (in hours) or ``max_iter``
     limit has been reached. Reaching either limit also sets
     ``self.save_major_checkpoint``. A falsy limit disables that check.

     Returns:
         A true value when the run should terminate.
     """
     termination = self.ready_for_termination

     max_time = self.params["runner"]["max_time"]
     if max_time and time.time() - self.time_start >= max_time * 3600:
         self.save_major_checkpoint = True
         termination = True
         logger.fatal('Simulation maximum allowed execution time exceeded ...')

     max_iter = self.params["runner"]["max_iter"]
     # TODO: Should be current_epoch - initial_epoch >= max_iter: ...
     if max_iter and self.iterations >= max_iter:
         self.save_major_checkpoint = True
         termination = True
         logger.fatal('Simulation maximum allowed execution iterations exceeded ...')

     return termination
Exemple #12
0
    def finalize(self, save=True):
        """Wrap up the run: mark completion, optionally checkpoint, close explorers.

        Args:
            save (bool): When true, and a major checkpoint has been requested
                via ``self.save_major_checkpoint``, the final checkpoint is
                saved.
        """
        logger.fatal('End of operation ...')

        # Having gone through all epochs marks the session as done and
        # requests a major checkpoint.
        all_epochs_done = self.state["i_epoch"] == self.params["runner"]["n_epochs"]
        if all_epochs_done:
            self.session.mark_as_done()
            self.save_major_checkpoint = True

        if save and self.save_major_checkpoint:
            self.save_final_checkpoint()

        # Close all explorers benignly.
        for explorer_name in self.explorer:
            self.explorer[explorer_name].close()
Exemple #13
0
    def lazy_init(self):
        """
        Initialize the attributes that are not part of the object state.

        They are set here, not in the constructor, so that they are also
        initialized properly when the object is loaded from a checkpoint.
        """
        self.time_start = time.time()
        logger.fatal("Execution (max) timer started ...")

        # Termination bookkeeping starts from a clean slate.
        self.iterations = 0
        self.ready_for_termination = False
        self.save_major_checkpoint = False

        for tracker in (profiler, monitor):
            tracker.reset()
        self.monitor_epoch()

        # Ignore SIGINT from here on (the original note says this is for the
        # subprocesses) and start counting CTRL+C presses from zero.
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        self.ctrl_c_count = 0
Exemple #14
0
    def __init__(self, session, mode, seed, **params):
        """Store the settings and make sure the environment is registered.

        If ``params["from_module"]`` is set, that module is imported.
        Otherwise, if ``params["from_params"]`` is set and the environment
        name is not yet in the registry, it is registered with
        ``params["register_args"]``. The process exits if either step fails or
        if the name is still missing from the registry afterwards.

        Args:
            session: Session object, stored as ``self.session``.
            mode: train/test/eval
            seed: Stored as ``self.seed``.
            **params: Environment parameters; ``name``, ``from_module`` and
                ``from_params`` are read here.
        """
        self.session = session
        self.mode = mode
        self.seed = seed
        self.params = params

        env_name = params["name"]

        # Won't we have several environment registrations by this?
        if params["from_module"]:
            try:
                get_module(params["from_module"])
            except Exception as ex:
                logger.fatal("While importing user module:", ex)
                exit()
        elif params["from_params"] and env_name not in registry.env_specs:
            try:
                registry.register(**params["register_args"])
                MakeEnvironment.registered = True
            except Exception as ex:
                logger.fatal("While registering from parameters:", ex)
                exit()

        # After all of these, the environment must be known to the gym registry.
        if env_name not in registry.env_specs:
            logger.fatal("Environment '" + env_name +
                         "' is not registered in the gym registry.")
            exit()
Exemple #15
0
def space2config(S):
    """Describe a space as a config dict.

    ``Discrete`` and ``Box`` spaces become ``{"typ", "dim", "lim"}`` dicts. A
    ``Dict`` space becomes an ``OrderedDict`` holding the config of each
    sub-space under the same key (recursively).

    Raises:
        NotImplementedError: For any other kind of space.
    """
    if isinstance(S, spaces.Discrete):
        return {
            "typ": S.__class__.__name__,
            "dim": np.int32(S.n),
            # Discrete spaces do not have high/low.
            "lim": (np.nan, np.nan),
        }

    if isinstance(S, spaces.Box):
        return {
            "typ": S.__class__.__name__,
            # The full shape is kept, so arrays of any rank are supported.
            "dim": S.shape,
            "lim": (S.low.tolist(), S.high.tolist()),
        }

    if isinstance(S, spaces.Dict):
        config = OrderedDict()
        for key, subspace in S.spaces.items():
            config[key] = space2config(subspace)
        return config

    logger.fatal("Unknown type for space:", type(S))
    raise NotImplementedError
Exemple #16
0
    def __init__(self, name, limit=None, bins=0, categories=None):
        """Set up either a categoric counter or a binned (range) counter.

        Exactly one of ``categories`` or the pair ``(limit, bins)`` must be
        given; otherwise the problem is logged and the process exits.

        Args:
            name: Name of this instance.
            limit: ``(lower, upper)`` bounds of the binned range.
            bins (int): Number of equal-width bins between the bounds.
            categories: Iterable of category keys. Defaults to no categories.

        ``self.data`` maps each category (or each bin label such as
        ``"(0.00,0.50)"``) to ``[[0], [0, Red]]``. For the binned variant
        ``self.keys`` maps a bin index to its label.
        """
        # A None sentinel avoids sharing one default dict between instances
        # (it is stored on ``self.categories`` below).
        if categories is None:
            categories = {}

        if categories:
            if limit or (bins != 0):
                logger.fatal(
                    "Only 'categories' or '(limit,bins)' can be specified at a time."
                )
                exit()
            self.categoric = True
        else:
            if not (limit and (bins > 0)):
                logger.fatal(
                    "At least one of 'categories' or '(limit,bins)' should be specified."
                )
                exit()
            self.categoric = False

        self.name = name
        self.limit = limit
        self.bins = bins
        self.categories = categories

        self.data = OrderedDict()
        if self.categoric:
            for c in categories:
                # NOTE(review): ``Red`` is defined elsewhere -- presumably a
                # display attribute for the second counter; confirm.
                self.data[c] = [[0], [0, Red]]
        else:
            lbound = limit[0]
            ubound = limit[1]
            steps = (ubound - lbound) / bins
            self.keys = {}

            for i in range(bins):
                # NOTE: this rebinds the local ``name`` to the bin label;
                # ``self.name`` was already stored above.
                name = "({:4.2f},{:4.2f})".format(lbound + steps * i,
                                                  lbound + steps * (i + 1))
                self.keys[i] = name
                self.data[name] = [[0], [0, Red]]
Exemple #17
0
 def on_sigint_received(self, signalNumber, frame):
     """Handle CTRL+C: stop gracefully on the first press, exit on the second.

     The first press requests termination after the current cycle and asks
     for a major checkpoint. The second press cancels the checkpoint and exits
     the process with status 1.
     """
     print("")  # To print on the next line where ^C is printed.
     self.ctrl_c_count += 1

     if self.ctrl_c_count == 1:
         logger.fatal("Received CTRL+C. Will terminate process after cycle is over.")
         logger.fatal("Press CTRL+C one more time to exit without saving.")
         self.ready_for_termination = self.save_major_checkpoint = True
         return

     if self.ctrl_c_count == 2:
         # NOTE: Kill all subprocesses
         logger.fatal("Received CTRL+C for the second time. Will terminate immediately.")
         self.ready_for_termination = True
         self.save_major_checkpoint = False
         sys.exit(1)
Exemple #18
0
def main(session):
    """Build or load a runner for ``session`` and run the requested loop.

    When the session is loading from a checkpoint, the runner is loaded from
    it. Otherwise parameters are generated from the parameter module and the
    control panel, dumped into the session, and a new runner is created. The
    runner is then saved, started, and run in play, custom or train mode. A
    session-only run stops right after saving the runner.
    """
    # 1. Loading
    if session.is_loading:
        params = session.update_params({})
        _log_session_summary(params)

        runner = session.load_runner()
    else:
        # Load fresh parameters: import the method-specific module and
        # generate params from the cpanel every time.
        ParamEngine = get_module(session.args["params"])
        cpanel = strict_update(ParamEngine.cpanel, session.args["cpanel"])
        params = ParamEngine.gen_params(cpanel)

        # Storing parameters in the session.
        params = session.update_params(params)
        session.dump_cpanel(cpanel)
        session.dump_params(params)

        _log_session_summary(params)
        logger.warn("Hyper-Parameters\n\n{}".format(json.dumps(cpanel, indent=4, sort_keys=False)) )
        logger.warn("="*50)

        Runner = get_class(params["runner"]["name"])
        runner = Runner(params)

    # If we are creating the session only, we do not even need to start the runner.
    session.save_runner(runner, 0)
    if session.is_session_only:
        logger.fatal("Session was created; exiting ...")
        return

    # 2. Initializing: It will load_state_dicts if we are in loading mode
    runner.start(session)

    # 3. Train/Enjoy/Custom Loops
    if session.is_playing:
        runner.enjoy()
    elif session.is_customs:
        runner.custom()
    else:
        runner.train()


def _log_session_summary(params):
    """Log the session name, message and command between two separator rules."""
    logger.warn("="*50)
    logger.warn("Session:", params["session_name"])
    logger.warn("Message:", params["session_msg"])
    logger.warn("Command:\n\n$", params["session_cmd"], "\n")
    logger.warn("-"*50)
Exemple #19
0
 def on_sigusr1_received(self, signalNumber, frame):
     """Handle SIGUSR1: request termination after the cycle, with a major checkpoint."""
     notice = "Received SIGUSR1 signal. Will terminate process after cycle is over."
     logger.fatal(notice)
     self.ready_for_termination = self.save_major_checkpoint = True
Exemple #20
0
    def __init__(self, root_path):
        """Set up a session: parse arguments, resolve paths, create directories, start services.

        The steps, in order:

        1. Parse the arguments and derive the mode flags (``dry_run``,
           ``is_loading``, ``is_playing``, ``is_resumed``, ``is_customs``,
           ``is_session_only``) from ``self.args``.
        2. When loading, resolve ``--load-checkpoint`` to a concrete checkpoint.
           A session path is resolved to its highest-numbered checkpoint.
        3. Decide the base directory for sessions and the session directory,
           and record all derived file/directory paths in ``self.state``.
        4. Check for a concurrent instance and for an already finished session.
        5. Create the remaining directories and initialize the logger, varlog,
           prolog, tensorboard, SaaM, resource monitor and device.

        Args:
            root_path: A path whose directory part is stored as
                ``self.state['path_root']``.

        Raises:
            AssertionError: On an invalid combination of the mode arguments.
            ValueError: If ``--load-checkpoint`` is neither a valid checkpoint
                nor a valid session.

        The process exits if the user-specified params module cannot be
        imported (only checked when not loading).
        """
        # NOTE(review): presumably this fills ``self.args``, which is read
        # right below -- confirm against parse_arguments.
        self.parse_arguments()
        self.state = {}

        # If '--dry-run' is specified no reports should be generated. It is not relevant to whether
        # we are loading from a checkpoint or running from scratch. If dry-run is there no reports
        # should be generated.
        self.dry_run = True if self.args["dry_run"] else False

        self.is_loading = True if self.args["load_checkpoint"] else False
        self.is_playing = True if self.args["play"] else False
        self.is_resumed = True if self.args["resume"] else False
        self.is_customs = True if self.args["custom"] else False
        self.is_session_only = True if self.args[
            "create_session_only"] else False

        # Loading requires one of play/resume/custom; session-only excludes all of them.
        assert (self.is_loading and self.is_playing) or (self.is_loading and self.is_resumed) or (self.is_loading and self.is_customs) or (not self.is_loading), \
            "--load-checkpoint argument should be used either with --play, --resume, or --custom arguments."
        assert (self.is_session_only and (not self.is_loading) and (not self.is_playing) and (not self.is_resumed) and (not self.is_customs)) or (not self.is_session_only), \
            "--create-session-only argument cannot be used with any of the --load-checkpoint, --play, --resume, or --custom arguments."

        # Automatically find the latest checkpoint if not specified
        self.state['checkpoint_name'] = None
        if self.is_loading:
            if check_checkpoint(self.args["load_checkpoint"], verbose=True):
                # The given path already points at a checkpoint directory.
                self.state['checkpoint_name'] = os.path.split(
                    self.args["load_checkpoint"])[1]
            elif check_session(self.args["load_checkpoint"], verbose=True):
                # The given path is a session: pick the checkpoint with the
                # highest number and rewrite the argument to point at it.
                last_checkpoint = sorted([
                    int(d.replace("checkpoint-", "")) for d in os.listdir(
                        os.path.join(self.args["load_checkpoint"],
                                     "checkpoints"))
                ])[-1]
                self.args["load_checkpoint"] = os.path.join(
                    self.args["load_checkpoint"], "checkpoints",
                    "checkpoint-" + str(last_checkpoint))
                self.state['checkpoint_name'] = "checkpoint-" + str(
                    last_checkpoint)
            else:
                raise ValueError(
                    "In '--load-checkpoint path', path is neither a valid checkpoint nor a valid session."
                )

        # TODO: Change the path for loading the packages?
        # sys.path.insert(0, '/path/to/whatever')

        # if self.args["monitor_cpu"] or self.args["monitor_gpu"]:
        #     # Force visdom ON if "--monitor-cpu" or "--monitor-gpu" are provided.
        #     self.args["visdom"] = True

        # Root: Indicates where we are right now
        self.state['path_root'] = os.path.split(root_path)[0]

        # Session: Indicates where we want our codes to be stored
        if self.is_loading and self.is_playing:
            # If we are playing a recorded checkpoint, we must save the results into the `evaluations` path
            # of that session.
            checkpoint_path = os.path.split(self.args["load_checkpoint"])[0]
            self.state['path_base_sessions'] = os.path.join(
                os.path.split(checkpoint_path)[0], "evaluations")
        elif self.is_loading and self.is_resumed:
            if self.args['session_name']:
                print("Warning: --session-name is ignored.")

            # Two levels above the checkpoint is the session directory itself;
            # resuming reuses its parent as base and its name as session name.
            directory = os.path.dirname(
                os.path.dirname(self.args["load_checkpoint"]))
            self.state['path_base_sessions'] = os.path.split(directory)[0]
            self.args['session_name'] = os.path.split(directory)[1]
        elif self.is_loading and self.is_customs:
            # If we are doing a custom task from a checkpoint, we must save the results into the `customs` path
            # of that session.
            checkpoint_path = os.path.split(self.args["load_checkpoint"])[0]
            self.state['path_base_sessions'] = os.path.join(
                os.path.split(checkpoint_path)[0], "customs")
        else:
            # OK, we are NOT loading from a checkpoint, just create session from scratch.
            # self.state['path_root_session']  = self.args["session_path"]
            # self.state['path_base_sessions'] = os.path.join(self.state['path_root_session'], 'digideep_sessions')
            self.state['path_base_sessions'] = self.args["session_path"]

        # 1. Creating 'path_base_sessions', i.e. '/tmp/digideep_sessions':
        try:  # TODO: and not self.dry_run:
            os.makedirs(self.state['path_base_sessions'])
            # Create an empty __init__.py in it!
        except FileExistsError:
            pass
        except Exception as ex:
            # Best effort: other errors are only printed, not raised.
            print(ex)

        # Write (or overwrite) an __init__.py in the base sessions directory.
        try:
            with open(
                    os.path.join(self.state['path_base_sessions'],
                                 '__init__.py'), 'w') as f:
                print("", file=f)
        except FileExistsError:
            pass
        except Exception as ex:
            print(ex)

        # 2. Create a unique 'path_session':
        if not self.dry_run:
            if self.args['session_name']:
                # If is_loading then this line will be executed ...
                self.state['path_session'] = os.path.join(
                    self.state['path_base_sessions'],
                    self.args["session_name"])
                # Make the directory; any error (including "already exists")
                # is only printed.
                try:
                    os.makedirs(self.state['path_session'])
                except Exception as ex:
                    print(ex)
            else:
                self.state['path_session'] = make_unique_path_session(
                    self.state['path_base_sessions'], prefix="session_")
        else:
            # Dry run: the path is only computed here, not created.
            self.state['path_session'] = os.path.join(
                self.state['path_base_sessions'], "no_session")

        # This will be equal to args['session_name'] if that has existed previously.
        self.state['session_name'] = os.path.split(
            self.state['path_session'])[-1]

        # All of the following live directly under the session directory.
        self.state['path_checkpoints'] = os.path.join(
            self.state['path_session'], 'checkpoints')
        self.state['path_memsnapshot'] = os.path.join(
            self.state['path_session'], 'memsnapshot')
        self.state['path_monitor'] = os.path.join(self.state['path_session'],
                                                  'monitor')
        self.state['path_videos'] = os.path.join(self.state['path_session'],
                                                 'videos')
        self.state['path_tensorboard'] = os.path.join(
            self.state['path_session'], 'tensorboard')
        # Hyper-parameters basically is a snapshot of initial parameter engine's state.
        self.state['file_cpanel'] = os.path.join(self.state['path_session'],
                                                 'cpanel.json')
        self.state['file_repeal'] = os.path.join(self.state['path_session'],
                                                 'repeal.json')
        self.state['file_params'] = os.path.join(self.state['path_session'],
                                                 'params.yaml')

        self.state['file_report'] = os.path.join(self.state['path_session'],
                                                 'report.log')
        # self.state['file_visdom'] = os.path.join(self.state['path_session'], 'visdom.log')
        self.state['file_varlog'] = os.path.join(self.state['path_session'],
                                                 'varlog.json')
        self.state['file_prolog'] = os.path.join(self.state['path_session'],
                                                 'prolog.json')
        self.state['file_monlog'] = os.path.join(self.state['path_session'],
                                                 'monlog.json')
        self.state['lock_running'] = os.path.join(self.state['path_session'],
                                                  'running.lock')
        self.state['lock_done'] = os.path.join(self.state['path_session'],
                                               'done.lock')

        # Here, the session path has been created or it existed.
        # Now make sure only one instance passes from this point.
        self.check_singleton_instance()
        self.check_if_done()

        # 3. Creating the rest of paths:
        # NOTE: no exist_ok here, so these raise if the directories already exist.
        if not self.is_playing and not self.is_resumed and not self.dry_run:
            os.makedirs(self.state['path_checkpoints'])
            os.makedirs(self.state['path_memsnapshot'])
        if not self.is_resumed and not self.dry_run:
            os.makedirs(self.state['path_monitor'])

        self.initLogger()
        self.initVarlog()
        self.initProlog()
        self.initTensorboard()
        # self.initVisdom()
        # TODO: We don't need the "SaaM" when are loading from a checkpoint.
        # if not self.is_playing:
        self.createSaaM()
        #################
        self.runMonitor()  # Monitor CPU/GPU/RAM
        self.set_device()

        # Check valid params file:
        if not self.is_loading:
            try:
                get_module(self.args["params"])
            except Exception as ex:
                logger.fatal("While importing user-specified params:", ex)
                exit()
        if self.is_loading:
            logger.warn("Loading from:", self.args["load_checkpoint"])

        if not self.dry_run:
            print(':: The session will be stored in ' +
                  self.state['path_session'])
        else:
            print(
                ':: This session has no footprints. Use without `--dry-run` to store results.'
            )