Ejemplo n.º 1
0
    def end(self, run_context):
        """
        Write the final checkpoint once training has finished.

        Fetches the callback parameters from ``run_context``, asks
        ``_save_ckpt`` to store the last checkpoint, and then calls
        ``destroy_allgather_cell``.

        Args:
            run_context (RunContext): Context of the train running.
        """
        # The second positional argument flags this as the "last checkpoint" save.
        self._save_ckpt(run_context.original_args(), True)

        # Imported here, after the save, so that the checkpoint is written
        # before the import is attempted.
        from mindspore.parallel._cell_wrapper import destroy_allgather_cell
        destroy_allgather_cell()
Ejemplo n.º 2
0
    def end(self, run_context):
        """
        Save the last checkpoint after training finished.

        After requesting the final save, blocks until every live thread named
        "asyn_save_ckpt" has finished, then calls ``destroy_allgather_cell``.

        Args:
            run_context (RunContext): Context of the train running.
        """
        cb_params = run_context.original_args()
        _to_save_last_ckpt = True

        self._save_ckpt(cb_params, _to_save_last_ckpt)

        # Wait for threads named "asyn_save_ckpt" (presumably the asynchronous
        # checkpoint writers - confirm against _save_ckpt) before tearing down.
        # Thread.getName() is deprecated since Python 3.10; use the `name`
        # attribute instead.
        for thread in threading.enumerate():
            if thread.name == "asyn_save_ckpt":
                thread.join()

        destroy_allgather_cell()
Ejemplo n.º 3
0
    def end(self, run_context):
        """
        Save the last checkpoint after training finished.

        Calls ``_flush_from_cache`` first, then requests the final checkpoint
        save, blocks until every live thread named "asyn_save_ckpt" has
        finished, and finally calls ``destroy_allgather_cell``.

        Args:
            run_context (RunContext): Context of the train running.
        """
        cb_params = run_context.original_args()
        _to_save_last_ckpt = True

        # If parameter caching is enabled, flush the cached data back to the
        # host before the final checkpoint is written.
        self._flush_from_cache(cb_params)

        self._save_ckpt(cb_params, _to_save_last_ckpt)

        # Wait for threads named "asyn_save_ckpt" (presumably the asynchronous
        # checkpoint writers - confirm against _save_ckpt) before tearing down.
        # Thread.getName() is deprecated since Python 3.10; use the `name`
        # attribute instead.
        for thread in threading.enumerate():
            if thread.name == "asyn_save_ckpt":
                thread.join()

        # Imported locally, after the save has completed.
        from mindspore.parallel._cell_wrapper import destroy_allgather_cell
        destroy_allgather_cell()