コード例 #1
0
ファイル: leader_tm.py プロジェクト: flyfoxCI/fedlearner
    def _restore_checkpoint_fn(self, request):
        """Restore the allocated data-block ids carried by ``request``.

        Args:
            request: message exposing ``application_id`` and ``block_ids``.

        Returns:
            A ``tm_pb.RestoreDataBlockCheckpointResponse``. Its status is
            ``STATUS_SUCCESS`` when the status check reports that no restore
            is needed or when the INITIALING -> RUNNING transfer succeeds,
            and ``STATUS_WAIT_FOR_SYNCING_CHECKPOINT`` when that transfer
            is refused.

        Raises:
            AssertionError: if ``request.application_id`` differs from
                ``self._application_id`` (the check is an ``assert``).
        """
        assert request.application_id == self._application_id, (
            "Application id not matched: %s vs %s"
            % (request.application_id, self._application_id))

        response = tm_pb.RestoreDataBlockCheckpointResponse()

        def _restore_not_needed(status):
            # Statuses for which the checkpoint is left untouched.
            return status in (
                tm_pb.MasterStatus.RUNNING,
                tm_pb.MasterStatus.FINISHED,
                tm_pb.MasterStatus.ERROR,
            )

        if self._check_status(_restore_not_needed):
            logging.info("No need to restore %s", self.__class__.__name__)
        else:
            # In case of a race, load data before the state transfers to
            # RUNNING, and only after the data checkpoint has been filled.
            with self._checkpoint_mutex:
                self._allocated_data_blockids = set(request.block_ids)
            self._load_data()

            switched_to_running = self._transfer_status(
                tm_pb.MasterStatus.INITIALING, tm_pb.MasterStatus.RUNNING)
            if not switched_to_running:
                response.status.code = \
                    common_pb.STATUS_WAIT_FOR_SYNCING_CHECKPOINT
                response.status.error_message = \
                    "must sync data checkpoint before alloc"
                return response

        # Both the "nothing to do" path and the restored path report success.
        response.status.code = common_pb.STATUS_SUCCESS
        response.status.error_message = "success"
        return response
コード例 #2
0
 def RestoreDataBlockCheckpoint(self, request, context):
     """Handle a RestoreDataBlockCheckpoint request without raising.

     Delegates to ``self._restore_checkpoint_fn`` and converts any
     exception it raises into a ``STATUS_UNKNOWN_ERROR`` response.

     Args:
         request: the restore request, forwarded unchanged to
             ``self._restore_checkpoint_fn``.
         context: call context; not used by this method.

     Returns:
         The response produced by ``self._restore_checkpoint_fn``, or a
         fresh ``tm_pb.RestoreDataBlockCheckpointResponse`` carrying
         ``STATUS_UNKNOWN_ERROR`` and a textual description of the failure.
     """
     response = tm_pb.RestoreDataBlockCheckpointResponse()
     try:
         response = self._restore_checkpoint_fn(request)
     except Exception as err:  # pylint: disable=broad-except
         # Local import keeps this block self-contained.
         import logging
         # Keep the traceback in the log; the response only has room for a
         # one-line description.
         logging.exception("RestoreDataBlockCheckpoint failed")
         response.status.code = common_pb.STATUS_UNKNOWN_ERROR
         # error_message is assigned plain strings elsewhere, so store text
         # rather than the (type, value, traceback) tuple from
         # sys.exc_info(), which a string field would reject.
         response.status.error_message = repr(err)
     return response
コード例 #3
0
ファイル: follower_tm.py プロジェクト: piiswrong/fedlearner
 def _restore_checkpoint_fn(self, request):
     """Acknowledge a restore request without restoring anything.

     Args:
         request: the restore request; it is not inspected.

     Returns:
         A ``tm_pb.RestoreDataBlockCheckpointResponse`` whose status is
         ``STATUS_SUCCESS`` with the message ``"success"``.
     """
     logging.info("Follower _restore_checkpoint_fn, do nothing")

     reply = tm_pb.RestoreDataBlockCheckpointResponse()
     reply_status = reply.status
     reply_status.code = common_pb.STATUS_SUCCESS
     reply_status.error_message = "success"
     return reply