Ejemplo n.º 1
0
    def resource_for_task(cls, task_info, operation_type):
        """Apply ``operation_type`` to the owning job's resources for one task.

        The amounts come from ``cls.calculate_task_resource``. When both are
        falsy nothing is written and the operation counts as successful.

        :param task_info: mapping read for ``job_id``, ``role``, ``party_id``,
            ``task_id`` and ``task_version``.
        :param operation_type: forwarded to ``cls.update_resource_sql`` and
            echoed in the log message.
        :return: True when no update was needed or the update reported more
            than zero rows; False otherwise.
        """
        cores, memory = cls.calculate_task_resource(task_info=task_info)

        # Nothing to move means nothing can fail.
        succeeded = True
        if cores or memory:
            conditions, changes = cls.update_resource_sql(
                resource_model=Job,
                cores=cores,
                memory=memory,
                operation_type=operation_type,
            )
            # Restrict the update to this party's job row, and only while the
            # job is flagged as holding resources.
            for condition in (
                Job.f_job_id == task_info["job_id"],
                Job.f_role == task_info["role"],
                Job.f_party_id == task_info["party_id"],
                Job.f_resource_in_use == True,
            ):
                conditions.append(condition)
            affected = Job.update(changes).where(*conditions).execute()
            succeeded = affected > 0

        logger = schedule_logger(job_id=task_info["job_id"])
        if not succeeded:
            logger.warning(
                "task {} {} {} resource failed".format(
                    task_info["task_id"], task_info["task_version"],
                    operation_type))
        else:
            logger.info(
                "task {} {} {} resource successfully".format(
                    task_info["task_id"], task_info["task_version"],
                    operation_type))
        return succeeded
Ejemplo n.º 2
0
    def resource_for_job(cls, job_id, role, party_id, operation_type):
        """Apply or return the computing-engine resources of one job.

        The job record and the matching ``EngineRegistry`` row are both
        updated inside one ``DB.atomic()`` block. If either update affects no
        rows a ``RuntimeError`` is raised inside that block (presumably
        rolling back both writes -- confirm against ``DB``), logged as a
        warning, and reported to the caller as ``False``.

        :param job_id: id of the job whose resources are changed.
        :param role: role of the local party in the job.
        :param party_id: id of the local party.
        :param operation_type: ``ResourceOperation.APPLY`` or
            ``ResourceOperation.RETURN``; any other value updates the job
            record without touching the in-use flag.
        :return: True when both updates affected at least one row, else False.
        """
        engine_name, cores, memory = cls.calculate_job_resource(job_id=job_id, role=role, party_id=party_id)
        try:
            with DB.atomic():
                updates = {
                    Job.f_engine_type: EngineType.COMPUTING,
                    Job.f_engine_name: engine_name,
                    Job.f_cores: cores,
                    Job.f_memory: memory,
                }
                filters = [
                    Job.f_job_id == job_id,
                    Job.f_role == role,
                    Job.f_party_id == party_id,
                ]
                if operation_type == ResourceOperation.APPLY:
                    updates[Job.f_remaining_cores] = cores
                    updates[Job.f_remaining_memory] = memory
                    updates[Job.f_resource_in_use] = True
                    updates[Job.f_apply_resource_time] = base_utils.current_timestamp()
                    # Only a job that does not hold resources may apply.
                    filters.append(Job.f_resource_in_use == False)
                elif operation_type == ResourceOperation.RETURN:
                    updates[Job.f_resource_in_use] = False
                    updates[Job.f_return_resource_time] = base_utils.current_timestamp()
                    # Only a job that currently holds resources may return them.
                    filters.append(Job.f_resource_in_use == True)
                operate = Job.update(updates).where(*filters)
                record_status = operate.execute() > 0
                if not record_status:
                    raise RuntimeError(f"record job {job_id} resource {operation_type} failed on {role} {party_id}")

                filters, updates = cls.update_resource_sql(resource_model=EngineRegistry,
                                                           cores=cores,
                                                           memory=memory,
                                                           operation_type=operation_type,
                                                           )
                filters.append(EngineRegistry.f_engine_type == EngineType.COMPUTING)
                filters.append(EngineRegistry.f_engine_name == engine_name)
                operate = EngineRegistry.update(updates).where(*filters)
                apply_status = operate.execute() > 0
                if not apply_status:
                    raise RuntimeError(
                        f"{operation_type} resource from engine {engine_name} for job {job_id} resource {operation_type} failed on {role} {party_id}")
            operate_status = True
        except Exception as e:
            schedule_logger(job_id=job_id).warning(e)
            schedule_logger(job_id=job_id).warning(
                f"{operation_type} job {job_id} resource(cores {cores} memory {memory}) on {role} {party_id} failed")
            operate_status = False

        # Reporting and the return deliberately live after the try/except
        # rather than in a ``finally`` clause: a ``return`` inside ``finally``
        # discards any in-flight exception, which silently swallowed
        # KeyboardInterrupt/SystemExit and errors raised by the handler above.
        remaining_cores, remaining_memory = cls.get_remaining_resource(EngineRegistry,
                                                                       [
                                                                           EngineRegistry.f_engine_type == EngineType.COMPUTING,
                                                                           EngineRegistry.f_engine_name == engine_name])
        operate_msg = "successfully" if operate_status else "failed"
        schedule_logger(job_id=job_id).info(
            f"{operation_type} job {job_id} resource(cores {cores} memory {memory}) on {role} {party_id} {operate_msg}, remaining cores: {remaining_cores} remaining memory: {remaining_memory}")
        return operate_status
Ejemplo n.º 3
0
 def end_scheduling_updates(cls, job_id):
     """Bump the job's end-scheduling update counter while it is under the limit.

     The increment only applies when the stored counter is still below
     ``END_STATUS_JOB_SCHEDULING_UPDATES``.

     :param job_id: id of the job whose counter is incremented.
     :return: True when the update reported more than zero rows, else False.
     """
     counter = Job.f_end_scheduling_updates
     query = Job.update({counter: counter + 1}).where(
         Job.f_job_id == job_id,
         counter < END_STATUS_JOB_SCHEDULING_UPDATES,
     )
     return query.execute() > 0
Ejemplo n.º 4
0
 def end_scheduling_updates(cls, job_id):
     """Bump the job's end-scheduling update counter while it is under the limit.

     The increment only applies when the stored counter is still below
     ``JobDefaultConfig.end_status_job_scheduling_updates``.

     :param job_id: id of the job whose counter is incremented.
     :return: True when the update reported more than zero rows, else False.
     """
     counter = Job.f_end_scheduling_updates
     limit = JobDefaultConfig.end_status_job_scheduling_updates
     query = Job.update({counter: counter + 1}).where(
         Job.f_job_id == job_id,
         counter < limit,
     )
     return query.execute() > 0
Ejemplo n.º 5
0
 def rerun_signal(cls, job_id, set_or_reset: bool):
     """Set or clear the rerun signal on a job.

     Setting the signal also clears the cancel signal and resets the
     end-scheduling update counter to 0; clearing it touches only the rerun
     flag.

     :param job_id: id of the job to update.
     :param set_or_reset: must be exactly ``True`` (set) or ``False`` (reset).
     :return: True when the update reported more than zero rows, else False.
     :raises RuntimeError: when ``set_or_reset`` is neither ``True`` nor
         ``False``.
     """
     # Identity checks on purpose: truthy/falsy stand-ins are rejected.
     if set_or_reset is not True and set_or_reset is not False:
         raise RuntimeError(f"can not support rereun signal {set_or_reset}")

     if set_or_reset:
         changes = {Job.f_rerun_signal: True, Job.f_cancel_signal: False, Job.f_end_scheduling_updates: 0}
     else:
         changes = {Job.f_rerun_signal: False}
     query = Job.update(changes).where(Job.f_job_id == job_id)
     return query.execute() > 0
Ejemplo n.º 6
0
 def ready_signal(cls, job_id, set_or_reset: bool, ready_timeout_ttl=None):
     """Set or clear the ready signal on a job.

     Setting only applies to a job whose signal is currently off and stamps
     the ready time; clearing only applies to a job whose signal is on and
     nulls the ready time.

     :param job_id: id of the job to update.
     :param set_or_reset: truthy to set the signal, falsy to clear it.
     :param ready_timeout_ttl: when truthy and clearing, additionally require
         that the time since ``f_ready_time`` exceeds this value.
     :return: True when the update reported more than zero rows, else False.
     """
     conditions = [Job.f_job_id == job_id]
     if not set_or_reset:
         changes = {Job.f_ready_signal: False, Job.f_ready_time: None}
         conditions.append(Job.f_ready_signal == True)
         if ready_timeout_ttl:
             # Only clear signals that have been raised for longer than the TTL.
             conditions.append(current_timestamp() - Job.f_ready_time > ready_timeout_ttl)
     else:
         changes = {Job.f_ready_signal: True, Job.f_ready_time: current_timestamp()}
         conditions.append(Job.f_ready_signal == False)
     query = Job.update(changes).where(*conditions)
     return query.execute() > 0
Ejemplo n.º 7
0
 def cancel_signal(cls, job_id, set_or_reset: bool):
     """Write the cancel signal for a job and stamp the cancel time.

     The cancel time is set to the current timestamp whether the signal is
     being set or reset.

     :param job_id: id of the job to update.
     :param set_or_reset: value stored in the job's cancel-signal field.
     :return: True when the update reported more than zero rows, else False.
     """
     changes = {
         Job.f_cancel_signal: set_or_reset,
         Job.f_cancel_time: current_timestamp(),
     }
     query = Job.update(changes).where(Job.f_job_id == job_id)
     return query.execute() > 0