Example #1
0
    def run_job(job_id, initiator_role, initiator_party_id):
        """Drive a job end to end from the initiator side.

        Loads the job configuration, parses the DSL into a DAG, marks the job
        RUNNING across all parties, runs each root component as a task
        (stopping at the first failure), derives the final job status from the
        per-component results, then syncs the final status and finishes the job.

        :param job_id: identifier of the job to run
        :param initiator_role: role of the initiating party
        :param initiator_party_id: party id of the initiating party
        :return: False when the runtime conf has no ``initiator`` section,
                 otherwise None
        """
        job_dsl, job_runtime_conf, train_runtime_conf = job_utils.get_job_configuration(
            job_id=job_id, role=initiator_role, party_id=initiator_party_id)
        job_parameters = job_runtime_conf.get('job_parameters', {})
        job_initiator = job_runtime_conf.get('initiator', {})
        dag = get_job_dsl_parser(dsl=job_dsl,
                                 runtime_conf=job_runtime_conf,
                                 train_runtime_conf=train_runtime_conf)
        job_args = dag.get_args_input()
        if not job_initiator:
            # Without an initiator section we cannot sync status to anyone.
            return False
        storage.init_storage(job_id=job_id, work_mode=RuntimeConfig.WORK_MODE)
        job = Job()
        job.f_job_id = job_id
        job.f_start_time = current_timestamp()
        job.f_status = JobStatus.RUNNING
        job.f_update_time = current_timestamp()
        # Broadcast the RUNNING state to all parties before scheduling work.
        TaskScheduler.sync_job_status(
            job_id=job_id,
            roles=job_runtime_conf['role'],
            work_mode=job_parameters['work_mode'],
            initiator_party_id=job_initiator['party_id'],
            job_info=job.to_json())

        top_level_task_status = set()
        components = dag.get_next_components(None)
        # BUG FIX: the original passed a spurious third ``None`` argument to a
        # two-placeholder format string (silently ignored by str.format).
        schedule_logger.info('job {} root components is {}'.format(
            job.f_job_id, [component.get_name() for component in components]))
        for component in components:
            try:
                # Run a component as a task.
                run_status = TaskScheduler.run_component(
                    job_id, job_runtime_conf, job_parameters, job_initiator,
                    job_args, dag, component)
            except Exception as e:
                # BUG FIX: was schedule_logger.info(e), which discarded the
                # traceback; exception() logs it at ERROR with the stack.
                schedule_logger.exception(e)
                run_status = False
            top_level_task_status.add(run_status)
            if not run_status:
                # Stop scheduling further components after the first failure.
                break
        # {True, False} together means some components succeeded before one
        # failed -> PARTIAL; only-True -> SUCCESS; otherwise (only-False, or
        # no components at all) -> FAILED.
        if len(top_level_task_status) == 2:
            job.f_status = JobStatus.PARTIAL
        elif True in top_level_task_status:
            job.f_status = JobStatus.SUCCESS
        else:
            job.f_status = JobStatus.FAILED
        job.f_end_time = current_timestamp()
        job.f_elapsed = job.f_end_time - job.f_start_time
        if job.f_status == JobStatus.SUCCESS:
            job.f_progress = 100
        job.f_update_time = current_timestamp()
        TaskScheduler.sync_job_status(
            job_id=job_id,
            roles=job_runtime_conf['role'],
            work_mode=job_parameters['work_mode'],
            initiator_party_id=job_initiator['party_id'],
            job_info=job.to_json())
        TaskScheduler.finish_job(job_id=job_id,
                                 job_runtime_conf=job_runtime_conf)
        schedule_logger.info('job {} finished, status is {}'.format(
            job.f_job_id, job.f_status))
Example #2
0
    def run_job(job_id, initiator_role, initiator_party_id):
        """Drive a job end to end from the initiator side, with a timeout watchdog.

        Loads the job configuration, parses the DSL into a DAG, arms a timeout
        Timer that fires ``TaskScheduler.job_handler``, marks the job RUNNING
        across all parties, runs each root component as a task (stopping at
        the first failure), derives the final job status, finishes the job,
        stops it on failure, and syncs the final status. The watchdog timer is
        always cancelled on exit.

        :param job_id: identifier of the job to run
        :param initiator_role: role of the initiating party
        :param initiator_party_id: party id of the initiating party
        :return: False when the runtime conf has no ``initiator`` section,
                 otherwise None
        """
        job_dsl, job_runtime_conf, train_runtime_conf = job_utils.get_job_configuration(job_id=job_id,
                                                                                        role=initiator_role,
                                                                                        party_id=initiator_party_id)
        job_parameters = job_runtime_conf.get('job_parameters', {})
        job_initiator = job_runtime_conf.get('initiator', {})
        dag = get_job_dsl_parser(dsl=job_dsl,
                                 runtime_conf=job_runtime_conf,
                                 train_runtime_conf=train_runtime_conf)
        job_args = dag.get_args_input()
        if not job_initiator:
            # Without an initiator section we cannot sync status to anyone.
            return False
        timeout = job_utils.get_timeout(job_id, job_parameters.get("timeout", None), job_runtime_conf, job_dsl)
        t = Timer(timeout, TaskScheduler.job_handler, [job_id])
        t.start()
        # BUG FIX: the original only cancelled the timer on the very last
        # line, so any exception escaping this method (e.g. from the first
        # sync_job_status call or from TaskScheduler.stop) leaked a live
        # Timer that would later fire job_handler on an already-finished job.
        # try/finally guarantees cancellation on every exit path.
        try:
            job = Job()
            job.f_job_id = job_id
            job.f_start_time = current_timestamp()
            job.f_status = JobStatus.RUNNING
            job.f_update_time = current_timestamp()
            # Broadcast the RUNNING state to all parties before scheduling work.
            TaskScheduler.sync_job_status(job_id=job_id, roles=job_runtime_conf['role'],
                                          work_mode=job_parameters['work_mode'],
                                          initiator_party_id=job_initiator['party_id'],
                                          initiator_role=job_initiator['role'],
                                          job_info=job.to_json())

            top_level_task_status = set()
            components = dag.get_next_components(None)
            # BUG FIX: the original passed a spurious third ``None`` argument
            # to a two-placeholder format string (silently ignored).
            schedule_logger(job_id).info(
                'job {} root components is {}'.format(
                    job.f_job_id, [component.get_name() for component in components]))
            for component in components:
                try:
                    # Run a component as a task.
                    run_status = TaskScheduler.run_component(job_id, job_runtime_conf, job_parameters, job_initiator,
                                                             job_args, dag,
                                                             component)
                except Exception as e:
                    schedule_logger(job_id).exception(e)
                    run_status = False
                top_level_task_status.add(run_status)
                if not run_status:
                    # Stop scheduling further components after the first failure.
                    break
            # {True, False} together (some succeeded, then one failed) counts
            # as FAILED here; only-True -> COMPLETE; otherwise FAILED.
            if len(top_level_task_status) == 2:
                job.f_status = JobStatus.FAILED
            elif True in top_level_task_status:
                job.f_status = JobStatus.COMPLETE
            else:
                job.f_status = JobStatus.FAILED
            job.f_end_time = current_timestamp()
            job.f_elapsed = job.f_end_time - job.f_start_time
            if job.f_status == JobStatus.COMPLETE:
                job.f_progress = 100
            job.f_update_time = current_timestamp()
            try:
                TaskScheduler.finish_job(job_id=job_id, job_runtime_conf=job_runtime_conf)
            except Exception as e:
                # finish_job failure demotes the whole job to FAILED.
                schedule_logger(job_id).exception(e)
                job.f_status = JobStatus.FAILED

            if job.f_status == JobStatus.FAILED:
                TaskScheduler.stop(job_id=job_id, end_status=JobStatus.FAILED)

            try:
                TaskScheduler.sync_job_status(job_id=job_id, roles=job_runtime_conf['role'],
                                              work_mode=job_parameters['work_mode'],
                                              initiator_party_id=job_initiator['party_id'],
                                              initiator_role=job_initiator['role'],
                                              job_info=job.to_json())
            except Exception as e:
                # Best effort: the job outcome stands even if the final sync fails.
                schedule_logger(job_id).exception(e)
                schedule_logger(job_id).warning('job {} sync status failed'.format(job.f_job_id))

            schedule_logger(job_id).info('job {} finished, status is {}'.format(job.f_job_id, job.f_status))
        finally:
            t.cancel()