Ejemplo n.º 1
0
    def setUp(self):
        """Seed the database with the workflows, jobs and job dependencies
        used by the tests.

        Two workflows (ids 0 and 1) are added, then five jobs (ids 0-4)
        that all share one serialized job definition, then three
        dependency edges: jobs 0 and 1 feed job 2, and job 3 feeds job 4.
        Everything is committed in a single commit at the end.
        """
        super().setUp()
        db.session.add_all([
            Workflow(id=0, name='test-workflow-0', project_id=0),
            Workflow(id=1, name='test-workflow-1', project_id=0),
        ])

        # One serialized definition is shared by every job below.
        serialized_def = workflow_definition_pb2.JobDefinition(
            name='test-job').SerializeToString()

        # (name, job_type, state, workflow_id); list position is the job id.
        job_rows = [
            ('raw_data_0', JobType.RAW_DATA, JobState.STARTED, 0),
            ('raw_data_1', JobType.RAW_DATA, JobState.COMPLETED, 0),
            ('data_join_0', JobType.DATA_JOIN, JobState.WAITING, 0),
            ('data_join_1', JobType.DATA_JOIN, JobState.COMPLETED, 1),
            ('train_job_0', JobType.NN_MODEL_TRANINING, JobState.WAITING, 1),
        ]
        jobs = [
            Job(id=job_id,
                name=job_name,
                job_type=kind,
                state=state,
                workflow_id=workflow_id,
                project_id=0,
                config=serialized_def)
            for job_id, (job_name, kind, state, workflow_id)
            in enumerate(job_rows)
        ]
        db.session.add_all(jobs)

        # (source job index, destination job index, dep_index)
        edges = [(0, 2, 0), (1, 2, 1), (3, 4, 0)]
        db.session.add_all([
            JobDependency(src_job_id=jobs[src].id,
                          dst_job_id=jobs[dst].id,
                          dep_index=position)
            for src, dst, position in edges
        ])
        db.session.commit()
Ejemplo n.º 2
0
    def _setup_jobs(self):
        """Create or reuse one Job per job definition and record dependencies.

        For every (job definition, create-job flag) pair of this workflow:

        * ``REUSE``: the job with the same definition name is looked up in
          the workflow this one was forked from (``self.forked_from``) and
          used as is.
        * any other flag: a new ``Job`` in state ``NEW`` is added to the
          session; it is marked disabled when the flag is ``DISABLED``.

        The session is flushed before any ``job.id`` is read. A
        ``JobDependency`` row is then added for each dependency of every
        non-reused job, the resulting job ids are stored on the workflow,
        and, when ``Features.FEATURE_MODEL_WORKFLOW_HOOK`` is set,
        ``ModelService.workflow_hook`` is called for each job. This method
        does not commit the session.

        Raises:
            AssertionError: if the source workflow or a reused job cannot
                be found, if the number of flags differs from the number
                of job definitions, or if a job is flagged ``REUSE`` while
                this workflow is not forked from another one.
        """
        # Bind these up front so that a REUSE flag on a non-forked workflow
        # fails with a clear assertion below instead of an unbound local.
        trunk = None
        trunk_name2index = {}
        if self.forked_from is not None:
            trunk = Workflow.query.get(self.forked_from)
            assert trunk is not None, \
                'Source workflow %d not found' % self.forked_from
            trunk_job_defs = trunk.get_config().job_definitions
            trunk_name2index = {
                job.name: i
                for i, job in enumerate(trunk_job_defs)
            }

        job_defs = self.get_config().job_definitions
        flags = self.get_create_job_flags()
        assert len(job_defs) == len(flags), \
            'Number of job defs does not match number of create_job_flags ' \
            '%d vs %d'%(len(job_defs), len(flags))
        jobs = []
        for job_def, flag in zip(job_defs, flags):
            if flag == common_pb2.CreateJobFlag.REUSE:
                assert trunk is not None, \
                    f'Job {job_def.name} cannot be reused: ' \
                    'workflow is not forked from another workflow'
                assert job_def.name in trunk_name2index, \
                    f'Job {job_def.name} not found in base workflow'
                # The trunk's job ids are indexed like its job definitions.
                j = trunk.get_job_ids()[trunk_name2index[job_def.name]]
                job = Job.query.get(j)
                assert job is not None, \
                    'Job %d not found' % j
                # TODO: check forked jobs does not depend on non-forked jobs
            else:
                job = Job(
                    name=f'{self.uuid}-{job_def.name}',
                    job_type=JobType(job_def.job_type),
                    config=job_def.SerializeToString(),
                    workflow_id=self.id,
                    project_id=self.project_id,
                    state=JobState.NEW,
                    is_disabled=(flag == common_pb2.CreateJobFlag.DISABLED))
                db.session.add(job)
            jobs.append(job)
        # Flush before job.id is read for the dependency rows below.
        db.session.flush()
        # Dependencies refer to their source by job-definition name.
        name2index = {job.name: i for i, job in enumerate(job_defs)}
        for job, flag in zip(jobs, flags):
            if flag == common_pb2.CreateJobFlag.REUSE:
                # No dependency rows are added for reused jobs.
                continue
            for j, dep_def in enumerate(job.get_config().dependencies):
                dep = JobDependency(
                    src_job_id=jobs[name2index[dep_def.source]].id,
                    dst_job_id=job.id,
                    dep_index=j)
                db.session.add(dep)

        self.set_job_ids([job.id for job in jobs])
        if Features.FEATURE_MODEL_WORKFLOW_HOOK:
            for job in jobs:
                ModelService(db.session).workflow_hook(job)
Ejemplo n.º 3
0
    def _setup_jobs(self):
        """Create or reuse one Job per job definition and record dependencies.

        Job definitions whose name appears in ``self.get_reuse_job_names()``
        are resolved to the existing job of the same definition name in the
        workflow this one was forked from (``self.forked_from``). For every
        other definition a new ``Job`` in state ``STOPPED`` is created with
        the definition's yaml template. The session is committed, a
        ``JobDependency`` row is added for each dependency of every
        non-reused job, the job ids are stored on the workflow, and the
        session is committed again.

        Raises:
            AssertionError: if the source workflow or a reused job cannot
                be found, or if reuse job names are set while this
                workflow is not forked from another one.
        """
        if self.forked_from is not None:
            trunk = Workflow.query.get(self.forked_from)
            assert trunk is not None, \
                'Source workflow %d not found'%self.forked_from
            trunk_job_defs = trunk.get_config().job_definitions
            trunk_name2index = {
                job.name: i
                for i, job in enumerate(trunk_job_defs)
            }
        else:
            # Nothing can be reused without a source workflow.
            assert not self.get_reuse_job_names()

        job_defs = self.get_config().job_definitions
        jobs = []
        reuse_jobs = set(self.get_reuse_job_names())
        for job_def in job_defs:
            if job_def.name in reuse_jobs:
                assert job_def.name in trunk_name2index, \
                    "Job %s not found in base workflow"%job_def.name
                # The trunk's job ids are indexed like its job definitions.
                j = trunk.get_job_ids()[trunk_name2index[job_def.name]]
                job = Job.query.get(j)
                assert job is not None, \
                    'Job %d not found'%j
                # TODO: check forked jobs does not depend on non-forked jobs
            else:
                job = Job(name=f'{self.name}-{job_def.name}',
                          job_type=JobType(job_def.type),
                          config=job_def.SerializeToString(),
                          workflow_id=self.id,
                          project_id=self.project_id,
                          state=JobState.STOPPED)
                job.set_yaml_template(job_def.yaml_template)
                db.session.add(job)
            jobs.append(job)
        # Committed here, before job.id is read for the dependency rows.
        db.session.commit()

        # Dependencies refer to their source by job-definition name.
        name2index = {job.name: i for i, job in enumerate(job_defs)}
        for job, job_def in zip(jobs, job_defs):
            # reuse_jobs holds job-definition names, whereas Job.name for a
            # created job is prefixed with the workflow name, so the
            # definition name is what must be tested here.
            if job_def.name in reuse_jobs:
                continue
            for j, dep_def in enumerate(job.get_config().dependencies):
                dep = JobDependency(
                    src_job_id=jobs[name2index[dep_def.source]].id,
                    dst_job_id=job.id,
                    dep_index=j)
                db.session.add(dep)

        self.set_job_ids([job.id for job in jobs])

        db.session.commit()