コード例 #1
0
    def test_is_peer_job_inheritance_matched(self, mock_get_workflow):
        """Check ``is_peer_job_inheritance_matched`` on a forked workflow.

        ``mock_get_workflow`` returns a peer config with two jobs
        ('raw-data-job' and a federated 'train-job'); the local config only
        holds the federated 'train-job'. With peer flags [NEW, REUSE] the
        check is expected to pass for local flags [REUSE] and to fail for
        local flags [NEW].
        """
        # Peer side: the config carried by the mocked response.
        peer_config = WorkflowDefinition()
        peer_config.job_definitions.extend([
            JobDefinition(name='raw-data-job'),
            JobDefinition(name='train-job', is_federated=True),
        ])
        mock_get_workflow.return_value = GetWorkflowResponse(
            config=peer_config)

        # Local side: a single federated job.
        local_config = WorkflowDefinition(job_definitions=[
            JobDefinition(name='train-job', is_federated=True),
        ])

        project = Project()
        project.set_config(
            project_pb2.Project(participants=[project_pb2.Participant()]))

        # The base workflow is persisted before its id is used as the
        # fork source below.
        base_workflow = Workflow(project=project)
        base_workflow.set_config(local_config)
        db.session.add(base_workflow)
        db.session.commit()
        db.session.flush()

        forked_workflow = Workflow(project=project,
                                   forked_from=base_workflow.id)
        forked_workflow.set_config(local_config)
        forked_workflow.set_create_job_flags([CreateJobFlag.REUSE])
        forked_workflow.set_peer_create_job_flags(
            [CreateJobFlag.NEW, CreateJobFlag.REUSE])

        matched = is_peer_job_inheritance_matched(forked_workflow)
        self.assertTrue(matched)

        # Switching the local flag to NEW must break the match.
        forked_workflow.set_create_job_flags([CreateJobFlag.NEW])
        matched = is_peer_job_inheritance_matched(forked_workflow)
        self.assertFalse(matched)
コード例 #2
0
 def test_patch_create_job_flags(self):
     """Verify that patching ``create_job_flags`` updates ``Job.is_disabled``.

     A workflow with a single job is stored, then patched twice: flag
     value 3 is expected to leave the job disabled, flag value 1 enabled.
     """
     definition = WorkflowDefinition()
     job_definition = definition.job_definitions.add()

     workflow = Workflow(
         name='test-workflow',
         project_id=123,
         config=definition.SerializeToString(),
         forkable=False,
         state=WorkflowState.READY,
     )
     db.session.add(workflow)
     # Flush before workflow.id is read for the job below.
     db.session.flush()

     job = Job(
         name='test_job',
         job_type=JobType(1),
         config=job_definition.SerializeToString(),
         workflow_id=workflow.id,
         project_id=123,
         state=JobState.STOPPED,
         is_disabled=False,
     )
     db.session.add(job)
     db.session.flush()
     workflow.job_ids = str(job.id)
     db.session.commit()

     url = f'/api/v2/workflows/{workflow.id}'
     # (flag sent in the PATCH body, expected is_disabled afterwards)
     for flag, expected_disabled in ((3, True), (1, False)):
         response = self.patch_helper(url,
                                      data={'create_job_flags': [flag]})
         self.assertEqual(response.status_code, HTTPStatus.OK)
         patched_job = Job.query.get(job.id)
         self.assertEqual(patched_job.is_disabled, expected_disabled)
コード例 #3
0
    def test_patch_batch_update_interval(self, mock_collect, mock_finish,
                                         mock_patch_item,
                                         mock_get_item_status):
        """Exercise PATCH ``batch_update_interval`` on workflows.

        For an ``is_left=True`` workflow the test covers three steps:
        creating the cron job, patching its interval, and stopping it with
        ``-1``. For an ``is_left=False`` workflow the same PATCH is expected
        to be rejected with BAD_REQUEST.
        """
        mock_get_item_status.side_effect = [None, ItemStatus.ON]

        left_workflow = Workflow(
            name='test-workflow-left',
            project_id=123,
            config=WorkflowDefinition(is_left=True).SerializeToString(),
            forkable=False,
            state=WorkflowState.STOPPED,
        )
        db.session.add(left_workflow)
        db.session.commit()
        db.session.refresh(left_workflow)

        left_url = f'/api/v2/workflows/{left_workflow.id}'
        cron_job_name = f'workflow_cron_job_{left_workflow.id}'

        # Step 1: the first patch is expected to create the cron job.
        first_interval = 1
        response = self.patch_helper(
            left_url, data={'batch_update_interval': first_interval})
        self.assertEqual(response.status_code, HTTPStatus.OK)
        mock_collect.assert_called_with(
            name=cron_job_name,
            items=[WorkflowCronJobItem(left_workflow.id)],
            metadata={},
            interval=first_interval * 60)

        # Step 2: a new interval is expected to patch the existing item.
        second_interval = 2
        response = self.patch_helper(
            left_url, data={'batch_update_interval': second_interval})
        self.assertEqual(response.status_code, HTTPStatus.OK)
        mock_patch_item.assert_called_with(name=cron_job_name,
                                           key='interval_time',
                                           value=second_interval * 60)

        # Step 3: -1 is expected to stop the cron job.
        response = self.patch_helper(left_url,
                                     data={'batch_update_interval': -1})
        self.assertEqual(response.status_code, HTTPStatus.OK)
        mock_finish.assert_called_with(name=cron_job_name)

        # A workflow with is_left=False must be rejected.
        right_workflow = Workflow(
            name='test-workflow-right',
            project_id=456,
            config=WorkflowDefinition(is_left=False).SerializeToString(),
            forkable=False,
            state=WorkflowState.STOPPED,
        )
        db.session.add(right_workflow)
        db.session.commit()
        db.session.refresh(right_workflow)

        right_url = f'/api/v2/workflows/{right_workflow.id}'
        response = self.patch_helper(right_url,
                                     data={'batch_update_interval': 1})
        self.assertEqual(response.status_code, HTTPStatus.BAD_REQUEST)
コード例 #4
0
ファイル: apis_test.py プロジェクト: guotie/fedlearner
    def test_post_successfully(self):
        """POST a new workflow template and verify the response and DB row."""
        template_name = 'test-nb-template'

        def find_template():
            # Look the template up by name; None when no row matches.
            return WorkflowTemplate.query.filter_by(
                name=template_name).first()

        # Precondition: no template with this name exists yet.
        self.assertIsNone(find_template())

        payload = {
            'name': template_name,
            'comment': 'test-comment',
            'config': {
                'group_alias': 'g222',
                'is_left': True
            }
        }
        response = self.post_helper('/api/v2/workflow_templates',
                                    data=payload)
        self.assertEqual(response.status_code, HTTPStatus.CREATED)
        data = json.loads(response.data).get('data')

        # Checks DB
        stored_template = find_template()
        self.assertEqual(stored_template.name, template_name)
        self.assertEqual(stored_template.comment, 'test-comment')
        expected_config = WorkflowDefinition(
            group_alias='g222', is_left=True).SerializeToString()
        self.assertEqual(stored_template.config, expected_config)
        # The response payload must mirror the stored row.
        self.assertEqual(data, stored_template.to_dict())
コード例 #5
0
ファイル: apis_test.py プロジェクト: guotie/fedlearner
    def test_patch_invalid_target_state(self, mock_wakeup):
        """PATCH a new target state while another one is already pending.

        The workflow is stored with ``target_state=RUNNING``; requesting
        'READY' is expected to fail with BAD_REQUEST, leave the row
        untouched and never call ``mock_wakeup``.
        """
        pending_workflow = Workflow(
            name='test-workflow',
            project_id=123,
            config=WorkflowDefinition().SerializeToString(),
            forkable=False,
            state=WorkflowState.READY,
            target_state=WorkflowState.RUNNING
        )
        db.session.add(pending_workflow)
        db.session.commit()
        db.session.refresh(pending_workflow)

        url = f'/api/v2/workflows/{pending_workflow.id}'
        response = self.patch_helper(url, data={'target_state': 'READY'})
        self.assertEqual(response.status_code, HTTPStatus.BAD_REQUEST)
        # NOTE(review): the '[1]' suffix presumably is the workflow id of a
        # fresh database -- confirm against the API implementation.
        error_details = json.loads(response.data).get('details')
        self.assertEqual(error_details,
                         'Another transaction is in progress [1]')

        # Checks DB
        reloaded = Workflow.query.get(pending_workflow.id)
        self.assertEqual(reloaded.state, WorkflowState.READY)
        self.assertEqual(reloaded.target_state, WorkflowState.RUNNING)

        # Checks scheduler
        mock_wakeup.assert_not_called()
コード例 #6
0
ファイル: apis_test.py プロジェクト: guotie/fedlearner
    def test_patch_successfully(self, mock_wakeup):
        """PATCH ``target_state`` to 'RUNNING' on a READY workflow.

        Expects an OK response echoing the workflow, the new target state
        stored in the DB, and ``mock_wakeup`` called once with the id.
        """
        ready_workflow = Workflow(
            name='test-workflow',
            project_id=123,
            config=WorkflowDefinition().SerializeToString(),
            forkable=False,
            state=WorkflowState.READY,
        )
        db.session.add(ready_workflow)
        db.session.commit()
        db.session.refresh(ready_workflow)

        url = f'/api/v2/workflows/{ready_workflow.id}'
        response = self.patch_helper(url, data={'target_state': 'RUNNING'})
        self.assertEqual(response.status_code, HTTPStatus.OK)

        body = json.loads(response.data).get('data')
        self.assertEqual(body['id'], ready_workflow.id)
        self.assertEqual(body['state'], 'READY')
        self.assertEqual(body['target_state'], 'RUNNING')

        # Checks DB
        reloaded = Workflow.query.get(ready_workflow.id)
        self.assertEqual(reloaded.target_state, WorkflowState.RUNNING)

        # Checks scheduler
        mock_wakeup.assert_called_once_with(ready_workflow.id)
コード例 #7
0
 def setUp(self):
     """Seed the database with two workflow templates, 't1' and 't2'."""
     super().setUp()

     # 't1': has a comment, group alias 'g1', is_left=True.
     first_template = WorkflowTemplate(
         name='t1',
         comment='comment for t1',
         group_alias='g1',
         is_left=True,
     )
     first_config = WorkflowDefinition(group_alias='g1', is_left=True)
     first_template.set_config(first_config)

     # 't2': no comment, group alias 'g2', is_left=False.
     second_template = WorkflowTemplate(
         name='t2',
         group_alias='g2',
         is_left=False,
     )
     second_config = WorkflowDefinition(group_alias='g2', is_left=False)
     second_template.set_config(second_config)

     for template in (first_template, second_template):
         db.session.add(template)
     db.session.commit()
コード例 #8
0
def add_fake_workflow(session):
    """Persist a READY workflow with one STOPPED job and return both.

    Args:
        session: database session used to add, flush and commit the rows.

    Returns:
        A ``(workflow, job)`` tuple of the committed objects.
    """
    definition = WorkflowDefinition()
    job_definition = definition.job_definitions.add()

    fake_workflow = Workflow(
        name='test-workflow',
        project_id=123,
        config=definition.SerializeToString(),
        forkable=False,
        state=WorkflowState.READY,
    )
    session.add(fake_workflow)
    # Flush before fake_workflow.id is read for the job below.
    session.flush()

    fake_job = Job(
        name='test_job',
        job_type=JobType(1),
        config=job_definition.SerializeToString(),
        workflow_id=fake_workflow.id,
        project_id=123,
        state=JobState.STOPPED,
        is_disabled=False,
    )
    session.add(fake_job)
    session.flush()

    # Record the job id on the workflow as a string.
    fake_workflow.job_ids = str(fake_job.id)
    session.commit()
    return (fake_workflow, fake_job)
コード例 #9
0
ファイル: apis_test.py プロジェクト: guotie/fedlearner
    def test_put_resetting(self):
        """PUT a config onto a workflow that already has one.

        The workflow is stored in state NEW with a config whose group alias
        is 'test-template'; the PUT is expected to answer CONFLICT.
        """
        existing_config = WorkflowDefinition(group_alias='test-template')
        configured_workflow = Workflow(
            name='test-workflow',
            project_id=123,
            config=existing_config.SerializeToString(),
            state=WorkflowState.NEW,
        )
        db.session.add(configured_workflow)
        db.session.commit()
        db.session.refresh(configured_workflow)

        payload = {
            'forkable': True,
            'config': {'group_alias': 'test-template'},
        }
        response = self.put_helper(
            f'/api/v2/workflows/{configured_workflow.id}', data=payload)
        self.assertEqual(response.status_code, HTTPStatus.CONFLICT)
コード例 #10
0
def make_workflow_template():
    workflow = WorkflowDefinition(
        group_alias='psi_join_tree_model',
        is_left=False,
        variables=[
            Variable(name='image_version',
                     value='v1.5-rc3',
                     access_mode=Variable.PEER_READABLE),
            Variable(name='num_partitions',
                     value='2',
                     access_mode=Variable.PEER_WRITABLE),
        ],
        job_definitions=[
            JobDefinition(
                name='raw-data-job',
                job_type=JobDefinition.RAW_DATA,
                is_federated=False,
                variables=[
                    Variable(
                        name='input_dir',
                        value='/app/deploy/integrated_test/tfrecord_raw_data',
                        access_mode=Variable.PRIVATE),
                    Variable(name='file_wildcard',
                             value='*.rd',
                             access_mode=Variable.PRIVATE),
                    Variable(name='batch_size',
                             value='1024',
                             access_mode=Variable.PEER_WRITABLE),
                    Variable(name='input_format',
                             value='TF_RECORD',
                             access_mode=Variable.PRIVATE),
                    Variable(name='worker_cpu',
                             value='2000m',
                             access_mode=Variable.PEER_WRITABLE),
                    Variable(name='worker_mem',
                             value='4Gi',
                             access_mode=Variable.PEER_WRITABLE),
                ],
                yaml_template='''{
    "apiVersion": "fedlearner.k8s.io/v1alpha1",
    "kind": "FLApp",
    "metadata": {
        "name": "${workflow.jobs.raw-data-job.name}",
        "namespace": "${project.variables.namespace}"
    },
    "spec": {
        "cleanPodPolicy": "All",
        "flReplicaSpecs": {
            "Master": {
                "template": {
                    "spec": {
                        "containers": [
                            {
                                "resources": {
                                    "limits": {
                                        "cpu": "1000m",
                                        "memory": "2Gi"
                                    },
                                    "requests": {
                                        "cpu": "1000m",
                                        "memory": "2Gi"
                                    }
                                },
                                "image": "artifact.bytedance.com/fedlearner/fedlearner:${workflow.variables.image_version}",
                                "ports": [
                                    {
                                        "containerPort": 50051,
                                        "name": "flapp-port"
                                    }
                                ],
                                "command": [
                                    "/app/deploy/scripts/data_portal/run_data_portal_master.sh"
                                ],
                                "args": [],
                                "env": [
                                    ${system.basic_envs},
                                    {
                                        "name": "EGRESS_URL",
                                        "value": "fedlearner-stack-ingress-nginx-controller.default.svc.cluster.local:80"
                                    },
                                    {
                                        "name": "EGRESS_HOST",
                                        "value": "${project.participants[0].egress_host}"
                                    },
                                    {
                                        "name": "EGRESS_DOMAIN",
                                        "value": "${project.participants[0].egress_domain}"
                                    },
                                    {
                                        "name": "STORAGE_ROOT_PATH",
                                        "value": "${project.variables.storage_root_dir}"
                                    },
                                    {
                                        "name": "APPLICATION_ID",
                                        "value": "${workflow.jobs.raw-data-job.name}"
                                    },
                                    {
                                        "name": "DATA_PORTAL_NAME",
                                        "value": "${workflow.jobs.raw-data-job.name}"
                                    },
                                    {
                                        "name": "OUTPUT_PARTITION_NUM",
                                        "value": "${workflow.variables.num_partitions}"
                                    },
                                    {
                                        "name": "INPUT_BASE_DIR",
                                        "value": "${workflow.jobs.raw-data-job.variables.input_dir}"
                                    },
                                    {
                                        "name": "OUTPUT_BASE_DIR",
                                        "value": "${project.variables.storage_root_dir}/raw_data/${workflow.jobs.raw-data-job.name}"
                                    },
                                    {
                                        "name": "RAW_DATA_PUBLISH_DIR",
                                        "value": "portal_publish_dir/${workflow.jobs.raw-data-job.name}"
                                    },
                                    {
                                        "name": "DATA_PORTAL_TYPE",
                                        "value": "PSI"
                                    },
                                    {
                                        "name": "FILE_WILDCARD",
                                        "value": "${workflow.jobs.raw-data-job.variables.file_wildcard}"
                                    }
                                ],
                                "volumeMounts": [
                                    {
                                        "mountPath": "/data",
                                        "name": "data"
                                    }
                                ],
                                "imagePullPolicy": "IfNotPresent",
                                "name": "tensorflow"
                            }
                        ],
                        "imagePullSecrets": [
                            {
                                "name": "regcred"
                            }
                        ],
                        "volumes": [
                            {
                                "persistentVolumeClaim": {
                                    "claimName": "pvc-fedlearner-default"
                                },
                                "name": "data"
                            }
                        ],
                        "restartPolicy": "Never"
                    }
                },
                "pair": false,
                "replicas": 1
            },
            "Worker": {
                "replicas": ${workflow.variables.num_partitions},
                "template": {
                    "spec": {
                        "containers": [
                            {
                                "resources": {
                                    "limits": {
                                        "cpu": "${workflow.jobs.raw-data-job.variables.worker_cpu}",
                                        "memory": "${workflow.jobs.raw-data-job.variables.worker_mem}"
                                    },
                                    "requests": {
                                        "cpu": "${workflow.jobs.raw-data-job.variables.worker_cpu}",
                                        "memory": "${workflow.jobs.raw-data-job.variables.worker_mem}"
                                    }
                                },
                                "image": "artifact.bytedance.com/fedlearner/fedlearner:${workflow.variables.image_version}",
                                "command": [
                                    "/app/deploy/scripts/data_portal/run_data_portal_worker.sh"
                                ],
                                "args": [],
                                "env": [
                                    ${system.basic_envs},
                                    {
                                        "name": "EGRESS_URL",
                                        "value": "fedlearner-stack-ingress-nginx-controller.default.svc.cluster.local:80"
                                    },
                                    {
                                        "name": "EGRESS_HOST",
                                        "value": "${project.participants[0].egress_host}"
                                    },
                                    {
                                        "name": "EGRESS_DOMAIN",
                                        "value": "${project.participants[0].egress_domain}"
                                    },
                                    {
                                        "name": "STORAGE_ROOT_PATH",
                                        "value": "${project.variables.storage_root_dir}"
                                    },
                                    {
                                        "name": "APPLICATION_ID",
                                        "value": "${workflow.jobs.raw-data-job.name}"
                                    },
                                    {
                                        "name": "BATCH_SIZE",
                                        "value": "${workflow.jobs.raw-data-job.variables.batch_size}"
                                    },
                                    {
                                        "name": "INPUT_DATA_FORMAT",
                                        "value": "${workflow.jobs.raw-data-job.variables.input_format}"
                                    },
                                    {
                                        "name": "COMPRESSED_TYPE",
                                        "value": ""
                                    },
                                    {
                                        "name": "OUTPUT_DATA_FORMAT",
                                        "value": "TF_RECORD"
                                    }
                                ],
                                "volumeMounts": [
                                    {
                                        "mountPath": "/data",
                                        "name": "data"
                                    }
                                ],
                                "imagePullPolicy": "IfNotPresent",
                                "name": "tensorflow"
                            }
                        ],
                        "imagePullSecrets": [
                            {
                                "name": "regcred"
                            }
                        ],
                        "volumes": [
                            {
                                "persistentVolumeClaim": {
                                    "claimName": "pvc-fedlearner-default"
                                },
                                "name": "data"
                            }
                        ],
                        "restartPolicy": "Never"
                    }
                },
                "pair": false
            }
        }
    }
}
                '''),
            JobDefinition(name='data-join-job',
                          job_type=JobDefinition.PSI_DATA_JOIN,
                          is_federated=True,
                          variables=[
                              Variable(name='worker_cpu',
                                       value='4000m',
                                       access_mode=Variable.PEER_WRITABLE),
                              Variable(name='worker_mem',
                                       value='4Gi',
                                       access_mode=Variable.PEER_WRITABLE),
                              Variable(name='rsa_private_key_path',
                                       value='',
                                       access_mode=Variable.PRIVATE),
                          ],
                          dependencies=[JobDependency(source='raw-data-job')],
                          yaml_template='''
{
    "apiVersion": "fedlearner.k8s.io/v1alpha1",
    "kind": "FLApp",
    "metadata": {
        "name": "${workflow.jobs.data-join-job.name}",
        "namespace": "${project.variables.namespace}"
    },
    "spec": {
        "role": "Leader",
        "cleanPodPolicy": "All",
        "peerSpecs": {
            "Follower": {
                "peerURL": "fedlearner-stack-ingress-nginx-controller.default.svc.cluster.local:80",
                "authority": "${project.participants[0].egress_domain}",
                "extraHeaders": {
                    "x-host": "default.fedlearner.operator"
                }
            }
        },
        "flReplicaSpecs": {
            "Master": {
                "template": {
                    "spec": {
                        "restartPolicy": "Never",
                        "containers": [
                            {
                                "env": [
                                    ${system.basic_envs},
                                    {
                                        "name": "EGRESS_URL",
                                        "value": "fedlearner-stack-ingress-nginx-controller.default.svc.cluster.local:80"
                                    },
                                    {
                                        "name": "EGRESS_HOST",
                                        "value": "${project.participants[0].egress_host}"
                                    },
                                    {
                                        "name": "EGRESS_DOMAIN",
                                        "value": "${project.participants[0].egress_domain}"
                                    },
                                    {
                                        "name": "APPLICATION_ID",
                                        "value": "${workflow.jobs.data-join-job.name}"
                                    },
                                    {
                                        "name": "STORAGE_ROOT_PATH",
                                        "value": "${project.variables.storage_root_dir}"
                                    },
                                    {
                                        "name": "ROLE",
                                        "value": "leader"
                                    },
                                    {
                                        "name": "OUTPUT_BASE_DIR",
                                        "value": "${project.variables.storage_root_dir}/data_source/${workflow.jobs.data-join-job.name}"
                                    },
                                    {
                                        "name": "PARTITION_NUM",
                                        "value": "${workflow.variables.num_partitions}"
                                    },
                                    {
                                        "name": "START_TIME",
                                        "value": "0"
                                    },
                                    {
                                        "name": "END_TIME",
                                        "value": "999999999999"
                                    },
                                    {
                                        "name": "NEGATIVE_SAMPLING_RATE",
                                        "value": "1.0"
                                    },
                                    {
                                        "name": "RAW_DATA_SUB_DIR",
                                        "value": "portal_publish_dir/${workflow.jobs.raw-data-job.name}"
                                    }
                                ],
                                "imagePullPolicy": "IfNotPresent",
                                "name": "tensorflow",
                                "volumeMounts": [
                                    {
                                        "mountPath": "/data",
                                        "name": "data"
                                    }
                                ],
                                "image": "artifact.bytedance.com/fedlearner/fedlearner:${workflow.variables.image_version}",
                                "ports": [
                                    {
                                        "containerPort": 50051,
                                        "name": "flapp-port"
                                    }
                                ],
                                "command": [
                                    "/app/deploy/scripts/wait4pair_wrapper.sh"
                                ],
                                "args": [
                                    "/app/deploy/scripts/rsa_psi/run_psi_data_join_master.sh"
                                ],
                                "resources": {
                                    "limits": {
                                        "cpu": "2000m",
                                        "memory": "3Gi"
                                    },
                                    "requests": {
                                        "cpu": "2000m",
                                        "memory": "3Gi"
                                    }
                                },
                            }
                        ],
                        "imagePullSecrets": [
                            {
                                "name": "regcred"
                            }
                        ],
                        "volumes": [
                            {
                                "persistentVolumeClaim": {
                                    "claimName": "pvc-fedlearner-default"
                                },
                                "name": "data"
                            }
                        ]
                    }
                },
                "pair": true,
                "replicas": 1
            },
            "Worker": {
                "template": {
                    "spec": {
                        "restartPolicy": "Never",
                        "containers": [
                            {
                                "env": [
                                    ${system.basic_envs},
                                    {
                                        "name": "EGRESS_URL",
                                        "value": "fedlearner-stack-ingress-nginx-controller.default.svc.cluster.local:80"
                                    },
                                    {
                                        "name": "EGRESS_HOST",
                                        "value": "${project.participants[0].egress_host}"
                                    },
                                    {
                                        "name": "EGRESS_DOMAIN",
                                        "value": "${project.participants[0].egress_domain}"
                                    },
                                    {
                                        "name": "STORAGE_ROOT_PATH",
                                        "value": "${project.variables.storage_root_dir}"
                                    },
                                    {
                                        "name": "ROLE",
                                        "value": "follower"
                                    },
                                    {
                                        "name": "APPLICATION_ID",
                                        "value": "${workflow.jobs.data-join-job.name}"
                                    },
                                    {
                                        "name": "OUTPUT_BASE_DIR",
                                        "value": "${project.variables.storage_root_dir}/data_source/${workflow.jobs.data-join-job.name}"
                                    },
                                    {
                                        "name": "RSA_KEY_PATH",
                                        "value": "${workflow.jobs.data-join-job.rsa_private_key_path}"
                                    },
                                    {
                                        "name": "RSA_PRIVATE_KEY_PATH",
                                        "value": "${workflow.jobs.data-join-job.rsa_private_key_path}"
                                    },
                                    {
                                        "name": "PSI_RAW_DATA_ITER",
                                        "value": "TF_RECORD"
                                    },
                                    {
                                        "name": "PSI_OUTPUT_BUILDER",
                                        "value": "TF_RECORD"
                                    },
                                    {
                                        "name": "DATA_BLOCK_BUILDER",
                                        "value": "TF_RECORD"
                                    },
                                    {
                                        "name": "DATA_BLOCK_DUMP_INTERVAL",
                                        "value": "600"
                                    },
                                    {
                                        "name": "DATA_BLOCK_DUMP_THRESHOLD",
                                        "value": "524288"
                                    },
                                    {
                                        "name": "EXAMPLE_ID_DUMP_INTERVAL",
                                        "value": "600"
                                    },
                                    {
                                        "name": "EXAMPLE_ID_DUMP_THRESHOLD",
                                        "value": "524288"
                                    },
                                    {
                                        "name": "EXAMPLE_JOINER",
                                        "value": "SORT_RUN_JOINER"
                                    },
                                    {
                                        "name": "SIGN_RPC_TIMEOUT_MS",
                                        "value": "128000"
                                    },
                                    {
                                        "name": "RAW_DATA_SUB_DIR",
                                        "value": "portal_publish_dir/${workflow.jobs.raw-data-job.name}"
                                    },
                                    {
                                        "name": "PARTITION_NUM",
                                        "value": "${workflow.variables.num_partitions}"
                                    }
                                ],
                                "imagePullPolicy": "IfNotPresent",
                                "name": "tensorflow",
                                "volumeMounts": [
                                    {
                                        "mountPath": "/data",
                                        "name": "data"
                                    }
                                ],
                                "image": "artifact.bytedance.com/fedlearner/fedlearner:${workflow.variables.image_version}",
                                "ports": [
                                    {
                                        "containerPort": 50051,
                                        "name": "flapp-port"
                                    }
                                ],
                                "command": [
                                    "/app/deploy/scripts/wait4pair_wrapper.sh"
                                ],
                                "args": [
                                    "/app/deploy/scripts/rsa_psi/run_psi_data_join_worker.sh"
                                ],
                                "resources": {
                                    "limits": {
                                        "cpu": "${workflow.jobs.data-join-job.variables.worker_cpu}",
                                        "memory": "${workflow.jobs.data-join-job.variables.worker_mem}"
                                    },
                                    "requests": {
                                        "cpu": "${workflow.jobs.data-join-job.variables.worker_cpu}",
                                        "memory": "${workflow.jobs.data-join-job.variables.worker_mem}"
                                    }
                                }
                            }
                        ],
                        "imagePullSecrets": [
                            {
                                "name": "regcred"
                            }
                        ],
                        "volumes": [
                            {
                                "persistentVolumeClaim": {
                                    "claimName": "pvc-fedlearner-default"
                                },
                                "name": "data"
                            }
                        ]
                    }
                },
                "pair": true,
                "replicas": ${workflow.variables.num_partitions}
            }
        }
    }
}
                '''),
            JobDefinition(name='train-job',
                          job_type=JobDefinition.TREE_MODEL_TRAINING,
                          is_federated=True,
                          variables=[
                              Variable(name='worker_cpu',
                                       value='4000m',
                                       access_mode=Variable.PEER_WRITABLE),
                              Variable(name='worker_mem',
                                       value='8Gi',
                                       access_mode=Variable.PEER_WRITABLE),
                              Variable(name='send_scores_to_follower',
                                       value='True',
                                       access_mode=Variable.PEER_WRITABLE),
                              Variable(name='send_metrics_to_follower',
                                       value='True',
                                       access_mode=Variable.PEER_WRITABLE),
                              Variable(name='num_parallel',
                                       value='4',
                                       access_mode=Variable.PEER_WRITABLE),
                          ],
                          dependencies=[JobDependency(source='data-join-job')],
                          yaml_template='''
                {
    "apiVersion": "fedlearner.k8s.io/v1alpha1",
    "kind": "FLApp",
    "metadata": {
        "name": "${workflow.jobs.train-job.name}",
        "namespace": "${project.variables.namespace}"
    },
    "spec": {
        "role": "Leader",
        "cleanPodPolicy": "All",
        "peerSpecs": {
            "Leader": {
                "peerURL": "fedlearner-stack-ingress-nginx-controller.default.svc.cluster.local:80",
                "authority": "${project.participants[0].egress_domain}",
                "extraHeaders": {
                    "x-host": "default.fedlearner.operator"
                }
            }
        },
        "flReplicaSpecs": {
            "Worker": {
                "template": {
                    "spec": {
                        "restartPolicy": "Never",
                        "containers": [
                            {
                                "env": [
                                    ${system.basic_envs},
                                    {
                                        "name": "EGRESS_URL",
                                        "value": "fedlearner-stack-ingress-nginx-controller.default.svc.cluster.local:80"
                                    },
                                    {
                                        "name": "EGRESS_HOST",
                                        "value": "${project.participants[0].egress_host}"
                                    },
                                    {
                                        "name": "EGRESS_DOMAIN",
                                        "value": "${project.participants[0].egress_domain}"
                                    },
                                    {
                                        "name": "APPLICATION_ID",
                                        "value": "${workflow.jobs.train-job.name}"
                                    },
                                    {
                                        "name": "STORAGE_ROOT_PATH",
                                        "value": "${project.variables.storage_root_dir}"
                                    },
                                    {
                                        "name": "ROLE",
                                        "value": "leader"
                                    },
                                    {
                                        "name": "OUTPUT_BASE_DIR",
                                        "value": "${project.variables.storage_root_dir}/job_output/${workflow.jobs.train-job.name}"
                                    },
                                    {
                                        "name": "MODE",
                                        "value": "train"
                                    },
                                    {
                                        "name": "SEND_SCORES_TO_FOLLOWER",
                                        "value": "${workflow.jobs.train-job.variables.send_scores_to_follower}"
                                    },
                                    {
                                        "name": "SEND_METRICS_TO_FOLLOWER",
                                        "value": "${workflow.jobs.train-job.variables.send_metrics_to_follower}"
                                    },
                                    {
                                        "name": "NUM_PARALLEL",
                                        "value": "${workflow.jobs.train-job.variables.num_parallel}"
                                    },
                                    {
                                        "name": "DATA_SOURCE",
                                        "value": "${workflow.jobs.data-join-job.name}"
                                    }
                                ],
                                "imagePullPolicy": "IfNotPresent",
                                "name": "tensorflow",
                                "volumeMounts": [
                                    {
                                        "mountPath": "/data",
                                        "name": "data"
                                    }
                                ],
                                "image": "artifact.bytedance.com/fedlearner/fedlearner:${workflow.variables.image_version}",
                                "ports": [
                                    {
                                        "containerPort": 50051,
                                        "name": "flapp-port"
                                    }
                                ],
                                "command": [
                                    "/app/deploy/scripts/wait4pair_wrapper.sh"
                                ],
                                "args": [
                                    "/app/deploy/scripts/trainer/run_tree_worker.sh"
                                ],
                                "resources": {
                                      "limits": {
                                            "cpu": "${workflow.jobs.train-job.variables.worker_cpu}",
                                            "memory": "${workflow.jobs.train-job.variables.worker_mem}"
                                      },
                                      "requests": {
                                            "cpu": "${workflow.jobs.train-job.variables.worker_cpu}",
                                            "memory": "${workflow.jobs.train-job.variables.worker_mem}"
                                      }
                                }
                            }
                        ],
                        "imagePullSecrets": [
                            {
                                "name": "regcred"
                            }
                        ],
                        "volumes": [
                            {
                                "persistentVolumeClaim": {
                                    "claimName": "pvc-fedlearner-default"
                                },
                                "name": "data"
                            }
                        ]
                    }
                },
                "pair": true,
                "replicas": 1
            }
        }
    }
}
                ''')
        ])

    return workflow
コード例 #11
0
def make_workflow_template():
    workflow = WorkflowDefinition(
        group_alias='test_template',
        is_left=True,
        variables=[
            Variable(name='image_version',
                     value='v1.5-rc3',
                     access_mode=Variable.PEER_READABLE),
            Variable(name='num_partitions',
                     value='4',
                     access_mode=Variable.PEER_WRITABLE),
        ],
        job_definitions=[
            JobDefinition(
                name='raw_data_job',
                job_type=JobDefinition.RAW_DATA,
                is_federated=False,
                is_manual=False,
                variables=[
                    Variable(
                        name='input_dir',
                        value='/app/deploy/integrated_test/tfrecord_raw_data',
                        access_mode=Variable.PRIVATE),
                    Variable(name='file_wildcard',
                             value='*.rd',
                             access_mode=Variable.PRIVATE),
                    Variable(name='batch_size',
                             value='1024',
                             access_mode=Variable.PEER_WRITABLE),
                    Variable(name='input_format',
                             value='TF_RECORD',
                             access_mode=Variable.PRIVATE),
                    Variable(name='output_format',
                             value='TF_RECORD',
                             access_mode=Variable.PRIVATE),
                    Variable(name='master_cpu',
                             value='2',
                             access_mode=Variable.PEER_WRITABLE),
                    Variable(name='master_mem',
                             value='3Gi',
                             access_mode=Variable.PEER_WRITABLE),
                    Variable(name='worker_cpu',
                             value='2',
                             access_mode=Variable.PEER_WRITABLE),
                    Variable(name='worker_mem',
                             value='3Gi',
                             access_mode=Variable.PEER_WRITABLE),
                ],
                yaml_template='''{
  "apiVersion": "fedlearner.k8s.io/v1alpha1",
  "kind": "FLApp",
  "metadata": {
    "name": "${workflow.jobs.raw_data_job.name}",
    "namespace": "${project.variables.namespace}"
  },
  "spec": {
    "cleanPodPolicy": "All",
    "flReplicaSpecs": {
      "Master": {
        "pair": false,
        "replicas": 1,
        "template": {
          "spec": {
            "containers": [
              {
                "command": [
                  "/app/deploy/scripts/data_portal/run_data_portal_master.sh"
                ],
                "env": [
                  {
                    "name": "POD_IP",
                    "valueFrom": {
                      "fieldRef": {
                        "fieldPath": "status.podIP"
                      }
                    }
                  },
                  {
                    "name": "POD_NAME",
                    "valueFrom": {
                      "fieldRef": {
                        "fieldPath": "metadata.name"
                      }
                    }
                  },
                  ${system.basic_envs},
                  ${project.variables.basic_envs},
                  {
                    "name": "APPLICATION_ID",
                    "value": "${workflow.jobs.raw_data_job.name}"
                  },
                  {
                    "name": "DATA_PORTAL_NAME",
                    "value": "${workflow.jobs.raw_data_job.name}"
                  },
                  {
                    "name": "OUTPUT_PARTITION_NUM",
                    "value": "${workflow.variables.num_partitions}"
                  },
                  {
                    "name": "INPUT_BASE_DIR",
                    "value": "${workflow.jobs.raw_data_job.variables.input_dir}"
                  },
                  {
                    "name": "OUTPUT_BASE_DIR",
                    "value": "${project.variables.storage_root_dir}/raw_data/${workflow.jobs.raw_data_job.name}"
                  },
                  {
                    "name": "RAW_DATA_PUBLISH_DIR",
                    "value": "portal_publish_dir/${workflow.jobs.raw_data_job.name}"
                  },
                  {
                    "name": "DATA_PORTAL_TYPE",
                    "value": "Streaming"
                  },
                  {
                    "name": "FILE_WILDCARD",
                    "value": "${workflow.jobs.raw_data_job.variables.file_wildcard}"
                  }
                ],
                "image": "hub.docker.com/fedlearner/fedlearner:${workflow.variables.image_version}",
                "imagePullPolicy": "IfNotPresent",
                "name": "tensorflow",
                "ports": [
                  {
                    "containerPort": 50051,
                    "name": "flapp-port"
                  }
                ],
                "resources": {
                  "limits": {
                    "cpu": "${workflow.jobs.raw_data_job.variables.master_cpu}",
                    "memory": "${workflow.jobs.raw_data_job.variables.master_mem}"
                  },
                  "requests": {
                    "cpu": "${workflow.jobs.raw_data_job.variables.master_cpu}",
                    "memory": "${workflow.jobs.raw_data_job.variables.master_mem}"
                  }
                },
                "volumeMounts": [
                  {
                    "mountPath": "/data",
                    "name": "data"
                  }
                ]
              }
            ],
            "imagePullSecrets": [
              {
                "name": "regcred"
              }
            ],
            "restartPolicy": "Never",
            "volumes": [
              {
                "name": "data",
                "persistentVolumeClaim": {
                  "claimName": "pvc-fedlearner-default"
                }
              }
            ]
          }
        }
      },
      "Worker": {
        "pair": false,
        "replicas": ${workflow.variables.num_partitions},
        "template": {
          "metadata": {
            "creationTimestamp": null
          },
          "spec": {
            "containers": [
              {
                "command": [
                  "/app/deploy/scripts/data_portal/run_data_portal_worker.sh"
                ],
                "env": [
                  {
                    "name": "POD_IP",
                    "valueFrom": {
                      "fieldRef": {
                        "fieldPath": "status.podIP"
                      }
                    }
                  },
                  {
                    "name": "POD_NAME",
                    "valueFrom": {
                      "fieldRef": {
                        "fieldPath": "metadata.name"
                      }
                    }
                  },
                  ${system.basic_envs},
                  ${project.variables.basic_envs},
                  {
                    "name": "CPU_REQUEST",
                    "valueFrom": {
                      "resourceFieldRef": {
                        "divisor": "0",
                        "resource": "requests.cpu"
                      }
                    }
                  },
                  {
                    "name": "MEM_REQUEST",
                    "valueFrom": {
                      "resourceFieldRef": {
                        "divisor": "0",
                        "resource": "requests.memory"
                      }
                    }
                  },
                  {
                    "name": "CPU_LIMIT",
                    "valueFrom": {
                      "resourceFieldRef": {
                        "divisor": "0",
                        "resource": "limits.cpu"
                      }
                    }
                  },
                  {
                    "name": "MEM_LIMIT",
                    "valueFrom": {
                      "resourceFieldRef": {
                        "divisor": "0",
                        "resource": "limits.memory"
                      }
                    }
                  },
                  {
                    "name": "APPLICATION_ID",
                    "value": "${workflow.jobs.raw_data_job.name}"
                  },
                  {
                    "name": "BATCH_SIZE",
                    "value": "${workflow.jobs.raw_data_job.variables.batch_size}"
                  },
                  {
                    "name": "INPUT_DATA_FORMAT",
                    "value": "${workflow.jobs.raw_data_job.variables.input_format}"
                  },
                  {
                    "name": "COMPRESSED_TYPE"
                  },
                  {
                    "name": "OUTPUT_DATA_FORMAT",
                    "value": "${workflow.jobs.raw_data_job.variables.output_format}"
                  }
                ],
                "image": "hub.docker.com/fedlearner/fedlearner:${workflow.variables.image_version}",
                "imagePullPolicy": "IfNotPresent",
                "name": "tensorflow",
                "resources": {
                  "limits": {
                    "cpu": "${workflow.jobs.raw_data_job.variables.worker_cpu}",
                    "memory": "${workflow.jobs.raw_data_job.variables.worker_mem}"
                  },
                  "requests": {
                    "cpu": "${workflow.jobs.raw_data_job.variables.worker_cpu}",
                    "memory": "${workflow.jobs.raw_data_job.variables.worker_mem}"
                  }
                },
                "volumeMounts": [
                  {
                    "mountPath": "/data",
                    "name": "data"
                  }
                ]
              }
            ],
            "imagePullSecrets": [
              {
                "name": "regcred"
              }
            ],
            "restartPolicy": "Never",
            "volumes": [
              {
                "name": "data",
                "persistentVolumeClaim": {
                  "claimName": "pvc-fedlearner-default"
                }
              }
            ]
          }
        }
      }
    },
    "peerSpecs": {
      "Leader": {
        "peerURL": ""
      }
    },
    "role": "Follower"
  }
}
                '''),
            JobDefinition(name='data_join_job',
                          job_type=JobDefinition.DATA_JOIN,
                          is_federated=True,
                          is_manual=False,
                          variables=[
                              Variable(name='master_cpu',
                                       value='2',
                                       access_mode=Variable.PEER_WRITABLE),
                              Variable(name='master_mem',
                                       value='3Gi',
                                       access_mode=Variable.PEER_WRITABLE),
                              Variable(name='worker_cpu',
                                       value='2',
                                       access_mode=Variable.PEER_WRITABLE),
                              Variable(name='worker_mem',
                                       value='3Gi',
                                       access_mode=Variable.PEER_WRITABLE),
                              Variable(name='role',
                                       value='Follower',
                                       access_mode=Variable.PEER_WRITABLE),
                          ],
                          dependencies=[JobDependency(source='raw_data_job')],
                          yaml_template='''
{
  "apiVersion": "fedlearner.k8s.io/v1alpha1",
  "kind": "FLApp",
  "metadata": {
    "name": "${workflow.jobs.data_join_job.name}",
    "namespace": "${project.variables.namespace}"
  },
  "spec": {
    "cleanPodPolicy": "All",
    "flReplicaSpecs": {
      "Master": {
        "pair": true,
        "replicas": 1,
        "template": {
          "metadata": {
            "creationTimestamp": null
          },
          "spec": {
            "containers": [
              {
                "args": [
                  "/app/deploy/scripts/data_join/run_data_join_master.sh"
                ],
                "command": [
                  "/app/deploy/scripts/wait4pair_wrapper.sh"
                ],
                "env": [
                  {
                    "name": "POD_IP",
                    "valueFrom": {
                      "fieldRef": {
                        "fieldPath": "status.podIP"
                      }
                    }
                  },
                  {
                    "name": "POD_NAME",
                    "valueFrom": {
                      "fieldRef": {
                        "fieldPath": "metadata.name"
                      }
                    }
                  },
                  ${system.basic_envs},
                  ${project.variables.basic_envs},
                  {
                    "name": "ROLE",
                    "value": "${workflow.jobs.data_join_job.variables.role}"
                  },
                  {
                    "name": "APPLICATION_ID",
                    "value": "${workflow.jobs.data_join_job.name}"
                  },
                  {
                    "name": "OUTPUT_BASE_DIR",
                    "value": "${project.variables.storage_root_dir}/data_source/${workflow.jobs.data_join_job.name}"
                  },
                  {
                    "name": "CPU_REQUEST",
                    "valueFrom": {
                      "resourceFieldRef": {
                        "divisor": "0",
                        "resource": "requests.cpu"
                      }
                    }
                  },
                  {
                    "name": "MEM_REQUEST",
                    "valueFrom": {
                      "resourceFieldRef": {
                        "divisor": "0",
                        "resource": "requests.memory"
                      }
                    }
                  },
                  {
                    "name": "CPU_LIMIT",
                    "valueFrom": {
                      "resourceFieldRef": {
                        "divisor": "0",
                        "resource": "limits.cpu"
                      }
                    }
                  },
                  {
                    "name": "MEM_LIMIT",
                    "valueFrom": {
                      "resourceFieldRef": {
                        "divisor": "0",
                        "resource": "limits.memory"
                      }
                    }
                  },
                  {
                    "name": "BATCH_MODE",
                    "value": "--batch_mode"
                  },
                  {
                    "name": "PARTITION_NUM",
                    "value": "${workflow.jobs.raw_data_job.variables.num_partitions}"
                  },
                  {
                    "name": "START_TIME",
                    "value": "0"
                  },
                  {
                    "name": "END_TIME",
                    "value": "999999999999"
                  },
                  {
                    "name": "NEGATIVE_SAMPLING_RATE",
                    "value": "1.0"
                  },
                  {
                    "name": "RAW_DATA_SUB_DIR",
                    "value": "portal_publish_dir/${workflow.jobs.data_join_job.name}"
                  },
                  {
                    "name": "RAW_DATA_SUB_DIR",
                    "value": "portal_publish_dir/${workflow.jobs.data_join_job.name}"
                  },
                  {
                    "name": "PARTITION_NUM",
                    "value": "${workflow.jobs.raw_data_job.variables.num_partitions}"
                  }
                ],
                "image": "hub.docker.com/fedlearner/fedlearner:${workflow.variables.image_version}",
                "imagePullPolicy": "IfNotPresent",
                "name": "tensorflow",
                "ports": [
                  {
                    "containerPort": 50051,
                    "name": "flapp-port"
                  }
                ],
                "resources": {
                  "limits": {
                    "cpu": "${workflow.jobs.data_join_job.variables.master_cpu}",
                    "memory": "${workflow.jobs.data_join_job.variables.master_mem}"
                  },
                  "requests": {
                    "cpu": "${workflow.jobs.data_join_job.variables.master_cpu}",
                    "memory": "${workflow.jobs.data_join_job.variables.master_mem}"
                  }
                },
                "volumeMounts": [
                  {
                    "mountPath": "/data",
                    "name": "data"
                  }
                ]
              }
            ],
            "imagePullSecrets": [
              {
                "name": "regcred"
              }
            ],
            "restartPolicy": "Never",
            "volumes": [
              {
                "name": "data",
                "persistentVolumeClaim": {
                  "claimName": "pvc-fedlearner-default"
                }
              }
            ]
          }
        }
      },
      "Worker": {
        "pair": true,
        "replicas": ${workflow.jobs.raw_data_job.variables.num_partitions},
        "template": {
          "metadata": {
            "creationTimestamp": null
          },
          "spec": {
            "containers": [
              {
                "args": [
                  "/app/deploy/scripts/data_join/run_data_join_worker.sh"
                ],
                "command": [
                  "/app/deploy/scripts/wait4pair_wrapper.sh"
                ],
                "env": [
                  {
                    "name": "POD_IP",
                    "valueFrom": {
                      "fieldRef": {
                        "fieldPath": "status.podIP"
                      }
                    }
                  },
                  {
                    "name": "POD_NAME",
                    "valueFrom": {
                      "fieldRef": {
                        "fieldPath": "metadata.name"
                      }
                    }
                  },
                  ${system.basic_envs},
                  ${project.variables.basic_envs},
                  {
                    "name": "ROLE",
                    "value": "${workflow.jobs.data_join_job.variables.role}"
                  },
                  {
                    "name": "APPLICATION_ID",
                    "value": "${workflow.jobs.data_join_job.name}"
                  },
                  {
                    "name": "OUTPUT_BASE_DIR",
                    "value": "${project.variables.storage_root_dir}/data_source/${workflow.jobs.data_join_job.name}"
                  },
                  {
                    "name": "CPU_REQUEST",
                    "valueFrom": {
                      "resourceFieldRef": {
                        "divisor": "0",
                        "resource": "requests.cpu"
                      }
                    }
                  },
                  {
                    "name": "MEM_REQUEST",
                    "valueFrom": {
                      "resourceFieldRef": {
                        "divisor": "0",
                        "resource": "requests.memory"
                      }
                    }
                  },
                  {
                    "name": "CPU_LIMIT",
                    "valueFrom": {
                      "resourceFieldRef": {
                        "divisor": "0",
                        "resource": "limits.cpu"
                      }
                    }
                  },
                  {
                    "name": "MEM_LIMIT",
                    "valueFrom": {
                      "resourceFieldRef": {
                        "divisor": "0",
                        "resource": "limits.memory"
                      }
                    }
                  },
                  {
                    "name": "PARTITION_NUM",
                    "value": "${workflow.jobs.raw_data_job.variables.num_partitions}"
                  },
                  {
                    "name": "RAW_DATA_SUB_DIR",
                    "value": "portal_publish_dir/${workflow.jobs.data_join_job.name}"
                  },
                  {
                    "name": "DATA_BLOCK_DUMP_INTERVAL",
                    "value": "600"
                  },
                  {
                    "name": "DATA_BLOCK_DUMP_THRESHOLD",
                    "value": "65536"
                  },
                  {
                    "name": "EXAMPLE_ID_DUMP_INTERVAL",
                    "value": "600"
                  },
                  {
                    "name": "EXAMPLE_ID_DUMP_THRESHOLD",
                    "value": "65536"
                  },
                  {
                    "name": "EXAMPLE_ID_BATCH_SIZE",
                    "value": "4096"
                  },
                  {
                    "name": "MAX_FLYING_EXAMPLE_ID",
                    "value": "307152"
                  },
                  {
                    "name": "MIN_MATCHING_WINDOW",
                    "value": "2048"
                  },
                  {
                    "name": "MAX_MATCHING_WINDOW",
                    "value": "8192"
                  },
                  {
                    "name": "RAW_DATA_ITER",
                    "value": "${workflow.jobs.raw_data_job.variables.output_format}"
                  },
                  {
                    "name": "RAW_DATA_SUB_DIR",
                    "value": "portal_publish_dir/${workflow.jobs.raw_data_job.name}"
                  },
                  {
                    "name": "PARTITION_NUM",
                    "value": "${workflow.jobs.raw_data_job.variables.num_partitions}"
                  }
                ],
                "image": "artifact.bytedance.com/fedlearner/fedlearner:5b499dd",
                "imagePullPolicy": "IfNotPresent",
                "name": "tensorflow",
                "ports": [
                  {
                    "containerPort": 50051,
                    "name": "flapp-port"
                  }
                ],
                "resources": {
                  "limits": {
                    "cpu": "${workflow.jobs.data_join_job.variables.master_cpu}",
                    "memory": "${workflow.jobs.data_join_job.variables.master_mem}"
                  },
                  "requests": {
                    "cpu": "${workflow.jobs.data_join_job.variables.master_cpu}",
                    "memory": "${workflow.jobs.data_join_job.variables.master_mem}"
                  }
                },
                "volumeMounts": [
                  {
                    "mountPath": "/data",
                    "name": "data"
                  }
                ]
              }
            ],
            "imagePullSecrets": [
              {
                "name": "regcred"
              }
            ],
            "restartPolicy": "Never",
            "volumes": [
              {
                "name": "data",
                "persistentVolumeClaim": {
                  "claimName": "pvc-fedlearner-default"
                }
              }
            ]
          }
        }
      }
    },
    "peerSpecs": {
      "Follower": {
        "authority": "external.name",
        "extraHeaders": {
          "x-host": "leader.flapp.operator"
        },
        "peerURL": "fedlearner-stack-ingress-nginx-controller.default.svc.cluster.local:80"
      }
    },
    "role": "Leader"
  }
}
                ''')
        ])

    return workflow