コード例 #1
0
    def patch(self, project_id):
        """Partially update a project from the JSON request body.

        Recognised body keys, each applied only when present and not None:
        ``token`` (written to both the config and the project row),
        ``variables`` (replaces the whole config variable list) and
        ``comment``.

        Every participant's ``grpc_spec.egress_url`` is rewritten on each
        call, using the value of the ``EGRESS_URL`` variable when one exists
        and the built-in default otherwise.

        Args:
            project_id: id of the project to update.

        Returns:
            A dict ``{'data': project.to_dict()}`` for the updated project.

        Raises:
            NotFoundException: no project has the given id.
            InvalidArgumentException: committing the change failed; the
                original error is passed as ``details`` and chained.
        """
        project = Project.query.filter_by(id=project_id).first()
        if project is None:
            raise NotFoundException()

        config = project.get_config()
        # NOTE(review): assumes the body is a JSON object -- confirm callers.
        payload = request.json

        new_token = payload.get('token')
        if new_token is not None:
            config.token = new_token
            project.token = new_token

        new_variables = payload.get('variables')
        if new_variables is not None:
            # Full replacement: drop the old variables before adding new ones.
            del config.variables[:]
            config.variables.extend([
                ParseDict(variable, Variable())
                for variable in new_variables
            ])

        egress_url = ('fedlearner-stack-ingress-nginx-controller.default'
                      '.svc.cluster.local:80')
        for variable in config.variables:
            # No break: if the name is repeated, the last occurrence wins.
            if variable.name == 'EGRESS_URL':
                egress_url = variable.value

        for participant in config.participants:
            participant.grpc_spec.egress_url = egress_url
            # TODO: update add-on's custom_host (the CUSTOM_HOST variable)
        project.set_config(config)

        new_comment = payload.get('comment')
        if new_comment is not None:
            project.comment = new_comment

        try:
            db.session.commit()
        except Exception as e:
            # Undo the failed transaction before reporting the error.
            db.session.rollback()
            raise InvalidArgumentException(details=e) from e
        return {'data': project.to_dict()}
コード例 #2
0
ファイル: apis.py プロジェクト: cosmtrek/fedlearner
    def patch(self, project_id):
        """Partially update a project from the JSON request body.

        Recognised body keys: ``token`` and ``variables`` are applied when
        present and not None (``variables`` replaces the whole list);
        ``participant_name`` and ``comment`` are applied only when truthy.
        ``participant_name`` renames the first participant.

        For every participant whose ``domain_name`` has an entry in the
        project certificate, ``_create_add_on`` is called with that entry and
        the value of the ``GRPC_SSL_SERVER_HOST`` variable (None when unset).
        When an ``EGRESS_HOST`` variable has a non-empty value it is written
        to each participant's ``grpc_spec.authority``.

        Args:
            project_id: id of the project to update.

        Returns:
            A dict ``{'data': project.to_dict()}`` for the updated project.

        Raises:
            NotFoundException: no project has the given id.
            InvalidArgumentException: committing the change failed; the
                original error is passed as ``details`` and chained.
        """
        project = Project.query.filter_by(id=project_id).first()
        if project is None:
            raise NotFoundException(f'Failed to find project: {project_id}')

        config = project.get_config()
        # NOTE(review): assumes the body is a JSON object -- confirm callers.
        payload = request.json

        new_token = payload.get('token')
        if new_token is not None:
            config.token = new_token
            project.token = new_token

        new_variables = payload.get('variables')
        if new_variables is not None:
            # Full replacement: drop the old variables before adding new ones.
            del config.variables[:]
            config.variables.extend([
                ParseDict(variable, Variable())
                for variable in new_variables
            ])

        # extract configuration from variables
        grpc_ssl_server_host = None
        egress_host = None
        for variable in config.variables:
            # No break: if a name is repeated, the last occurrence wins.
            if variable.name == 'GRPC_SSL_SERVER_HOST':
                grpc_ssl_server_host = variable.value
            if variable.name == 'EGRESS_HOST':
                egress_host = variable.value

        participant_name = payload.get('participant_name')
        if participant_name:
            # NOTE(review): assumes at least one participant exists.
            config.participants[0].name = participant_name

        new_comment = payload.get('comment')
        if new_comment:
            project.comment = new_comment

        # Fetch the certificate once instead of up to twice per participant.
        # NOTE(review): assumes get_certificate() has no side effects and
        # returns equivalent data on every call -- confirm.
        certificate = project.get_certificate()
        for participant in config.participants:
            domain_certs = certificate.domain_name_to_cert
            if participant.domain_name in domain_certs:
                _create_add_on(participant,
                               domain_certs[participant.domain_name],
                               grpc_ssl_server_host)
            if egress_host:
                participant.grpc_spec.authority = egress_host
        project.set_config(config)

        try:
            db.session.commit()
        except Exception as e:
            # Undo the failed transaction before reporting the error.
            db.session.rollback()
            raise InvalidArgumentException(details=e) from e
        return {'data': project.to_dict()}
コード例 #3
0
    def patch(self, project_id):
        """Partially update a project from the JSON request body.

        Recognised body keys: ``token`` and ``variables`` are applied when
        present and not None (``variables`` replaces the whole list);
        ``comment`` is applied only when truthy.

        After the config is stored, ``_create_add_on`` is called for every
        participant whose ``domain_name`` has an entry in the project
        certificate, passing that entry and the value of the ``CUSTOM_HOST``
        variable (None when unset).

        Args:
            project_id: id of the project to update.

        Returns:
            A dict ``{'data': project.to_dict()}`` for the updated project.

        Raises:
            NotFoundException: no project has the given id.
            InvalidArgumentException: committing the change failed; the
                original error is passed as ``details`` and chained.
        """
        project = Project.query.filter_by(id=project_id).first()
        if project is None:
            raise NotFoundException()

        config = project.get_config()
        # NOTE(review): assumes the body is a JSON object -- confirm callers.
        payload = request.json

        new_token = payload.get('token')
        if new_token is not None:
            config.token = new_token
            project.token = new_token

        new_variables = payload.get('variables')
        if new_variables is not None:
            # Full replacement: drop the old variables before adding new ones.
            del config.variables[:]
            config.variables.extend([
                ParseDict(variable, Variable())
                for variable in new_variables
            ])

        # extract configuration from variables
        custom_host = None
        for variable in config.variables:
            # No break: if the name is repeated, the last occurrence wins.
            if variable.name == 'CUSTOM_HOST':
                custom_host = variable.value

        project.set_config(config)

        new_comment = payload.get('comment')
        if new_comment:
            project.comment = new_comment

        # Fetch the certificate once instead of up to twice per participant.
        # NOTE(review): assumes get_certificate() has no side effects and
        # returns equivalent data on every call -- confirm.
        certificate = project.get_certificate()
        for participant in project.get_config().participants:
            domain_certs = certificate.domain_name_to_cert
            if participant.domain_name in domain_certs:
                _create_add_on(participant,
                               domain_certs[participant.domain_name],
                               custom_host)

        try:
            db.session.commit()
        except Exception as e:
            # Undo the failed transaction before reporting the error.
            db.session.rollback()
            raise InvalidArgumentException(details=e) from e
        return {'data': project.to_dict()}
コード例 #4
0
    def patch(self, project_id):
        """Partially update a project from the JSON request body.

        Recognised body keys, each applied only when present and not None:
        ``token`` (written to both the config and the project row),
        ``variables`` (replaces the whole config variable list) and
        ``comment``.

        Args:
            project_id: id of the project to update.

        Returns:
            A dict ``{'data': project.to_dict()}`` for the updated project.

        Raises:
            NotFoundException: no project has the given id.
            InvalidArgumentException: committing the change failed; the
                original error is passed as ``details`` and chained.
        """
        project = Project.query.filter_by(id=project_id).first()
        if project is None:
            raise NotFoundException()

        config = project.get_config()
        # NOTE(review): assumes the body is a JSON object -- confirm callers.
        payload = request.json

        new_token = payload.get('token')
        if new_token is not None:
            config.token = new_token
            project.token = new_token

        new_variables = payload.get('variables')
        if new_variables is not None:
            # Full replacement: drop the old variables before adding new ones.
            del config.variables[:]
            config.variables.extend([
                ParseDict(variable, Variable())
                for variable in new_variables
            ])
        project.set_config(config)

        new_comment = payload.get('comment')
        if new_comment is not None:
            project.comment = new_comment

        try:
            db.session.commit()
        except Exception as e:
            # Undo the failed transaction before reporting the error.
            db.session.rollback()
            raise InvalidArgumentException(details=e) from e
        return {'data': project.to_dict()}
コード例 #5
0
def make_workflow_template():
    workflow = WorkflowDefinition(
        group_alias='psi_join_tree_model',
        is_left=False,
        variables=[
            Variable(name='image_version',
                     value='v1.5-rc3',
                     access_mode=Variable.PEER_READABLE),
            Variable(name='num_partitions',
                     value='2',
                     access_mode=Variable.PEER_WRITABLE),
        ],
        job_definitions=[
            JobDefinition(
                name='raw-data-job',
                job_type=JobDefinition.RAW_DATA,
                is_federated=False,
                variables=[
                    Variable(
                        name='input_dir',
                        value='/app/deploy/integrated_test/tfrecord_raw_data',
                        access_mode=Variable.PRIVATE),
                    Variable(name='file_wildcard',
                             value='*.rd',
                             access_mode=Variable.PRIVATE),
                    Variable(name='batch_size',
                             value='1024',
                             access_mode=Variable.PEER_WRITABLE),
                    Variable(name='input_format',
                             value='TF_RECORD',
                             access_mode=Variable.PRIVATE),
                    Variable(name='worker_cpu',
                             value='2000m',
                             access_mode=Variable.PEER_WRITABLE),
                    Variable(name='worker_mem',
                             value='4Gi',
                             access_mode=Variable.PEER_WRITABLE),
                ],
                yaml_template='''{
    "apiVersion": "fedlearner.k8s.io/v1alpha1",
    "kind": "FLApp",
    "metadata": {
        "name": "${workflow.jobs.raw-data-job.name}",
        "namespace": "${project.variables.namespace}"
    },
    "spec": {
        "cleanPodPolicy": "All",
        "flReplicaSpecs": {
            "Master": {
                "template": {
                    "spec": {
                        "containers": [
                            {
                                "resources": {
                                    "limits": {
                                        "cpu": "1000m",
                                        "memory": "2Gi"
                                    },
                                    "requests": {
                                        "cpu": "1000m",
                                        "memory": "2Gi"
                                    }
                                },
                                "image": "artifact.bytedance.com/fedlearner/fedlearner:${workflow.variables.image_version}",
                                "ports": [
                                    {
                                        "containerPort": 50051,
                                        "name": "flapp-port"
                                    }
                                ],
                                "command": [
                                    "/app/deploy/scripts/data_portal/run_data_portal_master.sh"
                                ],
                                "args": [],
                                "env": [
                                    ${system.basic_envs},
                                    {
                                        "name": "EGRESS_URL",
                                        "value": "fedlearner-stack-ingress-nginx-controller.default.svc.cluster.local:80"
                                    },
                                    {
                                        "name": "EGRESS_HOST",
                                        "value": "${project.participants[0].egress_host}"
                                    },
                                    {
                                        "name": "EGRESS_DOMAIN",
                                        "value": "${project.participants[0].egress_domain}"
                                    },
                                    {
                                        "name": "STORAGE_ROOT_PATH",
                                        "value": "${project.variables.storage_root_dir}"
                                    },
                                    {
                                        "name": "APPLICATION_ID",
                                        "value": "${workflow.jobs.raw-data-job.name}"
                                    },
                                    {
                                        "name": "DATA_PORTAL_NAME",
                                        "value": "${workflow.jobs.raw-data-job.name}"
                                    },
                                    {
                                        "name": "OUTPUT_PARTITION_NUM",
                                        "value": "${workflow.variables.num_partitions}"
                                    },
                                    {
                                        "name": "INPUT_BASE_DIR",
                                        "value": "${workflow.jobs.raw-data-job.variables.input_dir}"
                                    },
                                    {
                                        "name": "OUTPUT_BASE_DIR",
                                        "value": "${project.variables.storage_root_dir}/raw_data/${workflow.jobs.raw-data-job.name}"
                                    },
                                    {
                                        "name": "RAW_DATA_PUBLISH_DIR",
                                        "value": "portal_publish_dir/${workflow.jobs.raw-data-job.name}"
                                    },
                                    {
                                        "name": "DATA_PORTAL_TYPE",
                                        "value": "PSI"
                                    },
                                    {
                                        "name": "FILE_WILDCARD",
                                        "value": "${workflow.jobs.raw-data-job.variables.file_wildcard}"
                                    }
                                ],
                                "volumeMounts": [
                                    {
                                        "mountPath": "/data",
                                        "name": "data"
                                    }
                                ],
                                "imagePullPolicy": "IfNotPresent",
                                "name": "tensorflow"
                            }
                        ],
                        "imagePullSecrets": [
                            {
                                "name": "regcred"
                            }
                        ],
                        "volumes": [
                            {
                                "persistentVolumeClaim": {
                                    "claimName": "pvc-fedlearner-default"
                                },
                                "name": "data"
                            }
                        ],
                        "restartPolicy": "Never"
                    }
                },
                "pair": false,
                "replicas": 1
            },
            "Worker": {
                "replicas": ${workflow.variables.num_partitions},
                "template": {
                    "spec": {
                        "containers": [
                            {
                                "resources": {
                                    "limits": {
                                        "cpu": "${workflow.jobs.raw-data-job.variables.worker_cpu}",
                                        "memory": "${workflow.jobs.raw-data-job.variables.worker_mem}"
                                    },
                                    "requests": {
                                        "cpu": "${workflow.jobs.raw-data-job.variables.worker_cpu}",
                                        "memory": "${workflow.jobs.raw-data-job.variables.worker_mem}"
                                    }
                                },
                                "image": "artifact.bytedance.com/fedlearner/fedlearner:${workflow.variables.image_version}",
                                "command": [
                                    "/app/deploy/scripts/data_portal/run_data_portal_worker.sh"
                                ],
                                "args": [],
                                "env": [
                                    ${system.basic_envs},
                                    {
                                        "name": "EGRESS_URL",
                                        "value": "fedlearner-stack-ingress-nginx-controller.default.svc.cluster.local:80"
                                    },
                                    {
                                        "name": "EGRESS_HOST",
                                        "value": "${project.participants[0].egress_host}"
                                    },
                                    {
                                        "name": "EGRESS_DOMAIN",
                                        "value": "${project.participants[0].egress_domain}"
                                    },
                                    {
                                        "name": "STORAGE_ROOT_PATH",
                                        "value": "${project.variables.storage_root_dir}"
                                    },
                                    {
                                        "name": "APPLICATION_ID",
                                        "value": "${workflow.jobs.raw-data-job.name}"
                                    },
                                    {
                                        "name": "BATCH_SIZE",
                                        "value": "${workflow.jobs.raw-data-job.variables.batch_size}"
                                    },
                                    {
                                        "name": "INPUT_DATA_FORMAT",
                                        "value": "${workflow.jobs.raw-data-job.variables.input_format}"
                                    },
                                    {
                                        "name": "COMPRESSED_TYPE",
                                        "value": ""
                                    },
                                    {
                                        "name": "OUTPUT_DATA_FORMAT",
                                        "value": "TF_RECORD"
                                    }
                                ],
                                "volumeMounts": [
                                    {
                                        "mountPath": "/data",
                                        "name": "data"
                                    }
                                ],
                                "imagePullPolicy": "IfNotPresent",
                                "name": "tensorflow"
                            }
                        ],
                        "imagePullSecrets": [
                            {
                                "name": "regcred"
                            }
                        ],
                        "volumes": [
                            {
                                "persistentVolumeClaim": {
                                    "claimName": "pvc-fedlearner-default"
                                },
                                "name": "data"
                            }
                        ],
                        "restartPolicy": "Never"
                    }
                },
                "pair": false
            }
        }
    }
}
                '''),
            JobDefinition(name='data-join-job',
                          job_type=JobDefinition.PSI_DATA_JOIN,
                          is_federated=True,
                          variables=[
                              Variable(name='worker_cpu',
                                       value='4000m',
                                       access_mode=Variable.PEER_WRITABLE),
                              Variable(name='worker_mem',
                                       value='4Gi',
                                       access_mode=Variable.PEER_WRITABLE),
                              Variable(name='rsa_private_key_path',
                                       value='',
                                       access_mode=Variable.PRIVATE),
                          ],
                          dependencies=[JobDependency(source='raw-data-job')],
                          yaml_template='''
{
    "apiVersion": "fedlearner.k8s.io/v1alpha1",
    "kind": "FLApp",
    "metadata": {
        "name": "${workflow.jobs.data-join-job.name}",
        "namespace": "${project.variables.namespace}"
    },
    "spec": {
        "role": "Leader",
        "cleanPodPolicy": "All",
        "peerSpecs": {
            "Follower": {
                "peerURL": "fedlearner-stack-ingress-nginx-controller.default.svc.cluster.local:80",
                "authority": "${project.participants[0].egress_domain}",
                "extraHeaders": {
                    "x-host": "default.fedlearner.operator"
                }
            }
        },
        "flReplicaSpecs": {
            "Master": {
                "template": {
                    "spec": {
                        "restartPolicy": "Never",
                        "containers": [
                            {
                                "env": [
                                    ${system.basic_envs},
                                    {
                                        "name": "EGRESS_URL",
                                        "value": "fedlearner-stack-ingress-nginx-controller.default.svc.cluster.local:80"
                                    },
                                    {
                                        "name": "EGRESS_HOST",
                                        "value": "${project.participants[0].egress_host}"
                                    },
                                    {
                                        "name": "EGRESS_DOMAIN",
                                        "value": "${project.participants[0].egress_domain}"
                                    },
                                    {
                                        "name": "APPLICATION_ID",
                                        "value": "${workflow.jobs.data-join-job.name}"
                                    },
                                    {
                                        "name": "STORAGE_ROOT_PATH",
                                        "value": "${project.variables.storage_root_dir}"
                                    },
                                    {
                                        "name": "ROLE",
                                        "value": "leader"
                                    },
                                    {
                                        "name": "OUTPUT_BASE_DIR",
                                        "value": "${project.variables.storage_root_dir}/data_source/${workflow.jobs.data-join-job.name}"
                                    },
                                    {
                                        "name": "PARTITION_NUM",
                                        "value": "${workflow.variables.num_partitions}"
                                    },
                                    {
                                        "name": "START_TIME",
                                        "value": "0"
                                    },
                                    {
                                        "name": "END_TIME",
                                        "value": "999999999999"
                                    },
                                    {
                                        "name": "NEGATIVE_SAMPLING_RATE",
                                        "value": "1.0"
                                    },
                                    {
                                        "name": "RAW_DATA_SUB_DIR",
                                        "value": "portal_publish_dir/${workflow.jobs.raw-data-job.name}"
                                    }
                                ],
                                "imagePullPolicy": "IfNotPresent",
                                "name": "tensorflow",
                                "volumeMounts": [
                                    {
                                        "mountPath": "/data",
                                        "name": "data"
                                    }
                                ],
                                "image": "artifact.bytedance.com/fedlearner/fedlearner:${workflow.variables.image_version}",
                                "ports": [
                                    {
                                        "containerPort": 50051,
                                        "name": "flapp-port"
                                    }
                                ],
                                "command": [
                                    "/app/deploy/scripts/wait4pair_wrapper.sh"
                                ],
                                "args": [
                                    "/app/deploy/scripts/rsa_psi/run_psi_data_join_master.sh"
                                ],
                                "resources": {
                                    "limits": {
                                        "cpu": "2000m",
                                        "memory": "3Gi"
                                    },
                                    "requests": {
                                        "cpu": "2000m",
                                        "memory": "3Gi"
                                    }
                                },
                            }
                        ],
                        "imagePullSecrets": [
                            {
                                "name": "regcred"
                            }
                        ],
                        "volumes": [
                            {
                                "persistentVolumeClaim": {
                                    "claimName": "pvc-fedlearner-default"
                                },
                                "name": "data"
                            }
                        ]
                    }
                },
                "pair": true,
                "replicas": 1
            },
            "Worker": {
                "template": {
                    "spec": {
                        "restartPolicy": "Never",
                        "containers": [
                            {
                                "env": [
                                    ${system.basic_envs},
                                    {
                                        "name": "EGRESS_URL",
                                        "value": "fedlearner-stack-ingress-nginx-controller.default.svc.cluster.local:80"
                                    },
                                    {
                                        "name": "EGRESS_HOST",
                                        "value": "${project.participants[0].egress_host}"
                                    },
                                    {
                                        "name": "EGRESS_DOMAIN",
                                        "value": "${project.participants[0].egress_domain}"
                                    },
                                    {
                                        "name": "STORAGE_ROOT_PATH",
                                        "value": "${project.variables.storage_root_dir}"
                                    },
                                    {
                                        "name": "ROLE",
                                        "value": "follower"
                                    },
                                    {
                                        "name": "APPLICATION_ID",
                                        "value": "${workflow.jobs.data-join-job.name}"
                                    },
                                    {
                                        "name": "OUTPUT_BASE_DIR",
                                        "value": "${project.variables.storage_root_dir}/data_source/${workflow.jobs.data-join-job.name}"
                                    },
                                    {
                                        "name": "RSA_KEY_PATH",
                                        "value": "${workflow.jobs.data-join-job.rsa_private_key_path}"
                                    },
                                    {
                                        "name": "RSA_PRIVATE_KEY_PATH",
                                        "value": "${workflow.jobs.data-join-job.rsa_private_key_path}"
                                    },
                                    {
                                        "name": "PSI_RAW_DATA_ITER",
                                        "value": "TF_RECORD"
                                    },
                                    {
                                        "name": "PSI_OUTPUT_BUILDER",
                                        "value": "TF_RECORD"
                                    },
                                    {
                                        "name": "DATA_BLOCK_BUILDER",
                                        "value": "TF_RECORD"
                                    },
                                    {
                                        "name": "DATA_BLOCK_DUMP_INTERVAL",
                                        "value": "600"
                                    },
                                    {
                                        "name": "DATA_BLOCK_DUMP_THRESHOLD",
                                        "value": "524288"
                                    },
                                    {
                                        "name": "EXAMPLE_ID_DUMP_INTERVAL",
                                        "value": "600"
                                    },
                                    {
                                        "name": "EXAMPLE_ID_DUMP_THRESHOLD",
                                        "value": "524288"
                                    },
                                    {
                                        "name": "EXAMPLE_JOINER",
                                        "value": "SORT_RUN_JOINER"
                                    },
                                    {
                                        "name": "SIGN_RPC_TIMEOUT_MS",
                                        "value": "128000"
                                    },
                                    {
                                        "name": "RAW_DATA_SUB_DIR",
                                        "value": "portal_publish_dir/${workflow.jobs.raw-data-job.name}"
                                    },
                                    {
                                        "name": "PARTITION_NUM",
                                        "value": "${workflow.variables.num_partitions}"
                                    }
                                ],
                                "imagePullPolicy": "IfNotPresent",
                                "name": "tensorflow",
                                "volumeMounts": [
                                    {
                                        "mountPath": "/data",
                                        "name": "data"
                                    }
                                ],
                                "image": "artifact.bytedance.com/fedlearner/fedlearner:${workflow.variables.image_version}",
                                "ports": [
                                    {
                                        "containerPort": 50051,
                                        "name": "flapp-port"
                                    }
                                ],
                                "command": [
                                    "/app/deploy/scripts/wait4pair_wrapper.sh"
                                ],
                                "args": [
                                    "/app/deploy/scripts/rsa_psi/run_psi_data_join_worker.sh"
                                ],
                                "resources": {
                                    "limits": {
                                        "cpu": "${workflow.jobs.data-join-job.variables.worker_cpu}",
                                        "memory": "${workflow.jobs.data-join-job.variables.worker_mem}"
                                    },
                                    "requests": {
                                        "cpu": "${workflow.jobs.data-join-job.variables.worker_cpu}",
                                        "memory": "${workflow.jobs.data-join-job.variables.worker_mem}"
                                    }
                                }
                            }
                        ],
                        "imagePullSecrets": [
                            {
                                "name": "regcred"
                            }
                        ],
                        "volumes": [
                            {
                                "persistentVolumeClaim": {
                                    "claimName": "pvc-fedlearner-default"
                                },
                                "name": "data"
                            }
                        ]
                    }
                },
                "pair": true,
                "replicas": ${workflow.variables.num_partitions}
            }
        }
    }
}
                '''),
            JobDefinition(name='train-job',
                          job_type=JobDefinition.TREE_MODEL_TRAINING,
                          is_federated=True,
                          variables=[
                              Variable(name='worker_cpu',
                                       value='4000m',
                                       access_mode=Variable.PEER_WRITABLE),
                              Variable(name='worker_mem',
                                       value='8Gi',
                                       access_mode=Variable.PEER_WRITABLE),
                              Variable(name='send_scores_to_follower',
                                       value='True',
                                       access_mode=Variable.PEER_WRITABLE),
                              Variable(name='send_metrics_to_follower',
                                       value='True',
                                       access_mode=Variable.PEER_WRITABLE),
                              Variable(name='num_parallel',
                                       value='4',
                                       access_mode=Variable.PEER_WRITABLE),
                          ],
                          dependencies=[JobDependency(source='data-join-job')],
                          yaml_template='''
                {
    "apiVersion": "fedlearner.k8s.io/v1alpha1",
    "kind": "FLApp",
    "metadata": {
        "name": "${workflow.jobs.train-job.name}",
        "namespace": "${project.variables.namespace}"
    },
    "spec": {
        "role": "Leader",
        "cleanPodPolicy": "All",
        "peerSpecs": {
            "Leader": {
                "peerURL": "fedlearner-stack-ingress-nginx-controller.default.svc.cluster.local:80",
                "authority": "${project.participants[0].egress_domain}",
                "extraHeaders": {
                    "x-host": "default.fedlearner.operator"
                }
            }
        },
        "flReplicaSpecs": {
            "Worker": {
                "template": {
                    "spec": {
                        "restartPolicy": "Never",
                        "containers": [
                            {
                                "env": [
                                    ${system.basic_envs},
                                    {
                                        "name": "EGRESS_URL",
                                        "value": "fedlearner-stack-ingress-nginx-controller.default.svc.cluster.local:80"
                                    },
                                    {
                                        "name": "EGRESS_HOST",
                                        "value": "${project.participants[0].egress_host}"
                                    },
                                    {
                                        "name": "EGRESS_DOMAIN",
                                        "value": "${project.participants[0].egress_domain}"
                                    },
                                    {
                                        "name": "APPLICATION_ID",
                                        "value": "${workflow.jobs.train-job.name}"
                                    },
                                    {
                                        "name": "STORAGE_ROOT_PATH",
                                        "value": "${project.variables.storage_root_dir}"
                                    },
                                    {
                                        "name": "ROLE",
                                        "value": "leader"
                                    },
                                    {
                                        "name": "OUTPUT_BASE_DIR",
                                        "value": "${project.variables.storage_root_dir}/job_output/${workflow.jobs.train-job.name}"
                                    },
                                    {
                                        "name": "MODE",
                                        "value": "train"
                                    },
                                    {
                                        "name": "SEND_SCORES_TO_FOLLOWER",
                                        "value": "${workflow.jobs.train-job.variables.send_scores_to_follower}"
                                    },
                                    {
                                        "name": "SEND_METRICS_TO_FOLLOWER",
                                        "value": "${workflow.jobs.train-job.variables.send_metrics_to_follower}"
                                    },
                                    {
                                        "name": "NUM_PARALLEL",
                                        "value": "${workflow.jobs.train-job.variables.num_parallel}"
                                    },
                                    {
                                        "name": "DATA_SOURCE",
                                        "value": "${workflow.jobs.data-join-job.name}"
                                    }
                                ],
                                "imagePullPolicy": "IfNotPresent",
                                "name": "tensorflow",
                                "volumeMounts": [
                                    {
                                        "mountPath": "/data",
                                        "name": "data"
                                    }
                                ],
                                "image": "artifact.bytedance.com/fedlearner/fedlearner:${workflow.variables.image_version}",
                                "ports": [
                                    {
                                        "containerPort": 50051,
                                        "name": "flapp-port"
                                    }
                                ],
                                "command": [
                                    "/app/deploy/scripts/wait4pair_wrapper.sh"
                                ],
                                "args": [
                                    "/app/deploy/scripts/trainer/run_tree_worker.sh"
                                ],
                                "resources": {
                                      "limits": {
                                            "cpu": "${workflow.jobs.train-job.variables.worker_cpu}",
                                            "memory": "${workflow.jobs.train-job.variables.worker_mem}"
                                      },
                                      "requests": {
                                            "cpu": "${workflow.jobs.train-job.variables.worker_cpu}",
                                            "memory": "${workflow.jobs.train-job.variables.worker_mem}"
                                      }
                                }
                            }
                        ],
                        "imagePullSecrets": [
                            {
                                "name": "regcred"
                            }
                        ],
                        "volumes": [
                            {
                                "persistentVolumeClaim": {
                                    "claimName": "pvc-fedlearner-default"
                                },
                                "name": "data"
                            }
                        ]
                    }
                },
                "pair": true,
                "replicas": 1
            }
        }
    }
}
                ''')
        ])

    return workflow
コード例 #6
0
def make_workflow_template():
    """Build the "left" side test workflow template.

    The returned definition has ``group_alias='test_template'`` and
    ``is_left=True``, declares two workflow-level variables
    (``image_version`` and ``num_partitions``) and contains two jobs:

    * ``raw_data_job`` -- a non-federated ``RAW_DATA`` job with a Master
      replica and ``num_partitions`` Worker replicas.
    * ``data_join_job`` -- a federated ``DATA_JOIN`` job that depends on
      ``raw_data_job``.

    Each job carries its FLApp manifest as JSON text in ``yaml_template``.
    The ``${...}`` placeholders are kept verbatim in that text; this
    function does not resolve them.

    Returns:
        WorkflowDefinition: the assembled workflow template.
    """
    workflow = WorkflowDefinition(
        group_alias='test_template',
        is_left=True,
        variables=[
            Variable(name='image_version',
                     value='v1.5-rc3',
                     access_mode=Variable.PEER_READABLE),
            Variable(name='num_partitions',
                     value='4',
                     access_mode=Variable.PEER_WRITABLE),
        ],
        job_definitions=[
            JobDefinition(
                name='raw_data_job',
                job_type=JobDefinition.RAW_DATA,
                is_federated=False,
                is_manual=False,
                variables=[
                    Variable(
                        name='input_dir',
                        value='/app/deploy/integrated_test/tfrecord_raw_data',
                        access_mode=Variable.PRIVATE),
                    Variable(name='file_wildcard',
                             value='*.rd',
                             access_mode=Variable.PRIVATE),
                    Variable(name='batch_size',
                             value='1024',
                             access_mode=Variable.PEER_WRITABLE),
                    Variable(name='input_format',
                             value='TF_RECORD',
                             access_mode=Variable.PRIVATE),
                    Variable(name='output_format',
                             value='TF_RECORD',
                             access_mode=Variable.PRIVATE),
                    Variable(name='master_cpu',
                             value='2',
                             access_mode=Variable.PEER_WRITABLE),
                    Variable(name='master_mem',
                             value='3Gi',
                             access_mode=Variable.PEER_WRITABLE),
                    Variable(name='worker_cpu',
                             value='2',
                             access_mode=Variable.PEER_WRITABLE),
                    Variable(name='worker_mem',
                             value='3Gi',
                             access_mode=Variable.PEER_WRITABLE),
                ],
                yaml_template='''{
  "apiVersion": "fedlearner.k8s.io/v1alpha1",
  "kind": "FLApp",
  "metadata": {
    "name": "${workflow.jobs.raw_data_job.name}",
    "namespace": "${project.variables.namespace}"
  },
  "spec": {
    "cleanPodPolicy": "All",
    "flReplicaSpecs": {
      "Master": {
        "pair": false,
        "replicas": 1,
        "template": {
          "spec": {
            "containers": [
              {
                "command": [
                  "/app/deploy/scripts/data_portal/run_data_portal_master.sh"
                ],
                "env": [
                  {
                    "name": "POD_IP",
                    "valueFrom": {
                      "fieldRef": {
                        "fieldPath": "status.podIP"
                      }
                    }
                  },
                  {
                    "name": "POD_NAME",
                    "valueFrom": {
                      "fieldRef": {
                        "fieldPath": "metadata.name"
                      }
                    }
                  },
                  ${system.basic_envs},
                  ${project.variables.basic_envs},
                  {
                    "name": "APPLICATION_ID",
                    "value": "${workflow.jobs.raw_data_job.name}"
                  },
                  {
                    "name": "DATA_PORTAL_NAME",
                    "value": "${workflow.jobs.raw_data_job.name}"
                  },
                  {
                    "name": "OUTPUT_PARTITION_NUM",
                    "value": "${workflow.variables.num_partitions}"
                  },
                  {
                    "name": "INPUT_BASE_DIR",
                    "value": "${workflow.jobs.raw_data_job.variables.input_dir}"
                  },
                  {
                    "name": "OUTPUT_BASE_DIR",
                    "value": "${project.variables.storage_root_dir}/raw_data/${workflow.jobs.raw_data_job.name}"
                  },
                  {
                    "name": "RAW_DATA_PUBLISH_DIR",
                    "value": "portal_publish_dir/${workflow.jobs.raw_data_job.name}"
                  },
                  {
                    "name": "DATA_PORTAL_TYPE",
                    "value": "Streaming"
                  },
                  {
                    "name": "FILE_WILDCARD",
                    "value": "${workflow.jobs.raw_data_job.variables.file_wildcard}"
                  }
                ],
                "image": "hub.docker.com/fedlearner/fedlearner:${workflow.variables.image_version}",
                "imagePullPolicy": "IfNotPresent",
                "name": "tensorflow",
                "ports": [
                  {
                    "containerPort": 50051,
                    "name": "flapp-port"
                  }
                ],
                "resources": {
                  "limits": {
                    "cpu": "${workflow.jobs.raw_data_job.variables.master_cpu}",
                    "memory": "${workflow.jobs.raw_data_job.variables.master_mem}"
                  },
                  "requests": {
                    "cpu": "${workflow.jobs.raw_data_job.variables.master_cpu}",
                    "memory": "${workflow.jobs.raw_data_job.variables.master_mem}"
                  }
                },
                "volumeMounts": [
                  {
                    "mountPath": "/data",
                    "name": "data"
                  }
                ]
              }
            ],
            "imagePullSecrets": [
              {
                "name": "regcred"
              }
            ],
            "restartPolicy": "Never",
            "volumes": [
              {
                "name": "data",
                "persistentVolumeClaim": {
                  "claimName": "pvc-fedlearner-default"
                }
              }
            ]
          }
        }
      },
      "Worker": {
        "pair": false,
        "replicas": ${workflow.variables.num_partitions},
        "template": {
          "metadata": {
            "creationTimestamp": null
          },
          "spec": {
            "containers": [
              {
                "command": [
                  "/app/deploy/scripts/data_portal/run_data_portal_worker.sh"
                ],
                "env": [
                  {
                    "name": "POD_IP",
                    "valueFrom": {
                      "fieldRef": {
                        "fieldPath": "status.podIP"
                      }
                    }
                  },
                  {
                    "name": "POD_NAME",
                    "valueFrom": {
                      "fieldRef": {
                        "fieldPath": "metadata.name"
                      }
                    }
                  },
                  ${system.basic_envs},
                  ${project.variables.basic_envs},
                  {
                    "name": "CPU_REQUEST",
                    "valueFrom": {
                      "resourceFieldRef": {
                        "divisor": "0",
                        "resource": "requests.cpu"
                      }
                    }
                  },
                  {
                    "name": "MEM_REQUEST",
                    "valueFrom": {
                      "resourceFieldRef": {
                        "divisor": "0",
                        "resource": "requests.memory"
                      }
                    }
                  },
                  {
                    "name": "CPU_LIMIT",
                    "valueFrom": {
                      "resourceFieldRef": {
                        "divisor": "0",
                        "resource": "limits.cpu"
                      }
                    }
                  },
                  {
                    "name": "MEM_LIMIT",
                    "valueFrom": {
                      "resourceFieldRef": {
                        "divisor": "0",
                        "resource": "limits.memory"
                      }
                    }
                  },
                  {
                    "name": "APPLICATION_ID",
                    "value": "${workflow.jobs.raw_data_job.name}"
                  },
                  {
                    "name": "BATCH_SIZE",
                    "value": "${workflow.jobs.raw_data_job.variables.batch_size}"
                  },
                  {
                    "name": "INPUT_DATA_FORMAT",
                    "value": "${workflow.jobs.raw_data_job.variables.input_format}"
                  },
                  {
                    "name": "COMPRESSED_TYPE"
                  },
                  {
                    "name": "OUTPUT_DATA_FORMAT",
                    "value": "${workflow.jobs.raw_data_job.variables.output_format}"
                  }
                ],
                "image": "hub.docker.com/fedlearner/fedlearner:${workflow.variables.image_version}",
                "imagePullPolicy": "IfNotPresent",
                "name": "tensorflow",
                "resources": {
                  "limits": {
                    "cpu": "${workflow.jobs.raw_data_job.variables.worker_cpu}",
                    "memory": "${workflow.jobs.raw_data_job.variables.worker_mem}"
                  },
                  "requests": {
                    "cpu": "${workflow.jobs.raw_data_job.variables.worker_cpu}",
                    "memory": "${workflow.jobs.raw_data_job.variables.worker_mem}"
                  }
                },
                "volumeMounts": [
                  {
                    "mountPath": "/data",
                    "name": "data"
                  }
                ]
              }
            ],
            "imagePullSecrets": [
              {
                "name": "regcred"
              }
            ],
            "restartPolicy": "Never",
            "volumes": [
              {
                "name": "data",
                "persistentVolumeClaim": {
                  "claimName": "pvc-fedlearner-default"
                }
              }
            ]
          }
        }
      }
    },
    "peerSpecs": {
      "Leader": {
        "peerURL": ""
      }
    },
    "role": "Follower"
  }
}
                '''),
            # Notes on the data_join_job template below:
            # * num_partitions is a workflow-level variable (raw_data_job
            #   declares no such variable), so it is referenced as
            #   ${workflow.variables.num_partitions}.
            # * RAW_DATA_SUB_DIR is listed once per container and points at
            #   the directory raw_data_job sets as RAW_DATA_PUBLISH_DIR.
            # * The Worker container is sized by worker_cpu / worker_mem.
            # NOTE(review): the Worker image is pinned to tag 5b499dd on a
            # different registry instead of following image_version like
            # every other container here -- confirm whether that is
            # intentional.
            JobDefinition(name='data_join_job',
                          job_type=JobDefinition.DATA_JOIN,
                          is_federated=True,
                          is_manual=False,
                          variables=[
                              Variable(name='master_cpu',
                                       value='2',
                                       access_mode=Variable.PEER_WRITABLE),
                              Variable(name='master_mem',
                                       value='3Gi',
                                       access_mode=Variable.PEER_WRITABLE),
                              Variable(name='worker_cpu',
                                       value='2',
                                       access_mode=Variable.PEER_WRITABLE),
                              Variable(name='worker_mem',
                                       value='3Gi',
                                       access_mode=Variable.PEER_WRITABLE),
                              Variable(name='role',
                                       value='Follower',
                                       access_mode=Variable.PEER_WRITABLE),
                          ],
                          dependencies=[JobDependency(source='raw_data_job')],
                          yaml_template='''
{
  "apiVersion": "fedlearner.k8s.io/v1alpha1",
  "kind": "FLApp",
  "metadata": {
    "name": "${workflow.jobs.data_join_job.name}",
    "namespace": "${project.variables.namespace}"
  },
  "spec": {
    "cleanPodPolicy": "All",
    "flReplicaSpecs": {
      "Master": {
        "pair": true,
        "replicas": 1,
        "template": {
          "metadata": {
            "creationTimestamp": null
          },
          "spec": {
            "containers": [
              {
                "args": [
                  "/app/deploy/scripts/data_join/run_data_join_master.sh"
                ],
                "command": [
                  "/app/deploy/scripts/wait4pair_wrapper.sh"
                ],
                "env": [
                  {
                    "name": "POD_IP",
                    "valueFrom": {
                      "fieldRef": {
                        "fieldPath": "status.podIP"
                      }
                    }
                  },
                  {
                    "name": "POD_NAME",
                    "valueFrom": {
                      "fieldRef": {
                        "fieldPath": "metadata.name"
                      }
                    }
                  },
                  ${system.basic_envs},
                  ${project.variables.basic_envs},
                  {
                    "name": "ROLE",
                    "value": "${workflow.jobs.data_join_job.variables.role}"
                  },
                  {
                    "name": "APPLICATION_ID",
                    "value": "${workflow.jobs.data_join_job.name}"
                  },
                  {
                    "name": "OUTPUT_BASE_DIR",
                    "value": "${project.variables.storage_root_dir}/data_source/${workflow.jobs.data_join_job.name}"
                  },
                  {
                    "name": "CPU_REQUEST",
                    "valueFrom": {
                      "resourceFieldRef": {
                        "divisor": "0",
                        "resource": "requests.cpu"
                      }
                    }
                  },
                  {
                    "name": "MEM_REQUEST",
                    "valueFrom": {
                      "resourceFieldRef": {
                        "divisor": "0",
                        "resource": "requests.memory"
                      }
                    }
                  },
                  {
                    "name": "CPU_LIMIT",
                    "valueFrom": {
                      "resourceFieldRef": {
                        "divisor": "0",
                        "resource": "limits.cpu"
                      }
                    }
                  },
                  {
                    "name": "MEM_LIMIT",
                    "valueFrom": {
                      "resourceFieldRef": {
                        "divisor": "0",
                        "resource": "limits.memory"
                      }
                    }
                  },
                  {
                    "name": "BATCH_MODE",
                    "value": "--batch_mode"
                  },
                  {
                    "name": "PARTITION_NUM",
                    "value": "${workflow.variables.num_partitions}"
                  },
                  {
                    "name": "START_TIME",
                    "value": "0"
                  },
                  {
                    "name": "END_TIME",
                    "value": "999999999999"
                  },
                  {
                    "name": "NEGATIVE_SAMPLING_RATE",
                    "value": "1.0"
                  },
                  {
                    "name": "RAW_DATA_SUB_DIR",
                    "value": "portal_publish_dir/${workflow.jobs.raw_data_job.name}"
                  }
                ],
                "image": "hub.docker.com/fedlearner/fedlearner:${workflow.variables.image_version}",
                "imagePullPolicy": "IfNotPresent",
                "name": "tensorflow",
                "ports": [
                  {
                    "containerPort": 50051,
                    "name": "flapp-port"
                  }
                ],
                "resources": {
                  "limits": {
                    "cpu": "${workflow.jobs.data_join_job.variables.master_cpu}",
                    "memory": "${workflow.jobs.data_join_job.variables.master_mem}"
                  },
                  "requests": {
                    "cpu": "${workflow.jobs.data_join_job.variables.master_cpu}",
                    "memory": "${workflow.jobs.data_join_job.variables.master_mem}"
                  }
                },
                "volumeMounts": [
                  {
                    "mountPath": "/data",
                    "name": "data"
                  }
                ]
              }
            ],
            "imagePullSecrets": [
              {
                "name": "regcred"
              }
            ],
            "restartPolicy": "Never",
            "volumes": [
              {
                "name": "data",
                "persistentVolumeClaim": {
                  "claimName": "pvc-fedlearner-default"
                }
              }
            ]
          }
        }
      },
      "Worker": {
        "pair": true,
        "replicas": ${workflow.variables.num_partitions},
        "template": {
          "metadata": {
            "creationTimestamp": null
          },
          "spec": {
            "containers": [
              {
                "args": [
                  "/app/deploy/scripts/data_join/run_data_join_worker.sh"
                ],
                "command": [
                  "/app/deploy/scripts/wait4pair_wrapper.sh"
                ],
                "env": [
                  {
                    "name": "POD_IP",
                    "valueFrom": {
                      "fieldRef": {
                        "fieldPath": "status.podIP"
                      }
                    }
                  },
                  {
                    "name": "POD_NAME",
                    "valueFrom": {
                      "fieldRef": {
                        "fieldPath": "metadata.name"
                      }
                    }
                  },
                  ${system.basic_envs},
                  ${project.variables.basic_envs},
                  {
                    "name": "ROLE",
                    "value": "${workflow.jobs.data_join_job.variables.role}"
                  },
                  {
                    "name": "APPLICATION_ID",
                    "value": "${workflow.jobs.data_join_job.name}"
                  },
                  {
                    "name": "OUTPUT_BASE_DIR",
                    "value": "${project.variables.storage_root_dir}/data_source/${workflow.jobs.data_join_job.name}"
                  },
                  {
                    "name": "CPU_REQUEST",
                    "valueFrom": {
                      "resourceFieldRef": {
                        "divisor": "0",
                        "resource": "requests.cpu"
                      }
                    }
                  },
                  {
                    "name": "MEM_REQUEST",
                    "valueFrom": {
                      "resourceFieldRef": {
                        "divisor": "0",
                        "resource": "requests.memory"
                      }
                    }
                  },
                  {
                    "name": "CPU_LIMIT",
                    "valueFrom": {
                      "resourceFieldRef": {
                        "divisor": "0",
                        "resource": "limits.cpu"
                      }
                    }
                  },
                  {
                    "name": "MEM_LIMIT",
                    "valueFrom": {
                      "resourceFieldRef": {
                        "divisor": "0",
                        "resource": "limits.memory"
                      }
                    }
                  },
                  {
                    "name": "PARTITION_NUM",
                    "value": "${workflow.variables.num_partitions}"
                  },
                  {
                    "name": "RAW_DATA_SUB_DIR",
                    "value": "portal_publish_dir/${workflow.jobs.raw_data_job.name}"
                  },
                  {
                    "name": "DATA_BLOCK_DUMP_INTERVAL",
                    "value": "600"
                  },
                  {
                    "name": "DATA_BLOCK_DUMP_THRESHOLD",
                    "value": "65536"
                  },
                  {
                    "name": "EXAMPLE_ID_DUMP_INTERVAL",
                    "value": "600"
                  },
                  {
                    "name": "EXAMPLE_ID_DUMP_THRESHOLD",
                    "value": "65536"
                  },
                  {
                    "name": "EXAMPLE_ID_BATCH_SIZE",
                    "value": "4096"
                  },
                  {
                    "name": "MAX_FLYING_EXAMPLE_ID",
                    "value": "307152"
                  },
                  {
                    "name": "MIN_MATCHING_WINDOW",
                    "value": "2048"
                  },
                  {
                    "name": "MAX_MATCHING_WINDOW",
                    "value": "8192"
                  },
                  {
                    "name": "RAW_DATA_ITER",
                    "value": "${workflow.jobs.raw_data_job.variables.output_format}"
                  }
                ],
                "image": "artifact.bytedance.com/fedlearner/fedlearner:5b499dd",
                "imagePullPolicy": "IfNotPresent",
                "name": "tensorflow",
                "ports": [
                  {
                    "containerPort": 50051,
                    "name": "flapp-port"
                  }
                ],
                "resources": {
                  "limits": {
                    "cpu": "${workflow.jobs.data_join_job.variables.worker_cpu}",
                    "memory": "${workflow.jobs.data_join_job.variables.worker_mem}"
                  },
                  "requests": {
                    "cpu": "${workflow.jobs.data_join_job.variables.worker_cpu}",
                    "memory": "${workflow.jobs.data_join_job.variables.worker_mem}"
                  }
                },
                "volumeMounts": [
                  {
                    "mountPath": "/data",
                    "name": "data"
                  }
                ]
              }
            ],
            "imagePullSecrets": [
              {
                "name": "regcred"
              }
            ],
            "restartPolicy": "Never",
            "volumes": [
              {
                "name": "data",
                "persistentVolumeClaim": {
                  "claimName": "pvc-fedlearner-default"
                }
              }
            ]
          }
        }
      }
    },
    "peerSpecs": {
      "Follower": {
        "authority": "external.name",
        "extraHeaders": {
          "x-host": "leader.flapp.operator"
        },
        "peerURL": "fedlearner-stack-ingress-nginx-controller.default.svc.cluster.local:80"
      }
    },
    "role": "Leader"
  }
}
                ''')
        ])

    return workflow