コード例 #1
0
ファイル: task_executor.py プロジェクト: zhilangtaosha/FATE
    def get_task_run_args(job_id, role, party_id, job_parameters, job_args,
                          input_dsl):
        """Resolve the inputs declared in a component's DSL into run arguments.

        Args:
            job_id, role, party_id: forwarded to ``Tracking`` to locate the
                outputs of upstream components.
            job_parameters: mapping read for ``'model_id'`` and
                ``'model_version'`` when model inputs are resolved.
            job_args: mapping whose ``'data'`` entry maps a data name to a
                dict holding ``'namespace'`` and ``'name'``.
            input_dsl: mapping of input type (``'data'``, ``'model'`` or
                ``'isometric_model'``) to its DSL detail. Other input types
                are ignored.

        Returns:
            dict: ``{input_type: {component_name: ...}}``. For ``'data'`` the
            inner value is ``{data_type: data_table}``; for model inputs it is
            whatever ``Tracking.get_output_model`` returned.

        Raises:
            Exception: if a model key is neither ``component.model`` nor
                ``pipeline.component.model``.
        """
        task_run_args = {}
        for input_type, input_detail in input_dsl.items():
            if input_type == 'data':
                this_type_args = task_run_args.setdefault(input_type, {})
                for data_type, data_list in input_detail.items():
                    for data_key in data_list:
                        data_key_item = data_key.split('.')
                        search_component_name = data_key_item[0]
                        search_data_name = data_key_item[1]
                        if search_component_name == 'args':
                            # A data name that is absent from job_args (or a
                            # job_args without a 'data' section) is treated
                            # like an incomplete entry: no table, instead of
                            # an AttributeError on None.
                            table_info = (job_args.get('data') or {}).get(
                                search_data_name) or {}
                            namespace = table_info.get('namespace', '')
                            name = table_info.get('name', '')
                            if namespace and name:
                                data_table = session.table(
                                    namespace=namespace, name=name)
                            else:
                                data_table = None
                        else:
                            # Input is the output table of another component.
                            data_table = Tracking(
                                job_id=job_id,
                                role=role,
                                party_id=party_id,
                                component_name=search_component_name
                            ).get_output_data_table(data_name=search_data_name)
                        args_from_component = this_type_args.setdefault(
                            search_component_name, {})
                        args_from_component[data_type] = data_table
            elif input_type in ['model', 'isometric_model']:
                this_type_args = task_run_args.setdefault(input_type, {})
                for dsl_model_key in input_detail:
                    dsl_model_key_items = dsl_model_key.split('.')
                    if len(dsl_model_key_items) == 2:
                        search_component_name, search_model_name = \
                            dsl_model_key_items
                    elif (len(dsl_model_key_items) == 3
                          and dsl_model_key_items[0] == 'pipeline'):
                        # 'pipeline.<component>.<model>': drop the prefix.
                        search_component_name, search_model_name = \
                            dsl_model_key_items[1:]
                    else:
                        raise Exception(
                            'get input {} failed'.format(input_type))
                    models = Tracking(
                        job_id=job_id,
                        role=role,
                        party_id=party_id,
                        component_name=search_component_name,
                        model_id=job_parameters['model_id'],
                        model_version=job_parameters['model_version']
                    ).get_output_model(model_name=search_model_name)
                    this_type_args[search_component_name] = models
        return task_run_args
コード例 #2
0
    def get_task_run_args(job_id,
                          role,
                          party_id,
                          task_id,
                          job_args,
                          job_parameters,
                          task_parameters,
                          input_dsl,
                          if_save_as_task_input_data,
                          filter_type=None,
                          filter_attr=None):
        """Resolve the inputs declared in a component's DSL into run arguments.

        Args:
            job_id, role, party_id: forwarded to ``Tracking`` to locate the
                outputs of upstream components.
            task_id: used in log messages and to build the namespace of the
                saved-as input table.
            job_args: mapping whose ``'data'`` entry maps a data name to a
                dict holding ``'namespace'`` and ``'name'``.
            job_parameters: mapping read for ``'model_id'`` and
                ``'model_version'`` when model inputs are resolved.
            task_parameters: mapping; a positive ``'input_data_partition'``
                overrides the partition count of the saved-as table.
            input_dsl: mapping of input type (``'data'``, ``'model'`` or
                ``'isometric_model'``) to its DSL detail.
            if_save_as_task_input_data: when truthy, every non-empty input
                table is copied with ``save_as`` before being handed out.
            filter_type: optional container; input types not in it are skipped.
            filter_attr: optional mapping; when ``filter_attr['data']`` is a
                non-empty list of attribute names, each data table is replaced
                by ``{attr: table.get_<attr>()}``.

        Returns:
            dict: ``{input_type: {component_name: ...}}``.

        Raises:
            Exception: if a model key is neither ``component.model`` nor
                ``pipeline.component.model``.
        """
        task_run_args = {}
        for input_type, input_detail in input_dsl.items():
            if filter_type and input_type not in filter_type:
                continue
            if input_type == 'data':
                this_type_args = task_run_args.setdefault(input_type, {})
                for data_type, data_list in input_detail.items():
                    for data_key in data_list:
                        data_key_item = data_key.split('.')
                        search_component_name = data_key_item[0]
                        search_data_name = data_key_item[1]
                        if search_component_name == 'args':
                            # A data name that is absent from job_args (or a
                            # job_args without a 'data' section) is treated
                            # like an incomplete entry: no table, instead of
                            # an AttributeError on None.
                            table_info = (job_args.get('data') or {}).get(
                                search_data_name) or {}
                            namespace = table_info.get('namespace', '')
                            name = table_info.get('name', '')
                            if namespace and name:
                                data_table = session.table(
                                    namespace=namespace, name=name)
                            else:
                                data_table = None
                        else:
                            # Input is the output table of another component.
                            data_table = Tracking(
                                job_id=job_id,
                                role=role,
                                party_id=party_id,
                                component_name=search_component_name
                            ).get_output_data_table(data_name=search_data_name)
                        args_from_component = this_type_args.setdefault(
                            search_component_name, {})
                        # todo: If the same component has more than one identical input, save as is repeated
                        if if_save_as_task_input_data:
                            if data_table:
                                schedule_logger().info(
                                    "start save as task {} input data table {} {}"
                                    .format(task_id,
                                            data_table.get_namespace(),
                                            data_table.get_name()))
                                # Metas and schema are captured first and
                                # re-applied to the copy after save_as.
                                origin_table_metas = data_table.get_metas()
                                origin_table_schema = data_table.schema
                                save_as_options = {
                                    "store_type": StoreTypes.ROLLPAIR_IN_MEMORY
                                } if SAVE_AS_TASK_INPUT_DATA_IN_MEMORY else {}
                                data_table = data_table.save_as(
                                    namespace=job_utils.generate_session_id(
                                        task_id=task_id,
                                        role=role,
                                        party_id=party_id),
                                    name=data_table.get_name(),
                                    partition=task_parameters[
                                        'input_data_partition']
                                    if task_parameters.get(
                                        'input_data_partition', 0) > 0 else
                                    data_table.get_partitions(),
                                    options=save_as_options)
                                data_table.save_metas(origin_table_metas)
                                data_table.schema = origin_table_schema
                                schedule_logger().info(
                                    "save as task {} input data table to {} {} done"
                                    .format(task_id,
                                            data_table.get_namespace(),
                                            data_table.get_name()))
                            else:
                                schedule_logger().info(
                                    "pass save as task {} input data table, because the table is none"
                                    .format(task_id))
                        else:
                            schedule_logger().info(
                                "pass save as task {} input data table, because the switch is off"
                                .format(task_id))
                        if not data_table or not filter_attr or not filter_attr.get(
                                "data", None):
                            args_from_component[data_type] = data_table
                        else:
                            # Hand out only the requested getter results
                            # instead of the table object itself.
                            args_from_component[data_type] = {
                                a: getattr(data_table, "get_{}".format(a))()
                                for a in filter_attr["data"]
                            }
            elif input_type in ['model', 'isometric_model']:
                this_type_args = task_run_args.setdefault(input_type, {})
                for dsl_model_key in input_detail:
                    dsl_model_key_items = dsl_model_key.split('.')
                    if len(dsl_model_key_items) == 2:
                        search_component_name, search_model_alias = \
                            dsl_model_key_items
                    elif (len(dsl_model_key_items) == 3
                          and dsl_model_key_items[0] == 'pipeline'):
                        # 'pipeline.<component>.<model>': drop the prefix.
                        search_component_name, search_model_alias = \
                            dsl_model_key_items[1:]
                    else:
                        raise Exception(
                            'get input {} failed'.format(input_type))
                    models = Tracking(
                        job_id=job_id,
                        role=role,
                        party_id=party_id,
                        component_name=search_component_name,
                        model_id=job_parameters['model_id'],
                        model_version=job_parameters['model_version']
                    ).get_output_model(model_alias=search_model_alias)
                    this_type_args[search_component_name] = models
        return task_run_args
コード例 #3
0
def load_model_parameters(model_table_name, model_namespace):
    """Read a model table and return its contents as a dict.

    The table is opened with ``table(model_table_name, model_namespace)`` and
    every ``(name, value)`` pair yielded by ``collect()`` becomes one entry of
    the result; a name yielded more than once keeps its last value.
    """
    stored_model = table(model_table_name, model_namespace)
    return {name: value for name, value in stored_model.collect()}
コード例 #4
0
def save_data_to_eggroll_table(data, namespace, table_name, partition=1):
    """Open a table and write ``data`` into it with ``put_all``.

    The table is requested with ``create_if_missing=True`` and
    ``error_if_exist=True`` and the given ``partition`` count. The table
    object is returned after the write.
    """
    table_options = {
        "partition": partition,
        "create_if_missing": True,
        "error_if_exist": True,
    }
    target_table = table(table_name, namespace, **table_options)
    target_table.put_all(data)
    return target_table
コード例 #5
0
    def host_ids_process(self, data_instances):
        """Return the host's processed id pairs ("Za"), using the cache if enabled.

        Without the cache the pairs are computed directly from
        ``data_instances``. With the cache enabled the host tells the guest
        whether its cached version matches (via ``cache_version_match_info``)
        and then either reads the cached table, recomputes and stores it, or
        skips the work.

        Returns:
            The id-process pairs, or ``None`` when a cache version exists, it
            matches the guest's, and ``self.sync_intersect_ids`` is falsy.
        """
        cache_param = self.intersect_cache_param

        # Cache disabled: compute from the raw ids and finish.
        if not cache_param.use_cache:
            LOGGER.info("Not using cache, calculate Za using raw id")
            return self.cal_host_ids_process_pair(data_instances)

        LOGGER.info("Use intersect cache.")
        if not self.has_cache_version:
            # No cached version yet: generate a fresh namespace/version,
            # report the mismatch to the guest, then compute and store.
            self.is_version_match = False
            LOGGER.info("is version_match:{}".format(self.is_version_match))
            namespace = cache_utils.gen_cache_namespace(
                id_type=cache_param.id_type,
                encrypt_type=cache_param.encrypt_type,
                tag='Za',
                host_party_id=self.host_party_id)
            version = cache_utils.gen_cache_version(namespace=namespace,
                                                    create=True)
            match_report = {
                'version_match': self.is_version_match,
                'version': version,
                'namespace': namespace
            }
            self.transfer_variable.cache_version_match_info.remote(
                match_report, role=consts.GUEST, idx=0)

            host_ids_process_pair = self.cal_host_ids_process_pair(
                data_instances)
            # The key dict is built after the computation above, as before.
            self.store_cache(host_ids_process_pair,
                             rsa_key={
                                 'rsa_e': self.e,
                                 'rsa_d': self.d,
                                 'rsa_n': self.n
                             },
                             assign_version=version,
                             assign_namespace=namespace)
        else:
            current_version = cache_utils.host_get_current_verison(
                host_party_id=self.host_party_id,
                id_type=cache_param.id_type,
                encrypt_type=cache_param.encrypt_type,
                tag='Za')
            # NOTE(review): current_version is dereferenced here, before the
            # `is not None` test in the match expression below -- confirm
            # whether a missing version can actually occur.
            version = current_version.get('table_name')
            namespace = current_version.get('namespace')
            guest_current_version = self.transfer_variable.cache_version_info.get(
                0)
            LOGGER.info("current_version:{}".format(current_version))
            LOGGER.info(
                "guest_current_version:{}".format(guest_current_version))

            self.is_version_match = bool(
                guest_current_version.get('table_name') == version
                and guest_current_version.get('namespace') == namespace
                and current_version is not None)

            match_report = {
                'version_match': self.is_version_match,
                'version': version,
                'namespace': namespace
            }
            self.transfer_variable.cache_version_match_info.remote(
                match_report, role=consts.GUEST, idx=0)

            host_ids_process_pair = None
            if not self.is_version_match or self.sync_intersect_ids:
                # if self.sync_intersect_ids is true, host will get the encrypted intersect id from guest,
                # which need the Za to decrypt them
                LOGGER.info("read Za from cache")
                host_ids_process_pair = session.table(
                    name=version,
                    namespace=namespace,
                    create_if_missing=True,
                    error_if_exist=False)
                if host_ids_process_pair.count() == 0:
                    # Cached table is empty: recompute and store it.
                    host_ids_process_pair = self.cal_host_ids_process_pair(
                        data_instances)
                    self.store_cache(host_ids_process_pair,
                                     rsa_key={
                                         'rsa_e': self.e,
                                         'rsa_d': self.d,
                                         'rsa_n': self.n
                                     })

        LOGGER.info("remote version match info to guest")
        return host_ids_process_pair