Esempio n. 1
0
def get_component_output_tables_meta(task_data):
    """Look up the output data table metadata of one component of a job.

    ``task_data`` is validated with ``check_request_parameters`` and must
    provide the keys ``job_id``, ``component_name``, ``role`` and
    ``party_id``.

    Returns whatever ``Tracker.get_output_data_table`` yields for the
    output data infos recorded by the tracker.

    Raises:
        Exception: if no DSL parser can be obtained for the job, or the
            parser has no information about the requested component.
    """
    check_request_parameters(task_data)
    job_id = task_data['job_id']
    component_name = task_data['component_name']
    output_tracker = Tracker(job_id=job_id,
                             component_name=component_name,
                             role=task_data['role'],
                             party_id=task_data['party_id'])

    dsl_parser = schedule_utils.get_job_dsl_parser_by_job_id(job_id=job_id)
    if not dsl_parser:
        raise Exception('can not get dag parser, please check if the parameters are correct')
    # The component info is only needed to confirm that the component exists.
    if not dsl_parser.get_component_info(component_name):
        raise Exception('can not found component, please check if the parameters are correct')

    return output_tracker.get_output_data_table(
        output_data_infos=output_tracker.get_output_data_info())
Esempio n. 2
0
    def get_task_run_args(cls,
                          job_id,
                          role,
                          party_id,
                          task_id,
                          task_version,
                          job_args,
                          job_parameters: RunParameters,
                          task_parameters: RunParameters,
                          input_dsl,
                          filter_type=None,
                          filter_attr=None,
                          get_input_table=False):
        """Resolve the inputs declared in a component's DSL into run arguments.

        Args:
            job_id, role, party_id: identify the job side whose upstream
                component outputs are looked up through the tracker.
            task_id, task_version: accepted for interface compatibility;
                not read by this method.
            job_args: job-level arguments; ``job_args['data'][<name>]`` is
                consulted for inputs written as ``args.<name>`` and is
                expected to carry ``name`` and ``namespace``.
            job_parameters: supplies ``model_id`` / ``model_version`` for
                model inputs.
            task_parameters: supplies ``computing_partitions`` used when
                loading a computing table.
            input_dsl: mapping of input type (``data``, ``model``,
                ``isometric_model``) to its DSL detail. Other input types
                are ignored.
            filter_type: optional container of input types to process; any
                input type not in it is skipped.
            filter_attr: optional mapping; when ``filter_attr['data']`` is
                non-empty and a computing table was loaded, the table is
                replaced by a dict of ``get_<attr>()`` results.
            get_input_table: when true, no computing table is loaded and
                the storage table names are returned instead.

        Returns:
            ``{}`` when ``role`` contains ``'idmapping'``. Otherwise, if
            ``get_input_table`` is true, a dict mapping each resolvable data
            key to ``{'namespace': ..., 'name': ...}``; else a dict
            ``{input_type: {component_name: ...}}`` with the loaded inputs.

        Raises:
            Exception: if a model key is neither ``<component>.<alias>`` nor
                ``pipeline.<component>.<alias>``.
        """
        if 'idmapping' in role:
            return {}
        task_run_args = {}
        input_table = {}
        for input_type, input_detail in input_dsl.items():
            if filter_type and input_type not in filter_type:
                continue
            if input_type == 'data':
                this_type_args = task_run_args.setdefault(input_type, {})
                for data_type, data_list in input_detail.items():
                    # One fresh accumulator list per upstream component.
                    data_dict = {
                        data_key.split('.')[0]: {data_type: []}
                        for data_key in data_list
                    }
                    for data_key in data_list:
                        data_key_item = data_key.split('.')
                        search_component_name = data_key_item[0]
                        search_data_name = data_key_item[1]
                        storage_table_meta = None
                        if search_component_name == 'args':
                            # A data name that is absent from the job args
                            # (or set to None) is treated like an entry
                            # without name/namespace: no table, instead of
                            # failing with AttributeError on None.
                            table_conf = (job_args.get('data') or {}).get(
                                search_data_name) or {}
                            if table_conf.get('namespace', '') and table_conf.get('name', ''):
                                storage_table_meta = storage.StorageTableMeta(
                                    name=table_conf['name'],
                                    namespace=table_conf['namespace'])
                        else:
                            tracker_client = TrackerClient(
                                job_id=job_id,
                                role=role,
                                party_id=party_id,
                                component_name=search_component_name)
                            upstream_output_table_infos_json = tracker_client.get_output_data_info(
                                data_name=search_data_name)
                            if upstream_output_table_infos_json:
                                tracker = Tracker(
                                    job_id=job_id,
                                    role=role,
                                    party_id=party_id,
                                    component_name=search_component_name)
                                # Rebuild DB model objects from the JSON
                                # records returned by the tracker client.
                                upstream_output_table_infos = [
                                    fill_db_model_object(
                                        Tracker.get_dynamic_db_model(
                                            TrackingOutputDataInfo, job_id)(),
                                        info_json)
                                    for info_json in upstream_output_table_infos_json
                                ]
                                output_tables_meta = tracker.get_output_data_table(
                                    output_data_infos=upstream_output_table_infos)
                                if output_tables_meta:
                                    storage_table_meta = output_tables_meta.get(
                                        search_data_name, None)
                        args_from_component = this_type_args.setdefault(
                            search_component_name, {})
                        if get_input_table and storage_table_meta:
                            # Only the table identity is wanted; nothing is loaded.
                            input_table[data_key] = {
                                'namespace': storage_table_meta.get_namespace(),
                                'name': storage_table_meta.get_name()
                            }
                            computing_table = None
                        elif storage_table_meta:
                            LOGGER.info(
                                f"load computing table use {task_parameters.computing_partitions}"
                            )
                            computing_table = session.get_latest_opened(
                            ).computing.load(
                                storage_table_meta.get_address(),
                                schema=storage_table_meta.get_schema(),
                                partitions=task_parameters.computing_partitions
                            )
                        else:
                            computing_table = None

                        if not computing_table or not filter_attr or not filter_attr.get(
                                "data", None):
                            # Unresolved inputs are kept as None placeholders
                            # in the per-component list.
                            data_dict[search_component_name][data_type].append(
                                computing_table)
                            args_from_component[data_type] = data_dict[
                                search_component_name][data_type]
                        else:
                            args_from_component[data_type] = {
                                attr: getattr(computing_table,
                                              "get_{}".format(attr))()
                                for attr in filter_attr["data"]
                            }
            elif input_type in ['model', 'isometric_model']:
                this_type_args = task_run_args.setdefault(input_type, {})
                for dsl_model_key in input_detail:
                    dsl_model_key_items = dsl_model_key.split('.')
                    if len(dsl_model_key_items) == 2:
                        search_component_name, search_model_alias = dsl_model_key_items
                    elif len(dsl_model_key_items) == 3 and dsl_model_key_items[0] == 'pipeline':
                        # "pipeline.<component>.<alias>": drop the prefix.
                        search_component_name, search_model_alias = dsl_model_key_items[1:]
                    else:
                        raise Exception(
                            'get input {} failed'.format(input_type))
                    models = Tracker(
                        job_id=job_id,
                        role=role,
                        party_id=party_id,
                        component_name=search_component_name,
                        model_id=job_parameters.model_id,
                        model_version=job_parameters.model_version
                    ).get_output_model(model_alias=search_model_alias)
                    this_type_args[search_component_name] = models
        if get_input_table:
            return input_table
        return task_run_args