def get_component_output_tables_meta(task_data):
    """Fetch the storage-table metadata for one component's output data.

    Validates the request, builds a Tracker for the (job, component, role,
    party) identified in ``task_data``, and resolves its recorded output-data
    info into table metadata.

    NOTE(review): a second function with this same name appears later in the
    file and will shadow this one at import time — confirm which is intended.

    :param task_data: dict with 'job_id', 'component_name', 'role', 'party_id'
    :return: output table metadata as produced by Tracker.get_output_data_table
    """
    check_request_parameters(task_data)
    job_tracker = Tracker(
        job_id=task_data['job_id'],
        component_name=task_data['component_name'],
        role=task_data['role'],
        party_id=task_data['party_id'],
    )
    data_infos = job_tracker.get_output_data_info()
    return job_tracker.get_output_data_table(output_data_infos=data_infos)
def get_component_output_tables_meta(task_data):
    """Fetch the storage-table metadata for one component's output data,
    first validating that the job's DSL parser and the named component exist.

    :param task_data: dict with 'job_id', 'component_name', 'role', 'party_id'
    :return: output table metadata as produced by Tracker.get_output_data_table
    :raises Exception: if the DAG parser or the component cannot be resolved
    """
    check_request_parameters(task_data)
    job_tracker = Tracker(
        job_id=task_data['job_id'],
        component_name=task_data['component_name'],
        role=task_data['role'],
        party_id=task_data['party_id'],
    )
    # Validate the request against the job's DSL before touching output data.
    dsl_parser = schedule_utils.get_job_dsl_parser_by_job_id(job_id=task_data['job_id'])
    if not dsl_parser:
        raise Exception('can not get dag parser, please check if the parameters are correct')
    if not dsl_parser.get_component_info(task_data['component_name']):
        raise Exception('can not found component, please check if the parameters are correct')
    data_infos = job_tracker.get_output_data_info()
    return job_tracker.get_output_data_table(output_data_infos=data_infos)
def get_task_run_args(cls, job_id, role, party_id, task_id, task_version,
                      job_args, job_parameters: RunParameters,
                      task_parameters: RunParameters, input_dsl,
                      filter_type=None, filter_attr=None,
                      get_input_table=False):
    """Resolve a task's declared inputs (data / cache / model) into runtime arguments.

    Walks ``input_dsl`` and, per input type, loads the referenced upstream
    outputs:

    - 'data': resolves each "component.data_name" key to storage-table
      metadata (from ``job_args`` when the component is 'args', otherwise
      from the upstream component's tracked output-data info) and loads it
      as a computing table, unless ``filter_attr['data']`` asks for table
      attributes instead of the table itself.
    - 'cache': fetches each component's named output cache via Tracker.
    - 'model' / 'isometric_model': reads each component's output model via
      TrackerClient (a 3-part key must start with 'pipeline').

    :param job_args: job-level args; for 'args' inputs, expects
        job_args['data'][name] to carry 'name'/'namespace' — assumed schema,
        TODO confirm against caller.
    :param input_dsl: mapping of input type -> per-type detail (dict for
        'data', iterable of dotted keys for the others).
    :param filter_type: if given, input types not in it are skipped.
    :param filter_attr: if filter_attr['data'] is set, 'data' inputs yield
        {attr: table.get_<attr>()} instead of the loaded table.
    :param get_input_table: if True, return only {data_key: {'namespace',
        'name'}} for the resolved tables, without loading them.
    :return: ``input_table`` dict when ``get_input_table`` is True; otherwise
        a tuple ``(task_run_args, input_table_info_list)``.
    :raises Exception: on an unparseable model key or unsupported input type.
    """
    task_run_args = {}
    input_table = {}
    input_table_info_list = []
    # NOTE(review): idmapping roles get no run args at all — presumably
    # handled elsewhere; confirm.
    if 'idmapping' in role:
        return {}
    for input_type, input_detail in input_dsl.items():
        if filter_type and input_type not in filter_type:
            continue
        if input_type == 'data':
            # this_type_args aliases task_run_args[input_type]; mutations
            # below fill both.
            this_type_args = task_run_args[input_type] = task_run_args.get(
                input_type, {})
            for data_type, data_list in input_detail.items():
                data_dict = {}
                # First pass: pre-create one {data_type: []} slot per
                # upstream component name.
                for data_key in data_list:
                    data_key_item = data_key.split('.')
                    data_dict[data_key_item[0]] = {data_type: []}
                # Second pass: resolve each "component.data_name" reference.
                for data_key in data_list:
                    data_key_item = data_key.split('.')
                    search_component_name, search_data_name = data_key_item[
                        0], data_key_item[1]
                    storage_table_meta = None
                    tracker_client = TrackerClient(
                        job_id=job_id, role=role, party_id=party_id,
                        component_name=search_component_name,
                        task_id=task_id, task_version=task_version)
                    if search_component_name == 'args':
                        # Input comes straight from job args; both name and
                        # namespace must be non-empty.
                        # NOTE(review): .get(search_data_name) with no default
                        # raises AttributeError if the key is absent — confirm
                        # upstream validation guarantees presence.
                        if job_args.get('data', {}).get(search_data_name).get(
                                'namespace', '') and job_args.get(
                                'data', {}).get(search_data_name).get(
                                'name', ''):
                            storage_table_meta = storage.StorageTableMeta(
                                name=job_args['data'][search_data_name]
                                ['name'],
                                namespace=job_args['data']
                                [search_data_name]['namespace'])
                    else:
                        # Input comes from an upstream component's tracked
                        # output-data info.
                        upstream_output_table_infos_json = tracker_client.get_output_data_info(
                            data_name=search_data_name)
                        if upstream_output_table_infos_json:
                            tracker = Tracker(
                                job_id=job_id, role=role, party_id=party_id,
                                component_name=search_component_name,
                                task_id=task_id, task_version=task_version)
                            upstream_output_table_infos = []
                            # Rehydrate each JSON record into a dynamic DB
                            # model instance for this job.
                            for _ in upstream_output_table_infos_json:
                                upstream_output_table_infos.append(
                                    fill_db_model_object(
                                        Tracker.get_dynamic_db_model(
                                            TrackingOutputDataInfo,
                                            job_id)(), _))
                            output_tables_meta = tracker.get_output_data_table(
                                upstream_output_table_infos)
                            if output_tables_meta:
                                storage_table_meta = output_tables_meta.get(
                                    search_data_name, None)
                    # args_from_component aliases
                    # this_type_args[search_component_name].
                    args_from_component = this_type_args[
                        search_component_name] = this_type_args.get(
                        search_component_name, {})
                    if get_input_table and storage_table_meta:
                        # Caller only wants table identity, not the data.
                        input_table[data_key] = {
                            'namespace':
                                storage_table_meta.get_namespace(),
                            'name': storage_table_meta.get_name()
                        }
                        computing_table = None
                    elif storage_table_meta:
                        LOGGER.info(
                            f"load computing table use {task_parameters.computing_partitions}"
                        )
                        # Load the table into the current computing session.
                        computing_table = session.get_computing_session(
                        ).load(storage_table_meta.get_address(),
                               schema=storage_table_meta.get_schema(),
                               partitions=task_parameters.
                               computing_partitions)
                        input_table_info_list.append({
                            'namespace':
                                storage_table_meta.get_namespace(),
                            'name': storage_table_meta.get_name()
                        })
                    else:
                        computing_table = None
                    if not computing_table or not filter_attr or not filter_attr.get(
                            "data", None):
                        # Default: hand the (possibly None) loaded table to
                        # the component.
                        data_dict[search_component_name][data_type].append(
                            computing_table)
                        args_from_component[data_type] = data_dict[
                            search_component_name][data_type]
                    else:
                        # Attribute-only mode: expose table attributes
                        # (e.g. get_schema()) instead of the table itself.
                        args_from_component[data_type] = dict([
                            (a,
                             getattr(computing_table, "get_{}".format(a))())
                            for a in filter_attr["data"]
                        ])
        elif input_type == "cache":
            this_type_args = task_run_args[input_type] = task_run_args.get(
                input_type, {})
            for search_key in input_detail:
                search_component_name, cache_name = search_key.split(".")
                tracker = Tracker(job_id=job_id, role=role,
                                  party_id=party_id,
                                  component_name=search_component_name)
                this_type_args[
                    search_component_name] = tracker.get_output_cache(
                    cache_name=cache_name)
        elif input_type in {'model', 'isometric_model'}:
            this_type_args = task_run_args[input_type] = task_run_args.get(
                input_type, {})
            for dsl_model_key in input_detail:
                dsl_model_key_items = dsl_model_key.split('.')
                # "component.alias" or "pipeline.component.alias".
                if len(dsl_model_key_items) == 2:
                    search_component_name, search_model_alias = dsl_model_key_items[
                        0], dsl_model_key_items[1]
                elif len(dsl_model_key_items
                         ) == 3 and dsl_model_key_items[0] == 'pipeline':
                    search_component_name, search_model_alias = dsl_model_key_items[
                        1], dsl_model_key_items[2]
                else:
                    raise Exception(
                        'get input {} failed'.format(input_type))
                tracker_client = TrackerClient(
                    job_id=job_id, role=role, party_id=party_id,
                    component_name=search_component_name,
                    model_id=job_parameters.model_id,
                    model_version=job_parameters.model_version)
                models = tracker_client.read_component_output_model(
                    search_model_alias)
                this_type_args[search_component_name] = models
        else:
            raise Exception(f"not support {input_type} input type")
    if get_input_table:
        return input_table
    return task_run_args, input_table_info_list