def generate_session_id(task_id, task_version, role, party_id, suffix=None, random_end=False):
    # Compose a deterministic session id from task metadata, with an optional
    # suffix and an optional random tail for uniqueness.
    items = [task_id, str(task_version), role, str(party_id)]
    if suffix:
        items.append(suffix)
    if random_end:
        items.append(fate_uuid())
    return "_".join(items)
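# A minimal usage sketch; the task values below are hypothetical, and
# fate_uuid is assumed to return a random hex string.
example_sid = generate_session_id(task_id="202201010000000000", task_version=0,
                                  role="guest", party_id=9999)
# example_sid == "202201010000000000_0_guest_9999"
example_sid = generate_session_id("202201010000000000", 0, "guest", 9999,
                                  suffix="storage", random_end=True)
# example_sid == "202201010000000000_0_guest_9999_storage_<random-uuid>"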
def build(cls, session_id=None, storage_engine=None, computing_engine=None, **kwargs):
    session_id = session_id if session_id else fate_uuid()
    # Find the storage engine type
    if storage_engine is None and kwargs.get("name") and kwargs.get("namespace"):
        storage_engine, address, partitions = StorageSessionBase.get_storage_info(
            name=kwargs.get("name"), namespace=kwargs.get("namespace"))
    if storage_engine is None and computing_engine is None:
        computing_engine, federation_engine, federation_mode = \
            compatibility_utils.backend_compatibility(**kwargs)
    if storage_engine is None and computing_engine:
        # Get the computing engine's default storage engine
        storage_engine = Relationship.CompToStore.get(computing_engine)[0]

    # Lazily import and instantiate the backend matching the resolved engine.
    if storage_engine == StorageEngine.EGGROLL:
        from fate_arch.storage.eggroll import StorageSession
        storage_session = StorageSession(session_id=session_id, options=kwargs.get("options", {}))
    elif storage_engine == StorageEngine.STANDALONE:
        from fate_arch.storage.standalone import StorageSession
        storage_session = StorageSession(session_id=session_id, options=kwargs.get("options", {}))
    elif storage_engine == StorageEngine.MYSQL:
        from fate_arch.storage.mysql import StorageSession
        storage_session = StorageSession(session_id=session_id, options=kwargs.get("options", {}))
    elif storage_engine == StorageEngine.HDFS:
        from fate_arch.storage.hdfs import StorageSession
        storage_session = StorageSession(session_id=session_id, options=kwargs.get("options", {}))
    elif storage_engine == StorageEngine.FILE:
        from fate_arch.storage.file import StorageSession
        storage_session = StorageSession(session_id=session_id, options=kwargs.get("options", {}))
    elif storage_engine == StorageEngine.PATH:
        from fate_arch.storage.path import StorageSession
        storage_session = StorageSession(session_id=session_id, options=kwargs.get("options", {}))
    elif storage_engine == StorageEngine.HIVE:
        from fate_arch.storage.hive import StorageSession
        storage_session = StorageSession(session_id=session_id, options=kwargs.get("options", {}))
    elif storage_engine == StorageEngine.LINKIS_HIVE:
        from fate_arch.storage.linkis_hive import StorageSession
        storage_session = StorageSession(session_id=session_id, options=kwargs.get("options", {}))
    else:
        raise NotImplementedError(
            f"can not be initialized with storage engine: {storage_engine}")

    if kwargs.get("name") and kwargs.get("namespace"):
        storage_session.set_default(name=kwargs["name"], namespace=kwargs["namespace"])
    return storage_session
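# A hedged usage sketch of build(): resolving the storage engine from table
# metadata versus pinning it explicitly. The table name/namespace are
# hypothetical, and storage.Session is assumed to be the class exposing
# build() (as used in the download handler below).
with storage.Session.build(name="breast_hetero_guest", namespace="experiment") as session:
    table = session.get_table()

# Pin the engine explicitly; build() imports the matching backend lazily.
standalone_session = storage.Session.build(storage_engine=StorageEngine.STANDALONE)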
def component_output_data_download():
    request_data = request.json
    try:
        output_tables_meta = get_component_output_tables_meta(task_data=request_data)
    except Exception as e:
        stat_logger.exception(e)
        return error_response(210, str(e))
    limit = request_data.get('limit', -1)
    if not output_tables_meta:
        return error_response(response_code=210, retmsg='no data')
    if limit == 0:
        return error_response(response_code=210, retmsg='limit is 0')
    have_data_label = False

    output_data_file_list = []
    output_data_meta_file_list = []
    output_tmp_dir = os.path.join(os.getcwd(), 'tmp/{}'.format(fate_uuid()))
    for output_name, output_table_meta in output_tables_meta.items():
        output_data_count = 0
        is_str = False
        output_data_file_path = "{}/{}.csv".format(output_tmp_dir, output_name)
        os.makedirs(os.path.dirname(output_data_file_path), exist_ok=True)
        with open(output_data_file_path, 'w') as fw:
            with storage.Session.build(name=output_table_meta.get_name(),
                                       namespace=output_table_meta.get_namespace()) as storage_session:
                output_table = storage_session.get_table()
                for k, v in output_table.collect():
                    data_line, have_data_label, is_str = get_component_output_data_line(src_key=k, src_value=v)
                    fw.write('{}\n'.format(','.join(map(str, data_line))))
                    output_data_count += 1
                    if output_data_count == limit:
                        break

        if output_data_count:
            # get meta
            output_data_file_list.append(output_data_file_path)
            header = get_component_output_data_schema(output_table_meta=output_table_meta,
                                                      have_data_label=have_data_label,
                                                      is_str=is_str)
            output_data_meta_file_path = "{}/{}.meta".format(output_tmp_dir, output_name)
            output_data_meta_file_list.append(output_data_meta_file_path)
            with open(output_data_meta_file_path, 'w') as fw:
                json.dump({'header': header}, fw, indent=4)
            if request_data.get('head', True) and header:
                # Prepend the header line to the already-written data file.
                with open(output_data_file_path, 'r+') as f:
                    content = f.read()
                    f.seek(0, 0)
                    f.write('{}\n'.format(','.join(header)) + content)
    # tar
    memory_file = io.BytesIO()
    tar = tarfile.open(fileobj=memory_file, mode='w:gz')
    for index in range(0, len(output_data_file_list)):
        tar.add(output_data_file_list[index],
                os.path.relpath(output_data_file_list[index], output_tmp_dir))
        tar.add(output_data_meta_file_list[index],
                os.path.relpath(output_data_meta_file_list[index], output_tmp_dir))
    tar.close()
    memory_file.seek(0)
    output_data_file_list.extend(output_data_meta_file_list)
    for path in output_data_file_list:
        try:
            shutil.rmtree(os.path.dirname(path))
        except Exception as e:
            # log the cleanup failure but keep going
            stat_logger.warning(e)
    tar_file_name = 'job_{}_{}_{}_{}_output_data.tar.gz'.format(request_data['job_id'],
                                                                request_data['component_name'],
                                                                request_data['role'],
                                                                request_data['party_id'])
    return send_file(memory_file, attachment_filename=tar_file_name, as_attachment=True)
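# A client-side sketch of calling this endpoint. The route and port are
# assumptions (they depend on how the Flask blueprint is registered), not
# taken from the source.
import requests

response = requests.post(
    "http://127.0.0.1:9380/v1/tracking/component/output/data/download",  # hypothetical URL
    json={"job_id": "202201010000000000", "component_name": "dataio_0",
          "role": "guest", "party_id": 9999, "limit": 100})
with open("output_data.tar.gz", "wb") as f:
    f.write(response.content)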
def default_output_table_info(task_id, task_version):
    # Deterministic table name derived from the task, plus a random component.
    return f"output_data_{task_id}_{task_version}", base_utils.fate_uuid()
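# A minimal sketch with hypothetical values; treating the second element as a
# namespace is an assumption here, not confirmed by the source.
table_name, table_namespace = default_output_table_info("202201010000000000_dataio_0", 0)
# table_name == "output_data_202201010000000000_dataio_0_0"
# table_namespace is a fresh value from base_utils.fate_uuid()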
def send_table(output_tables_meta, tar_file_name, limit=-1, need_head=True):
    output_data_file_list = []
    output_data_meta_file_list = []
    output_tmp_dir = os.path.join(
        get_fate_flow_directory(),
        'tmp/{}/{}'.format(datetime.datetime.now().strftime("%Y%m%d"), fate_uuid()))
    for output_name, output_table_meta in output_tables_meta.items():
        output_data_count = 0
        output_data_file_path = "{}/{}.csv".format(output_tmp_dir, output_name)
        output_data_meta_file_path = "{}/{}.meta".format(output_tmp_dir, output_name)
        os.makedirs(os.path.dirname(output_data_file_path), exist_ok=True)
        with open(output_data_file_path, 'w') as fw:
            with Session() as sess:
                output_table = sess.get_table(name=output_table_meta.get_name(),
                                              namespace=output_table_meta.get_namespace())
                if output_table:
                    for k, v in output_table.collect():
                        data_line, is_str, extend_header = feature_utils.get_component_output_data_line(
                            src_key=k, src_value=v, schema=output_table_meta.get_schema())
                        # save meta on the first row
                        if output_data_count == 0:
                            output_data_file_list.append(output_data_file_path)
                            header = get_component_output_data_schema(
                                output_table_meta=output_table_meta,
                                is_str=is_str,
                                extend_header=extend_header)
                            output_data_meta_file_list.append(output_data_meta_file_path)
                            with open(output_data_meta_file_path, 'w') as f:
                                json.dump({'header': header}, f, indent=4)
                            if need_head and header and output_table_meta.get_have_head():
                                fw.write('{}\n'.format(','.join(header)))
                        fw.write('{}\n'.format(','.join(map(str, data_line))))
                        output_data_count += 1
                        if output_data_count == limit:
                            break
    # tar
    output_data_tarfile = "{}/{}".format(output_tmp_dir, tar_file_name)
    tar = tarfile.open(output_data_tarfile, mode='w:gz')
    for index in range(0, len(output_data_file_list)):
        tar.add(output_data_file_list[index],
                os.path.relpath(output_data_file_list[index], output_tmp_dir))
        tar.add(output_data_meta_file_list[index],
                os.path.relpath(output_data_meta_file_list[index], output_tmp_dir))
    tar.close()
    for index, path in enumerate(output_data_file_list):
        try:
            os.remove(path)
            os.remove(output_data_meta_file_list[index])
        except Exception as e:
            # log the cleanup failure but keep going
            stat_logger.warning(e)
    return send_file(output_data_tarfile, attachment_filename=tar_file_name)
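# A hedged sketch of a download handler built on send_table; it mirrors
# component_output_data_download above, and the wrapper name and payload keys
# are hypothetical.
def component_output_data_download_v2():
    request_data = request.json
    tables_meta = get_component_output_tables_meta(task_data=request_data)
    tar_name = 'job_{}_{}_{}_{}_output_data.tar.gz'.format(
        request_data['job_id'], request_data['component_name'],
        request_data['role'], request_data['party_id'])
    return send_table(tables_meta, tar_name,
                      limit=request_data.get('limit', -1),
                      need_head=request_data.get('head', True))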