Beispiel #1
0
def generate_session_id(task_id, task_version, role, party_id, suffix=None, random_end=False):
    """Compose a session id of the form ``task_id_version_role_party[_suffix][_uuid]``.

    :param task_id: task identifier (string)
    :param task_version: task version, stringified into the id
    :param role: participant role
    :param party_id: party identifier, stringified into the id
    :param suffix: optional extra segment appended before the random tail
    :param random_end: when True, append a random uuid segment (via fate_uuid)
    :return: underscore-joined session id string
    """
    base = [task_id, str(task_version), role, str(party_id)]
    optional = [suffix] if suffix else []
    tail = [fate_uuid()] if random_end else []
    return "_".join(base + optional + tail)
Beispiel #2
0
    def build(cls,
              session_id=None,
              storage_engine=None,
              computing_engine=None,
              **kwargs):
        """Create and return a StorageSession for the resolved storage engine.

        Resolution order for ``storage_engine`` when it is not given:
        1. look up the table's stored metadata via ``name``/``namespace`` kwargs;
        2. derive the computing engine from backend-compatibility kwargs;
        3. fall back to the computing engine's default storage engine.

        :param session_id: session id; a fresh uuid is generated when falsy
        :param storage_engine: explicit StorageEngine value, or None to resolve
        :param computing_engine: computing engine used for the fallback mapping
        :param kwargs: may carry ``name``/``namespace`` (default table), ``options``
        :return: a StorageSession instance for the selected engine
        :raises NotImplementedError: for an unsupported storage engine
        """
        session_id = session_id if session_id else fate_uuid()
        # Try to find the storage engine from the table's stored metadata
        if storage_engine is None and kwargs.get("name") and kwargs.get(
                "namespace"):
            storage_engine, address, partitions = StorageSessionBase.get_storage_info(
                name=kwargs.get("name"), namespace=kwargs.get("namespace"))
        if storage_engine is None and computing_engine is None:
            computing_engine, federation_engine, federation_mode = compatibility_utils.backend_compatibility(
                **kwargs)
        if storage_engine is None and computing_engine:
            # Use the computing engine's default storage engine
            storage_engine = Relationship.CompToStore.get(computing_engine)[0]

        # Each supported engine maps to a module exposing a ``StorageSession``
        # class with an identical constructor signature; this replaces the
        # previous eight copy-pasted if/elif branches.
        engine_modules = {
            StorageEngine.EGGROLL: "fate_arch.storage.eggroll",
            StorageEngine.STANDALONE: "fate_arch.storage.standalone",
            StorageEngine.MYSQL: "fate_arch.storage.mysql",
            StorageEngine.HDFS: "fate_arch.storage.hdfs",
            StorageEngine.FILE: "fate_arch.storage.file",
            StorageEngine.PATH: "fate_arch.storage.path",
            StorageEngine.HIVE: "fate_arch.storage.hive",
            StorageEngine.LINKIS_HIVE: "fate_arch.storage.linkis_hive",
        }
        if storage_engine not in engine_modules:
            raise NotImplementedError(
                f"can not be initialized with storage engine: {storage_engine}"
            )
        import importlib
        storage_module = importlib.import_module(engine_modules[storage_engine])
        storage_session = storage_module.StorageSession(
            session_id=session_id, options=kwargs.get("options", {}))
        if kwargs.get("name") and kwargs.get("namespace"):
            storage_session.set_default(name=kwargs["name"],
                                        namespace=kwargs["namespace"])
        return storage_session
Beispiel #3
0
def component_output_data_download():
    """Download a component's output tables as a gzipped tar of csv + meta files.

    Reads task coordinates from the request JSON, dumps each output table to a
    csv (optionally capped at ``limit`` rows and prefixed with a header line),
    writes a ``.meta`` JSON per table, tars everything in memory, removes the
    temporary directory, and streams the archive back.

    Bug fix vs. the original: the archive-name assignment and the
    ``return send_file(...)`` were inside the cleanup loop, so the function
    returned after the first cleanup iteration (and returned None when no
    table produced rows). Both now run exactly once, after cleanup.

    :return: a Flask file response with the tar.gz, or an error_response
    """
    request_data = request.json
    try:
        output_tables_meta = get_component_output_tables_meta(task_data=request_data)
    except Exception as e:
        stat_logger.exception(e)
        return error_response(210, str(e))
    limit = request_data.get('limit', -1)
    if not output_tables_meta:
        return error_response(response_code=210, retmsg='no data')
    if limit == 0:
        return error_response(response_code=210, retmsg='limit is 0')
    have_data_label = False

    output_data_file_list = []
    output_data_meta_file_list = []
    output_tmp_dir = os.path.join(os.getcwd(), 'tmp/{}'.format(fate_uuid()))
    for output_name, output_table_meta in output_tables_meta.items():
        output_data_count = 0
        is_str = False
        output_data_file_path = "{}/{}.csv".format(output_tmp_dir, output_name)
        os.makedirs(os.path.dirname(output_data_file_path), exist_ok=True)
        with open(output_data_file_path, 'w') as fw:
            with storage.Session.build(name=output_table_meta.get_name(), namespace=output_table_meta.get_namespace()) as storage_session:
                output_table = storage_session.get_table()
                for k, v in output_table.collect():
                    data_line, have_data_label, is_str = get_component_output_data_line(src_key=k, src_value=v)
                    fw.write('{}\n'.format(','.join(map(str, data_line))))
                    output_data_count += 1
                    if output_data_count == limit:
                        break

        if output_data_count:
            # Only tables that yielded rows get a csv entry and a meta file
            output_data_file_list.append(output_data_file_path)
            header = get_component_output_data_schema(output_table_meta=output_table_meta, have_data_label=have_data_label, is_str=is_str)
            output_data_meta_file_path = "{}/{}.meta".format(output_tmp_dir, output_name)
            output_data_meta_file_list.append(output_data_meta_file_path)
            with open(output_data_meta_file_path, 'w') as fw:
                json.dump({'header': header}, fw, indent=4)
            if request_data.get('head', True) and header:
                # Prepend the header line to the already-written csv body
                with open(output_data_file_path, 'r+') as f:
                    content = f.read()
                    f.seek(0, 0)
                    f.write('{}\n'.format(','.join(header)) + content)
    # Build the gzipped tar entirely in memory
    memory_file = io.BytesIO()
    with tarfile.open(fileobj=memory_file, mode='w:gz') as tar:
        for data_path, meta_path in zip(output_data_file_list, output_data_meta_file_list):
            tar.add(data_path, os.path.relpath(data_path, output_tmp_dir))
            tar.add(meta_path, os.path.relpath(meta_path, output_tmp_dir))
    memory_file.seek(0)
    # Best-effort cleanup: remove the whole temp dir once instead of
    # re-deleting it per file
    try:
        shutil.rmtree(output_tmp_dir)
    except Exception as e:
        stat_logger.warning(e)
    tar_file_name = 'job_{}_{}_{}_{}_output_data.tar.gz'.format(request_data['job_id'],
                                                                request_data['component_name'],
                                                                request_data['role'], request_data['party_id'])
    return send_file(memory_file, attachment_filename=tar_file_name, as_attachment=True)
Beispiel #4
0
def default_output_table_info(task_id, task_version):
    """Return the default (table name, namespace) pair for a task's output data.

    The name embeds the task id and version; the second element is a fresh
    uuid from base_utils.fate_uuid().
    """
    table_name = "output_data_{}_{}".format(task_id, task_version)
    return table_name, base_utils.fate_uuid()
Beispiel #5
0
 def send_table(output_tables_meta,
                tar_file_name,
                limit=-1,
                need_head=True):
     """Dump output tables to csv + meta files, tar them, and send the archive.

     For each table in *output_tables_meta*, rows are written to
     ``<tmp>/<output_name>.csv`` and a ``.meta`` JSON with the header is
     written next to it. On the first collected row the header line is
     optionally written to the csv, and both file paths are registered for
     archiving. Tables that yield no rows are skipped (their empty csv is
     left behind but not archived).

     :param output_tables_meta: mapping of output name -> table meta object
     :param tar_file_name: file name for the resulting tar.gz archive
     :param limit: stop after this many rows per table; -1 means no limit
     :param need_head: when True, write the header line at the top of each csv
     :return: a file response streaming the tar.gz (via send_file)
     """
     output_data_file_list = []
     output_data_meta_file_list = []
     # per-call temp dir, partitioned by date: tmp/<YYYYmmdd>/<uuid>
     output_tmp_dir = os.path.join(
         get_fate_flow_directory(),
         'tmp/{}/{}'.format(datetime.datetime.now().strftime("%Y%m%d"),
                            fate_uuid()))
     for output_name, output_table_meta in output_tables_meta.items():
         output_data_count = 0
         output_data_file_path = "{}/{}.csv".format(output_tmp_dir,
                                                    output_name)
         output_data_meta_file_path = "{}/{}.meta".format(
             output_tmp_dir, output_name)
         os.makedirs(os.path.dirname(output_data_file_path), exist_ok=True)
         with open(output_data_file_path, 'w') as fw:
             with Session() as sess:
                 output_table = sess.get_table(
                     name=output_table_meta.get_name(),
                     namespace=output_table_meta.get_namespace())
                 if output_table:
                     for k, v in output_table.collect():
                         data_line, is_str, extend_header = feature_utils.get_component_output_data_line(
                             src_key=k,
                             src_value=v,
                             schema=output_table_meta.get_schema())
                         # save meta
                         # First row only: register files, dump the header
                         # meta JSON, and optionally write the csv header
                         # line before any data lines.
                         if output_data_count == 0:
                             output_data_file_list.append(
                                 output_data_file_path)
                             header = get_component_output_data_schema(
                                 output_table_meta=output_table_meta,
                                 is_str=is_str,
                                 extend_header=extend_header)
                             output_data_meta_file_list.append(
                                 output_data_meta_file_path)
                             with open(output_data_meta_file_path,
                                       'w') as f:
                                 json.dump({'header': header}, f, indent=4)
                             if need_head and header and output_table_meta.get_have_head(
                             ):
                                 fw.write('{}\n'.format(','.join(header)))
                         fw.write('{}\n'.format(','.join(
                             map(lambda x: str(x), data_line))))
                         output_data_count += 1
                         # limit == -1 never matches, so all rows are written
                         if output_data_count == limit:
                             break
         # tar
     # Archive every registered csv/meta pair, with paths relative to the
     # temp dir so the tar contains bare file names.
     output_data_tarfile = "{}/{}".format(output_tmp_dir, tar_file_name)
     tar = tarfile.open(output_data_tarfile, mode='w:gz')
     for index in range(0, len(output_data_file_list)):
         tar.add(
             output_data_file_list[index],
             os.path.relpath(output_data_file_list[index], output_tmp_dir))
         tar.add(
             output_data_meta_file_list[index],
             os.path.relpath(output_data_meta_file_list[index],
                             output_tmp_dir))
     tar.close()
     # Best-effort removal of the source files; the tarfile itself is kept
     # because it is streamed back below.
     for key, path in enumerate(output_data_file_list):
         try:
             os.remove(path)
             os.remove(output_data_meta_file_list[key])
         except Exception as e:
             # warning
             stat_logger.warning(e)
     return send_file(output_data_tarfile,
                      attachment_filename=tar_file_name)