def _move_files(self, from_uri: str):
    curr_gcsfs_file_path = GcsfsFilePath.from_absolute_path(from_uri)
    previous_date_format = filename_parts_from_path(
        curr_gcsfs_file_path).date_str
    # Raw files are stored under a date-based subdirectory, e.g. "2021/03/15/".
    new_date_format = date.fromisoformat(previous_date_format).strftime(
        "%Y/%m/%d/")

    # Re-normalize the file name as a RAW_DATA file, preserving its
    # processed/unprocessed status.
    path_with_new_file_name = GcsfsFilePath.from_absolute_path(
        to_normalized_unprocessed_file_path_from_normalized_path(
            from_uri, GcsfsDirectIngestFileType.RAW_DATA))
    if DirectIngestGCSFileSystem.is_processed_file(curr_gcsfs_file_path):
        path_with_new_file_name = GcsfsFilePath.from_absolute_path(
            to_normalized_processed_file_path_from_normalized_path(
                from_uri, GcsfsDirectIngestFileType.RAW_DATA))

    raw_dir_with_date = GcsfsDirectoryPath.from_dir_and_subdir(
        self.region_storage_raw_dir_path, new_date_format)
    to_uri = GcsfsFilePath.from_directory_and_file_name(
        raw_dir_with_date, path_with_new_file_name.file_name).uri()

    if not self.dry_run:
        gsutil_mv(from_path=from_uri, to_path=to_uri)
    with self.mutex:
        self.move_list.append((from_uri, to_uri))
        if self.move_progress:
            self.move_progress.next()
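# Illustrative sketch (not part of the module above): the date-based storage
# subdirectory is derived from the file's ISO date string, so a date_str of
# "2021-03-15" becomes "2021/03/15/". Only the standard library is used here.
from datetime import date as _date_example

assert _date_example.fromisoformat("2021-03-15").strftime("%Y/%m/%d/") == "2021/03/15/"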
def run_export(project_id: str, dry_run: bool, state_code: str,
               target_bucket_suffix: str):
    """Performs the export operation, exporting rows for the given state code
    from the tables in the state dataset in the given project to CSV files in
    the given GCS bucket, named after the tables they were exported from."""
    today = datetime.date.today()

    big_query_client = BigQueryClientImpl()
    dataset_ref = big_query_client.dataset_ref_for_id(STATE_BASE_DATASET)
    if not big_query_client.dataset_exists(dataset_ref):
        raise ValueError(f'Dataset {dataset_ref.dataset_id} does not exist')

    tables = big_query_client.list_tables(dataset_ref.dataset_id)

    export_configs = []
    for table in tables:
        logging.info("******************************")
        export_query = state_table_export_query_str(table, [state_code])
        logging.info(export_query)

        if not export_query:
            continue

        target_bucket_name = f'{project_id}-{target_bucket_suffix}'
        export_dir = gcs_export_directory(target_bucket_name, today, state_code)
        export_file_name = f'{table.table_id}_{today.isoformat()}_export.csv'
        file = GcsfsFilePath.from_directory_and_file_name(
            export_dir, export_file_name)
        output_uri = file.uri()

        export_config = ExportQueryConfig(
            query=export_query,
            query_parameters=[],
            intermediate_dataset_id='export_temporary_tables',
            intermediate_table_name=f'{dataset_ref.dataset_id}_{table.table_id}',
            output_uri=output_uri,
            output_format=bigquery.DestinationFormat.CSV,
        )
        export_configs.append(export_config)
        if dry_run:
            logging.info(
                "[DRY RUN] Created export configuration to export table to GCS: %s",
                export_config)
        else:
            logging.info(
                "Created export configuration to export table to GCS: %s",
                export_config)

    if dry_run:
        logging.info("[DRY RUN] Exporting [%d] tables to GCS",
                     len(export_configs))
    else:
        logging.info("Exporting [%d] tables to GCS", len(export_configs))
        big_query_client.export_query_results_to_cloud_storage(export_configs)
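# Illustrative example (the table id and date are hypothetical) of the export
# file naming used above: a table id of "state_person" exported on 2021-03-15
# yields "state_person_2021-03-15_export.csv".
import datetime as _datetime_example

_table_id = 'state_person'
_today = _datetime_example.date(2021, 3, 15)
assert f'{_table_id}_{_today.isoformat()}_export.csv' == 'state_person_2021-03-15_export.csv'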
def _copy_to_ingest_bucket(self, path: str, normalized_file_name: str) -> None:
    full_file_upload_path_uri = GcsfsFilePath.from_directory_and_file_name(
        self.ingest_bucket, normalized_file_name).uri()

    if not self.dry_run:
        gsutil_cp(path, full_file_upload_path_uri)
    with self.mutex:
        self.copies_list.append((path, full_file_upload_path_uri))
        if self.move_progress:
            self.move_progress.next()
def _generate_output_path(self,
                          ingest_view_export_args: GcsfsIngestViewExportArgs,
                          metadata: DirectIngestIngestFileMetadata) -> GcsfsFilePath:
    ingest_view = self.ingest_views_by_tag[ingest_view_export_args.ingest_view_name]
    if not metadata.normalized_file_name:
        output_file_name = to_normalized_unprocessed_file_name(
            f'{ingest_view.file_tag}.csv',
            GcsfsDirectIngestFileType.INGEST_VIEW,
            dt=ingest_view_export_args.upper_bound_datetime_to_export
        )
    else:
        output_file_name = metadata.normalized_file_name

    return GcsfsFilePath.from_directory_and_file_name(self.ingest_directory_path,
                                                      output_file_name)
def copy(self, src_path: GcsfsFilePath, dst_path: GcsfsPath) -> None:
    if isinstance(dst_path, GcsfsFilePath):
        path = dst_path
    elif isinstance(dst_path, GcsfsDirectoryPath):
        path = GcsfsFilePath.from_directory_and_file_name(
            dst_path, src_path.file_name)
    else:
        raise ValueError(f'Unexpected path type [{type(dst_path)}]')
    self._add_path(path)
def _create_split_file_path(self,
                            original_file_path: GcsfsFilePath,
                            output_dir: GcsfsDirectoryPath,
                            split_num: int) -> GcsfsFilePath:
    parts = filename_parts_from_path(original_file_path)

    rank_str = str(split_num + 1).zfill(5)
    existing_suffix = f'_{parts.filename_suffix}' if parts.filename_suffix else ''
    updated_file_name = (
        f'{parts.file_tag}{existing_suffix}_{rank_str}'
        f'_{SPLIT_FILE_SUFFIX}_size{self.file_split_line_limit}'
        f'.{parts.extension}')
    return GcsfsFilePath.from_directory_and_file_name(
        output_dir,
        to_normalized_unprocessed_file_path(updated_file_name,
                                            dt=parts.utc_upload_datetime))
def copy(self, src_path: GcsfsFilePath, dst_path: GcsfsPath) -> None:
    if isinstance(dst_path, GcsfsFilePath):
        path = dst_path
    elif isinstance(dst_path, GcsfsDirectoryPath):
        path = GcsfsFilePath.from_directory_and_file_name(
            dst_path, src_path.file_name)
    else:
        raise ValueError(f'Unexpected path type [{type(dst_path)}]')

    if src_path.abs_path() in self.uploaded_test_path_to_actual:
        self.uploaded_test_path_to_actual[dst_path.abs_path()] = \
            self.uploaded_test_path_to_actual[src_path.abs_path()]

    self._add_path(path)
def copy(self, src_path: GcsfsFilePath, dst_path: GcsfsPath) -> None:
    # Look up the source blob and fail loudly if it does not exist.
    src_bucket = self.storage_client.get_bucket(src_path.bucket_name)
    src_blob = src_bucket.get_blob(src_path.blob_name)
    if not src_blob:
        raise ValueError(
            f'Blob at path [{src_path.abs_path()}] does not exist')
    dst_bucket = self.storage_client.get_bucket(dst_path.bucket_name)

    # A file destination is used as-is; a directory destination keeps the
    # source file name.
    if isinstance(dst_path, GcsfsFilePath):
        dst_blob_name = dst_path.blob_name
    elif isinstance(dst_path, GcsfsDirectoryPath):
        dst_blob_name = GcsfsFilePath.from_directory_and_file_name(
            dst_path, src_path.file_name).blob_name
    else:
        raise ValueError(f'Unexpected path type [{type(dst_path)}]')

    src_bucket.copy_blob(src_blob, dst_bucket, dst_blob_name)
def _create_split_file_path(self,
                            original_file_path: GcsfsFilePath,
                            output_dir: GcsfsDirectoryPath,
                            split_num: int) -> GcsfsFilePath:
    parts = filename_parts_from_path(original_file_path)

    rank_str = str(split_num + 1).zfill(5)
    updated_file_name = (
        f'{parts.stripped_file_name}_{rank_str}'
        f'_{SPLIT_FILE_SUFFIX}_size{self.ingest_file_split_line_limit}'
        f'.{parts.extension}')

    file_type = GcsfsDirectIngestFileType.INGEST_VIEW \
        if self.region.is_raw_vs_ingest_file_name_detection_enabled() \
        else GcsfsDirectIngestFileType.UNSPECIFIED

    return GcsfsFilePath.from_directory_and_file_name(
        output_dir,
        to_normalized_unprocessed_file_path(updated_file_name,
                                            file_type=file_type,
                                            dt=parts.utc_upload_datetime))
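# Illustrative example (the file tag, line limit, and extension are
# hypothetical) of the split-file naming used above; _SPLIT_FILE_SUFFIX is a
# placeholder standing in for whatever constant the module actually defines.
_SPLIT_FILE_SUFFIX = 'file_split'  # placeholder value for illustration only

_split_num = 0
_rank_str = str(_split_num + 1).zfill(5)  # -> '00001'
_updated_file_name = f'elite_offenders_{_rank_str}_{_SPLIT_FILE_SUFFIX}_size2500.csv'
assert _updated_file_name == 'elite_offenders_00001_file_split_size2500.csv'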
def get_output_path(self, chunk_num: int):
    name, _extension = os.path.splitext(self.path.file_name)
    return GcsfsFilePath.from_directory_and_file_name(
        self.temp_output_directory_path, f'temp_{name}_{chunk_num}.csv')