import os

import great_expectations.exceptions as ge_exceptions
from great_expectations.data_context import DataContext
from great_expectations.data_context.types.base import DatasourceConfigSchema
from kedro.io import AbstractDataSet


def _add_spark_datasource(
    datasource_name: str, dataset: AbstractDataSet, ge_context: DataContext
) -> str:
    """Register a SparkDFDatasource pointing at a kedro dataset's parent directory."""
    from great_expectations.datasource import SparkDFDatasource

    # Use the directory containing the dataset file, relative to the project root.
    path = str(dataset._filepath.parent)
    if path.startswith("./"):
        path = path[2:]

    configuration = SparkDFDatasource.build_configuration(
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                # base_directory is resolved relative to the great_expectations/ folder.
                "base_directory": os.path.join("..", path),
            }
        }
    )
    configuration["class_name"] = "SparkDFDatasource"

    errors = DatasourceConfigSchema().validate(configuration)
    if len(errors) != 0:
        # errors is a dict; plain {} formatting avoids the TypeError that ":s" raises.
        raise ge_exceptions.GreatExpectationsError(
            "Invalid Datasource configuration: {}".format(errors)
        )

    ge_context.add_datasource(name=datasource_name, **configuration)
    return datasource_name
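# Hypothetical usage sketch for the helper above. The dataset class, file
# path, and project layout are illustrative assumptions, not part of the
# original; running this requires an initialized Great Expectations project
# and pyspark installed.
if __name__ == "__main__":
    from kedro.extras.datasets.spark import SparkDataSet

    dataset = SparkDataSet(filepath="data/01_raw/transactions.parquet")
    ge_context = DataContext()  # resolves the nearest great_expectations/ dir
    print(_add_spark_datasource("transactions__dir", dataset, ge_context))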
import os

import click
from great_expectations.datasource import SparkDFDatasource

# msg_prompt_filesys_enter_base_path and msg_prompt_datasource_name are
# module-level prompt strings defined alongside this helper.


def _add_spark_datasource(context):
    """Interactively prompt for a base directory and register it as a SparkDFDatasource."""
    path = click.prompt(
        msg_prompt_filesys_enter_base_path,
        # default='/data/',
        type=click.Path(exists=True, file_okay=False, dir_okay=True, readable=True),
        show_default=True,
    )
    if path.startswith("./"):
        path = path[2:]
    if path.endswith("/"):
        path = path[:-1]

    # Derive a default datasource name from the directory name.
    default_data_source_name = os.path.basename(path) + "__dir"
    data_source_name = click.prompt(
        msg_prompt_datasource_name, default=default_data_source_name, show_default=True
    )

    # base_directory is resolved relative to the great_expectations/ folder.
    configuration = SparkDFDatasource.build_configuration(
        base_directory=os.path.join("..", path)
    )
    context.add_datasource(
        name=data_source_name, class_name="SparkDFDatasource", **configuration
    )
    return data_source_name
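# Sketch of wiring the prompt-driven helper above into a click command. The
# command name and the assumption that a DataContext can be built from the
# current directory are illustrative, not from the original module.
@click.command(name="add-spark-datasource")
def add_spark_datasource_command():
    from great_expectations.data_context import DataContext

    context = DataContext()  # assumes we run inside an initialized project
    data_source_name = _add_spark_datasource(context)
    click.echo("Added Spark datasource: {}".format(data_source_name))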
import os
import textwrap

import click
import great_expectations.exceptions as ge_exceptions
from great_expectations.cli import toolkit
from great_expectations.cli.util import cli_message
from great_expectations.data_context.types.base import DatasourceConfigSchema
from great_expectations.datasource import SparkDFDatasource

# msg_prompt_* and _verify_pyspark_dependent_modules are defined alongside
# this helper in the CLI datasource module.


def _add_spark_datasource(
    context, passthrough_generator_only=True, prompt_for_datasource_name=True
):
    """Add a SparkDFDatasource to the project, confirming with the user first."""
    toolkit.send_usage_message(
        data_context=context,
        event="cli.new_ds_choice",
        event_payload={"type": "spark"},
        success=True,
    )

    if not _verify_pyspark_dependent_modules():
        return None

    if passthrough_generator_only:
        datasource_name = "files_spark_datasource"

        # configuration = SparkDFDatasource.build_configuration(batch_kwargs_generators={
        #     "default": {
        #         "class_name": "PassthroughGenerator",
        #     }
        # }
        # )
        configuration = SparkDFDatasource.build_configuration()

    else:
        path = click.prompt(
            msg_prompt_filesys_enter_base_path,
            type=click.Path(exists=True, file_okay=False),
        ).strip()
        if path.startswith("./"):
            path = path[2:]

        if path.endswith("/"):
            basenamepath = path[:-1]
        else:
            basenamepath = path

        datasource_name = os.path.basename(basenamepath) + "__dir"
        if prompt_for_datasource_name:
            datasource_name = click.prompt(
                msg_prompt_datasource_name, default=datasource_name
            )

        configuration = SparkDFDatasource.build_configuration(
            batch_kwargs_generators={
                "subdir_reader": {
                    "class_name": "SubdirReaderBatchKwargsGenerator",
                    # base_directory is resolved relative to the great_expectations/ folder.
                    "base_directory": os.path.join("..", path),
                }
            }
        )

    configuration["class_name"] = "SparkDFDatasource"
    configuration["module_name"] = "great_expectations.datasource"

    errors = DatasourceConfigSchema().validate(configuration)
    if len(errors) != 0:
        # errors is a dict; plain {} formatting avoids the TypeError that ":s" raises.
        raise ge_exceptions.GreatExpectationsError(
            "Invalid Datasource configuration: {}".format(errors)
        )

    cli_message(
        """
Great Expectations will now add a new Datasource '{:s}' to your deployment, by
adding this entry to your great_expectations.yml:

{:s}
""".format(
            datasource_name,
            textwrap.indent(toolkit.yaml.dump({datasource_name: configuration}), "  "),
        )
    )
    toolkit.confirm_proceed_or_exit()

    context.add_datasource(name=datasource_name, **configuration)
    return datasource_name
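# Sketch: scripted use of the variant above. With the default
# passthrough_generator_only=True, the base-directory and name prompts are
# skipped, though toolkit.confirm_proceed_or_exit() still asks for
# confirmation before the entry is written. The wrapper name and project
# path are illustrative assumptions.
def add_files_spark_datasource(project_dir="great_expectations"):
    from great_expectations.data_context import DataContext

    context = DataContext(project_dir)
    return _add_spark_datasource(context)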