def sanitize_yaml_and_save_datasource(
    context: DataContext, datasource_yaml: str, overwrite_existing: bool = False
) -> None:
    """A convenience function used in notebooks to help users save secrets.

    Parses the given datasource yaml, moves any ``credentials`` entry into the
    context's config-variables store (replacing it with a ``${...}`` reference
    so secrets never land in the main config file), and registers the
    datasource on the context.

    Args:
        context: The DataContext to save the datasource into.
        datasource_yaml: Yaml string describing the datasource; must contain
            a top-level ``name`` key.
        overwrite_existing: If False (default), refuse to overwrite a
            datasource that already exists under the same name.

    Raises:
        ValueError: If the yaml is empty/falsy or missing a ``name`` key.
        TypeError: If ``datasource_yaml`` is not a string.
    """
    if not datasource_yaml:
        raise ValueError("Please verify the yaml and try again.")
    if not isinstance(datasource_yaml, str):
        raise TypeError("Please pass in a valid yaml string.")
    config = yaml.load(datasource_yaml)
    try:
        datasource_name = config.pop("name")
    except KeyError:
        # Suppress the KeyError chain — it adds no information for the user.
        raise ValueError(
            "The datasource yaml is missing a `name` attribute."
        ) from None
    if not overwrite_existing and check_if_datasource_name_exists(
        context=context, datasource_name=datasource_name
    ):
        print(
            f'**WARNING** A Datasource named "{datasource_name}" already exists in this Data Context. The Datasource has *not* been saved. Please use a different name or set overwrite_existing=True if you want to overwrite!'
        )
        return
    if "credentials" in config:  # direct membership test; .keys() is redundant
        credentials = config["credentials"]
        # Store the real secrets as a config variable and leave only a
        # ${variable} reference behind in the datasource config.
        config["credentials"] = "${" + datasource_name + "}"
        context.save_config_variable(datasource_name, credentials)
    context.add_datasource(name=datasource_name, **config)
def test_suite_edit_with_batch_kwargs_unable_to_load_a_batch_raises_helpful_error(
    mock_webbrowser, mock_subprocess, caplog, empty_data_context
):
    """
    The command should:
    - exit with a clear error message
    - NOT open Data Docs
    - NOT open jupyter
    """
    # Build a project with a suite and a datasource, then point the CLI at
    # batch kwargs that reference a table which cannot be loaded.
    root_dir = empty_data_context.root_directory
    context = DataContext(root_dir)
    context.create_expectation_suite("foo")
    context.add_datasource("source", class_name="PandasDatasource")

    bad_batch_kwargs = '{"table": "fake", "datasource": "source"}'
    cli_runner = CliRunner(mix_stderr=False)
    cli_result = cli_runner.invoke(
        cli,
        [
            "suite",
            "edit",
            "foo",
            "-d",
            root_dir,
            "--batch-kwargs",
            bad_batch_kwargs,
        ],
        catch_exceptions=False,
    )
    output = cli_result.output

    # Non-zero exit with a helpful hint, and no suite-editing follow-up text.
    assert cli_result.exit_code == 1
    assert "To continue editing this suite" not in output
    assert "Please check that your batch_kwargs are able to load a batch." in output

    # Neither Data Docs nor jupyter should have been launched.
    assert mock_webbrowser.call_count == 0
    assert mock_subprocess.call_count == 0

    assert_no_logging_messages_or_tracebacks(caplog, cli_result)
def test_init_on_existing_project_with_multiple_datasources_exist_do_nothing(
    mock_webbrowser, caplog, initialized_project, filesystem_csv_2
):
    """Running ``init`` on a complete project that already has two
    datasources should recognize it as complete and change nothing."""
    project_dir = initialized_project
    ge_dir = os.path.join(project_dir, DataContext.GE_DIR)

    # Add a second datasource so the project has more than one.
    context = DataContext(ge_dir)
    context.add_datasource(
        "another_datasource",
        module_name="great_expectations.datasource",
        class_name="PandasDatasource",
    )
    assert len(context.list_datasources()) == 2

    cli_runner = CliRunner(mix_stderr=False)
    # The CLI warns when an existing great_expectations.yml is found.
    with pytest.warns(
        UserWarning, match="Warning. An existing `great_expectations.yml` was found"
    ):
        cli_result = cli_runner.invoke(
            cli,
            ["init", "-d", project_dir],
            input="n\n",
            catch_exceptions=False,
        )

    output = cli_result.stdout
    assert cli_result.exit_code == 0
    assert mock_webbrowser.call_count == 0

    assert "Error: invalid input" not in output
    assert "Always know what to expect from your data" in output
    assert "This looks like an existing project that" in output
    assert "appears complete" in output
    assert "Would you like to build & view this project's Data Docs" in output
    assert_no_logging_messages_or_tracebacks(caplog, cli_result)
def test_get_batch_kwargs_for_specific_dataasset(empty_data_context, filesystem_csv):
    """get_batch_kwargs should resolve a lone data asset to full batch kwargs
    even when no datasource or generator name is supplied."""
    root_dir = empty_data_context.root_directory
    context = DataContext(root_dir)
    csv_dir = str(filesystem_csv)

    context.add_datasource(
        "wow_a_datasource",
        module_name="great_expectations.datasource",
        class_name="PandasDatasource",
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": csv_dir,
            }
        },
    )

    # Only the data asset name is given; datasource/generator are inferred.
    actual = get_batch_kwargs(
        context,
        datasource_name=None,
        batch_kwargs_generator_name=None,
        data_asset_name="f1",
        additional_batch_kwargs={},
    )

    assert actual == {
        "data_asset_name": "f1",
        "datasource": "wow_a_datasource",
        "path": os.path.join(filesystem_csv, "f1.csv"),
    }
def _add_spark_datasource(
    datasource_name: str, dataset: AbstractDataSet, ge_context: DataContext
) -> str:
    """Register a SparkDFDatasource on the context for the given dataset.

    Builds a SubdirReader-based configuration rooted at the dataset's parent
    directory (relative to the GE directory), validates it, and adds it to
    the context.

    Args:
        datasource_name: Name under which to register the datasource.
        dataset: Dataset whose ``_filepath`` parent becomes the base directory.
        ge_context: The DataContext to register the datasource on.

    Returns:
        The ``datasource_name`` that was registered.

    Raises:
        ge_exceptions.GreatExpectationsError: If the built configuration
            fails schema validation.
    """
    from great_expectations.datasource import SparkDFDatasource

    path = str(dataset._filepath.parent)
    # Normalize away a leading "./" so the generated base_directory is clean.
    if path.startswith("./"):
        path = path[2:]

    configuration = SparkDFDatasource.build_configuration(
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": os.path.join("..", path),
            }
        }
    )
    configuration["class_name"] = "SparkDFDatasource"
    errors = DatasourceConfigSchema().validate(configuration)
    if errors:
        # BUG FIX: "{0:s}".format(errors) raised TypeError because `errors`
        # is a dict and object.__format__ rejects a non-empty ("s") format
        # spec — so the intended message was never shown. Plain str
        # conversion produces the same text safely.
        raise ge_exceptions.GreatExpectationsError(
            "Invalid Datasource configuration: {}".format(errors)
        )
    ge_context.add_datasource(name=datasource_name, **configuration)
    return datasource_name
def test_cli_datasorce_list(caplog, empty_data_context, filesystem_csv_2):
    """Test an empty project and after adding a single datasource.

    NOTE(review): the function name contains a typo ("datasorce"); it is
    kept as-is because renaming would change the collected test id.
    """
    project_root_dir = empty_data_context.root_directory
    context = DataContext(project_root_dir)
    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli, ["datasource", "list", "-d", project_root_dir], catch_exceptions=False
    )
    stdout = result.output.strip()
    # A fresh project reports an empty datasource list.
    assert "[]" in stdout
    assert context.list_datasources() == []

    context.add_datasource(
        "wow_a_datasource",
        module_name="great_expectations.datasource",
        class_name="PandasDatasource",
        generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": str(filesystem_csv_2),
            }
        },
    )

    assert context.list_datasources() == [
        {"name": "wow_a_datasource", "class_name": "PandasDatasource"}
    ]

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli, ["datasource", "list", "-d", project_root_dir], catch_exceptions=False
    )
    stdout = result.output.strip()
    if PY2:
        # deal with legacy python dictionary sorting
        # BUG FIX: the original `assert (a and b in stdout)` only tested the
        # second substring — the first operand was merely evaluated for
        # truthiness. Check both substrings explicitly.
        assert "'name': 'wow_a_datasource'" in stdout
        assert "'class_name': 'PandasDatasource'" in stdout
        assert 60 <= len(stdout) <= 70
    else:
        assert (
            "[{'name': 'wow_a_datasource', 'class_name': 'PandasDatasource'}]"
            in stdout
        )
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_cli_datasource_list(caplog, empty_data_context, filesystem_csv_2):
    """Test an empty project and after adding a single datasource."""
    # An empty project should report no datasources via both CLI and API.
    project_root_dir = empty_data_context.root_directory
    context = DataContext(project_root_dir)
    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli, ["datasource", "list", "-d", project_root_dir], catch_exceptions=False
    )
    stdout = result.output.strip()
    assert "No Datasources found" in stdout
    assert context.list_datasources() == []

    base_directory = str(filesystem_csv_2)

    context.add_datasource(
        "wow_a_datasource",
        module_name="great_expectations.datasource",
        class_name="PandasDatasource",
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": base_directory,
            }
        },
    )

    # The API should now expose the full resolved datasource configuration,
    # including defaults for data_asset_type and module_name.
    datasources = context.list_datasources()

    assert datasources == [
        {
            "name": "wow_a_datasource",
            "class_name": "PandasDatasource",
            "data_asset_type": {
                "class_name": "PandasDataset",
                "module_name": "great_expectations.dataset",
            },
            "batch_kwargs_generators": {
                "subdir_reader": {
                    "base_directory": base_directory,
                    "class_name": "SubdirReaderBatchKwargsGenerator",
                }
            },
            "module_name": "great_expectations.datasource",
        }
    ]

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli, ["datasource", "list", "-d", project_root_dir], catch_exceptions=False
    )
    # NOTE(review): the "[0m"/"[36m" fragments below look like ANSI color
    # escape sequences whose leading ESC character may have been lost in a
    # copy/paste — confirm the raw escape bytes are intact in the repo file.
    expected_output = """
1 Datasource found:[0m
[0m
 - [36mname:[0m wow_a_datasource[0m
   [36mmodule_name:[0m great_expectations.datasource[0m
   [36mclass_name:[0m PandasDatasource[0m
   [36mbatch_kwargs_generators:[0m[0m
     [36msubdir_reader:[0m[0m
       [36mclass_name:[0m SubdirReaderBatchKwargsGenerator[0m
       [36mbase_directory:[0m {}[0m
   [36mdata_asset_type:[0m[0m
     [36mmodule_name:[0m great_expectations.dataset[0m
     [36mclass_name:[0m PandasDataset[0m""".format(base_directory).strip()
    stdout = result.output.strip()
    assert stdout == expected_output
    assert_no_logging_messages_or_tracebacks(caplog, result)