Example #1
from dagster import DagsterInvariantViolationError

# dict_without_keys is a small local helper; see the minimal equivalent below.
def dataframe_loader(_context, config):
    # config comes from a Selector, so exactly one (file_type, options) pair is set.
    spark_read = _context.resources.pyspark.spark_session.read
    file_type, file_options = list(config.items())[0]
    path = file_options.get("path")

    if file_type == "csv":
        return spark_read.csv(path, **dict_without_keys(file_options, "path"))
    elif file_type == "parquet":
        return spark_read.parquet(path, **dict_without_keys(file_options, "path"))
    elif file_type == "json":
        return spark_read.json(path, **dict_without_keys(file_options, "path"))
    elif file_type == "jdbc":
        return spark_read.jdbc(**file_options)
    elif file_type == "orc":
        return spark_read.orc(path, **dict_without_keys(file_options, "path"))
    elif file_type == "table":
        return spark_read.table(**file_options)
    elif file_type == "text":
        return spark_read.text(path, **dict_without_keys(file_options, "path"))
    elif file_type == "other":
        return spark_read.load(**file_options)
    else:
        raise DagsterInvariantViolationError(
            "Unsupported file_type {file_type}".format(file_type=file_type)
        )
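Every example below relies on dict_without_keys to strip the path entry before forwarding the remaining options as keyword arguments to the reader or writer. A minimal equivalent of that helper, shown here for reference:

def dict_without_keys(ddict, *keys):
    """Return a shallow copy of ddict with the given keys removed."""
    return {key: value for key, value in ddict.items() if key not in set(keys)}

So dict_without_keys(file_options, "path") passes everything except the path straight through, e.g. sep or header for a CSV read.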
Example #2
import shutil

from dagster import OutputDefinition, execute_solid, solid
from dagster_pyspark import DataFrame as DagsterPySparkDataFrame


# create_pyspark_df and get_temp_dir are helpers defined elsewhere in the suite.
def test_dataframe_outputs(file_type, read, other):
    df = create_pyspark_df()

    @solid(output_defs=[
        OutputDefinition(dagster_type=DagsterPySparkDataFrame, name="df")
    ])
    def return_df(_):
        return df

    with get_temp_dir() as temp_path:
        # Spark refuses to write into an existing directory, so remove the
        # temp dir and let the writer recreate it.
        shutil.rmtree(temp_path)

        options = {"path": temp_path}
        if other:
            options["format"] = file_type
            file_type = "other"

        # Materialize the output via run config, then read the file back with
        # the matching reader and compare contents.
        result = execute_solid(
            return_df,
            run_config={
                "solids": {"return_df": {"outputs": [{"df": {file_type: options}}]}}
            },
        )
        assert result.success
        actual = read(options["path"], **dict_without_keys(options, "path"))
        assert sorted(df.collect()) == sorted(actual.collect())

        result = execute_solid(
            return_df,
            run_config={
                "solids": {
                    "return_df": {
                        "outputs": [
                            {"df": {file_type: dict(options, mode="overwrite", compression="gzip")}}
                        ]
                    }
                }
            },
        )
        assert result.success
        actual = read(options["path"], **dict_without_keys(options, "path"))
        assert sorted(df.collect()) == sorted(actual.collect())
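The file_type, read, and other arguments arrive via pytest parametrization. A minimal sketch of what that table might look like; the SparkSession setup and the specific cases are assumptions, not the suite's actual fixtures:

import pytest
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()

@pytest.mark.parametrize(
    "file_type, read, other",
    [
        ("csv", spark.read.csv, False),
        ("json", spark.read.json, False),
        ("parquet", spark.read.parquet, False),
        # other=True exercises the generic "other"/load branch, so the reader
        # must accept the format as a keyword argument.
        ("csv", spark.read.load, True),
    ],
)
def test_dataframe_outputs(file_type, read, other):
    ...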
Example #3
import pandas as pd

from dagster import DagsterInvariantViolationError


def dataframe_loader(_context, config):
    file_type, file_options = list(config.items())[0]

    if file_type == "csv":
        path = file_options["path"]
        return pd.read_csv(path, **dict_without_keys(file_options, "path"))
    elif file_type == "parquet":
        return pd.read_parquet(file_options["path"])
    elif file_type == "table":
        # "table" is read as a tab-separated file.
        return pd.read_csv(file_options["path"], sep="\t")
    else:
        raise DagsterInvariantViolationError(
            "Unsupported file_type {file_type}".format(file_type=file_type))
Example #4
import pandas as pd

from dagster import AssetMaterialization, check


def dataframe_materializer(_context, config, pandas_df):
    check.inst_param(pandas_df, "pandas_df", pd.DataFrame)
    file_type, file_options = list(config.items())[0]

    if file_type == "csv":
        path = file_options["path"]
        pandas_df.to_csv(path, index=False, **dict_without_keys(file_options, "path"))
    elif file_type == "parquet":
        pandas_df.to_parquet(file_options["path"])
    elif file_type == "table":
        pandas_df.to_csv(file_options["path"], sep="\t", index=False)
    elif file_type == "pickle":
        pandas_df.to_pickle(file_options["path"])
    else:
        check.failed("Unsupported file_type {file_type}".format(file_type=file_type))

    # Record where the file landed as an AssetMaterialization event.
    return AssetMaterialization.file(file_options["path"])
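The write-side hook is registered analogously with @dagster_type_materializer, and both hooks get attached when the DagsterType is constructed. A hedged sketch; the type name and type-check function are placeholders, not Dagster's actual pandas DataFrame type:

import pandas as pd
from dagster import DagsterType, Permissive, Selector, dagster_type_materializer

@dagster_type_materializer(
    Selector({"csv": Permissive({"path": str}), "parquet": Permissive({"path": str})})
)
def dataframe_materializer(_context, config, pandas_df):
    ...  # body as in the example above

DataFrame = DagsterType(
    name="PandasDataFrame",
    type_check_fn=lambda _, value: isinstance(value, pd.DataFrame),
    # plus loader=dataframe_loader once Example #3's function is decorated
    materializer=dataframe_materializer,
)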
Example #5
from dagster import InputDefinition, ModeDefinition, execute_solid, file_relative_path, solid
from dagster_pyspark import DataFrame as DagsterPySparkDataFrame, pyspark_resource


def test_dataframe_inputs(file_type, read, other):
    @solid(input_defs=[InputDefinition(dagster_type=DagsterPySparkDataFrame, name="input_df")])
    def return_df(_, input_df):
        return input_df

    # Fixture files (num.csv, num.parquet, ...) are expected next to this test module.
    options = {"path": file_relative_path(__file__, "num.{file_type}".format(file_type=file_type))}
    if other:
        # Exercise the generic "other" branch, passing the format explicitly.
        options["format"] = file_type
        file_type = "other"

    result = execute_solid(
        return_df,
        mode_def=ModeDefinition(resource_defs={"pyspark": pyspark_resource}),
        run_config={"solids": {"return_df": {"inputs": {"input_df": {file_type: options}}}}},
    )
    assert result.success
    actual = read(options["path"], **dict_without_keys(options, "path"))
    assert sorted(result.output_value().collect()) == sorted(actual.collect())
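The pyspark resource on the ModeDefinition is what provides the SparkSession that Example #1's loader pulls off context.resources.pyspark. Spark settings can ride along in the same run_config; a sketch assuming pyspark_resource accepts a spark_conf config dict (the particular key is illustrative):

result = execute_solid(
    return_df,
    mode_def=ModeDefinition(resource_defs={"pyspark": pyspark_resource}),
    run_config={
        "resources": {"pyspark": {"config": {"spark_conf": {"spark.executor.memory": "2g"}}}},
        "solids": {"return_df": {"inputs": {"input_df": {"csv": {"path": "num.csv"}}}}},
    },
)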