def test_handle_output_then_load_input_pandas():
    snowflake_manager = snowflake_io_manager(
        build_init_resource_context(config={"database": "TESTDB"})
    )
    contents1 = PandasDataFrame([{"col1": "a", "col2": 1}])  # just to get the types right
    contents2 = PandasDataFrame([{"col1": "b", "col2": 2}])  # contents we will insert
    with temporary_snowflake_table(contents1) as temp_table_name:
        output_context = mock_output_context(temp_table_name)
        list(snowflake_manager.handle_output(output_context, contents2))  # exhaust the iterator

        input_context = mock_input_context(output_context)
        input_value = snowflake_manager.load_input(input_context)
        assert input_value.equals(contents2), f"{input_value}\n\n{contents2}"
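The mock_output_context and mock_input_context helpers are not shown in this snippet. The following is a minimal sketch of what they might look like, assuming they simply wrap Dagster's build_output_context and build_input_context and pass the table name through output metadata the way Example #3 does inline.

# Hypothetical sketch of the context helpers assumed by these tests: thin
# wrappers around Dagster's build_output_context / build_input_context that
# pass the target table through output metadata, as Example #3 does inline.
from dagster import build_input_context, build_output_context


def mock_output_context(table_name):
    # The IO manager looks up the fully qualified table name in the output metadata.
    return build_output_context(metadata={"table": f"public.{table_name}"})


def mock_input_context(output_context):
    # Loading the input only needs a reference to the upstream output context.
    return build_input_context(upstream_output=output_context)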
Example #2
def test_handle_output_spark_then_load_input_pandas():
    snowflake_manager = snowflake_io_manager(
        build_init_resource_context(config={"database": "TESTDB"})
    )
    spark = SparkSession.builder.config(
        "spark.jars.packages",
        "net.snowflake:snowflake-jdbc:3.8.0,net.snowflake:spark-snowflake_2.12:2.8.2-spark_3.0",
    ).getOrCreate()

    schema = StructType([StructField("col1", StringType()), StructField("col2", IntegerType())])
    contents = spark.createDataFrame([Row(col1="Thom", col2=51)], schema)

    with temporary_snowflake_table(PandasDataFrame([{"col1": "a", "col2": 1}])) as temp_table_name:
        output_context = mock_output_context(temp_table_name)

        list(snowflake_manager.handle_output(output_context, contents))  # exhaust the iterator

        input_context = mock_input_context(output_context)
        input_value = snowflake_manager.load_input(input_context)
        contents_pandas = contents.toPandas()
        assert str(input_value) == str(contents_pandas), f"{input_value}\n\n{contents_pandas}"
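temporary_snowflake_table is another helper assumed from the surrounding test module. Below is a rough sketch of one way it could be implemented, assuming the snowflake-connector-python client; the connection parameters and the tiny dtype-to-SQL mapping are illustrative only and cover just the columns used in these tests.

# Hypothetical sketch of temporary_snowflake_table: create an empty table whose
# column types match the sample DataFrame, yield its name, and drop it afterwards.
import uuid
from contextlib import contextmanager

import snowflake.connector


@contextmanager
def temporary_snowflake_table(contents):
    table_name = "a" + str(uuid.uuid4()).replace("-", "_")
    # Crude dtype mapping; only covers the string/int columns used in these tests.
    sql_types = {"object": "VARCHAR", "int64": "INTEGER"}
    columns = ", ".join(
        f"{col} {sql_types[str(dtype)]}" for col, dtype in contents.dtypes.items()
    )
    conn = snowflake.connector.connect(
        account="...", user="...", password="...", database="TESTDB", schema="PUBLIC"
    )
    try:
        conn.cursor().execute(f"CREATE TABLE {table_name} ({columns})")
        try:
            yield table_name
        finally:
            conn.cursor().execute(f"DROP TABLE {table_name}")
    finally:
        conn.close()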
Example #3
def test_handle_output_then_load_input_pandas():
    snowflake_manager = snowflake_io_manager(
        build_init_resource_context(
            config={"database": "TESTDB"}, resources={"partition_bounds": None}
        )
    )
    contents1 = PandasDataFrame([{"col1": "a", "col2": 1}])  # just to get the types right
    contents2 = PandasDataFrame([{"col1": "b", "col2": 2}])  # contents we will insert
    with temporary_snowflake_table(contents1) as temp_table_name:
        metadata = {"table": f"public.{temp_table_name}"}
        output_context = build_output_context(metadata=metadata)

        list(snowflake_manager.handle_output(output_context, contents2))  # exhaust the iterator

        input_context = build_input_context(upstream_output=output_context)
        input_value = snowflake_manager.load_input(input_context)
        assert input_value.equals(contents2), f"{input_value}\n\n{contents2}"
Example #4
    def to_pandas(self):
        """Convert to a pandas DataFrame.

        Note that the data is expected to have been evaluated already.

        Returns
        -------
        pandas.DataFrame

        """
        from collections import OrderedDict
        from pandas import DataFrame as PandasDataFrame

        pandas_index = self.index.to_pandas()
        pandas_data = OrderedDict(
            (column.name, column.to_pandas()) for column in self._iter()
        )

        return PandasDataFrame(pandas_data, pandas_index)
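The method above just reassembles per-column conversions into one frame: an ordered mapping of column name to converted values, constructed against the converted index. A standalone illustration of that construction with plain pandas objects:

# Standalone illustration of the same assembly pattern: an ordered mapping of
# column name -> column values, built against an explicit index.
from collections import OrderedDict
from pandas import DataFrame as PandasDataFrame, Index

pandas_index = Index([10, 20, 30], name="id")
pandas_data = OrderedDict([("col1", ["a", "b", "c"]), ("col2", [1, 2, 3])])
frame = PandasDataFrame(pandas_data, pandas_index)
print(frame)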
Example #5
def rc_to_pd(raccoon_df):
    """Convert a raccoon DataFrame to a pandas DataFrame."""
    from pandas import DataFrame as PandasDataFrame

    data = raccoon_df.to_dict(index=False)
    return PandasDataFrame(data, columns=raccoon_df.columns, index=raccoon_df.index)
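A short usage sketch of rc_to_pd, assuming the raccoon package is installed; the constructor arguments shown follow raccoon's documented data/columns/index DataFrame signature.

# Usage sketch: round-trip a small raccoon DataFrame into pandas.
# Assumes the raccoon package is installed; the data/columns/index arguments
# follow raccoon's documented DataFrame constructor.
import raccoon as rc

rc_df = rc.DataFrame(
    data={"col1": ["a", "b"], "col2": [1, 2]}, columns=["col1", "col2"], index=[10, 20]
)
pd_df = rc_to_pd(rc_df)
print(pd_df)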