def test_missing_resource():
    with pytest.raises(DagsterInvalidDefinitionError):

        @pyspark_table
        def missing(_):
            pass

        construct_lakehouse_pipeline('test', lakehouse_tables=[missing], resources={})
def test_missing_resource():
    with pytest.raises(DagsterInvalidDefinitionError):

        @lakehouse_table(required_resource_keys={'foo'})
        def missing(_):
            pass

        construct_lakehouse_pipeline('test', lakehouse_tables=[missing])
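# A minimal counterpart sketch (not from the original tests): supplying the
# required resource key should let construction succeed rather than raise.
# The test name and the object() resource value are hypothetical stand-ins;
# test_basic_sqlite_pipeline below passes a raw sqlite connection as a resource
# value in the same way.
def test_present_resource_sketch():
    @lakehouse_table(required_resource_keys={'foo'})
    def present(_):
        pass

    construct_lakehouse_pipeline('test', lakehouse_tables=[present], resources={'foo': object()})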
def test_snowflake():
    construct_lakehouse_pipeline(
        name='snowflake_lake',
        lakehouse_tables=[TableOne],
        resources={'snowflake': snowflake_resource, 'lakehouse': SnowflakeLakehouse()},
    )
def test_execute_byfeature_parquet_lakehouse():
    with get_temp_dir() as temp_dir:
        lakehouse = ByFeatureParquetLakehouse(temp_dir)
        pipeline_def = construct_lakehouse_pipeline(
            name='test',
            lakehouse_tables=[TableOne, TableTwo, TableThree],
            mode_defs=[
                ModeDefinition(
                    resource_defs={
                        'spark': spark_session_resource,
                        'lakehouse': ResourceDefinition.hardcoded_resource(lakehouse),
                    }
                )
            ],
        )
        pipeline_result = execute_pipeline(pipeline_def)
        assert pipeline_result.success

        def get_table(table_def):
            spark = spark_session_from_config()
            return spark.read.parquet(
                os.path.join(temp_dir, table_def.metadata[FEATURE_AREA], table_def.name)
            ).collect()

        assert get_table(TableOne) == [Row(num=1)]
        assert get_table(TableTwo) == [Row(num=2)]
        assert set(get_table(TableThree)) == set([Row(num=1), Row(num=2)])
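# A hedged sketch of what the three pyspark tables exercised above might look
# like; they are not defined in this section. Everything here is inferred from
# the test's assertions (TableOne yields Row(num=1), TableTwo yields Row(num=2),
# TableThree unions them) and from the decorator patterns shown elsewhere in
# these tests. The metadata values, decorator arguments, and kwarg-passing of
# input tables are all assumptions.
@pyspark_table(metadata={FEATURE_AREA: 'core'})
def TableOne(context) -> SparkDF:
    # Assumes the 'spark' resource is a SparkSession, as spark_session_resource suggests.
    return context.resources.spark.createDataFrame([Row(num=1)])


@pyspark_table(metadata={FEATURE_AREA: 'core'})
def TableTwo(context) -> SparkDF:
    return context.resources.spark.createDataFrame([Row(num=2)])


@pyspark_table(
    metadata={FEATURE_AREA: 'core'},
    input_tables=[input_table('table_one', TableOne), input_table('table_two', TableTwo)],
)
def TableThree(_context, table_one: SparkDF, table_two: SparkDF) -> SparkDF:
    return table_one.union(table_two)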
def test_basic_sqlite_pipeline():
    @sqlite_table
    def TableOne(context):
        context.resources.conn.execute('''CREATE TABLE TableOne AS SELECT 1 as num''')
        context.resources.conn.commit()

    @sqlite_table
    def TableTwo(context):
        context.resources.conn.execute('''CREATE TABLE TableTwo AS SELECT 2 as num''')
        context.resources.conn.commit()

    @sqlite_table(
        input_tables=[input_table('table_one', TableOne), input_table('table_two', TableTwo)]
    )
    def TableThree(context, **_kwargs):
        context.resources.conn.execute(
            'CREATE TABLE TableThree AS SELECT num from TableOne UNION SELECT num from TableTwo'
        )
        context.resources.conn.commit()

    conn = sqlite3.connect(':memory:')
    pipeline_def = construct_lakehouse_pipeline(
        name='sqllite_lakehouse_pipeline',
        lakehouse_tables=[TableOne, TableTwo, TableThree],
        resources={'conn': conn, 'lakehouse': SqlLiteLakehouse()},
    )
    result = execute_pipeline(pipeline_def)
    assert result.success
    assert conn.cursor().execute('SELECT * FROM TableThree').fetchall() == [(1,), (2,)]
def test_file_based_sqlite_pipeline():
    def path_for_table(table_name):
        return file_relative_path(
            __file__, 'basic_sqllite_test_files/{table_name}.sql'.format(table_name=table_name)
        )

    TableOne = create_sqllite_table_from_file(path_for_table('TableOne'))
    TableTwo = create_sqllite_table_from_file(path_for_table('TableTwo'))
    TableThree = create_sqllite_table_from_file(
        path_for_table('TableThree'),
        input_tables=[input_table('table_one', TableOne), input_table('table_two', TableTwo)],
    )

    conn = sqlite3.connect(':memory:')
    pipeline_def = construct_lakehouse_pipeline(
        name='sqllite_lakehouse_pipeline',
        lakehouse_tables=[TableOne, TableTwo, TableThree],
        resources={'conn': conn, 'lakehouse': SqlLiteLakehouse()},
    )
    result = execute_pipeline(pipeline_def)
    assert result.success
    assert conn.cursor().execute('SELECT * FROM TableThree').fetchall() == [(1,), (2,)]
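# The .sql files under basic_sqllite_test_files/ are not shown in this section.
# Given the assertion on TableThree, plausible contents (assumed to mirror the
# inline SQL in test_basic_sqlite_pipeline above) would be:
#
#   TableOne.sql:
#     CREATE TABLE TableOne AS SELECT 1 as num
#
#   TableTwo.sql:
#     CREATE TABLE TableTwo AS SELECT 2 as num
#
#   TableThree.sql:
#     CREATE TABLE TableThree AS SELECT num from TableOne UNION SELECT num from TableTwo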
def _execute_spark_lakehouse_build(tables, lakehouse, environment_dict=None):
    return execute_pipeline(
        construct_lakehouse_pipeline(
            name='spark_lakehouse_pipeline',
            lakehouse_tables=tables,
            resources={'lakehouse': lakehouse, 'spark': spark_session_resource},
        ),
        environment_dict=environment_dict,
    )
def execute_spark_lakehouse_build(tables, lakehouse, environment_dict=None):
    return execute_pipeline(
        construct_lakehouse_pipeline(
            name='spark_lakehouse_pipeline',
            lakehouse_tables=tables,
            mode_defs=[
                ModeDefinition(
                    resource_defs={
                        'spark': spark_session_resource,
                        'lakehouse': ResourceDefinition.hardcoded_resource(lakehouse),
                    }
                )
            ],
        ),
        environment_dict=environment_dict,
    )
@pyspark_table(
    input_tables=[
        input_table('number_df', NumberTable),
        input_table('string_df', StringTable),
    ],
    spark_type=JOIN_TABLE_STRUCT_TYPE,
    description='Joining together of the number and the string.',
)
def JoinTable(_context, number_df: NumberTable, string_df: StringTable) -> SparkDF:
    return number_df.join(string_df, number_df.id == string_df.id, 'inner').drop(string_df.id)


def test_execute_typed_in_mem_lakehouse():
    lakehouse = TypedPySparkMemLakehouse()
    pipeline_result = execute_spark_lakehouse_build(
        tables=[NumberTable, StringTable, JoinTable], lakehouse=lakehouse
    )
    assert pipeline_result.success
    # Row ordering varies on 3.5 - compare as dicts
    assert (
        lakehouse.collected_tables['JoinTable'][0].asDict()
        == Row(id=1, number=2, string='23').asDict()
    )


# for dagit
typed_lakehouse_pipeline = construct_lakehouse_pipeline(
    name='typed_lakehouse_pipeline',
    lakehouse_tables=[NumberTable, StringTable, JoinTable],
    resources={'lakehouse': typed_pyspark_mem_lakehouse, 'spark': spark_session_resource},
)
def test_execute_typed_in_mem_lakehouse():
    lakehouse = TypedPySparkMemLakehouse()
    pipeline_result = execute_spark_lakehouse_build(
        tables=[NumberTable, StringTable, JoinTable], lakehouse=lakehouse
    )
    assert pipeline_result.success
    # Row ordering varies on 3.5 - compare as dicts
    assert (
        lakehouse.collected_tables['JoinTable'][0].asDict()
        == Row(id=1, number=2, string='23').asDict()
    )


# for dagit
typed_lakehouse_pipeline = construct_lakehouse_pipeline(
    name='typed_lakehouse_pipeline',
    lakehouse_tables=[NumberTable, StringTable, JoinTable],
    mode_defs=[
        ModeDefinition(
            resource_defs={
                'lakehouse': typed_pyspark_mem_lakehouse,
                'spark': spark_session_resource,
            }
        )
    ],
)
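# JOIN_TABLE_STRUCT_TYPE is referenced above but not defined in this section.
# A minimal sketch inferred from the asserted Row(id=1, number=2, string='23');
# the concrete field types and nullability are assumptions.
from pyspark.sql.types import IntegerType, StringType, StructField, StructType

JOIN_TABLE_STRUCT_TYPE = StructType(
    [
        StructField('id', IntegerType()),
        StructField('number', IntegerType()),
        StructField('string', StringType()),
    ]
)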