Python DFMock 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: dfmock

메소드/함수: DFMock

hotexamples.com에서의 예제들: 5

Python DFMock - 5개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 dfmock.DFMock에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

예제 #1

파일 보기

파일: test_fetch_parq.py 프로젝트: IntegriChain1/s3parq

def test_fetches_nons3parq_large_parquet():
    input_key = "burger-shipment/buns"
    input_bucket = "loadingdock"

    df = dfmock.DFMock(count=100000)
    df.columns = {
        "string_options": {
            "option_count": 4,
            "option_type": "string"
        },
        "int_options": {
            "option_count": 4,
            "option_type": "int"
        },
        "datetime_options": {
            "option_count": 5,
            "option_type": "datetime"
        },
        "float_options": {
            "option_count": 2,
            "option_type": "float"
        },
        "metrics": "integer"
    }

    df.generate_dataframe()
    # This is unfortunately big, but getting it to force a partition doesn't work otherwise
    df.grow_dataframe_to_size(500)

    input_df = pd.DataFrame(df.dataframe)

    s3_client = boto3.client('s3')

    s3_key = "burger-shipment/buns"

    setup_nons3parq_parquet(dataframe=input_df,
                            bucket=input_bucket,
                            key=input_key,
                            s3_client=s3_client)

    fetched_diff = fetch_parq.fetch(bucket=input_bucket,
                                    key=s3_key,
                                    parallel=False)

    assert fetched_diff.shape == input_df.shape
    sorted_dfs_equal_by_pandas_testing(fetched_diff, input_df)

예제 #2

파일 보기

def test_end_to_end():
    df = dfmock.DFMock(count=100000)
    df.columns = {
        "string_options": {
            "option_count": 4,
            "option_type": "string"
        },
        "int_options": {
            "option_count": 4,
            "option_type": "int"
        },
        "datetime_options": {
            "option_count": 5,
            "option_type": "datetime"
        },
        "float_options": {
            "option_count": 2,
            "option_type": "float"
        },
        "metrics": "integer"
    }

    df.generate_dataframe()
    # This is unfortunately big, but getting it to force a partition doesn't work otherwise
    df.grow_dataframe_to_size(500)

    s3_client = boto3.client('s3')
    bucket_name = 'thistestbucket'
    key = 'thisdataset'
    s3_client.create_bucket(Bucket=bucket_name)

    old_df = pd.DataFrame(df.dataframe)
    # pub it
    publish(bucket=bucket_name,
            key=key,
            dataframe=old_df,
            partitions=['string_options', 'datetime_options', 'float_options'])

    # go get it
    fetched_df = fetch(bucket=bucket_name, key=key, parallel=False)

    assert fetched_df.shape == old_df.shape
    assert df_equal_by_set(fetched_df, old_df, old_df.columns)
    sorted_dfs_equal_by_pandas_testing(fetched_df, old_df)

예제 #3

파일 보기

파일: test_integration_stress.py 프로젝트: schafrn/s3parq

def test_end_to_end():
    df = dfmock.DFMock(count=1000)
    df.columns = {
        "string_options": {
            "option_count": 4,
            "option_type": "string"
        },
        "int_options": {
            "option_count": 4,
            "option_type": "int"
        },
        "datetime_options": {
            "option_count": 5,
            "option_type": "datetime"
        },
        "float_options": {
            "option_count": 2,
            "option_type": "float"
        },
        "metrics": "integer"
    }
    df.generate_dataframe()
    df.grow_dataframe_to_size(250)

    s3_client = boto3.client('s3')

    bucket_name = 'thistestbucket'
    key = 'thisdataset'

    s3_client.create_bucket(Bucket=bucket_name)

    # pub it
    publish(bucket=bucket_name,
            key=key,
            dataframe=df.dataframe,
            partitions=['string_options', 'datetime_options', 'float_options'])

    # go get it
    fetched_df = fetch(bucket=bucket_name, key=key, parallel=False)

    assert fetched_df.shape == df.dataframe.shape
    pd.DataFrame.eq(fetched_df, df.dataframe)
    fetched_df.head()

예제 #4

파일 보기

def test_end_to_end():
    # make a sample DF for all the tests
    df = dfmock.DFMock(count=10000)
    df.columns = {
        "string_options": {
            "option_count": 4,
            "option_type": "string"
        },
        "int_options": {
            "option_count": 4,
            "option_type": "int"
        },
        "datetime_options": {
            "option_count": 5,
            "option_type": "datetime"
        },
        "float_options": {
            "option_count": 2,
            "option_type": "float"
        },
        "metrics": "integer"
    }
    df.generate_dataframe()

    s3_client = boto3.client('s3')
    bucket_name = 'thistestbucket'
    key = 'thisdataset'
    s3_client.create_bucket(Bucket=bucket_name)

    old_df = pd.DataFrame(df.dataframe)

    # pub it
    publish(bucket=bucket_name,
            key=key,
            dataframe=old_df,
            partitions=['string_options', 'datetime_options', 'float_options'])

    # go get it
    fetched_df = fetch(bucket=bucket_name, key=key, parallel=False)

    assert fetched_df.shape == old_df.shape
    assert df_equal_by_set(fetched_df, old_df, old_df.columns)
    sorted_dfs_equal_by_pandas_testing(fetched_df, old_df)

예제 #5

파일 보기

import boto3
import moto
import s3parq
import pytest
import dfmock
from s3parq.testing_helper import df_equal_by_set
from s3parq.publish_parq import publish
from s3parq.fetch_parq import fetch
import pandas as pd

# make a sample DF for all the tests
df = dfmock.DFMock(count=10000)
df.columns = {
    "string_options": {
        "option_count": 4,
        "option_type": "string"
    },
    "int_options": {
        "option_count": 4,
        "option_type": "int"
    },
    "datetime_options": {
        "option_count": 5,
        "option_type": "datetime"
    },
    "float_options": {
        "option_count": 2,
        "option_type": "float"
    },
    "metrics": "integer"
}