def test_ingest_with_datetime_formatted():
    """
    Ingest string timestamps with an explicit schema and a custom
    ``date_format``, and check they come back as parsed timestamps.
    """
    fields = [
        pa.field("foo", pa.int64()),
        pa.field("bar", pa.int64()),
        pa.field("baz", pa.timestamp("ns")),
    ]
    records = [
        {"foo": 1, "bar": 2, "baz": "2018/01/01 01:02:03"},
        {"foo": 10, "bar": 20, "baz": "2018/01/02 01:02:03"},
    ]

    # The input strings use slashes, hence the explicit format argument.
    table = client.ingest_data(records,
                               pa.schema(fields),
                               date_format="%Y/%m/%d %H:%M:%S")

    expected = {
        "foo": [1, 10],
        "bar": [2, 20],
        "baz": [
            pd.to_datetime(stamp)
            for stamp in ("2018-01-01 01:02:03", "2018-01-02 01:02:03")
        ],
    }
    assert table.to_pydict() == expected
def test_ingest_with_no_schema_and_uneven_column_names():
    """
    Ingest schema-less records where only the last one carries a ``baz``
    key; the values missing from earlier records are expected as ``None``.
    """
    records = [
        {"foo": 1, "bar": 2},
        {"foo": 10, "bar": 20},
        {"foo": 100, "bar": 200, "baz": 300},
    ]

    table = client.ingest_data(records)

    expected = {
        "foo": [1, 10, 100],
        "bar": [2, 20, 200],
        "baz": [None, None, 300],
    }
    assert table.to_pydict() == expected
def test_ingest_with_no_schema():
    """
    Ingest uniform records without passing any schema and check the
    resulting columns.
    """
    records = [
        {"foo": 1, "bar": 2},
        {"foo": 10, "bar": 20},
    ]

    table = client.ingest_data(records)

    expected = {"foo": [1, 10], "bar": [2, 20]}
    assert table.to_pydict() == expected
def test_ingest_with_column_names():
    """
    Ingest records when the schema is given as a plain list of column names.
    """
    column_names = ["foo", "bar"]
    records = [
        {"foo": 1, "bar": 2},
        {"foo": 10, "bar": 20},
    ]

    table = client.ingest_data(records, column_names)

    expected = {"foo": [1, 10], "bar": [2, 20]}
    assert table.to_pydict() == expected
def test_ingest_with_boolean_none():
    """
    Ingest 0, 1 and ``None`` into a boolean column; the expected result is
    ``False``, ``True`` and ``None`` respectively.
    """
    bool_schema = pa.schema([pa.field("foo", pa.bool_())])
    records = [{"foo": raw} for raw in (0, 1, None)]

    table = client.ingest_data(records, bool_schema)

    expected = {"foo": [False, True, None]}
    assert table.to_pydict() == expected
def test_ingest_with_numeric_boolean():
    """
    Ingest the numbers 0 and 1 into a boolean column; the expected result
    is ``False`` and ``True``.
    """
    bool_schema = pa.schema([pa.field("foo", pa.bool_())])
    records = [{"foo": raw} for raw in (0, 1)]

    table = client.ingest_data(records, bool_schema)

    expected = {"foo": [False, True]}
    assert table.to_pydict() == expected
# Example #7
# 0
def test_ingest_with_column_names_dict():
    """
    Ingest records when the schema is a dict mapping each source column to
    a user-supplied alias; output columns are expected under the aliases.
    """
    column_aliases = {"foo": "foo1", "bar": "bar2"}
    records = [
        {"foo": 1, "bar": 2},
        {"foo": 10, "bar": 20},
    ]

    table = client.ingest_data(records, column_aliases)

    expected = {"foo1": [1, 10], "bar2": [2, 20]}
    assert table.to_pydict() == expected
# Example #8
# 0
def test_ingest():
    """
    Ingest records against an explicit schema of two int64 columns.
    """
    int_schema = pa.schema([
        pa.field(column, pa.int64()) for column in ("foo", "bar")
    ])
    records = [
        {"foo": 1, "bar": 2},
        {"foo": 10, "bar": 20},
    ]

    table = client.ingest_data(records, int_schema)

    expected = {"foo": [1, 10], "bar": [2, 20]}
    assert table.to_pydict() == expected
def test_date_conversion():
    """
    Ingest ISO date strings into a ``date32`` column and check that each
    resulting value formats back to the string it was ingested from.
    """
    iso_dates = ("2018-01-01", "2018-01-02")
    date_schema = pa.schema([pa.field("foo", pa.date32())])
    records = [{"foo": iso} for iso in iso_dates]

    table = client.ingest_data(records, date_schema)

    # Materialise the column once and check each position individually.
    foo_column = table.to_pydict()['foo']
    for position, iso in enumerate(iso_dates):
        assert foo_column[position].strftime("%Y-%m-%d") == iso
# Example #10
# 0
def test_ingest_with_field_aliases():
    """
    Ingest records with an explicit schema plus a ``field_aliases`` mapping;
    the aliased column is expected under its new name, the other unchanged.
    """
    int_schema = pa.schema([
        pa.field(column, pa.int64()) for column in ("foo", "bar")
    ])
    # Only "foo" is aliased; "bar" has no entry in the mapping.
    aliases = {"foo": "corrected_foo"}
    records = [
        {"foo": 1, "bar": 2},
        {"foo": 10, "bar": 20},
    ]

    table = client.ingest_data(records, int_schema, field_aliases=aliases)

    expected = {"corrected_foo": [1, 10], "bar": [2, 20]}
    assert table.to_pydict() == expected