Esempio n. 1
0
def test_coercion():
    """Check that ``into`` coerces raw values for each basic data type.

    Exercises ``Date``, ``Long``, ``Double`` and ``Boolean`` both as scalars
    and as ``Array`` items, and verifies that an array containing a value
    outside the declared ``enum`` raises ``ValueError``.
    """
    plain_text = "2004-05-05 00:00:00"
    offset_text = "2019-11-16T08:00:00.000+10:00"

    # Expected results are timezone-aware and expressed in UTC.
    midnight_utc = datetime(2004, 5, 5, tzinfo=timezone.utc)
    # 08:00 on Nov 16 at +10:00 corresponds to 22:00 on Nov 15 in UTC.
    shifted_utc = datetime(2019, 11, 15, 22, tzinfo=timezone.utc)

    # Dates: string without offset, string with offset, naive datetime.
    assert dt.Date().into(plain_text) == midnight_utc
    assert dt.Date().into(offset_text) == shifted_utc
    naive_input = datetime(2004, 5, 5)
    assert dt.Date().into(naive_input) == midnight_utc

    date_array = dt.Array(items=dt.Date())
    assert date_array.into([plain_text, offset_text]) == [
        midnight_utc,
        shifted_utc,
    ]

    # Integers
    assert dt.Long().into("2004") == 2004
    long_array = dt.Array(items=dt.Long())
    assert long_array.into(["2004", "2005"]) == [2004, 2005]

    # Floating point
    assert dt.Double().into("3.14159") == 3.14159
    double_array = dt.Array(items=dt.Double())
    assert double_array.into(["3.14159", "3.12"]) == [3.14159, 3.12]

    # Booleans
    assert dt.Boolean().into("true") is True
    boolean_array = dt.Array(items=dt.Boolean())
    assert boolean_array.into(["true", "false"]) == [True, False]

    # 4 is not among the permitted enum values.
    with pytest.raises(ValueError):
        dt.Array(items=dt.Long(), enum=[1, 2, 3]).into([1, 2, 4])
Esempio n. 2
0
def test_deserialize_long_with_format():
    """Check how ``deserialize`` treats the ``unit`` field of a ``Long``.

    A null or empty-string unit is expected to yield ``Long(unit=None)``, a
    string unit is expected to be kept as-is, and a non-string unit is
    expected to raise.
    """
    assert dt.deserialize("""{ "type": "Long", "unit": null }""") == dt.Long(
        unit=None)
    assert dt.deserialize("""{ "type": "Long", "unit": "ms" }""") == dt.Long(
        unit="ms")
    assert dt.deserialize("""{ "type": "Long", "unit": "" }""") == dt.Long(
        unit=None)
    # Deliberately no ``assert`` inside the block: a falsy return value would
    # raise AssertionError, which ``pytest.raises(Exception)`` accepts, hiding
    # a deserializer that failed to reject the invalid payload.
    with pytest.raises(Exception):
        dt.deserialize("""{ "type": "Long", "unit": 99 }""")
Esempio n. 3
0
def test_simple_type_decoding():
    """Check that bare type names deserialize to the matching data type.

    Each name is tried twice: first as a JSON string (wrapped in double
    quotes), then as the raw, unquoted name.
    """
    simple_types = (
        ("Boolean", dt.Boolean),
        ("Double", dt.Double),
        ("Long", dt.Long),
        ("String", dt.String),
    )

    # JSON-quoted form, e.g. '"Boolean"'.
    for type_name, factory in simple_types:
        assert dt.deserialize('"' + type_name + '"') == factory()

    # Raw form, e.g. 'Boolean'.
    for type_name, factory in simple_types:
        assert dt.deserialize(type_name) == factory()
Esempio n. 4
0
def test_serialize_simple_types():
    """Check the exact JSON text ``serialize`` emits for simple data types.

    Covers ``Boolean``, ``String`` (with and without a format), and
    ``Double``/``Long`` (with and without a unit).
    """
    # Each case pairs a factory for the data type with its expected JSON.
    cases = [
        (lambda: dt.Boolean(), '{"type": "Boolean"}'),
        (lambda: dt.String(), '{"type": "String", "format": null}'),
        (
            lambda: dt.String(format=dt.StringSubtypeFormat.EMAIL),
            '{"type": "String", "format": "Email"}',
        ),
        (lambda: dt.Double(), '{"type": "Double", "unit": null}'),
        (
            lambda: dt.Double(unit="inches"),
            '{"type": "Double", "unit": "inches"}',
        ),
        (lambda: dt.Long(), '{"type": "Long", "unit": null}'),
        (
            lambda: dt.Long(unit="inches"),
            '{"type": "Long", "unit": "inches"}',
        ),
    ]

    for build, expected_json in cases:
        assert dt.serialize(build()) == expected_json
Esempio n. 5
0
def test_simple_representation():
    """Check ``is_simple`` and ``into_simple`` across the data types.

    Unparameterized ``Boolean``, ``Long``, ``Double``, ``String`` and ``Date``
    are expected to be simple and to reduce to their bare type name. Types
    carrying a unit or format, as well as ``Array`` and ``Enumeration``, are
    expected not to be simple.
    """
    # Boolean
    plain_boolean = dt.Boolean()
    assert plain_boolean.is_simple
    assert plain_boolean.into_simple() == "Boolean"

    # Long, without and with a unit
    plain_long = dt.Long()
    assert plain_long.is_simple
    assert plain_long.into_simple() == "Long"

    long_with_unit = dt.Long(unit="m/sec")
    assert not long_with_unit.is_simple

    # Double
    plain_double = dt.Double()
    assert plain_double.is_simple
    assert plain_double.into_simple() == "Double"

    # String, without and with a format
    plain_string = dt.String()
    assert plain_string.is_simple
    assert plain_string.into_simple() == "String"

    email_string = dt.String(format=dt.StringSubtypeFormat.EMAIL)
    assert not email_string.is_simple
    assert email_string.into_simple() is None

    # Date
    plain_date = dt.Date()
    assert plain_date.is_simple
    assert plain_date.into_simple() == "Date"

    # Array
    email_array = dt.Array(
        items=dt.String(format=dt.StringSubtypeFormat.EMAIL),
        enum=["*****@*****.**", "*****@*****.**"],
    )
    assert not email_array.is_simple
    assert email_array.into_simple() is None

    # Enumeration
    boolean_enum = dt.Enumeration(items=dt.Boolean(), enum=[True])
    assert not boolean_enum.is_simple
    assert boolean_enum.into_simple() is None
Esempio n. 6
0
def test_deserialize_complex_type_decoding():
    """Check that object-form payloads deserialize to the matching data type.

    Each payload is a JSON object carrying only a ``type`` key.
    """
    cases = (
        ("""{ "type": "Boolean" }""", dt.Boolean),
        ("""{ "type": "Double" }""", dt.Double),
        ("""{ "type": "Long" }""", dt.Long),
        ("""{ "type": "String" }""", dt.String),
    )
    for payload, factory in cases:
        assert dt.deserialize(payload) == factory()
Esempio n. 7
0
def properties():
    """Return a list of sample ``ModelProperty`` definitions.

    A single timestamp is captured once and used as both ``created_at`` and
    ``updated_at`` for every property in the list.
    """
    stamp = datetime.now()

    def build(name, display_name, data_type, description, required):
        # Fill in the shared timestamp fields for one property.
        return ModelProperty(
            name=name,
            display_name=display_name,
            data_type=data_type,
            description=description,
            required=required,
            created_at=stamp,
            updated_at=stamp,
        )

    return [
        # Required properties
        build("id", "ID", dt.String(), "User ID", True),
        build("name", "Name", dt.String(), "Name", True),
        build("age", "Age", dt.Long(), "Age in years", True),
        build(
            "height",
            "Height",
            dt.Double(unit="inches"),
            "Height in inches",
            True,
        ),
        build(
            "sex",
            "Sex",
            dt.Enumeration(items=dt.String(), enum=["M", "F"]),
            "Sex",
            True,
        ),
        # Optional properties
        build(
            "salutation",
            "Salutation",
            dt.Array(
                items=dt.String(),
                enum=["Mr.", "Mrs.", "Ms.", "Dr.", "Esq."],
            ),
            "Salutation",
            False,
        ),
        build(
            "email",
            "Email",
            dt.String(format=dt.StringSubtypeFormat.EMAIL),
            "Email address",
            False,
        ),
        build(
            "url",
            "URL",
            dt.String(format=dt.StringSubtypeFormat.URL),
            "URL",
            False,
        ),
        build(
            "favorite_numbers",
            "Favorite numbers",
            dt.Long(),
            "Favorite numbers",
            False,
        ),
        build(
            "favorite_color",
            "Favorite color",
            dt.Enumeration(
                items=dt.String(), enum=["red", "green", "blue"]
            ),
            "Favorite color",
            False,
        ),
    ]
Esempio n. 8
0
def test_record_value_serialization(s3, config, read_csv, metadata_key, partitioned_db):
    """Check how record values of each data type appear in the published CSV.

    Creates a ``patient`` model with one property per scalar type plus an
    array property for each, stores a single record, publishes the dataset,
    and compares the rows read back from ``records/patient.csv`` with the
    expected string renderings.
    """
    s3.create_bucket(Bucket=config.s3_bucket)

    model = partitioned_db.create_model("patient", "Patient")

    model_properties = [
        # Scalar properties
        ModelProperty(
            name="string",
            display_name="String",
            data_type=dt.String(),
            model_title=True,
        ),
        ModelProperty(name="boolean", display_name="Boolean", data_type=dt.Boolean()),
        ModelProperty(name="long", display_name="Long", data_type=dt.Long()),
        ModelProperty(name="double", display_name="Double", data_type=dt.Double()),
        ModelProperty(name="date", display_name="Date", data_type=dt.Date()),
        ModelProperty(name="optional", display_name="Optional", data_type=dt.String()),
        # Array properties
        ModelProperty(
            name="string_array",
            display_name="String Array",
            data_type=dt.Array(items=dt.String()),
        ),
        ModelProperty(
            name="boolean_array",
            display_name="Boolean Array",
            data_type=dt.Array(items=dt.Boolean()),
        ),
        ModelProperty(
            name="long_array",
            display_name="Long Array",
            data_type=dt.Array(items=dt.Long()),
        ),
        ModelProperty(
            name="double_array",
            display_name="Double Array",
            data_type=dt.Array(items=dt.Double()),
        ),
        ModelProperty(
            name="date_array",
            display_name="Date Array",
            data_type=dt.Array(items=dt.Date()),
        ),
    ]
    partitioned_db.update_properties(model, *model_properties)

    first_date = datetime.datetime(2004, 5, 5)
    second_date = datetime.datetime(2014, 5, 16)
    values = {
        # Quotes and a comma exercise CSV escaping.
        "string": 'tricky"char,acter"string',
        "boolean": True,
        "long": 12345,
        "double": 3.14159,
        "date": first_date,
        "optional": None,
        # The last element contains a semicolon.
        "string_array": ["red", "green", "semi;colon"],
        "boolean_array": [True, False],
        "long_array": [1, 2, 3],
        "double_array": [1.1, 2.2, 3.3],
        "date_array": [first_date, second_date],
    }
    created = partitioned_db.create_records(model, [values])
    record = created[0]

    publish_dataset(partitioned_db, s3, config, file_manifests=[])

    exported = read_csv(metadata_key("records/patient.csv"))

    expected_row = OrderedDict(
        [
            ("id", str(record.id)),
            ("string", 'tricky"char,acter"string'),
            ("boolean", "true"),
            ("long", "12345"),
            ("double", "3.14159"),
            ("date", "2004-05-05T00:00:00"),
            # A missing value is expected as an empty cell.
            ("optional", ""),
            # Array elements are expected joined with ';', with the
            # semicolon inside "semi;colon" rendered as an underscore.
            ("string_array", "red;green;semi_colon"),
            ("boolean_array", "true;false"),
            ("long_array", "1;2;3"),
            ("double_array", "1.1;2.2;3.3"),
            ("date_array", "2004-05-05T00:00:00;2014-05-16T00:00:00"),
        ]
    )
    assert exported.rows == [expected_row]