def test_coercion():
    """Check that ``into`` converts raw values to each declared data type.

    Covers dates (string and ``datetime`` inputs compared against UTC-aware
    datetimes), longs, doubles, booleans, arrays of each of those, and the
    ``ValueError`` raised for an array element outside the declared ``enum``.
    """
    may_5_2004_utc = datetime(year=2004, month=5, day=5, tzinfo=timezone.utc)
    # 08:00 at +10:00 on Nov 16 is 22:00 UTC on Nov 15.
    nov_15_2019_utc = datetime(
        year=2019, month=11, day=15, hour=22, tzinfo=timezone.utc
    )

    # --- Date ---------------------------------------------------------------
    coerced = dt.Date().into("2004-05-05 00:00:00")
    assert coerced == may_5_2004_utc

    coerced = dt.Date().into("2019-11-16T08:00:00.000+10:00")
    assert coerced == nov_15_2019_utc

    naive_input = datetime(year=2004, month=5, day=5)
    assert dt.Date().into(naive_input) == may_5_2004_utc

    date_strings = ["2004-05-05 00:00:00", "2019-11-16T08:00:00.000+10:00"]
    coerced_dates = dt.Array(items=dt.Date()).into(date_strings)
    assert coerced_dates == [may_5_2004_utc, nov_15_2019_utc]

    # --- Long ---------------------------------------------------------------
    assert dt.Long().into("2004") == 2004
    assert dt.Array(items=dt.Long()).into(["2004", "2005"]) == [2004, 2005]

    # --- Double -------------------------------------------------------------
    assert dt.Double().into("3.14159") == 3.14159
    coerced_doubles = dt.Array(items=dt.Double()).into(["3.14159", "3.12"])
    assert coerced_doubles == [3.14159, 3.12]

    # --- Boolean ------------------------------------------------------------
    assert dt.Boolean().into("true") is True
    coerced_flags = dt.Array(items=dt.Boolean()).into(["true", "false"])
    assert coerced_flags == [True, False]

    # --- Enum violation -----------------------------------------------------
    # 4 is not among the allowed values [1, 2, 3].
    with pytest.raises(ValueError):
        dt.Array(items=dt.Long(), enum=[1, 2, 3]).into([1, 2, 4])
def test_deserialize_double_with_format():
    """Check how ``dt.deserialize`` handles the ``unit`` field of a Double.

    A ``null`` unit and an empty-string unit are both expected to equal
    ``dt.Double(unit=None)``, a string unit is kept as given, and a
    non-string unit (``99``) is expected to raise.
    """
    assert dt.deserialize(
        """{ "type": "Double", "unit": null }"""
    ) == dt.Double(unit=None)

    assert dt.deserialize(
        """{ "type": "Double", "unit": "ms" }"""
    ) == dt.Double(unit="ms")

    assert dt.deserialize(
        """{ "type": "Double", "unit": "" }"""
    ) == dt.Double(unit=None)

    # Deliberately no `assert` in front of the call: if deserialize returned a
    # falsy value instead of raising, the AssertionError would itself satisfy
    # `pytest.raises(Exception)` and the test would pass for the wrong reason.
    with pytest.raises(Exception):
        dt.deserialize("""{ "type": "Double", "unit": 99 }""")
def test_simple_type_decoding():
    """Check that bare type names deserialize to default-constructed types.

    Each name is tried first as a JSON string (wrapped in double quotes) and
    then as the plain, unquoted name.
    """
    simple_types = (
        ("Boolean", dt.Boolean),
        ("Double", dt.Double),
        ("Long", dt.Long),
        ("String", dt.String),
    )

    # JSON-quoted form, e.g. '"Boolean"'.
    for type_name, make_type in simple_types:
        assert dt.deserialize('"' + type_name + '"') == make_type()

    # Plain form, e.g. 'Boolean'.
    for type_name, make_type in simple_types:
        assert dt.deserialize(type_name) == make_type()
def test_serialize_simple_types():
    """Check the exact JSON text ``dt.serialize`` emits for scalar types.

    Each case pairs a data type instance with the string it must serialize to,
    both with and without the optional ``format``/``unit`` attribute.
    """
    email_format = dt.StringSubtypeFormat.EMAIL
    cases = [
        (dt.Boolean(), '{"type": "Boolean"}'),
        (dt.String(), '{"type": "String", "format": null}'),
        (dt.String(format=email_format), '{"type": "String", "format": "Email"}'),
        (dt.Double(), '{"type": "Double", "unit": null}'),
        (dt.Double(unit="inches"), '{"type": "Double", "unit": "inches"}'),
        (dt.Long(), '{"type": "Long", "unit": null}'),
        (dt.Long(unit="inches"), '{"type": "Long", "unit": "inches"}'),
    ]

    for data_type, expected_json in cases:
        assert dt.serialize(data_type) == expected_json
def test_serialize_complex_types():
    """Check the exact JSON text ``dt.serialize`` emits for array types.

    In every expected string the ``enum`` list appears inside the nested
    ``items`` object rather than at the top level of the array.
    """
    cases = [
        (
            dt.Array(items=dt.Boolean(), enum=[True]),
            '{"type": "Array", "items": {"type": "Boolean", "enum": [true]}}',
        ),
        (
            dt.Array(items=dt.Boolean()),
            '{"type": "Array", "items": {"type": "Boolean"}}',
        ),
        (
            dt.Array(
                items=dt.String(format=dt.StringSubtypeFormat.EMAIL),
                enum=["*****@*****.**", "*****@*****.**"],
            ),
            '{"type": "Array", "items": {"type": "String", "format": "Email", '
            '"enum": ["*****@*****.**", "*****@*****.**"]}}',
        ),
        (
            dt.Array(items=dt.Double(), enum=[1, 2, 3]),
            '{"type": "Array", "items": {"type": "Double", "unit": null, '
            '"enum": [1, 2, 3]}}',
        ),
        (
            dt.Array(items=dt.Double(unit="inches"), enum=[1, 2, 3]),
            '{"type": "Array", "items": {"type": "Double", "unit": "inches", '
            '"enum": [1, 2, 3]}}',
        ),
    ]

    for array_type, expected_json in cases:
        assert dt.serialize(array_type) == expected_json
def test_simple_representation():
    """Check ``is_simple`` / ``into_simple`` for each data type.

    Default-constructed scalar types are simple and collapse to their bare
    name. Types carrying extra attributes (a string format, an enum, array
    items) are not simple and ``into_simple`` gives ``None`` for them.
    """

    def expect_simple(data_type, simple_name):
        # A simple type reports is_simple and collapses to its bare name.
        assert data_type.is_simple
        assert data_type.into_simple() == simple_name

    def expect_not_simple(data_type):
        # A non-simple type has no bare-name representation.
        assert not data_type.is_simple
        assert data_type.into_simple() is None

    # Boolean
    expect_simple(dt.Boolean(), "Boolean")

    # Long
    expect_simple(dt.Long(), "Long")
    long_with_unit = dt.Long(unit="m/sec")
    assert not long_with_unit.is_simple

    # Double
    expect_simple(dt.Double(), "Double")

    # String
    expect_simple(dt.String(), "String")
    expect_not_simple(dt.String(format=dt.StringSubtypeFormat.EMAIL))

    # Date
    expect_simple(dt.Date(), "Date")

    # Array
    email_array = dt.Array(
        items=dt.String(format=dt.StringSubtypeFormat.EMAIL),
        enum=["*****@*****.**", "*****@*****.**"],
    )
    expect_not_simple(email_array)

    # Enumeration
    boolean_enum = dt.Enumeration(items=dt.Boolean(), enum=[True])
    expect_not_simple(boolean_enum)
def test_deserialize_complex_type_decoding():
    """Check that object-form payloads with only a ``type`` key deserialize
    to the corresponding default-constructed data type.
    """
    cases = (
        ('{ "type": "Boolean" }', dt.Boolean),
        ('{ "type": "Double" }', dt.Double),
        ('{ "type": "Long" }', dt.Long),
        ('{ "type": "String" }', dt.String),
    )

    for payload, make_type in cases:
        assert dt.deserialize(payload) == make_type()
def properties():
    """Build a sample list of ``ModelProperty`` objects.

    The list contains five required properties (id, name, age, height, sex)
    followed by five optional ones. All of them share a single timestamp for
    both ``created_at`` and ``updated_at``.

    Returns:
        The list of ten ``ModelProperty`` instances, in declaration order.
    """
    timestamp = datetime.now()

    def build(name, display_name, data_type, description, required):
        # Fill in the timestamp arguments that are identical for every entry.
        return ModelProperty(
            name=name,
            display_name=display_name,
            data_type=data_type,
            description=description,
            required=required,
            created_at=timestamp,
            updated_at=timestamp,
        )

    required_props = [
        build("id", "ID", dt.String(), "User ID", True),
        build("name", "Name", dt.String(), "Name", True),
        build("age", "Age", dt.Long(), "Age in years", True),
        build(
            "height", "Height", dt.Double(unit="inches"), "Height in inches", True
        ),
        build(
            "sex",
            "Sex",
            dt.Enumeration(items=dt.String(), enum=["M", "F"]),
            "Sex",
            True,
        ),
    ]

    optional_props = [
        build(
            "salutation",
            "Salutation",
            dt.Array(
                items=dt.String(), enum=["Mr.", "Mrs.", "Ms.", "Dr.", "Esq."]
            ),
            "Salutation",
            False,
        ),
        build(
            "email",
            "Email",
            dt.String(format=dt.StringSubtypeFormat.EMAIL),
            "Email address",
            False,
        ),
        build(
            "url",
            "URL",
            dt.String(format=dt.StringSubtypeFormat.URL),
            "URL",
            False,
        ),
        build(
            "favorite_numbers",
            "Favorite numbers",
            dt.Long(),
            "Favorite numbers",
            False,
        ),
        build(
            "favorite_color",
            "Favorite color",
            dt.Enumeration(items=dt.String(), enum=["red", "green", "blue"]),
            "Favorite color",
            False,
        ),
    ]

    return required_props + optional_props
def test_record_value_serialization(s3, config, read_csv, metadata_key, partitioned_db):
    """Publish one record covering every property type and check its CSV row.

    Creates a ``patient`` model with scalar and array properties, inserts a
    single record, publishes the dataset with no file manifests, and compares
    the rows read back from ``records/patient.csv`` with the expected string
    values.
    """
    s3.create_bucket(Bucket=config.s3_bucket)

    patient = partitioned_db.create_model("patient", "Patient")

    scalar_properties = [
        ModelProperty(
            name="string",
            display_name="String",
            data_type=dt.String(),
            model_title=True,
        ),
        ModelProperty(name="boolean", display_name="Boolean", data_type=dt.Boolean()),
        ModelProperty(name="long", display_name="Long", data_type=dt.Long()),
        ModelProperty(name="double", display_name="Double", data_type=dt.Double()),
        ModelProperty(name="date", display_name="Date", data_type=dt.Date()),
        ModelProperty(name="optional", display_name="Optional", data_type=dt.String()),
    ]
    array_properties = [
        ModelProperty(
            name="string_array",
            display_name="String Array",
            data_type=dt.Array(items=dt.String()),
        ),
        ModelProperty(
            name="boolean_array",
            display_name="Boolean Array",
            data_type=dt.Array(items=dt.Boolean()),
        ),
        ModelProperty(
            name="long_array",
            display_name="Long Array",
            data_type=dt.Array(items=dt.Long()),
        ),
        ModelProperty(
            name="double_array",
            display_name="Double Array",
            data_type=dt.Array(items=dt.Double()),
        ),
        ModelProperty(
            name="date_array",
            display_name="Date Array",
            data_type=dt.Array(items=dt.Date()),
        ),
    ]
    partitioned_db.update_properties(patient, *scalar_properties, *array_properties)

    record_values = {
        # Embedded quotes and a comma exercise CSV escaping.
        "string": 'tricky"char,acter"string',
        "boolean": True,
        "long": 12345,
        "double": 3.14159,
        "date": datetime.datetime(year=2004, month=5, day=5),
        "optional": None,
        # The last element contains ";", which is also the separator seen in
        # the expected array columns below.
        "string_array": ["red", "green", "semi;colon"],
        "boolean_array": [True, False],
        "long_array": [1, 2, 3],
        "double_array": [1.1, 2.2, 3.3],
        "date_array": [
            datetime.datetime(year=2004, month=5, day=5),
            datetime.datetime(year=2014, month=5, day=16),
        ],
    }
    created_records = partitioned_db.create_records(patient, [record_values])
    record = created_records[0]

    publish_dataset(partitioned_db, s3, config, file_manifests=[])

    patient_csv = read_csv(metadata_key("records/patient.csv"))

    expected_row = OrderedDict(
        [
            ("id", str(record.id)),
            ("string", 'tricky"char,acter"string'),
            ("boolean", "true"),
            ("long", "12345"),
            ("double", "3.14159"),
            ("date", "2004-05-05T00:00:00"),
            # The None value is expected to come back as an empty cell.
            ("optional", ""),
            # The ";" inside "semi;colon" is expected to come back as "_".
            ("string_array", "red;green;semi_colon"),
            ("boolean_array", "true;false"),
            ("long_array", "1;2;3"),
            ("double_array", "1.1;2.2;3.3"),
            ("date_array", "2004-05-05T00:00:00;2014-05-16T00:00:00"),
        ]
    )
    assert patient_csv.rows == [expected_row]