def test_deserialize_long_with_format():
    assert dt.deserialize("""{ "type": "Long", "unit": null }""") == dt.Long(unit=None)
    assert dt.deserialize("""{ "type": "Long", "unit": "ms" }""") == dt.Long(unit="ms")
    assert dt.deserialize("""{ "type": "Long", "unit": "" }""") == dt.Long(unit=None)
    # A non-string unit is rejected:
    with pytest.raises(Exception):
        dt.deserialize("""{ "type": "Long", "unit": 99 }""")

def test_deserialize_double_with_format():
    assert dt.deserialize("""{ "type": "Double", "unit": null }""") == dt.Double(unit=None)
    assert dt.deserialize("""{ "type": "Double", "unit": "ms" }""") == dt.Double(unit="ms")
    assert dt.deserialize("""{ "type": "Double", "unit": "" }""") == dt.Double(unit=None)
    # A non-string unit is rejected:
    with pytest.raises(Exception):
        dt.deserialize("""{ "type": "Double", "unit": 99 }""")

def to_model_property(d: JsonDict) -> ModelProperty:
    d["modelTitle"] = d.pop("conceptTitle")
    d["dataType"] = dt.deserialize(d.pop("dataType"))
    return ModelProperty(
        **{decamelize(k): v for k, v in d.items() if decamelize(k) in ModelProperty.PUBLIC}
    )

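# A hedged usage sketch of to_model_property; the payload below is hypothetical,
# not a real API response. "conceptTitle" is renamed to "modelTitle", "dataType"
# is decoded via dt.deserialize, and only keys whose decamelized names appear in
# ModelProperty.PUBLIC reach the constructor (assuming "name", "model_title", and
# "data_type" are among them):
#
#   raw = {"name": "weight", "conceptTitle": False, "dataType": '"Double"'}
#   prop = to_model_property(raw)
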
def test_deserialize_enum():
    assert dt.deserialize(
        """{ "type": "enum", "items": { "type": "Boolean", "enum": [true] } }"""
    ) == dt.Enumeration(items=dt.Boolean(), enum=[True])
    assert dt.deserialize(
        """{ "type": "enum", "items": { "type": "String", "enum": [] }}"""
    ) == dt.Enumeration(items=dt.String(), enum=[])
    # Enum values must match the item type:
    with pytest.raises(Exception):
        dt.deserialize(
            """{ "type": "enum", "items": { "type": "Boolean", "enum": ["foo"] } }"""
        )
    # The "enum" value list is required:
    with pytest.raises(Exception):
        dt.deserialize("""{ "type": "enum", "items": { "type": "Boolean" } }""")

def test_deserialize_array():
    assert dt.deserialize(
        """{ "type": "array", "items": { "type": "Boolean", "enum": [true] } }"""
    ) == dt.Array(items=dt.Boolean(), enum=[True])
    assert dt.deserialize(
        """{ "type": "array", "items": { "type": "Date" } }"""
    ) == dt.Array(items=dt.Date())
    assert dt.deserialize(
        """{ "type": "array", "items": { "type": "Boolean" } }"""
    ) == dt.Array(items=dt.Boolean())
    # Enum values must match the item type:
    with pytest.raises(Exception):
        dt.deserialize(
            """{ "type": "array", "items": { "type": "Boolean", "enum": ["foo"] } }"""
        )

def test_simple_type_decoding():
    # JSON-encoded type names:
    assert dt.deserialize('"Boolean"') == dt.Boolean()
    assert dt.deserialize('"Double"') == dt.Double()
    assert dt.deserialize('"Long"') == dt.Long()
    assert dt.deserialize('"String"') == dt.String()
    # Bare type names:
    assert dt.deserialize("Boolean") == dt.Boolean()
    assert dt.deserialize("Double") == dt.Double()
    assert dt.deserialize("Long") == dt.Long()
    assert dt.deserialize("String") == dt.String()

def test_deserialize_string_with_format():
    assert dt.deserialize(
        """{ "type": "String", "format": null }"""
    ) == dt.String(format=None)
    # Not a string value:
    with pytest.raises(Exception):
        dt.deserialize("""{ "type": "String", "format": 99 }""")
    # Invalid format:
    with pytest.raises(Exception):
        dt.deserialize("""{ "type": "String", "format": "bar" }""")
    # Allowed formats:
    assert dt.deserialize(
        """{ "type": "String", "format": "email" }"""
    ) == dt.String(format=dt.StringSubtypeFormat.EMAIL)
    assert dt.deserialize(
        """{ "type": "String", "format": "date" }"""
    ) == dt.String(format=dt.StringDateFormat.DATE)
    assert dt.deserialize(
        """{ "type": "String", "format": "datetime" }"""
    ) == dt.String(format=dt.StringDateFormat.DATETIME)
    assert dt.deserialize(
        """{ "type": "String", "format": "date-time" }"""
    ) == dt.String(format=dt.StringDateFormat.DATETIME)
    assert dt.deserialize(
        """{ "type": "String", "format": "time" }"""
    ) == dt.String(format=dt.StringDateFormat.TIME)
    assert dt.deserialize(
        """{ "type": "String", "format": "url" }"""
    ) == dt.String(format=dt.StringSubtypeFormat.URL)

def test_deserialize_complex_type_decoding():
    assert dt.deserialize("""{ "type": "Boolean" }""") == dt.Boolean()
    assert dt.deserialize("""{ "type": "Double" }""") == dt.Double()
    assert dt.deserialize("""{ "type": "Long" }""") == dt.Long()
    assert dt.deserialize("""{ "type": "String" }""") == dt.String()

def load(db, input_file: str, verbose: bool = False, base_dir: Optional[str] = None):
    with open(input_file) as f:
        # Each seed entry points at "model", "properties", and "records" files
        seed_files = json.load(f)

    for seed_file in seed_files:
        # Models
        with open(resolve_file(seed_file["model"], base_dir), "r") as model_file:
            models = DictReader(model_file, delimiter="|")
            model = next(models)
            model.pop("id")  # discard the source row's id
            if verbose:
                print(f"{db.organization_id}:{db.dataset_id} :: Loading model {model['name']}")
                print(model)
            model_id = db.create_model(**model).id
            if verbose:
                print(f"{db.organization_id}:{db.dataset_id} :: Created model {model_id}")

        # Model properties
        with open(resolve_file(seed_file["properties"], base_dir), "r") as properties_file:
            properties = list(DictReader(properties_file, delimiter="|"))
            # Mark the first property as the model title
            properties[0]["model_title"] = True
            if verbose:
                print(f"{db.organization_id}:{db.dataset_id} :: Loading {len(properties)} properties")
            for prop in properties:
                data_type = dt.deserialize(prop.pop("data_type"))
                db.update_properties(model_id, ModelProperty(data_type=data_type, **prop))

        # Records
        with open(resolve_file(seed_file["records"], base_dir), "r") as records_file:
            records_reader = DictReader(records_file, delimiter="|")
            total_loaded = 0
            for chunk in grouper(LOAD_CHUNK_SIZE, records_reader):
                chunk = [r for r in chunk if r]  # drop any fill/empty entries from the final chunk
                record_chunk = [to_property_values(row["values"]) for row in chunk]
                db.create_records(model_id, record_chunk)
                total_loaded += len(record_chunk)
                if verbose:
                    print(f"{db.organization_id}:{db.dataset_id} :: {len(record_chunk)} record(s), total {total_loaded}")
            if verbose:
                print(f"{db.organization_id}:{db.dataset_id} :: total records = {total_loaded}")

        if verbose:
            print()

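# A hedged sketch of the manifest load() consumes, inferred from the code above
# (the file names are illustrative only): a JSON list in which every entry names
# pipe-delimited files for the model, its properties, and its records, the last
# of which must provide a "values" column.
EXAMPLE_SEED_MANIFEST = [
    {
        "model": "patients/model.csv",
        "properties": "patients/properties.csv",
        "records": "patients/records.csv",
    }
]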