Beispiel #1
0
def test_simple_type_decoding():
    """Simple type names decode to their data type, JSON-quoted or bare."""
    type_names = ("Boolean", "Double", "Long", "String")

    # First the JSON string form, e.g. '"Boolean"' ...
    for type_name in type_names:
        assert dt.deserialize(f'"{type_name}"') == getattr(dt, type_name)()

    # ... then the bare, unquoted name.
    for type_name in type_names:
        assert dt.deserialize(type_name) == getattr(dt, type_name)()
Beispiel #2
0
def test_serialize_simple_types():
    """Simple data types serialize to the expected JSON text."""
    # Boolean: only the type tag is emitted.
    boolean_json = dt.serialize(dt.Boolean())
    assert boolean_json == '{"type": "Boolean"}'

    # String: "format" is always present, null when unset.
    plain_string_json = dt.serialize(dt.String())
    assert plain_string_json == '{"type": "String", "format": null}'
    email_string_json = dt.serialize(dt.String(format=dt.StringSubtypeFormat.EMAIL))
    assert email_string_json == '{"type": "String", "format": "Email"}'

    # Double: "unit" is always present, null when unset.
    plain_double_json = dt.serialize(dt.Double())
    assert plain_double_json == '{"type": "Double", "unit": null}'
    inches_double_json = dt.serialize(dt.Double(unit="inches"))
    assert inches_double_json == '{"type": "Double", "unit": "inches"}'

    # Long: same shape as Double.
    plain_long_json = dt.serialize(dt.Long())
    assert plain_long_json == '{"type": "Long", "unit": null}'
    inches_long_json = dt.serialize(dt.Long(unit="inches"))
    assert inches_long_json == '{"type": "Long", "unit": "inches"}'
Beispiel #3
0
def test_simple_representation():
    """Check `is_simple` and `into_simple()` across plain and parameterized types."""
    # Boolean
    plain_boolean = dt.Boolean()
    assert plain_boolean.is_simple
    assert plain_boolean.into_simple() == "Boolean"

    # Long, without and with a unit
    plain_long = dt.Long()
    assert plain_long.is_simple
    assert plain_long.into_simple() == "Long"

    long_with_unit = dt.Long(unit="m/sec")
    assert not long_with_unit.is_simple

    # Double
    plain_double = dt.Double()
    assert plain_double.is_simple
    assert plain_double.into_simple() == "Double"

    # String, without and with a format
    plain_string = dt.String()
    assert plain_string.is_simple
    assert plain_string.into_simple() == "String"

    email_string = dt.String(format=dt.StringSubtypeFormat.EMAIL)
    assert not email_string.is_simple
    assert email_string.into_simple() is None

    # Date
    plain_date = dt.Date()
    assert plain_date.is_simple
    assert plain_date.into_simple() == "Date"

    # Array: expected to have no simple form
    email_array = dt.Array(
        items=dt.String(format=dt.StringSubtypeFormat.EMAIL),
        enum=["*****@*****.**", "*****@*****.**"],
    )
    assert not email_array.is_simple
    assert email_array.into_simple() is None

    # Enumeration: expected to have no simple form
    boolean_enum = dt.Enumeration(items=dt.Boolean(), enum=[True])
    assert not boolean_enum.is_simple
    assert boolean_enum.into_simple() is None
Beispiel #4
0
 def package_proxy(cls) -> "ExportModel":
     """Build the `ExportModel` describing a file in the dataset.

     The returned model has no backing model (`model=None`), is named
     "file", and exposes a single string property, "path".
     """
     path_property = ExportProperty.model_property(
         name="path",
         display_name="Path",
         description="The path to the file from the root of the dataset",
         data_type=dt.String(),
     )
     return ExportModel(
         model=None,
         # TODO: use constant
         name="file",
         display_name="File",
         description="A file in the dataset",
         properties=[path_property],
     )
Beispiel #5
0
def test_deserialize_enum():
    """Enumerations decode from JSON, and invalid definitions raise."""
    assert dt.deserialize(
        """{ "type": "enum", "items": { "type": "Boolean", "enum": [true] } }"""
    ) == dt.Enumeration(items=dt.Boolean(), enum=[True])

    assert dt.deserialize(
        """{ "type": "enum", "items": { "type": "String", "enum": [] }}"""
    ) == dt.Enumeration(items=dt.String(), enum=[])

    # The calls below are deliberately NOT wrapped in `assert`: a failing
    # assertion raises AssertionError, which `pytest.raises(Exception)` would
    # accept, letting the test pass even if `deserialize` never raised.

    # A string value inside a Boolean enumeration is expected to be rejected.
    with pytest.raises(Exception):
        dt.deserialize(
            """{ "type": "enum", "items": { "type": "Boolean", "enum": ["foo"] } }"""
        )

    # An enumeration without an "enum" list is expected to be rejected.
    with pytest.raises(Exception):
        dt.deserialize("""{ "type": "enum", "items": { "type": "Boolean" } }""")
Beispiel #6
0
def test_deserialize_string_with_format():
    """String types decode with every allowed format and reject invalid ones."""
    assert dt.deserialize(
        """{ "type": "String", "format": null }""") == dt.String(format=None)

    # The calls inside `pytest.raises` are deliberately not wrapped in
    # `assert`: an AssertionError would itself satisfy
    # `pytest.raises(Exception)` and hide a `deserialize` that did not raise.

    # Not a string value:
    with pytest.raises(Exception):
        dt.deserialize("""{ "type": "String", "format": 99 }""")
    # Invalid format:
    with pytest.raises(Exception):
        dt.deserialize("""{ "type": "String", "format": "bar" }""")

    # Allowed formats ("datetime" and "date-time" both map to DATETIME):
    allowed_formats = [
        ("email", dt.StringSubtypeFormat.EMAIL),
        ("date", dt.StringDateFormat.DATE),
        ("datetime", dt.StringDateFormat.DATETIME),
        ("date-time", dt.StringDateFormat.DATETIME),
        ("time", dt.StringDateFormat.TIME),
        ("url", dt.StringSubtypeFormat.URL),
    ]
    for format_name, expected_format in allowed_formats:
        payload = f'{{ "type": "String", "format": "{format_name}" }}'
        assert dt.deserialize(payload) == dt.String(format=expected_format)
Beispiel #7
0
def test_serialize_complex_types():
    """Arrays serialize with the item type (and any enum) nested under "items"."""
    cases = [
        (
            dt.Array(items=dt.Boolean(), enum=[True]),
            '{"type": "Array", "items": {"type": "Boolean", "enum": [true]}}',
        ),
        (
            dt.Array(items=dt.Boolean()),
            '{"type": "Array", "items": {"type": "Boolean"}}',
        ),
        (
            dt.Array(
                items=dt.String(format=dt.StringSubtypeFormat.EMAIL),
                enum=["*****@*****.**", "*****@*****.**"],
            ),
            '{"type": "Array", "items": {"type": "String", "format": "Email", "enum": ["*****@*****.**", "*****@*****.**"]}}',
        ),
        (
            dt.Array(items=dt.Double(), enum=[1, 2, 3]),
            '{"type": "Array", "items": {"type": "Double", "unit": null, "enum": [1, 2, 3]}}',
        ),
        (
            dt.Array(items=dt.Double(unit="inches"), enum=[1, 2, 3]),
            '{"type": "Array", "items": {"type": "Double", "unit": "inches", "enum": [1, 2, 3]}}',
        ),
    ]
    for data_type, expected_json in cases:
        assert dt.serialize(data_type) == expected_json
Beispiel #8
0
def test_deserialize_complex_type_decoding():
    """The object form `{ "type": <name> }` decodes to the matching data type."""
    for type_name in ("Boolean", "Double", "Long", "String"):
        payload = f'{{ "type": "{type_name}" }}'
        assert dt.deserialize(payload) == getattr(dt, type_name)()
Beispiel #9
0
def properties():
    """Return a list of sample `ModelProperty` definitions.

    All properties share one creation/update timestamp, taken once when the
    function is called.
    """
    timestamp = datetime.now()

    def make_property(name, display_name, data_type, description, required):
        # Stamp every property with the same created/updated time.
        return ModelProperty(
            name=name,
            display_name=display_name,
            data_type=data_type,
            description=description,
            required=required,
            created_at=timestamp,
            updated_at=timestamp,
        )

    return [
        # Required properties
        make_property("id", "ID", dt.String(), "User ID", True),
        make_property("name", "Name", dt.String(), "Name", True),
        make_property("age", "Age", dt.Long(), "Age in years", True),
        make_property(
            "height", "Height", dt.Double(unit="inches"), "Height in inches", True
        ),
        make_property(
            "sex",
            "Sex",
            dt.Enumeration(items=dt.String(), enum=["M", "F"]),
            "Sex",
            True,
        ),
        # Optional properties
        make_property(
            "salutation",
            "Salutation",
            dt.Array(items=dt.String(), enum=["Mr.", "Mrs.", "Ms.", "Dr.", "Esq."]),
            "Salutation",
            False,
        ),
        make_property(
            "email",
            "Email",
            dt.String(format=dt.StringSubtypeFormat.EMAIL),
            "Email address",
            False,
        ),
        make_property(
            "url",
            "URL",
            dt.String(format=dt.StringSubtypeFormat.URL),
            "URL",
            False,
        ),
        make_property(
            "favorite_numbers",
            "Favorite numbers",
            dt.Long(),
            "Favorite numbers",
            False,
        ),
        make_property(
            "favorite_color",
            "Favorite color",
            dt.Enumeration(items=dt.String(), enum=["red", "green", "blue"]),
            "Favorite color",
            False,
        ),
    ]
Beispiel #10
0
def test_publish_linked_properties_with_no_index(
    s3, config, read_csv, read_json, metadata_key, partitioned_db
):
    """Publish linked properties where one relationship has no index.

    Checks the published schema and the gene records CSV when "regulates" is
    created with `index=None` and "interacts" with `index=1`.
    """
    s3.create_bucket(Bucket=config.s3_bucket)

    gene = partitioned_db.create_model("gene", "Gene")
    title_property = ModelProperty(
        "name", "name", data_type=dt.String(), model_title=True, required=True
    )
    partitioned_db.update_properties(gene, title_property)

    # Two linked properties (one_to_many=False); only "interacts" has an index.
    regulates = partitioned_db.create_model_relationship(
        gene, "regulates", gene, one_to_many=False, index=None
    )
    interacts = partitioned_db.create_model_relationship(
        gene, "interacts", gene, one_to_many=False, index=1
    )

    yy1, pepd, gmpr2 = (
        partitioned_db.create_record(gene, {"name": symbol})
        for symbol in ("YY1", "PEPD", "GMPR2")
    )

    partitioned_db.create_record_relationship(yy1, regulates, gmpr2)
    partitioned_db.create_record_relationship(yy1, interacts, pepd)

    publish_dataset(partitioned_db, s3, config, file_manifests=[])

    def linked_property(name):
        # Schema entry expected for a linked property pointing back at "gene".
        return {
            "name": name,
            "displayName": name,
            "description": "",
            "dataType": {"type": "Model", "to": "gene", "file": "records/gene.csv"},
        }

    expected_properties = [
        {
            "name": "name",
            "displayName": "name",
            "description": "",
            "dataType": {"type": "String"},
        },
        linked_property("interacts"),
        linked_property("regulates"),
    ]
    schema_json = read_json(metadata_key("schema.json"))
    assert schema_json.content["models"][0]["properties"] == expected_properties

    def gene_row(record, symbol, interacts_with=(None, None), regulated=(None, None)):
        # Expected CSV row; each link is an (id, display) pair, None when unset.
        interacts_id, interacts_display = interacts_with
        regulates_id, regulates_display = regulated
        return OrderedDict(
            {
                "id": str(record.id),
                "name": symbol,
                "interacts": interacts_id,
                "interacts:display": interacts_display,
                "regulates": regulates_id,
                "regulates:display": regulates_display,
            }
        )

    gene_csv = read_csv(metadata_key("records/gene.csv"))
    expected_rows = [
        gene_row(yy1, "YY1", (str(pepd.id), "PEPD"), (str(gmpr2.id), "GMPR2")),
        gene_row(pepd, "PEPD"),
        gene_row(gmpr2, "GMPR2"),
    ]
    assert sort_rows(gene_csv.rows) == sort_rows(expected_rows)
Beispiel #11
0
def test_proxy_relationships_are_merged_with_record_relationships(
    s3, config, read_csv, metadata_key, partitioned_db
):
    """Package-proxy links are exported alongside record relationships.

    Each relationship CSV ("belongs_to", "likes") is expected to contain both
    the record-to-record rows and the record-to-file rows that come from
    package proxies using the same relationship name.
    """
    s3.create_bucket(Bucket=config.s3_bucket)

    person = partitioned_db.create_model("person", "Person")
    person_title = ModelProperty(
        name="name", display_name="String", data_type=dt.String(), model_title=True
    )
    partitioned_db.update_properties(person, person_title)

    item = partitioned_db.create_model("item", "Item")
    item_title = ModelProperty(
        name="name", display_name="String", data_type=dt.String(), model_title=True
    )
    partitioned_db.update_properties(item, item_title)

    # This relationship uses the default "belongs_to" package proxy relationship,
    # and should be exported in the same CSV file.
    item_belongs_to_person = partitioned_db.create_model_relationship(
        item, "belongs_to", person, one_to_many=True
    )
    person_likes_person = partitioned_db.create_model_relationship(
        person, "likes", person, one_to_many=True
    )

    alice = partitioned_db.create_record(person, {"name": "Alice"})
    bob = partitioned_db.create_record(person, {"name": "Bob"})
    laptop = partitioned_db.create_record(item, {"name": "Laptop"})

    partitioned_db.create_record_relationship(alice, person_likes_person, bob)
    partitioned_db.create_record_relationship(laptop, item_belongs_to_person, alice)

    # Package proxy using default `belongs_to` relationship
    partitioned_db.create_package_proxy(
        alice, package_id=1234, package_node_id="N:package:1234"
    )

    # Package proxy using a non-standard `likes` relationship
    partitioned_db.create_package_proxy(
        alice,
        package_id=4567,
        package_node_id="N:package:4567",
        legacy_relationship_type="likes",
    )

    # Two files in package 1234 and one in package 4567.
    manifests = [
        FileManifest(
            id=UUID("aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"),
            path="10/233/files/pkg1/file1.txt",
            size=2293,
            file_type="TEXT",
            source_package_id="N:package:1234",
        ),
        FileManifest(
            id=UUID("bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"),
            path="10/233/files/pkg1/file2.csv",
            size=234443,
            file_type="CSV",
            source_package_id="N:package:1234",
        ),
        FileManifest(
            id=UUID("cccccccc-cccc-cccc-cccc-cccccccccccc"),
            path="10/233/files/pkg2/file3.dcm",
            size=338923,
            file_type="DICOM",
            source_package_id="N:package:4567",
        ),
    ]

    graph_manifests = publish_dataset(
        partitioned_db, s3, config, file_manifests=manifests
    )

    published_paths = sorted([m.path for m in graph_manifests])
    assert published_paths == [
        "metadata/records/file.csv",
        "metadata/records/item.csv",
        "metadata/records/person.csv",
        "metadata/relationships/belongs_to.csv",
        "metadata/relationships/likes.csv",
        "metadata/schema.json",
    ]

    def relationship_row(source, target, relationship):
        # One expected CSV row; key order matches the original expectation.
        return OrderedDict(
            {"from": source, "to": target, "relationship": relationship}
        )

    belongs_to_csv = read_csv(metadata_key("relationships/belongs_to.csv"))
    expected_belongs_to = [
        relationship_row(
            str(alice.id), "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", "belongs_to"
        ),
        relationship_row(
            str(alice.id), "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", "belongs_to"
        ),
        relationship_row(str(laptop.id), str(alice.id), "belongs_to"),
    ]
    assert sort_rows(belongs_to_csv.rows) == sort_rows(expected_belongs_to)

    likes_csv = read_csv(metadata_key("relationships/likes.csv"))
    expected_likes = [
        relationship_row(
            str(alice.id), "cccccccc-cccc-cccc-cccc-cccccccccccc", "likes"
        ),
        relationship_row(str(alice.id), str(bob.id), "likes"),
    ]
    assert sort_rows(likes_csv.rows) == sort_rows(expected_likes)
Beispiel #12
0
def test_record_value_serialization(s3, config, read_csv, metadata_key, partitioned_db):
    """Publish one record covering every value type and check its CSV row.

    The expected row shows arrays joined with ";", a ";" inside a string
    array element replaced by "_", booleans in lowercase, and a missing
    optional value written as an empty string.
    """
    s3.create_bucket(Bucket=config.s3_bucket)

    patient = partitioned_db.create_model("patient", "Patient")

    scalar_properties = [
        ModelProperty(
            name="string",
            display_name="String",
            data_type=dt.String(),
            model_title=True,
        ),
        ModelProperty(name="boolean", display_name="Boolean", data_type=dt.Boolean()),
        ModelProperty(name="long", display_name="Long", data_type=dt.Long()),
        ModelProperty(name="double", display_name="Double", data_type=dt.Double()),
        ModelProperty(name="date", display_name="Date", data_type=dt.Date()),
        ModelProperty(name="optional", display_name="Optional", data_type=dt.String()),
    ]
    # One array property per item type, in the same order as the scalars.
    array_properties = [
        ModelProperty(
            name=f"{prefix}_array",
            display_name=f"{label} Array",
            data_type=dt.Array(items=item_type),
        )
        for prefix, label, item_type in (
            ("string", "String", dt.String()),
            ("boolean", "Boolean", dt.Boolean()),
            ("long", "Long", dt.Long()),
            ("double", "Double", dt.Double()),
            ("date", "Date", dt.Date()),
        )
    ]
    partitioned_db.update_properties(patient, *scalar_properties, *array_properties)

    record_values = {
        "string": 'tricky"char,acter"string',
        "boolean": True,
        "long": 12345,
        "double": 3.14159,
        "date": datetime.datetime(year=2004, month=5, day=5),
        "optional": None,
        "string_array": ["red", "green", "semi;colon"],
        "boolean_array": [True, False],
        "long_array": [1, 2, 3],
        "double_array": [1.1, 2.2, 3.3],
        "date_array": [
            datetime.datetime(year=2004, month=5, day=5),
            datetime.datetime(year=2014, month=5, day=16),
        ],
    }
    created_records = partitioned_db.create_records(patient, [record_values])
    record = created_records[0]

    publish_dataset(partitioned_db, s3, config, file_manifests=[])

    expected_row = OrderedDict(
        {
            "id": str(record.id),
            "string": 'tricky"char,acter"string',
            "boolean": "true",
            "long": "12345",
            "double": "3.14159",
            "date": "2004-05-05T00:00:00",
            "optional": "",
            "string_array": "red;green;semi_colon",
            "boolean_array": "true;false",
            "long_array": "1;2;3",
            "double_array": "1.1;2.2;3.3",
            "date_array": "2004-05-05T00:00:00;2014-05-16T00:00:00",
        }
    )
    patient_csv = read_csv(metadata_key("records/patient.csv"))
    assert patient_csv.rows == [expected_row]
Beispiel #13
0
def test_rewrite_ids_and_import(neo4j):
    """
    Test that UUIDs are remapped to the exact correct place with a manually
    defined remapping.

    The dataset is loaded with `remap_ids=True` and a `generate_new_id`
    callback backed by a fixed table, then models, properties, records,
    relationships and package proxies are checked against the remapped ids.
    """

    dataset_id = 60000
    dataset_node_id = "N:dataset:b1154216-d1d7-4484-ad18-81b58fb65484"
    organization_id = 5
    organization_node_id = "N:organization:c905919f-56f5-43ae-9c2a-8d5d542c133b"
    user_node_id = "N:user:028058b9-dd8d-4f24-a187-ea56830b379f"

    # Node id recorded as creator/updater on every imported entity below.
    exported_by = "N:user:06080380-fb56-46eb-8c70-f24112aff878"

    db = PartitionedDatabase(
        db=neo4j,
        organization_id=OrganizationId(organization_id),
        dataset_id=DatasetId(dataset_id),
        user_id=user_node_id,
        organization_node_id=organization_node_id,
        dataset_node_id=dataset_node_id,
    )

    # Original id -> easily recognizable replacement id.
    REMAPPING = {
        "0b4b3615-9eaf-425d-9727-bcac29686fd5": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
        "7b17c60d-ca2a-4cf5-a4ff-a52bbc32ff17": "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb",
        "e507b3ef-ade4-4672-83b4-f3f0774fb282": "cccccccc-cccc-cccc-cccc-cccccccccccc",
        "bf858cb5-ae51-4fcf-ad74-b1887946f70f": "dddddddd-dddd-dddd-dddd-dddddddddddd",
        "a99b09f5-caa6-4282-aa0e-cf56bde89254": "eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee",
        "42fa4eb9-51cc-4c59-b550-ac24d6d5024a": "ffffffff-ffff-ffff-ffff-ffffffffffff",
        "ecb71447-b684-c589-abda-b673c38edefc": "00000000-0000-0000-0000-000000000000",
        "e2b71447-e29d-11c3-24c6-f2ebffd1486a": "11111111-1111-1111-1111-111111111111",
        "c8b71de8-cd9c-cc3f-67fe-4e30968d4e50": "22222222-2222-2222-2222-222222222222",
        "2e754729-684a-4c45-960f-348d68737d4d": "33333333-3333-3333-3333-333333333333",
        "175ff55b-b44d-4381-bd59-d4dbc0b9c5f0": "44444444-4444-4444-4444-444444444444",
        "ccf200d3-e77f-4d9e-bed3-f1f28860152f": "55555555-5555-5555-5555-555555555555",
        "443e141b-f59c-419f-82c1-eed97925b04d": "66666666-6666-6666-6666-666666666666",
        "d0b71de9-21f9-3557-edda-ad278dd81dc0": "77777777-7777-7777-7777-777777777777",
        "aeb7476e-55f6-7924-5e43-a83cfa7e4cef": "88888888-8888-8888-8888-888888888888",
        "fa3daedd-1761-4730-be7d-bb5de8e1261c": "99999999-9999-9999-9999-999999999999",
        "00b71de7-b42f-1fe9-a83f-824452fe966e": "aaaaaaaa-aaaa-aaaa-aaaa-bbbbbbbbbbbb",
        "460591a0-8079-4979-a860-c3a4b18a32ad": "aaaaaaaa-aaaa-aaaa-aaaa-cccccccccccc",
    }

    def generate_new_id(old_id):
        # Ids missing from the table are passed through unchanged.
        return REMAPPING.get(old_id, old_id)

    load(
        dataset=f"{organization_id}/{dataset_id}",
        bucket="dev-neptune-export-use1",
        db=db,
        use_cache=False,
        smoke_test=False,
        remap_ids=True,
        generate_new_id=generate_new_id,
    )

    # Models

    patient = db.get_model("patient")
    assert patient == Model(
        name="patient",
        display_name="Patient",
        description="",
        count=2,
        id="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
        created_by=exported_by,
        updated_by=exported_by,
        created_at=iso8601.parse("2019-11-01T20:01:27.027Z"),
        updated_at=iso8601.parse("2019-11-01T20:01:27.027Z"),
        template_id=None,
    )

    assert sorted(db.get_properties(patient), key=lambda p: p.index) == [
        ModelProperty(
            id="bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb",
            name="name",
            display_name="Name",
            description="",
            index=0,
            locked=False,
            model_title=True,
            required=False,
            data_type=dt.String(),
            default=True,
            default_value=None,
            created_by=exported_by,
            updated_by=exported_by,
            created_at=iso8601.parse("2019-11-01T20:01:37.633Z"),
            updated_at=iso8601.parse("2019-11-11T15:11:17.383Z"),
        ),
        ModelProperty(
            id="cccccccc-cccc-cccc-cccc-cccccccccccc",
            name="dob",
            display_name="DOB",
            description="",
            index=1,
            locked=False,
            model_title=False,
            required=False,
            data_type=dt.Date(),
            default=False,
            default_value=None,
            created_by=exported_by,
            updated_by=exported_by,
            created_at=iso8601.parse("2019-11-11T15:11:17.383Z"),
            updated_at=iso8601.parse("2019-11-11T15:11:17.383Z"),
        ),
    ]

    bicycle = db.get_model("bicycle")
    assert bicycle.name == "bicycle"
    assert bicycle.display_name == "Bicycle"
    assert bicycle.id == "dddddddd-dddd-dddd-dddd-dddddddddddd"
    assert bicycle.count == 1
    # Identity check: `None` is a singleton, so `is` is the correct comparison.
    assert bicycle.template_id is None

    properties = sorted(db.get_properties(bicycle), key=lambda p: p.index)
    assert len(properties) == 2
    brand = properties[0]
    assert brand.name == "brand"
    assert brand.id == "eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee"

    color = properties[1]
    assert color.name == "color"
    assert color.data_type == dt.Array(
        items=dt.String(),
        enum=["purple", "blue", "orange", "green", "yellow", "red"])
    assert color.id == "ffffffff-ffff-ffff-ffff-ffffffffffff"

    # Records
    patients = db.get_all_records("patient")

    alice = Record(
        id=UUID("00000000-0000-0000-0000-000000000000"),
        created_by=exported_by,
        updated_by=exported_by,
        created_at=iso8601.parse("2019-11-01T20:01:58.537Z"),
        updated_at=iso8601.parse("2019-11-11T15:37:02.165Z"),
        values={
            "name": "Alice",
            "dob": neotime.DateTime(year=2004, month=5, day=5,
                                    tzinfo=pytz.UTC),
        },
    )
    bob = Record(
        id=UUID("11111111-1111-1111-1111-111111111111"),
        created_by=exported_by,
        updated_by=exported_by,
        created_at=iso8601.parse("2019-11-01T20:02:21.113Z"),
        updated_at=iso8601.parse("2019-11-01T20:02:21.113Z"),
        values={
            "name":
            "Bob",
            # Embedded linked property
            "mother":
            RecordStub(id=UUID("00000000-0000-0000-0000-000000000000"),
                       title="Alice"),
        },
    )
    assert sorted(patients.results,
                  key=lambda x: x.values["name"]) == [alice, bob]

    assert db.get_all_records("bicycle").results == [
        Record(
            id=UUID("22222222-2222-2222-2222-222222222222"),
            created_by=exported_by,
            updated_by=exported_by,
            created_at=iso8601.parse("2019-11-05T13:47:02.841Z"),
            updated_at=iso8601.parse("2019-11-11T15:12:28.042Z"),
            values={
                "brand": "Bianchi",
                "color": ["red", "blue"]
            },
        )
    ]

    # Model relationships
    with db.transaction() as tx:
        assert list(
            db.get_outgoing_model_relationships_tx(tx,
                                                   patient,
                                                   one_to_many=True)
        ) == [
            ModelRelationship(
                id="33333333-3333-3333-3333-333333333333",
                type="RIDES",
                name="rides_c83d5af0-ffd2-11e9-b8f0-1b1d6297ff8c",
                display_name="Rides",
                description="",
                from_="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
                to="dddddddd-dddd-dddd-dddd-dddddddddddd",
                one_to_many=True,
                index=None,
                created_at=iso8601.parse("2019-11-05T13:47:17.981Z"),
                updated_at=iso8601.parse("2019-11-05T13:47:17.981Z"),
                created_by=exported_by,
                updated_by=exported_by,
            )
        ]

        assert list(
            db.get_outgoing_model_relationships_tx(tx,
                                                   bicycle,
                                                   one_to_many=True)
        ) == [
            ModelRelationship(
                id="44444444-4444-4444-4444-444444444444",
                type="BELONGS_TO",
                name="belongs_to",
                display_name="Belongs To",
                description="",
                from_="dddddddd-dddd-dddd-dddd-dddddddddddd",
                to="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
                one_to_many=True,
                index=None,
                created_at=iso8601.parse("2019-11-21T16:47:36.918Z"),
                updated_at=iso8601.parse("2019-11-21T16:47:36.918Z"),
                created_by=exported_by,
                updated_by=exported_by,
            )
        ]

        # Model relationship stubs contain no "to" and "from" models, eg. belongs_to

        assert list(db.get_model_relationship_stubs_tx(tx)) == [
            ModelRelationshipStub(
                id="55555555-5555-5555-5555-555555555555",
                name="belongs_to",
                display_name="Belongs To",
                description="",
                type="BELONGS_TO",
                created_at=iso8601.parse("2019-11-05T13:44:38.598Z"),
                updated_at=iso8601.parse("2019-11-05T13:44:38.598Z"),
                created_by=exported_by,
                updated_by=exported_by,
            )
        ]

        # Duplicate @RELATED_TO relationships are created
        assert len(db.get_model_relationships_tx(tx, one_to_many=True)) == 2

        # Record relationships

        assert list(db.get_outgoing_record_relationships_tx(tx, alice)) == [
            RecordRelationship(
                id="77777777-7777-7777-7777-777777777777",
                from_="00000000-0000-0000-0000-000000000000",
                to="22222222-2222-2222-2222-222222222222",
                type="RIDES",
                name="rides_c83d5af0-ffd2-11e9-b8f0-1b1d6297ff8c",
                model_relationship_id="33333333-3333-3333-3333-333333333333",
                display_name="Rides",
                one_to_many=True,
                created_at=iso8601.parse("2019-11-05T13:47:46.032Z"),
                updated_at=iso8601.parse("2019-11-05T13:47:46.032Z"),
                created_by=exported_by,
                updated_by=exported_by,
            )
        ]

        assert list(
            db.get_outgoing_record_relationships_tx(
                tx, "22222222-2222-2222-2222-222222222222", one_to_many=True)
        ) == [
            RecordRelationship(
                id="88888888-8888-8888-8888-888888888888",
                from_="22222222-2222-2222-2222-222222222222",
                to="11111111-1111-1111-1111-111111111111",
                type="BELONGS_TO",
                name="belongs_to",
                model_relationship_id="44444444-4444-4444-4444-444444444444",
                display_name="Belongs To",
                one_to_many=True,
                created_at=iso8601.parse("2019-11-21T16:47:36.938Z"),
                updated_at=iso8601.parse("2019-11-21T16:47:36.938Z"),
                created_by=exported_by,
                updated_by=exported_by,
            )
        ]

        # Linked properties

        assert list(
            db.get_outgoing_model_relationships_tx(tx,
                                                   patient,
                                                   one_to_many=False)
        ) == [
            ModelRelationship(
                id="66666666-6666-6666-6666-666666666666",
                type="MOTHER",
                name="mother",
                display_name="Mother",
                description="",
                from_="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
                to="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
                one_to_many=False,
                index=1,
                created_at=iso8601.parse("2019-11-05T13:43:38.341Z"),
                updated_at=iso8601.parse("2019-11-05T13:43:38.341Z"),
                created_by=exported_by,
                updated_by=exported_by,
            )
        ]

        # Duplicate @RELATED_TO relationships are created for linked properties
        assert len(db.get_model_relationships_tx(tx, one_to_many=False)) == 1

        assert list(
            db.get_outgoing_record_relationships_tx(tx, bob, one_to_many=False)
        ) == [
            RecordRelationship(
                id="99999999-9999-9999-9999-999999999999",
                from_="11111111-1111-1111-1111-111111111111",
                to="00000000-0000-0000-0000-000000000000",
                type="MOTHER",
                model_relationship_id="66666666-6666-6666-6666-666666666666",
                name="mother",
                display_name="Mother",
                one_to_many=False,
                created_at=iso8601.parse("2019-11-05T13:43:54.116Z"),
                updated_at=iso8601.parse("2019-11-05T13:43:54.116Z"),
                created_by=exported_by,
                updated_by=exported_by,
            )
        ]

    # The result is a (total count, proxies) pair.
    assert db.get_package_proxies_for_record(alice, limit=10, offset=0) == (
        1,
        [
            PackageProxy(
                id="aaaaaaaa-aaaa-aaaa-aaaa-bbbbbbbbbbbb",
                proxy_instance_id="aaaaaaaa-aaaa-aaaa-aaaa-cccccccccccc",
                package_id=184418,
                package_node_id=
                "N:package:b493794a-1c86-4c18-9fb9-dfdf236b1fe3",
                relationship_type="belongs_to",
                created_at=iso8601.parse("2019-11-05T13:44:38.748Z"),
                updated_at=iso8601.parse("2019-11-05T13:44:38.748Z"),
                created_by=exported_by,
                updated_by=exported_by,
            )
        ],
    )

    # Packages link directly to dataset node
    assert db.count_packages() == 1
Beispiel #14
0
def test_loader(neo4j):
    """Load an exported dataset and verify everything that was imported.

    Runs ``load`` for dataset ``5/29233`` from the ``dev-neptune-export-use1``
    bucket (cache and smoke-test mode disabled) into a
    ``PartitionedDatabase`` and then checks the resulting models, model
    properties, records, model/record relationships, linked properties and
    package proxies against the expected values.

    Args:
        neo4j: database handle passed to ``PartitionedDatabase`` as ``db``
            (presumably a pytest fixture -- confirm against conftest).
    """
    dataset_id = 29233
    dataset_node_id = "N:dataset:b1154216-d1d7-4484-ad18-81b58fb65484"
    organization_id = 5
    organization_node_id = "N:organization:c905919f-56f5-43ae-9c2a-8d5d542c133b"
    user_node_id = "N:user:028058b9-dd8d-4f24-a187-ea56830b379f"

    # The database is addressed by the user's *node* id, not a numeric id.
    db = PartitionedDatabase(
        db=neo4j,
        organization_id=OrganizationId(organization_id),
        dataset_id=DatasetId(dataset_id),
        user_id=user_node_id,
        organization_node_id=organization_node_id,
        dataset_node_id=dataset_node_id,
    )

    load(
        dataset=f"{organization_id}/{dataset_id}",
        bucket="dev-neptune-export-use1",
        db=db,
        use_cache=False,
        smoke_test=False,
    )

    # Models

    patient = db.get_model("patient")
    assert patient == Model(
        name="patient",
        display_name="Patient",
        description="",
        count=2,
        id="0b4b3615-9eaf-425d-9727-bcac29686fd5",
        created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
        updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
        created_at=iso8601.parse("2019-11-01T20:01:27.027Z"),
        updated_at=iso8601.parse("2019-11-01T20:01:27.027Z"),
        template_id=None,
    )

    # Properties are compared in index order so the assertion does not depend
    # on the order in which the database returns them.
    assert sorted(db.get_properties(patient), key=lambda p: p.index) == [
        ModelProperty(
            id="7b17c60d-ca2a-4cf5-a4ff-a52bbc32ff17",
            name="name",
            display_name="Name",
            description="",
            index=0,
            locked=False,
            model_title=True,
            required=False,
            data_type=dt.String(),
            default=True,
            default_value=None,
            created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            created_at=iso8601.parse("2019-11-01T20:01:37.633Z"),
            updated_at=iso8601.parse("2019-11-11T15:11:17.383Z"),
        ),
        ModelProperty(
            id="e507b3ef-ade4-4672-83b4-f3f0774fb282",
            name="dob",
            display_name="DOB",
            description="",
            index=1,
            locked=False,
            model_title=False,
            required=False,
            data_type=dt.Date(),
            default=False,
            default_value=None,
            created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            created_at=iso8601.parse("2019-11-11T15:11:17.383Z"),
            updated_at=iso8601.parse("2019-11-11T15:11:17.383Z"),
        ),
    ]

    bicycle = db.get_model("bicycle")
    assert bicycle.name == "bicycle"
    assert bicycle.display_name == "Bicycle"
    assert bicycle.id == "bf858cb5-ae51-4fcf-ad74-b1887946f70f"
    assert bicycle.count == 1
    # Identity check: `== None` could be satisfied by a custom `__eq__`.
    assert bicycle.template_id is None

    properties = sorted(db.get_properties(bicycle), key=lambda p: p.index)
    assert len(properties) == 2
    brand = properties[0]
    assert brand.name == "brand"

    color = properties[1]
    assert color.name == "color"
    assert color.data_type == dt.Array(
        items=dt.String(),
        enum=["purple", "blue", "orange", "green", "yellow", "red"],
    )

    # Records
    patients = db.get_all_records("patient")

    alice = Record(
        id=UUID("ecb71447-b684-c589-abda-b673c38edefc"),
        created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
        updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
        created_at=iso8601.parse("2019-11-01T20:01:58.537Z"),
        updated_at=iso8601.parse("2019-11-11T15:37:02.165Z"),
        values={
            "name": "Alice",
            "dob": neotime.DateTime(year=2004, month=5, day=5,
                                    tzinfo=pytz.UTC),
        },
    )
    bob = Record(
        id=UUID("e2b71447-e29d-11c3-24c6-f2ebffd1486a"),
        created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
        updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
        created_at=iso8601.parse("2019-11-01T20:02:21.113Z"),
        updated_at=iso8601.parse("2019-11-01T20:02:21.113Z"),
        values={
            "name": "Bob",
            # Embedded linked property
            "mother": RecordStub(
                id=UUID("ecb71447-b684-c589-abda-b673c38edefc"),
                title="Alice",
            ),
        },
    )
    # Sort by name so the comparison is independent of result ordering.
    assert sorted(patients.results,
                  key=lambda x: x.values["name"]) == [alice, bob]

    assert db.get_all_records("bicycle").results == [
        Record(
            id=UUID("c8b71de8-cd9c-cc3f-67fe-4e30968d4e50"),
            created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            created_at=iso8601.parse("2019-11-05T13:47:02.841Z"),
            updated_at=iso8601.parse("2019-11-11T15:12:28.042Z"),
            values={
                "brand": "Bianchi",
                "color": ["red", "blue"],
            },
        )
    ]

    # Model relationships
    with db.transaction() as tx:

        assert list(
            db.get_outgoing_model_relationships_tx(tx,
                                                   patient,
                                                   one_to_many=True)
        ) == [
            ModelRelationship(
                id="2e754729-684a-4c45-960f-348d68737d4d",
                type="RIDES",
                name="rides_c83d5af0-ffd2-11e9-b8f0-1b1d6297ff8c",
                display_name="Rides",
                description="",
                from_="0b4b3615-9eaf-425d-9727-bcac29686fd5",
                to="bf858cb5-ae51-4fcf-ad74-b1887946f70f",
                one_to_many=True,
                index=None,
                created_at=iso8601.parse("2019-11-05T13:47:17.981Z"),
                updated_at=iso8601.parse("2019-11-05T13:47:17.981Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        # This relationship can be created in the Python client with the following:
        #
        #  >>> patient = ds.models()["patient"]
        #  >>> bike = ds.models()["bicycle"]
        #  >>> bob = patient.get_all()[1]
        #  >>> bianchi = bike.get_all()[0]
        #  >>> bianchi.relate_to(bob, relationship_type="belongs_to")
        #
        # This reuses the `belongs_to` name even though that is disallowed through
        # the frontend.  This means that the `belongs_to` CSV contains relationships
        # between proxy packages and records, *and* between records and records.

        assert list(
            db.get_outgoing_model_relationships_tx(tx,
                                                   bicycle,
                                                   one_to_many=True)
        ) == [
            ModelRelationship(
                id="175ff55b-b44d-4381-bd59-d4dbc0b9c5f0",
                type="BELONGS_TO",
                name="belongs_to",
                display_name="Belongs To",
                description="",
                from_="bf858cb5-ae51-4fcf-ad74-b1887946f70f",
                to="0b4b3615-9eaf-425d-9727-bcac29686fd5",
                one_to_many=True,
                index=None,
                created_at=iso8601.parse("2019-11-21T16:47:36.918Z"),
                updated_at=iso8601.parse("2019-11-21T16:47:36.918Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        # Model relationship stubs contain no "to" and "from" models, eg. belongs_to

        assert list(db.get_model_relationship_stubs_tx(tx)) == [
            ModelRelationshipStub(
                id="ccf200d3-e77f-4d9e-bed3-f1f28860152f",
                name="belongs_to",
                display_name="Belongs To",
                description="",
                type="BELONGS_TO",
                created_at=iso8601.parse("2019-11-05T13:44:38.598Z"),
                updated_at=iso8601.parse("2019-11-05T13:44:38.598Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        # Duplicate @RELATED_TO relationships are created
        assert len(db.get_model_relationships_tx(tx, one_to_many=True)) == 2

        # Record relationships

        assert list(db.get_outgoing_record_relationships_tx(tx, alice)) == [
            RecordRelationship(
                id="d0b71de9-21f9-3557-edda-ad278dd81dc0",
                from_="ecb71447-b684-c589-abda-b673c38edefc",
                to="c8b71de8-cd9c-cc3f-67fe-4e30968d4e50",
                type="RIDES",
                name="rides_c83d5af0-ffd2-11e9-b8f0-1b1d6297ff8c",
                model_relationship_id="2e754729-684a-4c45-960f-348d68737d4d",
                display_name="Rides",
                one_to_many=True,
                created_at=iso8601.parse("2019-11-05T13:47:46.032Z"),
                updated_at=iso8601.parse("2019-11-05T13:47:46.032Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        # The source record is given here by its id string (the bicycle record).
        assert list(
            db.get_outgoing_record_relationships_tx(
                tx, "c8b71de8-cd9c-cc3f-67fe-4e30968d4e50", one_to_many=True)
        ) == [
            RecordRelationship(
                id="aeb7476e-55f6-7924-5e43-a83cfa7e4cef",
                from_="c8b71de8-cd9c-cc3f-67fe-4e30968d4e50",
                to="e2b71447-e29d-11c3-24c6-f2ebffd1486a",
                type="BELONGS_TO",
                name="belongs_to",
                model_relationship_id="175ff55b-b44d-4381-bd59-d4dbc0b9c5f0",
                display_name="Belongs To",
                one_to_many=True,
                created_at=iso8601.parse("2019-11-21T16:47:36.938Z"),
                updated_at=iso8601.parse("2019-11-21T16:47:36.938Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        # Linked properties

        assert list(
            db.get_outgoing_model_relationships_tx(tx,
                                                   patient,
                                                   one_to_many=False)
        ) == [
            ModelRelationship(
                id="443e141b-f59c-419f-82c1-eed97925b04d",
                type="MOTHER",
                name="mother",
                display_name="Mother",
                description="",
                from_="0b4b3615-9eaf-425d-9727-bcac29686fd5",
                to="0b4b3615-9eaf-425d-9727-bcac29686fd5",
                one_to_many=False,
                index=1,
                created_at=iso8601.parse("2019-11-05T13:43:38.341Z"),
                updated_at=iso8601.parse("2019-11-05T13:43:38.341Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        # Duplicate @RELATED_TO relationships are created for linked properties
        assert len(db.get_model_relationships_tx(tx, one_to_many=False)) == 1

        assert list(
            db.get_outgoing_record_relationships_tx(tx, bob, one_to_many=False)
        ) == [
            RecordRelationship(
                id="fa3daedd-1761-4730-be7d-bb5de8e1261c",
                from_="e2b71447-e29d-11c3-24c6-f2ebffd1486a",
                to="ecb71447-b684-c589-abda-b673c38edefc",
                type="MOTHER",
                model_relationship_id="443e141b-f59c-419f-82c1-eed97925b04d",
                name="mother",
                display_name="Mother",
                one_to_many=False,
                created_at=iso8601.parse("2019-11-05T13:43:54.116Z"),
                updated_at=iso8601.parse("2019-11-05T13:43:54.116Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

    # The call returns a (total, proxies) style tuple; one proxy is expected.
    assert db.get_package_proxies_for_record(alice, limit=10, offset=0) == (
        1,
        [
            PackageProxy(
                id="00b71de7-b42f-1fe9-a83f-824452fe966e",
                proxy_instance_id="460591a0-8079-4979-a860-c3a4b18a32ad",
                package_id=184418,
                package_node_id="N:package:b493794a-1c86-4c18-9fb9-dfdf236b1fe3",
                relationship_type="belongs_to",
                created_at=iso8601.parse("2019-11-05T13:44:38.748Z"),
                updated_at=iso8601.parse("2019-11-05T13:44:38.748Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ],
    )

    # Packages link directly to dataset node
    assert db.count_packages() == 1