def update_properties(db: PartitionedDatabase, model_id_or_name: str, body: List[JsonDict]):
    """
    Create or update the properties of a model and publish changelog events.

    The request body is deserialized with ``ModelProperty.schema()`` and
    applied to the model inside a single database transaction. For every
    ``(property, created)`` pair returned by ``update_properties_tx`` a
    ``CreateModelProperty`` event (when ``created`` is truthy) or an
    ``UpdateModelProperty`` event (otherwise) is sent through
    ``PennsieveJobsClient``.

    Parameters
    ----------
    db
        Database handle scoped to an organization, dataset and user.
    model_id_or_name
        Identifier or name of the model whose properties are updated.
    body
        List of JSON dictionaries describing the properties.

    Returns
    -------
    The ``to_dict()`` form of every property returned by the update.
    """
    trace_header = AuditLogger.trace_id_header()
    incoming: List[ModelProperty] = ModelProperty.schema().load(body, many=True)

    with db.transaction() as tx:
        model = db.get_model_tx(tx, model_id_or_name)
        results = db.update_properties_tx(tx, model, *incoming)

        jobs_client = PennsieveJobsClient.get()

        # One changelog event per property; the flag from the database says
        # whether the property was newly created or merely updated.
        changelog_events = []
        for prop, was_created in results:
            event_type = CreateModelProperty if was_created else UpdateModelProperty
            changelog_events.append(
                event_type(
                    property_name=prop.name,
                    model_id=UUID(model.id),
                    model_name=model.name,
                )
            )

        jobs_client.send_changelog_events(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            events=changelog_events,
            trace_id=TraceId(trace_header),
        )

        serialized = []
        for prop, _ in results:
            serialized.append(prop.to_dict())
        return serialized
def to_model_property(d: JsonDict) -> ModelProperty:
    """
    Build a ``ModelProperty`` from a legacy, camelCased property payload.

    ``conceptTitle`` is renamed to ``modelTitle`` and ``dataType`` is passed
    through ``dt.deserialize``. Every key is then decamelized, and only keys
    whose decamelized name appears in ``ModelProperty.PUBLIC`` are forwarded
    to the ``ModelProperty`` constructor.

    The top-level keys of ``d`` are left untouched: the renaming happens on a
    shallow copy so the caller's payload is not modified as a side effect.

    Raises
    ------
    KeyError
        If ``d`` has no ``conceptTitle`` or no ``dataType`` key.
    """
    # Work on a copy - popping from the caller's dict would silently strip
    # "conceptTitle"/"dataType" from the original payload.
    payload = dict(d)
    payload["modelTitle"] = payload.pop("conceptTitle")
    payload["dataType"] = dt.deserialize(payload.pop("dataType"))

    kwargs = {}
    for key, value in payload.items():
        field_name = decamelize(key)  # computed once per key
        if field_name in ModelProperty.PUBLIC:
            kwargs[field_name] = value

    return ModelProperty(**kwargs)
def to_property_dict(p: ModelProperty) -> JsonDict:
    """
    Serialize a ``ModelProperty`` into the legacy property dictionary.

    Starting from ``p.to_dict()``, the ``modelTitle`` key is renamed to
    ``conceptTitle``, a ``defaultValue`` of ``None`` is supplied when the key
    is absent, and ``dataType`` is replaced by the result of
    ``to_legacy_data_type(p.data_type)``.
    """
    legacy = p.to_dict()
    legacy["conceptTitle"] = legacy.pop("modelTitle")
    # Only fills the key in when the serialized property did not carry one.
    legacy.setdefault("defaultValue", None)
    legacy["dataType"] = to_legacy_data_type(p.data_type)
    return legacy
def properties():
    """
    Return a list of ten sample ``ModelProperty`` objects.

    All properties share a single timestamp, taken once via
    ``datetime.now()``, for both ``created_at`` and ``updated_at``.
    """
    timestamp = datetime.now()

    def make(name, display_name, data_type, description, required):
        # Every sample property carries the same creation/update timestamp.
        return ModelProperty(
            name=name,
            display_name=display_name,
            data_type=data_type,
            description=description,
            required=required,
            created_at=timestamp,
            updated_at=timestamp,
        )

    return [
        make("id", "ID", dt.String(), "User ID", True),
        make("name", "Name", dt.String(), "Name", True),
        make("age", "Age", dt.Long(), "Age in years", True),
        make("height", "Height", dt.Double(unit="inches"), "Height in inches", True),
        make(
            "sex",
            "Sex",
            dt.Enumeration(items=dt.String(), enum=["M", "F"]),
            "Sex",
            True,
        ),
        make(
            "salutation",
            "Salutation",
            dt.Array(items=dt.String(), enum=["Mr.", "Mrs.", "Ms.", "Dr.", "Esq."]),
            "Salutation",
            False,
        ),
        make(
            "email",
            "Email",
            dt.String(format=dt.StringSubtypeFormat.EMAIL),
            "Email address",
            False,
        ),
        make(
            "url",
            "URL",
            dt.String(format=dt.StringSubtypeFormat.URL),
            "URL",
            False,
        ),
        make(
            "favorite_numbers",
            "Favorite numbers",
            dt.Long(),
            "Favorite numbers",
            False,
        ),
        make(
            "favorite_color",
            "Favorite color",
            dt.Enumeration(items=dt.String(), enum=["red", "green", "blue"]),
            "Favorite color",
            False,
        ),
    ]
def load(db, input_file: str, verbose: bool = False, base_dir: Optional[str] = None):
    """
    Seed a database with models, properties and records from CSV files.

    ``input_file`` is a JSON document containing a list of entries, each with
    the keys ``"model"``, ``"properties"`` and ``"records"``. Every value is a
    file reference that is resolved with ``resolve_file(..., base_dir)`` and
    read as a ``|``-delimited CSV file with a header row.

    For each entry:

    1. the first row of the model file (minus its ``id`` column) is passed to
       ``db.create_model``;
    2. every row of the properties file becomes a ``ModelProperty`` that is
       sent to ``db.update_properties``; the first property is flagged as the
       model title;
    3. the records file is consumed in chunks produced by
       ``grouper(LOAD_CHUNK_SIZE, ...)`` and the ``values`` column of each row
       is converted with ``to_property_values`` before being handed to
       ``db.create_records``.

    Parameters
    ----------
    db
        Database object exposing ``create_model``, ``update_properties``,
        ``create_records``, ``organization_id`` and ``dataset_id``.
    input_file
        Path of the JSON seed description.
    verbose
        When true, progress is printed to stdout.
    base_dir
        Forwarded to ``resolve_file`` for every referenced CSV file.

    Raises
    ------
    StopIteration
        If a model file contains no data row.
    """
    with open(input_file) as f:
        seed_files = json.load(f)

    log_prefix = f"{db.organization_id}:{db.dataset_id} ::"

    # "models", "properties", and "records"
    for seed_file in seed_files:

        # Models
        # newline="" is what the csv module expects, so that newlines
        # embedded in quoted fields are parsed correctly.
        with open(
            resolve_file(seed_file["model"], base_dir), "r", newline=""
        ) as model_file:
            models = DictReader(model_file, delimiter="|")
            model = next(models)
            model.pop("id")
            if verbose:
                print(f"{log_prefix} Loading model {model['name']}")
                print(model)
            model_id = db.create_model(**model).id
            if verbose:
                print(f"{log_prefix} Created model {model_id}")

        # Model properties
        with open(
            resolve_file(seed_file["properties"], base_dir), "r", newline=""
        ) as properties_file:
            properties = list(DictReader(properties_file, delimiter="|"))
            # The first property acts as the model title. Guard against an
            # empty properties file instead of failing with an IndexError.
            if properties:
                properties[0]["model_title"] = True
            if verbose:
                print(f"{log_prefix} Loading {len(properties)} properties")
            for prop in properties:
                data_type = dt.deserialize(prop.pop("data_type"))
                db.update_properties(
                    model_id, ModelProperty(data_type=data_type, **prop)
                )

        # Records
        with open(
            resolve_file(seed_file["records"], base_dir), "r", newline=""
        ) as records_file:
            records_reader = DictReader(records_file, delimiter="|")
            total_loaded = 0
            for chunk in grouper(LOAD_CHUNK_SIZE, records_reader):
                # Drop falsy entries - presumably the fill values that
                # `grouper` pads the last chunk with (TODO confirm).
                record_chunk = [
                    to_property_values(row["values"]) for row in chunk if row
                ]
                db.create_records(model_id, record_chunk)
                total_loaded += len(record_chunk)
                if verbose:
                    print(
                        f"{log_prefix} {len(record_chunk)} record(s), total {total_loaded}"
                    )
            if verbose:
                print(f"{log_prefix} total records = {total_loaded}")

        if verbose:
            print()
def test_publish_linked_properties_with_no_index(
    s3, config, read_csv, read_json, metadata_key, partitioned_db
):
    """
    Publish a model with two linked properties, one created with ``index=1``
    and one with ``index=None``.

    The published schema is expected to list the regular property first,
    followed by ``interacts`` and then ``regulates``, and the records CSV is
    expected to carry both linked properties as columns.
    """

    def linked_property(name):
        # Schema entry of a linked property pointing back at the gene model.
        return {
            "name": name,
            "displayName": name,
            "description": "",
            "dataType": {"type": "Model", "to": "gene", "file": "records/gene.csv"},
        }

    # Column values of a record that has no linked property set.
    no_links = {
        "interacts": None,
        "interacts:display": None,
        "regulates": None,
        "regulates:display": None,
    }

    s3.create_bucket(Bucket=config.s3_bucket)

    gene = partitioned_db.create_model("gene", "Gene")
    partitioned_db.update_properties(
        gene,
        ModelProperty(
            "name", "name", data_type=dt.String(), model_title=True, required=True
        ),
    )

    regulates = partitioned_db.create_model_relationship(
        gene, "regulates", gene, one_to_many=False, index=None
    )
    interacts = partitioned_db.create_model_relationship(
        gene, "interacts", gene, one_to_many=False, index=1
    )

    yy1 = partitioned_db.create_record(gene, {"name": "YY1"})
    pepd = partitioned_db.create_record(gene, {"name": "PEPD"})
    gmpr2 = partitioned_db.create_record(gene, {"name": "GMPR2"})

    partitioned_db.create_record_relationship(yy1, regulates, gmpr2)
    partitioned_db.create_record_relationship(yy1, interacts, pepd)

    publish_dataset(partitioned_db, s3, config, file_manifests=[])

    schema_json = read_json(metadata_key("schema.json"))
    published_properties = schema_json.content["models"][0]["properties"]
    assert published_properties == [
        {
            "name": "name",
            "displayName": "name",
            "description": "",
            "dataType": {"type": "String"},
        },
        linked_property("interacts"),
        linked_property("regulates"),
    ]

    gene_csv = read_csv(metadata_key("records/gene.csv"))
    assert sort_rows(gene_csv.rows) == sort_rows(
        [
            OrderedDict(
                {
                    "id": str(yy1.id),
                    "name": "YY1",
                    "interacts": str(pepd.id),
                    "interacts:display": "PEPD",
                    "regulates": str(gmpr2.id),
                    "regulates:display": "GMPR2",
                }
            ),
            OrderedDict({"id": str(pepd.id), "name": "PEPD", **no_links}),
            OrderedDict({"id": str(gmpr2.id), "name": "GMPR2", **no_links}),
        ]
    )
def test_convert_properties(properties):
    """Check that the first sample property can be dumped by the schema."""
    # No assertion: the test passes as long as `dumps` does not raise.
    ModelProperty.schema().dumps(properties[0])
def test_proxy_relationships_are_merged_with_record_relationships(
    s3, config, read_csv, metadata_key, partitioned_db
):
    """
    Package proxy relationships and record relationships of the same name are
    expected to end up in the same relationship CSV file.
    """

    def title_property():
        return ModelProperty(
            name="name", display_name="String", data_type=dt.String(), model_title=True
        )

    def relationship_row(source_id, target_id, relationship_name):
        return OrderedDict(
            {"from": source_id, "to": target_id, "relationship": relationship_name}
        )

    s3.create_bucket(Bucket=config.s3_bucket)

    person = partitioned_db.create_model("person", "Person")
    partitioned_db.update_properties(person, title_property())

    item = partitioned_db.create_model("item", "Item")
    partitioned_db.update_properties(item, title_property())

    # This relationship uses the default "belongs_to" package proxy relationship,
    # and should be exported in the same CSV file.
    item_belongs_to_person = partitioned_db.create_model_relationship(
        item, "belongs_to", person, one_to_many=True
    )
    person_likes_person = partitioned_db.create_model_relationship(
        person, "likes", person, one_to_many=True
    )

    alice = partitioned_db.create_record(person, {"name": "Alice"})
    bob = partitioned_db.create_record(person, {"name": "Bob"})
    laptop = partitioned_db.create_record(item, {"name": "Laptop"})

    partitioned_db.create_record_relationship(alice, person_likes_person, bob)
    partitioned_db.create_record_relationship(laptop, item_belongs_to_person, alice)

    # Package proxy using default `belongs_to` relationship
    partitioned_db.create_package_proxy(
        alice, package_id=1234, package_node_id="N:package:1234"
    )

    # Package proxy using a non-standard `likes` relationship
    partitioned_db.create_package_proxy(
        alice,
        package_id=4567,
        package_node_id="N:package:4567",
        legacy_relationship_type="likes",
    )

    # (file id, path, size, file type, source package)
    manifest_specs = [
        (
            "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
            "10/233/files/pkg1/file1.txt",
            2293,
            "TEXT",
            "N:package:1234",
        ),
        (
            "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb",
            "10/233/files/pkg1/file2.csv",
            234443,
            "CSV",
            "N:package:1234",
        ),
        (
            "cccccccc-cccc-cccc-cccc-cccccccccccc",
            "10/233/files/pkg2/file3.dcm",
            338923,
            "DICOM",
            "N:package:4567",
        ),
    ]
    file_manifests = [
        FileManifest(
            id=UUID(file_id),
            path=path,
            size=size,
            file_type=file_type,
            source_package_id=package_node_id,
        )
        for file_id, path, size, file_type, package_node_id in manifest_specs
    ]

    graph_manifests = publish_dataset(
        partitioned_db, s3, config, file_manifests=file_manifests
    )

    published_paths = sorted([m.path for m in graph_manifests])
    assert published_paths == [
        "metadata/records/file.csv",
        "metadata/records/item.csv",
        "metadata/records/person.csv",
        "metadata/relationships/belongs_to.csv",
        "metadata/relationships/likes.csv",
        "metadata/schema.json",
    ]

    belongs_to_csv = read_csv(metadata_key("relationships/belongs_to.csv"))
    assert sort_rows(belongs_to_csv.rows) == sort_rows(
        [
            relationship_row(
                str(alice.id), "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", "belongs_to"
            ),
            relationship_row(
                str(alice.id), "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", "belongs_to"
            ),
            relationship_row(str(laptop.id), str(alice.id), "belongs_to"),
        ]
    )

    likes_csv = read_csv(metadata_key("relationships/likes.csv"))
    assert sort_rows(likes_csv.rows) == sort_rows(
        [
            relationship_row(
                str(alice.id), "cccccccc-cccc-cccc-cccc-cccccccccccc", "likes"
            ),
            relationship_row(str(alice.id), str(bob.id), "likes"),
        ]
    )
def test_record_value_serialization(s3, config, read_csv, metadata_key, partitioned_db):
    """
    Publish one record holding a value of every scalar and array data type
    and compare the exported CSV row with the expected string forms.
    """

    def prop(name, display_name, data_type, **extra):
        return ModelProperty(
            name=name, display_name=display_name, data_type=data_type, **extra
        )

    s3.create_bucket(Bucket=config.s3_bucket)

    patient = partitioned_db.create_model("patient", "Patient")
    partitioned_db.update_properties(
        patient,
        prop("string", "String", dt.String(), model_title=True),
        prop("boolean", "Boolean", dt.Boolean()),
        prop("long", "Long", dt.Long()),
        prop("double", "Double", dt.Double()),
        prop("date", "Date", dt.Date()),
        prop("optional", "Optional", dt.String()),
        prop("string_array", "String Array", dt.Array(items=dt.String())),
        prop("boolean_array", "Boolean Array", dt.Array(items=dt.Boolean())),
        prop("long_array", "Long Array", dt.Array(items=dt.Long())),
        prop("double_array", "Double Array", dt.Array(items=dt.Double())),
        prop("date_array", "Date Array", dt.Array(items=dt.Date())),
    )

    record_values = {
        "string": 'tricky"char,acter"string',
        "boolean": True,
        "long": 12345,
        "double": 3.14159,
        "date": datetime.datetime(year=2004, month=5, day=5),
        "optional": None,
        "string_array": ["red", "green", "semi;colon"],
        "boolean_array": [True, False],
        "long_array": [1, 2, 3],
        "double_array": [1.1, 2.2, 3.3],
        "date_array": [
            datetime.datetime(year=2004, month=5, day=5),
            datetime.datetime(year=2014, month=5, day=16),
        ],
    }
    created = partitioned_db.create_records(patient, [record_values])
    record = created[0]

    publish_dataset(partitioned_db, s3, config, file_manifests=[])

    patient_csv = read_csv(metadata_key("records/patient.csv"))

    expected_row = OrderedDict(
        {
            "id": str(record.id),
            "string": 'tricky"char,acter"string',
            "boolean": "true",
            "long": "12345",
            "double": "3.14159",
            "date": "2004-05-05T00:00:00",
            "optional": "",
            # The ";" inside an array element is expected to come out as "_".
            "string_array": "red;green;semi_colon",
            "boolean_array": "true;false",
            "long_array": "1;2;3",
            "double_array": "1.1;2.2;3.3",
            "date_array": "2004-05-05T00:00:00;2014-05-16T00:00:00",
        }
    )
    assert patient_csv.rows == [expected_row]
def test_publish(
    s3, partitioned_db, sample_patient_db, config, read_csv, read_json, metadata_key
):
    """
    End-to-end check of ``publish_dataset`` on the sample patient graph.

    The graph is extended with a linked property, two package proxies and an
    additional ``attends`` relationship; the test then verifies the published
    schema, record CSVs, relationship CSVs and the returned file manifests.
    """

    # Helpers
    # ==========================================================================

    def id_of(record_name):
        return str(sample_patient_db["records"][record_name].id)

    def relationship_row(source_id, target_id, relationship_name):
        return OrderedDict(
            {"from": source_id, "to": target_id, "relationship": relationship_name}
        )

    def property_entry(name, display_name, data_type, description=""):
        return {
            "name": name,
            "displayName": display_name,
            "description": description,
            "dataType": data_type,
        }

    def model_entry(name, display_name, description, file, properties):
        return {
            "name": name,
            "displayName": display_name,
            "description": description,
            "file": file,
            "properties": properties,
        }

    def schema_relationship(name, from_model, to_model, file):
        return {"name": name, "from": from_model, "to": to_model, "file": file}

    def by_endpoints(relationship):
        return (relationship["from"], relationship["to"])

    s3.create_bucket(Bucket=config.s3_bucket)

    # Setup graph - add more data to the patient DB.
    # ==========================================================================

    # Add a linked property
    best_friend = partitioned_db.create_model_relationship(
        from_model=sample_patient_db["models"]["patient"],
        name="best_friend",
        to_model=sample_patient_db["models"]["patient"],
        display_name="Best friend",
        one_to_many=False,
    )
    partitioned_db.create_record_relationship(id_of("bob"), best_friend, id_of("alice"))

    # Alice has a package proxy
    partitioned_db.create_package_proxy(
        id_of("alice"), package_id=1234, package_node_id="N:package:1234"
    )

    # Bob also has a package proxy. However, this package no longer exists in
    # API. The exporter needs to ignore it.
    # TODO: https://app.clickup.com/t/2c3ec9
    partitioned_db.create_package_proxy(
        id_of("bob"), package_id=4567, package_node_id="N:package:4567"
    )

    # Add another relationship named "attends" The relationship instances for
    # this relationship need to be exported in the same CSV file as the
    # (patient)-[attends]->(visit) relationships, but have a distinct entry in
    # the graph schema.
    event = partitioned_db.create_model("event", display_name="Event", description="")
    partitioned_db.update_properties(
        event,
        ModelProperty(
            name="name", display_name="Name", data_type="String", model_title=True
        ),
    )
    attends = partitioned_db.create_model_relationship(
        sample_patient_db["models"]["patient"], "attends", event
    )
    birthday = partitioned_db.create_records(event, [{"name": "Birthday"}])[0]
    partitioned_db.create_record_relationship(id_of("alice"), attends, birthday)

    # These are the file manifests provided by `discover-publish`
    file_manifests = [
        FileManifest(
            id=UUID("aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"),
            path="files/pkg1/file1.txt",
            size=2293,
            file_type="TEXT",
            source_package_id="N:package:1234",
        ),
        FileManifest(
            id=UUID("bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"),
            path="files/pkg1/file2.csv",
            size=234443,
            file_type="CSV",
            source_package_id="N:package:1234",
        ),
    ]

    # Publish dataset
    # ==========================================================================

    graph_manifests = publish_dataset(
        partitioned_db, s3, config, file_manifests=file_manifests
    )

    published_objects = s3.list_objects(Bucket=config.s3_bucket).get("Contents", [])
    for published_object in published_objects:
        # Don't export a CSV for the best_friend linked property.
        assert (
            published_object["Key"] != "10/233/metadata/relationships/best_friend.csv"
        )

    # Check graph schema
    # ==========================================================================

    schema_json = read_json(metadata_key("schema.json"))

    string_type = {"type": "String"}

    published_models = sorted(schema_json.content["models"], key=lambda m: m["name"])
    assert published_models == [
        model_entry(
            "event",
            "Event",
            "",
            "records/event.csv",
            [property_entry("name", "Name", dict(string_type))],
        ),
        model_entry(
            "file",
            "File",
            "A file in the dataset",
            "records/file.csv",
            [
                property_entry(
                    "path",
                    "Path",
                    dict(string_type),
                    description="The path to the file from the root of the dataset",
                )
                # TODO: add sourcePackageId (enhancement)
            ],
        ),
        model_entry(
            "medication",
            "Medication",
            "a medication",
            "records/medication.csv",
            [property_entry("name", "Name", dict(string_type))],
        ),
        model_entry(
            "patient",
            "Patient",
            "a person",
            "records/patient.csv",
            [
                property_entry("name", "Name", dict(string_type)),
                property_entry("age", "Age", {"type": "Long"}),
                property_entry(
                    "best_friend",
                    "Best friend",
                    {
                        "type": "Model",
                        "to": "patient",
                        "file": "records/patient.csv",
                    },
                ),
            ],
        ),
        model_entry(
            "visit",
            "Visit",
            "a visit",
            "records/visit.csv",
            [property_entry("day", "Day", dict(string_type))],
        ),
    ]

    published_relationships = sorted(
        schema_json.content["relationships"], key=by_endpoints
    )
    assert published_relationships == sorted(
        [
            schema_relationship(
                "attends", "patient", "visit", "relationships/attends.csv"
            ),
            schema_relationship(
                "attends", "patient", "event", "relationships/attends.csv"
            ),
            schema_relationship("belongs_to", "", "", "relationships/belongs_to.csv"),
            schema_relationship(
                "prescribed", "visit", "medication", "relationships/prescribed.csv"
            ),
        ],
        key=by_endpoints,
    )

    # Check records
    # ==========================================================================

    patient_csv = read_csv(metadata_key("records/patient.csv"))
    assert sort_rows(patient_csv.rows) == sort_rows(
        [
            OrderedDict(
                {
                    "id": id_of("alice"),
                    "name": "Alice",
                    "age": "34",
                    "best_friend": None,
                    "best_friend:display": None,
                }
            ),
            OrderedDict(
                {
                    "id": id_of("bob"),
                    "name": "Bob",
                    "age": "20",
                    "best_friend": id_of("alice"),
                    "best_friend:display": "Alice",
                }
            ),
        ]
    )

    visit_csv = read_csv(metadata_key("records/visit.csv"))
    assert sort_rows(visit_csv.rows) == sort_rows(
        [
            OrderedDict({"id": id_of("monday"), "day": "Monday"}),
            OrderedDict({"id": id_of("tuesday"), "day": "Tuesday"}),
        ]
    )

    medication_csv = read_csv(metadata_key("records/medication.csv"))
    assert sort_rows(medication_csv.rows) == sort_rows(
        [
            OrderedDict({"id": id_of("aspirin"), "name": "Aspirin"}),
            OrderedDict({"id": id_of("motrin"), "name": "Motrin"}),
            OrderedDict({"id": id_of("tylenol"), "name": "Tylenol"}),
        ]
    )

    event_csv = read_csv(metadata_key("records/event.csv"))
    assert event_csv.rows == [{"id": str(birthday.id), "name": "Birthday"}]

    # Check relationships
    # ==========================================================================

    attends_csv = read_csv(metadata_key("relationships/attends.csv"))
    assert sort_rows(attends_csv.rows) == sort_rows(
        [
            relationship_row(id_of("alice"), id_of("monday"), "attends"),
            relationship_row(id_of("bob"), id_of("tuesday"), "attends"),
            # Contains relationships from multiple model relationships
            relationship_row(id_of("alice"), str(birthday.id), "attends"),
        ]
    )

    prescribed_csv = read_csv(metadata_key("relationships/prescribed.csv"))
    assert sort_rows(prescribed_csv.rows) == sort_rows(
        [
            relationship_row(id_of("monday"), id_of("aspirin"), "prescribed"),
            relationship_row(id_of("tuesday"), id_of("aspirin"), "prescribed"),
            relationship_row(id_of("tuesday"), id_of("tylenol"), "prescribed"),
        ]
    )

    # Check proxy packages
    # ==========================================================================

    file_csv = read_csv(metadata_key("records/file.csv"))
    assert sort_rows(file_csv.rows) == sort_rows(
        [
            OrderedDict(
                {
                    "id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
                    "path": "files/pkg1/file1.txt",
                    "sourcePackageId": "N:package:1234",
                }
            ),
            OrderedDict(
                {
                    "id": "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb",
                    "path": "files/pkg1/file2.csv",
                    "sourcePackageId": "N:package:1234",
                }
            ),
        ]
    )

    # Check proxy package relationships
    # ==========================================================================

    belongs_to_csv = read_csv(metadata_key("relationships/belongs_to.csv"))
    assert sort_rows(belongs_to_csv.rows) == sort_rows(
        [
            relationship_row(
                id_of("alice"), "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", "belongs_to"
            ),
            relationship_row(
                id_of("alice"), "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", "belongs_to"
            ),
        ]
    )

    # Check file manifest output
    # ==========================================================================

    # (path, file type, size of the published object)
    expected_outputs = [
        ("metadata/schema.json", "Json", schema_json.size),
        ("metadata/records/event.csv", "CSV", event_csv.size),
        ("metadata/records/file.csv", "CSV", file_csv.size),
        ("metadata/records/medication.csv", "CSV", medication_csv.size),
        ("metadata/records/patient.csv", "CSV", patient_csv.size),
        ("metadata/records/visit.csv", "CSV", visit_csv.size),
        ("metadata/relationships/attends.csv", "CSV", attends_csv.size),
        ("metadata/relationships/prescribed.csv", "CSV", prescribed_csv.size),
        ("metadata/relationships/belongs_to.csv", "CSV", belongs_to_csv.size),
    ]
    assert sorted(graph_manifests) == sorted(
        [
            FileManifest(path=path, file_type=file_type, size=size)
            for path, file_type, size in expected_outputs
        ]
    )
def test_rewrite_ids_and_import(neo4j):
    """
    Test that UUIDs are remapped to the exact correct place with a manually
    defined remapping.
    """
    dataset_id = 60000
    dataset_node_id = "N:dataset:b1154216-d1d7-4484-ad18-81b58fb65484"
    organization_id = 5
    organization_node_id = "N:organization:c905919f-56f5-43ae-9c2a-8d5d542c133b"
    user_id = 114
    user_node_id = "N:user:028058b9-dd8d-4f24-a187-ea56830b379f"

    db = PartitionedDatabase(
        db=neo4j,
        organization_id=OrganizationId(organization_id),
        dataset_id=DatasetId(dataset_id),
        user_id=user_node_id,
        organization_node_id=organization_node_id,
        dataset_node_id=dataset_node_id,
    )

    # Remapped identifiers that the assertions below refer to more than once.
    patient_model_id = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
    bicycle_model_id = "dddddddd-dddd-dddd-dddd-dddddddddddd"
    alice_id = "00000000-0000-0000-0000-000000000000"
    bob_id = "11111111-1111-1111-1111-111111111111"
    bicycle_record_id = "22222222-2222-2222-2222-222222222222"
    rides_id = "33333333-3333-3333-3333-333333333333"
    belongs_to_id = "44444444-4444-4444-4444-444444444444"
    mother_id = "66666666-6666-6666-6666-666666666666"

    remapping = {
        "0b4b3615-9eaf-425d-9727-bcac29686fd5": patient_model_id,
        "7b17c60d-ca2a-4cf5-a4ff-a52bbc32ff17": "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb",
        "e507b3ef-ade4-4672-83b4-f3f0774fb282": "cccccccc-cccc-cccc-cccc-cccccccccccc",
        "bf858cb5-ae51-4fcf-ad74-b1887946f70f": bicycle_model_id,
        "a99b09f5-caa6-4282-aa0e-cf56bde89254": "eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee",
        "42fa4eb9-51cc-4c59-b550-ac24d6d5024a": "ffffffff-ffff-ffff-ffff-ffffffffffff",
        "ecb71447-b684-c589-abda-b673c38edefc": alice_id,
        "e2b71447-e29d-11c3-24c6-f2ebffd1486a": bob_id,
        "c8b71de8-cd9c-cc3f-67fe-4e30968d4e50": bicycle_record_id,
        "2e754729-684a-4c45-960f-348d68737d4d": rides_id,
        "175ff55b-b44d-4381-bd59-d4dbc0b9c5f0": belongs_to_id,
        "ccf200d3-e77f-4d9e-bed3-f1f28860152f": "55555555-5555-5555-5555-555555555555",
        "443e141b-f59c-419f-82c1-eed97925b04d": mother_id,
        "d0b71de9-21f9-3557-edda-ad278dd81dc0": "77777777-7777-7777-7777-777777777777",
        "aeb7476e-55f6-7924-5e43-a83cfa7e4cef": "88888888-8888-8888-8888-888888888888",
        "fa3daedd-1761-4730-be7d-bb5de8e1261c": "99999999-9999-9999-9999-999999999999",
        "00b71de7-b42f-1fe9-a83f-824452fe966e": "aaaaaaaa-aaaa-aaaa-aaaa-bbbbbbbbbbbb",
        "460591a0-8079-4979-a860-c3a4b18a32ad": "aaaaaaaa-aaaa-aaaa-aaaa-cccccccccccc",
    }

    def generate_new_id(old_id):
        # Identifiers without an entry in the mapping are kept as they are.
        return remapping.get(old_id, old_id)

    author = "N:user:06080380-fb56-46eb-8c70-f24112aff878"

    def audit(created, updated):
        # Audit fields shared by every expected object of the imported dataset.
        return dict(
            created_by=author,
            updated_by=author,
            created_at=iso8601.parse(created),
            updated_at=iso8601.parse(updated),
        )

    load(
        dataset=f"{organization_id}/{dataset_id}",
        bucket="dev-neptune-export-use1",
        db=db,
        use_cache=False,
        smoke_test=False,
        remap_ids=True,
        generate_new_id=generate_new_id,
    )

    # Models
    patient = db.get_model("patient")
    assert patient == Model(
        name="patient",
        display_name="Patient",
        description="",
        count=2,
        id=patient_model_id,
        template_id=None,
        **audit("2019-11-01T20:01:27.027Z", "2019-11-01T20:01:27.027Z"),
    )

    assert sorted(db.get_properties(patient), key=lambda p: p.index) == [
        ModelProperty(
            id="bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb",
            name="name",
            display_name="Name",
            description="",
            index=0,
            locked=False,
            model_title=True,
            required=False,
            data_type=dt.String(),
            default=True,
            default_value=None,
            **audit("2019-11-01T20:01:37.633Z", "2019-11-11T15:11:17.383Z"),
        ),
        ModelProperty(
            id="cccccccc-cccc-cccc-cccc-cccccccccccc",
            name="dob",
            display_name="DOB",
            description="",
            index=1,
            locked=False,
            model_title=False,
            required=False,
            data_type=dt.Date(),
            default=False,
            default_value=None,
            **audit("2019-11-11T15:11:17.383Z", "2019-11-11T15:11:17.383Z"),
        ),
    ]

    bicycle = db.get_model("bicycle")
    assert bicycle.name == "bicycle"
    assert bicycle.display_name == "Bicycle"
    assert bicycle.id == bicycle_model_id
    assert bicycle.count == 1
    assert bicycle.template_id == None

    bicycle_properties = sorted(db.get_properties(bicycle), key=lambda p: p.index)
    assert len(bicycle_properties) == 2

    brand, color = bicycle_properties[0], bicycle_properties[1]
    assert brand.name == "brand"
    assert brand.id == "eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee"

    assert color.name == "color"
    assert color.data_type == dt.Array(
        items=dt.String(),
        enum=["purple", "blue", "orange", "green", "yellow", "red"],
    )
    assert color.id == "ffffffff-ffff-ffff-ffff-ffffffffffff"

    # Records
    patients = db.get_all_records("patient")

    alice = Record(
        id=UUID(alice_id),
        values={
            "name": "Alice",
            "dob": neotime.DateTime(year=2004, month=5, day=5, tzinfo=pytz.UTC),
        },
        **audit("2019-11-01T20:01:58.537Z", "2019-11-11T15:37:02.165Z"),
    )
    bob = Record(
        id=UUID(bob_id),
        values={
            "name": "Bob",
            # Embedded linked property
            "mother": RecordStub(id=UUID(alice_id), title="Alice"),
        },
        **audit("2019-11-01T20:02:21.113Z", "2019-11-01T20:02:21.113Z"),
    )

    assert sorted(patients.results, key=lambda x: x.values["name"]) == [alice, bob]

    assert db.get_all_records("bicycle").results == [
        Record(
            id=UUID(bicycle_record_id),
            values={"brand": "Bianchi", "color": ["red", "blue"]},
            **audit("2019-11-05T13:47:02.841Z", "2019-11-11T15:12:28.042Z"),
        )
    ]

    # Model relationships
    with db.transaction() as tx:
        patient_relationships = list(
            db.get_outgoing_model_relationships_tx(tx, patient, one_to_many=True)
        )
        assert patient_relationships == [
            ModelRelationship(
                id=rides_id,
                type="RIDES",
                name="rides_c83d5af0-ffd2-11e9-b8f0-1b1d6297ff8c",
                display_name="Rides",
                description="",
                from_=patient_model_id,
                to=bicycle_model_id,
                one_to_many=True,
                index=None,
                **audit("2019-11-05T13:47:17.981Z", "2019-11-05T13:47:17.981Z"),
            )
        ]

        bicycle_relationships = list(
            db.get_outgoing_model_relationships_tx(tx, bicycle, one_to_many=True)
        )
        assert bicycle_relationships == [
            ModelRelationship(
                id=belongs_to_id,
                type="BELONGS_TO",
                name="belongs_to",
                display_name="Belongs To",
                description="",
                from_=bicycle_model_id,
                to=patient_model_id,
                one_to_many=True,
                index=None,
                **audit("2019-11-21T16:47:36.918Z", "2019-11-21T16:47:36.918Z"),
            )
        ]

        # Model relationship stubs contain no "to" and "from" models, eg. belongs_to
        assert list(db.get_model_relationship_stubs_tx(tx)) == [
            ModelRelationshipStub(
                id="55555555-5555-5555-5555-555555555555",
                name="belongs_to",
                display_name="Belongs To",
                description="",
                type="BELONGS_TO",
                **audit("2019-11-05T13:44:38.598Z", "2019-11-05T13:44:38.598Z"),
            )
        ]

        # Duplicate @RELATED_TO relationships are created
        assert len(db.get_model_relationships_tx(tx, one_to_many=True)) == 2

        # Record relationships
        assert list(db.get_outgoing_record_relationships_tx(tx, alice)) == [
            RecordRelationship(
                id="77777777-7777-7777-7777-777777777777",
                from_=alice_id,
                to=bicycle_record_id,
                type="RIDES",
                name="rides_c83d5af0-ffd2-11e9-b8f0-1b1d6297ff8c",
                model_relationship_id=rides_id,
                display_name="Rides",
                one_to_many=True,
                **audit("2019-11-05T13:47:46.032Z", "2019-11-05T13:47:46.032Z"),
            )
        ]

        bicycle_record_relationships = list(
            db.get_outgoing_record_relationships_tx(
                tx, "22222222-2222-2222-2222-222222222222", one_to_many=True
            )
        )
        assert bicycle_record_relationships == [
            RecordRelationship(
                id="88888888-8888-8888-8888-888888888888",
                from_=bicycle_record_id,
                to=bob_id,
                type="BELONGS_TO",
                name="belongs_to",
                model_relationship_id=belongs_to_id,
                display_name="Belongs To",
                one_to_many=True,
                **audit("2019-11-21T16:47:36.938Z", "2019-11-21T16:47:36.938Z"),
            )
        ]

        # Linked properties
        linked_properties = list(
            db.get_outgoing_model_relationships_tx(tx, patient, one_to_many=False)
        )
        assert linked_properties == [
            ModelRelationship(
                id=mother_id,
                type="MOTHER",
                name="mother",
                display_name="Mother",
                description="",
                from_=patient_model_id,
                to=patient_model_id,
                one_to_many=False,
                index=1,
                **audit("2019-11-05T13:43:38.341Z", "2019-11-05T13:43:38.341Z"),
            )
        ]

        # Duplicate @RELATED_TO relationships are created for linked properties
        assert len(db.get_model_relationships_tx(tx, one_to_many=False)) == 1

        bob_linked_values = list(
            db.get_outgoing_record_relationships_tx(tx, bob, one_to_many=False)
        )
        assert bob_linked_values == [
            RecordRelationship(
                id="99999999-9999-9999-9999-999999999999",
                from_=bob_id,
                to=alice_id,
                type="MOTHER",
                model_relationship_id=mother_id,
                name="mother",
                display_name="Mother",
                one_to_many=False,
                **audit("2019-11-05T13:43:54.116Z", "2019-11-05T13:43:54.116Z"),
            )
        ]

    assert db.get_package_proxies_for_record(alice, limit=10, offset=0) == (
        1,
        [
            PackageProxy(
                id="aaaaaaaa-aaaa-aaaa-aaaa-bbbbbbbbbbbb",
                proxy_instance_id="aaaaaaaa-aaaa-aaaa-aaaa-cccccccccccc",
                package_id=184418,
                package_node_id="N:package:b493794a-1c86-4c18-9fb9-dfdf236b1fe3",
                relationship_type="belongs_to",
                **audit("2019-11-05T13:44:38.748Z", "2019-11-05T13:44:38.748Z"),
            )
        ],
    )

    # Packages link directly to dataset node
    assert db.count_packages() == 1
def test_loader(neo4j):
    """Load dataset ``5/29233`` and verify everything the loader imported.

    Calls ``load`` against the ``dev-neptune-export-use1`` bucket with
    caching and smoke-test mode disabled, then asserts on the resulting
    models, model properties, records, model/record relationships, linked
    properties, and package proxies read back through ``PartitionedDatabase``.

    Args:
        neo4j: Database handle passed straight through as the ``db`` argument
            of ``PartitionedDatabase`` (presumably a pytest fixture; confirm
            against the test configuration).
    """
    dataset_id = 29233
    dataset_node_id = "N:dataset:b1154216-d1d7-4484-ad18-81b58fb65484"
    organization_id = 5
    organization_node_id = "N:organization:c905919f-56f5-43ae-9c2a-8d5d542c133b"
    # NOTE: the integer user id is not passed anywhere below; the database is
    # constructed with the user *node* id instead.
    user_id = 114
    user_node_id = "N:user:028058b9-dd8d-4f24-a187-ea56830b379f"

    db = PartitionedDatabase(
        db=neo4j,
        organization_id=OrganizationId(organization_id),
        dataset_id=DatasetId(dataset_id),
        user_id=user_node_id,
        organization_node_id=organization_node_id,
        dataset_node_id=dataset_node_id,
    )

    load(
        dataset=f"{organization_id}/{dataset_id}",
        bucket="dev-neptune-export-use1",
        db=db,
        use_cache=False,
        smoke_test=False,
    )

    # Models
    patient = db.get_model("patient")
    assert patient == Model(
        name="patient",
        display_name="Patient",
        description="",
        count=2,
        id="0b4b3615-9eaf-425d-9727-bcac29686fd5",
        created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
        updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
        created_at=iso8601.parse("2019-11-01T20:01:27.027Z"),
        updated_at=iso8601.parse("2019-11-01T20:01:27.027Z"),
        template_id=None,
    )

    # Properties are sorted by index so the comparison has a stable order.
    assert sorted(db.get_properties(patient), key=lambda p: p.index) == [
        ModelProperty(
            id="7b17c60d-ca2a-4cf5-a4ff-a52bbc32ff17",
            name="name",
            display_name="Name",
            description="",
            index=0,
            locked=False,
            model_title=True,
            required=False,
            data_type=dt.String(),
            default=True,
            default_value=None,
            created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            created_at=iso8601.parse("2019-11-01T20:01:37.633Z"),
            updated_at=iso8601.parse("2019-11-11T15:11:17.383Z"),
        ),
        ModelProperty(
            id="e507b3ef-ade4-4672-83b4-f3f0774fb282",
            name="dob",
            display_name="DOB",
            description="",
            index=1,
            locked=False,
            model_title=False,
            required=False,
            data_type=dt.Date(),
            default=False,
            default_value=None,
            created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            created_at=iso8601.parse("2019-11-11T15:11:17.383Z"),
            updated_at=iso8601.parse("2019-11-11T15:11:17.383Z"),
        ),
    ]

    bicycle = db.get_model("bicycle")
    assert bicycle.name == "bicycle"
    assert bicycle.display_name == "Bicycle"
    assert bicycle.id == "bf858cb5-ae51-4fcf-ad74-b1887946f70f"
    assert bicycle.count == 1
    assert bicycle.template_id is None

    bicycle_properties = sorted(db.get_properties(bicycle), key=lambda p: p.index)
    assert len(bicycle_properties) == 2
    brand = bicycle_properties[0]
    assert brand.name == "brand"
    color = bicycle_properties[1]
    assert color.name == "color"
    assert color.data_type == dt.Array(
        items=dt.String(),
        enum=["purple", "blue", "orange", "green", "yellow", "red"],
    )

    # Records
    patients = db.get_all_records("patient")

    alice = Record(
        id=UUID("ecb71447-b684-c589-abda-b673c38edefc"),
        created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
        updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
        created_at=iso8601.parse("2019-11-01T20:01:58.537Z"),
        updated_at=iso8601.parse("2019-11-11T15:37:02.165Z"),
        values={
            "name": "Alice",
            "dob": neotime.DateTime(year=2004, month=5, day=5, tzinfo=pytz.UTC),
        },
    )

    bob = Record(
        id=UUID("e2b71447-e29d-11c3-24c6-f2ebffd1486a"),
        created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
        updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
        created_at=iso8601.parse("2019-11-01T20:02:21.113Z"),
        updated_at=iso8601.parse("2019-11-01T20:02:21.113Z"),
        values={
            "name": "Bob",
            # Embedded linked property
            "mother": RecordStub(
                id=UUID("ecb71447-b684-c589-abda-b673c38edefc"), title="Alice"
            ),
        },
    )

    # Sort by name so the comparison does not depend on retrieval order.
    assert sorted(patients.results, key=lambda x: x.values["name"]) == [alice, bob]

    assert db.get_all_records("bicycle").results == [
        Record(
            id=UUID("c8b71de8-cd9c-cc3f-67fe-4e30968d4e50"),
            created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            created_at=iso8601.parse("2019-11-05T13:47:02.841Z"),
            updated_at=iso8601.parse("2019-11-11T15:12:28.042Z"),
            values={"brand": "Bianchi", "color": ["red", "blue"]},
        )
    ]

    # Model relationships
    with db.transaction() as tx:
        assert list(
            db.get_outgoing_model_relationships_tx(tx, patient, one_to_many=True)
        ) == [
            ModelRelationship(
                id="2e754729-684a-4c45-960f-348d68737d4d",
                type="RIDES",
                name="rides_c83d5af0-ffd2-11e9-b8f0-1b1d6297ff8c",
                display_name="Rides",
                description="",
                from_="0b4b3615-9eaf-425d-9727-bcac29686fd5",
                to="bf858cb5-ae51-4fcf-ad74-b1887946f70f",
                one_to_many=True,
                index=None,
                created_at=iso8601.parse("2019-11-05T13:47:17.981Z"),
                updated_at=iso8601.parse("2019-11-05T13:47:17.981Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        # This relationship can be created in the Python client with the following:
        #
        #   >>> patient = ds.models()["patient"]
        #   >>> bike = ds.models()["bicycle"]
        #   >>> bob = patient.get_all()[1]
        #   >>> bianchi = bike.get_all()[0]
        #   >>> bianchi.relate_to(bob, relationship_type="belongs_to")
        #
        # This reuses the `belongs_to` name even though that is disallowed through
        # the frontend. This means that the `belongs_to` CSV contains relationships
        # between proxy packages and records, *and* between records and records.
        assert list(
            db.get_outgoing_model_relationships_tx(tx, bicycle, one_to_many=True)
        ) == [
            ModelRelationship(
                id="175ff55b-b44d-4381-bd59-d4dbc0b9c5f0",
                type="BELONGS_TO",
                name="belongs_to",
                display_name="Belongs To",
                description="",
                from_="bf858cb5-ae51-4fcf-ad74-b1887946f70f",
                to="0b4b3615-9eaf-425d-9727-bcac29686fd5",
                one_to_many=True,
                index=None,
                created_at=iso8601.parse("2019-11-21T16:47:36.918Z"),
                updated_at=iso8601.parse("2019-11-21T16:47:36.918Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        # Model relationship stubs contain no "to" and "from" models, eg. belongs_to
        assert list(db.get_model_relationship_stubs_tx(tx)) == [
            ModelRelationshipStub(
                id="ccf200d3-e77f-4d9e-bed3-f1f28860152f",
                name="belongs_to",
                display_name="Belongs To",
                description="",
                type="BELONGS_TO",
                created_at=iso8601.parse("2019-11-05T13:44:38.598Z"),
                updated_at=iso8601.parse("2019-11-05T13:44:38.598Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        # Duplicate @RELATED_TO relationships are created
        assert len(db.get_model_relationships_tx(tx, one_to_many=True)) == 2

        # Record relationships
        assert list(db.get_outgoing_record_relationships_tx(tx, alice)) == [
            RecordRelationship(
                id="d0b71de9-21f9-3557-edda-ad278dd81dc0",
                from_="ecb71447-b684-c589-abda-b673c38edefc",
                to="c8b71de8-cd9c-cc3f-67fe-4e30968d4e50",
                type="RIDES",
                name="rides_c83d5af0-ffd2-11e9-b8f0-1b1d6297ff8c",
                model_relationship_id="2e754729-684a-4c45-960f-348d68737d4d",
                display_name="Rides",
                one_to_many=True,
                created_at=iso8601.parse("2019-11-05T13:47:46.032Z"),
                updated_at=iso8601.parse("2019-11-05T13:47:46.032Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        # The bicycle record is addressed by its raw id string here.
        assert list(
            db.get_outgoing_record_relationships_tx(
                tx, "c8b71de8-cd9c-cc3f-67fe-4e30968d4e50", one_to_many=True
            )
        ) == [
            RecordRelationship(
                id="aeb7476e-55f6-7924-5e43-a83cfa7e4cef",
                from_="c8b71de8-cd9c-cc3f-67fe-4e30968d4e50",
                to="e2b71447-e29d-11c3-24c6-f2ebffd1486a",
                type="BELONGS_TO",
                name="belongs_to",
                model_relationship_id="175ff55b-b44d-4381-bd59-d4dbc0b9c5f0",
                display_name="Belongs To",
                one_to_many=True,
                created_at=iso8601.parse("2019-11-21T16:47:36.938Z"),
                updated_at=iso8601.parse("2019-11-21T16:47:36.938Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        # Linked properties
        assert list(
            db.get_outgoing_model_relationships_tx(tx, patient, one_to_many=False)
        ) == [
            ModelRelationship(
                id="443e141b-f59c-419f-82c1-eed97925b04d",
                type="MOTHER",
                name="mother",
                display_name="Mother",
                description="",
                from_="0b4b3615-9eaf-425d-9727-bcac29686fd5",
                to="0b4b3615-9eaf-425d-9727-bcac29686fd5",
                one_to_many=False,
                index=1,
                created_at=iso8601.parse("2019-11-05T13:43:38.341Z"),
                updated_at=iso8601.parse("2019-11-05T13:43:38.341Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

        # Duplicate @RELATED_TO relationships are created for linked properties
        assert len(db.get_model_relationships_tx(tx, one_to_many=False)) == 1

        assert list(
            db.get_outgoing_record_relationships_tx(tx, bob, one_to_many=False)
        ) == [
            RecordRelationship(
                id="fa3daedd-1761-4730-be7d-bb5de8e1261c",
                from_="e2b71447-e29d-11c3-24c6-f2ebffd1486a",
                to="ecb71447-b684-c589-abda-b673c38edefc",
                type="MOTHER",
                model_relationship_id="443e141b-f59c-419f-82c1-eed97925b04d",
                name="mother",
                display_name="Mother",
                one_to_many=False,
                created_at=iso8601.parse("2019-11-05T13:43:54.116Z"),
                updated_at=iso8601.parse("2019-11-05T13:43:54.116Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ]

    # Package proxies: the call returns a (total count, proxies) pair.
    assert db.get_package_proxies_for_record(alice, limit=10, offset=0) == (
        1,
        [
            PackageProxy(
                id="00b71de7-b42f-1fe9-a83f-824452fe966e",
                proxy_instance_id="460591a0-8079-4979-a860-c3a4b18a32ad",
                package_id=184418,
                package_node_id="N:package:b493794a-1c86-4c18-9fb9-dfdf236b1fe3",
                relationship_type="belongs_to",
                created_at=iso8601.parse("2019-11-05T13:44:38.748Z"),
                updated_at=iso8601.parse("2019-11-05T13:44:38.748Z"),
                created_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
                updated_by="N:user:06080380-fb56-46eb-8c70-f24112aff878",
            )
        ],
    )

    # Packages link directly to dataset node
    assert db.count_packages() == 1