def test_read_relationship_file(spark_session):
    """Hierarchies built from a SNOMED relationship file expose the expected ancestor columns."""
    hierarchies = with_relationships(
        spark_session,
        create_hierarchies(spark_session),
        'tests/resources/SNOMED_RELATIONSHIP_SAMPLE.TXT',
        '20160901')

    ancestor_columns = set(hierarchies.get_ancestors().columns)

    assert ancestor_columns == EXPECTED_COLUMNS
def test_isa_custom(spark_session, bundles):
    """
    in_valueset matches observations against a valueset pushed from a custom
    ontologies database, and the pushed valuesets are visible via
    get_current_valuesets.
    """
    observations = extract_entry(spark_session, bundles, 'observation')

    # createOrReplaceTempView replaces the deprecated registerTempTable alias
    # (deprecated since Spark 2.0); behavior is identical.
    observations.createOrReplaceTempView('observations')

    blood_pressure = {'blood_pressure': [('http://loinc.org', '8462-4')]}

    # Use a dedicated database so this test neither depends on nor pollutes
    # the default 'ontologies' database.
    spark_session.sql('create database custom_ontologies')
    create_value_sets(spark_session).write_to_database('custom_ontologies')
    create_hierarchies(spark_session).write_to_database('custom_ontologies')

    push_valuesets(spark_session, blood_pressure, database='custom_ontologies')

    results = spark_session.sql("SELECT subject.reference, "
                                "effectiveDateTime, "
                                "valueQuantity.value "
                                "FROM observations "
                                "WHERE in_valueset(code, 'blood_pressure')")

    assert get_current_valuesets(spark_session) == blood_pressure
    assert results.count() == 14
def test_isa_snomed(spark_session):
    """isa_snomed resolves a SNOMED code and its descendants from a custom database."""
    spark_session.sql('create database isa_snomed_ontologies')

    snomed_hierarchies = with_relationships(
        spark_session,
        create_hierarchies(spark_session),
        'tests/resources/SNOMED_RELATIONSHIP_SAMPLE.TXT',
        '20160901')

    snomed_hierarchies.write_to_database('isa_snomed_ontologies')
    create_value_sets(spark_session).write_to_database('isa_snomed_ontologies')

    push_valuesets(spark_session,
                   {'diabetes': isa_snomed('73211009')},
                   database='isa_snomed_ontologies')

    expected = {
        'diabetes': [('http://snomed.info/sct', '73211009'),
                     ('http://snomed.info/sct', '44054006')]
    }

    assert get_current_valuesets(spark_session) == expected
def test_isa_loinc(spark_session):
    """isa_loinc resolves a LOINC code and its descendants from a custom database."""
    spark_session.sql('create database isa_loinc_ontologies')

    loinc_hierarchies = with_loinc_hierarchy(
        spark_session,
        create_hierarchies(spark_session),
        'tests/resources/LOINC_HIERARCHY_SAMPLE.CSV',
        '2.56')

    loinc_hierarchies.write_to_database('isa_loinc_ontologies')
    create_value_sets(spark_session).write_to_database('isa_loinc_ontologies')

    push_valuesets(spark_session,
                   {'leukocytes': isa_loinc('LP14738-6')},
                   database='isa_loinc_ontologies')

    expected = {
        'leukocytes': [('http://loinc.org', '5821-4'),
                       ('http://loinc.org', 'LP14738-6'),
                       ('http://loinc.org', 'LP14419-3')]
    }

    assert get_current_valuesets(spark_session) == expected
def push_valuesets(spark_session, valueset_map, database='ontologies'):
    """
    Pushes valuesets onto a stack and registers an in_valueset user-defined
    function that uses this content.

    The valueset_map takes the form of {referenceName: [(codeset, codevalue),
    (codeset, codevalue)]} to specify which codesets/values are used for the
    given valueset reference name.

    Rather than explicitly passing a list of (codeset, codevalue) tuples, users
    may instead load particular value sets or particular hierarchies by
    providing a ValueSetPlaceholder or HierarchyPlaceholder that instructs the
    system to load codes belonging to a particular value set or hierarchical
    system, respectively. See the isa_loinc and isa_snomed functions above for
    details.

    Finally, ontology information is assumed to be stored in the 'ontologies'
    database by default, but users can specify another database name if they
    have customized ontologies that are separated from the default ontologies
    database.

    :param spark_session: the SparkSession instance
    :param valueset_map: a map containing value set structures to publish
    :param database: the database from which value set data is loaded
    """
    loads_valuesets = False
    loads_hierarchies = False

    jvm = spark_session._jvm

    builder = jvm.com.cerner.bunsen.spark.codes.broadcast.BroadcastableValueSets.newBuilder()

    for (name, content) in valueset_map.items():

        # isinstance is the idiomatic type check (and tolerates subclasses),
        # unlike the previous `type(content) is ...` comparison.
        if isinstance(content, HierarchyPlaceholder):

            # Add codes belonging to the specified hierarchy
            (codeSystem, codeValue, hierarchyUri, hierarchyVersion) = content

            builder.addDescendantsOf(name,
                                     codeSystem,
                                     codeValue,
                                     hierarchyUri,
                                     hierarchyVersion)

            loads_hierarchies = True

        elif isinstance(content, ValueSetPlaceholder):

            # Add codes belonging to the specified value set
            (valueSetUri, valueSetVersion) = content

            builder.addReference(name, valueSetUri, valueSetVersion)

            loads_valuesets = True

        else:

            # Add the explicitly provided code values
            for (codeSystem, codeValue) in content:
                builder.addCode(name, codeSystem, codeValue)

    # Only load ontology content from the database when a placeholder actually
    # referenced it; otherwise use empty containers to avoid needless I/O.
    valuesets = get_value_sets(spark_session, database) \
        if loads_valuesets else create_value_sets(spark_session)

    hierarchies = get_hierarchies(spark_session, database) \
        if loads_hierarchies else create_hierarchies(spark_session)

    broadcastable = builder.build(spark_session._jsparkSession,
                                  valuesets._jvalue_sets,
                                  hierarchies._jhierarchies)

    jvm.com.cerner.bunsen.spark.ValueSetUdfs.pushUdf(spark_session._jsparkSession,
                                                     broadcastable)
def test_read_hierarchy_file(spark_session):
    """Hierarchies built from a LOINC hierarchy file expose the expected ancestor columns."""
    hierarchies = with_loinc_hierarchy(
        spark_session,
        create_hierarchies(spark_session),
        'tests/resources/LOINC_HIERARCHY_SAMPLE.CSV',
        '2.56')

    ancestor_columns = set(hierarchies.get_ancestors().columns)

    assert ancestor_columns == EXPECTED_COLUMNS