Example #1
0
 def setUp(self):
     """Open an AlchemyAccess session against the unittest warehouse db."""
     config = Config()

     def warehouse_value(option):
         # All connection settings live in the [warehouse] config section
         return config.get('warehouse', option)

     self.alchemy = AlchemyAccess(
         database=warehouse_value('database'),
         host='localhost',
         user=warehouse_value('database_user'),
         password=warehouse_value('database_password'))
     self.session = self.alchemy.session
Example #2
0
    def _record_report(self, report_oid):
        """Record the details from this report generation in the db

        :param report_oid: identifier (file path / object id) of the
          generated report; falsy means nothing was generated and no
          row is written.
        """
        if not report_oid:
            return
        report = Report(processed_datetime=datetime.now(),
                        file_path=report_oid,
                        report_method=self.criteria.report_method)

        alchemy = AlchemyAccess(database=self.database)
        try:
            alchemy.session.add(report)
            alchemy.session.commit()
        finally:
            # Release the db connection even when add/commit raises;
            # previously a failed commit leaked the connection.
            alchemy.disconnect()
    def _record_report(self, report_oid):
        """Record the details from this report generation in the db

        :param report_oid: identifier (file path / object id) of the
          generated report; falsy means nothing was generated and no
          row is written.
        """
        if not report_oid:
            return
        report = Report(processed_datetime=datetime.now(),
                        file_path=report_oid,
                        report_method=self.criteria.report_method)

        alchemy = AlchemyAccess(database=self.database)
        try:
            alchemy.session.add(report)
            alchemy.session.commit()
        finally:
            # Release the db connection even when add/commit raises;
            # previously a failed commit leaked the connection.
            alchemy.disconnect()
 def setUp(self):
     """Connect to the unittest warehouse db and cache the session."""
     config = Config()

     def setting(option):
         # Connection parameters come from the [warehouse] section
         return config.get('warehouse', option)

     self.alchemy = AlchemyAccess(database=setting('database'),
                                  host='localhost',
                                  user=setting('database_user'),
                                  password=setting('database_password'))
     self.session = self.alchemy.session
Example #5
0
 def setUp(self):
     """Open a session against the longitudinal db for each test."""
     config = Config()

     def longitudinal_value(option):
         # Connection parameters come from the [longitudinal] section
         return config.get("longitudinal", option)

     self.alchemy = AlchemyAccess(
         database=longitudinal_value("database"),
         host="localhost",
         user=longitudinal_value("database_user"),
         password=longitudinal_value("database_password"),
     )
     self.session = self.alchemy.session
     # Objects queued here are cleaned up after the test runs
     self.remove_after_test = []
Example #6
0
class testSqlAObjects(unittest.TestCase):
    """We should be able to create and work with objects
    that are based on tables in the database
    """
    def setUp(self):
        """Connect to the unittest warehouse db and cache the session."""
        c = Config()
        cfg_value = lambda v: c.get('warehouse', v)
        self.alchemy = AlchemyAccess(database=cfg_value('database'),
                                     host='localhost',
                                     user=cfg_value('database_user'),
                                     password=cfg_value('database_password'))
        self.session = self.alchemy.session

    def tearDown(self):
        """Remove the test msh (and rows hanging off it) and disconnect."""
        # Purge the unittest hl7_msh and all related data
        self.session.delete(self.msh)
        self.session.commit()
        self.alchemy.disconnect()

    def _assert_field(self, actual, expected, name):
        """Assert one persisted column round-tripped intact."""
        self.assertEqual(actual, expected,
                         '%s invalid.\nExpected: %s\nGot: %s'
                         % (name, expected, actual))

    def _assert_repr(self, result, expected):
        """Assert __repr__ produced the expected '<Class pk>' string."""
        self.assertEqual(repr(result), expected,
                         'Message string invalid.\nExpected: '
                         '%s\nGot: %s' % (expected, result))

    def testABuildTables(self):
        """We need to build dependent tables in the correct order.
        """
        self.tHL7_Msh()
        self.tHL7_RawMessage()
        self.tHL7_Visit()
        self.tHL7_Dx()
        self.tHL7_Obr()
        self.tHL7_Obx()

    def tHL7_RawMessage(self):
        """Create an HL7_RawMessage object that is saved to the database"""
        mess = HL7_RawMessage(hl7_raw_message_id=1,
                              message_control_id=u'control_id',
                              raw_data=u'some raw data')
        # Add the new message to the session
        self.session.add(mess)
        self.session.commit()

        query = self.session.query(HL7_RawMessage).\
                filter(HL7_RawMessage.hl7_raw_message_id == 1)

        self.assertEqual(query.count(), 1,
                         'The message we created was not found')

        result = query.first()
        # Check that the __repr__ is working as expected
        self._assert_repr(result, '<HL7_RawMessage 1>')

        # Make sure all the fields came out as expected
        self._assert_field(result.hl7_raw_message_id, 1,
                           'hl7_raw_message_id')
        self._assert_field(result.message_control_id, 'control_id',
                           'message_control_id')
        self._assert_field(result.raw_data, 'some raw data', 'raw_data')

    def tHL7_Msh(self):
        """Create an HL7_Msh object that is saved to the database"""
        self.msh = HL7_Msh(hl7_msh_id=1,
                           message_control_id=u'control_id',
                           message_type=u'message type',
                           facility=u'facility',
                           message_datetime=datetime(2007, 1, 1),
                           batch_filename=u'183749382629734')

        # Add the new msh to the session
        self.session.add(self.msh)
        self.session.commit()
        query = self.session.query(HL7_Msh)
        self.assertEqual(query.count(), 1,
                         'The msh we created was not found')

        result = query.first()
        # Check that the __repr__ is working as expected
        self._assert_repr(result, '<HL7_Msh 1>')

        # Make sure all the fields came out as expected
        self._assert_field(result.hl7_msh_id, 1, 'hl7_msh_id')
        self._assert_field(result.message_control_id, 'control_id',
                           'message_control_id')
        self._assert_field(result.message_type, 'message type',
                           'message_type')
        self._assert_field(result.facility, 'facility', 'facility')
        self._assert_field(result.message_datetime,
                           datetime(2007, 1, 1, 0, 0), 'message_datetime')
        self._assert_field(result.batch_filename, '183749382629734',
                           'batch_filename')

    def tHL7_Visit(self):
        """Create an HL7_Visit object that is saved to the database"""
        visit = HL7_Visit(hl7_visit_id=1,
                          visit_id=u'45',
                          patient_id=u'patient id',
                          zip=u'zip',
                          admit_datetime=datetime(2007, 1, 1),
                          gender=u'F',
                          dob=u'2001,01',
                          chief_complaint=u'Pain',
                          patient_class=u'1',
                          hl7_msh_id=1,
                          disposition='01',
                          state='WA',
                          admission_source='Emergency room',
                          assigned_patient_location='MVMGREF')

        # Add the new visit to the session
        self.session.add(visit)
        self.session.commit()

        query = self.session.query(HL7_Visit)
        self.assertEqual(query.count(), 1,
                         'The visit we created was not found')

        result = query.first()
        # Check that the __repr__ is working as expected
        self._assert_repr(result, '<HL7_Visit 1>')

        # Make sure all the fields came out as expected
        self._assert_field(result.hl7_visit_id, 1, 'hl7_visit_id')
        self._assert_field(result.visit_id, '45', 'visit_id')
        self._assert_field(result.patient_id, 'patient id', 'patient_id')
        self._assert_field(result.zip, 'zip', 'zip')
        self._assert_field(result.admit_datetime, datetime(2007, 1, 1),
                           'admit_datetime')
        self._assert_field(result.gender, 'F', 'gender')
        self._assert_field(result.dob, '2001,01', 'dob')
        self._assert_field(result.chief_complaint, 'Pain',
                           'chief_complaint')
        self._assert_field(result.patient_class, '1', 'patient_class')
        self._assert_field(result.disposition, '01', 'disposition')
        self.assertEqual(result.state, 'WA')
        self.assertEqual(result.admission_source, 'Emergency room')
        self.assertEqual(result.assigned_patient_location, 'MVMGREF')

    def tHL7_Dx(self):
        """Create an HL7_Dx object that is saved to the database"""
        dx = HL7_Dx(hl7_dx_id=1,
                    dx_code=u'dx code',
                    dx_description=u'description',
                    dx_type=u'A',
                    hl7_msh_id=1)

        # Add the new dx to the session
        self.session.add(dx)
        self.session.commit()

        query = self.session.query(HL7_Dx)
        self.assertEqual(query.count(), 1,
                         'The dx we created was not found')

        result = query.first()
        # Check that the __repr__ is working as expected
        self._assert_repr(result, '<HL7_Dx 1>')

        self._assert_field(result.hl7_dx_id, 1, 'hl7_dx_id')
        self._assert_field(result.dx_code, 'dx code', 'dx_code')
        self._assert_field(result.dx_description, 'description',
                           'dx_description')
        self._assert_field(result.dx_type, 'A', 'dx_type')

    def tHL7_Obr(self):
        """Create an HL7_Obr object that is saved to the database"""
        dt = datetime.now()
        obr = HL7_Obr(hl7_obr_id=1,
                      loinc_code=u'loinc code',
                      loinc_text=u'loinc text',
                      alt_text=u'alt text',
                      hl7_msh_id=1,
                      status='W',
                      report_datetime=dt,
                      specimen_source='NASAL')

        # Add the new obr to the session
        self.session.add(obr)
        self.session.commit()

        query = self.session.query(HL7_Obr)
        self.assertEqual(query.count(), 1,
                         'The obr we created was not found')

        result = query.first()
        # Check that the __repr__ is working as expected
        self._assert_repr(result, '<HL7_Obr 1>')

        self._assert_field(result.hl7_obr_id, 1, 'hl7_obr_id')
        self._assert_field(result.loinc_code, 'loinc code', 'loinc_code')
        self._assert_field(result.loinc_text, 'loinc text', 'loinc_text')
        self._assert_field(result.alt_text, 'alt text', 'alt_text')
        self.assertEqual(result.status, 'W')
        self.assertEqual(result.report_datetime, dt)
        self.assertEqual(result.specimen_source, 'NASAL')

    def tHL7_Obx(self):
        """Create an HL7_Obx object that is saved to the database"""
        obx = HL7_Obx(hl7_obx_id=1,
                      hl7_obr_id=1,
                      value_type='vt',
                      observation_id=u'observation id',
                      observation_text=u'observation text',
                      observation_result=u'observation result',
                      units=u'units',
                      result_status=u'result status',
                      observation_datetime=datetime(2001, 1, 1),
                      hl7_msh_id=1,
                      performing_lab_code='SHMC')
        # Add the new obx to the session
        self.session.add(obx)
        self.session.commit()

        query = self.session.query(HL7_Obx)
        self.assertEqual(query.count(), 1,
                         'The obx we created was not found')

        result = query.first()
        # Check that the __repr__ is working as expected
        self._assert_repr(result, '<HL7_Obx 1>')

        self._assert_field(result.hl7_obx_id, 1, 'hl7_obx_id')
        self._assert_field(result.hl7_obr_id, 1, 'hl7_obr_id')
        # value_type comes back padded; compare the stripped value as
        # the original test did
        self._assert_field(result.value_type.strip(), 'vt', 'value_type')
        self._assert_field(result.observation_text, 'observation text',
                           'observation_text')
        self._assert_field(result.observation_result, 'observation result',
                           'observation_result')
        self._assert_field(result.units, 'units', 'units')
        self._assert_field(result.result_status, 'result status',
                           'result_status')
        self._assert_field(result.observation_datetime, datetime(2001, 1, 1),
                           'observation_datetime')
        self.assertEqual(result.performing_lab_code, 'SHMC')

    def testObxRelation(self):
        "Use sqlalchemy relations for automated obx/obr relations "
        # Need an HL7_Msh for foreign key constraint conformance
        self.msh = HL7_Msh(hl7_msh_id=1,
                           message_control_id=u'control_id',
                           message_type=u'message type',
                           facility=u'facility',
                           message_datetime=datetime(2007, 1, 1),
                           batch_filename=u'183749382629734')

        obr = HL7_Obr(loinc_code=u'loinc code',
                      loinc_text=u'loinc text',
                      alt_text=u'alt text',
                      hl7_msh_id=self.msh.hl7_msh_id)

        obx = HL7_Obx(value_type='vt',
                      observation_id=u'observation id',
                      observation_text=u'observation text',
                      observation_result=u'observation result',
                      units=u'units',
                      result_status=u'result status',
                      observation_datetime=datetime(2001, 1, 1),
                      hl7_msh_id=self.msh.hl7_msh_id)
        obr.obxes.append(obx)
        self.session.add(self.msh)
        self.session.commit()
        self.session.add(obr)
        self.session.commit()

        # See if the commit cascaded.  If so, the obx will have a
        # valid pk and the obr foreign key set.
        self.assertEqual(obr.hl7_obr_id, obx.hl7_obr_id)

        # Now query for the obr, see if the obx is in tow.
        roundTripObr = self.session.query(HL7_Obr).one()
        self.assertTrue(roundTripObr.hl7_obr_id > 0)
        self.assertEqual(type(roundTripObr.obxes[0]), type(obx))
        self.assertEqual(roundTripObr.obxes[0], obx)

    def testNte(self):
        """Test HL7_Nte table access """
        # Build the msh -> obr -> obx chain the note hangs off of
        self.msh = HL7_Msh(hl7_msh_id=1,
                           message_control_id=u'control_id',
                           message_type=u'message type',
                           facility=u'facility',
                           message_datetime=datetime(2007, 1, 1),
                           batch_filename=u'183749382629734')
        self.session.add(self.msh)
        self.session.commit()

        obr = HL7_Obr(hl7_obr_id=1,
                      loinc_code=u'loinc code',
                      loinc_text=u'loinc text',
                      alt_text=u'alt text',
                      hl7_msh_id=1,
                      status='W',
                      report_datetime=datetime.now(),
                      specimen_source='NASAL')
        self.session.add(obr)
        self.session.commit()

        obx = HL7_Obx(
            hl7_obx_id=1,
            hl7_obr_id=1,
            value_type='vt',
            observation_id=u'observation id',
            observation_text=u'observation text',
            observation_result=u'observation result',
            units=u'units',
            result_status=u'result status',
            observation_datetime=datetime(2001, 1, 1),
            hl7_msh_id=1,
            performing_lab_code=u'SHMC',
            sequence=u'1.1',
        )
        self.session.add(obx)
        self.session.commit()

        note = HL7_Nte(sequence_number=1,
                       note='fascinating unittest note',
                       hl7_obx_id=1)
        self.session.add(note)
        self.session.commit()
        query = self.session.query(HL7_Nte)
        self.assertEqual(query.count(), 1)
        self.assertEqual(query.one().note, 'fascinating unittest note')
        self.assertEqual(query.one().sequence_number, 1)

    def testSpecimenSource(self):
        """Test HL7_Spm table access """
        # Build the msh -> obr chain the specimen hangs off of
        self.msh = HL7_Msh(hl7_msh_id=1,
                           message_control_id=u'control_id',
                           message_type=u'message type',
                           facility=u'facility',
                           message_datetime=datetime(2007, 1, 1),
                           batch_filename=u'183749382629734')
        self.session.add(self.msh)
        self.session.commit()

        obr = HL7_Obr(hl7_obr_id=1,
                      loinc_code=u'loinc code',
                      loinc_text=u'loinc text',
                      alt_text=u'alt text',
                      hl7_msh_id=1,
                      status='W',
                      report_datetime=datetime.now(),
                      specimen_source='NASAL')
        self.session.add(obr)
        self.session.commit()

        spm = HL7_Spm(id='123',
                      description="your belly",
                      code='bly',
                      hl7_obr_id=1)
        self.session.add(spm)
        self.session.commit()
        query = self.session.query(HL7_Spm)
        self.assertEqual(query.count(), 1)
        self.assertEqual(query.one().description, 'your belly')
        self.assertEqual(query.one().code, 'bly')
class testSqlAObjects(unittest.TestCase):
    """We should be able to create and work with objects
    that are based on tables in the database
    """
    def setUp(self):
        """Connect to the unittest warehouse db and cache the session."""
        config = Config()

        def warehouse_value(option):
            # Every connection setting lives in the [warehouse] section
            return config.get('warehouse', option)

        self.alchemy = AlchemyAccess(
            database=warehouse_value('database'),
            host='localhost',
            user=warehouse_value('database_user'),
            password=warehouse_value('database_password'))
        self.session = self.alchemy.session


    def tearDown(self):
        """Remove the rows created by the test, then disconnect.

        NOTE(review): deleting self.msh appears intended to take the
        dependent hl7 rows with it -- confirm cascade behavior against
        the mapper configuration.
        """
        # Purge the unittest hl7_msh and all related data
        self.session.delete(self.msh)
        self.session.commit()
        self.alchemy.disconnect()

    def testABuildTables(self):
        """We need to build dependent tables in the correct order.
        """
        # Parent rows first: the msh row precedes everything that
        # references it, and the obr row precedes the obx row.
        builders = (self.tHL7_Msh,
                    self.tHL7_RawMessage,
                    self.tHL7_Visit,
                    self.tHL7_Dx,
                    self.tHL7_Obr,
                    self.tHL7_Obx)
        for build in builders:
            build()

    def tHL7_RawMessage(self):
        """Create an HL7_RawMessage object that is saved to the database"""
        mess = HL7_RawMessage(hl7_raw_message_id=1,
                              message_control_id=u'control_id',
                              raw_data=u'some raw data')
        # Add the new message to the session
        self.session.add(mess)
        self.session.commit()

        query = self.session.query(HL7_RawMessage).\
                filter(HL7_RawMessage.hl7_raw_message_id == 1)

        self.assertEqual(query.count(), 1,
                         'The message we created was not found')

        result = query.first()
        # Check that the __repr__ is working as expected
        self.assertEqual(repr(result), '<HL7_RawMessage 1>',
                         'Message string invalid.\nExpected: '
                         '<HL7_RawMessage 1>\nGot: %s' % result)

        # Make sure all the fields came out as expected
        self.assertEqual(result.hl7_raw_message_id, 1,
                         'hl7_raw_message_id invalid.\nExpected: '
                         '1\nGot: %s' % result.hl7_raw_message_id)
        self.assertEqual(result.message_control_id, 'control_id',
                         'message_control_id invalid.\nExpected: '
                         'control_id\nGot: %s' % result.message_control_id)
        self.assertEqual(result.raw_data, 'some raw data',
                         'raw_data invalid.\nExpected: some raw '
                         'data\nGot: %s' % result.raw_data)

    def tHL7_Msh(self):
        """Create an HL7_Msh object that is saved to the database"""
        self.msh = HL7_Msh(hl7_msh_id=1,
                           message_control_id=u'control_id',
                           message_type=u'message type',
                           facility=u'facility',
                           message_datetime=datetime(2007, 1, 1),
                           batch_filename=u'183749382629734')

        # Add the new msh to the session
        self.session.add(self.msh)
        self.session.commit()
        query = self.session.query(HL7_Msh)
        self.assertEqual(query.count(), 1,
                         'The msh we created was not found')

        result = query.first()
        # Check that the __repr__ is working as expected
        # (failure message previously named the wrong class)
        self.assertEqual(repr(result), '<HL7_Msh 1>',
                         'Message string invalid.\nExpected: '
                         '<HL7_Msh 1>\nGot: %s' % result)

        # Make sure all the fields came out as expected
        self.assertEqual(result.hl7_msh_id, 1,
                         'hl7_msh_id invalid.\nExpected: 1\nGot: '
                         '%s' % result.hl7_msh_id)
        self.assertEqual(result.message_control_id, 'control_id',
                         'message_control_id invalid.\nExpected: '
                         'control_id\nGot: %s' % result.message_control_id)
        self.assertEqual(result.message_type, 'message type',
                         'message_type invalid.\nExpected: message '
                         'type\nGot: %s' % result.message_type)
        self.assertEqual(result.facility, 'facility',
                         'facility invalid.\nExpected: '
                         'facility\nGot: %s' % result.facility)
        self.assertEqual(result.message_datetime,
                         datetime(2007, 1, 1, 0, 0),
                         'message_datetime invalid.\nExpected: '
                         '2007-01-01 00:00:00\nGot: %s'
                         % result.message_datetime)
        self.assertEqual(result.batch_filename, '183749382629734',
                         'batch_filename invalid.\nExpected: '
                         '183749382629734\nGot: %s' % result.batch_filename)

    def tHL7_Visit(self):
        """Create an HL7_Visit object that is saved to the database"""
        visit = HL7_Visit(hl7_visit_id=1,
                          visit_id=u'45',
                          patient_id=u'patient id',
                          zip=u'zip',
                          admit_datetime=datetime(2007, 1, 1),
                          gender=u'F',
                          dob=u'2001,01',
                          chief_complaint=u'Pain',
                          patient_class=u'1',
                          hl7_msh_id=1,
                          disposition='01',
                          state='WA',
                          admission_source='Emergency room',
                          assigned_patient_location='MVMGREF')

        # Add the new visit to the session
        self.session.add(visit)
        self.session.commit()

        query = self.session.query(HL7_Visit)
        self.assertEqual(query.count(), 1,
                         'The visit we created was not found')

        result = query.first()
        # Check that the __repr__ is working as expected
        self.assertEqual(repr(result), '<HL7_Visit 1>',
                         'Message string invalid.\nExpected: '
                         '<HL7_Visit 1>\nGot: %s' % result)

        # Make sure all the fields came out as expected
        self.assertEqual(result.hl7_visit_id, 1,
                         'hl7_visit_id invalid.\nExpected: '
                         '1\nGot: %s' % result.hl7_visit_id)
        self.assertEqual(result.visit_id, '45',
                         'visit_id invalid.\nExpected: 45\nGot: '
                         '%s' % result.visit_id)
        self.assertEqual(result.patient_id, 'patient id',
                         'patient_id invalid.\nExpected: patient '
                         'id\nGot: %s' % result.patient_id)
        self.assertEqual(result.zip, 'zip',
                         'zip invalid.\nExpected: zip\nGot: %s' % result.zip)
        self.assertEqual(result.admit_datetime, datetime(2007, 1, 1),
                         'admit_datetime invalid.\nExpected: '
                         '2007-01-01 00:00:00\nGot: %s'
                         % result.admit_datetime)
        self.assertEqual(result.gender, 'F',
                         'gender invalid.\nExpected: F\nGot: %s'
                         % result.gender)
        # Failure message previously claimed a datetime was expected;
        # the column round-trips the literal string '2001,01'
        self.assertEqual(result.dob, '2001,01',
                         'dob invalid.\nExpected: 2001,01'
                         '\nGot: %s' % result.dob)
        self.assertEqual(result.chief_complaint, 'Pain',
                         'chief_complaint invalid.\nExpected: '
                         'Pain\nGot: %s' % result.chief_complaint)
        self.assertEqual(result.patient_class, '1',
                         'patient_class invalid.\nExpected: '
                         '1\nGot: %s' % result.patient_class)
        self.assertEqual(result.disposition, '01',
                         'disposition invalid.\nExpected: '
                         '01\nGot: %s' % result.disposition)
        self.assertEqual(result.state, 'WA')
        self.assertEqual(result.admission_source, 'Emergency room')
        self.assertEqual(result.assigned_patient_location, 'MVMGREF')

    def tHL7_Dx(self):
        """Create an HL7_Dx object that is saved to the database"""
        dx = HL7_Dx(hl7_dx_id=1,
                    dx_code=u'dx code',
                    dx_description=u'description',
                    dx_type=u'A',
                    hl7_msh_id=1)

        # Add the new dx to the session
        self.session.add(dx)
        self.session.commit()

        query = self.session.query(HL7_Dx)
        self.assertEqual(query.count(), 1,
                         'The dx we created was not found')

        result = query.first()
        # Check that the __repr__ is working as expected
        self.assertEqual(repr(result), '<HL7_Dx 1>',
                         'Message string invalid.\nExpected: '
                         '<HL7_Dx 1>\nGot: %s' % result)

        self.assertEqual(result.hl7_dx_id, 1,
                         'hl7_dx_id invalid.\nExpected: 1\nGot: '
                         '%s' % result.hl7_dx_id)
        self.assertEqual(result.dx_code, 'dx code',
                         'dx_code invalid.\nExpected: dx code\nGot: '
                         '%s' % result.dx_code)
        self.assertEqual(result.dx_description, 'description',
                         'dx_description invalid.\nExpected: '
                         'description\nGot: %s' % result.dx_description)
        self.assertEqual(result.dx_type, 'A',
                         'dx_type invalid.\nExpected: A\nGot: %s'
                         % result.dx_type)

    def tHL7_Obr(self):
        """Create an HL7_Obr object that is saved to the database"""
        dt = datetime.now()
        obr = HL7_Obr(hl7_obr_id=1,
                      loinc_code=u'loinc code',
                      loinc_text=u'loinc text',
                      alt_text=u'alt text',
                      hl7_msh_id=1,
                      status='W',
                      report_datetime=dt,
                      specimen_source='NASAL')

        # Add the new obr to the session
        self.session.add(obr)
        self.session.commit()

        query = self.session.query(HL7_Obr)
        self.assertEqual(query.count(), 1,
                         'The obr we created was not found')

        result = query.first()
        # Check that the __repr__ is working as expected
        self.assertEqual(repr(result), '<HL7_Obr 1>',
                         'Message string invalid.\nExpected: '
                         '<HL7_Obr 1>\nGot: %s' % result)

        self.assertEqual(result.hl7_obr_id, 1,
                         'hl7_obr_id invalid.\nExpected: 1\nGot: '
                         '%s' % result.hl7_obr_id)
        self.assertEqual(result.loinc_code, 'loinc code',
                         'loinc_code invalid.\nExpected: '
                         'loinc code\nGot: %s' % result.loinc_code)
        self.assertEqual(result.loinc_text, 'loinc text',
                         'loinc_text invalid.\nExpected: '
                         'loinc text\nGot: %s' % result.loinc_text)
        self.assertEqual(result.alt_text, 'alt text',
                         'alt text invalid.\nExpected: alt '
                         'text\nGot: %s' % result.alt_text)
        self.assertEqual(result.status, 'W')
        self.assertEqual(result.report_datetime, dt)
        self.assertEqual(result.specimen_source, 'NASAL')

    def tHL7_Obx(self):
        """Create an HL7_Obx object that is saved to the database

        Persists a fully populated HL7_Obx row, reads it back, and
        verifies that every column value round-trips intact.

        NOTE(review): the name lacks the 'test' prefix, so unittest
        discovery skips this method - presumably disabled on purpose;
        confirm before renaming.
        """
        obx = HL7_Obx(hl7_obx_id=1,
                      hl7_obr_id=1,
                      value_type='vt',
                      observation_id=u'observation id',
                      observation_text=u'observation text',
                      observation_result=u'observation result',
                      units=u'units',
                      result_status=u'result status',
                      observation_datetime=datetime(2001, 1, 1),
                      hl7_msh_id=1,
                      performing_lab_code='SHMC')
        # Add the new obx to the session (previous comment said 'msh')
        self.session.add(obx)
        self.session.commit()

        query = self.session.query(HL7_Obx)
        # assertEqual replaces the deprecated assert_/assertEquals aliases
        self.assertEqual(query.count(), 1,
                         'The obx we created was not found')

        result = query.first()
        # Check that the __repr__ is working as expected
        self.assertEqual(repr(result), '<HL7_Obx 1>',
                         'Message string invalid.\nExpected: '
                         '<HL7_Obx 1>\nGot: %s' % result)

        self.assertEqual(result.hl7_obx_id, 1,
                         'hl7_obx_id invalid.\nExpected: '
                         '1\nGot: %s' % result.hl7_obx_id)
        self.assertEqual(result.hl7_obr_id, 1,
                         'hl7_obr_id invalid.\nExpected: '
                         '1\nGot: %s' % result.hl7_obr_id)
        # value_type appears to come back padded - compare stripped
        self.assertEqual(result.value_type.strip(), 'vt',
                         'value_type invalid.\nExpected: '
                         'vt\nGot: %s' % result.value_type)
        self.assertEqual(result.observation_text, 'observation text',
                         'observation_text invalid.\nExpected: '
                         'observation text\nGot: %s' % result.observation_text)
        self.assertEqual(result.observation_result, 'observation result',
                         'observation_result invalid.\nExpected: '
                         'observation result\nGot: %s' % result.observation_result)
        self.assertEqual(result.units, 'units',
                         'units invalid.\nExpected: units\nGot: %s'
                         % result.units)
        self.assertEqual(result.result_status, 'result status',
                         'result_status invalid.\nExpected: result '
                         'status\nGot: %s' % result.result_status)
        self.assertEqual(result.observation_datetime, datetime(2001, 1, 1),
                         'observation_datetime invalid.\nExpected: '
                         '2001-01-01 00:00:00\nGot: %s' %
                         result.observation_datetime)
        self.assertEqual(result.performing_lab_code, 'SHMC')

    def testObxRelation(self):
        """Use sqlalchemy relations for automated obx/obr relations.

        Builds an msh -> obr -> obx chain, attaches the obx through the
        obr.obxes relation, and verifies the commit cascades the keys.
        """
        # Need an HL7_Msh for foreign key constraint conformance.
        # datetime(2007, 01, 01) rewritten with plain ints: leading-zero
        # literals are a SyntaxError on Python 3 (same value on 2).
        self.msh = HL7_Msh(hl7_msh_id=1,
                           message_control_id=u'control_id',
                           message_type=u'message type',
                           facility=u'facility',
                           message_datetime=datetime(2007, 1, 1),
                           batch_filename=u'183749382629734')

        obr = HL7_Obr(loinc_code=u'loinc code',
                      loinc_text=u'loinc text',
                      alt_text=u'alt text',
                      hl7_msh_id=self.msh.hl7_msh_id)

        obx = HL7_Obx(value_type='vt',
                      observation_id=u'observation id',
                      observation_text=u'observation text',
                      observation_result=u'observation result',
                      units=u'units',
                      result_status=u'result status',
                      observation_datetime=datetime(2001, 1, 1),
                      hl7_msh_id=self.msh.hl7_msh_id)
        obr.obxes.append(obx)
        self.session.add(self.msh)
        self.session.commit()
        self.session.add(obr)
        self.session.commit()

        # See if the commit cascaded.  If so, the obx will have a
        # valid pk and the obr foreign key set.
        self.assertEqual(obr.hl7_obr_id, obx.hl7_obr_id)

        # Now query for the obr, see if the obx is in tow.
        roundTripObr = self.session.query(HL7_Obr).one()
        self.assertTrue(roundTripObr.hl7_obr_id > 0)
        self.assertEqual(type(roundTripObr.obxes[0]), type(obx))
        self.assertEqual(roundTripObr.obxes[0], obx)

    def testNte(self):
        """Test HL7_Nte table access

        Builds the msh/obr/obx chain a note requires, then persists an
        HL7_Nte and verifies it round-trips.
        """
        # datetime(2007, 01, 01) rewritten with plain ints: the
        # leading-zero form is a SyntaxError on Python 3.
        self.msh = HL7_Msh(hl7_msh_id=1,
                      message_control_id=u'control_id',
                      message_type=u'message type',
                      facility=u'facility',
                      message_datetime=datetime(2007, 1, 1),
                      batch_filename=u'183749382629734')
        self.session.add(self.msh)
        self.session.commit()

        obr = HL7_Obr(hl7_obr_id=1,
                      loinc_code=u'loinc code',
                      loinc_text=u'loinc text',
                      alt_text=u'alt text',
                      hl7_msh_id=1,
                      status='W',
                      report_datetime=datetime.now(),
                      specimen_source='NASAL')
        self.session.add(obr)
        self.session.commit()

        obx = HL7_Obx(hl7_obx_id=1,
                      hl7_obr_id=1,
                      value_type='vt',
                      observation_id=u'observation id',
                      observation_text=u'observation text',
                      observation_result=u'observation result',
                      units=u'units',
                      result_status=u'result status',
                      observation_datetime=datetime(2001, 1, 1),
                      hl7_msh_id=1,
                      performing_lab_code=u'SHMC',
                      sequence=u'1.1')
        self.session.add(obx)
        self.session.commit()

        note = HL7_Nte(sequence_number=1,
                       note='fascinating unittest note',
                       hl7_obx_id=1)
        self.session.add(note)
        self.session.commit()
        query = self.session.query(HL7_Nte)
        self.assertEqual(query.count(), 1)
        # Fetch once - each query.one() re-executes the query
        persisted = query.one()
        self.assertEqual(persisted.note, 'fascinating unittest note')
        self.assertEqual(persisted.sequence_number, 1)

    def testSpecimenSource(self):
        """Test HL7_Spm table access

        Persists the msh/obr rows an HL7_Spm requires, then verifies
        the specimen row round-trips.
        """
        # datetime(2007, 01, 01) rewritten with plain ints: the
        # leading-zero form is a SyntaxError on Python 3.
        self.msh = HL7_Msh(hl7_msh_id=1,
                      message_control_id=u'control_id',
                      message_type=u'message type',
                      facility=u'facility',
                      message_datetime=datetime(2007, 1, 1),
                      batch_filename=u'183749382629734')
        self.session.add(self.msh)
        self.session.commit()

        obr = HL7_Obr(hl7_obr_id=1,
                      loinc_code=u'loinc code',
                      loinc_text=u'loinc text',
                      alt_text=u'alt text',
                      hl7_msh_id=1,
                      status='W',
                      report_datetime=datetime.now(),
                      specimen_source='NASAL')
        self.session.add(obr)
        self.session.commit()

        spm = HL7_Spm(id='123', description="your belly",
                      code='bly', hl7_obr_id=1)
        self.session.add(spm)
        self.session.commit()
        query = self.session.query(HL7_Spm)
        self.assertEqual(query.count(), 1)
        # Fetch once - each query.one() re-executes the query
        persisted = query.one()
        self.assertEqual(persisted.description, 'your belly')
        self.assertEqual(persisted.code, 'bly')
class LongitudinalManager(object):
    """ Abstraction to handle which db, user, etc. the deduplication
    process should be run on.  Handles runtime arguments and
    execution.

    Manages the process by farming out the individual visit
    deduplication to a number of worker processes (necessary to take
    advantage of multi-core processor and database as the limiting
    factor).

    """

    # The gating issue is the number of postgres connections that are
    # allowed to run concurrently.  Setting this to N-1 (where N is
    # the number of cores) has proven the fastest and most reliable.
    NUM_PROCS = 5

    def __init__(
        self,
        data_warehouse=None,
        data_mart=None,
        reportDate=None,
        database_user=None,
        database_password=None,
        verbosity=0,
    ):
        """Capture run configuration; no db connections are opened here.

        :param data_warehouse: source (warehouse) database name
        :param data_mart: destination (mart) database name
        :param reportDate: optional date string; limits the run to one day
        :param database_user: credentials shared by both databases
        :param database_password: credentials shared by both databases
        :param verbosity: logging verbosity, bumped by -v flags

        """
        self.data_warehouse = data_warehouse
        self.warehouse_port = 5432  # postgres default
        self.data_mart = data_mart
        self.mart_port = 5432  # postgres default
        # Conditional expression replaces the fragile `and/or` idiom
        self.reportDate = parseDate(reportDate) if reportDate else None
        self.database_user = database_user
        self.database_password = database_password
        self.dir = os.path.dirname(__file__)
        self.verbosity = verbosity
        self.queue = JoinableQueue()
        self.datefile = "/tmp/longitudinal_datefile"
        self.datePersistence = Datefile(initial_date=self.reportDate)
        self.lock = FileLock(LOCKFILE)
        self.skip_prep = False

    def __call__(self):
        """Make the manager directly callable; delegates to execute()."""
        return self.execute()

    def processArgs(self):
        """ Process any optional arguments and positional parameters

        Expects exactly two positional arguments, the data warehouse
        and data mart database names; everything else is optional.
        """
        parser = OptionParser(usage=usage)
        parser.add_option(
            "-c",
            "--countdown",
            dest="countdown",
            default=None,
            help="count {down,up} date using date string in "
            "%s - set to 'forwards' or 'backwards' "
            "if desired" % self.datefile,
        )
        parser.add_option(
            "-d",
            "--date",
            dest="date",
            default=None,
            help="single admission date to dedup "
            "(by default, checks the entire database)",
        )
        parser.add_option(
            "-s",
            "--skip-prep",
            dest="skip_prep",
            default=False,
            action="store_true",
            help="skip the expense of looking for new messages",
        )
        parser.add_option(
            "-v",
            "--verbose",
            dest="verbosity",
            action="count",
            default=self.verbosity,
            help="increase output verbosity",
        )
        parser.add_option(
            "-m",
            "--mart-port",
            dest="mart_port",
            default=self.mart_port,
            type="int",
            help="alternate port number for data mart",
        )
        parser.add_option(
            "-w",
            "--warehouse-port",
            dest="warehouse_port",
            default=self.warehouse_port,
            type="int",
            help="alternate port number for data warehouse",
        )

        (options, args) = parser.parse_args()
        if len(args) != 2:
            parser.error("incorrect number of arguments")

        # Positional parameters: the two database names
        self.data_warehouse = args[0]
        self.data_mart = args[1]
        # Read the returned `options` object directly rather than
        # reaching back into parser.values (same data, clearer)
        self.warehouse_port = options.warehouse_port
        self.mart_port = options.mart_port
        self.verbosity = options.verbosity
        self.skip_prep = options.skip_prep
        initial_date = parseDate(options.date) if options.date else None
        self.datePersistence = Datefile(
            initial_date=initial_date,
            persistence_file=self.datefile,
            direction=options.countdown,
        )

        self.reportDate = self.datePersistence.get_date()

    def _prepDeduplicateTables(self):
        """ Add any missing rows to the MessageProcessed table

        This is the bridge between the data warehouse and the data
        mart.  In an effort to make the data mart independent of the
        warehouse, the processed message data is kept in the mart.  As
        we're dealing with two distinct databases, there's no
        referential integrity available at the database level, so care
        should be taken.

        """
        startTime = time.time()
        logging.info("Starting INSERT INTO internal_message_processed at %s",
                     startTime)

        # We can take advantage of an "add only" data_warehouse,
        # knowing the hl7_msh_id is a sequence moving in the positive
        # direction.  Simply add any values greater than the previous
        # max.

        stmt = "SELECT max(hl7_msh_id) from internal_message_processed"
        max_id = self.data_mart_access.engine.execute(stmt).first()[0]
        if not max_id:
            # An empty table yields NULL; start from the beginning
            max_id = 0

        new_msgs = list()
        # max_id is an int straight from the mart, so the
        # %-interpolation below cannot inject anything
        stmt = (
            """SELECT hl7_msh_id, message_datetime, visit_id
        FROM hl7_msh JOIN hl7_visit USING (hl7_msh_id) WHERE
        hl7_msh_id > %d """
            % max_id
        )
        rs = self.data_warehouse_access.engine.execute(stmt)
        many = 500
        while True:
            results = rs.fetchmany(many)
            if not results:
                break
            for r in results:
                new_msgs.append(MessageProcessed(hl7_msh_id=r[0],
                                                 message_datetime=r[1],
                                                 visit_id=r[2]))

            # Commit in batches of `many` to bound memory use
            self.data_mart_access.session.add_all(new_msgs)
            self.data_mart_access.session.commit()
            logging.debug("added %d new messages", len(new_msgs))
            new_msgs = list()

        logging.info("Added new rows to internal_message_processed in %s",
                     time.time() - startTime)

    def _visitsToProcess(self):
        """ Look up all distinct visit ids needing attention

        Obtain unique list of visit_ids that have messages that
        haven't previously been processed.  If the user requested just
        one days worth (i.e. -d) only that days visits will be
        returned.

        """
        visit_ids = list()
        if not self.reportDate:
            logging.info("Launch deduplication for entire database")
            # Do the whole batch, that is, all that haven't been
            # processed before.
            stmt = """SELECT DISTINCT(visit_id) FROM
            internal_message_processed
            WHERE processed_datetime IS NULL"""
            rs = self.data_mart_access.engine.execute(stmt)
            many = 10000
            while True:
                results = rs.fetchmany(many)
                if not results:
                    break
                for r in results:
                    visit_ids.append(r[0])

        else:
            logging.info("Launch deduplication for %s", self.reportDate)
            # Process the requested day only - as we can't join across
            # db boundaries - first acquire the full list of visits
            # for the requested day from the data_warehouse to use in
            # a massive 'in' clause
            # (interpolated values are parsed date objects, not raw
            # user text)
            stmt = """SELECT DISTINCT(visit_id) FROM hl7_visit WHERE
            admit_datetime BETWEEN '%s' AND '%s';""" % (
                self.reportDate,
                self.reportDate + timedelta(days=1),
            )
            # NOTE(review): this raw_query result is discarded and the
            # same statement runs again just below - looks like a
            # leftover; confirm raw_query has no needed side effect
            self.access.raw_query(stmt)
            rs = self.data_warehouse_access.engine.execute(stmt)
            many = 1000
            potential_visit_ids = list()
            while True:
                results = rs.fetchmany(many)
                if not results:
                    break
                for r in results:
                    potential_visit_ids.append(r[0])

            if potential_visit_ids:
                query = (
                    self.data_mart_access.session.query(MessageProcessed.visit_id)
                    .distinct()
                    .filter(
                        and_(
                            MessageProcessed.processed_datetime == None,
                            MessageProcessed.visit_id.in_(potential_visit_ids),
                        )
                    )
                )

                for r in query:
                    visit_ids.append(r[0])

        logging.info("Found %d visits needing attention", len(visit_ids))
        return visit_ids

    def tearDown(self):
        """ Clean up any open handles/connections """
        # now done in execute when we're done with the connections

    def execute(self):
        """ Start the process

        (A second, byte-identical definition of this method previously
        shadowed this one; the duplicate has been removed.)
        """
        # Initialize logging now (verbosity is now set regardless of
        # invocation method)
        configure_logging(verbosity=self.verbosity,
                          logfile="longitudinal-manager.log")

        logging.info("Initiate deduplication for %s",
                     self.reportDate or "whole database")
        # Only allow one instance of the manager to run at a time.
        if self.lock.is_locked():
            logging.warning("Can't continue, %s is locked ", LOCKFILE)
            return

        if systemUnderLoad():
            logging.warning("system under load - continue anyhow")

        try:
            self.lock.acquire()

            self.access = DirectAccess(
                database=self.data_warehouse,
                port=self.warehouse_port,
                user=self.database_user,
                password=self.database_password,
            )
            self.data_warehouse_access = AlchemyAccess(
                database=self.data_warehouse,
                port=self.warehouse_port,
                user=self.database_user,
                password=self.database_password,
            )
            self.data_mart_access = AlchemyAccess(
                database=self.data_mart,
                port=self.mart_port,
                user=self.database_user,
                password=self.database_password,
            )

            startTime = time.time()
            if not self.skip_prep:
                self._prepDeduplicateTables()
            visits_to_process = self._visitsToProcess()

            # Now done with db access needs at the manager level -
            # free up resources:
            self.data_mart_access.disconnect()
            self.data_warehouse_access.disconnect()
            self.access.close()

            # Set of locks used, one for each table needing protection
            # from asynchronous inserts.  Names should match table
            # minus 'dim_' prefix, plus '_lock' suffix
            # i.e. dim_location -> 'location_lock'
            table_locks = {
                "admission_source_lock": Lock(),
                "admission_o2sat_lock": Lock(),
                "admission_temp_lock": Lock(),
                "assigned_location_lock": Lock(),
                "admit_reason_lock": Lock(),
                "chief_complaint_lock": Lock(),
                "diagnosis_lock": Lock(),
                "disposition_lock": Lock(),
                "flu_vaccine_lock": Lock(),
                "h1n1_vaccine_lock": Lock(),
                "lab_flag_lock": Lock(),
                "lab_result_lock": Lock(),
                "location_lock": Lock(),
                "note_lock": Lock(),
                "order_number_lock": Lock(),
                "performing_lab_lock": Lock(),
                "pregnancy_lock": Lock(),
                "race_lock": Lock(),
                "reference_range_lock": Lock(),
                "service_area_lock": Lock(),
                "specimen_source_lock": Lock(),
            }

            # If we have visits to process, fire up the workers...
            # NOTE(review): `> 1` means a run with exactly one visit
            # starts no worker at all - confirm this is intentional
            if len(visits_to_process) > 1:
                for i in range(self.NUM_PROCS):
                    dw = Process(
                        target=LongitudinalWorker,
                        kwargs={
                            "queue": self.queue,
                            "procNumber": i,
                            "data_warehouse": self.data_warehouse,
                            "warehouse_port": self.warehouse_port,
                            "data_mart": self.data_mart,
                            "mart_port": self.mart_port,
                            "dbUser": self.database_user,
                            "dbPass": self.database_password,
                            "table_locks": table_locks,
                            "verbosity": self.verbosity,
                        },
                    )
                    # Daemonize so workers die with the manager
                    dw.daemon = True
                    dw.start()

                # Populate the queue
                for v in visits_to_process:
                    self.queue.put(v)

                # Wait on the queue until empty
                self.queue.join()

            # Common cleanup
            self.tearDown()
            self.datePersistence.bump_date()
            logging.info("Queue is empty - done in %s",
                         time.time() - startTime)
        finally:
            # Always release so a later run can acquire the lock
            self.lock.release()
# --- Example #10 --- (scraped example-boundary marker; the original
# bare lines "Example #10" / "0" are not valid module-level code)
class TestLongitudinalAccess(unittest.TestCase):
    """Series of tests on longitudinal ORM classes. """

    def setUp(self):
        """Open an AlchemyAccess session on the configured test db."""
        c = Config()

        # Named function instead of a lambda assignment (PEP 8 E731)
        def cfg_value(v):
            return c.get("longitudinal", v)

        self.alchemy = AlchemyAccess(
            database=cfg_value("database"),
            host="localhost",
            user=cfg_value("database_user"),
            password=cfg_value("database_password"),
        )
        self.session = self.alchemy.session
        # Objects registered here are deleted again in tearDown
        self.remove_after_test = []

    def tearDown(self):
        """Delete test rows and drop the database connection."""
        # An explicit loop rather than map(): on Python 3 map() is
        # lazy, so the deletes would silently never execute.
        for obj in self.remove_after_test:
            self.session.delete(obj)
        self.session.commit()
        self.alchemy.disconnect()

    def commit_test_obj(self, obj):
        """Commit to db and bookkeep for safe removal on teardown"""
        self.session.add(obj)
        self.remove_after_test.append(obj)
        self.session.commit()

    def testAdmissionSource(self):
        """Round-trip an AdmissionSource row through the mart."""
        row = AdmissionSource(pk="7", description="Emergency room")
        self.commit_test_obj(row)
        q = self.session.query(AdmissionSource).filter_by(
            description="Emergency room")
        self.assertEqual(q.count(), 1)
        self.assertEqual("7", q.first().pk)

    def testAdmissionTemp(self):
        """Round-trip an admission temperature row."""
        self.commit_test_obj(AdmissionTemp(degree_fahrenheit=98.5))
        q = self.session.query(AdmissionTemp)
        self.assertEqual(q.count(), 1)
        # The column comes back as a Decimal, not the float we stored
        self.assertEqual(Decimal("98.5"), q.first().degree_fahrenheit)

    def testAdmissionO2sat(self):
        """Round-trip an oxygen-saturation percentage row."""
        self.commit_test_obj(AdmissionO2sat(o2sat_percentage=98))
        q = self.session.query(AdmissionO2sat)
        self.assertEqual(q.count(), 1)
        self.assertEqual(98, q.first().o2sat_percentage)

    def testAssignedLocation(self):
        """Round-trip an AssignedLocation row."""
        loc = AssignedLocation(location="PMCLAB")
        self.commit_test_obj(loc)
        q = self.session.query(AssignedLocation)
        self.assertEqual(q.count(), 1)
        self.assertEqual("PMCLAB", q.first().location)

    def testChiefComplaint(self):
        """Round-trip a ChiefComplaint row."""
        complaint = ChiefComplaint(chief_complaint="ABDOMINAL PAIN")
        self.commit_test_obj(complaint)
        q = self.session.query(ChiefComplaint)
        self.assertEqual(q.count(), 1)
        self.assertEqual("ABDOMINAL PAIN", q.first().chief_complaint)

    def testLabResult(self):
        """Round-trip a LabResult; code, text and result must survive."""
        loinc_text = "Bacteria identified:Prid:Pt:Sputum:Nom:Aerobic culture"
        loinc_code = "622-1"
        coding = "LN"
        result = """Few Neutrophils   Few Squamous Epithelial Cells   Mixed Flora   Squamous cells in the specimen   indicate the presence of   superficial material that may   contain contaminating or   colonizing bacteria unrelated to   infection. Collection of another   specimen is suggested, avoiding   superficial sources of   contamination.   *****CULTURE RESULTS*****"""

        self.commit_test_obj(LabResult(test_code=loinc_code, test_text=loinc_text, coding=coding, result=result))
        query = self.session.query(LabResult)
        self.assertEqual(1, query.count())
        # Fetch once - each query.first() re-executes the query
        persisted = query.first()
        self.assertEqual(persisted.test_code, loinc_code)
        self.assertEqual(persisted.test_text, loinc_text)
        self.assertEqual(persisted.result, result)

    def testLocationCountry(self):
        """Round-trip a Location row with only the country set."""
        self.commit_test_obj(Location(country="CAN"))
        query = self.session.query(Location)
        self.assertEqual(1, query.count())
        # Fetch once - each query.first() re-executes the query
        persisted = query.first()
        self.assertEqual(persisted.country, "CAN")
        # last_updated is stamped on insert, so it cannot be in the future
        self.assertTrue(datetime.datetime.now() >= persisted.last_updated)

    def testLocationCounty(self):
        """Round-trip a Location row with only the county set."""
        self.commit_test_obj(Location(county="SPO-WA"))
        query = self.session.query(Location)
        self.assertEqual(1, query.count())
        # Fetch once - each query.first() re-executes the query
        persisted = query.first()
        self.assertEqual(persisted.county, "SPO-WA")
        # last_updated is stamped on insert, so it cannot be in the future
        self.assertTrue(datetime.datetime.now() >= persisted.last_updated)

    def testLocationZip(self):
        """Round-trip a Location row with only the zip code set."""
        self.commit_test_obj(Location(zip="98101"))
        q = self.session.query(Location)
        self.assertEqual(q.count(), 1)
        self.assertEqual("98101", q.first().zip)

    def testLocation(self):
        """Round-trip a Location row with all address columns set."""
        self.commit_test_obj(Location(county="SPO-WA", state="WA",
                                      country="USA", zip="95432"))
        query = self.session.query(Location)
        self.assertEqual(1, query.count())
        # Fetch once - each query.first() re-executes the query
        persisted = query.first()
        self.assertEqual(persisted.state, "WA")
        self.assertEqual(persisted.county, "SPO-WA")
        self.assertEqual(persisted.country, "USA")
        self.assertEqual(persisted.zip, "95432")

    def testNote(self):
        """Round-trip a short free-text note."""
        text = "IS PT ALLERGIC TO PENICILLIN? N"
        self.commit_test_obj(Note(note=text))
        q = self.session.query(Note)
        self.assertEqual(q.count(), 1)
        self.assertEqual(text, q.first().note)

    def testLongNote(self):
        """An over-length note keeps at least its leading content."""
        long_note = """ REFERENCE INTERVAL: INFLUENZA B VIRUS Ab, IgG 0.89 IV or less:    Negative - No significant level of influenza B virus IgG antibody detected. 0.90 - 1.10 IV:     Equivocal - Questionable presence of influenza B virus IgG antibody detected. Repeat testing in 10-14 days may be helpful. 1.11 IV or greater: Positive - IgG antibodies to influenza B virus detected, which may suggest current or past infection. Test performed at ARUP Laboratories, 500 Chipeta Way, Salt Lake City, Utah 84108 Performed at ARUP, 500 Chipeta Way, Salt Lake City, UT 84108"""
        self.commit_test_obj(Note(note=long_note))
        q = self.session.query(Note)
        self.assertEqual(q.count(), 1)
        # At least the first 100 characters must have survived storage
        self.assertTrue(q.first().note.startswith(long_note[:100]))

    def testDisposition(self):
        """Round-trip a Disposition row and its mapping columns."""
        row = Disposition(code=20, description="Expired",
                          gipse_mapping="Expired", odin_mapping="Died")
        self.commit_test_obj(row)
        disposition = (
            self.session.query(Disposition)
            .filter(Disposition.description == "Expired")
            .one()
        )
        self.assertTrue(disposition)
        self.assertEqual(20, disposition.code)
        self.assertEqual("Died", disposition.odin_mapping)
        self.assertEqual("Expired", disposition.gipse_mapping)
        self.assertTrue(datetime.datetime.now() > disposition.last_updated)

    def testDx(self):
        """Round-trip a Diagnosis row."""
        dx = Diagnosis(status="W", icd9="569.3",
                       description="HYPERTENSION NOS")
        self.commit_test_obj(dx)
        q = self.session.query(Diagnosis)
        self.assertEqual(q.count(), 1)
        self.assertEqual("HYPERTENSION NOS", q.first().description)

    def testFacility(self):
        """Round-trip a Facility row, looked up by NPI."""
        facility = Facility(county="NEAR",
                            npi=123454321,
                            zip="99999",
                            organization_name="Nearby Medical Center",
                            local_code="NMC")
        self.commit_test_obj(facility)
        sh = self.session.query(Facility).filter_by(npi=123454321).one()
        self.assertEqual("Nearby Medical Center", sh.organization_name)
        self.assertEqual("99999", sh.zip)
        self.assertEqual("NEAR", sh.county)

    def testFacilityUpdates(self):
        "Facilities are pre-loaded.  Use to test update timestamps"
        self.commit_test_obj(Facility(county="NEAR",
                                      npi=123454321,
                                      zip="99999",
                                      organization_name="Nearby Medical Center",
                                      local_code="NMC"))
        facility = self.session.query(Facility).filter_by(npi=123454321).one()
        b4 = facility.last_updated
        self.assertTrue(b4)
        # Touch a column; commit should bump the last_updated stamp
        facility.local_code = "FOO"
        self.session.commit()
        facility = self.session.query(Facility).filter_by(npi=123454321).one()
        self.assertTrue(facility.last_updated > b4)

    def testPerformingLab(self):
        """Round-trip a PerformingLab row."""
        self.commit_test_obj(PerformingLab(local_code="HFH"))
        q = self.session.query(PerformingLab)
        self.assertEqual(q.count(), 1)
        self.assertEqual("HFH", q.first().local_code)

    def testPrego(self):
        """Round-trip a Pregnancy row."""
        status = "Patient Currently Pregnant"
        self.commit_test_obj(Pregnancy(result=status))
        q = self.session.query(Pregnancy)
        self.assertEqual(q.count(), 1)
        self.assertEqual(status, q.first().result)

    def testRace(self):
        """Round-trip a Race row."""
        race = "Native Hawaiian or Other Pacific Islander"
        self.commit_test_obj(Race(race=race))
        q = self.session.query(Race)
        self.assertEqual(q.count(), 1)
        self.assertEqual(race, q.first().race)

    def testServiceArea(self):
        """Round-trip a ServiceArea row."""
        self.commit_test_obj(ServiceArea(area="obstetrics"))
        q = self.session.query(ServiceArea)
        self.assertEqual(q.count(), 1)
        self.assertEqual("obstetrics", q.first().area)

    def testSpecimenSource(self):
        """Round-trip a SpecimenSource row."""
        self.commit_test_obj(SpecimenSource(source="PLEFLD"))
        q = self.session.query(SpecimenSource)
        self.assertEqual(q.count(), 1)
        self.assertEqual("PLEFLD", q.first().source)

    def testFluVaccine(self):
        """Round-trip a FluVaccine row."""
        self.commit_test_obj(FluVaccine(status="Not Specified"))
        q = self.session.query(FluVaccine)
        self.assertEqual(q.count(), 1)
        self.assertEqual("Not Specified", q.first().status)

    def testH1N1Vaccine(self):
        """Round-trip an H1N1Vaccine row (status stored HTML-escaped)."""
        status = "Not Applicable (Age&lt;18)"
        self.commit_test_obj(H1N1Vaccine(status=status))
        q = self.session.query(H1N1Vaccine)
        self.assertEqual(q.count(), 1)
        self.assertEqual(status, q.first().status)

    def testVisit(self):
        """Test with minimal required fields set

        Persists the Facility that the visit's dim_facility_pk refers
        to, then assembles the keyword dict for a minimal visit row.

        NOTE(review): `kw` is built but never used below - this method
        appears truncated (the Visit construction and assertions seem
        to be missing); confirm against the original source.
        """
        self.commit_test_obj(
            Facility(
                county="NEAR",
                npi=123454321,
                zip="99999",
                organization_name="Nearby Medical " "Center",
                local_code="NMC",
            )
        )
        # Minimal required columns for a visit row; dim_facility_pk
        # matches the Facility npi committed above
        kw = {
            "visit_id": "284999^^^&650903.98473.0179.6039.1.333.1&ISO",
            "patient_class": "E",
            "patient_id": "156999^^^&650903.98473.0179.6039.1.333.1&ISO",
            "admit_datetime": datetime.datetime(2007, 01, 01),
            "first_message": datetime.datetime(2007, 01, 01),
            "last_message": datetime.datetime(2007, 01, 01),
            "dim_facility_pk": 123454321,
        }
# --- Example #11 --- (scraped example-boundary marker; the original
# bare lines "Example #11" / "0" are not valid module-level code)
class LongitudinalManager(object):
    """ Abstraction to handle which db, user, etc. the deduplication
    process should be run on.  Handles runtime arguments and
    execution.

    Manages the process by farming out the individual visit
    deduplication to a number of worker processes (necessary to take
    advantage of multi-core processor and database as the limiting
    factor).

    """
    # The gating issue is the number of postgres connections that are
    # allowed to run concurrently.  Setting this to N-1 (where N is
    # the number of cores) has proven the fastest and most reliable.
    NUM_PROCS = 5

    def __init__(self, data_warehouse=None, data_mart=None,
                 reportDate=None, database_user=None,
                 database_password=None, verbosity=0):
        """Configure a deduplication run.

        Values set here may be overridden by processArgs() when the
        manager is driven from the command line.

        :param data_warehouse: name of the source (warehouse) database
        :param data_mart: name of the destination (mart) database
        :param reportDate: optional date string restricting the run to
            a single admission date; None means the whole database
        :param database_user: credentials used for both db connections
        :param database_password: credentials used for both db connections
        :param verbosity: logging verbosity count (0 = quiet)

        """
        self.data_warehouse = data_warehouse
        self.warehouse_port = 5432  # postgres default
        self.data_mart = data_mart
        self.mart_port = 5432  # postgres default
        # Conditional expression rather than the fragile
        # `x and f(x) or None` idiom, which misbehaves whenever the
        # middle term is falsy.
        self.reportDate = parseDate(reportDate) if reportDate else None
        self.database_user = database_user
        self.database_password = database_password
        self.dir = os.path.split(__file__)[0]  # filename part was unused
        self.verbosity = verbosity
        self.queue = JoinableQueue()
        self.datefile = "/tmp/longitudinal_datefile"
        self.datePersistence = Datefile(initial_date=self.reportDate)
        self.lock = FileLock(LOCKFILE)
        self.skip_prep = False

    def __call__(self):
        """Allow the manager instance to be invoked directly."""
        return self.execute()

    def processArgs(self):
        """ Process any optional arguments and positional parameters

        Expects exactly two positional arguments: the data warehouse
        and data mart database names.  Overwrites the matching
        attributes set in __init__.

        """
        parser = OptionParser(usage=usage)
        parser.add_option("-c", "--countdown", dest="countdown",
                          default=None,
                          help="count {down,up} date using date string in "\
                              "%s - set to 'forwards' or 'backwards' "\
                              "if desired" % self.datefile)
        parser.add_option("-d", "--date", dest="date", default=None,
                          help="single admission date to dedup "\
                          "(by default, checks the entire database)")
        parser.add_option("-s", "--skip-prep", dest="skip_prep",
                          default=False, action="store_true",
                          help="skip the expense of looking for new "\
                          "messages")
        parser.add_option("-v", "--verbose", dest="verbosity",
                          action="count", default=self.verbosity,
                          help="increase output verbosity")
        parser.add_option("-m", "--mart-port", dest="mart_port",
                          default=self.mart_port, type="int",
                          help="alternate port number for data mart")
        parser.add_option("-w", "--warehouse-port", dest="warehouse_port",
                          default=self.warehouse_port, type="int",
                          help="alternate port number for data warehouse")

        (options, args) = parser.parse_args()
        if len(args) != 2:
            parser.error("incorrect number of arguments")

        self.data_warehouse = args[0]
        self.data_mart = args[1]
        # Use the `options` object returned by parse_args() directly
        # rather than reaching back into parser.values (same object,
        # but this is the documented optparse access pattern).
        self.warehouse_port = options.warehouse_port
        self.mart_port = options.mart_port
        self.verbosity = options.verbosity
        self.skip_prep = options.skip_prep
        initial_date = parseDate(options.date) if options.date else None
        self.datePersistence = Datefile(initial_date=initial_date,
                                        persistence_file=self.datefile,
                                        direction=options.countdown)

        self.reportDate = self.datePersistence.get_date()

    def _prepDeduplicateTables(self):
        """ Add any missing rows to the MessageProcessed table

        This is the bridge between the data warehouse and the data
        mart.  In an effort to make the data mart independent of the
        warehouse, the processed message data is kept in the mart.  As
        we're dealing with two distinct databases, there's no
        referential integrity available at the database level, so care
        should be taken.

        """
        startTime = time.time()
        logging.info("Starting INSERT INTO internal_message_processed "
                     "at %s", startTime)

        # We can take advantage of an "add only" data_warehouse,
        # knowing the hl7_msh_id is a sequence moving in the positive
        # direction.  Simply add any values greater than the previous
        # max.

        stmt = "SELECT max(hl7_msh_id) from internal_message_processed"
        max_id = self.data_mart_access.engine.execute(stmt).first()[0]
        if not max_id:
            # Empty table (max() returned NULL) - start from the beginning.
            max_id = 0

        new_msgs = list()
        stmt = """SELECT hl7_msh_id, message_datetime, visit_id
        FROM hl7_msh JOIN hl7_visit USING (hl7_msh_id) WHERE
        hl7_msh_id > %d """ % max_id
        rs = self.data_warehouse_access.engine.execute(stmt)
        # Stream the (potentially huge) result set in modest batches,
        # committing each batch so memory use stays bounded.
        many = 500
        while True:
            results = rs.fetchmany(many)
            if not results:
                break
            for r in results:
                new_msgs.append(MessageProcessed(hl7_msh_id=r[0],
                                                 message_datetime=r[1],
                                                 visit_id=r[2]))

            self.data_mart_access.session.add_all(new_msgs)
            self.data_mart_access.session.commit()
            logging.debug("added %d new messages" % len(new_msgs))
            new_msgs = list()

        logging.info("Added new rows to internal_message_processed in %s",
                     time.time() - startTime)

    def _visitsToProcess(self):
        """ Look up all distinct visit ids needing attention

        Obtain unique list of visit_ids that have messages that
        haven't previously been processed.  If the user requested just
        one days worth (i.e. -d) only that days visits will be
        returned.

        """
        visit_ids = list()
        if not self.reportDate:
            logging.info("Launch deduplication for entire database")
            # Do the whole batch, that is, all that haven't been
            # processed before.
            stmt = """SELECT DISTINCT(visit_id) FROM
            internal_message_processed
            WHERE processed_datetime IS NULL"""
            rs = self.data_mart_access.engine.execute(stmt)
            many = 10000
            while True:
                results = rs.fetchmany(many)
                if not results:
                    break
                for r in results:
                    visit_ids.append(r[0])

        else:
            logging.info("Launch deduplication for %s",
                         self.reportDate)
            # Process the requested day only - as we can't join across
            # db boundaries - first acquire the full list of visits
            # for the requested day from the data_warehouse to use in
            # a massive 'in' clause

            stmt = """SELECT DISTINCT(visit_id) FROM hl7_visit WHERE
            admit_datetime BETWEEN '%s' AND '%s';""" %\
            (self.reportDate, self.reportDate + timedelta(days=1))
            # BUGFIX: a stray self.access.raw_query(stmt) used to run
            # here, executing the identical (expensive) query a second
            # time and discarding the result.
            rs = self.data_warehouse_access.engine.execute(stmt)
            many = 1000
            potential_visit_ids = list()
            while True:
                results = rs.fetchmany(many)
                if not results:
                    break
                for r in results:
                    potential_visit_ids.append(r[0])

            if potential_visit_ids:
                query = self.data_mart_access.session.query(\
                    MessageProcessed.visit_id).distinct().\
                    filter(and_(MessageProcessed.processed_datetime ==
                                None,
                                MessageProcessed.visit_id.\
                                in_(potential_visit_ids)))

                for r in query:
                    visit_ids.append(r[0])

        logging.info("Found %d visits needing attention",
                     len(visit_ids))
        return visit_ids

    def tearDown(self):
        """ Clean up any open handles/connections """
        # now done in execute when we're done with the connections

    def execute(self):
        """ Start the process

        Acquires the run lock, prepares the bridge table, gathers the
        visits needing attention and fans them out to NUM_PROCS worker
        processes via a JoinableQueue.

        """
        # Initialize logging now (verbosity is now set regardless of
        # invocation method)
        configure_logging(verbosity=self.verbosity,
                          logfile="longitudinal-manager.log")

        logging.info("Initiate deduplication for %s",
                         (self.reportDate and self.reportDate or
                          "whole database"))
        # Only allow one instance of the manager to run at a time.
        if self.lock.is_locked():
            logging.warning("Can't continue, %s is locked ", LOCKFILE)
            return

        if systemUnderLoad():
            # Deliberately best-effort: warn, but keep going.
            logging.warning("system under load - continue anyhow")

        try:
            self.lock.acquire()

            self.access = DirectAccess(database=self.data_warehouse,
                                       port=self.warehouse_port,
                                       user=self.database_user,
                                       password=self.database_password)
            self.data_warehouse_access = AlchemyAccess(
                database=self.data_warehouse,
                port=self.warehouse_port,
                user=self.database_user, password=self.database_password)
            self.data_mart_access = AlchemyAccess(
                database=self.data_mart, port=self.mart_port,
                user=self.database_user, password=self.database_password)

            startTime = time.time()
            if not self.skip_prep:
                self._prepDeduplicateTables()
            visits_to_process = self._visitsToProcess()

            # Now done with db access needs at the manager level
            # free up resources:
            self.data_mart_access.disconnect()
            self.data_warehouse_access.disconnect()
            self.access.close()

            # Set of locks used, one for each table needing protection
            # from asynchronous inserts.  Names should match table
            # minus 'dim_' prefix, plus '_lock' suffix
            # i.e. dim_location -> 'location_lock'
            table_locks = {'admission_source_lock': Lock(),
                           'admission_o2sat_lock': Lock(),
                           'admission_temp_lock': Lock(),
                           'assigned_location_lock': Lock(),
                           'admit_reason_lock': Lock(),
                           'chief_complaint_lock': Lock(),
                           'diagnosis_lock': Lock(),
                           'disposition_lock': Lock(),
                           'flu_vaccine_lock': Lock(),
                           'h1n1_vaccine_lock': Lock(),
                           'lab_flag_lock': Lock(),
                           'lab_result_lock': Lock(),
                           'location_lock': Lock(),
                           'note_lock': Lock(),
                           'order_number_lock': Lock(),
                           'performing_lab_lock': Lock(),
                           'pregnancy_lock': Lock(),
                           'race_lock': Lock(),
                           'reference_range_lock': Lock(),
                           'service_area_lock': Lock(),
                           'specimen_source_lock': Lock(),
                           }

            # If we have visits to process, fire up the workers...
            # BUGFIX: was `len(visits_to_process) > 1`, which silently
            # skipped a run containing exactly one visit.
            if visits_to_process:
                for i in range(self.NUM_PROCS):
                    dw = Process(target=LongitudinalWorker,
                                 kwargs={'queue': self.queue,
                                         'procNumber': i,
                                         'data_warehouse': self.data_warehouse,
                                         'warehouse_port': self.warehouse_port,
                                         'data_mart': self.data_mart,
                                         'mart_port': self.mart_port,
                                         'dbUser': self.database_user,
                                         'dbPass': self.database_password,
                                         'table_locks': table_locks,
                                         'verbosity': self.verbosity})
                    # Daemonize so workers die with the manager.
                    dw.daemon = True
                    dw.start()

                # Populate the queue
                for v in visits_to_process:
                    self.queue.put(v)

                # Wait on the queue until empty
                self.queue.join()

            # Common cleanup
            self.tearDown()
            self.datePersistence.bump_date()
            logging.info("Queue is empty - done in %s", time.time() -
                         startTime)
        finally:
            # Always release the file lock, even on error.
            self.lock.release()
Example #12
0
    def execute(self):
        """ Start the process

        Acquires the run lock, prepares the bridge table, gathers the
        visits needing attention and fans them out to NUM_PROCS worker
        processes via a JoinableQueue.

        """
        # Initialize logging now (verbosity is now set regardless of
        # invocation method)
        configure_logging(verbosity=self.verbosity,
                          logfile="longitudinal-manager.log")

        logging.info("Initiate deduplication for %s",
                         (self.reportDate and self.reportDate or
                          "whole database"))
        # Only allow one instance of the manager to run at a time.
        if self.lock.is_locked():
            logging.warning("Can't continue, %s is locked ", LOCKFILE)
            return

        if systemUnderLoad():
            # Deliberately best-effort: warn, but keep going.
            logging.warning("system under load - continue anyhow")

        try:
            self.lock.acquire()

            self.access = DirectAccess(database=self.data_warehouse,
                                       port=self.warehouse_port,
                                       user=self.database_user,
                                       password=self.database_password)
            self.data_warehouse_access = AlchemyAccess(
                database=self.data_warehouse,
                port=self.warehouse_port,
                user=self.database_user, password=self.database_password)
            self.data_mart_access = AlchemyAccess(
                database=self.data_mart, port=self.mart_port,
                user=self.database_user, password=self.database_password)

            startTime = time.time()
            if not self.skip_prep:
                self._prepDeduplicateTables()
            visits_to_process = self._visitsToProcess()

            # Now done with db access needs at the manager level
            # free up resources:
            self.data_mart_access.disconnect()
            self.data_warehouse_access.disconnect()
            self.access.close()

            # Set of locks used, one for each table needing protection
            # from asynchronous inserts.  Names should match table
            # minus 'dim_' prefix, plus '_lock' suffix
            # i.e. dim_location -> 'location_lock'
            table_locks = {'admission_source_lock': Lock(),
                           'admission_o2sat_lock': Lock(),
                           'admission_temp_lock': Lock(),
                           'assigned_location_lock': Lock(),
                           'admit_reason_lock': Lock(),
                           'chief_complaint_lock': Lock(),
                           'diagnosis_lock': Lock(),
                           'disposition_lock': Lock(),
                           'flu_vaccine_lock': Lock(),
                           'h1n1_vaccine_lock': Lock(),
                           'lab_flag_lock': Lock(),
                           'lab_result_lock': Lock(),
                           'location_lock': Lock(),
                           'note_lock': Lock(),
                           'order_number_lock': Lock(),
                           'performing_lab_lock': Lock(),
                           'pregnancy_lock': Lock(),
                           'race_lock': Lock(),
                           'reference_range_lock': Lock(),
                           'service_area_lock': Lock(),
                           'specimen_source_lock': Lock(),
                           }

            # If we have visits to process, fire up the workers...
            # BUGFIX: was `len(visits_to_process) > 1`, which silently
            # skipped a run containing exactly one visit.
            if visits_to_process:
                for i in range(self.NUM_PROCS):
                    dw = Process(target=LongitudinalWorker,
                                 kwargs={'queue': self.queue,
                                         'procNumber': i,
                                         'data_warehouse': self.data_warehouse,
                                         'warehouse_port': self.warehouse_port,
                                         'data_mart': self.data_mart,
                                         'mart_port': self.mart_port,
                                         'dbUser': self.database_user,
                                         'dbPass': self.database_password,
                                         'table_locks': table_locks,
                                         'verbosity': self.verbosity})
                    # Daemonize so workers die with the manager.
                    dw.daemon = True
                    dw.start()

                # Populate the queue
                for v in visits_to_process:
                    self.queue.put(v)

                # Wait on the queue until empty
                self.queue.join()

            # Common cleanup
            self.tearDown()
            self.datePersistence.bump_date()
            logging.info("Queue is empty - done in %s", time.time() -
                         startTime)
        finally:
            # Always release the file lock, even on error.
            self.lock.release()