Example #1
0
 def _set_transport(self):
     """Select the transport client used to deliver reports."""
     # Region-limited reports go out through the Distribute client;
     # the default (statewide) report travels over PHINMS.
     if not self.criteria.reportable_region:
         self._transport = PHINMS_client(zip_first=True)
     else:
         self._transport = Distribute_client(zip_first=True)
 def _set_transport(self):
     """Plug in the appropriate transport mechanism."""
     # Pick the client class by report scope, then instantiate once.
     client = (Distribute_client if self.criteria.reportable_region
               else PHINMS_client)
     self._transport = client(zip_first=True)
Example #3
0
 def testUnZippedUpload(self):
     """Can we upload an unzipped copy"""
     # Stage a scratch file holding the fixture text.
     scratch = tempfile.NamedTemporaryFile('w')
     scratch.write(lorem_ipsum)
     scratch.seek(0)
     # Archive the raw (uncompressed) document first.
     doc_id = document_store(scratch.name, document_type='essence',
                             compress_with=None)
     self.assertTrue(doc_id)
     # transfer_file is expected to return a falsy result here.
     agent = PHINMS_client(zip_first=False)
     self.assertFalse(agent.transfer_file(doc_id))
Example #4
0
class GenerateReport(object):
    """ Process options and generate the requested report.  Optionally
    persists the file to the filesystem, and uploads to the DOH sftp
    server.

    """
    __version__ = '0.2'
    # NOTE: evaluated once at class-definition (import) time; every
    # instance shares this Config and the derived IGNORE_SITE value.
    config = Config()
    IGNORE_SITE = config.get('longitudinal', 'ignore_site', default='')

    # Order matters, create a tuple of paired values (reportColumn,
    # essenceColumn) - NB, the Diagnosis column is being bastardized.
    # Previously there was an SQL function to do the subselect, but it
    # ran way too slow.  Now contains the foreign key to join w/ the
    # diagnosis for the respective visit.

    # Zero-based positions of the two special columns in `columns`.
    diagnosis_column_index = 7
    patient_class_column_index = 11
    columns = (
        ('Hosp', 'hospital'),
        ('Reg Date', 'visit_date'),
        ('Time', 'visit_time'),
        ('Sex', 'gender'),
        ('Age', 'age'),
        ('Reason For Visit', 'chief_complaint'),
        ('Zip Code', 'zip'),
        ('Diagnosis', 'visit_pk'),
        ('Admit Status', 'gipse_disposition'),
        ('Medical Record No.', 'patient_id'),
        ('Visit Record No.', 'visit_id'),
        ('Service Area', 'patient_class'),
    )
    # Guard against the index constants drifting if columns are ever
    # reordered; these asserts run when the module is imported.
    assert (columns[diagnosis_column_index][1] == 'visit_pk')
    assert (columns[patient_class_column_index][1] == 'patient_class')

    def __init__(self,
                 user=None,
                 password=None,
                 report_criteria=None,
                 datefile=None):
        """Initialize report generation.

        :param user: database user
        :param password: database password
        :param report_criteria: ReportCriteria defining specifics
        :param datefile: useful for persistent walks through time

        """
        self.user = user
        self.password = password
        self.criteria = report_criteria
        self.database = self.criteria.database
        if datefile:
            # The datefile's range must agree with the criteria.
            expected = (self.criteria.start_date, self.criteria.end_date)
            assert expected == datefile.get_date_range()
            self.datePersistence = datefile

        self._diags = {}
        # Set up the output file, column list and transport up front.
        self._prepare_output_file()
        self._prepare_columns()
        self._set_transport()

    def _prepare_columns(self):
        """Drop the patient_class column when splitting by patient_class."""
        if not self.criteria.patient_class:
            return
        before = len(self.columns)
        skip = self.patient_class_column_index
        self.columns = tuple(col for pos, col in enumerate(self.columns)
                             if pos != skip)
        assert len(self.columns) == before - 1

    def _set_transport(self):
        """Plug in the appropriate transport mechanism."""
        # Transport strategies differ for the different reports:
        # region-limited reports use Distribute, the rest use PHINMS.
        self._transport = (
            Distribute_client(zip_first=True)
            if self.criteria.reportable_region
            else PHINMS_client(zip_first=True))

    def _generate_output_filename(self, start_date=None, end_date=None):
        """Build the full path for the report file.

        Dates default to those on self.criteria.  A single-day report
        gets one date stamp; a range gets 'start-end'.
        """
        if start_date is None:
            start_date = self.criteria.start_date
        if end_date is None:
            end_date = self.criteria.end_date

        if start_date == end_date:
            datestr = end_date.strftime('%Y%m%d')
        else:
            datestr = '%s-%s' % (start_date.strftime('%Y%m%d'),
                                 end_date.strftime('%Y%m%d'))

        filename = '%s-%s.txt' % (self.criteria.report_method, datestr)
        tmp_dir = Config().get('general', 'tmp_dir', default='/tmp')
        return os.path.join(tmp_dir, filename)

    def _prepare_output_file(self):
        """Open the local filesystem file for output.

        Sets self.output (an open write handle) and
        self._output_filename.  Warns if a non-empty file with the
        same name already exists, then overwrites it.
        """
        filepath = self.\
            _generate_output_filename(start_date=self.criteria.start_date,
                                      end_date=self.criteria.end_date)

        # watch for oversight errors; notify if like report exists -
        # unless it's size zero (from a previous failed run)
        if os.path.exists(filepath) and os.path.getsize(filepath):
            # Pass filepath as a lazy %-argument rather than eagerly
            # interpolating with `%` — the standard logging idiom.
            logging.warning("Found requested report file already "
                            "exists - overwriting: '%s'", filepath)

        self.output = open(filepath, 'w')
        self._output_filename = self.output.name

    @property
    def output_filename(self):
        """Path of the report file; requires _prepare_output_file()."""
        # EAFP: the attribute only exists after _prepare_output_file().
        try:
            return self._output_filename
        except AttributeError:
            raise RuntimeError("prerequisite call to "
                               "_prepare_output_file() "
                               "didn't happen!")

    def _header(self):
        """Return the pipe-delimited header row for the report."""
        names = [label for label, _ in self.columns]
        # Vitals add four fixed columns at the end of the header.
        if self.criteria.include_vitals:
            names.extend(('Measured Temperature', 'O2 Saturation',
                          'Self-Reported Influenza Vaccine',
                          'Self-Reported H1N1 Vaccine'))
        return '|'.join(names)

    def _build_join_tables(self):
        """Build every temporary join table the query needs.

        The visit join table is always required; the vitals join
        table only when the report includes vitals.
        """
        self._build_visit_join_table()
        if self.criteria.include_vitals:
            self._build_vitals_join_table()

    def _build_visit_join_table(self):
        """ Helper in selection of visits for the report - this method
        builds a temporary table and populates it with the visit_pks
        that belong in the report.  This should include all visit_pks
        with the matching admit_datetime as well as any that have
        received updates since the last like report was produced.

        """
        # If include_vitals is on, we also need the visit_id to keep
        # the joins managable.  vitals don't have a patient class, so
        # you can't join on the same values.

        # NOTE(review): all SQL below is assembled via string
        # interpolation.  The interpolated values come from internal
        # config/criteria rather than end users, but parameterized
        # queries would be safer — confirm inputs are trusted.
        sql = "CREATE TEMPORARY TABLE reportable_pks (pk "\
            "integer not null unique)"
        selectCols = "fact_visit.pk"

        self._getConn()
        self.access.raw_query(sql)

        # If we're only selecting those facilites in a region, the SQL
        # is more complicated - build up the respective clauses.
        joinClause = regionClause = ""
        if self.criteria.reportable_region:
            joinClause = "JOIN internal_reportable_region ON "\
                "internal_reportable_region.dim_facility_pk = "\
                "fact_visit.dim_facility_pk"
            regionClause = "AND region_name = '%s'" %\
                self.criteria.reportable_region

        # Another HACK!  One site is not even wanted by the state DOH,
        # as it's being duplicated from another source, and ESSENCE
        # can't help but count them twice.  Remove this one site
        # regardless
        else:
            # Same join as above, but here the region table is used
            # solely to match (and thereby handle) the IGNORE_SITE
            # pseudo-region.
            joinClause = "JOIN internal_reportable_region ON "\
                "internal_reportable_region.dim_facility_pk = "\
                "fact_visit.dim_facility_pk"
            regionClause = "AND region_name = '%s'" % self.IGNORE_SITE

        # Limit by patient_class if requested.  Note we may still end
        # up with visit ids that have changed patient classes, so more
        # pruning later is necessary.
        pc_limit = ""
        if self.criteria.patient_class:
            pc_limit = "AND patient_class = '%c'" %\
                self.criteria.patient_class

        # Start with all visits for the requested date range
        # (end date is inclusive, hence the +1 day on the BETWEEN).
        sql = "INSERT INTO reportable_pks SELECT %s FROM "\
              "fact_visit %s WHERE admit_datetime BETWEEN '%s' AND "\
              "'%s' %s %s" %\
              (selectCols, joinClause, self.criteria.start_date,
               self.criteria.end_date + timedelta(days=1),
               pc_limit, regionClause)

        self.access.raw_query(sql)

        if self.criteria.include_updates:
            # In this case, add all visits with updates since the
            # last run, but no newer than the requested date (in case
            # we're building reports forward from historical data)
            sql = "SELECT max(processed_datetime) FROM internal_report "\
                  "WHERE report_method = '%s'" % self.criteria.report_method

            cursor = self.access.raw_query(sql)
            last_report_generated = cursor.fetchall()[0][0]
            if last_report_generated is None:
                last_report_generated = '2009-01-01'  # our epoch
            logging.debug("including updates, last_report_generated: "\
                              "%s", last_report_generated)
            # LEFT JOIN + "IS NULL" keeps out pks already inserted by
            # the date-range query above (anti-join pattern).
            sql = "INSERT INTO reportable_pks SELECT %(sel_cols)s FROM "\
                  "fact_visit %(join_clause)s LEFT JOIN reportable_pks ON "\
                  "reportable_pks.pk = fact_visit.pk WHERE "\
                  "last_updated > '%(last_report)s' AND admit_datetime "\
                  "< '%(date)s' AND reportable_pks.pk IS NULL "\
                  "%(pc_limit)s %(region_clause)s" %\
                  {'sel_cols': selectCols,
                   'last_report': last_report_generated,
                   'date': self.criteria.end_date + timedelta(days=1),
                   'pc_limit': pc_limit,
                   'join_clause': joinClause,
                   'region_clause': regionClause}
            self.access.raw_query(sql)

        cursor = self.access.raw_query("SELECT COUNT(*) FROM "\
                                           "reportable_pks")
        logging.debug("%d visits to report on", cursor.fetchall()[0][0])

    def _build_vitals_join_table(self):
        """When report is to include vitals - we use an additional
        temporary table (visit_loinc_data) to hold the data for more
        timely queries.

        Like the rest of the report, the list of interesting visits is
        limited to the rows in the reportable_pks - see
        _build_join_table() for details.

        :raises ValueError: always, until this path is ported.

        """
        # Deliberate guard: this feature has not been ported yet.
        # Everything below the raise is unreachable and retained only
        # as reference for the eventual port.
        raise ValueError('not ported yet')
        sql = """
          CREATE TEMPORARY TABLE visit_loinc_data (
            visit_id VARCHAR(255) not null,
            patient_class CHAR(1) default null,
            observation_id VARCHAR(255) not null,
            observation_result VARCHAR(255) not null)
          """
        self._getConn()
        self.access.raw_query(sql)

        # The observation_id list holds the LOINC codes of interest —
        # presumably temperature, O2 sat and the two vaccine
        # self-reports matching _header(); confirm before porting.
        sql = """
          INSERT INTO visit_loinc_data (visit_id, patient_class,
          observation_id, observation_result) SELECT visit.visit_id,
          visit.patient_class, observation_id,
          observation_result FROM visit JOIN hl7_visit ON
          visit.visit_id = hl7_visit.visit_id JOIN hl7_obx ON
          hl7_visit.hl7_msh_id = hl7_obx.hl7_msh_id JOIN
          reportable_pks ON reportable_pks.visit_id = visit.visit_id
          AND reportable_pks.patient_class = visit.patient_class
          WHERE
          observation_id in ('8310-5', '20564-1', '46077-4',
          '29544-4')
          """
        self.access.raw_query(sql)

    def _select_from_essence_view(self):
        """Build up the SQL select statement to be used in gathering
        the data for this report.

        """
        # Qualify every essence column with the view alias 'e'.
        cols = ','.join('e.' + essence_col for _, essence_col in self.columns)
        stmt = """SELECT %s FROM essence e JOIN reportable_pks ri
        ON e.visit_pk = ri.pk"""
        return stmt % cols

    def _select_diagnosis(self):
        """Load diagnosis data for every reportable visit.

        Populates self._diags (keyed by visit pk) for later use by
        self._diagnosis().  A list of unique diagnoses ordered by
        rank is required.
        """
        # Descending dx_datetime puts the most recent (best) rows
        # first; the persistence layer only saves a unique icd9 dx
        # that has changed status, so the rest are simply added.
        stmt = "SELECT fact_visit_pk, rank, icd9 "\
               "FROM assoc_visit_dx JOIN "\
               "dim_dx ON dim_dx_pk = dim_dx.pk JOIN "\
               "reportable_pks ON "\
               "assoc_visit_dx.fact_visit_pk = reportable_pks.pk "\
               "ORDER BY dx_datetime DESC"
        for visit_pk, rank, icd9 in self.access.raw_query(stmt).fetchall():
            known = self._diags.get(visit_pk)
            if known is None:
                self._diags[visit_pk] = SortedDiagnosis(visit_pk, rank, icd9)
            else:
                known.add(visit_pk, rank, icd9)

    def _diagnosis(self, visit_pk):
        """Return a one-element list: the visit's diagnosis repr, or ''."""
        known = self._diags.get(visit_pk)
        return [repr(known)] if known is not None else ['']

    def _select_vitals(self):
        """Load vitals data for every reportable visit.

        Populates self._vitals (keyed by visit pk) for later use by
        self._vitals_for_visit.

        This is an effective NOP when self.criteria.include_vitals = False

        """
        if not self.criteria.include_vitals:
            return None

        self._vitals = {}
        stmt = """SELECT reportable_pks.visit_pk,
          observation_id, observation_result
          FROM visit_loinc_data JOIN reportable_pks ON
          reportable_pks.visit_id = visit_loinc_data.visit_id"""

        for visit_pk, obs_id, obs_value in \
                self.access.raw_query(stmt).fetchall():
            known = self._vitals.get(visit_pk)
            if known is None:
                self._vitals[visit_pk] = Vitals(obs_id, obs_value)
            else:
                known.add(obs_id, obs_value)

    def _vitals_for_visit(self, visit_pk):
        """Returns the list of vitals for the visit in question.

        This is an effective NOP when self.criteria.include_vitals = False

        """
        if not self.criteria.include_vitals:
            return []

        # Fall back to an empty Vitals() repr for visits w/o data.
        vitals = self._vitals.get(visit_pk)
        if vitals is None:
            vitals = Vitals()
        return repr(vitals)

    def _write_report(self, save_report=False):
        """ Write out and potentially store the results.

        Generate results via database queries and write the results to
        self.output.

        :param save_report: If set, persist the document and related
          metadata to the mbds archive.

        returns the document ID, the mbds archive key, if saved

        """
        out = self.output
        # Python 2 chevron print: writes the header line to `out`.
        print >> out, self._header()
        self._build_join_tables()
        self._select_diagnosis()
        self._select_vitals()
        cursor = self.access.raw_query(self._select_from_essence_view())
        for row in cursor.fetchall():
            # Each row is the colums up to the diagnosis + the
            # comma separated diagnosis + the rest of the columns
            # and finally with vitals if configured for such
            # (the 'Diagnosis' slot actually carries visit_pk — see
            # the class-level `columns` tuple).
            visit_pk = row[self.diagnosis_column_index]  # yuck, but true
            # NOTE(review): the `+` concatenation implies both
            # _diagnosis() and _vitals_for_visit() return lists —
            # i.e. Vitals.__repr__ apparently yields a list; confirm.
            print >> out,\
                '|'.join([strSansNone(column) for column in
                          row[:self.diagnosis_column_index]] +
                         self._diagnosis(visit_pk) +
                         [strSansNone(column) for column in
                          row[self.diagnosis_column_index + 1:]] +
                         self._vitals_for_visit(visit_pk))

        # Close the file and persist to the document archive if
        # requested
        self.output.close()
        if save_report:
            # Keep only criteria values that were actually set.
            metadata = {
                k: v
                for k, v in self.criteria._crit.items() if v is not None
            }

            # At this point, all documents are of 'essence' type
            return document_store(document=self.output.name,
                                  allow_duplicate_filename=True,
                                  document_type='essence',
                                  **metadata)

    def _record_report(self, report_oid):
        """Record the details from this report generation in the db."""
        if not report_oid:
            return
        entry = Report(processed_datetime=datetime.now(),
                       file_path=report_oid,
                       report_method=self.criteria.report_method)

        # Short-lived session: add, commit, disconnect.
        db = AlchemyAccess(database=self.database)
        db.session.add(entry)
        db.session.commit()
        db.disconnect()

    def _transmit_report(self, report):
        """Hand the report off to the configured transport."""
        transport = self._transport
        logging.info("initiate upload of %s", report)
        transport.transfer_file(report)

    def _transmit_differences(self, report):
        """Compute differences from yesterday's like report; transport

        :param report: the freshly generated report (archive key)
        :raises ValueError: on single-day reports, or when a
          comparable prior report exists (diff path not ported yet).
        """

        # This option really only makes sense on date range reports,
        # as updates hit older data than just 'yesterday'.
        if self.criteria.start_date == self.criteria.end_date:
            raise ValueError("difference calculation not supported on "\
                             "single day reports")
        # See if we can find a similar report in the archive from
        # yesterday
        search_criteria = {
            'report_method': self.criteria.report_method,
            'start_date': self.criteria.start_date - timedelta(days=1),
            'end_date': self.criteria.end_date - timedelta(days=1)
        }
        old_doc = document_find(search_criteria, limit=1)
        if old_doc is None:
            # Nothing to diff against — fall back to a full upload.
            logging.info("No comparable report found for difference "\
                         "generation")
            self._transmit_report(report)
        else:
            target_filename = self.\
                _generate_output_filename(start_date=self.criteria.start_date,
                                          end_date=self.criteria.end_date)
            # RemoveDuplicates not yet ported!!
            # Deliberate guard: the diff path below is kept only as a
            # reference for the eventual port.
            raise ValueError("RemoveDuplicates not ported")
            #from pheme.essence.remove_duplicates import RemoveDuplicates
            #rd = RemoveDuplicates(new_report=report,
            #                      old_report=old_doc,
            #                      out=target_filename)
            #rd.generate_report()
            #logging.info("initiate upload of difference %s", target_filename)
            #self._transport.transfer_file(target_filename)

    def _getConn(self):
        """ Local wrapper to get database connection
        """
        # Idempotent: only connect on the first call.
        if not hasattr(self, 'access'):
            self.access = DirectAccess(database=self.database,
                                       user=self.user,
                                       password=self.password)

    def _closeConn(self):
        """ Local wrapper to close database connection
        """
        # No-op unless _getConn() established a connection.
        if not hasattr(self, 'access'):
            return
        self.access.close()

    def tearDown(self):
        """Public interface to clean up internals."""
        self._closeConn()

    def execute(self,
                save_report=False,
                transmit_report=False,
                transmit_differences=False):
        """Execute the report generation

        :param save_report: persist the document to the mbds archive
        :param transmit_report: also upload the report via transport
        :param transmit_differences: upload only the diff vs the
          prior like report (see _transmit_differences)
        """
        logging.info("Initiate ESSENCE report generation [%s-%s] for %s",
                     self.criteria.start_date, self.criteria.end_date,
                     self.criteria.report_method)

        # Order matters: connect, write, record, then transmit.
        self._getConn()
        report_oid = self._write_report(save_report)
        self._record_report(report_oid)
        if transmit_report:
            self._transmit_report(report_oid)
        if transmit_differences:
            self._transmit_differences(report_oid)
        self._closeConn()
        # Advance the persistent date walk only after a full run.
        if hasattr(self, 'datePersistence'):
            self.datePersistence.bump_date()

        logging.info("Completed ESSENCE report generation [%s-%s] for %s",
                     self.criteria.start_date, self.criteria.end_date,
                     self.criteria.report_method)
class GenerateReport(object):
    """ Process options and generate the requested report.  Optionally
    persists the file to the filesystem, and uploads to the DOH sftp
    server.

    """
    __version__ = '0.2'
    config = Config()
    IGNORE_SITE = config.get('longitudinal', 'ignore_site', default='')

    # Order matters, create a tuple of paired values (reportColumn,
    # essenceColumn) - NB, the Diagnosis column is being bastardized.
    # Previously there was an SQL function to do the subselect, but it
    # ran way too slow.  Now contains the foreign key to join w/ the
    # diagnosis for the respective visit.

    diagnosis_column_index = 7
    patient_class_column_index = 11
    columns = (('Hosp', 'hospital'),
               ('Reg Date', 'visit_date'),
               ('Time', 'visit_time'),
               ('Sex', 'gender'),
               ('Age', 'age'),
               ('Reason For Visit', 'chief_complaint'),
               ('Zip Code', 'zip'),
               ('Diagnosis', 'visit_pk'),
               ('Admit Status', 'gipse_disposition'),
               ('Medical Record No.', 'patient_id'),
               ('Visit Record No.', 'visit_id'),
               ('Service Area', 'patient_class'),)
    assert(columns[diagnosis_column_index][1] == 'visit_pk')
    assert(columns[patient_class_column_index][1] == 'patient_class')

    def __init__(self, user=None, password=None, report_criteria=None,
                 datefile=None):
        """Initialize report generation.

        :param user: database user
        :param password: database password
        :param report_criteria: ReportCriteria defining specifics
        :param datefile: useful for persistent walks through time

        """
        self.user = user
        self.password = password
        self.criteria = report_criteria
        self.database = self.criteria.database
        if datefile:
            assert((self.criteria.start_date, self.criteria.end_date)
                   == datefile.get_date_range())
            self.datePersistence = datefile

        self._diags = {}
        self._prepare_output_file()
        self._prepare_columns()
        self._set_transport()

    def _prepare_columns(self):
        # Don't include the patient_class column if splitting out by
        # patient_class
        if self.criteria.patient_class:
            len_b4 = len(self.columns)
            self.columns =\
                self.columns[:self.patient_class_column_index] \
                + self.columns[self.patient_class_column_index + 1:]
            assert(len(self.columns) + 1 == len_b4)

    def _set_transport(self):
        """Plug in the appropriate transport mechanism"""
        # Transport strategies differ for the different reports
        if self.criteria.reportable_region:
            self._transport = Distribute_client(zip_first=True)
        else:
            self._transport = PHINMS_client(zip_first=True)

    def _generate_output_filename(self, start_date=None,
                                  end_date=None):
        start_date = self.criteria.start_date if start_date is None\
            else start_date
        end_date = self.criteria.end_date if end_date is None else end_date

        datestr = end_date.strftime('%Y%m%d')
        if start_date != end_date:
            datestr = '-'.join((start_date.strftime('%Y%m%d'),
                         end_date.strftime('%Y%m%d')))

        filename = self.criteria.report_method + '-' + datestr + '.txt'

        config = Config()
        tmp_dir = config.get('general', 'tmp_dir', default='/tmp')

        filepath = os.path.join(tmp_dir, filename)
        return filepath

    def _prepare_output_file(self):
        """Open the local filesystem file for output"""
        filepath = self.\
            _generate_output_filename(start_date=self.criteria.start_date,
                                      end_date=self.criteria.end_date)

        # watch for oversight errors; notify if like report exists -
        # unless it's size zero (from a previous failed run)
        if os.path.exists(filepath) and os.path.getsize(filepath):
            logging.warning("Found requested report file already "\
                             "exists - overwriting: '%s'"\
                             % filepath)

        self.output = open(filepath, 'w')
        self._output_filename = self.output.name

    @property
    def output_filename(self):
        if not hasattr(self, '_output_filename'):
            raise RuntimeError("prerequisite call to "\
                               "_prepare_output_file() "\
                "didn't happen!")
        return self._output_filename

    def _header(self):
        if self.criteria.include_vitals:
            columns = [c[0] for c in self.columns]
            columns += ('Measured Temperature', 'O2 Saturation',
                        'Self-Reported Influenza Vaccine',
                        'Self-Reported H1N1 Vaccine')
            return '|'.join(columns)

        else:
            return '|'.join([c[0] for c in self.columns])

    def _build_join_tables(self):
        """ Scope continues to grow, build all join tables necessary
        for the query.  Some are only necessary with certain features
        on.
        """
        # Always need the list of reportable visits
        self._build_visit_join_table()

        if self.criteria.include_vitals:
            self._build_vitals_join_table()

    def _build_visit_join_table(self):
        """ Helper in selection of visits for the report - this method
        builds a temporary table and populates it with the visit_pks
        that belong in the report.  This should include all visit_pks
        with the matching admit_datetime as well as any that have
        received updates since the last like report was produced.

        """
        # If include_vitals is on, we also need the visit_id to keep
        # the joins managable.  vitals don't have a patient class, so
        # you can't join on the same values.

        sql = "CREATE TEMPORARY TABLE reportable_pks (pk "\
            "integer not null unique)"
        selectCols = "fact_visit.pk"

        self._getConn()
        self.access.raw_query(sql)

        # If we're only selecting those facilites in a region, the SQL
        # is more complicated - build up the respective clauses.
        joinClause = regionClause = ""
        if self.criteria.reportable_region:
            joinClause = "JOIN internal_reportable_region ON "\
                "internal_reportable_region.dim_facility_pk = "\
                "fact_visit.dim_facility_pk"
            regionClause = "AND region_name = '%s'" %\
                self.criteria.reportable_region

        # Another HACK!  One site is not even wanted by the state DOH,
        # as it's being duplicated from another source, and ESSENCE
        # can't help but count them twice.  Remove this one site
        # regardless
        else:
            joinClause = "JOIN internal_reportable_region ON "\
                "internal_reportable_region.dim_facility_pk = "\
                "fact_visit.dim_facility_pk"
            regionClause = "AND region_name = '%s'" % self.IGNORE_SITE

        # Limit by patient_class if requested.  Note we may still end
        # up with visit ids that have changed patient classes, so more
        # pruning later is necessary.
        pc_limit = ""
        if self.criteria.patient_class:
            pc_limit = "AND patient_class = '%c'" %\
                self.criteria.patient_class

        # Start with all visits for the requested date range
        sql = "INSERT INTO reportable_pks SELECT %s FROM "\
              "fact_visit %s WHERE admit_datetime BETWEEN '%s' AND "\
              "'%s' %s %s" %\
              (selectCols, joinClause, self.criteria.start_date,
               self.criteria.end_date + timedelta(days=1),
               pc_limit, regionClause)

        self.access.raw_query(sql)

        if self.criteria.include_updates:
            # In this case, add all visits with updates since the
            # last run, but no newer than the requested date (in case
            # we're building reports forward from historical data)
            sql = "SELECT max(processed_datetime) FROM internal_report "\
                  "WHERE report_method = '%s'" % self.criteria.report_method

            cursor = self.access.raw_query(sql)
            last_report_generated = cursor.fetchall()[0][0]
            if last_report_generated is None:
                last_report_generated = '2009-01-01'  # our epoch
            logging.debug("including updates, last_report_generated: "\
                              "%s", last_report_generated)
            sql = "INSERT INTO reportable_pks SELECT %(sel_cols)s FROM "\
                  "fact_visit %(join_clause)s LEFT JOIN reportable_pks ON "\
                  "reportable_pks.pk = fact_visit.pk WHERE "\
                  "last_updated > '%(last_report)s' AND admit_datetime "\
                  "< '%(date)s' AND reportable_pks.pk IS NULL "\
                  "%(pc_limit)s %(region_clause)s" %\
                  {'sel_cols': selectCols,
                   'last_report': last_report_generated,
                   'date': self.criteria.end_date + timedelta(days=1),
                   'pc_limit': pc_limit,
                   'join_clause': joinClause,
                   'region_clause': regionClause}
            self.access.raw_query(sql)

        cursor = self.access.raw_query("SELECT COUNT(*) FROM "\
                                           "reportable_pks")
        logging.debug("%d visits to report on", cursor.fetchall()[0][0])

    def _build_vitals_join_table(self):
        """When report is to include vitals - we use an additional
        temporary table (visit_loinc_data) to hold the data for more
        timely queries.

        Like the rest of the report, the list of interesting visits is
        limited to the rows in the reportable_pks - see
        _build_join_table() for details.

        """
        # NOTE(review): this guard makes every statement below
        # unreachable; the code is retained only as a reference
        # implementation for when the port is completed.
        raise ValueError('not ported yet')
        # Temporary table holding one row per (visit, observation)
        # pair for the vitals of interest.
        sql = """
          CREATE TEMPORARY TABLE visit_loinc_data (
            visit_id VARCHAR(255) not null,
            patient_class CHAR(1) default null,
            observation_id VARCHAR(255) not null,
            observation_result VARCHAR(255) not null)
          """
        self._getConn()
        self.access.raw_query(sql)

        # Populate from the HL7 OBX segments, restricted to the visits
        # in reportable_pks.  The observation_id values are LOINC
        # codes - presumably the vital-sign set for this report;
        # confirm the code list against the reporting spec.
        sql = """
          INSERT INTO visit_loinc_data (visit_id, patient_class,
          observation_id, observation_result) SELECT visit.visit_id,
          visit.patient_class, observation_id,
          observation_result FROM visit JOIN hl7_visit ON
          visit.visit_id = hl7_visit.visit_id JOIN hl7_obx ON
          hl7_visit.hl7_msh_id = hl7_obx.hl7_msh_id JOIN
          reportable_pks ON reportable_pks.visit_id = visit.visit_id
          AND reportable_pks.patient_class = visit.patient_class
          WHERE
          observation_id in ('8310-5', '20564-1', '46077-4',
          '29544-4')
          """
        self.access.raw_query(sql)

    def _select_from_essence_view(self):
        """Build up the SQL select statement to be used in gathering
        the data for this report.

        """
        stmt = """SELECT %s FROM essence e JOIN reportable_pks ri
        ON e.visit_pk = ri.pk""" %\
            (','.join(['e.' + c[1] for c in self.columns]))
        return stmt

    def _select_diagnosis(self):
        """ Need to pull in all the diagnosis data for this report.
        This is saved in an instance dictionary for use in
        self._diagnosis to generate the list of diagnoses for each
        respective visit.

        A list of unique diagnoses ordered by rank is required.
        """
        # Order descending on dx_datetime as the most recent should be
        # best.  Add any others as the persistence mechanism only
        # saves a unique icd9 dx that has changed status.
        stmt = ("SELECT fact_visit_pk, rank, icd9 "
                "FROM assoc_visit_dx JOIN "
                "dim_dx ON dim_dx_pk = dim_dx.pk JOIN "
                "reportable_pks ON "
                "assoc_visit_dx.fact_visit_pk = reportable_pks.pk "
                "ORDER BY dx_datetime DESC")
        for visit_pk, rank, icd9 in self.access.raw_query(stmt).fetchall():
            if visit_pk not in self._diags:
                self._diags[visit_pk] = SortedDiagnosis(visit_pk, rank, icd9)
            else:
                self._diags[visit_pk].add(visit_pk, rank, icd9)

    def _diagnosis(self, visit_pk):
        if visit_pk in self._diags:
            return [self._diags[visit_pk].__repr__(), ]
        else:
            return ['', ]

    def _select_vitals(self):
        """ Need to pull in all the vitals data for this report.
        This is saved in an instance dictionary for use in
        self._vitals_for_visit to generate the list of vitals for each
        respective visit.

        This is an effective NOP when self.criteria.include_vitals = False

        """
        if not self.criteria.include_vitals:
            return None

        self._vitals = {}
        stmt = """SELECT reportable_pks.visit_pk,
          observation_id, observation_result
          FROM visit_loinc_data JOIN reportable_pks ON
          reportable_pks.visit_id = visit_loinc_data.visit_id"""

        rows = self.access.raw_query(stmt).fetchall()
        for visit_pk, obs_id, obs_result in rows:
            if visit_pk not in self._vitals:
                self._vitals[visit_pk] = Vitals(obs_id, obs_result)
            else:
                self._vitals[visit_pk].add(obs_id, obs_result)

    def _vitals_for_visit(self, visit_pk):
        """Returns the list of vitals for the visit in question.

        This is an effective NOP when self.criteria.include_vitals = False

        """
        if not self.criteria.include_vitals:
            return []

        if visit_pk in self._vitals:
            return self._vitals[visit_pk].__repr__()
        else:
            return Vitals().__repr__()

    def _write_report(self, save_report=False):
        """ Write out and potentially store the results.

        Generate results via database queries and write the results to
        self.output, pipe delimited, one visit per line.

        :param save_report: If set, persist the document and related
          metadata to the mbds archive.

        returns the document ID, the mbds archive key, if saved
        (otherwise implicitly returns None)

        """
        out = self.output
        print >> out, self._header()
        # Build the temporary join tables and pre-fetch the per-visit
        # diagnosis/vitals caches used while streaming rows below.
        self._build_join_tables()
        self._select_diagnosis()
        self._select_vitals()
        cursor = self.access.raw_query(self._select_from_essence_view())
        for row in cursor.fetchall():
            # Each row is the columns up to the diagnosis + the
            # comma separated diagnosis + the rest of the columns
            # and finally with vitals if configured for such.
            # Per the class-level note, the 'Diagnosis' column actually
            # carries the visit foreign key used to look up diagnoses
            # and vitals, hence:
            visit_pk = row[self.diagnosis_column_index]  # yuck, but true
            print >> out,\
                '|'.join([strSansNone(column) for column in
                          row[:self.diagnosis_column_index]] +
                         self._diagnosis(visit_pk) +
                         [strSansNone(column) for column in
                          row[self.diagnosis_column_index + 1:]] +
                         self._vitals_for_visit(visit_pk))

        # Close the file and persist to the document archive if
        # requested
        self.output.close()
        if save_report:
            # Only criteria with a value become document metadata.
            metadata = {k: v for k, v in self.criteria._crit.items() if v
                        is not None}

            # At this point, all documents are of 'essence' type
            return document_store(document=self.output.name,
                                  allow_duplicate_filename=True,
                                  document_type='essence', **metadata)

    def _record_report(self, report_oid):
        """Record the details from this report generation in the db

        :param report_oid: archive key of the stored document; a falsy
          value (nothing was saved) makes this a no-op.

        """
        if not report_oid:
            return
        entry = Report(processed_datetime=datetime.now(),
                       file_path=report_oid,
                       report_method=self.criteria.report_method)

        db = AlchemyAccess(database=self.database)
        db.session.add(entry)
        db.session.commit()
        db.disconnect()

    def _transmit_report(self, report):
        """Transmit report using self._transport()"""
        logging.info("initiate upload of %s", report)
        self._transport.transfer_file(report)

    def _transmit_differences(self, report):
        """Compute differences from yesterday's like report; transport

        :param report: the freshly generated report to diff against
          yesterday's comparable report.

        Falls back to transmitting the full report when no comparable
        report from the previous day is found in the archive.

        Raises ValueError on single day reports, and - currently - on
        any successful archive match, as the RemoveDuplicates port is
        unfinished (see commented reference code below).
        """

        # This option really only makes sense on date range reports,
        # as updates hit older data than just 'yesterday'.
        if self.criteria.start_date == self.criteria.end_date:
            raise ValueError("difference calculation not supported on "\
                             "single day reports")
        # See if we can find a similar report in the archive from
        # yesterday (same method, both dates shifted back one day)
        search_criteria = {'report_method':
                           self.criteria.report_method,
                           'start_date': self.criteria.start_date -
                           timedelta(days=1), 'end_date':
                           self.criteria.end_date - timedelta(days=1)}
        old_doc = document_find(search_criteria, limit=1)
        if old_doc is None:
            logging.info("No comparable report found for difference "\
                         "generation")
            self._transmit_report(report)
        else:
            target_filename = self.\
                _generate_output_filename(start_date=self.criteria.start_date,
                                          end_date=self.criteria.end_date)
            # RemoveDuplicates not yet ported!!
            raise ValueError("RemoveDuplicates not ported")
            #from pheme.essence.remove_duplicates import RemoveDuplicates
            #rd = RemoveDuplicates(new_report=report,
            #                      old_report=old_doc,
            #                      out=target_filename)
            #rd.generate_report()
            #logging.info("initiate upload of difference %s", target_filename)
            #self._transport.transfer_file(target_filename)

    def _getConn(self):
        """ Local wrapper to get database connection
        """
        if hasattr(self, 'access'):
            return
        self.access = DirectAccess(database=self.database,
                                   user=self.user,
                                   password=self.password)

    def _closeConn(self):
        """ Local wrapper to close database connection
        """
        if hasattr(self, 'access'):
            self.access.close()

    def tearDown(self):
        """Public interface to clean up internals

        Safe to call unconditionally - closing the connection is a
        no-op when none was ever opened.
        """
        self._closeConn()

    def execute(self, save_report=False, transmit_report=False,
                 transmit_differences=False):
        """Execute the report generation

        :param save_report: persist document to the mbds archive
        :param transmit_report: upload the report via the transport
        :param transmit_differences: upload only the diff against
          yesterday's comparable report

        """
        span = (self.criteria.start_date, self.criteria.end_date,
                self.criteria.report_method)
        logging.info("Initiate ESSENCE report generation [%s-%s] for %s",
                     *span)

        self._getConn()
        report_oid = self._write_report(save_report)
        self._record_report(report_oid)
        if transmit_report:
            self._transmit_report(report_oid)
        if transmit_differences:
            self._transmit_differences(report_oid)
        self._closeConn()
        # advance the persisted 'last run' date when configured with one
        if hasattr(self, 'datePersistence'):
            self.datePersistence.bump_date()

        logging.info("Completed ESSENCE report generation [%s-%s] for %s",
                     *span)