Code example #1
    def test_determine_column_map(self):
        '''Basic test cases for ColumnManager.determine_column_map().'''

        # pylint: disable=protected-access

        column_manager = ColumnManager()

        for version, row in self.header_row.iteritems():
            column_manager.determine_column_map(row)
Code example #2
    def test_determine_log_version(self):
        '''Basic test cases for ColumnManager.determine_log_version().'''

        # pylint: disable=protected-access

        for version, row in self.header_row.iteritems():
            self.assertEqual(ColumnManager.determine_log_version(row), version)

        self.assertIsNone(ColumnManager.determine_log_version([]))
        self.assertIsNone(ColumnManager.determine_log_version(['a', 'b', 'c']))
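
The tests above assume a ``self.header_row`` fixture that maps each
supported log version string to that version's header row. A minimal
sketch of such a setUp follows; the column labels are hypothetical
placeholders (the real CSVPL16.1 and CSVPL17.1 headers are not shown in
this source), and depending on how determine_column_map() reads cell
values, the fixture may need xlrd-style cell objects rather than plain
strings:

    def setUp(self):
        '''Build hypothetical header-row fixtures keyed by log version.'''
        self.header_row = {
            'CSVPL16.1': ['DATE', 'LIC', 'MAKE', 'MODEL', 'COLOR'],
            'CSVPL17.1': ['DATE', 'LIC', 'MAKE', 'MODEL', 'COLOR',
                          'LOCATION'],
        }
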
Code example #3
    def test_is_header_row(self):
        '''Basic test cases for ColumnManager.is_header_row().'''

        # pylint: disable=protected-access

        column_manager = ColumnManager()

        version = 'CSVPL16.1'
        not_version = 'CSVPL17.1'
        row = self.header_row[version]
        self.assertTrue(column_manager.is_header_row(row))
        self.assertTrue(column_manager.is_header_row(row, version=version))
        self.assertFalse(column_manager.is_header_row(row,
                                                      version=not_version))

        not_version = 'CSVPL16.1'
        version = 'CSVPL17.1'
        row = self.header_row[version]
        self.assertTrue(column_manager.is_header_row(row))
        self.assertTrue(column_manager.is_header_row(row, version=version))
        self.assertFalse(column_manager.is_header_row(row,
                                                      version=not_version))
Code example #4
    def test_log_column_manager_init(self):
        '''Test initialization of a ColumnManager instance.'''

        column_manager = ColumnManager()
        self.assertIsNotNone(column_manager)
Code example #5
    def test_is_record_row(self, version=None):
        '''Basic test cases for ColumnManager._is_record_row().'''

        # pylint: disable=protected-access

        column_manager = ColumnManager()
Code example #6
    def parse(self):
        '''Parse the instance's parking log file.'''

        self._logger.debug('parsing log file %s', self.filepath)
        workbook = xlrd.open_workbook(self.filepath)

        self.column_manager = ColumnManager()

        sheet = workbook.sheet_by_name('Sheet1')
        self.column_manager.determine_column_map(sheet.row(0))

        if self.column_manager.log_version is None:
            err_msg = '%s: sheet %s row 0 is not a recognized header row'
            self._logger.error(err_msg, self.filepath, sheet.name)
            raise CsvParkingLogStructureError(err_msg %
                                              (self.filepath, sheet.name))
        self._logger.info('log version determined: %s',
                          self.column_manager.log_version)

        number_of_rows = sheet.nrows
        self._logger.debug('rows: %s', number_of_rows)

        # Just to be sure these are reset.
        self.rows_parsed = 0
        self.header_rows_skipped = 0
        self.rows_inprocessed = 0

        license_column = self.column_manager.license_column

        for row_num in range(number_of_rows):
            self.rows_parsed += 1
            record_row = sheet.row(row_num)

            if not record_row[license_column].value:
                continue

            if self.column_manager.is_header_row(record_row):
                self.header_rows_skipped += 1
                continue

            self.rows_inprocessed += 1
            _force_float_to_int(record_row,
                                self.column_manager.column_indices['LIC'])
            _force_float_to_int(record_row,
                                self.column_manager.column_indices['MODEL'])

            self.create_row_records(record_row)

        self._log_parse_statistics()

        if self.days and not (self.start_date or self.end_date):
            # This will also dynamically calculate start and end dates.
            self._prune_to_dynamic_date_bounds()

        self._canonicalize_plates()
        self._log_parse_statistics()

        for plate, log_records in self._canonical_plate_index.iteritems():
            plate_record_sets = self.get_plate_record_sets(log_records)
            self._plate_record_set_index[plate] = plate_record_sets
            _get_five_day_totals(plate_record_sets)
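
A hedged end-to-end sketch of driving this parser, assuming an
xlrd-readable spreadsheet at a hypothetical path; the constructor
arguments follow the LogParser class shown in code example #7:

    # Hypothetical driver; the file path and days value are examples.
    parser = LogParser('logs/parking-2017-06-01.xls', days=90)
    parser.parse()

    # parse() populates the indices that dashboard_data() reads.
    data = parser.dashboard_data()
    print data['date_range']['first_record_date']
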
Code example #7
class LogParser(object):
    '''A Creekside Village parking log reader and parser.

    A LogParser is responsible for reading in basic records and keeping
    track of the date offsets found.

    Arguments:

        filepath (str):
            The path to the log file the ``LogParser`` instance will
            parse.

        start_date (str, optional):
            The date, in YYYY-MM-DD format, of the earliest day to
            include in the records resulting from parsing.

        end_date (str, optional):
            The date, in YYYY-MM-DD format, of the day *after* the
            last day to include in the records resulting from parsing.

        days (int, optional):
            The maximum number of days to include in the records
            resulting from parsing.

    Raises:

        ValueError: if all three of ``start_date``, ``end_date`` and
        ``days`` are passed. At most two may be specified.

    The records retained after parsing will only include those with a
    date on or after ``start_date`` and before (not on) ``end_date``.
    If not explicitly provided, these date boundaries will be
    inferred according to the following rules.

    *   If the ``days`` parameter is provided along with one of
        ``start_date`` and ``end_date``, the other date boundary will
        be calculated.

    *   If the ``days`` parameter is provided and neither
        ``start_date`` nor ``end_date`` is provided, ``end_date`` will
        be set to the latest valid date found for a parsed record.

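    For example (an illustrative case, not taken from the source):
    ``LogParser('parking.xls', start_date='2021-01-01', days=30)``
    would infer ``end_date='2021-01-31'``, retaining records dated
    2021-01-01 through 2021-01-30.
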
    '''

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def __init__(self, filepath, start_date=None, end_date=None, days=None):  # pylint: disable=bad-continuation
        '''Initialize one LogParser instance.'''

        logger_name = '%s.%s' % (__name__, self.__class__.__name__)
        self._logger = logging.getLogger(logger_name)

        self.column_manager = None

        # The path to the log file.
        self.filepath = filepath

        # We set the "real" values in _initialize_refdt_offset_boundaries().
        # Note that internally we do all comparisons using
        # refdt_offset values, so start_date and end_date need not be
        # set.
        self.start_date = start_date
        self.end_date = end_date
        self.days = days
        self.start_refdt_offset = DEFAULT_START_REFDT_OFFSET
        self.end_refdt_offset = DEFAULT_END_REFDT_OFFSET

        # The following raises a ValueError if all three of days,
        # start_date and end_date are defined; if days and exactly one
        # date bound are defined, it also calculates the other bound by
        # adding/subtracting days as appropriate.
        self._initialize_refdt_offset_boundaries()

        # Individual LogRecord instances created from the log.
        self.log_records = []

        # An index, by plate, of all log records for that plate.
        self._plate_index = {}

        # An index, by canonical plate, of all log records for that
        # canonical plate.
        self._canonical_plate_index = {}

        # An index, by canonical plate, of all plate record sets for
        # a given canonical plate. Each plate record set contains all
        # log records for the canonical plate on a given day.
        self._plate_record_set_index = {}

        # Set the offset from REF_DATETIME to the latest date that is
        # considered valid. Typically either a date determined from
        # the name of the log file (if found) or today.
        self.max_valid_refdt_offset = _get_latest_valid_refdt_offset(
            os.path.basename(filepath))

        # Parsing statistics.
        self.rows_parsed = 0
        self.header_rows_skipped = 0
        self.rows_inprocessed = 0

        # Record statistics updated as the file is parsed.
        # - - - - - - - - - - - - - - - -
        # Records with dates that are invalid will still be
        # inprocessed, but if we are dynamically calculating start and
        # end dates based on the value of self.days, we'll use the
        # latest valid date found as the end date and invalid date
        # records will not be included.
        self.latest_valid_date_found = None
        self.latest_valid_refdt_offset_found = 0

        # The first and last record dates found, among record dates
        # that fall on or after self.start_date and before
        # self.end_date.
        self.first_record_date = None
        self.first_record_refdt_offset = 0

        self.last_record_date = None
        self.last_record_refdt_offset = 0

        # The minimum and maximum dates and refdt_offsets among all
        # records inprocessed, even if the record was outside the
        # desired date boundaries.
        self.min_date_inprocessed = None
        self.max_date_inprocessed = None

        # We start with a high min and low max, and increase/decrease
        # as we process records. This is obvious once you think about
        # it, but it always makes me do a double take when I see it.
        self.min_refdt_offset_inprocessed = DEFAULT_END_REFDT_OFFSET - 1
        self.max_refdt_offset_inprocessed = 0

        # The number of records that were found outside the desired
        # date boundaries. These are not inprocessed.
        self.records_out_of_date = 0

        # The total log entries inprocessed, that is, turned into
        # saved records.
        self.records_inprocessed = 0

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def _initialize_refdt_offset_boundaries(self):
        '''Calculate refdt_offset boundaries.
        '''

        if self.start_date and self.end_date and self.days:
            err_msg = ('no more than two of start_date (%s), end_date (%s)'
                       ' and days (%s) can be defined.')
            self._logger.error(err_msg, self.start_date, self.end_date,
                               self.days)
            raise ValueError(err_msg %
                             (self.start_date, self.end_date, self.days))

        self._logger.debug('calculating reference date offsets...')
        if self.start_date:
            self.start_refdt_offset = _datetime_to_refdt_offset(
                datetime.strptime(self.start_date, STANDARD_DATE_FORMAT))

        if self.end_date:
            self.end_refdt_offset = _datetime_to_refdt_offset(
                datetime.strptime(self.end_date, STANDARD_DATE_FORMAT))

        if self.days and (self.start_date or self.end_date):
            if self.start_date:
                assert self.end_refdt_offset == DEFAULT_END_REFDT_OFFSET
                self.end_refdt_offset = self.start_refdt_offset + self.days
                assert self.end_date is None
                self.end_date = _to_yyyy_mm_dd(self.end_refdt_offset)
            else:
                assert self.start_refdt_offset == DEFAULT_START_REFDT_OFFSET
                self.start_refdt_offset = self.end_refdt_offset - self.days
                assert self.start_date is None
                self.start_date = _to_yyyy_mm_dd(self.start_refdt_offset)

        self._logger.debug('starting offset: %s', self.start_refdt_offset)
        self._logger.debug('ending offset: %s', self.end_refdt_offset)

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def _validate_refdt_offset(self, log_record):
        '''Assess refdt offset validity and update tracking bounds.

        In addition to updating the latest valid, minimum and maximum
        inprocessed values for date and refdt_offset, this method
        logs a warning for invalid dates and returns True or False
        depending on whether the record's refdt offset is within the
        specified start and end boundaries.

        '''
        if log_record.refdt_offset > self.max_valid_refdt_offset:
            self._logger.warning(
                'row %s: refdt %s exceeds limit %s, date was %s',
                self.rows_parsed, log_record.refdt_offset,
                self.max_valid_refdt_offset, log_record.date)
        elif log_record.refdt_offset > self.latest_valid_refdt_offset_found:
            # It's valid, so it's the new latest found.
            self.latest_valid_refdt_offset_found = log_record.refdt_offset
            self.latest_valid_date_found = log_record.date

        # - - - - - - - - - - - - - - - -
        if log_record.refdt_offset < self.min_refdt_offset_inprocessed:
            self.min_refdt_offset_inprocessed = log_record.refdt_offset
            self.min_date_inprocessed = log_record.date

        if log_record.refdt_offset > self.max_refdt_offset_inprocessed:
            self.max_refdt_offset_inprocessed = log_record.refdt_offset
            self.max_date_inprocessed = log_record.date

        if not self._check_offset_in_bounds(log_record):
            self.records_out_of_date += 1
            return False

        return True

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def _check_offset_in_bounds(self, log_record):
        '''Check whether a log record's refdt_offset is between offset bounds.

        Note that the lower bound is included, the upper excluded.
        '''
        return (self.start_refdt_offset <= log_record.refdt_offset
                < self.end_refdt_offset)

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def _update_validated_record_bounds(self, log_record):
        '''Update the records of the first and last record date.

        These are the first and last dates among log entries that are
        being accepted and turned into parsed records.

        '''
        if (not self.first_record_refdt_offset
                or log_record.refdt_offset < self.first_record_refdt_offset):  # pylint: disable=bad-continuation
            self.first_record_date = log_record.date
            self.first_record_refdt_offset = log_record.refdt_offset

        if (not self.last_record_refdt_offset
                or log_record.refdt_offset > self.last_record_refdt_offset):  # pylint: disable=bad-continuation
            self.last_record_date = log_record.date
            self.last_record_refdt_offset = log_record.refdt_offset

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def _prune_to_dynamic_date_bounds(self):
        '''Discard records outside date bounds based only on days.

        This method updates all the output data fields affected by
        the pruning.
        '''
        self._logger.info('pruning to dynamic date bounds...')

        assert self.start_date is None
        assert self.end_date is None
        assert self.start_refdt_offset == DEFAULT_START_REFDT_OFFSET
        assert self.end_refdt_offset == DEFAULT_END_REFDT_OFFSET
        assert self.days is not None

        self._set_dynamic_date_bounds()

        # We have to calculate these anew, so we reset them here.
        self.first_record_date = None
        self.first_record_refdt_offset = 0
        self.last_record_date = None
        self.last_record_refdt_offset = 0
        self._plate_index = {}

        # Selector for the comprehension below; it also tallies records
        # that fall out of bounds and updates bounds for retained ones.
        def in_bounds(log_record):
            '''Check offset bounds, and tally those out of bounds.'''
            if not self._check_offset_in_bounds(log_record):
                self.records_out_of_date += 1
                return False
            self._update_validated_record_bounds(log_record)
            return True

        # Remove log_record entries that are out of bounds.
        self.log_records = [r for r in self.log_records if in_bounds(r)]

        # Add to plate index.
        for log_record in self.log_records:
            self._plate_index.setdefault(log_record.plate, []).append(
                log_record)

        self._logger.debug('first record offset set to: %s',
                           self.first_record_refdt_offset)
        self._logger.debug('last record offset set to: %s',
                           self.last_record_refdt_offset)

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def _set_dynamic_date_bounds(self):
        '''Calculate start/end date if only days is set.

        We want days worth of records and the last record we retain is
        on the day *before* end_refdt_offset. Therefore we set the
        end_refdt_offset to one more than the last record we'll be
        retaining.

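        For example (illustrative numbers): if the latest valid
        refdt_offset found is 7700 and ``days`` is 30, then the end
        offset is set to 7701 and the start offset to 7671.
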
        '''
        assert self.days and not (self.start_date or self.end_date)
        self._logger.debug('resetting date offset bounds for %s days...',
                           self.days)

        # Previously this block ran even when end_date was already set,
        # so last_record_refdt_offset could be lower than
        # latest_valid_refdt_offset_found. Now we only get here when no
        # end_date was given, so no records have been thrown away yet.
        assert (self.last_record_refdt_offset >=
                self.latest_valid_refdt_offset_found)

        self.end_refdt_offset = self.latest_valid_refdt_offset_found + 1
        self.end_date = _to_yyyy_mm_dd(self.end_refdt_offset)

        self.start_refdt_offset = self.end_refdt_offset - self.days
        self.start_date = _to_yyyy_mm_dd(self.start_refdt_offset)

        self._logger.debug('starting offset set to: %s',
                           self.start_refdt_offset)
        self._logger.debug('start date set to: %s', self.start_date)

        self._logger.debug('ending offset set to: %s', self.end_refdt_offset)
        self._logger.debug('end date set to: %s', self.end_date)

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def _canonicalize_plates(self):
        '''Find canonical plates and create canonical plate index.

        The canonical representative will be the most commonly
        occurring plate among records that are equivalent as
        determined by matchiness.

        The lists of log records that are the values of the
        canonical plate index are sorted by ``refdt_offset``.

        '''

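        # Illustration with hypothetical plates: matchiness might place
        # 'ABC123', 'A8C123' and 'ABC12E' in one equivalence class; if
        # 'ABC123' occurs most often among their records, it becomes
        # the canonical plate for all of them.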
        matches = matchiness.find_equivalence_classes(self._plate_index.keys())

        for plate_list in matches:
            # Get all log records with a plate in matches.
            matching_records = []
            for plate in plate_list:
                matching_records.extend(self._plate_index[plate])

            canonical_plate = _most_common_element(
                [r.plate for r in matching_records])

            for log_record in matching_records:
                log_record.canonical_plate = canonical_plate
            self._canonical_plate_index[canonical_plate] = sorted(
                matching_records, key=lambda x: x.refdt_offset)

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def _consolidate_date_records(self):
        '''Combine multiple records for the same date without information loss.

        Multiple records may be recorded on one date for the same
        canonical plate under a number of circumstances:
        *   A tow record as well as a log record.
        *   Mistranscription errors in transferring data to Excel.
        *   Canonicalization collisions between what should be separate
            plates.

        '''

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def _log_parse_statistics(self):
        '''Log statistics on the parsed data.'''

        self._logger.debug('excel rows processed: %s', self.rows_inprocessed)
        self._logger.debug('header rows skipped: %s', self.header_rows_skipped)

        self._logger.debug('earliest refdt_offset found: %s',
                           self.min_refdt_offset_inprocessed)
        self._logger.debug('earliest date found: %s',
                           self.min_date_inprocessed)
        self._logger.debug('latest refdt_offset found: %s',
                           self.max_refdt_offset_inprocessed)
        self._logger.debug('latest date found: %s', self.max_date_inprocessed)
        self._logger.debug('latest valid refdt_offset found: %s',
                           self.latest_valid_refdt_offset_found)
        self._logger.debug('latest valid date found: %s',
                           self.latest_valid_date_found)

        self._logger.debug('records inprocessed: %s', self.records_inprocessed)
        self._logger.debug('out of date records skipped: %s',
                           self.records_out_of_date)
        self._logger.debug('records retained: %s', len(self.log_records))

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # pylint: disable=invalid-name
    def _calculate_guest_parking_window_totals(self, plate):
        '''Populate the 30, 60, 90 day window totals for a canonical plate.'''

        record_sets = self._plate_record_set_index[plate]
        # We tally in a dict keyed by window name, and convert to a
        # list of {'key': ..., 'value': ...} dicts at the end.
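        # WINDOWS is assumed to map window names to sizes in days,
        # e.g. {'log1_30': 30, 'log5_90': 90} (hypothetical keys).
        # Names starting with 'log1' count individual guest parking
        # days; 'log5' names count record sets whose five-day total
        # reaches three or more.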
        window_totals = {k: 0 for k in WINDOWS}

        # record_sets is sorted in refdt_offset order.
        for record_set in record_sets:
            if not record_set.record_class['guest_parking']:
                continue
            days_since_record = self.end_refdt_offset - record_set.refdt_offset
            for window_type, window_size in WINDOWS.iteritems():
                if days_since_record <= window_size:
                    if window_type[:4] == 'log1':
                        window_totals[window_type] += 1
                    elif (window_type[:4] == 'log5'
                          and record_set.five_day_total >= 3):  # pylint: disable=bad-continuation
                        window_totals[window_type] += 1

        return [{'key': k, 'value': v} for k, v in window_totals.iteritems()]

    # pylint: enable=invalid-name

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def parse(self):
        '''Parse the instance's parking log file.'''

        self._logger.debug('parsing log file %s', self.filepath)
        workbook = xlrd.open_workbook(self.filepath)

        self.column_manager = ColumnManager()

        sheet = workbook.sheet_by_name('Sheet1')
        self.column_manager.determine_column_map(sheet.row(0))

        if self.column_manager.log_version is None:
            err_msg = '%s: sheet %s row 0 is not a recognized header row'
            self._logger.error(err_msg, self.filepath, sheet.name)
            raise CsvParkingLogStructureError(err_msg %
                                              (self.filepath, sheet.name))
        self._logger.info('log version determined: %s',
                          self.column_manager.log_version)

        number_of_rows = sheet.nrows
        self._logger.debug('rows: %s', number_of_rows)

        # Just to be sure these are reset.
        self.rows_parsed = 0
        self.header_rows_skipped = 0
        self.rows_inprocessed = 0

        license_column = self.column_manager.license_column

        for row_num in range(number_of_rows):
            self.rows_parsed += 1
            record_row = sheet.row(row_num)

            if not record_row[license_column].value:
                continue

            if self.column_manager.is_header_row(record_row):
                self.header_rows_skipped += 1
                continue

            self.rows_inprocessed += 1
            _force_float_to_int(record_row,
                                self.column_manager.column_indices['LIC'])
            _force_float_to_int(record_row,
                                self.column_manager.column_indices['MODEL'])

            self.create_row_records(record_row)

        self._log_parse_statistics()

        if self.days and not (self.start_date or self.end_date):
            # This will also dynamically calculate start and end dates.
            self._prune_to_dynamic_date_bounds()

        self._canonicalize_plates()
        self._log_parse_statistics()

        for plate, log_records in self._canonical_plate_index.iteritems():
            plate_record_sets = self.get_plate_record_sets(log_records)
            self._plate_record_set_index[plate] = plate_record_sets
            _get_five_day_totals(plate_record_sets)

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def create_row_records(self, record_row):
        '''Create LogRecord instances for the dates logged in this row.
        '''

        # Syntax sugar.
        column_indices = self.column_manager.column_indices

        plate = record_row[column_indices['LIC']].value

        # Add a record for each of these potential date fields
        # that have a value defined.
        for event_field_index in self.column_manager.record_type_columns:

            record_type = self.column_manager.record_type[event_field_index]

            # If a value is present for this type of event, it should
            # be the date the event was logged.
            if record_row[event_field_index].value:

                record_date = record_row[event_field_index].value

                new_record = LogRecord(
                    plate,
                    record_date,
                    record_type,
                    make=record_row[column_indices['MAKE']].value,
                    model=record_row[column_indices['MODEL']].value,
                    color=record_row[column_indices['COLOR']].value,
                    location=record_row[column_indices['LOCATION']].value)

                if self._validate_refdt_offset(new_record):
                    self._update_validated_record_bounds(new_record)
                    self.log_records.append(new_record)
                    self.records_inprocessed += 1
                    self._plate_index.setdefault(
                        new_record.plate, []).append(new_record)

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def get_plate_record_sets(self, canonical_plate_log_records):
        '''Create a list of PlateRecordSet instances from log records.

        Arguments:

            canonical_plate_log_records (list):
                A list of ``LogRecord`` instances that share a common
                ``canonical_plate`` value.

        The list of plate record sets returned will be sorted by
        ``refdt_offset``.

        '''

        records = sorted(canonical_plate_log_records,
                         key=lambda r: r.refdt_offset)
        canonical_plates = {r.canonical_plate for r in records}

        if len(canonical_plates) > 1:
            err_msg = ('get_plate_record_sets():'
                       ' canonical_plate must be unique; found: %s')
            self._logger.error(err_msg, list(canonical_plates))
            raise ValueError(err_msg % list(canonical_plates))

        plate_record_sets = []

        # Walk through records, finding groups with common
        # refdt_offset. Since we sorted, this is straightforward.
        record_num = 0
        while record_num < len(records):

            current_refdt_offset = records[record_num].refdt_offset
            current_record_set = []

            while (record_num < len(records) and
                   records[record_num].refdt_offset == current_refdt_offset):  # pylint: disable=bad-continuation

                current_record_set.append(records[record_num])
                record_num += 1

            new_set = PlateRecordSet(self.column_manager, current_record_set)
            plate_record_sets.append(new_set)

        return plate_record_sets

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    def dashboard_data(self):
        '''Create a structure with data for the dashboard.
        {
            "date_range"
                "first_record_date",
                "first_record_refdt_offset",
                "last_record_date",
                "last_record_refdt_offset"
            "records_by_lic"
                [PLATE]
                    "canonical_lic",
                    "lic_equivalents",
                    "records",
                        []
                            "canonical_lic",
                            "days_since_20000101",
                            "five_day_total",
                            "lic_equivalents",
                            "raw_color",
                            "raw_date",
                            "raw_lic",
                            "raw_location",
                            "raw_make",
                            "raw_model"
                    "window_total"
                        []
                            "key", "value"
            }
        '''

        dashboard_data = {
            'date_range': {
                'first_record_date': self.first_record_date,
                'first_record_refdt_offset': self.first_record_refdt_offset,
                'last_record_date': self.last_record_date,
                'last_record_refdt_offset': self.last_record_refdt_offset,
            },
            'records_by_lic': {
                plate: {
                    'canonical_plate':
                    (self._plate_record_set_index[plate][0].canonical_plate),
                    'records': [u.to_dict() for u in v],
                    'window_total':
                    (self._calculate_guest_parking_window_totals(plate)),
                }
                for plate, v in self._plate_record_set_index.iteritems()
            }
        }

        # This may be consumed downstream.
        return dashboard_data
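
The returned structure contains only plain dicts, lists, strings and
numbers (assuming LogRecord.to_dict() emits the same), so downstream
consumers can serialize it directly; a minimal sketch with a
hypothetical output path:

    import json

    # Dump the dashboard structure produced by a parsed LogParser.
    with open('dashboard.json', 'w') as outfile:
        json.dump(parser.dashboard_data(), outfile, indent=2)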