Exemple #1
0
    def test_transaction(self):
        """Check that an exception inside a weedb.Transaction rolls it back.

        A table is created outside of any transaction, ten rows are inserted
        inside a transaction, and the transaction is then aborted by raising
        an exception. A fresh connection must find the table empty.
        """

        class _ForcedRollback(Exception):
            """Raised on purpose to abort the transaction under test."""

        # Create the database and schema
        weedb.create(self.db_dict)
        _connect = weedb.connect(self.db_dict)
        try:
            # With sqlite, a rollback can roll back a table creation. With MySQL, it does not. So,
            # create the table outside of the transaction. We're not as concerned about a
            # transaction failing when creating a table, because it only happens the first time
            # weewx starts up.
            _connect.execute(
                """CREATE TABLE test1 ( dateTime INTEGER NOT NULL UNIQUE PRIMARY KEY, x REAL );"""
            )

            # Trigger the rollback by raising a dedicated exception. Only that
            # exception is swallowed, so a genuine failure of the INSERT
            # statements fails the test instead of passing it vacuously.
            try:
                with weedb.Transaction(_connect) as _cursor:
                    for i in range(10):
                        _cursor.execute(
                            """INSERT INTO test1 (dateTime, x) VALUES (?, ?)""",
                            (i, i + 1))
                    # Raise an exception:
                    raise _ForcedRollback("Bogus exception")
            except _ForcedRollback:
                pass
        finally:
            # Release the first connection before a second one is opened
            _connect.close()

        # Now make sure nothing is in the database
        _connect = weedb.connect(self.db_dict)
        try:
            _cursor = _connect.cursor()
            try:
                _cursor.execute("SELECT dateTime, x from test1")
                _row = _cursor.fetchone()
            finally:
                _cursor.close()
        finally:
            _connect.close()
        self.assertIsNone(_row)
Exemple #2
0
    def do_fix(self):
        """Rebuild the max field of the windSpeed daily summary from the archive.

        For every day span yielded by self.genSummaryDaySpans() the maximum
        windSpeed and the time it occurred are obtained from the archive and,
        unless self.dry_run is set, written back to the windSpeed daily
        summary. The work is split into tranches of self.trans_days days, each
        tranche being handled in its own database transaction.
        """

        started = time.time()
        log.info("maxwindspeed: Applying %s..." % self.name)
        obs_type = 'windSpeed'
        # Gregorian day numbers of the first daily summary and of the last
        # archive record; these bound the days to be worked on
        first_greg = weeutil.weeutil.toGregorianDay(
            self.first_summary_ts(obs_type))
        final_greg = weeutil.weeutil.toGregorianDay(self.dbm.last_timestamp)
        # running tallies
        days_done = 0
        latest_start = None
        tranche_greg = first_greg
        while tranche_greg <= final_greg:
            # timestamps of the first and last day of this tranche
            span_from_ts = weeutil.weeutil.startOfGregorianDay(tranche_greg)
            span_to_ts = weeutil.weeutil.startOfGregorianDay(
                tranche_greg + self.trans_days - 1)
            # one transaction per tranche
            with weedb.Transaction(self.dbm.connection) as _cursor:
                # walk the rows of the windSpeed daily summary table
                spans = self.genSummaryDaySpans(span_from_ts, span_to_ts,
                                                obs_type)
                for summary_span in spans:
                    # the day's max windSpeed, and when it occurred, according
                    # to the archive
                    peak_ts, peak = self.get_archive_span_max(summary_span,
                                                              obs_type)
                    # a dry run calculates but never writes
                    if not self.dry_run:
                        self.write_max(obs_type, summary_span.start, peak,
                                       peak_ts)
                    days_done += 1
                    # report progress every 50 days
                    if not days_done % 50:
                        self._progress(days_done, summary_span.start)
                    latest_start = summary_span.start
            # on to the next tranche
            tranche_greg += self.trans_days

        # a final progress report so that the total tallies with the log
        self._progress(days_done, latest_start)
        print(file=sys.stdout)
        elapsed = time.time() - started
        # all done, so log the outcome
        log.info("maxwindspeed: Maximum windSpeed calculated "
                 "for %s days in %0.2f seconds." % (days_done, elapsed))
        if self.dry_run:
            log.info("maxwindspeed: This was a dry run. %s was not applied." %
                     self.name)
Exemple #3
0
 def populate_db(self):
     """Create the test database with tables test1 and test2.

     Both tables share the same eight-column layout. Twenty rows are
     inserted into test1 (dateTime, min and mintime only); test2 is left
     empty. The connection is always closed, even if a statement fails.
     """
     weedb.create(self.db_dict)
     # Creating the same database a second time must be refused
     self.assertRaises(weedb.DatabaseExists, weedb.create, self.db_dict)
     _connect = weedb.connect(self.db_dict)
     try:
         with weedb.Transaction(_connect) as _cursor:
             _cursor.execute("""CREATE TABLE test1 ( dateTime INTEGER NOT NULL UNIQUE PRIMARY KEY,
                   min REAL, mintime INTEGER, max REAL, maxtime INTEGER, sum REAL, count INTEGER, descript CHAR(20));""")
             _cursor.execute("""CREATE TABLE test2 ( dateTime INTEGER NOT NULL UNIQUE PRIMARY KEY,
                   min REAL, mintime INTEGER, max REAL, maxtime INTEGER, sum REAL, count INTEGER, descript CHAR(20));""")
             for irec in range(20):
                 _cursor.execute("INSERT INTO test1 (dateTime, min, mintime) VALUES (?, ?, ?)", (irec, 10*irec, irec))
     finally:
         # Do not leak the connection if table creation or an insert fails
         _connect.close()
Exemple #4
0
 def test_create(self):
     """Check the tables, columns and schema produced by populate_db().

     Also verifies that inserting a duplicate primary key raises
     weedb.IntegrityError.
     """
     self.populate_db()
     expected_columns = ['dateTime', 'min', 'mintime', 'max', 'maxtime', 'sum', 'count', 'descript']
     _connect = weedb.connect(self.db_dict)
     try:
         # assertItemsEqual() is not available in Python 3's unittest, so
         # compare sorted lists instead. This is just as insensitive to the
         # order in which the tables are reported and works on Python 2 and 3.
         self.assertEqual(sorted(_connect.tables()), ['test1', 'test2'])
         self.assertEqual(_connect.columnsOf('test1'), expected_columns)
         self.assertEqual(_connect.columnsOf('test2'), expected_columns)
         for icol, col in enumerate(_connect.genSchemaOf('test1')):
             self.assertEqual(schema[icol], col)
         for icol, col in enumerate(_connect.genSchemaOf('test2')):
             self.assertEqual(schema[icol], col)
         # Make sure an IntegrityError gets raised in the case of a duplicate key:
         with weedb.Transaction(_connect) as _cursor:
             self.assertRaises(weedb.IntegrityError, _cursor.execute,
                               "INSERT INTO test1 (dateTime, min, mintime) VALUES (0, 10, 0)")
     finally:
         # Close the connection even when an assertion above fails
         _connect.close()
0
    def test_patch(self):
        """Corrupt part of the daily summaries, then check patch_sums() repairs them."""
        manager = self.db_manager

        # The database must start out at the current (V3.0) version
        self.assertEqual(manager.version, weewx.manager.DaySummaryManager.version)

        # Overwrite the weighted sums of every daily summary row later than
        # mid_ts with their unweighted counterparts
        update_template = "UPDATE %s_day_%s SET wsum=sum, sumtime=count WHERE dateTime >?"
        with weedb.Transaction(manager.connection) as cursor:
            for obs_key in manager.daykeys:
                statement = update_template % (manager.table_name, obs_key)
                cursor.execute(statement, (mid_ts,))

        # Pretend the database is still at V2.0 so that the patch is applied
        manager.version = '2.0'

        manager.patch_sums()
        self.check_weights()

        # Patching must have brought the version back up to V3.0
        self.assertEqual(manager.version, weewx.manager.DaySummaryManager.version)
Exemple #6
0
    def _create_table(archive_db_dict, archiveSchema, table):
        """Create a SQL table using a given archive schema.

        archive_db_dict: A database dictionary holding the information necessary
        to open the database.

        archiveSchema: The schema to be used. Each element must be a
        (column name, SQL type) pair.

        table: The name of the table to be used within the database.

        Returns:
        A connection to the database holding the newly created table.

        Raises:
        Any exception raised while creating the table. The connection is
        closed and the failure is logged to syslog before it is re-raised."""

        # First try to create the database. If it already exists, an exception will
        # be thrown.
        try:
            weedb.create(archive_db_dict)
        except weedb.DatabaseExists:
            pass

        # The column definitions, joined together with commas. Put the column
        # name in backquotes, because at least one of them ('interval')
        # is a MySQL reserved word
        _sqltypestr = ', '.join("`%s` %s" % _type for _type in archiveSchema)

        _connect = weedb.connect(archive_db_dict)
        try:
            with weedb.Transaction(_connect) as _cursor:
                _cursor.execute("CREATE TABLE %s (%s);" % (table, _sqltypestr))

        # 'except ... as' is valid on Python 2.6+ and Python 3, whereas the
        # older 'except Exception, e' form is a syntax error on Python 3
        except Exception as e:
            _connect.close()
            syslog.syslog(
                syslog.LOG_ERR,
                "archive: Unable to create database table '%s'." % table)
            syslog.syslog(syslog.LOG_ERR, "****     %s" % (e, ))
            raise

        # Hand the open connection to the caller, as documented
        return _connect
Exemple #7
0
    def run(self):
        """Main entry point for calculating missing derived fields.

        Calculate the missing derived fields for the timespan concerned, save
        the calculated data to archive and recalculate the daily summaries.

        Only archive records with self.start_ts < dateTime <= self.stop_ts are
        processed. The records are worked through self.trans_days days per
        database transaction. The daily summaries are rebuilt afterwards
        unless self.dry_run is set. A summary of the work done is logged on
        completion.
        """

        # record the current time
        t1 = time.time()

        # Instantiate a dummy engine, to be used to calculate derived variables. This will
        # cause all the xtype services to get loaded.
        engine = weewx.engine.DummyEngine(self.config_dict)
        # While the above instantiated an instance of StdWXCalculate, we have no way of
        # retrieving it. So, instantiate another one, then use that to calculate derived types.
        wxcalculate = weewx.wxservices.StdWXCalculate(engine, self.config_dict)

        # initialise some counters so we know what we have processed
        days_updated = 0
        days_processed = 0
        total_records_processed = 0
        total_records_updated = 0

        # obtain gregorian days for our start and stop timestamps
        start_greg = weeutil.weeutil.toGregorianDay(self.start_ts)
        stop_greg = weeutil.weeutil.toGregorianDay(self.stop_ts)
        # start at the first day
        day = start_greg
        while day <= stop_greg:
            # get the start and stop timestamps for this tranche
            tr_start_ts = weeutil.weeutil.startOfGregorianDay(day)
            tr_stop_ts = min(weeutil.weeutil.startOfGregorianDay(stop_greg + 1),
                             weeutil.weeutil.startOfGregorianDay(day + self.trans_days))
            # start the transaction
            with weedb.Transaction(self.dbm.connection) as _cursor:
                # iterate over each day in the tranche we are to work in
                for tranche_day in weeutil.weeutil.genDaySpans(tr_start_ts, tr_stop_ts):
                    # initialise a counter for records updated on this day
                    records_updated = 0
                    # iterate over each record in this day
                    for record in self.dbm.genBatchRecords(startstamp=tranche_day.start,
                                                           stopstamp=tranche_day.stop):
                        # we are only concerned with records after the start
                        # and before or equal to the stop timestamps
                        if not (self.start_ts < record['dateTime'] <= self.stop_ts):
                            continue

                        # First obtain a list of the fields that may be
                        # calculated. 'software' fields are always included,
                        # 'prefer_hardware' fields only if the record has no
                        # value for them.
                        extras_list = []
                        for obs in wxcalculate.calc_dict:
                            directive = wxcalculate.calc_dict[obs]
                            if directive == 'software' \
                                    or (directive == 'prefer_hardware'
                                        and (obs not in record or record[obs] is None)):
                                extras_list.append(obs)

                        # calculate the missing derived fields for the record
                        wxcalculate.do_calculations(record)

                        # Obtain a new record dictionary that contains only those items
                        # that wxcalculate calculated. Use dictionary comprehension.
                        extras_dict = {k: v for (k, v) in record.items() if k in extras_list}

                        # update the archive with the calculated data
                        records_updated += self.update_record_fields(record['dateTime'],
                                                                     extras_dict)
                        # update the total records processed
                        total_records_processed += 1
                        # Give the user some information on progress. This is
                        # done only after a record has actually been processed,
                        # otherwise a message would be emitted for every
                        # skipped record while the tally sits on a multiple of
                        # 1000 (including zero).
                        if total_records_processed % 1000 == 0:
                            p_msg = "Processing record: %d; Last record: %s" % (total_records_processed,
                                                                                timestamp_to_string(record['dateTime']))
                            self._progress(p_msg)
                    # update the total records updated
                    total_records_updated += records_updated
                    # if we updated any records on this day increment the count
                    # of days updated
                    if records_updated > 0:
                        days_updated += 1
                    days_processed += 1
            # advance to the next tranche
            day += self.trans_days
        # finished, so give the user some final information on progress, mainly
        # so the total tallies with the log
        p_msg = "Processing record: %d; Last record: %s" % (total_records_processed,
                                                            timestamp_to_string(tr_stop_ts))
        self._progress(p_msg, overprint=False)
        # now update the daily summaries, but only if this is not a dry run
        if not self.dry_run:
            print("Recalculating daily summaries...")
            # first we need a start and stop date object
            start_d = datetime.date.fromtimestamp(self.start_ts)
            # Since each daily summary is identified by the midnight timestamp
            # for that day we need to make sure our stop timestamp is not on
            # a midnight boundary or we will rebuild the following day's summary
            # as well. If it is on a midnight boundary just subtract 1 second
            # and use that.
            summary_stop_ts = self.stop_ts
            if weeutil.weeutil.isMidnight(self.stop_ts):
                summary_stop_ts -= 1
            stop_d = datetime.date.fromtimestamp(summary_stop_ts)
            # do the update
            self.dbm.backfill_day_summary(start_d=start_d, stop_d=stop_d)
            print(file=sys.stdout)
            print("Finished recalculating daily summaries")
        else:
            # it's a dry run so say the rebuild was skipped
            print("This is a dry run, recalculation of daily summaries was skipped")
        tdiff = time.time() - t1
        # we are done so log and inform the user
        _day_processed_str = "day" if days_processed == 1 else "days"
        _day_updated_str = "day" if days_updated == 1 else "days"
        # a dry run reports what would have happened rather than what did
        _outcome_str = "would have been" if self.dry_run else "were"
        log.info("Processed %d %s consisting of %d records. "
                 "%d %s consisting of %d records %s updated "
                 "in %0.2f seconds." % (days_processed,
                                        _day_processed_str,
                                        total_records_processed,
                                        days_updated,
                                        _day_updated_str,
                                        total_records_updated,
                                        _outcome_str,
                                        tdiff))
Exemple #8
0
    def run(self):
        """Main entry point for calculating missing derived fields.

        Calculate the missing derived fields for the timespan concerned, save
        the calculated data to archive and recalculate the daily summaries.

        Only archive records with self.start_ts < dateTime <= self.stop_ts are
        processed. The records are worked through self.trans_days days per
        database transaction. The daily summaries are rebuilt afterwards
        unless self.dry_run is set. A summary of the work done is logged on
        completion.

        Raises weewx.ViolatedPrecondition if the station altitude has no unit.
        """

        # record the current time
        t1 = time.time()
        # obtain a wxservices.WXCalculate object to calculate the missing fields
        # first we need station altitude, latitude and longitude
        stn_dict = self.config_dict['Station']
        altitude_t = option_as_list(stn_dict.get('altitude', (None, None)))
        try:
            altitude_vt = weewx.units.ValueTuple(float(altitude_t[0]),
                                                 altitude_t[1],
                                                 "group_altitude")
        except (KeyError, IndexError) as e:
            # An altitude given without a unit yields a one element sequence,
            # in which case indexing the unit raises IndexError rather than
            # KeyError.
            raise weewx.ViolatedPrecondition(
                "Value 'altitude' needs a unit (%s)" % e)
        latitude_f = float(stn_dict['latitude'])
        longitude_f = float(stn_dict['longitude'])

        # now we can create a WXCalculate object
        wxcalculate = weewx.wxservices.WXCalculate(self.config_dict,
                                                   altitude_vt,
                                                   latitude_f,
                                                   longitude_f)

        # initialise some counters so we know what we have processed
        days_updated = 0
        days_processed = 0
        total_records_processed = 0
        total_records_updated = 0

        # obtain gregorian days for our start and stop timestamps
        start_greg = weeutil.weeutil.toGregorianDay(self.start_ts)
        stop_greg = weeutil.weeutil.toGregorianDay(self.stop_ts)
        # start at the first day
        day = start_greg
        while day <= stop_greg:
            # get the start and stop timestamps for this tranche
            tr_start_ts = weeutil.weeutil.startOfGregorianDay(day)
            tr_stop_ts = min(weeutil.weeutil.startOfGregorianDay(stop_greg + 1),
                             weeutil.weeutil.startOfGregorianDay(day + self.trans_days))
            # start the transaction
            with weedb.Transaction(self.dbm.connection) as _cursor:
                # iterate over each day in the tranche we are to work in
                for tranche_day in weeutil.weeutil.genDaySpans(tr_start_ts, tr_stop_ts):
                    # initialise a counter for records updated on this day
                    records_updated = 0
                    # iterate over each record in this day
                    for record in self.dbm.genBatchRecords(startstamp=tranche_day.start,
                                                           stopstamp=tranche_day.stop):
                        # we are only concerned with records after the start
                        # and before or equal to the stop timestamps
                        if not (self.start_ts < record['dateTime'] <= self.stop_ts):
                            continue

                        # First obtain a list of the fields that may be
                        # calculated. 'software' fields are always included,
                        # 'prefer_hardware' fields only if the record has no
                        # value for them.
                        extras_list = []
                        for obs in wxcalculate.svc_dict['Calculations']:
                            directive = wxcalculate.svc_dict['Calculations'][obs]
                            if directive == 'software' \
                                    or (directive == 'prefer_hardware'
                                        and (obs not in record or record[obs] is None)):
                                extras_list.append(obs)

                        # calculate the missing derived fields for the record
                        wxcalculate.do_calculations(data_dict=record,
                                                    data_type='archive')
                        # Obtain a dict containing only those fields that
                        # WXCalculate calculated. We could do this as a
                        # dictionary comprehension but python2.6 does not
                        # support dictionary comprehensions.
                        extras_dict = {}
                        for k in extras_list:
                            if k in record:
                                extras_dict[k] = record[k]
                        # update the archive with the calculated data
                        records_updated += self.update_record_fields(record['dateTime'],
                                                                     extras_dict)
                        # update the total records processed
                        total_records_processed += 1
                        # Give the user some information on progress. This is
                        # done only after a record has actually been processed,
                        # otherwise a message would be emitted for every
                        # skipped record while the tally sits on a multiple of
                        # 1000 (including zero).
                        if total_records_processed % 1000 == 0:
                            p_msg = "Processing record: %d; Last record: %s" % (total_records_processed,
                                                                                timestamp_to_string(record['dateTime']))
                            self._progress(p_msg)
                    # update the total records updated
                    total_records_updated += records_updated
                    # if we updated any records on this day increment the count
                    # of days updated
                    if records_updated > 0:
                        days_updated += 1
                    days_processed += 1
            # advance to the next tranche
            day += self.trans_days
        # finished, so give the user some final information on progress, mainly
        # so the total tallies with the log
        p_msg = "Processing record: %d; Last record: %s" % (total_records_processed,
                                                            timestamp_to_string(tr_stop_ts))
        self._progress(p_msg, overprint=False)
        # now update the daily summaries, but only if this is not a dry run
        if not self.dry_run:
            print("Recalculating daily summaries...")
            # first we need a start and stop date object
            start_d = datetime.date.fromtimestamp(self.start_ts)
            # Since each daily summary is identified by the midnight timestamp
            # for that day we need to make sure our stop timestamp is not on
            # a midnight boundary or we will rebuild the following day's summary
            # as well. If it is on a midnight boundary just subtract 1 second
            # and use that.
            summary_stop_ts = self.stop_ts
            if weeutil.weeutil.isMidnight(self.stop_ts):
                summary_stop_ts -= 1
            stop_d = datetime.date.fromtimestamp(summary_stop_ts)
            # do the update
            self.dbm.backfill_day_summary(start_d=start_d, stop_d=stop_d)
            print(file=sys.stdout)
            print("Finished recalculating daily summaries")
        else:
            # it's a dry run so say the rebuild was skipped
            print("This is a dry run, recalculation of daily summaries was skipped")
        tdiff = time.time() - t1
        # we are done so log and inform the user
        _day_processed_str = "day" if days_processed == 1 else "days"
        _day_updated_str = "day" if days_updated == 1 else "days"
        # a dry run reports what would have happened rather than what did
        _outcome_str = "would have been" if self.dry_run else "were"
        log.info("Processed %d %s consisting of %d records. "
                 "%d %s consisting of %d records %s updated "
                 "in %0.2f seconds." % (days_processed,
                                        _day_processed_str,
                                        total_records_processed,
                                        days_updated,
                                        _day_updated_str,
                                        total_records_updated,
                                        _outcome_str,
                                        tdiff))
Exemple #9
0
    def do_fix(self, np_ts):
        """Apply the interval weighting fix to the daily summaries.

        np_ts: Timestamp of the next day to be weighted, or None to start from
        the earliest daily summary.

        The daily summaries are weighted self.trans_days days per database
        transaction. After each day the timestamp of that day is saved as the
        'lastWeightPatch' metadata value; the value is removed again once all
        days have been weighted. If self.dry_run is set the weighting is
        calculated but nothing is written to the database.

        Any exception raised while weighting a day is logged and re-raised.
        """

        # do we need to weight? Only weight if next day to weight ts is None or
        # there are records in the archive from that day
        if np_ts is None or self.dbm.last_timestamp > np_ts:
            t1 = time.time()
            log.info("intervalweighting: Applying %s..." % self.name)
            _days = 0
            # Get the earliest daily summary ts and the obs that it came from
            first_ts, obs = self.first_summary()
            # Get the start and stop ts for our first transaction days
            _tr_start_ts = np_ts if np_ts is not None else first_ts
            _tr_stop_dt = datetime.datetime.fromtimestamp(_tr_start_ts) \
                + datetime.timedelta(days=self.trans_days)
            _tr_stop_ts = time.mktime(_tr_stop_dt.timetuple())
            _tr_stop_ts = min(startOfDay(self.dbm.last_timestamp), _tr_stop_ts)
            last_start = None
            while True:
                with weedb.Transaction(self.dbm.connection) as _cursor:
                    for _day_span in self.genSummaryDaySpans(_tr_start_ts, _tr_stop_ts, obs):
                        # Get the weight to be applied for the day
                        _weight = self.get_interval(_day_span) * 60
                        # Get the current day stats in an accumulator
                        _day_accum = self.dbm._get_day_summary(_day_span.start)
                        # Set the unit system for the accumulator
                        _day_accum.unit_system = self.dbm.std_unit_system
                        # Weight the necessary accumulator stats, use a
                        # try..except in case something goes wrong. last_key
                        # remembers which key was being weighted so that it
                        # can be named in the log if the weighting fails.
                        last_key = None
                        try:
                            for _day_key in self.dbm.daykeys:
                                last_key = _day_key
                                _day_accum[_day_key].wsum *= _weight
                                _day_accum[_day_key].sumtime *= _weight
                                # Do we have a vecstats accumulator?
                                if hasattr(_day_accum[_day_key], 'wsquaresum'):
                                    # Yes, so update the weighted vector stats
                                    _day_accum[_day_key].wsquaresum *= _weight
                                    _day_accum[_day_key].xsum *= _weight
                                    _day_accum[_day_key].ysum *= _weight
                                    _day_accum[_day_key].dirsumtime *= _weight
                        except Exception as e:
                            # log the exception and re-raise it
                            log.info("intervalweighting: Interval weighting of '%s' daily summary "
                                     "for %s failed: %s"
                                     % (last_key, timestamp_to_string(_day_span.start,
                                                                      format_str="%Y-%m-%d"), e))
                            raise
                        # Update the daily summary with the weighted accumulator
                        if not self.dry_run:
                            self.dbm._set_day_summary(_day_accum, None, _cursor)
                        _days += 1
                        # Save the ts of the weighted daily summary as the
                        # 'lastWeightPatch' value in the archive_day__metadata
                        # table
                        if not self.dry_run:
                            self.dbm._write_metadata('lastWeightPatch',
                                                     _day_span.start,
                                                     _cursor)
                        # Give the user some information on progress
                        if _days % 50 == 0:
                            self._progress(_days, _day_span.start)
                        last_start = _day_span.start

                    # Setup our next tranche
                    # Have we reached the end, if so break to finish
                    if _tr_stop_ts >= startOfDay(self.dbm.last_timestamp):
                        break
                    # More to process so set our start and stop for the next
                    # transaction
                    _tr_start_dt = datetime.datetime.fromtimestamp(_tr_stop_ts) \
                        + datetime.timedelta(days=1)
                    _tr_start_ts = time.mktime(_tr_start_dt.timetuple())
                    _tr_stop_dt = datetime.datetime.fromtimestamp(_tr_start_ts) \
                        + datetime.timedelta(days=self.trans_days)
                    _tr_stop_ts = time.mktime(_tr_stop_dt.timetuple())
                    _tr_stop_ts = min(self.dbm.last_timestamp, _tr_stop_ts)

            # We have finished. Get rid of the no longer needed lastWeightPatch.
            # A dry run must leave the database untouched: any lastWeightPatch
            # present was written by an earlier real run and is still needed
            # for that run to be resumed.
            if not self.dry_run:
                with weedb.Transaction(self.dbm.connection) as _cursor:
                    _cursor.execute("DELETE FROM %s_day__metadata WHERE name=?"
                                    % self.dbm.table_name, ('lastWeightPatch',))

            # Give the user some final information on progress,
            # mainly so the total tallies with the log
            self._progress(_days, last_start)
            print(file=sys.stdout)
            tdiff = time.time() - t1
            # We are done so log and inform the user
            log.info("intervalweighting: Calculated weighting "
                     "for %s days in %0.2f seconds." % (_days, tdiff))
            if self.dry_run:
                log.info("intervalweighting: "
                         "This was a dry run. %s was not applied." % self.name)
        else:
            # we didn't need to weight so inform the user
            log.info("intervalweighting: %s has already been applied." % self.name)
Exemple #10
0
    def run(self):
        """Apply the interval weighting fix to the daily summaries.

        Reads the 'Version' metadata value; when it is missing or compares
        below '2.0' the daily summaries still need weighting. The day to
        resume from is derived from the 'lastWeightPatch' metadata value (one
        day after it), or is None when that value is not set.

        If self.unique_day_interval() reports that the days still to be
        weighted are homogeneous, the weighting is applied via self.do_fix()
        and, unless this is a dry run, the 'Version' metadata is set to '2.0'.
        Otherwise, unless this is a dry run, the daily summaries are dropped
        and rebuilt from scratch.

        Raises:
            weewx.ViolatedPrecondition: logged and re-raised if raised while
                the weighting is being applied.
        """

        # Tell the user what is about to happen
        if self.dry_run:
            log.info("intervalweighting: This is a dry run. "
                     "Interval weighting will be applied but not saved.")

        log.info("intervalweighting: Using database binding '%s', "
                 "which is bound to database '%s'." %
                 (self.binding, self.dbm.database_name))
        log.debug("intervalweighting: Database transactions "
                  "will use %s days of data." % self.trans_days)

        # A 'Version' of 2.0 or later means the summaries are already weighted,
        # in which case there is nothing to do
        summary_version = self.dbm._read_metadata('Version')
        needs_weighting = summary_version is None or summary_version < '2.0'
        if not needs_weighting:
            log.info("intervalweighting: %s has already been applied." % self.name)
            return

        # Work out where to resume: the day following the last daily summary
        # that was successfully weighted, or None if none has been recorded
        last_patch = self.dbm._read_metadata('lastWeightPatch')
        resume_ts = None
        if last_patch:
            resume_dt = datetime.datetime.fromtimestamp(int(last_patch))
            resume_dt = resume_dt + datetime.timedelta(days=1)
            resume_ts = time.mktime(resume_dt.timetuple())

        if not self.unique_day_interval(resume_ts):
            # Some day still to be weighted has more than one distinct
            # interval value, so the existing summaries cannot be weighted in
            # place; they have to be rebuilt instead. A rebuild is destructive
            # so it is skipped entirely on a dry run.
            if self.dry_run:
                return
            log.debug("intervalweighting: Multiple distinct 'interval' "
                      "values found for at least one archive day.")
            log.info("intervalweighting: %s will be applied by dropping "
                     "and rebuilding daily summaries." % self.name)
            self.dbm.drop_daily()
            self.dbm.close()
            # Reopening with initialize=True forces the schema to be rebuilt
            self.dbm = weewx.manager.open_manager_with_config(
                self.config_dict, self.binding, initialize=True)
            # The backfill produces a V2 daily summary
            self.dbm.backfill_day_summary()
            return

        # Every day has a single interval value, so weight in place
        try:
            self.do_fix(resume_ts)
            # Getting here means the fix went through; record the new
            # version unless nothing is meant to be saved
            if not self.dry_run:
                with weedb.Transaction(self.dbm.connection) as cursor:
                    self.dbm._write_metadata('Version', '2.0', cursor)
        except weewx.ViolatedPrecondition as e:
            log.info("intervalweighting: %s not applied: %s"
                     % (self.name, e))
            # pass the error on so the caller can deal with it if they want
            raise
Exemple #11
0
    def run(self):
        """Main entry point for calculating missing derived fields.

        Calculate the missing derived fields for archive records whose
        timestamp is after self.start_ts and no later than self.stop_ts, save
        the calculated data to the archive and then recalculate the daily
        summaries for that span. Records are worked through in database
        transactions that each cover up to self.trans_days days.

        Raises:
            weewx.ViolatedPrecondition: if the station 'altitude' setting
                does not include a unit.
        """

        # record the current time
        t1 = time.time()
        # obtain a wxservices.WXCalculate object to calculate the missing
        # fields, first we need to get a DBBinder object ...
        db_binder = weewx.manager.DBBinder(self.config_dict)
        # ... then a database manager ...
        db_manager = db_binder.get_manager(
            data_binding=self.config_dict['StdWXCalculate']['data_binding'])
        # ... then station altitude, latitude and longitude
        stn_dict = self.config_dict['Station']
        altitude_t = option_as_list(stn_dict.get('altitude', (None, None)))
        try:
            altitude_vt = weewx.units.ValueTuple(float(altitude_t[0]),
                                                 altitude_t[1],
                                                 "group_altitude")
        except (KeyError, IndexError) as e:
            # indexing a one-element sequence for the unit raises IndexError,
            # so it has to be caught alongside KeyError for the missing-unit
            # case to be reported properly
            raise weewx.ViolatedPrecondition(
                "Value 'altitude' needs a unit (%s)" % e)
        latitude_f = float(stn_dict['latitude'])
        longitude_f = float(stn_dict['longitude'])

        # now we can create a WXCalculate object
        wxcalculate = weewx.wxservices.WXCalculate(self.config_dict,
                                                   altitude_vt, latitude_f,
                                                   longitude_f, db_manager)

        # initialise some counters so we know what we have processed
        days_updated = 0
        days_processed = 0
        total_records_processed = 0
        total_records_updated = 0

        # obtain gregorian days for our start and stop timestamps
        start_greg = weeutil.weeutil.toGregorianDay(self.start_ts)
        stop_greg = weeutil.weeutil.toGregorianDay(self.stop_ts)
        # start at the first day
        day = start_greg
        while day <= stop_greg:
            # get the start and stop timestamps for this tranche; the tranche
            # never extends past the end of the stop day
            tr_start_ts = weeutil.weeutil.startOfGregorianDay(day)
            tr_stop_ts = min(
                weeutil.weeutil.startOfGregorianDay(stop_greg + 1),
                weeutil.weeutil.startOfGregorianDay(day + self.trans_days))
            # start the transaction
            with weedb.Transaction(self.dbm.connection) as _cursor:
                # iterate over each day in the tranche we are to work in
                for tranche_day in weeutil.weeutil.genDaySpans(
                        tr_start_ts, tr_stop_ts):
                    # initialise a counter for records updated on this day
                    records_updated = 0
                    # iterate over each record in this day
                    for record in self.dbm.genBatchRecords(
                            startstamp=tranche_day.start,
                            stopstamp=tranche_day.stop):
                        # but we are only concerned with records after the
                        # start and before or equal to the stop timestamps
                        if self.start_ts < record['dateTime'] <= self.stop_ts:
                            # calculate the missing derived fields for the record
                            wxcalculate.do_calculations(data_dict=record,
                                                        data_type='archive')
                            # obtain a dict containing only those fields that
                            # WXCalculate calculated
                            extras_dict = {
                                k: record[k]
                                for k in record.keys()
                                if k in wxcalculate.calculations.keys()
                            }
                            # update the archive with the calculated data;
                            # presumably the return value is the number of
                            # records updated - verify against
                            # update_record_fields()
                            records_updated += self.update_record_fields(
                                record['dateTime'], extras_dict)
                            total_records_processed += 1
                        # Give the user some information on progress
                        if total_records_processed % 1000 == 0:
                            p_msg = "Processing record: %d; Last date: %s" % (
                                total_records_processed,
                                timestamp_to_string(record['dateTime']))
                            self._progress(p_msg)
                    # fold this day's count into the running total exactly
                    # once; records_updated is cumulative for the day so
                    # adding it per record would over-count
                    total_records_updated += records_updated
                    # if we updated any records on this day increment the count
                    # of days updated
                    days_updated += 1 if records_updated > 0 else 0
                    days_processed += 1
            # advance to the next tranche
            day += self.trans_days
        # finished, so give the user some final information on progress, mainly
        # so the total tallies with the log
        # NOTE(review): tr_stop_ts is only bound if the loop above ran at
        # least once, i.e. the start day is not after the stop day
        p_msg = "Processing record: %d; (%s)" % (
            total_records_processed, timestamp_to_string(tr_stop_ts))
        self._progress(p_msg, overprint=False)
        # now update the daily summaries
        print("Recalculating daily summaries...")
        # first we need a start and stop date object
        start_d = datetime.date.fromtimestamp(self.start_ts)
        stop_d = datetime.date.fromtimestamp(self.stop_ts)
        # do the update
        self.dbm.backfill_day_summary(start_d=start_d, stop_d=stop_d)
        print(file=sys.stdout)
        print("Finished recalculating daily summaries")
        tdiff = time.time() - t1
        # we are done so log and inform the user
        log.info(
            "calcmissing: Processed %d days consisting of %d records. "
            "%d days consisting of %d records were updated in %0.2f seconds." %
            (days_processed, total_records_processed, days_updated,
             total_records_updated, tdiff))

        if self.dry_run:
            log.info("calcmissing: "
                     "This was a dry run. %s was not applied." % self.name)
Exemple #12
0
    def addRecord(self, record_obj, log_level=syslog.LOG_NOTICE):
        """Commit a single record or a collection of records to the archive.

        All inserts are done inside a single database transaction.

        record_obj: Either a data record, or an iterable that can return data
        records. Each data record must look like a dictionary, where the keys
        are the SQL types and the values are the values to be stored in the
        database.

        log_level: The syslog priority used to log each successful insert.

        Raises weewx.ViolatedPrecondition if a record has a null 'dateTime',
        and ValueError if a record's unit system differs from the one already
        in use by the archive. A failure of an individual insert is logged
        and not re-raised.
        """

        # Determine if record_obj is just a single dictionary instance (in which
        # case it will have method 'keys'). If so, wrap it in something iterable
        # (a list):
        record_list = [record_obj] if hasattr(record_obj,
                                              'keys') else record_obj

        with weedb.Transaction(self.connection) as cursor:

            for record in record_list:

                if record['dateTime'] is None:
                    syslog.syslog(
                        syslog.LOG_ERR,
                        "Archive: archive record with null time encountered.")
                    raise weewx.ViolatedPrecondition(
                        "Archive record with null time encountered.")

                # Check to make sure the incoming record is in the same unit system as the
                # records already in the database:
                if self.std_unit_system:
                    if record['usUnits'] != self.std_unit_system:
                        raise ValueError(
                            "Unit system of incoming record (0x%x) "
                            "differs from the archive database (0x%x)" %
                            (record['usUnits'], self.std_unit_system))
                else:
                    # This is the first record. Remember the unit system to check
                    # against subsequent records:
                    self.std_unit_system = record['usUnits']

                # Only data types that appear in the database schema can be inserted.
                # To find them, form the intersection between the set of all record
                # keys and the set of all sql keys
                record_key_set = set(record.keys())
                insert_key_set = record_key_set.intersection(self.sqlkeys)
                # Freeze the keys into a list so names and values line up:
                key_list = list(insert_key_set)
                # Get the values in the same order:
                value_list = [record[k] for k in key_list]

                # This will be a string of sql types, separated by commas.
                # Because some of the weewx sql keys (notably 'interval') are
                # reserved words in MySQL, put them in backquotes.
                k_str = ','.join(["`%s`" % k for k in key_list])
                # This will be a string with the correct number of placeholder question marks:
                q_str = ','.join('?' * len(key_list))
                # Form the SQL insert statement:
                sql_insert_stmt = "INSERT INTO %s (%s) VALUES (%s)" % (
                    self.table, k_str, q_str)
                try:
                    cursor.execute(sql_insert_stmt, value_list)
                    syslog.syslog(
                        log_level, "Archive: added %s record %s" %
                        (self.table,
                         weeutil.weeutil.timestamp_to_string(
                             record['dateTime'])))
                # 'except X as e' works on Python 2.6+ and Python 3; the older
                # 'except X, e' form is a syntax error on Python 3
                except Exception as e:
                    # Best effort: log the failure and carry on with the
                    # next record rather than aborting the whole batch
                    syslog.syslog(
                        syslog.LOG_ERR,
                        "Archive: unable to add archive record %s" %
                        weeutil.weeutil.timestamp_to_string(
                            record['dateTime']))
                    syslog.syslog(syslog.LOG_ERR, " ****    Reason: %s" % e)