Esempio n. 1
0
    def getRawData(self, period):
        """Read a Cumulus monthly log file and return it as a csv.DictReader.

        The raw lines are cleaned up before being handed to the CSV reader:
        the configured decimal separator is replaced with a full stop, blank
        lines are dropped and the separate date and time fields that start
        each row are merged into a single date-time field.

        As a side effect self.map is set to the result of self.parseMap().

        Input parameters:

            period: the file name, including path, of the Cumulus monthly log
                    file from which raw obs data will be read.

        Returns:

            A csv.DictReader over the cleaned rows, keyed by the names in
            self._field_list.

        Raises:

            weeimport.WeeImportIOError: if period is not an existing file.
        """

        # period holds the filename of the monthly log file that contains our
        # data. If our source does not exist we can't go on so raise it.
        if not os.path.isfile(period):
            raise weeimport.WeeImportIOError(
                "Cumulus monthly log file '%s' could not be found." % period)

        with open(period, 'r') as f:
            _raw_data = f.readlines()

        # Our raw data needs a bit of cleaning up before we can parse/map it.
        _clean_data = []
        for _row in _raw_data:
            # Make sure we have full stops as decimal points
            _line = _row.replace(self.decimal, '.')
            # Ignore any blank lines. Use strip() rather than comparing with
            # "\n" so that whitespace-only lines and blank lines with a
            # carriage return are also skipped instead of turning into bogus
            # rows in the reader.
            if not _line.strip():
                continue
            # Cumulus has separate date and time fields as the first 2 fields
            # of a row. It is easier to combine them now into a single
            # date-time field that we can parse later when we map the raw
            # data. Only the first delimiter is replaced.
            _clean_data.append(_line.replace(self.delimiter, ' ', 1))

        # Now create a dictionary CSV reader
        _reader = csv.DictReader(_clean_data,
                                 fieldnames=self._field_list,
                                 delimiter=self.delimiter)
        # Finally, get our database-source mapping
        self.map = self.parseMap('Cumulus', _reader, self.cumulus_config_dict)
        # Return our dict reader
        return _reader
Esempio n. 2
0
    def getRawData(self, period):
        """Obtain an iterable containing the raw data to be imported.

        The source file self.source is read, HTML tags are removed from each
        line using CSVSource._tags and blank lines are dropped. The remaining
        lines are wrapped in a csv.DictReader that takes its field names from
        the first remaining line, so the field names in the field map can be
        used to map the data to the weeWX archive record format.

        As a side effect self.map is set to the result of self.parseMap().

        Input parameters:

            period: a simple counter that is unused but retained to keep the
                    getRawData() signature the same across all classes.

        Returns:

            A csv.DictReader over the cleaned lines.

        Raises:

            weeimport.WeeImportIOError: if self.source is not an existing
                                        file.
        """

        # if our source does not exist we can't go on so raise it
        if not os.path.isfile(self.source):
            raise weeimport.WeeImportIOError(
                "CSV source file '%s' could not be found." % self.source)

        with open(self.source, 'r') as f:
            _raw_data = f.readlines()

        # just in case the data has been sourced from the web we will remove
        # any HTML tags and blank lines that may exist
        _clean_data = []
        for _row in _raw_data:
            # get rid of any HTML tags
            _line = ''.join(CSVSource._tags.split(_row))
            # Save anything that is not a blank line. A line is blank if
            # nothing but whitespace is left; comparing with "\n" alone would
            # let whitespace-only or carriage return terminated lines through
            # and, because the DictReader uses the first line it sees as the
            # header, such a line ahead of the real header would become the
            # set of keys.
            if _line.strip():
                _clean_data.append(_line)

        # create a dictionary CSV reader, using the first line as the set of keys
        _csv_reader = csv.DictReader(_clean_data)

        # finally, get our source-to-database mapping
        self.map = self.parseMap('CSV', _csv_reader, self.csv_config_dict)

        # return our CSV dict reader
        return _csv_reader
Esempio n. 3
0
    def getRawData(self, period):
        """Read a Cumulus monthly log, merge any extra data and return a reader.

        The monthly log named by period is read. If the file named by
        self.getExtraDataFile(period) exists and has the same number of lines,
        each of its lines is appended to the corresponding monthly log line as
        additional fields; otherwise a message is printed and the monthly log
        is used on its own.

        The lines are then cleaned up: the configured decimal separator is
        replaced with a full stop, blank lines are dropped and the separate
        date and time fields that start each row are merged into a single
        date-time field.

        Side effects: self.map is set to the result of self.parseMap() and,
        if self.rain_source_confirmed is None, self.set_rain_source() is
        called with a one-off reader over the same data.

        Input parameters:

            period: the file name, including path, of the Cumulus monthly log
                    file from which raw obs data will be read.

        Returns:

            A csv.DictReader over the cleaned rows, keyed by the names in
            self._field_list.

        Raises:

            weeimport.WeeImportIOError: if period is not an existing file.
        """

        # period holds the filename of the monthly log file that contains our
        # data. If our source does not exist we can't go on so raise it.
        if not os.path.isfile(period):
            raise weeimport.WeeImportIOError(
                "Cumulus monthly log file '%s' could not be found." % period)

        with open(period, 'r') as f:
            _raw_data = f.readlines()

        # Merge in the extra data, but only if it lines up row for row with
        # the monthly log.
        extra_data_file = self.getExtraDataFile(period)
        if os.path.isfile(extra_data_file):
            with open(extra_data_file, 'r') as f:
                extra_data = f.readlines()
            if len(extra_data) == len(_raw_data):
                # Join with the configured field delimiter. A hard coded
                # comma would be converted to a full stop by the decimal
                # separator clean-up below whenever the decimal separator is
                # a comma, fusing the last main field with the first extra
                # field.
                _raw_data = [
                    _main.strip() + self.delimiter + _extra.strip()
                    for _main, _extra in zip(_raw_data, extra_data)
                ]
            else:
                # single argument print() calls behave identically under
                # python 2 and python 3
                print("extra data had wrong length")
                print("main data length : %d , extra data length : %d" % (
                    len(_raw_data), len(extra_data)))
        else:
            print("extra data file not found : " + extra_data_file)

        # Our raw data needs a bit of cleaning up before we can parse/map it.
        _clean_data = []
        for _row in _raw_data:
            # Make sure we have full stops as decimal points
            _line = _row.replace(self.decimal, '.')
            # Ignore any blank lines. Merged rows have had their newline
            # stripped and unmerged rows may be whitespace-only or end in a
            # carriage return, so test for "nothing but whitespace" rather
            # than comparing with "\n".
            if not _line.strip():
                continue
            # Cumulus has separate date and time fields as the first 2 fields
            # of a row. It is easier to combine them now into a single
            # date-time field that we can parse later when we map the raw
            # data. Only the first delimiter is replaced.
            _clean_data.append(_line.replace(self.delimiter, ' ', 1))

        # if we haven't confirmed our source for the weeWX rain field we need
        # to do so now
        if self.rain_source_confirmed is None:
            # The Cumulus source field depends on the Cumulus version that
            # created the log files. Unfortunately, we can only determine
            # which field to use by looking at the mapped Cumulus data. If we
            # look at our DictReader we have no way to reset it, so we create
            # a one off DictReader to use instead.
            _rain_reader = csv.DictReader(_clean_data,
                                          fieldnames=self._field_list,
                                          delimiter=self.delimiter)
            # now that we know what Cumulus fields are available we can set our
            # rain source appropriately
            self.set_rain_source(_rain_reader)

        # Now create a dictionary CSV reader
        _reader = csv.DictReader(_clean_data,
                                 fieldnames=self._field_list,
                                 delimiter=self.delimiter)
        # Finally, get our database-source mapping
        self.map = self.parseMap('Cumulus', _reader, self.cumulus_config_dict)
        # Return our dict reader
        return _reader
Esempio n. 4
0
    def __init__(self, config_dict, config_path, cumulus_config_dict,
                 import_config_path, options, log):
        """Initialise a Cumulus monthly log import source.

        Reads the field delimiter, decimal separator and date separator from
        the import config, fills in the user specified units in
        self._header_map, builds the sorted list of monthly log files in the
        configured directory and reports the settings that will be used.

        Input parameters:

            config_dict: passed to the parent class __init__.
            config_path: reported as the 'config' option in the verbose log
                         output; not otherwise used here.
            cumulus_config_dict: the Cumulus import config. The 'delimiter',
                                 'decimal', 'separator' and 'directory'
                                 settings and the 'Units' section are read.
            import_config_path: path of the import config file, used in log
                                and error messages.
            options: command line options. The date, date_from and date_to
                     attributes are read here; the object is also passed to
                     the parent class __init__.
            log: passed to the parent class __init__.

        Raises:

            weewx.UnitError: if the temperature, pressure, rain or speed
                             units are missing from the import config or are
                             not recognised.
            weewx.ViolatedPrecondition: if no 'directory' setting exists.
            weeimport.WeeImportIOError: if no monthly log files are found in
                                        the directory.
        """

        # call our parents __init__
        super(CumulusSource, self).__init__(config_dict, cumulus_config_dict,
                                            options, log)

        # save our import config path
        self.import_config_path = import_config_path
        # save our import config dict
        self.cumulus_config_dict = cumulus_config_dict

        # wind dir bounds
        self.wind_dir = [0, 360]

        # field delimiter used in monthly log files, default to comma
        self.delimiter = cumulus_config_dict.get('delimiter', ',')
        # decimal separator used in monthly log files, default to decimal point
        self.decimal = cumulus_config_dict.get('decimal', '.')

        # date separator used in monthly log files, default to solidus
        separator = cumulus_config_dict.get('separator', '/')
        # we combine Cumulus date and time fields to give a fixed format
        # date-time string
        self.raw_datetime_format = separator.join(('%d', '%m', '%y %H:%M'))

        # Cumulus log files provide a number of cumulative rainfall fields. We
        # cannot use the daily rainfall as this may reset at some time of day
        # other than midnight (as required by weeWX). So we use field 26, total
        # rainfall since midnight and treat it as a cumulative value.
        self.rain = 'cumulative'

        # initialise our import field-to-weeWX archive field map
        self.map = None

        # Cumulus log files have a number of 'rain' fields that can be used to
        # derive the weeWX rain field. Which one is available depends on the
        # Cumulus version that created the logs. The preferred field is field
        # 26(AA) - total rainfall since midnight but it is only available in
        # Cumulus v1.9.4 or later. If that field is not available then the
        # preferred field is field 09(J) - total rainfall today then field
        # 11(L) - total rainfall counter. Initialise the rain_source_confirmed
        # property now and we will deal with it later when we have some source
        # data.
        self.rain_source_confirmed = None

        # Units of measure for some obs (eg temperatures) cannot be derived from
        # the Cumulus monthly log files. These units must be specified by the
        # user in the import config file. Read these units and fill in the
        # missing unit data in the header map. If one of the settings is
        # missing or invalid the helper raises weewx.UnitError as we can't go
        # on.
        # Temperature
        temp_u = self._get_cumulus_units(cumulus_config_dict, 'temperature',
                                         weewx.units.default_unit_format_dict)
        for _field in ('cur_out_temp', 'curr_in_temp', 'cur_dewpoint',
                       'cur_heatindex', 'cur_windchill', 'cur_app_temp',
                       'T1', 'T2'):
            self._header_map[_field]['units'] = temp_u
        # Pressure
        press_u = self._get_cumulus_units(cumulus_config_dict, 'pressure',
                                          ['inHg', 'mbar', 'hPa'])
        self._header_map['cur_slp']['units'] = press_u
        # Rain. The rain rate units are looked up from the rain units.
        rain_u = self._get_cumulus_units(cumulus_config_dict, 'rain',
                                         rain_units_dict)
        self._header_map['midnight_rain']['units'] = rain_u
        self._header_map['cur_rain_rate']['units'] = rain_units_dict[rain_u]
        # Speed
        speed_u = self._get_cumulus_units(cumulus_config_dict, 'speed',
                                          weewx.units.default_unit_format_dict)
        self._header_map['avg_wind_speed']['units'] = speed_u
        self._header_map['gust_wind_speed']['units'] = speed_u

        # get our source file path
        try:
            self.source = cumulus_config_dict['directory']
        except KeyError:
            raise weewx.ViolatedPrecondition(
                "Cumulus monthly logs directory not specified in '%s'." %
                import_config_path)

        # Now get a list of monthly log files sorted from oldest to newest.
        # The sort key is taken from fixed positions at the end of the file
        # name: the two characters before 'log.txt' are used as the year and
        # the three characters before those are parsed as an abbreviated
        # month name.
        month_log_list = glob.glob(self.source + '/?????log.txt')
        self.log_list = sorted(
            month_log_list,
            key=lambda fn: (fn[-9:-7],
                            time.strptime(fn[-12:-9], '%b').tm_mon))
        if not self.log_list:
            raise weeimport.WeeImportIOError(
                "No Cumulus monthly logs found in directory '%s'." %
                self.source)

        # tell the user/log what we intend to do
        _msg = "Cumulus monthly log files in the '%s' directory will be imported" % self.source
        self.wlog.printlog(syslog.LOG_INFO, _msg)
        _msg = "The following options will be used:"
        self.wlog.verboselog(syslog.LOG_DEBUG, _msg)
        _msg = "     config=%s, import-config=%s" % (config_path,
                                                     self.import_config_path)
        self.wlog.verboselog(syslog.LOG_DEBUG, _msg)
        if options.date:
            _msg = "     date=%s" % options.date
        else:
            # we must have --from and --to
            _msg = "     from=%s, to=%s" % (options.date_from, options.date_to)
        self.wlog.verboselog(syslog.LOG_DEBUG, _msg)
        _msg = "     dry-run=%s, calc-missing=%s" % (self.dry_run,
                                                     self.calc_missing)
        self.wlog.verboselog(syslog.LOG_DEBUG, _msg)
        _msg = "     tranche=%s, interval=%s" % (self.tranche, self.interval)
        self.wlog.verboselog(syslog.LOG_DEBUG, _msg)
        _msg = "     UV=%s, radiation=%s" % (self.UV_sensor, self.solar_sensor)
        self.wlog.verboselog(syslog.LOG_DEBUG, _msg)
        _msg = "Using database binding '%s', which is bound to database '%s'" % (
            self.db_binding_wx, self.dbm.database_name)
        self.wlog.printlog(syslog.LOG_INFO, _msg)
        _msg = "Destination table '%s' unit system is '%#04x' (%s)." % (
            self.dbm.table_name, self.archive_unit_sys,
            unit_nicknames[self.archive_unit_sys])
        self.wlog.printlog(syslog.LOG_INFO, _msg)
        # The console messages below use single argument print() calls, which
        # behave identically under python 2 and python 3.
        if self.calc_missing:
            print("Missing derived observations will be calculated.")
        if not self.UV_sensor:
            print("All weeWX UV fields will be set to None.")
        if not self.solar_sensor:
            print("All weeWX radiation fields will be set to None.")
        if options.date or options.date_from:
            print("Observations timestamped after %s and up to and" % (
                timestamp_to_string(self.first_ts), ))
            print("including %s will be imported." % (
                timestamp_to_string(self.last_ts), ))
        if self.dry_run:
            print("This is a dry run, imported data will not be saved to archive.")

    def _get_cumulus_units(self, cumulus_config_dict, obs_kind, valid_units):
        """Return the validated units setting for one kind of Cumulus field.

        Input parameters:

            cumulus_config_dict: the Cumulus import config containing the
                                 'Units' section.
            obs_kind: the key to look up in the 'Units' section, eg
                      'temperature'. Also used in the error messages.
            valid_units: a container of acceptable unit names.

        Returns:

            The units setting found in the import config.

        Raises:

            weewx.UnitError: if the setting is missing or is not in
                             valid_units.
        """

        # Only the failures a missing or malformed 'Units' section can cause
        # are caught; anything else is allowed to propagate.
        try:
            _units = cumulus_config_dict['Units'].get(obs_kind)
        except (KeyError, AttributeError, TypeError):
            _units = None
        # a 'Units' section without the setting is reported the same way as
        # a missing 'Units' section
        if _units is None:
            _msg = "No units specified for Cumulus %s fields in %s." % (
                obs_kind, self.import_config_path)
            raise weewx.UnitError(_msg)
        if _units not in valid_units:
            _msg = "Unknown units '%s' specified for Cumulus %s fields in %s." % (
                _units, obs_kind, self.import_config_path)
            raise weewx.UnitError(_msg)
        return _units