def getRawData(self, period):
    """Read one Cumulus monthly log and return it as a csv.DictReader.

    The log is read in full, decimal separators are normalised to full
    stops, blank lines are dropped and the separate Cumulus date and time
    fields (the first two fields of each row) are merged into a single
    date-time field. The field map is then built via self.parseMap() and
    stored in self.map.

    Input parameters:

        period: the file name, including path, of the Cumulus monthly log
                file from which raw obs data will be read.

    Returns:
        A csv.DictReader over the cleaned rows, keyed by self._field_list
        and split on self.delimiter.

    Raises:
        weeimport.WeeImportIOError: if period is not an existing file.
    """

    # period holds the filename of the monthly log file that contains our
    # data. If it does not exist we cannot go on.
    if not os.path.isfile(period):
        raise weeimport.WeeImportIOError(
            "Cumulus monthly log file '%s' could not be found." % period)
    with open(period, 'r') as f:
        _raw_data = f.readlines()

    # Our raw data needs a bit of cleaning up before we can parse/map it.
    _clean_data = []
    for _row in _raw_data:
        # Make sure we have full stops as decimal points
        _line = _row.replace(self.decimal, '.')
        # Ignore blank lines. Test the stripped line rather than comparing
        # against "\n" so that whitespace-only lines and bare "\r\n" line
        # endings are also treated as blank.
        if not _line.strip():
            continue
        # Cumulus has separate date and time fields as the first 2 fields
        # of a row. Replacing only the first delimiter with a space joins
        # them into a single date-time field that can be parsed later when
        # the raw data is mapped.
        _clean_data.append(_line.replace(self.delimiter, ' ', 1))

    # Now create a dictionary CSV reader
    _reader = csv.DictReader(_clean_data,
                             fieldnames=self._field_list,
                             delimiter=self.delimiter)
    # Finally, get our database-source mapping
    self.map = self.parseMap('Cumulus', _reader, self.cumulus_config_dict)
    # Return our dict reader
    return _reader
def getRawData(self, period):
    """Obtain an iterable containing the raw data to be imported.

    The file named by self.source is read, HTML tags are removed from each
    line and blank lines are dropped. The remaining lines are wrapped in a
    csv.DictReader that takes its keys from the first remaining line. The
    field map is then built via self.parseMap() and stored in self.map.

    Input parameters:

        period: a simple counter that is unused but retained to keep the
                getRawData() signature the same across all classes.

    Returns:
        A csv.DictReader over the cleaned lines.

    Raises:
        weeimport.WeeImportIOError: if self.source is not an existing file.
    """

    # does our source exist? If it doesn't we can't go on so raise it
    if not os.path.isfile(self.source):
        raise weeimport.WeeImportIOError(
            "CSV source file '%s' could not be found." % self.source)
    with open(self.source, 'r') as f:
        _raw_data = f.readlines()

    # just in case the data has been sourced from the web we will remove
    # any HTML tags and blank lines that may exist
    _clean_data = []
    for _row in _raw_data:
        # get rid of any HTML tags
        _line = ''.join(CSVSource._tags.split(_row))
        # Save anything that is not a blank line. The stripped line is
        # tested (rather than comparing against "\n") so that a line left
        # holding only whitespace or "\r\n" once its tags are removed is
        # dropped too; otherwise such a line at the top of the file would
        # be taken as the header row by the DictReader.
        if _line.strip():
            _clean_data.append(_line)

    # create a dictionary CSV reader, using the first line as the set of keys
    _csv_reader = csv.DictReader(_clean_data)
    # finally, get our source-to-database mapping
    self.map = self.parseMap('CSV', _csv_reader, self.csv_config_dict)
    # return our CSV dict reader
    return _csv_reader
def getRawData(self, period):
    """Read one Cumulus monthly log (plus optional extra data) as a DictReader.

    The monthly log is read in full. If the companion file named by
    self.getExtraDataFile(period) exists and has the same number of lines,
    each of its rows is appended to the matching monthly log row. Decimal
    separators are then normalised to full stops, blank lines are dropped
    and the separate Cumulus date and time fields (the first two fields of
    each row) are merged into a single date-time field. If the rain source
    has not yet been confirmed it is settled via self.set_rain_source().
    Finally the field map is built via self.parseMap() and stored in
    self.map.

    Input parameters:

        period: the file name, including path, of the Cumulus monthly log
                file from which raw obs data will be read.

    Returns:
        A csv.DictReader over the cleaned rows, keyed by self._field_list
        and split on self.delimiter.

    Raises:
        weeimport.WeeImportIOError: if period is not an existing file.
    """

    # period holds the filename of the monthly log file that contains our
    # data. If it does not exist we cannot go on.
    if not os.path.isfile(period):
        raise weeimport.WeeImportIOError(
            "Cumulus monthly log file '%s' could not be found." % period)
    with open(period, 'r') as f:
        _raw_data = f.readlines()

    # Merge in the extra data file, if there is one. A missing or
    # mismatched extra data file is reported but is not fatal; the monthly
    # log is then used on its own.
    # NOTE: print is called with a single parenthesised argument so that the
    # statements behave the same under Python 2 and Python 3.
    extra_data_file = self.getExtraDataFile(period)
    if os.path.isfile(extra_data_file):
        with open(extra_data_file, 'r') as f:
            extra_data = f.readlines()
        if len(extra_data) == len(_raw_data):
            joined = []
            for _main, _extra in zip(_raw_data, extra_data):
                _main = _main.strip()
                _extra = _extra.strip()
                # A pair of blank lines would otherwise become a lone
                # delimiter and slip past the blank line filter below.
                if not (_main or _extra):
                    continue
                # Join on the configured field delimiter. A hard-coded
                # comma is wrong when the logs use another delimiter, and
                # when the decimal separator is a comma it would be turned
                # into a full stop below, fusing two fields together.
                joined.append(self.delimiter.join((_main, _extra)))
            _raw_data = joined
        else:
            print("extra data had wrong length")
            print("main data length : %d , extra data length : %d" % (
                len(_raw_data), len(extra_data)))
    else:
        print("extra data file not found : " + extra_data_file)

    # Our raw data needs a bit of cleaning up before we can parse/map it.
    _clean_data = []
    for _row in _raw_data:
        # Make sure we have full stops as decimal points
        _line = _row.replace(self.decimal, '.')
        # Ignore blank lines, including whitespace-only lines and bare
        # "\r\n" line endings
        if not _line.strip():
            continue
        # Cumulus has separate date and time fields as the first 2 fields
        # of a row. Replacing only the first delimiter with a space joins
        # them into a single date-time field that can be parsed later when
        # the raw data is mapped.
        _clean_data.append(_line.replace(self.delimiter, ' ', 1))

    # if we haven't confirmed our source for the weeWX rain field we need
    # to do so now
    if self.rain_source_confirmed is None:
        # The Cumulus source field depends on the Cumulus version that
        # created the log files and can only be determined by looking at
        # the data. A DictReader cannot be reset once read, so a one-off
        # DictReader is created for this purpose.
        _rain_reader = csv.DictReader(_clean_data,
                                      fieldnames=self._field_list,
                                      delimiter=self.delimiter)
        # now that we know what Cumulus fields are available we can set
        # our rain source appropriately
        self.set_rain_source(_rain_reader)

    # Now create a dictionary CSV reader
    _reader = csv.DictReader(_clean_data,
                             fieldnames=self._field_list,
                             delimiter=self.delimiter)
    # Finally, get our database-source mapping
    self.map = self.parseMap('Cumulus', _reader, self.cumulus_config_dict)
    # Return our dict reader
    return _reader
def __init__(self, config_dict, config_path, cumulus_config_dict,
             import_config_path, options, log):
    """Initialise a Cumulus monthly log import source.

    Reads the Cumulus import settings (delimiter, decimal and date
    separators, units, log directory), fills in the unit entries of
    self._header_map, builds the list of monthly log files sorted from
    oldest to newest and reports the import settings to the user/log.

    Input parameters:

        config_dict: passed through to the parent class __init__.
        config_path: path of the config file, used for reporting only.
        cumulus_config_dict: the Cumulus import config; must contain a
                             'Units' section with 'temperature',
                             'pressure', 'rain' and 'speed' entries and a
                             'directory' entry.
        import_config_path: path of the import config file, used in error
                            messages and reporting.
        options: parsed command line options; date, date_from and date_to
                 are read here.
        log: passed through to the parent class __init__.

    Raises:
        weewx.UnitError: if a required unit is missing or not recognised.
        weewx.ViolatedPrecondition: if no log directory is specified.
        weeimport.WeeImportIOError: if no monthly logs are found.
    """

    # call our parents __init__
    super(CumulusSource, self).__init__(config_dict,
                                        cumulus_config_dict,
                                        options,
                                        log)

    # save our import config path
    self.import_config_path = import_config_path
    # save our import config dict
    self.cumulus_config_dict = cumulus_config_dict

    # wind dir bounds
    self.wind_dir = [0, 360]

    # field delimiter used in monthly log files, default to comma
    self.delimiter = cumulus_config_dict.get('delimiter', ',')
    # decimal separator used in monthly log files, default to decimal point
    self.decimal = cumulus_config_dict.get('decimal', '.')
    # date separator used in monthly log files, default to solidus
    separator = cumulus_config_dict.get('separator', '/')
    # we combine Cumulus date and time fields to give a fixed format
    # date-time string
    self.raw_datetime_format = separator.join(('%d', '%m', '%y %H:%M'))

    # Cumulus log files provide a number of cumulative rainfall fields. The
    # daily rainfall cannot be used as it may reset at some time of day
    # other than midnight, so rain is treated as a cumulative value.
    self.rain = 'cumulative'

    # initialise our import field-to-weeWX archive field map
    self.map = None

    # Which Cumulus 'rain' field is available depends on the Cumulus
    # version that created the logs. This can only be settled once some
    # source data has been seen, so flag it as unconfirmed for now.
    self.rain_source_confirmed = None

    # Units of measure for some obs (eg temperatures) cannot be derived
    # from the Cumulus monthly log files and must be specified by the user
    # in the import config file. If one is missing or invalid we cannot go
    # on, so raise an error.
    def _get_unit(obs_type, valid_units):
        """Return the configured unit for obs_type or raise UnitError."""
        try:
            unit = cumulus_config_dict['Units'].get(obs_type)
        except (KeyError, AttributeError):
            # no usable [Units] section at all
            unit = None
        if unit is None:
            # also covers a [Units] section that lacks this entry
            _msg = "No units specified for Cumulus %s fields in %s." % (
                obs_type, self.import_config_path)
            raise weewx.UnitError(_msg)
        if unit not in valid_units:
            _msg = "Unknown units '%s' specified for Cumulus %s fields in %s." % (
                unit, obs_type, self.import_config_path)
            raise weewx.UnitError(_msg)
        return unit

    # Temperature
    temp_u = _get_unit('temperature', weewx.units.default_unit_format_dict)
    for _field in ('cur_out_temp', 'curr_in_temp', 'cur_dewpoint',
                   'cur_heatindex', 'cur_windchill', 'cur_app_temp',
                   'T1', 'T2'):
        self._header_map[_field]['units'] = temp_u
    # Pressure
    press_u = _get_unit('pressure', ['inHg', 'mbar', 'hPa'])
    self._header_map['cur_slp']['units'] = press_u
    # Rain
    rain_u = _get_unit('rain', rain_units_dict)
    self._header_map['midnight_rain']['units'] = rain_u
    self._header_map['cur_rain_rate']['units'] = rain_units_dict[rain_u]
    # Speed
    speed_u = _get_unit('speed', weewx.units.default_unit_format_dict)
    self._header_map['avg_wind_speed']['units'] = speed_u
    self._header_map['gust_wind_speed']['units'] = speed_u

    # get our source file path
    try:
        self.source = cumulus_config_dict['directory']
    except KeyError:
        raise weewx.ViolatedPrecondition(
            "Cumulus monthly logs directory not specified in '%s'." % import_config_path)

    # Now get a list of monthly log files sorted from oldest to newest.
    # The sort key is taken from the tail of each file name: the two
    # characters before 'log.txt' and then the month number parsed from the
    # three characters before those.
    month_log_list = glob.glob(self.source + '/?????log.txt')
    _temp = [(fn, fn[-9:-7], time.strptime(fn[-12:-9], '%b').tm_mon)
             for fn in month_log_list]
    self.log_list = [a[0] for a in sorted(_temp,
                                          key=lambda el: (el[1], el[2]))]
    if not self.log_list:
        raise weeimport.WeeImportIOError(
            "No Cumulus monthly logs found in directory '%s'." % self.source)

    # tell the user/log what we intend to do
    _msg = "Cumulus monthly log files in the '%s' directory will be imported" % self.source
    self.wlog.printlog(syslog.LOG_INFO, _msg)
    _msg = "The following options will be used:"
    self.wlog.verboselog(syslog.LOG_DEBUG, _msg)
    _msg = "     config=%s, import-config=%s" % (config_path,
                                                 self.import_config_path)
    self.wlog.verboselog(syslog.LOG_DEBUG, _msg)
    if options.date:
        _msg = "     date=%s" % options.date
    else:
        # we must have --from and --to
        _msg = "     from=%s, to=%s" % (options.date_from, options.date_to)
    self.wlog.verboselog(syslog.LOG_DEBUG, _msg)
    _msg = "     dry-run=%s, calc-missing=%s" % (self.dry_run,
                                                 self.calc_missing)
    self.wlog.verboselog(syslog.LOG_DEBUG, _msg)
    _msg = "     tranche=%s, interval=%s" % (self.tranche,
                                             self.interval)
    self.wlog.verboselog(syslog.LOG_DEBUG, _msg)
    _msg = "     UV=%s, radiation=%s" % (self.UV_sensor, self.solar_sensor)
    self.wlog.verboselog(syslog.LOG_DEBUG, _msg)
    _msg = "Using database binding '%s', which is bound to database '%s'" % (
        self.db_binding_wx, self.dbm.database_name)
    self.wlog.printlog(syslog.LOG_INFO, _msg)
    _msg = "Destination table '%s' unit system is '%#04x' (%s)." % (
        self.dbm.table_name,
        self.archive_unit_sys,
        unit_nicknames[self.archive_unit_sys])
    self.wlog.printlog(syslog.LOG_INFO, _msg)
    # NOTE: print is called with a single parenthesised argument so that the
    # statements behave the same under Python 2 and Python 3.
    if self.calc_missing:
        print("Missing derived observations will be calculated.")
    if not self.UV_sensor:
        print("All weeWX UV fields will be set to None.")
    if not self.solar_sensor:
        print("All weeWX radiation fields will be set to None.")
    if options.date or options.date_from:
        print("Observations timestamped after %s and up to and" % (
            timestamp_to_string(self.first_ts), ))
        print("including %s will be imported." % (
            timestamp_to_string(self.last_ts), ))
    if self.dry_run:
        print("This is a dry run, imported data will not be saved to archive.")