def process_data(context):
    """Generate summaries from raw weather station data.

    The meteorological day end (typically 2100 or 0900 local time) is
    set in the preferences file ``weather.ini``. The default value is
    2100 (2200 during DST), following the historical convention for
    weather station readings.

    :param context: pywws storage context holding the data stores and
        the user's preferences.
    :return: 0 on success.
    :raises IOError: if the raw data store is empty.
    """
    logger.info('Generating summary data')
    # get time of last record
    last_raw = context.raw_data.before(datetime.max)
    if last_raw is None:
        raise IOError('No data found. Check data directory parameter.')
    # get daytime end hour (in local time)
    day_end_hour, use_dst = get_day_end_hour(context.params)
    # get other config
    # use float() rather than eval(): the value is a plain number, and
    # eval() would execute arbitrary code read from the preferences file
    rain_day_threshold = float(
        context.params.get('config', 'rain day threshold', '0.2'))
    # calibrate raw data
    start = calibrate_data(context.params, context.raw_data, context.calib_data)
    # generate hourly data
    start = generate_hourly(context.calib_data, context.hourly_data, start)
    # generate daily data
    start = generate_daily(day_end_hour, use_dst, context.calib_data,
                           context.hourly_data, context.daily_data, start)
    # generate monthly data
    generate_monthly(rain_day_threshold, day_end_hour, use_dst,
                     context.daily_data, context.monthly_data, start)
    return 0
def dailygen(inputdata):
    """Yield one summary record per meteorological day.

    Steps from ``start`` to ``stop`` (enclosing scope) a day at a time,
    folding calibrated and hourly records into ``acc``.
    """
    period_start = start
    periods_done = 0
    while period_start <= stop:
        periods_done += 1
        # progress report: INFO roughly monthly, DEBUG otherwise
        report = logger.info if periods_done % 30 == 0 else logger.debug
        report("daily: %s", period_start.isoformat(' '))
        period_end = period_start + DAY
        if use_dst:
            # a day straddling a DST change is 23 or 25 hours long
            period_end = timezone.local_replace(
                period_end + HOURx3, use_dst=use_dst, hour=day_end_hour)
        acc.reset()
        for record in inputdata[period_start:period_end]:
            acc.add_raw(record)
        for record in hourly_data[period_start:period_end]:
            acc.add_hourly(record)
        summary = acc.result()
        if summary:
            summary['start'] = period_start
            yield summary
        period_start = period_end
def monthlygen(inputdata):
    """Yield one summary record per calendar month.

    Steps from ``start`` to ``stop`` (enclosing scope) a month at a
    time, folding daily records into ``acc``.
    """
    period_start = start
    periods_done = 0
    while period_start <= stop:
        periods_done += 1
        # progress report: INFO roughly yearly, DEBUG otherwise
        report = logger.info if periods_done % 12 == 0 else logger.debug
        report("monthly: %s", period_start.isoformat(' '))
        # advance one month: add a week, bump the month number, then
        # take the week off again (keeps the day-of-month valid)
        shifted = period_start + WEEK
        if shifted.month == 12:
            shifted = shifted.replace(month=1, year=shifted.year + 1)
        else:
            shifted = shifted.replace(month=shifted.month + 1)
        period_end = shifted - WEEK
        if use_dst:
            # month might straddle summer time start or end
            period_end = timezone.local_replace(
                period_end + HOURx3, use_dst=use_dst, hour=day_end_hour)
        acc.reset()
        for record in inputdata[period_start:period_end]:
            acc.add_daily(record)
        summary = acc.result()
        if summary:
            summary['start'] = period_start
            yield summary
        period_start = period_end
def process_data(context):
    """Generate summaries from raw weather station data.

    The meteorological day end (typically 2100 or 0900 local time) is
    set in the preferences file ``weather.ini``. The default value is
    2100 (2200 during DST), following the historical convention for
    weather station readings.
    """
    logger.info('Generating summary data')
    # refuse to run when there is no raw data at all
    if context.raw_data.before(datetime.max) is None:
        raise IOError('No data found. Check data directory parameter.')
    # configuration: end of meteorological day (local time) and the
    # rainfall threshold for counting 'rain days'
    day_end_hour, use_dst = get_day_end_hour(context.params)
    threshold = float(
        context.params.get('config', 'rain day threshold', '0.2'))
    # each stage returns the time stamp the next stage processes from
    start = calibrate_data(
        context.params, context.raw_data, context.calib_data)
    start = generate_hourly(context.calib_data, context.hourly_data, start)
    start = generate_daily(day_end_hour, use_dst, context.calib_data,
                           context.hourly_data, context.daily_data, start)
    generate_monthly(threshold, day_end_hour, use_dst,
                     context.daily_data, context.monthly_data, start)
    return 0
def calibrate_data(params, raw_data, calib_data):
    """'Calibrate' raw data, using a user-supplied function."""
    # resume from the first raw record after the last calibrated one
    last_done = calib_data.before(datetime.max)
    if last_done is None:
        last_done = datetime.min
    start = raw_data.after(last_done + SECOND)
    if start is None:
        # nothing new to calibrate
        return None
    del calib_data[start:]
    calibrator = Calib(params, raw_data)
    for count, data in enumerate(raw_data[start:], 1):
        idx = data['idx']
        if count % 10000 == 0:
            logger.info("calib: %s", idx.isoformat(' '))
        elif count % 500 == 0:
            logger.debug("calib: %s", idx.isoformat(' '))
        # skip records with any essential value missing
        if any(data[key] is None
               for key in ('rain', 'abs_pressure', 'temp_in')):
            logger.error('Ignoring invalid data at %s', idx.isoformat(' '))
        else:
            calib_data[idx] = calibrator.calib(data)
    return start
def reprocess(data_dir, update):
    """Delete all summary data and regenerate it from the raw data.

    :param data_dir: directory containing the weather data.
    :param update: also rewrite raw data, marking invalid wind_dir values.
    :return: 0 on success.
    """
    with pywws.storage.pywws_context(data_dir) as context:
        if update:
            logger.warning("Updating status to detect invalid wind_dir")
            raw_data = context.raw_data
            for count, data in enumerate(raw_data[:], 1):
                idx = data['idx']
                if count % 10000 == 0:
                    logger.info("update: %s", idx.isoformat(' '))
                elif count % 500 == 0:
                    logger.debug("update: %s", idx.isoformat(' '))
                # top bit set marks an invalid wind direction reading
                if data['wind_dir'] is not None and (data['wind_dir'] & 0x80):
                    data['wind_dir'] = None
                    raw_data[idx] = data
            raw_data.flush()
        # delete old format summary files
        logger.warning('Deleting old summaries')
        for store in (context.calib_data, context.hourly_data,
                      context.daily_data, context.monthly_data):
            store.clear()
        # create data summaries
        logger.warning('Generating hourly and daily summaries')
        pywws.process.process_data(context)
    return 0
def monthlygen(inputdata):
    """Yield a monthly summary record for each calendar month.

    Iterates from ``start`` to ``stop`` (enclosing scope), accumulating
    daily records into ``acc`` one month at a time.
    """
    month_start = start
    count = 0
    while month_start <= stop:
        count += 1
        # progress report: INFO roughly yearly, DEBUG every month
        if count % 12 == 0:
            logger.info("monthly: %s", month_start.isoformat(' '))
        else:
            logger.debug("monthly: %s", month_start.isoformat(' '))
        # advance one month: add a week, bump the month number, then
        # remove the week again (keeps the day-of-month valid)
        month_end = month_start + WEEK
        if month_end.month < 12:
            month_end = month_end.replace(month=month_end.month+1)
        else:
            month_end = month_end.replace(month=1, year=month_end.year+1)
        month_end = month_end - WEEK
        if use_dst:
            # month might straddle summer time start or end
            month_end = timezone.local_replace(
                month_end + HOURx3, use_dst=use_dst, hour=day_end_hour)
        acc.reset()
        for data in inputdata[month_start:month_end]:
            acc.add_daily(data)
        new_data = acc.result()
        if new_data:
            new_data['start'] = month_start
            yield new_data
        month_start = month_end
def monthlygen(inputdata, start, local_start):
    """Yield a monthly summary record for each calendar month.

    :param inputdata: store of daily summary records.
    :param start: UTC time stamp of the first month boundary.
    :param local_start: the same boundary expressed in local time; used
        to step the calendar month without DST ambiguity.
    """
    acc = MonthAcc(rain_day_threshold)
    month_start = start
    count = 0
    while month_start <= stop:
        count += 1
        # progress report: INFO roughly yearly, DEBUG every month
        if count % 12 == 0:
            logger.info("monthly: %s", month_start.isoformat(' '))
        else:
            logger.debug("monthly: %s", month_start.isoformat(' '))
        # step the local-time boundary forward one calendar month
        if local_start.month < 12:
            local_start = local_start.replace(month=local_start.month+1)
        else:
            local_start = local_start.replace(
                month=1, year=local_start.year+1)
        # convert back to UTC and align with the meteorological day end
        month_end = time_zone.local_to_utc(local_start)
        month_end = time_zone.day_start(
            month_end, day_end_hour, use_dst=use_dst)
        acc.reset()
        for data in inputdata[month_start:month_end]:
            acc.add_daily(data)
        new_data = acc.result()
        if new_data:
            new_data['start'] = month_start
            yield new_data
        month_start = month_end
def reprocess(data_dir, update):
    """Delete old summary files and regenerate summaries from raw data.

    :param data_dir: directory containing the weather data.
    :param update: also rewrite raw data, marking invalid wind_dir values.
    :return: 0 on success.
    """
    if update:
        logger.warning("Updating status to detect invalid wind_dir")
        raw_data = pywws.storage.RawStore(data_dir)
        for count, data in enumerate(raw_data[:], 1):
            idx = data['idx']
            if count % 10000 == 0:
                logger.info("update: %s", idx.isoformat(' '))
            elif count % 500 == 0:
                logger.debug("update: %s", idx.isoformat(' '))
            # top bit set marks an invalid wind direction reading
            if data['wind_dir'] is not None and (data['wind_dir'] & 0x80):
                data['wind_dir'] = None
                raw_data[idx] = data
        raw_data.flush()
    # delete old format summary files
    logger.warning('Deleting old summaries')
    for summary in ['calib', 'hourly', 'daily', 'monthly']:
        tree = os.walk(os.path.join(data_dir, summary), topdown=False)
        for root, dirs, files in tree:
            logger.info(root)
            for name in files:
                os.unlink(os.path.join(root, name))
            os.rmdir(root)
    # create data summaries
    logger.warning('Generating hourly and daily summaries')
    with pywws.storage.pywws_context(data_dir) as context:
        pywws.process.process_data(context)
    return 0
def dailygen(inputdata):
    """Yield a daily summary record for each meteorological day.

    Iterates from ``start`` to ``stop`` (enclosing scope), accumulating
    calibrated and hourly records into ``acc`` one day at a time.
    """
    day_start = start
    count = 0
    while day_start <= stop:
        count += 1
        # progress report: INFO roughly monthly, DEBUG every day
        if count % 30 == 0:
            logger.info("daily: %s", day_start.isoformat(' '))
        else:
            logger.debug("daily: %s", day_start.isoformat(' '))
        day_end = day_start + DAY
        if use_dst:
            # day might be 23 or 25 hours long
            day_end = timezone.local_replace(day_end + HOURx3,
                                             use_dst=use_dst,
                                             hour=day_end_hour)
        acc.reset()
        for data in inputdata[day_start:day_end]:
            acc.add_raw(data)
        for data in hourly_data[day_start:day_end]:
            acc.add_hourly(data)
        new_data = acc.result()
        if new_data:
            new_data['start'] = day_start
            yield new_data
        day_start = day_end
def generate_monthly(rain_day_threshold, day_end_hour, use_dst,
                     daily_data, monthly_data, process_from):
    """Generate monthly summaries from daily data.

    :param rain_day_threshold: rainfall threshold passed to MonthAcc
        (used to count 'rain days').
    :param day_end_hour: local hour at which the meteorological day ends.
    :param use_dst: adjust the day end hour for daylight saving time.
    :param daily_data: store of daily summary records (input).
    :param monthly_data: store of monthly summary records (output).
    :param process_from: earliest time stamp needing reprocessing, or None.
    :return: the time stamp processing started from, or None if there
        was nothing to do.
    """
    # find first daily record not yet summarised
    start = monthly_data.before(datetime.max)
    if start is None:
        start = datetime.min
    start = daily_data.after(start + SECOND)
    # an earlier upstream change forces reprocessing from process_from
    if process_from:
        if start:
            start = min(start, process_from)
        else:
            start = process_from
    if start is None:
        return start
    # set start to start of first day of month (local time)
    start = timezone.local_replace(start, use_dst=use_dst, day=1,
                                   hour=day_end_hour, minute=0, second=0)
    if day_end_hour >= 12:
        # month actually starts on the last day of previous month
        start -= DAY
    # discard summaries that will be regenerated
    del monthly_data[start:]
    stop = daily_data.before(datetime.max)
    if stop is None:
        # no daily data at all
        return None
    month_start = start
    acc = MonthAcc(rain_day_threshold)
    count = 0
    while month_start <= stop:
        count += 1
        # progress report: INFO roughly yearly, DEBUG every month
        if count % 12 == 0:
            logger.info("monthly: %s", month_start.isoformat(' '))
        else:
            logger.debug("monthly: %s", month_start.isoformat(' '))
        # advance one month: add a week, bump the month number, then
        # remove the week again (keeps the day-of-month valid)
        month_end = month_start + WEEK
        if month_end.month < 12:
            month_end = month_end.replace(month=month_end.month + 1)
        else:
            month_end = month_end.replace(month=1, year=month_end.year + 1)
        month_end = month_end - WEEK
        if use_dst:
            # month might straddle summer time start or end
            month_end = timezone.local_replace(month_end + HOURx3,
                                               use_dst=use_dst,
                                               hour=day_end_hour)
        acc.reset()
        for data in daily_data[month_start:month_end]:
            acc.add_daily(data)
        new_data = acc.result()
        if new_data:
            new_data['start'] = month_start
            monthly_data[new_data['idx']] = new_data
        month_start = month_end
    return start
def __init__(self, key, secret, latitude, longitude):
    """Authenticate to Twitter via tweepy OAuth.

    :param key: OAuth access token key.
    :param secret: OAuth access token secret.
    :param latitude: optional latitude to attach to status updates.
    :param longitude: optional longitude to attach to status updates.
    """
    logger.info('Using tweepy library')
    handler = tweepy.OAuthHandler(pct.consumer_key, pct.consumer_secret)
    handler.set_access_token(key, secret)
    self.api = tweepy.API(handler)
    # extra keyword args sent with every status update
    self.kwargs = {}
    if latitude is not None and longitude is not None:
        self.kwargs = {'lat': latitude, 'long': longitude}
def __init__(self, key, secret, latitude, longitude):
    """Authenticate to Twitter via tweepy OAuth.

    :param key: OAuth access token key.
    :param secret: OAuth access token secret.
    :param latitude: optional latitude to attach to status updates.
    :param longitude: optional longitude to attach to status updates.
    """
    logger.info('Using tweepy library')
    auth = tweepy.OAuthHandler(pct.consumer_key, pct.consumer_secret)
    auth.set_access_token(key, secret)
    self.api = tweepy.API(auth)
    # extra keyword args sent with every status update
    if latitude is not None and longitude is not None:
        self.kwargs = {'lat' : latitude, 'long' : longitude}
    else:
        self.kwargs = {}
def monitor(i):
    """Pass records through from an iterator unchanged, logging a
    progress message every 10,000 records."""
    for count, record in enumerate(i, 1):
        if count % 10000 == 0:
            # assumes records are dicts carrying an 'idx' time stamp
            logger.info("%d records so far, current record is %s",
                        count, record["idx"])
        yield record
def monitor(i):
    """Given an iterator, yields data from it unchanged but logs
    progress every 10,000 records."""
    count = 0
    for x in i:
        count+=1
        if count % 10000 == 0:
            # assumes records are dicts carrying an 'idx' time stamp
            # -- confirm against callers
            logger.info("%d records so far, current record is %s",
                        count, x["idx"])
        yield x
def session(self):
    """Yield an open, logged-in FTP session; always close the
    connection afterwards."""
    logger.info("Uploading to web site with FTP")
    ftp = ftplib.FTP()
    self.ftp = ftp
    ftp.connect(self.site, self.port)
    logger.debug('welcome message\n' + ftp.getwelcome())
    ftp.login(self.user, self.password)
    ftp.cwd(self.directory)
    try:
        yield None
    finally:
        ftp.close()
def fetch_logged(self, last_date, last_ptr):
    """Copy logged records from the station's memory to the raw data
    store, working backwards from ``last_date`` / ``last_ptr`` until
    already-stored data is reached.

    Also removes duplicate records left in the station's memory after a
    reboot, and warns about any gap in the stored data.
    """
    # offset last stored time by half logging interval
    last_stored = self.last_stored_time + timedelta(
        seconds=self.fixed_block['read_period'] * 30)
    if last_date <= last_stored:
        # nothing to do
        return
    # data_count includes record currently being updated every 48 seconds
    max_count = self.fixed_block['data_count'] - 1
    count = 0
    # initialise detection of data left after a station reboot
    saved_date = self.last_stored_time
    saved_ptr = self.last_stored_ptr
    self.last_stored_ptr = None
    duplicates = []
    while last_date > last_stored and count < max_count:
        data = self.ws.get_data(last_ptr)
        if last_ptr == saved_ptr:
            if any(data[key] != self.raw_data[saved_date][key] for key in (
                    'hum_in', 'temp_in', 'hum_out', 'temp_out',
                    'abs_pressure', 'wind_ave', 'wind_gust', 'wind_dir',
                    'rain', 'status')):
                # pointer matches but data is different, so no duplicates
                duplicates = None
                saved_ptr = None
            else:
                # potential duplicate data
                duplicates.append(last_date)
                saved_date = self.raw_data.before(saved_date)
                saved_ptr = self.ws.dec_ptr(saved_ptr)
        if (data['delay'] is None or
                data['delay'] > max(self.fixed_block['read_period'] * 2, 35)):
            logger.error('invalid data at %04x, %s',
                         last_ptr, last_date.isoformat(' '))
            last_date -= timedelta(minutes=self.fixed_block['read_period'])
        else:
            self.raw_data[last_date] = data
            count += 1
            last_date -= timedelta(minutes=data['delay'])
        last_ptr = self.ws.dec_ptr(last_ptr)
    if duplicates:
        for d in duplicates:
            del self.raw_data[d]
        count -= len(duplicates)
    # fix: nearest() returns None when the store is empty, which would
    # crash the gap check below -- fall back to datetime.max
    last_date = self.raw_data.nearest(last_date) or datetime.max
    next_date = self.raw_data.after(last_date + SECOND)
    if next_date:
        gap = (next_date - last_date).seconds // 60
        gap -= self.fixed_block['read_period']
        if gap > 0:
            logger.critical("%d minutes gap in data detected", gap)
    logger.info("%d catchup records", count)
def fetch_logged(self, last_date, last_ptr):
    """Copy logged records from the station's memory to the raw data
    store, working backwards from ``last_date`` / ``last_ptr`` until
    already-stored data is reached.

    Also removes duplicate records left in the station's memory after a
    reboot, and warns about any gap in the stored data.
    """
    # offset last stored time by half logging interval
    last_stored = self.last_stored_time + timedelta(
        seconds=self.fixed_block['read_period'] * 30)
    if last_date <= last_stored:
        # nothing to do
        return
    # data_count includes record currently being updated every 48 seconds
    max_count = self.fixed_block['data_count'] - 1
    count = 0
    # initialise detection of data left after a station reboot
    saved_date = self.last_stored_time
    saved_ptr = self.last_stored_ptr
    self.last_stored_ptr = None
    duplicates = []
    while last_date > last_stored and count < max_count:
        data = self.ws.get_data(last_ptr)
        if last_ptr == saved_ptr:
            # compare against the already-stored record at this pointer
            if any(data[key] != self.raw_data[saved_date][key]
                   for key in ('hum_in', 'temp_in', 'hum_out', 'temp_out',
                               'abs_pressure', 'wind_ave', 'wind_gust',
                               'wind_dir', 'rain', 'status')):
                # pointer matches but data is different, so no duplicates
                duplicates = None
                saved_ptr = None
            else:
                # potential duplicate data
                duplicates.append(last_date)
                saved_date = self.raw_data.before(saved_date)
                saved_ptr = self.ws.dec_ptr(saved_ptr)
        # a missing or implausibly large 'delay' marks a corrupt record
        if (data['delay'] is None or data['delay'] > max(
                self.fixed_block['read_period'] * 2, 35)):
            logger.error('invalid data at %04x, %s',
                         last_ptr, last_date.isoformat(' '))
            last_date -= timedelta(minutes=self.fixed_block['read_period'])
        else:
            self.raw_data[last_date] = data
            count += 1
            last_date -= timedelta(minutes=data['delay'])
        last_ptr = self.ws.dec_ptr(last_ptr)
    if duplicates:
        for d in duplicates:
            del self.raw_data[d]
        count -= len(duplicates)
    # nearest() returns None on an empty store; datetime.max keeps the
    # gap check below safe in that case
    last_date = self.raw_data.nearest(last_date) or datetime.max
    next_date = self.raw_data.after(last_date + SECOND)
    if next_date:
        gap = (next_date - last_date).seconds // 60
        gap -= self.fixed_block['read_period']
        if gap > 0:
            logger.critical("%d minutes gap in data detected", gap)
    logger.info("%d catchup records", count)
def __init__(self, key, secret, latitude, longitude, timeout):
    """Authenticate to Twitter with the python-twitter library.

    :param key: OAuth access token key.
    :param secret: OAuth access token secret.
    :param latitude: optional latitude to attach to status updates.
    :param longitude: optional longitude to attach to status updates.
    :param timeout: network timeout passed to twitter.Api.
    """
    logger.info('Using python-twitter library')
    self.api = twitter.Api(
        consumer_key=pct.consumer_key,
        consumer_secret=pct.consumer_secret,
        access_token_key=key,
        access_token_secret=secret,
        timeout=timeout)
    # extra keyword args sent with every status update
    self.kwargs = {'verify_status_length': False}
    if latitude is not None and longitude is not None:
        self.kwargs['latitude'] = latitude
        self.kwargs['longitude'] = longitude
        self.kwargs['display_coordinates'] = True
def generate_monthly(rain_day_threshold, day_end_hour, use_dst,
                     daily_data, monthly_data, process_from):
    """Generate monthly summaries from daily data.

    :param rain_day_threshold: rainfall threshold passed to MonthAcc
        (used to count 'rain days').
    :param day_end_hour: local hour at which the meteorological day ends.
    :param use_dst: adjust the day end hour for daylight saving time.
    :param daily_data: store of daily summary records (input).
    :param monthly_data: store of monthly summary records (output).
    :param process_from: earliest time stamp needing reprocessing, or None.
    :return: the time stamp processing started from, or None if there
        was nothing to do.
    """
    # find first daily record not yet summarised
    start = monthly_data.before(datetime.max)
    if start is None:
        start = datetime.min
    start = daily_data.after(start + SECOND)
    if process_from:
        if start:
            start = min(start, process_from)
        else:
            start = process_from
    if start is None:
        return start
    # set start to start of first day of month (local time)
    start = timezone.local_replace(
        start, use_dst=use_dst, day=1, hour=day_end_hour, minute=0, second=0)
    if day_end_hour >= 12:
        # month actually starts on the last day of previous month
        start -= DAY
    del monthly_data[start:]
    stop = daily_data.before(datetime.max)
    if stop is None:
        # fix: daily_data may be empty (e.g. stores just cleared) while
        # process_from is set; without this guard the comparison with
        # None below raises TypeError
        return None
    month_start = start
    acc = MonthAcc(rain_day_threshold)
    count = 0
    while month_start <= stop:
        count += 1
        # progress report: INFO roughly yearly, DEBUG every month
        if count % 12 == 0:
            logger.info("monthly: %s", month_start.isoformat(' '))
        else:
            logger.debug("monthly: %s", month_start.isoformat(' '))
        # advance one month: add a week, bump the month number, then
        # remove the week again (keeps the day-of-month valid)
        month_end = month_start + WEEK
        if month_end.month < 12:
            month_end = month_end.replace(month=month_end.month+1)
        else:
            month_end = month_end.replace(month=1, year=month_end.year+1)
        month_end = month_end - WEEK
        if use_dst:
            # month might straddle summer time start or end
            month_end = timezone.local_replace(
                month_end + HOURx3, use_dst=use_dst, hour=day_end_hour)
        acc.reset()
        for data in daily_data[month_start:month_end]:
            acc.add_daily(data)
        new_data = acc.result()
        if new_data:
            new_data['start'] = month_start
            monthly_data[new_data['idx']] = new_data
        month_start = month_end
    return start
def generate_daily(day_end_hour, use_dst,
                   calib_data, hourly_data, daily_data, process_from):
    """Generate daily summaries from calibrated and hourly data.

    :param day_end_hour: local hour at which the meteorological day ends.
    :param use_dst: adjust the day end hour for daylight saving time.
    :param calib_data: store of calibrated records (input).
    :param hourly_data: store of hourly summary records (input).
    :param daily_data: store of daily summary records (output).
    :param process_from: earliest time stamp needing reprocessing, or None.
    :return: the time stamp processing started from, or None if there
        was nothing to do.
    """
    # find first calibrated record not yet summarised
    start = daily_data.before(datetime.max)
    if start is None:
        start = datetime.min
    start = calib_data.after(start + SECOND)
    if process_from:
        if start:
            start = min(start, process_from)
        else:
            start = process_from
    if start is None:
        return start
    # round to start of this day, in local time
    start = timezone.local_replace(start, use_dst=use_dst,
                                   hour=day_end_hour, minute=0, second=0)
    del daily_data[start:]
    stop = calib_data.before(datetime.max)
    if stop is None:
        # fix: calib_data may be empty while process_from is set;
        # without this guard the comparison with None below raises
        # TypeError
        return None
    day_start = start
    acc = DayAcc()
    count = 0
    while day_start <= stop:
        count += 1
        # progress report: INFO roughly monthly, DEBUG every day
        if count % 30 == 0:
            logger.info("daily: %s", day_start.isoformat(' '))
        else:
            logger.debug("daily: %s", day_start.isoformat(' '))
        day_end = day_start + DAY
        if use_dst:
            # day might be 23 or 25 hours long
            day_end = timezone.local_replace(day_end + HOURx3,
                                             use_dst=use_dst,
                                             hour=day_end_hour)
        acc.reset()
        for data in calib_data[day_start:day_end]:
            acc.add_raw(data)
        for data in hourly_data[day_start:day_end]:
            acc.add_hourly(data)
        new_data = acc.result()
        if new_data:
            new_data['start'] = day_start
            daily_data[new_data['idx']] = new_data
        day_start = day_end
    return start
def calibgen(inputdata):
    """Yield calibrated versions of the records in ``inputdata``,
    skipping any record with an essential value missing."""
    essential = ('rain', 'abs_pressure', 'temp_in')
    for count, data in enumerate(inputdata, 1):
        idx = data['idx']
        # periodic progress report
        if count % 10000 == 0:
            logger.info("calib: %s", idx.isoformat(' '))
        elif count % 500 == 0:
            logger.debug("calib: %s", idx.isoformat(' '))
        if any(data[key] is None for key in essential):
            logger.error('Ignoring invalid data at %s', idx.isoformat(' '))
        else:
            yield calibrator.calib(data)
def __init__(self, key, secret, latitude, longitude, timeout):
    """Authenticate to Twitter with the python-twitter library.

    :param key: OAuth access token key.
    :param secret: OAuth access token secret.
    :param latitude: optional latitude to attach to status updates.
    :param longitude: optional longitude to attach to status updates.
    :param timeout: network timeout passed to twitter.Api.
    """
    logger.info('Using python-twitter library')
    self.api = twitter.Api(consumer_key=pct.consumer_key,
                           consumer_secret=pct.consumer_secret,
                           access_token_key=key,
                           access_token_secret=secret,
                           timeout=timeout)
    # extra keyword args sent with every status update
    if latitude is not None and longitude is not None:
        self.kwargs = {
            'latitude': latitude,
            'longitude': longitude,
            'display_coordinates': True
            }
    else:
        self.kwargs = {}
    self.kwargs['verify_status_length'] = False
def session(self):
    """Yield an open SFTP session; always close the SFTP client and
    transport afterwards."""
    logger.info("Uploading to web site with SFTP")
    transport = paramiko.Transport((self.site, self.port))
    self.transport = transport
    transport.start_client()
    # public key authentication when a private key is configured,
    # otherwise plain password authentication
    if self.privkey:
        self.get_private_key(self.privkey)
        transport.auth_publickey(username=self.user, key=self.pkey)
    else:
        transport.auth_password(username=self.user, password=self.password)
    self.ftp = paramiko.SFTPClient.from_transport(transport)
    self.ftp.chdir(self.directory)
    try:
        yield None
    finally:
        self.ftp.close()
        transport.close()
def session(self):
    """Yield an open SFTP session; always close the SFTP client and
    transport afterwards."""
    logger.info("Uploading to web site with SFTP")
    self.transport = paramiko.Transport((self.site, self.port))
    self.transport.start_client()
    # public key authentication when a private key is configured,
    # otherwise plain password authentication
    if self.privkey:
        self.get_private_key(self.privkey)
        self.transport.auth_publickey(username=self.user, key=self.pkey)
    else:
        self.transport.auth_password(username=self.user,
                                     password=self.password)
    self.ftp = paramiko.SFTPClient.from_transport(self.transport)
    self.ftp.chdir(self.directory)
    try:
        yield None
    finally:
        self.ftp.close()
        self.transport.close()
def generate_daily(day_end_hour, use_dst,
                   calib_data, hourly_data, daily_data, process_from):
    """Generate daily summaries from calibrated and hourly data.

    :param day_end_hour: local hour at which the meteorological day ends.
    :param use_dst: adjust the day end hour for daylight saving time.
    :param calib_data: store of calibrated records (input).
    :param hourly_data: store of hourly summary records (input).
    :param daily_data: store of daily summary records (output).
    :param process_from: earliest time stamp needing reprocessing, or None.
    :return: the time stamp processing started from, or None if there
        was nothing to do.
    """
    # find first calibrated record not yet summarised
    start = daily_data.before(datetime.max)
    if start is None:
        start = datetime.min
    start = calib_data.after(start + SECOND)
    if process_from:
        if start:
            start = min(start, process_from)
        else:
            start = process_from
    if start is None:
        return start
    # round to start of this day, in local time
    start = timezone.local_replace(
        start, use_dst=use_dst, hour=day_end_hour, minute=0, second=0)
    del daily_data[start:]
    stop = calib_data.before(datetime.max)
    if stop is None:
        # fix: calib_data may be empty while process_from is set;
        # without this guard the comparison with None below raises
        # TypeError
        return None
    day_start = start
    acc = DayAcc()
    count = 0
    while day_start <= stop:
        count += 1
        # progress report: INFO roughly monthly, DEBUG every day
        if count % 30 == 0:
            logger.info("daily: %s", day_start.isoformat(' '))
        else:
            logger.debug("daily: %s", day_start.isoformat(' '))
        day_end = day_start + DAY
        if use_dst:
            # day might be 23 or 25 hours long
            day_end = timezone.local_replace(
                day_end + HOURx3, use_dst=use_dst, hour=day_end_hour)
        acc.reset()
        for data in calib_data[day_start:day_end]:
            acc.add_raw(data)
        for data in hourly_data[day_start:day_end]:
            acc.add_hourly(data)
        new_data = acc.result()
        if new_data:
            new_data['start'] = day_start
            daily_data[new_data['idx']] = new_data
        day_start = day_end
    return start
def hourlygen(inputdata, prev):
    """Yield an hourly summary record for each hour of data.

    Iterates from ``start`` to ``stop`` (enclosing scope), accumulating
    records into ``acc`` one hour at a time, and computes a 3-hour
    pressure trend from ``pressure_history``.

    :param prev: the record immediately before ``start``, or None; used
        to check inter-record intervals.
    """
    hour_start = start
    count = 0
    while hour_start <= stop:
        count += 1
        # progress report: INFO roughly weekly, DEBUG roughly daily
        if count % 1008 == 0:
            logger.info("hourly: %s", hour_start.isoformat(' '))
        elif count % 24 == 0:
            logger.debug("hourly: %s", hour_start.isoformat(' '))
        hour_end = hour_start + HOUR
        acc.reset()
        for data in inputdata[hour_start:hour_end]:
            if data['rel_pressure']:
                pressure_history.append(
                    (data['idx'], data['rel_pressure']))
            if prev:
                # warn when the gap to the previous record does not
                # match the station's logging interval
                err = data['idx'] - prev['idx']
                if abs(err - timedelta(minutes=data['delay'])) > TIME_ERR:
                    logger.info('unexpected data interval %s %s',
                                data['idx'].isoformat(' '), str(err))
            acc.add_raw(data)
            prev = data
        new_data = acc.result()
        # only emit a summary when at least 9 minutes of the hour is covered
        if new_data and (new_data['idx'] - hour_start) >= timedelta(minutes=9):
            # compute pressure trend
            new_data['pressure_trend'] = None
            if new_data['rel_pressure']:
                target = new_data['idx'] - HOURx3
                # drop history entries until the head is the reading
                # closest to three hours ago
                while (len(pressure_history) >= 2 and
                       abs(pressure_history[0][0] - target) >
                       abs(pressure_history[1][0] - target)):
                    pressure_history.popleft()
                if (pressure_history and
                        abs(pressure_history[0][0] - target) < HOUR):
                    new_data['pressure_trend'] = (
                        new_data['rel_pressure'] - pressure_history[0][1])
            # store new hourly data
            yield new_data
        hour_start = hour_end
def Upload(self, catchup=True, live_data=None, ignore_last_update=False):
    """Upload one or more weather data records.

    Uploads either the most recent weather data record, or all records
    since the last upload (up to 7 days), according to the value of
    :obj:`catchup`. Sets the ``last update`` configuration value to the
    time stamp of the most recent record successfully uploaded.

    :param catchup: upload all data since last upload.
    :type catchup: bool
    :param live_data: current 'live' data. If not present the most
        recent logged data is uploaded.
    :type live_data: dict
    :param ignore_last_update: don't get or set the 'last update'
        status.ini entry.
    :type ignore_last_update: bool
    :return: success status
    :rtype: bool
    """
    sent = 0
    for record in self.next_data(catchup, live_data, ignore_last_update):
        payload = self.prepare_data(record)
        if not payload:
            # nothing to send for this record
            continue
        if not self.send_data(record['idx'], payload, ignore_last_update):
            return False
        sent += 1
    if sent > 1:
        logger.info('%s:%d records sent', self.service_name, sent)
    return True
def log_data(self, sync=None, clear=False):
    """Fetch logged data from the weather station into the raw store.

    :param sync: synchronisation level (0, 1 or 2); higher levels wait
        for a logged record, lower levels estimate the station's log
        time from live readings. Read from config when None.
    :param clear: reset the station's stored-record count afterwards.
    """
    # get sync config value
    if sync is None:
        # fast-logging stations default to a higher sync level
        if self.fixed_block['read_period'] <= 5:
            sync = int(self.params.get('config', 'logdata sync', '1'))
        else:
            sync = int(self.params.get('config', 'logdata sync', '0'))
    # get address and date-time of last complete logged data
    logger.info('Synchronising to weather station')
    range_hi = datetime.max
    range_lo = datetime.min
    last_delay = self.ws.get_data(self.ws.current_pos())['delay']
    if last_delay == 0:
        prev_date = datetime.min
    else:
        prev_date = datetime.utcnow()
    for data, last_ptr, logged in self.ws.live_data(
            logged_only=(sync > 1)):
        last_date = data['idx']
        logger.debug('Reading time %s', last_date.strftime('%H:%M:%S'))
        if logged:
            # a freshly logged record is the most reliable reference
            break
        if sync < 2 and self.ws._station_clock.clock:
            # estimate log time from the station clock
            err = last_date - datetime.fromtimestamp(
                self.ws._station_clock.clock)
            last_date -= timedelta(minutes=data['delay'],
                                   seconds=err.seconds % 60)
            logger.debug('log time %s', last_date.strftime('%H:%M:%S'))
            last_ptr = self.ws.dec_ptr(last_ptr)
            break
        if sync < 1:
            # narrow down the log time from successive live readings
            # until the uncertainty is small enough
            hi = last_date - timedelta(minutes=data['delay'])
            if last_date - prev_date > timedelta(seconds=50):
                lo = hi - timedelta(seconds=60)
            elif data['delay'] == last_delay:
                lo = hi - timedelta(seconds=60)
                hi = hi - timedelta(seconds=48)
            else:
                lo = hi - timedelta(seconds=48)
            last_delay = data['delay']
            prev_date = last_date
            range_hi = min(range_hi, hi)
            range_lo = max(range_lo, lo)
            err = (range_hi - range_lo) / 2
            last_date = range_lo + err
            logger.debug('est log time %s +- %ds (%s..%s)',
                         last_date.strftime('%H:%M:%S'), err.seconds,
                         lo.strftime('%H:%M:%S'), hi.strftime('%H:%M:%S'))
            if err < timedelta(seconds=15):
                last_ptr = self.ws.dec_ptr(last_ptr)
                break
    # go back through stored data, until we catch up with what we've already got
    logger.info('Fetching data')
    self.status.set('data', 'ptr',
                    '%06x,%s' % (last_ptr, last_date.isoformat(' ')))
    self.fetch_logged(last_date, last_ptr)
    if clear:
        logger.info('Clearing weather station memory')
        ptr = self.ws.fixed_format['data_count'][0]
        self.ws.write_data([(ptr, 1), (ptr + 1, 0)])
def hourlygen(inputdata, prev):
    """Yield an hourly summary record for each hour of data.

    Iterates from ``start`` to ``stop`` (enclosing scope), accumulating
    records into ``acc`` one hour at a time, and computes a 3-hour
    pressure trend from ``pressure_history``.

    :param prev: the record immediately before ``start``, or None; used
        to check inter-record intervals.
    """
    hour_start = start
    count = 0
    while hour_start <= stop:
        count += 1
        # progress report: INFO roughly weekly, DEBUG roughly daily
        if count % 1008 == 0:
            logger.info("hourly: %s", hour_start.isoformat(' '))
        elif count % 24 == 0:
            logger.debug("hourly: %s", hour_start.isoformat(' '))
        hour_end = hour_start + HOUR
        acc.reset()
        for data in inputdata[hour_start:hour_end]:
            if data['rel_pressure']:
                pressure_history.append((data['idx'], data['rel_pressure']))
            if prev:
                # warn when the gap to the previous record does not
                # match the station's logging interval
                err = data['idx'] - prev['idx']
                if abs(err - timedelta(minutes=data['delay'])) > TIME_ERR:
                    logger.info('unexpected data interval %s %s',
                                data['idx'].isoformat(' '), str(err))
            acc.add_raw(data)
            prev = data
        new_data = acc.result()
        # only emit a summary when at least 9 minutes of the hour is covered
        if new_data and (new_data['idx'] - hour_start) >= timedelta(minutes=9):
            # compute pressure trend
            new_data['pressure_trend'] = None
            if new_data['rel_pressure']:
                target = new_data['idx'] - HOURx3
                # drop history entries until the head is the reading
                # closest to three hours ago
                while (len(pressure_history) >= 2 and
                       abs(pressure_history[0][0] - target) >
                       abs(pressure_history[1][0] - target)):
                    pressure_history.popleft()
                if (pressure_history and
                        abs(pressure_history[0][0] - target) < HOUR):
                    new_data['pressure_trend'] = (
                        new_data['rel_pressure'] - pressure_history[0][1])
            # store new hourly data
            yield new_data
        hour_start = hour_end
def log_data(self, sync=None, clear=False):
    """Fetch logged data from the weather station into the raw store.

    :param sync: synchronisation level (0, 1 or 2); higher levels wait
        for a logged record, lower levels estimate the station's log
        time from live readings. Read from config when None.
    :param clear: reset the station's stored-record count afterwards.
    """
    # get sync config value
    if sync is None:
        # fast-logging stations default to a higher sync level
        if self.fixed_block['read_period'] <= 5:
            sync = int(self.params.get('config', 'logdata sync', '1'))
        else:
            sync = int(self.params.get('config', 'logdata sync', '0'))
    # get address and date-time of last complete logged data
    logger.info('Synchronising to weather station')
    range_hi = datetime.max
    range_lo = datetime.min
    last_delay = self.ws.get_data(self.ws.current_pos())['delay']
    if last_delay == 0:
        prev_date = datetime.min
    else:
        prev_date = datetime.utcnow()
    for data, last_ptr, logged in self.ws.live_data(logged_only=(sync > 1)):
        last_date = data['idx']
        logger.debug('Reading time %s', last_date.strftime('%H:%M:%S'))
        if logged:
            # a freshly logged record is the most reliable reference
            break
        if sync < 2 and self.ws._station_clock.clock:
            # estimate log time from the station clock
            err = last_date - datetime.fromtimestamp(
                self.ws._station_clock.clock)
            last_date -= timedelta(
                minutes=data['delay'], seconds=err.seconds % 60)
            logger.debug('log time %s', last_date.strftime('%H:%M:%S'))
            last_ptr = self.ws.dec_ptr(last_ptr)
            break
        if sync < 1:
            # narrow down the log time from successive live readings
            # until the uncertainty is small enough
            hi = last_date - timedelta(minutes=data['delay'])
            if last_date - prev_date > timedelta(seconds=50):
                lo = hi - timedelta(seconds=60)
            elif data['delay'] == last_delay:
                lo = hi - timedelta(seconds=60)
                hi = hi - timedelta(seconds=48)
            else:
                lo = hi - timedelta(seconds=48)
            last_delay = data['delay']
            prev_date = last_date
            range_hi = min(range_hi, hi)
            range_lo = max(range_lo, lo)
            err = (range_hi - range_lo) / 2
            last_date = range_lo + err
            logger.debug('est log time %s +- %ds (%s..%s)',
                         last_date.strftime('%H:%M:%S'), err.seconds,
                         lo.strftime('%H:%M:%S'), hi.strftime('%H:%M:%S'))
            if err < timedelta(seconds=15):
                last_ptr = self.ws.dec_ptr(last_ptr)
                break
    # go back through stored data, until we catch up with what we've already got
    logger.info('Fetching data')
    self.status.set(
        'data', 'ptr', '%06x,%s' % (last_ptr, last_date.isoformat(' ')))
    self.fetch_logged(last_date, last_ptr)
    if clear:
        logger.info('Clearing weather station memory')
        ptr = self.ws.fixed_format['data_count'][0]
        self.ws.write_data([(ptr, 1), (ptr+1, 0)])
def do_plot(self, input_file, output_file):
    """Plot a graph from an XML description file using gnuplot.

    Writes a gnuplot command file to the work directory, runs gnuplot
    on it, then deletes the temporary file.

    :param input_file: XML graph description file name, or an already
        open GraphFileReader.
    :param output_file: name of the image file to create.
    :return: 0 on success, 1 if the description contains no plots.
    """
    if isinstance(input_file, GraphFileReader):
        self.graph = input_file
    else:
        # read XML graph description
        self.graph = GraphFileReader(input_file)
    # get list of plots
    plot_list = self.graph.get_children(self.plot_name)
    self.plot_count = len(plot_list)
    if self.plot_count < 1:
        # nothing to plot
        logger.info('%s has no %s nodes', self.graph.input_file, self.plot_name)
        self.graph.close()
        return 1
    # get start and end datetimes
    self.x_lo = self.graph.get_value('start', None)
    self.x_hi = self.graph.get_value('stop', None)
    self.duration = self.graph.get_value('duration', None)
    if self.duration:
        # NOTE(review): eval of a user-supplied config value -- runs
        # arbitrary code from the graph description file
        self.duration = eval('timedelta(%s)' % self.duration)
    else:
        self.duration = timedelta(hours=24)
    # derive whichever of start/stop/duration were not given
    if self.x_lo:
        self.x_lo = self._eval_time(self.x_lo)
        if self.x_hi:
            self.x_hi = self._eval_time(self.x_hi)
            self.duration = self.x_hi - self.x_lo
        else:
            self.x_hi = self.x_lo + self.duration
    elif self.x_hi:
        self.x_hi = self._eval_time(self.x_hi)
        self.x_lo = self.x_hi - self.duration
    else:
        self.x_hi = self.hourly_data.before(datetime.max)
        if not self.x_hi:
            self.x_hi = datetime.utcnow()    # only if no hourly data
        self.x_hi += timezone.utcoffset(self.x_hi)
        if self.duration < timedelta(hours=6):
            # set end of graph to start of the next minute after last item
            self.x_hi += timedelta(seconds=55)
            self.x_hi = self.x_hi.replace(second=0)
        else:
            # set end of graph to start of the next hour after last item
            self.x_hi += timedelta(minutes=55)
            self.x_hi = self.x_hi.replace(minute=0, second=0)
        self.x_lo = self.x_hi - self.duration
    self.utcoffset = timezone.utcoffset(self.x_hi)
    # open gnuplot command file
    self.tmp_files = []
    cmd_file = os.path.join(self.work_dir, 'plot.cmd')
    self.tmp_files.append(cmd_file)
    of = codecs.open(cmd_file, 'w', encoding=self.encoding[0])
    # write gnuplot set up
    of.write('set encoding %s\n' % (self.encoding[1]))
    lcl = locale.getlocale()
    if lcl[0]:
        of.write('set locale "%s.%s"\n' % lcl)
    # compute the multiplot grid, overridable from the description file
    self.rows = self.get_default_rows()
    self.cols = (self.plot_count + self.rows - 1) // self.rows
    # NOTE(review): eval of user-supplied 'layout' and 'size' values
    self.rows, self.cols = eval(
        self.graph.get_value('layout', '%d, %d' % (self.rows, self.cols)))
    w, h = self.get_default_plot_size()
    w = w * self.cols
    h = h * self.rows
    w, h = eval(self.graph.get_value('size', '(%d, %d)' % (w, h)))
    # choose gnuplot terminal from the output file format
    fileformat = self.graph.get_value('fileformat', 'png')
    if fileformat == 'svg':
        terminal = 'svg enhanced font "arial,9" dynamic rounded'
    elif u' ' not in fileformat:
        terminal = '%s large' % (fileformat)
    else:
        terminal = fileformat
    if u'size' not in terminal:
        terminal += u' size %d,%d' % (w, h)
    terminal = self.graph.get_value('terminal', terminal)
    of.write('set terminal %s\n' % (terminal))
    of.write('set output "%s"\n' % (output_file))
    # set overall title
    title = self.graph.get_value('title', '')
    if title:
        if '%' in title:
            # the title contains strftime-style date/time format codes
            x_hi = timezone.localize(self.x_hi)
            if sys.version_info[0] < 3:
                title = title.encode(self.encoding[0])
            title = x_hi.strftime(title)
            if sys.version_info[0] < 3:
                title = title.decode(self.encoding[0])
        title = 'title "%s"' % title
    of.write('set multiplot layout %d, %d %s\n' % (self.rows, self.cols, title))
    # do actual plots
    of.write(self.get_preamble())
    for plot_no in range(self.plot_count):
        plot = plot_list[plot_no]
        # set key / title location
        title = plot.get_value('title', '')
        of.write('set key horizontal title "%s"\n' % title)
        # optional yaxis labels
        ylabel = plot.get_value('ylabel', '')
        if ylabel:
            ylabelangle = plot.get_value('ylabelangle', '')
            if ylabelangle:
                ylabelangle = ' rotate by %s' % (ylabelangle)
            of.write('set ylabel "%s"%s\n' % (ylabel, ylabelangle))
        else:
            of.write('set ylabel\n')
        y2label = plot.get_value('y2label', '')
        if y2label:
            y2labelangle = plot.get_value('y2labelangle', '')
            if y2labelangle:
                y2labelangle = ' rotate by %s' % (y2labelangle)
            of.write('set y2label "%s"%s\n' % (y2label, y2labelangle))
        else:
            of.write('set y2label\n')
        # set data source
        source = plot.get_value('source', 'raw')
        if source == 'raw':
            source = self.calib_data
        elif source == 'hourly':
            source = self.hourly_data
        elif source == 'monthly':
            source = self.monthly_data
        else:
            source = self.daily_data
        # do the plot
        of.write(self.plot_data(plot_no, plot, source))
    of.close()
    self.graph.close()
    # run gnuplot on file
    subprocess.check_call(['gnuplot', cmd_file])
    for file in self.tmp_files:
        os.unlink(file)
    return 0
def session(self):
    """Yield a 'session' for copying results to a local directory.

    Generator used as a one-shot session context: it logs the action,
    verifies that the target directory exists, and yields exactly once.

    :raises RuntimeError: if ``self.directory`` is not an existing
        directory.
    """
    logger.info("Copying to local directory")
    if os.path.isdir(self.directory):
        yield None
    else:
        raise RuntimeError(
            'Directory "' + self.directory + '" does not exist.')
def generate_hourly(calib_data, hourly_data, process_from):
    """Generate hourly summaries from calibrated data.

    Finds the first hour needing (re)processing — the first calibrated
    record after the last stored hourly record, pulled back to
    ``process_from`` when that is earlier — deletes all stored hourly
    data from that point onwards, then regenerates it hour by hour.

    :param calib_data: store of calibrated records (mappings with
        'idx', 'rel_pressure', 'rain', 'delay' keys), sliceable by
        datetime.
    :param hourly_data: store that receives the hourly summary records.
    :param process_from: optional earliest datetime to process from.
    :return: the datetime processing started from, or None when there
        was no data to process (the caller chains this into daily
        processing).
    """
    # first candidate: first calibrated record after the last stored hour
    start = hourly_data.before(datetime.max)
    if start is None:
        start = datetime.min
    start = calib_data.after(start + SECOND)
    # pull back to process_from if that is earlier
    if process_from:
        if start:
            start = min(start, process_from)
        else:
            start = process_from
    if start is None:
        # no calibrated data at all
        return start
    # set start of hour in local time (not all time offsets are integer hours)
    start += timezone.standard_offset
    start = start.replace(minute=0, second=0)
    start -= timezone.standard_offset
    # everything from here on will be regenerated
    del hourly_data[start:]
    # preload pressure history, and find last valid rain
    prev = None
    pressure_history = deque()
    last_rain = None
    for data in calib_data[start - HOURx3:start]:
        if data['rel_pressure']:
            pressure_history.append((data['idx'], data['rel_pressure']))
        if data['rain'] is not None:
            last_rain = data['rain']
        prev = data
    # iterate over data in one hour chunks
    stop = calib_data.before(datetime.max)
    hour_start = start
    acc = HourAcc(last_rain)
    count = 0
    while hour_start <= stop:
        count += 1
        # periodic progress logging (1008 hours is 42 days; presumably
        # chosen to keep log volume low -- TODO confirm)
        if count % 1008 == 0:
            logger.info("hourly: %s", hour_start.isoformat(' '))
        elif count % 24 == 0:
            logger.debug("hourly: %s", hour_start.isoformat(' '))
        hour_end = hour_start + HOUR
        acc.reset()
        for data in calib_data[hour_start:hour_end]:
            if data['rel_pressure']:
                pressure_history.append((data['idx'], data['rel_pressure']))
            if prev:
                # warn when the gap between consecutive records differs
                # from the logger's declared interval ('delay' minutes)
                err = data['idx'] - prev['idx']
                if abs(err - timedelta(minutes=data['delay'])) > TIME_ERR:
                    logger.info('unexpected data interval %s %s',
                                data['idx'].isoformat(' '), str(err))
            acc.add_raw(data)
            prev = data
        new_data = acc.result()
        # require the hour's last record to be at least 9 minutes into
        # the hour before storing a summary
        if new_data and (new_data['idx'] - hour_start) >= timedelta(minutes=9):
            # compute pressure trend: change in relative pressure
            # relative to the reading closest to three hours earlier
            new_data['pressure_trend'] = None
            if new_data['rel_pressure']:
                target = new_data['idx'] - HOURx3
                # drop history entries while the next one is closer to
                # the three-hours-ago target
                while (len(pressure_history) >= 2 and
                       abs(pressure_history[0][0] - target) >
                       abs(pressure_history[1][0] - target)):
                    pressure_history.popleft()
                # only use a reading within an hour of the target
                if (pressure_history and
                        abs(pressure_history[0][0] - target) < HOUR):
                    new_data['pressure_trend'] = (
                        new_data['rel_pressure'] - pressure_history[0][1])
            # store new hourly data
            hourly_data[new_data['idx']] = new_data
        hour_start = hour_end
    return start
# NOTE(review): an identical definition of generate_hourly appears
# earlier in this file -- likely a merge/extraction artifact; if both
# are in the same module scope, this later one wins at import time.
def generate_hourly(calib_data, hourly_data, process_from):
    """Generate hourly summaries from calibrated data.

    Determines the earliest hour whose summary is missing or stale,
    deletes stored hourly data from that hour onwards, and rebuilds it
    one hour at a time using an HourAcc accumulator.

    :param calib_data: calibrated record store, sliceable by datetime;
        records are mappings with 'idx', 'rel_pressure', 'rain' and
        'delay' keys.
    :param hourly_data: destination store for hourly summaries.
    :param process_from: optional datetime forcing reprocessing from no
        later than this point.
    :return: datetime processing started from, or None if nothing to do.
    """
    start = hourly_data.before(datetime.max)
    if start is None:
        start = datetime.min
    # first calibrated record strictly after the last stored hour
    start = calib_data.after(start + SECOND)
    if process_from:
        if start:
            start = min(start, process_from)
        else:
            start = process_from
    if start is None:
        # empty calibrated store
        return start
    # set start of hour in local time (not all time offsets are integer hours)
    start += timezone.standard_offset
    start = start.replace(minute=0, second=0)
    start -= timezone.standard_offset
    del hourly_data[start:]
    # preload pressure history, and find last valid rain
    prev = None
    pressure_history = deque()
    last_rain = None
    for data in calib_data[start - HOURx3:start]:
        if data['rel_pressure']:
            pressure_history.append((data['idx'], data['rel_pressure']))
        if data['rain'] is not None:
            last_rain = data['rain']
        prev = data
    # iterate over data in one hour chunks
    stop = calib_data.before(datetime.max)
    hour_start = start
    acc = HourAcc(last_rain)
    count = 0
    while hour_start <= stop:
        count += 1
        # progress logging at coarse (info) and fine (debug) intervals
        if count % 1008 == 0:
            logger.info("hourly: %s", hour_start.isoformat(' '))
        elif count % 24 == 0:
            logger.debug("hourly: %s", hour_start.isoformat(' '))
        hour_end = hour_start + HOUR
        acc.reset()
        for data in calib_data[hour_start:hour_end]:
            if data['rel_pressure']:
                pressure_history.append((data['idx'], data['rel_pressure']))
            if prev:
                # flag records whose spacing disagrees with their
                # declared 'delay' interval
                err = data['idx'] - prev['idx']
                if abs(err - timedelta(minutes=data['delay'])) > TIME_ERR:
                    logger.info('unexpected data interval %s %s',
                                data['idx'].isoformat(' '), str(err))
            acc.add_raw(data)
            prev = data
        new_data = acc.result()
        # skip hours whose data ends less than ~10 minutes in
        if new_data and (new_data['idx'] - hour_start) >= timedelta(minutes=9):
            # compute pressure trend over roughly the last three hours
            new_data['pressure_trend'] = None
            if new_data['rel_pressure']:
                target = new_data['idx'] - HOURx3
                # advance the history to the entry closest to target
                while (len(pressure_history) >= 2 and
                       abs(pressure_history[0][0] - target) >
                       abs(pressure_history[1][0] - target)):
                    pressure_history.popleft()
                if (pressure_history and
                        abs(pressure_history[0][0] - target) < HOUR):
                    new_data['pressure_trend'] = (
                        new_data['rel_pressure'] - pressure_history[0][1])
            # store new hourly data
            hourly_data[new_data['idx']] = new_data
        hour_start = hour_end
    return start
def session(self):
    """Session generator for the local-directory destination.

    Logs the copy action, then yields a single placeholder value once
    the destination directory has been verified to exist.

    :raises RuntimeError: when ``self.directory`` does not exist.
    """
    logger.info("Copying to local directory")
    if not os.path.isdir(self.directory):
        message = 'Directory "' + self.directory + '" does not exist.'
        raise RuntimeError(message)
    yield None
# Command-line driver: copy weather data stores from one storage
# backend to another (backend modules are chosen by name at runtime).
args = parser.parse_args()
source_type = args.SourceType
sink_type = args.SinkType
source_dir = args.SourceDir
sink_dir = args.SinkDir
clearfirst = args.clearfirst
# refuse to copy a store onto itself
if source_type == sink_type and source_dir == sink_dir:
    raise ValueError("You have specified the same source and sink")
# import the selected pywws storage backend modules dynamically
Source = importlib.import_module("." + source_type, package="pywws")
Sink = importlib.import_module("." + sink_type, package="pywws")
# raw data
RawSink = Sink.RawStore(sink_dir)
if clearfirst:
    logger.info("Clearing destination Raw Data...")
    RawSink.clear()
# (sic: 'Transfering' typo kept -- runtime log string)
logger.info("Transfering Raw Data...")
RawSink.update(monitor(Source.RawStore(source_dir)))
RawSink.flush()
# calibrated data
CalibSink = Sink.CalibStore(sink_dir)
if clearfirst:
    logger.info("Clearing destination Calibrated Data...")
    CalibSink.clear()
logger.info("Transfering Calibrated Data...")
CalibSink.update(monitor(Source.CalibStore(source_dir)))
CalibSink.flush()
# hourly data
HourlySink = Sink.HourlyStore(sink_dir)
# NOTE(review): source fragment is truncated here -- the body of this
# 'if' lies outside the visible chunk
if clearfirst:
# NOTE(review): this script fragment duplicates the store-copy driver
# that appears earlier in this file -- likely an extraction artifact.
# It copies raw, calibrated and hourly stores between two backends.
args = parser.parse_args()
source_type = args.SourceType
sink_type = args.SinkType
source_dir = args.SourceDir
sink_dir = args.SinkDir
clearfirst=args.clearfirst
# a copy onto itself would be a destructive no-op; reject it
if source_type == sink_type and source_dir == sink_dir:
    raise ValueError("You have specified the same source and sink")
# storage backend modules selected by name at runtime
Source = importlib.import_module("."+source_type, package="pywws")
Sink = importlib.import_module("."+sink_type, package="pywws")
# raw data
RawSink = Sink.RawStore(sink_dir)
if clearfirst:
    logger.info("Clearing destination Raw Data...")
    RawSink.clear()
logger.info("Transfering Raw Data...")
RawSink.update(monitor(Source.RawStore(source_dir)))
RawSink.flush()
# calibrated data
CalibSink = Sink.CalibStore(sink_dir)
if clearfirst:
    logger.info("Clearing destination Calibrated Data...")
    CalibSink.clear()
logger.info("Transfering Calibrated Data...")
CalibSink.update(monitor(Source.CalibStore(source_dir)))
CalibSink.flush()
# hourly data
HourlySink = Sink.HourlyStore(sink_dir)
# NOTE(review): fragment truncated -- the 'if' body continues outside
# this view
if clearfirst:
def do_plot(self, input_file, output_file):
    """Plot a graph from an XML graph description via gnuplot.

    Builds a gnuplot command file in the working directory from the
    graph description, runs gnuplot on it, then deletes the temporary
    files.

    :param input_file: path of the XML graph description file, or an
        already-constructed GraphFileReader.
    :param output_file: path of the image file gnuplot should write.
    :return: 0 on success, 1 when the description contains no plot
        nodes.
    """
    if isinstance(input_file, GraphFileReader):
        self.graph = input_file
    else:
        # read XML graph description
        self.graph = GraphFileReader(input_file)
    # get list of plots
    plot_list = self.graph.get_children(self.plot_name)
    self.plot_count = len(plot_list)
    if self.plot_count < 1:
        # nothing to plot
        logger.info('%s has no %s nodes', self.graph.input_file, self.plot_name)
        self.graph.close()
        return 1
    # get start and end datetimes
    self.x_lo = self.graph.get_value('start', None)
    self.x_hi = self.graph.get_value('stop', None)
    self.duration = self.graph.get_value('duration', None)
    # NOTE(review): 'duration', 'layout' and 'size' values from the
    # graph file are passed to eval() -- only trusted graph files
    # should be plotted
    if self.duration:
        self.duration = eval('timedelta(%s)' % self.duration)
    else:
        self.duration = timedelta(hours=24)
    if self.x_lo:
        self.x_lo = self._eval_time(self.x_lo)
        if self.x_hi:
            self.x_hi = self._eval_time(self.x_hi)
            # both ends given: duration is derived, not configured
            self.duration = self.x_hi - self.x_lo
        else:
            self.x_hi = self.x_lo + self.duration
    elif self.x_hi:
        self.x_hi = self._eval_time(self.x_hi)
        self.x_lo = self.x_hi - self.duration
    else:
        # neither end given: end at the most recent hourly data
        self.x_hi = self.hourly_data.before(datetime.max)
        if not self.x_hi:
            self.x_hi = datetime.utcnow()    # only if no hourly data
        self.x_hi += timezone.utcoffset(self.x_hi)
        if self.duration < timedelta(hours=6):
            # set end of graph to start of the next minute after last item
            self.x_hi += timedelta(seconds=55)
            self.x_hi = self.x_hi.replace(second=0)
        else:
            # set end of graph to start of the next hour after last item
            self.x_hi += timedelta(minutes=55)
            self.x_hi = self.x_hi.replace(minute=0, second=0)
        self.x_lo = self.x_hi - self.duration
    self.utcoffset = timezone.utcoffset(self.x_hi)
    # open gnuplot command file
    self.tmp_files = []
    cmd_file = os.path.join(self.work_dir, 'plot.cmd')
    self.tmp_files.append(cmd_file)
    of = codecs.open(cmd_file, 'w', encoding=self.encoding[0])
    # write gnuplot set up
    of.write('set encoding %s\n' % (self.encoding[1]))
    lcl = locale.getlocale()
    if lcl[0]:
        of.write('set locale "%s.%s"\n' % lcl)
    # grid layout: default rows from subclass, columns to fit all plots
    self.rows = self.get_default_rows()
    self.cols = (self.plot_count + self.rows - 1) // self.rows
    self.rows, self.cols = eval(self.graph.get_value(
        'layout', '%d, %d' % (self.rows, self.cols)))
    # overall image size scales with the grid unless overridden
    w, h = self.get_default_plot_size()
    w = w * self.cols
    h = h * self.rows
    w, h = eval(self.graph.get_value('size', '(%d, %d)' % (w, h)))
    fileformat = self.graph.get_value('fileformat', 'png')
    if fileformat == 'svg':
        terminal = 'svg enhanced font "arial,9" dynamic rounded'
    elif u' ' not in fileformat:
        # bare format name: add default 'large' font option
        terminal = '%s large' % (fileformat)
    else:
        # format string already carries its own options
        terminal = fileformat
    if u'size' not in terminal:
        terminal += u' size %d,%d' % (w, h)
    # a 'terminal' value in the graph file overrides everything above
    terminal = self.graph.get_value('terminal', terminal)
    of.write('set terminal %s\n' % (terminal))
    of.write('set output "%s"\n' % (output_file))
    # set overall title
    title = self.graph.get_value('title', '')
    if title:
        if '%' in title:
            # title contains strftime conversions: expand against the
            # graph end time (encode/decode dance is Python 2 only)
            x_hi = timezone.localize(self.x_hi)
            if sys.version_info[0] < 3:
                title = title.encode(self.encoding[0])
            title = x_hi.strftime(title)
            if sys.version_info[0] < 3:
                title = title.decode(self.encoding[0])
        title = 'title "%s"' % title
    of.write('set multiplot layout %d, %d %s\n' % (self.rows, self.cols, title))
    # do actual plots
    of.write(self.get_preamble())
    for plot_no in range(self.plot_count):
        plot = plot_list[plot_no]
        # set key / title location
        title = plot.get_value('title', '')
        of.write('set key horizontal title "%s"\n' % title)
        # optional yaxis labels
        ylabel = plot.get_value('ylabel', '')
        if ylabel:
            ylabelangle = plot.get_value('ylabelangle', '')
            if ylabelangle:
                ylabelangle = ' rotate by %s' % (ylabelangle)
            of.write('set ylabel "%s"%s\n' % (ylabel, ylabelangle))
        else:
            # clear any label left over from the previous plot
            of.write('set ylabel\n')
        y2label = plot.get_value('y2label', '')
        if y2label:
            y2labelangle = plot.get_value('y2labelangle', '')
            if y2labelangle:
                y2labelangle = ' rotate by %s' % (y2labelangle)
            of.write('set y2label "%s"%s\n' % (y2label, y2labelangle))
        else:
            of.write('set y2label\n')
        # set data source
        source = plot.get_value('source', 'raw')
        if source == 'raw':
            source = self.calib_data
        elif source == 'hourly':
            source = self.hourly_data
        elif source == 'monthly':
            source = self.monthly_data
        else:
            # anything else (typically 'daily') falls back to daily data
            source = self.daily_data
        # do the plot
        of.write(self.plot_data(plot_no, plot, source))
    of.close()
    self.graph.close()
    # run gnuplot on file
    subprocess.check_call(['gnuplot', cmd_file])
    # clean up temporary files (command file plus any data files
    # registered by plot_data)
    for file in self.tmp_files:
        os.unlink(file)
    return 0