def date_generator():
    """Yield ``(start, end, is_rolling)`` tuples for each plot period.

    Walks whole days from the module-level ``start_time`` up to
    ``end_time`` (flag ``False``).  When ``args.rolling`` is set, the
    current day is also produced if the walk did not already reach it,
    and finally a rolling 24 hour window ending at the end of the
    current hour is yielded with the flag set to ``True``.
    """
    d_start = start_time
    while d_start < end_time:
        d_end = d_start + day
        yield d_start, d_end, False
        d_start = d_end
    if args.rolling:
        # Rolling mode ought to produce the current day too.
        today_start = dt64.floor(now, day)
        if d_start != today_start:
            yield today_start, today_start + day, False
        # The rolling window: 24 hours ending at the end of this hour.
        roll_end = dt64.ceil(now, np.timedelta64(1, 'h'))
        yield roll_end - day, roll_end, True
project_list, site_list = ap.parse_project_site_list(args.project_site) # Process --archive options for source data if args.archive: archive = ap.parse_archive_selection(args.archive) else: archive = {} # Process --baseline-archive options for target data if args.baseline_archive: baseline_archive = ap.parse_archive_selection(args.baseline_archive) else: baseline_archive = {} st = dt64.floor(st, day) et = dt64.ceil(et, day) for n in range(len(project_list)): project = project_list[n] site = site_list[n] # Get baseline archive to use for target data if project in baseline_archive and site in baseline_archive[project]: bl_archive = baseline_archive[project][site] else: bl_archive = 'realtime_baseline' an, ai = ap.get_archive_info(project, site, 'MagData', archive=bl_archive)
# t1 = start_time # while t1 < end_time: # Iterate over the list of days to process. If rolling plots were # specified the last item will be start/end times for the rolling # plot. for t1, t2, rolling in date_generator(): t1_sod = dt64.floor(t1, day) plt.close('all') ### DEBUG: Phase these out t1_eod = dt64.ceil(t1, day) # t1 end of day t2_eod = dt64.ceil(t2, day) # t2 end of day # List of magdata objects for this 24 hour period mag_data_list = [] activity_data_list = [] # Get copyright and attribution data for all sites. Licenses had # better be compatible (or we have express permission) since we # are combining them. copyright_list = [] attribution_list = []
print(repr(dest_ai)) # Tune start/end times to avoid requesting data outside of # operational period site_st = ap.get_site_info(project, site, 'start_time') if site_st is None or site_st < st: site_st = st else: site_st = dt64.floor(site_st, day) site_st = dt64.floor(site_st, src_ai['duration']) site_et = ap.get_site_info(project, site, 'end_time') if site_et is None or site_et > et: site_et = et else: site_et = dt64.ceil(site_et, day) site_et = dt64.ceil(site_et, src_ai['duration']) logger.info('Processing %s/%s %s', project, site, dt64. fmt_dt64_range(site_st, site_et)) for t in dt64.dt64_range(site_st, site_et, src_ai['duration']): temp_file_name = None try: if hasattr(dest_path, '__call__'): # Function: call it with relevant information to get # the dest_path dest_file_name = dest_path(t, project=project, site=site, data_type=data_type,
def warn_missing_data(data, project, site, now, status_dir, test_mode,
                      config):
    """Send notifications (tweet, Facebook, email) when site data is stale.

    data: data object whose most recent non-NaN sample is checked, or
        None if no data could be loaded.
    project, site: identify the site the data belongs to.
    now: current time (numpy datetime64).
    status_dir: directory used by run_if_timeout_reached() to record
        when each notification was last sent.
    test_mode: passed through to the message expansion.
    config: ConfigParser-style object holding the notification settings
        in the 'missing_data' section.

    Each notification channel is rate-limited via
    run_if_timeout_reached() so repeated runs do not spam.
    """
    section_name = 'missing_data'
    missing_interval = np.timedelta64(1, 'h')
    timeout = np.timedelta64(1, 'D')  # minimum time between notifications
    if not missing_interval:
        return

    if data is None:
        t = None
    else:
        # Find last non-nan value
        idx = np.nonzero(np.any(np.logical_not(np.isnan(data.data)),
                                axis=0))[0]
        if len(idx):
            t = data.sample_start_time[idx[-1]]
        else:
            t = None

    if t is None:
        # Data is entirely missing. Was expecting 24 hours of data,
        # with a nominal end time of the end of the current hour.
        t = dt64.ceil(now, np.timedelta64(1, 'h')) - np.timedelta64(1, 'D')

    tstr = dt64.strftime(t, '%Y-%m-%d %H:%M:%SUT')
    if t < now - missing_interval:
        # Data is missing
        logger.info(project + '/' + site + ' missing data')
        if config.has_option(section_name, 'twitter_username'):
            username = config.get(section_name, 'twitter_username')
            mesg = expand_string(config.get(section_name, 'twitter_message'),
                                 project, site, now, test_mode,
                                 missing_start_time=tstr,
                                 missing_interval=str(missing_interval))
            run_if_timeout_reached(send_tweet, timeout, now, status_dir,
                                   func_args=[username, mesg],
                                   name=section_name + '_tweet')

        if config.has_option(section_name, 'facebook_cmd'):
            fbcmd_opts = config.get(section_name, 'facebook_cmd').split()
            mesg = expand_string(config.get(section_name, 'facebook_message'),
                                 project, site, now, test_mode,
                                 missing_start_time=tstr,
                                 missing_interval=str(missing_interval))
            # BUG FIX: was func_args=[fbcmd_opts, facebook_mesg];
            # 'facebook_mesg' is never defined so this raised NameError
            # whenever the facebook_cmd option was configured. The
            # message variable is 'mesg'.
            run_if_timeout_reached(fbcmd, timeout, now, status_dir,
                                   func_args=[fbcmd_opts, mesg],
                                   name=section_name + '_facebook')

        # Email. Leave to the send_email() function to determine if it
        # is configured since there are many possible settings in the
        # config file. Run each email job separately in case of
        # failure to send.
        func_kwargs = {'missing_start_time': tstr,
                       'missing_interval': str(missing_interval)}
        for ejob in get_email_jobs(config, section_name):
            run_if_timeout_reached(send_email, timeout, now, status_dir,
                                   func_args=[config, section_name, ejob,
                                              project, site, now, test_mode],
                                   func_kwargs=func_kwargs,
                                   name=section_name + '_' + ejob)
# Basic time quantities used throughout the script.
day = np.timedelta64(24, 'h')
today = dt64.floor(now, day)
yesterday = today - day
tomorrow = today + day

# This can be used in os.path.join() to include the test directory
# when needed.
if args.test_mode:
    test_mode_str = 'test'
else:
    test_mode_str = ''

if args.rolling:
    # Rolling mode computes its own window; explicit times conflict.
    if args.start_time or args.end_time:
        raise Exception('Cannot set start or end time for rolling plots')
    end_time = dt64.ceil(now, np.timedelta64(1, 'h'))
    start_time = end_time - day
else:
    if args.start_time is None:
        start_time = today
    else:
        start_time = dt64.parse_datetime64(args.start_time, 'us')
    if args.end_time is None:
        end_time = start_time + day
    else:
        end_time = dt64.parse_datetime64(args.end_time, 'us')

if args.run_jobs:
    # Imported only when needed; may not be installed otherwise.
    import aurorawatch_jobs
def make_aurorawatch_plot(project, site, st, et, rolling, exif_tags):
    '''
    Load data and make the AuroraWatch activity plot. Plots always
    cover 24 hours, but may begin at midnight for day plots, or at any
    other hour for rolling plots. This function uses the previous 72
    hours to help fit the quiet-day curve.

    project: name of project

    site: name of site

    st: start time. For day plots this is the start of the day. For
        rolling plots this is the start of the rolling 24 hour period.

    et: end time. For day plots this is the start of the following
        day. For rolling plots it is the end of the 24 hour period.

    rolling: flag to indicate if rolling plot should also be made. It
        is not otherwise possible to identify rolling plots which
        start at midnight.

    Returns a list of [day mag data, day activity/K index results,
    and, when rolling, the rolling equivalents], or None when no
    magnetic field data is available.
    '''

    # global mag_fstr
    global args
    # Export to global names for debugging
    global mag_data
    global mag_qdc
    global activity

    day = np.timedelta64(24, 'h')
    archive, archive_details = ap.get_archive_info(project, site, 'MagData')

    # Load the data to plot. For rolling plots load upto midnight so
    # that both the rolling plot and the current day plot can be
    # generated efficiently.
    mag_data = my_load_data(project, site, 'MagData', st,
                            dt64.ceil(et, day))

    if mag_data is None or \
            not np.any(np.logical_not(np.isnan(mag_data.data))):
        # not .np.any(etc) eliminates empty array or array of just nans
        logger.info('No magnetic field data')
        return

    # Load up some data from previous days to and apply a
    # least-squares fit to remove baseline drifts. Data from the
    # current day is not used. This ensures that results do not change
    # over the current day when new data becomes available.
    qdc_fit_interval = args.qdc_fit_interval * day
    fit_et = dt64.ceil(st, day)  # Could be doing a rolling plot
    fit_st = fit_et - qdc_fit_interval
    fit_data = my_load_data(project, site, 'MagData', fit_st, fit_et)

    # Load a QDC.
    mag_qdc = ap.magdata.load_qdc(project, site, st, tries=6, realtime=True)

    # NOTE: 'errors' is defined in every branch below that is later
    # read (it is only used when mag_qdc is not None).
    if mag_qdc is None:
        logger.info('No QDC')
    elif fit_data is None:
        # Cannot fit, so assume no errors in QDC
        errors = [0.0]
    else:
        try:
            # Fit the QDC to the previous data
            qdc_aligned, errors, fi = mag_qdc.align(
                fit_data,
                fit=ap.data.Data.minimise_sign_error_fit,
                plot_fit=args.plot_fit,
                full_output=True)
        except Exception as e:
            logger.warn('Could not fit QDC')
            logger.info(str(e))
            errors = [0.0]
        else:
            # Fitted ok, plot if necessary
            if args.plot_fit:
                fig = plt.gcf()
                fig.set_figwidth(6.4)
                fig.set_figheight(4.8)
                fig.subplots_adjust(bottom=0.1, top=0.85,
                                    left=0.15, right=0.925)
                fit_fstr = mag_fstr[:(mag_fstr.rindex('.'))] + '_fit.png'
                mysavefig(fig, dt64.strftime(dt64.ceil(st, day), fit_fstr),
                          exif_tags)

    # Adjust the quiet day curve with the error obtained by fitting to
    # previous days.
    if mag_qdc is None:
        mag_qdc_adj = None
    else:
        mag_qdc_adj = copy.deepcopy(mag_qdc)
        mag_qdc_adj.data -= errors[0]

    # Ensure data gaps are marked as such in the plots. Straight lines
    # across large gaps look bad!
    mag_data = mag_data.mark_missing_data(
        cadence=2*mag_data.nominal_cadence)

    # Do day plot. Trim start time for occasions when making a day
    # plot simultaneously with a rolling plot.
    st2 = dt64.ceil(st, day)
    md_day = mag_data.extract(start_time=st2)
    act_ki = activity_plot(md_day, mag_qdc_adj,
                           dt64.strftime(st2, mag_fstr),
                           exif_tags,
                           k_index_filename=dt64.strftime(st2, k_fstr))
    r = [md_day]
    r.extend(act_ki)
    if rolling:
        # Trim end time
        md_rolling = mag_data.extract(end_time=et)
        act_ki_rolling = activity_plot(md_rolling, mag_qdc_adj,
                                       rolling_magdata_filename,
                                       exif_tags,
                                       k_index_filename=rolling_k_filename)
        r.append(md_rolling)
        r.extend(act_ki_rolling)
    return r
logger.warn('Could not set time zone to UTC')

# Import any additional dataset modules requested on the command line.
if args.dataset:
    for ds in args.dataset:
        new_module = 'auroraplot.datasets.' + ds
        try:
            import_module(new_module)
        except Exception as e:
            print('Could not import ' + new_module + ': ' + str(e))
            sys.exit(1)

# Parse and process start and end times. If end time not given use
# start time plus 1 day.
if args.rolling:
    # Rolling window: 24 hours ending at the end of the current hour.
    et = dt64.ceil(np.datetime64('now', 's'), np.timedelta64(1, 'h'))
    st = et - np.timedelta64(1, 'D')
else:
    st = dt64.parse_datetime64(args.start_time, 's')
    if args.end_time is None:
        et = st + np.timedelta64(86400, 's')
    else:
        try:
            # Parse as date
            et = dt64.parse_datetime64(args.end_time, 's')
        except ValueError as e:
            try:
                # Parse as a set of duration values
                et = st + np.timedelta64(0, 's')
                et_words = args.end_time.split()
                assert len(et_words) % 2 == 0, 'Need even number of words'
print(repr(dest_ai)) # Tune start/end times to avoid requesting data outside of # operational period site_st = ap.get_site_info(project, site, 'start_time') if site_st is None or site_st < st: site_st = st else: site_st = dt64.floor(site_st, day) site_st = dt64.floor(site_st, src_ai['duration']) site_et = ap.get_site_info(project, site, 'end_time') if site_et is None or site_et > et: site_et = et else: site_et = dt64.ceil(site_et, day) site_et = dt64.ceil(site_et, src_ai['duration']) logger.info('Processing %s/%s %s', project, site, dt64.fmt_dt64_range(site_st, site_et)) for t in dt64.dt64_range(site_st, site_et, src_ai['duration']): temp_file_name = None try: if hasattr(dest_path, '__call__'): # Function: call it with relevant information to get # the dest_path dest_file_name = dest_path(t, project=project, site=site, data_type=data_type, archive=dest_an,
# Cannot use tzset on windows logger.warn('Could not set time zone to UTC') if args.dataset: for ds in args.dataset: new_module = 'auroraplot.datasets.' + ds try: import_module(new_module) except Exception as e: print('Could not import ' + new_module + ': ' + str(e)) sys.exit(1) # Parse and process start and end times. If end time not given use # start time plus 1 day. if args.rolling: et = dt64.ceil(np.datetime64('now', 's'), np.timedelta64(1, 'h')) st = et - np.timedelta64(1, 'D') else: st = dt64.parse_datetime64(args.start_time, 's') if args.end_time is None: et = st + np.timedelta64(86400, 's') else: try: # Parse as date et = dt64.parse_datetime64(args.end_time, 's') except ValueError as e: try: # Parse as a set of duration values et = st + np.timedelta64(0, 's') et_words = args.end_time.split() assert len(et_words) % 2 == 0, 'Need even number of words'
project_list, site_list = ap.parse_project_site_list(args.project_site) # Process --archive options for source data if args.archive: archive = ap.parse_archive_selection(args.archive) else: archive = {} # Process --baseline-archive options for target data if args.baseline_archive: baseline_archive = ap.parse_archive_selection(args.baseline_archive) else: baseline_archive = {} st = dt64.floor(st, day) et = dt64.ceil(et, day) for n in range(len(project_list)): project = project_list[n] site = site_list[n] # Get baseline archive to use for target data if project in baseline_archive and site in baseline_archive[project]: bl_archive = baseline_archive[project][site] else: bl_archive = 'realtime_baseline' an, ai = ap.get_archive_info(project, site, 'MagData', archive=bl_archive) if 'qdc_fit_duration' not in ai: logger.error('no qdc_fit_duration found in %s archive for %s/%s', an,
def __init__(self,
             project=None,
             site=None,
             channels=None,
             start_time=None,
             end_time=None,
             sample_start_time=np.array([]),
             sample_end_time=np.array([]),
             nominal_cadence=np.timedelta64(3, 'h'),
             integration_interval=np.array([]),
             data=np.array([]),
             units=None,
             sort=False,
             magdata=None,
             magqdc=None,
             scale=None,
             nth=1,
             fit=None,
             fit_params={},
             with_qdc=None):
    """Construct a K index object.

    When magdata is given the K indices are computed from it: the data
    is binned into nominal_cadence (3 hour) intervals, the range in
    each interval is found (optionally after subtracting a quiet day
    curve, magqdc) and the range is mapped to a 0-9 index via
    Bartels-proportional thresholds scaled by 'scale' (default taken
    from the site's 'k_index_scale').

    nth: use the nth largest/smallest values when computing the range,
        to reject outliers; if None it is derived from the cadence.
    fit, fit_params: passed to MagQDC.align() when fitting the QDC.
    """
    Data.__init__(self,
                  project=project,
                  site=site,
                  channels=channels,
                  start_time=start_time,
                  end_time=end_time,
                  sample_start_time=sample_start_time,
                  sample_end_time=sample_end_time,
                  integration_interval=integration_interval,
                  nominal_cadence=nominal_cadence,
                  data=data,
                  units=units,
                  sort=sort)

    if magdata is not None:
        self.project = magdata.project
        self.site = magdata.site
        self.channels = c = magdata.channels
        # Expand to whole cadence intervals covering the input data.
        self.start_time = dt64.floor(magdata.start_time,
                                     self.nominal_cadence)
        self.end_time = dt64.ceil(magdata.end_time,
                                  self.nominal_cadence)
        self.sample_start_time = np.arange(self.start_time, self.end_time,
                                           self.nominal_cadence)
        self.sample_end_time = self.sample_start_time + self.nominal_cadence
        self.integration_interval = None

        if magqdc is None:
            logger.info('Creating KIndex object without a QDC')
            bsub = magdata.data[magdata.get_channel_index(c)]
            self.with_qdc = False
        else:
            assert magdata.units == magqdc.units, 'Units must match'
            if isinstance(magqdc, ap.magdata.MagQDC):
                aligned = magqdc.align(magdata, fit=fit, **fit_params)
            else:
                aligned = magqdc
            # Baseline subtracted data
            bsub = np.abs(magdata.data[magdata.get_channel_index(c)]
                          - aligned.data[aligned.get_channel_index(c)])
            # NOTE(review): looks like this should be True in the QDC
            # branch — confirm against callers of with_qdc.
            self.with_qdc = False

        self.units = None
        if nth is None:
            if magdata.nominal_cadence <= np.timedelta64(5, 's'):
                # Throw away ~30 seconds
                nth = int(np.timedelta64(30, 's')
                          / magdata.nominal_cadence)
            else:
                # Throw away up to 2.5 minutes
                nth = int(np.timedelta64(150, 's')
                          / magdata.nominal_cadence)

        nth_largest = ap.tools.NthLargest(nth)
        nth_smallest = ap.tools.NthLargest(nth, smallest=True)

        self.range = np.zeros([len(self.channels),
                               len(self.sample_start_time)])
        for i in range(len(self.sample_start_time)):
            tidx = np.where(
                np.logical_and(
                    magdata.sample_start_time >= self.sample_start_time[i],
                    magdata.sample_end_time <= self.sample_end_time[i]))[0]
            for cn in range(len(self.channels)):
                # BUG FIX: removed three dead no-op expression
                # statements (bare 'self.range[cn, i]',
                # 'nth_largest(...)' and 'nth_smallest(...)' calls)
                # that preceded this assignment — debug leftovers with
                # no effect on the result.
                self.range[cn, i] = nth_largest(bsub[cn, tidx]) \
                    - nth_smallest(bsub[cn, tidx])

        # NaN where the range is not finite, 0 elsewhere (raised below).
        self.data = np.tile(np.nan, self.range.shape)
        self.data[np.nonzero(np.isfinite(self.range))] = 0

        if scale is None:
            scale = self.get_site_info('k_index_scale')

        # K-index thresholds may be scaled but all are proportional to
        # the limits Bartels defined for Niemegk observatory.
        self.thresholds = np.array([0.00, 0.01, 0.02, 0.04, 0.08, 0.14,
                                    0.24, 0.40, 0.66, 1.00]) * scale
        for i in range(1, len(self.thresholds)):
            self.data[np.nonzero(self.range >= self.thresholds[i])] = i
def __init__(self,
             project=None,
             site=None,
             channels=None,
             start_time=None,
             end_time=None,
             sample_start_time=np.array([]),
             sample_end_time=np.array([]),
             nominal_cadence=np.timedelta64(3, 'h'),
             integration_interval=np.array([]),
             data=np.array([]),
             units=None,
             sort=False,
             magdata=None,
             magqdc=None,
             scale=None,
             nth=1,
             fit=None,
             fit_params={},
             with_qdc=None):
    """Construct a K index object.

    When magdata is given the K indices are computed from it: the data
    is binned into nominal_cadence (3 hour) intervals, the range in
    each interval is found (optionally after subtracting a quiet day
    curve, magqdc) and the range is mapped to a 0-9 index via
    Bartels-proportional thresholds scaled by 'scale' (default taken
    from the site's 'k_index_scale').

    nth: use the nth largest/smallest values when computing the range,
        to reject outliers; if None it is derived from the cadence.
    fit, fit_params: passed to MagQDC.align() when fitting the QDC.
    """
    Data.__init__(self,
                  project=project,
                  site=site,
                  channels=channels,
                  start_time=start_time,
                  end_time=end_time,
                  sample_start_time=sample_start_time,
                  sample_end_time=sample_end_time,
                  integration_interval=integration_interval,
                  nominal_cadence=nominal_cadence,
                  data=data,
                  units=units,
                  sort=sort)

    if magdata is not None:
        self.project = magdata.project
        self.site = magdata.site
        self.channels = c = magdata.channels
        # Expand to whole cadence intervals covering the input data.
        self.start_time = dt64.floor(magdata.start_time,
                                     self.nominal_cadence)
        self.end_time = dt64.ceil(magdata.end_time,
                                  self.nominal_cadence)
        self.sample_start_time = np.arange(self.start_time, self.end_time,
                                           self.nominal_cadence)
        self.sample_end_time = self.sample_start_time + self.nominal_cadence
        self.integration_interval = None

        if magqdc is None:
            logger.info('Creating KIndex object without a QDC')
            bsub = magdata.data[magdata.get_channel_index(c)]
            self.with_qdc = False
        else:
            assert magdata.units == magqdc.units, 'Units must match'
            if isinstance(magqdc, ap.magdata.MagQDC):
                aligned = magqdc.align(magdata, fit=fit, **fit_params)
            else:
                aligned = magqdc
            # Baseline subtracted data
            bsub = np.abs(magdata.data[magdata.get_channel_index(c)]
                          - aligned.data[aligned.get_channel_index(c)])
            # NOTE(review): looks like this should be True in the QDC
            # branch — confirm against callers of with_qdc.
            self.with_qdc = False

        self.units = None
        if nth is None:
            if magdata.nominal_cadence <= np.timedelta64(5, 's'):
                # Throw away ~30 seconds
                nth = int(np.timedelta64(30, 's')
                          / magdata.nominal_cadence)
            else:
                # Throw away up to 2.5 minutes
                nth = int(np.timedelta64(150, 's')
                          / magdata.nominal_cadence)

        nth_largest = ap.tools.NthLargest(nth)
        nth_smallest = ap.tools.NthLargest(nth, smallest=True)

        self.range = np.zeros([len(self.channels),
                               len(self.sample_start_time)])
        for i in range(len(self.sample_start_time)):
            tidx = np.where(
                np.logical_and(
                    magdata.sample_start_time >= self.sample_start_time[i],
                    magdata.sample_end_time <= self.sample_end_time[i]))[0]
            for cn in range(len(self.channels)):
                # BUG FIX: removed three dead no-op expression
                # statements (bare 'self.range[cn, i]',
                # 'nth_largest(...)' and 'nth_smallest(...)' calls)
                # that preceded this assignment — debug leftovers with
                # no effect on the result.
                self.range[cn, i] = nth_largest(bsub[cn, tidx]) \
                    - nth_smallest(bsub[cn, tidx])

        # NaN where the range is not finite, 0 elsewhere (raised below).
        self.data = np.tile(np.nan, self.range.shape)
        self.data[np.nonzero(np.isfinite(self.range))] = 0

        if scale is None:
            scale = self.get_site_info('k_index_scale')

        # K-index thresholds may be scaled but all are proportional to
        # the limits Bartels defined for Niemegk observatory.
        self.thresholds = np.array([0.00, 0.01, 0.02, 0.04, 0.08, 0.14,
                                    0.24, 0.40, 0.66, 1.00]) * scale
        for i in range(1, len(self.thresholds)):
            self.data[np.nonzero(self.range >= self.thresholds[i])] = i
def get_quiet_days(self, nquiet=5, channels=None,
                   cadence=np.timedelta64(5, 's').astype('m8[us]'),
                   method=None):
    '''
    Select the quietest days from the data.

    nquiet: number of quiet days

    channels: channels used in calculations. Defaults to first
        channel only

    cadence: cadence used for calculation, and of the returned data

    method: one of 'monthly_mean' (default), 'daily_mean' or
        'linear_fit', selecting the reference from which daily
        activity (RMS departure) is measured

    returns: data from nquiet quietest days

    Adapted from algorithm originally developed by Andrew Senior.
    '''

    if channels is None:
        # Default to using H or X (ie first channel)
        cidx = [0]
        channels = self.channels[0]
    else:
        cidx = self.get_channel_index(channels)

    if method is None:
        method = 'monthly_mean'

    day = np.timedelta64(24, 'h')
    st = dt64.floor(self.start_time, day)
    et = dt64.ceil(self.end_time, day)
    s = self.space_regularly(cadence, start_time=st, end_time=et,
                             missing_cadence=self.nominal_cadence * 2)

    num_days = int(np.round((et - st) / day))
    daily_data = s.split(day)
    daily_act = np.zeros(num_days)

    # Compute monthly mean for each selected channel
    monthly_means = ap.nanmean(s.data[cidx], axis=1)

    if method == 'monthly_mean':
        for n in range(num_days):
            # Estimate daily activity based on RMS departure from
            # monthly mean
            daily_act[n] = \
                ap.nanmean(np.sqrt(ap.nanmean(
                    (daily_data[n].data[cidx].transpose()
                     - monthly_means)**2, axis=1)))

    elif method == 'daily_mean':
        for n in range(num_days):
            # Estimate daily activity based on RMS departure from
            # daily mean
            daily_means = ap.nanmean(daily_data[n].data[cidx], axis=1)
            daily_act[n] = \
                ap.nanmean(np.sqrt(ap.nanmean(
                    (daily_data[n].data[cidx].transpose()
                     - daily_means)**2, axis=1)))
            # Shift the data by the difference between the monthly
            # and daily means
            daily_data[n].data += (monthly_means - daily_means)

    elif method == 'linear_fit':
        x = self.get_mean_sample_time().astype('m8[us]').astype('int64')
        fits = []
        for cn in range(len(cidx)):
            fits.append(np.polyfit(x, self.data[cidx[cn]], 1))

        for n in range(num_days):
            # Estimate daily activity based on RMS departure from
            # linear fit to dataset
            daily_x = daily_data[n].get_mean_sample_time() \
                .astype('m8[us]').astype('int64')
            # BUG FIX: was np.zeros([1, len(cidx)]), which made
            # tmp_act[cn] = ... raise IndexError for any second
            # channel (cn >= 1). A 1-D array of per-channel values is
            # what the code below actually needs.
            tmp_act = np.zeros(len(cidx))
            for cn in range(len(cidx)):
                daily_y = fits[cn][0]*daily_x + fits[cn][1]
                tmp_act[cn] = ap.nanmean(
                    (daily_data[n].data[cidx[cn]].transpose()
                     - daily_y)**2)

                # Shift the data by the difference between the
                # monthly mean and the fit.
                daily_data[n].data[cidx[cn]] += \
                    (monthly_means[cn] - daily_y)

            daily_act[n] = ap.nanmean(np.sqrt(ap.nanmean(tmp_act)))

    else:
        raise Exception('Unknown method')

    # Don't use days where more than 25% of data is missing
    for n in range(num_days):
        if np.mean(np.isnan(daily_data[n].data[cidx])) > 0.25:
            daily_act[n] = np.inf

    # Sort into ascending order of activity. Nans are put last.
    idx = np.argsort(daily_act)
    r = []
    for n in range(nquiet):
        r.append(daily_data[idx[n]])
    return r