def expand_string(s, project, site, now, test_mode, **kwargs):
    d = kwargs.copy()
    d.update({'project': project or '',
              'site': site or '',
              'date': dt64.strftime(now, '%Y-%m-%d'),
              'datetime': dt64.strftime(now, '%Y-%m-%d %H:%M:%SUT'),
              'time': dt64.strftime(now, '%H:%M:%SUT'),
              'test_mode': ' (test mode) ' if test_mode else ''
              })
    
    return s.format(**d)
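
A minimal sketch of the placeholder expansion performed by expand_string above, using datetime.strftime from the standard library in place of the project's dt64.strftime helper; the template, names and times below are made-up values for illustration only.

# Illustration only: mimics expand_string() with the standard library.
# dt64.strftime in the original operates on numpy.datetime64 values; a
# plain datetime stands in for 'now' here.
from datetime import datetime

template = '{project}/{site}: no data since {missing_start_time}{test_mode}'
now = datetime(2020, 3, 1, 12, 30)
d = {'project': 'EXAMPLE_PROJECT',   # hypothetical project name
     'site': 'EXAMPLE_SITE',         # hypothetical site name
     'date': now.strftime('%Y-%m-%d'),
     'datetime': now.strftime('%Y-%m-%d %H:%M:%SUT'),
     'time': now.strftime('%H:%M:%SUT'),
     'test_mode': ' (test mode) ',
     'missing_start_time': '2020-03-01 08:00:00UT'}  # extra kwarg, cf. warn_missing_data()
print(template.format(**d))
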
Example #2
def uit_path(t, project, site, data_type, archive, channels):
    if uit_password is None:
        raise Exception(__name__ + '.uit_password must be set, ' + 
                        'to obtain a password see ' + 
                        'http://flux.phys.uit.no/div/DataAccess.html')

    # Expand the path format string with the specific UIT variables,
    # including password.
    a, d = copy.deepcopy(ap.get_archive_info(project, site, data_type, 
                                             archive=archive))
    d['uit_password'] = uit_password
    fstr = path_fstr % d
    return dt64.strftime(t, fstr)
def make_links(link_dir, link_data):
    for link in link_data:
        link_name = os.path.join(link_dir, link['name'])

        # Make the target a relative path
        target = os.path.relpath(dt64.strftime(link['date'], link['fstr']),
                                 os.path.dirname(link_name))
        if os.path.islink(link_name) and \
                os.readlink(link_name) == target:
            # Exists and is correct
            logger.debug('link exists and is correct: ' + link_name +
                          ' -> ' + target)
            continue
        if os.path.lexists(link_name):
            logger.debug('link exists but is incorrect: ' + link_name)
            os.unlink(link_name)
        logger.debug('creating link ' + link_name + ' -> ' + target)
        os.symlink(target, link_name)
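
The pattern used by make_links above (create a relative symlink only when it is missing or points at the wrong target, so repeated runs are cheap and idempotent) can be exercised on its own. A sketch assuming a POSIX filesystem, with a temporary directory and fixed names standing in for the date-expanded paths:

# Standalone sketch of the idempotent relative-symlink update in make_links().
import os
import tempfile

def update_link(link_name, target_path):
    # Point link_name at target_path via a relative symlink, only if needed.
    target = os.path.relpath(target_path, os.path.dirname(link_name))
    if os.path.islink(link_name) and os.readlink(link_name) == target:
        return  # exists and is correct
    if os.path.lexists(link_name):
        os.unlink(link_name)  # exists but is wrong (or is a stale file)
    os.symlink(target, link_name)

d = tempfile.mkdtemp()
data_file = os.path.join(d, 'plots', '20200301.png')
os.makedirs(os.path.dirname(data_file))
open(data_file, 'w').close()
link = os.path.join(d, 'latest.png')
update_link(link, data_file)   # creates latest.png -> plots/20200301.png
update_link(link, data_file)   # second call is a no-op
print(os.readlink(link))
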
Example #4
        t1 = start_time
    else:
        t1 = dt64.get_start_of_month(start_time)

    while t1 < end_time:
        if args.single_qdc:
            t2 = end_time
        else:
            t2 = dt64.get_start_of_next_month(t1)

        if args.plot:
            mag_qdc = ap.magdata.load_qdc(project_uc, site_uc, t1)
            if mag_qdc is not None:
                lh = mag_qdc.plot(axes=ax)
                for h in lh:
                    h.set_label(dt64.strftime(t1, '%Y-%m-%d'))
                ax = plt.gca()

        else:
            archive, ad = ap.get_archive_info(project_uc, site_uc, 
                                              'MagData', 
                                              archive=getattr(args, 
                                                              'archive'))

            mag_data = ap.load_data(project_uc, site_uc, 'MagData', t1, t2,
                                    archive=archive,
                                    raise_all=args.raise_all)
            if mag_data is not None:
                mag_qdc = mag_data.make_qdc(smooth=args.smooth)
                qdc_archive, qdc_ad \
                    = ap.get_archive_info(project_uc, site_uc, 'MagQDC')
Example #5
def load_data(project, 
              site, 
              data_type, 
              start_time, 
              end_time, 
              archive=None,
              channels=None,
              path=None,
              load_function=None,
              raise_all=False,
              cadence=None,
              aggregate=None,
              filter_function=None,
              use_cache=None,
              now=None):
    '''Load data. 
    project: name of the project (upper case)

    site: site abbreviation (upper case)

    data_type: class name of the data type to be loaded

    start_time: start time (inclusive) of the data set

    end_time: end time (exclusive) of the data set
    
    The following optional parameters are recognised: 
    
    archive: name of the archive. Required if more than one archive is
        present and there is not an archive called "default".

    channels: data channel(s) to load. All are loaded if not specified

    path: URL or file path, specified as a strftime format specifier.
        Alternatively can be a function reference which is passed the
        time and returns the filename. If given this overrides the
        standard load path.

    load_function: Pass responsibility for loading the data to the given
        function reference, after validating the input parameters.
        
    '''
    archive, ad = get_archive_info(project, site, data_type, 
                                   archive=archive)
    cad_units = dt64.get_units(ad['nominal_cadence'])
    start_time = start_time.astype('datetime64[%s]' % cad_units)
    end_time = end_time.astype('datetime64[%s]' % cad_units)

    if channels is None:
        channels = ad['channels']
    else:
        # Could be a single channel name or a list of channels
        if isinstance(channels, six.string_types):
            if channels not in ad['channels']:
                raise Exception('Unknown channel')
        else:
            for c in channels:
                if c not in ad['channels']:
                    raise Exception('Unknown channel')

    if path is None:
        path = ad['path']

    if load_function is None:
        load_function = ad.get('load_function')

    if filter_function is None:
        filter_function = ad.get('filter_function')
        
    if load_function:
        # Pass responsibility for loading to some other
        # function. Parameters have already been checked.
        return load_function(project, 
                             site, 
                             data_type, 
                             start_time, 
                             end_time,
                             archive=archive,
                             channels=channels,
                             path=path,
                             raise_all=raise_all,
                             cadence=cadence,
                             aggregate=aggregate,
                             filter_function=filter_function)


    data = []
    for t in dt64.dt64_range(dt64.floor(start_time, ad['duration']), 
                             end_time, 
                             ad['duration']):
        # A local copy of the file to be loaded, possibly an
        # uncompressed version.
        temp_file_name = None

        t2 = t + ad['duration']
        if hasattr(path, '__call__'):
            # Function: call it with relevant information to get the path
            file_name = path(t, project=project, site=site, 
                             data_type=data_type, archive=archive,
                             channels=channels)
        else:
            file_name = dt64.strftime(t, path)

        url_parts = urlparse(file_name)
        if url_parts.scheme in ('ftp', 'http', 'https'):
            if ad.get('cache_dir'):
                if now is None:
                    now = np.datetime64('now', 's')
                dtd = ad.get('data_transfer_delay', np.timedelta64(0, 's'))
                if use_cache is None:
                    if t2 + dtd < now:
                        uc = True  # OK to try fetching from the cache
                    else:
                        uc = False
                        logger.debug('data too new to cache')
                else:
                    uc = use_cache
                cache_filename = os.path.normpath(os.path.join(ad['cache_dir'],
                                                               file_name.replace(':', '/')))
                logger.debug('cache file: ' + cache_filename)
                if uc:
                    if os.path.exists(cache_filename):
                        file_name = cache_filename
                        logger.debug('cache hit')
                    else:
                        file_name = download_url(file_name, dest=cache_filename)
                else:
                    # Download but discard after use
                    file_name = download_url(file_name)
                    temp_file_name = file_name
            else:
                # No cache so discard after use
                file_name = download_url(file_name)
                temp_file_name = file_name

            if file_name is None:
                continue

        elif url_parts.scheme == 'file':
            file_name = url_parts.path
            
        if not os.path.exists(file_name):
            logger.info('missing file %s', file_name)
            continue

        # Now only need to access local files
        if os.path.splitext(url_parts.path)[1] in ('.gz', '.dgz'):
            # Transparently uncompress
            gunzipped_file = None
            try:
                logger.debug('unzipping %s', file_name)
                gunzipped_file = NamedTemporaryFile(prefix=__name__, 
                                                    delete=False)
                with gzip.open(file_name, 'rb') as gzip_file:
                    shutil.copyfileobj(gzip_file, gunzipped_file)
                gunzipped_file.close()
            except KeyboardInterrupt:
                raise
            except Exception as e:
                if gunzipped_file:
                    gunzipped_file.close()
                    os.unlink(gunzipped_file.name)
                    gunzipped_file = None
                continue    
            finally:
                if temp_file_name:
                    logger.debug('deleting temporary file ' + temp_file_name)
                    os.unlink(temp_file_name)

            temp_file_name = gunzipped_file.name
            file_name = temp_file_name
            
        logger.info('loading ' + file_name)

        try:
            tmp = ad['load_converter'](file_name, 
                                       ad,
                                       project=project,
                                       site=site, 
                                       data_type=data_type, 
                                       start_time=t, 
                                       end_time=t2, 
                                       channels=channels,
                                       archive=archive,
                                       path=path,
                                       raise_all=raise_all)
            if tmp is not None:
                if cadence is not None and cadence <= ad['duration']:
                    tmp.set_cadence(cadence, 
                                    aggregate=aggregate,
                                    inplace=True)
                data.append(tmp)
        except KeyboardInterrupt:
            raise
        except Exception as e:
            if raise_all:
                raise
            logger.info('Could not load ' + file_name)
            logger.debug(str(e))
            logger.debug(traceback.format_exc())

        finally:
            if temp_file_name:
                logger.debug('deleting temporary file ' + temp_file_name)
                os.unlink(temp_file_name)

    if len(data) == 0:
        return None

    r = concatenate(data, sort=False)
    r.extract(inplace=True, 
              start_time=start_time, 
              end_time=end_time, 
              channels=channels)

    if cadence is not None and cadence > ad['duration']:
        # cadence too large to apply to the result of loading each file;
        # apply to the combined object
        r.set_cadence(cadence, 
                      aggregate=aggregate,
                      inplace=True)

    if filter_function:
        logger.debug('filtering with function %s', filter_function.__name__)
        r = filter_function(r)

    return r
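
One detail worth noting in load_data above is the use_cache decision for remote files: a downloaded file is only trusted from the cache once the interval it covers, plus the archive's data_transfer_delay, lies wholly in the past. A small self-contained sketch of that test (ok_to_use_cache is an invented helper name):

# Sketch of the use-cache test in load_data(): a remote file is cacheable
# once the interval it covers (plus the transfer delay) has fully elapsed.
import numpy as np

def ok_to_use_cache(t, duration, data_transfer_delay, now=None):
    if now is None:
        now = np.datetime64('now', 's')
    t2 = t + duration                     # end of the interval the file covers
    return bool(t2 + data_transfer_delay < now)

day = np.timedelta64(1, 'D')
delay = np.timedelta64(2, 'h')
print(ok_to_use_cache(np.datetime64('2020-03-01', 's'), day, delay))  # old data: True
print(ok_to_use_cache(np.datetime64('now', 's') - day, day, delay))   # still arriving: False
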
def warn_missing_data(data, project, site, now, status_dir, test_mode, config):

    section_name = 'missing_data'
    missing_interval = np.timedelta64(1, 'h')
    timeout = np.timedelta64(1, 'D')
    
    if not missing_interval:
        return

    if data is None:
        t = None
    else:
        # Find last non-nan value
        idx = np.nonzero(np.any(np.logical_not(np.isnan(data.data)), 
                                 axis=0))[0]
        if len(idx):
            t = data.sample_start_time[idx[-1]]
        else:
            t = None
    
    if t is None:
        # Data is entirely missing. Was expecting 24 hours of data,
        # with a nominal end time of the end of the current hour.
        t = dt64.ceil(now, np.timedelta64(1, 'h')) - np.timedelta64(1, 'D')


    tstr = dt64.strftime(t, '%Y-%m-%d %H:%M:%SUT')
    if t < now - missing_interval:
        # Data is missing
        logger.info(project + '/' + site + ' missing data')
        if config.has_option(section_name, 'twitter_username'):
            username = config.get(section_name, 'twitter_username')
            mesg = expand_string(config.get(section_name, 'twitter_message'),
                                 project, site, now, test_mode, 
                                 missing_start_time=tstr,
                                 missing_interval=str(missing_interval))   
            run_if_timeout_reached(send_tweet, timeout,
                                   now, status_dir,
                                   func_args=[username, mesg],
                                   name=section_name + '_tweet')

        if config.has_option(section_name, 'facebook_cmd'):
            fbcmd_opts = config.get(section_name, 
                                    'facebook_cmd').split()
            mesg = expand_string(config.get(section_name, 'facebook_message'),
                                 project, site, now, test_mode, 
                                 missing_start_time=tstr,
                                 missing_interval=str(missing_interval)) 
            run_if_timeout_reached(fbcmd, timeout, 
                                   now, status_dir,
                                   func_args=[fbcmd_opts, mesg],
                                   name=section_name + '_facebook')



        # Email. Leave to the send_email() function to determine if it
        # is configured since there are many possible settings in the
        # config file.  Run each email job separately in case of
        # failure to send.
        func_kwargs = {'missing_start_time': tstr,
                       'missing_interval': str(missing_interval)}
        for ejob in get_email_jobs(config, section_name):
            run_if_timeout_reached(send_email, timeout, 
                                   now, status_dir,
                                   func_args=[config, section_name,
                                              ejob, project, site, 
                                              now, test_mode],
                                   func_kwargs=func_kwargs,
                                   name=section_name + '_' + ejob)
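
warn_missing_data above locates the most recent sample for which any channel holds a finite value and treats everything after it as missing. A self-contained sketch of that index lookup, with a made-up channels-by-samples array:

# Sketch of the "last non-NaN sample" lookup in warn_missing_data().
import numpy as np

data = np.array([[1.0, 2.0, np.nan, np.nan],     # channel 0
                 [1.5, np.nan, 3.0, np.nan]])    # channel 1
sample_start_time = np.array(['2020-03-01T00', '2020-03-01T01',
                              '2020-03-01T02', '2020-03-01T03'],
                             dtype='datetime64[h]')

# Columns where at least one channel is not NaN
idx = np.nonzero(np.any(np.logical_not(np.isnan(data)), axis=0))[0]
t = sample_start_time[idx[-1]] if len(idx) else None
print(t)  # 2020-03-01T02, the last sample with any finite value
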
Example #7
    def make_qdc(self, nquiet=5, channels=None,
                 cadence=np.timedelta64(5, 's').astype('m8[us]'),
                 quiet_days_method=None,
                 smooth=True,
                 plot=False,
                 remove_nans_window=np.timedelta64(10, 'm'),
                 remove_nans_func=np.nanmean):
        qd = self.get_quiet_days(nquiet=nquiet, channels=channels,
                                 cadence=cadence, method=quiet_days_method)
        axes = None
        if plot:
            for q in qd:
                qst = q.start_time
                # To overlay quiet days, use the interval from the start of each quiet day;
                # otherwise the lines are spread over time and do not overlay.
                q = copy.deepcopy(q)
                q.start_time = q.start_time - qst
                q.end_time = q.end_time - qst
                q.sample_start_time = q.sample_start_time - qst
                q.sample_end_time = q.sample_end_time - qst
                q.plot(title='Quiet days', axes=axes, label=dt64.strftime(qst, '%Y-%m-%d'))
                axes = plt.gcf().get_axes()
            for ax in axes:
                ax.legend(loc='upper left', fontsize='small')
            
        sam_st = np.arange(np.timedelta64(0, 's').astype('m8[us]'),
                           np.timedelta64(24, 'h').astype('m8[us]'),
                           cadence)
        sam_et = sam_st + cadence


        qdc_data = np.zeros([len(qd[0].channels), len(sam_st)])
        count = np.zeros_like(qdc_data)
        for n in range(nquiet):
            not_nan = np.logical_not(np.isnan(qd[n].data))
            qdc_data[not_nan] += qd[n].data[not_nan]
            count[not_nan] += 1

        qdc_data /= count

        qdc = MagQDC(project=self.project,
                     site=self.site,
                     channels=qd[0].channels,
                     start_time=np.timedelta64(0, 'h'),
                     end_time=np.timedelta64(24, 'h'),
                     sample_start_time=sam_st,
                     sample_end_time=sam_et,
                     integration_interval=None,
                     nominal_cadence=cadence,
                     data=qdc_data,
                     units=self.units,
                     sort=False)

        if remove_nans_window and remove_nans_func and np.any(np.isnan(qdc.data)):
            qdc_no_nans = qdc.sliding_window(remove_nans_func, remove_nans_window)
            qdc = ap.data.first_non_nan([qdc, qdc_no_nans])

        final_fig = None
        if smooth:
            if plot:
                qdc.plot(title='Final QDC', label='Unsmoothed QDC')
                final_fig = plt.gcf()
            qdc.smooth(inplace=True)

        if plot:
            qdc.plot(title='Final QDC', figure=final_fig, label='Final QDC')
            for ax in plt.gcf().get_axes():
                ax.legend(loc='upper left', fontsize='small')

        return qdc
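
The heart of make_qdc above is a NaN-aware average over the selected quiet days: values are accumulated only where finite and each bin is divided by its own count. A minimal numpy sketch of that accumulation, with invented data standing in for the quiet-day objects:

# Sketch of the NaN-aware averaging across quiet days used by make_qdc().
import numpy as np

# Three "quiet days", one channel, four samples each (NaN = missing)
quiet_days = [np.array([[1.0, 2.0, np.nan, 4.0]]),
              np.array([[1.2, np.nan, 3.0, 4.4]]),
              np.array([[0.8, 2.2, 3.2, np.nan]])]

qdc_data = np.zeros_like(quiet_days[0])
count = np.zeros_like(qdc_data)
for q in quiet_days:
    not_nan = np.logical_not(np.isnan(q))
    qdc_data[not_nan] += q[not_nan]
    count[not_nan] += 1
qdc_data /= count       # a bin never observed would become NaN (0/0)
print(qdc_data)         # [[1.  2.1 3.1 4.2]]
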
            k_filename = os.path.join(site_summary_dir, 'rolling_k.png')
            temp_plot_filename = os.path.join(site_summary_dir, 
                                              'rolling_temp.png')
            volt_plot_filename = os.path.join(site_summary_dir, 
                                              'rolling_volt.png')

            stackplot_filename = os.path.join(summary_dir,
                                              'stackplots', 'rolling.png')
            activity_plot_filename = os.path.join(summary_dir, 
                                                  'activity_plots',
                                                  'rolling.png')

        else:
            mag_plot_filename = \
                dt64.strftime(t1, 
                              os.path.join(site_summary_dir, '%Y', '%m',
                                           site_lc + '_%Y%m%d.png'))
            qdc_fit_filename =  \
                dt64.strftime(t1, 
                              os.path.join(site_summary_dir, '%Y', '%m',
                                           site_lc + '_%Y%m%d_fit.png'))
            k_filename = \
                dt64.strftime(t1, 
                              os.path.join(site_summary_dir, '%Y', '%m', 
                                           site_lc + '_k_%Y%m%d.png'))

            stackplot_filename = \
                dt64.strftime(t1, 
                              os.path.join(summary_dir, 'stackplots', 
                                           '%Y', '%m', '%Y%m%d.png'))
                              
def make_aurorawatch_plot(project, site, st, et, rolling, exif_tags):
    '''
    Load data and make the AuroraWatch activity plot. Plots always
    cover 24 hours, but may begin at midnight for day plots, or at any
    other hour for rolling plots. This function uses the previous 72
    hours to help fit the quiet-day curve.

    project: name of project
    
    site: name of site
    
    st: start time. For day plots this is the start of the day. For
        rolling plots this is the start of the rolling 24 hour period.
    
    et: end time. For day plots this is the start of the following
        day. For rolling plots it is the end of the 24 hour period.
    
    rolling: flag to indicate if rolling plot should also be made. It
        is not otherwise possible to identify rolling plots which
        start at midnight.

    '''

    # global mag_fstr
    global args

    # Export to global names for debugging
    global mag_data
    global mag_qdc
    global activity

    day = np.timedelta64(24, 'h')

    archive, archive_details = ap.get_archive_info(project, site, 'MagData')

    # Load the data to plot. For rolling plots load up to midnight so
    # that both the rolling plot and the current day plot can be
    # generated efficiently.
    mag_data = my_load_data(project, site, 'MagData', st, dt64.ceil(et, day))

    if mag_data is None or \
            not np.any(np.logical_not(np.isnan(mag_data.data))): 
        # "not np.any(...)" handles an empty array or an array of all NaNs
        logger.info('No magnetic field data')
        return

    # Load data from the preceding days and apply a least-squares fit
    # to remove baseline drifts. Data from the current day is not used.
    # This ensures that results do not change over the current day when
    # new data becomes available.
    qdc_fit_interval = args.qdc_fit_interval * day
    fit_et = dt64.ceil(st, day) # Could be doing a rolling plot
    fit_st = fit_et - qdc_fit_interval
    fit_data = my_load_data(project, site, 'MagData', fit_st, fit_et)

    # Load a QDC.
    mag_qdc = ap.magdata.load_qdc(project, site, st, tries=6, realtime=True)

    if mag_qdc is None:
        logger.info('No QDC')
    elif fit_data is None:
        # Cannot fit, so assume no errors in QDC
        errors = [0.0]
    else:
        try:
            # Fit the QDC to the previous data
            qdc_aligned, errors, fi = mag_qdc.align(\
                fit_data, 
                fit=ap.data.Data.minimise_sign_error_fit,
                plot_fit=args.plot_fit,
                full_output=True)
        except Exception as e:
            logger.warn('Could not fit QDC')
            logger.info(str(e))
            errors = [0.0]
        else:
            # Fitted ok, plot if necessary
            if args.plot_fit:
                fig = plt.gcf()
                fig.set_figwidth(6.4)
                fig.set_figheight(4.8)
                fig.subplots_adjust(bottom=0.1, top=0.85, 
                                    left=0.15, right=0.925)
                fit_fstr = mag_fstr[:(mag_fstr.rindex('.'))] + '_fit.png'
                mysavefig(fig, dt64.strftime(dt64.ceil(st, day), fit_fstr),
                          exif_tags)

    # Adjust the quiet day curve with the error obtained by fitting to
    # previous days.
    if mag_qdc is None:
        mag_qdc_adj = None
    else:
        mag_qdc_adj = copy.deepcopy(mag_qdc)
        mag_qdc_adj.data -= errors[0]

    # Ensure data gaps are marked as such in the plots. Straight lines
    # across large gaps look bad!
    mag_data = mag_data.mark_missing_data(cadence=2*mag_data.nominal_cadence)
   
    # Do day plot. Trim start time for occasions when making a day
    # plot simultaneously with a rolling plot.
    st2 = dt64.ceil(st, day)
    md_day = mag_data.extract(start_time=st2)
    act_ki = activity_plot(md_day, mag_qdc_adj,
                           dt64.strftime(st2, mag_fstr), exif_tags,
                           k_index_filename=dt64.strftime(st2, k_fstr))
    r = [md_day]
    r.extend(act_ki)

    if rolling:
        # Trim end time
        md_rolling = mag_data.extract(end_time=et)
        act_ki_rolling = activity_plot(md_rolling, mag_qdc_adj,
                                       rolling_magdata_filename, exif_tags,
                                       k_index_filename=rolling_k_filename)
        r.append(md_rolling)
        r.extend(act_ki_rolling)
    return r
    if len(n_s) == 1:
        # Only project given, use all sites
        for k in ap.projects[n_s[0]].keys():
            project_site[n_s[0] + '/' + k] = (n_s[0], k)

    elif len(n_s) == 2:
        # Project and site given
        project_site[s] = tuple(n_s)
    else:
        raise Exception('bad value for project/site (' + s + ')')



t1 = start_time
while t1 < end_time:
    logger.debug('time: %s', dt64.strftime(t1, '%Y-%m-%d'))
    plt.close('all')

    t2 = t1 + day
    t1_eod = dt64.ceil(t1, day) # t1 end of day
    t2_eod = dt64.ceil(t2, day) # t2 end of day

    # List of magdata objects for this day
    mdl_day = []
    act_day = []
    mdl_rolling = []
    act_rolling = []

    # Get copyright and attribution data for all sites. License had
    # better be CC4-BY-NC-SA for all since we are combining them.
    copyright_list = []
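
The daily loop above relies on dt64.ceil to round numpy datetime64 values up to the next day boundary. The following sketch only illustrates the idea of day rounding with plain numpy; it is not the dt64tools implementation:

# Sketch of rounding datetime64 values to day boundaries (cf. dt64.floor/ceil).
import numpy as np

day = np.timedelta64(1, 'D')

def floor_day(t):
    return t.astype('datetime64[D]').astype(t.dtype)

def ceil_day(t):
    f = floor_day(t)
    return f if f == t else f + day

t1 = np.datetime64('2020-03-01T09:30', 's')
print(floor_day(t1))            # 2020-03-01T00:00:00
print(ceil_day(t1))             # 2020-03-02T00:00:00
print(ceil_day(floor_day(t1)))  # unchanged: already on a boundary
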
Example #11
    logger.info('Processing %s/%s %s', project, site,
                dt64.fmt_dt64_range(site_st, site_et))
    for t in dt64.dt64_range(site_st, site_et, src_ai['duration']):
        temp_file_name = None
        try:
            if hasattr(dest_path, '__call__'):
                # Function: call it with relevant information to get
                # the dest_path
                dest_file_name = dest_path(t,
                                           project=project,
                                           site=site,
                                           data_type=data_type,
                                           archive=dest_an,
                                           channels=channels)
            else:
                dest_file_name = dt64.strftime(t, dest_path)

            url_parts = urlparse(dest_file_name)
            if url_parts.scheme in ('ftp', 'http', 'https'):
                raise Exception('Cannot store to a remote location')
            elif url_parts.scheme == 'file':
                dest_file_name = url_parts.path

            if os.path.exists(dest_file_name) and not args.overwrite:
                logger.info('%s already exists', dest_file_name)
                continue

            if hasattr(src_path, '__call__'):
                # Function: call it with relevant information to get
                # the src_path
                file_name = src_path(t,
Example #12
            temp_data = my_load_data(project_uc, site_uc, 'TemperatureData', 
                                     t1, t2_eod)
            if temp_data is not None:
                temp_data.set_cadence(np.timedelta64(10, 'm'), 
                                      inplace=True)
                if args.rolling:
                    # Rolling plot
                    make_temperature_plot(temp_data.extract(end_time=t2),
                                          rolling_tempdata_filename, 
                                          exif_tags)

                # Make day plot. Trim data from start because when
                # --rolling option is given it can include data from
                # the previous day.
                make_temperature_plot(temp_data.extract(start_time=t1_eod),
                                      dt64.strftime(t1_eod, temp_fstr),
                                      exif_tags)

        voltage_data = None
        if has_data_of_type(project_uc, site_uc, 'VoltageData'):
            voltage_data = my_load_data(project_uc, site_uc, 'VoltageData', 
                                        t1, t2_eod)
            if voltage_data is not None:
                voltage_data.set_cadence(np.timedelta64(10, 'm'), 
                                         inplace=True)
                if args.rolling:
                    # Rolling plot
                    make_voltage_plot(voltage_data.extract(end_time=t2),
                                      rolling_voltdata_filename,
                                      exif_tags)
Example #13
    last_data = None
    for t1 in dt64.dt64_range(site_st, site_et, day):
        try:
            t2 = t1 + day

            if args.missing_only:
                data = ap.load_data(project,
                                    site,
                                    'MagData',
                                    t1,
                                    t2,
                                    archive=bl_archive)
                if data is not None and np.size(data.data) and np.all(
                        np.isfinite(data.data)):
                    logger.info('baseline data for %s/%s %s already exists',
                                project, site, dt64.strftime(t1, '%Y-%m-%d'))
                    continue

            # Calculate dates for data to be used for fitting
            md_mean_time = dt64.mean(t1, t2) + qdc_fit_offset
            md_st = md_mean_time - qdc_fit_duration / 2
            md_et = md_st + qdc_fit_duration

            if last_data is None or last_data.end_time != md_et - day:
                # Load entire data block
                md = ap.load_data(project,
                                  site,
                                  'MagData',
                                  md_st,
                                  md_et,
                                  archive=md_archive)
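
The baseline snippet above centres its fitting window on the day being processed, shifted by qdc_fit_offset and sized by qdc_fit_duration. A sketch of that date arithmetic; the offset and duration values are made up, and dt64.mean is written out with plain numpy:

# Sketch of the fitting-window arithmetic used for the baseline/QDC fit above.
import numpy as np

day = np.timedelta64(1, 'D')
qdc_fit_offset = np.timedelta64(-12, 'h')    # illustrative value
qdc_fit_duration = np.timedelta64(72, 'h')   # illustrative value (3 days)

t1 = np.datetime64('2020-03-10', 'h')        # start of the day being processed
t2 = t1 + day

md_mean_time = t1 + (t2 - t1) / 2 + qdc_fit_offset   # cf. dt64.mean(t1, t2)
md_st = md_mean_time - qdc_fit_duration / 2
md_et = md_st + qdc_fit_duration
print(md_st, md_et)   # 2020-03-08T12 2020-03-11T12
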
Example #15
    site_et = ap.get_site_info(project, site, 'end_time')
    if site_et is None or site_et > et:
        site_et = et
    else:
        site_et = dt64.ceil(site_et, day)

    t1 = dt64.get_start_of_month(site_st)
    while t1 < site_et:
        t2 = dt64.get_start_of_next_month(t1)
        try:
            if args.only_missing:
                mag_qdc = ap.magdata.load_qdc(project, site, t1)
                if mag_qdc is not None and mag_qdc.data.size != 0 and not np.any(
                        np.isnan(mag_qdc.data)):
                    logger.info('QDC for %s/%s %s already exists', project,
                                site, dt64.strftime(t1, '%Y-%m-%d'))
                    continue

            kwargs = {}
            if cadence:
                kwargs['cadence'] = cadence
                kwargs['aggregate'] = agg_func

            mag_data = ap.load_data(project,
                                    site,
                                    'MagData',
                                    t1,
                                    t2,
                                    archive=archive,
                                    raise_all=args.raise_all,
                                    **kwargs)
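
The monthly loop above steps from the start of one calendar month to the start of the next. A standalone sketch of the idea behind dt64.get_start_of_month and get_start_of_next_month using numpy's datetime64[M] unit; this is an illustration, not the project's implementation:

# Sketch of calendar-month stepping as used by the QDC generation loop above.
import numpy as np

def start_of_month(t):
    return t.astype('datetime64[M]').astype(t.dtype)

def start_of_next_month(t):
    return (t.astype('datetime64[M]') + np.timedelta64(1, 'M')).astype(t.dtype)

t1 = start_of_month(np.datetime64('2020-03-17T06:00', 'm'))
end_time = np.datetime64('2020-06-01T00:00', 'm')
while t1 < end_time:
    t2 = start_of_next_month(t1)
    print(t1, '->', t2)    # 2020-03-01 -> 2020-04-01, and so on
    t1 = t2
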
Example #16
        site_st = dt64.floor(site_st, day)
    
    site_et = ap.get_site_info(project, site, 'end_time')
    if site_et is None or site_et > et:
        site_et = et
    else:
        site_et = dt64.ceil(site_et, day)
    
    t1 = dt64.get_start_of_month(site_st)
    while t1 < site_et:
        t2 = dt64.get_start_of_next_month(t1)
        try:
            if args.only_missing:
                mag_qdc = ap.magdata.load_qdc(project, site, t1)
                if mag_qdc is not None and mag_qdc.data.size != 0 and not np.any(np.isnan(mag_qdc.data)):
                    logger.info('QDC for %s/%s %s already exists', project, site, dt64.strftime(t1, '%Y-%m-%d'))
                    continue

            kwargs = {}
            if cadence:
                kwargs['cadence'] = cadence
                kwargs['aggregate'] = agg_func

            mag_data = ap.load_data(project, site, 'MagData', t1, t2,
                                    archive=archive,
                                    raise_all=args.raise_all,
                                    **kwargs)
            if mag_data is not None:

                if post_cadence:
                    mag_data.set_cadence(post_cadence, 
    else:
        site_et = dt64.ceil(site_et, day)

    logger.info('Processing %s/%s %s', project, site,
                dt64.fmt_dt64_range(site_st, site_et))

    last_data = None
    for t1 in dt64.dt64_range(site_st, site_et, day):
        try:
            t2 = t1 + day

            if args.missing_only:
                data = ap.load_data(project, site, 'MagData', t1, t2, archive=bl_archive)
                if data is not None and np.size(data.data) and np.all(np.isfinite(data.data)):
                    logger.info('baseline data for %s/%s %s already exists', project, site,
                                dt64.strftime(t1, '%Y-%m-%d'))
                    continue

            # Calculate dates for data to be used for fitting
            md_mean_time = dt64.mean(t1, t2) + qdc_fit_offset
            md_st = md_mean_time - qdc_fit_duration/2
            md_et = md_st + qdc_fit_duration

            if last_data is None or last_data.end_time != md_et - day:
                # Load entire data block
                md = ap.load_data(project, site, 'MagData', md_st, md_et, archive=md_archive)
            else:
                # Load the last day of data and concatenate
                md = ap.load_data(project, site, 'MagData', md_et - day, md_et, archive=md_archive)
                if md is None:
                    md = last_data
Example #18
#!/usr/bin/env python
import numpy as np
import matplotlib as mpl
import auroraplot.dt64tools as dt64

from matplotlib import pyplot as plt
plt.close('all')


# Define a line from year 1200 to year 2080
x = np.array(dt64.from_YMD([1200,2080],1,1))
y = np.array([0,1])

x = x.astype('M8[ms]')

# Plot
dt64.plot_dt64(x,y)
plt.show()

print(dt64.strftime(x[0], 'Start time: %Y-%m-%d %H:%M:%S'))
print(dt64.strftime(x[-1], 'End time: %Y-%m-%d %H:%M:%S'))
print('Now try zoom and watch the time units adjust automatically')

Example #20
    logger.info('Processing %s/%s %s', project, site,
                dt64.fmt_dt64_range(site_st, site_et))
    for t in dt64.dt64_range(site_st, site_et, src_ai['duration']):
        temp_file_name = None
        try:
            if hasattr(dest_path, '__call__'):
                # Function: call it with relevant information to get
                # the dest_path
                dest_file_name = dest_path(t,
                                           project=project,
                                           site=site, 
                                           data_type=data_type,
                                           archive=dest_an,
                                           channels=channels)
            else:
                dest_file_name = dt64.strftime(t, dest_path)

            url_parts = urlparse(dest_file_name)
            if url_parts.scheme in ('ftp', 'http', 'https'):
                raise Exception('Cannot store to a remote location')
            elif url_parts.scheme == 'file':
                dest_file_name = url_parts.path

            if os.path.exists(dest_file_name) and not args.overwrite:
                logger.info('%s already exists', dest_file_name)
                continue
            
            
            if hasattr(src_path, '__call__'):
                # Function: call it with relevant information to get
                # the src_path
def aurora_alert(activity, combined, now, status_dir, test_mode, 
                 ignore_timeout, config):
    assert activity.thresholds.size == 4, \
        'Incorrect number of activity thresholds'
    assert activity.sample_start_time[-1] <= now \
        and activity.sample_end_time[-1] >= now, \
        'Last activity sample for wrong time'
    assert np.all(np.logical_or(activity.data >= 0, np.isnan(activity.data))), \
        'Activity data must be >= 0'
    
    if np.isnan(activity.data[0,-1]):
        return
    n = np.where(activity.data[0,-1] >= activity.thresholds)[0][-1]


    logger.debug('Activity level for ' + activity.project + '/' 
                 + activity.site + ': ' + str(n))
    
    
    section_name = 'aurora_alert_' + str(n) 
    if n == 0:
        # No significant activity
        return
    elif not config.has_section(section_name):
        logger.debug('No [' + section_name + '] section found')
        return

    nowstr = dt64.strftime(now, '%Y-%m-%d %H:%M:%SUT')
    tweet_timeout = facebook_timeout = email_timeout = np.timedelta64(12, 'h')

    # Compute filename to use for timeout, and the names of any other
    # files which must be updated.
    job_base_name = section_name
    if not combined:
        job_base_name += '_' + activity.project.lower() + '_' \
            + activity.site.lower()
    tweet_files = []
    facebook_files = []
    email_files = []
    for i in range(1, n+1):
        tweet_files.append(job_base_name + '_tweet')
        facebook_files.append(job_base_name + '_facebook')
        email_files.append(job_base_name + '_') # Must append the ejob later
        

    # Tweet
    if config.has_option(section_name, 'twitter_username'):
        twitter_username = config.get(section_name, 'twitter_username')
        twitter_mesg = expand_string(config.get(section_name, 
                                                'twitter_message'),
                                     activity.project, activity.site, now, 
                                     test_mode)
        run_if_timeout_reached(send_tweet, tweet_timeout, 
                               now, status_dir,
                               func_args=[twitter_username, twitter_mesg],
                               name=tweet_files[-1], 
                               also_update=tweet_files[:-1])
    else:
        logger.debug('Sending tweet not configured')

    # Post to facebook
    if config.has_option(section_name, 'facebook_cmd'):
        facebook_mesg = expand_string(config.get(section_name, 
                                                 'facebook_message'),
                                      activity.project, activity.site, now,
                                      test_mode)
        fbcmd_opts = config.get(section_name, 'facebook_cmd').split()
        run_if_timeout_reached(fbcmd, facebook_timeout, now, status_dir,
                               func_args=[fbcmd_opts, facebook_mesg],
                               name=facebook_files[-1],
                               also_update=facebook_files[:-1])
    else:
        logger.debug('Facebook posting not configured')


    # Email. Leave to the send_email() function to determine if it is
    # configured since there are many possible settings in the config
    # file.
    for ejob in get_email_jobs(config, section_name):
        run_if_timeout_reached(send_email, email_timeout, 
                               now, status_dir,
                               func_args=[config, section_name, ejob, 
                                          activity.project, activity.site, 
                                          now, test_mode],
                               name=email_files[-1] + ejob,
                               also_update=map(lambda x: x + ejob, 
                                               email_files[:-1]))
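
aurora_alert above maps the latest activity sample onto an alert level by taking the index of the highest threshold reached; level 0 means no significant activity. A small sketch of that lookup with invented threshold values:

# Sketch of the threshold-level lookup in aurora_alert().
import numpy as np

thresholds = np.array([0.0, 50.0, 100.0, 200.0])   # illustrative values only
latest = 120.0                                      # most recent activity sample

if np.isnan(latest):
    n = None       # no data, so no alert decision
else:
    n = np.where(latest >= thresholds)[0][-1]
print(n)   # 2: index of the highest threshold reached
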
Example #22
def load_qdc(project, 
             site, 
             time, 
             archive=None, 
             channels=None,
             path=None,
             tries=1,
             realtime=False,
             load_function=None,
             full_output=False):
    '''Load quiet-day curve. 
    project: name of the project (upper case)

    site: site abbreviation (upper case)

    time: a time within the quiet-day curve period
    
    The following optional parameters are recognised: 
    
    archive: name of the archive. Required if more than one archive is
        present and there is not an archive called "default".

    channels: data channel(s) to load. All are loaded if not specified.

    tries: The number of attempts to load a quiet-day curve. If >1 and
        the first attempt is not successful then an attempt will be
        made to load the previous QDC.
 
    path: URL or file path, specified as a strftime format specifier.
        Alternatively can be a function reference which is passed the
        time and returns the filename. If given this overrides the
        standard load path.

    load_function: Pass responsibility for loading the data to the given
        function reference, after validating the input parameters.
        
    '''

    data_type = 'MagQDC'
    archive, ad = ap.get_archive_info(project, site, data_type, 
                                      archive=archive)
    if channels is not None:
        # Ensure it is a 1D numpy array
        channels = np.array(channels).flatten()
        for c in channels:
            if c not in ad['channels']:
                raise ValueError('Unknown channel (%s)' % str(c))
    else:
        channels = ad['channels']

    if path is None:
        path = ad['path']

    if load_function is None:
        load_function = ad.get('load_function', None)

    if tries is None:
        tries = 1
       
    if load_function:
        # Pass responsibility for loading to some other
        # function. Parameters have already been checked.
        return load_function(project, 
                             site, 
                             data_type, 
                             time, 
                             archive=archive,
                             channels=channels,
                             path=path,
                             tries=tries,
                             realtime=realtime,
                             full_output=full_output)
    data = []

    t = dt64.get_start_of_month(time)

    if realtime:
        # For realtime use the QDC for the current month is not yet
        # available, so use the previous month's QDC
        t = dt64.get_start_of_previous_month(t)

        # Early in the month the previous month's QDC has probably not
        # been computed yet, so use the month before that
        qdc_rollover_day = ad.get('qdc_rollover_day', 4)
        if dt64.get_day_of_month(time) < qdc_rollover_day:
            t = dt64.get_start_of_previous_month(t)

    for n in range(tries):
        try:
            if hasattr(path, '__call__'):
                # Function: call it with relevant information to get the path
                file_name = path(t, project=project, site=site, 
                                 data_type=data_type, archive=archive,
                                 channels=channels)
            else:
                file_name = dt64.strftime(t, path)

            logger.info('loading ' + file_name)

            r = ad['load_converter'](file_name, 
                                     ad,
                                     project=project,
                                     site=site, 
                                     data_type=data_type, 
                                     start_time=np.timedelta64(0, 'h'), 
                                     end_time=np.timedelta64(24, 'h'),
                                     archive=archive,
                                     channels=channels,
                                     path=path)
            if r is not None:
                r.extract(inplace=True, 
                          channels=channels)
                if full_output:
                    r2 = {'magqdc': r,
                          'tries': n + 1,
                          'maxtries': tries}
                    return r2
                else:
                    return r
                
        except Exception as e:
            # Catch load failures so that any remaining tries can fall
            # back to the previous month's QDC
            logger.info('Could not load QDC')
            logger.debug(str(e))

        finally:
            # Go to start of previous month
            t = dt64.get_start_of_month(t - np.timedelta64(24, 'h'))

    return None
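
For realtime use load_qdc starts from the previous month's QDC (or the month before that, early in the month) and steps back one further month on each retry. A sketch of just that month-selection logic; qdc_month is an invented helper and the rollover day is illustrative:

# Sketch of the realtime month selection and retry stepping in load_qdc().
import numpy as np

def qdc_month(time, realtime=True, qdc_rollover_day=4):
    t = time.astype('datetime64[M]')        # start of the month containing 'time'
    if realtime:
        t -= np.timedelta64(1, 'M')         # previous month's QDC
        day_of_month = (time.astype('datetime64[D]')
                        - time.astype('datetime64[M]').astype('datetime64[D]')
                        ).astype(int) + 1
        if day_of_month < qdc_rollover_day:
            t -= np.timedelta64(1, 'M')     # that QDC may not exist yet either
    return t

t = qdc_month(np.datetime64('2020-03-02T10:00'))
for n in range(3):                          # 'tries' attempts
    print('attempt', n + 1, 'would load the QDC for', t)
    t -= np.timedelta64(1, 'M')             # go to the start of the previous month
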