def date_generator():
    t1 = start_time
    while t1 < end_time:
        t2 = t1 + day
        yield t1, t2, False
        t1 = t2

    if args.rolling:
        # Rolling mode should also yield the current day
        if t1 != dt64.floor(now, day):
            t1 = dt64.floor(now, day)
            t2 = t1 + day
            yield t1, t2, False
        t2 = dt64.ceil(now, np.timedelta64(1, 'h'))
        t1 = t2 - day
        yield t1, t2, True
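
# A minimal usage sketch of date_generator() (illustration only, not part of
# the original script).  The generator relies on the module-level start_time,
# end_time, day, now and args values that are defined further down; once they
# exist it is simply iterated:
#
#     for t1, t2, rolling in date_generator():
#         process_interval(t1, t2)   # process_interval is a hypothetical helper
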
                    help='Run jobs')

args = parser.parse_args()
logging.basicConfig(level=getattr(logging, args.log_level.upper()),
                    format=args.log_format)
    
# Use a consistent value for current time, process any --now option
# before other time-dependent options.
if args.now:
    now = parse_datetime(args.now)
else:
    now = np.datetime64('now', 'us')


day = np.timedelta64(24, 'h')
today = dt64.floor(now, day)
yesterday = today - day
tomorrow = today + day
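
# Illustration only (not from the original script): the day-boundary
# arithmetic above expressed in plain NumPy.  dt64.floor is assumed to round
# a datetime64 down to a multiple of the given timedelta64, which for a whole
# day is equivalent to truncating to 'D' resolution.
_example_now = np.datetime64('2020-03-05T14:30', 'us')
_example_today = _example_now.astype('datetime64[D]').astype('datetime64[us]')
assert _example_today == np.datetime64('2020-03-05T00:00', 'us')
assert _example_today - np.timedelta64(24, 'h') == np.datetime64('2020-03-04', 'us')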


if args.start_time is None:
    start_time = today
else:
    start_time = dt64.floor(parse_datetime(args.start_time), day)

if args.end_time is None:
    end_time = start_time + day
else:
    end_time = dt64.floor(parse_datetime(args.end_time), day)

Example #3
parser.add_argument('project_site',
                    nargs='+',
                    metavar="PROJECT[/SITE]")

args = parser.parse_args()
if __name__ == '__main__':
    logging.basicConfig(level=getattr(logging, args.log_level.upper()),
                        format=args.log_format)



# Use a consistent value for current time
day = np.timedelta64(1, 'D').astype('m8[us]')
now = np.datetime64('now', 'us')
today = dt64.floor(now, day)
yesterday = today - day
tomorrow = today + day

if args.start_time is None:
    start_time = dt64.get_start_of_previous_month(today)
elif args.start_time == 'today': 
    start_time = today
elif args.start_time == 'yesterday':
    start_time = yesterday
else:
    start_time = dt64.floor(np.datetime64(args.start_time), day)

if args.end_time is None:
    end_time = start_time + day
elif args.end_time == 'today':
project_list, site_list = ap.parse_project_site_list(args.project_site)

# Process --archive options for source data
if args.archive:
    archive = ap.parse_archive_selection(args.archive)
else:
    archive = {}

# Process --baseline-archive options for target data
if args.baseline_archive:
    baseline_archive = ap.parse_archive_selection(args.baseline_archive)
else:
    baseline_archive = {}
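
# Illustration only: the nested mapping assumed for the result of
# ap.parse_archive_selection(), inferred from how baseline_archive is indexed
# below (baseline_archive[project][site]).  The project, site and archive
# names here are made-up placeholders.
_example_selection = {'SOME_PROJECT': {'SITE1': 'realtime_baseline'}}
assert _example_selection['SOME_PROJECT']['SITE1'] == 'realtime_baseline'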

st = dt64.floor(st, day)
et = dt64.ceil(et, day)


for n in range(len(project_list)):
    project = project_list[n]
    site = site_list[n]

    # Get baseline archive to use for target data
    if project in baseline_archive and site in baseline_archive[project]:
        bl_archive = baseline_archive[project][site]
    else:
        bl_archive = 'realtime_baseline'

    an, ai = ap.get_archive_info(project, site, 'MagData',
                                 archive=bl_archive)
Example #5
    src_an, src_ai = ap.get_archive_info(project, site, 'MagData',
                                         archive='original_' + dest_an)
    src_path = src_ai['path']
    dest_path = dest_ai['path']
    print('src_ai ' + src_an)
    print(repr(src_ai))
    print('dest_ai ' + dest_an)
    print(repr(dest_ai))
    
    # Tune start/end times to avoid requesting data outside of
    # operational period
    site_st = ap.get_site_info(project, site, 'start_time')
    if site_st is None or site_st < st:
        site_st = st
    else:
        site_st = dt64.floor(site_st, day)
    site_st = dt64.floor(site_st, src_ai['duration'])
        
    site_et = ap.get_site_info(project, site, 'end_time')
    if site_et is None or site_et > et:
        site_et = et
    else:
        site_et = dt64.ceil(site_et, day)
    site_et = dt64.ceil(site_et, src_ai['duration'])
    

    logger.info('Processing %s/%s %s', project, site,
                dt64.fmt_dt64_range(site_st, site_et))
    for t in dt64.dt64_range(site_st, site_et, src_ai['duration']):
        temp_file_name = None
        try:
Example #6
def load_data(project, 
              site, 
              data_type, 
              start_time, 
              end_time, 
              archive=None,
              channels=None,
              path=None,
              load_function=None,
              raise_all=False,
              cadence=None,
              aggregate=None,
              filter_function=None,
              use_cache=None,
              now=None):
    '''Load data. 
    project: name of the project (upper case)

    site: site abbreviation (upper case)

    data_type: class name of the data type to be loaded

    start_time: start time (inclusive) of the data set

    end_time: end time (exclusive) of the data set
    
    The following optional parameters are recognised: 
    
    archive: name of the archive. Required if more than one archive is
        present and there is not an archive called "default".

    channels: data channel(s) to load. All are loaded if not specified

    path: URL or file path, specified as a strftime format specifier.
        Alternatively can be a function reference which is passed the
        time and returns the filename. If given this overrides the
        standard load path.

    load_function: Pass responsibility for loading the data to the given
        function reference, after validating the input parameters.
        
    '''
    archive, ad = get_archive_info(project, site, data_type, 
                                   archive=archive)
    cad_units = dt64.get_units(ad['nominal_cadence'])
    start_time = start_time.astype('datetime64[%s]' % cad_units)
    end_time = end_time.astype('datetime64[%s]' % cad_units)

    if channels is None:
        channels = ad['channels']
    else:
        # Could be a single channel name or a list of channels
        if isinstance(channels, six.string_types):
            if channels not in ad['channels']:
                raise Exception('Unknown channel: ' + channels)
        else:
            for c in channels:
                if c not in ad['channels']:
                    raise Exception('Unknown channel: ' + str(c))

    if path is None:
        path = ad['path']

    if load_function is None:
        load_function = ad.get('load_function')

    if filter_function is None:
        filter_function = ad.get('filter_function')
        
    if load_function:
        # Pass responsibility for loading to some other
        # function. Parameters have already been checked.
        return load_function(project, 
                             site, 
                             data_type, 
                             start_time, 
                             end_time,
                             archive=archive,
                             channels=channels,
                             path=path,
                             raise_all=raise_all,
                             cadence=cadence,
                             aggregate=aggregate,
                             filter_function=filter_function)


    data = []
    for t in dt64.dt64_range(dt64.floor(start_time, ad['duration']), 
                             end_time, 
                             ad['duration']):
        # A local copy of the file to be loaded, possibly an
        # uncompressed version.
        temp_file_name = None

        t2 = t + ad['duration']
        if hasattr(path, '__call__'):
            # Function: call it with relevant information to get the path
            file_name = path(t, project=project, site=site, 
                             data_type=data_type, archive=archive,
                             channels=channels)
        else:
            file_name = dt64.strftime(t, path)

        url_parts = urlparse(file_name)
        if url_parts.scheme in ('ftp', 'http', 'https'):
            if ad.get('cache_dir'):
                if now is None:
                    now = np.datetime64('now', 's')
                dtd = ad.get('data_transfer_delay', np.timedelta64(0, 's'))
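                # Only reuse a cached copy when the file's interval, plus any
                # data transfer delay, lies wholly in the past; a newer file
                # may still be being updated at the source.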
                if use_cache is None:
                    if t2 + dtd < now:
                        uc = True  # OK to try fetching from the cache
                    else:
                        uc = False
                        logger.debug('data too new to cache')
                else:
                    uc = use_cache
                cache_filename = os.path.normpath(os.path.join(ad['cache_dir'],
                                                               file_name.replace(':', '/')))
                logger.debug('cache file: ' + cache_filename)
                if uc:
                    if os.path.exists(cache_filename):
                        file_name = cache_filename
                        logger.debug('cache hit')
                    else:
                        file_name = download_url(file_name, dest=cache_filename)
                else:
                    # Download but discard after use
                    file_name = download_url(file_name)
                    temp_file_name = file_name
            else:
                # No cache so discard after use
                file_name = download_url(file_name)
                temp_file_name = file_name

            if file_name is None:
                continue

        elif url_parts.scheme == 'file':
            file_name = url_parts.path
            
        if not os.path.exists(file_name):
            logger.info('missing file %s', file_name)
            continue

        # Now only need to access local files
        if os.path.splitext(url_parts.path)[1] in ('.gz', '.dgz'):
            # Transparently uncompress
            gunzipped_file = None
            try:
                logger.debug('unzipping %s', file_name)
                gunzipped_file = NamedTemporaryFile(prefix=__name__, 
                                                    delete=False)
                with gzip.open(file_name, 'rb') as gzip_file:
                    shutil.copyfileobj(gzip_file, gunzipped_file)
                gunzipped_file.close()
            except KeyboardInterrupt:
                raise
            except Exception as e:
                if gunzipped_file:
                    gunzipped_file.close()
                    os.unlink(gunzipped_file.name)
                    gunzipped_file = None
                continue    
            finally:
                if temp_file_name:
                    logger.debug('deleting temporary file ' + temp_file_name)
                    os.unlink(temp_file_name)

            temp_file_name = gunzipped_file.name
            file_name = temp_file_name
            
        logger.info('loading ' + file_name)

        try:
            tmp = ad['load_converter'](file_name, 
                                       ad,
                                       project=project,
                                       site=site, 
                                       data_type=data_type, 
                                       start_time=t, 
                                       end_time=t2, 
                                       channels=channels,
                                       archive=archive,
                                       path=path,
                                       raise_all=raise_all)
            if tmp is not None:
                if cadence is not None and cadence <= ad['duration']:
                    tmp.set_cadence(cadence, 
                                    aggregate=aggregate,
                                    inplace=True)
                data.append(tmp)
        except KeyboardInterrupt:
            raise
        except Exception as e:
            if raise_all:
                raise
            logger.info('Could not load ' + file_name)
            logger.debug(str(e))
            logger.debug(traceback.format_exc())

        finally:
            if temp_file_name:
                logger.debug('deleting temporary file ' + temp_file_name)
                os.unlink(temp_file_name)

    if len(data) == 0:
        return None

    r = concatenate(data, sort=False)
    r.extract(inplace=True, 
              start_time=start_time, 
              end_time=end_time, 
              channels=channels)

    if cadence is not None and cadence > ad['duration']:
        # cadence too large to apply on results of loading each file, 
        # apply to combined object
        r.set_cadence(cadence, 
                      aggregate=aggregate,
                      inplace=True)

    if filter_function:
        logger.debug('filtering with function %s', filter_function.__name__)
        r = filter_function(r)

    return r
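
# A minimal usage sketch of load_data() (illustration only, not part of the
# original source).  The project/site names below are placeholders; 'MagData'
# and channel 'H' follow the naming used elsewhere in this file.
#
#     md = load_data('SOME_PROJECT', 'SITE1', 'MagData',
#                    np.datetime64('2020-03-05T00:00', 'us'),
#                    np.datetime64('2020-03-06T00:00', 'us'),
#                    channels=['H'])
#     # md is None when no files could be loaded for the requested interval.
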
Example #7
                                         'MagData',
                                         archive='original_' + dest_an)
    src_path = src_ai['path']
    dest_path = dest_ai['path']
    print('src_ai ' + src_an)
    print(repr(src_ai))
    print('dest_ai ' + dest_an)
    print(repr(dest_ai))

    # Tune start/end times to avoid requesting data outside of
    # operational period
    site_st = ap.get_site_info(project, site, 'start_time')
    if site_st is None or site_st < st:
        site_st = st
    else:
        site_st = dt64.floor(site_st, day)
    site_st = dt64.floor(site_st, src_ai['duration'])

    site_et = ap.get_site_info(project, site, 'end_time')
    if site_et is None or site_et > et:
        site_et = et
    else:
        site_et = dt64.ceil(site_et, day)
    site_et = dt64.ceil(site_et, src_ai['duration'])

    logger.info('Processing %s/%s %s', project, site,
                dt64.fmt_dt64_range(site_st, site_et))
    for t in dt64.dt64_range(site_st, site_et, src_ai['duration']):
        temp_file_name = None
        try:
            if hasattr(dest_path, '__call__'):
args = parser.parse_args()
if __name__ == '__main__':
    logging.basicConfig(level=getattr(logging, args.log_level.upper()),
                        format=args.log_format)
    
logger = logging.getLogger(__name__)

# Use a consistent value for current time, process any --now option
# first.
if args.now:
    now = dt64.parse_datetime64(args.now, 'us')
else:
    now = np.datetime64('now', 'us')

day = np.timedelta64(24, 'h')
today = dt64.floor(now, day)
yesterday = today - day
tomorrow = today + day

# This can be used in os.path.join() to include the test directory
# when needed.
if args.test_mode:
    test_mode_str = 'test'
else:
    test_mode_str = ''

if args.rolling:
    if args.start_time or args.end_time:
        raise Exception('Cannot set start or end time for rolling plots')
    end_time = dt64.ceil(now, np.timedelta64(1, 'h'))
    start_time = end_time - day
Example #9
project_list, site_list = ap.parse_project_site_list(args.project_site)

# Process --archive options for source data
if args.archive:
    archive = ap.parse_archive_selection(args.archive)
else:
    archive = {}

# Process --baseline-archive options for target data
if args.baseline_archive:
    baseline_archive = ap.parse_archive_selection(args.baseline_archive)
else:
    baseline_archive = {}

st = dt64.floor(st, day)
et = dt64.ceil(et, day)

for n in range(len(project_list)):
    project = project_list[n]
    site = site_list[n]

    # Get baseline archive to use for target data
    if project in baseline_archive and site in baseline_archive[project]:
        bl_archive = baseline_archive[project][site]
    else:
        bl_archive = 'realtime_baseline'

    an, ai = ap.get_archive_info(project, site, 'MagData', archive=bl_archive)

    if 'qdc_fit_duration' not in ai:
    def __init__(self,
                 project=None,
                 site=None,
                 channels=None,
                 start_time=None,
                 end_time=None,
                 sample_start_time=np.array([]),
                 sample_end_time=np.array([]),
                 nominal_cadence=np.timedelta64(3, 'h'),
                 integration_interval=np.array([]),
                 data=np.array([]),
                 units=None,
                 sort=False,
                 magdata=None,
                 magqdc=None, 
                 scale=None,
                 nth=1,
                 fit=None,
                 fit_params={},
                 with_qdc=None):
        Data.__init__(self,
                      project=project,
                      site=site,
                      channels=channels,
                      start_time=start_time,
                      end_time=end_time,
                      sample_start_time=sample_start_time,
                      sample_end_time=sample_end_time,
                      integration_interval=integration_interval,
                      nominal_cadence=nominal_cadence,
                      data=data,
                      units=units,
                      sort=sort)

        if magdata is not None:
            self.project = magdata.project
            self.site = magdata.site
            self.channels = c = magdata.channels
            self.start_time = dt64.floor(magdata.start_time, 
                                         self.nominal_cadence)
            self.end_time = dt64.ceil(magdata.end_time,
                                      self.nominal_cadence)

            self.sample_start_time = np.arange(self.start_time, self.end_time,
                                               self.nominal_cadence)
            self.sample_end_time = self.sample_start_time + self.nominal_cadence
            self.integration_interval = None
                        
            if magqdc is None:
                logger.info('Creating KIndex object without a QDC')
                bsub = magdata.data[magdata.get_channel_index(c)]
                self.with_qdc = False
            else:
                assert magdata.units == magqdc.units, 'Units must match'
                if isinstance(magqdc, ap.magdata.MagQDC):
                    aligned = magqdc.align(magdata, fit=fit, **fit_params)
                else:
                    aligned = magqdc                            

                # Baseline subtracted data
                bsub = np.abs(magdata.data[magdata.get_channel_index(c)] -
                              aligned.data[aligned.get_channel_index(c)])
                self.with_qdc = True  # a QDC was supplied

            self.units = None

            if nth is None:
                if magdata.nominal_cadence <= np.timedelta64(5, 's'):
                    # Throw away ~30 seconds
                    nth = int(np.timedelta64(30, 's') \
                                  / magdata.nominal_cadence)
                else:
                    # Throw away up to 2.5 minutes
                    nth = int(np.timedelta64(150, 's') \
                                  / magdata.nominal_cadence)

            nth_largest = ap.tools.NthLargest(nth)
            nth_smallest = ap.tools.NthLargest(nth, smallest=True)
            
            self.range = np.zeros([len(self.channels), 
                                  len(self.sample_start_time)])
            for i in range(len(self.sample_start_time)):
                tidx = np.where(np.logical_and(magdata.sample_start_time >=
                                               self.sample_start_time[i],
                                               magdata.sample_end_time <=
                                               self.sample_end_time[i]))[0]
                for cn in range(len(self.channels)):
                    self.range[cn, i] = (nth_largest(bsub[cn, tidx]) -
                                         nth_smallest(bsub[cn, tidx]))

            self.data = np.tile(np.nan, self.range.shape)
            self.data[np.nonzero(np.isfinite(self.range))] = 0

            if scale is None:
                scale = self.get_site_info('k_index_scale')
                
            # K-index thresholds may be scaled but all are proportional to
            # the limits Bartels defined for Niemegk observatory.
            self.thresholds = np.array([0.00, 0.01, 0.02, 0.04,
                                        0.08, 0.14, 0.24, 0.40,
                                        0.66, 1.00]) * scale


            for i in range(1, len(self.thresholds)):
                self.data[np.nonzero(self.range >= self.thresholds[i])] = i
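
# Worked illustration (not part of the original class) of how the threshold
# table above turns a range value into a K index.  scale is the K = 9 lower
# limit for the site; Bartels' limit for Niemegk is commonly quoted as 500 nT,
# which is assumed here purely as an example value.
_example_scale = 500.0   # nT
_example_thresholds = np.array([0.00, 0.01, 0.02, 0.04, 0.08, 0.14,
                                0.24, 0.40, 0.66, 1.00]) * _example_scale
_example_range = 85.0    # nT disturbance range within one 3 h interval
# The loop above assigns the largest i for which range >= thresholds[i],
# which is equivalent to:
_example_k = np.searchsorted(_example_thresholds[1:], _example_range,
                             side='right')
assert _example_k == 5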
Example #13
    def get_quiet_days(self, nquiet=5, channels=None, 
                       cadence=np.timedelta64(5, 's').astype('m8[us]'),
                       method=None):
        '''
        nquiet: number of quiet days
        
        channels: channels used in calculations. Defaults to first
        channel only
        
        cadence: cadence used for calculation, and of the returned data
                
        returns: data from nquiet quietest days

        Adapted from algorithm originally developed by Andrew Senior.
        '''
        
        if channels is None:
            # Default to using H or X (ie first channel)
            cidx = [0]
            channels = self.channels[0]
        else:
            cidx = self.get_channel_index(channels)

        if method is None:
            method = 'monthly_mean'

        day = np.timedelta64(24, 'h')
        st = dt64.floor(self.start_time, day)
        et = dt64.ceil(self.end_time, day)
        s = self.space_regularly(cadence, start_time=st, end_time=et,
                                 missing_cadence=self.nominal_cadence * 2)

        num_days = int(np.round((et - st) / day))
        daily_data = s.split(day)
        daily_act = np.zeros(num_days)

        # Compute monthly mean for each selected channel
        monthly_means = ap.nanmean(s.data[cidx], axis=1)

        if method == 'monthly_mean':
            for n in range(num_days):
                # Estimate daily activity based on RMS departure from
                # monthly mean
                daily_act[n] = \
                    ap.nanmean(np.sqrt(ap.nanmean((daily_data[n].data[cidx] \
                                                   .transpose() - 
                                                   monthly_means)**2, axis=1)))

        elif method == 'daily_mean':
            for n in range(num_days):
                # Estimate daily activity based on RMS departure from
                # daily mean
                daily_means = ap.nanmean(daily_data[n].data[cidx], axis=1)
                daily_act[n] = \
                    ap.nanmean(np.sqrt(ap.nanmean((daily_data[n].data[cidx] \
                                                   .transpose() - \
                                                   daily_means)**2, axis=1)))
            
                # Shift the data by the difference between the monthly
                # and daily means
                daily_data[n].data += (monthly_means - daily_means)
            
        elif method == 'linear_fit':
            x = self.get_mean_sample_time().astype('m8[us]').astype('int64')
            fits = []
            for cn in range(len(cidx)):
                fits.append(np.polyfit(x, self.data[cidx[cn]], 1))
                
            for n in range(num_days):
                # Estimate daily activity based on RMS departure from
                # linear fit to dataset
                daily_x = daily_data[n].get_mean_sample_time() \
                    .astype('m8[us]').astype('int64')
                tmp_act = np.zeros(len(cidx))
                for cn in range(len(cidx)):
                    daily_y = fits[cn][0]*daily_x + fits[cn][1]
                    tmp_act[cn] = ap.nanmean((daily_data[n].data[cidx[cn]]\
                                              .transpose() - daily_y)**2)
                    
                    # Shift the data by the difference between the
                    # monthly mean and the fit.
                    daily_data[n].data[cidx[cn]] += \
                        (monthly_means[cn] - daily_y)


                daily_act[n] = ap.nanmean(np.sqrt(ap.nanmean(tmp_act)))
        else:
            raise Exception('Unknown method')

        # Don't use days where more than 25% of data is missing
        for n in range(num_days):
            if np.mean(np.isnan(daily_data[n].data[cidx])) > 0.25:
                daily_act[n] = np.inf

        # Sort into ascending order of activity. Nans are put last.
        idx = np.argsort(daily_act)
        r = []
        for n in range(nquiet):
            r.append(daily_data[idx[n]])
        return r
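
# A minimal usage sketch of get_quiet_days() (illustration only).  md is
# assumed to be a MagData instance, for example the result of a successful
# load_data(...) call shown earlier in this file.
#
#     quiet_days = md.get_quiet_days(nquiet=5, method='monthly_mean')
#     for qd in quiet_days:
#         print(qd.start_time, qd.end_time)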