Example #1
def get_sst_filename(config):
    base_filename  = config['sst_filename']
    sst_time       = get_sst_time(config)
    sst_filename   = sub_date(base_filename, init_time=sst_time)
    return sst_filename
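
Note: these examples all lean on a sub_date helper that expands date
placeholders inside a string (the comment in Example #2 hints at the
syntax, e.g. %iY -> 2015). A minimal sketch of such a helper, assuming
an illustrative %i.../%v... placeholder scheme for init time and valid
time respectively (not the library's actual API):

import datetime

def sub_date_sketch(s, init_time=None, valid_time=None):
    """Hypothetical stand-in for substitute.sub_date."""
    replacements = {}
    if init_time is not None:
        replacements.update({'%iY': init_time.strftime('%Y'),
                             '%im': init_time.strftime('%m'),
                             '%id': init_time.strftime('%d'),
                             '%iH': init_time.strftime('%H')})
    if valid_time is not None:
        replacements.update({'%vY': valid_time.strftime('%Y'),
                             '%vm': valid_time.strftime('%m'),
                             '%vd': valid_time.strftime('%d'),
                             '%vH': valid_time.strftime('%H')})
    for token, value in replacements.items():
        s = s.replace(token, value)
    return s

# sub_date_sketch('sst_%iY%im%id.nc', init_time=datetime.datetime(2015, 1, 1))
# -> 'sst_20150101.nc'
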
Example #2
def main():
    # merge command-line and file-specified arguments
    config = conf.config(__doc__, sys.argv[1:])

    logger = loghelper.create(LOGGER,
                              log_level=config.get('log.level'),
                              log_fmt=config.get('log.format'))

    if config.get('log.file'):
        log_file = config['log.file']
        logger.addHandler(
            loghelper.file_handler(log_file, config['log.level'],
                                   config['log.format']))
        logger.debug('now logging to file')

    dry_run = config.get('dry-run')

    # either the start time is exactly specified, or else we calculate it
    if config.get('start'):
        init_time = config['start']
    else:
        init_time = shared.get_time(base_time=config.get('base-time'),
                                    delay=config.get('delay'),
                                    round=config.get('cycles'))

    if config.get('end'):
        end_init = config['end']
        init_interval = config['init_interval']
        init_times = list(
            rrule.rrule(freq=rrule.HOURLY,
                        interval=init_interval,
                        dtstart=init_time,
                        until=end_init))
    else:
        init_times = [init_time]

    for init_time in init_times:
        # one-argument function to do initial-time substitution in strings
        expand = lambda s: substitute.sub_date(s, init_time=init_time) if isinstance(s, str) else s

        # dictionary of replacements e.g. %iY : 2015
        date_replacements = substitute.date_replacements(init_time=init_time)

        source = expand(config['source'])
        target = expand(config['target'])

        assert _are_compatible(source, target)

        _recursive_replace(source, target, date_replacements)
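
For reference, dateutil's rrule enumerates init times inclusively from
dtstart up to until, so a 6-hourly interval across one day yields five
entries (a quick check, using only the documented dateutil API):

import datetime
from dateutil import rrule

init_times = list(rrule.rrule(freq=rrule.HOURLY, interval=6,
                              dtstart=datetime.datetime(2015, 1, 1, 0),
                              until=datetime.datetime(2015, 1, 2, 0)))
# five datetimes: 00:00, 06:00, 12:00, 18:00 on 1 Jan, plus 00:00 on 2 Jan
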
Example #3
def get_bdy_filenames(grb_fmt, bdy_times):
    """Creates a list of boundary-condition filenames. In general this
    will be called once per forecast, so there is only one init_time.

    Arguments:
    grb_fmt   -- filename pattern with date placeholders to expand
    bdy_times -- list of boundary-condition times; the first entry is
                 taken as the init_time

    """

    logger = get_logger()
    logger.debug('*** GENERATING BOUNDARY CONDITION FILENAMES ***')

    filelist = [sub_date(grb_fmt, init_time=bdy_times[0], valid_time=b) for b in bdy_times]
    logger.debug(filelist)
    return filelist
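
A hypothetical call, assuming the same illustrative placeholder scheme
as the sketch after Example #1 (the real pattern syntax may differ):

import datetime

bdy_times = [datetime.datetime(2015, 1, 1, h) for h in (0, 6, 12)]
# every filename shares the init time (bdy_times[0]) but gets its own valid time
filenames = get_bdy_filenames('gfs_%iY%im%id_%iH_f%vH.grb', bdy_times)
# -> ['gfs_20150101_00_f00.grb', 'gfs_20150101_00_f06.grb', 'gfs_20150101_00_f12.grb']
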
Example #4
def ncdump(config):

    logger = loghelper.get(LOGGER)

    # _listify ensures arguments are enclosed within a list
    # to simplify treatment in the following code
    files = nctools._listify(config['<files>'])
    vars = nctools._listify(config.get('vars'))
    global_atts = nctools._listify(config.get('global-atts'))
    var_atts = nctools._listify(config.get('var-atts'))
    coord_vars = nctools._listify(config.get('coords'))
    sort_by = nctools._listify(config.get('sort-by'))
    order_by = nctools._listify(config.get('order-by'))
    out = config.get('out')
    pivot = config.get('pivot')
    valid_time = config.get('valid-time')
    format = config.get('format')
    filter = config.get('filter')
    split_by = config.get('split-by')
    concat = config.get('concat')
    start = config.get('start')
    delay = config.get('delay')
    cycles = nctools._listify(config.get('cycles'))

    basetime = start if start else datetime.datetime.today()

    prior = _prior_time(basetime, delay=delay, hours=cycles)
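    # _prior_time is assumed to round basetime back to the most recent
    # cycle hour in `cycles`, offset by `delay` hours (helper not shown)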

    logger.debug("using %s as a start time" % prior)

    if not files:
        logger.info("no files specified, finding using options")
        file_pattern = config.get('file-pattern')
        if not file_pattern:
            raise nctools.ConfigError(
                'either supply files or specify file-pattern')

        expanded = substitute.sub_date(file_pattern, init_time=prior)
        files = glob.glob(expanded)

    if not files: raise IOError("no files found")

    frame = nctools.melt(files,
                         vars,
                         global_atts,
                         var_atts,
                         coord_vars,
                         missing=MISSING_ATTS)

    if valid_time:
        logger.debug("adding valid time into frame")
        frame['valid_time'] = frame['reftime'] + frame['leadtime'] * datetime.timedelta(hours=1)

    if filter:
        frame = nctools.filter(frame, filter)

    if concat:
        nctools.concat(frame, concat, name='variable', inplace=True)

    if pivot:
        frame = pd.pivot_table(frame,
                               index=['reftime', 'leadtime', 'location'],
                               columns='variable',
                               values='value')
        frame.reset_index(inplace=True)

    if sort_by: frame.sort_values(sort_by, inplace=True)

    if order_by:
        frame = frame[order_by]

    if out:
        out = substitute.sub_date(out, init_time=prior)

    if split_by:
        gb = frame.groupby(split_by)
        for key, group in gb:
            if out:
                new_name = _merge_name(out, key)
                save(gb.get_group(key),
                     new_name,
                     config['format'],
                     float_format=config.get('float-format'))
            else:
                print(gb.get_group(key).to_string())
                print('\n\n\n')
    elif out:
        save(frame,
             out,
             config['format'],
             float_format=config.get('float-format'))

    else:
        print(frame.to_string())
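
_listify is referenced but not shown; a minimal sketch of the behaviour
the comments describe (wrap scalars in a list, pass lists through, map
None to an empty list) might be:

def _listify_sketch(arg):
    """Hypothetical equivalent of nctools._listify / shared._listify."""
    if arg is None:
        return []
    if isinstance(arg, list):
        return arg
    return [arg]
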
Example #5
def power(config):
    """Reads 'time series' from netcdf time series file, and adds power as a variable. """
    
    if __name__ == "__main__":
        logger = loghelper.create_logger(config)
    else:
        logger = loghelper.get_logger(config['log.name'])
    
    # listify ensures they are returned as a list, even if it is one file
    files = shared._listify(config['<files>'])
    
    # Number of samples to use should be in here
    # Whether to normalise power should be in here    
    start     = config.get('start')
    delay     = config.get('delay')
    cycles    = shared._listify(config.get('cycles'))
    pnorm     = config.get('pnorm')
    pdist     = config.get('pdist')
    sstd      = config.get('sstd')
    dstd      = config.get('dstd')
    pquants   = config.get('pquants')
    quantiles = np.array(pquants)
    pcurve_dir = config.get('pcurve-dir')
    ts_dir     = config.get('tseries-dir')
    out        = config.get('out')
    metadata   = config.get('metadata')

    basetime = start if start else datetime.datetime.today()
    prior = shared._prior_time(basetime, delay=delay, hours=cycles)

    logger.debug("using %s as a start time" % prior)

    if not files:
        logger.debug("no files specified, finding using options")
        file_pattern = config.get('file-pattern')
        if not file_pattern: raise ConfigError('either supply files or specify file-pattern')
        
        expanded = substitute.sub_date(file_pattern, init_time=prior)
        files = glob.glob(expanded)
        print("hello")
        logger.debug(files)

    # if we get to this point and there are still no files, then we have a problem
    if not files: raise IOError("no files found")
    
    logger.debug("input files: ")
    logger.debug(files)
    for f in files:
        logger.debug("\t%s" % f)
    
    # number of samples for the power distribution; power_dist below
    # requires n, so fail early if pdist was not configured
    if pdist:
        n = pdist
    else:
        raise ConfigError('pdist (number of samples) must be specified')

    #grid_id         = config['grid_id']

    out_pattern = config.get('out')
    for tseries_file in files:
        dataset_in = Dataset(tseries_file, 'a')
            
        # Get dimensions
        dims      = dataset_in.dimensions
        nreftime  = len(dims['reftime'])
        ntime     = len(dims['leadtime'])
        nloc      = len(dims['location'])
        nheight   = len(dims['height'])
        loc_str_len = len(dims['loc_str_length'])
        
        # Get coordinate variables
        reftime   = dataset_in.variables['reftime']
        leadtime  = dataset_in.variables['leadtime']
        validtime = nctools._valid_time(reftime, leadtime)
        
        refdt     = num2date(reftime[:], reftime.units)
        
        power_file = substitute.sub_date(out, init_time=refdt[0])
        

        logger.info('Estimating power from time series: %s ' % tseries_file)
        logger.info('Writing power time series to: %s ' % power_file)

        
        location = [''.join(l.filled(' ')).strip() for l in dataset_in.variables['location']]
        height   = dataset_in.variables['height']

        
        if power_file == tseries_file:
            dataset_out = dataset_in
        else:
            dataset_out = Dataset(power_file, 'w')

            
        # Get number of quantiles
        nq    = len(quantiles)
        pdata = np.ma.zeros((ntime,nloc,nheight,nq+1), float) # mean will be 1st value
        
        use_locs = []
        # loop through locations and look for power-curve file
        
        for l,loc in enumerate(location):
            pcurve_file = '%s/%s.csv' %(pcurve_dir, loc)
            
            # mask power data if no power curve found for this park
            if not os.path.exists(pcurve_file):
                #logger.debug("Power curve: %s not found, skipping" % pcurve_file)
                pdata[:,l,:,:] = np.ma.masked
                continue
            
            logger.info('Predicting power output for %s' % loc )
            #
            # Open power curve
            #
            use_locs.append(l)
            pcurve = from_file(pcurve_file)

        
            for h in range(nheight):
                speed     = dataset_in.variables['SPEED'][0,:,l,h]
                direction = dataset_in.variables['DIRECTION'][0,:,l,h]
                
                #pwr = pcurve.power(speed,direction)
        
                # power_dist creates a distribution for each timestep by sampling
                # n times from a normal distribution.
                pdist   = pcurve.power_dist(speed, direction, sstd=sstd,dstd=dstd,n=n, normalise=pnorm)
                pmean   = np.mean(pdist, axis=1)
                pquants = scipy.stats.mstats.mquantiles(pdist, prob=quantiles/100.0,axis=1, alphap=0.5, betap=0.5)
                

                pdata[:,l,h,0]  = pmean
                pdata[:,l,h,1:] = pquants[:,:]

            #logger.info('finished %s' % loc)            



        use_inds = np.array(use_locs)

        
        if dataset_out != dataset_in:
            dataset_out.createDimension('reftime', None)
            dataset_out.createVariable('reftime', 'float', ('reftime',))
            dataset_out.variables['reftime'][:] = reftime[:]
            dataset_out.variables['reftime'].units = reftime.units
            dataset_out.variables['reftime'].calendar = reftime.calendar
            dataset_out.variables['reftime'].long_name = reftime.long_name
            dataset_out.variables['reftime'].standard_name = reftime.standard_name

            
            dataset_out.createDimension('leadtime', len(leadtime))
            dataset_out.createVariable('leadtime', 'int', ('leadtime',))
            dataset_out.variables['leadtime'][:] = leadtime[:]
            dataset_out.variables['leadtime'].units = leadtime.units
            dataset_out.variables['leadtime'].long_name = leadtime.long_name
            dataset_out.variables['leadtime'].standard_name = leadtime.standard_name
            
            dataset_out.createDimension('location', len(use_locs))
            dataset_out.createDimension('loc_str_length', loc_str_len)
            
            loc_data = np.array([list(l.ljust(loc_str_len, ' ')) for l in location])
            dataset_out.createVariable('location', 'c', ('location', 'loc_str_length'))
            dataset_out.variables['location'][:] = loc_data[use_inds,:]
            
            dataset_out.createDimension('height', nheight)        
            dataset_out.createVariable('height', 'i', ('height',))
            dataset_out.variables['height'][:] = height[:]
            dataset_out.GRID_ID = dataset_in.GRID_ID
            dataset_out.DX = dataset_in.DX
            dataset_out.DY = dataset_in.DY
            
            try:
                dataset_out.variables['height'].units = height.units
            except Exception:
                logger.warning("height units missing")
            
            
            pdata = pdata[:, use_inds, :, :]
            # copy named global attributes across (guard against missing metadata)
            for key in (metadata or []):
                key = key.upper()
                dataset_out.setncattr(key, dataset_in.getncattr(key))
                
            
        
        pavg    = dataset_out.createVariable('POWER','f',('reftime','leadtime','location','height'))
        pavg.units = 'kW'
        pavg.description = 'forecast power output'
        pavg[0,:,:,:] = pdata[:,:,:,0]

        
        for q, qval in enumerate(quantiles):

            varname = 'POWER.P%02d' % qval

            var  = dataset_out.createVariable(varname,'f',('reftime','leadtime','location','height'))
            if pnorm:
                var.units = 'ratio'
            else:
                var.units = 'kW'
            var.description = 'forecast power output'

            var[0,:,:,:] = pdata[:,:,:,q+1]
        
                
        #logger.debug(dataset_out)
        
        dataset_in.close()
        if dataset_out != dataset_in:
            dataset_out.close()
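
For the quantile step above, scipy.stats.mstats.mquantiles computes one
row of quantiles per timestep when given axis=1, which is why pquants
slots straight into the last nq entries of pdata. A quick shape check
(synthetic data, illustrative values only):

import numpy as np
import scipy.stats

samples = np.random.normal(loc=500.0, scale=50.0, size=(4, 1000))  # 4 timesteps, 1000 draws
pquants = scipy.stats.mstats.mquantiles(samples, prob=[0.1, 0.5, 0.9],
                                        axis=1, alphap=0.5, betap=0.5)
print(pquants.shape)  # (4, 3): one row per timestep, one column per quantile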