# Imports assumed by this module (the original fragment does not show them);
# expand, from_file, shared, substitute, nctools and ConfigError are
# project-local helpers defined elsewhere in the repository.
import os
import glob
import datetime

import numpy as np
import scipy.stats
import netcdftime
from netCDF4 import Dataset, num2date

import loghelper


def power(config):
    """Reads a time series from a netCDF time-series file and adds power as a variable."""

    if __name__ == "__main__":
        logger = loghelper.create_logger(config)
    else:
        logger = loghelper.get_logger(config['log.name'])

    # Number of samples to use should be in here
    # Whether to normalise power should be in here
    pnorm = config['pnorm']
    pdist = config['pdist']
    sstd = config['sstd']
    dstd = config['dstd']
    pquants = config['pquants']
    quantiles = np.array(pquants)
    logger.debug(pnorm)

    if pdist:
        n = pdist

    grid_id = config['grid_id']
    init_time = config['init_time']
    pcurve_dir = config['pcurve_dir']
    ts_dir = config['tseries_dir']
    tseries_file = expand(config['tseries_file'], config)
    power_file = expand(config['power_file'], config)

    logger.info('Estimating power from time series: %s' % tseries_file)
    logger.info('Writing power time series to: %s' % power_file)

    dataset_in = Dataset(tseries_file, 'a')

    # Get dimensions
    dims = dataset_in.dimensions
    ntime = len(dims['time'])
    nloc = len(dims['location'])
    nheight = len(dims['height'])
    loc_str_len = len(dims['loc_str_length'])

    # Get coordinate variables
    nctime = dataset_in.variables['time']
    datetimes = netcdftime.num2date(nctime[:], nctime.units)
    location = [''.join(l.filled(' ')).strip() for l in dataset_in.variables['location']]
    height = dataset_in.variables['height']

    # Get attributes
    metadata = config['metadata']

    if power_file == tseries_file:
        dataset_out = dataset_in
    else:
        dataset_out = Dataset(power_file, 'w')

    # Get number of quantiles
    nq = len(quantiles)
    pdata = np.ma.zeros((ntime, nloc, nheight, nq + 1), float)  # mean will be 1st value

    use_locs = []
    for l, loc in enumerate(location):
        pcurve_file = '%s/%s.csv' % (pcurve_dir, loc)

        # mask power data if no power curve found for this park
        if not os.path.exists(pcurve_file):
            #logger.debug("Power curve: %s not found, skipping" % pcurve_file)
            pdata[:, l, :, :] = np.ma.masked
            continue

        logger.info('Predicting power output for %s' % loc)

        # Open the power curve
        use_locs.append(l)
        pcurve = from_file(pcurve_file)

        for h in range(nheight):
            speed = dataset_in.variables['SPEED'][:, l, h]
            direction = dataset_in.variables['DIRECTION'][:, l, h]
            #pwr = pcurve.power(speed, direction)

            # power_dist creates a distribution for each timestep by sampling
            # n times from a normal distribution
            pdist = pcurve.power_dist(speed, direction, sstd=sstd, dstd=dstd, n=n, normalise=pnorm)
            pmean = np.mean(pdist, axis=1)
            pquants = scipy.stats.mstats.mquantiles(pdist, prob=quantiles / 100.0, axis=1, alphap=0.5, betap=0.5)
            pdata[:, l, h, 0] = pmean
            pdata[:, l, h, 1:] = pquants[:, :]

        logger.info('finished %s' % loc)

    use_inds = np.array(use_locs)
    logger.debug(use_inds)
    logger.debug(pdata.shape)
    logger.debug(pdata[:, use_inds, :, :].shape)

    if dataset_out != dataset_in:
        dataset_out.createDimension('time', None)
        dataset_out.createVariable('time', 'float', ('time',))
        dataset_out.variables['time'][:] = nctime[:]
        dataset_out.variables['time'].units = nctime.units
        dataset_out.variables['time'].calendar = nctime.calendar

        dataset_out.createDimension('location', len(use_locs))
        dataset_out.createDimension('loc_str_length', loc_str_len)
        loc_data = np.array([list(l.ljust(loc_str_len, ' ')) for l in location])
        dataset_out.createVariable('location', 'c', ('location', 'loc_str_length'))
        dataset_out.variables['location'][:] = loc_data[use_inds, :]

        dataset_out.createDimension('height', nheight)
        dataset_out.createVariable('height', 'i', ('height',))
        dataset_out.variables['height'][:] = height[:]
        dataset_out.GRID_ID = dataset_in.GRID_ID
        dataset_out.DX = dataset_in.DX
        dataset_out.DY = dataset_in.DY

        try:
            dataset_out.variables['height'].units = height.units
        except Exception:
            logger.warning("height units missing")

    pdata = pdata[:, use_inds, :, :]

    for key in metadata.keys():
        key = key.upper()
        logger.debug(key)
        dataset_out.setncattr(key, dataset_in.getncattr(key))

    pavg = dataset_out.createVariable('POWER', 'f', ('time', 'location', 'height'))
    pavg.units = 'kW'
    pavg.description = 'forecast power output'
    pavg[:] = pdata[:, :, :, 0]

    for q, qval in enumerate(quantiles):
        varname = 'POWER.P%02d' % qval
        logger.debug("creating variable %s" % varname)
        var = dataset_out.createVariable(varname, 'f', ('time', 'location', 'height'))
        if pnorm:
            var.units = 'ratio'
        else:
            var.units = 'kW'
        var.description = 'forecast power output'
        logger.debug(pdata[:, :, :, q + 1])
        var[:] = pdata[:, :, :, q + 1]

    dataset_in.close()
    if dataset_out != dataset_in:
        dataset_out.close()
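# --- Illustration only -------------------------------------------------------
# A minimal sketch of what pcurve.power_dist is assumed to do above: perturb
# each (speed, direction) observation with normally distributed errors of
# standard deviation sstd and dstd, evaluate the power curve on all n samples,
# and return one sampled power distribution per timestep. The real
# implementation lives in the module that provides from_file; this helper is
# hypothetical and only assumes pcurve.power broadcasts over arrays.
def _power_dist_sketch(pcurve, speed, direction, sstd, dstd, n):
    """Return an (ntime, n) array of sampled power values (illustrative)."""
    speed = np.asarray(speed, dtype=float)
    direction = np.asarray(direction, dtype=float)
    # draw n perturbed samples per timestep along a trailing sample axis
    spd = speed[:, None] + np.random.normal(0.0, sstd, (speed.size, n))
    drn = (direction[:, None] + np.random.normal(0.0, dstd, (direction.size, n))) % 360.0
    spd = np.clip(spd, 0.0, None)  # negative sampled wind speeds are unphysical
    return pcurve.power(spd, drn)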
# Variant of power() for files with reftime/leadtime dimensions; note that it
# redefines the name above if both are kept in the same module.
def power(config):
    """Reads a time series from a netCDF time-series file and adds power as a variable."""

    if __name__ == "__main__":
        logger = loghelper.create_logger(config)
    else:
        logger = loghelper.get_logger(config['log.name'])

    # listify ensures they are returned as a list, even if it is one file
    files = shared._listify(config['<files>'])

    # Number of samples to use should be in here
    # Whether to normalise power should be in here
    start = config.get('start')
    delay = config.get('delay')
    cycles = shared._listify(config.get('cycles'))
    pnorm = config.get('pnorm')
    pdist = config.get('pdist')
    sstd = config.get('sstd')
    dstd = config.get('dstd')
    pquants = config.get('pquants')
    quantiles = np.array(pquants)
    pcurve_dir = config.get('pcurve-dir')
    ts_dir = config.get('tseries-dir')
    out = config.get('out')
    metadata = config.get('metadata')

    basetime = start if start else datetime.datetime.today()
    prior = shared._prior_time(basetime, delay=delay, hours=cycles)

    logger.debug("using %s as a start time" % prior)

    if not files:
        logger.debug("no files specified, finding using options")
        file_pattern = config.get('file-pattern')
        if not file_pattern:
            raise ConfigError('either supply files or specify file-pattern')
        expanded = substitute.sub_date(file_pattern, init_time=prior)
        files = glob.glob(expanded)

    # if we get to this point and there are still no files, then we have a problem
    if not files:
        raise IOError("no files found")

    logger.debug("input files:")
    for f in files:
        logger.debug("\t%s" % f)

    if pdist:
        n = pdist

    #grid_id = config['grid_id']

    for tseries_file in files:
        dataset_in = Dataset(tseries_file, 'a')

        # Get dimensions
        dims = dataset_in.dimensions
        nreftime = len(dims['reftime'])
        ntime = len(dims['leadtime'])
        nloc = len(dims['location'])
        nheight = len(dims['height'])
        loc_str_len = len(dims['loc_str_length'])

        # Get coordinate variables
        reftime = dataset_in.variables['reftime']
        leadtime = dataset_in.variables['leadtime']
        validtime = nctools._valid_time(reftime, leadtime)
        refdt = num2date(reftime[:], reftime.units)

        power_file = substitute.sub_date(out, init_time=refdt[0])

        logger.info('Estimating power from time series: %s' % tseries_file)
        logger.info('Writing power time series to: %s' % power_file)

        location = [''.join(l.filled(' ')).strip() for l in dataset_in.variables['location']]
        height = dataset_in.variables['height']

        if power_file == tseries_file:
            dataset_out = dataset_in
        else:
            dataset_out = Dataset(power_file, 'w')

        # Get number of quantiles
        nq = len(quantiles)
        pdata = np.ma.zeros((ntime, nloc, nheight, nq + 1), float)  # mean will be 1st value

        use_locs = []
        # loop through locations and look for a power-curve file
        for l, loc in enumerate(location):
            pcurve_file = '%s/%s.csv' % (pcurve_dir, loc)

            # mask power data if no power curve found for this park
            if not os.path.exists(pcurve_file):
                #logger.debug("Power curve: %s not found, skipping" % pcurve_file)
                pdata[:, l, :, :] = np.ma.masked
                continue

            logger.info('Predicting power output for %s' % loc)

            # Open the power curve
            use_locs.append(l)
            pcurve = from_file(pcurve_file)

            for h in range(nheight):
                speed = dataset_in.variables['SPEED'][0, :, l, h]
                direction = dataset_in.variables['DIRECTION'][0, :, l, h]
                #pwr = pcurve.power(speed, direction)

                # power_dist creates a distribution for each timestep by sampling
                # n times from a normal distribution
                pdist = pcurve.power_dist(speed, direction, sstd=sstd, dstd=dstd, n=n, normalise=pnorm)
                pmean = np.mean(pdist, axis=1)
                pquants = scipy.stats.mstats.mquantiles(pdist, prob=quantiles / 100.0, axis=1, alphap=0.5, betap=0.5)
                pdata[:, l, h, 0] = pmean
                pdata[:, l, h, 1:] = pquants[:, :]

            #logger.info('finished %s' % loc)

        use_inds = np.array(use_locs)

        if dataset_out != dataset_in:
            dataset_out.createDimension('reftime', None)
            dataset_out.createVariable('reftime', 'float', ('reftime',))
            dataset_out.variables['reftime'][:] = reftime[:]
            dataset_out.variables['reftime'].units = reftime.units
            dataset_out.variables['reftime'].calendar = reftime.calendar
            dataset_out.variables['reftime'].long_name = reftime.long_name
            dataset_out.variables['reftime'].standard_name = reftime.standard_name

            dataset_out.createDimension('leadtime', len(leadtime))
            dataset_out.createVariable('leadtime', 'int', ('leadtime',))
            dataset_out.variables['leadtime'][:] = leadtime[:]
            dataset_out.variables['leadtime'].units = leadtime.units
            dataset_out.variables['leadtime'].long_name = leadtime.long_name
            dataset_out.variables['leadtime'].standard_name = leadtime.standard_name

            dataset_out.createDimension('location', len(use_locs))
            dataset_out.createDimension('loc_str_length', loc_str_len)
            loc_data = np.array([list(l.ljust(loc_str_len, ' ')) for l in location])
            dataset_out.createVariable('location', 'c', ('location', 'loc_str_length'))
            dataset_out.variables['location'][:] = loc_data[use_inds, :]

            dataset_out.createDimension('height', nheight)
            dataset_out.createVariable('height', 'i', ('height',))
            dataset_out.variables['height'][:] = height[:]
            dataset_out.GRID_ID = dataset_in.GRID_ID
            dataset_out.DX = dataset_in.DX
            dataset_out.DY = dataset_in.DY

            try:
                dataset_out.variables['height'].units = height.units
            except Exception:
                logger.warning("height units missing")

        pdata = pdata[:, use_inds, :, :]

        for key in metadata.keys():
            key = key.upper()
            dataset_out.setncattr(key, dataset_in.getncattr(key))

        pavg = dataset_out.createVariable('POWER', 'f', ('reftime', 'leadtime', 'location', 'height'))
        pavg.units = 'kW'
        pavg.description = 'forecast power output'
        pavg[0, :, :, :] = pdata[:, :, :, 0]

        for q, qval in enumerate(quantiles):
            varname = 'POWER.P%02d' % qval
            var = dataset_out.createVariable(varname, 'f', ('reftime', 'leadtime', 'location', 'height'))
            if pnorm:
                var.units = 'ratio'
            else:
                var.units = 'kW'
            var.description = 'forecast power output'
            var[0, :, :, :] = pdata[:, :, :, q + 1]

        #logger.debug(dataset_out)
        dataset_in.close()
        if dataset_out != dataset_in:
            dataset_out.close()
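# --- Illustration only -------------------------------------------------------
# The keys read by the variant above imply a config mapping of roughly this
# shape. Every value below is hypothetical; 'out' and 'file-pattern' are in
# practice date patterns expanded by substitute.sub_date.
_example_config = {
    'log.name':     'power',
    '<files>':      [],            # explicit input files; empty means search via file-pattern
    'start':        None,          # base time; falls back to datetime.datetime.today()
    'delay':        6,             # passed to shared._prior_time
    'cycles':       [0, 12],       # forecast cycles, passed as hours=
    'pnorm':        True,          # write power as a 0-1 ratio rather than kW
    'pdist':        100,           # number of samples drawn per timestep
    'sstd':         1.5,           # std-dev of speed perturbations
    'dstd':         10.0,          # std-dev of direction perturbations
    'pquants':      [10, 50, 90],  # percentiles written as POWER.Pnn variables
    'pcurve-dir':   '/path/to/pcurves',  # expects one <location>.csv per park
    'tseries-dir':  '/path/to/tseries',
    'out':          'power.nc',          # date pattern in practice
    'file-pattern': 'tseries.nc',        # date pattern in practice
    'metadata':     {},            # global attributes copied from input to output
}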
landsea_mask.units = ""
landsea_mask.scale_factor = 1.0
landsea_mask.add_offset = 0.0
landsea_mask.missing_value = -9999.0
landsea_mask.vmin = 0.0
landsea_mask.vmax = 0.0
landsea_mask.num_bins = 0

# What about extents and DX?
# ...
landsea.description = 'Land-Sea mask with binary 0/1 values'
landsea.history = 'Created ' + time.ctime(time.time())
landsea.MAP_PROJECTION = "EQUIDISTANT CYLINDRICAL"
landsea.SOUTH_WEST_CORNER_LAT = -90.0
landsea.SOUTH_WEST_CORNER_LON = -180.0
landsea.DX = 0.5
landsea.DY = 0.5

landsea_lons.units = 'east_west'
landsea_lats.units = 'north_south'

# Now put some data into our variables,
# starting with the lats and lons.

# Raw data values for lat and lon
lats = np.arange(-90, 90, 0.5)
lons = np.arange(-180, 180, 0.5)

# Now assign the raw data to the netcdf coordinate variables
landsea_lons[:] = lons
landsea_lats[:] = lats
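# --- Illustration only -------------------------------------------------------
# The fragment above assumes the dataset, dimensions and variables were
# created earlier. A plausible setup, inferred from the 0.5-degree grid and
# the attribute assignments (file name and variable names are guesses):
#
#   landsea = Dataset('landsea_mask.nc', 'w')
#   landsea.createDimension('north_south', 360)   # 180 degrees / 0.5
#   landsea.createDimension('east_west', 720)     # 360 degrees / 0.5
#   landsea_lats = landsea.createVariable('lat', 'f4', ('north_south',))
#   landsea_lons = landsea.createVariable('lon', 'f4', ('east_west',))
#   landsea_mask = landsea.createVariable('landsea_mask', 'f4',
#                                         ('north_south', 'east_west'))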