# Assumed module-level context for the examples below (not shown in the
# original snippets): standard imports, a module logger, and the
# climate_toolbox helpers that one of the later examples imports inline.
# The path templates (BASELINE_FILE, BCSD_pattern_files, BCSD_orig_files,
# WRITE_PATH), the metadata field lists (DS_METADATA_FEILDS,
# INCLUDED_METADATA, ADDITIONAL_METADATA), and load_baseline are
# project-specific and left undefined here.
import os
import pprint
import logging

import pandas as pd
import xarray as xr

from climate_toolbox import load_bcsd, weighted_aggregate_grid_to_regions

logger = logging.getLogger(__name__)


def run_job(metadata,
            variable,
            transformation_name,
            transformation,
            unit,
            read_acct,
            rcp,
            pername,
            years,
            model,
            baseline_model,
            seasons,
            agglev,
            aggwt,
            weights=None):

    logger.debug('Beginning job\nkwargs:\t{}'.format(
        pprint.pformat(metadata, indent=2)))

    # Add to job metadata
    metadata.update(dict(time_horizon='{}-{}'.format(years[0], years[-1])))

    baseline_file = BASELINE_FILE.format(**metadata)
    pattern_file = BCSD_pattern_files.format(**metadata)
    write_file = WRITE_PATH.format(**metadata)

    # do not duplicate
    if os.path.isfile(write_file):
        return

    # Get transformed data
    total = None

    seasonal_baselines = {}
    for season in seasons:
        basef = baseline_file.format(season=season)
        logger.debug('attempting to load baseline file: {}'.format(basef))
        seasonal_baselines[season] = load_baseline(basef, variable)

    season_month_start = {'DJF': 12, 'MAM': 3, 'JJA': 6, 'SON': 9}

    for year in years:
        seasonal = []

        for s, season in enumerate(seasons):

            pattf = pattern_file.format(year=year, season=season)
            logger.debug('attempting to load pattern file: {}'.format(pattf))
            patt = load_bcsd(pattf, variable, broadcast_dims=('day', ))

            logger.debug('{} {} {} - reindexing coords day --> time'.format(
                model, year, season))

            # Build a daily PeriodIndex starting at the season's first month;
            # DJF begins in December of the *previous* year, hence the offset.
            start = '{}-{}-1'.format(
                year - int(season == 'DJF'), season_month_start[season])
            time = xr.DataArray(
                pd.period_range(start, periods=len(patt.day), freq='D'),
                coords={'day': patt.day})

            patt = (patt
                    .assign_coords(time=time)
                    .swap_dims({'day': 'time'})
                    .drop('day'))

            logger.debug(
                '{} {} {} - adding pattern residuals to baseline'.format(
                    model, year, season))

            seasonal.append(patt + seasonal_baselines[season])

        logger.debug(('{} {} - concatenating seasonal data and ' +
                      'applying transform').format(model, year))

        annual = xr.Dataset(
            {variable: xr.concat(seasonal, dim='time').pipe(transformation)})

        if total is None:
            total = annual
        else:
            total += annual

    # Average the accumulated annual totals over the period
    ds = total / len(years)

    # Reshape to regions

    logger.debug('{} reshaping to regions'.format(model))
    if not agglev.startswith('grid'):
        ds = weighted_aggregate_grid_to_regions(ds,
                                                variable,
                                                aggwt,
                                                agglev,
                                                weights=weights)

    # Update netCDF metadata
    logger.debug('{} update metadata'.format(model))
    ds.attrs.update(
        **{k: str(v)
           for k, v in metadata.items() if k in DS_METADATA_FEILDS})

    # Write output
    logger.debug('attempting to write to file: {}'.format(write_file))
    if not os.path.isdir(os.path.dirname(write_file)):
        os.makedirs(os.path.dirname(write_file))

    ds.to_netcdf(write_file)
    logger.debug('done')
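
# A minimal invocation sketch for the variant above. All values here are
# hypothetical placeholders (metadata keys, model names, the transform, and
# the period), not the project's real configuration; the path-template
# constants must be defined at module level for this to run.
if __name__ == '__main__':
    run_job(
        metadata={'variable': 'tas', 'rcp': 'rcp85', 'model': 'CCSM4',
                  'baseline_model': 'CCSM4', 'pername': '2040-2059'},
        variable='tas',
        transformation_name='degree-days',
        transformation=lambda da: (da - 273.15).clip(min=0),
        unit='C',
        read_acct='climate',
        rcp='rcp85',
        pername='2040-2059',
        years=list(range(2040, 2060)),
        model='CCSM4',
        baseline_model='CCSM4',
        seasons=('DJF', 'MAM', 'JJA', 'SON'),
        agglev='grid025',  # grid-level output, so no regional weights needed
        aggwt='areawt')
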
def run_job(metadata,
            variable,
            transformation_name,
            transformation,
            unit,
            rcp,
            pername,
            years,
            model,
            agglev,
            aggwt,
            weights=None):

    logger.debug('Beginning job\nkwargs:\t{}'.format(
        pprint.pformat(metadata, indent=2)))

    # Add to job metadata
    metadata.update(dict(time_horizon='{}-{}'.format(years[0], years[-1])))

    read_file = BCSD_orig_files.format(**metadata)
    write_file = WRITE_PATH.format(**metadata)

    # do not duplicate
    if os.path.isfile(write_file):
        return

    # Prepare annual transformed data
    annual = []
    for y in years:
        fp = read_file.format(year=y)

        logger.debug('attempting to load BCSD file: {}'.format(fp))
        annual.append(
            load_bcsd(fp, variable,
                      broadcast_dims=('time', )).pipe(transformation))

    # Concatenate years into a single dataset and average across years
    logger.debug('{} - concatenating annual data'.format(model))
    ds = xr.Dataset({
        variable: (xr.concat(annual, dim=pd.Index(years, name='year'))
                   .mean(dim='year'))
    })

    # Reshape to regions
    logger.debug('{} reshaping to regions'.format(model))
    if not agglev.startswith('grid'):
        ds = weighted_aggregate_grid_to_regions(ds,
                                                variable,
                                                aggwt,
                                                agglev,
                                                weights=weights)

    # Update netCDF metadata
    logger.debug('{} update metadata'.format(model))
    ds.attrs.update(
        **{k: str(v)
           for k, v in metadata.items() if k in DS_METADATA_FEILDS})

    # Write output
    logger.debug('attempting to write to file: {}'.format(write_file))
    if not os.path.isdir(os.path.dirname(write_file)):
        os.makedirs(os.path.dirname(write_file))

    ds.to_netcdf(write_file)
    logger.debug('done')
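
# The concat-then-mean idiom above, shown on synthetic data: stacking per-year
# arrays along a new 'year' index and reducing over it leaves the original
# spatial dimensions intact. (Illustrative only; the names here are made up.)
def _concat_mean_demo():
    import numpy as np

    years = [2020, 2021, 2022]
    annual = [xr.DataArray(np.random.rand(4, 3), dims=('lat', 'lon'))
              for _ in years]
    mean = xr.concat(annual, dim=pd.Index(years, name='year')).mean(dim='year')
    assert mean.dims == ('lat', 'lon')
    return mean
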
def run_job(metadata,
            variable,
            transformation,
            source_variable,
            unit,
            scenario,
            read_acct,
            year,
            model,
            agglev,
            aggwt,
            weights=None):

    import xarray as xr
    import metacsv

    from climate_toolbox import (load_bcsd, weighted_aggregate_grid_to_regions)

    # Add to job metadata
    metadata.update(ADDITIONAL_METADATA)

    file_dependencies = {}

    read_file = BCSD_orig_files.format(**metadata)
    write_file = WRITE_PATH.format(**metadata)

    # do not duplicate
    if os.path.isfile(write_file):
        return

    # Get transformed data
    fp = read_file.format(year=year)

    logger.debug('year {} - attempting to read file "{}"'.format(year, fp))
    with xr.open_dataset(fp) as ds:
        ds.load()

    # Record the source file and its version for provenance metadata
    file_dependencies[os.path.splitext(os.path.basename(fp))[0]] = str(
        ds.attrs.get('version', '1.0'))

    ds = (load_bcsd(ds, source_variable, broadcast_dims=('time', ))
          .pipe(transformation))

    varattrs = {var: dict(ds[var].attrs) for var in ds.data_vars.keys()}

    # Reshape to regions
    if not agglev.startswith('grid'):
        logger.debug('aggregating to "{}" using "{}"'.format(agglev, aggwt))
        ds = weighted_aggregate_grid_to_regions(ds,
                                                variable,
                                                aggwt,
                                                agglev,
                                                weights=weights)

    # Update netCDF metadata
    ds.attrs.update(
        **{k: str(v)
           for k, v in metadata.items() if k in INCLUDED_METADATA})
    ds.attrs.update(ADDITIONAL_METADATA)

    # Write output
    if not os.path.isdir(os.path.dirname(write_file)):
        logger.debug('attempting to create_directory "{}"'.format(
            os.path.dirname(write_file)))

        os.makedirs(os.path.dirname(write_file))

    logger.debug('attempting to write to file "{}"'.format(write_file))

    attrs = dict(ds.attrs)
    attrs['file_dependencies'] = file_dependencies

    for var, vattrs in varattrs.items():
        ds[var].attrs.update(vattrs)

    ds.to_netcdf(write_file)

    metacsv.to_header(write_file.replace('.nc', '.fgh'),
                      attrs=dict(attrs),
                      variables=varattrs)

    logger.debug('job done')
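
# Note on the directory check used throughout these examples: the
# isdir-then-makedirs sequence is racy when jobs run in parallel (another
# worker can create the directory between the check and the call). On
# Python 3.2+ a single call avoids the race:
#
#     os.makedirs(os.path.dirname(write_file), exist_ok=True)
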
def run_job(metadata,
            variable,
            transformation_name,
            transformation,
            unit,
            rcp,
            pername,
            years,
            model,
            agglev,
            aggwt,
            weights=None):

    # Add to job metadata
    metadata.update(dict(time_horizon='{}-{}'.format(years[0], years[-1])))

    logger.debug('Beginning job:\n\tkwargs:\t{}'.format(
        pprint.pformat(metadata, indent=2)))

    read_file = BCSD_orig_files.format(**metadata)
    write_file = WRITE_PATH.format(**metadata)

    # do not duplicate
    if os.path.isfile(write_file):
        return

    # Get transformed data
    annual = []
    for y in years:

        fp = read_file.format(year=y)

        logger.debug('year {} - attempting to read file "{}"'.format(y, fp))
        annual.append(
            load_bcsd(fp, variable,
                      broadcast_dims=('time', )).pipe(transformation))

    logger.debug('concatenating & reducing annual data')
    ds = xr.Dataset({
        variable: (xr.concat(annual, dim=pd.Index(years, name='year'))
                   .mean(dim='year'))
    })

    # Reshape to regions
    if not agglev.startswith('grid'):
        logger.debug('aggregating to "{}" using "{}"'.format(agglev, aggwt))
        ds = weighted_aggregate_grid_to_regions(ds,
                                                variable,
                                                aggwt,
                                                agglev,
                                                weights=weights)

    # Update netCDF metadata
    ds.attrs.update(**metadata)

    # Write output
    if not os.path.isdir(os.path.dirname(write_file)):
        logger.debug('attempting to create_directory "{}"'.format(
            os.path.dirname(write_file)))

        os.makedirs(os.path.dirname(write_file))

    logger.debug('attempting to write to file "{}"'.format(write_file))

    ds.to_netcdf(write_file)

    logger.debug('job done')
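
# Note the two-stage template formatting used above: the path template is
# first filled from metadata, then per-year. For that to work, the template
# must escape the year placeholder so the first pass leaves it intact.
# A hypothetical template illustrating the idea:
#
#     BCSD_orig_files = '/data/{rcp}/{model}/{variable}_{{year}}.nc'
#
#     read_file = BCSD_orig_files.format(rcp='rcp85', model='CCSM4',
#                                        variable='tas')
#     # -> '/data/rcp85/CCSM4/tas_{year}.nc'
#     fp = read_file.format(year=2050)
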
def run_job(metadata,
            variable,
            transformation_name,
            transformation,
            unit,
            rcp,
            pername,
            read_acct,
            years,
            model,
            baseline_model,
            season,
            agglev,
            aggwt,
            weights=None):

    logger.debug('Beginning job\nkwargs:\t{}'.format(
        pprint.pformat(metadata, indent=2)))

    # Add to job metadata
    metadata.update(dict(time_horizon='{}-{}'.format(years[0], years[-1])))

    baseline_file = BASELINE_FILE.format(**metadata)
    pattern_file = BCSD_pattern_files.format(**metadata)
    write_file = WRITE_PATH.format(**metadata)

    # do not duplicate
    if os.path.isfile(write_file):
        return

    # Accumulate transformed data for each year
    total = []

    for year in years:

        pattf = pattern_file.format(year=year)
        logger.debug('attempting to load pattern file: {}'.format(pattf))
        annual = load_bcsd(pattf, variable, broadcast_dims=('day', ))

        logger.debug('{} {} - applying transform'.format(model, year))
        annual = xr.Dataset({variable: annual.pipe(transformation)})

        logger.debug('{} {} - appending annual data'.format(model, year))
        total.append(annual)

    ds = xr.concat(total, dim=pd.Index(years, name='year')).mean(dim='year')

    # load baseline
    logger.debug('attempting to load baseline file: {}'.format(baseline_file))
    base = load_baseline(baseline_file, variable)

    logger.debug('{} - adding pattern residuals to baseline'.format(model))
    ds = (ds + base)

    # Reshape to regions
    logger.debug('{} - reshaping to regions'.format(model))
    if not agglev.startswith('grid'):
        ds = weighted_aggregate_grid_to_regions(ds,
                                                variable,
                                                aggwt,
                                                agglev,
                                                weights=weights)

    # Update netCDF metadata
    logger.debug('{} update metadata'.format(model))
    ds.attrs.update(**metadata)

    # Write output
    logger.debug('attempting to write to file: {}'.format(write_file))
    if not os.path.isdir(os.path.dirname(write_file)):
        os.makedirs(os.path.dirname(write_file))

    ds.to_netcdf(write_file)
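
# The pattern-plus-baseline addition above leans on xarray broadcasting:
# dimensions present in only one operand are broadcast across the other, so a
# time-resolved anomaly adds cleanly to a time-invariant climatology. A toy
# illustration (names made up):
def _pattern_plus_baseline_demo():
    import numpy as np

    base = xr.DataArray(np.full((4, 3), 10.0), dims=('lat', 'lon'))
    patt = xr.DataArray(np.random.rand(5, 4, 3), dims=('time', 'lat', 'lon'))
    combined = patt + base  # 'base' is broadcast across 'time'
    assert combined.dims == ('time', 'lat', 'lon')
    return combined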