Example #1
def test_set_crs_epsg(tmpdir):
    """ Tests for EPSG codes specifically """

    ds = Dataset(str(tmpdir.join('test.nc')), 'w')
    data_var = ds.createVariable('data', 'S1')
    set_crs(ds, 'data', Proj(init='EPSG:4326'), set_proj4_att=True)
    data_atts = get_ncattrs(data_var)
    crs_var = ds.variables[data_atts['grid_mapping']]
    ncatts = get_ncattrs(crs_var)

    assert data_atts['proj4'] == '+proj=latlong +datum=WGS84 +no_defs'
    assert ncatts['grid_mapping_name'] == 'latitude_longitude'
    assert ncatts['semi_major_axis'] == 6378137.0
    assert ncatts['inverse_flattening'] == 298.257223563

    data_var = ds.createVariable('data2', 'S1')
    set_crs(ds, 'data2', Proj(init='EPSG:4269'), set_proj4_att=True)
    data_atts = get_ncattrs(data_var)
    crs_var = ds.variables[data_atts['grid_mapping']]
    ncatts = get_ncattrs(crs_var)

    assert data_atts['proj4'] == '+proj=latlong +datum=NAD83 +no_defs'
    assert ncatts['grid_mapping_name'] == 'latitude_longitude'
    assert ncatts['semi_major_axis'] == 6378137.0
    assert ncatts['inverse_flattening'] == 298.257223563
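The tests in this listing omit their imports. A minimal sketch of what Example #1 needs, assuming the trefoil (formerly clover) module layout; the exact import paths are an assumption, not taken from the snippet:

# Assumed imports for the test above; the trefoil module paths are an
# assumption based on the project layout.
from netCDF4 import Dataset
from pyproj import Proj

from trefoil.netcdf.crs import set_crs
from trefoil.netcdf.utilities import get_ncattrs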
Example #2
def raster_to_netcdf(filename_or_raster, outfilename=None, variable_name='data', format='NETCDF4', **kwargs):
    """
    Parameters
    ----------
    filename_or_raster: name of file to open with rasterio, or an opened rasterio raster dataset
    outfilename: name of output file.  If blank, will be the same name as the input with a .nc extension added
    variable_name: name of the data variable in the output netCDF file
    format: output format for the netCDF file: NETCDF3_CLASSIC, NETCDF3_64BIT, NETCDF4_CLASSIC, NETCDF4
    kwargs: arguments passed to variable creation: zlib

    Note: only rasters with descending y coordinates are currently supported
    """

    start = time.time()

    if isinstance(filename_or_raster, string_types):
        if not os.path.exists(filename_or_raster):
            raise ValueError('File does not exist: {0}'.format(filename_or_raster))

        src = rasterio.open(filename_or_raster)
        managed_raster = True
    else:
        src = filename_or_raster
        managed_raster = False

    if not src.count == 1:
        raise NotImplementedError('ERROR: multi-band rasters not yet supported for this operation')

    prj = pyproj.Proj(**src.crs)

    outfilename = outfilename or src.name + '.nc'
    with Dataset(outfilename, 'w', format=format) as target:
        if prj.is_latlong():
            x_varname = 'longitude'
            y_varname = 'latitude'
        else:
            x_varname = 'x'
            y_varname = 'y'

        # TODO: may need to do this in blocks if source is big
        data = src.read(1, masked=True)

        coords = SpatialCoordinateVariables.from_bbox(BBox(src.bounds, prj), src.width, src.height)
        coords.add_to_dataset(target, x_varname, y_varname, **kwargs)

        out_var = target.createVariable(variable_name, data.dtype, dimensions=(y_varname, x_varname), **kwargs)
        out_var[:] = data
        set_crs(target, variable_name, prj, set_proj4_att=False)

    if managed_raster:
        src.close()

    print('Elapsed {0:.3f} seconds'.format(time.time() - start))
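A hypothetical call to the function above; the filenames and variable name are placeholders, and zlib is forwarded through kwargs to variable creation:

# Hypothetical usage sketch; 'elevation.tif' stands in for any single-band
# raster with descending y coordinates.
raster_to_netcdf('elevation.tif', outfilename='elevation.nc',
                 variable_name='elevation', zlib=True)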
Example #3
    def _create_elevation_service(self, zone, band, data, nodata_value, coords):
        low, high = band[:2]
        elevation_service_name = "zones/elevation/{}_{}_{}".format(zone.zone_uid, low, high)

        bbox = coords.bbox

        # Reuse the existing service if one already exists
        service = Service.objects.filter(name=elevation_service_name)
        if service.exists():
            return service.first()

        rel_path = elevation_service_name + ".nc"
        abs_path = os.path.join(SERVICE_DATA_ROOT, rel_path)

        if not os.path.exists(os.path.dirname(abs_path)):
            os.makedirs(os.path.dirname(abs_path))

        with Dataset(abs_path, "w", format="NETCDF4") as ds:
            coords.add_to_dataset(ds, "longitude", "latitude")
            data_var = ds.createVariable(
                "data", data.dtype, dimensions=("latitude", "longitude"), fill_value=nodata_value,
            )
            data_var[:] = data
            set_crs(ds, "data", Proj("epsg:4326"))

        # extract out unmasked data
        masked_data = data[data != nodata_value]
        renderer = StretchedRenderer([
            (masked_data.min().item(), Color(46, 173, 60)),
            (masked_data.max().item(), Color(46, 173, 60)),
        ])

        service = Service.objects.create(
            name=elevation_service_name,
            description="Elevation for zone {}, {} - {}".format(zone.name, low, high),
            data_path=rel_path,
            projection="epsg:4326",
            full_extent=bbox,
            initial_extent=bbox,
        )

        Variable.objects.create(
            service=service,
            index=0,
            variable="data",
            projection="epsg:4326",
            x_dimension="longitude",
            y_dimension="latitude",
            name="data",
            renderer=renderer,
            full_extent=bbox,
        )

        return service
Example #4
def test_symmetric_proj4(tmpdir):
    """ Test writing and reading proj4 string as attribute of variable """

    ds = Dataset(str(tmpdir.join('test.nc')), 'w')
    proj4 = '+proj=stere +units=m +datum=WGS84 +lat_ts=60 +lat_0=90 +lon_0=263 +lat_1=60 +x_0=3475000 +y_0=7475000'
    ds.createVariable('data', 'S1')
    set_crs(ds, 'data', Proj(proj4), set_proj4_att=True)
    out_proj4 = get_crs(ds, 'data')

    out_data = CRS.from_string(out_proj4).to_dict()

    assert len(out_data) == 9  # There should be 9 parameters
    assert CRS.from_string(proj4).to_dict() == out_data
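The nine parameters counted above are just the keys parsed out of the proj4 string. A quick way to list them, assuming CRS here is rasterio.crs.CRS (and the rasterio version these tests were written against):

# Sketch: the nine proj4 keys the assertion counts.
from rasterio.crs import CRS

proj4 = '+proj=stere +units=m +datum=WGS84 +lat_ts=60 +lat_0=90 +lon_0=263 +lat_1=60 +x_0=3475000 +y_0=7475000'
print(sorted(CRS.from_string(proj4).to_dict()))
# ['datum', 'lat_0', 'lat_1', 'lat_ts', 'lon_0', 'proj', 'units', 'x_0', 'y_0']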
Example #5
def set_crs(filename, proj4, variables):
    try:
        proj = Proj(proj4)
    except RuntimeError:
        raise click.BadArgumentUsage('Invalid projection: ' + proj4)

    with Dataset(filename, 'a') as ds:
        if not variables:
            variables_li = [v for v in ds.variables if v not in ds.dimensions]
        else:
            variables_li = [x.strip() for x in variables.split(',')]
            bad_variables = set(variables_li).difference(ds.variables.keys())
            if bad_variables:
                raise click.BadArgumentUsage(
                    'The following variables do not exist in this dataset: ' +
                    ', '.join(bad_variables))

        for variable in variables_li:
            crs.set_crs(ds, variable, proj)
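This function is evidently a click command with its decorators stripped by the excerpt. One way to exercise it in-process, assuming it is importable from a module named cli and that the command accepts the arguments positionally, is click's test runner:

# Sketch: driving the command with click's test runner. The module name
# `cli` and the argument shape are assumptions.
from click.testing import CliRunner

from cli import set_crs

runner = CliRunner()
result = runner.invoke(set_crs, ['test.nc', '+proj=longlat +datum=WGS84 +no_defs'])
print(result.output)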
Example #6
def test_utm(tmpdir):
    ds = Dataset(str(tmpdir.join('test.nc')), 'w')
    proj4 = '+init=epsg:3157'  # UTM Zone 10
    ds.createVariable('data', 'S1')
    set_crs(ds, 'data', Proj(proj4), set_proj4_att=True)
    out_proj4 = get_crs(ds, 'data')

    out_data = CRS.from_string(out_proj4).to_dict()

    # The EPSG code will have been converted to its long-form parameters
    assert len(out_data) == 6

    expected = {
        u'zone': 10,
        u'ellps': u'GRS80',
        u'no_defs': True,
        u'proj': u'utm',
        u'units': u'm',
        u'towgs84': u'0,0,0,0,0,0,0'
    }
    assert expected == out_data
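Note that the '+init=epsg:NNNN' form used here is deprecated as of pyproj 2. If the test were updated, the equivalent modern spelling would be:

# pyproj >= 2 deprecates '+init=epsg:NNNN'; the equivalent form:
from pyproj import Proj

prj = Proj('EPSG:3157')  # UTM Zone 10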
Example #7
def test_set_crs(tmpdir):
    """ Test proper encoding of projection into CF Convention parameters """

    ds = Dataset(str(tmpdir.join('test.nc')), 'w')

    # Test polar stereographic
    proj4 = '+proj=stere +datum=WGS84 +lat_ts=60 +lat_0=90 +lon_0=263 +lat_1=60 +x_0=3475000 +y_0=7475000'
    data_var = ds.createVariable('data', 'S1')
    set_crs(ds, 'data', Proj(proj4))
    crs_var = ds.variables[get_ncattrs(data_var)['grid_mapping']]
    ncatts = get_ncattrs(crs_var)

    assert ncatts['grid_mapping_name'] == 'polar_stereographic'
    assert ncatts['inverse_flattening'] == 298.257223563
    assert ncatts['latitude_of_projection_origin'] == 90
    assert ncatts['straight_vertical_longitude_from_pole'] == 263
    assert ncatts['standard_parallel'] == 60
    assert ncatts['false_northing'] == 7475000
    assert ncatts['false_easting'] == 3475000

    # Test Lambert conformal conic
    proj4 = '+proj=lcc +lat_1=30 +lat_2=60 +lat_0=47.5 +lon_0=-97 +x_0=3825000 +y_0=3200000'
    data_var = ds.createVariable('data2', 'S1')
    set_crs(ds, 'data2', Proj(proj4))
    crs_var = ds.variables[get_ncattrs(data_var)['grid_mapping']]
    ncatts = get_ncattrs(crs_var)

    assert ncatts['grid_mapping_name'] == 'lambert_conformal_conic'
    assert ncatts['latitude_of_projection_origin'] == 47.5
    assert ncatts['longitude_of_central_meridian'] == -97
    assert ncatts['standard_parallel'] == [30, 60]
    assert ncatts['false_northing'] == 3200000
    assert ncatts['false_easting'] == 3825000

    # Unsupported projection should fail
    proj4 = '+proj=merc +lat_1=30 +lat_2=60 +lat_0=47.5 +lon_0=-97 +x_0=3825000 +y_0=3200000'
    ds.createVariable('data3', 'S1')
    with pytest.raises(ValueError):
        set_crs(ds, 'data3', Proj(proj4))
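What these assertions check on disk is the CF grid_mapping linkage: set_crs points the data variable's grid_mapping attribute at a dummy variable that carries the projection parameters. Read back with plain netCDF4, the linkage looks roughly like this (a sketch reusing the test's file):

# Sketch: following the CF grid_mapping linkage with plain netCDF4.
from netCDF4 import Dataset

with Dataset('test.nc') as ds:
    crs_name = ds.variables['data'].getncattr('grid_mapping')
    crs_var = ds.variables[crs_name]
    print(crs_var.grid_mapping_name)  # 'polar_stereographic'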
Example #8
def main(original_file, climatena_file, out_dir, valid_variables):
    with rasterio.open(original_file) as ds:
        bounds = ds.bounds
        affine = ds.transform
        shape = ds.shape

    with open(climatena_file, 'r') as f_in:
        headers = csv.DictReader(f_in).fieldnames
        variables = [
            x for x in headers
            if x not in ('ID1', 'ID2', 'Latitude', 'Longitude', 'Elevation')
        ]

    if valid_variables:
        valid = [x.strip().lower() for x in valid_variables.split(',')]
    else:
        valid = variables

    print('Creating datasets...')

    grid = numpy.zeros(shape, dtype='int32')
    grid = numpy.ma.masked_where(grid == 0, grid)

    for var in (x for x in variables if x.lower() in valid):
        out_path = os.path.join(
            out_dir, '{}_{}.nc'.format(
                os.path.splitext(os.path.basename(climatena_file))[0], var))

        if os.path.exists(out_path):
            continue

        with Dataset(out_path, 'w', format='NETCDF4') as ds:
            projection = Proj('EPSG:4326')
            coord_vars = SpatialCoordinateVariables.from_bbox(
                BBox(bounds, projection=projection), *reversed(grid.shape))
            coord_vars.add_to_dataset(ds, 'longitude', 'latitude')
            data_var = ds.createVariable(var,
                                         grid.dtype,
                                         dimensions=('latitude', 'longitude'),
                                         fill_value=grid.fill_value)
            data_var[:] = grid
            set_crs(ds, var, projection)

    print('Copying from ClimateNA data... (0%)', end='\r')
    with open(climatena_file, 'r') as f_in:
        f_in.seek(0, os.SEEK_END)
        end = f_in.tell()
        f_in.seek(0)
        f_in.readline()  # Skip headers

        while f_in.tell() < end:
            lines = ''.join(f_in.readline() for _ in range(1000000))

            arr = numpy.loadtxt(StringIO(lines),
                                delimiter=',',
                                usecols=[
                                    headers.index(x)
                                    for x in ['Latitude', 'Longitude'] +
                                    variables
                                ])
            arr = numpy.moveaxis(arr, 1, 0)

            latitudes = arr[0]
            longitudes = arr[1]

            for i, var in enumerate(variables):
                if var.lower() in valid:
                    out_path = os.path.join(
                        out_dir, '{}_{}.nc'.format(
                            os.path.splitext(
                                os.path.basename(climatena_file))[0], var))

                    variable = arr[i + 2]

                    with Dataset(out_path, 'a') as ds:
                        grid = ds.variables[var][:]
                        fill_value = grid.fill_value
                        grid = grid.data

                        for j, value in enumerate(variable):
                            if value == -9999:
                                continue

                            col, row = [
                                int(round(x)) for x in ~affine *
                                (longitudes[j], latitudes[j])
                            ]

                            if var in MULTIPLIERS:
                                value *= MULTIPLIERS[var]

                            grid[row][col] = value

                        ds.variables[var][:] = numpy.ma.masked_where(
                            grid == fill_value, grid)

            print('Copying from ClimateNA data... ({}%)'.format(
                round(f_in.tell() / end * 100)),
                  end='\r')
        print('Copying from ClimateNA data... (100%)')
    print('Done.')
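The index lookup in the inner loop, ~affine * (longitude, latitude), maps coordinates to fractional (col, row) through the inverted geotransform. A standalone illustration with made-up transform values:

# Sketch of the inverse-affine lookup; the transform numbers are
# illustrative only.
from affine import Affine

affine = Affine(0.0083, 0.0, -140.0,  # pixel width, row rotation, x origin
                0.0, -0.0083, 60.0)   # col rotation, pixel height (negative: descending y), y origin
col, row = [int(round(v)) for v in ~affine * (-120.0, 45.0)]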
Example #9
def to_netcdf(
    files,
    output,
    variable,
    dtype,
    src_crs,
    x_name,
    y_name,
    z_name,
    datetime_pattern,
    netcdf3,
    compress,
    packed,
    xy_dtype,
    # z_dtype,
    calendar,
    autocrop):
    """
    Convert rasters to NetCDF and stack them according to a dimension.

    X and Y dimensions will be named according to the source projection (lon, lat for geographic projections; x, y
    otherwise) unless specified.

    Will overwrite an existing NetCDF file.

    Only the first band of the input will be turned into a NetCDF file.
    """

    # TODO: add format string template to this to parse out components

    filenames = list(glob.glob(files))
    if not filenames:
        raise click.BadParameter('No files found matching that pattern', param='files', param_hint='FILES')

    z_values = []

    if datetime_pattern is not None:
        datetimes = (datetime.strptime(x, datetime_pattern) for x in filenames)

        # Sort both datetimes and filenames by datetime
        z_values, filenames = [list(x) for x in zip(*sorted(zip(datetimes, filenames), key=itemgetter(0)))]

    items = tuple(enumerate(filenames))

    has_z = len(filenames) > 1

    if has_z and not z_name:
        raise click.BadParameter('Required when > 1 input file', param='--z', param_hint='--z')

    if src_crs:
        src_crs = CRS.from_string(src_crs)

    template_ds = rasterio.open(filenames[0])
    src_crs = template_ds.crs or src_crs

    if not src_crs:
        raise click.BadParameter('Required when no CRS information available in source files', param='--src-crs',
                                 param_hint='--src-crs')

    prj = Proj(**src_crs.to_dict())
    bounds = template_ds.bounds
    width = template_ds.width
    height = template_ds.height
    window = None

    src_dtype = numpy.dtype(template_ds.dtypes[0])
    dtype = numpy.dtype(dtype) if dtype else src_dtype

    if dtype == src_dtype:
        fill_value = template_ds.nodata
        if src_dtype.kind in ('u', 'i'):
            # nodata always comes from rasterio as floating point
            fill_value = int(fill_value)
    else:
        fill_value = get_fill_value(dtype)

    x_name = x_name or ('lon' if src_crs.is_geographic else 'x')
    y_name = y_name or ('lat' if src_crs.is_geographic else 'y')

    var_kwargs = {
        'fill_value': fill_value
    }

    format = 'NETCDF3_CLASSIC' if netcdf3 else 'NETCDF4'

    with Dataset(output, 'w', format=format) as out:
        if packed or autocrop:
            mins = []
            maxs = []
            windows = []

            click.echo('Inspecting input datasets...')
            with click.progressbar(items) as iter:
                for index, filename in iter:
                    with rasterio.open(filename) as src:
                        data = src.read(1, masked=True)
                        if packed:
                            mins.append(data.min())
                            maxs.append(data.max())
                        if autocrop:
                            data_window = get_data_window(data)
                            if data_window != ((0, height), (0, width)):
                                windows.append(data_window)

            if packed:
                min_value = min(mins)
                max_value = max(maxs)
                scale, offset = get_pack_atts(dtype, min_value, max_value)
            if autocrop and windows:
                window = union(windows)
                bounds = template_ds.window_bounds(window)
                height = window[0][1] - window[0][0]
                width = window[1][1] - window[1][0]

        coords = SpatialCoordinateVariables.from_bbox(BBox(bounds, prj), width, height, xy_dtype)
        coords.add_to_dataset(out, x_name, y_name, zlib=compress)

        var_dimensions = [y_name, x_name]
        shape = list(coords.shape)
        if has_z:
            shape.insert(0, len(filenames))
            out.createDimension(z_name, shape[0])
            var_dimensions.insert(0, z_name)
            if z_values:
                dates = DateVariable(numpy.array(z_values),
                                     units_start_date=z_values[0], calendar=calendar)
                dates.add_to_dataset(out, z_name)

        click.echo('Creating {0}:{1} with shape {2}'.format(output, variable, shape))

        out_var = out.createVariable(variable, dtype, dimensions=var_dimensions,
                                     zlib=compress, **var_kwargs)
        set_crs(out, variable, prj, set_proj4_att=True)

        if packed:
            out_var.setncattr('scale_factor', scale)
            out_var.setncattr('add_offset', offset)

        click.echo('Copying data from input files...')
        with click.progressbar(items) as iter:
            for index, filename in iter:
                with rasterio.open(filename) as src:
                    data = src.read(1, masked=True, window=window)

                    if has_z:
                        out_var[index, :] = data
                    else:
                        out_var[:] = data

                out.sync()
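The scale_factor and add_offset attributes written in the packed branch follow the CF packing convention: readers recover value = packed * scale_factor + add_offset. A round trip with illustrative numbers:

# CF packing round trip (numbers are illustrative).
scale, offset = 0.1, 50.0
value = 53.7
packed = round((value - offset) / scale)  # stored integer: 37
print(packed * scale + offset)            # recovered: 53.7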
Example #10
def process_web_outputs(results,
                        job,
                        publish_raster_results=False,
                        renderer_or_fn=None):
    outputs = results.format_args()

    for k, v in iter(outputs.items()):
        if is_raster(v) and publish_raster_results:
            service_name = '{0}/{1}'.format(job.uuid, k)
            rel_path = '{}.nc'.format(service_name)
            abs_path = os.path.join(SERVICE_DATA_ROOT, rel_path)
            os.makedirs(os.path.dirname(abs_path))

            with Dataset(abs_path, 'w', format='NETCDF4') as ds:
                if is_latlong(v.extent.projection):
                    x_var = 'longitude'
                    y_var = 'latitude'
                else:
                    x_var = 'x'
                    y_var = 'y'

                coord_vars = SpatialCoordinateVariables.from_bbox(
                    v.extent, *reversed(v.shape))
                coord_vars.add_to_dataset(ds, x_var, y_var)

                fill_value = v.fill_value if numpy.ma.core.is_masked(
                    v) else None
                data_var = ds.createVariable('data',
                                             v.dtype,
                                             dimensions=(y_var, x_var),
                                             fill_value=fill_value)
                data_var[:] = v
                set_crs(ds, 'data', v.extent.projection)

            if callable(renderer_or_fn):
                renderer = renderer_or_fn(v)
            elif renderer_or_fn is None:
                renderer = StretchedRenderer([
                    (numpy.min(v).item(), Color(0, 0, 0)),
                    (numpy.max(v).item(), Color(255, 255, 255))
                ])
            else:
                renderer = renderer_or_fn

            with transaction.atomic():
                service = Service.objects.create(
                    name=service_name,
                    description=(
                        'This service has been automatically generated from '
                        'the result of a geoprocessing job.'
                    ),
                    data_path=rel_path,
                    projection=v.extent.projection.srs,
                    full_extent=v.extent,
                    initial_extent=v.extent,
                )
                Variable.objects.create(service=service,
                                        index=0,
                                        variable='data',
                                        projection=v.extent.projection.srs,
                                        x_dimension=x_var,
                                        y_dimension=y_var,
                                        name='data',
                                        renderer=renderer,
                                        full_extent=v.extent)
                ProcessingResultService.objects.create(job=job,
                                                       service=service)

            outputs[k] = service_name

        elif is_ndarray(v):
            if v.size < numpy.get_printoptions()['threshold']:
                outputs[k] = v.tolist()
            else:
                outputs[k] = str(v)

    return outputs
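The final ndarray branch serializes small arrays in full and falls back to a string for large ones; the size cutoff is numpy's print threshold:

# The cutoff used above is numpy's print threshold (1000 by default).
import numpy

print(numpy.get_printoptions()['threshold'])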
Example #11
def raster_to_netcdf(filename_or_raster,
                     outfilename=None,
                     variable_name='data',
                     format='NETCDF4',
                     **kwargs):
    """
    Parameters
    ----------
    filename_or_raster: name of file to open with rasterio, or an opened rasterio raster dataset
    outfilename: name of output file.  If blank, will be the same name as the input with a .nc extension added
    variable_name: name of the data variable in the output netCDF file
    format: output format for the netCDF file: NETCDF3_CLASSIC, NETCDF3_64BIT, NETCDF4_CLASSIC, NETCDF4
    kwargs: arguments passed to variable creation: zlib

    Note: only rasters with descending y coordinates are currently supported
    """

    start = time.time()

    if isinstance(filename_or_raster, string_types):
        if not os.path.exists(filename_or_raster):
            raise ValueError(
                'File does not exist: {0}'.format(filename_or_raster))

        src = rasterio.open(filename_or_raster)
        managed_raster = True
    else:
        src = filename_or_raster
        managed_raster = False

    if not src.count == 1:
        raise NotImplementedError(
            'ERROR: multi-band rasters not yet supported for this operation')

    prj = pyproj.Proj(**src.crs)

    outfilename = outfilename or src.name + '.nc'
    with Dataset(outfilename, 'w', format=format) as target:
        if is_latlong(prj):
            x_varname = 'longitude'
            y_varname = 'latitude'
        else:
            x_varname = 'x'
            y_varname = 'y'

        # TODO: may need to do this in blocks if source is big
        data = src.read(1, masked=True)

        coords = SpatialCoordinateVariables.from_bbox(BBox(src.bounds, prj),
                                                      src.width, src.height)
        coords.add_to_dataset(target, x_varname, y_varname, **kwargs)

        out_var = target.createVariable(variable_name,
                                        data.dtype,
                                        dimensions=(y_varname, x_varname),
                                        **kwargs)
        out_var[:] = data
        set_crs(target, variable_name, prj, set_proj4_att=False)

    if managed_raster:
        src.close()

    print('Elapsed {0:.3f} seconds'.format(time.time() - start))
Example #12
def main(in_pattern, out_pattern, boundary, single, varname):
    """
    Clips and masks large NetCDF datasets to regional datasets based on the boundary. The in_pattern and out_pattern
    arguments should be filename patterns (can include path) with the pattern: /path/to/in_netcdf_{variable}.nc.

    Example usage: python cut_to_region.py NorthAmerica/NA_{variable}.nc USWest/west_{variable}.nc west.shp
    """

    if single and not varname:
        print('--varname is required when --single is used')
        sys.exit(-1)

    if single:
        if not os.path.exists(in_pattern):
            print('Input file {} does not exist.'.format(in_pattern))
            sys.exit(-1)

        input_paths = [(in_pattern, varname)]
    else:
        input_paths = [(in_pattern.format(variable=x), x) for x in VARIABLES]

        for path, _ in input_paths:
            if not os.path.exists(path):
                print('Input file {} does not exist.'.format(path))
                sys.exit(-1)

    with fiona.open(boundary, 'r') as shp:
        features = []
        wgs84 = Proj('+init=EPSG:4326')
        shp_projection = Proj(shp.crs)
        bounds = shp.bounds

        ll = transform(shp_projection, wgs84, bounds[0], bounds[1])
        ur = transform(shp_projection, wgs84, bounds[2], bounds[3])

        bbox = BBox([*ll, *ur], projection=wgs84)

        for feature in shp.items():
            geometry = transform_geom(shp.crs, {'init': 'EPSG:4326'},
                                      feature[1]['geometry'])
            features.append(geometry)

    for in_path, variable in input_paths:
        if single:
            out_path = out_pattern
        else:
            out_path = out_pattern.format(variable=variable)

        if os.path.exists(out_path):
            confirm = input(
                "The output file '{}' already exists. Do you wish to replace it? [y/n] "
                .format(out_path))
            if confirm.lower().strip() not in ['y', 'yes']:
                print('Exiting...')
                sys.exit()

        with Dataset(in_path, 'r') as ds:
            coords = SpatialCoordinateVariables.from_dataset(
                ds, x_name='longitude', y_name='latitude')

            x_start, x_stop = coords.x.indices_for_range(bbox.xmin, bbox.xmax)
            y_start, y_stop = coords.y.indices_for_range(bbox.ymin, bbox.ymax)

            x_slice = slice(x_start, x_stop)
            y_slice = slice(y_start, y_stop)

            clipped_coords = coords.slice_by_bbox(bbox)

            grid = ds.variables[variable][y_slice, x_slice]

        if is_masked(grid):
            mask = grid.mask.astype('uint8')
        else:
            mask = numpy.zeros(grid.shape, dtype='uint8')

        mask |= rasterize(((x, 0) for x in features),
                          out_shape=mask.shape,
                          transform=clipped_coords.affine,
                          fill=1,
                          default_value=0)
        grid = numpy.ma.masked_where(mask == 1, grid.data)

        print('Writing {}...'.format(out_path))
        with Dataset(out_path, 'w', format='NETCDF4') as ds:
            clipped_coords.add_to_dataset(ds, 'longitude', 'latitude')
            data_var = ds.createVariable(variable,
                                         grid.dtype,
                                         dimensions=('latitude', 'longitude'),
                                         fill_value=grid.fill_value)

            if data_var.shape != grid.shape:
                grid = grid[:data_var.shape[0], :data_var.shape[1]]

            data_var[:] = grid
            set_crs(ds, variable, Proj('+init=EPSG:4326'))
Example #13
    def handle(self, output_directory, region_name, zoneset, *args, **kwargs):
        output_directory = output_directory[0]
        region_name = region_name[0]

        if zoneset is None or zoneset.strip() == "":
            sources = ZoneSource.objects.all().order_by("name")
            if len(sources) == 0:
                raise CommandError("No zonesets available")

        else:
            sources = ZoneSource.objects.filter(
                name__in=zoneset.split(",")).order_by("name")
            if len(sources) == 0:
                raise CommandError(
                    "No zonesets available to analyze that match --zones values"
                )

        region = Region.objects.filter(name=region_name)
        if not region.exists():
            raise CommandError(
                "Region {} is not available".format(region_name))

        region = region.first()

        ### Create output directories
        if not os.path.exists(output_directory):
            os.makedirs(output_directory)

        with ElevationDataset() as elevation_ds:
            elevation_ds.load_region(region.name)

            for source in sources:
                all_species = [
                    e["species"]
                    for e in source.seedzone_set.values("species").distinct()
                ]

                for species in all_species:
                    zones = source.seedzone_set.filter(
                        species=species).order_by("zone_id")

                    out_index = 0
                    zone_ids = []
                    zone_input_ids = []
                    out = numpy.empty(shape=elevation_ds.data.shape,
                                      dtype="uint16")
                    out.fill(NODATA)

                    with ZoneConfig(source.name) as config:
                        for zone in Bar(
                                "Processing {} - {} zones".format(
                                    source.name, species),
                                max=source.seedzone_set.count(),
                        ).iter(zones):

                            source_name = zone.source

                            window, coords = elevation_ds.get_read_window(
                                zone.polygon.extent)
                            transform = coords.affine

                            elevation = elevation_ds.data[window]

                            zone_mask = rasterize(
                                (json.loads(zone.polygon.geojson), ),
                                out_shape=elevation.shape,
                                transform=transform,
                                fill=1,  # mask is True OUTSIDE the zone
                                default_value=0,
                                dtype=numpy.dtype("uint8"),
                            ).astype("bool")

                            nodata_mask = elevation == elevation_ds.nodata_value
                            mask = nodata_mask | zone_mask

                            # Create a 2D array for extracting to new dataset, in integer feet
                            elevation = (numpy.where(
                                ~mask, elevation / 0.3048,
                                elevation_ds.nodata_value).round().astype(
                                    "int"))

                            # if there are no pixels in the mask, skip this zone
                            if elevation.size == 0:
                                continue

                            elevation_data = elevation[
                                elevation != elevation_ds.nodata_value]
                            min_elevation = math.floor(
                                numpy.nanmin(elevation_data))
                            max_elevation = math.ceil(
                                numpy.nanmax(elevation_data))

                            bands = list(
                                config.get_elevation_bands(
                                    zone, min_elevation, max_elevation))
                            bands = generate_missing_bands(
                                bands, min_elevation, max_elevation)

                            if not bands:
                                # min / max elevation outside defined bands
                                warnings.warn(
                                    "\nElevation range {} - {} ft outside defined bands\n"
                                    .format(min_elevation, max_elevation))
                                continue

                            for band in bands:
                                low, high = band[:2]
                                band_mask = (elevation >= low) & (elevation <=
                                                                  high)

                                if not numpy.any(band_mask):
                                    continue

                                # extract actual elevation range within the mask as integer feet
                                band_elevation = elevation.flat[
                                    band_mask.flatten()]
                                band_range = [
                                    math.floor(numpy.nanmin(band_elevation)),
                                    math.ceil(numpy.nanmax(band_elevation)),
                                ]

                                # extract 2D version of elevation within the band
                                value = numpy.where(
                                    (~mask) & band_mask,
                                    out_index,
                                    out[window],
                                )

                                if not numpy.any(value == out_index):
                                    continue

                                out[window] = value
                                # zone ids are based on actual elevation range
                                zone_ids.append("{}_{}_{}".format(
                                    zone.zone_uid, *band_range))
                                # zone_input is based on input elevation range
                                zone_input_ids.append("{}_{}_{}".format(
                                    zone.zone_uid, low, high))

                                out_index += 1

                    if out_index > NODATA - 1:
                        raise ValueError(
                            "Too many zone / band combinations for uint16")

                    # Find the data window of the zones
                    data_window = (windows.get_data_window(
                        out, NODATA).round_offsets(op="floor").round_lengths(
                            op="ceil"))
                    out = out[data_window.toslices()]
                    data_coords = elevation_ds.coords.slice_by_window(
                        Window(*data_window.toslices()))

                    filename = os.path.join(
                        output_directory,
                        "{}_{}_zones.nc".format(source_name, species))

                    with Dataset(filename, "w", format="NETCDF4") as ds:
                        # create ID variable
                        ds.createDimension("zone", len(zone_ids))
                        id_var = ds.createVariable("zone",
                                                   str,
                                                   dimensions=("zone", ))
                        id_var[:] = numpy.array(zone_ids)

                        data_coords.add_to_dataset(ds, "longitude", "latitude")
                        data_var = ds.createVariable(
                            "zones",
                            "uint16",
                            dimensions=("latitude", "longitude"),
                            fill_value=NODATA,
                        )
                        data_var[:] = out
                        set_crs(ds, "zones", Proj({"init": "EPSG:4326"}))

                    with open(filename.replace(".nc", ".csv"), "w") as fp:
                        writer = csv.writer(fp)
                        writer.writerow(["value", "zone", "zone_input"])
                        writer.writerows([[i, zone_ids[i], zone_input_ids[i]]
                                          for i in range(len(zone_ids))])
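The fill=1 / default_value=0 pair passed to rasterize above inverts the usual convention: pixels inside the polygons get 0 and everything else keeps the fill of 1, so the boolean result is True outside the zone. A self-contained sketch with a made-up square and transform:

# Sketch of the inverted rasterize mask; the shape and transform are made up.
from affine import Affine
from rasterio.features import rasterize

square = {'type': 'Polygon',
          'coordinates': [[(2, 2), (2, 8), (8, 8), (8, 2), (2, 2)]]}
transform = Affine(1.0, 0.0, 0.0, 0.0, -1.0, 10.0)  # 10x10 grid, descending y
mask = rasterize([square], out_shape=(10, 10), transform=transform,
                 fill=1, default_value=0, dtype='uint8').astype('bool')
print(mask[5, 5], mask[0, 0])  # False (inside), True (outside)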
Example #14
    def handle(self, *args, **options):
        message = (
            "WARNING: This will update all service data, casting each to it's smallest possible data type. Do you want "
            "to continue? [y/n]")
        if input(message).lower() not in {'y', 'yes'}:
            return

        for service in Service.objects.all():
            if service.variable_set.all().count() > 1:
                print("Skipping service '{}' with more than one variable...".
                      format(service.name))
                continue

            variable = service.variable_set.all().get()
            path = os.path.join(SERVICE_DATA_ROOT, service.data_path)

            tmp_dir = mkdtemp()
            tmp_path = os.path.join(tmp_dir,
                                    os.path.basename(service.data_path))

            try:
                with Dataset(path, 'r') as ds:
                    data = ds.variables[variable.variable][:]
                    coords = SpatialCoordinateVariables.from_bbox(
                        service.full_extent, *reversed(data.shape))

                if data.dtype.kind != 'i':
                    print("Ignoring service '{}' with non-int type".format(
                        service.name))
                    continue

                # The fill value will be the minimum value of the chosen type, so we want to make sure it's not
                # included in the actual data range
                min_value = data.min() - 1
                max_value = data.max()

                # Determine the most suitable data type by finding the minimum type for the min/max values and then
                # using the type that will accurately represent both
                min_type = str(numpy.min_scalar_type(min_value))
                max_type = str(numpy.min_scalar_type(max_value))

                min_unsigned, min_size = min_type.split('int')
                max_unsigned, max_size = max_type.split('int')

                dtype = '{}int{}'.format(min_unsigned and max_unsigned,
                                         max(int(min_size), int(max_size)))

                if data.dtype == dtype:
                    print(
                        "Service '{}' already has the smallest possible type: {}"
                        .format(service.name, dtype))
                    continue

                print("Converting service '{}' to type: {}".format(
                    service.name, dtype))

                with Dataset(tmp_path, 'w', format='NETCDF4') as ds:
                    coords.add_to_dataset(ds, variable.x_dimension,
                                          variable.y_dimension)

                    data = data.astype(dtype)
                    fill_value = numpy.ma.maximum_fill_value(
                        numpy.dtype(dtype))
                    numpy.ma.set_fill_value(data, fill_value)

                    data_var = ds.createVariable(
                        variable.variable,
                        dtype,
                        dimensions=(variable.y_dimension,
                                    variable.x_dimension),
                        fill_value=fill_value)
                    data_var[:] = data

                    set_crs(ds, variable.variable,
                            service.full_extent.projection)

                os.unlink(path)
                shutil.copy2(tmp_path, path)

            finally:
                try:
                    shutil.rmtree(tmp_dir)
                except OSError:
                    pass
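The dtype selection above calls numpy.min_scalar_type on each end of the data range and then merges signedness and width. The helper's behavior in isolation:

# numpy.min_scalar_type picks the narrowest dtype that can hold a scalar.
import numpy

print(numpy.min_scalar_type(-1))     # int8
print(numpy.min_scalar_type(40000))  # uint16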
Example #15
def to_netcdf(
        files,
        output,
        variable,
        dtype,
        src_crs,
        x_name,
        y_name,
        z_name,
        datetime_pattern,
        netcdf3,
        compress,
        packed,
        xy_dtype,
        # z_dtype,
        calendar,
        autocrop):
    """
    Convert rasters to NetCDF and stack them according to a dimension.

    X and Y dimensions will be named according to the source projection (lon, lat for geographic projections; x, y
    otherwise) unless specified.

    Will overwrite an existing NetCDF file.

    Only the first band of the input will be turned into a NetCDF file.
    """

    # TODO: add format string template to this to parse out components

    filenames = list(glob.glob(files))
    if not filenames:
        raise click.BadParameter('No files found matching that pattern',
                                 param='files',
                                 param_hint='FILES')

    z_values = []

    if datetime_pattern is not None:
        datetimes = (datetime.strptime(x, datetime_pattern) for x in filenames)

        # Sort both datetimes and filenames by datetime
        z_values, filenames = [
            list(x)
            for x in zip(*sorted(zip(datetimes, filenames), key=itemgetter(0)))
        ]

    items = tuple(enumerate(filenames))

    has_z = len(filenames) > 1

    if has_z and not z_name:
        raise click.BadParameter('Required when > 1 input file',
                                 param='--z',
                                 param_hint='--z')

    if src_crs:
        src_crs = CRS.from_string(src_crs)

    template_ds = rasterio.open(filenames[0])
    src_crs = template_ds.crs or src_crs

    if not src_crs:
        raise click.BadParameter(
            'Required when no CRS information available in source files',
            param='--src-crs',
            param_hint='--src-crs')

    prj = Proj(**src_crs.to_dict())
    bounds = template_ds.bounds
    width = template_ds.width
    height = template_ds.height
    window = None

    src_dtype = numpy.dtype(template_ds.dtypes[0])
    dtype = numpy.dtype(dtype) if dtype else src_dtype

    if dtype == src_dtype:
        fill_value = template_ds.nodata
        if src_dtype.kind in ('u', 'i'):
            # nodata always comes from rasterio as floating point
            fill_value = int(fill_value)
    else:
        fill_value = get_fill_value(dtype)

    x_name = x_name or ('lon' if src_crs.is_geographic else 'x')
    y_name = y_name or ('lat' if src_crs.is_geographic else 'y')

    var_kwargs = {'fill_value': fill_value}

    format = 'NETCDF3_CLASSIC' if netcdf3 else 'NETCDF4'

    with Dataset(output, 'w', format=format) as out:
        if packed or autocrop:
            mins = []
            maxs = []
            windows = []

            click.echo('Inspecting input datasets...')
            with click.progressbar(items) as iter:
                for index, filename in iter:
                    with rasterio.open(filename) as src:
                        data = src.read(1, masked=True)
                        if packed:
                            mins.append(data.min())
                            maxs.append(data.max())
                        if autocrop:
                            data_window = get_data_window(data)
                            if data_window != ((0, height), (0, width)):
                                windows.append(data_window)

            if packed:
                min_value = min(mins)
                max_value = max(maxs)
                scale, offset = get_pack_atts(dtype, min_value, max_value)
            if autocrop and windows:
                window = union(windows)
                bounds = template_ds.window_bounds(window)
                height = window[0][1] - window[0][0]
                width = window[1][1] - window[1][0]

        coords = SpatialCoordinateVariables.from_bbox(BBox(bounds, prj), width,
                                                      height, xy_dtype)
        coords.add_to_dataset(out, x_name, y_name, zlib=compress)

        var_dimensions = [y_name, x_name]
        shape = list(coords.shape)
        if has_z:
            shape.insert(0, len(filenames))
            out.createDimension(z_name, shape[0])
            var_dimensions.insert(0, z_name)
            if z_values:
                dates = DateVariable(numpy.array(z_values),
                                     units_start_date=z_values[0],
                                     calendar=calendar)
                dates.add_to_dataset(out, z_name)

        click.echo('Creating {0}:{1} with shape {2}'.format(
            output, variable, shape))

        out_var = out.createVariable(variable,
                                     dtype,
                                     dimensions=var_dimensions,
                                     zlib=compress,
                                     **var_kwargs)
        set_crs(out, variable, prj, set_proj4_att=True)

        if packed:
            out_var.setncattr('scale_factor', scale)
            out_var.setncattr('add_offset', offset)

        click.echo('Copying data from input files...')
        with click.progressbar(items) as iter:
            for index, filename in iter:
                with rasterio.open(filename) as src:
                    data = src.read(1, masked=True, window=window)

                    if has_z:
                        out_var[index, :] = data
                    else:
                        out_var[:] = data

                out.sync()