def test_open_nc(local, online):
    """Check that a file opened locally and online exposes matching dimensions.

    Every dimension present in the local dataset is looked up by the same key
    in the online dataset, and the two must agree on `name` and `size`.
    Dimensions that exist only in the online dataset are not examined.
    """
    with open_nc(local) as nc_local, open_nc(online) as nc_online:
        for dim_key in nc_local.dimensions:
            # Compare the name first, then the size, for each local dimension.
            local_name = nc_local.dimensions[dim_key].name
            online_name = nc_online.dimensions[dim_key].name
            assert local_name == online_name

            local_size = nc_local.dimensions[dim_key].size
            online_size = nc_online.dimensions[dim_key].size
            assert local_size == online_size
Example #2
0
    def get_spatially_averaged_data(data_file, time_idx, is_thredds):
        """
        Return the mean of the array extracted from `data_file` at `time_idx`.

        `area` and `variable` are not parameters of this function; they are
        read from the enclosing scope and passed straight to `get_array`.

        :param data_file (modelmeta.DataFile): source data file; only its
            `filename` attribute is used here
        :param time_idx (int): index of time of interest
        :param is_thredds (bool or str): whether data target is on thredds
            server; a string value is converted with `strtobool`
        :return: `np.mean(...)` of the extracted array, converted to a plain
            Python scalar with `.item()`
        """
        # Flags may arrive as strings; normalise those before testing them.
        if isinstance(is_thredds, str):
            is_thredds = strtobool(is_thredds)

        # For THREDDS access the root from the environment is prepended.
        data_filename = (
            os.getenv("THREDDS_URL_ROOT") + data_file.filename
            if is_thredds
            else data_file.filename
        )

        with open_nc(data_filename) as nc:
            values = get_array(nc, data_filename, time_idx, area, variable)
        return np.mean(values).item()
 def getdata(file_, time_idx):
     """Return the mean of the array read from `file_` at `time_idx`.

     `area` and `variable` are taken from the enclosing scope and forwarded
     to `get_array`.

     :param file_: object whose `filename` attribute names the NetCDF file
     :param time_idx: time index forwarded to `get_array`
     :return: the mean as a plain Python scalar
     """
     with open_nc(file_.filename) as nc:
         a = get_array(nc, file_.filename, time_idx, area, variable)
     # `np.asscalar` was removed from NumPy; `.item()` is its documented
     # replacement and yields the same Python scalar.
     return np.mean(a).item()
Example #4
0
def stats(
    sesh,
    id_,
    time,
    area,
    variable,
    is_thredds=False,
):
    """Request and calculate summary statistics averaged across a region

    For performing regional analysis, one typically wants to summarize
    statistical information across a region. This call allows one to
    request either a single timestep (or an average across all
    timesteps), and averaged across all cells within the given region.

    The stats call may only be called for a single data file and single
    variable per invocation.

    Args:
        sesh (sqlalchemy.orm.session.Session): A database Session object

        id_ (str): Unique id which is a key to the data file requested

        time (int): Timestep index (0-based) representing the time of year;
            0-11 for monthly, 0-3 for seasonal, 0 for annual datasets.
            `None` or an empty string means "no specific timestep". Note
            that `0` is a valid index and selects the first timestep.

        area (str): WKT polygon of selected area

        variable (str): Short name of the variable to be returned

        is_thredds (bool): If set to `True` the filepath will be searched for
            on THREDDS server. This flag is not needed when running the backend
            as a server as the files are accessed over the web. A string
            value is converted with `strtobool`.

    Returns:
        dict: Empty dictionary if `id_` is not found in the database.

        Otherwise, returns a single dict with the key of the file's
        unique_id and the value consisting of a nested dictionary with
        the following attributes: 'mean', 'stdev', 'min', 'max',
        'median', 'ncells', 'units', 'time', 'modtime'.

        For example ::

            {'file0':
                {
                    'mean': 303.97227647569446,
                    'stdev': 8.428096450998078,
                    'min': 288.71807861328125,
                    'max': 318.9695739746094,
                    'median': 301.61065673828125,
                    'ncells': 72,
                    'units': 'K',
                    'time': datetime.datetime(1985, 6, 30, 12, 0, 0),
                    'modtime': datetime.datetime(2010, 1, 1, 17, 30, 4)
                }
            }

        If opening the file, extracting the array or reading the units
        raises any exception, the error is logged and
        ``{id_: na_array_stats}`` is returned instead; 'units', 'time' and
        'modtime' are not added in that case.

    Raises:
        Exception: If `time` parameter cannot be converted to an integer

    """
    # Validate arguments. Only None / empty string mean "no timestep";
    # a plain truthiness test would also discard the valid index 0.
    if time is None or time == "":
        time = None
    else:
        try:
            time = int(time)
        except ValueError:
            raise Exception(
                'time parameter "{}" not convertable to an integer.'.format(
                    time))

    if isinstance(is_thredds, str):
        is_thredds = strtobool(is_thredds)

    try:
        df = sesh.query(DataFile).filter(DataFile.unique_id == id_).one()
    except NoResultFound:
        return {}

    resource = apply_thredds_root(df.filename) if is_thredds else df.filename

    try:
        with open_nc(resource) as nc:
            array = get_array(nc, resource, time, area, variable)
            units = get_units_from_netcdf_file(nc, variable)
    except Exception as e:
        log.error(e)
        return {id_: na_array_stats}

    result = array_stats(array)

    query = sesh.query(Time.timestep).filter(Time.time_set_id == df.timeset.id)
    if time is not None:
        # Query.filter() returns a new Query; it must be re-bound or the
        # restriction to the requested timestep is silently dropped.
        query = query.filter(Time.time_idx == time)
    timevals = [t for t, in query.all()]
    timeval = mean_datetime(timevals)

    result.update({"units": units, "time": timeval, "modtime": df.index_time})
    return {id_: result}
Example #5
0
def grid(sesh, id_):
    """Return the grid description of a single data file, keyed by its id.

    The data file is looked up in the database by `id_`, opened with
    `open_nc`, and handed to `get_grid_from_netcdf_file`. The index time
    of the database record is attached to the result under 'modtime'.

    The grid call may only be called for a single data file per
    invocation.

    Args:
        sesh (sqlalchemy.orm.session.Session): A database Session object

        id_ (str): Unique id which is a key to the data file requested

    Returns:
        dict: Empty dictionary if id_ is not found in the database.

        Otherwise a dict with the single key `id_` whose value is the
        grid dictionary extended with 'modtime'.

        For example ::

            {'file0':
                {
                    'latitudes': [
                        -87.86380134,
                        -85.09652949,
                        -82.31291545, ...
                        ],
                    'longitudes': [
                        -180,
                        -177.1875,
                        -174.375, ...
                        ],
                    'modtime': datetime.datetime(2011, 11, 11, 11, 11, 11)
                }
            }

        If `get_grid_from_netcdf_file` raises RuntimeError or KeyError,
        ``{id_: na_grid}`` is returned and no 'modtime' is added.

    Raises:
        Whatever `open_nc` raises when the file cannot be opened; that
        call is not guarded here.

    """
    try:
        data_file = (
            sesh.query(DataFile).filter(DataFile.unique_id == id_).one()
        )
    except NoResultFound:
        # Unknown id: nothing to report.
        return {}

    with open_nc(data_file.filename) as nc:
        try:
            grid_info = get_grid_from_netcdf_file(nc)
        except (RuntimeError, KeyError):
            return {id_: na_grid}

    grid_info.update({"modtime": data_file.index_time})
    return {id_: grid_info}
def test_open_nc_exception(bad_path):
    """open_nc must raise when asked to open `bad_path`."""
    with pytest.raises(Exception):
        with open_nc(bad_path):
            # Nothing to do here. If open_nc unexpectedly succeeds, the
            # block exits cleanly and pytest.raises itself fails the test.
            # An `assert False` here would be counter-productive: its
            # AssertionError is an Exception and would satisfy pytest.raises.
            pass
def stats(sesh, id_, time, area, variable):
    '''Request and calculate summary statistics averaged across a region

    For performing regional analysis, one typically wants to summarize
    statistical information across a region. This call allows one to
    request either a single timestep (or an average across all
    timesteps), and averaged across all cells within the given region.

    The stats call may only be called for a single data file and single
    variable per invocation.

    Args:
        sesh (sqlalchemy.orm.session.Session): A database Session object
        id_ (str): Unique id which is a key to the data file requested
        time (int): Timestep integer (1-17) representing the time of year.
            A falsy value means no specific timestep was requested.
        area (str): WKT polygon of selected area
        variable (str): Short name of the variable to be returned

    Returns:
        dict: Empty dictionary if `id_` is not found in the database.

        Otherwise, returns a single dict with the key of the file's
        unique_id and the value consisting of a nested dictionary with
        the following attributes: 'mean', 'stdev', 'min', 'max',
        'median', 'ncells', 'units', 'time'.

        For example ::

            {'file0':
                {
                    'mean': 303.97227647569446,
                    'stdev': 8.428096450998078,
                    'min': 288.71807861328125,
                    'max': 318.9695739746094,
                    'median': 301.61065673828125,
                    'ncells': 72,
                    'units': 'K',
                    'time': '1985-06-30T12:00:00Z'
                }
            }

        If opening the file, extracting the array or reading the units
        raises any exception, the error is logged and
        ``{id_: na_array_stats}`` is returned instead; 'units' and 'time'
        are not added in that case.

    Raises:
        None explicitly; errors from the database query other than
        NoResultFound propagate.

    '''
    try:
        df = sesh.query(DataFile).filter(DataFile.unique_id == id_).one()
        fname = df.filename
    except NoResultFound:
        return {}

    try:
        with open_nc(fname) as nc:
            array = get_array(nc, fname, time, area, variable)
            units = get_units_from_netcdf_file(nc, variable)
    except Exception as e:
        log.error(e)
        return {id_: na_array_stats}

    result = array_stats(array)

    query = sesh.query(Time.timestep).filter(Time.time_set_id == df.timeset.id)
    if time:
        # Query.filter() returns a new Query; it must be re-bound or the
        # restriction to the requested timestep is silently dropped.
        query = query.filter(Time.time_idx == time)
    timevals = [t for t, in query.all()]
    timeval = mean_datetime(timevals)

    result.update({
        'units': units,
        'time': timeval.strftime('%Y-%m-%dT%H:%M:%SZ')
    })
    return {id_: result}
def timeseries(sesh, id_, area, variable):
    """Delegate for performing data lookups within a single file

    Opens the data file specified by the id_ parameter and returns the
    mean data value at each timestep in the file.

    Args:
        sesh (sqlalchemy.orm.session.Session): A database Session object
        id_ (str): Unique id which is a key to the data file requested
        area (str): WKT polygon of selected area
        variable (str): Short name of the variable to be returned

    Returns:
        dict: Empty dictionary if id_ is not found in the database.

        Otherwise returns a single dict with keys `id`, `units` and
        `data`. The value for `data` is an ordered dictionary where keys
        correspond to the time values (formatted as
        '%Y-%m-%dT%H:%M:%SZ') and values correspond to the data values
        themselves, in ascending order of time index. The value for `id`
        is the unique_id for the file and the value for `units` is the
        unit string of the data values.

        For example::

            {
                'id': 'tmax_monClim_PRISM_historical_run1_198101-201012',
                'units': 'degC',
                'data':
                {
                    '1985-01-15T00:00:00Z': 1.5,
                    '1985-02-15T00:00:00Z': 2.5,
                    '1985-03-15T00:00:00Z': 5.5,
                    '1985-04-15T00:00:00Z': 10.2,
                    ...
                    '1985-12-15T00:00:00Z': 2.5,
                }
            }

    Raises:
        None explicitly; errors from opening or reading the file propagate.

    """
    try:
        file_ = sesh.query(DataFile).filter(DataFile.unique_id == id_).one()
    except NoResultFound:
        return {}

    # Get all time indexes for this file, ordered by time index
    ti = [(time.timestep, time.time_idx) for time in file_.timeset.times]
    ti.sort(key=lambda x: x[1])

    with open_nc(file_.filename) as nc:

        data = OrderedDict(
            [
                (
                    timeval.strftime("%Y-%m-%dT%H:%M:%SZ"),
                    # `.item()` replaces np.asscalar, which was removed
                    # from NumPy; both yield a plain Python scalar.
                    np.mean(
                        get_array(nc, file_.filename, idx, area, variable)
                    ).item(),
                )
                for timeval, idx in ti
            ]
        )
        units = get_units_from_netcdf_file(nc, variable)

    return {"id": id_, "data": data, "units": units}
def timeseries(sesh, id_, area, variable):
    """Build a per-timestep series of mean values for one data file.

    The data file identified by `id_` is opened once, and for every
    timestep of its timeset the mean of the array extracted for `area`
    and `variable` is recorded.

    Args:
        sesh (sqlalchemy.orm.session.Session): A database Session object

        id_ (str): Unique id which is a key to the data file requested

        area (str): WKT polygon of selected area

        variable (str): Short name of the variable to be returned

    Returns:
        dict: Empty dictionary if id_ is not found in the database.

        Otherwise a dict with keys `id`, `data`, `units` and `modtime`.
        `data` is an ordered dictionary, in ascending order of time
        index, mapping each time value (formatted as
        '%Y-%m-%dT%H:%M:%SZ') to the mean data value for that timestep.
        `id` is the unique_id for the file, `units` is the unit string
        of the data values, and `modtime` is the `index_time` of the
        database record, passed through unchanged.

        For example::

            {
                'id': 'tmax_monClim_PRISM_historical_run1_198101-201012',
                'units': 'degC',
                'data':
                {
                    '1985-01-15T00:00:00Z': 1.5,
                    '1985-02-15T00:00:00Z': 2.5,
                    '1985-03-15T00:00:00Z': 5.5,
                    '1985-04-15T00:00:00Z': 10.2,
                    ...
                    '1985-12-15T00:00:00Z': 2.5,
                },
                'modtime': <index_time of the data file record>
            }

    Raises:
        None explicitly; errors from opening or reading the file propagate.

    """
    try:
        data_file = (
            sesh.query(DataFile).filter(DataFile.unique_id == id_).one()
        )
    except NoResultFound:
        return {}

    # (timestep, time index) pairs, ordered by time index
    steps = sorted(
        ((t.timestep, t.time_idx) for t in data_file.timeset.times),
        key=lambda step: step[1],
    )

    data = OrderedDict()
    with open_nc(data_file.filename) as nc:
        for timeval, idx in steps:
            label = timeval.strftime("%Y-%m-%dT%H:%M:%SZ")
            values = get_array(nc, data_file.filename, idx, area, variable)
            data[label] = np.mean(values).item()
        units = get_units_from_netcdf_file(nc, variable)

    response = {"id": id_, "data": data, "units": units}
    response["modtime"] = data_file.index_time
    return response
def grid(sesh, id_):
    '''Return the grid description of a single data file, keyed by its id.

    The data file is looked up in the database by `id_`, opened with
    `open_nc`, and handed to `get_grid_from_netcdf_file`.

    The grid call may only be called for a single data file per
    invocation.

    Args:
        sesh (sqlalchemy.orm.session.Session): A database Session object
        id_ (str): Unique id which is a key to the data file requested

    Returns:
        dict: Empty dictionary if id_ is not found in the database.

        Otherwise a dict with the single key `id_` whose value is the
        grid returned by `get_grid_from_netcdf_file`.

        For example ::

            {'file0':
                {
                    'latitudes': [
                        -87.86380134,
                        -85.09652949,
                        -82.31291545, ...
                        ],
                    'longitudes': [
                        -180,
                        -177.1875,
                        -174.375, ...
                        ]
                }
            }

        If `get_grid_from_netcdf_file` raises RuntimeError or KeyError,
        ``{id_: na_grid}`` is returned instead.

    Raises:
        Whatever `open_nc` raises when the file cannot be opened; that
        call is not guarded here.

    '''
    try:
        data_file = (
            sesh.query(DataFile).filter(DataFile.unique_id == id_).one()
        )
    except NoResultFound:
        # Unknown id: nothing to report.
        return {}

    with open_nc(data_file.filename) as nc:
        try:
            grid_info = get_grid_from_netcdf_file(nc)
        except (RuntimeError, KeyError):
            return {id_: na_grid}

    return {id_: grid_info}