Ejemplo n.º 1
0
def stats(
    sesh,
    id_,
    time,
    area,
    variable,
    is_thredds=False,
):
    """Request and calculate summary statistics averaged across a region

    For performing regional analysis, one typically wants to summarize
    statistical information across a region. This call allows one to
    request either a single timestep (or an average across all
    timesteps), and averaged across all cells within the given region.

    The stats call may only be called for a single data file and single
    variable per invocation.

    Args:
        sesh (sqlalchemy.orm.session.Session): A database Session object

        id_ (str): Unique id which is a key to the data file requested

        time (int): Timestep index (0-based) representing the time of year;
            0-11 for monthly, 0-3 for seasonal, 0 for annual datasets.
            Anything `int()` accepts (e.g. the string "3") is converted.
            `None` or an empty string means "no specific timestep".

        area (str): WKT polygon of selected area

        variable (str): Short name of the variable to be returned

        is_thredds (bool): If set to `True` the filepath will be searched for
            on THREDDS server. This flag is not needed when running the backend
            as a server as the files are accessed over the web. A string
            value is converted with `strtobool`.

    Returns:
        dict: Empty dictionary if `id_` is not found in the database.

        Otherwise, returns a single dict with the key of the file's
        unique_id and the value consisting of a nested dictionary with
        the following attributes: 'mean', 'stdev', 'min', 'max',
        'median', 'ncells', 'units', 'time', 'modtime'.

        For example ::

            {'file0':
                {
                    'mean': 303.97227647569446,
                    'stdev': 8.428096450998078,
                    'min': 288.71807861328125,
                    'max': 318.9695739746094,
                    'median': 301.61065673828125,
                    'ncells': 72,
                    'units': 'K',
                    'time': datetime.datetime(1985, 6, 30, 12, 0, 0),
                    'modtime': datetime.datetime(2010, 1, 1, 17, 30, 4)
                }
            }

        'time' is the result of `mean_datetime` over the timestep(s)
        matched in the database: only the requested timestep when `time`
        is given, otherwise every timestep of the file's time set.

        There are two semi-error cases which should be mentioned, when
        the filesystem is out of sync with the database.

        1. The file pointed to by `id_` does not exist in the filesystem
        2. The requested variable does not exist in the given file

        In these cases (and for any other exception raised while reading
        the file) the error is logged and the module-level
        `na_array_stats` placeholder is returned under `id_`, without
        the 'units', 'time' and 'modtime' keys.

    Raises:
        Exception: If `time` parameter cannot be converted to an integer

    """
    # Validate arguments. Only None / "" mean "no timestep requested";
    # index 0 is a legitimate timestep and must not be mistaken for "absent".
    if time is None or time == "":
        time = None
    else:
        try:
            time = int(time)
        except ValueError:
            raise Exception(
                'time parameter "{}" not convertable to an integer.'.format(
                    time))

    if isinstance(is_thredds, str):
        is_thredds = strtobool(is_thredds)

    try:
        df = sesh.query(DataFile).filter(DataFile.unique_id == id_).one()
        resource = df.filename if not is_thredds else apply_thredds_root(
            df.filename)
    except NoResultFound:
        return {}

    try:
        with open_nc(resource) as nc:
            array = get_array(nc, resource, time, area, variable)
            units = get_units_from_netcdf_file(nc, variable)
    except Exception as e:
        # Deliberate best-effort: a file or variable that cannot be read
        # yields the placeholder stats instead of failing the request.
        log.error(e)
        return {id_: na_array_stats}

    result = array_stats(array)

    query = sesh.query(Time.timestep).filter(Time.time_set_id == df.timeset.id)
    if time is not None:
        # Query.filter() returns a new query; it must be reassigned or the
        # restriction to the requested timestep is silently lost.
        query = query.filter(Time.time_idx == time)
    timevals = [t for t, in query.all()]
    timeval = mean_datetime(timevals)

    result.update({"units": units, "time": timeval, "modtime": df.index_time})
    return {id_: result}
def timeseries(sesh, id_, area, variable):
    """Delegate for performing data lookups within a single file

    Opens the data file specified by the id_ parameter and returns the
    mean of the selected area's data values at each timestep in the file.

    Args:
        sesh (sqlalchemy.orm.session.Session): A database Session object
        id_ (str): Unique id which is a key to the data file requested
        area (str): WKT polygon of selected area
        variable (str): Short name of the variable to be returned

    Returns:
        dict: Empty dictionary if id_ is not found in the database.

        Otherwise returns a single dict with keys `id`, `units` and
        `data`. The value for `data` is an ordered dictionary (sorted by
        time index) where keys correspond to the time values (formatted
        as '%Y-%m-%dT%H:%M:%SZ') and values correspond to the data values
        themselves. The value for `id` is the unique_id for the file
        and the value for `units` is the unit string of the data
        values.

        For example::

            {
                'id': 'tmax_monClim_PRISM_historical_run1_198101-201012',
                'units': 'degC',
                'data':
                {
                    '1985-01-15T00:00:00Z': 1.5,
                    '1985-02-15T00:00:00Z': 2.5,
                    '1985-03-15T00:00:00Z': 5.5,
                    '1985-04-15T00:00:00Z': 10.2,
                    ...
                    '1985-12-15T00:00:00Z': 2.5,
                }
            }

    Raises:
        Nothing is raised by this function itself; errors from opening
        or reading the file are not caught here and propagate.

    """
    try:
        file_ = sesh.query(DataFile).filter(DataFile.unique_id == id_).one()
    except NoResultFound:
        return {}

    # Get all time indexes for this file, ordered by index
    ti = [(time.timestep, time.time_idx) for time in file_.timeset.times]
    ti.sort(key=lambda x: x[1])

    with open_nc(file_.filename) as nc:

        data = OrderedDict(
            [
                (
                    timeval.strftime("%Y-%m-%dT%H:%M:%SZ"),
                    # .item() converts the NumPy scalar to a plain Python
                    # value; np.asscalar did the same but has been removed
                    # from NumPy.
                    np.mean(
                        get_array(nc, file_.filename, idx, area, variable)
                    ).item(),
                )
                for timeval, idx in ti
            ]
        )
        units = get_units_from_netcdf_file(nc, variable)

    return {"id": id_, "data": data, "units": units}
def stats(sesh, id_, time, area, variable):
    """Request and calculate summary statistics averaged across a region

    For performing regional analysis, one typically wants to summarize
    statistical information across a region. This call allows one to
    request either a single timestep (or an average across all
    timesteps), and averaged across all cells within the given region.

    The stats call may only be called for a single data file and single
    variable per invocation.

    Args:
        sesh (sqlalchemy.orm.session.Session): A database Session object
        id_ (str): Unique id which is a key to the data file requested
        time (int): Timestep integer (1-17) representing the time of year.
            It is passed unchanged to `get_array`; a falsy value means the
            reported 'time' is computed over every timestep of the file.
        area (str): WKT polygon of selected area
        variable (str): Short name of the variable to be returned

    Returns:
        dict: Empty dictionary if `id_` is not found in the database.

        Otherwise, returns a single dict with the key of the file's
        unique_id and the value consisting of a nested dictionary with
        the following attributes: 'mean', 'stdev', 'min', 'max',
        'median', 'ncells', 'units', 'time'.

        For example ::

            {'file0':
                {
                    'mean': 303.97227647569446,
                    'stdev': 8.428096450998078,
                    'min': 288.71807861328125,
                    'max': 318.9695739746094,
                    'median': 301.61065673828125,
                    'ncells': 72,
                    'units': 'K',
                    'time': '1985-06-30T12:00:00Z'
                }
            }

        'time' is the result of `mean_datetime` over the matched
        timestep(s), formatted as '%Y-%m-%dT%H:%M:%SZ'.

        There are two semi-error cases which should be mentioned, when
        the filesystem is out of sync with the database.

        1. The file pointed to by `id_` does not exist in the filesystem
        2. The requested variable does not exist in the given file

        In these cases (and for any other exception raised while reading
        the file) the error is logged and the module-level
        `na_array_stats` placeholder is returned under `id_`, without
        the 'units' and 'time' keys.

    Raises:
        Nothing is raised explicitly by this function; database errors
        other than `NoResultFound` are not caught here.

    """
    try:
        df = sesh.query(DataFile).filter(DataFile.unique_id == id_).one()
        fname = df.filename
    except NoResultFound:
        return {}

    try:
        with open_nc(fname) as nc:
            array = get_array(nc, fname, time, area, variable)
            units = get_units_from_netcdf_file(nc, variable)
    except Exception as e:
        # Deliberate best-effort: a file or variable that cannot be read
        # yields the placeholder stats instead of failing the request.
        log.error(e)
        return {id_: na_array_stats}

    # Named `summary` so the local does not shadow this function's own name.
    summary = array_stats(array)

    query = sesh.query(Time.timestep).filter(Time.time_set_id == df.timeset.id)
    if time:
        # Query.filter() returns a new query; it must be reassigned or the
        # restriction to the requested timestep is silently lost.
        query = query.filter(Time.time_idx == time)
    timevals = [t for t, in query.all()]
    timeval = mean_datetime(timevals)

    summary.update({
        'units': units,
        'time': timeval.strftime('%Y-%m-%dT%H:%M:%SZ')
    })
    return {id_: summary}
Ejemplo n.º 4
0
def timeseries(sesh, id_, area, variable):
    """Return the per-timestep regional mean of one variable in one file

    Looks up the data file registered under `id_`, opens it, and for every
    timestep of the file's time set computes the mean of the array that
    `get_array` returns for the given area and variable.

    Args:
        sesh (sqlalchemy.orm.session.Session): A database Session object

        id_ (str): Unique id which is a key to the data file requested

        area (str): WKT polygon of selected area

        variable (str): Short name of the variable to be returned

    Returns:
        dict: Empty dictionary if id_ is not found in the database.

        Otherwise a dict with the keys `id`, `units`, `data` and
        `modtime`:

        * `id` is the unique_id that was requested.
        * `units` is the unit string read from the file for `variable`.
        * `data` is an ordered dictionary, sorted by time index, whose
          keys are the time values formatted as '%Y-%m-%dT%H:%M:%SZ'
          and whose values are the mean data values.
        * `modtime` is the file record's `index_time`, passed through
          as stored.

        For example::

            {
                'id': 'tmax_monClim_PRISM_historical_run1_198101-201012',
                'units': 'degC',
                'data':
                {
                    '1985-01-15T00:00:00Z': 1.5,
                    '1985-02-15T00:00:00Z': 2.5,
                    '1985-03-15T00:00:00Z': 5.5,
                    '1985-04-15T00:00:00Z': 10.2,
                    ...
                    '1985-12-15T00:00:00Z': 2.5,
                },
                'modtime': <index_time of the file record>
            }

    Raises:
        Nothing is raised by this function itself; errors from opening
        or reading the file are not caught here and propagate.

    """
    try:
        lookup = sesh.query(DataFile).filter(DataFile.unique_id == id_)
        file_ = lookup.one()
    except NoResultFound:
        return {}

    # Pair every timestamp with its index, then order the pairs by index.
    steps = sorted(
        [(entry.timestep, entry.time_idx) for entry in file_.timeset.times],
        key=lambda pair: pair[1],
    )

    path = file_.filename
    series = OrderedDict()
    with open_nc(path) as nc:
        for stamp, index in steps:
            label = stamp.strftime("%Y-%m-%dT%H:%M:%SZ")
            region = get_array(nc, path, index, area, variable)
            # .item() turns the NumPy scalar into a plain Python value.
            series[label] = np.mean(region).item()
        units = get_units_from_netcdf_file(nc, variable)

    return {
        "id": id_,
        "data": series,
        "units": units,
        "modtime": file_.index_time,
    }