def test_mean_datetime(input_, output):
    """Check mean_datetime against an expected timestamp.

    Each string in ``input_`` is parsed and stamped as UTC, the resulting
    list is passed to ``mean_datetime``, and the result must equal
    ``output`` parsed and stamped as UTC the same way.
    """
    stamps = []
    for text in input_:
        stamps.append(parse(text).replace(tzinfo=utc))
    actual = mean_datetime(stamps)
    expected = parse(output).replace(tzinfo=utc)
    assert actual == expected
def test_mean_datetime(input_, output):
    """Verify that mean_datetime of the UTC-stamped inputs matches output."""

    def as_utc(text):
        # Parse a timestamp string and attach the UTC timezone to it.
        return parse(text).replace(tzinfo=utc)

    observed = mean_datetime(list(map(as_utc, input_)))
    assert observed == as_utc(output)
Exemple #3
0
def stats(
    sesh,
    id_,
    time,
    area,
    variable,
    is_thredds=False,
):
    """Request and calculate summary statistics averaged across a region

    For performing regional analysis, one typically wants to summarize
    statistical information across a region. This call allows one to
    request either a single timestep (or an average across all
    timesteps), and averaged across all cells within the given region.

    The stats call may only be called for a single data file and single
    variable per invocation.

    Args:
        sesh (sqlalchemy.orm.session.Session): A database Session object

        id_ (str): Unique id which is a key to the data file requested

        time (int): Timestep index (0-based) representing the time of year;
            0-11 for monthly, 0-3 for seasonal, 0 for annual datasets.
            Anything accepted by ``int()`` (e.g. the string ``"3"``) is
            converted. ``None`` or an empty string means "no timestep",
            in which case all timesteps are used.

        area (str): WKT polygon of selected area

        variable (str): Short name of the variable to be returned

        is_thredds (bool): If set to `True` the filepath will be searched for
            on THREDDS server. This flag is not needed when running the backend
            as a server as the files are accessed over the web. A string
            value is converted with ``strtobool``.

    Returns:
        dict: Empty dictionary if model_id is not found in the database.

        Otherwise, returns a single dict with the key of the file's
        unique_id and the value consisting of a nested dictionary with
        the following attributes: 'mean', 'stdev', 'min', 'max',
        'median', 'ncells', 'units', 'time', 'modtime'.

        For example ::

            {'file0':
                {
                    'mean': 303.97227647569446,
                    'stdev': 8.428096450998078,
                    'min': 288.71807861328125,
                    'max': 318.9695739746094,
                    'median': 301.61065673828125,
                    'ncells': 72,
                    'units': 'K',
                    'time': datetime.datetime(1985, 6, 30, 12, 0, 0),
                    'modtime': datetime.datetime(2010, 1, 1, 17, 30, 4)
                }
            }

        'time' is the mean of the timestep values selected from the
        file's time set: the single requested timestep when `time` is
        given, otherwise every timestep.

        There are two semi-error cases which should be mentioned, when
        the filesystem is out of sync with the database.

        1. The file pointed to by `id_` does not exist in the filesystem
        2. The requested variable does not exist in the given file

        In these cases, the numerical values will all be NaN, and the
        results dict will be missing the 'units' and 'time' keys.

    Raises:
        Exception: If `time` parameter cannot be converted to an integer

    """
    # Validate arguments. Timestep indices are 0-based, so 0 has to remain
    # a real index; only None or an empty string means "no timestep given".
    if time is None or time == "":
        time = None
    else:
        try:
            time = int(time)
        except ValueError as err:
            raise Exception(
                'time parameter "{}" not convertable to an integer.'.format(
                    time)
            ) from err

    if isinstance(is_thredds, str):
        is_thredds = strtobool(is_thredds)

    try:
        df = sesh.query(DataFile).filter(DataFile.unique_id == id_).one()
        resource = df.filename if not is_thredds else apply_thredds_root(
            df.filename)
    except NoResultFound:
        # Unknown id: report "nothing found" rather than an error.
        return {}

    try:
        with open_nc(resource) as nc:
            array = get_array(nc, resource, time, area, variable)
            units = get_units_from_netcdf_file(nc, variable)
    except Exception as e:
        # Deliberate best-effort: any failure reading the file is logged and
        # reported as the NaN placeholder stats instead of propagating.
        log.error(e)
        return {id_: na_array_stats}

    # Named `summary` so the local does not shadow this function's own name.
    summary = array_stats(array)

    query = sesh.query(Time.timestep).filter(Time.time_set_id == df.timeset.id)
    if time is not None:
        # Query.filter() returns a new query instead of modifying in place,
        # so the result must be kept for the restriction to take effect.
        query = query.filter(Time.time_idx == time)
    # Each row is a 1-tuple; unpack it to the bare timestep value.
    # NOTE(review): if the time set has no row for the requested index,
    # timevals is empty -- confirm how mean_datetime handles that.
    timevals = [t for t, in query.all()]
    timeval = mean_datetime(timevals)

    summary.update(
        {"units": units, "time": timeval, "modtime": df.index_time}
    )
    return {id_: summary}
def stats(sesh, id_, time, area, variable):
    '''Request and calculate summary statistics averaged across a region

    For performing regional analysis, one typically wants to summarize
    statistical information across a region. This call allows one to
    request either a single timestep (or an average across all
    timesteps), and averaged across all cells within the given region.

    The stats call may only be called for a single data file and single
    variable per invocation.

    Args:
        sesh (sqlalchemy.orm.session.Session): A database Session object
        id_ (str): Unique id which is a key to the data file requested
        time (int): Timestep integer (1-17) representing the time of year.
            A falsy value (e.g. ``None`` or an empty string) means all
            timesteps are used for the reported 'time'.
        area (str): WKT polygon of selected area
        variable (str): Short name of the variable to be returned

    Returns:
        dict: Empty dictionary if model_id is not found in the database.

        Otherwise, returns a single dict with the key of the file's
        unique_id and the value consisting of a nested dictionary with
        the following attributes: 'mean', 'stdev', 'min', 'max',
        'median', 'ncells', 'units', 'time'.

        For example ::

            {'file0':
                {
                    'mean': 303.97227647569446,
                    'stdev': 8.428096450998078,
                    'min': 288.71807861328125,
                    'max': 318.9695739746094,
                    'median': 301.61065673828125,
                    'ncells': 72,
                    'units': 'K',
                    'time': '1985-06-30T12:00:00Z'
                }
            }

        'time' is the mean of the selected timestep values, formatted as
        '%Y-%m-%dT%H:%M:%SZ'.

        There are two semi-error cases which should be mentioned, when
        the filesystem is out of sync with the database.

        1. The file pointed to by `id_` does not exist in the filesystem
        2. The requested variable does not exist in the given file

        In these cases, the numerical values will all be NaN, and the
        results dict will be missing the 'units' and 'time' keys.

    Raises:
        Nothing is raised deliberately. Errors while opening or reading
        the data file are logged and reported as NaN stats; errors from
        the database queries (other than a missing `id_`) propagate.

    '''
    try:
        df = sesh.query(DataFile).filter(DataFile.unique_id == id_).one()
        fname = df.filename
    except NoResultFound:
        # Unknown id: report "nothing found" rather than an error.
        return {}

    try:
        with open_nc(fname) as nc:
            array = get_array(nc, fname, time, area, variable)
            units = get_units_from_netcdf_file(nc, variable)
    except Exception as e:
        # Deliberate best-effort: any failure reading the file is logged and
        # reported as the NaN placeholder stats instead of propagating.
        log.error(e)
        return {id_: na_array_stats}

    # Named `summary` so the local does not shadow this function's own name.
    summary = array_stats(array)

    query = sesh.query(Time.timestep).filter(Time.time_set_id == df.timeset.id)
    # NOTE(review): a timestep of 0 is treated as "not given" here; confirm
    # that 0 is never a valid index for this version of the API.
    if time:
        # Query.filter() returns a new query instead of modifying in place,
        # so the result must be kept for the restriction to take effect.
        query = query.filter(Time.time_idx == time)
    # Each row is a 1-tuple; unpack it to the bare timestep value.
    timevals = [t for t, in query.all()]
    timeval = mean_datetime(timevals)

    summary.update({
        'units': units,
        'time': timeval.strftime('%Y-%m-%dT%H:%M:%SZ')
    })
    return {id_: summary}