Example #1
from functools import reduce as reduce_  # reduce_ is functools.reduce, as in Example #11
from operator import add

# leaves, Node and make_converter come from the surrounding project
def simple_serialiser(node, table):
    '''
    Serialize using the given table.
    '''
    stream = leaves(node, Node)
    converter = make_converter(table)
    return reduce_(add, [converter(value)[1] for value in stream])
Example #3
from functools import reduce as reduce_

def mcompose(*funcs):
    def _composer(f, g):
        def inner(*args, **kwargs):
            # unpack g's result into f, so g must return an iterable
            return f(*g(*args, **kwargs))

        return inner

    return reduce_(_composer, funcs)
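Because each intermediate result is unpacked with *, every function in the chain must return an iterable of positional arguments for the next call. A minimal usage sketch (the helper functions here are made up for illustration):

# applied right to left: h(x, y) == pair_sum_diff(*swap(x, y))
swap = lambda x, y: (y, x)
pair_sum_diff = lambda x, y: (x + y, x - y)

h = mcompose(pair_sum_diff, swap)
assert h(2, 5) == (7, 3)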
Example #4
from functools import reduce as reduce_

def count_sold(iterable: list, default=0):
    # on the first call both arguments are items; afterwards the
    # accumulator is already an int, hence the type check
    function = (lambda a, b: (a + b.quantity)
                if isinstance(a, int) else (a.quantity + b.quantity))
    if not iterable:
        return default
    elif len(iterable) == 1:
        return iterable[0].quantity
    else:
        return reduce_(function, iterable)
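The ternary in the lambda exists only because the accumulator changes type mid-fold. Seeding reduce with an initializer avoids all of the special cases; a sketch, with a hypothetical Sale class standing in for the items:

from functools import reduce as reduce_
from dataclasses import dataclass

@dataclass
class Sale:  # hypothetical stand-in for objects with a quantity attribute
    quantity: int

sales = [Sale(2), Sale(3), Sale(5)]

# the accumulator starts at 0 and stays an int; an empty list yields 0
total = reduce_(lambda acc, s: acc + s.quantity, sales, 0)
assert total == 10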
Example #5
# inner helper; values and measures come from the enclosing scope
# in the original source
def build_row(cell):
    cidxs = list(reversed([c[1] for c in cell]))

    cm = [c[0] for c in cell]

    mvalues = [
        reduce_(
            lambda memo, cur: memo[cur],  # navigate to values[coords]
            cidxs + [mi],
            values) for mi, m in enumerate(measures)
    ]

    return cm + mvalues
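The reduce here walks a list of indices into a nested structure one step at a time, i.e. values[c0][c1]...[mi]. The same trick in isolation, with hypothetical data:

from functools import reduce as reduce_

values = {0: {1: [10.5, 11.2]}}  # hypothetical nested measure values
coords = [0, 1, 0]               # path: outer key, inner key, measure index

# successively index into the structure: values[0][1][0]
assert reduce_(lambda memo, cur: memo[cur], coords, values) == 10.5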
Example #6
def post_process(self, rows):
    """
    Build a table including:
    Project name | Bug pts | Story pts | Debt %
    Name1        |
    NameN        |
    Grand Total  |
    """
    project_points_by_type = reduce_(self._to_story_points, rows, {})
    results = ({
        'project_name': k,
        'bug_points': self._format_points(v[0]),
        'story_points': self._format_points(v[1]),
        'tech_debt': self._tech_debt_perc(v)
    } for k, v in project_points_by_type.items())
    sort_by_name = partial(self._sort_by_name, 'project_name')
    return sorted(results, key=cmp_to_key(sort_by_name))
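The key line is reduce_(self._to_story_points, rows, {}): reduce seeded with an empty dict, so the step function can group values by project as it folds. A standalone sketch of that grouping pattern, with hypothetical row tuples:

from functools import reduce as reduce_

rows = [("proj-a", "bug", 3), ("proj-a", "story", 5), ("proj-b", "story", 8)]

def to_points(acc, row):
    name, kind, pts = row
    bug, story = acc.get(name, (0, 0))
    # fold each row into a (bug_points, story_points) pair per project
    acc[name] = (bug + pts, story) if kind == "bug" else (bug, story + pts)
    return acc

assert reduce_(to_points, rows, {}) == {"proj-a": (3, 5), "proj-b": (0, 8)}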
Example #7
    def read_time_series(
            self,
            variables=(),
            start_time=pytz.utc.localize(datetime.min),
            end_time=pytz.utc.localize(datetime.max),
            selectdim=None,
            size_limit=1000 * 1000 * 1000,
            series=None,
            series_name_fmt=None):
        """ Read a list of time-series variables from this fileset.

        Args:
            variables: A list of strs containing time series variable
                names to be read.
            start_time: A datetime, which is timezone aware, of the start
                time of the series to read.
            end_time: A datetime, timezone aware, end time of series to read.
            selectdim: A dict containing for each dimension name of type
                string, the indices of the dimension to read.
                For example: {"station":[3,4,5]} to read indices 3,4 and 5
                (indexed from 0) of the station dimension for variables
                which have that dimension.
            size_limit: Limit on the total size in bytes to read, used to
                screen huge requests.
            series: A list of series to be read by name.
            series_name_fmt: a datetime.strftime format to create a
                series name for the data found in each file, based
                on the time associated with the file.
                If series_name_fmt is None, all data is put in a dictionary
                element named ''.

        Returns:
            A dict containing, by series name:
                'time' : list of UTC timestamps,
                'data': list of numpy.ndarray containing the data for
                    each variable,
                'vmap': dict by variable name,
                    containing the index into the series data for the variable,
                'dim2': dict by variable name, of values for second dimension
                    of the data, such as height,

        Raises:
            nc_exc.NoDataFoundException
            nc_exc.NoDataException

        The 'data' element in the returned dict is a list of numpy arrays,
        and not a dict by variable name. The 'vmap' element provides the
        mapping from a variable name to an index into 'data'. The data object
        is typically JSON-ified and sent to a browser. If it were a dict,
        the variable names may contain characters which cause headaches with
        JSON and javascript in django templates. For example, the JSON-ified
        string is typically passed to javascript in a django template by
        surrounding it with single quotes:
            var data = jQuery.parseJSON('{{ data }}');
        A single quote within the data JSON string causes grief, and we want
        to support single quotes in variable names. The only work around I
        know of is to convert the single quotes within the string to '\u0027'.
        This is, of course, a time-consuming step we want to avoid when
        JSON-ifying a large chunk of data.  It is less time-consuming to
        replace the quotes in the smaller vmap.

        The series names will not contain single quotes.

        """

        debug = False

        dsinfo = self.get_dataset_info()

        if not dsinfo['time_name']:
            self.get_variables()
            dsinfo = self.get_dataset_info()

        dsinfo_vars = dsinfo['variables']

        if not selectdim:
            selectdim = {}

        vshapes = self.resolve_variable_shapes(variables, selectdim)

        res_data = {}

        total_size = 0
        ntimes = 0

        files = self.get_files(start_time, end_time)
        if debug:
            _logger.debug(
                "len(files)=%d, series_name_fmt=%s",
                len(files), series_name_fmt)

        if series_name_fmt:
            file_tuples = [(f.time.strftime(series_name_fmt), f.path)
                           for f in files]
        else:
            file_tuples = [("", f.path) for f in files]

        for (series_name, ncpath) in file_tuples:

            if series and series_name not in series:
                continue

            if debug:
                _logger.debug("series=%s", str(series))
                _logger.debug("series_name=%s ,ncpath=%s", series_name, ncpath)

            # the files might be in the process of being moved, deleted, etc
            fileok = False
            exc = None
            for itry in range(0, 3):
                try:
                    ncfile = netCDF4.Dataset(ncpath)
                    fileok = True
                    break
                except (OSError, RuntimeError) as excx:
                    exc = excx
                    time.sleep(itry)

            if not fileok:
                _logger.error("%s: %s", ncpath, exc)
                continue

            if series_name not in res_data:
                res_data[series_name] = {
                    'time': [],
                    'data': [],
                    'vmap': {},
                    'dim2': {},
                }

            otime = res_data[series_name]['time']
            odata = res_data[series_name]['data']
            ovmap = res_data[series_name]['vmap']
            odim2 = res_data[series_name]['dim2']

            try:
                size1 = sys.getsizeof(otime)

                # times are appended to otime
                time_slice = self.read_times(
                    ncfile, ncpath, start_time, end_time, otime,
                    size_limit - total_size)

                # time_slice.start is None if nothing to read
                if time_slice.start is None or \
                    time_slice.stop <= time_slice.start:
                    continue

                total_size += sys.getsizeof(otime) - size1

                for exp_vname in variables:

                    # skip if variable is not a time series or
                    # doesn't have a selected dimension
                    if exp_vname not in dsinfo_vars or exp_vname not in vshapes:
                        continue

                    # selected shape of this variable
                    vshape = vshapes[exp_vname]
                    vsize = reduce_(
                        operator.mul, vshape, 1) * \
                        dsinfo_vars[exp_vname]["dtype"].itemsize

                    if total_size + vsize > size_limit:
                        raise nc_exc.TooMuchDataException(
                            "too much data requested, will exceed {} mbytes".
                            format(size_limit/(1000 * 1000)))

                    dim2 = {}
                    vdata = self.read_time_series_data(
                        ncfile, ncpath, exp_vname, time_slice, vshape,
                        selectdim, dim2)

                    if exp_vname not in odim2:
                        odim2[exp_vname] = dim2

                    if exp_vname not in ovmap:
                        size1 = 0
                        vindex = len(odata)
                        odata.append(vdata)
                        ovmap[exp_vname] = vindex
                    else:
                        vindex = ovmap[exp_vname]
                        if debug:
                            _logger.debug(
                                "odata[%s].shape=%s, vdata.shape=%s",
                                exp_vname, odata[vindex].shape, vdata.shape)

                        size1 = sys.getsizeof(odata[vindex])

                        time_index = dsinfo_vars[exp_vname]["time_index"]
                        odata[vindex] = np.append(
                            odata[vindex], vdata, axis=time_index)

                    total_size += sys.getsizeof(odata[vindex]) - size1

            finally:
                ncfile.close()

            ntimes += len(otime)

        if ntimes == 0:
            exc = nc_exc.NoDataException(
                "No data found between {} and {}".
                format(
                    start_time.isoformat(),
                    end_time.isoformat()))
            # _logger.warning("%s: %s", str(self), repr(exc))
            raise exc

        ncol_read = sum(len(cdata['data']) for cdata in res_data.values())
        if ncol_read == 0:
            exc = nc_exc.NoDataException(
                "No variables named {} found between {} and {}".
                format(
                    repr(variables),
                    start_time.isoformat(),
                    end_time.isoformat()))
            # _logger.warning("%s: %s", str(self), repr(exc))
            raise exc

        if debug:
            for series_name in res_data:
                for exp_vname in res_data[series_name]['vmap']:
                    var_index = res_data[series_name]['vmap'][exp_vname]
                    _logger.debug(
                        "res_data[%s]['data'][%d].shape=%s, exp_vname=%s",
                        series_name, var_index,
                        repr(res_data[series_name]['data'][var_index].shape),
                        exp_vname)
            _logger.debug(
                "total_size=%d", total_size)

        return res_data
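The size guard above turns a shape tuple into a byte count with reduce_(operator.mul, vshape, 1) times the dtype's item size. The same computation in isolation, with a hypothetical shape:

import operator
import numpy as np
from functools import reduce as reduce_

vshape = (1440, 3)             # hypothetical (time, station) selection
dtype = np.dtype("float64")

nbytes = reduce_(operator.mul, vshape, 1) * dtype.itemsize
assert nbytes == 1440 * 3 * 8  # element count times bytes per element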
Example #8
from functools import reduce as reduce_

def reduce(f, initializer, iterable):
    return reduce_(f, iterable, initializer)
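The wrapper only swaps the argument order of functools.reduce so the initializer comes before the iterable. Usage, given the definition above:

import operator

assert reduce(operator.add, 10, [1, 2, 3]) == 16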
Example #9
def _fill_data(self, name, index, dest):
    var = self.variables[name]
    shape = tuple(self.coordinates[dim].length for dim in var.dimensions)
    size = reduce_(lambda x, y: x*y, shape, 1)
    numpy.copyto(dest, numpy.arange(size).reshape(shape)[index])
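The reduce over the shape yields the flat element count, so arange can build a ramp covering the whole variable before it is sliced. A self-contained sketch with a hypothetical 2x3 shape:

import numpy
from functools import reduce as reduce_

shape = (2, 3)
size = reduce_(lambda x, y: x * y, shape, 1)  # 6 elements

dest = numpy.empty(3, dtype=int)
# copy row 1 of the 2x3 ramp [[0, 1, 2], [3, 4, 5]] into dest
numpy.copyto(dest, numpy.arange(size).reshape(shape)[1])
assert list(dest) == [3, 4, 5]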
Example #10
import operator
from functools import reduce as reduce_

def add_all(iterable):
    return reduce_(operator.add, iterable)
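With no initializer, reduce raises a TypeError on an empty iterable; otherwise it folds left to right with operator.add, so anything supporting + works:

assert add_all([1, 2, 3, 4]) == 10
assert add_all(["ab", "cd"]) == "abcd"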
Example #11
import numpy as np
import pyopencl as cl
from .tensor import OpenCLTensor
# tools
from functools import lru_cache, reduce as reduce_
from itertools import zip_longest, chain
# utils
from pyopencl.tools import dtype_to_ctype
from numpy import uint32 as i32
from math import ceil, log2
# typing
from typing import Tuple

__all__ = ['atom', 'dot', 'reduce', 'conv']

prod = lambda arr: reduce_(lambda x, y: x * y, arr, 1)
nl = lambda i=0: ('\n' + ' ' * i)

#
#   Elementwise Kernel
#


@lru_cache(maxsize=None)
def cache_build_atom_kernel(
    context: cl.Context,
    op: str,  # operation to execute on variables
    # buffers
    buffers: tuple,  # unique names of variables / buffers
    buffer_dtypes: tuple,  # types of variables / buffers
    ndim: int,  # number of dimensions
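The excerpt is cut off mid-signature, but its two small helpers are worth a look: prod folds a shape into an element count, and nl(i) emits a newline plus i spaces for indenting generated kernel source:

assert prod([4, 8]) == 32
assert nl(4) == "\n    "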
Example #12
from functools import reduce as reduce_
from operator import mul as mul_op  # assumed alias used by the original module

def prod(a):
    """Product of a sequence"""
    return reduce_(mul_op, a, 1)
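Same pattern as above; note that since Python 3.8 the standard library's math.prod does the same job:

import math

assert prod([2, 3, 4]) == 24
assert prod([]) == 1                # the initializer makes the empty product 1
assert math.prod([2, 3, 4]) == 24   # stdlib equivalent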
Example #13
def _fill_data(self, name, index, dest):
    var = self.variables[name]
    shape = tuple(self.coordinates[dim].length for dim in var.dimensions)
    size = reduce_(lambda x, y: x * y, shape, 1)
    numpy.copyto(dest, numpy.arange(size).reshape(shape)[index])
Example #14
    def to_pandas(self, filter_empty_measures=True):
        tidy = self.tidy
        columns = []
        table = []
        properties = self._agg_params.get('properties', [])
        measures = self._agg_params['measures']

        props = parse_properties(properties)
        pnames = [Identifier.parse(i).segments[-1].name for i in properties]

        # header row
        if self._agg_params['parents']:
            slices = []
            for dd in tidy['axes']:
                slices.append(dd['level_depth'])
                for ancestor_level in self._cube.dimensions_by_name[
                        dd['name']]['hierarchies'][0]['levels'][
                            1:dd['level_depth']]:
                    columns += [
                        'ID %s' % ancestor_level['caption'],
                        ancestor_level['caption']
                    ]
                columns += ['ID %s' % dd['level'], dd['level']]

            # property names
            columns += pnames

            # measure names
            columns += [m['caption'] for m in measures]

            for row in tidy['data']:
                r = []
                for j, cell in enumerate(row[:len(tidy['axes'])]):
                    for ancestor in reversed(cell['ancestors'][:slices[j] - 1]):
                        r += [ancestor['key'], ancestor['caption']]
                    r += [cell['key'], cell['caption']]

                r += get_props(row[:-len(measures)], pnames, props,
                               tidy['axes'])

                for mvalue in row[len(tidy['axes']):]:
                    r.append(mvalue)

                table.append(r)

        else:  # no parents
            for dd in tidy['axes']:
                columns += ['ID %s' % dd['level'], dd['level']]
            # measure names
            columns += [m['caption'] for m in self._agg_params['measures']]

            for row in tidy['data']:
                r = []
                for cell in row[:len(tidy['axes'])]:
                    r += [cell['key'], cell['caption']]

                r += get_props(row[:-len(measures)], pnames, props,
                               tidy['axes'])

                for mvalue in row[len(tidy['axes']):]:
                    r.append(mvalue)

                table.append(r)

        df = pd.DataFrame(table,
                          columns=columns) \
               .set_index(columns[:-len(self._agg_params['measures'])])

        if filter_empty_measures:
            df = df[reduce_(
                np.logical_and,
                [df[msr['name']].notnull() for msr in self.measures])]

        return df
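The final filter folds one boolean mask per measure into a single "all measures present" mask with np.logical_and. In isolation, with hypothetical columns:

import numpy as np
import pandas as pd
from functools import reduce as reduce_

df = pd.DataFrame({"a": [1.0, np.nan, 3.0], "b": [np.nan, np.nan, 6.0]})

# one mask per column, folded into a single row filter
mask = reduce_(np.logical_and, [df[c].notnull() for c in ("a", "b")])
assert list(mask) == [False, False, True]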
Example #15
    def read_time_series(
            self,
            variables=(),
            start_time=pytz.utc.localize(datetime.min),
            end_time=pytz.utc.localize(datetime.max),
            selectdim=None,
            size_limit=1000 * 1000 * 1000,
            series=None,
            series_name_fmt=None):
        """ Read a list of time-series variables from this fileset.

        Args:
            variables: A list of strs containing time series variable
                names to be read.
            start_time: A datetime, which is timezone aware, of the start
                time of the series to read.
            end_time: A datetime, timezone aware, end time of series to read.
            selectdim: A dict containing for each dimension name of type
                string, the indices of the dimension to read.
                For example: {"station":[3,4,5]} to read indices 3,4 and 5
                (indexed from 0) of the station dimension for variables
                which have that dimension.
            size_limit: Limit on the total size in bytes to read, used to
                screen huge requests.
            series: A list of series to be read by name. For soundings
                a series name is something like "Aug23_0000Z", as
                created by series_name_fmt from the time associated with
                a file.  In this way the data read can be split into
                named series.  If series_name_fmt is None, series
                should be a list of one empty string, [''],
                and all data are concatenated together in time order.
            series_name_fmt: a datetime.strftime format to create a
                series name for the data found in each file, based
                on the time associated with the file.
                If series_name_fmt is None, all data is put in a dictionary
                element named ''.

        Returns:
            A dict containing, by series name:
                'time' : list of UTC timestamps,
                'data': list of numpy.ndarray containing the data for
                    each variable,
                'vmap': dict by variable name,
                    containing the index into the series data for the variable,
                'dim2': dict by variable name, of values for second dimension
                    of the data, such as height,
                'stnnames': dict by variable name, of the list of the
                    station names for the variable that were read,
                    as selected by selectdim. A list of length 1 containing
                    an empty string indicates the variable does not have
                    a station dimension.

        Raises:
            OSError
            nc_exc.NoDataException

        The 'data' element in the returned dict is a list of numpy arrays,
        and not a dict by variable name. The 'vmap' element provides the
        mapping from a variable name to an index into 'data'. The data object
        is typically JSON-ified and sent to a browser. If it were a dict,
        the variable names may contain characters which cause headaches with
        JSON and javascript in django templates. For example, the JSON-ified
        string is typically passed to javascript in a django template by
        surrounding it with single quotes:
            var data = jQuery.parseJSON('{{ data }}');
        A single quote within the data JSON string causes grief, and we want
        to support single quotes in variable names. The only work around I
        know of is to convert the single quotes within the string to '\u0027'.
        This is, of course, a time-consuming step we want to avoid when
        JSON-ifying a large chunk of data.  It is less time-consuming to
        replace the quotes in the smaller vmap.

        The series names will not contain single quotes.

        """

        debug = False

        dsinfo = self.get_dataset_info()

        if not dsinfo['time_name']:
            self.scan_files()
            dsinfo = self.get_dataset_info()

        dsinfo_vars = dsinfo['variables']

        if not selectdim:
            selectdim = {}

        vshapes = self.resolve_variable_shapes(variables, selectdim)

        res_data = {}

        total_size = 0
        ntimes = 0

        files = self.get_files(start_time, end_time)
        if debug:
            _logger.debug(
                "len(files)=%d, series_name_fmt=%s",
                len(files), series_name_fmt)

        if series_name_fmt:
            file_tuples = [(f.time.strftime(series_name_fmt), f.path)
                           for f in files]
        else:
            file_tuples = [("", f.path) for f in files]

        for (series_name, ncpath) in file_tuples:

            if series and series_name not in series:
                continue

            if debug:
                _logger.debug("series=%s", str(series))
                _logger.debug("series_name=%s ,ncpath=%s", series_name, ncpath)

            # the files might be in the process of being moved, deleted, etc
            fileok = False
            exc = None
            for itry in range(0, 3):
                try:
                    ncfile = netCDF4.Dataset(ncpath)
                    fileok = True
                    break
                except (OSError, RuntimeError) as excx:
                    exc = excx
                    time.sleep(itry)

            if not fileok:
                _logger.error("%s: %s", ncpath, exc)
                continue

            if series_name not in res_data:
                res_data[series_name] = {
                    'time': [],
                    'data': [],
                    'vmap': {},
                    'dim2': {},
                    'stnnames': {},
                }

            otime = res_data[series_name]['time']
            odata = res_data[series_name]['data']
            ovmap = res_data[series_name]['vmap']
            odim2 = res_data[series_name]['dim2']
            ostns = res_data[series_name]['stnnames']

            try:
                size1 = sys.getsizeof(otime)

                # times are appended to otime
                time_slice = self.read_times(
                    ncfile, ncpath, start_time, end_time, otime,
                    size_limit - total_size)

                # time_slice.start is None if nothing to read
                if time_slice.start is None or \
                    time_slice.stop <= time_slice.start:
                    continue

                total_size += sys.getsizeof(otime) - size1

                for exp_vname in variables:

                    # skip if variable is not a time series or
                    # doesn't have a selected dimension
                    if exp_vname not in dsinfo_vars or exp_vname not in vshapes:
                        continue

                    # selected shape of this variable
                    vshape = vshapes[exp_vname]
                    vsize = reduce_(
                        operator.mul, vshape, 1) * \
                        dsinfo_vars[exp_vname]["dtype"].itemsize

                    if not vsize:
                        continue

                    if total_size + vsize > size_limit:
                        raise nc_exc.TooMuchDataException(
                            "too much data requested, will exceed {} mbytes".
                            format(size_limit/(1000 * 1000)))

                    dim2 = {}
                    stnnames = []
                    vdata = self.read_time_series_data(
                        ncfile, ncpath, exp_vname, time_slice, vshape,
                        selectdim, dim2, stnnames)

                    if vdata is None:
                        continue

                    # dim2 will be empty if variable is not found in file
                    if dim2 and exp_vname not in odim2:
                        odim2[exp_vname] = dim2

                    # stnnames will be empty if variable is not found in file
                    if stnnames and exp_vname not in ostns:
                        ostns[exp_vname] = stnnames

                    if exp_vname not in ovmap:
                        size1 = 0
                        vindex = len(odata)
                        odata.append(vdata)
                        ovmap[exp_vname] = vindex
                    else:
                        vindex = ovmap[exp_vname]
                        if debug:
                            _logger.debug(
                                "odata[%s].shape=%s, vdata.shape=%s",
                                exp_vname, odata[vindex].shape, vdata.shape)

                        size1 = sys.getsizeof(odata[vindex])

                        time_index = dsinfo_vars[exp_vname]["time_index"]
                        odata[vindex] = np.append(
                            odata[vindex], vdata, axis=time_index)

                    total_size += sys.getsizeof(odata[vindex]) - size1

            finally:
                ncfile.close()

            ntimes += len(otime)

        if ntimes == 0:
            exc = nc_exc.NoDataException(
                "No data between {} and {}".
                format(
                    start_time.isoformat(),
                    end_time.isoformat()))
            # _logger.warning("%s: %s", str(self), repr(exc))
            raise exc

        if debug:
            for series_name in res_data:
                for exp_vname in res_data[series_name]['vmap']:
                    var_index = res_data[series_name]['vmap'][exp_vname]
                    _logger.debug(
                        "res_data[%s]['data'][%d].shape=%s, exp_vname=%s",
                        series_name, var_index,
                        repr(res_data[series_name]['data'][var_index].shape),
                        exp_vname)
            _logger.debug(
                "total_size=%d", total_size)

        return res_data
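The series grouping in this variant is driven entirely by strftime: every file whose timestamp formats to the same string joins the same series. A quick illustration of the "Aug23_0000Z" naming mentioned in the docstring (the format string is a hypothetical reconstruction, and %b assumes an English locale):

from datetime import datetime

series_name_fmt = "%b%d_%H%MZ"
t = datetime(2019, 8, 23, 0, 0)
assert t.strftime(series_name_fmt) == "Aug23_0000Z"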