    def select(self, select):
        selects = listwrap(select)

        if len(selects) == 1 and isinstance(
                selects[0].value, Variable) and selects[0].value.var == ".":
            new_schema = self.schema
            if selects[0].name == ".":
                return self
        else:
            new_schema = None

        if isinstance(select, list):
            push_and_pull = [(s.name, jx_expression_to_function(s.value))
                             for s in selects]

            def selector(d):
                output = Data()
                for n, p in push_and_pull:
                    output[n] = unwraplist(p(wrap(d)))
                return unwrap(output)

            new_data = map(selector, self.data)
        else:
            select_value = jx_expression_to_function(select.value)
            new_data = map(select_value, self.data)

        return ListContainer("from " + self.name,
                             data=new_data,
                             schema=new_schema)
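A minimal usage sketch of the projection step above, not taken from the source: it assumes rows are plain dicts and that jx_expression_to_function accepts a bare field name, as the other examples on this page suggest; the field names are hypothetical.

pull_age = jx_expression_to_function("age")
rows = [{"name": "ann", "age": 30}, {"name": "bob", "age": 25}]
projected = [{"age": unwraplist(pull_age(wrap(r)))} for r in rows]
# expected result: [{"age": 30}, {"age": 25}]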
Example 2
    def select(self, select):
        selects = listwrap(select)

        if not all(isinstance(s.value, Variable) for s in selects):
            Log.error("selecting on structure, or expressions, not supported yet")
        if len(selects) == 1 and isinstance(selects[0].value, Variable) and selects[0].value.var == ".":
            new_schema = self.schema
            if selects[0].name == ".":
                return self
        else:
            new_schema = None

        if isinstance(select, list):
            push_and_pull = [(s.name, jx_expression_to_function(s.value)) for s in selects]
            def selector(d):
                output = Data()
                for n, p in push_and_pull:
                    output[n] = p(wrap(d))
                return unwrap(output)

            new_data = map(selector, self.data)
        else:
            select_value = jx_expression_to_function(select.value)
            new_data = map(select_value, self.data)

        return ListContainer("from "+self.name, data=new_data, schema=new_schema)
Example 3
    def select(self, select):
        selects = listwrap(select)

        if not all(isinstance(s.value, Variable) for s in selects):
            Log.error(
                "selecting on structure, or expressions, not supported yet")
        if len(selects) == 1 and isinstance(
                selects[0].value, Variable) and selects[0].value.var == ".":
            new_schema = self.schema
            if selects[0].name == ".":
                return self
        else:
            new_schema = None

        if isinstance(select, list):
            push_and_pull = [(s.name, jx_expression_to_function(s.value))
                             for s in selects]

            def selector(d):
                output = Dict()
                for n, p in push_and_pull:
                    output[n] = p(wrap(d))
                return unwrap(output)

            new_data = map(selector, self.data)
        else:
            select_value = jx_expression_to_function(select.value)
            new_data = map(select_value, self.data)

        return ListContainer("from " + self.name,
                             data=new_data,
                             schema=new_schema)
Example 4
def make_accessor(e):
    d = e.domain

    if e.value:
        accessor = jx_expression_to_function(e.value)
        if e.allowNulls:

            def output1(row):
                return [d.getIndexByKey(accessor(row))]

            return output1
        else:

            def output2(row):
                c = d.getIndexByKey(accessor(row))
                if c == len(d.partitions):
                    return []
                else:
                    return [c]

            return output2
    elif e.range:
        for p in d.partitions:
            if p["max"] == None or p["min"] == None:
                Log.error(
                    "Inclusive expects domain parts to have `min` and `max` properties"
                )

        mi_accessor = jx_expression_to_function(e.range.min)
        ma_accessor = jx_expression_to_function(e.range.max)

        if e.range.mode == "inclusive":

            def output3(row):
                mi, ma = mi_accessor(row), ma_accessor(row)
                output = [
                    p.dataIndex for p in d.partitions
                    if mi <= p["max"] and p["min"] < ma
                ]
                if e.allowNulls and not output:
                    return [len(d.partitions)]  # ENSURE THIS IS NULL
                return output

            return output3
        else:

            def output4(row):
                mi, ma = mi_accessor(row), ma_accessor(row)
                var = d.key
                output = [
                    p.dataIndex for p in d.partitions if mi <= p[var] < ma
                ]
                if e.allowNulls and not output:
                    return [len(d.partitions)]  # ENSURE THIS IS NULL
                return output

            return output4
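A conceptual sketch of what the returned accessors do; the edge e and its domain normally come from query normalization, and the stub domain below is hypothetical, not the library's API.

class StubDomain(object):
    # partition index for a known key, len(partitions) for unknown/null keys,
    # mirroring how output2 above detects the null partition
    partitions = ["a", "b"]

    def getIndexByKey(self, key):
        try:
            return self.partitions.index(key)
        except ValueError:
            return len(self.partitions)

# With e.allowNulls set, output1 on a row whose value maps to "b" returns [1];
# an unmatched value returns [2], the index reserved for the null partition.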
Example 5
def groupby(data, keys=None, size=None, min_size=None, max_size=None, contiguous=False):
    """
        return a list of (keys, values) pairs where
            keys IS THE SET OF KEY VALUES THAT DEFINE THE GROUP
            values IS A LIST OF ALL data THAT HAS THOSE keys
        contiguous - MAINTAIN THE ORDER OF THE DATA, STARTING THE NEW GROUP WHEN THE SELECTOR CHANGES
    """

    if size != None or min_size != None or max_size != None:
        if size != None:
            max_size = size
        return groupby_min_max_size(data, min_size=min_size, max_size=max_size)

    if isinstance(data, Container):
        return data.groupby(keys)

    try:
        keys = listwrap(keys)
        get_key = jx_expression_to_function(keys)
        if not contiguous:
            data = sorted(data, key=get_key)

        def _output():
            for g, v in itertools.groupby(data, get_key):
                group = Dict()
                for k, gg in zip(keys, g):
                    group[k] = gg
                yield (group, wrap(v))

        return _output()

    except Exception as e:
        Log.error("Problem grouping", e)
Example 6
def groupby(data,
            keys=None,
            size=None,
            min_size=None,
            max_size=None,
            contiguous=False):
    """
        return a list of (keys, values) pairs where
            keys IS THE SET OF KEY VALUES THAT DEFINE THE GROUP
            values IS A LIST OF ALL data THAT HAS THOSE keys
        contiguous - MAINTAIN THE ORDER OF THE DATA, STARTING THE NEW GROUP WHEN THE SELECTOR CHANGES
    """

    if size != None or min_size != None or max_size != None:
        if size != None:
            max_size = size
        return groupby_min_max_size(data, min_size=min_size, max_size=max_size)

    if isinstance(data, Container):
        return data.groupby(keys)

    try:
        keys = listwrap(keys)
        get_key = jx_expression_to_function(keys)
        if not contiguous:
            data = sorted(data, key=get_key)

        return ((wrap({k: v
                       for k, v in zip(keys, g)}), wrap(v))
                for g, v in itertools.groupby(data, get_key))
    except Exception as e:
        Log.error("Problem grouping", e)
Example 7
def filter(data, where):
    """
    where  - a function that accepts (record, rownum, rows) and returns boolean
    """
    if len(data) == 0 or where == None or where == TRUE_FILTER:
        return data

    if isinstance(data, Container):
        return data.filter(where)

    if isinstance(data, (list, set)):
        temp = jx_expression_to_function(where)
        dd = wrap(data)
        return wrap(
            [unwrap(d) for i, d in enumerate(data) if temp(wrap(d), i, dd)])
    else:
        Log.error("Do not know how to handle type {{type}}",
                  type=data.__class__.__name__)

    try:
        return drill_filter(where, data)
    except Exception:
        # WOW!  THIS IS INEFFICIENT!
        return wrap([
            unwrap(d)
            for d in drill_filter(where, [DataObject(d) for d in data])
        ])
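A hedged usage sketch: the docstring above says where may be a plain callable taking (record, rownum, rows), so that form is used here; the data shape is assumed.

people = [{"name": "ann", "age": 30}, {"name": "bob", "age": 9}]
adults = filter(people, lambda row, rownum, rows: row["age"] >= 18)
# expected: only the "ann" record remains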
Example 8
def groupby(data,
            keys=None,
            size=None,
            min_size=None,
            max_size=None,
            contiguous=False):
    """
    :param data:
    :param keys:
    :param size:
    :param min_size:
    :param max_size:
    :param contiguous: MAINTAIN THE ORDER OF THE DATA, STARTING THE NEW GROUP WHEN THE SELECTOR CHANGES
    :return: list of (keys, values) PAIRS, WHERE
                 keys IS IN LEAF FORM (FOR USE WITH THE {"eq": terms} OPERATOR)
                 values IS A GENERATOR OF ALL VALUES THAT MATCH keys
    """
    if isinstance(data, Container):
        return data.groupby(keys)

    if size != None or min_size != None or max_size != None:
        if size != None:
            max_size = size
        return groupby_min_max_size(data, min_size=min_size, max_size=max_size)

    try:
        keys = listwrap(keys)
        if not contiguous:
            from pyLibrary.queries import jx
            data = jx.sort(data, keys)

        if not data:
            return Null

        accessor = jx_expression_to_function(TupleOp("tuple", keys))  # CAN RETURN Null, WHICH DOES NOT PLAY WELL WITH __cmp__

        def _output():
            start = 0
            prev = accessor(data[0])
            for i, d in enumerate(data):
                curr = accessor(d)
                if curr != prev:
                    group = {}
                    for k, gg in zip(keys, prev):
                        group[k] = gg
                    yield Data(group), data[start:i:]
                    start = i
                    prev = curr
            group = {}
            for k, gg in zip(keys, prev):
                group[k] = gg
            yield Data(group), data[start::]

        return _output()
    except Exception as e:
        Log.error("Problem grouping", cause=e)
Example 9
def window(data, param):
    """
    MAYBE WE CAN DO THIS WITH NUMPY (no, the edges of windows are not graceful with numpy)
    data - list of records
    """
    name = param.name            # column to assign window function result
    edges = param.edges          # columns to group by
    where = param.where          # DO NOT CONSIDER THESE VALUES
    sortColumns = param.sort     # columns to sort by
    calc_value = wrap_function(jx_expression_to_function(param.value))  # function that takes a record and returns a value (for aggregation)
    aggregate = param.aggregate  # WindowFunction to apply
    _range = param.range         # of form {"min":-10, "max":0} to specify the size and relative position of window

    data = filter(data, where)

    if not aggregate and not edges:
        if sortColumns:
            data = sort(data, sortColumns, already_normalized=True)
        # SIMPLE CALCULATED VALUE
        for rownum, r in enumerate(data):
            r[name] = calc_value(r, rownum, data)
        return

    if not aggregate or aggregate == "none":
        for _, values in groupby(data, edges.value):
            if not values:
                continue     # CAN DO NOTHING WITH THIS ZERO-SAMPLE

            sequence = sort(values, sortColumns, already_normalized=True)

            for rownum, r in enumerate(sequence):
                r[name] = calc_value(r, rownum, sequence)
        return

    for keys, values in groupby(data, edges.value):
        if not values:
            continue     # CAN DO NOTHING WITH THIS ZERO-SAMPLE

        sequence = sort(values, sortColumns)

        for rownum, r in enumerate(sequence):
            r["__temp__"] = calc_value(r, rownum, sequence)

        head = coalesce(_range.max, _range.stop)
        tail = coalesce(_range.min, _range.start)

        # PRELOAD total
        total = aggregate()
        for i in range(tail, head):
            total.add(sequence[i].__temp__)

        # WINDOW FUNCTION APPLICATION
        for i, r in enumerate(sequence):
            r[name] = total.end()
            total.add(sequence[i + head].__temp__)
            total.sub(sequence[i + tail].__temp__)

    for r in data:
        r["__temp__"] = None  # CLEANUP
Example 11
def groupby(data, keys=None, size=None, min_size=None, max_size=None, contiguous=False):
    """
    :param data:
    :param keys:
    :param size:
    :param min_size:
    :param max_size:
    :param contiguous: MAINTAIN THE ORDER OF THE DATA, STARTING THE NEW GROUP WHEN THE SELECTOR CHANGES
    :return: list of (keys, values) PAIRS, WHERE
                 keys IS IN LEAF FORM (FOR USE WITH THE {"eq": terms} OPERATOR)
                 values IS A GENERATOR OF ALL VALUES THAT MATCH keys
    """
    if isinstance(data, Container):
        return data.groupby(keys)

    if size != None or min_size != None or max_size != None:
        if size != None:
            max_size = size
        return groupby_min_max_size(data, min_size=min_size, max_size=max_size)

    try:
        keys = listwrap(keys)
        if not contiguous:
            from pyLibrary.queries import jx
            data = jx.sort(data, keys)

        if not data:
            return Null

        if any(isinstance(k, Expression) for k in keys):
            Log.error("can not handle expressions")
        else:
            accessor = jx_expression_to_function(jx_expression({"tuple": keys}))  # CAN RETURN Null, WHICH DOES NOT PLAY WELL WITH __cmp__

        def _output():
            start = 0
            prev = accessor(data[0])
            for i, d in enumerate(data):
                curr = accessor(d)
                if curr != prev:
                    group = {}
                    for k, gg in zip(keys, prev):
                        group[k] = gg
                    yield Data(group), data[start:i:]
                    start = i
                    prev = curr
            group = {}
            for k, gg in zip(keys, prev):
                group[k] = gg
            yield Data(group), data[start::]

        return _output()
    except Exception as e:
        Log.error("Problem grouping", cause=e)
Example 12
def make_accessor(e):
    d = e.domain

    if e.value:
        accessor = jx_expression_to_function(e.value)
        if e.allowNulls:
            def output1(row):
                return [d.getIndexByKey(accessor(row))]
            return output1
        else:
            def output2(row):
                c = d.getIndexByKey(accessor(row))
                if c == len(d.partitions):
                    return []
                else:
                    return [c]
            return output2
    elif e.range:
        for p in d.partitions:
            if p["max"] == None or p["min"] == None:
                Log.error("Inclusive expects domain parts to have `min` and `max` properties")

        mi_accessor = jx_expression_to_function(e.range.min)
        ma_accessor = jx_expression_to_function(e.range.max)

        if e.range.mode == "inclusive":
            def output3(row):
                mi, ma = mi_accessor(row), ma_accessor(row)
                output = [p.dataIndex for p in d.partitions if mi <= p["max"] and p["min"] < ma]
                if e.allowNulls and not output:
                    return [len(d.partitions)]  # ENSURE THIS IS NULL
                return output
            return output3
        else:
            def output4(row):
                mi, ma = mi_accessor(row), ma_accessor(row)
                var = d.key
                output = [p.dataIndex for p in d.partitions if mi <= p[var] < ma]
                if e.allowNulls and not output:
                    return [len(d.partitions)]  # ENSURE THIS IS NULL
                return output
            return output4
Example 13
    def groupby(self, keys, contiguous=False):
        try:
            keys = listwrap(keys)
            get_key = jx_expression_to_function(keys)
            data = self.data
            if not contiguous:
                data = sorted(self.data, key=get_key)

            def _output():
                for g, v in itertools.groupby(data, get_key):
                    group = Dict()
                    for k, gg in zip(keys, g):
                        group[k] = gg
                    yield (group, wrap(list(v)))

            return _output()
        except Exception as e:
            Log.error("Problem grouping", e)
Example 14
    def groupby(self, keys, contiguous=False):
        try:
            keys = listwrap(keys)
            get_key = jx_expression_to_function(keys)
            data = self.data
            if not contiguous:
                data = sorted(self.data, key=get_key)

            def _output():
                for g, v in itertools.groupby(data, get_key):
                    group = Data()
                    for k, gg in zip(keys, g):
                        group[k] = gg
                    yield (group, wrap(list(v)))

            return _output()
        except Exception as e:
            Log.error("Problem grouping", e)
Example 15
    def select(self, select):
        selects = listwrap(select)

        if len(selects) == 1 and isinstance(selects[0].value, Variable) and selects[0].value.var == ".":
            new_schema = self.schema
            if selects[0].name == ".":
                return self
        else:
            new_schema = None

        push_and_pull = [(s.name, jx_expression_to_function(s.value)) for s in selects]

        def constructor(d):
            output = Dict()
            for n, p in push_and_pull:
                output[n] = p(d)
            return _get(d, "_dict")

        new_data = map(constructor, self.data)
        return ListContainer("from " + self.name, data=new_data, schema=new_schema)
Example 16
def es_aggop(es, mvel, query):
    select = listwrap(query.select)
    FromES = build_es_query(query)

    isSimple = AND(aggregates[s.aggregate] == "count" for s in select)
    if isSimple:
        return es_countop(es, query)  # SIMPLE, USE TERMS FACET INSTEAD

    value2facet = dict()  # ONLY ONE FACET NEEDED PER
    name2facet = dict()  # MAP name TO FACET WITH STATS

    for s in select:
        if s.value not in value2facet:
            if isinstance(s.value, Variable):
                unwrap(FromES.facets)[s.name] = {
                    "statistical": {
                        "field": s.value.var
                    },
                    "facet_filter":
                    simplify_esfilter(query.where.to_esfilter())
                }
            else:
                unwrap(FromES.facets)[s.name] = {
                    "statistical": {
                        "script": jx_expression_to_function(s.value)
                    },
                    "facet_filter": simplify_esfilter(query.where)
                }
            value2facet[s.value] = s.name
        name2facet[s.name] = value2facet[s.value]

    data = es09.util.post(es, FromES, query.limit)

    matricies = {
        s.name: Matrix(value=fix_es_stats(data.facets[literal_field(s.name)])[
            aggregates[s.aggregate]])
        for s in select
    }
    cube = Cube(query.select, [], matricies)
    cube.frum = query
    return cube
Example 17
def sort(data, fieldnames=None, already_normalized=False):
    """
    PASS A FIELD NAME, OR LIST OF FIELD NAMES, OR LIST OF STRUCTS WITH {"field":field_name, "sort":direction}
    """
    try:
        if data == None:
            return Null

        if not fieldnames:
            return wrap(sorted(data, value_compare))

        if already_normalized:
            formal = fieldnames
        else:
            formal = query._normalize_sort(fieldnames)

        funcs = [(jx_expression_to_function(f.value), f.sort) for f in formal]

        def comparer(left, right):
            for func, sort_ in funcs:
                try:
                    result = value_compare(func(left), func(right), sort_)
                    if result != 0:
                        return result
                except Exception as e:
                    Log.error("problem with compare", e)
            return 0

        if isinstance(data, list):
            output = FlatList([unwrap(d) for d in sorted(data, cmp=comparer)])
        elif hasattr(data, "__iter__"):
            output = FlatList(
                [unwrap(d) for d in sorted(list(data), cmp=comparer)])
        else:
            Log.error("Do not know how to handle")
            output = None

        return output
    except Exception as e:
        Log.error("Problem sorting\n{{data}}", data=data, cause=e)
Example 18
def sort(data, fieldnames=None, already_normalized=False):
    """
    PASS A FIELD NAME, OR LIST OF FIELD NAMES, OR LIST OF STRUCTS WITH {"field":field_name, "sort":direction}
    """
    try:
        if data == None:
            return Null

        if not fieldnames:
            return wrap(sorted(data, value_compare))

        if already_normalized:
            formal = fieldnames
        else:
            formal = query._normalize_sort(fieldnames)

        funcs = [(jx_expression_to_function(f.value), f.sort) for f in formal]

        def comparer(left, right):
            for func, sort_ in funcs:
                try:
                    result = value_compare(func(left), func(right), sort_)
                    if result != 0:
                        return result
                except Exception as e:
                    Log.error("problem with compare", e)
            return 0

        if isinstance(data, list):
            output = FlatList([unwrap(d) for d in sorted(data, cmp=comparer)])
        elif hasattr(data, "__iter__"):
            output = FlatList([unwrap(d) for d in sorted(list(data), cmp=comparer)])
        else:
            Log.error("Do not know how to handle")
            output = None

        return output
    except Exception as e:
        Log.error("Problem sorting\n{{data}}",  data=data, cause=e)
Example 19
def filter(data, where):
    """
    where  - a function that accepts (record, rownum, rows) and returns boolean
    """
    if len(data) == 0 or where == None or where == TRUE_FILTER:
        return data

    if isinstance(data, Container):
        return data.filter(where)

    if isinstance(data, (list, set)):
        temp = jx_expression_to_function(where)
        dd = wrap(data)
        return [d for i, d in enumerate(data) if temp(wrap(d), i, dd)]
    else:
        Log.error("Do not know how to handle type {{type}}", type=data.__class__.__name__)

    try:
        return drill_filter(where, data)
    except Exception:
        # WOW!  THIS IS INEFFICIENT!
        return wrap([unwrap(d) for d in drill_filter(where, [DictObject(d) for d in data])])
Example 20
def es_aggop(es, mvel, query):
    select = listwrap(query.select)
    FromES = build_es_query(query)

    isSimple = AND(aggregates[s.aggregate] == "count" for s in select)
    if isSimple:
        return es_countop(es, query)  # SIMPLE, USE TERMS FACET INSTEAD


    value2facet = dict()  # ONLY ONE FACET NEEDED PER
    name2facet = dict()   # MAP name TO FACET WITH STATS

    for s in select:
        if s.value not in value2facet:
            if isinstance(s.value, Variable):
                unwrap(FromES.facets)[s.name] = {
                    "statistical": {
                        "field": s.value.var
                    },
                    "facet_filter": simplify_esfilter(query.where.to_esfilter())
                }
            else:
                unwrap(FromES.facets)[s.name] = {
                    "statistical": {
                        "script": jx_expression_to_function(s.value)
                    },
                    "facet_filter": simplify_esfilter(query.where)
                }
            value2facet[s.value] = s.name
        name2facet[s.name] = value2facet[s.value]

    data = es09.util.post(es, FromES, query.limit)

    matricies = {s.name: Matrix(value=fix_es_stats(data.facets[literal_field(s.name)])[aggregates[s.aggregate]]) for s in select}
    cube = Cube(query.select, [], matricies)
    cube.frum = query
    return cube
Example 21
    def select(self, select):
        selects = listwrap(select)

        if len(selects) == 1 and isinstance(
                selects[0].value, Variable) and selects[0].value.var == ".":
            new_schema = self.schema
            if selects[0].name == ".":
                return self
        else:
            new_schema = None

        push_and_pull = [(s.name, jx_expression_to_function(s.value))
                         for s in selects]

        def constructor(d):
            output = Dict()
            for n, p in push_and_pull:
                output[n] = p(d)
            return _get(d, "_dict")

        new_data = map(constructor, self.data)
        return ListContainer("from " + self.name,
                             data=new_data,
                             schema=new_schema)
Example 22
def cube_aggs(frum, query):
    select = listwrap(query.select)

    #MATCH EDGES IN QUERY TO ONES IN frum
    for e in query.edges:
        for fs in frum.select:
            if fs.name == e.value:
                Log.error("Not implemented yet")
        if isinstance(e.domain, DefaultDomain):
            # DEFAULT DOMAINS CAN EASILY BE LOOKED UP FROM frum
            for fe in frum.edges:
                if fe.name == e.value:
                    e.domain = SimpleSetDomain(**fe.domain.as_dict())
                    e.value = e.value + "." + fe.domain.key
                    break
        else:
            for fe in frum.edges:
                if fe.name == e.value:
                    e.value = e.value + "." + fe.domain.key
                    break

    result = {
        s.name: Matrix(dims=[
            len(e.domain.partitions) + (1 if e.allowNulls else 0)
            for e in query.edges
        ],
                       zeros=s.default)
        for s in select
    }
    where = jx_expression_to_function(query.where)
    for d in filter(where, frum.values()):
        coord = []  # LIST OF MATCHING COORDINATE FAMILIES, USUALLY ONLY ONE PER FAMILY BUT JOINS WITH EDGES CAN CAUSE MORE
        for e in query.edges:
            matches = get_matches(e, d)
            coord.append(matches)
            if len(matches) == 1 and d[e.name] == None:
                d[e.name] = e.domain.partitions[matches[0]]

        for s in select:
            mat = result[s.name]
            agg = s.aggregate
            var = s.value
            expr = jx_expression_to_function(var)
            val = expr(d)
            if agg == "count":
                if var == "." or var == None:
                    for c in itertools.product(*coord):
                        mat[c] += 1
                    continue

                if val != None:
                    for c in itertools.product(*coord):
                        mat[c] += 1
            else:
                for c in itertools.product(*coord):
                    acc = mat[c]
                    if acc == None:
                        acc = windows.name2accumulator.get(agg)
                        if acc == None:
                            Log.error(
                                "select aggregate {{agg}} is not recognized",
                                agg=agg)
                        acc = acc(**s)
                        mat[c] = acc
                    acc.add(val)

    for s in select:
        if s.aggregate == "count":
            continue
        m = result[s.name]
        for c, var in m.items():
            if var != None:
                m[c] = var.end()

    from pyLibrary.queries.containers.cube import Cube

    return Cube(select, query.edges, result)
Example 23
def cube_aggs(frum, query):
    select = listwrap(query.select)

    #MATCH EDGES IN QUERY TO ONES IN frum
    for e in query.edges:
        for fs in frum.select:
            if fs.name == e.value:
                Log.error("Not implemented yet")
        if isinstance(e.domain, DefaultDomain):
            # DEFAULT DOMAINS CAN EASILY BE LOOKED UP FROM frum
            for fe in frum.edges:
                if fe.name == e.value:
                    e.domain = SimpleSetDomain(**fe.domain.as_dict())
                    e.value = e.value + "." + fe.domain.key
                    break
        else:
            for fe in frum.edges:
                if fe.name == e.value:
                    e.value = e.value + "." + fe.domain.key
                    break


    result = {
        s.name: Matrix(
            dims=[len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges],
            zeros=s.default
        )
        for s in select
    }
    where = jx_expression_to_function(query.where)
    for d in filter(where, frum.values()):
        coord = []  # LIST OF MATCHING COORDINATE FAMILIES, USUALLY ONLY ONE PER FAMILY BUT JOINS WITH EDGES CAN CAUSE MORE
        for e in query.edges:
            matches = get_matches(e, d)
            coord.append(matches)
            if len(matches) == 1 and d[e.name] == None:
                d[e.name] = e.domain.partitions[matches[0]]

        for s in select:
            mat = result[s.name]
            agg = s.aggregate
            var = s.value
            expr = jx_expression_to_function(var)
            val = expr(d)
            if agg == "count":
                if var == "." or var == None:
                    for c in itertools.product(*coord):
                        mat[c] += 1
                    continue

                if val != None:
                    for c in itertools.product(*coord):
                        mat[c] += 1
            else:
                for c in itertools.product(*coord):
                    acc = mat[c]
                    if acc == None:
                        acc = windows.name2accumulator.get(agg)
                        if acc == None:
                            Log.error("select aggregate {{agg}} is not recognized",  agg= agg)
                        acc = acc(**s)
                        mat[c] = acc
                    acc.add(val)

    for s in select:
        if s.aggregate == "count":
            continue
        m = result[s.name]
        for c, var in m.items():
            if var != None:
                m[c] = var.end()

    from pyLibrary.queries.containers.cube import Cube

    return Cube(select, query.edges, result)
Example 24
def list_aggs(frum, query):
    frum = wrap(frum)
    select = listwrap(query.select)

    for e in query.edges:
        if isinstance(e.domain, DefaultDomain):
            accessor = jx_expression_to_function(e.value)
            unique_values = set(map(accessor, frum))
            if None in unique_values:
                e.allowNulls = coalesce(e.allowNulls, True)
                unique_values -= {None}
            e.domain = SimpleSetDomain(partitions=list(sorted(unique_values)))
        else:
            pass

    s_accessors = [(ss.name, compile_expression(ss.value.to_python())) for ss in select]

    result = {
        s.name: Matrix(
            dims=[len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges],
            zeros=lambda: windows.name2accumulator.get(s.aggregate)(**s)
        )
        for s in select
    }
    where = jx_expression_to_function(query.where)
    coord = [None]*len(query.edges)
    edge_accessor = [(i, make_accessor(e)) for i, e in enumerate(query.edges)]

    net_new_edge_names = set(wrap(query.edges).name) - UNION(e.value.vars() for e in query.edges)
    if net_new_edge_names & UNION(ss.value.vars() for ss in select):
        # s_accessor NEEDS THESE EDGES, SO WE PASS THEM ANYWAY
        for d in filter(where, frum):
            d = d.copy()
            for c, get_matches in edge_accessor:
                coord[c] = get_matches(d)

            for s_name, s_accessor in s_accessors:
                mat = result[s_name]
                for c in itertools.product(*coord):
                    acc = mat[c]
                    for e, cc in zip(query.edges, c):
                        d[e.name] = e.domain.partitions[cc]
                    val = s_accessor(d, c, frum)
                    acc.add(val)
    else:
        # FASTER
        for d in filter(where, frum):
            for c, get_matches in edge_accessor:
                coord[c] = get_matches(d)

            for s_name, s_accessor in s_accessors:
                mat = result[s_name]
                for c in itertools.product(*coord):
                    acc = mat[c]
                    val = s_accessor(d, c, frum)
                    acc.add(val)

    for s in select:
        # if s.aggregate == "count":
        #     continue
        m = result[s.name]
        for c, var in m.items():
            if var != None:
                m[c] = var.end()

    from pyLibrary.queries.containers.cube import Cube

    output = Cube(select, query.edges, result)
    return output
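A hedged illustration of the coordinate fan-out used above, standalone and not the library API: each edge accessor yields the list of partition indices a row matches, and itertools.product enumerates every cube cell the row contributes to.

import itertools

coord = [[0], [1, 2]]  # the row matches partition 0 of edge A, partitions 1 and 2 of edge B
cells = list(itertools.product(*coord))
# cells == [(0, 1), (0, 2)]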
Example 25
def get(expr):
    """
    RETURN FUNCTION FOR EXPRESSION
    """
    return jx_expression_to_function(expr)
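A hedged usage sketch: get just wraps jx_expression_to_function, so the returned callable is applied to a (wrapped) row; the expression form and row shape are assumptions.

age_of = get("age")
value = age_of(wrap({"name": "ann", "age": 30}))
# expected: 30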