Ejemplo n.º 1
0
def es_countop(es, mvel, query):
    """
    RETURN SINGLE COUNT

    ONE TERMS FACET IS REQUESTED PER SELECT CLAUSE, AND THE `total` OF EACH
    RETURNED FACET BECOMES THE VALUE OF THAT CLAUSE

    :param es: PASSED THROUGH TO es09.util.post
    :param mvel: NOT USED HERE; KEPT SO ALL es_* OPERATORS SHARE ONE SIGNATURE
    :param query: SUPPLIES select, edges AND limit
    :return: Cube WITH ONE Matrix PER SELECT NAME, AND frum SET TO query
    """
    select = listwrap(query.select)
    FromES = build_es_query(query)
    for s in select:
        if is_keyword(s.value):
            # ONLY DOCUMENTS THAT HAVE THE FIELD ARE COUNTED
            FromES.facets[s.name] = {
                "terms": {
                    "field": s.value,
                    "size": query.limit,
                },
                "facet_filter": {"exists": {"field": s.value}}
            }
        else:
            # COMPLICATED value IS PROBABLY A SCRIPT, USE IT
            FromES.facets[s.name] = {
                "terms": {
                    "script_field": es09.expressions.compile_expression(s.value, query),
                    "size": 200000
                }
            }

    data = es09.util.post(es, FromES, query.limit)

    # FACETS ARE A TOP-LEVEL MEMBER OF THE RESPONSE (A SIBLING OF hits, NOT A
    # CHILD OF IT), SO THE TOTALS MUST BE READ FROM data.facets
    matricies = {}
    for s in select:
        matricies[s.name] = Matrix(value=data.facets[s.name].total)

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube
Ejemplo n.º 2
0
def _es_terms2(es, mvel, query):
    """
    WE ASSUME THERE ARE JUST TWO EDGES, AND EACH HAS A SIMPLE value

    THE VALUES OF THE FIRST EDGE ARE FETCHED WITH es_terms, THEN ONE TERMS
    FACET ON THE SECOND EDGE IS REQUESTED FOR EVERY (select, first-edge value)
    PAIR.  FACETS ARE NAMED "<select name>,<index of first-edge value>"

    SIDE EFFECT: query.edges[1].domain.partitions IS REPLACED WITH THE SORTED
    UNION OF ALL TERMS RETURNED FOR THE SECOND EDGE

    :param es: PASSED THROUGH TO es_terms AND es09.util.post
    :param mvel: PASSED THROUGH TO es_terms
    :param query: SUPPLIES select, edges, where AND limit
    :return: Cube WITH ONE 2D Matrix PER SELECT NAME, AND query SET TO query
    """

    # REQUEST VALUES IN FIRST DIMENSION
    q1 = query.copy()
    q1.edges = query.edges[0:1:]
    values1 = es_terms(es, mvel, q1).edges[0].domain.partitions.value

    select = listwrap(query.select)
    FromES = build_es_query(query)
    for s in select:
        for i, v in enumerate(values1):
            FromES.facets[s.name + "," + str(i)] = {
                "terms": {
                    "field": query.edges[1].value,
                    "size": coalesce(query.limit, 200000)
                },
                "facet_filter": simplify_esfilter({"and": [
                    query.where,
                    {"term": {query.edges[0].value: v}}
                ]})
            }

    data = es09.util.post(es, FromES, query.limit)

    # UNION ALL TERMS FROM SECOND DIMENSION
    values2 = set()
    for k, f in data.facets.items():
        values2.update(f.terms.term)
    values2 = qb.sort(values2)
    term2index = {v: i for i, v in enumerate(values2)}
    query.edges[1].domain.partitions = DictList([{"name": v, "value": v} for v in values2])

    # MAKE CUBE
    output = {}
    dims = [len(values1), len(values2)]
    for s in select:
        output[s.name] = Matrix(*dims)

    # FILL CUBE
    # EXPECTING ONLY SELECT CLAUSE FACETS
    for facetName, facet in data.facets.items():
        # THE INDEX IS WHATEVER FOLLOWS THE LAST COMMA; SPLITTING FROM THE RIGHT
        # KEEPS SELECT NAMES THAT THEMSELVES CONTAIN A COMMA INTACT
        select_name, index = facetName.rsplit(",", 1)
        s = [s for s in select if s.name == select_name][0]
        i1 = int(index)
        for term in facet.terms:
            i2 = term2index[term.term]
            output[s.name][(i1, i2)] = term[aggregates[s.aggregate]]

    cube = Cube(query.select, query.edges, output)
    cube.query = query
    return cube
Ejemplo n.º 3
0
def format_cube_from_aggop(decoders, aggs, start, query, select):
    """
    BUILD AN EDGELESS Cube FROM AN AGGREGATION RESULT

    :param decoders: NOT USED HERE
    :param aggs: AGGREGATION RESULT; ITS _filter / _nested WRAPPERS ARE PEELED OFF FIRST
    :param start: NOT USED HERE
    :param query: SUPPLIES select FOR THE Cube, AND BECOMES Cube.frum
    :param select: SELECT CLAUSES; EACH s.pull IS LOOKED UP IN THE INNERMOST AGGREGATION
    :return: Cube WITH NO EDGES AND ONE Matrix PER SELECT NAME
    """
    # DESCEND THROUGH _filter/_nested UNTIL NEITHER IS PRESENT
    innermost = aggs
    while True:
        wrapped = coalesce(innermost._filter, innermost._nested)
        if not wrapped:
            break
        innermost = wrapped

    pairs = []
    for clause in select:
        is_count = (clause.aggregate == "count")
        pairs.append((clause, Matrix(dims=[], zeros=is_count)))

    data = {}
    for clause, matrix in pairs:
        matrix[tuple()] = innermost[clause.pull]
        data[clause.name] = matrix

    cube = Cube(query.select, [], data)
    cube.frum = query
    return cube
Ejemplo n.º 4
0
def format_cube_from_aggop(decoders, aggs, start, query, select):
    """
    BUILD AN EDGELESS Cube FROM AN AGGREGATION RESULT

    :param decoders: NOT USED HERE
    :param aggs: AGGREGATION RESULT; ITS _filter / _nested WRAPPERS ARE PEELED OFF FIRST
    :param start: NOT USED HERE
    :param query: SUPPLIES select FOR THE Cube, AND BECOMES Cube.frum
    :param select: SELECT CLAUSES; EACH s.pull IS LOOKED UP IN THE INNERMOST AGGREGATION
    :return: Cube WITH NO EDGES AND ONE Matrix PER SELECT NAME
    """
    # DESCEND THROUGH _filter/_nested UNTIL NEITHER IS PRESENT
    innermost = aggs
    while True:
        wrapped = coalesce(innermost._filter, innermost._nested)
        if not wrapped:
            break
        innermost = wrapped

    pairs = []
    for clause in select:
        is_count = (clause.aggregate == "count")
        pairs.append((clause, Matrix(dims=[], zeros=is_count)))

    data = {}
    for clause, matrix in pairs:
        matrix[tuple()] = innermost[clause.pull]
        data[clause.name] = matrix

    cube = Cube(query.select, [], data)
    cube.frum = query
    return cube
Ejemplo n.º 5
0
def es_deepop(es, mvel, query):
    """
    ANSWER AN EDGE QUERY WITH ONE SCRIPTED TERMS FACET

    THE EDGES ARE SENT AS THE select OF A TEMPORARY, EDGELESS QUERY COMPILED
    BY mvel.code.  EACH UNPACKED ROW HOLDS ONE VALUE PER EDGE, WITH THE VALUE
    TO ACCUMULATE IN THE LAST POSITION

    SIDE EFFECT: EACH EDGE GETS AN index, AND ITS DOMAIN PARTS (INCLUDING
    NULL) GET A dataIndex

    :param es: PASSED THROUGH TO es09.util.post
    :param mvel: COMPILER USED TO TURN THE TEMPORARY QUERY INTO A SCRIPT
    :param query: SUPPLIES select, edges, where AND limit
    :return: Cube WITH ONE Matrix NAMED query.select.name, AND frum SET TO query
    """
    FromES = es09.util.build_es_query(query)

    select = query.edges

    temp_query = query.copy()
    temp_query.select = select
    temp_query.edges = DictList()
    FromES.facets.mvel = {
        "terms": {
            "script_field": mvel.code(temp_query),
            "size": query.limit
        },
        "facet_filter": simplify_esfilter(query.where)
    }

    data = es09.util.post(es, FromES, query.limit)

    rows = unpack_terms(data.facets.mvel, query.edges)

    # NUMBER ALL EDGES FOR Qb INDEXING
    edges = query.edges
    for f, e in enumerate(edges):
        # REGISTER EVERY VALUE SEEN FOR THIS EDGE.  THE ROWS ARE WALKED DIRECTLY
        # (NOT VIA zip(*rows)[f]) SO AN EMPTY RESULT DOES NOT RAISE IndexError,
        # AND NO SUBSCRIPTABLE zip() RESULT IS REQUIRED
        for r in rows:
            e.domain.getPartByKey(r[f])

        e.index = f
        for p, part in enumerate(e.domain.partitions):
            part.dataIndex = p
        e.domain.NULL.dataIndex = len(e.domain.partitions)

    # MAKE CUBE
    dims = [len(e.domain.partitions) for e in query.edges]
    output = Matrix(*dims)

    # FILL CUBE
    for r in rows:
        term_coord = [
            e.domain.getPartByKey(r[i]).dataIndex for i, e in enumerate(edges)
        ]
        # LAST COLUMN OF THE ROW IS THE VALUE; ROWS SHARING A COORDINATE ARE SUMMED
        output[term_coord] = SUM(output[term_coord], r[-1])

    cube = Cube(query.select, query.edges, {query.select.name: output})
    cube.frum = query
    return cube
Ejemplo n.º 6
0
def es_deepop(es, mvel, query):
    """
    ANSWER AN EDGE QUERY WITH ONE SCRIPTED TERMS FACET

    THE EDGES ARE SENT AS THE select OF A TEMPORARY, EDGELESS QUERY COMPILED
    BY mvel.code.  EACH UNPACKED ROW HOLDS ONE VALUE PER EDGE, WITH THE VALUE
    TO ACCUMULATE IN THE LAST POSITION

    SIDE EFFECT: EACH EDGE GETS AN index, AND ITS DOMAIN PARTS (INCLUDING
    NULL) GET A dataIndex

    :param es: PASSED THROUGH TO es09.util.post
    :param mvel: COMPILER USED TO TURN THE TEMPORARY QUERY INTO A SCRIPT
    :param query: SUPPLIES select, edges, where AND limit
    :return: Cube WITH ONE Matrix NAMED query.select.name, AND frum SET TO query
    """
    FromES = es09.util.build_es_query(query)

    select = query.edges

    temp_query = query.copy()
    temp_query.select = select
    temp_query.edges = DictList()
    FromES.facets.mvel = {
        "terms": {
            "script_field": mvel.code(temp_query),
            "size": query.limit
        },
        "facet_filter": simplify_esfilter(query.where)
    }

    data = es09.util.post(es, FromES, query.limit)

    rows = unpack_terms(data.facets.mvel, query.edges)

    # NUMBER ALL EDGES FOR Qb INDEXING
    edges = query.edges
    for f, e in enumerate(edges):
        # REGISTER EVERY VALUE SEEN FOR THIS EDGE.  THE ROWS ARE WALKED DIRECTLY
        # (NOT VIA zip(*rows)[f]) SO AN EMPTY RESULT DOES NOT RAISE IndexError,
        # AND NO SUBSCRIPTABLE zip() RESULT IS REQUIRED
        for r in rows:
            e.domain.getPartByKey(r[f])

        e.index = f
        for p, part in enumerate(e.domain.partitions):
            part.dataIndex = p
        e.domain.NULL.dataIndex = len(e.domain.partitions)

    # MAKE CUBE
    dims = [len(e.domain.partitions) for e in query.edges]
    output = Matrix(*dims)

    # FILL CUBE
    for r in rows:
        term_coord = [e.domain.getPartByKey(r[i]).dataIndex for i, e in enumerate(edges)]
        # LAST COLUMN OF THE ROW IS THE VALUE; ROWS SHARING A COORDINATE ARE SUMMED
        output[term_coord] = SUM(output[term_coord], r[-1])

    cube = Cube(query.select, query.edges, {query.select.name: output})
    cube.frum = query
    return cube
Ejemplo n.º 7
0
def es_aggop(es, mvel, query):
    """
    RETURN ONE AGGREGATE PER SELECT CLAUSE, WITH NO EDGES

    WHEN EVERY CLAUSE IS A COUNT THE WORK IS HANDED TO es_countop.  OTHERWISE
    ONE STATISTICAL FACET IS REQUESTED PER DISTINCT SELECT value, AND CLAUSES
    THAT SHARE A value READ THEIR AGGREGATE FROM THE SAME FACET

    :param es: PASSED THROUGH TO es09.util.post (OR es_countop)
    :param mvel: ONLY FORWARDED TO es_countop
    :param query: SUPPLIES select, where AND limit
    :return: Cube WITH NO EDGES, ONE Matrix PER SELECT NAME, AND frum SET TO query
    """
    select = listwrap(query.select)
    FromES = build_es_query(query)

    isSimple = AND(aggregates[s.aggregate] == "count" for s in select)
    if isSimple:
        # SIMPLE, USE TERMS FACET INSTEAD
        # es_countop TAKES (es, mvel, query); mvel MUST BE FORWARDED
        return es_countop(es, mvel, query)

    value2facet = dict()  # ONLY ONE FACET NEEDED PER DISTINCT value
    name2facet = dict()  # MAP name TO FACET WITH STATS

    for s in select:
        if s.value not in value2facet:
            if is_keyword(s.value):
                stats = {"field": s.value}
            else:
                stats = {
                    "script": es09.expressions.compile_expression(s.value, query)
                }
            unwrap(FromES.facets)[s.name] = {
                "statistical": stats,
                "facet_filter": simplify_esfilter(query.where)
            }
            value2facet[s.value] = s.name
        name2facet[s.name] = value2facet[s.value]

    data = es09.util.post(es, FromES, query.limit)

    # LOOK UP THE FACET THAT WAS ACTUALLY REQUESTED FOR THIS CLAUSE'S value;
    # A CLAUSE THAT REUSES AN EARLIER value HAS NO FACET UNDER ITS OWN NAME
    matricies = {}
    for s in select:
        stats = fix_es_stats(data.facets[literal_field(name2facet[s.name])])
        matricies[s.name] = Matrix(value=stats[aggregates[s.aggregate]])

    cube = Cube(query.select, [], matricies)
    cube.frum = query
    return cube
Ejemplo n.º 8
0
def es_countop(es, mvel, query):
    """
    RETURN SINGLE COUNT

    ONE TERMS FACET IS REQUESTED PER SELECT CLAUSE, AND THE `total` OF EACH
    RETURNED FACET BECOMES THE VALUE OF THAT CLAUSE

    :param es: PASSED THROUGH TO es09.util.post
    :param mvel: NOT USED HERE; KEPT SO ALL es_* OPERATORS SHARE ONE SIGNATURE
    :param query: SUPPLIES select, edges AND limit
    :return: Cube WITH ONE Matrix PER SELECT NAME, AND frum SET TO query
    """
    select = listwrap(query.select)
    FromES = build_es_query(query)
    for s in select:
        if is_keyword(s.value):
            # ONLY DOCUMENTS THAT HAVE THE FIELD ARE COUNTED
            FromES.facets[s.name] = {
                "terms": {
                    "field": s.value,
                    "size": query.limit,
                },
                "facet_filter": {
                    "exists": {
                        "field": s.value
                    }
                }
            }
        else:
            # COMPLICATED value IS PROBABLY A SCRIPT, USE IT
            FromES.facets[s.name] = {
                "terms": {
                    "script_field":
                    es09.expressions.compile_expression(s.value, query),
                    "size":
                    200000
                }
            }

    data = es09.util.post(es, FromES, query.limit)

    # FACETS ARE A TOP-LEVEL MEMBER OF THE RESPONSE (A SIBLING OF hits, NOT A
    # CHILD OF IT), SO THE TOTALS MUST BE READ FROM data.facets
    matricies = {}
    for s in select:
        matricies[s.name] = Matrix(value=data.facets[s.name].total)

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube
Ejemplo n.º 9
0
def es_aggop(es, mvel, query):
    """
    RETURN ONE AGGREGATE PER SELECT CLAUSE, WITH NO EDGES

    WHEN EVERY CLAUSE IS A COUNT THE WORK IS HANDED TO es_countop.  OTHERWISE
    ONE STATISTICAL FACET IS REQUESTED PER DISTINCT SELECT value, AND CLAUSES
    THAT SHARE A value READ THEIR AGGREGATE FROM THE SAME FACET

    :param es: PASSED THROUGH TO es09.util.post (OR es_countop)
    :param mvel: ONLY FORWARDED TO es_countop
    :param query: SUPPLIES select, where AND limit
    :return: Cube WITH NO EDGES, ONE Matrix PER SELECT NAME, AND frum SET TO query
    """
    select = listwrap(query.select)
    FromES = build_es_query(query)

    isSimple = AND(aggregates[s.aggregate] == "count" for s in select)
    if isSimple:
        # SIMPLE, USE TERMS FACET INSTEAD
        # es_countop TAKES (es, mvel, query); mvel MUST BE FORWARDED
        return es_countop(es, mvel, query)

    value2facet = dict()  # ONLY ONE FACET NEEDED PER DISTINCT value
    name2facet = dict()   # MAP name TO FACET WITH STATS

    for s in select:
        if s.value not in value2facet:
            if is_keyword(s.value):
                stats = {"field": s.value}
            else:
                stats = {"script": es09.expressions.compile_expression(s.value, query)}
            unwrap(FromES.facets)[s.name] = {
                "statistical": stats,
                "facet_filter": simplify_esfilter(query.where)
            }
            value2facet[s.value] = s.name
        name2facet[s.name] = value2facet[s.value]

    data = es09.util.post(es, FromES, query.limit)

    # LOOK UP THE FACET THAT WAS ACTUALLY REQUESTED FOR THIS CLAUSE'S value;
    # A CLAUSE THAT REUSES AN EARLIER value HAS NO FACET UNDER ITS OWN NAME
    matricies = {}
    for s in select:
        stats = fix_es_stats(data.facets[literal_field(name2facet[s.name])])
        matricies[s.name] = Matrix(value=stats[aggregates[s.aggregate]])

    cube = Cube(query.select, [], matricies)
    cube.frum = query
    return cube
Ejemplo n.º 10
0
    dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
    for s in select:
        output[s.name] = Matrix(*dims)

    # FILL CUBE
    # EXPECTING ONLY SELECT CLAUSE FACETS
    for facetName, facet in data.facets.items():
        for term in facet.terms:
            term_coord = term2Parts(term.term).dataIndex
            for s in select:
                try:
                    output[s.name][term_coord] = term[aggregates[s.aggregate]]
                except Exception, e:
                    # USUALLY CAUSED BY output[s.name] NOT BEING BIG ENOUGH TO HANDLE NULL COUNTS
                    pass
    cube = Cube(query.select, query.edges, output)
    cube.query = query
    return cube


def _es_terms2(es, mvel, query):
    """
    WE ASSUME THERE ARE JUST TWO EDGES, AND EACH HAS A SIMPLE value
    """

    # REQUEST VALUES IN FIRST DIMENSION
    q1 = query.copy()
    q1.edges = query.edges[0:1:]
    values1 = es_terms(es, mvel, q1).edges[0].domain.partitions.value

    select = listwrap(query.select)
Ejemplo n.º 11
0
    dims = tuple(
        len(e.domain.partitions) + (0 if e.allowNulls is False else 1)
        for e in new_edges)
    matricies = [(s, Matrix(dims=dims, zeros=(s.aggregate == "count")))
                 for s in select]
    for row, agg in aggs_iterator(aggs, decoders):
        coord = tuple(d.get_index(row) for d in decoders)
        for s, m in matricies:
            try:
                if m[coord]:
                    Log.error("Not expected")
                m[coord] = agg[s.pull]
            except Exception, e:
                tuple(d.get_index(row) for d in decoders)
                Log.error("", e)
    cube = Cube(query.select, new_edges, {s.name: m for s, m in matricies})
    cube.frum = query
    return cube


def format_cube_from_aggop(decoders, aggs, start, query, select):
    agg = aggs
    b = coalesce(agg._filter, agg._nested)
    while b:
        agg = b
        b = coalesce(agg._filter, agg._nested)

    matricies = [(s, Matrix(dims=[], zeros=(s.aggregate == "count")))
                 for s in select]
    for s, m in matricies:
        m[tuple()] = agg[s.pull]
Ejemplo n.º 12
0
def es_setop(es, mvel, query):
    """
    RETURN THE SELECTED VALUES (OR A SINGLE COUNT) FOR A QUERY WITHOUT EDGES

    REQUEST AND DECODING ARE CHOSEN TOGETHER:
      * SINGLE SHALLOW SELECT WITH NO value   -> FILTERED QUERY, ANSWER IS hits.total
      * SINGLE SHALLOW SELECT OF A KEYWORD     -> PLAIN TERMS FACET ON THAT FIELD
      * ANYTHING ELSE                          -> SCRIPTED TERMS FACET, DECODED BY unpack_terms
    THE FACET IS ALWAYS NAMED "mvel"

    :param es: PASSED THROUGH TO es09.util.post
    :param mvel: COMPILER USED TO TURN A QUERY INTO A SCRIPT
    :param query: SUPPLIES frum, select, where, sort AND limit
    :return: Cube WITH NO EDGES, ONE Matrix PER SELECT NAME, AND frum SET TO query
    """
    FromES = es09.util.build_es_query(query)
    select = listwrap(query.select)

    isDeep = len(split_field(query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    isComplex = OR([s.value == None and s.aggregate not in ("count", "none") for s in select])   # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT

    # DECIDE ONCE WHICH REQUEST IS SENT, SO THE DECODING BELOW CAN NOT DISAGREE WITH IT
    isSimple = not isDeep and not isComplex and len(select) == 1
    countOnly = bool(isSimple and not select[0].value)
    termsOnly = bool(isSimple and not countOnly and isKeyword(select[0].value))

    if countOnly:
        FromES.query = {"filtered": {
            "query": {"match_all": {}},
            "filter": simplify_esfilter(query.where)
        }}
        FromES.size = 1  # PREVENT QUERY CHECKER FROM THROWING ERROR
    elif termsOnly:
        FromES.facets.mvel = {
            "terms": {
                "field": select[0].value,
                "size": coalesce(query.limit, 200000)
            },
            "facet_filter": simplify_esfilter(query.where)
        }
        if query.sort:
            s = query.sort
            if len(s) > 1:
                Log.error("can not sort by more than one field")

            s0 = s[0]
            if s0.field != select[0].value:
                Log.error("can not sort by anything other than count, or term")

            # THE ORDER BELONGS TO THE "mvel" FACET REQUESTED ABOVE; WRITING IT
            # TO facets.terms WOULD CREATE A SECOND, MALFORMED FACET
            FromES.facets.mvel.terms.order = "term" if s0.sort >= 0 else "reverse_term"
    elif not isDeep:
        # ALSO REACHED BY A SINGLE SHALLOW SELECT WHOSE value IS NOT A KEYWORD,
        # WHICH PREVIOUSLY GOT NO FACET AT ALL
        simple_query = query.copy()
        simple_query.where = TRUE_FILTER  # THE FACET FILTER IS FASTER
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(simple_query),
                "size": coalesce(simple_query.limit, 200000)
            },
            "facet_filter": simplify_esfilter(query.where)
        }
    else:
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(query),
                "size": coalesce(query.limit, 200000)
            },
            "facet_filter": simplify_esfilter(query.where)
        }

    data = es09.util.post(es, FromES, query.limit)

    if len(select) == 1 and not select[0].value:
        # SPECIAL CASE FOR SINGLE COUNT
        output = Matrix(value=data.hits.total)
        cube = Cube(query.select, [], {select[0].name: output})
    elif termsOnly:
        # SPECIAL CASE FOR SINGLE TERM; READ THE FACET UNDER THE NAME IT WAS REQUESTED WITH
        T = data.facets.mvel.terms
        output = Matrix.wrap([t.term for t in T])
        cube = Cube(query.select, [], {select[0].name: output})
    else:
        # SCRIPTED FACET: EVERY REMAINING CASE, SO cube IS ALWAYS BOUND
        data_list = unpack_terms(data.facets.mvel, select)
        if not data_list:
            cube = Cube(select, [], {s.name: Matrix.wrap([]) for s in select})
        else:
            # list() SO THE COLUMNS CAN BE INDEXED EVEN WHERE zip() IS LAZY
            output = list(zip(*data_list))
            cube = Cube(select, [], {s.name: Matrix(list=output[i]) for i, s in enumerate(select)})

    cube.frum = query
    return cube
Ejemplo n.º 13
0
def list_aggs(frum, query):
    """
    AGGREGATE A LIST OF RECORDS INTO A Cube, ONE Matrix PER SELECT CLAUSE

    SIDE EFFECTS: EDGES WITH A DefaultDomain GET A SimpleSetDomain BUILT FROM
    THE DATA, AND EVERY SELECT CLAUSE GETS A COMPILED "exec" FUNCTION

    :param frum: ITERABLE OF RECORDS (EACH MUST SUPPORT copy()); ALSO PROVIDES select()
    :param query: SUPPLIES select, edges AND where
    :return: Cube(select, query.edges, MATRICES KEYED BY SELECT NAME)
    """
    # NOTE: `== None` / `!= None` ARE KEPT AS-IS THROUGHOUT; PRESUMABLY THE
    # LIBRARY'S Null OBJECT COMPARES EQUAL TO None - CONFIRM BEFORE USING `is`
    select = listwrap(query.select)

    # DEFAULT DOMAINS GET THEIR PARTS FROM THE VALUES FOUND IN frum
    for edge in query.edges:
        if not isinstance(edge.domain, DefaultDomain):
            continue
        distinct = set(frum.select(edge.value))
        edge.domain = SimpleSetDomain(partitions=list(sorted(distinct)))

    # COMPILE EACH SELECT EXPRESSION ONCE
    for clause in listwrap(query.select):
        clause["exec"] = qb_expression_to_function(clause.value)

    # ONE MATRIX PER SELECT; AN EXTRA SLOT PER EDGE WHEN NULLS ARE ALLOWED
    result = {}
    for clause in select:
        dims = [
            len(edge.domain.partitions) + (1 if edge.allowNulls else 0)
            for edge in query.edges
        ]
        result[clause.name] = Matrix(dims=dims, zeros=clause.aggregate == "count")

    where = qb_expression_to_function(query.where)
    for record in filter(where, frum):
        record = record.copy()
        # LIST OF MATCHING COORDINATE FAMILIES, USUALLY ONLY ONE PER FAMILY BUT JOINS WITH EDGES CAN CAUSE MORE
        coord = [get_matches(edge, record) for edge in query.edges]

        for clause in select:
            mat = result[clause.name]
            agg = clause.aggregate
            var = clause.value

            if agg == "count":
                if var == "." or var == None:
                    # COUNTING RECORDS: NO NEED TO EVALUATE ANYTHING
                    for c in itertools.product(*coord):
                        mat[c] += 1
                else:
                    # COUNTING NON-NULL VALUES OF THE EXPRESSION
                    for c in itertools.product(*coord):
                        for edge, cc in zip(query.edges, c):
                            record[edge.name] = cc
                        if clause["exec"](record, c, frum) != None:
                            mat[c] += 1
                continue

            for c in itertools.product(*coord):
                acc = mat[c]
                if acc == None:
                    # FIRST VALUE FOR THIS CELL: CREATE ITS ACCUMULATOR
                    factory = windows.name2accumulator.get(agg)
                    if factory == None:
                        Log.error(
                            "select aggregate {{agg}} is not recognized",
                            agg=agg)
                    acc = factory(**clause)
                    mat[c] = acc
                # BECAUSE WE DO NOT KNOW IF s.exec NEEDS THESE EDGES, SO WE PASS THEM ANYWAY
                for edge, cc in zip(query.edges, c):
                    record[edge.name] = edge.domain.partitions[cc]
                acc.add(clause["exec"](record, c, frum))

    # REPLACE EACH ACCUMULATOR WITH ITS FINAL VALUE
    for clause in select:
        if clause.aggregate != "count":
            m = result[clause.name]
            for c, var in m.items():
                if var != None:
                    m[c] = var.end()

    return Cube(select, query.edges, result)
Ejemplo n.º 14
0
                        [unwraplist(t[source][s.value]) for t in T])

                elif isinstance(s.value, basestring):  # fields
                    matricies[s.name] = Matrix.wrap(
                        [unwraplist(t[source].get(s.value)) for t in T])
                else:
                    matricies[s.name] = Matrix.wrap(
                        [unwraplist(t[source].get(s.name)) for t in T])
        except Exception, e:
            Log.error("", e)
    cube = Cube(select,
                edges=[{
                    "name": "rownum",
                    "domain": {
                        "type": "rownum",
                        "min": 0,
                        "max": len(T),
                        "interval": 1
                    }
                }],
                data=matricies)
    return cube


# REGISTER THE RESULT FORMATTERS IN format_dispatch, KEYED BY FORMAT NAME
# (None GETS THE SAME ENTRY AS "cube").  EACH ENTRY IS A 3-TUPLE OF
# (formatter, None, "application/json"); THE PURPOSE OF THE MIDDLE SLOT IS NOT
# VISIBLE FROM HERE.
# NOTE(review): set_default PRESUMABLY ONLY FILLS KEYS THAT ARE MISSING FROM
# format_dispatch - CONFIRM AGAINST ITS DEFINITION
set_default(
    format_dispatch, {
        None: (format_cube, None, "application/json"),
        "cube": (format_cube, None, "application/json"),
        "table": (format_table, None, "application/json"),
        "list": (format_list, None, "application/json")
    })
Ejemplo n.º 15
0
            matricies[s.name] = Matrix.wrap([t._source for t in T])
        elif isinstance(s.value, Mapping):
            # for k, v in s.value.items():
            #     matricies[join_field(split_field(s.name)+[k])] = Matrix.wrap([unwrap(t.fields)[v] for t in T])
            matricies[s.name] = Matrix.wrap([{k: unwrap(t.fields).get(v, None) for k, v in s.value.items()}for t in T])
        elif isinstance(s.value, list):
            matricies[s.name] = Matrix.wrap([tuple(unwrap(t.fields).get(ss, None) for ss in s.value) for t in T])
        elif not s.value:
            matricies[s.name] = Matrix.wrap([unwrap(t.fields).get(s.value, None) for t in T])
        else:
            try:
                matricies[s.name] = Matrix.wrap([unwrap(t.fields).get(s.value, None) for t in T])
            except Exception, e:
                Log.error("", e)

    cube = Cube(query.select, query.edges, matricies, frum=query)
    cube.frum = query
    return cube


def is_setop(query):
    select = listwrap(query.select)

    if not query.edges:
        isDeep = len(split_field(query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
        simpleAgg = AND([s.aggregate in ("count", "none") for s in select])   # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT

        # NO EDGES IMPLIES SIMPLER QUERIES: EITHER A SET OPERATION, OR RETURN SINGLE AGGREGATE
        if simpleAgg or isDeep:
            return True
    else:
Ejemplo n.º 16
0
def cube_aggs(frum, query):
    """
    AGGREGATE THE VALUES OF AN EXISTING CUBE INTO A NEW Cube

    SIDE EFFECTS: QUERY EDGES THAT NAME AN EDGE OF frum GET THEIR value
    EXTENDED WITH THAT EDGE'S DOMAIN KEY (AND A DefaultDomain IS REPLACED BY A
    COPY OF THE frum DOMAIN); RECORDS FROM frum.values() MAY GET EDGE PARTS
    FILLED IN

    :param frum: SOURCE CUBE; PROVIDES select, edges AND values()
    :param query: SUPPLIES select, edges AND where
    :return: Cube(select, query.edges, MATRICES KEYED BY SELECT NAME)
    """
    # NOTE: `== None` / `!= None` ARE KEPT AS-IS THROUGHOUT; PRESUMABLY THE
    # LIBRARY'S Null OBJECT COMPARES EQUAL TO None - CONFIRM BEFORE USING `is`
    select = listwrap(query.select)

    # MATCH EDGES IN QUERY TO ONES IN frum
    for edge in query.edges:
        for source_select in frum.select:
            if source_select.name == edge.value:
                Log.error("Not implemented yet")

        # DEFAULT DOMAINS CAN EASILY BE LOOKED UP FROM frum
        adopt_domain = isinstance(edge.domain, DefaultDomain)
        for source_edge in frum.edges:
            if source_edge.name != edge.value:
                continue
            if adopt_domain:
                edge.domain = SimpleSetDomain(**source_edge.domain.as_dict())
            edge.value = edge.value + "." + source_edge.domain.key
            break

    # ONE MATRIX PER SELECT; AN EXTRA SLOT PER EDGE WHEN NULLS ARE ALLOWED
    result = {}
    for clause in select:
        dims = [
            len(edge.domain.partitions) + (1 if edge.allowNulls else 0)
            for edge in query.edges
        ]
        result[clause.name] = Matrix(dims=dims, zeros=clause.aggregate == "count")

    where = qb_expression_to_function(query.where)
    for record in filter(where, frum.values()):
        # LIST OF MATCHING COORDINATE FAMILIES, USUALLY ONLY ONE PER FAMILY BUT JOINS WITH EDGES CAN CAUSE MORE
        coord = []
        for edge in query.edges:
            matches = get_matches(edge, record)
            coord.append(matches)
            # A SINGLE MATCH LETS US FILL IN THE RECORD'S MISSING EDGE PART
            if len(matches) == 1 and record[edge.name] == None:
                record[edge.name] = edge.domain.partitions[matches[0]]

        for clause in select:
            mat = result[clause.name]
            agg = clause.aggregate
            var = clause.value
            val = qb_expression_to_function(var)(record)

            if agg == "count":
                # COUNT EVERY RECORD, OR ONLY THOSE WITH A NON-NULL VALUE
                if var == "." or var == None or val != None:
                    for c in itertools.product(*coord):
                        mat[c] += 1
                continue

            for c in itertools.product(*coord):
                acc = mat[c]
                if acc == None:
                    # FIRST VALUE FOR THIS CELL: CREATE ITS ACCUMULATOR
                    factory = windows.name2accumulator.get(agg)
                    if factory == None:
                        Log.error(
                            "select aggregate {{agg}} is not recognized",
                            agg=agg)
                    acc = factory(**clause)
                    mat[c] = acc
                acc.add(val)

    # REPLACE EACH ACCUMULATOR WITH ITS FINAL VALUE
    for clause in select:
        if clause.aggregate != "count":
            m = result[clause.name]
            for c, var in m.items():
                if var != None:
                    m[c] = var.end()

    return Cube(select, query.edges, result)
Ejemplo n.º 17
0
        elif isinstance(s.value, list):
            matricies[s.name] = Matrix.wrap([
                tuple(unwrap(t.fields).get(ss, None) for ss in s.value)
                for t in T
            ])
        elif not s.value:
            matricies[s.name] = Matrix.wrap(
                [unwrap(t.fields).get(s.value, None) for t in T])
        else:
            try:
                matricies[s.name] = Matrix.wrap(
                    [unwrap(t.fields).get(s.value, None) for t in T])
            except Exception, e:
                Log.error("", e)

    cube = Cube(query.select, query.edges, matricies, frum=query)
    cube.frum = query
    return cube


def is_setop(query):
    select = listwrap(query.select)

    if not query.edges:
        isDeep = len(split_field(
            query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
        simpleAgg = AND([
            s.aggregate in ("count", "none") for s in select
        ])  # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT

        # NO EDGES IMPLIES SIMPLER QUERIES: EITHER A SET OPERATION, OR RETURN SINGLE AGGREGATE
Ejemplo n.º 18
0
def es_setop(es, mvel, query):
    """
    RETURN THE SELECTED VALUES (OR A SINGLE COUNT) FOR A QUERY WITHOUT EDGES

    REQUEST AND DECODING ARE CHOSEN TOGETHER:
      * SINGLE SHALLOW SELECT WITH NO value   -> FILTERED QUERY, ANSWER IS hits.total
      * SINGLE SHALLOW SELECT OF A KEYWORD     -> PLAIN TERMS FACET ON THAT FIELD
      * ANYTHING ELSE                          -> SCRIPTED TERMS FACET, DECODED BY unpack_terms
    THE FACET IS ALWAYS NAMED "mvel"

    :param es: PASSED THROUGH TO es09.util.post
    :param mvel: COMPILER USED TO TURN A QUERY INTO A SCRIPT
    :param query: SUPPLIES frum, select, where, sort AND limit
    :return: Cube WITH NO EDGES, ONE Matrix PER SELECT NAME, AND frum SET TO query
    """
    FromES = es09.util.build_es_query(query)
    select = listwrap(query.select)

    isDeep = len(split_field(
        query.frum.name)) > 1  # LOOKING INTO NESTED WILL REQUIRE A SCRIPT
    isComplex = OR([
        s.value == None and s.aggregate not in ("count", "none")
        for s in select
    ])  # CONVERTING esfilter DEFINED PARTS WILL REQUIRE SCRIPT

    # DECIDE ONCE WHICH REQUEST IS SENT, SO THE DECODING BELOW CAN NOT DISAGREE WITH IT
    isSimple = not isDeep and not isComplex and len(select) == 1
    countOnly = bool(isSimple and not select[0].value)
    termsOnly = bool(isSimple and not countOnly
                     and isKeyword(select[0].value))

    if countOnly:
        FromES.query = {
            "filtered": {
                "query": {
                    "match_all": {}
                },
                "filter": simplify_esfilter(query.where)
            }
        }
        FromES.size = 1  # PREVENT QUERY CHECKER FROM THROWING ERROR
    elif termsOnly:
        FromES.facets.mvel = {
            "terms": {
                "field": select[0].value,
                "size": coalesce(query.limit, 200000)
            },
            "facet_filter": simplify_esfilter(query.where)
        }
        if query.sort:
            s = query.sort
            if len(s) > 1:
                Log.error("can not sort by more than one field")

            s0 = s[0]
            if s0.field != select[0].value:
                Log.error(
                    "can not sort by anything other than count, or term")

            # THE ORDER BELONGS TO THE "mvel" FACET REQUESTED ABOVE; WRITING IT
            # TO facets.terms WOULD CREATE A SECOND, MALFORMED FACET
            FromES.facets.mvel.terms.order = "term" if s0.sort >= 0 else "reverse_term"
    elif not isDeep:
        # ALSO REACHED BY A SINGLE SHALLOW SELECT WHOSE value IS NOT A KEYWORD,
        # WHICH PREVIOUSLY GOT NO FACET AT ALL
        simple_query = query.copy()
        simple_query.where = TRUE_FILTER  # THE FACET FILTER IS FASTER
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(simple_query),
                "size": coalesce(simple_query.limit, 200000)
            },
            "facet_filter": simplify_esfilter(query.where)
        }
    else:
        FromES.facets.mvel = {
            "terms": {
                "script_field": mvel.code(query),
                "size": coalesce(query.limit, 200000)
            },
            "facet_filter": simplify_esfilter(query.where)
        }

    data = es09.util.post(es, FromES, query.limit)

    if len(select) == 1 and not select[0].value:
        # SPECIAL CASE FOR SINGLE COUNT
        output = Matrix(value=data.hits.total)
        cube = Cube(query.select, [], {select[0].name: output})
    elif termsOnly:
        # SPECIAL CASE FOR SINGLE TERM; READ THE FACET UNDER THE NAME IT WAS REQUESTED WITH
        T = data.facets.mvel.terms
        output = Matrix.wrap([t.term for t in T])
        cube = Cube(query.select, [], {select[0].name: output})
    else:
        # SCRIPTED FACET: EVERY REMAINING CASE, SO cube IS ALWAYS BOUND
        data_list = unpack_terms(data.facets.mvel, select)
        if not data_list:
            cube = Cube(select, [], {s.name: Matrix.wrap([]) for s in select})
        else:
            # list() SO THE COLUMNS CAN BE INDEXED EVEN WHERE zip() IS LAZY
            output = list(zip(*data_list))
            cube = Cube(
                select, [],
                {s.name: Matrix(list=output[i])
                 for i, s in enumerate(select)})

    cube.frum = query
    return cube
Ejemplo n.º 19
0
    ]
    for s in select:
        output[s.name] = Matrix(*dims)

    # FILL CUBE
    # EXPECTING ONLY SELECT CLAUSE FACETS
    for facetName, facet in data.facets.items():
        for term in facet.terms:
            term_coord = term2Parts(term.term).dataIndex
            for s in select:
                try:
                    output[s.name][term_coord] = term[aggregates[s.aggregate]]
                except Exception, e:
                    # USUALLY CAUSED BY output[s.name] NOT BEING BIG ENOUGH TO HANDLE NULL COUNTS
                    pass
    cube = Cube(query.select, query.edges, output)
    cube.query = query
    return cube


def _es_terms2(es, mvel, query):
    """
    WE ASSUME THERE ARE JUST TWO EDGES, AND EACH HAS A SIMPLE value
    """

    # REQUEST VALUES IN FIRST DIMENSION
    q1 = query.copy()
    q1.edges = query.edges[0:1:]
    values1 = es_terms(es, mvel, q1).edges[0].domain.partitions.value

    select = listwrap(query.select)
Ejemplo n.º 20
0
def _es_terms2(es, mvel, query):
    """
    WE ASSUME THERE ARE JUST TWO EDGES, AND EACH HAS A SIMPLE value

    THE VALUES OF THE FIRST EDGE ARE FETCHED WITH es_terms, THEN ONE TERMS
    FACET ON THE SECOND EDGE IS REQUESTED FOR EVERY (select, first-edge value)
    PAIR.  FACETS ARE NAMED "<select name>,<index of first-edge value>"

    SIDE EFFECT: query.edges[1].domain.partitions IS REPLACED WITH THE SORTED
    UNION OF ALL TERMS RETURNED FOR THE SECOND EDGE

    :param es: PASSED THROUGH TO es_terms AND es09.util.post
    :param mvel: PASSED THROUGH TO es_terms
    :param query: SUPPLIES select, edges, where AND limit
    :return: Cube WITH ONE 2D Matrix PER SELECT NAME, AND query SET TO query
    """

    # REQUEST VALUES IN FIRST DIMENSION
    q1 = query.copy()
    q1.edges = query.edges[0:1:]
    values1 = es_terms(es, mvel, q1).edges[0].domain.partitions.value

    select = listwrap(query.select)
    FromES = build_es_query(query)
    for s in select:
        for i, v in enumerate(values1):
            FromES.facets[s.name + "," + str(i)] = {
                "terms": {
                    "field": query.edges[1].value,
                    "size": coalesce(query.limit, 200000)
                },
                "facet_filter":
                simplify_esfilter({
                    "and": [query.where, {
                        "term": {
                            query.edges[0].value: v
                        }
                    }]
                })
            }

    data = es09.util.post(es, FromES, query.limit)

    # UNION ALL TERMS FROM SECOND DIMENSION
    values2 = set()
    for k, f in data.facets.items():
        values2.update(f.terms.term)
    values2 = qb.sort(values2)
    term2index = {v: i for i, v in enumerate(values2)}
    query.edges[1].domain.partitions = DictList([{
        "name": v,
        "value": v
    } for v in values2])

    # MAKE CUBE
    output = {}
    dims = [len(values1), len(values2)]
    for s in select:
        output[s.name] = Matrix(*dims)

    # FILL CUBE
    # EXPECTING ONLY SELECT CLAUSE FACETS
    for facetName, facet in data.facets.items():
        # THE INDEX IS WHATEVER FOLLOWS THE LAST COMMA; SPLITTING FROM THE RIGHT
        # KEEPS SELECT NAMES THAT THEMSELVES CONTAIN A COMMA INTACT
        select_name, index = facetName.rsplit(",", 1)
        s = [s for s in select if s.name == select_name][0]
        i1 = int(index)
        for term in facet.terms:
            i2 = term2index[term.term]
            output[s.name][(i1, i2)] = term[aggregates[s.aggregate]]

    cube = Cube(query.select, query.edges, output)
    cube.query = query
    return cube
Ejemplo n.º 21
0
def es_terms_stats(esq, mvel, query):
    """
    BUILD ONE terms_stats FACET FOR EVERY (select, COMBINATION OF FACET-EDGE
    PARTS), POST THEM AS A SINGLE REQUEST, AND RETURN THE RESULT AS A Cube

    ONE EDGE (THE "SPECIAL" EDGE) IS HANDLED BY THE FACET'S key_field INSTEAD
    OF BY SEPARATE FACETS: EITHER THE SINGLE EDGE WHOSE DOMAIN TYPE IS NOT IN
    domains.KNOWN, OR, FAILING THAT, THE ELIGIBLE EDGE WITH THE MOST PARTITIONS

    :param esq: esq.query() IS USED TO COUNT THE REAL COMBINATIONS, AND esq.es
                IS PASSED TO es09.util.post()
    :param mvel: PASSED TO compileEdges2Term() AND buildCondition(); ALSO
                 COMPILES select VALUES THAT ARE NOT KEYWORDS
    :param query: select, edges, where, limit AND frum ARE READ.  PARTITIONS
                  GET A dataIndex, AND AN OPEN-ENDED EDGE GETS ITS
                  domain.partitions AND domain.map REPLACED
    :return: Cube WITH ONE Matrix PER select, AND cube.frum SET TO query
    """
    select = listwrap(query.select)
    facetEdges = []  # EDGES THAT WILL REQUIRE A FACET FOR EACH PART
    termsEdges = DictList()
    specialEdge = None
    special_index = -1

    # A SPECIAL EDGE IS ONE THAT HAS AN UNDEFINED NUMBER OF PARTITIONS AT QUERY TIME
    # FIND THE specialEdge, IF ONE
    for f, tedge in enumerate(query.edges):
        if tedge.domain.type in domains.KNOWN:
            for p, part in enumerate(tedge.domain.partitions):
                part.dataIndex = p

            # FACETS ARE ONLY REQUIRED IF SQL JOIN ON DOMAIN IS REQUIRED (RANGE QUERY)
            # OR IF WE ARE NOT SIMPLY COUNTING
            # OR IF NO SCRIPTING IS ALLOWED (SOME OTHER CODE IS RESPONSIBLE FOR SETTING isFacet)
            # OR IF WE JUST WANT TO FORCE IT :)
            # OF COURSE THE default EDGE IS NOT EXPLICIT, SO MUST BE A TERM

            facetEdges.append(tedge)
        else:
            if specialEdge:
                Log.error(
                    "There is more than one open-ended edge: self can not be handled"
                )
            specialEdge = tedge
            special_index = f
            termsEdges.append(tedge)

    if not specialEdge:
        # WE SERIOUSLY WANT A SPECIAL EDGE, OTHERWISE WE WILL HAVE TOO MANY FACETS
        # THE BIGGEST EDGE MAY BE COLLAPSED TO A TERM, MAYBE?
        num_parts = 0
        special_index = -1
        for i, e in enumerate(facetEdges):
            part_count = len(e.domain.partitions)
            simple = (e.value and is_keyword(e.value)) or len(e.domain.dimension.fields) == 1
            if simple and part_count > num_parts:
                num_parts = part_count
                specialEdge = e
                special_index = i

        # NOTE(review): IF NO EDGE QUALIFIED ABOVE, special_index IS STILL -1, SO
        # THE LAST FACET EDGE IS POPPED AND None IS APPENDED - CONFIRM THIS IS INTENDED
        facetEdges.pop(special_index)
        termsEdges.append(specialEdge)

    total_facets = PRODUCT(len(f.domain.partitions)
                           for f in facetEdges) * len(select)
    if total_facets > 100:
        # WE GOT A PROBLEM, LETS COUNT THE SIZE OF REALITY:
        counts = esq.query({
            "from": query.frum,
            "select": {
                "aggregate": "count"
            },
            "edges": facetEdges,
            "where": query.where,
            "limit": query.limit
        })

        esFacets = []

        def add_facet(value, parts, cube):
            # KEEP ONLY THE COMBINATIONS THAT HAVE A TRUTHY (NON-ZERO) COUNT
            if value:
                esFacets.append(parts)

        counts["count"].forall(add_facet)

        Log.note(
            "{{theory_count}} theoretical combinations, {{real_count}} actual combos found",
            real_count=len(esFacets),
            theory_count=total_facets)

        if not esFacets:
            # MAKE EMPTY CUBE
            matricies = {}
            dims = [
                len(e.domain.partitions) + (1 if e.allowNulls else 0)
                for e in query.edges
            ]
            for s in select:
                matricies[s.name] = Matrix(*dims)
            cube = Cube(query.select, query.edges, matricies)
            cube.frum = query
            return cube

    else:
        # GENERATE ALL COMBOS
        esFacets = getAllEdges(facetEdges)

    calcTerm = compileEdges2Term(mvel, termsEdges, DictList())
    term2parts = calcTerm.term2parts

    if len(esFacets) * len(select) > 1000:
        # WE HAVE SOME SERIOUS PERMUTATIONS, WE MUST ISSUE MULTIPLE QUERIES
        Log.error("not implemented yet")

    FromES = build_es_query(query)

    for s in select:
        for parts in esFacets:
            condition = DictList()
            # FACET NAME ENCODES "<select name>,<dataIndex>,<dataIndex>,..."
            name = [literal_field(s.name)]
            for f, fedge in enumerate(facetEdges):
                name.append(str(parts[f].dataIndex))
                condition.append(buildCondition(mvel, fedge, parts[f]))
            condition.append(query.where)
            name = ",".join(name)

            FromES.facets[name] = {
                "terms_stats": {
                    "key_field":
                    calcTerm.field,
                    "value_field":
                    s.value if is_keyword(s.value) else None,
                    "value_script":
                    mvel.compile_expression(s.value)
                    if not is_keyword(s.value) else None,
                    "size":
                    coalesce(query.limit, 200000)
                }
            }
            if condition:
                FromES.facets[name].facet_filter = simplify_esfilter(
                    {"and": condition})

    data = es09.util.post(esq.es, FromES, query.limit)

    if specialEdge.domain.type not in domains.KNOWN:
        # WE BUILD THE PARTS BASED ON THE RESULTS WE RECEIVED
        # PARTS ARE KEYED BY THE TERM (THE SAME KEY THE FILL LOOP BELOW LOOKS UP);
        # A MEMBERSHIP TEST IS USED BECAUSE A PLAIN dict RAISES ON A MISSING KEY
        partitions = DictList()
        term2part = {}
        for facetName, parts in data.facets.items():
            for stats in parts.terms:
                term = stats.term
                if term not in term2part:
                    part = {"value": term, "name": term}
                    partitions.append(part)
                    term2part[term] = part

        partitions.sort(specialEdge.domain.compare)
        for p, part in enumerate(partitions):
            part.dataIndex = p

        specialEdge.domain.map = term2part
        specialEdge.domain.partitions = partitions

    # MAKE CUBE
    matricies = {}
    dims = [
        len(e.domain.partitions) + (1 if e.allowNulls else 0)
        for e in query.edges
    ]
    for s in select:
        matricies[s.name] = Matrix(*dims)

    name2agg = {s.name: aggregates[s.aggregate] for s in select}

    # FILL CUBE
    for edgeName, parts in data.facets.items():
        temp = edgeName.split(",")
        pre_coord = tuple(int(c) for c in temp[1:])
        sname = temp[0]

        for stats in parts.terms:
            if specialEdge:
                # SPLICE THE SPECIAL EDGE'S INDEX INTO THE FACET-EDGE COORDINATES
                special = term2parts(stats.term)[0]
                coord = pre_coord[:special_index] + (
                    special.dataIndex, ) + pre_coord[special_index:]
            else:
                coord = pre_coord
            matricies[sname][coord] = stats[name2agg[sname]]

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube
Ejemplo n.º 22
0
def es_terms_stats(esq, mvel, query):
    """
    BUILD ONE terms_stats FACET FOR EVERY (select, COMBINATION OF FACET-EDGE
    PARTS), POST THEM AS A SINGLE REQUEST, AND RETURN THE RESULT AS A Cube

    ONE EDGE (THE "SPECIAL" EDGE) IS HANDLED BY THE FACET'S key_field INSTEAD
    OF BY SEPARATE FACETS: EITHER THE SINGLE EDGE WHOSE DOMAIN TYPE IS NOT IN
    domains.KNOWN, OR, FAILING THAT, THE ELIGIBLE EDGE WITH THE MOST PARTITIONS

    :param esq: esq.query() IS USED TO COUNT THE REAL COMBINATIONS, AND esq.es
                IS PASSED TO es09.util.post()
    :param mvel: PASSED TO compileEdges2Term() AND buildCondition(); ALSO
                 COMPILES select VALUES THAT ARE NOT KEYWORDS
    :param query: select, edges, where, limit AND frum ARE READ.  PARTITIONS
                  GET A dataIndex, AND AN OPEN-ENDED EDGE GETS ITS
                  domain.partitions AND domain.map REPLACED
    :return: Cube WITH ONE Matrix PER select, AND cube.frum SET TO query
    """
    select = listwrap(query.select)
    facetEdges = []    # EDGES THAT WILL REQUIRE A FACET FOR EACH PART
    termsEdges = DictList()
    specialEdge = None
    special_index = -1

    # A SPECIAL EDGE IS ONE THAT HAS AN UNDEFINED NUMBER OF PARTITIONS AT QUERY TIME
    # FIND THE specialEdge, IF ONE
    for f, tedge in enumerate(query.edges):
        if tedge.domain.type in domains.KNOWN:
            for p, part in enumerate(tedge.domain.partitions):
                part.dataIndex = p

            # FACETS ARE ONLY REQUIRED IF SQL JOIN ON DOMAIN IS REQUIRED (RANGE QUERY)
            # OR IF WE ARE NOT SIMPLY COUNTING
            # OR IF NO SCRIPTING IS ALLOWED (SOME OTHER CODE IS RESPONSIBLE FOR SETTING isFacet)
            # OR IF WE JUST WANT TO FORCE IT :)
            # OF COURSE THE default EDGE IS NOT EXPLICIT, SO MUST BE A TERM

            facetEdges.append(tedge)
        else:
            if specialEdge:
                Log.error("There is more than one open-ended edge: self can not be handled")
            specialEdge = tedge
            special_index = f
            termsEdges.append(tedge)

    if not specialEdge:
        # WE SERIOUSLY WANT A SPECIAL EDGE, OTHERWISE WE WILL HAVE TOO MANY FACETS
        # THE BIGGEST EDGE MAY BE COLLAPSED TO A TERM, MAYBE?
        num_parts = 0
        special_index = -1
        for i, e in enumerate(facetEdges):
            part_count = len(e.domain.partitions)
            simple = (e.value and is_keyword(e.value)) or len(e.domain.dimension.fields) == 1
            if simple and part_count > num_parts:
                num_parts = part_count
                specialEdge = e
                special_index = i

        # NOTE(review): IF NO EDGE QUALIFIED ABOVE, special_index IS STILL -1, SO
        # THE LAST FACET EDGE IS POPPED AND None IS APPENDED - CONFIRM THIS IS INTENDED
        facetEdges.pop(special_index)
        termsEdges.append(specialEdge)

    total_facets = PRODUCT(len(f.domain.partitions) for f in facetEdges) * len(select)
    if total_facets > 100:
        # WE GOT A PROBLEM, LETS COUNT THE SIZE OF REALITY:
        counts = esq.query({
            "from": query.frum,
            "select": {"aggregate": "count"},
            "edges": facetEdges,
            "where": query.where,
            "limit": query.limit
        })

        esFacets = []

        def add_facet(value, parts, cube):
            # KEEP ONLY THE COMBINATIONS THAT HAVE A TRUTHY (NON-ZERO) COUNT
            if value:
                esFacets.append(parts)

        counts["count"].forall(add_facet)

        Log.note("{{theory_count}} theoretical combinations, {{real_count}} actual combos found",  real_count= len(esFacets),  theory_count=total_facets)

        if not esFacets:
            # MAKE EMPTY CUBE
            matricies = {}
            dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
            for s in select:
                matricies[s.name] = Matrix(*dims)
            cube = Cube(query.select, query.edges, matricies)
            cube.frum = query
            return cube

    else:
        # GENERATE ALL COMBOS
        esFacets = getAllEdges(facetEdges)

    calcTerm = compileEdges2Term(mvel, termsEdges, DictList())
    term2parts = calcTerm.term2parts

    if len(esFacets) * len(select) > 1000:
        # WE HAVE SOME SERIOUS PERMUTATIONS, WE MUST ISSUE MULTIPLE QUERIES
        Log.error("not implemented yet")

    FromES = build_es_query(query)

    for s in select:
        for parts in esFacets:
            condition = DictList()
            # FACET NAME ENCODES "<select name>,<dataIndex>,<dataIndex>,..."
            name = [literal_field(s.name)]
            for f, fedge in enumerate(facetEdges):
                name.append(str(parts[f].dataIndex))
                condition.append(buildCondition(mvel, fedge, parts[f]))
            condition.append(query.where)
            name = ",".join(name)

            FromES.facets[name] = {
                "terms_stats": {
                    "key_field": calcTerm.field,
                    "value_field": s.value if is_keyword(s.value) else None,
                    "value_script": mvel.compile_expression(s.value) if not is_keyword(s.value) else None,
                    "size": coalesce(query.limit, 200000)
                }
            }
            if condition:
                FromES.facets[name].facet_filter = simplify_esfilter({"and": condition})

    data = es09.util.post(esq.es, FromES, query.limit)

    if specialEdge.domain.type not in domains.KNOWN:
        # WE BUILD THE PARTS BASED ON THE RESULTS WE RECEIVED
        # PARTS ARE KEYED BY THE TERM (THE SAME KEY THE FILL LOOP BELOW LOOKS UP);
        # A MEMBERSHIP TEST IS USED BECAUSE A PLAIN dict RAISES ON A MISSING KEY
        partitions = DictList()
        term2part = {}
        for facetName, parts in data.facets.items():
            for stats in parts.terms:
                term = stats.term
                if term not in term2part:
                    part = {"value": term, "name": term}
                    partitions.append(part)
                    term2part[term] = part

        partitions.sort(specialEdge.domain.compare)
        for p, part in enumerate(partitions):
            part.dataIndex = p

        specialEdge.domain.map = term2part
        specialEdge.domain.partitions = partitions

    # MAKE CUBE
    matricies = {}
    dims = [len(e.domain.partitions) + (1 if e.allowNulls else 0) for e in query.edges]
    for s in select:
        matricies[s.name] = Matrix(*dims)

    name2agg = {s.name: aggregates[s.aggregate] for s in select}

    # FILL CUBE
    for edgeName, parts in data.facets.items():
        temp = edgeName.split(",")
        pre_coord = tuple(int(c) for c in temp[1:])
        sname = temp[0]

        for stats in parts.terms:
            if specialEdge:
                # SPLICE THE SPECIAL EDGE'S INDEX INTO THE FACET-EDGE COORDINATES
                special = term2parts(stats.term)[0]
                coord = pre_coord[:special_index] + (special.dataIndex, ) + pre_coord[special_index:]
            else:
                coord = pre_coord
            matricies[sname][coord] = stats[name2agg[sname]]

    cube = Cube(query.select, query.edges, matricies)
    cube.frum = query
    return cube