Esempio n. 1
0
    def __init__(self, query_path, snowflake):
        """
        Bind this object to one query path of the given snowflake.

        The path is taken from `snowflake.query_paths` when one of them matches
        `query_path`; otherwise a multi-valued column of that name is used.

        :param query_path: untyped path, compared with `untype_path()` of the candidates
        :param snowflake: read for `query_paths`, `columns` and `name`
        """
        if not is_list(snowflake.query_paths[0]):
            Log.error("Snowflake query paths should be a list of string tuples (well, technically, a list of lists of strings)")
        self.snowflake = snowflake
        try:
            matching_paths = [
                candidate
                for candidate in snowflake.query_paths
                if untype_path(candidate[0]) == query_path
            ]
            if not matching_paths:
                # LOOK INTO A SPECIFIC MULTI VALUED COLUMN
                try:
                    multi_columns = [
                        col
                        for col in self.snowflake.columns
                        if untype_path(col.name) == query_path and col.multi > 1
                    ]
                    # AN EMPTY LIST RAISES HERE, WHICH SENDS US TO THE FALLBACK BELOW
                    self.multi = multi_columns[0]
                    self.query_path = [self.multi.name] + self.multi.nested_path
                except Exception as e:
                    # PROBLEM WITH METADATA UPDATE
                    self.multi = None
                    self.query_path = [query_path] + ["."]

                    Log.warning("Problem getting query path {{path|quote}} in snowflake {{sf|quote}}", path=query_path, sf=snowflake.name, cause=e)
            else:
                # WE DO NOT NEED TO LOOK INTO MULTI-VALUED FIELDS AS A TABLE
                self.multi = None
                self.query_path = matching_paths[0]

            # SANITY CHECK: THE RESOLVED PATH MUST BE A LIST THAT ENDS WITH "."
            resolved = self.query_path
            if not is_list(resolved) or resolved[len(resolved) - 1] != ".":
                Log.error("error")

        except Exception as e:
            Log.error("logic error", cause=e)
Esempio n. 2
0
    def write(self, content):
        """
        Replace the contents of this file with the given text, encoded as utf8.

        The unsupported combination (a list of data together with an encryption
        key) is rejected BEFORE the filesystem is touched, so a bad call neither
        creates the parent nor truncates an existing file.

        :param content: text, or iterable of text
        :return:
        """
        if self.key and is_list(content):
            Log.error(u"list of data and keys are not supported, encrypt before sending to file")

        # A SINGLE TEXT VALUE IS WRITTEN AS A ONE-ITEM SEQUENCE;
        # LISTS AND OTHER ITERABLES ARE USED AS-IS
        if not is_list(content) and isinstance(content, text):
            content = [content]

        if not self.parent.exists:
            self.parent.create()
        with open(self._filename, "wb") as f:
            for d in content:
                if not is_text(d):
                    Log.error(u"Expecting unicode data only")
                if self.key:
                    # IMPORTED ONLY WHEN A KEY IS SET
                    from mo_math.aes_crypto import encrypt
                    f.write(encrypt(d, self.key).encode("utf8"))
                else:
                    f.write(d.encode("utf8"))
Esempio n. 3
0
def _normalize_selects(
    selects,
    frum,
    schema=None,
):
    """
    Normalize one select clause, or a list of them.

    When `frum` gives no context (it is None, or a list/set/text) a single
    clause is normalized and returned directly, and an empty list returns Null.
    In every other case the normalized clauses are checked for duplicate names
    before being returned.
    """
    no_context = frum == None or isinstance(frum, (list, set, text))
    many = is_list(selects)

    if no_context and not many:
        # SINGLE CLAUSE, NO CONTEXT: RETURNED WITHOUT THE DUPLICATE-NAME CHECK
        return _normalize_select_no_context(selects, schema=schema)

    if no_context:
        if len(selects) == 0:
            return Null
        output = [
            _normalize_select_no_context(clause, schema=schema)
            for clause in selects
        ]
    elif many:
        output = [
            normalized
            for clause in selects
            for normalized in _normalize_select(clause, frum=frum, schema=schema)
        ]
    else:
        output = _normalize_select(selects, frum, schema=schema)

    # EACH NAME MAY BE DEFINED ONLY ONCE
    seen_names = set()
    for clause in output:
        if clause.name in seen_names:
            Log.error("{{name}} has already been defined", name=clause.name)
        seen_names.add(clause.name)
    return output
Esempio n. 4
0
    def __init__(self, query_path, snowflake):
        """
        Bind this object to one query path of the given snowflake.

        The path is taken from `snowflake.query_paths` when one of them matches
        `query_path`; otherwise a multi-valued column of that name is used.

        :param query_path: untyped path, compared with `untype_path()` of the candidates
        :param snowflake: read for `query_paths`, `columns` and `name`
        """
        if not is_list(snowflake.query_paths[0]):
            Log.error("Snowflake query paths should be a list of string tuples (well, technically, a list of lists of strings)")
        self.snowflake = snowflake
        try:
            matching_paths = [
                candidate
                for candidate in snowflake.query_paths
                if untype_path(candidate[0]) == query_path
            ]
            if not matching_paths:
                # LOOK INTO A SPECIFIC MULTI VALUED COLUMN
                try:
                    multi_columns = [
                        col
                        for col in self.snowflake.columns
                        if untype_path(col.name) == query_path and col.multi > 1
                    ]
                    # AN EMPTY LIST RAISES HERE, WHICH SENDS US TO THE FALLBACK BELOW
                    self.multi = multi_columns[0]
                    self.query_path = [self.multi.name] + self.multi.nested_path
                except Exception as e:
                    # PROBLEM WITH METADATA UPDATE
                    self.multi = None
                    self.query_path = [query_path] + ["."]

                    Log.warning("Problem getting query path {{path|quote}} in snowflake {{sf|quote}}", path=query_path, sf=snowflake.name, cause=e)
            else:
                # WE DO NOT NEED TO LOOK INTO MULTI-VALUED FIELDS AS A TABLE
                self.multi = None
                self.query_path = matching_paths[0]

            # SANITY CHECK: THE RESOLVED PATH MUST BE A LIST THAT ENDS WITH "."
            resolved = self.query_path
            if not is_list(resolved) or resolved[len(resolved) - 1] != ".":
                Log.error("error")

        except Exception as e:
            Log.error("logic error", cause=e)
Esempio n. 5
0
def compare_to_expected(query, result, expect, places):
    """
    Put `result` and `expect` into a comparable order, then assert they match.

    The normalization depends on `result.meta.format`:
    * "table": columns of `result` are reordered to the order of `expect.header`,
      and rows are sorted when the query has no sort
    * "list": both data lists are sorted (best effort) when the query has no sort
    * "cube" with a single "rownum" edge and no sort: both cubes are sorted

    :param query: the query that produced `result` (read for sort, edges, groupby)
    :param result: the actual result; modified in place
    :param expect: the expected result; wrapped, then modified in place
    :param places: passed through to assertAlmostEqual
    """
    query = wrap(query)
    expect = wrap(expect)

    if result.meta.format == "table":
        try:
            assertAlmostEqual(set(result.header), set(expect.header))
        except Exception as e:
            Log.error("format=table headers do not match", cause=e)

        # MAP FROM expected COLUMN TO result COLUMN
        mapping = transpose(*transpose(*filter(
            lambda v: v[0][1] == v[1][1],
            itertools.product(enumerate(expect.header), enumerate(result.header))
        ))[1])[0]
        result.header = [result.header[m] for m in mapping]

        if result.data:
            columns = transpose(*unwrap(result.data))
            result.data = transpose(*(columns[m] for m in mapping))

        if not query.sort:
            sort_table(result)
            sort_table(expect)
    elif result.meta.format == "list":
        if not query.sort:
            try:
                # result.data MAY BE A LIST OF VALUES, NOT OBJECTS
                data_columns = jx.sort(set(jx.get_columns(result.data, leaves=True)) | set(jx.get_columns(expect.data, leaves=True)), "name")
            except Exception:
                data_columns = [{"name": "."}]

            sort_order = listwrap(coalesce(query.edges, query.groupby)) + data_columns

            # SORTING IS BEST EFFORT: DATA THAT CAN NOT BE SORTED IS COMPARED AS-IS
            if is_list(expect.data):
                try:
                    expect.data = jx.sort(expect.data, sort_order.name)
                except Exception as _:
                    pass

            if is_list(result.data):
                try:
                    result.data = jx.sort(result.data, sort_order.name)
                except Exception as _:
                    pass

    elif result.meta.format == "cube" and len(result.edges) == 1 and result.edges[0].name == "rownum" and not query.sort:
        # EACH HEADER IS USED TWICE (sort, THEN list2cube), SO IT MUST BE A REAL
        # LIST: ON PYTHON 3 map() RETURNS AN ITERATOR THAT IS EMPTY ON SECOND USE
        result_data, result_header = cube2list(result.data)
        result_header = [literal_field(h) for h in result_header]
        result_data = unwrap(jx.sort(result_data, result_header))
        result.data = list2cube(result_data, result_header)

        expect_data, expect_header = cube2list(expect.data)
        expect_header = [literal_field(h) for h in expect_header]
        expect_data = jx.sort(expect_data, expect_header)
        expect.data = list2cube(expect_data, expect_header)

    # CONFIRM MATCH
    assertAlmostEqual(result, expect, places=places)
Esempio n. 6
0
def parse_sql(sql):
    """
    Parse SQL text with moz_sql_parser and reshape the result.

    Known aggregate functions are pulled out of each select clause into
    `aggregate`/`value`, select clauses that repeat a groupby column are
    removed, `orderby` is renamed to `sort`, and the format is set to "table".
    """
    # TODO: CONVERT tuple OF LITERALS INTO LITERAL LIST
    # # IF ALL MEMBERS OF A LIST ARE LITERALS, THEN MAKE THE LIST LITERAL
    # if all(isinstance(r, number_types) for r in output):
    #     pass
    # elif all(isinstance(r, number_types) or (is_data(r) and "literal" in r.keys()) for r in output):
    #     output = {"literal": [r['literal'] if is_data(r) else r for r in output]}
    query = wrap(moz_sql_parser.parse(sql))
    redundant_select = []
    # PULL OUT THE AGGREGATES
    for s in listwrap(query.select):
        if s == '*':
            val = s
        else:
            val = s.value

        # EXTRACT KNOWN AGGREGATE FUNCTIONS
        if is_data(val):
            for a in KNOWN_SQL_AGGREGATES:
                value = val[a]
                if value == None:
                    continue

                s.aggregate = a
                if is_list(value):
                    # AGGREGATE WITH PARAMETERS  EG percentile(value, 0.90)
                    s[a] = unwraplist(value[1::])
                    s.value = value[0]
                else:
                    # SIMPLE AGGREGATE
                    s.value = value
                break

        # LOOK FOR GROUPBY COLUMN IN SELECT CLAUSE, REMOVE DUPLICATION
        # (val IS THE VALUE AS IT WAS BEFORE THE AGGREGATE WAS EXTRACTED)
        for g in listwrap(query.groupby):
            try:
                assertAlmostEqual(g.value, val, "")
                g.name = s.name
                redundant_select.append(s)
                break
            except Exception:
                # NOT A MATCH, TRY THE NEXT GROUPBY COLUMN
                pass

    # REMOVE THE REDUNDANT select
    if is_list(query.select):
        for r in redundant_select:
            query.select.remove(r)
    elif query.select and redundant_select:
        query.select = None

    # RENAME orderby TO sort
    ordering = query.orderby
    query.sort = ordering
    query.orderby = None
    query.format = "table"
    return query
Esempio n. 7
0
def format_list(aggs, es_query, query, decoders, select):
    """
    Reshape the output of format_table() into "list" or "value" format.

    * with edges or groupby: one Data record per table row, format "list"
    * with a list of selects: a single Data record from the first row, format "value"
    * otherwise: the first cell of the first row, format "value"
    """
    table = format_table(aggs, es_query, query, decoders, select)
    header = table.header

    def as_record(row):
        # PAIR EACH HEADER WITH THE CELL IN THE SAME POSITION
        record = Data()
        for name, cell in zip(header, row):
            record[name] = cell
        return record

    if query.edges or query.groupby:
        data = [as_record(row) for row in table.data]
        result_format = "list"
    elif is_list(query.select):
        data = as_record(table.data[0])
        result_format = "value"
    else:
        data = table.data[0][0]
        result_format = "value"

    return Data(
        meta={"format": result_format},
        data=data
    )
Esempio n. 8
0
def assertAlmostEqualValue(test, expected, digits=None, places=None, msg=None, delta=None):
    """
    Snagged from unittest/case.py, then modified (Aug2014)

    Returns silently when `test` is close enough to `expected`; raises
    AssertionError otherwise.  At most one of `digits`, `places` or `delta`
    may be given (TypeError if more); `places` defaults to 15 when none is.
    """
    if expected is NULL:
        # pandas dataframes reject any comparision with an exception!
        if test == None:
            return
        raise AssertionError(expand_template("{{test}} != {{expected}}", locals()))

    if expected == None:  # None has no expectations
        return
    if test == expected:
        # shortcut
        return

    if not is_number(expected):
        # SOME SPECIAL CASES, EXPECTING EMPTY CONTAINERS IS THE SAME AS EXPECTING NULL
        if is_list(expected) and len(expected) == 0 and test == None:
            return
        if is_data(expected) and not expected.keys() and test == None:
            return
        if test != expected:
            raise AssertionError(expand_template("{{test}} != {{expected}}", locals()))
        return

    # ONLY ONE KIND OF TOLERANCE MAY BE REQUESTED
    num_param = 0
    for tolerance in (digits, places, delta):
        if tolerance != None:
            num_param += 1
    if num_param > 1:
        raise TypeError("specify only one of digits, places or delta")

    if digits is not None:
        # ANY FAILURE WHILE MEASURING THE DIFFERENCE FALLS THROUGH TO THE ASSERTION ERROR
        with suppress_exception:
            magnitude = log10(abs(test-expected))
            if magnitude < digits:
                return

        standardMsg = expand_template("{{test}} != {{expected}} within {{digits}} decimal places", locals())
    elif delta is not None:
        if abs(test - expected) <= delta:
            return

        standardMsg = expand_template("{{test}} != {{expected}} within {{delta}} delta", locals())
    else:
        if places is None:
            places = 15

        with suppress_exception:
            magnitude = mo_math.log10(abs(test-expected))
            if magnitude < mo_math.ceiling(mo_math.log10(abs(test)))-places:
                return

        standardMsg = expand_template("{{test|json}} != {{expected|json}} within {{places}} places", locals())

    raise AssertionError(coalesce(msg, "") + ": (" + standardMsg + ")")
Esempio n. 9
0
def format_list(aggs, es_query, query, decoders, select):
    """
    Reshape the output of format_table() into "list" or "value" format.

    * with edges or groupby: one Data record per table row, format "list"
    * with a list of selects: a single Data record from the first row, format "value"
    * otherwise: the first cell of the first row, format "value"
    """
    table = format_table(aggs, es_query, query, decoders, select)
    header = table.header

    def as_record(row):
        # PAIR EACH HEADER WITH THE CELL IN THE SAME POSITION
        record = Data()
        for name, cell in zip(header, row):
            record[name] = cell
        return record

    if query.edges or query.groupby:
        data = [as_record(row) for row in table.data]
        result_format = "list"
    elif is_list(query.select):
        data = as_record(table.data[0])
        result_format = "value"
    else:
        data = table.data[0][0]
        result_format = "value"

    return Data(
        meta={"format": result_format},
        data=data
    )
Esempio n. 10
0
    def select(self, select):
        """
        Project every row of this container through the given select clause(s).

        :param select: one select clause, or a list of them; each is read for
            `.name` and `.value`
        :return: `self` when the only clause selects "." as ".", otherwise a new
            ListContainer holding the projected rows as a list
        """
        selects = listwrap(select)

        if len(selects) == 1 and is_op(selects[0].value, Variable) and selects[0].value.var == ".":
            new_schema = self.schema
            if selects[0].name == ".":
                return self
        else:
            new_schema = None

        if is_list(select):
            # WHEN EVERY CLAUSE IS A PLAIN, UN-RENAMED VARIABLE THE SCHEMA CAN BE CARRIED OVER
            if all(
                is_op(s.value, Variable) and s.name == s.value.var
                for s in select
            ):
                names = set(s.value.var for s in select)
                new_schema = Schema(".", [c for c in self.schema.columns if c.name in names])

            push_and_pull = [(s.name, jx_expression_to_function(s.value)) for s in selects]
            def selector(d):
                output = Data()
                for n, p in push_and_pull:
                    output[n] = unwraplist(p(wrap(d)))
                return unwrap(output)

            # BUILD A REAL LIST: ON PYTHON 3 map() RETURNS A ONE-SHOT ITERATOR,
            # WHICH WOULD LEAVE THE NEW CONTAINER READABLE ONLY ONCE
            new_data = [selector(d) for d in self.data]
        else:
            select_value = jx_expression_to_function(select.value)
            new_data = [select_value(d) for d in self.data]
            if is_op(select.value, Variable):
                column = copy(first(c for c in self.schema.columns if c.name == select.value.var))
                column.name = '.'
                new_schema = Schema("from " + self.name, [column])

        return ListContainer("from "+self.name, data=new_data, schema=new_schema)
Esempio n. 11
0
    def _update_meta(self):
        """
        Recompute the statistics (count, cardinality, partitions, multi,
        last_updated) of every column listed under "meta.columns", then clear
        the dirty flag.  Does nothing when the flag is not set.
        """
        if not self.dirty:
            return

        for meta_column_list in self.data.get("meta.columns").values():
            for mc in meta_column_list:
                seen = 0
                distinct = set()
                uncounted_objects = 0
                widest = 1
                for column in self._all_columns():
                    value = column[mc.name]
                    if value == None:
                        continue

                    seen += 1
                    if is_list(value):
                        widest = max(widest, len(value))
                        try:
                            distinct |= set(value)
                        except Exception:
                            # MEMBERS THAT CAN NOT GO IN A SET ARE COUNTED ONE BY ONE
                            uncounted_objects += len(value)
                    elif is_data(value):
                        uncounted_objects += 1
                    else:
                        distinct.add(value)

                mc.count = seen
                mc.cardinality = len(distinct) + uncounted_objects
                mc.partitions = jx.sort(distinct)
                mc.multi = widest
                mc.last_updated = Date.now()
        self.dirty = False
    def select(self, select):
        """
        Project every row of this container through the given select clause(s).

        :param select: one select clause, or a list of them; each is read for
            `.name` and `.value`
        :return: `self` when the only clause selects "." as ".", otherwise a new
            ListContainer holding the projected rows as a list
        """
        selects = listwrap(select)

        if len(selects) == 1 and is_op(selects[0].value, Variable) and selects[0].value.var == ".":
            new_schema = self.schema
            if selects[0].name == ".":
                return self
        else:
            new_schema = None

        if is_list(select):
            # WHEN EVERY CLAUSE IS A PLAIN, UN-RENAMED VARIABLE THE SCHEMA CAN BE CARRIED OVER
            if all(
                is_op(s.value, Variable) and s.name == s.value.var
                for s in select
            ):
                names = set(s.value.var for s in select)
                new_schema = Schema(".", [c for c in self.schema.columns if c.name in names])

            push_and_pull = [(s.name, jx_expression_to_function(s.value)) for s in selects]
            def selector(d):
                output = Data()
                for n, p in push_and_pull:
                    output[n] = unwraplist(p(wrap(d)))
                return unwrap(output)

            # BUILD A REAL LIST: ON PYTHON 3 map() RETURNS A ONE-SHOT ITERATOR,
            # WHICH WOULD LEAVE THE NEW CONTAINER READABLE ONLY ONCE
            new_data = [selector(d) for d in self.data]
        else:
            select_value = jx_expression_to_function(select.value)
            new_data = [select_value(d) for d in self.data]
            if is_op(select.value, Variable):
                column = copy(first(c for c in self.schema.columns if c.name == select.value.var))
                column.name = '.'
                new_schema = Schema("from " + self.name, [column])

        return ListContainer("from "+self.name, data=new_data, schema=new_schema)
Esempio n. 13
0
def tuple(data, field_name):
    """
    RETURN LIST  OF TUPLES

    `field_name` may be text (one field, possibly a dotted path), a list of
    fields, or a single field description; {"value": x} is treated as x.
    Cube and FlatList inputs are rejected.
    """
    if isinstance(data, (Cube, FlatList)):
        Log.error("not supported yet")

    if is_data(field_name) and "value" in field_name:
        # SIMPLIFY {"value":value} AS STRING
        field_name = field_name["value"]

    # SIMPLE PYTHON ITERABLE ASSUMED
    if is_text(field_name):
        path = split_field(field_name)
        if len(path) == 1:
            return [(d[field_name],) for d in data]

        output = []
        flat_list._tuple1(data, path, 0, output)
        return output

    # ONLY THE LIST-OF-FIELDS FORM UNWRAPS THE DATA
    if is_list(field_name):
        paths = [_select_a_field(f) for f in field_name]
        rows = unwrap(data)
    else:
        paths = [_select_a_field(field_name)]
        rows = data

    output = FlatList()
    _tuple((), rows, paths, 0, output)
    return output
Esempio n. 14
0
def _expand(template, seq):
    """
    seq IS TUPLE OF OBJECTS IN PATH ORDER INTO THE DATA TREE

    `template` may be text (expanded directly), a data object of the form
    {"from":from, "template":template, "separator":separator} (expanded once
    per item and joined), or a list of templates (expanded and concatenated).
    """
    if is_text(template):
        return _simple_expand(template, seq)

    if is_data(template):
        # EXPAND LISTS OF ITEMS USING THIS FORM
        # {"from":from, "template":template, "separator":separator}
        template = wrap(template)
        assert template["from"], "Expecting template to have 'from' attribute"
        assert template.template, "Expecting template to have 'template' attribute"

        # EACH ITEM IS APPENDED TO THE PATH BEFORE THE INNER TEMPLATE IS EXPANDED
        items = seq[-1][template["from"]]
        pieces = [_expand(template.template, seq + (item,)) for item in items]
        return coalesce(template.separator, "").join(pieces)

    if is_list(template):
        return "".join(_expand(t, seq) for t in template)

    if not _Log:
        _late_import()

    _Log.error("can not handle")
Esempio n. 15
0
    def parse_field(fieldname, data, depth):
        """
        RETURN (first, rest) OF fieldname

        Walks `fieldname` one step at a time into `data`, recording each step in
        the `primary_nested`, `primary_column` and `primary_branch` lists of the
        enclosing scope.  Stops at the first step that holds a list (when the
        name has more than one step) and returns that step with the remaining
        path; returns (fieldname, None) when no such list is met.

        :param fieldname: dotted field name
        :param data: the object to walk into
        :param depth: offset into the primary_* lists for the first step
        """
        col = split_field(fieldname)
        d = data
        for i, c in enumerate(col):
            try:
                d = d[c]
            except Exception as e:
                # KEEP THE ORIGINAL FAILURE AS THE CAUSE, SO IT IS NOT LOST
                Log.error("{{name}} does not exist", name=fieldname, cause=e)
            if is_list(d) and len(col) > 1:
                if len(primary_column) <= depth + i:
                    primary_nested.append(True)
                    primary_column.append(c)
                    primary_branch.append(d)
                # NOTE(review): THIS READS primary_nested[depth] BUT
                # primary_column[depth + i]; CONFIRM THE DIFFERENT INDEX IS INTENDED
                elif primary_nested[depth] and primary_column[depth + i] != c:
                    Log.error("only one branch of tree allowed")
                else:
                    primary_nested[depth + i] = True
                    primary_column[depth + i] = c
                    primary_branch[depth + i] = d

                return c, join_field(col[i + 1 :])
            else:
                if len(primary_column) <= depth + i:
                    primary_nested.append(False)
                    primary_column.append(c)
                    primary_branch.append([d])
        return fieldname, None
Esempio n. 16
0
def _select_deep(v, field, depth, record):
    """
    field = {"name":name, "value":["attribute", "path"]}
    r[field.name]=v[field.value], BUT WE MUST DEAL WITH POSSIBLE LIST IN field.value PATH

    Returns (0, None) when the value has been stored (or the path ran out), or
    (next depth, list) when a list is met part-way along the path.
    """
    path = field.value
    if hasattr(path, "__call__"):
        # A CALLABLE SELECTOR COMPUTES THE VALUE; ANY FAILURE STORES None
        try:
            record[field.name] = path(wrap(v))
        except Exception as e:
            record[field.name] = None
        return 0, None

    # WALK ALL BUT THE LAST STEP OF THE PATH
    for offset, step in enumerate(path[depth : len(path) - 1 :]):
        v = v.get(step)
        if v is None:
            return 0, None
        if is_list(v):
            return depth + offset + 1, v

    leaf = path.last()
    try:
        # NO NAME FIELD INDICATES SELECT VALUE
        record[field.name] = v.get(leaf) if leaf else v
    except Exception as e:
        Log.error(
            "{{value}} does not have {{field}} property", value=v, field=leaf, cause=e
        )
    return 0, None
Esempio n. 17
0
def _select_deep(v, field, depth, record):
    """
    field = {"name":name, "value":["attribute", "path"]}
    r[field.name]=v[field.value], BUT WE MUST DEAL WITH POSSIBLE LIST IN field.value PATH

    Returns (0, None) when the value has been stored (or the path ran out), or
    (next depth, list) when a list is met part-way along the path.
    """
    path = field.value
    if hasattr(path, "__call__"):
        # A CALLABLE SELECTOR COMPUTES THE VALUE; ANY FAILURE STORES None
        try:
            record[field.name] = path(wrap(v))
        except Exception as e:
            record[field.name] = None
        return 0, None

    # WALK ALL BUT THE LAST STEP OF THE PATH
    for offset, step in enumerate(path[depth:len(path) - 1:]):
        v = v.get(step)
        if v is None:
            return 0, None
        if is_list(v):
            return depth + offset + 1, v

    leaf = path.last()
    try:
        # NO NAME FIELD INDICATES SELECT VALUE
        record[field.name] = v.get(leaf) if leaf else v
    except Exception as e:
        Log.error("{{value}} does not have {{field}} property",
                  value=v,
                  field=leaf,
                  cause=e)
    return 0, None
Esempio n. 18
0
def tuple(data, field_name):
    """
    RETURN LIST  OF TUPLES

    `field_name` may be text (one field, possibly a dotted path), a list of
    fields, or a single field description; {"value": x} is treated as x.
    Cube and FlatList inputs are rejected.
    """
    if isinstance(data, (Cube, FlatList)):
        Log.error("not supported yet")

    if is_data(field_name) and "value" in field_name:
        # SIMPLIFY {"value":value} AS STRING
        field_name = field_name["value"]

    # SIMPLE PYTHON ITERABLE ASSUMED
    if is_text(field_name):
        path = split_field(field_name)
        if len(path) == 1:
            return [(d[field_name], ) for d in data]

        output = []
        flat_list._tuple1(data, path, 0, output)
        return output

    # ONLY THE LIST-OF-FIELDS FORM UNWRAPS THE DATA
    if is_list(field_name):
        paths = [_select_a_field(f) for f in field_name]
        rows = unwrap(data)
    else:
        paths = [_select_a_field(field_name)]
        rows = data

    output = FlatList()
    _tuple((), rows, paths, 0, output)
    return output
Esempio n. 19
0
    def parse_field(fieldname, data, depth):
        """
        RETURN (first, rest) OF fieldname

        Walks `fieldname` one step at a time into `data`, recording each step in
        the `primary_nested`, `primary_column` and `primary_branch` lists of the
        enclosing scope.  Stops at the first step that holds a list (when the
        name has more than one step) and returns that step with the remaining
        path; returns (fieldname, None) when no such list is met.

        :param fieldname: dotted field name
        :param data: the object to walk into
        :param depth: offset into the primary_* lists for the first step
        """
        col = split_field(fieldname)
        d = data
        for i, c in enumerate(col):
            try:
                d = d[c]
            except Exception as e:
                # KEEP THE ORIGINAL FAILURE AS THE CAUSE, SO IT IS NOT LOST
                Log.error("{{name}} does not exist", name=fieldname, cause=e)
            if is_list(d) and len(col) > 1:
                if len(primary_column) <= depth + i:
                    primary_nested.append(True)
                    primary_column.append(c)
                    primary_branch.append(d)
                # NOTE(review): THIS READS primary_nested[depth] BUT
                # primary_column[depth + i]; CONFIRM THE DIFFERENT INDEX IS INTENDED
                elif primary_nested[depth] and primary_column[depth + i] != c:
                    Log.error("only one branch of tree allowed")
                else:
                    primary_nested[depth + i] = True
                    primary_column[depth + i] = c
                    primary_branch[depth + i] = d

                return c, join_field(col[i + 1:])
            else:
                if len(primary_column) <= depth + i:
                    primary_nested.append(False)
                    primary_column.append(c)
                    primary_branch.append([d])
        return fieldname, None
Esempio n. 20
0
def _expand(template, seq):
    """
    seq IS TUPLE OF OBJECTS IN PATH ORDER INTO THE DATA TREE

    `template` may be text (expanded directly), a data object of the form
    {"from":from, "template":template, "separator":separator} (expanded once
    per item and joined), or a list of templates (expanded and concatenated).
    """
    if is_text(template):
        return _simple_expand(template, seq)

    if is_data(template):
        # EXPAND LISTS OF ITEMS USING THIS FORM
        # {"from":from, "template":template, "separator":separator}
        template = to_data(template)
        assert template["from"], "Expecting template to have 'from' attribute"
        assert template.template, "Expecting template to have 'template' attribute"

        # EACH ITEM IS APPENDED TO THE PATH BEFORE THE INNER TEMPLATE IS EXPANDED
        items = seq[-1][template["from"]]
        pieces = [_expand(template.template, seq + (item, )) for item in items]
        return coalesce(template.separator, "").join(pieces)

    if is_list(template):
        return "".join(_expand(t, seq) for t in template)

    if not _Log:
        _late_import()

    _Log.error("can not handle")
Esempio n. 21
0
    def _update_meta(self):
        """
        Recompute the statistics (count, cardinality, partitions, multi,
        last_updated) of every column in META_COLUMNS_DESC, stamp the
        description itself, then clear the dirty flag.  Does nothing when the
        flag is not set.
        """
        if not self.dirty:
            return

        # ONE TIMESTAMP IS SHARED BY EVERY COLUMN AND THE DESCRIPTION
        now = Date.now()
        for mc in META_COLUMNS_DESC.columns:
            seen = 0
            distinct = set()
            uncounted_objects = 0
            widest = 1
            for column in self._all_columns():
                value = column[mc.name]
                if value == None:
                    continue

                seen += 1
                if is_list(value):
                    widest = max(widest, len(value))
                    try:
                        distinct |= set(value)
                    except Exception:
                        # MEMBERS THAT CAN NOT GO IN A SET ARE COUNTED ONE BY ONE
                        uncounted_objects += len(value)
                elif is_data(value):
                    uncounted_objects += 1
                else:
                    distinct.add(value)

            mc.count = seen
            mc.cardinality = len(distinct) + uncounted_objects
            mc.partitions = jx.sort(distinct)
            mc.multi = widest
            mc.last_updated = now

        META_COLUMNS_DESC.last_updated = now
        self.dirty = False
Esempio n. 22
0
    def select(self, select):
        """
        Project every row of this container through the given select clause(s).

        :param select: one select clause, or a list of them; each is read for
            `.name` and `.value`
        :return: `self` when the only clause selects "." as ".", otherwise a new
            ListContainer holding the projected rows as a list
        """
        selects = listwrap(select)

        new_schema = None
        if len(selects) == 1 and is_op(selects[0].value, Variable) and selects[0].value.var == ".":
            new_schema = self.schema
            if selects[0].name == ".":
                return self

        if is_list(select):
            # WHEN EVERY CLAUSE IS A PLAIN, UN-RENAMED VARIABLE THE SCHEMA CAN BE CARRIED OVER
            if all(
                is_op(s.value, Variable) and s.name == s.value.var
                for s in select
            ):
                names = {s.value.var for s in select}
                new_schema = Schema(".", [c for c in self.schema.columns if c.name in names])

            extractors = [(s.name, jx_expression_to_function(s.value)) for s in selects]

            def selector(d):
                row = Data()
                for name, extract in extractors:
                    row[name] = unwraplist(extract(to_data(d)))
                return unwrap(row)

            new_data = [selector(d) for d in self.data]
        else:
            select_value = jx_expression_to_function(select.value)
            new_data = [select_value(d) for d in self.data]
            if is_op(select.value, Variable):
                # COPY THE SOURCE COLUMN'S FIELDS, THEN OVERRIDE THEM FOR THE "." COLUMN
                source_column = first(c for c in self.schema.columns if c.name == select.value.var)
                column = dict(**source_column)
                column.update({"name": ".", "jx_type": NESTED, "es_type": "nested", "multi":1001, "cardinality":1})
                new_schema = Schema("from " + self.name, [Column(**column)])

        return ListContainer("from "+self.name, data=new_data, schema=new_schema)
Esempio n. 23
0
def command_loop(local):
    """
    Read one JSON command per line from stdin and act on it, until
    `please_stop` is set.

    Each command is an object; the first of these keys that is present decides
    what happens: "import", "set", "get", "stop", "exec".  Any other command is
    treated as {function_name: arguments} and called.  The reply (DONE, or a
    JSON object with "out" or "err") is written to STDOUT and flushed after
    every command.

    SECURITY: text received on stdin is passed to exec().  NOTE(review): this is
    only safe when the process writing to stdin is trusted - confirm.

    :param local: namespace used as the locals for executed code; also read for
        'config', for "get" lookups and for '_return'
    """
    DEBUG and Log.note("mo-python process running with {{config|json}}",
                       config=local['config'])
    while not please_stop:
        # NOTE(review): readline() RETURNS AN EMPTY STRING AT END OF INPUT; CONFIRM
        # THAT CASE CAN NOT LEAVE THIS LOOP REPORTING ERRORS FOREVER
        line = sys.stdin.readline()
        try:
            command = json2value(line.decode('utf8'))
            DEBUG and Log.note("got {{command}}", command=command)

            if "import" in command:
                # IMPORT NAMES INTO THE SHARED context
                dummy = {}
                if is_text(command['import']):
                    exec("from " + command['import'] + " import *", dummy,
                         context)
                else:
                    exec(
                        "from " + command['import']['from'] + " import " +
                        ",".join(listwrap(command['import']['vars'])), dummy,
                        context)
                STDOUT.write(DONE)
            elif "set" in command:
                # ASSIGN VALUES INTO THE SHARED context
                for k, v in command.set.items():
                    context[k] = v
                STDOUT.write(DONE)
            elif "get" in command:
                # local IS CONSULTED BEFORE context
                STDOUT.write(
                    value2json({
                        "out":
                        coalesce(local.get(command['get']),
                                 context.get(command['get']))
                    }))
                STDOUT.write('\n')
            elif "stop" in command:
                # REPLY FIRST, THEN SIGNAL THE LOOP TO END
                STDOUT.write(DONE)
                please_stop.go()
            elif "exec" in command:
                if not is_text(command['exec']):
                    Log.error("exec expects only text")
                exec(command['exec'], context, local)
                STDOUT.write(DONE)
            else:
                # EVERY KEY IS A FUNCTION NAME; A LIST GIVES POSITIONAL ARGUMENTS,
                # ANYTHING ELSE IS READ AS KEYWORD ARGUMENTS
                for k, v in command.items():
                    if is_list(v):
                        exec(
                            "_return = " + k + "(" +
                            ",".join(map(value2json, v)) + ")", context, local)
                    else:
                        exec(
                            "_return = " + k + "(" +
                            ",".join(kk + "=" + value2json(vv)
                                     for kk, vv in v.items()) + ")", context,
                            local)
                    STDOUT.write(value2json({"out": local['_return']}))
                    STDOUT.write('\n')
        except Exception as e:
            # ANY FAILURE IS REPORTED BACK TO THE CALLER INSTEAD OF ENDING THE LOOP
            STDOUT.write(value2json({"err": e}))
            STDOUT.write('\n')
        finally:
            STDOUT.flush()
Esempio n. 24
0
def _replace_ref(node, url):
    """
    Return `node` with every "$ref" property replaced by the content it
    refers to.

    Data objects and lists are processed recursively; any other value is
    returned unchanged.  References with neither scheme nor path (local
    references) are left in place.  A trailing "/" on `url.path` is removed.

    :param node: the value to expand
    :param url: URL of the document containing `node`; supplies the scheme for
        scheme-relative references
    """
    if url.path.endswith("/"):
        url.path = url.path[:-1]

    if not is_data(node):
        if is_list(node):
            return [_replace_ref(n, url) for n in node]
        return node

    # EXPAND THE PROPERTIES, SETTING ASIDE THE REFERENCE (IF ANY)
    ref = None
    output = {}
    for key, value in node.items():
        if key == "$ref":
            ref = URL(value)
        else:
            output[key] = _replace_ref(value, url)

    if not ref:
        return output

    if not ref.scheme and not ref.path:
        # DO NOT TOUCH LOCAL REF YET
        output["$ref"] = ref
        return output

    if not ref.scheme:
        # SCHEME RELATIVE IMPLIES SAME PROTOCOL AS LAST TIME, WHICH
        # REQUIRES THE CURRENT DOCUMENT'S SCHEME
        ref.scheme = url.scheme

    # FIND THE SCHEME AND LOAD IT
    if ref.scheme not in scheme_loaders:
        raise Log.error("unknown protocol {{scheme}}", scheme=ref.scheme)
    new_value = scheme_loaders[ref.scheme](ref, url)

    if ref.fragment:
        new_value = get_attr(new_value, ref.fragment)

    if DEBUG:
        Log.note("Replace {{ref}} with {{new_value}}", ref=ref, new_value=new_value)

    # THE LOADED VALUE FILLS IN WHATEVER THE SIBLING PROPERTIES DID NOT SET
    if not output:
        output = new_value
    elif is_text(output):
        Log.error("Can not handle set_default({{output}},{{new_value}})",
                  output=output,
                  new_value=new_value)
    else:
        output = unwrap(set_default(output, new_value))

    if DEBUG:
        Log.note("Return {{output}}", output=output)

    return output
Esempio n. 25
0
 def value(self):
     """
     Return the single value held by this cube, or Null when the cube is none.

     Cubes with edges, or with a list of selects, are rejected.
     """
     if self.is_none:
         return Null
     if self.edges:
         Log.error("can not get value of with dimension")

     selected = self.select
     if is_list(selected):
         Log.error("can not get value of multi-valued cubes")

     column = self.data[selected.name]
     return column.cube
Esempio n. 26
0
 def value(self):
     """
     Return the single value held by this cube, or Null when the cube is none.

     Cubes with edges, or with a list of selects, are rejected.
     """
     if self.is_none:
         return Null
     if self.edges:
         Log.error("can not get value of with dimension")

     selected = self.select
     if is_list(selected):
         Log.error("can not get value of multi-valued cubes")

     column = self.data[selected.name]
     return column.cube
Esempio n. 27
0
def parse_hg_date(date):
    """
    CONVERT A DATE, AS FOUND IN HG METADATA, TO A Date

    :param date: text, or a list/tuple whose first element is the timestamp
    :return: Date BUILT FROM THE TEXT, OR FROM THE FIRST ELEMENT OF THE LIST

    ANY OTHER KIND OF VALUE IS REPORTED THROUGH Log.error
    """
    if is_text(date):
        return Date(date)

    if is_list(date):
        # FIRST IN TUPLE (timestamp, time_zone) TUPLE, WHERE timestamp IS GMT
        timestamp = date[0]
        return Date(timestamp)

    Log.error("Can not deal with date like {{date|json}}", date=date)
Esempio n. 28
0
    def error(
            cls,
            template,  # human readable template
            default_params={},  # parameters for template (never mutated here; it is copied into a new dict below)
            cause=None,  # plausible cause
            stack_depth=0,
            **more_params):
        """
        raise an exception with a trace for the cause too

        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template; an exception
               (or list of exceptions) passed here is treated as the cause instead
        :param cause: *Exception* (or list of exceptions) for chaining
        :param stack_depth:  *int* how many calls you want popped off the stack to report the *true* caller
        :param more_params: *any more parameters (which will overwrite default_params)
        :return: nothing is returned; the constructed Except is handed to raise_from_none
        """
        if not is_text(template):
            # sys.stderr.write(str("Log.error was expecting a unicode template"))
            Log.error("Log.error was expecting a unicode template")

        # SUPPORT THE Log.error("message", e) CALLING STYLE: AN EXCEPTION IN THE
        # default_params POSITION IS REALLY THE cause
        if default_params and isinstance(
                listwrap(default_params)[0], BaseException):
            cause = default_params
            default_params = {}

        # dict(...) BUILDS A FRESH MAPPING, more_params WIN OVER default_params
        params = Data(dict(default_params, **more_params))

        # NOTE(review): add_to_trace IS NEVER SET TO True IN THIS BLOCK, SO THE
        # trace-EXTENDING BRANCH BELOW DOES NOT RUN
        add_to_trace = False
        if cause == None:
            causes = None
        elif is_list(cause):
            causes = []
            for c in listwrap(
                    cause
            ):  # CAN NOT USE LIST-COMPREHENSION IN PYTHON3 (EXTRA STACK DEPTH FROM THE IN-LINED GENERATOR)
                causes.append(Except.wrap(c, stack_depth=1))
            causes = FlatList(causes)
        elif isinstance(cause, BaseException):
            causes = Except.wrap(cause, stack_depth=1)
        else:
            causes = None
            Log.error("can only accept Exception, or list of exceptions")

        # stack_depth + 1 ACCOUNTS FOR THIS FRAME, SO THE TRACE STARTS AT THE CALLER
        trace = exceptions.get_stacktrace(stack_depth + 1)

        if add_to_trace:
            cause[0].trace.extend(trace[1:])

        e = Except(
            context=exceptions.ERROR,
            template=template,
            params=params,
            cause=causes,
            trace=trace,
        )
        raise_from_none(e)
Esempio n. 29
0
    def select(self, fields):
        """
        EXTRACT ONE OR MORE FIELDS FROM THE ROWS OF THIS CONTAINER

        :param fields: a field name (text), a list of select clauses (each
               having name and value), or a data object whose value attribute
               is one of those
        :return: a list of values when fields is text; a FlatList of Data
                 (one per row of self._values()) when fields is a list

        ANYTHING ELSE IS REPORTED THROUGH Log.error
        """
        if is_data(fields):
            fields = fields.value

        if is_text(fields):
            keys = split_field(fields)
            if len(keys) == 1:
                # SIMPLE NAME: RETURN LIST OF VALUES
                if self.path[0] == fields:
                    return [d[1] for d in self.data]
                return [d[0][fields] for d in self.data]

            # DOTTED NAME: SKIP THE PART OF THE PATH SHARED WITH self.path
            mismatches = [
                i
                for i, (k, p) in enumerate(zip(keys, self.path))
                if k != p
            ]
            depth = coalesce(MIN(mismatches), len(self.path))  # LENGTH OF COMMON PREFIX

            output = FlatList()
            _select1((wrap(d[depth]) for d in self.data), keys[depth:], 0, output)
            return output

        if is_list(fields):
            # ONE (name, getter) PAIR PER SELECT CLAUSE
            meta = []
            for f in fields:
                if hasattr(f.value, "__call__"):
                    getter = f.value
                else:
                    # partial BINDS THE KEY NOW, SO EACH GETTER KEEPS ITS OWN KEY
                    getter = functools.partial(lambda v, d: d[v], f.value)
                meta.append((f.name, getter))

            output = FlatList()
            for row in self._values():
                record = Data()
                for name, getter in meta:
                    record[name] = getter(row)
                output.append(record)
            return output

        Log.error("multiselect over FlatList not supported")
 def get(self, select):
     """
     EXTRACT ONE OR MORE VARIABLES FROM EVERY ROW OF self.data

     :param select: the variable to extract from list, or a list of variables
     :return:  a simple list of the extraction, one entry per row; when select
               is a list, each entry is a tuple of values in select order
     """
     if is_list(select):
         # BUILD EACH ROW EAGERLY: A LAZY GENERATOR HERE CLOSES OVER THE LOOP
         # VARIABLE, SO ONCE CONSUMED EVERY ROW WOULD READ FROM THE LAST d
         return [tuple(d[s] for s in select) for d in self.data]
     return [d[select] for d in self.data]
Esempio n. 31
0
 def get(self, select):
     """
     EXTRACT ONE OR MORE VARIABLES FROM EVERY ROW OF self.data

     :param select: the variable to extract from list, or a list of variables
     :return:  a simple list of the extraction, one entry per row; when select
               is a list, each entry is a tuple of values in select order
     """
     if is_list(select):
         # BUILD EACH ROW EAGERLY: A LAZY GENERATOR HERE CLOSES OVER THE LOOP
         # VARIABLE, SO ONCE CONSUMED EVERY ROW WOULD READ FROM THE LAST d
         return [tuple(d[s] for s in select) for d in self.data]
     return [d[select] for d in self.data]
Esempio n. 32
0
def parse_sql(sql):
    """
    PARSE SQL TEXT INTO A QUERY STRUCTURE

    :param sql: the SQL text, handed to moz_sql_parser.parse
    :return: the wrapped parse result, modified so that:
             * known aggregates are pulled up into aggregate/value on the select clause
             * select clauses matching a groupby column are removed (the groupby takes the name)
             * orderby is moved to sort
             * format is "table"
    """
    query = wrap(moz_sql_parser.parse(sql))
    redundant_select = []

    for s in listwrap(query.select):
        val = s if s == '*' else s.value

        # PULL OUT THE FIRST KNOWN AGGREGATE FUNCTION, IF ANY
        if is_data(val):
            for a in KNOWN_SQL_AGGREGATES:
                value = val[a]
                if value != None:
                    s.aggregate = a
                    if is_list(value):
                        # AGGREGATE WITH PARAMETERS  EG percentile(value, 0.90)
                        s[a] = unwraplist(value[1::])
                        s.value = value[0]
                    else:
                        # SIMPLE AGGREGATE
                        s.value = value
                    break

        # LOOK FOR GROUPBY COLUMN IN SELECT CLAUSE, REMOVE DUPLICATION
        for g in listwrap(query.groupby):
            try:
                # A FAILED COMPARISON RAISES, WHICH MEANS "NOT THE SAME COLUMN"
                assertAlmostEqual(g.value, val, "")
                g.name = s.name
                redundant_select.append(s)
                break
            except Exception:
                pass

    # REMOVE THE REDUNDANT select
    if is_list(query.select):
        for r in redundant_select:
            query.select.remove(r)
    elif query.select and redundant_select:
        query.select = None

    # RENAME orderby TO sort
    ordering = query.orderby
    query.sort = ordering
    query.orderby = None

    query.format = "table"
    return query
Esempio n. 33
0
    def _groupby(self, edges):
        """
        RETURNS LIST OF (coord, values) TUPLES, WHERE
            coord IS THE INDEX INTO self CUBE (-1 INDEX FOR COORDINATES NOT GROUPED BY)
            values ALL VALUES THAT BELONG TO THE SLICE

        :param edges: the edges to group by; each is passed through _normalize_edge
        """
        edges = FlatList([n for e in edges for n in _normalize_edge(e)])
        keys = edges.name

        # SPLIT self.edges INTO THE ONES GROUPED BY (stacked) AND THE REST
        stacked, remainder, selector = [], [], []
        for e in self.edges:
            if e.name in keys:
                stacked.append(e)
                selector.append(1)
            else:
                remainder.append(e)
                selector.append(0)

        if len(stacked) + len(remainder) != len(self.edges):
            Log.error("can not find some edges to group by")

        # CACHE SOME RESULTS
        getKey = [e.domain.getKey for e in self.edges]
        lookup = []
        for i, e in enumerate(self.edges):
            parts = e.domain.partitions + ([None] if e.allowNulls else [])
            lookup.append([getKey[i](p) for p in parts])

        def coord2term(coord):
            return wrap_leaves({keys[i]: lookup[i][c] for i, c in enumerate(coord)})

        if is_list(self.select):
            selects = listwrap(self.select)
            index, first = transpose(*self.data[selects[0].name].groupby(selector))

            coord = wrap([coord2term(c) for c in index])

            values = [first]
            for s in selects[1::]:
                # NOTE(review): group_by HERE, BUT groupby EVERYWHERE ELSE IN
                # THIS METHOD - CONFIRM BOTH EXIST ON THE MATRIX TYPE
                _, column = transpose(*self.data[s.name].group_by(selector))
                values.append(column)

            cubes = [
                Cube(self.select, remainder, {s.name: row[i] for i, s in enumerate(selects)})
                for row in zip(*values)
            ]
            output = transpose(coord, cubes)
        else:
            groups = self.data[self.select.name].groupby(selector)
            if not remainder:
                # v IS A VALUE, NO NEED TO WRAP IT IN A Cube
                output = ((coord2term(coord), v) for coord, v in groups)
            else:
                output = (
                    (coord2term(coord), Cube(self.select, remainder, v))
                    for coord, v in groups
                )

        return output
Esempio n. 34
0
def _replace_ref(node, url):
    """
    RECURSIVELY EXPAND "$ref" PROPERTIES THAT POINT OUTSIDE THE DOCUMENT

    :param node: the value to scan (data, list, or leaf)
    :param url: URL of the document being expanded; a trailing "/" is
                stripped from url.path IN PLACE
    :return: new dict/list with references expanded; leaves are returned as-is.
             A "$ref" with neither scheme nor path is kept (as a URL) for later
    """
    if url.path.endswith("/"):
        url.path = url.path[:-1]

    if is_data(node):
        ref = None
        output = {}
        for key, child in node.items():
            if key == "$ref":
                ref = URL(child)
            else:
                output[key] = _replace_ref(child, url)

        if not ref:
            return output

        if not ref.scheme and not ref.path:
            # DO NOT TOUCH LOCAL REF YET
            output["$ref"] = ref
            return output

        if not ref.scheme:
            # SCHEME RELATIVE IMPLIES SAME PROTOCOL AS LAST TIME, WHICH
            # REQUIRES THE CURRENT DOCUMENT'S SCHEME
            ref.scheme = url.scheme

        # FIND THE SCHEME AND LOAD IT
        if ref.scheme not in scheme_loaders:
            raise Log.error("unknown protocol {{scheme}}", scheme=ref.scheme)
        new_value = scheme_loaders[ref.scheme](ref, url)

        if ref.fragment:
            new_value = mo_dots.get_attr(new_value, ref.fragment)

        if DEBUG:
            Log.note("Replace {{ref}} with {{new_value}}", ref=ref, new_value=new_value)

        if not output:
            # NOTHING BESIDE THE "$ref": THE LOADED VALUE IS THE WHOLE ANSWER
            output = new_value
        elif is_text(output):
            Log.error("Can not handle set_default({{output}},{{new_value}})", output=output, new_value=new_value)
        else:
            # SIBLING PROPERTIES ARE COMBINED WITH THE LOADED VALUE
            output = unwrap(set_default(output, new_value))

        if DEBUG:
            Log.note("Return {{output}}", output=output)

        return output

    if is_list(node):
        return [_replace_ref(n, url) for n in node]

    return node
Esempio n. 35
0
def _normalize_selects(selects, frum, schema=None, ):
    """
    NORMALIZE ONE SELECT CLAUSE, OR A LIST OF THEM

    :param selects: a select clause, or list of select clauses
    :param frum: the container being queried; None, list, set or text means
                 there is no context to help with normalization
    :param schema: passed through to the per-clause normalizers
    :return: Null FOR AN EMPTY LIST WITHOUT CONTEXT; THE SINGLE NORMALIZED CLAUSE
             FOR A NON-LIST WITHOUT CONTEXT; OTHERWISE THE NORMALIZED CLAUSES,
             AFTER VERIFYING NO NAME IS USED TWICE
    """
    no_context = frum == None or isinstance(frum, (list, set, text_type))
    many = is_list(selects)

    if no_context:
        if not many:
            return _normalize_select_no_context(selects, schema=schema)
        if len(selects) == 0:
            return Null
        output = [_normalize_select_no_context(s, schema=schema) for s in selects]
    elif many:
        output = [
            ss
            for s in selects
            for ss in _normalize_select(s, frum=frum, schema=schema)
        ]
    else:
        output = _normalize_select(selects, frum, schema=schema)

    # EACH NAME MAY BE DEFINED ONLY ONCE
    seen = set()
    for s in output:
        if s.name in seen:
            Log.error("{{name}} has already been defined",  name=s.name)
        seen.add(s.name)
    return output
Esempio n. 36
0
    def groupby(self, edges):
        """
        SLICE THIS CUBE IN TO ONES WITH LESS DIMENSIONALITY
        simple==True WILL HAVE GROUPS BASED ON PARTITION VALUE, NOT PARTITION OBJECTS

        :param edges: the edges to group by; each is passed through _normalize_edge
        :return: (term, slice) PAIRS; LAZY (A GENERATOR) WHEN self.select IS NOT A LIST
        """
        edges = FlatList([n for e in edges for n in _normalize_edge(e)])
        keys = edges.name

        # SPLIT self.edges INTO THE ONES GROUPED BY (stacked) AND THE REST
        stacked, remainder, selector = [], [], []
        for e in self.edges:
            if e.name in keys:
                stacked.append(e)
                selector.append(1)
            else:
                remainder.append(e)
                selector.append(0)

        if len(stacked) + len(remainder) != len(self.edges):
            Log.error("can not find some edges to group by")

        # CACHE SOME RESULTS
        getKey = [e.domain.getKey for e in self.edges]
        lookup = []
        for i, e in enumerate(self.edges):
            parts = e.domain.partitions + ([None] if e.allowNulls else [])
            lookup.append([getKey[i](p) for p in parts])

        def coord2term(coord):
            return wrap_leaves({keys[i]: lookup[i][c] for i, c in enumerate(coord)})

        if is_list(self.select):
            selects = listwrap(self.select)
            index, first = transpose(*self.data[selects[0].name].groupby(selector))

            coord = wrap([coord2term(c) for c in index])

            values = [first]
            for s in selects[1::]:
                # NOTE(review): group_by HERE, BUT groupby EVERYWHERE ELSE IN
                # THIS METHOD - CONFIRM BOTH EXIST ON THE MATRIX TYPE
                _, column = zip(*self.data[s.name].group_by(selector))
                values.append(column)

            cubes = [
                Cube(self.select, remainder, {s.name: row[i] for i, s in enumerate(selects)})
                for row in zip(*values)
            ]
            output = transpose(coord, cubes)
        else:
            groups = self.data[self.select.name].groupby(selector)
            if not remainder:
                # v IS A VALUE, NO NEED TO WRAP IT IN A Cube
                output = ((coord2term(coord), v) for coord, v in groups)
            else:
                output = (
                    (coord2term(coord), Cube(self.select, remainder, v))
                    for coord, v in groups
                )

        return output
Esempio n. 37
0
    def groupby(self, edges):
        """
        SLICE THIS CUBE IN TO ONES WITH LESS DIMENSIONALITY
        simple==True WILL HAVE GROUPS BASED ON PARTITION VALUE, NOT PARTITION OBJECTS

        :param edges: the edges to group by; each is passed through _normalize_edge
        :return: (term, slice) PAIRS; LAZY (A GENERATOR) WHEN self.select IS NOT A LIST
        """
        edges = FlatList([n for e in edges for n in _normalize_edge(e)])
        keys = edges.name

        # SPLIT self.edges INTO THE ONES GROUPED BY (stacked) AND THE REST
        stacked, remainder, selector = [], [], []
        for e in self.edges:
            if e.name in keys:
                stacked.append(e)
                selector.append(1)
            else:
                remainder.append(e)
                selector.append(0)

        if len(stacked) + len(remainder) != len(self.edges):
            Log.error("can not find some edges to group by")

        # CACHE SOME RESULTS
        getKey = [e.domain.getKey for e in self.edges]
        lookup = []
        for i, e in enumerate(self.edges):
            parts = e.domain.partitions + ([None] if e.allowNulls else [])
            lookup.append([getKey[i](p) for p in parts])

        def coord2term(coord):
            return wrap_leaves({keys[i]: lookup[i][c] for i, c in enumerate(coord)})

        if is_list(self.select):
            selects = listwrap(self.select)
            index, first = transpose(*self.data[selects[0].name].groupby(selector))

            coord = wrap([coord2term(c) for c in index])

            values = [first]
            for s in selects[1::]:
                # NOTE(review): group_by HERE, BUT groupby EVERYWHERE ELSE IN
                # THIS METHOD - CONFIRM BOTH EXIST ON THE MATRIX TYPE
                _, column = zip(*self.data[s.name].group_by(selector))
                values.append(column)

            cubes = [
                Cube(self.select, remainder, {s.name: row[i] for i, s in enumerate(selects)})
                for row in zip(*values)
            ]
            output = transpose(coord, cubes)
        else:
            groups = self.data[self.select.name].groupby(selector)
            if not remainder:
                # v IS A VALUE, NO NEED TO WRAP IT IN A Cube
                output = ((coord2term(coord), v) for coord, v in groups)
            else:
                output = (
                    (coord2term(coord), Cube(self.select, remainder, v))
                    for coord, v in groups
                )

        return output
Esempio n. 38
0
def _where_terms(master, where, schema):
    """
    USE THE SCHEMA TO CONVERT DIMENSION NAMES TO ES FILTERS
    master - TOP LEVEL WHERE (FOR PLACING NESTED FILTERS)
    where - THE FILTER TO CONVERT; ANYTHING THAT IS NOT DATA IS RETURNED AS-IS
    schema - ITS edges MAP A DIMENSION NAME TO AN EDGE (OR TO A FIELD NAME)
    """
    if not is_data(where):
        return where

    if where.term:
        # MAP TERM
        try:
            return _map_term_using_schema(master, [], where.term, schema.edges)
        except Exception as e:
            Log.error("programmer problem?", e)
    elif where.terms:
        # MAP TERMS, ONE FILTER PER NAME
        filters = FlatList()
        for name, values in where.terms.items():
            if not is_container(values):
                Log.error("terms filter expects list of values")

            edge = schema.edges[name]
            if not edge:
                # NOT A KNOWN DIMENSION, PASS IT THROUGH
                filters.append({"terms": {name: values}})
                continue

            if is_text(edge):
                # DIRECT FIELD REFERENCE
                return {"terms": {edge: values}}

            try:
                domain = edge.getDomain()
            except Exception as e:
                Log.error("programmer error", e)

            fields = domain.dimension.fields
            if is_data(fields):
                # EACH VALUE IS A COMPOUND KEY: AND ITS PARTS, OR THE VALUES
                or_agg = []
                for value in values:
                    and_agg = []
                    for local_field, es_field in fields.items():
                        part = value[local_field]
                        if part != None:
                            and_agg.append({"term": {es_field: part}})
                    or_agg.append({"and": and_agg})
                filters.append({"or": or_agg})
            elif is_list(fields) and len(fields) == 1 and is_variable_name(fields[0]):
                filters.append({"terms": {fields[0]: values}})
            elif domain.partitions:
                filters.append({"or": [domain.getPartByKey(value).esfilter for value in values]})
        return {"and": filters}
    elif where["or"]:
        return {"or": [unwrap(_where_terms(master, sub, schema)) for sub in where["or"]]}
    elif where["and"]:
        return {"and": [unwrap(_where_terms(master, sub, schema)) for sub in where["and"]]}
    elif where["not"]:
        return {"not": unwrap(_where_terms(master, where["not"], schema))}

    return where
Esempio n. 39
0
def is_type(value, type):
    """
    TEST IF value BELONGS TO THE NAMED JSON TYPE

    :param value: the value to test
    :param type: one of "string", "object" or "number"
    :return: True IF value MATCHES type, False OTHERWISE (None AND LISTS NEVER MATCH)
    """
    if value == None:
        return False
    elif is_text(value) and type == "string":
        # ALWAYS ANSWER WITH A BOOLEAN: RETURNING value ITSELF MADE THE EMPTY
        # STRING LOOK LIKE A MISMATCH
        return True
    elif is_list(value):
        return False
    elif is_data(value) and type == "object":
        return True
    elif isinstance(value, (int, float, Date)) and type == "number":
        return True
    return False
Esempio n. 40
0
def _where_terms(master, where, schema):
    """
    USE THE SCHEMA TO CONVERT DIMENSION NAMES TO ES FILTERS
    master - TOP LEVEL WHERE (FOR PLACING NESTED FILTERS)
    where - THE FILTER TO CONVERT; ANYTHING THAT IS NOT DATA IS RETURNED AS-IS
    schema - ITS edges MAP A DIMENSION NAME TO AN EDGE (OR TO A FIELD NAME)
    """
    if not is_data(where):
        return where

    if where.term:
        # MAP TERM
        try:
            return _map_term_using_schema(master, [], where.term, schema.edges)
        except Exception as e:
            Log.error("programmer problem?", e)
    elif where.terms:
        # MAP TERMS, ONE FILTER PER NAME
        filters = FlatList()
        for name, values in where.terms.items():
            if not is_container(values):
                Log.error("terms filter expects list of values")

            edge = schema.edges[name]
            if not edge:
                # NOT A KNOWN DIMENSION, PASS IT THROUGH
                filters.append({"terms": {name: values}})
                continue

            if is_text(edge):
                # DIRECT FIELD REFERENCE
                return {"terms": {edge: values}}

            try:
                domain = edge.getDomain()
            except Exception as e:
                Log.error("programmer error", e)

            fields = domain.dimension.fields
            if is_data(fields):
                # EACH VALUE IS A COMPOUND KEY: AND ITS PARTS, OR THE VALUES
                or_agg = []
                for value in values:
                    and_agg = []
                    for local_field, es_field in fields.items():
                        part = value[local_field]
                        if part != None:
                            and_agg.append({"term": {es_field: part}})
                    or_agg.append({"and": and_agg})
                filters.append({"or": or_agg})
            elif is_list(fields) and len(fields) == 1 and is_variable_name(fields[0]):
                filters.append({"terms": {fields[0]: values}})
            elif domain.partitions:
                filters.append({"or": [domain.getPartByKey(value).esfilter for value in values]})
        return {"and": filters}
    elif where["or"]:
        return {"or": [unwrap(_where_terms(master, sub, schema)) for sub in where["or"]]}
    elif where["and"]:
        return {"and": [unwrap(_where_terms(master, sub, schema)) for sub in where["and"]]}
    elif where["not"]:
        return {"not": unwrap(_where_terms(master, where["not"], schema))}

    return where
Esempio n. 41
0
def _replace_locals(node, doc_path):
    """
    RECURSIVELY EXPAND "$ref" PROPERTIES THAT POINT INSIDE THE SAME DOCUMENT

    :param node: the value to scan (data, list, or leaf)
    :param doc_path: the chain of enclosing values, nearest first, with the
                     root document last
    :return: a deep copy of node with local references expanded, "$concat"
             replaced by the joined text, and None-valued properties dropped
    """
    if is_data(node):
        # RECURS, DEEP COPY
        ref = None
        output = {}
        for key, child in node.items():
            if key == "$ref":
                ref = child
                continue
            if key == "$concat":
                if not is_sequence(child):
                    Log.error("$concat expects an array of strings")
                return coalesce(node.get("separator"), "").join(child)
            if child == None:
                continue
            output[key] = _replace_locals(child, [child] + doc_path)

        if not ref:
            return output

        # REFER TO SELF
        # NOTE(review): ref IS EXPECTED TO HAVE A fragment ATTRIBUTE (PRESUMABLY A URL) - CONFIRM AGAINST CALLER
        frag = ref.fragment
        if frag[0] != ".":
            # ABSOLUTE: RESOLVE AGAINST THE ROOT DOCUMENT
            new_value = get_attr(doc_path[-1], frag)
        else:
            # RELATIVE: EACH LEADING DOT CLIMBS ONE LEVEL OF doc_path
            for i, p in enumerate(frag):
                if p != ".":
                    if i > len(doc_path):
                        Log.error("{{frag|quote}} reaches up past the root document", frag=frag)
                    new_value = get_attr(doc_path[i - 1], frag[i::])
                    break
            else:
                # NOTHING BUT DOTS: THE REFERENCE IS TO AN ANCESTOR ITSELF
                new_value = doc_path[len(frag) - 1]

        new_value = _replace_locals(new_value, [new_value] + doc_path)

        if output:
            return unwrap(set_default(output, new_value))
        return new_value  # OPTIMIZATION FOR CASE WHEN node IS {}

    if is_list(node):
        return [_replace_locals(n, [n] + doc_path) for n in node]

    return node
Esempio n. 42
0
def jx_expression_to_function(expr):
    """
    RETURN FUNCTION THAT REQUIRES PARAMETERS (row, rownum=None, rows=None):

    :param expr: an expression object, a callable, or anything jx_expression accepts
    :return: the script of a ScriptOp (when that script is not text), expr
             itself when it is a plain callable, otherwise the compiled
             Python form of the expression
    """
    if is_expression(expr):
        if is_op(expr, ScriptOp) and not is_text(expr.script):
            # THE SCRIPT IS NOT SOURCE TEXT, SO IT IS HANDED BACK UNCOMPILED
            return expr.script
        return compile_expression(Python[expr].to_python())

    plain_callable = (
        expr != None
        and not is_data(expr)
        and not is_list(expr)
        and hasattr(expr, "__call__")
    )
    if plain_callable:
        return expr

    return compile_expression(Python[jx_expression(expr)].to_python())
Esempio n. 43
0
    def write(self, data):
        """
        REPLACE THE FILE CONTENT WITH data, ENCODED AS utf8

        :param data: text, or iterable of text; the parent directory is
                     created if it does not exist
        :return: None

        WHEN self.key IS SET EACH PIECE IS ENCRYPTED BEFORE WRITING; A LIST
        TOGETHER WITH A KEY IS REPORTED THROUGH Log.error
        """
        if not self.parent.exists:
            self.parent.create()

        with open(self._filename, "wb") as f:
            if is_list(data) and self.key:
                Log.error(u"list of data and keys are not supported, encrypt before sending to file")

            # A SINGLE STRING BECOMES A ONE-ELEMENT LIST; LISTS AND OTHER
            # ITERABLES ARE WRITTEN PIECE BY PIECE
            if not is_list(data) and isinstance(data, (binary_type, text_type)):
                data = [data]

            for piece in data:
                if not is_text(piece):
                    Log.error(u"Expecting unicode data only")
                if self.key:
                    from mo_math.crypto import encrypt
                    piece = encrypt(piece, self.key)
                f.write(piece.encode("utf8"))
Esempio n. 44
0
    def __init__(self, select, edges, data, frum=None):
        """
        data IS EXPECTED TO BE A dict TO MATRICES, BUT OTHER COLLECTIONS ARE
        ALLOWED, USING THE select AND edges TO DESCRIBE THE data

        :param select: a select clause (having a name), or a list of them
        :param edges: the edges of the cube; when missing they are derived from data
        :param data: dict of Matrix, or a list, Matrix, or single value to wrap
        :param frum: not used by this constructor
        """

        self.is_value = False if is_list(select) else True
        self.select = select
        self.meta = Data(format="cube")       # PUT EXTRA MARKUP HERE
        self.is_none = False

        # ONLY A MAPPING HAS values(); LISTS, MATRICES AND SINGLE VALUES ARE
        # HANDLED BY THE BRANCHES BELOW
        if is_data(data) and not all(data.values()):
            # RECORD ON THE INSTANCE (A PLAIN LOCAL WAS SILENTLY DISCARDED)
            self.is_none = True

        # WITH MULTIPLE SELECTS AND EXPLICIT EDGES, EVERY VALUE MUST BE A Matrix
        if is_list(select):
            if edges and OR(not isinstance(v, Matrix) for v in data.values()):
                Log.error("Expecting data to be a dict with Matrix values")

        if not edges:
            if not data:
                if is_list(select):
                    Log.error("not expecting a list of records")

                data = {select.name: Matrix.ZERO}
                self.edges = Null
            elif is_data(data):
                # EXPECTING NO MORE THAN ONE rownum EDGE IN THE DATA
                length = MAX([len(v) for v in data.values()])
                if length >= 1:
                    self.edges = list_to_data([{"name": "rownum", "domain": {"type": "rownum"}}])
                else:
                    self.edges = Null
            elif is_list(data):
                if is_list(select):
                    Log.error("not expecting a list of records")

                # COUNT THE ROWS BEFORE data IS REBOUND TO A ONE-KEY dict
                num_rows = len(data)
                data = {select.name: Matrix.wrap(data)}
                self.edges = list_to_data([{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": num_rows, "interval": 1}}])
            elif isinstance(data, Matrix):
                if is_list(select):
                    Log.error("not expecting a list of records")

                # NOTE(review): self.edges IS NOT ASSIGNED ON THIS PATH - CONFIRM WHAT IT SHOULD BE
                data = {select.name: data}
            else:
                if is_list(select):
                    Log.error("not expecting a list of records")

                data = {select.name: Matrix(value=data)}
                self.edges = Null
        else:
            self.edges = to_data(edges)

        self.data = data
Esempio n. 45
0
    def __init__(self, select, edges, data, frum=None):
        """
        data IS EXPECTED TO BE A dict TO MATRICES, BUT OTHER COLLECTIONS ARE
        ALLOWED, USING THE select AND edges TO DESCRIBE THE data

        :param select: a select clause (having a name), or a list of them
        :param edges: the edges of the cube; when missing they are derived from data
        :param data: dict of Matrix, or a list, Matrix, or single value to wrap
        :param frum: not used by this constructor
        """

        self.is_value = False if is_list(select) else True
        self.select = select
        self.meta = Data(format="cube")       # PUT EXTRA MARKUP HERE
        self.is_none = False

        # ONLY A MAPPING HAS values(); LISTS, MATRICES AND SINGLE VALUES ARE
        # HANDLED BY THE BRANCHES BELOW
        if is_data(data) and not all(data.values()):
            # RECORD ON THE INSTANCE (A PLAIN LOCAL WAS SILENTLY DISCARDED)
            self.is_none = True

        # WITH MULTIPLE SELECTS AND EXPLICIT EDGES, EVERY VALUE MUST BE A Matrix
        if is_list(select):
            if edges and OR(not isinstance(v, Matrix) for v in data.values()):
                Log.error("Expecting data to be a dict with Matrix values")

        if not edges:
            if not data:
                if is_list(select):
                    Log.error("not expecting a list of records")

                data = {select.name: Matrix.ZERO}
                self.edges = FlatList.EMPTY
            elif is_data(data):
                # EXPECTING NO MORE THAN ONE rownum EDGE IN THE DATA
                length = MAX([len(v) for v in data.values()])
                if length >= 1:
                    self.edges = wrap([{"name": "rownum", "domain": {"type": "rownum"}}])
                else:
                    self.edges = FlatList.EMPTY
            elif is_list(data):
                if is_list(select):
                    Log.error("not expecting a list of records")

                # COUNT THE ROWS BEFORE data IS REBOUND TO A ONE-KEY dict
                num_rows = len(data)
                data = {select.name: Matrix.wrap(data)}
                self.edges = wrap([{"name": "rownum", "domain": {"type": "rownum", "min": 0, "max": num_rows, "interval": 1}}])
            elif isinstance(data, Matrix):
                if is_list(select):
                    Log.error("not expecting a list of records")

                # NOTE(review): self.edges IS NOT ASSIGNED ON THIS PATH - CONFIRM WHAT IT SHOULD BE
                data = {select.name: data}
            else:
                if is_list(select):
                    Log.error("not expecting a list of records")

                data = {select.name: Matrix(value=data)}
                self.edges = FlatList.EMPTY
        else:
            self.edges = wrap(edges)

        self.data = data
Esempio n. 46
0
    def _groupby(self, edges):
        """
        RETURNS LIST OF (coord, values) TUPLES, WHERE
            coord IS THE INDEX INTO self CUBE (-1 INDEX FOR COORDINATES NOT GROUPED BY)
            values ALL VALUES THAT BELONG TO THE SLICE

        :param edges: the edges to group by; each is passed through _normalize_edge
        """
        edges = FlatList([n for e in edges for n in _normalize_edge(e)])
        keys = edges.name

        # SPLIT self.edges INTO THE ONES GROUPED BY (stacked) AND THE REST
        stacked, remainder, selector = [], [], []
        for e in self.edges:
            if e.name in keys:
                stacked.append(e)
                selector.append(1)
            else:
                remainder.append(e)
                selector.append(0)

        if len(stacked) + len(remainder) != len(self.edges):
            Log.error("can not find some edges to group by")

        # CACHE SOME RESULTS
        # NOTE(review): keys AND lookup ARE NOT READ IN THIS BLOCK; coord2term
        # IS DEFINED ELSEWHERE - CONFIRM IT DOES NOT EXPECT THEM
        getKey = [e.domain.getKey for e in self.edges]
        lookup = []
        for i, e in enumerate(self.edges):
            parts = e.domain.partitions + ([None] if e.allowNulls else [])
            lookup.append([getKey[i](p) for p in parts])

        if is_list(self.select):
            selects = listwrap(self.select)
            index, first = transpose(*self.data[selects[0].name].groupby(selector))

            coord = wrap([coord2term(c) for c in index])

            values = [first]
            for s in selects[1::]:
                # NOTE(review): group_by HERE, BUT groupby EVERYWHERE ELSE IN
                # THIS METHOD - CONFIRM BOTH EXIST ON THE MATRIX TYPE
                _, column = transpose(*self.data[s.name].group_by(selector))
                values.append(column)

            cubes = [
                Cube(self.select, remainder, {s.name: row[i] for i, s in enumerate(selects)})
                for row in zip(*values)
            ]
            output = transpose(coord, cubes)
        else:
            groups = self.data[self.select.name].groupby(selector)
            if not remainder:
                # v IS A VALUE, NO NEED TO WRAP IT IN A Cube
                output = ((coord2term(coord), v) for coord, v in groups)
            else:
                output = (
                    (coord2term(coord), Cube(self.select, remainder, v))
                    for coord, v in groups
                )

        return output
Esempio n. 47
0
def url_param2value(param):
    """
    CONVERT URL QUERY PARAMETERS INTO DICT

    :param param: the query text ("a=1&b=2"), or None
    :return: Null FOR None; OTHERWISE Data MAPPING EACH NAME TO ITS VALUE.
             A NAME WITHOUT "=" MAPS TO True, A REPEATED NAME MAPS TO A LIST,
             AND VALUES ARE PERCENT-DECODED THEN PARSED AS JSON WHEN POSSIBLE
    """
    if param == None:
        return Null

    def _decode(v):
        # REPLACE EACH %XX ESCAPE WITH THE CHARACTER IT ENCODES
        output = []
        i = 0
        while i < len(v):
            c = v[i]
            if c == "%":
                d = hex2chr(v[i + 1:i + 3])
                output.append(d)
                i += 3
            else:
                output.append(c)
                i += 1

        output = text("".join(output))
        try:
            from mo_json import json2value

            return json2value(output)
        except Exception:
            # NOT JSON, THE DECODED TEXT IS THE VALUE
            return output

    query = Data()
    for p in param.split('&'):
        if not p:
            continue
        if "=" not in p:
            k = p
            v = True
        else:
            # SPLIT ON THE FIRST "=" ONLY, THE VALUE MAY HAVE ITS OWN (EG base64 PADDING)
            k, v = p.split("=", 1)
            v = _decode(v)

        u = query.get(k)
        if u is None:
            query[k] = v
        elif is_list(u):
            u += [v]
        else:
            query[k] = [u, v]

    return query
Esempio n. 48
0
def get_type(v):
    """
    RETURN THE TYPE CONSTANT THAT DESCRIBES v

    :param v: any value
    :return: BOOLEAN, STRING, OBJECT, NUMBER OR NESTED; None WHEN v IS None
             OR NOT ONE OF THE RECOGNIZED KINDS
    """
    if v == None:
        return None
    # bool IS TESTED BEFORE THE NUMBERS BECAUSE bool IS A SUBCLASS OF int
    if isinstance(v, bool):
        return BOOLEAN
    if is_text(v):
        return STRING
    if is_data(v):
        return OBJECT
    if isinstance(v, (int, float, Date)):
        return NUMBER
    if is_list(v):
        return NESTED
    return None
Esempio n. 49
0
def url_param2value(param):
    """
    CONVERT URL QUERY PARAMETERS INTO DICT

    :param param: THE QUERY STRING; name=value PAIRS SEPARATED BY "&"
    :return: Data OF name -> value, OR Null WHEN param IS MISSING
             A NAME WITH NO "=" MAPS TO True
             A NAME THAT REPEATS MAPS TO THE LIST OF ITS VALUES, IN ORDER
    """
    if param == None:
        return Null

    def _decode(v):
        # REPLACE EACH %XX ESCAPE WITH WHATEVER hex2chr() RETURNS FOR XX
        output = []
        i = 0
        while i < len(v):
            c = v[i]
            if c == "%":
                d = hex2chr(v[i + 1:i + 3])
                output.append(d)
                i += 3
            else:
                output.append(c)
                i += 1

        output = text_type("".join(output))
        try:
            # A VALUE THAT PARSES AS JSON IS RETURNED PARSED
            return json2value(output)
        except Exception:
            # NOT JSON: FALL THROUGH AND KEEP THE DECODED TEXT
            pass
        return output

    query = Data()
    for p in param.split('&'):
        if not p:
            continue
        if p.find("=") == -1:
            k = p
            v = True
        else:
            # SPLIT ON THE FIRST "=" ONLY: THE VALUE ITSELF MAY CONTAIN "="
            # (eg BASE64 PADDING), WHICH USED TO RAISE ValueError ON UNPACKING
            k, v = p.split("=", 1)
            v = _decode(v)

        u = query.get(k)
        if u is None:
            query[k] = v
        elif is_list(u):
            u += [v]
        else:
            query[k] = [u, v]

    return query
Esempio n. 50
0
def _select1(data, field, depth, output):
    """
    SELECT A SINGLE FIELD
    """
    for d in data:
        for i, f in enumerate(field[depth:]):
            d = d[f]
            if d == None:
                output.append(None)
                break
            elif is_list(d):
                _select1(d, field, i + 1, output)
                break
        else:
            output.append(d)
Esempio n. 51
0
def _tuple_deep(v, field, depth, record):
    """
    field = {"name":name, "value":["attribute", "path"]}
    r[field.name]=v[field.value], BUT WE MUST DEAL WITH POSSIBLE LIST IN field.value PATH
    """
    if hasattr(field.value, "__call__"):
        return 0, None, record + (field.value(v),)

    for i, f in enumerate(field.value[depth : len(field.value) - 1 :]):
        v = v.get(f)
        if is_list(v):
            return depth + i + 1, v, record

    f = field.value.last()
    return 0, None, record + (v.get(f),)
Esempio n. 52
0
    def _convert_domain(self, domain=None):
        """
        TURN domain INTO A Domain INSTANCE

        :param domain: NOTHING, A Dimension, A Domain, OR A DESCRIPTION WITH
                       name/type/partitions PROPERTIES
        :return: Domain
        """
        if not domain:
            return Domain(type="default")
        if isinstance(domain, Dimension):
            return domain.getDomain()
        if isinstance(domain, Domain):
            return domain

        # AN UNNAMED DESCRIPTION IS COPIED, AND THE COPY IS NAMED AFTER ITS TYPE
        if not domain.name:
            domain = domain.copy()
            domain.name = domain.type

        # Domain IS GIVEN A REAL LIST OF PARTITIONS
        parts = domain.partitions
        if not is_list(parts):
            domain.partitions = list(parts)

        return Domain(**domain)
Esempio n. 53
0
def _replace_locals(node, doc_path):
    """
    RETURN A COPY OF node WITH EVERY {"$ref": ...} PROPERTY REPLACED BY THE
    VALUE IT POINTS TO INSIDE THE SAME DOCUMENT

    :param node: THE VALUE TO EXPAND (DATA, LIST, OR ANYTHING ELSE)
    :param doc_path: LIST OF ENCLOSING VALUES, NEAREST FIRST; EVERY RECURSIVE
                     CALL BELOW PREPENDS THE VALUE IT DESCENDS INTO, SO
                     doc_path[-1] IS THE ENTRY THE FIRST CALLER SUPPLIED
                     (PRESUMABLY THE ROOT DOCUMENT - CONFIRM AGAINST CALLER)
    :return: NEW dict/list FOR DATA/LIST INPUT; OTHER VALUES ARE RETURNED AS-IS
    """
    if is_data(node):
        # RECURS, DEEP COPY
        ref = None
        output = {}
        for k, v in node.items():
            if k == "$ref":
                # REMEMBERED, NOT COPIED; RESOLVED AFTER THE LOOP
                ref = v
            elif v == None:
                # MISSING VALUES ARE DROPPED FROM THE COPY
                continue
            else:
                output[k] = _replace_locals(v, [v] + doc_path)

        if not ref:
            return output

        # REFER TO SELF
        frag = ref.fragment
        if frag[0] == ".":
            # RELATIVE
            # i IS THE POSITION OF THE FIRST NON-DOT, WHICH EQUALS THE NUMBER OF
            # LEADING DOTS; THAT MANY DOTS SELECTS doc_path[i-1] AS THE BASE AND
            # THE REST OF frag IS THE PATH FROM THERE
            for i, p in enumerate(frag):
                if p != ".":
                    if i>len(doc_path):
                        Log.error("{{frag|quote}} reaches up past the root document",  frag=frag)
                    new_value = mo_dots.get_attr(doc_path[i-1], frag[i::])
                    break
            else:
                # frag IS NOTHING BUT DOTS: THE REFERENCE IS THE ENCLOSING VALUE ITSELF
                # NOTE(review): NO BOUNDS CHECK HERE, UNLIKE THE BRANCH ABOVE
                new_value = doc_path[len(frag) - 1]
        else:
            # ABSOLUTE
            # RESOLVED AGAINST THE LAST (OUTERMOST) ENTRY OF doc_path
            new_value = mo_dots.get_attr(doc_path[-1], frag)

        # THE REFERENCED VALUE MAY ITSELF CONTAIN $ref, SO EXPAND IT TOO
        new_value = _replace_locals(new_value, [new_value] + doc_path)

        if not output:
            return new_value  # OPTIMIZATION FOR CASE WHEN node IS {}
        else:
            # output (THE LOCAL PROPERTIES) IS PASSED FIRST TO set_default
            # NOTE(review): PRESUMABLY LOCAL PROPERTIES WIN OVER REFERENCED ONES - CONFIRM
            return unwrap(set_default(output, new_value))

    elif is_list(node):
        # EACH ELEMENT IS EXPANDED WITH ITSELF PREPENDED TO doc_path
        candidate = [_replace_locals(n, [n] + doc_path) for n in node]
        # if all(p[0] is p[1] for p in zip(candidate, node)):
        #     return node
        return candidate

    # NEITHER DATA NOR LIST: NOTHING TO EXPAND
    return node
Esempio n. 54
0
def quote_column(column_name, table=None):
    """
    QUOTE A COLUMN NAME (OR SEVERAL) FOR USE IN A SQL STATEMENT

    :param column_name: TEXT, utf8 BYTES, LIST OF NAMES, OR {"name":name, "value":value}
    :param table: OPTIONAL; WHEN GIVEN, TEXT NAMES GO THROUGH join_column(table, name)
    """
    if column_name == None:
        Log.error("missing column_name")
    elif is_text(column_name):
        if not table:
            # MYSQL QUOTE OF COLUMN NAMES
            steps = '`.`'.join(split_field(column_name))
            return SQL("`" + steps + "`")
        return join_column(table, column_name)
    elif is_binary(column_name):
        # DECODE, THEN HANDLE AS TEXT
        as_text = column_name.decode('utf8')
        return quote_column(as_text, table)
    elif is_list(column_name):
        if table:
            return sql_list(join_column(table, c) for c in column_name)
        return sql_list(quote_column(c) for c in column_name)
    else:
        # ASSUME {"name":name, "value":value} FORM
        value = column_name.value
        alias = quote_column(column_name.name)
        return SQL(sql_alias(value, alias))
Esempio n. 55
0
def jx_expression_to_function(expr):
    """
    RETURN FUNCTION THAT REQUIRES PARAMETERS (row, rownum=None, rows=None):

    :param expr: AN EXPRESSION OBJECT, A PYTHON CALLABLE, OR ANYTHING
                 jx_expression() ACCEPTS
    """
    if is_expression(expr):
        # ONLY A ScriptOp WHOSE script IS NOT TEXT IS HANDED BACK DIRECTLY
        if not is_op(expr, ScriptOp) or is_text(expr.script):
            return compile_expression(Python[expr].to_python())
        return expr.script

    # DATA AND LISTS ARE EXCLUDED BEFORE THE __call__ TEST
    is_plain_callable = (
        expr != None
        and not is_data(expr)
        and not is_list(expr)
        and hasattr(expr, "__call__")
    )
    if not is_plain_callable:
        return compile_expression(Python[jx_expression(expr)].to_python())
    return expr
Esempio n. 56
0
    def map(self, map_):
        """
        RETURN A NEW QueryOp, BUILT BY CALLING .map(map_) ON frum, where, AND
        THE EXPRESSIONS INSIDE select, edges, groupby, window AND sort
        limit AND format ARE CARRIED OVER UNCHANGED
        """
        def map_select(s, map_):
            # MAPPED value FIRST, THEN s AS THE DEFAULTS
            mapped = {"value": s.value.map(map_)}
            return set_default(mapped, s)

        def map_edge(e, map_):
            mapped_parts = [
                set_default({"where": p.where.map(map_)}, p)
                for p in e.domain.partitions
            ]
            partitions = unwraplist(mapped_parts)

            domain = copy(e.domain)
            domain.where = e.domain.where.map(map_)
            domain.partitions = partitions

            edge = copy(e)
            edge.value = e.value.map(map_)
            edge.domain = domain
            if e.range:
                # NOTE(review): IF copy() IS SHALLOW, edge.range IS THE SAME
                # OBJECT AS e.range AND THIS ALSO CHANGES e - CONFIRM
                edge.range.min = e.range.min.map(map_)
                edge.range.max = e.range.max.map(map_)
            return edge

        select = (
            wrap([map_select(s, map_) for s in self.select])
            if is_list(self.select)
            else map_select(self.select, map_)
        )

        return QueryOp(
            frum=self.frum.map(map_),
            select=select,
            edges=wrap([map_edge(e, map_) for e in self.edges]),
            groupby=wrap([g.map(map_) for g in self.groupby]),
            window=wrap([w.map(map_) for w in self.window]),
            where=self.where.map(map_),
            sort=wrap([map_select(s, map_) for s in listwrap(self.sort)]),
            limit=self.limit,
            format=self.format
        )
Esempio n. 57
0
def command_loop(local):
    """
    READ ONE JSON COMMAND PER LINE FROM stdin AND ACT ON IT, UNTIL please_stop

    EACH COMMAND IS DISPATCHED ON THE FIRST OF THESE KEYS IT CONTAINS:
      "import" - RUN A from ... import STATEMENT, NAMES LAND IN context
      "set"    - COPY EACH KEY/VALUE INTO context
      "get"    - WRITE {"out": value} FOR THE NAMED VARIABLE
      "stop"   - SIGNAL please_stop
      "exec"   - EXECUTE THE GIVEN TEXT WITH context AS GLOBALS, local AS LOCALS
      OTHERWISE EACH KEY IS TREATED AS THE NAME OF SOMETHING TO CALL, WITH THE
      VALUE AS ITS ARGUMENTS, AND {"out": result} IS WRITTEN

    REPLIES GO TO STDOUT, WHICH IS FLUSHED AFTER EVERY COMMAND; ANY EXCEPTION
    IS REPORTED AS {"err": exception} AND THE LOOP CONTINUES

    NOTE(review): COMMAND TEXT IS PASSED TO exec AS RECEIVED; THIS IS ONLY SAFE
    IF stdin IS A TRUSTED PARENT PROCESS - CONFIRM

    :param local: MAPPING USED AS THE LOCALS FOR EXECUTED CODE; MUST HAVE 'config'
    """
    DEBUG and Log.note("mo-python process running with {{config|json}}", config=local['config'])
    while not please_stop:
        # NOTE(review): readline() RETURNS AN EMPTY STRING AT END-OF-FILE AND
        # NOTHING HERE CHECKS FOR IT - CONFIRM THE PARENT ALWAYS SENDS "stop"
        line = sys.stdin.readline()
        try:
            # NOTE(review): .decode() ASSUMES line IS BYTES - CONFIRM FOR THE
            # PYTHON VERSION IN USE
            command = json2value(line.decode('utf8'))
            DEBUG and Log.note("got {{command}}", command=command)

            if "import" in command:
                # dummy IS THE GLOBALS, context IS THE LOCALS: IMPORTED NAMES
                # ARE BOUND IN context
                dummy={}
                if is_text(command['import']):
                    exec ("from " + command['import'] + " import *", dummy, context)
                else:
                    exec ("from " + command['import']['from'] + " import " + ",".join(listwrap(command['import']['vars'])), dummy, context)
                STDOUT.write(DONE)
            elif "set" in command:
                for k, v in command.set.items():
                    context[k] = v
                STDOUT.write(DONE)
            elif "get" in command:
                # local IS CONSULTED BEFORE context
                STDOUT.write(value2json({"out": coalesce(local.get(command['get']), context.get(command['get']))}))
                STDOUT.write('\n')
            elif "stop" in command:
                # ACKNOWLEDGE FIRST, THEN SIGNAL; THE while CONDITION ENDS THE LOOP
                STDOUT.write(DONE)
                please_stop.go()
            elif "exec" in command:
                if not is_text(command['exec']):
                    Log.error("exec expects only text")
                exec (command['exec'], context, local)
                STDOUT.write(DONE)
            else:
                for k, v in command.items():
                    # BUILD A CALL EXPRESSION AS TEXT; THE RESULT IS LEFT IN local['_return']
                    if is_list(v):
                        # LIST: POSITIONAL ARGUMENTS, EACH RENDERED AS JSON
                        exec ("_return = " + k + "(" + ",".join(map(value2json, v)) + ")", context, local)
                    else:
                        # OTHERWISE: KEYWORD ARGUMENTS, EACH VALUE RENDERED AS JSON
                        exec ("_return = " + k + "(" + ",".join(kk + "=" + value2json(vv) for kk, vv in v.items()) + ")", context, local)
                    STDOUT.write(value2json({"out": local['_return']}))
                    STDOUT.write('\n')
        except Exception as e:
            # REPORT THE FAILURE TO THE OTHER SIDE AND KEEP SERVING
            STDOUT.write(value2json({"err": e}))
            STDOUT.write('\n')
        finally:
            STDOUT.flush()
Esempio n. 58
0
def select(data, field_name):
    """
    return list with values from field_name

    :param data: A Cube, PartFlatList, UniqueIndex, A SINGLE DATA OBJECT, OR
                 AN ITERABLE OF RECORDS
    :param field_name: TEXT (DOTTED PATH), {"value": path}, OR A LIST OF FIELDS
    """
    # CONTAINERS WITH THEIR OWN SELECT IMPLEMENTATION
    if isinstance(data, Cube):
        return data._select(_normalize_selects(field_name))
    if isinstance(data, PartFlatList):
        return data.select(field_name)

    if isinstance(data, UniqueIndex):
        # THE SELECT ROUTINE REQUIRES dicts, NOT Data WHILE ITERATING
        data = data._data.values()

    if is_data(data):
        return select_one(data, field_name)

    if is_data(field_name):
        field_name = wrap(field_name)
        value = field_name.value
        if value in ["*", "."]:
            return data
        if value:
            # SIMPLIFY {"value":value} AS STRING
            field_name = value

    # SIMPLE PYTHON ITERABLE ASSUMED
    if is_text(field_name):
        path = split_field(field_name)
        if len(path) == 1:
            return FlatList([d[field_name] for d in data])
        output = FlatList()
        flat_list._select1(data, path, 0, output)
        return output

    # ONE FIELD OR MANY, BOTH END IN THE SAME GENERAL SELECT
    if is_list(field_name):
        keys = [_select_a_field(wrap(f)) for f in field_name]
    else:
        keys = [_select_a_field(field_name)]
    return _select(Data(), unwrap(data), keys, 0)
Esempio n. 59
0
def sort(data, fieldnames=None, already_normalized=False):
    """
    PASS A FIELD NAME, OR LIST OF FIELD NAMES, OR LIST OF STRUCTS WITH {"field":field_name, "sort":direction}

    :param data: LIST, OR ANY ITERABLE, OF VALUES TO SORT
    :param fieldnames: WHAT TO SORT BY; WHEN MISSING THE VALUES THEMSELVES ARE
                       COMPARED WITH value_compare
    :param already_normalized: True TO USE fieldnames AS-IS, SKIPPING
                               query._normalize_sort()
    :return: Null WHEN data IS MISSING; OTHERWISE THE SORTED VALUES (A FlatList
             OF UNWRAPPED VALUES WHEN fieldnames IS GIVEN)
    """
    try:
        if data == None:
            return Null

        if not fieldnames:
            return wrap(sort_using_cmp(data, value_compare))

        if already_normalized:
            formal = fieldnames
        else:
            formal = query._normalize_sort(fieldnames)

        # ONE (ACCESSOR, DIRECTION) PAIR PER SORT FIELD, IN PRIORITY ORDER
        funcs = [(jx_expression_to_function(f.value), f.sort) for f in formal]

        def comparer(left, right):
            # THE FIRST FIELD THAT DISTINGUISHES left FROM right DECIDES
            for func, sort_ in funcs:
                try:
                    result = value_compare(func(left), func(right), sort_)
                    if result != 0:
                        return result
                except Exception as e:
                    # PASS THE EXCEPTION AS cause, LIKE EVERY OTHER Log.error CALL
                    Log.error("problem with compare", cause=e)
            return 0

        if is_list(data):
            rows = data
        elif hasattr(data, "__iter__"):
            # MATERIALIZE OTHER ITERABLES BEFORE SORTING
            rows = list(data)
        else:
            Log.error("Do not know how to handle")
            return None

        return FlatList([unwrap(d) for d in sort_using_cmp(rows, cmp=comparer)])
    except Exception as e:
        Log.error("Problem sorting\n{{data}}", data=data, cause=e)