Exemple #1
0
    def to_esfilter(self, schema):
        """
        RETURN THE ES FILTER FOR `value in superset`

        :param schema: USED TO RESOLVE THE VARIABLE NAME TO ITS ES COLUMN(S)
        :return: MATCH_NONE WHEN THE VARIABLE HAS NO COLUMNS, A term/terms
                 FILTER WHEN value IS A VARIABLE, OTHERWISE A SCRIPT-BASED FILTER
        """
        if not is_op(self.value, Variable_):
            return Painless[self].to_es_script(schema).to_esfilter(schema)

        cols = schema.leaves(self.value.var)
        if not cols:
            return MATCH_NONE
        # ONLY THE FIRST MATCHING COLUMN IS USED
        col = first(cols)
        var = col.es_column

        is_single = is_literal(self.superset) and not is_many(self.superset.value)
        if col.jx_type == BOOLEAN:
            if is_single:
                return {"term": {var: value2boolean(self.superset.value)}}
            # map() IS LAZY IN PYTHON 3; BUILD A REAL LIST SO THE FILTER
            # HOLDS CONCRETE VALUES
            return {
                "terms": {
                    var: [value2boolean(v) for v in self.superset.value]
                }
            }

        if is_single:
            return {"term": {var: self.superset.value}}
        return {"terms": {var: self.superset.value}}
    def all_comments(self):
        """
        EMIT JUST THE COMMENTS

        GENERATOR: YIELDS THE COMMENTS ATTACHED BEFORE THIS NODE, THEN THE
        COMMENTS OF EVERY CHILD FIELD (IN node._fields ORDER), THEN THE
        COMMENTS ATTACHED AFTER. FALSY VALUES AND NON-Formatter VALUES
        YIELD NOTHING; A COLLECTION YIELDS THE COMMENTS OF EACH MEMBER.
        """
        if not self:
            return
        if is_many(self):
            for member in self:
                yield from member.all_comments()
            return
        if not isinstance(self, Formatter):
            return

        # LEADING COMMENTS
        yield from self.before.before_comment
        yield self.before.line_comment
        yield from self.before_comment
        yield self.line_comment

        # COMMENTS FOUND IN THE CHILDREN
        for field_name in self.node._fields:
            child = self[field_name]
            if not child:
                continue
            if is_many(child):
                for member in child:
                    yield from member.all_comments()
            elif isinstance(child, Formatter):
                yield from child.all_comments()

        # TRAILING COMMENTS
        yield from self.after.before_comment
        yield self.after.line_comment
        yield from self.after_comment
Exemple #3
0
 def _drill(d, p):
     """
     WALK DOWN THE PATH p, STARTING AT d, EMITTING EVERY VALUE FOUND AT THE END

     :param d: THE CURRENT VALUE; A COLLECTION IS EXPANDED MEMBER-BY-MEMBER
     :param p: SEQUENCE OF REMAINING PATH STEPS; EMPTY WHEN d IS THE TARGET
     :return: GENERATOR OF THE VALUES REACHED
     """
     if is_many(d):
         # COLLECTIONS ARE FLATTENED WITHOUT CONSUMING A PATH STEP
         for member in d:
             for leaf in _drill(member, p):
                 yield leaf
     elif p:
         # TAKE ONE STEP DOWN THE PATH
         for leaf in _drill(listwrap(d[p[0]]), p[1:]):
             yield leaf
     else:
         yield d
Exemple #4
0
def get_statcan_data(cube_id, coord, num):
    """
    RETURN DATA FOR ONE (OR MORE) COORDINATES

    :param cube_id: SENT AS productId IN THE REQUEST
    :param coord: ONE COORDINATE, OR A COLLECTION OF COORDINATES
    :param num: HOW FAR BACK TO GO, LENGTH OF SERIES TO CAPTURE
    :return: A DataFrame WHEN coord IS A SINGLE COORDINATE, OTHERWISE A LIST
             OF DataFrames IN THE SAME ORDER AS coord (None WHERE THE
             RESPONSE HAD NO ENTRY FOR THAT COORDINATE)
    """
    coordinates = [format_coordinate(c) for c in listwrap(coord)]
    data = http.post_json(
        "https://www150.statcan.gc.ca/t1/wds/rest/getDataFromCubePidCoordAndLatestNPeriods",
        json=[
            {"productId": cube_id, "coordinate": c, "latestN": num} for c in coordinates
        ],
    )
    output = [None] * len(coordinates)
    for d in data:
        points = d.object.vectorDataPoint
        # COLUMN NAMES COME FROM THE FIRST DATA POINT
        columns = [k for k, _ in points[0].items()]
        records = list(from_data(points))
        # BUILD THE FRAME IN ONE STEP: DataFrame.append() COPIED THE WHOLE
        # FRAME FOR EVERY ROW, AND IS NOT AVAILABLE IN pandas 2.0+
        if records:
            df = pd.DataFrame(records)
        else:
            df = pd.DataFrame(columns=columns)
        # RESPONSES ARE MATCHED TO REQUESTS BY COORDINATE, NOT BY POSITION
        output[coordinates.index(d.object.coordinate)] = df

    if is_many(coord):
        return output
    else:
        return output[0]
Exemple #5
0
 def convert(self, expr):
     """
     EXPAND INSTANCES OF name TO value

     :param expr: A PRIMITIVE, VARIABLE NAME, Date, QueryOp, STRUCTURE, OR
                  COLLECTION OF ANY OF THESE
     :return: expr WITH KNOWN DIMENSION NAMES REPLACED; PRIMITIVES AND
              UNRECOGNIZED VALUES ARE RETURNED UNCHANGED
     """
     # PRIMITIVES PASS THROUGH
     if expr is True or expr == None or expr is False:
         return expr
     if is_number(expr):
         return expr
     if expr == ".":
         return "."

     # TEXT MUST BE A VARIABLE NAME
     if is_variable_name(expr):
         return coalesce(self.dimensions[expr], expr)
     if is_text(expr):
         Log.error("{{name|quote}} is not a valid variable name", name=expr)
         return None  # ONLY REACHED IF Log.error DOES NOT RAISE

     if isinstance(expr, Date):
         return expr
     if is_op(expr, QueryOp):
         return self._convert_query(expr)

     if is_data(expr):
         if expr["from"]:
             return self._convert_query(expr)
         if len(expr) >= 2:
             # MANY PROPERTIES: TREAT AS A NAMED STRUCTURE, NOT AN EXPRESSION
             converted = {name: self.convert(value) for name, value in expr.leaves()}
             return dict_to_data(converted)
         # ONE PROPERTY: TREAT AS A SINGLE-CLAUSE EXPRESSION
         k, v = expr.items()[0]
         handler = converter_map.get(k, self._convert_bop)
         return handler(self, k, v)

     if is_many(expr):
         return list_to_data([self.convert(value) for value in expr])
     return expr
Exemple #6
0
 def convert(self, expr):
     """
     EXPAND INSTANCES OF name TO value

     :param expr: A PRIMITIVE, VARIABLE NAME, Date, QueryOp, STRUCTURE, OR
                  COLLECTION OF ANY OF THESE
     :return: expr WITH KNOWN DIMENSION NAMES REPLACED; PRIMITIVES AND
              UNRECOGNIZED VALUES ARE RETURNED UNCHANGED
     """
     # PRIMITIVES PASS THROUGH
     if expr is True or expr == None or expr is False:
         return expr
     if is_number(expr):
         return expr
     if expr == ".":
         return "."

     # TEXT MUST BE A VARIABLE NAME
     if is_variable_name(expr):
         return coalesce(self.dimensions[expr], expr)
     if is_text(expr):
         Log.error("{{name|quote}} is not a valid variable name", name=expr)
         return None  # ONLY REACHED IF Log.error DOES NOT RAISE

     if isinstance(expr, Date):
         return expr
     if is_op(expr, QueryOp):
         return self._convert_query(expr)

     if is_data(expr):
         if expr["from"]:
             return self._convert_query(expr)
         if len(expr) >= 2:
             # MANY PROPERTIES: TREAT AS A NAMED STRUCTURE, NOT AN EXPRESSION
             converted = {name: self.convert(value) for name, value in expr.leaves()}
             return wrap(converted)
         # ONE PROPERTY: TREAT AS A SINGLE-CLAUSE EXPRESSION
         k, v = expr.items()[0]
         handler = converter_map.get(k, self._convert_bop)
         return handler(self, k, v)

     if is_many(expr):
         return wrap([self.convert(value) for value in expr])
     return expr
Exemple #7
0
 def __init__(self, terms, **clauses):
     """
     :param terms: A SINGLE TERM, OR A COLLECTION OF TERMS
     :param clauses: ACCEPTED, BUT NOT USED IN THIS BODY
     """
     Expression.__init__(self, terms)
     # SHORTCUT: A COLLECTION OF TERMS IS ASSUMED TO BE A TUPLE
     self.terms = self.lang[TupleOp(terms)] if is_many(terms) else terms
 def __init__(self, terms):
     """
     :param terms: NOTHING, A SINGLE TERM, OR A COLLECTION OF TERMS;
                   self.terms IS ALWAYS STORED AS A COLLECTION
     """
     Expression.__init__(self, terms)
     if terms == None:
         normalized = []
     elif is_many(terms):
         normalized = terms
     else:
         # A LONE TERM BECOMES A ONE-ELEMENT LIST
         normalized = [terms]
     self.terms = normalized
Exemple #9
0
def union_type(*types):
    """
    COMBINE ALL THE GIVEN TYPES WITH `|`, STARTING FROM T_IS_NULL

    :param types: THE TYPES, AS SEPARATE PARAMETERS (NOT ONE COLLECTION)
    :return: THE COMBINED TYPE; T_IS_NULL WHEN NO TYPES ARE GIVEN
    """
    got_one_collection = len(types) == 1 and is_many(types[0])
    if got_one_collection:
        Log.error("expecting many parameters")

    combined = T_IS_NULL
    for json_type in types:
        combined |= json_type
    return combined
Exemple #10
0
    def to_es(self, schema):
        """
        RETURN THE ES FILTER FOR `value in superset`

        :param schema: USED TO RESOLVE THE VARIABLE NAME TO ITS ES COLUMN(S)
        :return: MATCH_NONE, A term/terms FILTER, AN EXPANDED OR-OF-EQUALITIES,
                 A NESTED EXISTS FILTER, OR (THE HARD WAY) A SCRIPT-BASED FILTER
        """
        value = self.value
        superset = self.superset

        if is_op(value, Variable):
            cols = schema.leaves(value.var)
            if not cols:
                return MATCH_NONE
            # ONLY THE FIRST MATCHING COLUMN IS USED
            col = first(cols)
            es_column = col.es_column

            if is_literal(superset):
                candidates = superset.value
                many = is_many(candidates)
                if col.jx_type == BOOLEAN:
                    if many:
                        return {
                            "terms": {
                                es_column: [value2boolean(c) for c in candidates]
                            }
                        }
                    return {"term": {es_column: value2boolean(candidates)}}
                if many:
                    return {"terms": {es_column: candidates}}
                return {"term": {es_column: candidates}}

            if is_op(superset, TupleOp):
                # ONE EQUALITY PER MEMBER OF THE TUPLE
                alternatives = [EqOp([value, s]) for s in superset.terms]
                return OrOp(alternatives).partial_eval().to_es(schema)

        if (
            is_op(value, NestedOp)
            and is_literal(superset)
            and is_op(value.select, Variable)
        ):
            # PUSH THE MEMBERSHIP TEST INTO THE NESTED where CLAUSE
            nested = NestedOp(
                path=value.path,
                select=NULL,
                where=AndOp([value.where, InOp([value.select, superset])]),
            )
            return ES52[nested].exists().partial_eval().to_es(schema)

        # THE HARD WAY
        return Painless[self].to_es_script(schema).to_es(schema)
Exemple #11
0
def value_to_json_type(value):
    """
    RETURN THE JSON TYPE THAT DESCRIBES THE GIVEN VALUE

    :param value: A COLLECTION, A STRUCTURE, OR A PRIMITIVE
    :return: THE TYPE LOOKED UP BY CLASS FOR PRIMITIVES; BUILT RECURSIVELY
             FOR COLLECTIONS AND STRUCTURES
    """
    if is_many(value):
        # THE MEMBER TYPES ARE MERGED INTO ONE
        member_types = (value_to_json_type(member) for member in value)
        return _primitive(_A, union_type(*member_types))
    if is_data(value):
        properties = {name: value_to_json_type(child) for name, child in value.items()}
        return JsonType(**properties)
    return _python_type_to_json_type[value.__class__]
Exemple #12
0
    def wrap(query, container, namespace):
        """
        NORMALIZE QUERY SO IT CAN STILL BE JSON

        :param query: A QueryOp OR None (BOTH RETURNED AS-IS), OTHERWISE A
                      QUERY STRUCTURE TO NORMALIZE
        :param container: PROVIDES get_table() TO RESOLVE query['from']
        :param namespace: NOT USED IN THIS BODY
        :return: A QueryOp WITH NORMALIZED select, edges/groupby, where,
                 window, sort AND limit
        """
        if is_op(query, QueryOp) or query == None:
            return query

        # NOTE(review): THIS `wrap` IS THE MODULE-LEVEL NAME, NOT THIS METHOD -
        # PRESUMABLY THE DATA WRAPPER THAT ALLOWS ATTRIBUTE ACCESS; CONFIRM
        query = wrap(query)
        table = container.get_table(query['from'])
        schema = table.schema
        output = QueryOp(
            frum=table,
            format=query.format,
            chunk_size=query.chunk_size,
            destination=query.destination,
        )

        # MUST RUN BEFORE temper_limit IS USED ON THE NEXT LINE
        _import_temper_limit()
        output.limit = temper_limit(query.limit, query)

        if query.select or is_many(query.select) or is_data(query.select):
            output.select = _normalize_selects(query.select,
                                               query.frum,
                                               schema=schema)
        else:
            # NO select GIVEN: THE DEFAULT DEPENDS ON WHETHER THIS IS AN AGGREGATE
            if query.edges or query.groupby:
                output.select = DEFAULT_SELECT
            else:
                output.select = _normalize_selects(".", query.frum)

        # edges AND groupby ARE MUTUALLY EXCLUSIVE; THE UNUSED ONE IS SET TO Null
        if query.groupby and query.edges:
            Log.error(
                "You can not use both the `groupby` and `edges` clauses in the same query!"
            )
        elif query.edges:
            output.edges = _normalize_edges(query.edges,
                                            limit=output.limit,
                                            schema=schema)
            output.groupby = Null
        elif query.groupby:
            output.edges = Null
            output.groupby = _normalize_groupby(query.groupby,
                                                limit=output.limit,
                                                schema=schema)
        else:
            output.edges = Null
            output.groupby = Null

        # THE where CLAUSE(S) ARE COMBINED UNDER A SINGLE "and"
        output.where = _normalize_where({"and": listwrap(query.where)},
                                        schema=schema)
        output.window = [_normalize_window(w) for w in listwrap(query.window)]
        output.sort = _normalize_sort(query.sort)
        # THE LIMIT IS VALIDATED LAST, AFTER IT HAS ALREADY BEEN PASSED TO
        # THE edges/groupby NORMALIZERS ABOVE
        if output.limit != None and (not mo_math.is_integer(output.limit)
                                     or output.limit < 0):
            Log.error("Expecting limit >= 0")

        return output
Exemple #13
0
    def __new__(cls, terms):
        """
        :param terms: A COLLECTION (A PLAIN INSTANCE IS MADE), OR A MAPPING OF
                      name -> value THAT IS REWRITTEN AS EqOp/InOp CLAUSES
        :return: AN INSTANCE OF cls, A SINGLE InOp/EqOp, OR AN AndOp OF THEM
        """
        if is_many(terms):
            return object.__new__(cls)

        items = terms.items()
        if len(items) != 1:
            # ONE CLAUSE PER PROPERTY, ALL OF WHICH MUST HOLD
            clauses = []
            for lhs, rhs in items:
                # A JSON ARRAY ON THE RIGHT MEANS MEMBERSHIP, NOT EQUALITY
                op = InOp if rhs.json.startswith("[") else EqOp
                clauses.append(cls.lang[op([Variable(lhs), rhs])])
            return cls.lang[AndOp(clauses)]

        if is_many(items[0][1]):
            return cls.lang[InOp(items[0])]
        return cls.lang[EqOp(items[0])]
Exemple #14
0
 def __init__(self, terms, separator=Literal(""), default=NULL):
     """
     :param terms: MUST BE A COLLECTION OF TERMS
     :param separator: MUST BE A LITERAL
     :param default: STORED AS self.default
     """
     terms_ok = is_many(terms)
     if not terms_ok:
         Log.error("Expecting many terms")
     separator_ok = is_literal(separator)
     if not separator_ok:
         Log.error("Expecting a literal separator")

     # THE BASE CLASS IS GIVEN EVERY OPERAND, INCLUDING separator AND default
     all_operands = terms + [separator, default]
     Expression.__init__(self, all_operands)
     self.terms, self.separator, self.default = terms, separator, default
Exemple #15
0
    def send_queries(self, subtest, places=6):
        """
        RUN subtest.query ONCE FOR EVERY `expecting*` PROPERTY OF THE SUBTEST,
        AND COMPARE EACH RESPONSE TO THE EXPECTATION

        :param subtest: HAS name, query, AND ONE OR MORE `expecting`,
                        `expecting_error` OR `expecting_<format>` PROPERTIES
        :param places: PASSED THROUGH TO compare_to_expected
        :return: NOTHING; ANY FAILURE IS REPORTED THROUGH Log.error
        """
        subtest = wrap(subtest)

        try:
            # EXECUTE QUERY
            num_expectations = 0
            for i, (k, v) in enumerate(subtest.items()):
                if k in ["expecting", "expecting_error"]:  # NO FORMAT REQUESTED (TO TEST DEFAULT FORMATS)
                    format = None
                elif k.startswith("expecting_"):  # WHAT FORMAT ARE WE REQUESTING
                    format = k[len("expecting_"):]
                else:
                    # NOT AN EXPECTATION
                    continue

                num_expectations += 1
                expected = v

                # THE SHARED QUERY IS MODIFIED IN PLACE BEFORE IT IS SERIALIZED
                subtest.query.format = format
                subtest.query.meta.testing = (num_expectations == 1)  # MARK FIRST QUERY FOR TESTING SO FULL METADATA IS AVAILABLE BEFORE QUERY EXECUTION
                query = value2json(subtest.query).encode('utf8')
                # EXECUTE QUERY
                response = self.try_till_response(self.testing.query, data=query)

                if k == "expecting_error":
                    # A NON-200 RESPONSE CONTAINING THE EXPECTED TEXT IS A PASS
                    if response.status_code != 200:
                        message = response.content.decode('utf8')
                        if v in message:
                            Log.note("PASS {{name|quote}} (expected error)", name=subtest.name)
                            continue
                        else:
                            Log.error("expecting {{expecting}} not {{error}}", expecting=v, error=message)
                    else:
                        Log.error("expecting a failure")
                else:
                    if response.status_code != 200:
                        error(response)
                    result = json2value(response.all_content.decode('utf8'))

                container = jx_elasticsearch.new_instance(self._es_test_settings)
                query = QueryOp.wrap(subtest.query, container, container.namespace)
                # THE LENGTH IS ONLY CHECKED WHEN THE EXPECTED DATA IS A COLLECTION
                if is_many(expected.data) and len(result.data) != len(expected.data):
                    Log.error(
                        "expecting data (len={{rlen}}) to have length of {{elen}}",
                        rlen=len(result.data),
                        elen=len(expected.data)
                    )

                compare_to_expected(query, result, expected, places)
                Log.note("PASS {{name|quote}} (format={{format}})", name=subtest.name, format=format)
            if num_expectations == 0:
                Log.error(
                    "Expecting test {{name|quote}} to have property named 'expecting_*' for testing the various format clauses",
                    name=subtest.name
                )
        except Exception as e:
            # EVERY FAILURE IS RE-REPORTED WITH THE NAME OF THE TEST
            Log.error("Failed test {{name|quote}}", name=subtest.name, cause=e)
Exemple #16
0
def value2intlist(value):
    """
    CONVERT THE GIVEN VALUE TO A LIST OF int

    :param value: NOTHING, A COLLECTION, AN int, OR TEXT
    :return: [] FOR NOTHING OR BLANK TEXT; OTHERWISE THE int VALUE(S), WITH
             EMPTY STRINGS AND NULLS IN A COLLECTION SKIPPED
    """
    # NOTE(review): `==` (NOT `is`) IS KEPT AS WRITTEN - PRESUMABLY SO
    # NULL-LIKE OBJECTS ALSO MATCH; CONFIRM
    if value == None:
        return []
    if is_many(value):
        return [int(item) for item in value if item != "" and item != None]
    if isinstance(value, int):
        return [value]
    if value.strip() == "":
        return []
    return [int(value)]
Exemple #17
0
def _parse_traceback(tb):
    """
    CONVERT A TRACEBACK CHAIN TO A LIST OF {"file", "line", "method"} DICTS

    :param tb: A TRACEBACK OBJECT (OR None FOR AN EMPTY RESULT)
    :return: ONE DICT PER TRACEBACK ENTRY, IN THE REVERSE OF tb_next ORDER
    """
    if is_many(tb):
        get_logger().error("Expecting a tracback object, not a list")

    frames = []
    cursor = tb
    while cursor is not None:
        code = cursor.tb_frame.f_code
        frames.append({
            "file": code.co_filename,
            "line": cursor.tb_lineno,
            "method": code.co_name,
        })
        cursor = cursor.tb_next
    frames.reverse()
    return frames
Exemple #18
0
    def to_esfilter(self, schema):
        """
        RETURN THE ES FILTER FOR `value in superset`

        :param schema: USED TO RESOLVE THE VARIABLE NAME TO ITS ES COLUMN(S)
        :return: MATCH_NONE, A term/terms FILTER, AN EXPANDED OR-OF-EQUALITIES,
                 OR (THE HARD WAY) A SCRIPT-BASED FILTER
        """
        superset = self.superset

        if is_op(self.value, Variable_):
            cols = schema.leaves(self.value.var)
            if not cols:
                return MATCH_NONE
            # ONLY THE FIRST MATCHING COLUMN IS USED
            col = first(cols)
            es_column = col.es_column

            if is_literal(superset):
                candidates = superset.value
                many = is_many(candidates)
                if col.jx_type == BOOLEAN:
                    if many:
                        return {
                            "terms": {
                                es_column: [value2boolean(c) for c in candidates]
                            }
                        }
                    return {"term": {es_column: value2boolean(candidates)}}
                if many:
                    return {"terms": {es_column: candidates}}
                return {"term": {es_column: candidates}}

            if is_op(superset, TupleOp):
                # ONE EQUALITY PER MEMBER OF THE TUPLE
                alternatives = [EqOp([self.value, s]) for s in superset.terms]
                return OrOp(alternatives).partial_eval().to_esfilter(schema)

        # THE HARD WAY
        return Painless[self].to_es_script(schema).to_esfilter(schema)
Exemple #19
0
 def to_es(self, schema):
     """
     RETURN THE ES FILTER FOR `lhs == rhs`

     :param schema: USED TO RESOLVE THE VARIABLE NAME TO ITS ES COLUMN
     :return: A term/terms FILTER WHEN COMPARING A VARIABLE TO A LITERAL,
              OTHERWISE A SCRIPT-BASED FILTER
     """
     simple = is_op(self.lhs, Variable_) and is_literal(self.rhs)
     if not simple:
         return Painless[self].to_es_script(schema).to_es(schema)

     column = self.lhs.var
     leaves = schema.leaves(column)
     if leaves:
         # THE VARIABLE NAME IS KEPT AS-IS WHEN THE SCHEMA DOES NOT KNOW IT
         column = first(leaves).es_column

     rhs = self.rhs.value
     if not is_many(rhs):
         return {"term": {column: rhs}}
     if len(rhs) == 1:
         # A ONE-MEMBER COLLECTION IS TREATED AS A SINGLE VALUE
         return {"term": {column: first(rhs)}}
     return {"terms": {column: rhs}}
Exemple #20
0
 def __eq__(self, other):
     """
     :param other: NOTHING, TEXT, A COLLECTION, OR A SINGLE VALUE
     :return: True/False; WHEN other IS NOTHING, THE RESULT IS WHETHER self IS FALSY
     """
     if other == None:
         return not self.__bool__()
     if is_text(other):
         # COMPARE THE CONCATENATION OF THE MEMBERS TO THE TEXT
         try:
             return "".join(self) == other
         except Exception:
             return False
     if is_many(other):
         # PAIRWISE; zip_longest PADS THE SHORTER SIDE
         return all(mine == theirs for mine, theirs in zip_longest(self, other))
     if self.length() == 1:
         return self[0] == other
     if not self:
         return False
     Log.error("do not know how to handle")
Exemple #21
0
def toString(val):
    """
    CONVERT THE GIVEN VALUE TO TEXT

    :param val: ANY VALUE
    :return: "" FOR NOTHING; PRETTY JSON FOR STRUCTURES, COLLECTIONS AND
             OBJECTS WITH __data__; "<n> seconds" FOR DURATIONS; DECODED TEXT
             FOR BYTES (utf8, THEN latin1); OTHERWISE text(val)
    """
    if _Duration is None:
        _late_import()

    if val == None:
        return ""
    if is_data(val) or is_many(val):
        return _json_encoder(val, pretty=True)
    if hasattr(val, "__data__"):
        return _json_encoder(val.__data__(), pretty=True)
    if hasattr(val, "__json__"):
        return val.__json__()
    if isinstance(val, _Duration):
        return text(round(val.seconds, places=4)) + " seconds"
    if isinstance(val, timedelta):
        return text(round(val.total_seconds(), 3)) + " seconds"
    if is_text(val):
        return val

    is_bytes = isinstance(val, binary_type)
    if is_bytes:
        try:
            return val.decode("utf8")
        except Exception:
            pass  # NOT utf8; FALL BACK TO latin1 BELOW

    try:
        return val.decode("latin1") if is_bytes else text(val)
    except Exception as cause:
        if not _Log:
            _late_import()

        _Log.error(text(type(val)) +
                   " type can not be converted to unicode",
                   cause=cause)
Exemple #22
0
    def new_instance(type, frum, schema=None):
        """
        FACTORY: RETURN A CONTAINER (OR QUERY RESULT) FOR THE GIVEN frum

        :param type: NOT USED IN THIS BODY
        :param frum: A Container, CUBE, QUERY, COLLECTION, NAME, OR STRUCTURE
        :param schema: NOT USED IN THIS BODY
        """
        if not type2container:
            _delayed_imports()

        # ALREADY USABLE: RETURN AS-IS
        if isinstance(frum, Container):
            return frum
        if isinstance(frum, _Cube):
            return frum

        if isinstance(frum, _Query):
            return _run(frum)
        if is_many(frum):
            return _ListContainer(frum)

        if is_text(frum):
            # USE DEFAULT STORAGE TO FIND Container
            defaults = config.default.settings
            if not defaults:
                Log.error(
                    "expecting jx_base.container.config.default.settings to contain default elasticsearch connection info"
                )

            index_name = join_field(split_field(frum)[:1:])
            settings = set_default({"index": index_name, "name": frum}, defaults)
            settings.type = None  # WE DO NOT WANT TO INFLUENCE THE TYPE BECAUSE NONE IS IN THE frum STRING ANYWAY
            return type2container["elasticsearch"](settings)

        if is_data(frum):
            frum = wrap(frum)
            if frum.type and type2container[frum.type]:
                return type2container[frum.type](frum.settings)
            if frum["from"]:
                # A STRUCTURE WITH A "from" IS TREATED AS A QUERY
                frum = copy(frum)
                frum["from"] = Container(frum["from"])
                return _Query.wrap(frum)
            Log.error("Do not know how to handle {{frum|json}}", frum=frum)
        else:
            Log.error("Do not know how to handle {{type}}",
                      type=frum.__class__.__name__)
Exemple #23
0
    def new_instance(type, frum, schema=None):
        """
        FACTORY: RETURN A CONTAINER (OR QUERY RESULT) FOR THE GIVEN frum

        :param type: NOT USED IN THIS BODY
        :param frum: A Container, CUBE, QUERY, COLLECTION, NAME, OR STRUCTURE
        :param schema: NOT USED IN THIS BODY
        """
        if not type2container:
            _delayed_imports()

        # ALREADY USABLE: RETURN AS-IS
        if isinstance(frum, Container):
            return frum
        if isinstance(frum, _Cube):
            return frum

        if isinstance(frum, _Query):
            return _run(frum)
        if is_many(frum):
            return _ListContainer(frum)

        if is_text(frum):
            # USE DEFAULT STORAGE TO FIND Container
            defaults = config.default.settings
            if not defaults:
                Log.error("expecting jx_base.container.config.default.settings to contain default elasticsearch connection info")

            index_name = join_field(split_field(frum)[:1:])
            settings = set_default({"index": index_name, "name": frum}, defaults)
            settings.type = None  # WE DO NOT WANT TO INFLUENCE THE TYPE BECAUSE NONE IS IN THE frum STRING ANYWAY
            return type2container["elasticsearch"](settings)

        if is_data(frum):
            frum = wrap(frum)
            if frum.type and type2container[frum.type]:
                return type2container[frum.type](frum.settings)
            if frum["from"]:
                # A STRUCTURE WITH A "from" IS TREATED AS A QUERY
                frum = copy(frum)
                frum["from"] = Container(frum["from"])
                return _Query.wrap(frum)
            Log.error("Do not know how to handle {{frum|json}}", frum=frum)
        else:
            Log.error("Do not know how to handle {{type}}", type=frum.__class__.__name__)
Exemple #24
0
    def write_lines(self, key, lines):
        """
        GZIP THE GIVEN LINES INTO A TEMPORARY FILE, AND STORE THE RESULT
        UNDER key + ".json.gz"

        :param key: VERIFIED WITH self._verify_key_format BEFORE USE
        :param lines: ITERABLE OF TEXT LINES; A MEMBER MAY ITSELF BE A
                      COLLECTION OF LINES
        :return: NOTHING; Log.error IS CALLED AFTER THE THIRD FAILED ATTEMPT,
                 OR IMMEDIATELY FOR 'Access Denied' / "No space left on device"
        """
        self._verify_key_format(key)
        storage = self.bucket.new_key(key + ".json.gz")

        # THE CONTEXT MANAGERS RELEASE THE TEMPORARY FILE (AND FINISH THE
        # ARCHIVE) EVEN WHEN ENCODING OR UPLOADING FAILS
        with TemporaryFile() as buff:
            count = 0
            with gzip.GzipFile(fileobj=buff, mode='w') as archive:
                for l in lines:
                    for ll in (l if is_many(l) else [l]):
                        archive.write(ll.encode("utf8"))
                        archive.write(b"\n")
                        count += 1

            # CLOSING THE ARCHIVE DOES NOT CLOSE buff; ITS POSITION IS THE COMPRESSED SIZE
            file_length = buff.tell()

            retry = 3
            while retry:
                try:
                    with Timer("Sending {{count}} lines in {{file_length|comma}} bytes for {{key}}", {"key": key, "file_length": file_length, "count": count}, verbose=self.settings.debug):
                        # REWIND, SO EVERY ATTEMPT SENDS THE WHOLE FILE
                        buff.seek(0)
                        storage.set_contents_from_file(buff)
                    break
                except Exception as e:
                    e = Except.wrap(e)
                    retry -= 1
                    if retry == 0 or 'Access Denied' in e or "No space left on device" in e:
                        Log.error("could not push data to s3", cause=e)
                    else:
                        Log.warning("could not push data to s3", cause=e)

        if self.settings.public:
            storage.set_acl('public-read')
        return
Exemple #25
0
def value2url_param(value):
    """
    ENCODE THE GIVEN VALUE FOR USE IN A URL

    :param value: NOTHING, A STRUCTURE, TEXT, BYTES, A COLLECTION, OR ANY
                  OTHER VALUE THAT CAN BE CONVERTED TO JSON
    :return: ascii URL (None WHEN value IS NOTHING)
    """
    from mo_json import value2json, json2value

    def _encode(chars):
        return "".join(_map2url[c] for c in chars.encode("utf8"))

    if value == None:
        return None

    if is_data(value):
        # name=value PAIRS, SORTED BY NAME; EMPTY VALUES ARE LEFT OUT
        pairs = []
        for k, v in sorted(to_data(value).leaves(), key=lambda p: p[0]):
            kk = value2url_param(k)
            vv = value2url_param(v)
            if vv or vv == 0:
                pairs.append(kk + "=" + vv)
        return "&".join(pairs)

    if is_text(value):
        try:
            # TEXT THAT PARSES AS JSON IS SENT AS A JSON STRING; OTHER TEXT IS SENT AS-IS
            json2value(value)
            return _encode(value2json(value))
        except Exception:
            return _encode(value)

    if is_binary(value):
        return "".join(_map2url[c] for c in value)

    if is_many(value):
        # COMMA-SEPARATED MEMBERS; EMPTY VALUES ARE LEFT OUT
        encoded = (value2url_param(v) for v in value)
        return ",".join(vv for vv in encoded if vv or vv == 0)

    return _encode(value2json(value))
Exemple #26
0
def run(query, container=Null):
    """
    THIS FUNCTION IS SIMPLY SWITCHING BASED ON THE query["from"] CONTAINER,
    BUT IT IS ALSO PROCESSING A list CONTAINER; SEPARATE TO A ListContainer

    :param query: THE QUERY TO RUN (NORMALIZED WITH QueryOp.wrap)
    :param container: WHERE TO RUN THE QUERY; WHEN NOT GIVEN, query["from"] IS USED
    :return: WHATEVER THE CONTAINER'S OWN query() RETURNS, OR THE RESULT
             PACKAGED AS cube, table, OR {"meta": {"format": "list"}, "data": ...}
    """
    if container == None:
        # NO CONTAINER GIVEN: TAKE IT FROM THE QUERY
        container = wrap(query)["from"]
        query_op = QueryOp.wrap(query,
                                container=container,
                                namespace=container.schema)
    else:
        query_op = QueryOp.wrap(query, container, container.namespace)

    # DISPATCH ON THE KIND OF CONTAINER; BRANCHES THAT DO NOT return LEAVE
    # `container` READY FOR THE LIST PROCESSING BELOW
    if container == None:
        # THE QUERY HAD NO "from" EITHER
        from jx_python.containers.list_usingPythonList import DUAL

        return DUAL.query(query_op)
    elif isinstance(container, Container):
        return container.query(query_op)
    elif is_many(container):
        container = wrap(list(container))
    elif isinstance(container, Cube):
        # A CUBE THAT IS NOT AGGREGATED FALLS THROUGH TO THE CODE BELOW
        if is_aggs(query_op):
            return cube_aggs(container, query_op)
    elif is_op(container, QueryOp):
        # THE SOURCE IS ITSELF A QUERY: RUN IT FIRST
        container = run(container)
    elif is_data(container):
        # THE SOURCE IS A QUERY STRUCTURE: RUN IT AGAINST ITS OWN "from"
        query = container
        container = query["from"]
        container = run(QueryOp.wrap(query, container, container.namespace),
                        container)
    else:
        Log.error("Do not know how to handle {{type}}",
                  type=container.__class__.__name__)

    if is_aggs(query_op):
        container = list_aggs(container, query_op)
    else:  # SETOP
        if query_op.where is not TRUE:
            container = filter(container, query_op.where)

        if query_op.sort:
            container = sort(container, query_op.sort, already_normalized=True)

        if query_op.select:
            container = select(container, query_op.select)

    if query_op.window:
        if isinstance(container, Cube):
            container = list(container.values())

        # THE RETURN VALUE OF window() IS NOT USED
        for param in query_op.window:
            window(container, param)

    # AT THIS POINT frum IS IN LIST FORMAT, NOW PACKAGE RESULT
    if query_op.format == "cube":
        container = list2cube(container)
    elif query_op.format == "table":
        container = list2table(container)
        container.meta.format = "table"
    else:
        container = wrap({"meta": {"format": "list"}, "data": container})

    return container
Exemple #27
0
def run(query, container=Null):
    """
    THIS FUNCTION IS SIMPLY SWITCHING BASED ON THE query["from"] CONTAINER,
    BUT IT IS ALSO PROCESSING A list CONTAINER; SEPARATE TO A ListContainer

    :param query: THE QUERY TO RUN (NORMALIZED WITH QueryOp.wrap)
    :param container: WHERE TO RUN THE QUERY; WHEN NOT GIVEN, query["from"] IS USED
    :return: WHATEVER THE CONTAINER'S OWN query() RETURNS, OR THE RESULT
             PACKAGED AS cube, table, OR {"meta": {"format": "list"}, "data": ...}
    """
    if container == None:
        # NO CONTAINER GIVEN: TAKE IT FROM THE QUERY
        container = wrap(query)["from"]
        query_op = QueryOp.wrap(query, container=container, namespace=container.schema)
    else:
        query_op = QueryOp.wrap(query, container, container.namespace)

    # DISPATCH ON THE KIND OF CONTAINER; BRANCHES THAT DO NOT return LEAVE
    # `container` READY FOR THE LIST PROCESSING BELOW
    if container == None:
        # THE QUERY HAD NO "from" EITHER
        from jx_python.containers.list_usingPythonList import DUAL

        return DUAL.query(query_op)
    elif isinstance(container, Container):
        return container.query(query_op)
    elif is_many(container):
        container = wrap(list(container))
    elif isinstance(container, Cube):
        # A CUBE THAT IS NOT AGGREGATED FALLS THROUGH TO THE CODE BELOW
        if is_aggs(query_op):
            return cube_aggs(container, query_op)
    elif is_op(container, QueryOp):
        # THE SOURCE IS ITSELF A QUERY: RUN IT FIRST
        container = run(container)
    elif is_data(container):
        # THE SOURCE IS A QUERY STRUCTURE: RUN IT AGAINST ITS OWN "from"
        query = container
        container = query["from"]
        container = run(QueryOp.wrap(query, container, container.namespace), container)
    else:
        Log.error(
            "Do not know how to handle {{type}}", type=container.__class__.__name__
        )

    if is_aggs(query_op):
        container = list_aggs(container, query_op)
    else:  # SETOP
        if query_op.where is not TRUE:
            container = filter(container, query_op.where)

        if query_op.sort:
            container = sort(container, query_op.sort, already_normalized=True)

        if query_op.select:
            container = select(container, query_op.select)

    if query_op.window:
        if isinstance(container, Cube):
            container = list(container.values())

        # THE RETURN VALUE OF window() IS NOT USED
        for param in query_op.window:
            window(container, param)

    # AT THIS POINT frum IS IN LIST FORMAT, NOW PACKAGE RESULT
    if query_op.format == "cube":
        container = convert.list2cube(container)
    elif query_op.format == "table":
        container = convert.list2table(container)
        container.meta.format = "table"
    else:
        container = wrap({"meta": {"format": "list"}, "data": container})

    return container
def assertAlmostEqual(test,
                      expected,
                      digits=None,
                      places=None,
                      msg=None,
                      delta=None):
    """
    RECURSIVELY COMPARE test AGAINST expected; REPORT ANY MISMATCH VIA Log.error

    Both arguments are passed through `unwrap` first.  The comparison is
    driven by the shape of `expected`:

    * text, or anything not covered below - delegated to
      `assertAlmostEqualValue`
    * data (mapping) - every key of `expected` is compared with the same key
      of `test`; keys found only in `test` are not examined
    * set (with a container `test`) - element counts must agree and every
      expected element must match at least one element of `test`
    * plain function - called with `test`; its result is returned
    * iterables - compared pairwise, position by position

    :param test: the value produced by the code under test
    :param expected: the pattern `test` must match
    :param digits: forwarded unchanged to `assertAlmostEqualValue`
    :param places: forwarded unchanged to `assertAlmostEqualValue`
    :param msg: forwarded message; prefixed with the key name when both
                sides are data
    :param delta: forwarded unchanged to `assertAlmostEqualValue`
    :return: None, except when `expected` is a function (its result is
             returned)
    """
    show_detail = True
    test = unwrap(test)
    expected = unwrap(expected)
    try:
        if test is None and (is_null_op(expected) or expected is None):
            return
        elif test is expected:
            return
        elif is_text(expected):
            assertAlmostEqualValue(test,
                                   expected,
                                   msg=msg,
                                   digits=digits,
                                   places=places,
                                   delta=delta)
        elif isinstance(test, UniqueIndex):
            # A NON-EMPTY SYMMETRIC DIFFERENCE MEANS THE TWO SIDES DISAGREE
            if test ^ expected:
                Log.error("Sets do not match")
        elif is_data(expected) and is_data(test):
            # ONLY THE KEYS NAMED IN expected ARE CHECKED
            for k, e in unwrap(expected).items():
                t = test.get(k)
                assertAlmostEqual(t,
                                  e,
                                  msg=coalesce(msg, "") + "key " + quote(k) +
                                  ": ",
                                  digits=digits,
                                  places=places,
                                  delta=delta)
        elif is_data(expected):
            if is_many(test):
                # A ONE-ELEMENT SEQUENCE IS ACCEPTED IN PLACE OF THE DATA ITSELF
                test = list(test)
                if len(test) != 1:
                    Log.error("Expecting data, not a list")
                test = test[0]
            for k, e in expected.items():
                if is_text(k):
                    t = mo_dots.get_attr(test, literal_field(k))
                else:
                    t = test[k]
                assertAlmostEqual(t,
                                  e,
                                  msg=msg,
                                  digits=digits,
                                  places=places,
                                  delta=delta)
        elif is_container(test) and isinstance(expected, set):
            test = set(to_data(t) for t in test)
            if len(test) != len(expected):
                # FIX: PLACEHOLDER WAS {{expectedtest|...}}, WHICH MATCHES NO
                # SUPPLIED PARAMETER, SO THE EXPECTED VALUE WAS NEVER SHOWN
                Log.error(
                    "Sets do not match, element count different:\n{{test|json|indent}}\nexpecting{{expected|json|indent}}",
                    test=test,
                    expected=expected)

            for e in expected:
                for t in test:
                    try:
                        assertAlmostEqual(t,
                                          e,
                                          msg=msg,
                                          digits=digits,
                                          places=places,
                                          delta=delta)
                        break
                    except Exception as _:
                        # NOT A MATCH FOR THIS CANDIDATE; TRY THE NEXT ONE
                        pass
                else:
                    # NO CANDIDATE IN test MATCHED e
                    Log.error(
                        "Sets do not match. {{value|json}} not found in {{test|json}}",
                        value=e,
                        test=test)

        elif isinstance(expected, types.FunctionType):
            # expected IS A CUSTOM CHECKER; LET IT DECIDE
            return expected(test)
        elif hasattr(test, "__iter__") and hasattr(expected, "__iter__"):
            # CLASS NAMES ARE COMPARED AS TEXT SO numpy/pandas NEED NOT BE IMPORTED
            if test.__class__.__name__ == "ndarray":  # numpy
                test = test.tolist()
            elif test.__class__.__name__ == "DataFrame":  # pandas
                test = test[test.columns[0]].values.tolist()
            elif test.__class__.__name__ == "Series":  # pandas
                test = test.values.tolist()

            if not expected and test == None:
                return
            if expected == None:
                expected = []  # REPRESENT NOTHING
            # zip_longest PADS THE SHORTER SIDE WITH None, SO A LENGTH
            # DIFFERENCE SHOWS UP AS AN ELEMENT COMPARED AGAINST None
            for t, e in zip_longest(test, expected):
                assertAlmostEqual(t,
                                  e,
                                  msg=msg,
                                  digits=digits,
                                  places=places,
                                  delta=delta)
        else:
            assertAlmostEqualValue(test,
                                   expected,
                                   msg=msg,
                                   digits=digits,
                                   places=places,
                                   delta=delta)
    except Exception as e:
        # WRAP ANY FAILURE WITH THE FULL test/expected PAIR FOR CONTEXT
        Log.error(
            "{{test|json|limit(10000)}} does not match expected {{expected|json|limit(10000)}}",
            test=test if show_detail else "[can not show]",
            expected=expected if show_detail else "[can not show]",
            cause=e)
Exemple #29
0
 def typer(v):
     """
     RETURN THE JSON TYPE OF v

     A single value is looked up by its class in python_type_to_json_type;
     a "many" value has the types of all its members (found recursively)
     combined by merge_json_type.
     """
     if not is_many(v):
         return python_type_to_json_type[v.__class__]
     return merge_json_type(*(typer(member) for member in v))
Exemple #30
0
    def write_lines(self, key, lines):
        """
        GZIP lines INTO A TEMP FILE, THEN UPLOAD IT AS <key>.json.gz

        :param key: name of the object (without extension); checked by
                    self._verify_key_format before use
        :param lines: iterable of text lines; an element may itself be a
                      "many" of lines, in which case each member is written
                      as its own line
        :return: None

        The upload is attempted up to three times.  When VERIFY_UPLOAD is
        set, both the temp file and the stored object are read back and
        compared with `lines`; a failure is reported with TRY_AGAIN_LATER.
        """
        self._verify_key_format(key)
        storage = self.bucket.new_key(str(key + ".json.gz"))

        if VERIFY_UPLOAD:
            # MATERIALIZE SO lines CAN BE WALKED AGAIN DURING VERIFICATION
            lines = list(lines)

        with mo_files.TempFile() as tempfile:
            with open(tempfile.abspath, "wb") as buff:
                DEBUG and Log.note("Temp file {{filename}}",
                                   filename=tempfile.abspath)
                count = 0
                # CONTEXT MANAGER ENSURES THE GZIP TRAILER IS WRITTEN (AND THE
                # ARCHIVE RELEASED) EVEN IF ENCODING A LINE FAILS
                with gzip.GzipFile(filename=str(key + ".json"),
                                   fileobj=buff,
                                   mode="w") as archive:
                    for l in lines:
                        if is_many(l):
                            for ll in l:
                                archive.write(ll.encode("utf8"))
                                archive.write(b"\n")
                                count += 1
                        else:
                            archive.write(l.encode("utf8"))
                            archive.write(b"\n")
                            count += 1

            retry = 3
            while retry:
                try:
                    with Timer(
                            "Sending {{count}} lines in {{file_length|comma}} bytes for {{key}}",
                        {
                            "key": key,
                            "file_length": tempfile.length,
                            "count": count
                        },
                            verbose=self.settings.debug,
                    ):
                        storage.set_contents_from_filename(
                            tempfile.abspath,
                            headers={"Content-Type": mimetype.GZIP})
                    break
                except Exception as e:
                    e = Except.wrap(e)
                    retry -= 1
                    # GIVE UP WHEN OUT OF ATTEMPTS, OR WHEN RETRYING CAN NOT HELP
                    if (retry == 0 or "Access Denied" in e
                            or "No space left on device" in e):
                        Log.error("could not push data to s3", cause=e)
                    else:
                        Log.warning("could not push data to s3, will retry",
                                    cause=e)

            if self.settings.public:
                storage.set_acl("public-read")

            if VERIFY_UPLOAD:
                try:
                    # FIRST CONFIRM THE LOCAL TEMP FILE DECOMPRESSES TO lines
                    with open(tempfile.abspath, mode="rb") as source:
                        result = list(ibytes2ilines(
                            scompressed2ibytes(source)))
                        assertAlmostEqual(result,
                                          lines,
                                          msg="file is different")

                    # full_url = "https://"+self.name+".s3-us-west-2.amazonaws.com/"+storage.key.replace(":", "%3A")
                    # https://active-data-test-result.s3-us-west-2.amazonaws.com/tc.1524896%3A152488763.0.json.gz

                    # dest_bucket = s3.MultiBucket(bucket="self.name", kwargs=self.settings.aws)

                    # THEN CONFIRM WHAT WAS STORED READS BACK THE SAME
                    # FIX: A STRAY THIRD POSITIONAL ARGUMENT (result) WAS BEING
                    # PASSED, WHICH LANDS IN assertAlmostEqual's digits PARAMETER
                    result = list(self.read_lines(strip_extension(key)))
                    assertAlmostEqual(result,
                                      lines,
                                      msg="S3 is different")

                except Exception as e:
                    from activedata_etl.transforms import TRY_AGAIN_LATER

                    Log.error(TRY_AGAIN_LATER,
                              reason="did not pass verification",
                              cause=e)
        return
Exemple #31
0
 def __new__(cls, terms):
     """
     BUILD THE OPERATOR, OR AN EqOp WHEN THE LITERAL HOLDS A SINGLE VALUE

     When the first term is a Variable and the second a Literal whose value
     is not "many", an EqOp over the variable and a Literal wrapping that
     value in a one-element list is returned in place of an instance of cls.
     """
     variable_vs_literal = is_op(terms[0], Variable) and is_op(terms[1], Literal)
     if variable_vs_literal:
         name, value = terms
         single = value.value
         if not is_many(single):
             return EqOp([name, Literal([single])])
     return object.__new__(cls)
Exemple #32
0
def _typed_encode(value, schema):
    """
    ENCODE value INTO TYPED FORM, EXTENDING schema IN PLACE AS NEEDED

    :param value: THE VALUE TO ENCODE
    :param schema: THE KNOWN SCHEMA FOR value; NEW CHILDREN AND TYPES ARE
                   ADDED TO IT DIRECTLY
    :return: TRIPLE (output, update, nested)
        output - THE ENCODED VALUE
        update - THE ADDITIONAL SCHEMA OVER schema PROVIDED (None IF NOTHING NEW)
        nested - True IF NESTING IS REQUIRED (CONSIDERED SERIOUS SCHEMA CHANGE)
    """
    # CASE: MANY VALUES - ENCODE EACH ROW AGAINST THE NESTED SCHEMA
    if is_many(value):
        if len(value) == 0:
            return None, None, False

        nesting_added = False
        row_schema = schema.get(NESTED_TYPE)
        if not row_schema:
            nesting_added = True
            row_schema = {}
            schema[NESTED_TYPE] = row_schema

        encoded_rows = []
        schema_delta = {}
        for row in value:
            encoded_row, row_delta, row_nested = _typed_encode(row, row_schema)
            encoded_rows.append(encoded_row)
            set_default(schema_delta, row_delta)
            nesting_added |= row_nested

        reported_delta = {NESTED_TYPE: schema_delta} if schema_delta else None
        return {text(REPEATED): encoded_rows}, reported_delta, nesting_added

    # CASE: SCHEMA ALREADY EXPECTS NESTING - TREAT THE VALUE AS A LIST OF ONE
    if NESTED_TYPE in schema:
        if value:
            return _typed_encode([value], schema)
        return {text(REPEATED): []}, None, False

    # CASE: DATA - ENCODE EACH PROPERTY AGAINST ITS OWN CHILD SCHEMA
    if is_data(value):
        encoded_props = {}
        schema_delta = {}
        nesting_added = False
        for prop_name, prop_value in value.items():
            prop_schema = schema.get(prop_name)
            if not prop_schema:
                prop_schema = {}
                schema[prop_name] = prop_schema
            encoded_prop, prop_delta, prop_nested = _typed_encode(prop_value, prop_schema)
            if encoded_prop != None:
                encoded_props[text(escape_name(prop_name))] = encoded_prop
            set_default(schema_delta, {prop_name: prop_delta})
            nesting_added |= prop_nested
        return encoded_props, schema_delta or None, nesting_added

    # CASE: SCHEMA IS A TYPE NAME - THE VALUE MUST BE OF THAT TYPE
    if is_text(schema):
        encoded, inserter_type, json_type = schema_type(value)
        if schema != json_type:
            Log.error(
                "Can not convert {{existing_type}} to {{expected_type}}",
                existing_type=json_type,
                expected_type=schema,
            )
        return encoded, None, False

    # CASE: NOTHING - EMIT A NULL FOR EVERY TYPE THE SCHEMA KNOWS
    if value == None:
        nulls = {
            text(escape_name(type_name)): None
            for type_name, _ in schema.items()
        }
        return nulls or None, None, False

    # CASE: PRIMITIVE
    try:
        encoded, inserter_type, json_type = schema_type(value)
    except Exception:
        # LAST DESPERATE ATTEMPT
        return _typed_encode(value.__data__(), schema)

    if schema.get(inserter_type):
        # TYPE ALREADY KNOWN TO THE SCHEMA; NOTHING TO ADD
        return {text(escape_name(inserter_type)): encoded}, None, False

    if schema.get(TIME_TYPE):
        # ATTEMPT TO CONVERT TO TIME, IF EXPECTING TIME
        try:
            encoded = parse(encoded).format(TIMESTAMP_FORMAT)
            return {text(escape_name(TIME_TYPE)): encoded}, None, False
        except Exception as cause:
            Log.warning(
                "Failed attempt to convert {{value}} to TIMESTAMP string",
                value=encoded,
                cause=cause,
            )

    # NEW TYPE FOR THIS PROPERTY: RECORD IT AND REPORT THE ADDITION
    schema[inserter_type] = json_type
    return (
        {text(escape_name(inserter_type)): encoded},
        {inserter_type: json_type},
        False,
    )
Exemple #33
0
 def insert(self, docs):
     """
     FLATTEN docs AND HAND THE RESULT TO self._insert

     :param docs: a "many" (list-like) of documents; anything else is
                  reported through Log.error
     """
     docs_are_many = is_many(docs)
     if not docs_are_many:
         Log.error("Expecting a list of documents")
     flattened = self.flatten_many(docs)
     self._insert(flattened)