def to_esfilter(self, schema):
    """
    CONVERT THIS InOp (value IN superset) TO AN ES FILTER
    :param schema: USED TO RESOLVE VARIABLE NAMES TO ES COLUMNS
    """
    if is_op(self.value, Variable_):
        var = self.value.var
        cols = schema.leaves(var)
        if not cols:
            # UNKNOWN COLUMN: NOTHING CAN MATCH
            return MATCH_NONE
        col = first(cols)
        var = col.es_column

        if col.jx_type == BOOLEAN:
            if is_literal(self.superset) and not is_many(self.superset.value):
                return {"term": {var: value2boolean(self.superset.value)}}
            else:
                # list() REQUIRED: IN PYTHON 3, map() RETURNS A GENERATOR,
                # WHICH IS NOT JSON-SERIALIZABLE (SIBLING to_es VARIANTS DO THE SAME)
                return {"terms": {var: list(map(value2boolean, self.superset.value))}}
        else:
            if is_literal(self.superset) and not is_many(self.superset.value):
                return {"term": {var: self.superset.value}}
            else:
                return {"terms": {var: self.superset.value}}
    else:
        # THE HARD WAY: COMPILE TO A PAINLESS SCRIPT
        return Painless[self].to_es_script(schema).to_esfilter(schema)
def all_comments(self):
    """
    EMIT JUST THE COMMENTS
    """
    if not self:
        return
    elif is_many(self):
        # A SEQUENCE OF NODES: EMIT COMMENTS FROM EACH, IN ORDER
        for vv in self:
            yield from vv.all_comments()
        return
    elif not isinstance(self, Formatter):
        # PLAIN VALUES CARRY NO COMMENTS
        return

    # COMMENTS ATTACHED BEFORE THIS NODE
    yield from self.before.before_comment
    yield self.before.line_comment
    yield from self.before_comment
    yield self.line_comment
    # RECURSE INTO THE AST CHILDREN, IN FIELD ORDER
    for f in self.node._fields:
        v = self[f]
        if not v:
            continue
        elif is_many(v):
            for vv in v:
                yield from vv.all_comments()
        elif isinstance(v, Formatter):
            yield from v.all_comments()
        else:
            # NON-FORMATTER CHILD: NO COMMENTS TO EMIT
            continue
    # COMMENTS ATTACHED AFTER THIS NODE
    yield from self.after.before_comment
    yield self.after.line_comment
    yield from self.after_comment
def _drill(d, p):
    """
    YIELD EVERY VALUE FOUND BY FOLLOWING PATH p (A LIST OF PROPERTY NAMES)
    THROUGH d, FANNING OUT OVER ANY LISTS ENCOUNTERED ALONG THE WAY
    """
    if is_many(d):
        # FAN OUT: DRILL INTO EACH ELEMENT WITH THE SAME REMAINING PATH
        for element in d:
            yield from _drill(element, p)
    elif p:
        # STEP INTO THE NEXT PROPERTY ON THE PATH
        yield from _drill(listwrap(d[p[0]]), p[1:])
    else:
        # PATH EXHAUSTED: d IS A LEAF
        yield d
def get_statcan_data(cube_id, coord, num):
    """
    RETURN DATA FOR ONE COORDINATE
    :param cube_id: STATCAN PRODUCT (CUBE) ID
    :param coord: ONE COORDINATE, OR A LIST OF COORDINATES
    :param num: HOW FAR BACK TO GO, LENGTH OF SERIES TO CAPTURE
    :return: ONE DataFrame PER COORDINATE (A SINGLE DataFrame IF coord IS SCALAR)
    """
    coordinates = [format_coordinate(c) for c in listwrap(coord)]
    data = http.post_json(
        "https://www150.statcan.gc.ca/t1/wds/rest/getDataFromCubePidCoordAndLatestNPeriods",
        json=[
            {"productId": cube_id, "coordinate": c, "latestN": num}
            for c in coordinates
        ],
    )

    # RESULTS ARRIVE IN ARBITRARY ORDER; PLACE EACH AT ITS REQUESTED POSITION
    output = [None] * len(coordinates)
    for d in data:
        columns = [k for k, _ in d.object.vectorDataPoint[0].items()]
        # BUILD THE DataFrame IN ONE SHOT: ROW-BY-ROW DataFrame.append WAS
        # QUADRATIC, AND WAS REMOVED IN pandas 2.0
        rows = list(from_data(d.object.vectorDataPoint))
        df = pd.DataFrame(rows, columns=columns)
        i = coordinates.index(d.object.coordinate)
        output[i] = df

    if is_many(coord):
        return output
    else:
        return output[0]
def convert(self, expr):
    """
    EXPAND INSTANCES OF name TO value
    """
    if expr is True or expr == None or expr is False:
        return expr
    elif is_number(expr):
        return expr
    elif expr == ".":
        return "."
    elif is_variable_name(expr):
        # REPLACE A KNOWN DIMENSION NAME WITH ITS DEFINITION, IF ANY
        return coalesce(self.dimensions[expr], expr)
    elif is_text(expr):
        Log.error("{{name|quote}} is not a valid variable name", name=expr)
    elif isinstance(expr, Date):
        return expr
    elif is_op(expr, QueryOp):
        return self._convert_query(expr)
    elif is_data(expr):
        if expr["from"]:
            # A NESTED QUERY
            return self._convert_query(expr)
        elif len(expr) >= 2:
            # ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
            return dict_to_data({name: self.convert(value) for name, value in expr.leaves()})
        else:
            # ASSUME SINGLE-CLAUSE EXPRESSION
            k, v = expr.items()[0]
            return converter_map.get(k, self._convert_bop)(self, k, v)
    elif is_many(expr):
        # CONVERT EACH ELEMENT
        return list_to_data([self.convert(value) for value in expr])
    else:
        return expr
def convert(self, expr):
    """
    EXPAND INSTANCES OF name TO value
    """
    if expr is True or expr == None or expr is False:
        return expr
    elif is_number(expr):
        return expr
    elif expr == ".":
        return "."
    elif is_variable_name(expr):
        # REPLACE A KNOWN DIMENSION NAME WITH ITS DEFINITION, IF ANY
        return coalesce(self.dimensions[expr], expr)
    elif is_text(expr):
        Log.error("{{name|quote}} is not a valid variable name", name=expr)
    elif isinstance(expr, Date):
        return expr
    elif is_op(expr, QueryOp):
        return self._convert_query(expr)
    elif is_data(expr):
        if expr["from"]:
            # A NESTED QUERY
            return self._convert_query(expr)
        elif len(expr) >= 2:
            # ASSUME WE HAVE A NAMED STRUCTURE, NOT AN EXPRESSION
            return wrap({name: self.convert(value) for name, value in expr.leaves()})
        else:
            # ASSUME SINGLE-CLAUSE EXPRESSION
            k, v = expr.items()[0]
            return converter_map.get(k, self._convert_bop)(self, k, v)
    elif is_many(expr):
        # CONVERT EACH ELEMENT
        return wrap([self.convert(value) for value in expr])
    else:
        return expr
def __init__(self, terms, **clauses):
    # terms MAY BE A SINGLE EXPRESSION, OR A LIST OF EXPRESSIONS
    Expression.__init__(self, terms)
    if is_many(terms):
        # SHORTCUT: ASSUME AN ARRAY OF IS A TUPLE
        self.terms = self.lang[TupleOp(terms)]
    else:
        self.terms = terms
def __init__(self, terms):
    # NORMALIZE terms SO self.terms IS ALWAYS A LIST OF OPERANDS
    Expression.__init__(self, terms)
    if terms == None:
        self.terms = []
    else:
        self.terms = terms if is_many(terms) else [terms]
def union_type(*types):
    """
    RETURN THE UNION OF ALL GIVEN JSON TYPES
    """
    # GUARD: A SINGLE LIST ARGUMENT IS LIKELY A CALLER MISTAKE; SPREAD THE PARAMETERS
    if len(types) == 1 and is_many(types[0]):
        Log.error("expecting many parameters")
    result = T_IS_NULL
    for each in types:
        result |= each
    return result
def to_es(self, schema):
    """
    CONVERT THIS InOp (value IN superset) TO AN ES FILTER
    :param schema: USED TO RESOLVE VARIABLE NAMES TO ES COLUMNS
    """
    value = self.value
    if is_op(value, Variable):
        var = value.var
        cols = schema.leaves(var)
        if not cols:
            # UNKNOWN COLUMN: NOTHING CAN MATCH
            return MATCH_NONE
        col = first(cols)
        var = col.es_column

        if is_literal(self.superset):
            if col.jx_type == BOOLEAN:
                if is_literal(self.superset) and not is_many(self.superset.value):
                    # SINGLE VALUE: USE A term FILTER
                    return {"term": {var: value2boolean(self.superset.value)}}
                else:
                    return {"terms": {var: list(map(value2boolean, self.superset.value))}}
            else:
                if is_literal(self.superset) and not is_many(self.superset.value):
                    return {"term": {var: self.superset.value}}
                else:
                    return {"terms": {var: self.superset.value}}
        elif is_op(self.superset, TupleOp):
            # EXPAND TUPLE TO A DISJUNCTION OF EQUALITIES
            return (
                OrOp([EqOp([value, s]) for s in self.superset.terms])
                .partial_eval()
                .to_es(schema)
            )

    if (
        is_op(value, NestedOp)
        and is_literal(self.superset)
        and is_op(value.select, Variable)
    ):
        # PUSH THE IN TEST DOWN INTO THE NESTED QUERY'S where CLAUSE
        output = (
            ES52[NestedOp(
                path=value.path,
                select=NULL,
                where=AndOp([value.where, InOp([value.select, self.superset])]),
            )]
            .exists()
            .partial_eval()
            .to_es(schema)
        )
        return output

    # THE HARD WAY
    return Painless[self].to_es_script(schema).to_es(schema)
def value_to_json_type(value):
    """
    RETURN THE JSON TYPE DESCRIBING value
    """
    if is_many(value):
        # ARRAY: UNION THE TYPES OF ALL ELEMENTS
        element_types = [value_to_json_type(v) for v in value]
        return _primitive(_A, union_type(*element_types))
    if is_data(value):
        # OBJECT: TYPE EACH PROPERTY
        properties = {k: value_to_json_type(v) for k, v in value.items()}
        return JsonType(**properties)
    # PRIMITIVE: LOOK UP BY PYTHON CLASS
    return _python_type_to_json_type[value.__class__]
def wrap(query, container, namespace):
    """
    NORMALIZE QUERY SO IT CAN STILL BE JSON
    :param query: RAW QUERY (data), OR AN ALREADY-NORMALIZED QueryOp
    :param container: SOURCE OF DATA; PROVIDES TABLE AND SCHEMA
    :param namespace: NOT READ IN THIS BODY
    """
    if is_op(query, QueryOp) or query == None:
        # ALREADY NORMALIZED (OR NOTHING TO DO)
        return query

    # NOTE(review): THIS CALL SHADOWS THE INTENDED (LIKELY mo_dots) wrap WITH THIS
    # FUNCTION'S OWN NAME -- confirm which wrap() is in scope in this module
    query = wrap(query)
    table = container.get_table(query['from'])
    schema = table.schema
    output = QueryOp(
        frum=table,
        format=query.format,
        chunk_size=query.chunk_size,
        destination=query.destination,
    )

    _import_temper_limit()
    output.limit = temper_limit(query.limit, query)

    if query.select or is_many(query.select) or is_data(query.select):
        output.select = _normalize_selects(query.select, query.frum, schema=schema)
    else:
        if query.edges or query.groupby:
            # AGGREGATE QUERIES DEFAULT TO THE STANDARD SELECT
            output.select = DEFAULT_SELECT
        else:
            output.select = _normalize_selects(".", query.frum)

    if query.groupby and query.edges:
        Log.error(
            "You can not use both the `groupby` and `edges` clauses in the same query!"
        )
    elif query.edges:
        output.edges = _normalize_edges(query.edges, limit=output.limit, schema=schema)
        output.groupby = Null
    elif query.groupby:
        output.edges = Null
        output.groupby = _normalize_groupby(query.groupby, limit=output.limit, schema=schema)
    else:
        output.edges = Null
        output.groupby = Null

    output.where = _normalize_where({"and": listwrap(query.where)}, schema=schema)
    output.window = [_normalize_window(w) for w in listwrap(query.window)]
    output.sort = _normalize_sort(query.sort)
    if output.limit != None and (not mo_math.is_integer(output.limit) or output.limit < 0):
        Log.error("Expecting limit >= 0")

    return output
def __new__(cls, terms):
    # terms IS EITHER A LIST (NORMAL CONSTRUCTION) OR A MAPPING OF lhs -> rhs LITERALS
    if is_many(terms):
        return object.__new__(cls)

    items = terms.items()
    if len(items) == 1:
        if is_many(items[0][1]):
            # {var: [..]} IS AN IN TEST
            return cls.lang[InOp(items[0])]
        else:
            # {var: value} IS A SIMPLE EQUALITY
            return cls.lang[EqOp(items[0])]
    else:
        # MULTIPLE PAIRS: CONJUNCTION OF EQUALITY/IN TESTS
        acc = []
        for lhs, rhs in items:
            if rhs.json.startswith("["):
                # rhs LITERAL IS A JSON ARRAY: USE AN IN TEST
                acc.append(cls.lang[InOp([Variable(lhs), rhs])])
            else:
                acc.append(cls.lang[EqOp([Variable(lhs), rhs])])
        return cls.lang[AndOp(acc)]
def __init__(self, terms, separator=Literal(""), default=NULL):
    # terms MUST BE A LIST OF EXPRESSIONS; separator MUST BE A LITERAL
    if not is_many(terms):
        Log.error("Expecting many terms")
    if not is_literal(separator):
        Log.error("Expecting a literal separator")
    # separator AND default ARE APPENDED SO THEY PARTICIPATE AS SUB-EXPRESSIONS
    Expression.__init__(self, terms + [separator, default])
    self.terms = terms
    self.separator = separator
    self.default = default
def send_queries(self, subtest, places=6):
    """
    RUN EVERY expecting_* CLAUSE IN subtest AGAINST THE SERVICE AND VERIFY RESULTS
    :param subtest: TEST SPEC WITH A query AND ONE OR MORE expecting_* PROPERTIES
    :param places: DECIMAL PLACES FOR APPROXIMATE COMPARISON
    """
    subtest = wrap(subtest)

    try:
        # EXECUTE QUERY
        num_expectations = 0
        for i, (k, v) in enumerate(subtest.items()):
            if k in ["expecting", "expecting_error"]:
                # NO FORMAT REQUESTED (TO TEST DEFAULT FORMATS)
                format = None
            elif k.startswith("expecting_"):
                # WHAT FORMAT ARE WE REQUESTING
                format = k[len("expecting_"):]
            else:
                continue

            num_expectations += 1
            expected = v

            subtest.query.format = format
            subtest.query.meta.testing = (num_expectations == 1)  # MARK FIRST QUERY FOR TESTING SO FULL METADATA IS AVAILABLE BEFORE QUERY EXECUTION
            query = value2json(subtest.query).encode('utf8')
            # EXECUTE QUERY
            response = self.try_till_response(self.testing.query, data=query)

            if k == "expecting_error":
                # EXPECTING A FAILURE RESPONSE CONTAINING v
                if response.status_code != 200:
                    message = response.content.decode('utf8')
                    if v in message:
                        Log.note("PASS {{name|quote}} (expected error)", name=subtest.name)
                        continue
                    else:
                        Log.error("expecting {{expecting}} not {{error}}", expecting=v, error=message)
                else:
                    Log.error("expecting a failure")
            else:
                if response.status_code != 200:
                    error(response)
                result = json2value(response.all_content.decode('utf8'))

                # NORMALIZE THE QUERY SO RESULT COMPARISON KNOWS THE SHAPE
                container = jx_elasticsearch.new_instance(self._es_test_settings)
                query = QueryOp.wrap(subtest.query, container, container.namespace)

                if is_many(expected.data) and len(result.data) != len(expected.data):
                    Log.error(
                        "expecting data (len={{rlen}}) to have length of {{elen}}",
                        rlen=len(result.data),
                        elen=len(expected.data)
                    )

                compare_to_expected(query, result, expected, places)
                Log.note("PASS {{name|quote}} (format={{format}})", name=subtest.name, format=format)
        if num_expectations == 0:
            Log.error(
                "Expecting test {{name|quote}} to have property named 'expecting_*' for testing the various format clauses",
                name=subtest.name
            )
    except Exception as e:
        Log.error("Failed test {{name|quote}}", name=subtest.name, cause=e)
def value2intlist(value):
    """
    COERCE value TO A LIST OF ints; None AND BLANK STRINGS BECOME []
    """
    if value == None:
        return []
    if is_many(value):
        # DROP EMPTY/None ELEMENTS, CONVERT THE REST
        return [int(element) for element in value if element != "" and element != None]
    if isinstance(value, int):
        return [value]
    if value.strip() == "":
        return []
    return [int(value)]
def _parse_traceback(tb):
    """
    CONVERT A traceback OBJECT INTO A LIST OF {file, line, method} DICTS,
    INNERMOST FRAME FIRST
    """
    if is_many(tb):
        get_logger().error("Expecting a tracback object, not a list")

    trace = []
    current = tb
    while current is not None:
        frame = current.tb_frame
        trace.append({
            "file": frame.f_code.co_filename,
            "line": current.tb_lineno,
            "method": frame.f_code.co_name,
        })
        current = current.tb_next
    # tb WALKS OUTERMOST-TO-INNERMOST; REVERSE FOR INNERMOST FIRST
    return list(reversed(trace))
def to_esfilter(self, schema):
    """
    CONVERT THIS InOp (value IN superset) TO AN ES FILTER
    :param schema: USED TO RESOLVE VARIABLE NAMES TO ES COLUMNS
    """
    if is_op(self.value, Variable_):
        var = self.value.var
        cols = schema.leaves(var)
        if not cols:
            # UNKNOWN COLUMN: NOTHING CAN MATCH
            return MATCH_NONE
        col = first(cols)
        var = col.es_column

        if is_literal(self.superset):
            if col.jx_type == BOOLEAN:
                if is_literal(self.superset) and not is_many(self.superset.value):
                    return {"term": {var: value2boolean(self.superset.value)}}
                else:
                    return {"terms": {var: list(map(value2boolean, self.superset.value))}}
            else:
                if is_literal(self.superset) and not is_many(self.superset.value):
                    return {"term": {var: self.superset.value}}
                else:
                    return {"terms": {var: self.superset.value}}
        elif is_op(self.superset, TupleOp):
            # EXPAND TUPLE TO A DISJUNCTION OF EQUALITIES
            return (
                OrOp([EqOp([self.value, s]) for s in self.superset.terms])
                .partial_eval()
                .to_esfilter(schema)
            )
    # THE HARD WAY
    return Painless[self].to_es_script(schema).to_esfilter(schema)
def to_es(self, schema):
    # PREFER A NATIVE term/terms FILTER WHEN COMPARING A COLUMN TO A LITERAL
    if is_op(self.lhs, Variable_) and is_literal(self.rhs):
        lhs = self.lhs.var
        cols = schema.leaves(lhs)
        if cols:
            lhs = first(cols).es_column
        rhs = self.rhs.value
        if is_many(rhs):
            if len(rhs) == 1:
                # ONE-ELEMENT LIST DEGRADES TO A SIMPLE term
                return {"term": {lhs: first(rhs)}}
            else:
                return {"terms": {lhs: rhs}}
        else:
            return {"term": {lhs: rhs}}
    else:
        # THE HARD WAY: COMPILE TO A PAINLESS SCRIPT
        return Painless[self].to_es_script(schema).to_es(schema)
def __eq__(self, other):
    # EQUALITY FOR A (POSSIBLY LAZY) SEQUENCE
    if other == None:
        # AN EMPTY SEQUENCE EQUALS None
        return not self.__bool__()
    elif is_text(other):
        # COMPARE AS A JOINED STRING; ANY FAILURE MEANS NOT EQUAL
        try:
            return "".join(self) == other
        except Exception as e:
            return False
    elif is_many(other):
        # zip_longest PADS THE SHORTER SIDE WITH None, SO LENGTH DIFFERENCES FAIL
        return all(s == o for s, o in zip_longest(self, other))
    elif self.length() == 1:
        # SINGLETON SEQUENCE COMPARES AS ITS ONLY ELEMENT
        return self[0] == other
    elif not self:
        return False
    else:
        Log.error("do not know how to handle")
def toString(val):
    """
    CONVERT val TO A BEST-EFFORT, HUMAN-READABLE unicode STRING
    """
    if _Duration is None:
        _late_import()

    if val == None:
        return ""
    elif is_data(val) or is_many(val):
        return _json_encoder(val, pretty=True)
    elif hasattr(val, "__data__"):
        return _json_encoder(val.__data__(), pretty=True)
    elif hasattr(val, "__json__"):
        return val.__json__()
    elif isinstance(val, _Duration):
        # NOTE(review): round(..., places=4) IS NOT THE BUILTIN round SIGNATURE;
        # PRESUMABLY mo_math.round IS IN SCOPE -- confirm
        return text(round(val.seconds, places=4)) + " seconds"
    elif isinstance(val, timedelta):
        duration = val.total_seconds()
        return text(round(duration, 3)) + " seconds"
    elif is_text(val):
        return val
    elif isinstance(val, binary_type):
        # TRY utf8, THEN latin1, BEFORE GIVING UP
        try:
            return val.decode("utf8")
        except Exception as _:
            pass

        try:
            return val.decode("latin1")
        except Exception as e:
            if not _Log:
                _late_import()
            _Log.error(text(type(val)) + " type can not be converted to unicode", cause=e)
    else:
        try:
            return text(val)
        except Exception as e:
            if not _Log:
                _late_import()
            _Log.error(text(type(val)) + " type can not be converted to unicode", cause=e)
def new_instance(type, frum, schema=None):
    """
    Factory!
    RETURN A Container FOR frum, WHICH MAY BE A Container, Cube, Query,
    LIST, INDEX NAME (text), OR SETTINGS (data)
    """
    if not type2container:
        _delayed_imports()

    if isinstance(frum, Container):
        return frum
    elif isinstance(frum, _Cube):
        return frum
    elif isinstance(frum, _Query):
        return _run(frum)
    elif is_many(frum):
        return _ListContainer(frum)
    elif is_text(frum):
        # USE DEFAULT STORAGE TO FIND Container
        if not config.default.settings:
            Log.error(
                "expecting jx_base.container.config.default.settings to contain default elasticsearch connection info"
            )

        settings = set_default(
            {
                "index": join_field(split_field(frum)[:1:]),
                "name": frum,
            },
            config.default.settings)
        settings.type = None  # WE DO NOT WANT TO INFLUENCE THE TYPE BECAUSE NONE IS IN THE frum STRING ANYWAY
        return type2container["elasticsearch"](settings)
    elif is_data(frum):
        frum = wrap(frum)
        if frum.type and type2container[frum.type]:
            # EXPLICIT CONTAINER TYPE GIVEN IN SETTINGS
            return type2container[frum.type](frum.settings)
        elif frum["from"]:
            # A QUERY: WRAP ITS SOURCE, THEN NORMALIZE
            frum = copy(frum)
            frum["from"] = Container(frum["from"])
            return _Query.wrap(frum)
        else:
            Log.error("Do not know how to handle {{frum|json}}", frum=frum)
    else:
        Log.error("Do not know how to handle {{type}}", type=frum.__class__.__name__)
def new_instance(type, frum, schema=None):
    """
    Factory!
    RETURN A Container FOR frum, WHICH MAY BE A Container, Cube, Query,
    LIST, INDEX NAME (text), OR SETTINGS (data)
    """
    if not type2container:
        _delayed_imports()

    if isinstance(frum, Container):
        return frum
    elif isinstance(frum, _Cube):
        return frum
    elif isinstance(frum, _Query):
        return _run(frum)
    elif is_many(frum):
        return _ListContainer(frum)
    elif is_text(frum):
        # USE DEFAULT STORAGE TO FIND Container
        if not config.default.settings:
            Log.error("expecting jx_base.container.config.default.settings to contain default elasticsearch connection info")

        settings = set_default(
            {
                "index": join_field(split_field(frum)[:1:]),
                "name": frum,
            },
            config.default.settings
        )
        settings.type = None  # WE DO NOT WANT TO INFLUENCE THE TYPE BECAUSE NONE IS IN THE frum STRING ANYWAY
        return type2container["elasticsearch"](settings)
    elif is_data(frum):
        frum = wrap(frum)
        if frum.type and type2container[frum.type]:
            # EXPLICIT CONTAINER TYPE GIVEN IN SETTINGS
            return type2container[frum.type](frum.settings)
        elif frum["from"]:
            # A QUERY: WRAP ITS SOURCE, THEN NORMALIZE
            frum = copy(frum)
            frum["from"] = Container(frum["from"])
            return _Query.wrap(frum)
        else:
            Log.error("Do not know how to handle {{frum|json}}", frum=frum)
    else:
        Log.error("Do not know how to handle {{type}}", type=frum.__class__.__name__)
def write_lines(self, key, lines):
    """
    GZIP THE GIVEN lines AND UPLOAD TO s3 AS <key>.json.gz, WITH RETRY
    """
    self._verify_key_format(key)
    storage = self.bucket.new_key(key + ".json.gz")

    buff = TemporaryFile()
    archive = gzip.GzipFile(fileobj=buff, mode='w')
    count = 0
    for l in lines:
        if is_many(l):
            # FLATTEN ONE LEVEL OF NESTED LINE LISTS
            for ll in l:
                archive.write(ll.encode("utf8"))
                archive.write(b"\n")
                count += 1
        else:
            archive.write(l.encode("utf8"))
            archive.write(b"\n")
            count += 1

    archive.close()
    file_length = buff.tell()

    retry = 3
    while retry:
        try:
            with Timer(
                "Sending {{count}} lines in {{file_length|comma}} bytes for {{key}}",
                {"key": key, "file_length": file_length, "count": count},
                verbose=self.settings.debug
            ):
                buff.seek(0)
                storage.set_contents_from_file(buff)
            break
        except Exception as e:
            e = Except.wrap(e)
            retry -= 1
            if retry == 0 or 'Access Denied' in e or "No space left on device" in e:
                # FATAL ERROR, OR OUT OF RETRIES
                Log.error("could not push data to s3", cause=e)
            else:
                Log.warning("could not push data to s3", cause=e)

    if self.settings.public:
        storage.set_acl('public-read')
    return
def value2url_param(value):
    """
    :param value:
    :return: ascii URL
    """
    from mo_json import value2json, json2value

    def _encode(value):
        # PERCENT-ENCODE EACH utf8 BYTE VIA THE _map2url TABLE
        return "".join(_map2url[c] for c in value.encode("utf8"))

    if value == None:
        return None

    if is_data(value):
        value_ = to_data(value)
        # EMIT LEAVES AS k=v PAIRS, SORTED FOR STABLE OUTPUT; SKIP EMPTY VALUES (BUT KEEP 0)
        output = "&".join(
            kk + "=" + vv
            for k, v in sorted(value_.leaves(), key=lambda p: p[0])
            for kk, vv in [(value2url_param(k), value2url_param(v))]
            if vv or vv == 0
        )
    elif is_text(value):
        try:
            json2value(value)
            # value IS VALID JSON TEXT: ENCODE ITS JSON FORM SO IT STAYS PARSEABLE
            output = _encode(value2json(value))
        except Exception:
            output = _encode(value)
    elif is_binary(value):
        output = "".join(_map2url[c] for c in value)
    elif is_many(value):
        # COMMA-SEPARATED LIST; SKIP EMPTY VALUES (BUT KEEP 0)
        output = ",".join(
            vv for v in value for vv in [value2url_param(v)] if vv or vv == 0
        )
    else:
        output = _encode(value2json(value))
    return output
def run(query, container=Null):
    """
    THIS FUNCTION IS SIMPLY SWITCHING BASED ON THE query["from"] CONTAINER,
    BUT IT IS ALSO PROCESSING A list CONTAINER; SEPARATE TO A ListContainer
    """
    if container == None:
        container = wrap(query)["from"]
        query_op = QueryOp.wrap(query, container=container, namespace=container.schema)
    else:
        query_op = QueryOp.wrap(query, container, container.namespace)

    if container == None:
        from jx_python.containers.list_usingPythonList import DUAL
        return DUAL.query(query_op)
    elif isinstance(container, Container):
        return container.query(query_op)
    elif is_many(container):
        # PLAIN LIST: PROCESS LOCALLY BELOW
        container = wrap(list(container))
    elif isinstance(container, Cube):
        if is_aggs(query_op):
            return cube_aggs(container, query_op)
    elif is_op(container, QueryOp):
        # from CLAUSE IS ITSELF A QUERY: RUN IT FIRST
        container = run(container)
    elif is_data(container):
        query = container
        container = query["from"]
        container = run(QueryOp.wrap(query, container, container.namespace), container)
    else:
        Log.error("Do not know how to handle {{type}}", type=container.__class__.__name__)

    if is_aggs(query_op):
        container = list_aggs(container, query_op)
    else:  # SETOP
        if query_op.where is not TRUE:
            container = filter(container, query_op.where)
        if query_op.sort:
            container = sort(container, query_op.sort, already_normalized=True)
        if query_op.select:
            container = select(container, query_op.select)

    if query_op.window:
        if isinstance(container, Cube):
            container = list(container.values())
        for param in query_op.window:
            window(container, param)

    # AT THIS POINT frum IS IN LIST FORMAT, NOW PACKAGE RESULT
    if query_op.format == "cube":
        container = list2cube(container)
    elif query_op.format == "table":
        container = list2table(container)
        container.meta.format = "table"
    else:
        container = wrap({"meta": {"format": "list"}, "data": container})

    return container
def run(query, container=Null):
    """
    THIS FUNCTION IS SIMPLY SWITCHING BASED ON THE query["from"] CONTAINER,
    BUT IT IS ALSO PROCESSING A list CONTAINER; SEPARATE TO A ListContainer
    """
    if container == None:
        container = wrap(query)["from"]
        query_op = QueryOp.wrap(query, container=container, namespace=container.schema)
    else:
        query_op = QueryOp.wrap(query, container, container.namespace)

    if container == None:
        from jx_python.containers.list_usingPythonList import DUAL
        return DUAL.query(query_op)
    elif isinstance(container, Container):
        return container.query(query_op)
    elif is_many(container):
        # PLAIN LIST: PROCESS LOCALLY BELOW
        container = wrap(list(container))
    elif isinstance(container, Cube):
        if is_aggs(query_op):
            return cube_aggs(container, query_op)
    elif is_op(container, QueryOp):
        # from CLAUSE IS ITSELF A QUERY: RUN IT FIRST
        container = run(container)
    elif is_data(container):
        query = container
        container = query["from"]
        container = run(QueryOp.wrap(query, container, container.namespace), container)
    else:
        Log.error(
            "Do not know how to handle {{type}}", type=container.__class__.__name__
        )

    if is_aggs(query_op):
        container = list_aggs(container, query_op)
    else:  # SETOP
        if query_op.where is not TRUE:
            container = filter(container, query_op.where)
        if query_op.sort:
            container = sort(container, query_op.sort, already_normalized=True)
        if query_op.select:
            container = select(container, query_op.select)

    if query_op.window:
        if isinstance(container, Cube):
            container = list(container.values())
        for param in query_op.window:
            window(container, param)

    # AT THIS POINT frum IS IN LIST FORMAT, NOW PACKAGE RESULT
    if query_op.format == "cube":
        container = convert.list2cube(container)
    elif query_op.format == "table":
        container = convert.list2table(container)
        container.meta.format = "table"
    else:
        container = wrap({"meta": {"format": "list"}, "data": container})

    return container
def assertAlmostEqual(test, expected, digits=None, places=None, msg=None, delta=None):
    """
    RECURSIVELY COMPARE test TO expected, ALLOWING APPROXIMATE NUMERIC MATCHES
    :param digits: NUMBER OF SIGNIFICANT DIGITS REQUIRED
    :param places: NUMBER OF DECIMAL PLACES REQUIRED
    :param delta: MAXIMUM ABSOLUTE DIFFERENCE ALLOWED
    :raises: VIA Log.error IF test DOES NOT MATCH expected
    """
    show_detail = True
    test = unwrap(test)
    expected = unwrap(expected)
    try:
        if test is None and (is_null_op(expected) or expected is None):
            return
        elif test is expected:
            return
        elif is_text(expected):
            assertAlmostEqualValue(test, expected, msg=msg, digits=digits, places=places, delta=delta)
        elif isinstance(test, UniqueIndex):
            # SYMMETRIC DIFFERENCE IS TRUTHY WHEN SETS DIFFER
            if test ^ expected:
                Log.error("Sets do not match")
        elif is_data(expected) and is_data(test):
            # COMPARE ONLY THE KEYS MENTIONED IN expected
            for k, e in unwrap(expected).items():
                t = test.get(k)
                assertAlmostEqual(t, e, msg=coalesce(msg, "") + "key " + quote(k) + ": ", digits=digits, places=places, delta=delta)
        elif is_data(expected):
            # test IS NOT data; A ONE-ELEMENT LIST IS UNWRAPPED FIRST
            if is_many(test):
                test = list(test)
                if len(test) != 1:
                    Log.error("Expecting data, not a list")
                test = test[0]

            for k, e in expected.items():
                if is_text(k):
                    t = mo_dots.get_attr(test, literal_field(k))
                else:
                    t = test[k]
                assertAlmostEqual(t, e, msg=msg, digits=digits, places=places, delta=delta)
        elif is_container(test) and isinstance(expected, set):
            # SET COMPARISON: EVERY EXPECTED ELEMENT MUST MATCH SOME TEST ELEMENT
            test = set(to_data(t) for t in test)
            if len(test) != len(expected):
                Log.error(
                    "Sets do not match, element count different:\n{{test|json|indent}}\nexpecting{{expectedtest|json|indent}}",
                    test=test,
                    expected=expected)

            for e in expected:
                for t in test:
                    try:
                        assertAlmostEqual(t, e, msg=msg, digits=digits, places=places, delta=delta)
                        break
                    except Exception as _:
                        pass
                else:
                    Log.error("Sets do not match. \n{{value|json}} not found in {{test|json}}", value=e, test=test)
        elif isinstance(expected, types.FunctionType):
            # CUSTOM PREDICATE PROVIDED BY THE CALLER
            return expected(test)
        elif hasattr(test, "__iter__") and hasattr(expected, "__iter__"):
            # SEQUENCE COMPARISON; NORMALIZE numpy/pandas CONTAINERS TO LISTS FIRST
            if test.__class__.__name__ == "ndarray":  # numpy
                test = test.tolist()
            elif test.__class__.__name__ == "DataFrame":  # pandas
                test = test[test.columns[0]].values.tolist()
            elif test.__class__.__name__ == "Series":  # pandas
                test = test.values.tolist()

            if not expected and test == None:
                return
            if expected == None:
                expected = []  # REPRESENT NOTHING
            for t, e in zip_longest(test, expected):
                assertAlmostEqual(t, e, msg=msg, digits=digits, places=places, delta=delta)
        else:
            assertAlmostEqualValue(test, expected, msg=msg, digits=digits, places=places, delta=delta)
    except Exception as e:
        Log.error(
            "{{test|json|limit(10000)}} does not match expected {{expected|json|limit(10000)}}",
            test=test if show_detail else "[can not show]",
            expected=expected if show_detail else "[can not show]",
            cause=e)
def typer(v):
    """
    RETURN THE JSON TYPE FOR v; ARRAYS GET THE MERGED TYPE OF THEIR ELEMENTS
    """
    if not is_many(v):
        return python_type_to_json_type[v.__class__]
    # RECURSE OVER ELEMENTS AND MERGE THEIR TYPES
    return merge_json_type(*(typer(element) for element in v))
def write_lines(self, key, lines):
    """
    GZIP lines TO A TEMP FILE AND UPLOAD TO s3 AS <key>.json.gz, WITH RETRY;
    OPTIONALLY VERIFY BOTH THE LOCAL FILE AND WHAT S3 STORED
    """
    self._verify_key_format(key)
    storage = self.bucket.new_key(str(key + ".json.gz"))

    if VERIFY_UPLOAD:
        # MATERIALIZE SO lines CAN BE RE-READ DURING VERIFICATION
        lines = list(lines)

    with mo_files.TempFile() as tempfile:
        with open(tempfile.abspath, "wb") as buff:
            # FIX: TEMPLATE WAS MALFORMED ("{(unknown)}"); USE {{filename}} SO THE PATH IS LOGGED
            DEBUG and Log.note("Temp file {{filename}}", filename=tempfile.abspath)
            archive = gzip.GzipFile(filename=str(key + ".json"), fileobj=buff, mode="w")
            count = 0
            for l in lines:
                if is_many(l):
                    # FLATTEN ONE LEVEL OF NESTED LINE LISTS
                    for ll in l:
                        archive.write(ll.encode("utf8"))
                        archive.write(b"\n")
                        count += 1
                else:
                    archive.write(l.encode("utf8"))
                    archive.write(b"\n")
                    count += 1
            archive.close()

        retry = 3
        while retry:
            try:
                with Timer(
                    "Sending {{count}} lines in {{file_length|comma}} bytes for {{key}}",
                    {
                        "key": key,
                        "file_length": tempfile.length,
                        "count": count
                    },
                    verbose=self.settings.debug,
                ):
                    storage.set_contents_from_filename(
                        tempfile.abspath, headers={"Content-Type": mimetype.GZIP})
                break
            except Exception as e:
                e = Except.wrap(e)
                retry -= 1
                if (retry == 0 or "Access Denied" in e or "No space left on device" in e):
                    # FATAL ERROR, OR OUT OF RETRIES
                    Log.error("could not push data to s3", cause=e)
                else:
                    Log.warning("could not push data to s3, will retry", cause=e)

        if self.settings.public:
            storage.set_acl("public-read")

        if VERIFY_UPLOAD:
            try:
                # VERIFY THE LOCAL GZIP ROUND-TRIPS TO THE SAME LINES
                with open(tempfile.abspath, mode="rb") as source:
                    result = list(ibytes2ilines(scompressed2ibytes(source)))
                    assertAlmostEqual(result, lines, msg="file is different")

                # VERIFY WHAT S3 ACTUALLY STORED
                result = list(self.read_lines(strip_extension(key)))
                assertAlmostEqual(result, lines, result, msg="S3 is different")
            except Exception as e:
                from activedata_etl.transforms import TRY_AGAIN_LATER
                Log.error(TRY_AGAIN_LATER, reason="did not pass verification", cause=e)
    return
def __new__(cls, terms):
    # NORMALIZE (var IN scalar-literal) TO (var == [scalar]) SO DOWNSTREAM
    # CODE ONLY SEES LIST-VALUED SUPERSETS
    if is_op(terms[0], Variable) and is_op(terms[1], Literal):
        name, value = terms
        if not is_many(value.value):
            return (EqOp([name, Literal([value.value])]))
    return object.__new__(cls)
def _typed_encode(value, schema):
    """
    RETURN TRIPLE
    output - THE ENCODED VALUE
    update - THE ADDITIONAL SCHEMA OVER schema PROVIDED
    nested - True IF NESTING IS REQUIRED (CONSIDERED SERIOUS SCHEMA CHANGE)
    """
    if is_many(value):
        if len(value) == 0:
            return None, None, False
        output = []
        update = {}
        nest_added = False
        child_schema = schema.get(NESTED_TYPE)
        if not child_schema:
            # FIRST TIME AN ARRAY IS SEEN HERE: NESTING IS A SCHEMA CHANGE
            nest_added = True
            child_schema = schema[NESTED_TYPE] = {}

        # ENCODE EACH ELEMENT, ACCUMULATING SCHEMA CHANGES
        for r in value:
            v, m, n = _typed_encode(r, child_schema)
            output.append(v)
            set_default(update, m)
            nest_added |= n

        if update:
            return {text(REPEATED): output}, {NESTED_TYPE: update}, nest_added
        else:
            return {text(REPEATED): output}, None, nest_added
    elif NESTED_TYPE in schema:
        # SCHEMA EXPECTS AN ARRAY: PROMOTE SINGLE VALUE TO A ONE-ELEMENT ARRAY
        if not value:
            return {text(REPEATED): []}, None, False
        else:
            return _typed_encode([value], schema)
    elif is_data(value):
        output = {}
        update = {}
        nest_added = False
        for k, v in value.items():
            child_schema = schema.get(k)
            if not child_schema:
                child_schema = schema[k] = {}

            result, more_update, n = _typed_encode(v, child_schema)
            if result != None:
                output[text(escape_name(k))] = result
            set_default(update, {k: more_update})
            nest_added |= n
        return output, update or None, nest_added
    elif is_text(schema):
        # SCHEMA IS ALREADY A CONCRETE JSON TYPE; VALUE MUST AGREE
        v, inserter_type, json_type = schema_type(value)
        if schema != json_type:
            Log.error(
                "Can not convert {{existing_type}} to {{expected_type}}",
                existing_type=json_type,
                expected_type=schema,
            )
        return v, None, False
    elif value == None:
        # EMIT EXPLICIT None FOR EVERY TYPE THE SCHEMA KNOWS ABOUT
        return {
            text(escape_name(t)): None for t, child_schema in schema.items()
        } or None, None, False
    else:
        try:
            v, inserter_type, json_type = schema_type(value)
        except Exception as e:
            # LAST DESPERATE ATTEMPT
            return _typed_encode(value.__data__(), schema)

        child_schema = schema.get(inserter_type)
        update = None
        if not child_schema:
            if schema.get(TIME_TYPE):
                # ATTEMPT TO CONVERT TO TIME, IF EXPECTING TIME
                try:
                    v = parse(v).format(TIMESTAMP_FORMAT)
                    return {text(escape_name(TIME_TYPE)): v}, update, False
                except Exception as e:
                    Log.warning(
                        "Failed attempt to convert {{value}} to TIMESTAMP string",
                        value=v,
                        cause=e,
                    )

            schema[inserter_type] = json_type
            update = {inserter_type: json_type}
        return {text(escape_name(inserter_type)): v}, update, False
def insert(self, docs):
    """
    FLATTEN AND INSERT THE GIVEN LIST OF DOCUMENTS
    :param docs: MUST BE A LIST (OR OTHER MANY-VALUED CONTAINER)
    """
    if not is_many(docs):
        Log.error("Expecting a list of documents")
    self._insert(self.flatten_many(docs))