def pull_nested_source(doc):
    """
    Return the untyped values of the inner hits found in `doc`, each placed
    at the position reported for its hit.

    `pos`, `name`, `index` and `value` are not defined here; they are taken
    from the surrounding scope.

    :param doc: document that is looked up with `doc.get(pos, Null)`
    :return: list with one slot per inner hit (empty list when there are no hits)
    """
    inner_hits = doc.get(pos, Null).inner_hits[name].hits.hits
    if not inner_hits:
        return []

    # PAIR EVERY HIT WITH ITS TARGET SLOT BEFORE ANY VALUE IS UNTYPED
    pairs = [(index(hit), value(hit)) for hit in inner_hits]

    # SLOTS THAT NO HIT POINTS AT REMAIN None
    result = [None] * len(pairs)
    for slot, raw in pairs:
        result[slot] = untyped(raw)
    return result
def doc_to_column(doc):
    """
    Convert a stored column document into a `Column`, repairing known
    mistakes in the stored metadata along the way.

    :param doc: column document; it is passed through `untyped()` and
                `wrap()` before any repair is applied
    :return: `Column` built from the repaired document, or `None` when the
             conversion fails (in which case a best-effort attempt is made
             to mark the column as deleted)
    """
    try:
        doc = wrap(untyped(doc))

        # I HAVE MANAGED TO MAKE MANY MISTAKES WRITING COLUMNS TO ES. HERE ARE THE FIXES
        # NOTE(review): `== None` IS KEPT ON PURPOSE; PRESUMABLY WRAPPED MISSING
        # VALUES COMPARE EQUAL TO None WITHOUT BEING None - CONFIRM BEFORE CHANGING

        # FIX: MISSING last_updated IS REPLACED WITH A TIMESTAMP ONE YEAR OLD
        if not doc.last_updated:
            doc.last_updated = Date.now() - YEAR

        # FIX: MISSING es_type CAN ONLY BE FILLED IN FOR OBJECTS
        if doc.es_type == None:
            if doc.jx_type == OBJECT:
                doc.es_type = "object"
            else:
                Log.warning("{{doc}} has no es_type", doc=doc)

        # FIX: NESTED COLUMNS ALWAYS GET multi = 1001
        if doc.es_type == "nested":
            doc.multi = 1001

        # FIX: nested_path MUST BE A TUPLE, AND A COLUMN NAMED LIKE A NESTED
        # COLUMN MUST BE TYPED AS ONE
        doc.nested_path = tuple(listwrap(doc.nested_path))
        if last(split_field(doc.es_column)) == NESTED_TYPE and doc.es_type != "nested":
            doc.es_type = "nested"
            doc.jx_type = NESTED
            doc.multi = 1001
            doc.last_updated = Date.now()

        # FIX: DROP THE LAST STEP WHEN THE SECOND-LAST STEP IS ALREADY "."
        expected_nested_path = get_nested_path(doc.es_column)
        if len(doc.nested_path) > 1 and doc.nested_path[-2] == '.':
            doc.nested_path = doc.nested_path[:-1]

        # FIX: nested_path MUST AGREE WITH WHAT es_column IMPLIES
        if untype_path(doc.es_column) == doc.es_column:
            # es_column HAS NO TYPE MARKERS
            if doc.nested_path != (".", ):
                if doc.es_index in {"repo"}:
                    pass
                else:
                    Log.note("not expected")
                    doc.nested_path = expected_nested_path
        else:
            if doc.nested_path != expected_nested_path:
                doc.nested_path = expected_nested_path

        # FIX: A COLUMN NAMED LIKE AN EXISTS-COLUMN MUST HAVE THE EXISTS TYPE
        if last(split_field(doc.es_column)) == EXISTS_TYPE:
            doc.jx_type = EXISTS

        return Column(**doc)
    except Exception:
        # BEST-EFFORT CLEANUP.  THE FAILURE MAY HAVE HAPPENED BEFORE doc WAS
        # WRAPPED, OR Column(**doc) MAY FAIL AGAIN FOR THE SAME REASON, SO THE
        # CLEANUP IS GUARDED TO ENSURE None IS STILL RETURNED TO THE CALLER
        try:
            doc.nested_path = ["."]
            mark_as_deleted(Column(**doc))
        except Exception as cause:
            Log.warning("can not mark column as deleted", cause=cause)
        return None
def doc_to_column(doc):
    """
    Convert a stored column document into a `Column`, repairing known
    mistakes in the stored metadata along the way.

    :param doc: column document; it is passed through `untyped()` and
                `wrap()` before any repair is applied
    :return: `Column` built from the repaired document, or `None` when the
             conversion fails (in which case a best-effort attempt is made
             to mark the column as deleted)
    """
    try:
        doc = wrap(untyped(doc))

        # NOTE(review): `== None` IS KEPT ON PURPOSE; PRESUMABLY WRAPPED MISSING
        # VALUES COMPARE EQUAL TO None WITHOUT BEING None - CONFIRM BEFORE CHANGING

        # MISSING last_updated IS REPLACED WITH A TIMESTAMP ONE YEAR OLD
        if not doc.last_updated:
            doc.last_updated = Date.now() - YEAR

        # MISSING es_type CAN ONLY BE FILLED IN FOR OBJECTS
        if doc.es_type == None:
            if doc.jx_type == OBJECT:
                doc.es_type = "object"
            else:
                Log.warning("{{doc}} has no es_type", doc=doc)

        # NESTED COLUMNS ALWAYS GET multi = 1001
        if doc.es_type == "nested":
            doc.multi = 1001

        # nested_path MUST BE A TUPLE, AND A COLUMN NAMED LIKE A NESTED
        # COLUMN MUST BE TYPED AS ONE
        doc.nested_path = tuple(listwrap(doc.nested_path))
        if last(split_field(doc.es_column)) == NESTED_TYPE and doc.es_type != "nested":
            doc.es_type = "nested"
            doc.jx_type = NESTED
            doc.multi = 1001
            doc.last_updated = Date.now()

        # DROP THE LAST STEP WHEN THE SECOND-LAST STEP IS ALREADY "."
        expected_nested_path = get_nested_path(doc.es_column)
        if len(doc.nested_path) > 1 and doc.nested_path[-2] == '.':
            doc.nested_path = doc.nested_path[:-1]

        # nested_path MUST AGREE WITH WHAT es_column IMPLIES
        if untype_path(doc.es_column) == doc.es_column:
            # es_column HAS NO TYPE MARKERS
            if doc.nested_path != (".", ):
                if doc.es_index in {"repo"}:
                    pass
                else:
                    Log.note("not expected")
                    doc.nested_path = expected_nested_path
        else:
            if doc.nested_path != expected_nested_path:
                doc.nested_path = expected_nested_path

        return Column(**doc)
    except Exception:
        # BEST-EFFORT CLEANUP.  THE FAILURE MAY HAVE HAPPENED BEFORE doc WAS
        # WRAPPED, OR Column(**doc) MAY FAIL AGAIN FOR THE SAME REASON, SO THE
        # CLEANUP IS GUARDED TO ENSURE None IS STILL RETURNED TO THE CALLER
        try:
            doc.nested_path = ["."]
            mark_as_deleted(Column(**doc))
        except Exception as cause:
            Log.warning("can not mark column as deleted", cause=cause)
        return None
def output(row):
    """
    Return the untyped value stored under `es_column` in the `_source` of `row`.

    `es_column` is not defined here; it is taken from the surrounding scope.
    """
    source = row._source
    return untyped(source[es_column])
def output(row):
    """
    Return the untyped value stored under `es_column` in the `_source` of `row`.

    `es_column` is not defined here; it is taken from the surrounding scope.
    """
    source = row._source
    return untyped(source[es_column])
def doc_to_column(doc):
    """
    Build a `Column` from a stored column document.

    The document is untyped, then combined (via `set_default`) with a
    `last_updated` of one year before now.
    NOTE(review): presumably `set_default` only fills in `last_updated` when
    the document does not already have one - confirm against `set_default`.

    :param doc: column document
    :return: `Column` constructed from the resulting properties
    """
    plain = untyped(doc)
    fallback = {"last_updated": Date.now() - YEAR}
    properties = set_default(plain, fallback)
    return Column(**wrap(properties))
def doc_to_column(doc):
    """
    Convert a stored column document into a `Column`, repairing known
    mistakes in the stored metadata along the way.  Most repairs also
    stamp `last_updated` with the time this function was entered.

    :param doc: column document; it is passed through `untyped()` and
                `to_data()` before any repair is applied
    :return: `Column` built from the repaired document, or `None` when the
             conversion fails (in which case a best-effort attempt is made
             to mark the column as deleted)
    """
    now = Date.now()
    try:
        doc = to_data(untyped(doc))

        # THE COLUMNS WRITTEN TO ES CARRY A NUMBER OF HISTORICAL MISTAKES;
        # EVERY STEP BELOW REPAIRS ONE OF THEM
        # NOTE(review): `== None` IS KEPT ON PURPOSE; PRESUMABLY WRAPPED MISSING
        # VALUES COMPARE EQUAL TO None WITHOUT BEING None - CONFIRM BEFORE CHANGING

        # REPAIR: MISSING last_updated IS REPLACED WITH A TIMESTAMP ONE YEAR OLD
        if not doc.last_updated:
            doc.last_updated = Date.now() - YEAR

        # REPAIR: MISSING es_type CAN ONLY BE FILLED IN FOR OBJECTS
        if doc.es_type == None:
            if doc.jx_type == OBJECT:
                doc.es_type = "object"
            else:
                Log.warning("{{doc}} has no es_type", doc=doc)

        # REPAIR: NESTED COLUMNS GET multi = 1001, ANYTHING STILL MISSING GETS 1
        if doc.es_type == "nested":
            doc.multi = 1001
        if doc.multi == None:
            doc.multi = 1

        # REPAIR: COLUMN NAME ENDS WITH THE NESTED MARKER
        if doc.es_column.endswith("." + NESTED_TYPE):
            if doc.jx_type == OBJECT:
                doc.jx_type = NESTED
                doc.last_updated = now
            if doc.es_type == "nested":
                # NOTE(review): THIS ASSIGNMENT STORES THE VALUE THAT WAS JUST
                # TESTED FOR; ONLY last_updated EFFECTIVELY CHANGES - CONFIRM INTENT
                doc.es_type = "nested"
                doc.last_updated = now

        # REPAIR: nested_path MUST BE A TUPLE, AND A COLUMN NAMED LIKE A NESTED
        # COLUMN MUST BE TYPED AS ONE
        doc.nested_path = tuple(listwrap(doc.nested_path))
        leaf = last(split_field(doc.es_column))  # es_column IS NOT CHANGED BELOW
        if leaf == NESTED_TYPE and doc.es_type != "nested":
            doc.es_type = "nested"
            doc.jx_type = NESTED
            doc.multi = 1001
            doc.last_updated = now

        # REPAIR: DROP THE LAST STEP WHEN THE SECOND-LAST STEP IS ALREADY "."
        expected_path = get_nested_path(doc.es_column)
        if len(doc.nested_path) > 1 and doc.nested_path[-2] == '.':
            doc.nested_path = doc.nested_path[:-1]
            doc.last_updated = now

        # REPAIR: nested_path MUST AGREE WITH WHAT es_column IMPLIES
        if untype_path(doc.es_column) == doc.es_column:
            # es_column HAS NO TYPE MARKERS; THE "repo" INDEX IS LEFT ALONE
            if doc.nested_path != (".", ) and doc.es_index not in {"repo"}:
                Log.note("not expected")
                doc.nested_path = expected_path
                doc.last_updated = now
        elif doc.nested_path != expected_path:
            doc.nested_path = expected_path
            doc.last_updated = now

        # REPAIR: A COLUMN NAMED LIKE AN EXISTS-COLUMN MUST HAVE THE EXISTS
        # TYPE AND A CARDINALITY
        if leaf == EXISTS_TYPE:
            if doc.jx_type != EXISTS:
                doc.jx_type = EXISTS
                doc.last_updated = now
            if doc.cardinality == None:
                doc.cardinality = 1
                doc.last_updated = now

        # REPAIR: STRUCTURED COLUMNS ONLY HAVE CARDINALITY 0 OR 1
        if doc.jx_type in STRUCT and doc.cardinality not in [0, 1]:
            doc.cardinality = 1  # DO NOT KNOW IF EXISTS OR NOT
            doc.last_updated = now

        return Column(**doc)
    except Exception:
        # BEST-EFFORT: TRY TO MARK THE BROKEN COLUMN AS DELETED, AND IGNORE
        # ANY FAILURE TO DO SO
        try:
            mark_as_deleted(Column(**doc), now)
        except Exception:
            pass
        return None
def get_pull_function(column):
    """
    Return a function that pulls the value of `column` out of a document.

    The function is compiled from `get_pull(column)`.  When the column's
    `jx_type` is one of `INTERNAL`, the pulled value is also passed through
    `untyped()`.

    :param column: column to build the pull function for
    :return: callable accepting a single document
    """
    pull = jx_expression_to_function(get_pull(column))
    if column.jx_type not in INTERNAL:
        return pull

    def pull_untyped(doc):
        return untyped(pull(doc))

    return pull_untyped
def _untyper(func):
    """
    Wrap `func` so that its result is passed through `untyped()`.

    :param func: callable accepting a single row
    :return: callable accepting a single row and returning `untyped(func(row))`
    """
    def untyped_result(row):
        return untyped(func(row))

    return untyped_result
def doc_to_column(doc):
    """
    Build a `Column` from a stored column document.

    :param doc: column document; it is untyped and wrapped, then its
                properties are used as the `Column` keyword arguments
    :return: the resulting `Column`
    """
    properties = wrap(untyped(doc))
    return Column(**properties)
def test_empty_object(self):
    """
    An object holding only `EXISTS_KEY` must untype to an empty `dict`.
    """
    result = untyped({EXISTS_KEY: 1})
    self.assertIsInstance(result, dict)
    self.assertEqual(len(result), 0)
def pull_property(row):
    """
    Return the untyped value of `field`, read from the entry of `row` at `pos`.

    `pos` and `field` are not defined here; they are taken from the
    surrounding scope.  `Null` is used when `row` has no entry at `pos`.
    """
    entry = row.get(pos, Null)
    return untyped(entry[field])
def pull_source(row):
    """
    Return the untyped value of `field`, read from the `_source` of the
    entry of `row` at `pos`.

    `pos` and `field` are not defined here; they are taken from the
    surrounding scope.  `Null` is used when `row` has no entry at `pos`.
    """
    entry = row.get(pos, Null)
    return untyped(entry._source[field])