def es_setop(es, query):
    schema = query.frum.schema
    new_select, all_paths, split_select, var_to_columns = pre_process(query)
    es_query = setop_to_es_queries(query, all_paths, split_select, var_to_columns)
    size = coalesce(query.limit, DEFAULT_LIMIT)
    sort = jx_sort_to_es_sort(query.sort, schema)
    for q in es_query:
        q.size = size
        q.sort = sort

    with Timer("call to ES", verbose=DEBUG) as call_timer:
        results = es.multisearch(es_query)

    T = []
    for result in results:
        T.extend(result.hits.hits)

    try:
        formatter, _, mime_type = set_formatters[query.format]

        with Timer("formatter", silent=True):
            output = formatter(T, new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)
def es_setop(es, query):
    schema = query.frum.schema
    query_path = schema.query_path[0]
    new_select, split_select = get_selects(query)
    split_wheres = split_expression_by_path(query.where, schema, lang=ES52)
    es_query = es_query_proto(query_path, split_select, split_wheres, schema)
    es_query.size = coalesce(query.limit, DEFAULT_LIMIT)
    es_query.sort = jx_sort_to_es_sort(query.sort, schema)

    with Timer("call to ES", silent=DEBUG) as call_timer:
        result = es.search(es_query)

    # Log.note("{{result}}", result=result)
    T = result.hits.hits

    try:
        formatter, _, mime_type = set_formatters[query.format]

        with Timer("formatter", silent=True):
            output = formatter(T, new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)
def test_lock_speed(self):
    SCALE = 1000 * 100

    with Timer("create"):
        locks = [_allocate_lock() for _ in range(SCALE)]

    with Timer("acquire"):
        for i in range(SCALE):
            locks[i].acquire()

    with Timer("release"):
        for i in range(SCALE):
            locks[i].release()
def test_simple(filename):
    with Timer("simple time"):
        with codecs.open(filename, "r", encoding="utf-8") as f:
            for line in f:
                id = int(line.split("\t")[0])
                if id % 10000 == 0:
                    Log.note("{{id}}", id=id)
def test_multiple_agg_on_same_field(self):
    if self.not_real_service():
        return

    test = wrap({
        "query": {
            "from": {
                "type": "elasticsearch",
                "settings": {
                    "host": ES_CLUSTER_LOCATION,
                    "index": "unittest",
                    "type": "test_result"
                }
            },
            "select": [
                {"name": "max_bytes", "value": "run.stats.bytes", "aggregate": "max"},
                {"name": "count", "value": "run.stats.bytes", "aggregate": "count"}
            ]
        }
    })

    query = unicode2utf8(convert.value2json(test.query))
    # EXECUTE QUERY
    with Timer("query"):
        response = http.get(self.testing.query, data=query)
        if response.status_code != 200:
            error(response)
    result = json2value(utf82unicode(response.all_content))

    Log.note("result\n{{result|indent}}", {"result": result})
def format_cube(T, select, query=None):
    with Timer("format table"):
        table = format_table(T, select, query)

    if len(table.data) == 0:
        return Cube(
            select,
            edges=[{
                "name": "rownum",
                "domain": {"type": "rownum", "min": 0, "max": 0, "interval": 1}
            }],
            data={h: Matrix(list=[]) for i, h in enumerate(table.header)}
        )

    cols = transpose(*unwrap(table.data))
    return Cube(
        select,
        edges=[{
            "name": "rownum",
            "domain": {"type": "rownum", "min": 0, "max": len(table.data), "interval": 1}
        }],
        data={h: Matrix(list=cols[i]) for i, h in enumerate(table.header)}
    )
def test_branch_count(self):
    if self.not_real_service():
        return

    test = wrap({"query": {
        "from": {
            "type": "elasticsearch",
            "settings": {
                "host": ES_CLUSTER_LOCATION,
                "index": "unittest",
                "type": "test_result"
            }
        },
        "select": [
            {"aggregate": "count"},
        ],
        "edges": [
            "build.branch"
        ],
        "where": {"or": [
            {"missing": "build.id"}
            # {"gte": {"timestamp": Date.floor(Date.now() - (Duration.DAY * 7), Duration.DAY).milli / 1000}}
        ]},
        "format": "table"
    }})

    query = convert.unicode2utf8(convert.value2json(test.query))
    # EXECUTE QUERY
    with Timer("query"):
        response = http.get(self.service_url, data=query)
        if response.status_code != 200:
            error(response)
    result = convert.json2value(convert.utf82unicode(response.all_content))

    Log.note("result\n{{result|indent}}", {"result": result})
def test_queue_speed(self):
    SCALE = 1000 * 10

    done = Signal("done")
    slow = Queue()
    q = ThreadedQueue("test queue", queue=slow)

    def empty(please_stop):
        while not please_stop:
            item = q.pop()
            if item is THREAD_STOP:
                break
        done.go()

    Thread.run("empty", empty)

    timer = Timer("add {{num}} to queue", param={"num": SCALE})
    with timer:
        for i in range(SCALE):
            q.add(i)
        q.add(THREAD_STOP)
        Log.note("Done insert")
        done.wait()

    self.assertLess(timer.duration.seconds, 1.5, "Expecting queue to be fast")
def get_raw_json(path):
    active_data_timer = Timer("total duration")
    body = flask.request.get_data()
    try:
        with active_data_timer:
            args = wrap(Data(**flask.request.args))
            limit = args.limit if args.limit else 10
            args.limit = None

            frum = wrap_from(path)
            result = jx.run(
                {
                    "from": path,
                    "where": {"eq": args},
                    "limit": limit,
                    "format": "list"
                },
                frum
            )

            if isinstance(result, Container):  # TODO: REMOVE THIS CHECK, jx SHOULD ALWAYS RETURN Containers
                result = result.format("list")

        result.meta.active_data_response_time = active_data_timer.duration

        response_data = convert.unicode2utf8(convert.value2json(result.data, pretty=True))
        Log.note("Response is {{num}} bytes", num=len(response_data))
        return Response(response_data, status=200)
    except Exception as e:
        e = Except.wrap(e)
        return _send_error(active_data_timer, body, e)
def copy(self, keys, source, sample_only_filter=None, sample_size=None, done_copy=None):
    """
    :param keys: THE KEYS TO LOAD FROM source
    :param source: THE SOURCE (USUALLY S3 BUCKET)
    :param sample_only_filter: SOME FILTER, IN CASE YOU DO NOT WANT TO SEND EVERYTHING
    :param sample_size: FOR RANDOM SAMPLE OF THE source DATA
    :param done_copy: CALLBACK, ADDED TO queue, TO FINISH THE TRANSACTION
    :return: LIST OF SUB-keys PUSHED INTO ES
    """
    num_keys = 0
    queue = None
    pending = []  # FOR WHEN WE DO NOT HAVE QUEUE YET
    for key in keys:
        timer = Timer("Process {{key}}", param={"key": key})
        try:
            with timer:
                for rownum, line in enumerate(source.read_lines(strip_extension(key))):
                    if not line:
                        continue

                    if rownum > 0 and rownum % 1000 == 0:
                        Log.note(
                            "Ingested {{num}} records from {{key}} in bucket {{bucket}}",
                            num=rownum,
                            key=key,
                            bucket=source.name
                        )

                    row, please_stop = fix(rownum, line, source, sample_only_filter, sample_size)
                    num_keys += 1

                    if queue == None:
                        queue = self._get_queue(row)
                        if queue == None:
                            pending.append(row)
                            if len(pending) > 1000:
                                self._get_queue(row)
                                Log.error(
                                    "first 1000 (key={{key}}) records have no indication what index to put data",
                                    key=tuple(keys)[0]
                                )
                            continue
                        elif queue is DATA_TOO_OLD:
                            break

                        if pending:
                            queue.extend(pending)
                            pending = []

                    queue.add(row)

                    if please_stop:
                        break
        except Exception as e:
            done_copy = None
            Log.warning(
                "Could not process {{key}} after {{duration|round(places=2)}}seconds",
                key=key,
                duration=timer.duration.seconds,
                cause=e
            )

    if done_copy:
        if queue == None:
            done_copy()
        else:
            queue.add(done_copy)

    if pending:
        Log.error("Did not find an index to place the data for key={{key}}", key=tuple(keys)[0])

    Log.note("{{num}} keys from {{key|json}} added", num=num_keys, key=keys)
    return num_keys
def test_simple_binary(filename):
    with Timer("simple binary time"):
        with io.open(filename, "rb") as f:
            for line in f:
                line = line.decode("utf-8")
                id = int(line.split("\t")[0])
                if id % 10000 == 0:
                    Log.note("{{id}}", id=id)
def test_io(filename):
    with Timer("io time"):
        # text mode already yields decoded unicode lines, so no explicit decode is needed
        with io.open(filename, "r", encoding="utf-8", buffering=2**25) as f:
            for line in f:
                id = int(line.split("\t")[0])
                if id % 10000 == 0:
                    Log.note("{{id}}", id=id)
def test_buffered(filename):
    with Timer("buffered time"):
        with codecs.open(filename, "r", encoding="utf-8", buffering=2**25) as f:
            for line in f:
                id = int(line.split("\t")[0])
                if id % 10000 == 0:
                    Log.note("{{id}}", id=id)
def test_timing(self):
    if self.not_real_service():
        return

    test = wrap({
        "query": {
            "from": {
                "type": "elasticsearch",
                "settings": {
                    "host": ES_CLUSTER_LOCATION,
                    "index": "unittest",
                    "type": "test_result"
                }
            },
            "select": [
                {"name": "count", "value": "run.duration", "aggregate": "count"},
                {"name": "total", "value": "run.duration", "aggregate": "sum"}
            ],
            "edges": [
                {"name": "chunk", "value": ["run.suite", "run.chunk"]},
                "result.ok"
            ],
            "where": {"and": [
                {"lt": {"timestamp": Date.floor(Date.now()).milli / 1000}},
                {"gte": {"timestamp": Date.floor(Date.now() - (Duration.DAY * 7), Duration.DAY).milli / 1000}}
            ]},
            "format": "cube",
            "samples": {"limit": 30}
        }
    })

    query = unicode2utf8(convert.value2json(test.query))
    # EXECUTE QUERY
    with Timer("query"):
        response = http.get(self.testing.query, data=query)
        if response.status_code != 200:
            error(response)
    result = json2value(utf82unicode(response.all_content))

    Log.note("result\n{{result|indent}}", {"result": result})
def es_aggsop(es, frum, query):
    query = query.copy()  # WE WILL MARK UP THIS QUERY
    schema = frum.schema
    query_path = schema.query_path[0]
    selects = listwrap(query.select)

    acc, decoders, es_query = build_es_query(selects, query_path, schema, query)

    with Timer("ES query time", verbose=DEBUG) as es_duration:
        result = es.search(es_query)

    # Log.note("{{result}}", result=result)
    try:
        format_time = Timer("formatting", verbose=DEBUG)
        with format_time:
            # result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total)  # IT APPEARS THE OLD doc_count IS GONE
            aggs = unwrap(result.aggregations)

            edges_formatter, groupby_formatter, value_formatter, mime_type = agg_formatters[query.format]
            if query.edges:
                output = edges_formatter(aggs, acc, query, decoders, selects)
            elif query.groupby:
                output = groupby_formatter(aggs, acc, query, decoders, selects)
            else:
                output = value_formatter(aggs, acc, query, decoders, selects)

        output.meta.timing.formatting = format_time.duration
        output.meta.timing.es_search = es_duration.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        if query.format not in agg_formatters:
            Log.error("Format {{format|quote}} not supported yet", format=query.format, cause=e)
        Log.error("Some problem", cause=e)
def test_simple_query(self):
    if self.not_real_service():
        return

    query = value2json({"from": "unittest"}).encode('utf8')
    # EXECUTE QUERY
    with Timer("query"):
        response = http.get(self.testing.query, data=query)
        if response.status_code != 200:
            error(response)
    result = json2value(response.all_content.decode('utf8'))

    Log.note("result\n{{result|indent}}", {"result": result})
def test_simple_query(self):
    if self.not_real_service():
        return

    query = convert.unicode2utf8(convert.value2json({"from": "unittest"}))
    # EXECUTE QUERY
    with Timer("query"):
        response = http.get(self.service_url, data=query)
        if response.status_code != 200:
            error(response)
    result = convert.json2value(convert.utf82unicode(response.all_content))

    Log.note("result\n{{result|indent}}", {"result": result})
def es_setop(es, query):
    schema = query.frum.schema
    all_paths, split_decoders, var_to_columns = pre_process(query)
    new_select, split_select, flatten = get_selects(query)
    # THE SELECTS MAY BE REACHING DEEPER INTO THE NESTED RECORDS
    all_paths = list(reversed(sorted(set(split_select.keys()) | set(all_paths))))
    es_query = setop_to_es_queries(query, all_paths, split_select, var_to_columns)

    if not es_query:
        # NO QUERY TO SEND
        formatter, _, mime_type = set_formatters[query.format]
        output = formatter([], new_select, query)
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output

    size = coalesce(query.limit, DEFAULT_LIMIT)
    sort = jx_sort_to_es_sort(query.sort, schema)
    for q in es_query:
        q["size"] = size
        q["sort"] = sort

    with Timer("call to ES", verbose=DEBUG) as call_timer:
        results = es.multisearch(es_query)

    T = [copy(row) for row in flatten(results)]

    try:
        formatter, _, mime_type = set_formatters[query.format]

        with Timer("formatter", silent=True):
            output = formatter(T, new_select, query)
        output.meta.timing.es = call_timer.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        Log.error("problem formatting", e)
def setUpClass(cls):
    Log.start(settings.debug)
    with Timer("setup database"):
        try:
            with MySQL(schema=None, kwargs=settings.database) as db:
                db.query("drop database testing")
        except Exception as e:
            if "Can't drop database " in e:
                pass
            else:
                Log.warning("problem removing db", cause=e)

        MySQL.execute_file("tests/resources/database.sql", schema=None, kwargs=settings.database)
def _parse_properties(self, abs_index, properties, meta):
    # IT IS IMPORTANT THAT NESTED PROPERTIES NAME ALL COLUMNS, AND
    # ALL COLUMNS ARE GIVEN NAMES FOR ALL NESTED PROPERTIES
    abs_columns = _elasticsearch.parse_properties(abs_index, None, properties.properties)
    abs_columns = abs_columns.filter(
        # TODO: REMOVE WHEN jobs PROPERTY EXPLOSION IS CONTAINED
        lambda r: not r.es_column.startswith("other.")
        and not r.es_column.startswith("previous_values.cf_")
        and not r.es_index.startswith("debug")
        and r.es_column.find("=") == -1
        and r.es_column.find(" ") == -1
    )

    def add_column(c, query_path):
        c.last_updated = Date.now()
        if query_path[0] != ".":
            c.names[query_path[0]] = relative_field(c.names["."], query_path[0])

        with self.meta.columns.locker:
            self._upsert_column(c)
            for alias in meta.aliases:
                c = copy(c)
                c.es_index = alias
                self._upsert_column(c)

    with Timer("upserting {{num}} columns", {"num": len(abs_columns)}, debug=DEBUG):
        # LIST OF EVERY NESTED PATH
        query_paths = [[c.es_column] for c in abs_columns if c.type == "nested"]
        for a, b in itertools.product(query_paths, query_paths):
            aa = a[0]
            bb = b[0]
            if aa and bb.startswith(aa):
                for i, b_prefix in enumerate(b):
                    if len(b_prefix) > len(aa):
                        continue
                    if aa == b_prefix:
                        break  # SPLIT ALREADY FOUND
                    b.insert(i, aa)
                    break
        for q in query_paths:
            q.append(".")
        query_paths.append(ROOT_PATH)

        # ADD RELATIVE COLUMNS
        for abs_column in abs_columns:
            for query_path in query_paths:
                add_column(abs_column, query_path)
def write_lines(self, key, lines): self._verify_key_format(key) storage = self.bucket.new_key(key + ".json.gz") buff = TemporaryFile() archive = gzip.GzipFile(fileobj=buff, mode='w') count = 0 for l in lines: if hasattr(l, "__iter__"): for ll in l: archive.write(ll.encode("utf8")) archive.write(b"\n") count += 1 else: archive.write(l.encode("utf8")) archive.write(b"\n") count += 1 archive.close() file_length = buff.tell() retry = 3 while retry: try: with Timer( "Sending {{count}} lines in {{file_length|comma}} bytes for {{key}}", { "key": key, "file_length": file_length, "count": count }, verbose=self.settings.debug): buff.seek(0) storage.set_contents_from_file(buff) break except Exception as e: e = Except.wrap(e) retry -= 1 if retry == 0 or 'Access Denied' in e or "No space left on device" in e: Log.error("could not push data to s3", cause=e) else: Log.warning("could not push data to s3", cause=e) if self.settings.public: storage.set_acl('public-read') return
def _parse_properties(self, abs_index, properties, meta):
    # IT IS IMPORTANT THAT NESTED PROPERTIES NAME ALL COLUMNS, AND
    # ALL COLUMNS ARE GIVEN NAMES FOR ALL NESTED PROPERTIES

    def add_column(c, query_path):
        c.last_updated = Date.now() - TOO_OLD
        if query_path[0] != ".":
            c.names[query_path[0]] = relative_field(c.names["."], query_path[0])

        with self.meta.columns.locker:
            for alias in meta.aliases:
                c_ = copy(c)
                c_.es_index = alias
                self._upsert_column(c_)
            self._upsert_column(c)

    abs_columns = elasticsearch.parse_properties(abs_index, None, properties.properties)
    self.abs_columns.update(abs_columns)

    with Timer("upserting {{num}} columns", {"num": len(abs_columns)}, debug=DEBUG):
        # LIST OF EVERY NESTED PATH
        query_paths = [[c.es_column] for c in abs_columns if c.type == "nested"]
        for a, b in itertools.product(query_paths, query_paths):
            aa = a[0]
            bb = b[0]
            if aa and bb.startswith(aa):
                for i, b_prefix in enumerate(b):
                    if len(b_prefix) > len(aa):
                        continue
                    if aa == b_prefix:
                        break  # SPLIT ALREADY FOUND
                    b.insert(i, aa)
                    break
        for q in query_paths:
            q.append(".")
        query_paths.append(SELF_PATH)

        # ADD RELATIVE COLUMNS
        for abs_column in abs_columns:
            abs_column = abs_column.__copy__()
            abs_column.type = es_type_to_json_type[abs_column.type]
            for query_path in query_paths:
                add_column(abs_column, query_path)
    pass
def test_binary(filename, buffering=2**14):
    # read fixed-size binary blocks and split them into lines manually
    with Timer("binary time (buffering=={{buffering}})", {"buffering": buffering}):
        remainder = b""
        with io.open(filename, "rb") as f:
            while True:
                block = f.read(buffering)
                if block == b"":
                    if remainder == b"":
                        return None
                    return remainder
                lines = (remainder + block).split(b"\n")
                for line in lines[:-1]:
                    line = line.decode("utf-8")
                    id = int(line.split("\t")[0])
                    if id % 10000 == 0:
                        Log.note("{{id}}", id=id)
                remainder = lines[-1]
def test_longest_running_tests(self):
    test = wrap({
        "query": {
            "sort": {"sort": -1, "field": "avg"},
            "from": {
                "from": "unittest",
                "where": {"and": [
                    {"gt": {"build.date": "1439337600"}}
                ]},
                "groupby": [
                    "build.platform",
                    "build.type",
                    "run.suite",
                    "result.test"
                ],
                "select": [
                    {"aggregate": "avg", "name": "avg", "value": "result.duration"}
                ],
                "format": "table",
                "limit": 100
            },
            "limit": 100,
            "format": "list"
        }
    })

    query = unicode2utf8(convert.value2json(test.query))
    # EXECUTE QUERY
    with Timer("query"):
        response = http.get(self.testing.query, data=query)
        if response.status_code != 200:
            error(response)
    result = json2value(utf82unicode(response.all_content))

    Log.note("result\n{{result|indent}}", {"result": result})
def _test_queue_speed(self, test=False):
    SCALE = 1000 * 10

    done = Signal("done")
    slow = Queue()
    q = ThreadedQueue("test queue", slow_queue=slow)

    def empty(please_stop):
        while not please_stop:
            item = slow.pop()
            if item is THREAD_STOP:
                break
        done.go()

    Thread.run("empty", empty)

    timer = Timer("add {{num}} to queue", param={"num": SCALE})
    with timer:
        for i in range(SCALE):
            q.add(i)
        q.add(THREAD_STOP)
        Log.note("Done insert")
        done.wait()

    Log.note(
        "{{num}} items through queue in {{seconds|round(3)}} seconds",
        num=SCALE,
        seconds=timer.duration.seconds,
    )
    if PY2 and "windows" not in platform.system().lower():
        expected_time = 15  # LINUX PY2 IS CRAZY SLOW
    elif PY3 and "windows" not in platform.system().lower():
        expected_time = 6  # LINUX PY3 IS SLOW
    else:
        expected_time = 6
    if test:
        self.assertLess(
            timer.duration.seconds,
            expected_time,
            "Expecting queue to be fast, not " + text(timer.duration.seconds) + " seconds",
        )
def test_failures_by_directory(self):
    if self.not_real_service():
        return

    test = wrap({"query": {
        "from": {
            "type": "elasticsearch",
            "settings": {
                "host": ES_CLUSTER_LOCATION,
                "index": "unittest",
                "type": "test_result"
            }
        },
        "select": [
            {"aggregate": "count"}
        ],
        "edges": [
            "result.test",
            "result.ok"
        ],
        "where": {"prefix": {"result.test": "/"}},
        "format": "table"
    }})

    query = convert.unicode2utf8(convert.value2json(test.query))
    # EXECUTE QUERY
    with Timer("query"):
        response = http.get(self.service_url, data=query)
        if response.status_code != 200:
            error(response)
    result = convert.json2value(convert.utf82unicode(response.all_content))

    Log.note("result\n{{result|indent}}", {"result": result})
def write_lines(self, key, lines): self._verify_key_format(key) storage = self.bucket.new_key(key + ".json.gz") buff = TemporaryFile() archive = gzip.GzipFile(fileobj=buff, mode='w') count = 0 for l in lines: if hasattr(l, "__iter__"): for ll in l: archive.write(ll.encode("utf8")) archive.write(b"\n") count += 1 else: archive.write(l.encode("utf8")) archive.write(b"\n") count += 1 archive.close() file_length = buff.tell() retry = 3 while retry: try: with Timer( "Sending {{count}} lines in {{file_length|comma}} bytes", { "file_length": file_length, "count": count }, debug=self.settings.debug): buff.seek(0) storage.set_contents_from_file(buff) break except Exception, e: Log.warning("could not push data to s3", cause=e) retry -= 1
def test_chunk_timing(self):
    if self.not_real_service():
        return

    test = wrap({"query": {
        "from": {
            "type": "elasticsearch",
            "settings": {
                "host": ES_CLUSTER_LOCATION,
                "index": "unittest",
                "type": "test_result"
            }
        },
        "select": {"value": "run.stats.duration", "aggregate": "average"},
        "edges": [
            {"name": "chunk", "value": ["run.suite", "run.chunk"]}
        ],
        "where": {"and": [
            {"term": {"etl.id": 0}},
            {"gte": {"timestamp": Date.floor(Date.now() - (Duration.DAY * 7), Duration.DAY).milli / 1000}}
        ]},
        "format": "cube",
        "samples": {"limit": 30}
    }})

    query = value2json(test.query).encode('utf8')
    # EXECUTE QUERY
    with Timer("query"):
        response = self.utils.try_till_response(self.testing.query, data=query)
        if response.status_code != 200:
            error(response)
    result = json2value(response.all_content.decode('utf8'))

    Log.note("result\n{{result|indent}}", {"result": result})
def write_lines(self, key, lines): self._verify_key_format(key) storage = self.bucket.new_key(str(key + ".json.gz")) if VERIFY_UPLOAD: lines = list(lines) with mo_files.TempFile() as tempfile: with open(tempfile.abspath, "wb") as buff: DEBUG and Log.note("Temp file {{filename}}", filename=tempfile.abspath) archive = gzip.GzipFile(filename=str(key + ".json"), fileobj=buff, mode="w") count = 0 for l in lines: if is_many(l): for ll in l: archive.write(ll.encode("utf8")) archive.write(b"\n") count += 1 else: archive.write(l.encode("utf8")) archive.write(b"\n") count += 1 archive.close() retry = 3 while retry: try: with Timer( "Sending {{count}} lines in {{file_length|comma}} bytes for {{key}}", { "key": key, "file_length": tempfile.length, "count": count }, verbose=self.settings.debug, ): storage.set_contents_from_filename( tempfile.abspath, headers={"Content-Type": mimetype.GZIP}) break except Exception as e: e = Except.wrap(e) retry -= 1 if (retry == 0 or "Access Denied" in e or "No space left on device" in e): Log.error("could not push data to s3", cause=e) else: Log.warning("could not push data to s3, will retry", cause=e) if self.settings.public: storage.set_acl("public-read") if VERIFY_UPLOAD: try: with open(tempfile.abspath, mode="rb") as source: result = list(ibytes2ilines( scompressed2ibytes(source))) assertAlmostEqual(result, lines, msg="file is different") # full_url = "https://"+self.name+".s3-us-west-2.amazonaws.com/"+storage.key.replace(":", "%3A") # https://active-data-test-result.s3-us-west-2.amazonaws.com/tc.1524896%3A152488763.0.json.gz # dest_bucket = s3.MultiBucket(bucket="self.name", kwargs=self.settings.aws) result = list(self.read_lines(strip_extension(key))) assertAlmostEqual(result, lines, result, msg="S3 is different") except Exception as e: from activedata_etl.transforms import TRY_AGAIN_LATER Log.error(TRY_AGAIN_LATER, reason="did not pass verification", cause=e) return
def copy(self, keys, source, sample_only_filter=None, sample_size=None, done_copy=None): """ :param keys: THE KEYS TO LOAD FROM source :param source: THE SOURCE (USUALLY S3 BUCKET) :param sample_only_filter: SOME FILTER, IN CASE YOU DO NOT WANT TO SEND EVERYTHING :param sample_size: FOR RANDOM SAMPLE OF THE source DATA :param done_copy: CALLBACK, ADDED TO queue, TO FINISH THE TRANSACTION :return: LIST OF SUB-keys PUSHED INTO ES """ num_keys = 0 queue = None pending = [] # FOR WHEN WE DO NOT HAVE QUEUE YET for key in keys: timer = Timer("Process {{key}}", param={"key": key}, silent=not DEBUG) try: with timer: for rownum, line in enumerate( source.read_lines(strip_extension(key))): if not line: continue if rownum > 0 and rownum % 1000 == 0: Log.note( "Ingested {{num}} records from {{key}} in bucket {{bucket}}", num=rownum, key=key, bucket=source.name) insert_me, please_stop = fix(key, rownum, line, source, sample_only_filter, sample_size) if insert_me == None: continue value = insert_me['value'] if '_id' not in value: Log.warning( "expecting an _id in all S3 records. If missing, there can be duplicates" ) if queue == None: queue = self._get_queue(insert_me) if queue == None: pending.append(insert_me) if len(pending) > 1000: if done_copy: done_copy() Log.error( "first 1000 (key={{key}}) records for {{alias}} have no indication what index to put data", key=tuple(keys)[0], alias=self.settings.index) continue elif queue is DATA_TOO_OLD: break if pending: queue.extend(pending) pending = [] num_keys += 1 queue.add(insert_me) if please_stop: break except Exception as e: if KEY_IS_WRONG_FORMAT in e: Log.warning( "Could not process {{key}} because bad format. Never trying again.", key=key, cause=e) pass elif CAN_NOT_DECODE_JSON in e: Log.warning( "Could not process {{key}} because of bad JSON. Never trying again.", key=key, cause=e) pass else: Log.warning( "Could not process {{key}} after {{duration|round(places=2)}}seconds", key=key, duration=timer.duration.seconds, cause=e) done_copy = None if done_copy: if queue == None: done_copy() elif queue is DATA_TOO_OLD: done_copy() else: queue.add(done_copy) if [ p for p in pending if wrap(p).value.task.state not in ('failed', 'exception') ]: Log.error( "Did not find an index for {{alias}} to place the data for key={{key}}", key=tuple(keys)[0], alias=self.settings.index) Log.note("{{num}} keys from {{key|json}} added", num=num_keys, key=keys) return num_keys