def _wait_for_exit(please_stop):
    """
    /dev/null PIPED TO sys.stdin SPEWS INFINITE LINES, DO NOT POLL AS OFTEN
    """
    cr_count = 0  # COUNT NUMBER OF BLANK LINES
    while not please_stop:
        # if DEBUG:
        #     Log.note("inside wait-for-shutdown loop")
        if cr_count > 30:
            (Till(seconds=3) | please_stop).wait()
        try:
            line = sys.stdin.readline()
        except Exception as e:
            e = Except.wrap(e)
            if "Bad file descriptor" in e:
                _wait_for_interrupt(please_stop)
                break
        # if DEBUG:
        #     Log.note("read line {{line|quote}}, count={{count}}", line=line, count=cr_count)
        if line == "":
            cr_count += 1
        else:
            cr_count = -1000000  # NOT /dev/null
        if line.strip() == "exit":
            Log.alert("'exit' Detected!  Stopping...")
            return
def query(self, _query):
    try:
        query = QueryOp.wrap(_query, container=self, namespace=self.namespace)
        for s in listwrap(query.select):
            if s.aggregate != None and not aggregates.get(s.aggregate):
                Log.error(
                    "ES can not aggregate {{name}} because {{aggregate|quote}} is not a recognized aggregate",
                    name=s.name,
                    aggregate=s.aggregate
                )

        frum = query["from"]
        if isinstance(frum, QueryOp):
            result = self.query(frum)
            q2 = query.copy()
            q2.frum = result
            return jx.run(q2)

        if is_deepop(self.es, query):
            return es_deepop(self.es, query)
        if is_aggsop(self.es, query):
            return es_aggsop(self.es, frum, query)
        if is_setop(self.es, query):
            return es_setop(self.es, query)
        Log.error("Can not handle")
    except Exception as e:
        e = Except.wrap(e)
        if "Data too large, data for" in e:
            http.post(self.es.cluster.url / "_cache/clear")
            Log.error("Problem (Tried to clear Elasticsearch cache)", e)
        Log.error("problem", e)
def _db_insert_column(self, column):
    try:
        self.db.execute(
            "INSERT INTO"
            + db_table_name
            + sql_iso(all_columns)
            + "VALUES"
            + sql_iso(sql_list(
                [
                    quote_value(column[c.name])
                    if c.name not in ("nested_path", "partitions")
                    else quote_value(value2json(column[c.name]))
                    for c in METADATA_COLUMNS
                ]
            ))
        )
    except Exception as e:
        e = Except.wrap(e)
        if "UNIQUE constraint failed" in e or " are not unique" in e:
            # THIS CAN HAPPEN BECAUSE todo HAS OLD COLUMN DATA
            self.todo.add((UPDATE, column), force=True)
        else:
            Log.error("do not know how to handle", cause=e)
def get_branches(hg, branches, kwargs=None):
    # TRY ES
    cluster = elasticsearch.Cluster(branches)
    try:
        es = cluster.get_index(kwargs=branches, read_only=False)
        esq = jx_elasticsearch.new_instance(branches)
        found_branches = esq.query({"from": "branches", "format": "list", "limit": 10000}).data

        # IF IT IS TOO OLD, THEN PULL FROM HG
        oldest = Date(MAX(found_branches.etl.timestamp))
        if oldest == None or Date.now() - oldest > OLD_BRANCH:
            found_branches = _get_branches_from_hg(hg)
            es.extend({"id": b.name + " " + b.locale, "value": b} for b in found_branches)
            es.flush()

        try:
            return UniqueIndex(["name", "locale"], data=found_branches, fail_on_dup=False)
        except Exception as e:
            Log.error("Bad branch in ES index", cause=e)
    except Exception as e:
        e = Except.wrap(e)
        if "Can not find index " in e:
            set_default(branches, {"schema": branches_schema})
            es = cluster.get_or_create_index(branches)
            es.add_alias()
            return get_branches(kwargs)
        Log.error("problem getting branches", cause=e)
def write(self, template, params):
    try:
        self.queue.add({"template": template, "params": params})
        return self
    except Exception as e:
        e = Except.wrap(e)
        raise e  # OH NO!
def _db_transaction(self):
    self.db.execute(str("BEGIN"))
    try:
        yield
        self.db.execute(str("COMMIT"))
    except Exception as e:
        e = Except.wrap(e)
        self.db.execute(str("ROLLBACK"))
        Log.error("Transaction failed", cause=e)
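# A minimal, self-contained sketch of the same BEGIN/COMMIT/ROLLBACK generator
# pattern, using only the standard library (contextlib + sqlite3). The class
# and table names below are hypothetical, not taken from the code above.
from contextlib import contextmanager
import sqlite3

class TinyDb:
    def __init__(self, path=":memory:"):
        # isolation_level=None DISABLES IMPLICIT TRANSACTIONS SO BEGIN/COMMIT ARE EXPLICIT
        self.db = sqlite3.connect(path, isolation_level=None)

    @contextmanager
    def transaction(self):
        self.db.execute("BEGIN")
        try:
            yield  # RUN THE CALLER'S STATEMENTS
            self.db.execute("COMMIT")
        except Exception:
            self.db.execute("ROLLBACK")  # UNDO EVERYTHING ON ANY FAILURE
            raise

# usage: all-or-nothing insert
db = TinyDb()
db.db.execute("CREATE TABLE t (x INTEGER)")
with db.transaction():
    db.db.execute("INSERT INTO t VALUES (1)")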
def delete(self):
    try:
        if os.path.isdir(self._filename):
            shutil.rmtree(self._filename)
        elif os.path.isfile(self._filename):
            os.remove(self._filename)
        return self
    except Exception as e:
        e = Except.wrap(e)
        if "The system cannot find the path specified" in e:
            return
        Log.error("Could not remove file", e)
def delete_daemon(file, caller_stack, please_stop):
    # WINDOWS WILL HANG ONTO A FILE FOR A BIT AFTER WE CLOSED IT
    while not please_stop:
        try:
            file.delete()
            return
        except Exception as e:
            e = Except.wrap(e)
            e.trace = e.trace[0:2] + caller_stack
            Log.warning(u"problem deleting file {{file}}", file=file.abspath, cause=e)
            (Till(seconds=10) | please_stop).wait()
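# A small, self-contained sketch of the same retry-with-delay idea using only
# the standard library; threading.Event stands in for the please_stop signal
# and the 10-second backoff above. All names here are hypothetical.
import os
import threading

def delete_with_retry(path, please_stop, delay=10):
    while not please_stop.is_set():
        try:
            os.remove(path)  # MAY FAIL WHILE ANOTHER PROCESS STILL HOLDS THE FILE
            return True
        except FileNotFoundError:
            return True      # ALREADY GONE
        except OSError:
            please_stop.wait(delay)  # BACK OFF, THEN TRY AGAIN
    return False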
def relay_post(path):
    try:
        return cache.request("post", path, flask.request.headers)
    except Exception as e:
        e = Except.wrap(e)
        Log.warning("could not handle request", cause=e)
        return Response(
            unicode2utf8(value2json(e, pretty=True)),
            status=400,
            headers={"Content-Type": "text/html"}
        )
def ujson_encode(value, pretty=False):
    if pretty:
        return pretty_json(value)

    try:
        scrubbed = scrub(value)
        return ujson_dumps(scrubbed, ensure_ascii=False, sort_keys=True, escape_forward_slashes=False).decode('utf8')
    except Exception as e:
        from mo_logs.exceptions import Except
        from mo_logs import Log

        e = Except.wrap(e)
        Log.warning("problem serializing {{type}}", type=text_type(repr(value)), cause=e)
        raise e
def encode(self, value, pretty=False):
    if pretty:
        return pretty_json(value)

    try:
        scrubbed = scrub(value)
        return unicode(self.encoder.encode(scrubbed))
    except Exception as e:
        from mo_logs.exceptions import Except
        from mo_logs import Log

        e = Except.wrap(e)
        Log.warning("problem serializing {{type}}", type=_repr(value), cause=e)
        raise e
def raise_error(e, packed):
    err = text_type(e)
    e = Except.wrap(e)
    if err.startswith(func_name) and ("takes at least" in err or "required positional argument" in err):
        missing = [p for p in params if str(p) not in packed]
        given = [p for p in params if str(p) in packed]
        get_logger().error(
            "Problem calling {{func_name}}: Expecting parameter {{missing}}, given {{given}}",
            func_name=func_name,
            missing=missing,
            given=given,
            stack_depth=2
        )
    get_logger().error("Error dispatching call", e)
def value2json(obj, pretty=False, sort_keys=False):
    try:
        json = json_encoder(obj, pretty=pretty)
        if json == None:
            Log.note(str(type(obj)) + " is not valid{{type}}JSON", type=" (pretty) " if pretty else " ")
            Log.error("Not valid JSON: " + str(obj) + " of type " + str(type(obj)))
        return json
    except Exception as e:
        e = Except.wrap(e)
        try:
            json = pypy_json_encode(obj)
            return json
        except Exception:
            pass
        Log.error("Can not encode into JSON: {{value}}", value=repr(obj), cause=e)
def encode(self, value, pretty=False):
    if pretty:
        return pretty_json(value)

    try:
        with Timer("scrub", too_long=0.1):
            scrubbed = scrub(value)
        with Timer("encode", too_long=0.1):
            return text_type(self.encoder(scrubbed))
    except Exception as e:
        from mo_logs.exceptions import Except
        from mo_logs import Log

        e = Except.wrap(e)
        Log.warning("problem serializing {{type}}", type=text_type(repr(value)), cause=e)
        raise e
def _run(self):
    self.id = get_ident()
    with RegisterThread(self):
        try:
            if self.target is not None:
                a, k, self.args, self.kwargs = self.args, self.kwargs, None, None
                self.end_of_thread.response = self.target(*a, **k)
                self.parent.remove_child(self)  # IF THREAD ENDS OK, THEN FORGET ABOUT IT
        except Exception as e:
            e = Except.wrap(e)
            with self.synch_lock:
                self.end_of_thread.exception = e
            with self.parent.child_lock:
                emit_problem = self not in self.parent.children
            if emit_problem:
                # THREAD FAILURES ARE A PROBLEM ONLY IF NO ONE WILL BE JOINING WITH IT
                try:
                    Log.fatal("Problem in thread {{name|quote}}", name=self.name, cause=e)
                except Exception:
                    sys.stderr.write(str("ERROR in thread: " + self.name + " " + text_type(e) + "\n"))
        finally:
            try:
                with self.child_lock:
                    children = copy(self.children)
                for c in children:
                    try:
                        DEBUG and sys.stdout.write(str("Stopping thread " + c.name + "\n"))
                        c.stop()
                    except Exception as e:
                        Log.warning("Problem stopping thread {{thread}}", thread=c.name, cause=e)

                for c in children:
                    try:
                        DEBUG and sys.stdout.write(str("Joining on thread " + c.name + "\n"))
                        c.join()
                    except Exception as e:
                        Log.warning("Problem joining thread {{thread}}", thread=c.name, cause=e)
                    finally:
                        DEBUG and sys.stdout.write(str("Joined on thread " + c.name + "\n"))

                del self.target, self.args, self.kwargs
                DEBUG and Log.note("thread {{name|quote}} stopping", name=self.name)
            except Exception as e:
                DEBUG and Log.warning("problem with thread {{name|quote}}", cause=e, name=self.name)
            finally:
                self.stopped.go()
                DEBUG and Log.note("thread {{name|quote}} is done", name=self.name)
def write_lines(self, key, lines):
    self._verify_key_format(key)
    storage = self.bucket.new_key(key + ".json.gz")

    buff = TemporaryFile()
    archive = gzip.GzipFile(fileobj=buff, mode='w')
    count = 0
    for l in lines:
        if hasattr(l, "__iter__"):
            for ll in l:
                archive.write(ll.encode("utf8"))
                archive.write(b"\n")
                count += 1
        else:
            archive.write(l.encode("utf8"))
            archive.write(b"\n")
            count += 1

    archive.close()
    file_length = buff.tell()

    retry = 3
    while retry:
        try:
            with Timer(
                "Sending {{count}} lines in {{file_length|comma}} bytes",
                {"file_length": file_length, "count": count},
                silent=not self.settings.debug
            ):
                buff.seek(0)
                storage.set_contents_from_file(buff)
            break
        except Exception as e:
            e = Except.wrap(e)
            retry -= 1
            if retry == 0 or 'Access Denied' in e or "No space left on device" in e:
                Log.error("could not push data to s3", cause=e)
            else:
                Log.warning("could not push data to s3", cause=e)

    if self.settings.public:
        storage.set_acl('public-read')
    return
def _get_file(ref, url):
    if ref.path.startswith("~"):
        home_path = os.path.expanduser("~")
        if os.sep == "\\":
            home_path = "/" + home_path.replace(os.sep, "/")
        if home_path.endswith("/"):
            home_path = home_path[:-1]

        ref.path = home_path + ref.path[1::]
    elif not ref.path.startswith("/"):
        # CONVERT RELATIVE TO ABSOLUTE
        if ref.path[0] == ".":
            num_dot = 1
            while ref.path[num_dot] == ".":
                num_dot += 1

            parent = url.path.rstrip("/").split("/")[:-num_dot]
            ref.path = "/".join(parent) + ref.path[num_dot:]
        else:
            parent = url.path.rstrip("/").split("/")[:-1]
            ref.path = "/".join(parent) + "/" + ref.path

    path = ref.path if os.sep != "\\" else ref.path[1::].replace("/", "\\")

    try:
        DEBUG and Log.note("reading file {{path}}", path=path)
        content = File(path).read()
    except Exception as e:
        content = None
        Log.error("Could not read file {{filename}}", filename=path, cause=e)

    try:
        new_value = json2value(content, params=ref.query, flexible=True, leaves=True)
    except Exception as e:
        e = Except.wrap(e)
        try:
            new_value = ini2value(content)
        except Exception:
            raise Log.error("Can not read {{file}}", file=path, cause=e)
    new_value = _replace_ref(new_value, ref)
    return new_value
def raise_error(e, packed):
    err = text_type(e)
    e = Except.wrap(e)
    if err.startswith(func_name) and ("takes at least" in err or "required positional argument" in err):
        missing = [p for p in params if str(p) not in packed]
        given = [p for p in params if str(p) in packed]
        if not missing:
            raise e
        else:
            get_logger().error(
                "Problem calling {{func_name}}: Expecting parameter {{missing}}, given {{given}}",
                func_name=func_name,
                missing=missing,
                given=given,
                stack_depth=2,
                cause=e
            )
    raise e
def find_query(hash):
    """
    FIND QUERY BY HASH, RETURN Response OBJECT
    :param hash:
    :return: Response OBJECT
    """
    try:
        hash = hash.split("/")[0]
        query = query_finder.find(hash)

        if not query:
            return Response(b'{"type": "ERROR", "template": "not found"}', status=404)
        else:
            return Response(query.encode("utf8"), status=200)
    except Exception as e:
        e = Except.wrap(e)
        Log.warning("problem finding query with hash={{hash}}", hash=hash, cause=e)
        return Response(value2json(e).encode("utf8"), status=400)
def _db_insert_column(self, column):
    try:
        self.db.execute(
            "INSERT INTO"
            + db_table_name
            + sql_iso(all_columns)
            + "VALUES"
            + sql_iso(sql_list([
                quote_value(column[c.name])
                if c.name not in ("nested_path", "partitions")
                else quote_value(value2json(column[c.name]))
                for c in METADATA_COLUMNS
            ]))
        )
    except Exception as e:
        e = Except.wrap(e)
        if "UNIQUE constraint failed" in e or " are not unique" in e:
            # THIS CAN HAPPEN BECAUSE todo HAS OLD COLUMN DATA
            self.todo.add((UPDATE, column), force=True)
        else:
            Log.error("do not know how to handle", cause=e)
def get_file(ref, url):
    if ref.path.startswith("~"):
        home_path = os.path.expanduser("~")
        if os.sep == "\\":
            home_path = "/" + home_path.replace(os.sep, "/")
        if home_path.endswith("/"):
            home_path = home_path[:-1]

        ref.path = home_path + ref.path[1::]
    elif not ref.path.startswith("/"):
        # CONVERT RELATIVE TO ABSOLUTE
        if ref.path[0] == ".":
            num_dot = 1
            while ref.path[num_dot] == ".":
                num_dot += 1

            parent = url.path.rstrip("/").split("/")[:-num_dot]
            ref.path = "/".join(parent) + ref.path[num_dot:]
        else:
            parent = url.path.rstrip("/").split("/")[:-1]
            ref.path = "/".join(parent) + "/" + ref.path

    path = ref.path if os.sep != "\\" else ref.path[1::].replace("/", "\\")

    try:
        if DEBUG:
            Log.note("reading file {{path}}", path=path)
        content = File(path).read()
    except Exception as e:
        content = None
        Log.error("Could not read file {{filename}}", filename=path, cause=e)

    try:
        new_value = json2value(content, params=ref.query, flexible=True, leaves=True)
    except Exception as e:
        e = Except.wrap(e)
        try:
            new_value = ini2value(content)
        except Exception:
            raise Log.error("Can not read {{file}}", file=path, cause=e)
    new_value = _replace_ref(new_value, ref)
    return new_value
def value2json(obj, pretty=False, sort_keys=False):
    try:
        json = json_encoder(obj, pretty=pretty)
        if json == None:
            Log.note(str(type(obj)) + " is not valid{{type}}JSON", type=" (pretty) " if pretty else " ")
            Log.error("Not valid JSON: " + str(obj) + " of type " + str(type(obj)))
        return json
    except Exception as e:
        e = Except.wrap(e)
        try:
            json = pypy_json_encode(obj)
            return json
        except Exception:
            pass
        Log.error("Can not encode into JSON: {{value}}", value=repr(obj), cause=e)
def query(self, _query):
    try:
        query = QueryOp.wrap(_query, container=self, namespace=self.namespace)
        self.stats.record(query)

        for s in listwrap(query.select):
            if s.aggregate != None and not aggregates.get(s.aggregate):
                Log.error(
                    "ES can not aggregate {{name}} because {{aggregate|quote}} is"
                    " not a recognized aggregate",
                    name=s.name,
                    aggregate=s.aggregate,
                )

        frum = query["from"]
        if is_op(frum, QueryOp):
            result = self.query(frum)
            q2 = query.copy()
            q2.frum = result
            return jx.run(q2)

        if is_bulk_agg(self.es, query):
            return es_bulkaggsop(self, frum, query)
        if is_bulk_set(self.es, query):
            return es_bulksetop(self, frum, query)

        query.limit = temper_limit(query.limit, query)

        if is_aggsop(self.es, query):
            return es_aggsop(self.es, frum, query)
        if is_setop(self.es, query):
            return es_setop(self.es, query)
        Log.error("Can not handle")
    except Exception as cause:
        cause = Except.wrap(cause)
        if "Data too large, data for" in cause:
            http.post(self.es.cluster.url / "_cache/clear")
            Log.error("Problem (Tried to clear Elasticsearch cache)", cause)
        Log.error("problem", cause=cause)
def assertRaises(self, problem, function, *args, **kwargs):
    try:
        function(*args, **kwargs)
    except Exception as e:
        f = Except.wrap(e)
        if is_text(problem):
            if problem in f:
                return
            Log.error(
                "expecting an exception returning {{problem|quote}} got something else instead",
                problem=problem,
                cause=f
            )
        elif not isinstance(f, problem) and not isinstance(e, problem):
            Log.error("expecting an exception of type {{type}} to be raised", type=problem)
        else:
            return

    Log.error("Expecting an exception to be raised")
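# A minimal, self-contained sketch of the same assertRaises idea (match the
# expected failure either by exception type or by message substring), using
# only the standard library; the names below are illustrative, not from the
# code above.
def assert_raises(problem, function, *args, **kwargs):
    try:
        function(*args, **kwargs)
    except Exception as e:
        if isinstance(problem, type) and issubclass(problem, BaseException):
            if isinstance(e, problem):
                return  # EXPECTED TYPE
            raise AssertionError("expecting an exception of type %r" % problem)
        if problem in str(e):
            return      # EXPECTED MESSAGE SUBSTRING
        raise AssertionError("expecting an exception mentioning %r" % problem)
    raise AssertionError("Expecting an exception to be raised")

# usage
assert_raises(ZeroDivisionError, lambda: 1 / 0)
assert_raises("division by zero", lambda: 1 / 0)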
def encode(self, value, pretty=False):
    if pretty:
        return pretty_json(value)

    try:
        with Timer("scrub", too_long=0.1):
            scrubbed = scrub(value)
        param = {"size": 0}
        with Timer("encode {{size}} characters", param=param, too_long=0.1):
            output = text_type(self.encoder(scrubbed))
            param["size"] = len(output)
            return output
    except Exception as e:
        from mo_logs.exceptions import Except
        from mo_logs import Log

        e = Except.wrap(e)
        Log.warning("problem serializing {{type}}", type=text_type(repr(value)), cause=e)
        raise e
def _execute(self, command):
    with self.lock:
        if self.current_task is not None:
            self.current_task.wait()
        self.current_task = Signal()
        self.current_response = None
        self.current_error = None
    self.process.stdin.add(value2json(command))
    self.current_task.wait()
    with self.lock:
        try:
            if self.current_error:
                Log.error("problem with process call", cause=Except.new_instance(self.current_error))
            else:
                return self.current_response
        finally:
            self.current_task = None
            self.current_response = None
            self.current_error = None
def _merge(*schemas):
    if len(schemas) == 1:
        return schemas[0]
    try:
        return OrderedDict(
            (k, _merge(*[ss for s in schemas for ss in [s.get(k)] if ss]))
            for k in jx.sort(set(k for s in schemas for k in s.keys()))
        )
    except Exception as e:
        e = Except.wrap(e)
        if "Expecting types to match" in e:
            raise e
        t = list(set(schemas))
        if len(t) == 1:
            return t[0]
        elif len(t) == 2 and STRING in t and NUMBER in t:
            return STRING
        else:
            Log.error("Expecting types to match {{types|json}}", types=t)
def close(self):
    if self.transaction_level > 0:
        if self.readonly:
            self.commit()  # AUTO-COMMIT
        else:
            Log.error("expecting commit() or rollback() before close")
    self.cursor = None  # NOT NEEDED
    try:
        self.db.close()
    except Exception as e:
        e = Except.wrap(e)
        if "Already closed" in e:
            return

        Log.warning("can not close()", e)
    finally:
        try:
            all_db.remove(self)
        except Exception as e:
            Log.error("not expected", cause=e)
def replacer(found):
    ops = found.group(1).split("|")

    path = ops[0]
    var = path.lstrip(".")
    depth = min(len(seq), max(1, len(path) - len(var)))
    try:
        val = seq[-depth]
        if var:
            if is_sequence(val) and float(var) == _round(float(var), 0):
                val = val[int(var)]
            else:
                val = val[var]
        for func_name in ops[1:]:
            parts = func_name.split("(")
            if len(parts) > 1:
                val = eval(parts[0] + "(val, " + "(".join(parts[1::]))
            else:
                val = FORMATTERS[func_name](val)
        val = toString(val)
        return val
    except Exception as e:
        from mo_logs import Except

        e = Except.wrap(e)
        try:
            if e.message.find("is not JSON serializable"):
                # WORK HARDER
                val = toString(val)
                return val
        except Exception as f:
            if not _Log:
                _late_import()

            _Log.warning(
                "Can not expand " + "|".join(ops) + " in template: {{template_|json}}",
                template_=template,
                cause=e,
            )
        return "[template expansion error: (" + str(e.message) + ")]"
def get_or_create_table(
    self,
    table,
    schema=None,
    typed=True,
    read_only=False,
    sharded=False,
    partition=None,
    cluster=None,  # TUPLE OF FIELDS TO SORT DATA
    id=None,
    kwargs=None,
):
    if kwargs.lookup != None or kwargs.flake != None:
        Log.error("expecting schema, not lookup")
    try:
        return Table(kwargs=kwargs, container=self)
    except Exception as e:
        e = Except.wrap(e)
        if not read_only and "Not found: Table" in e:
            return self.create_table(kwargs)
        Log.error("could not get table {{table}}", table=table, cause=e)
def create_or_replace_table(
    self,
    table,
    schema=None,
    typed=True,
    read_only=False,
    partition=None,
    cluster=None,  # TUPLE OF FIELDS TO SORT DATA
    sharded=False,
    kwargs=None,
):
    if kwargs.lookup != None or kwargs.flake != None:
        Log.error("expecting schema, not lookup")
    try:
        self.delete_table(table)
    except Exception as e:
        e = Except.wrap(e)
        if "Not found: Table" not in e and "Unable to get TableReference" not in e:
            Log.error("could not get table {{table}}", table=table, cause=e)
    return self.create_table(kwargs=kwargs)
def __exit__(self, exc_type, exc_val, exc_tb):
    if not exc_val:
        Log.error("Expecting an error")
    f = Except.wrap(exc_val)

    if isinstance(self.problem, (list, tuple)):
        problems = self.problem
    else:
        problems = [self.problem]

    causes = []
    for problem in problems:
        if (
            isinstance(problem, object.__class__)
            and issubclass(problem, BaseException)
            and isinstance(exc_val, problem)
        ):
            return True
        try:
            self.this.assertIn(problem, f)
            return True
        except Exception as cause:
            causes.append(cause)

    Log.error("problem is not raised", cause=first(causes))
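# A minimal, self-contained sketch of the same "expect this error" context
# manager idea, built only on the standard library; the class name and usage
# below are hypothetical, not part of the code above.
class ExpectError:
    def __init__(self, problem):
        self.problem = problem  # EXCEPTION TYPE OR MESSAGE SUBSTRING

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_val is None:
            raise AssertionError("Expecting an error")
        if isinstance(self.problem, type) and issubclass(self.problem, BaseException):
            if isinstance(exc_val, self.problem):
                return True  # SWALLOW THE EXPECTED EXCEPTION
        elif self.problem in str(exc_val):
            return True
        raise AssertionError("problem is not raised") from exc_val

# usage
with ExpectError(ZeroDivisionError):
    1 / 0
with ExpectError("invalid literal"):
    int("not a number")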
def replacer(found):
    ops = found.group(1).split("|")

    path = ops[0]
    var = path.lstrip(".")
    depth = min(len(seq), max(1, len(path) - len(var)))
    try:
        val = seq[-depth]
        if var:
            if is_sequence(val) and float(var) == _round(float(var), 0):
                val = val[int(var)]
            else:
                val = val[var]
        for func_name in ops[1:]:
            parts = func_name.split('(')
            if len(parts) > 1:
                val = eval(parts[0] + "(val, " + ("(".join(parts[1::])))
            else:
                val = FORMATTERS[func_name](val)
        val = toString(val)
        return val
    except Exception as e:
        from mo_logs import Except

        e = Except.wrap(e)
        try:
            if e.message.find("is not JSON serializable"):
                # WORK HARDER
                val = toString(val)
                return val
        except Exception as f:
            if not _Log:
                _late_import()

            _Log.warning(
                "Can not expand " + "|".join(ops) + " in template: {{template_|json}}",
                template_=template,
                cause=e
            )
        return "[template expansion error: (" + str(e.message) + ")]"
def join(self, till=None):
    """
    RETURN THE RESULT {"response":r, "exception":e} OF THE THREAD EXECUTION (INCLUDING EXCEPTION, IF EXISTS)
    """
    if self is Thread:
        Log.error("Thread.join() is not a valid call, use t.join()")

    with self.child_lock:
        children = copy(self.children)
    for c in children:
        c.join(till=till)

    DEBUG and Log.note("{{parent|quote}} waiting on thread {{child|quote}}", parent=Thread.current().name, child=self.name)
    (self.stopped | till).wait()
    if self.stopped:
        self.parent.remove_child(self)
        if not self.end_of_thread.exception:
            return self.end_of_thread.response
        else:
            Log.error("Thread {{name|quote}} did not end well", name=self.name, cause=self.end_of_thread.exception)
    else:
        raise Except(context=THREAD_TIMEOUT)
def _execute(self, command):
    with self.lock:
        self.current_task.wait()
        self.current_task = Signal()
        self.current_response = None
        self.current_error = None
    if self.process.service_stopped:
        Log.error("python is not running")
    self.process.stdin.add(value2json(command))
    (self.current_task | self.process.service_stopped).wait()
    try:
        if self.current_error:
            Log.error("problem with process call", cause=Except.new_instance(self.current_error))
        else:
            return self.current_response
    finally:
        self.current_task = DONE
        self.current_response = None
        self.current_error = None
def write_lines(self, key, lines):
    self._verify_key_format(key)
    storage = self.bucket.new_key(key + ".json.gz")

    buff = TemporaryFile()
    archive = gzip.GzipFile(fileobj=buff, mode='w')
    count = 0
    for l in lines:
        if hasattr(l, "__iter__"):
            for ll in l:
                archive.write(ll.encode("utf8"))
                archive.write(b"\n")
                count += 1
        else:
            archive.write(l.encode("utf8"))
            archive.write(b"\n")
            count += 1

    archive.close()
    file_length = buff.tell()

    retry = 3
    while retry:
        try:
            with Timer(
                "Sending {{count}} lines in {{file_length|comma}} bytes",
                {"file_length": file_length, "count": count},
                silent=not self.settings.debug
            ):
                buff.seek(0)
                storage.set_contents_from_file(buff)
            break
        except Exception as e:
            e = Except.wrap(e)
            retry -= 1
            if retry == 0 or 'Access Denied' in e or "No space left on device" in e:
                Log.error("could not push data to s3", cause=e)
            else:
                Log.warning("could not push data to s3", cause=e)

    if self.settings.public:
        storage.set_acl('public-read')
    return
def sliding_MWU(values):
    """
    RETURN
    :param values:
    :return:
    """
    # ADD MEDIAN TO EITHER SIDE OF values
    prefix = [
        np.median(values[:i + weight_radius])
        for i in range(weight_radius)
    ]
    suffix = [
        np.median(values[-i - weight_radius:])
        for i in reversed(range(weight_radius))
    ]
    combined = np.array(prefix + list(values) + suffix)
    b = combined.itemsize
    window = as_strided(combined, shape=(len(values), weight_radius * 2), strides=(b, b))
    med = (len(median_weight) + 1) / 2
    try:
        m_score = np.array([
            stats.mannwhitneyu(
                w[:weight_radius],
                w[-weight_radius:],
                use_continuity=True,
                alternative="two-sided",
            )
            for v in window
            for r in [rankdata(v)]
            for w in [(r - med) * median_weight]
        ])
        return m_score
    except Exception as cause:
        cause = Except.wrap(cause)
        if "All numbers are identical" in cause:
            return np.ones((window.shape[0], 2))
        raise cause
def test_transactions(service):
    # This should pass
    old = service.get_tuids("/testing/geckodriver/CONTRIBUTING.md", "6162f89a4838", commit=False)
    new = service.get_tuids("/testing/geckodriver/CONTRIBUTING.md", "06b1a22c5e62", commit=False)
    assert len(old) == len(new)

    # listed_inserts = [None] * 100
    listed_inserts = [('test' + str(count), str(count)) for count, entry in enumerate(range(100))]
    listed_inserts.append('hello world')  # This should cause a transaction failure

    try:
        with service.conn.transaction() as t:
            count = 0
            while count < len(listed_inserts):
                tmp_inserts = listed_inserts[count:count + 50]
                count += 50
                t.execute(
                    "INSERT OR REPLACE INTO latestFileMod (file, revision) VALUES " +
                    sql_list(sql_iso(sql_list(map(quote_value, i))) for i in tmp_inserts)
                )
        assert False  # SHOULD NOT GET HERE
    except Exception as e:
        e = Except.wrap(e)
        assert "11 values for 2 columns" in e

    # Check that the transaction was undone
    latestTestMods = service.conn.get_one("SELECT revision FROM latestFileMod WHERE file=?", ('test1',))
    assert not latestTestMods
def setUpClass(self):
    while True:
        try:
            es = test_jx.global_settings.backend_es
            http.get_json(URL(es.host, port=es.port))
            break
        except Exception as e:
            e = Except.wrap(e)
            if "No connection could be made because the target machine actively refused it" in e or "Connection refused" in e:
                Log.alert("Problem connecting")
                Till(seconds=WAIT_AFTER_PROBLEM).wait()
            else:
                Log.error("Server raised exception", e)

    # REMOVE OLD INDEXES
    cluster = elasticsearch.Cluster(test_jx.global_settings.backend_es)
    aliases = cluster.get_aliases()
    for a in aliases:
        try:
            if a.index.startswith("testing_"):
                cluster.delete_index(a.index)
        except Exception as e:
            Log.warning("Problem removing {{index|quote}}", index=a.index, cause=e)
def get_branches(hg, branches, kwargs=None):
    # TRY ES
    cluster = elasticsearch.Cluster(branches)
    try:
        es = cluster.get_index(kwargs=branches, read_only=False)
        esq = jx_elasticsearch.new_instance(branches)
        found_branches = esq.query({
            "from": branches.index,
            "format": "list",
            "limit": 10000
        }).data

        # IF IT IS TOO OLD, THEN PULL FROM HG
        oldest = Date(MAX(found_branches.etl.timestamp))
        if oldest == None or Date.now() - oldest > OLD_BRANCH:
            found_branches = _get_branches_from_hg(hg)
            es.extend([
                {"id": b.name + " " + b.locale, "value": b}
                for b in found_branches
            ])
            es.flush()

        try:
            return UniqueIndex(["name", "locale"], data=found_branches, fail_on_dup=False)
        except Exception as e:
            Log.error("Bad branch in ES index", cause=e)
    except Exception as e:
        e = Except.wrap(e)
        if "Can not find index " in e:
            branches.schema = branches_schema
            es = cluster.get_or_create_index(branches)
            es.add_alias()
            return get_branches(kwargs)
        Log.error("problem getting branches", cause=e)
def value2json(obj, pretty=False, sort_keys=False, keep_whitespace=True):
    """
    :param obj: THE VALUE TO TURN INTO JSON
    :param pretty: True TO MAKE A MULTI-LINE PRETTY VERSION
    :param sort_keys: True TO SORT KEYS
    :param keep_whitespace: False TO strip() THE WHITESPACE IN THE VALUES
    :return:
    """
    if FIND_LOOPS:
        obj = scrub(obj, scrub_text=_keep_whitespace if keep_whitespace else trim_whitespace())
    try:
        json = json_encoder(obj, pretty=pretty)
        if json == None:
            Log.note(str(type(obj)) + " is not valid{{type}}JSON", type=" (pretty) " if pretty else " ")
            Log.error("Not valid JSON: " + str(obj) + " of type " + str(type(obj)))
        return json
    except Exception as e:
        e = Except.wrap(e)
        try:
            json = pypy_json_encode(obj)
            return json
        except Exception:
            pass
        Log.error("Can not encode into JSON: {{value}}", value=text(repr(obj)), cause=e)
def value2json(obj, pretty=False, sort_keys=False, keep_whitespace=True):
    """
    :param obj: THE VALUE TO TURN INTO JSON
    :param pretty: True TO MAKE A MULTI-LINE PRETTY VERSION
    :param sort_keys: True TO SORT KEYS
    :param keep_whitespace: False TO strip() THE WHITESPACE IN THE VALUES
    :return:
    """
    if FIND_LOOPS:
        obj = scrub(obj, scrub_text=_keep_whitespace if keep_whitespace else _trim_whitespace())
    try:
        json = json_encoder(obj, pretty=pretty)
        if json == None:
            Log.note(str(type(obj)) + " is not valid{{type}}JSON", type=" (pretty) " if pretty else " ")
            Log.error("Not valid JSON: " + str(obj) + " of type " + str(type(obj)))
        return json
    except Exception as e:
        e = Except.wrap(e)
        try:
            json = pypy_json_encode(obj)
            return json
        except Exception:
            pass
        Log.error("Can not encode into JSON: {{value}}", value=text_type(repr(obj)), cause=e)
def column_query(self, sql, param=None):
    """
    RETURN RESULTS IN [column][row_num] GRID
    """
    self._execute_backlog()
    try:
        old_cursor = self.cursor
        if not old_cursor:  # ALLOW NON-TRANSACTIONAL READS
            self.cursor = self.db.cursor()
            self.cursor.execute("SET TIME_ZONE='+00:00'")
            self.cursor.close()
            self.cursor = self.db.cursor()

        if param:
            sql = expand_template(sql, quote_param(param))
        sql = self.preamble + outdent(sql)
        self.debug and Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))

        self.cursor.execute(sql)
        grid = [[utf8_to_unicode(c) for c in row] for row in self.cursor]
        # columns = [utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, [])]
        result = transpose(*grid)

        if not old_cursor:  # CLEANUP AFTER NON-TRANSACTIONAL READS
            self.cursor.close()
            self.cursor = None

        return result
    except Exception as e:
        e = Except.wrap(e)
        if "InterfaceError" in e:
            Log.error("Did you close the db connection?", e)
        Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1)
def unicode2Date(value, format=None):
    """
    CONVERT UNICODE STRING TO UNIX TIMESTAMP VALUE
    """
    # http://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    if value == None:
        return None

    if format != None:
        try:
            if format.endswith("%S.%f") and "." not in value:
                value += ".000"
            return _unix2Date(datetime2unix(datetime.strptime(value, format)))
        except Exception as e:
            from mo_logs import Log

            Log.error("Can not format {{value}} with {{format}}", value=value, format=format, cause=e)

    value = value.strip()
    if value.lower() == "now":
        return _unix2Date(datetime2unix(_utcnow()))
    elif value.lower() == "today":
        return _unix2Date(math.floor(datetime2unix(_utcnow()) / 86400) * 86400)
    elif value.lower() in ["eod", "tomorrow"]:
        return _unix2Date(math.floor(datetime2unix(_utcnow()) / 86400) * 86400 + 86400)

    if any(value.lower().find(n) >= 0 for n in ["now", "today", "eod", "tomorrow"] + list(MILLI_VALUES.keys())):
        return parse_time_expression(value)

    try:  # 2.7 DOES NOT SUPPORT %z
        local_value = parse_date(value)  # eg 2014-07-16 10:57 +0200
        return _unix2Date(datetime2unix((local_value - coalesce(local_value.utcoffset(), 0)).replace(tzinfo=None)))
    except Exception as e:
        e = Except.wrap(e)  # FOR DEBUGGING
        pass

    formats = [
        "%Y-%m-%dT%H:%M:%S%z",
        "%Y-%m-%dT%H:%M:%S.%f%z",
        "%Y-%m-%dT%H:%M:%S",
        "%Y-%m-%dT%H:%M:%S.%f"
    ]
    for f in formats:
        try:
            return _unix2Date(datetime2unix(datetime.strptime(value, f)))
        except Exception:
            pass

    deformats = [
        "%Y-%m",
        "%Y%m%d",
        "%d%m%Y",
        "%d%m%y",
        "%d%b%Y",
        "%d%b%y",
        "%d%B%Y",
        "%d%B%y",
        "%B%d%Y",
        "%b%d%Y",
        "%B%d%y",
        "%b%d%y",
        "%Y%m%d%H%M%S%f",
        "%Y%m%d%H%M%S",
        "%Y%m%dT%H%M%S",
        "%d%m%Y%H%M%S",
        "%d%m%y%H%M%S",
        "%d%b%Y%H%M%S",
        "%d%b%y%H%M%S",
        "%d%B%Y%H%M%S",
        "%d%B%y%H%M%S"
    ]
    value = deformat(value)
    for f in deformats:
        try:
            return unicode2Date(value, format=f)
        except Exception:
            pass
    else:
        from mo_logs import Log

        Log.error("Can not interpret {{value}} as a datetime", value=value)
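# A small, self-contained illustration of the "try a list of strptime formats
# until one works" fallback used above; the formats and sample inputs here are
# only examples, not the full list from the function.
from datetime import datetime

def parse_loosely(value, formats=("%Y-%m-%dT%H:%M:%S", "%Y%m%d", "%d%b%Y")):
    for f in formats:
        try:
            return datetime.strptime(value, f)
        except ValueError:
            pass  # WRONG FORMAT, TRY THE NEXT ONE
    raise ValueError("Can not interpret %r as a datetime" % value)

print(parse_loosely("2014-07-16T10:57:00"))  # ISO-style timestamp
print(parse_loosely("20140716"))             # compact date
print(parse_loosely("16Jul2014"))            # day-month-year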
def extractor(
    guid,
    num_partitions,
    esq,
    query,
    selects,
    query_path,
    schema,
    chunk_size,
    cardinality,
    abs_limit,
    formatter,
    please_stop,
):
    total = 0
    # WE MESS WITH THE QUERY LIMITS FOR CHUNKING
    query.limit = first(query.groupby).domain.limit = chunk_size * 2
    start_time = Date.now()

    try:
        write_status(
            guid,
            {
                "status": "starting",
                "chunks": num_partitions,
                "rows": min(abs_limit, cardinality),
                "start_time": start_time,
                "timestamp": Date.now(),
            },
        )

        with TempFile() as temp_file:
            with open(temp_file.abspath, "wb") as output:
                for i in range(0, num_partitions):
                    if please_stop:
                        Log.error("request to shutdown!")
                    is_last = i == num_partitions - 1
                    first(query.groupby).allowNulls = is_last
                    acc, decoders, es_query = aggop_to_es_queries(selects, query_path, schema, query)
                    # REACH INTO THE QUERY TO SET THE partitions
                    terms = es_query.aggs._filter.aggs._match.terms
                    terms.include.partition = i
                    terms.include.num_partitions = num_partitions

                    result = esq.es.search(deepcopy(es_query), query.limit)
                    aggs = unwrap(result.aggregations)

                    formatter.add(aggs, acc, query, decoders, selects)
                    for b in formatter.bytes():
                        if b is DONE:
                            break
                        output.write(b)
                    else:
                        write_status(
                            guid,
                            {
                                "status": "working",
                                "chunk": i,
                                "chunks": num_partitions,
                                "row": total,
                                "rows": min(abs_limit, cardinality),
                                "start_time": start_time,
                                "timestamp": Date.now(),
                            },
                        )
                        continue
                    break

                for b in formatter.footer():
                    output.write(b)

            upload(guid + ".json", temp_file)

        write_status(
            guid,
            {
                "ok": True,
                "status": "done",
                "chunks": num_partitions,
                "rows": min(abs_limit, cardinality),
                "start_time": start_time,
                "end_time": Date.now(),
                "timestamp": Date.now(),
            },
        )
    except Exception as e:
        e = Except.wrap(e)
        write_status(
            guid,
            {
                "ok": False,
                "status": "error",
                "error": e,
                "start_time": start_time,
                "end_time": Date.now(),
                "timestamp": Date.now(),
            },
        )
        Log.warning("Could not extract", cause=e)
def life_cycle_watcher(please_stop):
    failed_attempts = Data()

    while not please_stop:
        spot_requests = self._get_managed_spot_requests()
        last_get = Date.now()
        instances = wrap({i.id: i for r in self.ec2_conn.get_all_instances() for i in r.instances})
        # INSTANCES THAT REQUIRE SETUP
        time_to_stop_trying = {}
        please_setup = [
            (i, r) for i, r in [(instances[r.instance_id], r) for r in spot_requests]
            if i.id
            and not i.tags.get("Name")
            and i._state.name == "running"
            and Date.now() > Date(i.launch_time) + DELAY_BEFORE_SETUP
        ]

        for i, r in please_setup:
            try:
                p = self.settings.utility[i.instance_type]
                if p == None:
                    try:
                        self.ec2_conn.terminate_instances(instance_ids=[i.id])
                        with self.net_new_locker:
                            self.net_new_spot_requests.remove(r.id)
                    finally:
                        Log.error("Can not setup unknown {{instance_id}} of type {{type}}", instance_id=i.id, type=i.instance_type)

                i.markup = p
                try:
                    self.instance_manager.setup(i, coalesce(p, 0))
                except Exception as e:
                    e = Except.wrap(e)
                    failed_attempts[r.id] += [e]
                    Log.error(ERROR_ON_CALL_TO_SETUP, e)
                i.add_tag("Name", self.settings.ec2.instance.name + " (running)")
                with self.net_new_locker:
                    self.net_new_spot_requests.remove(r.id)
            except Exception as e:
                if not time_to_stop_trying.get(i.id):
                    time_to_stop_trying[i.id] = Date.now() + TIME_FROM_RUNNING_TO_LOGIN
                if Date.now() > time_to_stop_trying[i.id]:
                    # FAIL TO SETUP AFTER x MINUTES, THEN TERMINATE INSTANCE
                    self.ec2_conn.terminate_instances(instance_ids=[i.id])
                    with self.net_new_locker:
                        self.net_new_spot_requests.remove(r.id)
                    Log.warning("Problem with setup of {{instance_id}}.  Time is up.  Instance TERMINATED!", instance_id=i.id, cause=e)
                elif "Can not setup unknown " in e:
                    Log.warning("Unexpected failure on startup", instance_id=i.id, cause=e)
                elif ERROR_ON_CALL_TO_SETUP in e:
                    if len(failed_attempts[r.id]) > 2:
                        Log.warning("Problem with setup() of {{instance_id}}", instance_id=i.id, cause=failed_attempts[r.id])
                else:
                    Log.warning("Unexpected failure on startup", instance_id=i.id, cause=e)

        if Date.now() - last_get > 5 * SECOND:
            # REFRESH STALE
            spot_requests = self._get_managed_spot_requests()
            last_get = Date.now()

        pending = wrap([r for r in spot_requests if r.status.code in PENDING_STATUS_CODES])
        give_up = wrap([r for r in spot_requests if r.status.code in PROBABLY_NOT_FOR_A_WHILE | TERMINATED_STATUS_CODES])
        ignore = wrap([r for r in spot_requests if r.status.code in MIGHT_HAPPEN])  # MIGHT HAPPEN, BUT NO NEED TO WAIT FOR IT

        if self.done_spot_requests:
            with self.net_new_locker:
                expired = Date.now() - self.settings.run_interval + 2 * MINUTE
                for ii in list(self.net_new_spot_requests):
                    if Date(ii.create_time) < expired:
                        ## SOMETIMES REQUESTS NEVER GET INTO THE MAIN LIST OF REQUESTS
                        self.net_new_spot_requests.remove(ii)
                for g in give_up:
                    self.net_new_spot_requests.remove(g.id)
                for g in ignore:
                    self.net_new_spot_requests.remove(g.id)
                pending = UniqueIndex(("id",), data=pending)
                pending = pending | self.net_new_spot_requests

        if give_up:
            self.ec2_conn.cancel_spot_instance_requests(request_ids=give_up.id)
            Log.note("Cancelled spot requests {{spots}}, {{reasons}}", spots=give_up.id, reasons=give_up.status.code)

        if not pending and not time_to_stop_trying and self.done_spot_requests:
            Log.note("No more pending spot requests")
            please_stop.go()
            break
        elif pending:
            Log.note("waiting for spot requests: {{pending}}", pending=[p.id for p in pending])

        (Till(seconds=10) | please_stop).wait()

    Log.note("life cycle watcher has stopped")
def write_lines(self, key, lines):
    self._verify_key_format(key)
    storage = self.bucket.new_key(str(key + ".json.gz"))

    if VERIFY_UPLOAD:
        lines = list(lines)

    with mo_files.TempFile() as tempfile:
        with open(tempfile.abspath, "wb") as buff:
            DEBUG and Log.note("Temp file {{filename}}", filename=tempfile.abspath)
            archive = gzip.GzipFile(filename=str(key + ".json"), fileobj=buff, mode="w")
            count = 0
            for l in lines:
                if is_many(l):
                    for ll in l:
                        archive.write(ll.encode("utf8"))
                        archive.write(b"\n")
                        count += 1
                else:
                    archive.write(l.encode("utf8"))
                    archive.write(b"\n")
                    count += 1
            archive.close()

        retry = 3
        while retry:
            try:
                with Timer(
                    "Sending {{count}} lines in {{file_length|comma}} bytes for {{key}}",
                    {"key": key, "file_length": tempfile.length, "count": count},
                    verbose=self.settings.debug,
                ):
                    storage.set_contents_from_filename(tempfile.abspath, headers={"Content-Type": mimetype.GZIP})
                break
            except Exception as e:
                e = Except.wrap(e)
                retry -= 1
                if retry == 0 or "Access Denied" in e or "No space left on device" in e:
                    Log.error("could not push data to s3", cause=e)
                else:
                    Log.warning("could not push data to s3, will retry", cause=e)

        if self.settings.public:
            storage.set_acl("public-read")

        if VERIFY_UPLOAD:
            try:
                with open(tempfile.abspath, mode="rb") as source:
                    result = list(ibytes2ilines(scompressed2ibytes(source)))
                    assertAlmostEqual(result, lines, msg="file is different")

                # full_url = "https://"+self.name+".s3-us-west-2.amazonaws.com/"+storage.key.replace(":", "%3A")
                # https://active-data-test-result.s3-us-west-2.amazonaws.com/tc.1524896%3A152488763.0.json.gz
                # dest_bucket = s3.MultiBucket(bucket="self.name", kwargs=self.settings.aws)
                result = list(self.read_lines(strip_extension(key)))
                assertAlmostEqual(result, lines, result, msg="S3 is different")
            except Exception as e:
                from activedata_etl.transforms import TRY_AGAIN_LATER

                Log.error(TRY_AGAIN_LATER, reason="did not pass verification", cause=e)
    return
def _scrub(value, is_done):
    type_ = value.__class__

    if type_ in (NoneType, NullType):
        return None
    elif type_ is unicode:
        value_ = value.strip()
        if value_:
            return value_
        else:
            return None
    elif type_ is float:
        if math.isnan(value) or math.isinf(value):
            return None
        return value
    elif type_ in (int, long, bool):
        return value
    elif type_ in (date, datetime):
        return float(datetime2unix(value))
    elif type_ is timedelta:
        return value.total_seconds()
    elif type_ is Date:
        return float(value.unix)
    elif type_ is Duration:
        return float(value.seconds)
    elif type_ is str:
        return utf82unicode(value)
    elif type_ is Decimal:
        return float(value)
    elif type_ is Data:
        return _scrub(_get(value, '_dict'), is_done)
    elif isinstance(value, Mapping):
        _id = id(value)
        if _id in is_done:
            Log.warning("possible loop in structure detected")
            return '"<LOOP IN STRUCTURE>"'
        is_done.add(_id)
        output = {}
        for k, v in value.iteritems():
            if isinstance(k, basestring):
                pass
            elif hasattr(k, "__unicode__"):
                k = unicode(k)
            else:
                Log.error("keys must be strings")
            v = _scrub(v, is_done)
            if v != None or isinstance(v, Mapping):
                output[k] = v
        is_done.discard(_id)
        return output
    elif type_ in (tuple, list, FlatList):
        output = []
        for v in value:
            v = _scrub(v, is_done)
            output.append(v)
        return output
    elif type_ is type:
        return value.__name__
    elif type_.__name__ == "bool_":
        # DEAR ME!  Numpy has it's own booleans (value==False could be used, but 0==False in Python.  DOH!)
        if value == False:
            return False
        else:
            return True
    elif not isinstance(value, Except) and isinstance(value, Exception):
        return _scrub(Except.wrap(value), is_done)
    elif hasattr(value, '__data__'):
        try:
            return _scrub(value.__data__(), is_done)
        except Exception as e:
            Log.error("problem with calling __json__()", e)
    elif hasattr(value, 'co_code') or hasattr(value, "f_locals"):
        return None
    elif hasattr(value, '__iter__'):
        output = []
        for v in value:
            v = _scrub(v, is_done)
            output.append(v)
        return output
    elif hasattr(value, '__call__'):
        return repr(value)
    else:
        return _scrub(DataObject(value), is_done)
def unicode2Date(value, format=None):
    """
    CONVERT UNICODE STRING TO UNIX TIMESTAMP VALUE
    """
    # http://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    if value == None:
        return None

    if format != None:
        try:
            if format.endswith("%S.%f") and "." not in value:
                value += ".000"
            return _unix2Date(datetime2unix(datetime.strptime(value, format)))
        except Exception as e:
            from mo_logs import Log

            Log.error("Can not format {{value}} with {{format}}", value=value, format=format, cause=e)

    value = value.strip()
    if value.lower() == "now":
        return _unix2Date(datetime2unix(_utcnow()))
    elif value.lower() == "today":
        return _unix2Date(math.floor(datetime2unix(_utcnow()) / 86400) * 86400)
    elif value.lower() in ["eod", "tomorrow"]:
        return _unix2Date(math.floor(datetime2unix(_utcnow()) / 86400) * 86400 + 86400)

    if any(value.lower().find(n) >= 0 for n in ["now", "today", "eod", "tomorrow"] + list(MILLI_VALUES.keys())):
        return parse_time_expression(value)

    try:  # 2.7 DOES NOT SUPPORT %z
        local_value = parse_date(value)  # eg 2014-07-16 10:57 +0200
        return _unix2Date(datetime2unix((local_value - local_value.utcoffset()).replace(tzinfo=None)))
    except Exception as e:
        e = Except.wrap(e)  # FOR DEBUGGING
        pass

    formats = [
        "%Y-%m-%dT%H:%M:%S",
        "%Y-%m-%dT%H:%M:%S.%f"
    ]
    for f in formats:
        try:
            return _unix2Date(datetime2unix(datetime.strptime(value, f)))
        except Exception:
            pass

    deformats = [
        "%Y-%m",  # eg 2014-07-16 10:57 +0200
        "%Y%m%d",
        "%d%m%Y",
        "%d%m%y",
        "%d%b%Y",
        "%d%b%y",
        "%d%B%Y",
        "%d%B%y",
        "%Y%m%d%H%M%S",
        "%Y%m%dT%H%M%S",
        "%d%m%Y%H%M%S",
        "%d%m%y%H%M%S",
        "%d%b%Y%H%M%S",
        "%d%b%y%H%M%S",
        "%d%B%Y%H%M%S",
        "%d%B%y%H%M%S"
    ]
    value = deformat(value)
    for f in deformats:
        try:
            return unicode2Date(value, format=f)
        except Exception:
            pass
    else:
        from mo_logs import Log

        Log.error("Can not interpret {{value}} as a datetime", value=value)
def json2value(json_string, params=Null, flexible=False, leaves=False):
    """
    :param json_string: THE JSON
    :param params: STANDARD JSON PARAMS
    :param flexible: REMOVE COMMENTS
    :param leaves: ASSUME JSON KEYS ARE DOT-DELIMITED
    :return: Python value
    """
    if isinstance(json_string, str):
        Log.error("only unicode json accepted")

    try:
        if flexible:
            # REMOVE """COMMENTS""", # COMMENTS, //COMMENTS, AND \n \r
            # DERIVED FROM https://github.com/jeads/datasource/blob/master/datasource/bases/BaseHub.py#L58
            json_string = re.sub(r"\"\"\".*?\"\"\"", r"\n", json_string, flags=re.MULTILINE)
            json_string = "\n".join(remove_line_comment(l) for l in json_string.split("\n"))
            # ALLOW DICTIONARY'S NAME:VALUE LIST TO END WITH COMMA
            json_string = re.sub(r",\s*\}", r"}", json_string)
            # ALLOW LISTS TO END WITH COMMA
            json_string = re.sub(r",\s*\]", r"]", json_string)

        if params:
            # LOOKUP REFERENCES
            json_string = expand_template(json_string, params)

        try:
            value = wrap(json_decoder(unicode(json_string)))
        except Exception as e:
            Log.error("can not decode\n{{content}}", content=json_string, cause=e)

        if leaves:
            value = wrap_leaves(value)

        return value
    except Exception as e:
        e = Except.wrap(e)

        if not json_string.strip():
            Log.error("JSON string is only whitespace")

        c = e
        while "Expecting '" in c.cause and "' delimiter: line" in c.cause:
            c = c.cause

        if "Expecting '" in c and "' delimiter: line" in c:
            line_index = int(strings.between(c.message, " line ", " column ")) - 1
            column = int(strings.between(c.message, " column ", " ")) - 1
            line = json_string.split("\n")[line_index].replace("\t", " ")
            if column > 20:
                sample = "..." + line[column - 20:]
                pointer = " " + (" " * 20) + "^"
            else:
                sample = line
                pointer = (" " * column) + "^"

            if len(sample) > 43:
                sample = sample[:43] + "..."

            Log.error("Can not decode JSON at:\n\t" + sample + "\n\t" + pointer + "\n")

        base_str = strings.limit(json_string, 1000).encode('utf8')
        hexx_str = bytes2hex(base_str, " ")
        try:
            char_str = " " + " ".join((c.decode("latin1") if ord(c) >= 32 else ".") for c in base_str)
        except Exception as e:
            char_str = " "
        Log.error("Can not decode JSON:\n" + char_str + "\n" + hexx_str + "\n", e)
def _db_worker(self, please_stop):
    while not please_stop:
        try:
            with self._db_transaction():
                result = self._query(
                    SQL_SELECT
                    + all_columns
                    + SQL_FROM
                    + db_table_name
                    + SQL_WHERE
                    + "last_updated > "
                    + quote_value(self.last_load)
                    + SQL_ORDERBY
                    + sql_list(map(quote_column, ["es_index", "name", "es_column"]))
                )

            with self.locker:
                for r in result.data:
                    c = row_to_column(result.header, r)
                    self._add(c)
                    if c.last_updated > self.last_load:
                        self.last_load = c.last_updated

            updates = self.todo.pop_all()
            DEBUG and updates and Log.note("{{num}} columns to push to db", num=len(updates))
            for action, column in updates:
                while not please_stop:
                    try:
                        with self._db_transaction():
                            DEBUG and Log.note(
                                "{{action}} db for {{table}}.{{column}}",
                                action=action,
                                table=column.es_index,
                                column=column.es_column,
                            )
                            if action is EXECUTE:
                                self.db.execute(column)
                            elif action is UPDATE:
                                self.db.execute(
                                    "UPDATE"
                                    + db_table_name
                                    + "SET"
                                    + sql_list([
                                        "count=" + quote_value(column.count),
                                        "cardinality=" + quote_value(column.cardinality),
                                        "multi=" + quote_value(column.multi),
                                        "partitions=" + quote_value(value2json(column.partitions)),
                                        "last_updated=" + quote_value(column.last_updated),
                                    ])
                                    + SQL_WHERE
                                    + SQL_AND.join([
                                        "es_index = " + quote_value(column.es_index),
                                        "es_column = " + quote_value(column.es_column),
                                        "last_updated < " + quote_value(column.last_updated),
                                    ])
                                )
                            elif action is DELETE:
                                self.db.execute(
                                    "DELETE FROM"
                                    + db_table_name
                                    + SQL_WHERE
                                    + SQL_AND.join([
                                        "es_index = " + quote_value(column.es_index),
                                        "es_column = " + quote_value(column.es_column),
                                    ])
                                )
                            else:
                                self._db_insert_column(column)
                        break
                    except Exception as e:
                        e = Except.wrap(e)
                        if "database is locked" in e:
                            Log.note("metadata database is locked")
                            Till(seconds=1).wait()
                            break
                        else:
                            Log.warning("problem updating database", cause=e)
        except Exception as e:
            Log.warning("problem updating database", cause=e)

        (Till(seconds=10) | please_stop).wait()
def tuid_endpoint(path):
    with RegisterThread():
        try:
            service.statsdaemon.update_requests(requests_total=1)
            if flask.request.headers.get("content-length", "") in ["", "0"]:
                # ASSUME A BROWSER HIT THIS POINT, SEND text/html RESPONSE BACK
                service.statsdaemon.update_requests(requests_complete=1, requests_passed=1)
                return Response(
                    EXPECTING_QUERY,
                    status=400,
                    headers={"Content-Type": "text/html"}
                )
            elif int(flask.request.headers["content-length"]) > QUERY_SIZE_LIMIT:
                service.statsdaemon.update_requests(requests_complete=1, requests_passed=1)
                return Response(
                    unicode2utf8("request too large"),
                    status=400,
                    headers={"Content-Type": "text/html"}
                )

            request_body = flask.request.get_data().strip()
            query = json2value(utf82unicode(request_body))

            # ENSURE THE QUERY HAS THE CORRECT FORM
            if query['from'] != 'files':
                Log.error("Can only handle queries on the `files` table")

            ands = listwrap(query.where['and'])
            if len(ands) != 3:
                Log.error(
                    'expecting a simple where clause with following structure\n{{example|json}}',
                    example={"and": [
                        {"eq": {"branch": "<BRANCH>"}},
                        {"eq": {"revision": "<REVISION>"}},
                        {"in": {"path": ["<path1>", "<path2>", "...", "<pathN>"]}}
                    ]}
                )

            rev = None
            paths = None
            branch_name = None
            for a in ands:
                rev = coalesce(rev, a.eq.revision)
                paths = unwraplist(coalesce(paths, a['in'].path, a.eq.path))
                branch_name = coalesce(branch_name, a.eq.branch)
            paths = listwrap(paths)

            if len(paths) == 0:
                response, completed = [], True
            elif service.conn.pending_transactions > TOO_BUSY:
                # CHECK IF service IS VERY BUSY
                # TODO: BE SURE TO UPDATE STATS TOO
                Log.note("Too many open transactions")
                response, completed = [], False
            elif service.get_thread_count() > TOO_MANY_THREADS:
                Log.note("Too many threads open")
                response, completed = [], False
            else:
                # RETURN TUIDS
                with Timer("tuid internal response time for {{num}} files", {"num": len(paths)}):
                    response, completed = service.get_tuids_from_files(
                        revision=rev, files=paths, going_forward=True, repo=branch_name
                    )

                if not completed:
                    Log.note(
                        "Request for {{num}} files is incomplete for revision {{rev}}.",
                        num=len(paths), rev=rev
                    )

            if query.meta.format == 'list':
                formatter = _stream_list
            else:
                formatter = _stream_table

            service.statsdaemon.update_requests(
                requests_complete=1 if completed else 0,
                requests_incomplete=1 if not completed else 0,
                requests_passed=1
            )
            return Response(
                formatter(response),
                status=200 if completed else 202,
                headers={"Content-Type": "application/json"}
            )
        except Exception as e:
            e = Except.wrap(e)
            service.statsdaemon.update_requests(requests_incomplete=1, requests_failed=1)
            Log.warning("could not handle request", cause=e)
            return Response(
                unicode2utf8(value2json(e, pretty=True)),
                status=400,
                headers={"Content-Type": "text/html"}
            )