def get_columns(self, table_name, column_name=None, force=False):
    """
    RETURN METADATA COLUMNS
    """
    try:
        # LAST TIME WE GOT INFO FOR THIS TABLE
        short_name = join_field(split_field(table_name)[0:1])
        table = self.get_table(short_name)[0]

        if not table:
            table = Table(
                name=short_name,
                url=None,
                query_path=None,
                timestamp=Date.now()
            )
            with self.meta.tables.locker:
                self.meta.tables.add(table)
            self._get_columns(table=short_name)
        elif force or table.timestamp == None or table.timestamp < Date.now() - MAX_COLUMN_METADATA_AGE:
            table.timestamp = Date.now()
            self._get_columns(table=short_name)

        with self.meta.columns.locker:
            columns = self.meta.columns.find(table_name, column_name)
        if columns:
            columns = jx.sort(columns, "name")
            # AT LEAST WAIT FOR THE COLUMNS TO UPDATE
            while len(self.todo) and not all(columns.get("last_updated")):
                Log.note("waiting for columns to update {{columns|json}}", columns=[c.table + "." + c.es_column for c in columns if not c.last_updated])
                Thread.sleep(seconds=1)
            return columns
    except Exception, e:
        Log.error("Not expected", cause=e)

def pop(self, timeout=None):
    """
    :param timeout: OPTIONAL DURATION
    :return: None, IF timeout PASSES
    """
    with self.lock:
        while not self.please_stop:
            if self.db.status.end > self.start:
                value = self.db[str(self.start)]
                self.start += 1
                return value

            if timeout is not None:
                try:
                    self.lock.wait(timeout=timeout)
                    if self.db.status.end <= self.start:
                        return None
                except Exception:
                    pass
            else:
                try:
                    self.lock.wait()
                except Exception:
                    pass
        if DEBUG:
            Log.note("persistent queue already stopped")
        return Thread.STOP

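# A minimal usage sketch for pop() above; the queue file name and the timeout value
# are illustrative assumptions, not taken from the code:
#
#     queue = PersistentQueue("temp/queue_file")
#     value = queue.pop(timeout=timeout)   # timeout IS AN OPTIONAL DURATION (SEE DOCSTRING)
#     if value is Thread.STOP:
#         pass  # QUEUE WAS ASKED TO STOP
#     elif value is None:
#         pass  # TIMEOUT EXPIRED WITH NOTHING TO READ
#     else:
#         pass  # GOT THE VALUE AT THE HEAD OF THE QUEUE
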
def main(): """ CLEAR OUT KEYS FROM BUCKET BY RANGE, OR BY FILE """ settings = startup.read_settings(defs=[ { "name": ["--bucket"], "help": "bucket to scan", "type": str, "dest": "bucket", "required": True } ]) Log.start(settings.debug) source = Connection(settings.aws).get_bucket(settings.args.bucket) for k in qb.sort(source.keys()): try: data = source.read_bytes(k) if convert.ascii2unicode(data).find("2e2834fa7ecd8d3bb1ad49ec981fdb89eb4df95e18") >= 0: Log.note("Found at {{key}}", key=k) except Exception, e: Log.warning("Problem with {{key}}", key=k, cause=e) finally:
def pretty_json(value):
    try:
        if scrub(value) is None:
            return "null"
        elif isinstance(value, basestring):
            if isinstance(value, str):
                value = utf82unicode(value)
            try:
                return quote(value)
            except Exception, e:
                from pyLibrary.debugs.logs import Log

                try:
                    Log.note("try explicit convert of string with length {{length}}", length=len(value))
                    acc = [u"\""]
                    for c in value:
                        try:
                            try:
                                c2 = ESCAPE_DCT[c]
                            except Exception, h:
                                c2 = c
                            c3 = unicode(c2)
                            acc.append(c3)
                        except BaseException, g:
                            pass
                            # Log.warning("odd character {{ord}} found in string. Ignored.", ord=ord(c), cause=g)
                    acc.append(u"\"")
                    output = u"".join(acc)
                    Log.note("return value of length {{length}}", length=len(output))
                    return output

def get_revision(self, revision, locale=None):
    """
    EXPECTING INCOMPLETE revision
    RETURNS revision
    """
    rev = revision.changeset.id
    if not rev:
        return Null
    elif rev == "None":
        return Null
    elif revision.branch.name == None:
        return Null

    locale = coalesce(locale, revision.branch.locale, DEFAULT_LOCALE)
    doc = self._get_from_elasticsearch(revision, locale=locale)
    if doc:
        Log.note(
            "Got hg ({{branch}}, {{locale}}, {{revision}}) from ES",
            branch=doc.branch.name,
            locale=locale,
            revision=doc.changeset.id,
        )
        return doc

    output = self._load_all_in_push(revision, locale=locale)
    return output

def copy(self, keys, source, sample_only_filter=None, sample_size=None, done_copy=None):
    """
    :param keys: THE KEYS TO LOAD FROM source
    :param source: THE SOURCE (USUALLY S3 BUCKET)
    :param sample_only_filter: SOME FILTER, IN CASE YOU DO NOT WANT TO SEND EVERYTHING
    :param sample_size: FOR RANDOM SAMPLE OF THE source DATA
    :param done_copy: CALLBACK, ADDED TO queue, TO FINISH THE TRANSACTION
    :return: LIST OF SUB-keys PUSHED INTO ES
    """
    num_keys = 0
    queue = None
    for key in keys:
        timer = Timer("key")
        try:
            with timer:
                for rownum, line in enumerate(source.read_lines(strip_extension(key))):
                    if not line:
                        continue

                    if rownum > 0 and rownum % 1000 == 0:
                        Log.note("Ingested {{num}} records from {{key}} in bucket {{bucket}}", num=rownum, key=key, bucket=source.name)

                    row, please_stop = fix(rownum, line, source, sample_only_filter, sample_size)
                    num_keys += 1

                    if queue == None:
                        queue = self._get_queue(row)
                    queue.add(row)

                    if please_stop:
                        break
        except Exception, e:
            done_copy = None
            Log.warning("Could not process {{key}} after {{duration|round(places=2)}} seconds", key=key, duration=timer.duration.seconds, cause=e)

def monitor(self, please_stop):
    please_stop.on_go(lambda: self.todo.add(Thread.STOP))
    while not please_stop:
        try:
            if not self.todo:
                with self.columns.locker:
                    old_columns = filter(
                        lambda c: (c.last_updated == None or c.last_updated < Date.now() - TOO_OLD) and c.type not in ["object", "nested"],
                        self.columns
                    )
                    if old_columns:
                        self.todo.extend(old_columns)
                        # TEST CONSISTENCY
                        for c, d in product(list(self.todo.queue), list(self.todo.queue)):
                            if c.abs_name == d.abs_name and c.table == d.table and c != d:
                                Log.error("")
                    else:
                        Log.note("no more metadata to update")

            column = self.todo.pop(timeout=10 * MINUTE)
            if column:
                if column.type in ["object", "nested"]:
                    continue
                elif column.last_updated >= Date.now() - TOO_OLD:
                    continue
                try:
                    self._update_cardinality(column)
                    Log.note("updated {{column.name}}", column=column)
                except Exception, e:
                    Log.warning("problem getting cardinality for {{column.name}}", column=column, cause=e)
        except Exception, e:
            Log.warning("problem in cardinality monitor", cause=e)

def main():
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        aws_args = dict(
            region_name=settings.aws.region,
            aws_access_key_id=unwrap(settings.aws.aws_access_key_id),
            aws_secret_access_key=unwrap(settings.aws.aws_secret_access_key)
        )
        ec2_conn = boto_ec2.connect_to_region(**aws_args)

        instances = _get_managed_instances(ec2_conn, settings.name)

        for i in instances:
            Log.note("Reset {{instance_id}} ({{name}}) at {{ip}}", instance_id=i.id, name=i.tags["Name"], ip=i.ip_address)
            _config_fabric(settings.fabric, i)
            try:
                _refresh_etl()  # TODO: UPON FAILURE, TERMINATE INSTANCE AND SPOT REQUEST
            except Exception, e:
                ec2_conn.terminate_instances([i.id])
                Log.warning("Problem resetting {{instance}}, terminated", instance=i.id, cause=e)
    except Exception, e:
        Log.error("Problem with etl", e)

def _refresh_etl():
    with cd("~/TestLog-ETL/"):
        result = run("git pull origin etl")
        if result.find("Already up-to-date.") != -1:
            Log.note("No change required")
            return
        sudo("supervisorctl restart all")

def test_multiple_agg_on_same_field(self):
    if self.not_real_service():
        return

    test = wrap({
        "query": {
            "from": {
                "type": "elasticsearch",
                "settings": {
                    "host": ES_CLUSTER_LOCATION,
                    "index": "unittest",
                    "type": "test_result"
                }
            },
            "select": [
                {"name": "max_bytes", "value": "run.stats.bytes", "aggregate": "max"},
                {"name": "count", "value": "run.stats.bytes", "aggregate": "count"}
            ]
        }
    })

    query = convert.unicode2utf8(convert.value2json(test.query))
    # EXECUTE QUERY
    with Timer("query"):
        response = http.get(self.service_url, data=query)
        if response.status_code != 200:
            error(response)
    result = convert.json2value(convert.utf82unicode(response.all_content))

    Log.note("result\n{{result|indent}}", {"result": result})

def on_go(self, target):
    """
    RUN target WHEN SIGNALED
    """
    if not target:
        if not _Log:
            _late_import()
        _Log.error("expecting target")

    with self.lock:
        if self._go:
            if DEBUG_SIGNAL:
                if not _Log:
                    _late_import()
                _Log.note("Signal {{name|quote}} already triggered, running job immediately", name=self.name)
            target()
        else:
            if DEBUG:
                if not _Log:
                    _late_import()
                _Log.note("Adding target to signal {{name|quote}}", name=self.name)
            self.job_queue.append(target)

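# A minimal sketch of how on_go() is used; the signal name and the callbacks are
# illustrative assumptions, not taken from the code above:
#
#     done = Signal("work done")
#     done.on_go(lambda: Log.note("running cleanup"))     # QUEUED UNTIL THE SIGNAL FIRES
#     done.go()                                           # TRIGGERS EVERY REGISTERED TARGET
#     done.on_go(lambda: Log.note("late registration"))   # ALREADY TRIGGERED, SO RUNS IMMEDIATELY
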
def safe_size(source):
    """
    READ THE source UP TO SOME LIMIT, THEN COPY TO A FILE IF TOO BIG
    RETURN A str() OR A FileString()
    """
    if source is None:
        return None

    total_bytes = 0
    bytes = []
    b = source.read(MIN_READ_SIZE)
    while b:
        total_bytes += len(b)
        bytes.append(b)
        if total_bytes > MAX_STRING_SIZE:
            try:
                data = FileString(TemporaryFile())
                for bb in bytes:
                    data.write(bb)
                del bytes
                del bb
                b = source.read(MIN_READ_SIZE)
                while b:
                    total_bytes += len(b)
                    data.write(b)
                    b = source.read(MIN_READ_SIZE)
                data.seek(0)
                Log.note("Using file of size {{length}} instead of str()", length=total_bytes)
                return data
            except Exception, e:
                Log.error("Could not write file > {{num}} bytes", num=total_bytes, cause=e)
        b = source.read(MIN_READ_SIZE)

    # SMALL ENOUGH TO KEEP IN MEMORY
    return b"".join(bytes)

def _reader(self, pipe, recieve, please_stop):
    try:
        while not please_stop:
            line = pipe.readline()
            if self.service.returncode is not None:
                # GRAB A FEW MORE LINES
                for i in range(100):
                    try:
                        line = pipe.readline()
                        if line:
                            recieve.add(line)
                            if self.debug:
                                Log.note("FROM {{process}}: {{line}}", process=self.name, line=line.rstrip())
                    except Exception:
                        break
                return
            recieve.add(line)
            if self.debug:
                Log.note("FROM {{process}}: {{line}}", process=self.name, line=line.rstrip())
    finally:
        pipe.close()

def get_revision(self, revision, locale=None):
    """
    EXPECTING INCOMPLETE revision
    RETURNS revision
    """
    rev = revision.changeset.id
    if not rev:
        return Null
    elif rev == "None":
        return Null
    elif revision.branch.name == None:
        return Null

    if not self.current_push:
        doc = self._get_from_elasticsearch(revision, locale=locale)
        if doc:
            Log.note("Got hg ({{branch}}, {{locale}}, {{revision}}) from ES", branch=doc.branch.name, locale=locale, revision=doc.changeset.id)
            return doc

        self._load_all_in_push(revision, locale=locale)
        # THE cache IS FILLED, CALL ONE LAST TIME...
        return self.get_revision(revision, locale)

    output = self._get_from_hg(revision, locale=locale)
    output.changeset.id12 = output.changeset.id[0:12]
    output.branch = {
        "name": output.branch.name,
        "url": output.branch.url,
        "locale": output.branch.locale
    }
    return output

def verify_blobber_file(line_number, name, url):
    """
    :param line_number: for debugging
    :param name: for debugging
    :param url: TO BE READ
    :return: RETURNS BYTES **NOT** UNICODE
    """
    if name in ["emulator-5554.log", "qemu.log"] or any(map(name.endswith, [".png", ".html"])):
        return None, 0

    with Timer("Read {{name}}: {{url}}", {"name": name, "url": url}, debug=DEBUG):
        response = http.get(url)
        try:
            logs = response.all_lines
        except Exception, e:
            if name.endswith("_raw.log"):
                Log.error(
                    "Line {{line}}: {{name}} = {{url}} is NOT structured log",
                    line=line_number,
                    name=name,
                    url=url,
                    cause=e
                )
            if DEBUG:
                Log.note(
                    "Line {{line}}: {{name}} = {{url}} is NOT structured log",
                    line=line_number,
                    name=name,
                    url=url
                )
            return None, 0

def forall(self, sql, param=None, _execute=None):
    assert _execute
    num = 0

    self._execute_backlog()
    try:
        old_cursor = self.cursor
        if not old_cursor:  # ALLOW NON-TRANSACTIONAL READS
            self.cursor = self.db.cursor()

        if param:
            sql = expand_template(sql, self.quote_param(param))
        sql = self.preamble + outdent(sql)
        if self.debug:
            Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))
        self.cursor.execute(sql)

        columns = tuple([utf8_to_unicode(d[0]) for d in self.cursor.description])
        for r in self.cursor:
            num += 1
            _execute(wrap(dict(zip(columns, [utf8_to_unicode(c) for c in r]))))

        if not old_cursor:  # CLEANUP AFTER NON-TRANSACTIONAL READS
            self.cursor.close()
            self.cursor = None
    except Exception, e:
        Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1)

def test_simple(filename):
    with Timer("simple time"):
        with codecs.open(filename, "r", encoding="utf-8") as f:
            for line in f:
                id = int(line.split("\t")[0])
                if id % 10000 == 0:
                    Log.note(str(id))

def main():
    try:
        settings = startup.read_settings()
        Log.start(settings.debug)
        constants.set(settings.constants)

        with startup.SingleInstance(flavor_id=settings.args.filename):
            with aws.s3.Bucket(settings.destination) as bucket:
                if settings.param.debug:
                    if settings.source.durable:
                        Log.error("Can not run in debug mode with a durable queue")
                    synch = SynchState(bucket.get_key(SYNCHRONIZATION_KEY, must_exist=False))
                else:
                    synch = SynchState(bucket.get_key(SYNCHRONIZATION_KEY, must_exist=False))
                    if settings.source.durable:
                        synch.startup()

                queue = PersistentQueue(settings.param.queue_file)
                if queue:
                    last_item = queue[len(queue) - 1]
                    synch.source_key = last_item._meta.count + 1

                with pulse.Consumer(settings=settings.source, target=None, target_queue=queue, start=synch.source_key):
                    Thread.run("pulse log loop", log_loop, settings, synch, queue, bucket)
                    Thread.wait_for_shutdown_signal(allow_exit=True)
                    Log.warning("starting shutdown")

                queue.close()
                Log.note("write shutdown state to S3")
                synch.shutdown()
    except Exception, e:
        Log.error("Problem with etl", e)

def verify_blobber_file(line_number, name, url):
    """
    :param line_number: for debugging
    :param name: for debugging
    :param url: TO BE READ
    :return: RETURNS BYTES **NOT** UNICODE
    """
    if name in ["emulator-5554.log", "qemu.log"] or any(map(name.endswith, [".png", ".html"])):
        return None, 0

    with Timer("Read {{name}}: {{url}}", {"name": name, "url": url}, debug=DEBUG):
        response = http.get(url)
        try:
            logs = response.all_lines
        except Exception, e:
            if name.endswith("_raw.log"):
                Log.error(
                    "Line {{line}}: {{name}} = {{url}} is NOT structured log",
                    line=line_number,
                    name=name,
                    url=url,
                    cause=e
                )
            if DEBUG:
                Log.note(
                    "Line {{line}}: {{name}} = {{url}} is NOT structured log",
                    line=line_number,
                    name=name,
                    url=url
                )
            return None, 0

def test_buffered(filename):
    with Timer("buffered time"):
        with codecs.open(filename, "r", encoding="utf-8", buffering=2 ** 25) as f:
            for line in f:
                id = int(line.split("\t")[0])
                if id % 10000 == 0:
                    Log.note(str(id))

def get_file(ref, url):
    from pyLibrary.env.files import File

    if ref.path.startswith("~"):
        home_path = os.path.expanduser("~")
        if os.sep == "\\":
            home_path = "/" + home_path.replace(os.sep, "/")
        if home_path.endswith("/"):
            home_path = home_path[:-1]

        ref.path = home_path + ref.path[1::]
    elif not ref.path.startswith("/"):
        # CONVERT RELATIVE TO ABSOLUTE
        if ref.path[0] == ".":
            num_dot = 1
            while ref.path[num_dot] == ".":
                num_dot += 1

            parent = url.path.rstrip("/").split("/")[:-num_dot]
            ref.path = "/".join(parent) + ref.path[num_dot:]
        else:
            parent = url.path.rstrip("/").split("/")[:-1]
            ref.path = "/".join(parent) + "/" + ref.path

    path = ref.path if os.sep != "\\" else ref.path[1::].replace("/", "\\")
    try:
        if DEBUG:
            _Log.note("reading file {{path}}", path=path)
        content = File(path).read()
    except Exception, e:
        content = None
        _Log.error("Could not read file {{filename}}", filename=path, cause=e)

def event_loop(self, please_stop):
    got_stop_message = False
    while not please_stop.is_go():
        with Timer("get more work", debug=DEBUG):
            request = self.in_queue.pop()
        if request == Thread.STOP:
            if DEBUG:
                Log.note("{{name}} got a stop message", name=self.name)
            got_stop_message = True
            if self.in_queue:
                Log.warning(
                    "programmer error, queue not empty. {{num}} requests lost:\n{{requests}}",
                    num=len(self.in_queue.queue),
                    requests=list(self.in_queue.queue)[:5:] + list(self.in_queue.queue)[-5::],
                )
            break
        if please_stop.is_go():
            break

        with Timer("run {{function}}", {"function": get_function_name(self.function)}, debug=DEBUG):
            try:
                result = self.function(**request)
                if self.out_queue != None:
                    self.out_queue.add({"response": result})
            except Exception, e:
                Log.warning("Can not execute with params={{params}}", params=request, cause=e)
                if self.out_queue != None:
                    self.out_queue.add({"exception": e})
            finally:
                self.num_runs += 1

def extend(self, records):
    """
    records - MUST HAVE FORM OF
        [{"value":value}, ... {"value":value}] OR
        [{"json":json}, ... {"json":json}]
        OPTIONAL "id" PROPERTY IS ALSO ACCEPTED
    """
    lines = []
    try:
        for r in records:
            id = r.get("id")

            if id == None:
                id = Random.hex(40)

            if "json" in r:
                json = r["json"]
            elif "value" in r:
                json = convert.value2json(r["value"])
            else:
                json = None
                Log.error("Expecting every record given to have \"value\" or \"json\" property")

            lines.append('{"index":{"_id": ' + convert.value2json(id) + '}}')
            lines.append(json)
        del records

        if not lines:
            return

        try:
            data_bytes = "\n".join(lines) + "\n"
            data_bytes = data_bytes.encode("utf8")
        except Exception, e:
            Log.error("can not make request body from\n{{lines|indent}}", lines=lines, cause=e)

        response = self.cluster._post(
            self.path + "/_bulk",
            data=data_bytes,
            headers={"Content-Type": "text"},
            timeout=self.settings.timeout
        )
        items = response["items"]

        for i, item in enumerate(items):
            if self.cluster.version.startswith("0.90."):
                if not item.index.ok:
                    Log.error("{{error}} while loading line:\n{{line}}", error=item.index.error, line=lines[i * 2 + 1])
            elif self.cluster.version.startswith("1.4."):
                if item.index.status not in [200, 201]:
                    Log.error("{{error}} while loading line:\n{{line}}", error=item.index.error, line=lines[i * 2 + 1])
            else:
                Log.error("version not supported {{version}}", version=self.cluster.version)

        if self.debug:
            Log.note("{{num}} documents added", num=len(items))

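# A minimal sketch of the record format extend() expects; the index variable and the
# documents are illustrative assumptions, not taken from the code above:
#
#     index.extend([
#         {"id": "doc-1", "value": {"name": "first", "count": 1}},   # "value" IS CONVERTED TO JSON
#         {"json": '{"name": "second", "count": 2}'}                 # PRE-ENCODED JSON; "id" IS GENERATED
#     ])
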
def test_wrap_3():
    switch = [
        lambda: Random.string(20),
        lambda: {"i": Random.int(2000)},
        lambda: Data(i=Random.int(2000)),
        lambda: FlatList([{"i": Random.int(2000)}]),
        lambda: [{"i": Random.int(2000)}]
    ]

    inputs = [switch[min(len(switch) - 1, int(floor(-log(Random.float(), 2))))]() for i in range(NUM_INPUT)]

    for i in range(NUM_REPEAT):
        results = []
        gc.collect()
        with Profiler("more string: slow_wrap"):
            for v in inputs:
                results.append(slow_wrap(v))

        results = []
        gc.collect()
        with Profiler("more string: wrap"):
            for v in inputs:
                results.append(wrap(v))

        results = []
        gc.collect()
        with Profiler("more string: baseline"):
            for v in inputs:
                results.append(baseline(v))

        Log.note("Done {{i}} of {{num}}", {"i": i, "num": NUM_REPEAT})

def close(self):
    self.please_stop.go()
    with self.lock:
        if self.db is None:
            return

        self.add(Thread.STOP)

        if self.db.status.end == self.start:
            if DEBUG:
                Log.note("persistent queue clear and closed")
            self.file.delete()
        else:
            if DEBUG:
                Log.note("persistent queue closed with {{num}} items left", num=len(self))
            try:
                self._add_pending({"add": {"status.start": self.start}})
                for i in range(self.db.status.start, self.start):
                    self._add_pending({"remove": str(i)})
                self.file.write(
                    convert.value2json({"add": self.db}) + "\n" +
                    ("\n".join(convert.value2json(p) for p in self.pending)) + "\n"
                )
                self._apply_pending()
            except Exception, e:
                raise e
        self.db = None

def commit(self):
    with self.lock:
        if self.closed:
            Log.error("Queue is closed, commit not allowed")

        try:
            self._add_pending({"add": {"status.start": self.start}})
            for i in range(self.db.status.start, self.start):
                self._add_pending({"remove": str(i)})

            if self.db.status.end - self.start < 10 or Random.range(0, 1000) == 0:
                # FORCE RE-WRITE TO LIMIT FILE SIZE
                # SIMPLY RE-WRITE FILE
                if DEBUG:
                    Log.note("Re-write {{num_keys}} keys to persistent queue", num_keys=self.db.status.end - self.start)

                for k in self.db.keys():
                    if k == "status" or int(k) >= self.db.status.start:
                        continue
                    Log.error("Not expecting {{key}}", key=k)
                self._commit()
                self.file.write(convert.value2json({"add": self.db}) + "\n")
            else:
                self._commit()
        except Exception, e:
            raise e

def get_raw_json(path):
    active_data_timer = Timer("total duration")
    body = flask.request.get_data()
    try:
        with active_data_timer:
            args = wrap(Dict(**flask.request.args))
            limit = args.limit if args.limit else 10
            args.limit = None

            frum = wrap_from(path)
            result = jx.run(
                {
                    "from": path,
                    "where": {"eq": args},
                    "limit": limit,
                    "format": "list"
                },
                frum
            )

            if isinstance(result, Container):  # TODO: REMOVE THIS CHECK, jx SHOULD ALWAYS RETURN Containers
                result = result.format("list")

        result.meta.active_data_response_time = active_data_timer.duration

        response_data = convert.unicode2utf8(convert.value2json(result.data, pretty=True))
        Log.note("Response is {{num}} bytes", num=len(response_data))
        return Response(response_data, status=200)
    except Exception, e:
        e = Except.wrap(e)
        return _send_error(active_data_timer, body, e)

def delete_index(self, index_name):
    if not isinstance(index_name, unicode):
        Log.error("expecting an index name")

    if self.debug:
        Log.note("Deleting index {{index}}", index=index_name)

    # REMOVE ALL ALIASES TOO
    aliases = [a for a in self.get_aliases() if a.index == index_name and a.alias != None]
    if aliases:
        self.post(
            path="/_aliases",
            data={"actions": [{"remove": a} for a in aliases]}
        )

    url = self.settings.host + ":" + unicode(self.settings.port) + "/" + index_name
    try:
        response = http.delete(url)
        if response.status_code != 200:
            Log.error("Expecting a 200, got {{code}}", code=response.status_code)
        details = convert.json2value(utf82unicode(response.content))
        if self.debug:
            Log.note("delete response {{response}}", response=details)
        return response
    except Exception, e:
        Log.error("Problem with call to {{url}}", url=url, cause=e)

def disconnect():
    with suppress_exception:
        self.target_queue.close()
        Log.note("stop put into queue")

    self.pulse.disconnect()
    Log.note("pulse listener was given a disconnect()")

def get_all_in_es(es):
    in_es = set()

    all_indexes = es.es.cluster.get_metadata().indices
    for name, index in all_indexes.items():
        if "unittest" not in index.aliases:
            continue

        result = elasticsearch.Index(index=name, alias="unittest", settings=es.es.settings).search({
            "aggs": {
                "_match": {
                    "terms": {
                        "field": "etl.source.source.id",
                        "size": 200000
                    }
                }
            }
        })

        good_es = []
        for k in result.aggregations._match.buckets.key:
            try:
                good_es.append(int(k))
            except Exception, e:
                pass

        Log.note("got {{num}} from {{index}}", num=len(good_es), index=name)
        in_es |= set(good_es)

    return in_es

def commit(self):
    with self.lock:
        if self.closed:
            Log.error("Queue is closed, commit not allowed")

        try:
            self._add_pending({"add": {"status.start": self.start}})
            for i in range(self.db.status.start, self.start):
                self._add_pending({"remove": str(i)})

            if self.db.status.end - self.start < 10 or Random.range(0, 1000) == 0:
                # FORCE RE-WRITE TO LIMIT FILE SIZE
                # SIMPLY RE-WRITE FILE
                if DEBUG:
                    Log.note("Re-write {{num_keys}} keys to persistent queue", num_keys=self.db.status.end - self.start)

                for k in self.db.keys():
                    if k == "status" or int(k) >= self.db.status.start:
                        continue
                    Log.error("Not expecting {{key}}", key=k)
                self._commit()
                self.file.write(convert.value2json({"add": self.db}) + "\n")
            else:
                self._commit()
        except Exception, e:
            raise e

def not_monitor(self, please_stop):
    Log.alert("metadata scan has been disabled")
    please_stop.on_go(lambda: self.todo.add(Thread.STOP))
    while not please_stop:
        c = self.todo.pop()
        if c == Thread.STOP:
            break
        if not c.last_updated or c.last_updated >= Date.now() - TOO_OLD:
            continue

        with self.meta.columns.locker:
            self.meta.columns.update({
                "set": {
                    "last_updated": Date.now()
                },
                "clear": [
                    "count",
                    "cardinality",
                    "partitions",
                ],
                "where": {"eq": {"es_index": c.es_index, "es_column": c.es_column}}
            })
        Log.note("Could not get {{col.es_index}}.{{col.es_column}} info", col=c)

def delete_record(self, filter):
    if self.settings.read_only:
        Log.error("Index opened in read only mode, no changes allowed")
    self.cluster.get_metadata()

    if self.cluster.cluster_state.version.number.startswith("0.90"):
        query = {"filtered": {
            "query": {"match_all": {}},
            "filter": filter
        }}
    elif self.cluster.cluster_state.version.number.startswith("1.0"):
        query = {"query": {"filtered": {
            "query": {"match_all": {}},
            "filter": filter
        }}}
    else:
        raise NotImplementedError

    if self.debug:
        Log.note("Delete bugs:\n{{query}}", query=query)

    result = self.cluster.delete(
        self.path + "/_query",
        data=convert.value2json(query),
        timeout=60
    )

    for name, status in result._indices.items():
        if status._shards.failed > 0:
            Log.error("Failure to delete from {{index}}", index=name)

def test_public_request(self):
    # MAKE SOME DATA
    data = {
        "a": {  # MATCHES SERVER PATTERN
            "b": "good",
            "c": [
                {"k": "good", "m": 1},
                {"k": 2, "m": 2}
            ]
        },
        "constant": "this is a test",
        "random-data": convert.bytes2base64(Random.bytes(100))
    }

    content = json.dumps(data)
    response = requests.post(
        url=settings.url,
        data=content,
        headers={
            'Content-Type': CONTENT_TYPE
        }
    )
    self.assertEqual(response.status_code, 200, "Expecting 200")

    about = json.loads(response.content)
    Log.note("Data located at {{link}} id={{id}}", link=about['link'], id=about['etl']['id'])
    return about['link'], about['etl']['id']

def delete_index(self, index_name):
    if not isinstance(index_name, unicode):
        Log.error("expecting an index name")

    if self.debug:
        Log.note("Deleting index {{index}}", index=index_name)

    # REMOVE ALL ALIASES TOO
    aliases = [a for a in self.get_aliases() if a.index == index_name and a.alias != None]
    if aliases:
        self.post(
            path="/_aliases",
            data={"actions": [{"remove": a} for a in aliases]}
        )

    url = self.settings.host + ":" + unicode(self.settings.port) + "/" + index_name
    try:
        response = http.delete(url)
        if response.status_code != 200:
            Log.error("Expecting a 200")
        details = convert.json2value(utf82unicode(response.content))
        if self.debug:
            Log.note("delete response {{response}}", response=details)
        return response
    except Exception, e:
        Log.error("Problem with call to {{url}}", url=url, cause=e)

def test_big_result_works(self):
    result = http.post_json(global_settings.service_url, data={
        "from": "unittest",
        "where": {"and": [
            {"gte": {"run.timestamp": Date.today() - DAY}},
            {"lt": {"run.timestamp": Date.today()}},
            {"eq": {"result.ok": False}}
        ]},
        "format": "list",
        "limit": 10000
    })
    if result.template:
        result = Except.new_instance(result)
        Log.error("problem with call", cause=result)
    Log.note("Got {{num}} test failures", num=len(result.data))

def _upsert_column(self, c):
    # ASSUMING THE self.meta.columns.locker IS HAD
    existing_columns = [r for r in self.meta.columns.data if r.table == c.table and r.name == c.name]

    if not existing_columns:
        self.meta.columns.add(c)
        Log.note("todo: {{table}}.{{column}}", table=c.table, column=c.es_column)
        self.todo.add(c)

        # MARK meta.columns AS DIRTY TOO
        cols = [r for r in self.meta.columns.data if r.table == "meta.columns"]
        for cc in cols:
            cc.partitions = cc.cardinality = None
            cc.last_updated = Date.now()
        self.todo.extend(cols)
    else:
        canonical = existing_columns[0]
        if canonical.relative and not c.relative:
            return  # RELATIVE COLUMNS WILL SHADOW ABSOLUTE COLUMNS

        for key in Column.__slots__:
            canonical[key] = c[key]
        Log.note("todo: {{table}}.{{column}}", table=canonical.table, column=canonical.es_column)
        self.todo.add(canonical)

def test_queue_speed(self):
    SCALE = 1000 * 10

    done = Signal("done")
    slow = Queue()
    q = ThreadedQueue("test queue", queue=slow)

    def empty(please_stop):
        while not please_stop:
            item = q.pop()
            if item is Thread.STOP:
                break

        done.go()

    Thread.run("empty", empty)

    timer = Timer("add {{num}} to queue", param={"num": SCALE})
    with timer:
        for i in range(SCALE):
            q.add(i)
        q.add(Thread.STOP)
        Log.note("Done insert")
        done.wait()

    self.assertLess(timer.duration.seconds, 1.5, "Expecting queue to be fast")

def test_thread_wait(self):
    NUM = 100
    locker = Lock("test")
    phase1 = []
    phase2 = []

    def work(value, please_stop):
        with locker:
            phase1.append(value)
            locker.wait()
            phase2.append(value)

    with locker:
        threads = [Thread.run(unicode(i), work, i) for i in range(NUM)]

    # CONTINUE TO USE THE locker SO WAITS GET TRIGGERED
    while len(phase2) < NUM:
        with locker:
            pass

    for t in threads:
        t.join()

    self.assertEqual(len(phase1), NUM, "expecting " + unicode(NUM) + " items")
    self.assertEqual(len(phase2), NUM, "expecting " + unicode(NUM) + " items")
    for i in range(NUM):
        self.assertTrue(i in phase1, "expecting " + unicode(i))
        self.assertTrue(i in phase2, "expecting " + unicode(i))
    Log.note("done")

def close(self):
    self.please_stop.go()
    with self.lock:
        if self.db is None:
            return

        self.add(Thread.STOP)

        if self.db.status.end == self.start:
            if DEBUG:
                Log.note("persistent queue clear and closed")
            self.file.delete()
        else:
            if DEBUG:
                Log.note("persistent queue closed with {{num}} items left", num=len(self))
            try:
                self._add_pending({"add": {"status.start": self.start}})
                for i in range(self.db.status.start, self.start):
                    self._add_pending({"remove": str(i)})
                self.file.write(
                    convert.value2json({"add": self.db}) + "\n" +
                    ("\n".join(convert.value2json(p) for p in self.pending)) + "\n"
                )
                self._apply_pending()
            except Exception, e:
                raise e
        self.db = None

def forall(self, sql, param=None, _execute=None):
    assert _execute
    num = 0

    self._execute_backlog()
    try:
        old_cursor = self.cursor
        if not old_cursor:  # ALLOW NON-TRANSACTIONAL READS
            self.cursor = self.db.cursor()

        if param:
            sql = expand_template(sql, self.quote_param(param))
        sql = self.preamble + outdent(sql)
        if self.debug:
            Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))
        self.cursor.execute(sql)

        columns = tuple([utf8_to_unicode(d[0]) for d in self.cursor.description])
        for r in self.cursor:
            num += 1
            _execute(wrap(dict(zip(columns, [utf8_to_unicode(c) for c in r]))))

        if not old_cursor:  # CLEANUP AFTER NON-TRANSACTIONAL READS
            self.cursor.close()
            self.cursor = None
    except Exception, e:
        Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1)

def column_query(self, sql, param=None):
    """
    RETURN RESULTS IN [column][row_num] GRID
    """
    self._execute_backlog()
    try:
        old_cursor = self.cursor
        if not old_cursor:  # ALLOW NON-TRANSACTIONAL READS
            self.cursor = self.db.cursor()
            self.cursor.execute("SET TIME_ZONE='+00:00'")
            self.cursor.close()
            self.cursor = self.db.cursor()

        if param:
            sql = expand_template(sql, self.quote_param(param))
        sql = self.preamble + outdent(sql)
        if self.debug:
            Log.note("Execute SQL:\n{{sql}}", sql=indent(sql))

        self.cursor.execute(sql)
        grid = [[utf8_to_unicode(c) for c in row] for row in self.cursor]
        # columns = [utf8_to_unicode(d[0]) for d in coalesce(self.cursor.description, [])]
        result = zip(*grid)

        if not old_cursor:  # CLEANUP AFTER NON-TRANSACTIONAL READS
            self.cursor.close()
            self.cursor = None

        return result
    except Exception, e:
        if isinstance(e, InterfaceError) or e.message.find("InterfaceError") >= 0:
            Log.error("Did you close the db connection?", e)
        Log.error("Problem executing SQL:\n{{sql|indent}}", sql=sql, cause=e, stack_depth=1)

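# FOR REFERENCE: zip(*grid) TRANSPOSES THE [row][column] GRID INTO [column][row], e.g.
#
#     zip(*[(1, "a"), (2, "b"), (3, "c")])  ==  [(1, 2, 3), ("a", "b", "c")]
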
def get_all_in_es(es):
    in_es = set()

    all_indexes = es.es.cluster.get_metadata().indices
    for name, index in all_indexes.items():
        if "unittest" not in index.aliases:
            continue

        result = elasticsearch.Index(index=name, alias="unittest", settings=es.es.settings).search({
            "aggs": {
                "_match": {
                    "terms": {
                        "field": "etl.source.source.id",
                        "size": 200000
                    }
                }
            }
        })

        good_es = []
        for k in result.aggregations._match.buckets.key:
            try:
                good_es.append(int(k))
            except Exception, e:
                pass

        Log.note("got {{num}} from {{index}}", num=len(good_es), index=name)
        in_es |= set(good_es)

    return in_es

def get_columns(self, table_name, column_name=None, fail_when_not_found=False):
    """
    RETURN METADATA COLUMNS
    """
    try:
        with self.meta.columns.locker:
            columns = [c for c in self.meta.columns.data if c.table == table_name and (column_name is None or c.name == column_name)]
        if columns:
            columns = jx.sort(columns, "name")
            if fail_when_not_found:
                # AT LEAST WAIT FOR THE COLUMNS TO UPDATE
                while len(self.todo) and not all(columns.get("last_updated")):
                    Log.note("waiting for columns to update {{columns|json}}", columns=[c.table + "." + c.es_column for c in columns if not c.last_updated])
                    Thread.sleep(seconds=1)
                return columns
            elif all(columns.get("last_updated")):
                return columns
    except Exception, e:
        Log.error("Not expected", cause=e)

def find_some_work(th):
    # th.get_markup("fx-team", "036f62007472", "B8kS5IJ5Rom8l-kcSIRIlA")
    # th.get_markup("mozilla-inbound", "971c1ee26cad", "fNuzNmZxS6m3i_p9jDh8iA")

    # GET SOME TASKS
    result = http.post_json(url="http://activedata.allizom.org/query", data={
        "from": "task",
        "select": ["build.branch", "build.revision", "task.id"],
        "where": {"and": [
            {"gt": {"task.run.start_time": (Date.today() - DAY).unix}},
            {"exists": "build.revision"},
            {"exists": "build.branch"}
        ]},
        "format": "list"
    })

    # TRY TO GET THEM OUT OF OUR CACHE
    for r in result.data:
        Log.note("look for task {{task_id}}", task_id=r.task.id)
        th.get_markup(r.build.branch, r.build.revision, r.task.id)

def pretty_json(value):
    try:
        if scrub(value) is None:
            return "null"
        elif isinstance(value, basestring):
            if isinstance(value, str):
                value = utf82unicode(value)
            try:
                return quote(value)
            except Exception, e:
                from pyLibrary.debugs.logs import Log

                try:
                    Log.note("try explicit convert of string with length {{length}}", length=len(value))
                    acc = [u"\""]
                    for c in value:
                        try:
                            try:
                                c2 = ESCAPE_DCT[c]
                            except Exception, h:
                                c2 = c
                            c3 = unicode(c2)
                            acc.append(c3)
                        except BaseException, g:
                            pass
                            # Log.warning("odd character {{ord}} found in string. Ignored.", ord=ord(c), cause=g)
                    acc.append(u"\"")
                    output = u"".join(acc)
                    Log.note("return value of length {{length}}", length=len(output))
                    return output

def __enter__(self):
    if self.debug:
        if not self.silent:
            Log.note("Timer start: " + self.template, stack_depth=1, **self.param)
    self.start = time()
    return self

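# A minimal sketch of this Timer used as a context manager; the template text and the
# work being timed are illustrative assumptions, not taken from the code above:
#
#     with Timer("load {{num}} rows", {"num": 1000}):
#         load_rows()   # DURATION IS REPORTED WHEN THE BLOCK EXITS
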
def flush(self):
    try:
        self.cluster.post("/" + self.settings.index + "/_flush", data={"wait_if_ongoing": True, "forced": False})
    except Exception, e:
        if "FlushNotAllowedEngineException" in e:
            Log.note("Flush is ignored")
        else:
            Log.error("Problem flushing", cause=e)

def tearDownClass(self):
    cluster = elasticsearch.Cluster(global_settings.backend_es)
    for i in ESUtils.indexes:
        try:
            cluster.delete_index(i.settings.index)
            Log.note("remove index {{index}}", index=i)
        except Exception, e:
            pass

def _find(b, please_stop):
    try:
        url = b.url + "rev/" + revision
        response = http.get(url)
        if response.status_code == 200:
            Log.note("{{revision}} found at {{url}}", url=url, revision=revision)
    except Exception, e:
        pass

def add(self, doc, queue=None):
    if queue == None:
        queue = self._get_queue(doc)
        if queue == None:
            Log.note("Document not added: Too old")
            return

    queue.add(doc)

def _replace_ref(node, url):
    if url.path.endswith("/"):
        url.path = url.path[:-1]

    if isinstance(node, Mapping):
        ref = None
        output = {}
        for k, v in node.items():
            if k == "$ref":
                ref = URL(v)
            else:
                output[k] = _replace_ref(v, url)

        if not ref:
            return output

        node = output

        if not ref.scheme and not ref.path:
            # DO NOT TOUCH LOCAL REF YET
            output["$ref"] = ref
            return output

        if not ref.scheme:
            # SCHEME RELATIVE IMPLIES SAME PROTOCOL AS LAST TIME, WHICH
            # REQUIRES THE CURRENT DOCUMENT'S SCHEME
            ref.scheme = url.scheme

        # FIND THE SCHEME AND LOAD IT
        if ref.scheme in scheme_loaders:
            new_value = scheme_loaders[ref.scheme](ref, url)
        else:
            raise _Log.error("unknown protocol {{scheme}}", scheme=ref.scheme)

        if ref.fragment:
            new_value = dot.get_attr(new_value, ref.fragment)

        if DEBUG:
            _Log.note("Replace {{ref}} with {{new_value}}", ref=ref, new_value=new_value)

        if not output:
            output = new_value
        else:
            output = unwrap(set_default(output, new_value))

        if DEBUG:
            _Log.note("Return {{output}}", output=output)

        return output
    elif isinstance(node, list):
        output = [_replace_ref(n, url) for n in node]
        # if all(p[0] is p[1] for p in zip(output, node)):
        #     return node
        return output

    return node

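# A minimal sketch of the kind of node _replace_ref() expands; the file name and the
# property names are illustrative assumptions, not taken from the code above:
#
#     {"constants": {"$ref": "file://defaults.json"}, "debug": {"$ref": "#debug"}}
#
# NON-LOCAL REFS ARE LOADED THROUGH scheme_loaders AND MERGED IN PLACE; BARE FRAGMENT
# REFS (NO SCHEME, NO PATH) ARE LEFT UNTOUCHED FOR A LATER PASS.
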
def test_timing(self):
    if self.not_real_service():
        return

    test = wrap({
        "query": {
            "from": {
                "type": "elasticsearch",
                "settings": {
                    "host": ES_CLUSTER_LOCATION,
                    "index": "unittest",
                    "type": "test_result"
                }
            },
            "select": [
                {"name": "count", "value": "run.duration", "aggregate": "count"},
                {"name": "total", "value": "run.duration", "aggregate": "sum"}
            ],
            "edges": [
                {"name": "chunk", "value": ["run.suite", "run.chunk"]},
                "result.ok"
            ],
            "where": {"and": [
                {"lt": {"timestamp": Date.floor(Date.now()).milli / 1000}},
                {"gte": {"timestamp": Date.floor(Date.now() - (Duration.DAY * 7), Duration.DAY).milli / 1000}}
            ]},
            "format": "cube",
            "samples": {
                "limit": 30
            }
        }
    })

    query = convert.unicode2utf8(convert.value2json(test.query))
    # EXECUTE QUERY
    with Timer("query"):
        response = http.get(self.service_url, data=query)
        if response.status_code != 200:
            error(response)
    result = convert.json2value(convert.utf82unicode(response.all_content))

    Log.note("result\n{{result|indent}}", {"result": result})

def join(self):
    try:
        # WAIT FOR FINISH
        for t in self.threads:
            t.join()
    except (KeyboardInterrupt, SystemExit):
        Log.note("Shutdown started, please be patient")
    except Exception, e:
        Log.error("Unusual shutdown!", e)

def disconnect():
    try:
        self.target_queue.close()
        Log.note("stop put into queue")
    except:
        pass

    self.pulse.disconnect()
    Log.note("pulse listener was given a disconnect()")