def int2Partition(value): if Math.round(value) == 0: return edge.domain.NULL d = datetime(str(value)[:4:], str(value)[-2:], 1) d = d.addMilli(offset) return edge.domain.getPartByKey(d)
def int2Partition(value): if Math.round(value) == 0: return edge.domain.NULL d = datetime(str(value)[:4:], str(value)[-2:], 1) d = d.addMilli(offset) return edge.domain.getPartByKey(d)
def request(method, url, zip=None, retry=None, **kwargs): """ JUST LIKE requests.request() BUT WITH DEFAULT HEADERS AND FIXES DEMANDS data IS ONE OF: * A JSON-SERIALIZABLE STRUCTURE, OR * LIST OF JSON-SERIALIZABLE STRUCTURES, OR * None Parameters * zip - ZIP THE REQUEST BODY, IF BIG ENOUGH * json - JSON-SERIALIZABLE STRUCTURE * retry - {"times": x, "sleep": y} STRUCTURE THE BYTE_STRINGS (b"") ARE NECESSARY TO PREVENT httplib.py FROM **FREAKING OUT** IT APPEARS requests AND httplib.py SIMPLY CONCATENATE STRINGS BLINDLY, WHICH INCLUDES url AND headers """ global _warning_sent if not default_headers and not _warning_sent: _warning_sent = True Log.warning( "The pyLibrary.env.http module was meant to add extra " "default headers to all requests, specifically the 'Referer' " "header with a URL to the project. Use the `pyLibrary.debug.constants.set()` " "function to set `pyLibrary.env.http.default_headers`" ) if isinstance(url, list): # TRY MANY URLS failures = [] for remaining, u in jx.countdown(url): try: response = request(method, u, zip=zip, retry=retry, **kwargs) if Math.round(response.status_code, decimal=-2) not in [400, 500]: return response if not remaining: return response except Exception, e: e = Except.wrap(e) failures.append(e) Log.error("Tried {{num}} urls", num=len(url), cause=failures)
def request(method, url, zip=None, retry=None, **kwargs): """ JUST LIKE requests.request() BUT WITH DEFAULT HEADERS AND FIXES DEMANDS data IS ONE OF: * A JSON-SERIALIZABLE STRUCTURE, OR * LIST OF JSON-SERIALIZABLE STRUCTURES, OR * None Parameters * zip - ZIP THE REQUEST BODY, IF BIG ENOUGH * json - JSON-SERIALIZABLE STRUCTURE * retry - {"times": x, "sleep": y} STRUCTURE THE BYTE_STRINGS (b"") ARE NECESSARY TO PREVENT httplib.py FROM **FREAKING OUT** IT APPEARS requests AND httplib.py SIMPLY CONCATENATE STRINGS BLINDLY, WHICH INCLUDES url AND headers """ global _warning_sent if not default_headers and not _warning_sent: _warning_sent = True Log.warning( "The pyLibrary.env.http module was meant to add extra " "default headers to all requests, specifically the 'Referer' " "header with a URL to the project. Use the `pyLibrary.debug.constants.set()` " "function to set `pyLibrary.env.http.default_headers`") if isinstance(url, list): # TRY MANY URLS failures = [] for remaining, u in jx.countdown(url): try: response = request(method, u, zip=zip, retry=retry, **kwargs) if Math.round(response.status_code, decimal=-2) not in [400, 500]: return response if not remaining: return response except Exception, e: e = Except.wrap(e) failures.append(e) Log.error("Tried {{num}} urls", num=len(url), cause=failures)
def int2Partition(value): if Math.round(value) == numPartitions: return edge.domain.NULL return edge.domain.getPartByKey((value * edge.domain.interval) + offset)
def int2Partition(value): if Math.round(value) == numPartitions: return edge.domain.NULL return edge.domain.getPartByKey(ref.add(edge.domain.interval.multiply(value)))
def es_aggsop(es, frum, query): select = wrap([s.copy() for s in listwrap(query.select)]) es_column_map = {c.name: unwraplist(c.es_column) for c in frum.schema.all_columns} es_query = Dict() new_select = Dict() #MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING formula = [] for s in select: if s.aggregate == "count" and isinstance(s.value, Variable) and s.value.var == ".": s.pull = "doc_count" elif isinstance(s.value, Variable): if s.value.var == ".": if frum.typed: # STATISITCAL AGGS IMPLY $value, WHILE OTHERS CAN BE ANYTHING if s.aggregate in NON_STATISTICAL_AGGS: #TODO: HANDLE BOTH $value AND $objects TO COUNT Log.error("do not know how to handle") else: s.value.var = "$value" new_select["$value"] += [s] else: if s.aggregate in NON_STATISTICAL_AGGS: #TODO: WE SHOULD BE ABLE TO COUNT, BUT WE MUST *OR* ALL LEAF VALUES TO DO IT Log.error("do not know how to handle") else: Log.error('Not expecting ES to have a value at "." which {{agg}} can be applied', agg=s.aggregate) elif s.aggregate == "count": s.value = s.value.map(es_column_map) new_select["count_"+literal_field(s.value.var)] += [s] else: s.value = s.value.map(es_column_map) new_select[literal_field(s.value.var)] += [s] else: formula.append(s) for canonical_name, many in new_select.items(): representative = many[0] if representative.value.var == ".": Log.error("do not know how to handle") else: field_name = representative.value.var # canonical_name=literal_field(many[0].name) for s in many: if s.aggregate == "count": es_query.aggs[literal_field(canonical_name)].value_count.field = field_name s.pull = literal_field(canonical_name) + ".value" elif s.aggregate == "median": # ES USES DIFFERENT METHOD FOR PERCENTILES key = literal_field(canonical_name + " percentile") es_query.aggs[key].percentiles.field = field_name es_query.aggs[key].percentiles.percents += [50] s.pull = key + ".values.50\.0" elif s.aggregate == "percentile": # ES USES DIFFERENT METHOD FOR PERCENTILES key = literal_field(canonical_name + " percentile") if isinstance(s.percentile, basestring) or s.percetile < 0 or 1 < s.percentile: Log.error("Expecting percentile to be a float from 0.0 to 1.0") percent = Math.round(s.percentile * 100, decimal=6) es_query.aggs[key].percentiles.field = field_name es_query.aggs[key].percentiles.percents += [percent] s.pull = key + ".values." + literal_field(unicode(percent)) elif s.aggregate == "cardinality": # ES USES DIFFERENT METHOD FOR CARDINALITY key = literal_field(canonical_name + " cardinality") es_query.aggs[key].cardinality.field = field_name s.pull = key + ".value" elif s.aggregate == "stats": # REGULAR STATS stats_name = literal_field(canonical_name) es_query.aggs[stats_name].extended_stats.field = field_name # GET MEDIAN TOO! median_name = literal_field(canonical_name + " percentile") es_query.aggs[median_name].percentiles.field = field_name es_query.aggs[median_name].percentiles.percents += [50] s.pull = { "count": stats_name + ".count", "sum": stats_name + ".sum", "min": stats_name + ".min", "max": stats_name + ".max", "avg": stats_name + ".avg", "sos": stats_name + ".sum_of_squares", "std": stats_name + ".std_deviation", "var": stats_name + ".variance", "median": median_name + ".values.50\.0" } elif s.aggregate == "union": # USE TERMS AGGREGATE TO SIMULATE union stats_name = literal_field(canonical_name) es_query.aggs[stats_name].terms.field = field_name es_query.aggs[stats_name].terms.size = Math.min(s.limit, MAX_LIMIT) s.pull = stats_name + ".buckets.key" else: # PULL VALUE OUT OF THE stats AGGREGATE es_query.aggs[literal_field(canonical_name)].extended_stats.field = field_name s.pull = literal_field(canonical_name) + "." + aggregates1_4[s.aggregate] for i, s in enumerate(formula): canonical_name = literal_field(s.name) abs_value = s.value.map(es_column_map) if s.aggregate == "count": es_query.aggs[literal_field(canonical_name)].value_count.script = abs_value.to_ruby() s.pull = literal_field(canonical_name) + ".value" elif s.aggregate == "median": # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT key = literal_field(canonical_name + " percentile") es_query.aggs[key].percentiles.script = abs_value.to_ruby() es_query.aggs[key].percentiles.percents += [50] s.pull = key + ".values.50\.0" elif s.aggregate == "percentile": # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT key = literal_field(canonical_name + " percentile") percent = Math.round(s.percentile * 100, decimal=6) es_query.aggs[key].percentiles.script = abs_value.to_ruby() es_query.aggs[key].percentiles.percents += [percent] s.pull = key + ".values." + literal_field(unicode(percent)) elif s.aggregate == "cardinality": # ES USES DIFFERENT METHOD FOR CARDINALITY key = canonical_name + " cardinality" es_query.aggs[key].cardinality.script = abs_value.to_ruby() s.pull = key + ".value" elif s.aggregate == "stats": # REGULAR STATS stats_name = literal_field(canonical_name) es_query.aggs[stats_name].extended_stats.script = abs_value.to_ruby() # GET MEDIAN TOO! median_name = literal_field(canonical_name + " percentile") es_query.aggs[median_name].percentiles.script = abs_value.to_ruby() es_query.aggs[median_name].percentiles.percents += [50] s.pull = { "count": stats_name + ".count", "sum": stats_name + ".sum", "min": stats_name + ".min", "max": stats_name + ".max", "avg": stats_name + ".avg", "sos": stats_name + ".sum_of_squares", "std": stats_name + ".std_deviation", "var": stats_name + ".variance", "median": median_name + ".values.50\.0" } elif s.aggregate=="union": # USE TERMS AGGREGATE TO SIMULATE union stats_name = literal_field(canonical_name) es_query.aggs[stats_name].terms.script_field = abs_value.to_ruby() s.pull = stats_name + ".buckets.key" else: # PULL VALUE OUT OF THE stats AGGREGATE s.pull = canonical_name + "." + aggregates1_4[s.aggregate] es_query.aggs[canonical_name].extended_stats.script = abs_value.to_ruby() decoders = get_decoders_by_depth(query) start = 0 vars_ = query.where.vars() #<TERRIBLE SECTION> THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested split_where = split_expression_by_depth(query.where, schema=frum, map_=es_column_map) if len(split_field(frum.name)) > 1: if any(split_where[2::]): Log.error("Where clause is too deep") for d in decoders[1]: es_query = d.append_query(es_query, start) start += d.num_columns if split_where[1]: #TODO: INCLUDE FILTERS ON EDGES filter_ = simplify_esfilter(AndOp("and", split_where[1]).to_esfilter()) es_query = Dict( aggs={"_filter": set_default({"filter": filter_}, es_query)} ) es_query = wrap({ "aggs": {"_nested": set_default( { "nested": { "path": frum.query_path } }, es_query )} }) else: if any(split_where[1::]): Log.error("Where clause is too deep") for d in decoders[0]: es_query = d.append_query(es_query, start) start += d.num_columns if split_where[0]: #TODO: INCLUDE FILTERS ON EDGES filter = simplify_esfilter(AndOp("and", split_where[0]).to_esfilter()) es_query = Dict( aggs={"_filter": set_default({"filter": filter}, es_query)} ) # </TERRIBLE SECTION> if not es_query: es_query = wrap({"query": {"match_all": {}}}) es_query.size = 0 with Timer("ES query time") as es_duration: result = es09.util.post(es, es_query, query.limit) try: format_time = Timer("formatting") with format_time: decoders = [d for ds in decoders for d in ds] result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total) # IT APPEARS THE OLD doc_count IS GONE formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format] if query.edges: output = formatter(decoders, result.aggregations, start, query, select) elif query.groupby: output = groupby_formatter(decoders, result.aggregations, start, query, select) else: output = aggop_formatter(decoders, result.aggregations, start, query, select) output.meta.timing.formatting = format_time.duration output.meta.timing.es_search = es_duration.duration output.meta.content_type = mime_type output.meta.es_query = es_query return output except Exception, e: if query.format not in format_dispatch: Log.error("Format {{format|quote}} not supported yet", format=query.format, cause=e) Log.error("Some problem", e)
def round(self, interval, decimal=0): output = self / interval output = Math.round(output, decimal) return output
def round(self, interval, decimal=0): output = self / interval output = Math.round(output, decimal) return output
def transform(self, id, datazilla): try: r = datazilla.json_blob #ADD DATAZILLA MARKUP r.datazilla = { "id": id, "date_loaded": datazilla.date_loaded * 1000, "error_flag": datazilla.error_flag, "test_run_id": datazilla.test_run_id, "processed_flag": datazilla.processed_flag, "error_msg": datazilla.error_msg } #CONVERT UNIX TIMESTAMP TO MILLISECOND TIMESTAMP r.testrun.date *= 1000 def mainthread_transform(r): if r == None: return None output = Struct() for i in r.mainthread_readbytes: output[literal_field(i[1])].name = i[1] output[literal_field(i[1])].readbytes = i[0] r.mainthread_readbytes = None for i in r.mainthread_writebytes: output[literal_field(i[1])].name = i[1] output[literal_field(i[1])].writebytes = i[0] r.mainthread_writebytes = None for i in r.mainthread_readcount: output[literal_field(i[1])].name = i[1] output[literal_field(i[1])].readcount = i[0] r.mainthread_readcount = None for i in r.mainthread_writecount: output[literal_field(i[1])].name = i[1] output[literal_field(i[1])].writecount = i[0] r.mainthread_writecount = None r.mainthread = output.values() mainthread_transform(r.results_aux) mainthread_transform(r.results_xperf) #ADD PUSH LOG INFO try: branch = r.test_build.branch if branch.endswith("-Non-PGO"): r.test_build.branch = branch r.test_build.pgo = False branch = branch[0:-8] else: r.test_build.pgo = True with Profiler("get from pushlog"): if not self.pushlog: #NO PUSHLOG MEANS WE DO NOTHING TO MARKUP TEST RESULTS pass elif self.pushlog[branch]: possible_dates = self.pushlog[branch][r.test_build.revision] if possible_dates: r.test_build.push_date = int(Math.round(possible_dates[0].date * 1000)) else: if r.test_build.revision == 'NULL': r.test_build.no_pushlog = True # OOPS! SOMETHING BROKE elif CNV.milli2datetime(Math.min(r.testrun.date, r.datazilla.date_loaded)) < PUSHLOG_TOO_OLD: Log.note("{{branch}} @ {{revision}} has no pushlog, transforming anyway", r.test_build) r.test_build.no_pushlog = True else: Log.note("{{branch}} @ {{revision}} has no pushlog, try again later", r.test_build) return [] # TRY AGAIN LATER else: with self.locker: if branch not in self.unknown_branches: Log.note("Whole branch {{branch}} has no pushlog", {"branch":branch}) self.unknown_branches.add(branch) if CNV.milli2datetime(Math.min(r.testrun.date, r.datazilla.date_loaded)) < PUSHLOG_TOO_OLD: r.test_build.no_pushlog = True else: r.test_build.no_pushlog = True #return [r] #TODO: DO THIS IF WE FIGURE OUT HOW TO HANDLE THE VERY LARGE NUMBER OF RESULTS WITH NO PUSHLOG except Exception, e: Log.warning("{{branch}} @ {{revision}} has no pushlog", r.test_build, e) new_records = [] # RECORD THE UNKNOWN PART OF THE TEST RESULTS remainder = r.copy() remainder.results = None if len(remainder.keys()) > 4: new_records.append(remainder) #RECORD TEST RESULTS total = StructList() if r.testrun.suite in ["dromaeo_css", "dromaeo_dom"]: #dromaeo IS SPECIAL, REPLICATES ARE IN SETS OF FIVE #RECORD ALL RESULTS for i, (test_name, replicates) in enumerate(r.results.items()): for g, sub_results in Q.groupby(replicates, size=5): new_record = Struct( test_machine=r.test_machine, datazilla=r.datazilla, testrun=r.testrun, test_build=r.test_build, result={ "test_name": unicode(test_name) + "." + unicode(g), "ordering": i, "samples": sub_results } ) try: s = stats(sub_results) new_record.result.stats = s total.append(s) except Exception, e: Log.warning("can not reduce series to moments", e) new_records.append(new_record)
def int2Partition(value): if Math.round(value) == numPartitions: return edge.domain.NULL return edge.domain.getPartByKey((value * edge.domain.interval) + offset)
def int2Partition(value): if Math.round(value) == numPartitions: return edge.domain.NULL return edge.domain.getPartByKey(ref.add(edge.domain.interval.multiply(value)))
def query(path): with CProfiler(): try: with Timer("total duration") as query_timer: preamble_timer = Timer("preamble") with preamble_timer: if flask.request.headers.get("content-length", "") in ["", "0"]: # ASSUME A BROWSER HIT THIS POINT, SEND text/html RESPONSE BACK return Response(BLANK, status=400, headers={"Content-Type": "text/html"}) elif int(flask.request.headers["content-length"] ) > QUERY_SIZE_LIMIT: Log.error("Query is too large") request_body = flask.request.get_data().strip() text = convert.utf82unicode(request_body) text = replace_vars(text, flask.request.args) data = convert.json2value(text) record_request(flask.request, data, None, None) if data.meta.testing: _test_mode_wait(data) translate_timer = Timer("translate") with translate_timer: if data.sql: data = parse_sql(data.sql) frum = wrap_from(data['from']) result = jx.run(data, frum=frum) if isinstance( result, Container ): #TODO: REMOVE THIS CHECK, jx SHOULD ALWAYS RETURN Containers result = result.format(data.format) save_timer = Timer("save") with save_timer: if data.meta.save: try: result.meta.saved_as = save_query.query_finder.save( data) except Exception, e: Log.warning("Unexpected save problem", cause=e) result.meta.timing.preamble = Math.round( preamble_timer.duration.seconds, digits=4) result.meta.timing.translate = Math.round( translate_timer.duration.seconds, digits=4) result.meta.timing.save = Math.round( save_timer.duration.seconds, digits=4) result.meta.timing.total = "{{TOTAL_TIME}}" # TIMING PLACEHOLDER with Timer("jsonification") as json_timer: response_data = convert.unicode2utf8( convert.value2json(result)) with Timer("post timer"): # IMPORTANT: WE WANT TO TIME OF THE JSON SERIALIZATION, AND HAVE IT IN THE JSON ITSELF. # WE CHEAT BY DOING A (HOPEFULLY FAST) STRING REPLACEMENT AT THE VERY END timing_replacement = b'"total": ' + str(Math.round(query_timer.duration.seconds, digits=4)) +\ b', "jsonification": ' + str(Math.round(json_timer.duration.seconds, digits=4)) response_data = response_data.replace( b'"total": "{{TOTAL_TIME}}"', timing_replacement) Log.note("Response is {{num}} bytes in {{duration}}", num=len(response_data), duration=query_timer.duration) return Response( response_data, status=200, headers={"Content-Type": result.meta.content_type}) except Exception, e: e = Except.wrap(e) return _send_error(query_timer, request_body, e)