def int2Partition(value):
    """
    Map a numeric key to a partition of `edge.domain`.

    A key that rounds to zero maps to `edge.domain.NULL`.  Otherwise the first
    four and the last two characters of `str(value)` (presumably year and
    month - TODO confirm) are handed to `datetime` together with a literal 1,
    the result is shifted by `offset` using `addMilli`, and the partition is
    looked up with `edge.domain.getPartByKey`.
    """
    if Math.round(value) == 0:
        return edge.domain.NULL

    moment = datetime(str(value)[:4], str(value)[-2:], 1)
    shifted = moment.addMilli(offset)
    return edge.domain.getPartByKey(shifted)
def get_json(url, **kwargs):
    """
    GET `url` AND DECODE THE RESPONSE BODY AS JSON

    :param url: passed straight to `get`
    :param kwargs: passed straight to `get`
    :return: result of `json2value` applied to the decoded `all_content`

    ON A DECODING FAILURE, `Log.error` IS CALLED: WITH THE STATUS CODE WHEN IT
    ROUNDS TO 400 OR 500, OTHERWISE WITH THE DECODING EXCEPTION AS THE CAUSE
    """
    response = get(url, **kwargs)
    try:
        return json2value(utf82unicode(response.all_content))
    except Exception as e:
        status_class = Math.round(response.status_code, decimal=-2)
        if status_class not in [400, 500]:
            # THE SERVER ANSWERED FINE, SO THE BODY ITSELF IS THE PROBLEM
            Log.error(u"Good GET requests, but bad JSON", cause=e)
        else:
            Log.error(u"Bad GET response: {{code}}", code=response.status_code)
def next(self, value):
    """
    CONSUME ONE ROW: `value[0]` IS CONVERTED WITH `Date`, THE REMAINDER IS
    PASSED TO `self.child.next`

    THE BATCH NUMBER IS THE ROUNDED COUNT OF `self.duration` INTERVALS BETWEEN
    `self.start` AND THE FLOORED DATE.  WHEN IT DIFFERS FROM `self.batch`, THE
    CHILD IS RESET BEFORE IT SEES THE REST OF THE ROW.

    :return: `[self.batch]` FOLLOWED BY WHATEVER THE CHILD RETURNED
    """
    moment = Date(value[0])
    if self.last_value.floor(self.duration) > moment:
        Log.error("Expecting strictly increasing")
    self.last_value = moment

    elapsed = moment.floor(self.duration) - self.start
    batch = Math.round(elapsed / self.duration, decimal=0)
    if batch != self.batch:
        # NEW BATCH: THE CHILD STARTS OVER
        self.child.reset()
        self.batch = batch

    remainder = self.child.next(value[1:])
    return [self.batch] + remainder
def request(method, url, zip=None, retry=None, **kwargs):
    """
    JUST LIKE requests.request() BUT WITH DEFAULT HEADERS AND FIXES
    DEMANDS data IS ONE OF:
    * A JSON-SERIALIZABLE STRUCTURE, OR
    * LIST OF JSON-SERIALIZABLE STRUCTURES, OR
    * None

    Parameters
     * zip - ZIP THE REQUEST BODY, IF BIG ENOUGH
     * json - JSON-SERIALIZABLE STRUCTURE
     * retry - {"times": x, "sleep": y} STRUCTURE

    THE BYTE_STRINGS (b"") ARE NECESSARY TO PREVENT httplib.py FROM **FREAKING OUT**
    IT APPEARS requests AND httplib.py SIMPLY CONCATENATE STRINGS BLINDLY, WHICH
    INCLUDES url AND headers

    WHEN url IS A LIST, EACH URL IS TRIED IN ORDER: THE FIRST RESPONSE WHOSE
    STATUS DOES NOT ROUND TO 400 OR 500 IS RETURNED, AND THE RESPONSE OF THE
    LAST URL IS RETURNED REGARDLESS OF STATUS.  IF NOTHING WAS RETURNED,
    Log.error IS CALLED WITH ALL COLLECTED FAILURES AS THE CAUSE.

    NOTE(review): NO HANDLING OF A SINGLE (NON-LIST) url IS VISIBLE IN THIS BLOCK
    """
    global _warning_sent
    if not default_headers and not _warning_sent:
        # WARN ONLY ONCE PER PROCESS
        _warning_sent = True
        Log.warning(
            "The pyLibrary.env.http module was meant to add extra "
            "default headers to all requests, specifically the 'Referer' "
            "header with a URL to the project. Use the `pyLibrary.debug.constants.set()` "
            "function to set `pyLibrary.env.http.default_headers`")

    if isinstance(url, list):
        # TRY MANY URLS
        failures = []
        for remaining, u in jx.countdown(url):
            try:
                response = request(method, u, zip=zip, retry=retry, **kwargs)
                if Math.round(response.status_code, decimal=-2) not in [400, 500]:
                    return response
                if not remaining:
                    # LAST URL: HAND BACK WHATEVER WE GOT
                    return response
            except Exception as e:  # `as` FORM IS VALID ON PYTHON 2.6+ AND PYTHON 3
                e = Except.wrap(e)
                failures.append(e)
        Log.error("Tried {{num}} urls", num=len(url), cause=failures)
def query(path):
    """
    HANDLE ONE QUERY REQUEST AND RETURN A flask Response

    STEPS, EACH UNDER ITS OWN Timer:
    * preamble - READ THE REQUEST BODY, SUBSTITUTE VARS FROM THE URL ARGS,
      PARSE THE JSON AND RECORD THE REQUEST
    * translate - OPTIONALLY PARSE data.sql, THEN RUN THE QUERY WITH jx.run
    * save - OPTIONALLY SAVE THE QUERY (FAILURE HERE IS ONLY A WARNING)
    * jsonification - SERIALIZE THE RESULT

    :param path: NOT USED BY THIS BODY
    :return: Response WITH STATUS 200 AND THE SERIALIZED RESULT; STATUS 400 AND
             BLANK WHEN THE REQUEST HAS NO BODY; OTHERWISE WHATEVER
             _send_error RETURNS FOR THE CAUGHT EXCEPTION
    """
    with CProfiler():
        # BOUND BEFORE THE try SO THE ERROR HANDLER CAN ALWAYS REFERENCE IT, EVEN
        # WHEN THE FAILURE HAPPENS BEFORE THE BODY IS READ (eg "Query is too large")
        # NOTE(review): ASSUMES _send_error ACCEPTS AN EMPTY BODY - CONFIRM
        request_body = b""
        try:
            with Timer("total duration") as query_timer:
                preamble_timer = Timer("preamble")
                with preamble_timer:
                    if flask.request.headers.get("content-length", "") in ["", "0"]:
                        # ASSUME A BROWSER HIT THIS POINT, SEND text/html RESPONSE BACK
                        return Response(BLANK, status=400, headers={"Content-Type": "text/html"})
                    elif int(flask.request.headers["content-length"]) > QUERY_SIZE_LIMIT:
                        Log.error("Query is too large")

                    request_body = flask.request.get_data().strip()
                    text = convert.utf82unicode(request_body)
                    text = replace_vars(text, flask.request.args)
                    data = convert.json2value(text)
                    record_request(flask.request, data, None, None)
                    if data.meta.testing:
                        _test_mode_wait(data)

                translate_timer = Timer("translate")
                with translate_timer:
                    if data.sql:
                        data = parse_sql(data.sql)
                    frum = wrap_from(data['from'])
                    result = jx.run(data, frum=frum)

                    if isinstance(result, Container):  #TODO: REMOVE THIS CHECK, jx SHOULD ALWAYS RETURN Containers
                        result = result.format(data.format)

                save_timer = Timer("save")
                with save_timer:
                    if data.meta.save:
                        try:
                            result.meta.saved_as = save_query.query_finder.save(data)
                        except Exception as e:
                            Log.warning("Unexpected save problem", cause=e)

                result.meta.timing.preamble = Math.round(preamble_timer.duration.seconds, digits=4)
                result.meta.timing.translate = Math.round(translate_timer.duration.seconds, digits=4)
                result.meta.timing.save = Math.round(save_timer.duration.seconds, digits=4)
                result.meta.timing.total = "{{TOTAL_TIME}}"  # TIMING PLACEHOLDER

                with Timer("jsonification") as json_timer:
                    response_data = convert.unicode2utf8(convert.value2json(result))

            with Timer("post timer"):
                # IMPORTANT: WE WANT TO TIME OF THE JSON SERIALIZATION, AND HAVE IT IN THE JSON ITSELF.
                # WE CHEAT BY DOING A (HOPEFULLY FAST) STRING REPLACEMENT AT THE VERY END
                timing_replacement = b'"total": ' + str(Math.round(query_timer.duration.seconds, digits=4)) +\
                                     b', "jsonification": ' + str(Math.round(json_timer.duration.seconds, digits=4))
                response_data = response_data.replace(b'"total": "{{TOTAL_TIME}}"', timing_replacement)
                Log.note("Response is {{num}} bytes in {{duration}}", num=len(response_data), duration=query_timer.duration)

                return Response(
                    response_data,
                    status=200,
                    headers={"Content-Type": result.meta.content_type}
                )
        except Exception as e:
            e = Except.wrap(e)
            return _send_error(query_timer, request_body, e)
def round(self, interval, decimal=0):
    """
    DIVIDE self BY interval, THEN HAND THE QUOTIENT AND decimal TO Math.round

    :param interval: THE DIVISOR
    :param decimal: SECOND ARGUMENT OF Math.round (DEFAULT 0)
    :return: WHATEVER Math.round RETURNS
    """
    return Math.round(self / interval, decimal)
def es_aggsop(es, frum, query):
    """
    BUILD AN AGGREGATION TREE FOR query, SEND IT WITH es_post, AND FORMAT THE RESULT

    * SELECT CLAUSES OVER A Variable BECOME FIELD-BASED AGGREGATES
    * OTHER SELECT CLAUSES WITH AN aggregate ("formula") BECOME SCRIPT-BASED AGGREGATES
    * EACH SELECT CLAUSE GETS A `pull` FUNCTION (MOSTLY VIA jx_expression_to_function)
    * DECODERS AND where FILTERS ARE WRAPPED AROUND THE TREE, ONE PATH AT A TIME

    :param es: PASSED TO es_post
    :param frum: PROVIDES THE schema
    :param query: THE QUERY; A COPY IS MARKED UP, THE ORIGINAL IS NOT REASSIGNED
    :return: OUTPUT OF THE FORMATTER CHOSEN BY query.format, WITH meta.timing,
             meta.content_type AND meta.es_query FILLED IN

    UNSUPPORTED COMBINATIONS AND FORMATTING FAILURES ARE REPORTED THROUGH Log.error
    """
    query = query.copy()  # WE WILL MARK UP THIS QUERY
    schema = frum.schema
    query_path = schema.query_path[0]
    select = listwrap(query.select)

    new_select = Data()  # MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING
    formula = []
    for s in select:
        if isinstance(s.value, Variable):
            s.query_path = query_path
            if s.aggregate == "count":
                new_select["count_" + literal_field(s.value.var)] += [s]
            else:
                new_select[literal_field(s.value.var)] += [s]
        elif s.aggregate:
            split_select = split_expression_by_path(s.value, schema)
            for si_key, si_value in split_select.items():
                if si_value:
                    if s.query_path:
                        Log.error("can not handle more than one depth per select")
                    s.query_path = si_key
            formula.append(s)

    # FIELD-BASED AGGREGATES
    acc = Aggs()
    for _, many in new_select.items():
        for s in many:
            canonical_name = s.name
            if s.aggregate in ("value_count", "count"):
                columns = frum.schema.values(s.value.var, exclude_type=(OBJECT, NESTED))
            else:
                columns = frum.schema.values(s.value.var)

            if s.aggregate == "count":
                canonical_names = []
                for column in columns:
                    es_name = column.es_column + "_count"
                    if column.jx_type == EXISTS:
                        if column.nested_path[0] == query_path:
                            canonical_names.append("doc_count")
                            acc.add(
                                NestedAggs(column.nested_path[0]).add(
                                    ComplexAggs(s)))
                    else:
                        canonical_names.append("value")
                        acc.add(
                            NestedAggs(column.nested_path[0]).add(
                                ExprAggs(es_name, {
                                    "value_count": {
                                        "field": column.es_column
                                    }
                                }, s)))
                if len(canonical_names) == 1:
                    s.pull = jx_expression_to_function(canonical_names[0])
                else:
                    s.pull = jx_expression_to_function(
                        {"add": canonical_names})
            elif s.aggregate == "median":
                if len(columns) > 1:
                    Log.error(
                        "Do not know how to count columns with more than one type (script probably)"
                    )
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = canonical_name + " percentile"
                acc.add(
                    ExprAggs(
                        key, {
                            "percentiles": {
                                "field": first(columns).es_column,
                                "percents": [50]
                            }
                        }, s))
                s.pull = jx_expression_to_function("values.50\\.0")
            elif s.aggregate == "percentile":
                if len(columns) > 1:
                    Log.error(
                        "Do not know how to count columns with more than one type (script probably)"
                    )
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = canonical_name + " percentile"
                # WAS `s.percetile < 0` (TYPO): THE LOWER BOUND MUST BE CHECKED
                # AGAINST THE SAME ATTRIBUTE AS THE UPPER BOUND
                if isinstance(
                        s.percentile,
                        text_type) or s.percentile < 0 or 1 < s.percentile:
                    Log.error(
                        "Expecting percentile to be a float from 0.0 to 1.0")
                percent = Math.round(s.percentile * 100, decimal=6)
                acc.add(
                    ExprAggs(
                        key, {
                            "percentiles": {
                                "field": first(columns).es_column,
                                "percents": [percent],
                                "tdigest": {
                                    "compression": 2
                                }
                            }
                        }, s))
                s.pull = jx_expression_to_function(
                    join_field(["values", text_type(percent)]))
            elif s.aggregate == "cardinality":
                for column in columns:
                    path = column.es_column + "_cardinality"
                    acc.add(
                        ExprAggs(path, {"cardinality": {
                            "field": column.es_column
                        }}, s))
                s.pull = jx_expression_to_function("value")
            elif s.aggregate == "stats":
                if len(columns) > 1:
                    Log.error(
                        "Do not know how to count columns with more than one type (script probably)"
                    )
                # REGULAR STATS
                stats_name = literal_field(canonical_name)
                complex = ComplexAggs(s).add(
                    ExprAggs(canonical_name, {
                        "extended_stats": {
                            "field": first(columns).es_column
                        }
                    }, None))
                # GET MEDIAN TOO!
                median_name = literal_field(canonical_name + "_percentile")
                complex.add(
                    ExprAggs(
                        canonical_name + "_percentile", {
                            "percentiles": {
                                "field": first(columns).es_column,
                                "percents": [50]
                            }
                        }, None))
                acc.add(complex)
                s.pull = get_pull_stats(stats_name, median_name)
            elif s.aggregate == "union":
                for column in columns:
                    script = {
                        "scripted_metric": {
                            'init_script':
                            'params._agg.terms = new HashSet()',
                            'map_script':
                            'for (v in doc[' + quote(column.es_column) +
                            '].values) params._agg.terms.add(v);',
                            'combine_script':
                            'return params._agg.terms.toArray()',
                            'reduce_script':
                            'HashSet output = new HashSet(); for (a in params._aggs) { if (a!=null) for (v in a) {output.add(v)} } return output.toArray()',
                        }
                    }
                    stats_name = column.es_column
                    acc.add(
                        NestedAggs(column.nested_path[0]).add(
                            ExprAggs(stats_name, script, s)))
                s.pull = jx_expression_to_function("value")
            elif s.aggregate == "count_values":
                # RETURN MAP FROM VALUE TO THE NUMBER OF TIMES FOUND IN THE DOCUMENTS
                # NOT A NESTED DOC, RATHER A MULTIVALUE FIELD
                for column in columns:
                    script = {
                        "scripted_metric": {
                            'params': {
                                "_agg": {}
                            },
                            'init_script':
                            'params._agg.terms = new HashMap()',
                            'map_script':
                            'for (v in doc[' + quote(column.es_column) +
                            '].values) params._agg.terms.put(v, Optional.ofNullable(params._agg.terms.get(v)).orElse(0)+1);',
                            'combine_script':
                            'return params._agg.terms',
                            'reduce_script':
                            ''' HashMap output = new HashMap(); for (agg in params._aggs) { if (agg!=null){ for (e in agg.entrySet()) { String key = String.valueOf(e.getKey()); output.put(key, e.getValue() + Optional.ofNullable(output.get(key)).orElse(0)); } } } return output; '''
                        }
                    }
                    stats_name = encode_property(column.es_column)
                    acc.add(
                        NestedAggs(column.nested_path[0]).add(
                            ExprAggs(stats_name, script, s)))
                s.pull = jx_expression_to_function("value")
            else:
                if not columns:
                    s.pull = jx_expression_to_function(NULL)
                else:
                    for c in columns:
                        acc.add(
                            NestedAggs(c.nested_path[0]).add(
                                ExprAggs(
                                    canonical_name,
                                    {"extended_stats": {
                                        "field": c.es_column
                                    }}, s)))
                    s.pull = jx_expression_to_function(aggregates[s.aggregate])

    # SCRIPT-BASED AGGREGATES
    for i, s in enumerate(formula):
        s_path = [
            k for k, v in split_expression_by_path(s.value,
                                                   schema=schema).items() if v
        ]
        if len(s_path) == 0:
            # FOR CONSTANTS
            nest = NestedAggs(query_path)
            acc.add(nest)
        elif len(s_path) == 1:
            nest = NestedAggs(first(s_path))
            acc.add(nest)
        else:
            Log.error("do not know how to handle")

        canonical_name = s.name
        if isinstance(s.value, TupleOp):
            if s.aggregate == "count":
                # TUPLES ALWAYS EXIST, SO COUNTING THEM IS EASY
                s.pull = jx_expression_to_function("doc_count")
            elif s.aggregate in ('max', 'maximum', 'min', 'minimum'):
                if s.aggregate in ('max', 'maximum'):
                    dir = 1
                    op = "max"
                else:
                    dir = -1
                    op = 'min'

                nully = TupleOp("tuple", [NULL] * len(
                    s.value.terms)).partial_eval().to_es_script(schema).expr
                selfy = s.value.partial_eval().to_es_script(schema).expr

                script = {
                    "scripted_metric": {
                        'init_script':
                        'params._agg.best = ' + nully + ';',
                        'map_script':
                        'params._agg.best = ' + expand_template(
                            MAX_OF_TUPLE, {
                                "expr1": "params._agg.best",
                                "expr2": selfy,
                                "dir": dir,
                                "op": op
                            }) + ";",
                        'combine_script':
                        'return params._agg.best',
                        'reduce_script':
                        'return params._aggs.stream().' + op + '(' +
                        expand_template(COMPARE_TUPLE, {
                            "dir": dir,
                            "op": op
                        }) + ').get()',
                    }
                }
                nest.add(
                    NestedAggs(query_path).add(
                        ExprAggs(canonical_name, script, s)))
                s.pull = jx_expression_to_function("value")
            else:
                Log.error("{{agg}} is not a supported aggregate over a tuple",
                          agg=s.aggregate)
        elif s.aggregate == "count":
            nest.add(
                ExprAggs(
                    canonical_name, {
                        "value_count": {
                            "script":
                            s.value.partial_eval().to_es_script(schema).script(
                                schema)
                        }
                    }, s))
            s.pull = jx_expression_to_function("value")
        elif s.aggregate == "median":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            nest.add(
                ExprAggs(
                    key, {
                        "percentiles": {
                            "script":
                            s.value.to_es_script(schema).script(schema),
                            "percents": [50]
                        }
                    }, s))
            s.pull = jx_expression_to_function(join_field(["50.0"]))
        elif s.aggregate == "percentile":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            percent = Math.round(s.percentile * 100, decimal=6)
            nest.add(
                ExprAggs(
                    key, {
                        "percentiles": {
                            "script":
                            s.value.to_es_script(schema).script(schema),
                            "percents": [percent]
                        }
                    }, s))
            s.pull = jx_expression_to_function(
                join_field(["values", text_type(percent)]))
        elif s.aggregate == "cardinality":
            # ES USES DIFFERENT METHOD FOR CARDINALITY
            key = canonical_name + " cardinality"
            nest.add(
                ExprAggs(
                    key, {
                        "cardinality": {
                            "script":
                            s.value.to_es_script(schema).script(schema)
                        }
                    }, s))
            s.pull = jx_expression_to_function("value")
        elif s.aggregate == "stats":
            # REGULAR STATS
            stats_name = canonical_name
            nest.add(
                ComplexAggs(s).add(
                    ExprAggs(
                        stats_name, {
                            "extended_stats": {
                                "script":
                                s.value.to_es_script(schema).script(schema)
                            }
                        }, None)))
            # GET MEDIAN TOO!
            median_name = canonical_name + " percentile"
            nest.add(
                ExprAggs(
                    median_name, {
                        "percentiles": {
                            "script":
                            s.value.to_es_script(schema).script(schema),
                            "percents": [50]
                        }
                    }, s))
            # NOTE(review): get_pull_stats IS CALLED WITH TWO ARGUMENTS IN THE
            # FIELD-BASED stats BRANCH AND THREE HERE - CONFIRM ITS SIGNATURE
            s.pull = get_pull_stats(None, stats_name, median_name)
        elif s.aggregate == "union":
            # USE TERMS AGGREGATE TO SIMULATE union
            nest.add(
                TermsAggs(canonical_name, {
                    "script_field":
                    s.value.to_es_script(schema).script(schema)
                }, s))
            s.pull = jx_expression_to_function("key")
        else:
            # PULL VALUE OUT OF THE stats AGGREGATE
            s.pull = jx_expression_to_function(aggregates[s.aggregate])
            nest.add(
                ExprAggs(
                    canonical_name, {
                        "extended_stats": {
                            "script":
                            s.value.to_es_script(schema).script(schema)
                        }
                    }, s))

    # WRAP THE TREE IN DECODERS AND FILTERS, ONE PATH AT A TIME
    acc = NestedAggs(query_path).add(acc)
    split_decoders = get_decoders_by_path(query)
    split_wheres = split_expression_by_path(query.where, schema=frum.schema)

    start = 0
    decoders = [None] * (len(query.edges) + len(query.groupby))
    paths = list(reversed(sorted(split_wheres.keys() | split_decoders.keys())))
    for path in paths:
        literal_path = literal_field(path)
        decoder = split_decoders[literal_path]
        where = split_wheres[literal_path]

        for d in decoder:
            decoders[d.edge.dim] = d
            acc = d.append_query(path, acc)
            start += d.num_columns

        if where:
            acc = FilterAggs("_filter", AndOp("and", where), None).add(acc)
        acc = NestedAggs(path).add(acc)

    acc = NestedAggs('.').add(acc)
    acc = simplify(acc)
    es_query = wrap(acc.to_es(schema))

    es_query.size = 0

    with Timer("ES query time", silent=not DEBUG) as es_duration:
        result = es_post(es, es_query, query.limit)

    try:
        format_time = Timer("formatting", silent=not DEBUG)
        with format_time:
            # result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total)  # IT APPEARS THE OLD doc_count IS GONE
            aggs = unwrap(result.aggregations)

            formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[
                query.format]
            if query.edges:
                output = formatter(aggs, acc, query, decoders, select)
            elif query.groupby:
                output = groupby_formatter(aggs, acc, query, decoders,
                                           select)
            else:
                output = aggop_formatter(aggs, acc, query, decoders, select)

        output.meta.timing.formatting = format_time.duration
        output.meta.timing.es_search = es_duration.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        if query.format not in format_dispatch:
            Log.error("Format {{format|quote}} not supported yet",
                      format=query.format,
                      cause=e)
        Log.error("Some problem", cause=e)
def es_aggsop(es, frum, query):
    """
    BUILD AN ES AGGREGATION QUERY FOR query, SEND IT WITH es_post, AND FORMAT THE RESULT

    * SELECT CLAUSES OVER A Variable BECOME FIELD-BASED AGGREGATES IN es_query.aggs
    * OTHER SELECT CLAUSES WITH AN aggregate ("formula") BECOME SCRIPT-BASED AGGREGATES
    * EACH SELECT CLAUSE GETS A `pull` THAT LOCATES ITS VALUE IN THE RESPONSE
    * DECODERS AND where FILTERS ARE THEN WRAPPED AROUND es_query

    :param es: PASSED TO es_post
    :param frum: PROVIDES THE schema AND name
    :param query: THE QUERY; A COPY IS MARKED UP, THE ORIGINAL IS NOT REASSIGNED
    :return: OUTPUT OF THE FORMATTER CHOSEN BY query.format, WITH meta.timing,
             meta.content_type AND meta.es_query FILLED IN

    UNSUPPORTED COMBINATIONS AND FORMATTING FAILURES ARE REPORTED THROUGH Log.error
    """
    query = query.copy()  # WE WILL MARK UP THIS QUERY
    schema = frum.schema
    select = listwrap(query.select)

    es_query = Data()
    new_select = Data()  # MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING
    formula = []
    for s in select:
        if s.aggregate == "count" and isinstance(s.value, Variable) and s.value.var == ".":
            # NOTE(review): query_path IS INDEXED WITH [0] FURTHER DOWN, BUT
            # COMPARED DIRECTLY TO "." HERE - CONFIRM WHICH IS INTENDED
            if schema.query_path == ".":
                s.pull = jx_expression_to_function("doc_count")
            else:
                s.pull = jx_expression_to_function({"coalesce": ["_nested.doc_count", "doc_count", 0]})
        elif isinstance(s.value, Variable):
            if s.aggregate == "count":
                new_select["count_"+literal_field(s.value.var)] += [s]
            else:
                new_select[literal_field(s.value.var)] += [s]
        elif s.aggregate:
            formula.append(s)

    # FIELD-BASED AGGREGATES
    for canonical_name, many in new_select.items():
        for s in many:
            columns = frum.schema.values(s.value.var)

            if s.aggregate == "count":
                canonical_names = []
                for column in columns:
                    cn = literal_field(column.es_column + "_count")
                    if column.jx_type == EXISTS:
                        canonical_names.append(cn + ".doc_count")
                        es_query.aggs[cn].filter.range = {column.es_column: {"gt": 0}}
                    else:
                        canonical_names.append(cn+ ".value")
                        es_query.aggs[cn].value_count.field = column.es_column
                if len(canonical_names) == 1:
                    s.pull = jx_expression_to_function(canonical_names[0])
                else:
                    s.pull = jx_expression_to_function({"add": canonical_names})
            elif s.aggregate == "median":
                if len(columns) > 1:
                    Log.error("Do not know how to count columns with more than one type (script probably)")
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")

                es_query.aggs[key].percentiles.field = columns[0].es_column
                es_query.aggs[key].percentiles.percents += [50]
                s.pull = jx_expression_to_function(key + ".values.50\\.0")
            elif s.aggregate == "percentile":
                if len(columns) > 1:
                    Log.error("Do not know how to count columns with more than one type (script probably)")
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")
                # WAS `s.percetile < 0` (TYPO): THE LOWER BOUND MUST BE CHECKED
                # AGAINST THE SAME ATTRIBUTE AS THE UPPER BOUND
                if isinstance(s.percentile, text_type) or s.percentile < 0 or 1 < s.percentile:
                    Log.error("Expecting percentile to be a float from 0.0 to 1.0")
                percent = Math.round(s.percentile * 100, decimal=6)

                es_query.aggs[key].percentiles.field = columns[0].es_column
                es_query.aggs[key].percentiles.percents += [percent]
                s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent)))
            elif s.aggregate == "cardinality":
                canonical_names = []
                for column in columns:
                    cn = literal_field(column.es_column + "_cardinality")
                    canonical_names.append(cn)
                    es_query.aggs[cn].cardinality.field = column.es_column
                if len(columns) == 1:
                    s.pull = jx_expression_to_function(canonical_names[0] + ".value")
                else:
                    s.pull = jx_expression_to_function({"add": [cn + ".value" for cn in canonical_names], "default": 0})
            elif s.aggregate == "stats":
                if len(columns) > 1:
                    Log.error("Do not know how to count columns with more than one type (script probably)")
                # REGULAR STATS
                stats_name = literal_field(canonical_name)
                es_query.aggs[stats_name].extended_stats.field = columns[0].es_column

                # GET MEDIAN TOO!
                median_name = literal_field(canonical_name + "_percentile")
                es_query.aggs[median_name].percentiles.field = columns[0].es_column
                es_query.aggs[median_name].percentiles.percents += [50]

                s.pull = get_pull_stats(stats_name, median_name)
            elif s.aggregate == "union":
                pulls = []
                for column in columns:
                    script = {"scripted_metric": {
                        'init_script': 'params._agg.terms = new HashSet()',
                        'map_script': 'for (v in doc['+quote(column.es_column)+'].values) params._agg.terms.add(v)',
                        'combine_script': 'return params._agg.terms.toArray()',
                        'reduce_script': 'HashSet output = new HashSet(); for (a in params._aggs) { if (a!=null) for (v in a) {output.add(v)} } return output.toArray()',
                    }}
                    stats_name = encode_property(column.es_column)
                    if column.nested_path[0] == ".":
                        es_query.aggs[stats_name] = script
                        pulls.append(jx_expression_to_function(stats_name + ".value"))
                    else:
                        es_query.aggs[stats_name] = {
                            "nested": {"path": column.nested_path[0]},
                            "aggs": {"_nested": script}
                        }
                        pulls.append(jx_expression_to_function(stats_name + "._nested.value"))

                if len(pulls) == 0:
                    s.pull = NULL
                elif len(pulls) == 1:
                    s.pull = pulls[0]
                else:
                    # BIND pulls AS A DEFAULT ARGUMENT: THE NAME IS REASSIGNED FOR EVERY
                    # union SELECT, AND A PLAIN CLOSURE WOULD SEE ONLY THE LAST LIST
                    s.pull = lambda row, pulls=pulls: UNION(p(row) for p in pulls)
            else:
                if len(columns) > 1:
                    Log.error("Do not know how to count columns with more than one type (script probably)")
                elif len(columns) <1:
                    # PULL VALUE OUT OF THE stats AGGREGATE
                    s.pull = jx_expression_to_function({"null":{}})
                else:
                    # PULL VALUE OUT OF THE stats AGGREGATE
                    es_query.aggs[literal_field(canonical_name)].extended_stats.field = columns[0].es_column
                    s.pull = jx_expression_to_function({"coalesce": [literal_field(canonical_name) + "." + aggregates[s.aggregate], s.default]})

    # SCRIPT-BASED AGGREGATES
    for i, s in enumerate(formula):
        canonical_name = literal_field(s.name)

        if isinstance(s.value, TupleOp):
            if s.aggregate == "count":
                # TUPLES ALWAYS EXIST, SO COUNTING THEM IS EASY
                # NOTE(review): EVERY OTHER BRANCH ASSIGNS A FUNCTION TO s.pull - CONFIRM A PLAIN STRING IS ACCEPTED
                s.pull = "doc_count"
            elif s.aggregate in ('max', 'maximum', 'min', 'minimum'):
                if s.aggregate in ('max', 'maximum'):
                    dir = 1
                    op = "max"
                else:
                    dir = -1
                    op = 'min'

                nully = TupleOp("tuple", [NULL]*len(s.value.terms)).partial_eval().to_es_script(schema).expr
                selfy = s.value.partial_eval().to_es_script(schema).expr

                script = {"scripted_metric": {
                    'init_script': 'params._agg.best = ' + nully + ';',
                    'map_script': 'params._agg.best = ' + expand_template(MAX_OF_TUPLE, {"expr1": "params._agg.best", "expr2": selfy, "dir": dir, "op": op}) + ";",
                    'combine_script': 'return params._agg.best',
                    'reduce_script': 'return params._aggs.stream().max(' + expand_template(COMPARE_TUPLE, {"dir": dir, "op": op}) + ').get()',
                }}
                if schema.query_path[0] == ".":
                    es_query.aggs[canonical_name] = script
                    s.pull = jx_expression_to_function(literal_field(canonical_name) + ".value")
                else:
                    es_query.aggs[canonical_name] = {
                        "nested": {"path": schema.query_path[0]},
                        "aggs": {"_nested": script}
                    }
                    s.pull = jx_expression_to_function(literal_field(canonical_name) + "._nested.value")
            else:
                Log.error("{{agg}} is not a supported aggregate over a tuple", agg=s.aggregate)
        elif s.aggregate == "count":
            es_query.aggs[literal_field(canonical_name)].value_count.script = s.value.partial_eval().to_es_script(schema).script(schema)
            s.pull = jx_expression_to_function(literal_field(canonical_name) + ".value")
        elif s.aggregate == "median":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")

            es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema)
            es_query.aggs[key].percentiles.percents += [50]
            s.pull = jx_expression_to_function(key + ".values.50\\.0")
        elif s.aggregate == "percentile":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            percent = Math.round(s.percentile * 100, decimal=6)

            es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema)
            es_query.aggs[key].percentiles.percents += [percent]
            s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent)))
        elif s.aggregate == "cardinality":
            # ES USES DIFFERENT METHOD FOR CARDINALITY
            key = canonical_name + " cardinality"

            es_query.aggs[key].cardinality.script = s.value.to_es_script(schema).script(schema)
            s.pull = jx_expression_to_function(key + ".value")
        elif s.aggregate == "stats":
            # REGULAR STATS
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].extended_stats.script = s.value.to_es_script(schema).script(schema)

            # GET MEDIAN TOO!
            median_name = literal_field(canonical_name + " percentile")
            es_query.aggs[median_name].percentiles.script = s.value.to_es_script(schema).script(schema)
            es_query.aggs[median_name].percentiles.percents += [50]

            s.pull = get_pull_stats(stats_name, median_name)
        elif s.aggregate == "union":
            # USE TERMS AGGREGATE TO SIMULATE union
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].terms.script_field = s.value.to_es_script(schema).script(schema)
            s.pull = jx_expression_to_function(stats_name + ".buckets.key")
        else:
            # PULL VALUE OUT OF THE stats AGGREGATE
            s.pull = jx_expression_to_function(canonical_name + "." + aggregates[s.aggregate])
            es_query.aggs[canonical_name].extended_stats.script = s.value.to_es_script(schema).script(schema)

    decoders = get_decoders_by_depth(query)
    start = 0

    #<TERRIBLE SECTION> THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested
    split_where = split_expression_by_depth(query.where, schema=frum.schema)

    if len(split_field(frum.name)) > 1:
        if any(split_where[2::]):
            Log.error("Where clause is too deep")

        for d in decoders[1]:
            es_query = d.append_query(es_query, start)
            start += d.num_columns

        if split_where[1]:
            #TODO: INCLUDE FILTERS ON EDGES
            filter_ = AndOp("and", split_where[1]).to_esfilter(schema)
            es_query = Data(
                aggs={"_filter": set_default({"filter": filter_}, es_query)}
            )

        es_query = wrap({
            "aggs": {"_nested": set_default(
                {"nested": {"path": schema.query_path[0]}},
                es_query
            )}
        })
    else:
        if any(split_where[1::]):
            Log.error("Where clause is too deep")

    if decoders:
        for d in jx.reverse(decoders[0]):
            es_query = d.append_query(es_query, start)
            start += d.num_columns

    if split_where[0]:
        #TODO: INCLUDE FILTERS ON EDGES
        filter = AndOp("and", split_where[0]).to_esfilter(schema)
        es_query = Data(
            aggs={"_filter": set_default({"filter": filter}, es_query)}
        )
    # </TERRIBLE SECTION>

    if not es_query:
        es_query = wrap({"query": {"match_all": {}}})

    es_query.size = 0

    with Timer("ES query time") as es_duration:
        result = es_post(es, es_query, query.limit)

    try:
        format_time = Timer("formatting")
        with format_time:
            # FLATTEN THE PER-DEPTH LISTS INTO ONE LIST OF DECODERS
            decoders = [d for ds in decoders for d in ds]
            result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total)  # IT APPEARS THE OLD doc_count IS GONE

            formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format]
            if query.edges:
                output = formatter(decoders, result.aggregations, start, query, select)
            elif query.groupby:
                output = groupby_formatter(decoders, result.aggregations, start, query, select)
            else:
                output = aggop_formatter(decoders, result.aggregations, start, query, select)

        output.meta.timing.formatting = format_time.duration
        output.meta.timing.es_search = es_duration.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        if query.format not in format_dispatch:
            Log.error("Format {{format|quote}} not supported yet", format=query.format, cause=e)
        Log.error("Some problem", cause=e)
def request(method, url, headers=None, zip=None, retry=None, **kwargs):
    """
    JUST LIKE requests.request() BUT WITH DEFAULT HEADERS AND FIXES
    DEMANDS data IS ONE OF:
    * A JSON-SERIALIZABLE STRUCTURE, OR
    * LIST OF JSON-SERIALIZABLE STRUCTURES, OR
    * None

    Parameters
     * headers - EXTRA HEADERS; session.headers AND default_headers FILL IN THE REST
     * zip - ZIP THE REQUEST BODY, IF BIG ENOUGH
     * json - JSON-SERIALIZABLE STRUCTURE
     * retry - {"times": x, "sleep": y} STRUCTURE
     * session - (IN kwargs) USE THIS SESSION INSTEAD OF MAKING (AND CLOSING) A NEW ONE

    WHEN url IS A LIST, EACH URL IS TRIED IN ORDER WITH THE SAME headers, zip,
    retry AND kwargs; THE FIRST RESPONSE WHOSE STATUS DOES NOT ROUND TO 400 OR
    500 IS RETURNED, AND THE RESPONSE OF THE LAST URL IS RETURNED REGARDLESS

    THE BYTE_STRINGS (b"") ARE NECESSARY TO PREVENT httplib.py FROM **FREAKING OUT**
    IT APPEARS requests AND httplib.py SIMPLY CONCATENATE STRINGS BLINDLY, WHICH
    INCLUDES url AND headers

    :return: THE RESULT OF session.request() FOR THE FIRST ATTEMPT THAT DOES NOT RAISE
    FAILURES (SETUP, ALL URLS FAILING, ALL RETRIES FAILING) ARE REPORTED THROUGH Log.error
    """
    global _warning_sent
    global request_count

    if not _warning_sent and not default_headers:
        Log.warning(text_type(
            "The pyLibrary.env.http module was meant to add extra " +
            "default headers to all requests, specifically the 'Referer' " +
            "header with a URL to the project. Use the `pyLibrary.debug.constants.set()` " +
            "function to set `pyLibrary.env.http.default_headers`"
        ))
    _warning_sent = True

    if isinstance(url, list):
        # TRY MANY URLS
        failures = []
        for remaining, u in jx.countdown(url):
            try:
                # headers AND zip ARE NAMED PARAMETERS, SO THEY ARE NOT IN kwargs;
                # THEY MUST BE FORWARDED EXPLICITLY OR THEY ARE LOST FOR LIST URLS
                response = request(method, u, headers=headers, zip=zip, retry=retry, **kwargs)
                if Math.round(response.status_code, decimal=-2) not in [400, 500]:
                    return response
                if not remaining:
                    return response
            except Exception as e:
                e = Except.wrap(e)
                failures.append(e)
        Log.error(u"Tried {{num}} urls", num=len(url), cause=failures)

    if 'session' in kwargs:
        # CALLER OWNS THE SESSION, SO closing() GETS Null INSTEAD OF THE SESSION
        session = kwargs['session']
        del kwargs['session']
        sess = Null
    else:
        sess = session = sessions.Session()

    with closing(sess):
        if PY2 and isinstance(url, text_type):
            # httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE
            url = url.encode('ascii')

        try:
            set_default(kwargs, {"zip":zip, "retry": retry}, DEFAULTS)
            _to_ascii_dict(kwargs)

            # HEADERS
            headers = kwargs['headers'] = unwrap(set_default(headers, session.headers, default_headers))
            _to_ascii_dict(headers)
            del kwargs['headers']

            # RETRY
            retry = wrap(kwargs['retry'])
            if isinstance(retry, Number):
                retry = set_default({"times":retry}, DEFAULTS['retry'])
            if isinstance(retry.sleep, Duration):
                retry.sleep = retry.sleep.seconds
            del kwargs['retry']

            # JSON
            if 'json' in kwargs:
                kwargs['data'] = value2json(kwargs['json']).encode('utf8')
                del kwargs['json']

            # ZIP
            set_default(headers, {'Accept-Encoding': 'compress, gzip'})

            if kwargs['zip'] and len(coalesce(kwargs.get('data'))) > 1000:
                compressed = convert.bytes2zip(kwargs['data'])
                headers['content-encoding'] = 'gzip'
                kwargs['data'] = compressed
            del kwargs['zip']
        except Exception as e:
            Log.error(u"Request setup failure on {{url}}", url=url, cause=e)

        errors = []
        for r in range(retry.times):
            if r:
                # PAUSE BEFORE EVERY ATTEMPT EXCEPT THE FIRST
                Till(seconds=retry.sleep).wait()

            try:
                DEBUG and Log.note(u"http {{method|upper}} to {{url}}", method=method, url=text_type(url))
                request_count += 1
                return session.request(method=method, headers=headers, url=str(url), **kwargs)
            except Exception as e:
                e = Except.wrap(e)
                if retry['http'] and str(url).startswith("https://") and "EOF occurred in violation of protocol" in e:
                    # FALL BACK TO PLAIN http FOR THE NEXT ATTEMPT
                    url = URL("http://" + str(url)[8:])
                    Log.note("Changed {{url}} to http due to SSL EOF violation.", url=str(url))
                errors.append(e)

        if " Read timed out." in errors[0]:
            Log.error(u"Tried {{times}} times: Timeout failure (timeout was {{timeout}}", timeout=kwargs['timeout'], times=retry.times, cause=errors[0])
        else:
            Log.error(u"Tried {{times}} times: Request failure of {{url}}", url=url, times=retry.times, cause=errors[0])
def request(method, url, headers=None, zip=None, retry=None, **kwargs):
    """
    JUST LIKE requests.request() BUT WITH DEFAULT HEADERS AND FIXES
    DEMANDS data IS ONE OF:
    * A JSON-SERIALIZABLE STRUCTURE, OR
    * LIST OF JSON-SERIALIZABLE STRUCTURES, OR
    * None

    Parameters
     * headers - EXTRA HEADERS; session.headers AND default_headers FILL IN THE REST
     * zip - ZIP THE REQUEST BODY, IF BIG ENOUGH
     * json - JSON-SERIALIZABLE STRUCTURE
     * retry - {"times": x, "sleep": y} STRUCTURE
     * session - (IN kwargs) USE THIS SESSION INSTEAD OF MAKING (AND CLOSING) A NEW ONE

    WHEN url IS A LIST, EACH URL IS TRIED IN ORDER WITH THE SAME headers, zip,
    retry AND kwargs; THE FIRST RESPONSE WHOSE STATUS DOES NOT ROUND TO 400 OR
    500 IS RETURNED, AND THE RESPONSE OF THE LAST URL IS RETURNED REGARDLESS

    THE BYTE_STRINGS (b"") ARE NECESSARY TO PREVENT httplib.py FROM **FREAKING OUT**
    IT APPEARS requests AND httplib.py SIMPLY CONCATENATE STRINGS BLINDLY, WHICH
    INCLUDES url AND headers

    :return: THE RESULT OF session.request() FOR THE FIRST ATTEMPT THAT DOES NOT RAISE
    FAILURES (SETUP, ALL URLS FAILING, ALL RETRIES FAILING) ARE REPORTED THROUGH Log.error
    """
    global _warning_sent
    global request_count

    if not _warning_sent and not default_headers:
        Log.warning(
            text_type(
                "The pyLibrary.env.http module was meant to add extra " +
                "default headers to all requests, specifically the 'Referer' "
                +
                "header with a URL to the project. Use the `pyLibrary.debug.constants.set()` "
                + "function to set `pyLibrary.env.http.default_headers`"))
    _warning_sent = True

    if isinstance(url, list):
        # TRY MANY URLS
        failures = []
        for remaining, u in jx.countdown(url):
            try:
                # headers AND zip ARE NAMED PARAMETERS, SO THEY ARE NOT IN kwargs;
                # THEY MUST BE FORWARDED EXPLICITLY OR THEY ARE LOST FOR LIST URLS
                response = request(method,
                                   u,
                                   headers=headers,
                                   zip=zip,
                                   retry=retry,
                                   **kwargs)
                if Math.round(response.status_code,
                              decimal=-2) not in [400, 500]:
                    return response
                if not remaining:
                    return response
            except Exception as e:
                e = Except.wrap(e)
                failures.append(e)
        Log.error(u"Tried {{num}} urls", num=len(url), cause=failures)

    if 'session' in kwargs:
        # CALLER OWNS THE SESSION, SO closing() GETS Null INSTEAD OF THE SESSION
        session = kwargs['session']
        del kwargs['session']
        sess = Null
    else:
        sess = session = sessions.Session()

    with closing(sess):
        if PY2 and isinstance(url, text_type):
            # httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE
            url = url.encode('ascii')

        try:
            set_default(kwargs, {"zip": zip, "retry": retry}, DEFAULTS)
            _to_ascii_dict(kwargs)

            # HEADERS
            headers = kwargs['headers'] = unwrap(
                set_default(headers, session.headers, default_headers))
            _to_ascii_dict(headers)
            del kwargs['headers']

            # RETRY
            retry = wrap(kwargs['retry'])
            if isinstance(retry, Number):
                retry = set_default({"times": retry}, DEFAULTS['retry'])
            if isinstance(retry.sleep, Duration):
                retry.sleep = retry.sleep.seconds
            del kwargs['retry']

            # JSON
            if 'json' in kwargs:
                kwargs['data'] = value2json(kwargs['json']).encode('utf8')
                del kwargs['json']

            # ZIP
            set_default(headers, {'Accept-Encoding': 'compress, gzip'})

            if kwargs['zip'] and len(coalesce(kwargs.get('data'))) > 1000:
                compressed = convert.bytes2zip(kwargs['data'])
                headers['content-encoding'] = 'gzip'
                kwargs['data'] = compressed
            del kwargs['zip']
        except Exception as e:
            Log.error(u"Request setup failure on {{url}}", url=url, cause=e)

        errors = []
        for r in range(retry.times):
            if r:
                # PAUSE BEFORE EVERY ATTEMPT EXCEPT THE FIRST
                Till(seconds=retry.sleep).wait()

            try:
                DEBUG and Log.note(u"http {{method|upper}} to {{url}}",
                                   method=method,
                                   url=text_type(url))
                request_count += 1
                return session.request(method=method,
                                       headers=headers,
                                       url=str(url),
                                       **kwargs)
            except Exception as e:
                e = Except.wrap(e)
                if retry['http'] and str(url).startswith(
                        "https://"
                ) and "EOF occurred in violation of protocol" in e:
                    # FALL BACK TO PLAIN http FOR THE NEXT ATTEMPT
                    url = URL("http://" + str(url)[8:])
                    Log.note(
                        "Changed {{url}} to http due to SSL EOF violation.",
                        url=str(url))
                errors.append(e)

        if " Read timed out." in errors[0]:
            Log.error(
                u"Tried {{times}} times: Timeout failure (timeout was {{timeout}}",
                timeout=kwargs['timeout'],
                times=retry.times,
                cause=errors[0])
        else:
            Log.error(u"Tried {{times}} times: Request failure of {{url}}",
                      url=url,
                      times=retry.times,
                      cause=errors[0])
def int2Partition(value):
    """
    RETURN THE PART OF edge.domain FOR THE GIVEN INDEX: THE KEY IS ref PLUS
    value INTERVALS.  AN INDEX THAT ROUNDS TO numPartitions GIVES THE
    DOMAIN'S NULL PART
    """
    if Math.round(value) != numPartitions:
        key = ref.add(edge.domain.interval.multiply(value))
        return edge.domain.getPartByKey(key)
    return edge.domain.NULL
def int2Partition(value):
    """
    RETURN THE PART OF edge.domain FOR THE GIVEN INDEX: THE KEY IS
    value * interval + offset.  AN INDEX THAT ROUNDS TO numPartitions GIVES
    THE DOMAIN'S NULL PART
    """
    if Math.round(value) != numPartitions:
        key = (value * edge.domain.interval) + offset
        return edge.domain.getPartByKey(key)
    return edge.domain.NULL
def es_aggsop(es, frum, query):
    """
    BUILD AN ES AGGREGATION QUERY FROM query.select, SEND IT, AND FORMAT THE RESULT

    :param es: HANDED TO es09.util.post() TO RUN THE QUERY
    :param frum: PROVIDES schema, typed, name AND query_path USED WHILE BUILDING
    :param query: PROVIDES select, where, edges, groupby, format AND limit
    :return: OUTPUT OF THE FORMATTER PICKED BY query.format, WITH meta FILLED IN
    """
    select = wrap([s.copy() for s in listwrap(query.select)])
    es_column_map = {c.name: unwraplist(c.es_column) for c in frum.schema.columns}

    es_query = Data()
    new_select = Data()  # MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING
    formula = []
    for s in select:
        if s.aggregate == "count" and isinstance(s.value, Variable) and s.value.var == ".":
            s.pull = "doc_count"
        elif isinstance(s.value, Variable):
            if s.value.var == ".":
                if frum.typed:
                    # STATISITCAL AGGS IMPLY $value, WHILE OTHERS CAN BE ANYTHING
                    if s.aggregate in NON_STATISTICAL_AGGS:
                        # TODO: HANDLE BOTH $value AND $objects TO COUNT
                        Log.error("do not know how to handle")
                    else:
                        s.value.var = "$value"
                        new_select["$value"] += [s]
                else:
                    if s.aggregate in NON_STATISTICAL_AGGS:
                        # TODO: WE SHOULD BE ABLE TO COUNT, BUT WE MUST *OR* ALL LEAF VALUES TO DO IT
                        Log.error("do not know how to handle")
                    else:
                        Log.error(
                            'Not expecting ES to have a value at "." which {{agg}} can be applied',
                            agg=s.aggregate
                        )
            elif s.aggregate == "count":
                s.value = s.value.map(es_column_map)
                new_select["count_" + literal_field(s.value.var)] += [s]
            else:
                s.value = s.value.map(es_column_map)
                new_select[literal_field(s.value.var)] += [s]
        else:
            # NOT A PLAIN VARIABLE: AGGREGATE OVER A SCRIPT INSTEAD
            formula.append(s)

    # AGGREGATES OVER PLAIN FIELDS
    for canonical_name, many in new_select.items():
        representative = many[0]
        if representative.value.var == ".":
            Log.error("do not know how to handle")
        else:
            field_name = representative.value.var

        for s in many:
            if s.aggregate == "count":
                es_query.aggs[literal_field(canonical_name)].value_count.field = field_name
                s.pull = literal_field(canonical_name) + ".value"
            elif s.aggregate == "median":
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")

                es_query.aggs[key].percentiles.field = field_name
                es_query.aggs[key].percentiles.percents += [50]
                s.pull = key + ".values.50\\.0"
            elif s.aggregate == "percentile":
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")
                # WAS s.percetile (TYPO), SO THE LOWER BOUND WAS NEVER CHECKED
                if isinstance(s.percentile, basestring) or s.percentile < 0 or 1 < s.percentile:
                    Log.error("Expecting percentile to be a float from 0.0 to 1.0")
                percent = Math.round(s.percentile * 100, decimal=6)

                es_query.aggs[key].percentiles.field = field_name
                es_query.aggs[key].percentiles.percents += [percent]
                s.pull = key + ".values." + literal_field(unicode(percent))
            elif s.aggregate == "cardinality":
                # ES USES DIFFERENT METHOD FOR CARDINALITY
                key = literal_field(canonical_name + " cardinality")

                es_query.aggs[key].cardinality.field = field_name
                s.pull = key + ".value"
            elif s.aggregate == "stats":
                # REGULAR STATS
                stats_name = literal_field(canonical_name)
                es_query.aggs[stats_name].extended_stats.field = field_name

                # GET MEDIAN TOO!
                median_name = literal_field(canonical_name + " percentile")
                es_query.aggs[median_name].percentiles.field = field_name
                es_query.aggs[median_name].percentiles.percents += [50]

                s.pull = {
                    "count": stats_name + ".count",
                    "sum": stats_name + ".sum",
                    "min": stats_name + ".min",
                    "max": stats_name + ".max",
                    "avg": stats_name + ".avg",
                    "sos": stats_name + ".sum_of_squares",
                    "std": stats_name + ".std_deviation",
                    "var": stats_name + ".variance",
                    "median": median_name + ".values.50\\.0"
                }
            elif s.aggregate == "union":
                # USE TERMS AGGREGATE TO SIMULATE union
                stats_name = literal_field(canonical_name)
                es_query.aggs[stats_name].terms.field = field_name
                es_query.aggs[stats_name].terms.size = Math.min(s.limit, MAX_LIMIT)
                s.pull = stats_name + ".buckets.key"
            else:
                # PULL VALUE OUT OF THE stats AGGREGATE
                es_query.aggs[literal_field(canonical_name)].extended_stats.field = field_name
                s.pull = literal_field(canonical_name) + "." + aggregates1_4[s.aggregate]

    # AGGREGATES OVER SCRIPTED EXPRESSIONS
    for s in formula:
        canonical_name = literal_field(s.name)
        abs_value = s.value.map(es_column_map)

        if s.aggregate == "count":
            es_query.aggs[literal_field(canonical_name)].value_count.script = abs_value.to_ruby()
            s.pull = literal_field(canonical_name) + ".value"
        elif s.aggregate == "median":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")

            es_query.aggs[key].percentiles.script = abs_value.to_ruby()
            es_query.aggs[key].percentiles.percents += [50]
            s.pull = key + ".values.50\\.0"
        elif s.aggregate == "percentile":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            percent = Math.round(s.percentile * 100, decimal=6)

            es_query.aggs[key].percentiles.script = abs_value.to_ruby()
            es_query.aggs[key].percentiles.percents += [percent]
            s.pull = key + ".values." + literal_field(unicode(percent))
        elif s.aggregate == "cardinality":
            # ES USES DIFFERENT METHOD FOR CARDINALITY
            key = canonical_name + " cardinality"

            es_query.aggs[key].cardinality.script = abs_value.to_ruby()
            s.pull = key + ".value"
        elif s.aggregate == "stats":
            # REGULAR STATS
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].extended_stats.script = abs_value.to_ruby()

            # GET MEDIAN TOO!
            median_name = literal_field(canonical_name + " percentile")
            es_query.aggs[median_name].percentiles.script = abs_value.to_ruby()
            es_query.aggs[median_name].percentiles.percents += [50]

            s.pull = {
                "count": stats_name + ".count",
                "sum": stats_name + ".sum",
                "min": stats_name + ".min",
                "max": stats_name + ".max",
                "avg": stats_name + ".avg",
                "sos": stats_name + ".sum_of_squares",
                "std": stats_name + ".std_deviation",
                "var": stats_name + ".variance",
                "median": median_name + ".values.50\\.0"
            }
        elif s.aggregate == "union":
            # USE TERMS AGGREGATE TO SIMULATE union
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].terms.script_field = abs_value.to_ruby()
            s.pull = stats_name + ".buckets.key"
        else:
            # PULL VALUE OUT OF THE stats AGGREGATE
            s.pull = canonical_name + "." + aggregates1_4[s.aggregate]
            es_query.aggs[canonical_name].extended_stats.script = abs_value.to_ruby()

    decoders = get_decoders_by_depth(query)
    start = 0

    vars_ = query.where.vars()

    # <TERRIBLE SECTION> THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested
    split_where = split_expression_by_depth(query.where, schema=frum.schema, map_=es_column_map)

    if len(split_field(frum.name)) > 1:
        if any(split_where[2::]):
            Log.error("Where clause is too deep")

        for d in decoders[1]:
            es_query = d.append_query(es_query, start)
            start += d.num_columns

        if split_where[1]:
            # TODO: INCLUDE FILTERS ON EDGES
            filter_ = simplify_esfilter(AndOp("and", split_where[1]).to_esfilter())
            es_query = Data(
                aggs={"_filter": set_default({"filter": filter_}, es_query)}
            )

        es_query = wrap({
            "aggs": {"_nested": set_default(
                {"nested": {"path": frum.query_path}},
                es_query
            )}
        })
    else:
        if any(split_where[1::]):
            Log.error("Where clause is too deep")

    for d in decoders[0]:
        es_query = d.append_query(es_query, start)
        start += d.num_columns

    if split_where[0]:
        # TODO: INCLUDE FILTERS ON EDGES
        filter_ = simplify_esfilter(AndOp("and", split_where[0]).to_esfilter())
        es_query = Data(
            aggs={"_filter": set_default({"filter": filter_}, es_query)}
        )
    # </TERRIBLE SECTION>

    if not es_query:
        es_query = wrap({"query": {"match_all": {}}})

    es_query.size = 0

    with Timer("ES query time") as es_duration:
        result = es09.util.post(es, es_query, query.limit)

    try:
        format_time = Timer("formatting")
        with format_time:
            decoders = [d for ds in decoders for d in ds]
            # IT APPEARS THE OLD doc_count IS GONE
            result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total)

            formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format]
            if query.edges:
                output = formatter(decoders, result.aggregations, start, query, select)
            elif query.groupby:
                output = groupby_formatter(decoders, result.aggregations, start, query, select)
            else:
                output = aggop_formatter(decoders, result.aggregations, start, query, select)

        output.meta.timing.formatting = format_time.duration
        output.meta.timing.es_search = es_duration.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        if query.format not in format_dispatch:
            Log.error("Format {{format|quote}} not supported yet", format=query.format, cause=e)
        Log.error("Some problem", cause=e)
def request(method, url, zip=None, retry=None, **kwargs):
    """
    WRAPPER AROUND session.request() THAT ADDS default_headers, A DEFAULT
    TIMEOUT, OPTIONAL BODY COMPRESSION AND RETRIES

    data MUST BE ONE OF:
    * A JSON-SERIALIZABLE STRUCTURE, OR
    * LIST OF JSON-SERIALIZABLE STRUCTURES, OR
    * None

    Parameters
     * url - ONE URL, OR A LIST OF URLS TO TRY IN ORDER
     * zip - ZIP THE REQUEST BODY, IF BIG ENOUGH (ZIP_REQUEST WHEN None)
     * json - JSON-SERIALIZABLE STRUCTURE
     * retry - {"times": x, "sleep": y} STRUCTURE, OR JUST THE NUMBER OF TIMES

    THE BYTE_STRINGS (b"") ARE NECESSARY TO PREVENT httplib.py FROM **FREAKING OUT**
    IT APPEARS requests AND httplib.py SIMPLY CONCATENATE STRINGS BLINDLY, WHICH
    INCLUDES url AND headers
    """
    global _warning_sent

    if not (default_headers or _warning_sent):
        _warning_sent = True
        Log.warning(
            "The pyLibrary.env.http module was meant to add extra "
            "default headers to all requests, specifically the 'Referer' "
            "header with a URL to the project. Use the `pyLibrary.debug.constants.set()` "
            "function to set `pyLibrary.env.http.default_headers`"
        )

    if isinstance(url, list):
        # TRY EACH URL IN TURN; THE LAST RESPONSE IS RETURNED NO MATTER ITS STATUS
        failures = []
        for remaining, candidate in jx.countdown(url):
            try:
                response = request(method, candidate, zip=zip, retry=retry, **kwargs)
                if Math.round(response.status_code, decimal=-2) not in [400, 500] or not remaining:
                    return response
            except Exception as e:
                failures.append(Except.wrap(e))
        Log.error(u"Tried {{num}} urls", num=len(url), cause=failures)

    if 'session' not in kwargs:
        session = sessions.Session()
        sess = session
    else:
        session = kwargs['session']
        del kwargs['session']
        sess = Null  # SESSION BELONGS TO THE CALLER; closing() GETS Null
    session.headers.update(default_headers)

    with closing(sess):
        if zip is None:
            zip = ZIP_REQUEST

        if isinstance(url, text_type):
            # httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE
            url = url.encode('ascii')

        _to_ascii_dict(kwargs)
        timeout = coalesce(kwargs.get('timeout'), default_timeout)
        kwargs['timeout'] = timeout

        # NORMALIZE retry INTO A {"times", "sleep"} STRUCTURE
        if retry == None:
            retry = Data(times=1, sleep=0)
        elif isinstance(retry, Number):
            retry = Data(times=retry, sleep=1)
        else:
            retry = wrap(retry)
            if isinstance(retry.sleep, Duration):
                retry.sleep = retry.sleep.seconds
            set_default(retry, {"times": 1, "sleep": 0})

        if 'json' in kwargs:
            kwargs['data'] = value2json(kwargs['json']).encode('utf8')
            del kwargs['json']

        try:
            headers = unwrap(coalesce(kwargs.get('headers'), {}))
            kwargs['headers'] = headers
            set_default(headers, {'Accept-Encoding': 'compress, gzip'})

            if zip and len(coalesce(kwargs.get('data'))) > 1000:
                compressed = convert.bytes2zip(kwargs['data'])
                headers['content-encoding'] = 'gzip'
                kwargs['data'] = compressed

            # DONE LAST SO THE content-encoding HEADER IS CONVERTED TOO
            _to_ascii_dict(headers)
        except Exception as e:
            Log.error(u"Request setup failure on {{url}}", url=url, cause=e)

        errors = []
        for attempt in range(retry.times):
            if attempt > 0:
                Till(seconds=retry.sleep).wait()

            try:
                DEBUG and Log.note(u"http {{method}} to {{url}}", method=method, url=url)
                return session.request(method=method, url=url, **kwargs)
            except Exception as e:
                errors.append(Except.wrap(e))

        first_error = errors[0]
        if " Read timed out." in first_error:
            Log.error(
                u"Tried {{times}} times: Timeout failure (timeout was {{timeout}}",
                timeout=timeout,
                times=retry.times,
                cause=first_error
            )
        else:
            Log.error(
                u"Tried {{times}} times: Request failure of {{url}}",
                url=url,
                times=retry.times,
                cause=first_error
            )
def add_instances(self, net_new_utility, remaining_budget):
    """
    WALK self.pricing() AND PLACE SPOT REQUESTS UNTIL THE WANTED UTILITY IS
    COVERED OR THE BUDGET IS SPENT

    :param net_new_utility: UTILITY STILL WANTED; REDUCED BY EVERY REQUEST MADE
    :param remaining_budget: $/HOUR STILL AVAILABLE; REDUCED BY EVERY REQUEST MADE
    :return: (net_new_utility, remaining_budget) AFTER ALL REQUESTS
    """
    for p in self.pricing():
        if net_new_utility <= 0 or remaining_budget <= 0:
            break

        instance_type = p.type.instance_type
        zone = p.availability_zone

        if p.current_price == None:
            Log.note("{{type}} has no current price", type=instance_type)
            continue

        type_settings = self.settings.utility[instance_type]
        if type_settings.blacklist or zone in listwrap(type_settings.blacklist_zones):
            Log.note(
                "{{type}} in {{zone}} skipped due to blacklist",
                type=instance_type,
                zone=zone
            )
            continue

        # DO NOT BID HIGHER THAN WHAT WE ARE WILLING TO PAY
        max_acceptable_price = p.type.utility * self.settings.max_utility_price + p.type.discount
        max_bid = Math.min(p.higher_price, max_acceptable_price, remaining_budget)
        min_bid = p.price_80

        if min_bid > max_acceptable_price:
            Log.note(
                "Price of ${{price}}/hour on {{type}}: Over remaining acceptable price of ${{remaining}}/hour",
                type=instance_type,
                price=min_bid,
                remaining=max_acceptable_price
            )
            continue
        if min_bid > remaining_budget:
            Log.note(
                "Did not bid ${{bid}}/hour on {{type}}: Over budget of ${{remaining_budget}}/hour",
                type=instance_type,
                bid=min_bid,
                remaining_budget=remaining_budget
            )
            continue
        if min_bid > max_bid:
            Log.error("not expected")

        naive_number_needed = int(Math.round(float(net_new_utility) / float(p.type.utility), decimal=0))

        # OPTIONALLY CAP THE SHARE OF ONE INSTANCE TYPE WITHIN A ZONE
        limit_total = None
        max_share = self.settings.max_percent_per_type
        if max_share < 1:
            same_type_and_zone = [
                a
                for a in self.active
                if a.launch_specification.instance_type == p.type.instance_type and a.launch_specification.placement == p.availability_zone
            ]
            same_zone = [
                a
                for a in self.active
                if a.launch_specification.placement == p.availability_zone
            ]
            current_count = len(same_type_and_zone)
            all_count = max(len(same_zone), naive_number_needed)
            limit_total = int(Math.floor((all_count * self.settings.max_percent_per_type - current_count) / (1 - self.settings.max_percent_per_type)))

        num = Math.min(naive_number_needed, limit_total, self.settings.max_requests_per_type)
        if num < 0:
            Log.note(
                "{{type}} is over {{limit|percent}} of instances, no more requested",
                limit=self.settings.max_percent_per_type,
                type=instance_type
            )
            continue

        if num == 1:
            min_bid = Math.min(Math.max(p.current_price * 1.1, min_bid), max_acceptable_price)
            price_interval = 0
        else:
            price_interval = Math.min(min_bid / 10, (max_bid - min_bid) / (num - 1))

        for step in range(num):
            bid_per_machine = min_bid + (step * price_interval)
            net_bid = bid_per_machine - p.type.discount

            if bid_per_machine < p.current_price:
                Log.note(
                    "Did not bid ${{bid}}/hour on {{type}}: Under current price of ${{current_price}}/hour",
                    type=instance_type,
                    bid=net_bid,
                    current_price=p.current_price
                )
                continue
            if net_bid > remaining_budget:
                Log.note(
                    "Did not bid ${{bid}}/hour on {{type}}: Over remaining budget of ${{remaining}}/hour",
                    type=instance_type,
                    bid=net_bid,
                    remaining=remaining_budget
                )
                continue

            try:
                per_request = self.settings.ec2.request.count
                if per_request == None or per_request != 1:
                    Log.error("Spot Manager can only request machine one-at-a-time")

                new_requests = self._request_spot_instances(
                    price=bid_per_machine,
                    availability_zone_group=zone,
                    instance_type=instance_type,
                    kwargs=copy(self.settings.ec2.request)
                )
                Log.note(
                    "Request {{num}} instance {{type}} in {{zone}} with utility {{utility}} at ${{price}}/hour",
                    num=len(new_requests),
                    type=instance_type,
                    zone=zone,
                    utility=p.type.utility,
                    price=bid_per_machine
                )
                net_new_utility -= p.type.utility * len(new_requests)
                remaining_budget -= (bid_per_machine - p.type.discount) * len(new_requests)
                with self.net_new_locker:
                    for spot_request in new_requests:
                        self.net_new_spot_requests.add(spot_request)
            except Exception as e:
                Log.warning(
                    "Request instance {{type}} failed because {{reason}}",
                    type=instance_type,
                    reason=e.message,
                    cause=e
                )
                if "Max spot instance count exceeded" in e.message:
                    # NO POINT IN TRYING THE REMAINING PRICES
                    Log.note("No further spot requests will be attempted.")
                    return net_new_utility, remaining_budget

    return net_new_utility, remaining_budget
def es_aggsop(es, frum, query):
    """
    BUILD AN ES AGGREGATION QUERY FROM query.select, SEND IT, AND FORMAT THE RESULT

    :param es: HANDED TO es09.util.post() TO RUN THE QUERY
    :param frum: PROVIDES schema, typed, name AND query_path USED WHILE BUILDING
    :param query: PROVIDES select, where, edges, groupby, format AND limit
    :return: OUTPUT OF THE FORMATTER PICKED BY query.format, WITH meta FILLED IN
    """
    select = wrap([s.copy() for s in listwrap(query.select)])
    # [0] is a cheat; each es_column should be a dict of columns keyed on type, like in sqlite
    es_column_map = {v: frum.schema[v][0].es_column for v in query.vars()}

    es_query = Data()
    new_select = Data()  # MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING
    formula = []
    for s in select:
        if s.aggregate == "count" and isinstance(s.value, Variable) and s.value.var == ".":
            s.pull = "doc_count"
        elif isinstance(s.value, Variable):
            if s.value.var == ".":
                if frum.typed:
                    # STATISITCAL AGGS IMPLY $value, WHILE OTHERS CAN BE ANYTHING
                    if s.aggregate in NON_STATISTICAL_AGGS:
                        # TODO: HANDLE BOTH $value AND $objects TO COUNT
                        Log.error("do not know how to handle")
                    else:
                        s.value.var = "$value"
                        new_select["$value"] += [s]
                else:
                    if s.aggregate in NON_STATISTICAL_AGGS:
                        # TODO: WE SHOULD BE ABLE TO COUNT, BUT WE MUST *OR* ALL LEAF VALUES TO DO IT
                        Log.error("do not know how to handle")
                    else:
                        Log.error(
                            'Not expecting ES to have a value at "." which {{agg}} can be applied',
                            agg=s.aggregate
                        )
            elif s.aggregate == "count":
                s.value = s.value.map(es_column_map)
                new_select["count_" + literal_field(s.value.var)] += [s]
            else:
                s.value = s.value.map(es_column_map)
                new_select[literal_field(s.value.var)] += [s]
        else:
            # NOT A PLAIN VARIABLE: AGGREGATE OVER A SCRIPT INSTEAD
            formula.append(s)

    # AGGREGATES OVER PLAIN FIELDS
    for canonical_name, many in new_select.items():
        representative = many[0]
        if representative.value.var == ".":
            Log.error("do not know how to handle")
        else:
            field_name = representative.value.var

        for s in many:
            if s.aggregate == "count":
                es_query.aggs[literal_field(canonical_name)].value_count.field = field_name
                s.pull = literal_field(canonical_name) + ".value"
            elif s.aggregate == "median":
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")

                es_query.aggs[key].percentiles.field = field_name
                es_query.aggs[key].percentiles.percents += [50]
                s.pull = key + ".values.50\\.0"
            elif s.aggregate == "percentile":
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")
                # WAS s.percetile (TYPO), SO THE LOWER BOUND WAS NEVER CHECKED
                if isinstance(s.percentile, basestring) or s.percentile < 0 or 1 < s.percentile:
                    Log.error("Expecting percentile to be a float from 0.0 to 1.0")
                percent = Math.round(s.percentile * 100, decimal=6)

                es_query.aggs[key].percentiles.field = field_name
                es_query.aggs[key].percentiles.percents += [percent]
                s.pull = key + ".values." + literal_field(unicode(percent))
            elif s.aggregate == "cardinality":
                # ES USES DIFFERENT METHOD FOR CARDINALITY
                key = literal_field(canonical_name + " cardinality")

                es_query.aggs[key].cardinality.field = field_name
                s.pull = key + ".value"
            elif s.aggregate == "stats":
                # REGULAR STATS
                stats_name = literal_field(canonical_name)
                es_query.aggs[stats_name].extended_stats.field = field_name

                # GET MEDIAN TOO!
                median_name = literal_field(canonical_name + " percentile")
                es_query.aggs[median_name].percentiles.field = field_name
                es_query.aggs[median_name].percentiles.percents += [50]

                s.pull = {
                    "count": stats_name + ".count",
                    "sum": stats_name + ".sum",
                    "min": stats_name + ".min",
                    "max": stats_name + ".max",
                    "avg": stats_name + ".avg",
                    "sos": stats_name + ".sum_of_squares",
                    "std": stats_name + ".std_deviation",
                    "var": stats_name + ".variance",
                    "median": median_name + ".values.50\\.0"
                }
            elif s.aggregate == "union":
                # USE TERMS AGGREGATE TO SIMULATE union
                stats_name = literal_field(canonical_name)
                es_query.aggs[stats_name].terms.field = field_name
                es_query.aggs[stats_name].terms.size = Math.min(s.limit, MAX_LIMIT)
                s.pull = stats_name + ".buckets.key"
            else:
                # PULL VALUE OUT OF THE stats AGGREGATE
                es_query.aggs[literal_field(canonical_name)].extended_stats.field = field_name
                s.pull = literal_field(canonical_name) + "." + aggregates1_4[s.aggregate]

    # AGGREGATES OVER SCRIPTED EXPRESSIONS
    for s in formula:
        canonical_name = literal_field(s.name)
        abs_value = s.value.map(es_column_map)

        if isinstance(abs_value, TupleOp):
            if s.aggregate == "count":
                # TUPLES ALWAYS EXIST, SO COUNTING THEM IS EASY
                s.pull = "doc_count"
            else:
                Log.error("{{agg}} is not a supported aggregate over a tuple", agg=s.aggregate)
        elif s.aggregate == "count":
            es_query.aggs[literal_field(canonical_name)].value_count.script = abs_value.to_ruby()
            s.pull = literal_field(canonical_name) + ".value"
        elif s.aggregate == "median":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")

            es_query.aggs[key].percentiles.script = abs_value.to_ruby()
            es_query.aggs[key].percentiles.percents += [50]
            s.pull = key + ".values.50\\.0"
        elif s.aggregate == "percentile":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            percent = Math.round(s.percentile * 100, decimal=6)

            es_query.aggs[key].percentiles.script = abs_value.to_ruby()
            es_query.aggs[key].percentiles.percents += [percent]
            s.pull = key + ".values." + literal_field(unicode(percent))
        elif s.aggregate == "cardinality":
            # ES USES DIFFERENT METHOD FOR CARDINALITY
            key = canonical_name + " cardinality"

            es_query.aggs[key].cardinality.script = abs_value.to_ruby()
            s.pull = key + ".value"
        elif s.aggregate == "stats":
            # REGULAR STATS
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].extended_stats.script = abs_value.to_ruby()

            # GET MEDIAN TOO!
            median_name = literal_field(canonical_name + " percentile")
            es_query.aggs[median_name].percentiles.script = abs_value.to_ruby()
            es_query.aggs[median_name].percentiles.percents += [50]

            s.pull = {
                "count": stats_name + ".count",
                "sum": stats_name + ".sum",
                "min": stats_name + ".min",
                "max": stats_name + ".max",
                "avg": stats_name + ".avg",
                "sos": stats_name + ".sum_of_squares",
                "std": stats_name + ".std_deviation",
                "var": stats_name + ".variance",
                "median": median_name + ".values.50\\.0"
            }
        elif s.aggregate == "union":
            # USE TERMS AGGREGATE TO SIMULATE union
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].terms.script_field = abs_value.to_ruby()
            s.pull = stats_name + ".buckets.key"
        else:
            # PULL VALUE OUT OF THE stats AGGREGATE
            s.pull = canonical_name + "." + aggregates1_4[s.aggregate]
            es_query.aggs[canonical_name].extended_stats.script = abs_value.to_ruby()

    decoders = get_decoders_by_depth(query)
    start = 0

    vars_ = query.where.vars()

    # <TERRIBLE SECTION> THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested
    split_where = split_expression_by_depth(query.where, schema=frum.schema)

    if len(split_field(frum.name)) > 1:
        if any(split_where[2::]):
            Log.error("Where clause is too deep")

        for d in decoders[1]:
            es_query = d.append_query(es_query, start)
            start += d.num_columns

        if split_where[1]:
            # TODO: INCLUDE FILTERS ON EDGES
            filter_ = simplify_esfilter(AndOp("and", split_where[1]).to_esfilter())
            es_query = Data(
                aggs={"_filter": set_default({"filter": filter_}, es_query)}
            )

        es_query = wrap({
            "aggs": {"_nested": set_default(
                {
                    "nested": {
                        "path": frum.query_path
                    }
                },
                es_query
            )}
        })
    else:
        if any(split_where[1::]):
            Log.error("Where clause is too deep")

    if decoders:
        for d in jx.reverse(decoders[0]):
            es_query = d.append_query(es_query, start)
            start += d.num_columns

    if split_where[0]:
        # TODO: INCLUDE FILTERS ON EDGES
        filter_ = simplify_esfilter(AndOp("and", split_where[0]).to_esfilter())
        es_query = Data(
            aggs={"_filter": set_default({"filter": filter_}, es_query)}
        )
    # </TERRIBLE SECTION>

    if not es_query:
        es_query = wrap({"query": {"match_all": {}}})

    es_query.size = 0

    with Timer("ES query time") as es_duration:
        result = es09.util.post(es, es_query, query.limit)

    try:
        format_time = Timer("formatting")
        with format_time:
            decoders = [d for ds in decoders for d in ds]
            # IT APPEARS THE OLD doc_count IS GONE
            result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total)

            formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format]
            if query.edges:
                output = formatter(decoders, result.aggregations, start, query, select)
            elif query.groupby:
                output = groupby_formatter(decoders, result.aggregations, start, query, select)
            else:
                output = aggop_formatter(decoders, result.aggregations, start, query, select)

        output.meta.timing.formatting = format_time.duration
        output.meta.timing.es_search = es_duration.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        if query.format not in format_dispatch:
            Log.error("Format {{format|quote}} not supported yet", format=query.format, cause=e)
        Log.error("Some problem", cause=e)
def int2Partition(value):
    """
    LOOK UP THE PART OF edge.domain WHOSE KEY IS ref PLUS value INTERVALS;
    AN INDEX THAT ROUNDS TO numPartitions IS THE DOMAIN'S NULL PART
    """
    past_last_partition = Math.round(value) == numPartitions
    if past_last_partition:
        return edge.domain.NULL

    distance = edge.domain.interval.multiply(value)
    return edge.domain.getPartByKey(ref.add(distance))
def es_aggsop(es, frum, query):
    """
    BUILD AN ES AGGREGATION QUERY FROM query.select, SEND IT, AND FORMAT THE RESULT

    EVERY SELECT CLAUSE GETS A pull FUNCTION THAT EXTRACTS ITS VALUE FROM THE
    AGGREGATION RESULT

    :param es: HANDED TO es_post() TO RUN THE QUERY
    :param frum: PROVIDES schema AND name USED WHILE BUILDING
    :param query: COPIED, THEN MARKED UP; PROVIDES select, where, edges,
                  groupby, format AND limit
    :return: OUTPUT OF THE FORMATTER PICKED BY query.format, WITH meta FILLED IN
    """
    query = query.copy()  # WE WILL MARK UP THIS QUERY
    schema = frum.schema
    select = listwrap(query.select)

    es_query = Data()
    new_select = Data()  # MAP FROM canonical_name (USED FOR NAMES IN QUERY) TO SELECT MAPPING
    formula = []
    for s in select:
        if s.aggregate == "count" and isinstance(s.value, Variable) and s.value.var == ".":
            # NOTE(review): OTHER BRANCHES COMPARE schema.query_path[0] TO "."; CONFIRM WHICH IS INTENDED
            if schema.query_path == ".":
                s.pull = jx_expression_to_function("doc_count")
            else:
                s.pull = jx_expression_to_function({"coalesce": ["_nested.doc_count", "doc_count", 0]})
        elif isinstance(s.value, Variable):
            if s.aggregate == "count":
                new_select["count_" + literal_field(s.value.var)] += [s]
            else:
                new_select[literal_field(s.value.var)] += [s]
        elif s.aggregate:
            formula.append(s)

    # AGGREGATES OVER PLAIN FIELDS
    for canonical_name, many in new_select.items():
        for s in many:
            columns = frum.schema.values(s.value.var)

            if s.aggregate == "count":
                canonical_names = []
                for column in columns:
                    cn = literal_field(column.es_column + "_count")
                    if column.jx_type == EXISTS:
                        canonical_names.append(cn + ".doc_count")
                        es_query.aggs[cn].filter.range = {column.es_column: {"gt": 0}}
                    else:
                        canonical_names.append(cn + ".value")
                        es_query.aggs[cn].value_count.field = column.es_column
                if len(canonical_names) == 1:
                    s.pull = jx_expression_to_function(canonical_names[0])
                else:
                    s.pull = jx_expression_to_function({"add": canonical_names})
            elif s.aggregate == "median":
                if len(columns) > 1:
                    Log.error("Do not know how to count columns with more than one type (script probably)")
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")

                es_query.aggs[key].percentiles.field = columns[0].es_column
                es_query.aggs[key].percentiles.percents += [50]
                s.pull = jx_expression_to_function(key + ".values.50\\.0")
            elif s.aggregate == "percentile":
                if len(columns) > 1:
                    Log.error("Do not know how to count columns with more than one type (script probably)")
                # ES USES DIFFERENT METHOD FOR PERCENTILES
                key = literal_field(canonical_name + " percentile")
                # WAS s.percetile (TYPO), SO THE LOWER BOUND WAS NEVER CHECKED
                if isinstance(s.percentile, text_type) or s.percentile < 0 or 1 < s.percentile:
                    Log.error("Expecting percentile to be a float from 0.0 to 1.0")
                percent = Math.round(s.percentile * 100, decimal=6)

                es_query.aggs[key].percentiles.field = columns[0].es_column
                es_query.aggs[key].percentiles.percents += [percent]
                es_query.aggs[key].percentiles.tdigest.compression = 2
                s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent)))
            elif s.aggregate == "cardinality":
                canonical_names = []
                for column in columns:
                    cn = literal_field(column.es_column + "_cardinality")
                    canonical_names.append(cn)
                    es_query.aggs[cn].cardinality.field = column.es_column
                if len(columns) == 1:
                    s.pull = jx_expression_to_function(canonical_names[0] + ".value")
                else:
                    s.pull = jx_expression_to_function({"add": [cn + ".value" for cn in canonical_names], "default": 0})
            elif s.aggregate == "stats":
                if len(columns) > 1:
                    Log.error("Do not know how to count columns with more than one type (script probably)")
                # REGULAR STATS
                stats_name = literal_field(canonical_name)
                es_query.aggs[stats_name].extended_stats.field = columns[0].es_column

                # GET MEDIAN TOO!
                median_name = literal_field(canonical_name + "_percentile")
                es_query.aggs[median_name].percentiles.field = columns[0].es_column
                es_query.aggs[median_name].percentiles.percents += [50]

                s.pull = get_pull_stats(stats_name, median_name)
            elif s.aggregate == "union":
                pulls = []
                for column in columns:
                    script = {"scripted_metric": {
                        'init_script': 'params._agg.terms = new HashSet()',
                        'map_script': 'for (v in doc[' + quote(column.es_column) + '].values) params._agg.terms.add(v);',
                        'combine_script': 'return params._agg.terms.toArray()',
                        'reduce_script': 'HashSet output = new HashSet(); for (a in params._aggs) { if (a!=null) for (v in a) {output.add(v)} } return output.toArray()',
                    }}
                    stats_name = encode_property(column.es_column)
                    if column.nested_path[0] == ".":
                        es_query.aggs[stats_name] = script
                        pulls.append(jx_expression_to_function(stats_name + ".value"))
                    else:
                        es_query.aggs[stats_name] = {
                            "nested": {"path": column.nested_path[0]},
                            "aggs": {"_nested": script}
                        }
                        pulls.append(jx_expression_to_function(stats_name + "._nested.value"))

                if len(pulls) == 0:
                    s.pull = NULL
                elif len(pulls) == 1:
                    s.pull = pulls[0]
                else:
                    # BIND pulls NOW: THE NAME IS REASSIGNED FOR THE NEXT union
                    # SELECT, AND A PLAIN CLOSURE WOULD SEE THAT LATER LIST
                    s.pull = lambda row, pulls=pulls: UNION(p(row) for p in pulls)
            else:
                if len(columns) > 1:
                    Log.error("Do not know how to count columns with more than one type (script probably)")
                elif len(columns) < 1:
                    # NO COLUMN TO AGGREGATE OVER: PULL A NULL
                    s.pull = jx_expression_to_function({"null": {}})
                else:
                    # PULL VALUE OUT OF THE stats AGGREGATE
                    es_query.aggs[literal_field(canonical_name)].extended_stats.field = columns[0].es_column
                    s.pull = jx_expression_to_function({"coalesce": [literal_field(canonical_name) + "." + aggregates[s.aggregate], s.default]})

    # AGGREGATES OVER SCRIPTED EXPRESSIONS
    for s in formula:
        canonical_name = literal_field(s.name)

        if isinstance(s.value, TupleOp):
            if s.aggregate == "count":
                # TUPLES ALWAYS EXIST, SO COUNTING THEM IS EASY
                # A FUNCTION, LIKE EVERY OTHER pull HERE (WAS THE BARE STRING "doc_count")
                s.pull = jx_expression_to_function("doc_count")
            elif s.aggregate in ('max', 'maximum', 'min', 'minimum'):
                if s.aggregate in ('max', 'maximum'):
                    direction = 1
                    op = "max"
                else:
                    direction = -1
                    op = 'min'

                nully = TupleOp("tuple", [NULL] * len(s.value.terms)).partial_eval().to_es_script(schema).expr
                selfy = s.value.partial_eval().to_es_script(schema).expr

                script = {"scripted_metric": {
                    'init_script': 'params._agg.best = ' + nully + ';',
                    'map_script': 'params._agg.best = ' + expand_template(MAX_OF_TUPLE, {"expr1": "params._agg.best", "expr2": selfy, "dir": direction, "op": op}) + ";",
                    'combine_script': 'return params._agg.best',
                    'reduce_script': 'return params._aggs.stream().max(' + expand_template(COMPARE_TUPLE, {"dir": direction, "op": op}) + ').get()',
                }}
                if schema.query_path[0] == ".":
                    es_query.aggs[canonical_name] = script
                    s.pull = jx_expression_to_function(literal_field(canonical_name) + ".value")
                else:
                    es_query.aggs[canonical_name] = {
                        "nested": {"path": schema.query_path[0]},
                        "aggs": {"_nested": script}
                    }
                    s.pull = jx_expression_to_function(literal_field(canonical_name) + "._nested.value")
            else:
                Log.error("{{agg}} is not a supported aggregate over a tuple", agg=s.aggregate)
        elif s.aggregate == "count":
            es_query.aggs[literal_field(canonical_name)].value_count.script = s.value.partial_eval().to_es_script(schema).script(schema)
            s.pull = jx_expression_to_function(literal_field(canonical_name) + ".value")
        elif s.aggregate == "median":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")

            es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema)
            es_query.aggs[key].percentiles.percents += [50]
            s.pull = jx_expression_to_function(key + ".values.50\\.0")
        elif s.aggregate == "percentile":
            # ES USES DIFFERENT METHOD FOR PERCENTILES THAN FOR STATS AND COUNT
            key = literal_field(canonical_name + " percentile")
            percent = Math.round(s.percentile * 100, decimal=6)

            es_query.aggs[key].percentiles.script = s.value.to_es_script(schema).script(schema)
            es_query.aggs[key].percentiles.percents += [percent]
            s.pull = jx_expression_to_function(key + ".values." + literal_field(text_type(percent)))
        elif s.aggregate == "cardinality":
            # ES USES DIFFERENT METHOD FOR CARDINALITY
            key = canonical_name + " cardinality"

            es_query.aggs[key].cardinality.script = s.value.to_es_script(schema).script(schema)
            s.pull = jx_expression_to_function(key + ".value")
        elif s.aggregate == "stats":
            # REGULAR STATS
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].extended_stats.script = s.value.to_es_script(schema).script(schema)

            # GET MEDIAN TOO!
            median_name = literal_field(canonical_name + " percentile")
            es_query.aggs[median_name].percentiles.script = s.value.to_es_script(schema).script(schema)
            es_query.aggs[median_name].percentiles.percents += [50]

            s.pull = get_pull_stats(stats_name, median_name)
        elif s.aggregate == "union":
            # USE TERMS AGGREGATE TO SIMULATE union
            stats_name = literal_field(canonical_name)
            es_query.aggs[stats_name].terms.script_field = s.value.to_es_script(schema).script(schema)
            s.pull = jx_expression_to_function(stats_name + ".buckets.key")
        else:
            # PULL VALUE OUT OF THE stats AGGREGATE
            s.pull = jx_expression_to_function(canonical_name + "." + aggregates[s.aggregate])
            es_query.aggs[canonical_name].extended_stats.script = s.value.to_es_script(schema).script(schema)

    decoders = get_decoders_by_depth(query)
    start = 0

    # <TERRIBLE SECTION> THIS IS WHERE WE WEAVE THE where CLAUSE WITH nested
    split_where = split_expression_by_depth(query.where, schema=frum.schema)

    if len(split_field(frum.name)) > 1:
        if any(split_where[2::]):
            Log.error("Where clause is too deep")

        for d in decoders[1]:
            es_query = d.append_query(es_query, start)
            start += d.num_columns

        if split_where[1]:
            # TODO: INCLUDE FILTERS ON EDGES
            filter_ = AndOp("and", split_where[1]).to_esfilter(schema)
            es_query = Data(
                aggs={"_filter": set_default({"filter": filter_}, es_query)}
            )

        es_query = wrap({
            "aggs": {"_nested": set_default(
                {"nested": {"path": schema.query_path[0]}},
                es_query
            )}
        })
    else:
        if any(split_where[1::]):
            Log.error("Where clause is too deep")

    if decoders:
        for d in jx.reverse(decoders[0]):
            es_query = d.append_query(es_query, start)
            start += d.num_columns

    if split_where[0]:
        # TODO: INCLUDE FILTERS ON EDGES
        filter_ = AndOp("and", split_where[0]).to_esfilter(schema)
        es_query = Data(
            aggs={"_filter": set_default({"filter": filter_}, es_query)}
        )
    # </TERRIBLE SECTION>

    if not es_query:
        es_query = wrap({"query": {"match_all": {}}})

    es_query.size = 0

    with Timer("ES query time") as es_duration:
        result = es_post(es, es_query, query.limit)

    try:
        format_time = Timer("formatting")
        with format_time:
            decoders = [d for ds in decoders for d in ds]
            # IT APPEARS THE OLD doc_count IS GONE
            result.aggregations.doc_count = coalesce(result.aggregations.doc_count, result.hits.total)

            formatter, groupby_formatter, aggop_formatter, mime_type = format_dispatch[query.format]
            if query.edges:
                output = formatter(decoders, result.aggregations, start, query, select)
            elif query.groupby:
                output = groupby_formatter(decoders, result.aggregations, start, query, select)
            else:
                output = aggop_formatter(decoders, result.aggregations, start, query, select)

        output.meta.timing.formatting = format_time.duration
        output.meta.timing.es_search = es_duration.duration
        output.meta.content_type = mime_type
        output.meta.es_query = es_query
        return output
    except Exception as e:
        if query.format not in format_dispatch:
            Log.error("Format {{format|quote}} not supported yet", format=query.format, cause=e)
        Log.error("Some problem", cause=e)
def request(method, url, zip=None, retry=None, **kwargs):
    """
    JUST LIKE requests.request() BUT WITH DEFAULT HEADERS AND FIXES
    DEMANDS data IS ONE OF:
    * A JSON-SERIALIZABLE STRUCTURE, OR
    * LIST OF JSON-SERIALIZABLE STRUCTURES, OR
    * None

    Parameters
     * method - HTTP METHOD, PASSED THROUGH TO session.request()
     * url - A SINGLE URL, OR A LIST OF URLS TO TRY IN ORDER
     * zip - ZIP THE REQUEST BODY, IF BIG ENOUGH (DEFAULTS TO ZIP_REQUEST)
     * json - JSON-SERIALIZABLE STRUCTURE (SENT AS utf8-ENCODED data)
     * retry - {"times": x, "sleep": y} STRUCTURE, OR A NUMBER OF TIMES
               (A NUMBER IMPLIES sleep=1); DEFAULT IS ONE ATTEMPT
     * session - (OPTIONAL) EXISTING SESSION TO USE INSTEAD OF A NEW ONE
     * ALL OTHER kwargs ARE HANDED TO session.request()

    RETURNS THE RESPONSE OF THE FIRST ATTEMPT THAT DOES NOT RAISE.
    WHEN url IS A LIST, RETURNS THE FIRST RESPONSE NOT IN THE 400 OR 500
    RANGE, OR THE RESPONSE OF THE LAST URL WHATEVER ITS STATUS.
    ALL FAILURES ARE REPORTED THROUGH Log.error()

    THE BYTE_STRINGS (b"") ARE NECESSARY TO PREVENT httplib.py FROM **FREAKING OUT**
    IT APPEARS requests AND httplib.py SIMPLY CONCATENATE STRINGS BLINDLY, WHICH
    INCLUDES url AND headers
    """
    global _warning_sent
    if not default_headers and not _warning_sent:
        # WARN ONLY ONCE PER PROCESS
        _warning_sent = True
        Log.warning(
            "The pyLibrary.env.http module was meant to add extra "
            "default headers to all requests, specifically the 'Referer' "
            "header with a URL to the project. Use the `pyLibrary.debug.constants.set()` "
            "function to set `pyLibrary.env.http.default_headers`"
        )

    if isinstance(url, list):
        # TRY MANY URLS
        failures = []
        for remaining, u in jx.countdown(url):
            try:
                response = request(method, u, zip=zip, retry=retry, **kwargs)
                if Math.round(response.status_code, decimal=-2) not in [400, 500]:
                    return response
                if not remaining:
                    # LAST URL: RETURN WHATEVER WE GOT, EVEN IF IT IS AN ERROR STATUS
                    return response
            except Exception as e:
                e = Except.wrap(e)
                failures.append(e)
        Log.error("Tried {{num}} urls", num=len(url), cause=failures)

    if b"session" in kwargs:
        # CALLER-SUPPLIED SESSION MUST NOT BE FORWARDED AS A REQUEST ARGUMENT
        session = kwargs[b"session"]
        del kwargs[b"session"]
    else:
        session = sessions.Session()
    session.headers.update(default_headers)

    if zip is None:
        zip = ZIP_REQUEST

    if isinstance(url, unicode):
        # httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE
        url = url.encode("ascii")

    _to_ascii_dict(kwargs)
    timeout = kwargs[b'timeout'] = coalesce(kwargs.get(b'timeout'), default_timeout)

    # NORMALIZE retry INTO A {"times", "sleep"} STRUCTURE
    # (`== None` IS KEPT ON PURPOSE; `is None` WOULD NOT BE EQUIVALENT FOR
    # NULL-LIKE WRAPPER VALUES)
    if retry == None:
        retry = Data(times=1, sleep=0)
    elif isinstance(retry, Number):
        retry = Data(times=retry, sleep=1)
    else:
        retry = wrap(retry)
        if isinstance(retry.sleep, Duration):
            retry.sleep = retry.sleep.seconds
        set_default(retry, {"times": 1, "sleep": 0})

    if b'json' in kwargs:
        # json IS A CONVENIENCE: SERIALIZE IT AND SEND AS THE REQUEST BODY
        kwargs[b'data'] = convert.value2json(kwargs[b'json']).encode("utf8")
        del kwargs[b'json']

    try:
        headers = kwargs[b"headers"] = unwrap(coalesce(wrap(kwargs)[b"headers"], {}))
        set_default(headers, {b"accept-encoding": b"compress, gzip"})

        if zip and len(coalesce(kwargs.get(b"data"))) > 1000:
            # ONLY BODIES OVER 1000 BYTES ARE COMPRESSED
            compressed = convert.bytes2zip(kwargs[b"data"])
            headers[b'content-encoding'] = b'gzip'
            kwargs[b"data"] = compressed

        # ALWAYS DONE, WHETHER OR NOT THE BODY WAS COMPRESSED
        _to_ascii_dict(headers)
    except Exception as e:
        Log.error("Request setup failure on {{url}}", url=url, cause=e)

    errors = []
    for r in range(retry.times):
        if r:
            # PAUSE BEFORE EVERY ATTEMPT EXCEPT THE FIRST
            Till(seconds=retry.sleep).wait()

        try:
            if DEBUG:
                Log.note("http {{method}} to {{url}}", method=method, url=url)
            return session.request(method=method, url=url, **kwargs)
        except Exception as e:
            errors.append(Except.wrap(e))

    if not errors:
        # retry.times WAS ZERO (OR LESS): NOTHING WAS TRIED, SO THERE IS NO
        # FAILURE TO REPORT AS THE CAUSE
        Log.error(
            "No request made to {{url}}: retry.times={{times}} allows no attempts",
            url=url,
            times=retry.times
        )

    # ONLY THE FIRST FAILURE IS REPORTED AS THE CAUSE
    if " Read timed out." in errors[0]:
        Log.error(
            "Tried {{times}} times: Timeout failure (timeout was {{timeout}})",
            timeout=timeout,
            times=retry.times,
            cause=errors[0]
        )
    else:
        Log.error(
            "Tried {{times}} times: Request failure of {{url}}",
            url=url,
            times=retry.times,
            cause=errors[0]
        )