def query(self, q):
    """
    RUN A QUERY AGAINST THIS CUBE

    AGGREGATES ARE HANDLED BY cube_aggs(); EVERYTHING ELSE IS DELEGATED TO A
    ListContainer BUILT FROM THIS CUBE'S VALUES

    :param q: THE QUERY TO RUN
    :return: RESULT OF cube_aggs(), OR OF ListContainer.query()
    """
    if is_aggs(q):
        return cube_aggs(self, q)

    # THE SCHEMA IS THE select AND edges COLUMNS, KEYED BY NAME
    schema = dot.dict_to_data({s.name: s for s in self.select + self.edges})

    # DEFER TO ListContainer
    from jx_python.containers.list import ListContainer

    rows = ListContainer(name="", data=self.values(), schema=schema)
    return rows.query(q)
def denormalized(self):
    """
    THE INTERNAL STRUCTURE FOR THE COLUMN METADATA IS VERY DIFFERENT FROM
    THE DENORMALIZED PERSPECTIVE. THIS PROVIDES THAT PERSPECTIVE FOR QUERIES

    :return: ListContainer (NAMED self.name) WITH ONE FLAT RECORD PER COLUMN
             WHOSE jx_type IS NOT IN INTERNAL
    """

    def as_record(column):
        # ONE FLAT, QUERYABLE RECORD DESCRIBING A SINGLE COLUMN
        return {
            "table": column.es_index,
            "name": untype_path(column.name),
            "cardinality": column.cardinality,
            "es_column": column.es_column,
            "es_index": column.es_index,
            "last_updated": column.last_updated,
            "count": column.count,
            "nested_path": [unnest_path(n) for n in column.nested_path],
            "es_type": column.es_type,
            "type": column.jx_type,
        }

    with self.locker:
        self._update_meta()
        output = []
        # self.data IS WALKED AS {table: {column_name: [columns]}}
        for _table_name, columns_by_name in self.data.items():
            for _column_name, columns in columns_by_name.items():
                for column in columns:
                    if column.jx_type in INTERNAL:  # and c.es_column != "_id"
                        continue
                    output.append(as_record(column))

    from jx_python.containers.list import ListContainer

    return ListContainer(
        self.name,
        data=output,
        schema=BaseSchema(META_COLUMNS_NAME, SIMPLE_METADATA_COLUMNS),
    )
def download_perfherder(desc, repo, id, dummy, framework):
    """
    FETCH A PERFORMANCE SIGNATURE AND ITS DATA FROM TREEHERDER, THEN LOG THE
    VALUES SORTED BY push_timestamp

    :param desc: LOGGED AS THE "name" OF THE RESULT
    :param repo: PLACED IN THE URL PATH, RIGHT AFTER /api/project/
    :param id: SENT AS THE id QUERY PARAMETER OF THE SIGNATURES REQUEST
    :param dummy: NOT USED
    :param framework: SENT AS THE framework QUERY PARAMETER
    :return: None, THE RESULT IS ONLY LOGGED
    """
    project_url = "https://treeherder.mozilla.org/api/project/" + repo

    signatures_url = (
        project_url
        + "/performance/signatures/?format=json&framework="
        + str(framework)
        + "&id="
        + str(id)
    )
    sig_result = http.get_json(signatures_url)
    # ONLY THE FIRST SIGNATURE IN THE RESPONSE IS USED
    signature = first(sig_result.keys())

    data_url = project_url + "/performance/data/?signatures=" + signature
    data_result = http.get_json(data_url)

    values = jx.run({
        "from": ListContainer("data", data_result[signature]),
        "sort": "push_timestamp",
        "select": "value"
    }).data

    Log.note("{{result|json}}", result={"name": desc, "data": values})
def query(self, query):
    """
    RUN query AGAINST A SNAPSHOT OF ALL THE COLUMN METADATA

    NOTE: Log.error() IS CALLED FIRST, SO THE REST ONLY RUNS IF THAT CALL
    RETURNS (PRESUMABLY IT RAISES - CONFIRM)

    :param query: THE QUERY; ITS frum IS REPLACED WITH THE SNAPSHOT CONTAINER
    :return: RESULT OF jx.run(query)
    """
    # NOT EXPECTED TO BE RUN
    Log.error("not")

    with self.locker:
        self._update_meta()
        if not self._schema:
            # BUILD THE SCHEMA ONCE, FROM THE COLUMNS THAT DESCRIBE THE META TABLE
            meta_columns = []
            for cs in self.data[META_COLUMNS_NAME].values():
                meta_columns.extend(cs)
            self._schema = Schema(".", meta_columns)
        snapshot = self._all_columns()

    from jx_python.containers.list import ListContainer

    query.frum = ListContainer(META_COLUMNS_NAME, snapshot, self._schema)
    return jx.run(query)
def run(query, container=Null):
    """
    THIS FUNCTION IS SIMPLY SWITCHING BASED ON THE query["from"] CONTAINER,
    BUT IT IS ALSO PROCESSING A list CONTAINER; SEPARATE TO A ListContainer

    :param query: THE QUERY (ANYTHING QueryOp.wrap() ACCEPTS)
    :param container: THE DATA TO QUERY; WHEN MISSING, query["from"] IS USED
    :return: WHATEVER THE CONTAINER'S OWN query() RETURNS, OR THE LOCALLY
             COMPUTED RESULT PACKAGED AS cube, table OR list FORMAT
    """
    # NOTE: `== None` (NOT `is None`) IS DELIBERATE; THE DEFAULT IS Null, WHICH
    # PRESUMABLY COMPARES EQUAL TO None WITHOUT BEING None - CONFIRM
    if container == None:
        container = to_data(query)["from"]
        op = QueryOp.wrap(query, container=container, namespace=container.schema)
    else:
        op = QueryOp.wrap(query, container=container, namespace=container.namespace)

    # DISPATCH ON THE KIND OF CONTAINER
    if container == None:
        from jx_python.containers.list import DUAL

        return DUAL.query(op)
    elif isinstance(container, Container):
        return container.query(op)
    elif is_many(container):
        container = ListContainer(name=None, data=list(container))
    elif isinstance(container, Cube):
        if is_aggs(op):
            return cube_aggs(container, op)
        # NON-AGGREGATE QUERIES ON A Cube FALL THROUGH TO THE GENERIC STEPS
    elif is_op(container, QueryOp):
        container = run(container)
    elif is_data(container):
        # container IS ITSELF A QUERY: RUN IT FIRST, THEN QUERY ITS RESULT
        query = container
        container = query["from"]
        container = run(QueryOp.wrap(query, container, container.namespace), container)
    else:
        Log.error("Do not know how to handle {{type}}", type=container.__class__.__name__)

    if is_aggs(op):
        container = list_aggs(container, op)
    else:  # SETOP
        if op.where is not TRUE:
            container = filter(container, op.where)

        if op.sort:
            container = sort(container, op.sort, already_normalized=True)

        if op.select:
            container = select(container, op.select)

    if op.window:
        if isinstance(container, Cube):
            container = list(container.values())

        for window_spec in op.window:
            window(container, window_spec)

    # AT THIS POINT frum IS IN LIST FORMAT, NOW PACKAGE RESULT
    result_format = op.format
    if result_format == "cube":
        return list2cube(container)

    if result_format == "table":
        table = list2table(container)
        table.meta.format = "table"
        return table

    return dict_to_data({
        "meta": {"format": "list"},
        "data": container
    })
def _get_spot_prices_from_aws(self):
    """
    MERGE THE LOCALLY CACHED PRICE FILE WITH THE SPOT PRICE HISTORY REPORTED
    BY EC2, THEN SAVE THE RECENT PRICES BACK TO THE PRICE FILE

    SIDE EFFECTS: SETS self.no_capacity AND REWRITES self.settings.price_file

    :return: ListContainer NAMED "prices" WITH THE CACHED AND FETCHED PRICES
    """
    with Timer("Read no capacity file"):
        try:
            # FILE IS LIST OF {instance_type, last_failure} OBJECTS
            content = self.no_capacity_file.read()
            self.no_capacity = {
                r.instance_type: r.last_failure
                for r in convert.json2value(content, flexible=False, leaves=False)
            }
        except Exception:
            # BEST EFFORT: AN UNREADABLE FILE IS TREATED AS "NO KNOWN FAILURES"
            self.no_capacity = {}

    with Timer("Read pricing file"):
        try:
            content = File(self.settings.price_file).read()
            cache = convert.json2value(content, flexible=False, leaves=False)
        except Exception:
            # BEST EFFORT: AN UNREADABLE FILE IS TREATED AS AN EMPTY CACHE
            cache = FlatList()

    cache = ListContainer(name=None, data=cache)
    # LATEST CACHED timestamp FOR EACH (instance_type, availability_zone)
    most_recents = jx.run({
        "from": cache,
        "edges": ["instance_type", "availability_zone"],
        "select": {"value": "timestamp", "aggregate": "max"}
    })

    zones = self._get_valid_availability_zones()
    prices = set(cache)
    with Timer("Get pricing from AWS"):
        for instance_type in self.settings.utility.keys():
            for zone in zones:
                # ASK ONLY FOR WHAT IS NEWER THAN THE CACHE, AND AT MOST ONE WEEK
                if cache:
                    most_recent = most_recents[{
                        "instance_type": instance_type,
                        "availability_zone": zone
                    }].timestamp
                    start_at = MAX([Date(most_recent), Date.today() - WEEK])
                else:
                    start_at = Date.today() - WEEK

                if DEBUG_PRICING:
                    Log.note(
                        "get pricing for {{instance_type}} starting at {{start_at}}",
                        instance_type=instance_type,
                        start_at=start_at
                    )

                # FOLLOW next_token UNTIL THE HISTORY IS EXHAUSTED
                next_token = None
                while True:
                    resultset = self.ec2_conn.get_spot_price_history(
                        product_description=coalesce(
                            self.settings.product, "Linux/UNIX (Amazon VPC)"
                        ),
                        instance_type=instance_type,
                        availability_zone=zone,
                        start_time=start_at.format(ISO8601),
                        next_token=next_token
                    )
                    next_token = resultset.next_token

                    for p in resultset:
                        prices.add(wrap({
                            "availability_zone": p.availability_zone,
                            "instance_type": p.instance_type,
                            "price": p.price,
                            "product_description": p.product_description,
                            "region": p.region.name,
                            "timestamp": Date(p.timestamp).unix
                        }))

                    if not next_token:
                        break

    with Timer("Save prices to file"):
        new_prices = jx.filter(prices, {"gte": {"timestamp": {"date": "today-2day"}}})

        def stream():
            # IT'S A LOT OF PRICES, STREAM THEM TO FILE
            prefix = "[\n"
            nothing_written = True
            for p in new_prices:
                yield prefix
                yield convert.value2json(p)
                prefix = ",\n"
                nothing_written = False
            if nothing_written:
                # STILL OPEN THE ARRAY, OTHERWISE THE FILE IS JUST "]",
                # WHICH IS NOT VALID JSON AND CAN NOT BE READ BACK
                yield prefix
            yield "]"

        File(self.settings.price_file).write(stream())

    return ListContainer(name="prices", data=prices)
def pricing(self):
    """
    RETURN THE BID PRICING FOR EACH (instance_type, availability_zone),
    SORTED BY estimated_value, DESCENDING

    THE RESULT IS CACHED IN self.prices; WHEN NOT YET CACHED IT IS COMPUTED
    FROM _get_spot_prices_from_aws(), AND self.price_lookup IS REBUILT TOO.
    ALL THE WORK IS DONE WHILE HOLDING self.price_locker

    :return: self.prices
    """
    with self.price_locker:
        if self.prices:
            return self.prices

        prices = self._get_spot_prices_from_aws()
        now = Date.now()

        with Timer("processing pricing data"):
            # --- STEP 1: MAX PRICE PER HOUR, PER ZONE AND INSTANCE TYPE ---
            priced_intervals = {
                # AWS PRICING ONLY SENDS timestamp OF CHANGES, MATCH WITH NEXT INSTANCE
                "from": prices,
                "window": [
                    {
                        "name": "expire",
                        "value": {"coalesce": [{"rows": {"timestamp": 1}}, {"date": "eod"}]},
                        "edges": ["availability_zone", "instance_type"],
                        "sort": "timestamp"
                    },
                    {
                        # MAKE THIS PRICE EFFECTIVE INTO THE PAST, THIS HELPS SPREAD PRICE SPIKES OVER TIME
                        "name": "effective",
                        "value": {"sub": {"timestamp": self.settings.uptime.duration.seconds}}
                    }
                ]
            }
            hourly_edge = {
                "name": "time",
                "range": {"min": "effective", "max": "expire", "mode": "inclusive"},
                "allowNulls": False,
                "domain": {
                    "type": "time",
                    "min": now.floor(HOUR) - self.settings.uptime.history,
                    "max": Date.now().floor(HOUR) + HOUR,
                    "interval": "hour"
                }
            }
            hourly_select = [
                {"value": "price", "aggregate": "max"},
                {"aggregate": "count"}
            ]
            still_relevant = {
                "gt": {"expire": now.floor(HOUR) - self.settings.uptime.history}
            }
            current_price_window = {
                "name": "current_price",
                "value": "rows.last.price",
                "edges": ["availability_zone", "instance_type"],
                "sort": "time"
            }
            hourly_result = jx.run({
                "from": priced_intervals,
                "edges": ["availability_zone", "instance_type", hourly_edge],
                "select": hourly_select,
                "where": still_relevant,
                "window": [current_price_window]
            })
            hourly_pricing = hourly_result.data

            # --- STEP 2: SUMMARIZE THE HOURLY PRICES INTO BID STATISTICS ---
            bid_edges = [
                {"value": "availability_zone", "allowNulls": False},
                {
                    "name": "type",
                    "value": "instance_type",
                    "allowNulls": False,
                    "domain": {
                        "type": "set",
                        "key": "instance_type",
                        "partitions": self.settings.utility
                    }
                }
            ]
            bid_select = [
                {
                    "name": "price_80",
                    "value": "price",
                    "aggregate": "percentile",
                    "percentile": self.settings.uptime.bid_percentile
                },
                {"name": "max_price", "value": "price", "aggregate": "max"},
                {"aggregate": "count"},
                {"value": "current_price", "aggregate": "one"},
                {"name": "all_price", "value": "price", "aggregate": "list"}
            ]
            bid_windows = [
                {
                    "name": "estimated_value",
                    "value": {"div": ["type.utility", "price_80"]}
                },
                {
                    "name": "higher_price",
                    "value": lambda row, rownum, rows: find_higher(row.all_price, row.price_80)
                }
                # TODO: SUPPORT {"from":"all_price", "where":{"gt":[".", "price_80"]}, "select":{"aggregate":"min"}}
            ]
            bid80 = jx.run({
                "from": ListContainer(name=None, data=hourly_pricing),
                "edges": bid_edges,
                "select": bid_select,
                "window": bid_windows
            })

            # --- STEP 3: BEST ESTIMATED VALUE FIRST, THEN CACHE ---
            by_value_descending = {"value": "estimated_value", "sort": -1}
            output = jx.sort(bid80.values(), by_value_descending)

            self.prices = wrap(output)
            self.price_lookup = UniqueIndex(
                ("type.instance_type", "availability_zone"),
                data=self.prices
            )
        return self.prices