def query(self, q):
        frum = self
        if is_aggs(q):
            return cube_aggs(frum, q)

        columns = dot.dict_to_data({s.name: s for s in self.select + self.edges})

        # DEFER TO ListContainer
        from jx_python.containers.list import ListContainer

        frum = ListContainer(name="", data=frum.values(), schema=columns)
        return frum.query(q)
    def denormalized(self):
        """
        THE INTERNAL STRUCTURE FOR THE COLUMN METADATA IS VERY DIFFERENT FROM
        THE DENORMALIZED PERSPECITVE. THIS PROVIDES THAT PERSPECTIVE FOR QUERIES
        """
        with self.locker:
            self._update_meta()
            output = [
                {
                    "table": c.es_index,
                    "name": untype_path(c.name),
                    "cardinality": c.cardinality,
                    "es_column": c.es_column,
                    "es_index": c.es_index,
                    "last_updated": c.last_updated,
                    "count": c.count,
                    "nested_path": [unnest_path(n) for n in c.nested_path],
                    "es_type": c.es_type,
                    "type": c.jx_type,
                } for tname, css in self.data.items()
                for cname, cs in css.items() for c in cs
                if c.jx_type not in INTERNAL  # and c.es_column != "_id"
            ]

        from jx_python.containers.list import ListContainer

        return ListContainer(
            self.name,
            data=output,
            schema=BaseSchema(META_COLUMNS_NAME, SIMPLE_METADATA_COLUMNS),
        )
def download_perfherder(desc, repo, id, dummy, framework):
    sig_result = http.get_json(
        "https://treeherder.mozilla.org/api/project/"
        + repo
        + "/performance/signatures/?format=json&framework="
        + str(framework)
        + "&id="
        + str(id)
    )

    signature = first(sig_result.keys())
    data_result = http.get_json(
        "https://treeherder.mozilla.org/api/project/"
        + repo
        + "/performance/data/?signatures="
        + signature
    )

    Log.note(
        "{{result|json}}",
        result={
            "name": desc,
            "data": jx.run({
                "from": ListContainer("data", data_result[signature]),
                "sort": "push_timestamp",
                "select": "value"
            }).data
        },
    )
Esempio n. 4
0
    def query(self, query):
        # NOT EXPECTED TO BE RUN
        Log.error("not")
        with self.locker:
            self._update_meta()
            if not self._schema:
                self._schema = Schema(
                    ".", [c for cs in self.data[META_COLUMNS_NAME].values() for c in cs]
                )
            snapshot = self._all_columns()

        from jx_python.containers.list import ListContainer

        query.frum = ListContainer(META_COLUMNS_NAME, snapshot, self._schema)
        return jx.run(query)
Esempio n. 5
0
def run(query, container=Null):
    """
    THIS FUNCTION IS SIMPLY SWITCHING BASED ON THE query["from"] CONTAINER,
    BUT IT IS ALSO PROCESSING A list CONTAINER; SEPARATE TO A ListContainer
    """
    if container == None:
        container = to_data(query)["from"]
        query_op = QueryOp.wrap(query,
                                container=container,
                                namespace=container.schema)
    else:
        query_op = QueryOp.wrap(query,
                                container=container,
                                namespace=container.namespace)

    if container == None:
        from jx_python.containers.list import DUAL

        return DUAL.query(query_op)
    elif isinstance(container, Container):
        return container.query(query_op)
    elif is_many(container):
        container = ListContainer(name=None, data=list(container))
    elif isinstance(container, Cube):
        if is_aggs(query_op):
            return cube_aggs(container, query_op)
    elif is_op(container, QueryOp):
        container = run(container)
    elif is_data(container):
        query = container
        container = query["from"]
        container = run(QueryOp.wrap(query, container, container.namespace),
                        container)
    else:
        Log.error("Do not know how to handle {{type}}",
                  type=container.__class__.__name__)

    if is_aggs(query_op):
        container = list_aggs(container, query_op)
    else:  # SETOP
        if query_op.where is not TRUE:
            container = filter(container, query_op.where)

        if query_op.sort:
            container = sort(container, query_op.sort, already_normalized=True)

        if query_op.select:
            container = select(container, query_op.select)

    if query_op.window:
        if isinstance(container, Cube):
            container = list(container.values())

        for param in query_op.window:
            window(container, param)

    # AT THIS POINT frum IS IN LIST FORMAT, NOW PACKAGE RESULT
    if query_op.format == "cube":
        container = list2cube(container)
    elif query_op.format == "table":
        container = list2table(container)
        container.meta.format = "table"
    else:
        container = dict_to_data({
            "meta": {
                "format": "list"
            },
            "data": container
        })

    return container
    def _get_spot_prices_from_aws(self):
        with Timer("Read no capacity file"):
            try:
                # FILE IS LIST OF {instance_type, last_failure} OBJECTS
                content = self.no_capacity_file.read()
                self.no_capacity = dict(
                    (r.instance_type, r.last_failure)
                    for r in convert.json2value(
                        content, flexible=False, leaves=False))
            except Exception as e:
                self.no_capacity = {}

        with Timer("Read pricing file"):
            try:
                content = File(self.settings.price_file).read()
                cache = convert.json2value(content,
                                           flexible=False,
                                           leaves=False)
            except Exception as e:
                cache = FlatList()

        cache = ListContainer(name=None, data=cache)
        most_recents = jx.run({
            "from": cache,
            "edges": ["instance_type", "availability_zone"],
            "select": {
                "value": "timestamp",
                "aggregate": "max"
            }
        })

        zones = self._get_valid_availability_zones()
        prices = set(cache)
        with Timer("Get pricing from AWS"):
            for instance_type in self.settings.utility.keys():
                for zone in zones:
                    if cache:
                        most_recent = most_recents[{
                            "instance_type": instance_type,
                            "availability_zone": zone
                        }].timestamp
                        start_at = MAX(
                            [Date(most_recent),
                             Date.today() - WEEK])
                    else:
                        start_at = Date.today() - WEEK

                    if DEBUG_PRICING:
                        Log.note(
                            "get pricing for {{instance_type}} starting at {{start_at}}",
                            instance_type=instance_type,
                            start_at=start_at)

                    next_token = None
                    while True:
                        resultset = self.ec2_conn.get_spot_price_history(
                            product_description=coalesce(
                                self.settings.product,
                                "Linux/UNIX (Amazon VPC)"),
                            instance_type=instance_type,
                            availability_zone=zone,
                            start_time=start_at.format(ISO8601),
                            next_token=next_token)
                        next_token = resultset.next_token

                        for p in resultset:
                            prices.add(
                                wrap({
                                    "availability_zone": p.availability_zone,
                                    "instance_type": p.instance_type,
                                    "price": p.price,
                                    "product_description":
                                    p.product_description,
                                    "region": p.region.name,
                                    "timestamp": Date(p.timestamp).unix
                                }))

                        if not next_token:
                            break

        with Timer("Save prices to file"):
            new_prices = jx.filter(
                prices, {"gte": {
                    "timestamp": {
                        "date": "today-2day"
                    }
                }})

            def stream():  # IT'S A LOT OF PRICES, STREAM THEM TO FILE
                prefix = "[\n"
                for p in new_prices:
                    yield prefix
                    yield convert.value2json(p)
                    prefix = ",\n"
                yield "]"

            File(self.settings.price_file).write(stream())

        return ListContainer(name="prices", data=prices)
    def pricing(self):
        with self.price_locker:
            if self.prices:
                return self.prices

            prices = self._get_spot_prices_from_aws()
            now = Date.now()

            with Timer("processing pricing data"):
                hourly_pricing = jx.run({
                    "from": {
                        # AWS PRICING ONLY SENDS timestamp OF CHANGES, MATCH WITH NEXT INSTANCE
                        "from":
                        prices,
                        "window": [
                            {
                                "name": "expire",
                                "value": {
                                    "coalesce": [{
                                        "rows": {
                                            "timestamp": 1
                                        }
                                    }, {
                                        "date": "eod"
                                    }]
                                },
                                "edges":
                                ["availability_zone", "instance_type"],
                                "sort": "timestamp"
                            },
                            {  # MAKE THIS PRICE EFFECTIVE INTO THE PAST, THIS HELPS SPREAD PRICE SPIKES OVER TIME
                                "name": "effective",
                                "value": {
                                    "sub": {
                                        "timestamp":
                                        self.settings.uptime.duration.seconds
                                    }
                                }
                            }
                        ]
                    },
                    "edges": [
                        "availability_zone", "instance_type", {
                            "name": "time",
                            "range": {
                                "min": "effective",
                                "max": "expire",
                                "mode": "inclusive"
                            },
                            "allowNulls": False,
                            "domain": {
                                "type": "time",
                                "min":
                                now.floor(HOUR) - self.settings.uptime.history,
                                "max": Date.now().floor(HOUR) + HOUR,
                                "interval": "hour"
                            }
                        }
                    ],
                    "select": [{
                        "value": "price",
                        "aggregate": "max"
                    }, {
                        "aggregate": "count"
                    }],
                    "where": {
                        "gt": {
                            "expire":
                            now.floor(HOUR) - self.settings.uptime.history
                        }
                    },
                    "window": [{
                        "name": "current_price",
                        "value": "rows.last.price",
                        "edges": ["availability_zone", "instance_type"],
                        "sort": "time"
                    }]
                }).data

                bid80 = jx.run({
                    "from":
                    ListContainer(name=None, data=hourly_pricing),
                    "edges": [{
                        "value": "availability_zone",
                        "allowNulls": False
                    }, {
                        "name": "type",
                        "value": "instance_type",
                        "allowNulls": False,
                        "domain": {
                            "type": "set",
                            "key": "instance_type",
                            "partitions": self.settings.utility
                        }
                    }],
                    "select": [{
                        "name":
                        "price_80",
                        "value":
                        "price",
                        "aggregate":
                        "percentile",
                        "percentile":
                        self.settings.uptime.bid_percentile
                    }, {
                        "name": "max_price",
                        "value": "price",
                        "aggregate": "max"
                    }, {
                        "aggregate": "count"
                    }, {
                        "value": "current_price",
                        "aggregate": "one"
                    }, {
                        "name": "all_price",
                        "value": "price",
                        "aggregate": "list"
                    }],
                    "window": [
                        {
                            "name": "estimated_value",
                            "value": {
                                "div": ["type.utility", "price_80"]
                            }
                        },
                        {
                            "name":
                            "higher_price",
                            "value":
                            lambda row, rownum, rows: find_higher(
                                row.all_price, row.price_80)
                        }  # TODO: SUPPORT {"from":"all_price", "where":{"gt":[".", "price_80"]}, "select":{"aggregate":"min"}}
                    ]
                })

                output = jx.sort(bid80.values(), {
                    "value": "estimated_value",
                    "sort": -1
                })

                self.prices = wrap(output)
                self.price_lookup = UniqueIndex(
                    ("type.instance_type", "availability_zone"),
                    data=self.prices)
            return self.prices