Example #1
def get_raw_json(path):
    active_data_timer = Timer("total duration")
    body = flask.request.get_data()
    try:
        with active_data_timer:
            args = wrap(Data(**flask.request.args))
            limit = args.limit if args.limit else 10
            args.limit = None
            frum = wrap_from(path)
            result = jx.run(
                {
                    "from": path,
                    "where": {
                        "eq": args
                    },
                    "limit": limit,
                    "format": "list"
                }, frum)

            if isinstance(
                    result, Container
            ):  #TODO: REMOVE THIS CHECK, jx SHOULD ALWAYS RETURN Containers
                result = result.format("list")

        result.meta.active_data_response_time = active_data_timer.duration

        response_data = convert.unicode2utf8(
            convert.value2json(result.data, pretty=True))
        Log.note("Response is {{num}} bytes", num=len(response_data))
        return Response(response_data, status=200)
    except Exception as e:
        e = Except.wrap(e)
        return _send_error(active_data_timer, body, e)
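The handler above turns the URL query string into an {"eq": args} filter, caps the result with limit, and serializes the hits as a JSON list. As a rough, pure-Python sketch of that filter semantics (hypothetical sample rows, not the jx implementation):

def eq_filter(rows, args, limit=10):
    # KEEP ROWS WHERE EVERY key/value PAIR IN args MATCHES, UP TO limit ROWS
    matches = (row for row in rows if all(row.get(k) == v for k, v in args.items()))
    return [row for _, row in zip(range(limit), matches)]

rows = [
    {"branch": "mozilla-central", "status": "ok"},
    {"branch": "try", "status": "failed"},
]
print(eq_filter(rows, {"branch": "try"}))  # [{'branch': 'try', 'status': 'failed'}]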
Example #2
    def _get_queue(self, row):
        row = wrap(row)
        if row.json:
            row.value, row.json = convert.json2value(row.json), None
        timestamp = Date(self.rollover_field(wrap(row).value))
        if timestamp == None or timestamp < Date.today() - self.rollover_max:
            return Null

        rounded_timestamp = timestamp.floor(self.rollover_interval)
        queue = self.known_queues.get(rounded_timestamp.unix)
        if queue == None:
            candidates = jx.run({
                "from": self.cluster.get_aliases(),
                "where": {
                    "regex": {
                        "index":
                        self.settings.index + "\d\d\d\d\d\d\d\d_\d\d\d\d\d\d"
                    }
                },
                "sort": "index"
            })
            best = None
            for c in candidates:
                c = wrap(c)
                c.date = unicode2Date(c.index[-15:],
                                      elasticsearch.INDEX_DATE_FORMAT)
                if timestamp > c.date:
                    best = c
            if not best or rounded_timestamp > best.date:
                if rounded_timestamp < wrap(candidates[-1]).date:
                    es = elasticsearch.Index(read_only=False,
                                             alias=best.alias,
                                             index=best.index,
                                             settings=self.settings)
                else:
                    try:
                        es = self.cluster.create_index(
                            create_timestamp=rounded_timestamp,
                            settings=self.settings)
                        es.add_alias(self.settings.index)
                    except Exception as e:
                        if "IndexAlreadyExistsException" not in e:
                            Log.error("Problem creating index", cause=e)
                        return self._get_queue(row)  # TRY AGAIN
            else:
                es = elasticsearch.Index(read_only=False,
                                         alias=best.alias,
                                         index=best.index,
                                         settings=self.settings)

            with suppress_exception:
                es.set_refresh_interval(seconds=60 * 10, timeout=5)

            self._delete_old_indexes(candidates)

            queue = self.known_queues[
                rounded_timestamp.unix] = es.threaded_queue(
                    max_size=self.settings.queue_size,
                    batch_size=self.settings.batch_size,
                    silent=True)
        return queue
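The jx query above selects candidate indexes whose names end in a timestamp suffix. A standalone sketch of that naming convention, using Python's re on a made-up prefix:

import re

# ROLLOVER INDEXES ARE NAMED <prefix>YYYYMMDD_HHMMSS; THE CODE ABOVE SLICES THE
# LAST 15 CHARACTERS (c.index[-15:]) TO RECOVER THE DATE
INDEX_SUFFIX = re.compile(r"\d{8}_\d{6}$")

name = "unittest20160101_000000"           # HYPOTHETICAL INDEX NAME
print(INDEX_SUFFIX.search(name).group(0))  # 20160101_000000
print(name[-15:])                          # SAME SUBSTRING, AS SLICED ABOVE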
Example #3
def allowed_query(data):
    data = json2value(data)
    data.edges = None
    data.groupby = None
    data.select = {"value": "result.test", "aggregate": "cardinality"}
    data.format = "list"

    frum = wrap_from(data['from'])
    result = jx.run(data, frum=frum)
    if result.data < 10000:
        return True
    else:
        Log.error("not allowed to groupby result.test")
Example #4
    def get(*args, **kwargs):
        body = kwargs.get("data")

        if not body:
            return wrap({"status_code": 400})

        text = convert.utf82unicode(body)
        text = replace_vars(text)
        data = convert.json2value(text)
        result = jx.run(data)
        output_bytes = convert.unicode2utf8(convert.value2json(result))
        return wrap({
            "status_code": 200,
            "all_content": output_bytes,
            "content": output_bytes
        })
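This looks like a test double for an HTTP GET: it decodes the body, substitutes variables, runs the query locally, and wraps the output bytes in a response-shaped dict. Presumably replace_vars fills {{name}} placeholders from a parameter dict (Example #7 passes flask.request.args); a hedged sketch of that assumed behavior, not the actual helper:

import re

def replace_vars_sketch(text, params):
    # SUBSTITUTE {{name}} PLACEHOLDERS FROM params; LEAVE UNKNOWN NAMES UNTOUCHED
    return re.sub(r"\{\{(\w+)\}\}", lambda m: str(params.get(m.group(1), m.group(0))), text)

query_text = '{"from": "unittest", "limit": {{limit}}}'
print(replace_vars_sketch(query_text, {"limit": 10}))
# {"from": "unittest", "limit": 10}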
Example #5
    def _get_queue(self, row):
        row = wrap(row)
        if row.json:
            row.value, row.json = mo_json.json2value(row.json), None
        timestamp = Date(self.rollover_field(wrap(row).value))
        if timestamp == None:
            return Null
        elif timestamp < Date.today() - self.rollover_max:
            return DATA_TOO_OLD

        rounded_timestamp = timestamp.floor(self.rollover_interval)
        with self.locker:
            queue = self.known_queues.get(rounded_timestamp.unix)
        if queue == None:
            candidates = jx.run({
                "from": self.cluster.get_aliases(),
                "where": {"regex": {"index": self.settings.index + "\d\d\d\d\d\d\d\d_\d\d\d\d\d\d"}},
                "sort": "index"
            })
            best = None
            for c in candidates:
                c = wrap(c)
                c.date = unicode2Date(c.index[-15:], elasticsearch.INDEX_DATE_FORMAT)
                if timestamp > c.date:
                    best = c
            if not best or rounded_timestamp > best.date:
                if rounded_timestamp < wrap(candidates[-1]).date:
                    es = elasticsearch.Index(read_only=False, alias=best.alias, index=best.index, kwargs=self.settings)
                else:
                    try:
                        es = self.cluster.create_index(create_timestamp=rounded_timestamp, kwargs=self.settings)
                        es.add_alias(self.settings.index)
                    except Exception as e:
                        if "IndexAlreadyExistsException" not in e:
                            Log.error("Problem creating index", cause=e)
                        return self._get_queue(row)  # TRY AGAIN
            else:
                es = elasticsearch.Index(read_only=False, alias=best.alias, index=best.index, kwargs=self.settings)

            with suppress_exception:
                es.set_refresh_interval(seconds=60 * 5, timeout=5)

            self._delete_old_indexes(candidates)
            threaded_queue = es.threaded_queue(max_size=self.settings.queue_size, batch_size=self.settings.batch_size, silent=True)
            with self.locker:
                queue = self.known_queues[rounded_timestamp.unix] = threaded_queue
        return queue
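Compared with Example #2, this later revision distinguishes a missing timestamp (Null) from data past the rollover window (DATA_TOO_OLD), guards every read and write of known_queues with a lock so concurrent callers cannot race on queue creation, shortens the refresh interval from ten minutes to five, and explicitly returns the queue it builds.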
Example #6
    def query(self, _query):
        try:
            query = QueryOp.wrap(_query, schema=self)

            for n in self.namespaces:
                query = n.convert(query)
            if self.typed:
                query = Typed().convert(query)

            for s in listwrap(query.select):
                if not aggregates1_4.get(s.aggregate):
                    Log.error(
                        "ES can not aggregate {{name}} because {{aggregate|quote}} is not a recognized aggregate",
                        name=s.name,
                        aggregate=s.aggregate
                    )

            frum = query["from"]
            if isinstance(frum, QueryOp):
                result = self.query(frum)
                q2 = query.copy()
                q2.frum = result
                return jx.run(q2)

            if is_deepop(self._es, query):
                return es_deepop(self._es, query)
            if is_aggsop(self._es, query):
                return es_aggsop(self._es, frum, query)
            if is_setop(self._es, query):
                return es_setop(self._es, query)
            if es09_setop.is_setop(query):
                return es09_setop.es_setop(self._es, None, query)
            if es09_aggop.is_aggop(query):
                return es09_aggop.es_aggop(self._es, None, query)
            Log.error("Can not handle")
        except Exception as e:
            e = Except.wrap(e)
            if "Data too large, data for" in e:
                http.post(self._es.cluster.path+"/_cache/clear")
                Log.error("Problem (Tried to clear Elasticsearch cache)", e)
            Log.error("problem", e)
Example #7
def query(path):
    with CProfiler():
        try:
            with Timer("total duration") as query_timer:
                preamble_timer = Timer("preamble")
                with preamble_timer:
                    if flask.request.headers.get("content-length",
                                                 "") in ["", "0"]:
                        # ASSUME A BROWSER HIT THIS POINT, SEND text/html RESPONSE BACK
                        return Response(BLANK,
                                        status=400,
                                        headers={"Content-Type": "text/html"})
                    elif int(flask.request.headers["content-length"]
                             ) > QUERY_SIZE_LIMIT:
                        Log.error("Query is too large")

                    request_body = flask.request.get_data().strip()
                    text = convert.utf82unicode(request_body)
                    text = replace_vars(text, flask.request.args)
                    data = convert.json2value(text)
                    record_request(flask.request, data, None, None)
                    if data.meta.testing:
                        _test_mode_wait(data)

                translate_timer = Timer("translate")
                with translate_timer:
                    if data.sql:
                        data = parse_sql(data.sql)
                    frum = wrap_from(data['from'])
                    result = jx.run(data, frum=frum)

                    if isinstance(
                            result, Container
                    ):  #TODO: REMOVE THIS CHECK, jx SHOULD ALWAYS RETURN Containers
                        result = result.format(data.format)

                save_timer = Timer("save")
                with save_timer:
                    if data.meta.save:
                        try:
                            result.meta.saved_as = save_query.query_finder.save(
                                data)
                        except Exception as e:
                            Log.warning("Unexpected save problem", cause=e)

                result.meta.timing.preamble = Math.round(
                    preamble_timer.duration.seconds, digits=4)
                result.meta.timing.translate = Math.round(
                    translate_timer.duration.seconds, digits=4)
                result.meta.timing.save = Math.round(
                    save_timer.duration.seconds, digits=4)
                result.meta.timing.total = "{{TOTAL_TIME}}"  # TIMING PLACEHOLDER

                with Timer("jsonification") as json_timer:
                    response_data = convert.unicode2utf8(
                        convert.value2json(result))

            with Timer("post timer"):
                # IMPORTANT: WE WANT THE TIME OF THE JSON SERIALIZATION, AND HAVE IT IN THE JSON ITSELF.
                # WE CHEAT BY DOING A (HOPEFULLY FAST) STRING REPLACEMENT AT THE VERY END
                timing_replacement = (
                    b'"total": ' + str(Math.round(query_timer.duration.seconds, digits=4)).encode('utf8') +
                    b', "jsonification": ' + str(Math.round(json_timer.duration.seconds, digits=4)).encode('utf8')
                )
                response_data = response_data.replace(
                    b'"total": "{{TOTAL_TIME}}"', timing_replacement)
                Log.note("Response is {{num}} bytes in {{duration}}",
                         num=len(response_data),
                         duration=query_timer.duration)

                return Response(
                    response_data,
                    status=200,
                    headers={"Content-Type": result.meta.content_type})
        except Exception as e:
            e = Except.wrap(e)
            return _send_error(query_timer, request_body, e)
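The response reports its own total time, including JSON serialization, which cannot be known until serialization finishes. The code therefore serializes with a "{{TOTAL_TIME}}" placeholder and patches the measured numbers in with a byte-level replace. A self-contained sketch of the same trick using only the standard library:

import json
import time

start = time.time()
result = {"data": [1, 2, 3], "meta": {"timing": {"total": "{{TOTAL_TIME}}"}}}
body = json.dumps(result).encode("utf8")  # SERIALIZE WITH THE PLACEHOLDER IN PLACE

# NOW THE TOTAL INCLUDES THE SERIALIZATION ITSELF; PATCH IT INTO THE BYTES
total = round(time.time() - start, 4)
body = body.replace(b'"{{TOTAL_TIME}}"', str(total).encode("utf8"))
print(body)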
Example #8
    def query(self, query):
        query.frum = self.__iter__()
        output = jx.run(query)

        return output
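Here the container answers queries by re-materializing itself through __iter__ and handing the rows to jx.run, so any iterable-backed class can serve jx queries without a dedicated query backend.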
Example #9
    def _get_spot_prices_from_aws(self):
        with Timer("Read pricing file"):
            try:
                content = File(self.settings.price_file).read()
                cache = convert.json2value(content, flexible=False, leaves=False)
            except Exception as e:
                cache = FlatList()

        most_recents = jx.run({
            "from": cache,
            "edges": ["instance_type", "availability_zone"],
            "select": {"value": "timestamp", "aggregate": "max"}
        })

        zones = self._get_valid_availability_zones()
        prices = set(cache)
        with Timer("Get pricing from AWS"):
            for instance_type in self.settings.utility.keys():
                for zone in zones:
                    if cache:
                        most_recent = most_recents[{
                            "instance_type": instance_type,
                            "availability_zone": zone
                        }].timestamp
                        start_at = MAX([Date(most_recent), Date.today() - WEEK])
                    else:
                        start_at = Date.today() - WEEK

                    if DEBUG_PRICING:
                        Log.note("get pricing for {{instance_type}} starting at {{start_at}}",
                            instance_type=instance_type,
                            start_at=start_at
                        )

                    next_token = None
                    while True:
                        resultset = self.ec2_conn.get_spot_price_history(
                            product_description=coalesce(self.settings.product, "Linux/UNIX (Amazon VPC)"),
                            instance_type=instance_type,
                            availability_zone=zone,
                            start_time=start_at.format(ISO8601),
                            next_token=next_token
                        )
                        next_token = resultset.next_token

                        for p in resultset:
                            prices.add(wrap({
                                "availability_zone": p.availability_zone,
                                "instance_type": p.instance_type,
                                "price": p.price,
                                "product_description": p.product_description,
                                "region": p.region.name,
                                "timestamp": Date(p.timestamp).unix
                            }))

                        if not next_token:
                            break

        with Timer("Save prices to file"):
            new_prices = jx.filter(prices, {"gte": {"timestamp": {"date": "today-2day"}}})
            def stream():  # IT'S A LOT OF PRICES, STREAM THEM TO FILE
                prefix = "[\n"
                for p in new_prices:
                    yield prefix
                    yield convert.value2json(p)
                    prefix = ",\n"
                yield "]"
            File(self.settings.price_file).write(stream())

        return prices
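Writing the price list through a generator keeps the whole JSON array from being built in memory; the prefix flips from "[\n" to ",\n" after the first element so separators always land between items. The same pattern with only the standard library (hypothetical output path):

import json

def stream_json(items):
    # EMIT A JSON ARRAY ONE ELEMENT AT A TIME; prefix PUTS THE COMMA BEFORE
    # EVERY ELEMENT EXCEPT THE FIRST
    prefix = "[\n"
    for item in items:
        yield prefix
        yield json.dumps(item)
        prefix = ",\n"
    yield "]"

with open("/tmp/prices.json", "w") as f:
    f.writelines(stream_json([{"price": 0.01}, {"price": 0.02}]))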
Example #10
    def pricing(self):
        with self.price_locker:
            if self.prices:
                return self.prices

            prices = self._get_spot_prices_from_aws()

            now = Date.now()
            expressions.ALLOW_SCRIPTING = True

            with Timer("processing pricing data"):
                hourly_pricing = jx.run({
                    "from": {
                        # AWS PRICING ONLY SENDS timestamp OF CHANGES, MATCH WITH NEXT INSTANCE
                        "from": prices,
                        "window": [
                            {
                                "name": "expire",
                                "value": {"coalesce": [{"rows": {"timestamp": 1}}, {"date": "eod"}]},
                                "edges": ["availability_zone", "instance_type"],
                                "sort": "timestamp"
                            },
                            {  # MAKE THIS PRICE EFFECTIVE INTO THE PAST, THIS HELPS SPREAD PRICE SPIKES OVER TIME
                                "name": "effective",
                                "value": {"sub": {"timestamp": self.settings.uptime.duration.seconds}}
                            }
                        ]
                    },
                    "edges": [
                        "availability_zone",
                        "instance_type",
                        {
                            "name": "time",
                            "range": {"min": "effective", "max": "expire", "mode": "inclusive"},
                            "allowNulls": False,
                            "domain": {"type": "time", "min": now.floor(HOUR) - self.settings.uptime.history, "max": Date.now().floor(HOUR)+HOUR, "interval": "hour"}
                        }
                    ],
                    "select": [
                        {"value": "price", "aggregate": "max"},
                        {"aggregate": "count"}
                    ],
                    "where": {"gt": {"expire": now.floor(HOUR) - self.settings.uptime.history}},
                    "window": [
                        {
                            "name": "current_price",
                            "value": "rows.last.price",
                            "edges": ["availability_zone", "instance_type"],
                            "sort": "time"
                        }
                    ]
                }).data

                bid80 = jx.run({
                    "from": hourly_pricing,
                    "edges": [
                        {
                            "value": "availability_zone",
                            "allowNulls": False
                        },
                        {
                            "name": "type",
                            "value": "instance_type",
                            "allowNulls": False,
                            "domain": {"type": "set", "key": "instance_type", "partitions": self.settings.utility}
                        }
                    ],
                    "select": [
                        {"name": "price_80", "value": "price", "aggregate": "percentile", "percentile": self.settings.uptime.bid_percentile},
                        {"name": "max_price", "value": "price", "aggregate": "max"},
                        {"aggregate": "count"},
                        {"value": "current_price", "aggregate": "one"},
                        {"name": "all_price", "value": "price", "aggregate": "list"}
                    ],
                    "window": [
                        {"name": "estimated_value", "value": {"div": ["type.utility", "price_80"]}},
                        {"name": "higher_price", "value": lambda row, rownum, rows: find_higher(row.all_price, row.price_80)}  # TODO: SUPPORT {"from":"all_price", "where":{"gt":[".", "price_80"]}, "select":{"aggregate":"min"}}
                    ]
                })

                output = jx.run({
                    "from": bid80,
                    "sort": {"value": "estimated_value", "sort": -1}
                })

                self.prices = wrap(output.data)
                self.price_lookup = UniqueIndex(("type.instance_type", "availability_zone"), data=self.prices)
            return self.prices
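The pipeline above spreads point-in-time price changes over hourly buckets, bids at a configured percentile of the observed prices, and ranks instance types by utility per dollar ({"div": ["type.utility", "price_80"]}). A back-of-the-envelope version with made-up prices and utilities standing in for self.settings:

def percentile(values, p):
    # NEAREST-RANK PERCENTILE; GOOD ENOUGH FOR A SKETCH
    values = sorted(values)
    return values[min(int(p * len(values)), len(values) - 1)]

prices = {"m3.xlarge": [0.05, 0.06, 0.07, 0.30], "c3.large": [0.02, 0.02, 0.03]}
utility = {"m3.xlarge": 4.0, "c3.large": 1.0}  # HYPOTHETICAL UTILITY PER TYPE

bids = {t: percentile(v, 0.80) for t, v in prices.items()}
ranked = sorted(bids, key=lambda t: utility[t] / bids[t], reverse=True)
print(bids)    # {'m3.xlarge': 0.3, 'c3.large': 0.03}
print(ranked)  # ['c3.large', 'm3.xlarge']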