Example #1
def get_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    response = get(url, **kwargs)
    c = response.all_content
    return convert.json2value(convert.utf82unicode(c))
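
Nearly every example below follows the same convention: unicode2utf8(value2json(...)) to build a request body, and json2value(utf82unicode(...)) to parse a response. A minimal sketch of the assumed semantics (the real pyLibrary convert helpers add error handling and Null-safety):

def utf82unicode(value):
    # DECODE UTF-8 ENCODED bytes INTO A unicode STRING (ASSUMED SEMANTICS)
    return value.decode("utf8")

def unicode2utf8(value):
    # ENCODE A unicode STRING INTO UTF-8 bytes (ASSUMED SEMANTICS)
    return unicode(value).encode("utf8")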
Example #2
    def test_request(self):
        # MAKE SOME DATA
        data = {
            "constant": "this is a test",
            "random-data": convert.bytes2base64(Random.bytes(100))
        }

        client = Client(settings.url, unwrap(settings.hawk))  # unwrap() DUE TO BUG https://github.com/kumar303/mohawk/issues/21
        link, id = client.send(data)
        Log.note("Success!  Located at {{link}} id={{id}}", link=link, id=id)

        # FILL THE REST OF THE FILE
        Log.note("Add ing {{num}} more...", num=99-id)
        for i in range(id + 1, storage.BATCH_SIZE):
            l, k = client.send(data)
            if l != link:
                Log.error("Expecting rest of data to have same link")

        # TEST LINK HAS DATA
        raw_content = requests.get(link).content
        content = convert.zip2bytes(raw_content)
        for line in convert.utf82unicode(content).split("\n"):
            data = convert.json2value(line)
            if data.etl.id == id:
                Log.note("Data {{id}} found", id=id)
                break
        else:
            Log.error("Expecting to find data at link")
Example #3
    def test_multiple_agg_on_same_field(self):
        if self.not_real_service():
            return

        test = wrap({
            "query": {
                "from": {
                    "type": "elasticsearch",
                    "settings": {
                        "host": ES_CLUSTER_LOCATION,
                        "index": "unittest",
                        "type": "test_result"
                    }
                },
                "select": [{
                    "name": "max_bytes",
                    "value": "run.stats.bytes",
                    "aggregate": "max"
                }, {
                    "name": "count",
                    "value": "run.stats.bytes",
                    "aggregate": "count"
                }]
            }
        })

        query = convert.unicode2utf8(convert.value2json(test.query))
        # EXECUTE QUERY
        with Timer("query"):
            response = http.get(self.service_url, data=query)
            if response.status_code != 200:
                error(response)
        result = convert.json2value(convert.utf82unicode(response.all_content))

        Log.note("result\n{{result|indent}}", {"result": result})
Example #4
    def test_request(self):
        # SIMPLEST POSSIBLE REQUEST (NOTHING IMPORTANT HAPPENING)
        data = {
            "meta": {
                "suite": "sessionrestore_no_auto_restore osx-10-10",
                "platform": "osx-10-10",
                "e10s": False,
                "och": "opt",
                "bucket": "startup",
                "statistic": "mean"
            },
            "header": ["rownum", "timestamp", "revision", "value"],
            "data": [
                [1, "2015-12-06 09:21:15", "18339318", 879],
                [2, "2015-12-06 16:50:36", "18340858", 976],
                [3, "2015-12-06 19:01:54", "18342319", 880],
                [4, "2015-12-06 21:08:56", "18343567", 1003],
                [5, "2015-12-06 23:33:27", "18345266", 1002],
                [6, "2015-12-07 02:16:22", "18347807", 977],
                [7, "2015-12-07 02:18:29", "18348057", 1035],
                [8, "2015-12-07 04:51:52", "18351263", 1032],
                [9, "2015-12-07 05:29:42", "18351078", 1035],
                [10, "2015-12-07 05:50:37", "18351749", 1010]
            ]
        }

        response = requests.post(settings.url, json=data)
        self.assertEqual(response.status_code, 200)
        data = convert.json2value(convert.utf82unicode(response.content))
        self.assertEqual(data, {})
Example #5
def get_active_data(settings):
    query = {
        "limit": 100000,
        "from": "unittest",
        "where": {"and": [
            {"eq": {"result.ok": False}},
            {"gt": {"run.timestamp": RECENT.milli}}
        ]},
        "select": [
            "result.ok",
            "build.branch",
            "build.platform",
            "build.release",
            "build.revision",
            "build.type",
            "build.date",
            "run.timestamp",
            "run.suite",
            "run.chunk",
            "result.test",
            "run.stats.status.test_status"
        ],
        "format": "table"
    }
    result = http.post("http://activedata.allizom.org/query", data=convert.unicode2utf8(convert.value2json(query)))

    query_result = convert.json2value(convert.utf82unicode(result.all_content))

    tab = convert.table2tab(query_result.header, query_result.data)
    File(settings.output.activedata).write(tab)
Example #6
def get_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    response = get(url, **kwargs)
    c = response.all_content
    return mo_json.json2value(convert.utf82unicode(c))
Example #7
    def test_branch_count(self):
        if self.not_real_service():
            return

        test = wrap({"query": {
            "from": {
                "type": "elasticsearch",
                "settings": {
                    "host": ES_CLUSTER_LOCATION,
                    "index": "unittest",
                    "type": "test_result"
                }
            },
            "select": [
                {"aggregate": "count"},
            ],
            "edges": [
                "build.branch"
            ],
            "where": {"or": [
                {"missing": "build.id"}
                # {"gte": {"timestamp": Date.floor(Date.now() - (Duration.DAY * 7), Duration.DAY).milli / 1000}}
            ]},
            "format": "table"
        }})

        query = convert.unicode2utf8(convert.value2json(test.query))
        # EXECUTE QUERY
        with Timer("query"):
            response = http.get(self.service_url, data=query)
            if response.status_code != 200:
                error(response)
        result = convert.json2value(convert.utf82unicode(response.all_content))

        Log.note("result\n{{result|indent}}", {"result": result})
Example #8
def solve():
    try:
        data = convert.json2value(convert.utf82unicode(flask.request.data))
        solved = noop.solve(data)
        response_data = convert.unicode2utf8(convert.value2json(solved))

        return Response(
            response_data,
            direct_passthrough=True,  # FOR STREAMING
            status=200,
            headers={
                "access-control-allow-origin": "*",
                "content-type": "application/json"
            }
        )
    except Exception as e:
        e = Except.wrap(e)
        Log.warning("Could not process", cause=e)
        e = e.as_dict()
        return Response(
            convert.unicode2utf8(convert.value2json(e)),
            status=400,
            headers={
                "access-control-allow-origin": "*",
                "content-type": "application/json"
            }
        )
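
A hypothetical round-trip against this endpoint (the URL and payload are placeholders; the handler returns the solved structure with status 200, or a serialized Except with status 400):

import requests

# HYPOTHETICAL SERVICE LOCATION; THE REAL URL IS NOT GIVEN IN THE EXAMPLE
response = requests.post("http://localhost:5000/solve", data='{"problem": "noop"}')
if response.status_code == 200:
    print response.json()  # THE SOLVED STRUCTURE
else:
    print response.json()  # WARNING DETAILS, INCLUDING THE CAUSE CHAIN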
Example #9
def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    kwargs["data"] = convert.unicode2utf8(convert.value2json(kwargs["data"]))

    response = post(url, **kwargs)
    c = response.all_content
    return convert.json2value(convert.utf82unicode(c))
Example #10
    def test_timing(self):
        if self.not_real_service():
            return

        test = wrap({
            "query": {
                "from": {
                    "type": "elasticsearch",
                    "settings": {
                        "host": ES_CLUSTER_LOCATION,
                        "index": "unittest",
                        "type": "test_result"
                    }
                },
                "select": [{
                    "name": "count",
                    "value": "run.duration",
                    "aggregate": "count"
                }, {
                    "name": "total",
                    "value": "run.duration",
                    "aggregate": "sum"
                }],
                "edges": [{
                    "name": "chunk",
                    "value": ["run.suite", "run.chunk"]
                }, "result.ok"],
                "where": {
                    "and": [{
                        "lt": {
                            "timestamp": Date.floor(Date.now()).milli / 1000
                        }
                    }, {
                        "gte": {
                            "timestamp":
                            Date.floor(Date.now() - (Duration.DAY * 7),
                                       Duration.DAY).milli / 1000
                        }
                    }]
                },
                "format":
                "cube",
                "samples": {
                    "limit": 30
                }
            }
        })

        query = convert.unicode2utf8(convert.value2json(test.query))
        # EXECUTE QUERY
        with Timer("query"):
            response = http.get(self.service_url, data=query)
            if response.status_code != 200:
                error(response)
        result = convert.json2value(convert.utf82unicode(response.all_content))

        Log.note("result\n{{result|indent}}", {"result": result})
Example #11
def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    kwargs["data"] = convert.unicode2utf8(convert.value2json(kwargs["data"]))

    response = post(url, **kwargs)
    c = response.all_content
    return convert.json2value(convert.utf82unicode(c))
Example #12
    def all_lines(self):
        try:
            content = self.raw.read(decode_content=False)
            if self.headers.get('content-encoding') == 'gzip':
                return CompressedLines(content)
            elif self.headers.get('content-type') == 'application/zip':
                return ZipfileLines(content)
            else:
                return convert.utf82unicode(content).split("\n")
        except Exception as e:
            Log.error("Not JSON", e)
Example #13
    def all_lines(self):
        try:
            content = self.raw.read(decode_content=False)
            if self.headers.get('content-encoding') == 'gzip':
                return CompressedLines(content)
            elif self.headers.get('content-type') == 'application/zip':
                return ZipfileLines(content)
            else:
                return convert.utf82unicode(content).split("\n")
        except Exception as e:
            Log.error("Not JSON", e)
Example #14
def error(response):
    response = convert.utf82unicode(response.content)

    try:
        e = Except.new_instance(convert.json2value(response))
    except Exception:
        e = None

    if e:
        Log.error("Failed request", e)
    else:
        Log.error("Failed request\n {{response}}", {"response": response})
Example #15
    def test_simple_query(self):
        if self.not_real_service():
            return

        query = convert.unicode2utf8(convert.value2json({"from": "unittest"}))
        # EXECUTE QUERY
        with Timer("query"):
            response = http.get(self.service_url, data=query)
            if response.status_code != 200:
                error(response)
        result = convert.json2value(convert.utf82unicode(response.all_content))

        Log.note("result\n{{result|indent}}", {"result": result})
Example #16
    def read_lines(self, key):
        source = self.get_meta(key)
        if source is None:
            Log.error("{{key}} does not exist", key=key)
        if source.size < MAX_STRING_SIZE:
            if source.key.endswith(".gz"):
                return LazyLines(ibytes2ilines(scompressed2ibytes(source)))
            else:
                return convert.utf82unicode(source.read()).split("\n")

        if source.key.endswith(".gz"):
            return LazyLines(ibytes2ilines(scompressed2ibytes(source)))
        else:
            return LazyLines(source)
Example #17
def _scrub(value, is_done):
    type = value.__class__

    if type in (NoneType, NullType):
        return None
    elif type in (date, datetime):
        return float(datetime2unix(value))
    elif type is timedelta:
        return value.total_seconds()
    elif type is Date:
        return float(value.unix)
    elif type is Duration:
        return value.seconds
    elif type is str:
        return utf82unicode(value)
    elif type is Decimal:
        return float(value)
    elif isinstance(value, Mapping):
        _id = id(value)
        if _id in is_done:
            Log.error("possible loop in structure detected")
        is_done.add(_id)

        output = {}
        for k, v in value.iteritems():
            if not isinstance(k, basestring):
                Log.error("keys must be strings")
            v = _scrub(v, is_done)
            if v != None:
                output[k] = v

        is_done.discard(_id)
        return output
    elif type in (list, DictList):
        output = []
        for v in value:
            v = _scrub(v, is_done)
            output.append(v)
        return output
    elif type.__name__ == "bool_":  # DEAR ME!  Numpy has it's own booleans (value==False could be used, but 0==False in Python.  DOH!)
        if value == False:
            return False
        else:
            return True
    elif hasattr(value, '__json__'):
        try:
            output = json._default_decoder.decode(value.__json__())
            return output
        except Exception as e:
            Log.error("problem with calling __json__()", e)
Example #18
    def read_lines(self, key):
        source = self.get_meta(key)
        if source is None:
            Log.error("{{key}} does not exist",  key= key)
        if source.size < MAX_STRING_SIZE:
            if source.key.endswith(".gz"):
                return LazyLines(ibytes2ilines(scompressed2ibytes(source)))
            else:
                return convert.utf82unicode(source.read()).split("\n")

        if source.key.endswith(".gz"):
            return LazyLines(ibytes2ilines(scompressed2ibytes(source)))
        else:
            return LazyLines(source)
Example #19
    def execute_query(self, query):
        query = wrap(query)

        try:
            query = convert.unicode2utf8(convert.value2json(query))
            # EXECUTE QUERY
            response = self.try_till_response(self.service_url, data=query)

            if response.status_code != 200:
                error(response)
            result = convert.json2value(convert.utf82unicode(response.all_content))

            return result
        except Exception as e:
            Log.error("Failed query", e)
Example #20
    def get(*args, **kwargs):
        body = kwargs.get("data")

        if not body:
            return wrap({"status_code": 400})

        text = convert.utf82unicode(body)
        text = replace_vars(text)
        data = convert.json2value(text)
        result = jx.run(data)
        output_bytes = convert.unicode2utf8(convert.value2json(result))
        return wrap({
            "status_code": 200,
            "all_content": output_bytes,
            "content": output_bytes
        })
Example #21
    def read(self, key):
        source = self.get_meta(key)

        try:
            json = safe_size(source)
        except Exception as e:
            Log.error(READ_ERROR, e)

        if json == None:
            return None

        if source.key.endswith(".zip"):
            json = _unzip(json)
        elif source.key.endswith(".gz"):
            json = convert.zip2bytes(json)

        return convert.utf82unicode(json)
Example #22
def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    if b"json" in kwargs:
        kwargs[b"data"] = convert.unicode2utf8(convert.value2json(kwargs[b"json"]))
    elif b'data' in kwargs:
        kwargs[b"data"] = convert.unicode2utf8(convert.value2json(kwargs[b"data"]))
    else:
        Log.error("Expecting `json` parameter")

    response = post(url, **kwargs)
    c = response.content
    try:
        details = convert.json2value(convert.utf82unicode(c))
    except Exception as e:
        Log.error("Unexpected return value {{content}}", content=c, cause=e)
Example #23
    def read(self, key):
        source = self.get_meta(key)

        try:
            json = safe_size(source)
        except Exception as e:
            Log.error(READ_ERROR, e)

        if json == None:
            return None

        if source.key.endswith(".zip"):
            json = _unzip(json)
        elif source.key.endswith(".gz"):
            json = convert.zip2bytes(json)

        return convert.utf82unicode(json)
Example #24
    def test_longest_running_tests(self):
        test = wrap({
            "query": {
                "sort": {
                    "sort": -1,
                    "field": "avg"
                },
                "from": {
                    "from":
                    "unittest",
                    "where": {
                        "and": [{
                            "gt": {
                                "build.date": "1439337600"
                            }
                        }]
                    },
                    "groupby": [
                        "build.platform", "build.type", "run.suite",
                        "result.test"
                    ],
                    "select": [{
                        "aggregate": "avg",
                        "name": "avg",
                        "value": "result.duration"
                    }],
                    "format":
                    "table",
                    "limit":
                    100
                },
                "limit": 100,
                "format": "list"
            }
        })
        query = convert.unicode2utf8(convert.value2json(test.query))
        # EXECUTE QUERY
        with Timer("query"):
            response = http.get(self.service_url, data=query)
            if response.status_code != 200:
                error(response)
        result = convert.json2value(convert.utf82unicode(response.all_content))

        Log.note("result\n{{result|indent}}", {"result": result})
Example #25
def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    if b"json" in kwargs:
        kwargs[b"data"] = convert.unicode2utf8(
            convert.value2json(kwargs[b"json"]))
    elif b'data' in kwargs:
        kwargs[b"data"] = convert.unicode2utf8(
            convert.value2json(kwargs[b"data"]))
    else:
        Log.error("Expecting `json` parameter")

    response = post(url, **kwargs)
    c = response.content
    try:
        details = mo_json.json2value(convert.utf82unicode(c))
    except Exception as e:
        Log.error("Unexpected return value {{content}}", content=c, cause=e)
Example #26
    def read_lines(self, key):
        source = self.get_meta(key)
        if source is None:
            Log.error("{{key}} does not exist", key=key)
        if source.size < MAX_STRING_SIZE:
            if source.key.endswith(".gz"):
                return GzipLines(source.read())
            else:
                return convert.utf82unicode(source.read()).split("\n")

        if source.key.endswith(".gz"):
            bytes = safe_size(source)
            if isinstance(bytes, str):
                buff = BytesIO(bytes)
            else:
                # SWAP OUT FILE REFERENCE
                bytes.file, buff = None, bytes.file
            archive = gzip.GzipFile(fileobj=buff, mode='r')
            return LazyLines(archive)
        else:
            return LazyLines(source)
Example #27
    def read_lines(self, key):
        source = self.get_meta(key)
        if source is None:
            Log.error("{{key}} does not exist",  key= key)
        if source.size < MAX_STRING_SIZE:
            if source.key.endswith(".gz"):
                return GzipLines(source.read())
            else:
                return convert.utf82unicode(source.read()).split("\n")

        if source.key.endswith(".gz"):
            bytes = safe_size(source)
            if isinstance(bytes, str):
                buff = BytesIO(bytes)
            else:
                # SWAP OUT FILE REFERENCE
                bytes.file, buff = None, bytes.file
            archive = gzip.GzipFile(fileobj=buff, mode='r')
            return LazyLines(archive)
        else:
            return LazyLines(source)
Example #28
def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IS IN JSON
    """
    if b"json" in kwargs:
        kwargs[b"data"] = convert.unicode2utf8(convert.value2json(kwargs[b"json"]))
    elif b'data' in kwargs:
        kwargs[b"data"] = convert.unicode2utf8(convert.value2json(kwargs[b"data"]))
    else:
        Log.error("Expecting `json` parameter")

    response = post(url, **kwargs)
    c = response.content
    try:
        details = mo_json.json2value(convert.utf82unicode(c))
    except Exception as e:
        Log.error("Unexpected return value {{content}}", content=c, cause=e)

    if response.status_code not in [200, 201]:
        Log.error("Bad response", cause=Except.wrap(details))

    return details
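
A usage sketch for this variant (hypothetical URL and payload; in Python 2 the keyword name "json" is a byte string, so it matches the b"json" key):

# HYPOTHETICAL ENDPOINT; RAISES VIA Log.error ON PARSE FAILURE OR NON-2xx STATUS
details = post_json("http://example.com/api", json={"name": "test"})
# OTHERWISE RETURNS THE PARSED RESPONSE STRUCTURE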
Example #29
    def test_failures_by_directory(self):
        if self.not_real_service():
            return

        test = wrap({"query": {
            "from": {
                "type": "elasticsearch",
                "settings": {
                    "host": ES_CLUSTER_LOCATION,
                    "index": "unittest",
                    "type": "test_result"
                }
            },
            "select": [
                {
                    "aggregate": "count"
                }
            ],
            "edges": [
                "result.test",
                "result.ok"
            ],
            "where": {
                "prefix": {
                    "result.test": "/"
                }
            },
            "format": "table"
        }})

        query = convert.unicode2utf8(convert.value2json(test.query))
        # EXECUTE QUERY
        with Timer("query"):
            response = http.get(self.service_url, data=query)
            if response.status_code != 200:
                error(response)
        result = convert.json2value(convert.utf82unicode(response.all_content))

        Log.note("result\n{{result|indent}}", {"result": result})
Example #30
    def send_queries(self, subtest):
        subtest = wrap(subtest)

        try:
            # EXECUTE QUERY
            num_expectations = 0
            for k, v in subtest.items():
                if k.startswith("expecting_"):  # WHAT FORMAT ARE WE REQUESTING
                    format = k[len("expecting_"):]
                elif k == "expecting":  # NO FORMAT REQUESTED (TO TEST DEFAULT FORMATS)
                    format = None
                else:
                    continue

                num_expectations += 1
                expected = v

                subtest.query.format = format
                subtest.query.meta.testing = True  # MARK ALL QUERIES FOR TESTING SO FULL METADATA IS AVAILABLE BEFORE QUERY EXECUTION
                query = convert.unicode2utf8(convert.value2json(subtest.query))
                # EXECUTE QUERY
                response = self.try_till_response(self.service_url, data=query)

                if response.status_code != 200:
                    error(response)
                result = convert.json2value(
                    convert.utf82unicode(response.all_content))

                # HOW TO COMPARE THE OUT-OF-ORDER DATA?
                compare_to_expected(subtest.query, result, expected)
                Log.note("Test result compares well")
            if num_expectations == 0:
                Log.error(
                    "Expecting test {{name|quote}} to have property named 'expecting_*' for testing the various format clauses",
                    {"name": subtest.name})
        except Exception as e:
            Log.error("Failed test {{name|quote}}", {"name": subtest.name}, e)
Example #31
def get_bugs(settings):
    request_bugs = convert.unicode2utf8(convert.value2json({
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": {"and": [
                {"term": {"keyword": "intermittent-failure"}},
                {"range": {"expires_on": {"gt": Date.now().milli}}},
                {"range": {"modified_ts": {"gt": RECENT.milli}}}
            ]}
        }},
        "from": 0,
        "size": 200000,
        "sort": [],
        "facets": {},
        "fields": ["bug_id", "bug_status", "short_desc", "status_whiteboard"]
    }))
    result = http.post(settings.bugs.url, data=request_bugs).all_content
    bugs = UniqueIndex(["bug_id"], convert.json2value(convert.utf82unicode(result)).hits.hits.fields)

    for i, b in enumerate(bugs):
        try:
            parse_short_desc(b)
        except Exception as e:
            Log.warning("can not parse {{bug_id}} {{short_desc}}", bug_id=b.bug_id, short_desc=b.short_desc, cause=e)
Example #32
from pyLibrary.dot import unwrap
from pyLibrary.maths.randoms import Random
from modatasubmission import Client


settings = jsons.ref.get("file://~/MoDataSubmissionClient.json")


data = {
    "constant": "this is a test",
    "random-data": convert.bytes2base64(Random.bytes(100))
}
link, id = Client(settings.url, unwrap(settings.hawk)).send(data)
Log.note("Success!  Located at {{link}} id={{id}}", link=link, id=id)


data = convert.unicode2utf8(convert.value2json(settings.example))

response = requests.post(
    settings.url,
    data=data,
    headers={
        'Content-Type': b'application/json'
    }
)
if response.status_code != 200:
    Log.error("Expecting a pass")

details = convert.json2value(convert.utf82unicode(response.content))
Log.note("Success!  Located at {{link}} id={{id}}", link=details.link, id=details.etl.id)
Example #33
    def get_branches(self):
        response = http.get(self.settings.branches.url, timeout=coalesce(self.settings.timeout, 30))
        branches = convert.json2value(convert.utf82unicode(response.all_content))
        return wrap({branch.name: unwrap(branch) for branch in branches})
Example #34
        source = self.get_meta(key)

        try:
            json = safe_size(source)
        except Exception as e:
            Log.error(READ_ERROR, e)

        if json == None:
            return None

        if source.key.endswith(".zip"):
            json = _unzip(json)
        elif source.key.endswith(".gz"):
            json = convert.zip2bytes(json)

        return convert.utf82unicode(json)

    def read_bytes(self, key):
        source = self.get_meta(key)
        return safe_size(source)

    def read_lines(self, key):
        source = self.get_meta(key)
        if source is None:
            Log.error("{{key}} does not exist", key=key)
        if source.size < MAX_STRING_SIZE:
            if source.key.endswith(".gz"):
                return LazyLines(ibytes2ilines(scompressed2ibytes(source)))
            else:
                return convert.utf82unicode(source.read()).split("\n")
Example #35
    request_comments = convert.unicode2utf8(convert.value2json({
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": {"and":[
                {"terms": {"bug_id": bugs.keys()}},
                {"range": {"modified_ts": {"gt": RECENT.milli}}}
            ]}
        }},
        "from": 0,
        "size": 200000,
        "sort": [],
        "facets": {},
        "fields": ["bug_id", "modified_by", "modified_ts", "comment"]
    }))

    comments = convert.json2value(convert.utf82unicode(http.post(settings.comments.url, data=request_comments).all_content)).hits.hits.fields

    results = []
    for c in comments:
        errors = parse_comment(bugs[c.bug_id], c)
        results.extend(errors)

    tab = convert.list2tab(results)
    File(settings.output.tab).write(tab)



def parse_short_desc(bug):
    parts = bug.short_desc.split("|")
    if len(parts) in [2, 3]:
        bug.result.test = parts[0].strip()
Example #36
def process(source_key, source, dest_bucket, resources, please_stop=None):
    """
    SIMPLE CONVERT pulse_block INTO TALOS, IF ANY
    """
    etl_head_gen = EtlHeadGenerator(source_key)
    stats = Dict()
    counter = 0

    output = set()
    for i, pulse_line in enumerate(source.read_lines()):
        pulse_record = scrub_pulse_record(source_key, i, pulse_line, stats)
        if not pulse_record:
            continue

        if not pulse_record.payload.talos:
            continue

        all_talos = []
        etl_file = wrap({
            "id": counter,
            "file": pulse_record.payload.logurl,
            "timestamp": Date.now().unix,
            "source": pulse_record.etl,
            "type": "join"
        })
        with Timer("Read {{url}}", {"url": pulse_record.payload.logurl},
                   debug=DEBUG) as timer:
            try:
                response = http.get(pulse_record.payload.logurl)
                if response.status_code == 404:
                    Log.alarm("Talos log missing {{url}}",
                              url=pulse_record.payload.logurl)
                    k = source_key + "." + unicode(counter)
                    try:
                        # IF IT EXISTS WE WILL ASSUME SOME PAST PROCESS TRANSFORMED THE MISSING DATA ALREADY
                        dest_bucket.get_key(k)
                        output |= {k}  # FOR DENSITY CALCULATIONS
                    except Exception:
                        _, dest_etl = etl_head_gen.next(etl_file, "talos")
                        dest_etl.error = "Talos log missing"
                        output |= dest_bucket.extend([{
                            "id": etl2key(dest_etl),
                            "value": {
                                "etl": dest_etl,
                                "pulse": pulse_record.payload
                            }
                        }])

                    continue
                all_log_lines = response.all_lines

                for log_line in all_log_lines:
                    s = log_line.find(TALOS_PREFIX)
                    if s < 0:
                        continue

                    log_line = strings.strip(log_line[s + len(TALOS_PREFIX):])
                    talos = convert.json2value(convert.utf82unicode(log_line))

                    for t in talos:
                        _, dest_etl = etl_head_gen.next(etl_file, "talos")
                        t.etl = dest_etl
                        t.pulse = pulse_record.payload
                    all_talos.extend(talos)
            except Exception as e:
                Log.error("Problem processing {{url}}",
                          {"url": pulse_record.payload.logurl}, e)
            finally:
Example #37
def process(source_key, source, dest_bucket, resources, please_stop=None):
    """
    SIMPLE CONVERT pulse_block INTO TALOS, IF ANY
    """
    etl_head_gen = EtlHeadGenerator(source_key)
    stats = Dict()
    counter = 0

    output = set()
    for i, pulse_line in enumerate(source.read_lines()):
        pulse_record = scrub_pulse_record(source_key, i, pulse_line, stats)
        if not pulse_record:
            continue

        if not pulse_record.payload.talos:
            continue

        all_talos = []
        etl_file = wrap({
            "id": counter,
            "file": pulse_record.payload.logurl,
            "timestamp": Date.now().unix,
            "source": pulse_record.etl,
            "type": "join"
        })
        with Timer("Read {{url}}", {"url": pulse_record.payload.logurl}, debug=DEBUG) as timer:
            try:
                response = http.get(pulse_record.payload.logurl)
                if response.status_code == 404:
                    Log.alarm("Talos log missing {{url}}", url=pulse_record.payload.logurl)
                    k = source_key + "." + unicode(counter)
                    try:
                        # IF IT EXISTS WE WILL ASSUME SOME PAST PROCESS TRANSFORMED THE MISSING DATA ALREADY
                        dest_bucket.get_key(k)
                        output |= {k}  # FOR DENSITY CALCULATIONS
                    except Exception:
                        _, dest_etl = etl_head_gen.next(etl_file, "talos")
                        dest_etl.error = "Talos log missing"
                        output |= dest_bucket.extend([{
                            "id": etl2key(dest_etl),
                            "value": {
                                "etl": dest_etl,
                                "pulse": pulse_record.payload
                            }
                        }])

                    continue
                all_log_lines = response.all_lines

                for log_line in all_log_lines:
                    s = log_line.find(TALOS_PREFIX)
                    if s < 0:
                        continue

                    log_line = strings.strip(log_line[s + len(TALOS_PREFIX):])
                    talos = convert.json2value(convert.utf82unicode(log_line))

                    for t in talos:
                        _, dest_etl = etl_head_gen.next(etl_file, "talos")
                        t.etl = dest_etl
                        t.pulse = pulse_record.payload
                    all_talos.extend(talos)
            except Exception as e:
                Log.error("Problem processing {{url}}", {
                    "url": pulse_record.payload.logurl
                }, e)
            finally:
Example #38
def _scrub(value, is_done):
    type_ = value.__class__

    if type_ in (NoneType, NullType):
        return None
    elif type_ is unicode:
        value_ = value.strip()
        if value_:
            return value_
        else:
            return None
    elif type_ is float:
        if math.isnan(value) or math.isinf(value):
            return None
        return value
    elif type_ in (int, long, bool):
        return value
    elif type_ in (date, datetime):
        return float(datetime2unix(value))
    elif type_ is timedelta:
        return value.total_seconds()
    elif type_ is Date:
        return float(value.unix)
    elif type_ is Duration:
        return float(value.seconds)
    elif type_ is str:
        return utf82unicode(value)
    elif type_ is Decimal:
        return float(value)
    elif type_ is Dict:
        return _scrub(unwrap(value), is_done)
    elif isinstance(value, Mapping):
        _id = id(value)
        if _id in is_done:
            _Log.warning("possible loop in structure detected")
            return '"<LOOP IN STRUCTURE>"'
        is_done.add(_id)

        output = {}
        for k, v in value.iteritems():
            if isinstance(k, basestring):
                pass
            elif hasattr(k, "__unicode__"):
                k = unicode(k)
            else:
                _Log.error("keys must be strings")
            v = _scrub(v, is_done)
            if v != None or isinstance(v, Mapping):
                output[k] = v

        is_done.discard(_id)
        return output
    elif type_ in (tuple, list, DictList):
        output = []
        for v in value:
            v = _scrub(v, is_done)
            output.append(v)
        return output
    elif type_ is type:
        return value.__name__
    elif type_.__name__ == "bool_":  # DEAR ME!  Numpy has it's own booleans (value==False could be used, but 0==False in Python.  DOH!)
        if value == False:
            return False
        else:
            return True
    elif hasattr(value, '__json__'):
        try:
            output = json._default_decoder.decode(value.__json__())
            return output
        except Exception as e:
            _Log.error("problem with calling __json__()", e)
Example #39
        source = self.get_meta(key)

        try:
            json = safe_size(source)
        except Exception as e:
            Log.error(READ_ERROR, e)

        if json == None:
            return None

        if source.key.endswith(".zip"):
            json = _unzip(json)
        elif source.key.endswith(".gz"):
            json = convert.zip2bytes(json)

        return convert.utf82unicode(json)

    def read_bytes(self, key):
        source = self.get_meta(key)
        return safe_size(source)

    def read_lines(self, key):
        source = self.get_meta(key)
        if source is None:
            Log.error("{{key}} does not exist",  key= key)
        if source.size < MAX_STRING_SIZE:
            if source.key.endswith(".gz"):
                return LazyLines(ibytes2ilines(scompressed2ibytes(source)))
            else:
                return convert.utf82unicode(source.read()).split("\n")
Example #40
    def try_till_response(self, *args, **kwargs):
        self.execute_query(convert.json2value(convert.utf82unicode(kwargs["data"])))
Example #41
    def test_whitespace_prefix(self):
        # THE HEX IS FOUR 0x00 BYTES FOLLOWED BY AN OTHERWISE-VALID JSON LOG LINE; PARSING MUST RAISE
        hex = "00 00 00 00 7B 22 74 68 72 65 61 64 22 3A 20 22 4D 61 69 6E 54 68 72 65 61 64 22 2C 20 22 6C 65 76 65 6C 22 3A 20 22 49 4E 46 4F 22 2C 20 22 70 69 64 22 3A 20 31 32 39 33 2C 20 22 63 6F 6D 70 6F 6E 65 6E 74 22 3A 20 22 77 70 74 73 65 72 76 65 22 2C 20 22 73 6F 75 72 63 65 22 3A 20 22 77 65 62 2D 70 6C 61 74 66 6F 72 6D 2D 74 65 73 74 73 22 2C 20 22 74 69 6D 65 22 3A 20 31 34 32 34 31 39 35 30 31 33 35 39 33 2C 20 22 61 63 74 69 6F 6E 22 3A 20 22 6C 6F 67 22 2C 20 22 6D 65 73 73 61 67 65 22 3A 20 22 53 74 61 72 74 69 6E 67 20 68 74 74 70 20 73 65 72 76 65 72 20 6F 6E 20 31 32 37 2E 30 2E 30 2E 31 3A 38 34 34 33 22 7D 0A"
        json = convert.utf82unicode(convert.hex2bytes("".join(hex.split(" "))))
        self.assertRaises(Exception, convert.json2value, *[json])
Example #42
def _scrub(value, is_done):
    type_ = value.__class__

    if type_ in (NoneType, NullType):
        return None
    elif type_ is unicode:
        value_ = value.strip()
        if value_:
            return value_
        else:
            return None
    elif type_ is float:
        if math.isnan(value) or math.isinf(value):
            return None
        return value
    elif type_ in (int, long, bool):
        return value
    elif type_ in (date, datetime):
        return float(datetime2unix(value))
    elif type_ is timedelta:
        return value.total_seconds()
    elif type_ is Date:
        return float(value.unix)
    elif type_ is Duration:
        return float(value.seconds)
    elif type_ is str:
        return utf82unicode(value)
    elif type_ is Decimal:
        return float(value)
    elif type_ is Dict:
        return _scrub(unwrap(value), is_done)
    elif isinstance(value, Mapping):
        _id = id(value)
        if _id in is_done:
            _Log.warning("possible loop in structure detected")
            return '"<LOOP IN STRUCTURE>"'
        is_done.add(_id)

        output = {}
        for k, v in value.iteritems():
            if isinstance(k, basestring):
                pass
            elif hasattr(k, "__unicode__"):
                k = unicode(k)
            else:
                _Log.error("keys must be strings")
            v = _scrub(v, is_done)
            if v != None or isinstance(v, Mapping):
                output[k] = v

        is_done.discard(_id)
        return output
    elif type_ in (tuple, list, DictList):
        output = []
        for v in value:
            v = _scrub(v, is_done)
            output.append(v)
        return output
    elif type_ is type:
        return value.__name__
    elif type_.__name__ == "bool_":  # DEAR ME!  Numpy has it's own booleans (value==False could be used, but 0==False in Python.  DOH!)
        if value == False:
            return False
        else:
            return True
    elif hasattr(value, '__json__'):
        try:
            output = json._default_decoder.decode(value.__json__())
            return output
        except Exception as e:
            _Log.error("problem with calling __json__()", e)
Example #43
def query(path):
    with CProfiler():
        try:
            with Timer("total duration") as query_timer:
                preamble_timer = Timer("preamble")
                with preamble_timer:
                    if flask.request.headers.get("content-length", "") in ["", "0"]:
                        # ASSUME A BROWSER HIT THIS POINT, SEND text/html RESPONSE BACK
                        return Response(BLANK,
                                        status=400,
                                        headers={"Content-Type": "text/html"})
                    elif int(flask.request.headers["content-length"]) > QUERY_SIZE_LIMIT:
                        Log.error("Query is too large")

                    request_body = flask.request.get_data().strip()
                    text = convert.utf82unicode(request_body)
                    text = replace_vars(text, flask.request.args)
                    data = convert.json2value(text)
                    record_request(flask.request, data, None, None)
                    if data.meta.testing:
                        _test_mode_wait(data)

                translate_timer = Timer("translate")
                with translate_timer:
                    if data.sql:
                        data = parse_sql(data.sql)
                    frum = wrap_from(data['from'])
                    result = jx.run(data, frum=frum)

                    if isinstance(result, Container):  # TODO: REMOVE THIS CHECK, jx SHOULD ALWAYS RETURN Containers
                        result = result.format(data.format)

                save_timer = Timer("save")
                with save_timer:
                    if data.meta.save:
                        try:
                            result.meta.saved_as = save_query.query_finder.save(data)
                        except Exception as e:
                            Log.warning("Unexpected save problem", cause=e)

                result.meta.timing.preamble = Math.round(preamble_timer.duration.seconds, digits=4)
                result.meta.timing.translate = Math.round(translate_timer.duration.seconds, digits=4)
                result.meta.timing.save = Math.round(save_timer.duration.seconds, digits=4)
                result.meta.timing.total = "{{TOTAL_TIME}}"  # TIMING PLACEHOLDER

                with Timer("jsonification") as json_timer:
                    response_data = convert.unicode2utf8(convert.value2json(result))

            with Timer("post timer"):
                # IMPORTANT: WE WANT THE TIME OF THE JSON SERIALIZATION, AND HAVE IT IN THE JSON ITSELF.
                # WE CHEAT BY DOING A (HOPEFULLY FAST) STRING REPLACEMENT AT THE VERY END
                timing_replacement = b'"total": ' + str(Math.round(query_timer.duration.seconds, digits=4)) +\
                                     b', "jsonification": ' + str(Math.round(json_timer.duration.seconds, digits=4))
                response_data = response_data.replace(b'"total": "{{TOTAL_TIME}}"', timing_replacement)
                Log.note("Response is {{num}} bytes in {{duration}}",
                         num=len(response_data),
                         duration=query_timer.duration)

                return Response(
                    response_data,
                    status=200,
                    headers={"Content-Type": result.meta.content_type})
        except Exception as e:
            e = Except.wrap(e)
            return _send_error(query_timer, request_body, e)
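
A hypothetical client round-trip against this endpoint (the URL is a placeholder; per the code above, the response carries its own timing under meta.timing, with total and jsonification patched in by string replacement):

import requests

# HYPOTHETICAL SERVICE LOCATION
response = requests.post("http://localhost:5000/query", data='{"from": "unittest", "limit": 1}')
result = response.json()
print result["meta"]["timing"]  # preamble, translate, save, jsonification, total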