Example #1
0
 def test_bad_long_json(self):
     test = value2json({"values": [i for i in range(1000)]})
     test = test[:1000] + "|" + test[1000:]
     expected = "Can not decode JSON at:\n\t..., 216, 217, 218, 219|, 220, 221, 222, 22...\n\t                       ^\n"
     # expected = u'Can not decode JSON at:\n\t...9,270,271,272,273,27|4,275,276,277,278,2...\n\t                       ^\n'
     with self.assertRaises("Can not decode JSON"):
         json2value(test)
Example #2
0
 def test_aes_on_char(self):
     key = convert.base642bytearray(u'nm5/wK20R45AUtetHJwHTdOigvGTxP7NcH/41YE8AZo=')
     encrypted = aes_crypto.encrypt("kyle", key, salt=convert.base642bytearray("AehqWt1OdEgPJhCx6uylyg=="))
     self.assertEqual(
         json2value(encrypted.decode('utf8')),
         json2value(u'{"type": "AES256", "length": 4, "salt": "AehqWt1OdEgPJhCx6uylyg==", "data": "FXUGxdb9E+4UCKwsIT9ugQ=="}')
     )
Example #3
0
 def test_aes_nothing(self):
     key = convert.base642bytearray(u'nm5/wK20R45AUtetHJwHTdOigvGTxP7NcH/41YE8AZo=')
     encrypted = aes_crypto.encrypt("", key, salt=convert.base642bytearray("AehqWt1OdEgPJhCx6uylyg=="))
     self.assertEqual(
         json2value(encrypted.decode('utf8')),
         json2value(u'{"type": "AES256", "length": 0, "salt": "AehqWt1OdEgPJhCx6uylyg=="}')
     )
Example #4
0
    def create_index(
        self,
        index,
        alias=None,
        create_timestamp=None,
        schema=None,
        limit_replicas=None,
        read_only=False,
        tjson=False,
        kwargs=None
    ):
        if not alias:
            alias = kwargs.alias = kwargs.index
            index = kwargs.index = proto_name(alias, create_timestamp)

        if kwargs.alias == index:
            Log.error("Expecting index name to conform to pattern")

        if kwargs.schema_file:
            Log.error('schema_file attribute not supported.  Use {"$ref":<filename>} instead')

        if schema == None:
            Log.error("Expecting a schema")
        elif isinstance(schema, basestring):
            schema = mo_json.json2value(schema, leaves=True)
        else:
            schema = mo_json.json2value(convert.value2json(schema), leaves=True)

        if limit_replicas:
            # DO NOT ASK FOR TOO MANY REPLICAS
            health = self.get("/_cluster/health")
            if schema.settings.index.number_of_replicas >= health.number_of_nodes:
                Log.warning("Reduced number of replicas: {{from}} requested, {{to}} realized",
                    {"from": schema.settings.index.number_of_replicas},
                    to= health.number_of_nodes - 1
                )
                schema.settings.index.number_of_replicas = health.number_of_nodes - 1

        self.post(
            "/" + index,
            data=schema,
            headers={"Content-Type": "application/json"}
        )

        # CONFIRM INDEX EXISTS
        while True:
            try:
                state = self.get("/_cluster/state", retry={"times": 5}, timeout=3)
                if index in state.metadata.indices:
                    break
                Log.note("Waiting for index {{index}} to appear", index=index)
            except Exception as e:
                Log.warning("Problem while waiting for index {{index}} to appear", index=index, cause=e)
            Till(seconds=1).wait()
        Log.alert("Made new index {{index|quote}}", index=index)

        es = Index(kwargs=kwargs)
        return es
Example #5
0
    def create_index(
        self,
        index,
        alias=None,
        create_timestamp=None,
        schema=None,
        limit_replicas=None,
        read_only=False,
        tjson=False,
        kwargs=None
    ):
        if not alias:
            alias = kwargs.alias = kwargs.index
            index = kwargs.index = proto_name(alias, create_timestamp)

        if kwargs.alias == index:
            Log.error("Expecting index name to conform to pattern")

        if kwargs.schema_file:
            Log.error('schema_file attribute not supported.  Use {"$ref":<filename>} instead')

        if schema == None:
            Log.error("Expecting a schema")
        elif isinstance(schema, basestring):
            schema = mo_json.json2value(schema, leaves=True)
        else:
            schema = mo_json.json2value(convert.value2json(schema), leaves=True)

        if limit_replicas:
            # DO NOT ASK FOR TOO MANY REPLICAS
            health = self.get("/_cluster/health")
            if schema.settings.index.number_of_replicas >= health.number_of_nodes:
                Log.warning("Reduced number of replicas: {{from}} requested, {{to}} realized",
                    {"from": schema.settings.index.number_of_replicas},
                    to= health.number_of_nodes - 1
                )
                schema.settings.index.number_of_replicas = health.number_of_nodes - 1

        self.put(
            "/" + index,
            data=schema,
            headers={"Content-Type": "application/json"}
        )

        # CONFIRM INDEX EXISTS
        while True:
            try:
                state = self.get("/_cluster/state", retry={"times": 5}, timeout=3)
                if index in state.metadata.indices:
                    break
                Log.note("Waiting for index {{index}} to appear", index=index)
            except Exception as e:
                Log.warning("Problem while waiting for index {{index}} to appear", index=index, cause=e)
            Till(seconds=1).wait()
        Log.alert("Made new index {{index|quote}}", index=index)

        es = Index(kwargs=kwargs)
        return es
Example #6
0
def fix(rownum, line, source, sample_only_filter, sample_size):
    # ES SCHEMA IS STRICTLY TYPED, USE "code" FOR TEXT IDS
    line = line.replace('{"id": "bb"}',
                        '{"code": "bb"}').replace('{"id": "tc"}',
                                                  '{"code": "tc"}')

    # ES SCHEMA IS STRICTLY TYPED, THE SUITE OBJECT CAN NOT BE HANDLED
    if source.name.startswith("active-data-test-result"):
        # "suite": {"flavor": "plain-chunked", "name": "mochitest"}
        found = strings.between(line, '"suite": {', '}')
        if found:
            suite_json = '{' + found + "}"
            if suite_json:
                suite = mo_json.json2value(suite_json)
                suite = convert.value2json(coalesce(suite.fullname,
                                                    suite.name))
                line = line.replace(suite_json, suite)

    if source.name.startswith("active-data-codecoverage"):
        d = convert.json2value(line)
        if d.source.file.total_covered > 0:
            return {"id": d._id, "json": line}, False
        else:
            return None, False

    if rownum == 0:
        value = mo_json.json2value(line)
        if len(line) > MAX_RECORD_LENGTH:
            _shorten(value, source)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
        if sample_only_filter and Random.int(
                int(1.0 / coalesce(sample_size, 0.01))) != 0 and jx.filter(
                    [value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            return row, True
    elif len(line) > MAX_RECORD_LENGTH:
        value = mo_json.json2value(line)
        _shorten(value, source)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    elif line.find('"resource_usage":') != -1:
        value = mo_json.json2value(line)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    else:
        # FAST
        _id = strings.between(line, "\"_id\": \"", "\"")  # AVOID DECODING JSON
        row = {"id": _id, "json": line}

    return row, False
Example #7
0
 def __data__(self):
     if isinstance(self.value, Variable) and isinstance(
             self.length, Literal):
         output = {
             "concat": {
                 self.terms[0].var: json2value(self.terms[2].json)
             }
         }
     else:
         output = {"concat": [t.__data__() for t in self.terms]}
     if self.separator.json != '""':
         output["separator"] = json2value(self.terms[2].json)
     return output
Example #8
0
    def device_register(self, path=None):
        """
        EXPECTING A SIGNED REGISTRATION REQUEST
        RETURN JSON WITH url FOR LOGIN
        """
        now = Date.now()
        expires = now + parse(self.device.register.session['max-age'])
        request_body = request.get_data()
        signed = json2value(request_body.decode("utf8"))
        command = json2value(base642bytes(signed.data).decode("utf8"))
        session.public_key = command.public_key
        rsa_crypto.verify(signed, session.public_key)

        self.session_manager.create_session(session)
        session.expires = expires.unix
        session.state = bytes2base64URL(crypto.bytes(32))

        with self.device.db.transaction() as t:
            t.execute(
                sql_insert(
                    self.device.table,
                    {
                        "state": session.state,
                        "session_id": session.session_id
                    },
                ))
        body = value2json(
            Data(
                session_id=session.session_id,
                interval="5second",
                expires=session.expires,
                url=URL(
                    self.device.home,
                    path=self.device.endpoints.login,
                    query={"state": session.state},
                ),
            ))

        response = Response(body,
                            headers={"Content-Type": mimetype.JSON},
                            status=200)
        response.set_cookie(self.device.register.session.name,
                            session.session_id,
                            path=self.device.login.session.path,
                            domain=self.device.login.session.domain,
                            expires=expires.format(RFC1123),
                            secure=self.device.login.session.secure,
                            httponly=self.device.login.session.httponly)

        return response
Example #9
0
def fix(rownum, line, source, sample_only_filter, sample_size):
    value = json2value(line)

    if value._id.startswith(("tc.97", "96", "bb.27")):
        # AUG 24, 25 2017 - included full diff with repo; too big to index
        try:
            data = json2value(line)
            repo = data.repo
            repo.etl = None
            repo.branch.last_used = None
            repo.branch.description = None
            repo.branch.etl = None
            repo.branch.parent_name = None
            repo.children = None
            repo.parents = None
            if repo.changeset.diff or data.build.repo.changeset.diff:
                Log.error("no diff allowed")
            else:
                assertAlmostEqual(minimize_repo(repo), repo)
        except Exception as e:
            if CAN_NOT_DECODE_JSON in e:
                raise e
            data.repo = minimize_repo(repo)
            data.build.repo = minimize_repo(data.build.repo)
            line = value2json(data)
    else:
        pass

    if rownum == 0:
        if len(line) > MAX_RECORD_LENGTH:
            _shorten(value, source)
        value = _fix(value)
        if sample_only_filter and Random.int(
                int(1.0 / coalesce(sample_size, 0.01))) != 0 and jx.filter(
                    [value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            row = {"value": value}
            return row, True
    elif len(line) > MAX_RECORD_LENGTH:
        _shorten(value, source)
        value = _fix(value)
    elif line.find('"resource_usage":') != -1:
        value = _fix(value)

    row = {"value": value}
    return row, False
Example #10
0
def row_to_column(header, row):
    return Column(
        **{
            h: c if c is None or h not in ("nested_path",
                                           "partitions") else json2value(c)
            for h, c in zip(header, row)
        })
Example #11
0
def fix(source_key, rownum, line, source, sample_only_filter, sample_size):
    """
    :param rownum:
    :param line:
    :param source:
    :param sample_only_filter:
    :param sample_size:
    :return:  (row, no_more_data) TUPLE WHERE row IS {"value":<data structure>} OR {"json":<text line>}
    """
    value = json2value(line)

    if rownum == 0:
        if len(line) > MAX_RECORD_LENGTH:
            _shorten(source_key, value, source)
        value = _fix(value)
        if sample_only_filter and Random.int(int(1.0/coalesce(sample_size, 0.01))) != 0 and jx.filter([value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            row = {"value": value}
            return row, True
    elif len(line) > MAX_RECORD_LENGTH:
        _shorten(source_key, value, source)
        value = _fix(value)
    elif '"resource_usage":' in line:
        value = _fix(value)

    row = {"value": value}
    return row, False
    def __init__(
        self,
        host,
        index,
        port=9200,
        type="log",
        queue_size=1000,
        batch_size=100,
        kwargs=None,
    ):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        kwargs.timeout = Duration(coalesce(kwargs.timeout, "30second")).seconds
        kwargs.retry.times = coalesce(kwargs.retry.times, 3)
        kwargs.retry.sleep = Duration(coalesce(kwargs.retry.sleep, MINUTE)).seconds
        kwargs.host = Random.sample(listwrap(host), 1)[0]

        schema = json2value(value2json(SCHEMA), leaves=True)
        schema.mappings[type].properties["~N~"].type = "nested"
        self.es = Cluster(kwargs).get_or_create_index(
            schema=schema,
            limit_replicas=True,
            typed=True,
            kwargs=kwargs,
        )
        self.batch_size = batch_size
        self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
        self.queue = Queue("debug logs to es", max=queue_size, silent=True)

        self.worker = Thread.run("add debug logs to es", self._insert_loop)
Example #13
0
 def _convert(row):
     text = row[column]
     if text == None:
         return set()
     else:
         value = json2value(row[column])
         return set(value) - {None}
Example #14
0
    def test_one_tuple(self):
        test = to_data({
            "data": [{"a": 1}, {"a": 2}],
            "query": {"from": TEST_TABLE},
            # "expecting_list": [{"a": 1}, {"a": 2}]
        })

        self.utils.fill_container(test)  # FILL AND MAKE DUMMY QUERY

        query = value2json({"tuple": {
            "from": test.query['from'],
            "where": {"eq": {"a": 1}},
            "format": "list",
            "meta": {"testing": True}
        }}).encode('utf8')
        # SEND  QUERY
        response = self.utils.try_till_response(self.utils.testing.query, data=query)

        if response.status_code != 200:
            error(response)

        result = json2value(response.all_content.decode('utf8'))

        self.assertEqual(result, {
            "data": [
                {"data": [{"a": 1}]},
            ]
        })
Example #15
0
 def get_state(self):
     try:
         state = json2value(adr_configuration.config.cache.get(CACHY_STATE))
         Log.note("Got ETL state:\n{{state|json|indent}}", state=state)
         return state
     except Exception:
         return None
Example #16
0
 def __init__(self, filename, host="fake", index="fake", kwargs=None):
     self.settings = kwargs
     self.filename = kwargs.filename
     try:
         self.data = mo_json.json2value(File(self.filename).read())
     except Exception:
         self.data = Data()
Example #17
0
def fix(source_key, rownum, line, source, sample_only_filter, sample_size):
    """
    :param rownum:
    :param line:
    :param source:
    :param sample_only_filter:
    :param sample_size:
    :return:  (row, no_more_data) TUPLE WHERE row IS {"value":<data structure>} OR {"json":<text line>}
    """
    value = json2value(line)

    if rownum == 0:
        if len(line) > MAX_RECORD_LENGTH:
            _shorten(source_key, value, source)
        value = _fix(value)
        if sample_only_filter and Random.int(
                int(1.0 / coalesce(sample_size, 0.01))) != 0 and jx.filter(
                    [value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            row = {"value": value}
            return row, True
    elif len(line) > MAX_RECORD_LENGTH:
        _shorten(source_key, value, source)
        value = _fix(value)
    elif '"resource_usage":' in line:
        value = _fix(value)

    row = {"value": value}
    return row, False
Example #18
0
def get_json(url, **kwargs):
    """
    ASSUME RESPONSE IN IN JSON
    """
    response = get(url, **kwargs)
    c = response.all_content
    return mo_json.json2value(convert.utf82unicode(c))
Example #19
0
    def test_branch_count(self):
        if self.not_real_service():
            return

        test = wrap({"query": {
            "from": {
                "type": "elasticsearch",
                "settings": {
                    "host": ES_CLUSTER_LOCATION,
                    "index": "unittest",
                    "type": "test_result"
                }
            },
            "select": [
                {"aggregate": "count"},
            ],
            "edges": [
                "build.branch"
            ],
            "where": {"or": [
                {"missing": "build.id"}
                # {"gte": {"timestamp": Date.floor(Date.now() - (Duration.DAY * 7), Duration.DAY).milli / 1000}}
            ]},
            "format": "table"
        }})

        query = value2json(test.query).encode('utf8')
        # EXECUTE QUERY
        with Timer("query"):
            response = http.get(self.testing.query, data=query)
            if response.status_code != 200:
                error(response)
        result = json2value(response.all_content.decode('utf8'))

        Log.note("result\n{{result|indent}}", {"result": result})
Example #20
0
    def put(self, path, **kwargs):
        url = self.settings.host + ":" + unicode(self.settings.port) + path

        data = kwargs.get(b'data')
        if data == None:
            pass
        elif isinstance(data, Mapping):
            kwargs[b'data'] = data = convert.unicode2utf8(convert.value2json(data))
        elif not isinstance(kwargs["data"], str):
            Log.error("data must be utf8 encoded string")

        if self.debug:
            sample = kwargs.get(b'data', "")[:300]
            Log.note("{{url}}:\n{{data|indent}}", url=url, data=sample)
        # try:
            response = http.put(url, **kwargs)
            if response.status_code not in [200]:
                Log.error(response.reason+": "+response.all_content)
            if self.debug:
                Log.note("response: {{response}}",  response= utf82unicode(response.all_content)[0:300:])

            details = mo_json.json2value(utf82unicode(response.content))
            if details.error:
                Log.error(convert.quote2string(details.error))
            if details._shards.failed > 0:
                Log.error("Shard failures {{failures|indent}}",
                          failures="---\n".join(r.replace(";", ";\n") for r in details._shards.failures.reason)
                          )
            return details
Example #21
0
def to_sql(self, schema, not_null=False, boolean=False):
    return wrap([{
        "name": ".",
        "sql": {
            "n": sql_quote(json2value(self.json))
        }
    }])
Example #22
0
def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IN IN JSON
    """
    if 'json' in kwargs:
        kwargs['data'] = value2json(kwargs['json']).encode('utf8')
        del kwargs['json']
    elif 'data' in kwargs:
        kwargs['data'] = value2json(kwargs['data']).encode('utf8')
    else:
        Log.error(u"Expecting `json` parameter")
    response = post(url, **kwargs)
    details = json2value(response.content.decode('utf8'))
    if response.status_code not in [200, 201, 202]:

        if "template" in details:
            Log.error(u"Bad response code {{code}}",
                      code=response.status_code,
                      cause=Except.wrap(details))
        else:
            Log.error(u"Bad response code {{code}}\n{{details}}",
                      code=response.status_code,
                      details=details)
    else:
        return details
Example #23
0
    def test_multiple_agg_on_same_field(self):
        if self.not_real_service():
            return

        test = wrap({
            "query": {
                "from": {
                    "type": "elasticsearch",
                    "settings": {
                        "host": ES_CLUSTER_LOCATION,
                        "index": "unittest",
                        "type": "test_result"
                    }
                },
                "select": [{
                    "name": "max_bytes",
                    "value": "run.stats.bytes",
                    "aggregate": "max"
                }, {
                    "name": "count",
                    "value": "run.stats.bytes",
                    "aggregate": "count"
                }]
            }
        })

        query = unicode2utf8(convert.value2json(test.query))
        # EXECUTE QUERY
        with Timer("query"):
            response = http.get(self.testing.query, data=query)
            if response.status_code != 200:
                error(response)
        result = json2value(utf82unicode(response.all_content))

        Log.note("result\n{{result|indent}}", {"result": result})
Example #24
0
    def delete_index(self, index_name):
        if not isinstance(index_name, unicode):
            Log.error("expecting an index name")

        if self.debug:
            Log.note("Deleting index {{index}}", index=index_name)

        # REMOVE ALL ALIASES TOO
        aliases = [a for a in self.get_aliases() if a.index == index_name and a.alias != None]
        if aliases:
            self.post(
                path="/_aliases",
                data={"actions": [{"remove": a} for a in aliases]}
            )

        url = self.settings.host + ":" + unicode(self.settings.port) + "/" + index_name
        try:
            response = http.delete(url)
            if response.status_code != 200:
                Log.error("Expecting a 200, got {{code}}", code=response.status_code)
            details = mo_json.json2value(utf82unicode(response.content))
            if self.debug:
                Log.note("delete response {{response}}", response=details)
            return response
        except Exception, e:
            Log.error("Problem with call to {{url}}", url=url, cause=e)
Example #25
0
def get_json(url, **kwargs):
    """
    ASSUME RESPONSE IN IN JSON
    """
    response = get(url, **kwargs)
    c = response.all_content
    return mo_json.json2value(convert.utf82unicode(c))
Example #26
0
 def __new__(cls, op, term, *args):
     Expression.__new__(cls, *args)
     field, comparisons = term  # comparisons IS A Literal()
     return AndOp("and", [
         operators[op](op, [field, Literal(None, value)])
         for op, value in json2value(comparisons.json).items()
     ])
    def __init__(
        self,
        host,
        index,
        port=9200,
        type="log",
        queue_size=1000,
        batch_size=100,
        kwargs=None,
    ):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        kwargs.timeout = Duration(coalesce(kwargs.timeout, "30second")).seconds
        kwargs.retry.times = coalesce(kwargs.retry.times, 3)
        kwargs.retry.sleep = Duration(coalesce(kwargs.retry.sleep, MINUTE)).seconds

        self.es = Cluster(kwargs).get_or_create_index(
            schema=json2value(value2json(SCHEMA), leaves=True),
            limit_replicas=True,
            typed=True,
            kwargs=kwargs,
        )
        self.batch_size = batch_size
        self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
        self.queue = Queue("debug logs to es", max=queue_size, silent=True)

        self.worker = Thread.run("add debug logs to es", self._insert_loop)
Example #28
0
    def delete_index(self, index_name):
        if not isinstance(index_name, unicode):
            Log.error("expecting an index name")

        if self.debug:
            Log.note("Deleting index {{index}}", index=index_name)

        # REMOVE ALL ALIASES TOO
        aliases = [a for a in self.get_aliases() if a.index == index_name and a.alias != None]
        if aliases:
            self.post(
                path="/_aliases",
                data={"actions": [{"remove": a} for a in aliases]}
            )

        url = self.settings.host + ":" + unicode(self.settings.port) + "/" + index_name
        try:
            response = http.delete(url)
            if response.status_code != 200:
                Log.error("Expecting a 200, got {{code}}", code=response.status_code)
            details = mo_json.json2value(utf82unicode(response.content))
            if self.debug:
                Log.note("delete response {{response}}", response=details)
            return response
        except Exception as e:
            Log.error("Problem with call to {{url}}", url=url, cause=e)
Example #29
0
 def __init__(self, filename, host="fake", index="fake", kwargs=None):
     self.settings = kwargs
     self.filename = kwargs.filename
     try:
         self.data = mo_json.json2value(File(self.filename).read())
     except Exception:
         self.data = Data()
Example #30
0
    def __init__(
        self,
        host,
        index,
        port=9200,
        type="log",
        queue_size=1000,
        batch_size=100,
        refresh_interval="1second",
        kwargs=None,
    ):
        """
        settings ARE FOR THE ELASTICSEARCH INDEX
        """
        kwargs.timeout = Duration(coalesce(kwargs.timeout, "30second")).seconds
        kwargs.retry.times = coalesce(kwargs.retry.times, 3)
        kwargs.retry.sleep = Duration(coalesce(kwargs.retry.sleep,
                                               MINUTE)).seconds
        kwargs.host = randoms.sample(listwrap(host), 1)[0]

        rollover_interval = coalesce(kwargs.rollover.interval,
                                     kwargs.rollover.max, "year")
        rollover_max = coalesce(kwargs.rollover.max, kwargs.rollover.interval,
                                "year")

        schema = set_default(
            kwargs.schema,
            {
                "mappings": {
                    kwargs.type: {
                        "properties": {
                            "~N~": {
                                "type": "nested"
                            }
                        }
                    }
                }
            },
            json2value(value2json(SCHEMA), leaves=True),
        )

        self.es = RolloverIndex(
            rollover_field={"get": [{
                "first": "."
            }, {
                "literal": "timestamp"
            }]},
            rollover_interval=rollover_interval,
            rollover_max=rollover_max,
            schema=schema,
            limit_replicas=True,
            typed=True,
            read_only=False,
            kwargs=kwargs,
        )
        self.batch_size = batch_size
        self.queue = Queue("debug logs to es", max=queue_size, silent=True)

        self.worker = Thread.run("add debug logs to es", self._insert_loop)
Example #31
0
def command_loop(local):
    DEBUG and Log.note("mo-python process running with {{config|json}}",
                       config=local['config'])
    while not please_stop:
        line = sys.stdin.readline()
        try:
            command = json2value(line.decode('utf8'))
            DEBUG and Log.note("got {{command}}", command=command)

            if "import" in command:
                dummy = {}
                if is_text(command['import']):
                    exec("from " + command['import'] + " import *", dummy,
                         context)
                else:
                    exec(
                        "from " + command['import']['from'] + " import " +
                        ",".join(listwrap(command['import']['vars'])), dummy,
                        context)
                STDOUT.write(DONE)
            elif "set" in command:
                for k, v in command.set.items():
                    context[k] = v
                STDOUT.write(DONE)
            elif "get" in command:
                STDOUT.write(
                    value2json({
                        "out":
                        coalesce(local.get(command['get']),
                                 context.get(command['get']))
                    }))
                STDOUT.write('\n')
            elif "stop" in command:
                STDOUT.write(DONE)
                please_stop.go()
            elif "exec" in command:
                if not is_text(command['exec']):
                    Log.error("exec expects only text")
                exec(command['exec'], context, local)
                STDOUT.write(DONE)
            else:
                for k, v in command.items():
                    if is_list(v):
                        exec(
                            "_return = " + k + "(" +
                            ",".join(map(value2json, v)) + ")", context, local)
                    else:
                        exec(
                            "_return = " + k + "(" +
                            ",".join(kk + "=" + value2json(vv)
                                     for kk, vv in v.items()) + ")", context,
                            local)
                    STDOUT.write(value2json({"out": local['_return']}))
                    STDOUT.write('\n')
        except Exception as e:
            STDOUT.write(value2json({"err": e}))
            STDOUT.write('\n')
        finally:
            STDOUT.flush()
Example #32
0
def expand_json(rows):
    # CONVERT JSON TO VALUES
    for r in rows:
        for k, json in items(r):
            if isinstance(json, text) and json[0:1] in ("[", "{"):
                with suppress_exception:
                    value = mo_json.json2value(json)
                    r[k] = value
Example #33
0
def _get_url(url, branch, **kwargs):
    with Explanation("get push from {{url}}", url=url, debug=DEBUG):
        response = http.get(url, **kwargs)
        data = json2value(response.content.decode("utf8"))
        if isinstance(data, (text_type, str)) and data.startswith("unknown revision"):
            Log.error(UNKNOWN_PUSH, revision=strings.between(data, "'", "'"))
        branch.url = _trim(url)  # RECORD THIS SUCCESS IN THE BRANCH
        return data
Example #34
0
def expand_json(rows):
    # CONVERT JSON TO VALUES
    for r in rows:
        for k, json in items(r):
            if isinstance(json, text_type) and json[0:1] in ("[", "{"):
                with suppress_exception:
                    value = mo_json.json2value(json)
                    r[k] = value
Example #35
0
def get_env(ref, url):
    # GET ENVIRONMENT VARIABLES
    ref = ref.host
    try:
        new_value = json2value(os.environ[ref])
    except Exception as e:
        new_value = os.environ[ref]
    return new_value
Example #36
0
 def __init__(self, filename, host="fake", index="fake", kwargs=None):
     self.settings = kwargs
     self.file = File(filename)
     self.cluster= Null
     try:
         self.data = mo_json.json2value(self.file.read())
     except Exception as e:
         self.data = Data()
Example #37
0
def _get_env(ref, url):
    # GET ENVIRONMENT VARIABLES
    ref = ref.host
    try:
        new_value = json2value(os.environ[ref])
    except Exception as e:
        new_value = os.environ.get(ref)
    return new_value
Example #38
0
 def to_python(self, not_null=False, boolean=False, many=False):
     return (
         "re.match("
         + quote(json2value(self.pattern.json) + "$")
         + ", "
         + Python[self.var].to_python()
         + ")"
     )
Example #39
0
def _get_url(url, branch, **kwargs):
    with Explanation("get push from {{url}}", url=url, debug=DEBUG):
        response = http.get(url, **kwargs)
        data = json2value(response.content.decode("utf8"))
        if isinstance(data, (text_type, str)) and data.startswith("unknown revision"):
            Log.error("Unknown push {{revision}}", revision=strings.between(data, "'", "'"))
        branch.url = _trim(url)  # RECORD THIS SUCCESS IN THE BRANCH
        return data
Example #40
0
 def __init__(self, filename, host="fake", index="fake", kwargs=None):
     self.settings = kwargs
     self.file = File(filename)
     self.cluster= Null
     try:
         self.data = mo_json.json2value(self.file.read())
     except Exception as e:
         self.data = Data()
Example #41
0
    def typed_encode(self, r):
        """
        :param record:  expecting id and value properties
        :return:  dict with id and json properties
        """
        try:
            value = r['value']
            if "json" in r:
                value = json2value(r["json"])
            elif isinstance(value, Mapping) or value != None:
                pass
            else:
                from mo_logs import Log
                raise Log.error(
                    "Expecting every record given to have \"value\" or \"json\" property"
                )

            _buffer = UnicodeBuilder(1024)
            net_new_properties = []
            path = []
            if isinstance(value, Mapping):
                given_id = self.get_id(value)
                if self.remove_id:
                    value['_id'] = None
            else:
                given_id = None

            if given_id:
                record_id = r.get('id')
                if record_id and record_id != given_id:
                    from mo_logs import Log

                    raise Log.error(
                        "expecting {{property}} of record ({{record_id|quote}}) to match one given ({{given|quote}})",
                        property=self.id_column,
                        record_id=record_id,
                        given=given_id)
            else:
                record_id = r.get('id')
                if record_id:
                    given_id = record_id
                else:
                    given_id = random_id()

            self._typed_encode(value, self.schema, path, net_new_properties,
                               _buffer)
            json = _buffer.build()

            for props in net_new_properties:
                path, type = props[:-1], props[-1][1:]
                # self.es.add_column(join_field(path), type)

            return {"id": given_id, "json": json}
        except Exception as e:
            # THE PRETTY JSON WILL PROVIDE MORE DETAIL ABOUT THE SERIALIZATION CONCERNS
            from mo_logs import Log

            Log.error("Serialization of JSON problems", cause=e)
Example #42
0
def row_to_column(header, row):
    return Column(
        **{
            h: c
            if c is None or h not in ("nested_path", "partitions")
            else json2value(c)
            for h, c in zip(header, row)
        }
    )
Example #43
0
def to_python(self, not_null=False, boolean=False, many=False):
    agg = "rows[rownum+" + self.offset.to_python() + "]"
    path = split_field(json2value(self.var.json))
    if not path:
        return agg

    for p in path[:-1]:
        agg = agg + ".get(" + convert.value2quote(p) + ", EMPTY_DICT)"
    return agg + ".get(" + convert.value2quote(path[-1]) + ")"
Example #44
0
    def pop(self, wait=SECOND, till=None):
        if till is not None and not isinstance(till, Signal):
            Log.error("Expecting a signal")

        m = self.queue.read(wait_time_seconds=mo_math.floor(wait.seconds))
        if not m:
            return None

        self.pending.append(m)
        output = mo_json.json2value(m.get_body())
        return output
Example #45
0
def fix(rownum, line, source, sample_only_filter, sample_size):
    # ES SCHEMA IS STRICTLY TYPED, USE "code" FOR TEXT IDS
    line = line.replace('{"id": "bb"}', '{"code": "bb"}').replace('{"id": "tc"}', '{"code": "tc"}')

    # ES SCHEMA IS STRICTLY TYPED, THE SUITE OBJECT CAN NOT BE HANDLED
    if source.name.startswith("active-data-test-result"):
        # "suite": {"flavor": "plain-chunked", "name": "mochitest"}
        found = strings.between(line, '"suite": {', '}')
        if found:
            suite_json = '{' + found + "}"
            if suite_json:
                suite = mo_json.json2value(suite_json)
                suite = convert.value2json(coalesce(suite.fullname, suite.name))
                line = line.replace(suite_json, suite)

    if rownum == 0:
        value = mo_json.json2value(line)
        if len(line) > MAX_RECORD_LENGTH:
            _shorten(value, source)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
        if sample_only_filter and Random.int(int(1.0/coalesce(sample_size, 0.01))) != 0 and jx.filter([value], sample_only_filter):
            # INDEX etl.id==0, BUT NO MORE
            if value.etl.id != 0:
                Log.error("Expecting etl.id==0")
            return row, True
    elif len(line) > MAX_RECORD_LENGTH:
        value = mo_json.json2value(line)
        _shorten(value, source)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    elif line.find('"resource_usage":') != -1:
        value = mo_json.json2value(line)
        _id, value = _fix(value)
        row = {"id": _id, "value": value}
    else:
        # FAST
        _id = strings.between(line, "\"_id\": \"", "\"")  # AVOID DECODING JSON
        row = {"id": _id, "json": line}

    return row, False
Example #46
0
    def extend(self, records):
        """
        JUST SO WE MODEL A Queue
        """
        records = {
            v["id"]: v["value"] if "value" in v else mo_json.json2value(v['json'])
            for v in records
        }

        unwrap(self.data).update(records)
        self.refresh()
        Log.note("{{num}} documents added", num=len(records))
Example #47
0
def get_json(url, **kwargs):
    """
    ASSUME RESPONSE IN IN JSON
    """
    response = get(url, **kwargs)
    try:
        c = response.all_content
        return json2value(utf82unicode(c))
    except Exception as e:
        if mo_math.round(response.status_code, decimal=-2) in [400, 500]:
            Log.error(u"Bad GET response: {{code}}", code=response.status_code)
        else:
            Log.error(u"Good GET requests, but bad JSON", cause=e)
Example #48
0
 def delete(self, path, **kwargs):
     url = self.settings.host + ":" + unicode(self.settings.port) + path
     try:
         response = http.delete(url, **kwargs)
         if response.status_code not in [200]:
             Log.error(response.reason+": "+response.all_content)
         if self.debug:
             Log.note("response: {{response}}", response=strings.limit(utf82unicode(response.all_content), 130))
         details = wrap(mo_json.json2value(utf82unicode(response.all_content)))
         if details.error:
             Log.error(details.error)
         return details
     except Exception as e:
         Log.error("Problem with call to {{url}}", url=url, cause=e)
Example #49
0
    def pop_message(self, wait=SECOND, till=None):
        """
        RETURN TUPLE (message, payload) CALLER IS RESPONSIBLE FOR CALLING message.delete() WHEN DONE
        """
        if till is not None and not isinstance(till, Signal):
            Log.error("Expecting a signal")

        message = self.queue.read(wait_time_seconds=mo_math.floor(wait.seconds))
        if not message:
            return None
        message.delete = lambda: self.queue.delete_message(message)

        payload = mo_json.json2value(message.get_body())
        return message, payload
Example #50
0
    def set_refresh_interval(self, seconds, **kwargs):
        """
        :param seconds:  -1 FOR NO REFRESH
        :param kwargs: ANY OTHER REQUEST PARAMETERS
        :return: None
        """
        if seconds <= 0:
            interval = -1
        else:
            interval = unicode(seconds) + "s"

        if self.cluster.version.startswith("0.90."):
            response = self.cluster.put(
                "/" + self.settings.index + "/_settings",
                data='{"index":{"refresh_interval":' + convert.value2json(interval) + '}}',
                **kwargs
            )

            result = mo_json.json2value(utf82unicode(response.all_content))
            if not result.ok:
                Log.error("Can not set refresh interval ({{error}})", {
                    "error": utf82unicode(response.all_content)
                })
        elif any(map(self.cluster.version.startswith, ["1.4.", "1.5.", "1.6.", "1.7."])):
            response = self.cluster.put(
                "/" + self.settings.index + "/_settings",
                data=convert.unicode2utf8('{"index":{"refresh_interval":' + convert.value2json(interval) + '}}'),
                **kwargs
            )

            result = mo_json.json2value(utf82unicode(response.all_content))
            if not result.acknowledged:
                Log.error("Can not set refresh interval ({{error}})", {
                    "error": utf82unicode(response.all_content)
                })
        else:
            Log.error("Do not know how to handle ES version {{version}}", version=self.cluster.version)
Example #51
0
    def _get_queue(self, row):
        row = wrap(row)
        if row.json:
            row.value, row.json = json2value(row.json), None
        timestamp = Date(self.rollover_field(row.value))
        if timestamp == None:
            return Null
        elif timestamp < Date.today() - self.rollover_max:
            return DATA_TOO_OLD

        rounded_timestamp = timestamp.floor(self.rollover_interval)
        with self.locker:
            queue = self.known_queues.get(rounded_timestamp.unix)
        if queue == None:
            candidates = jx.run({
                "from": ListContainer(".", self.cluster.get_aliases()),
                "where": {"regex": {"index": self.settings.index + "\d\d\d\d\d\d\d\d_\d\d\d\d\d\d"}},
                "sort": "index"
            })
            best = None
            for c in candidates:
                c = wrap(c)
                c.date = unicode2Date(c.index[-15:], elasticsearch.INDEX_DATE_FORMAT)
                if timestamp > c.date:
                    best = c
            if not best or rounded_timestamp > best.date:
                if rounded_timestamp < wrap(candidates[-1]).date:
                    es = self.cluster.get_or_create_index(read_only=False, alias=best.alias, index=best.index, kwargs=self.settings)
                else:
                    try:
                        es = self.cluster.create_index(create_timestamp=rounded_timestamp, kwargs=self.settings)
                        es.add_alias(self.settings.index)
                    except Exception as e:
                        e = Except.wrap(e)
                        if "IndexAlreadyExistsException" not in e:
                            Log.error("Problem creating index", cause=e)
                        return self._get_queue(row)  # TRY AGAIN
            else:
                es = self.cluster.get_or_create_index(read_only=False, alias=best.alias, index=best.index, kwargs=self.settings)

            with suppress_exception:
                es.set_refresh_interval(seconds=60 * 5, timeout=5)

            self._delete_old_indexes(candidates)
            threaded_queue = es.threaded_queue(max_size=self.settings.queue_size, batch_size=self.settings.batch_size, silent=True)
            with self.locker:
                queue = self.known_queues[rounded_timestamp.unix] = threaded_queue
        return queue
Example #52
0
    def post(self, path, **kwargs):
        url = self.settings.host + ":" + unicode(self.settings.port) + path

        try:
            wrap(kwargs).headers["Accept-Encoding"] = "gzip,deflate"

            data = kwargs.get(b'data')
            if data == None:
                pass
            elif isinstance(data, Mapping):
                kwargs[b'data'] = data =convert.unicode2utf8(convert.value2json(data))
            elif not isinstance(kwargs["data"], str):
                Log.error("data must be utf8 encoded string")

            if self.debug:
                sample = kwargs.get(b'data', "")[:300]
                Log.note("{{url}}:\n{{data|indent}}", url=url, data=sample)

            if self.debug:
                Log.note("POST {{url}}", url=url)
            response = http.post(url, **kwargs)
            if response.status_code not in [200, 201]:
                Log.error(response.reason.decode("latin1") + ": " + strings.limit(response.content.decode("latin1"), 100 if self.debug else 10000))
            if self.debug:
                Log.note("response: {{response}}", response=utf82unicode(response.content)[:130])
            details = mo_json.json2value(utf82unicode(response.content))
            if details.error:
                Log.error(convert.quote2string(details.error))
            if details._shards.failed > 0:
                Log.error("Shard failures {{failures|indent}}",
                    failures="---\n".join(r.replace(";", ";\n") for r in details._shards.failures.reason)
                )
            return details
        except Exception as e:
            if url[0:4] != "http":
                suggestion = " (did you forget \"http://\" prefix on the host name?)"
            else:
                suggestion = ""

            if kwargs.get("data"):
                Log.error(
                    "Problem with call to {{url}}" + suggestion + "\n{{body|left(10000)}}",
                    url=url,
                    body=strings.limit(kwargs["data"], 100 if self.debug else 10000),
                    cause=e
                )
            else:
                Log.error("Problem with call to {{url}}" + suggestion, url=url, cause=e)
 def __init__(self, host, index, port=9200, type="log", max_size=1000, batch_size=100, kwargs=None):
     """
     settings ARE FOR THE ELASTICSEARCH INDEX
     """
     self.es = Cluster(kwargs).get_or_create_index(
         schema=mo_json.json2value(value2json(SCHEMA), leaves=True),
         limit_replicas=True,
         tjson=True,
         kwargs=kwargs
     )
     self.batch_size = batch_size
     self.es.add_alias(coalesce(kwargs.alias, kwargs.index))
     self.queue = Queue("debug logs to es", max=max_size, silent=True)
     self.es.settings.retry.times = coalesce(self.es.settings.retry.times, 3)
     self.es.settings.retry.sleep = Duration(coalesce(self.es.settings.retry.sleep, MINUTE))
     Thread.run("add debug logs to es", self._insert_loop)
Example #54
0
def get_file(ref, url):

    if ref.path.startswith("~"):
        home_path = os.path.expanduser("~")
        if os.sep == "\\":
            home_path = "/" + home_path.replace(os.sep, "/")
        if home_path.endswith("/"):
            home_path = home_path[:-1]

        ref.path = home_path + ref.path[1::]
    elif not ref.path.startswith("/"):
        # CONVERT RELATIVE TO ABSOLUTE
        if ref.path[0] == ".":
            num_dot = 1
            while ref.path[num_dot] == ".":
                num_dot += 1

            parent = url.path.rstrip("/").split("/")[:-num_dot]
            ref.path = "/".join(parent) + ref.path[num_dot:]
        else:
            parent = url.path.rstrip("/").split("/")[:-1]
            ref.path = "/".join(parent) + "/" + ref.path

    path = ref.path if os.sep != "\\" else ref.path[1::].replace("/", "\\")

    try:
        if DEBUG:
            Log.note("reading file {{path}}", path=path)
        content = File(path).read()
    except Exception as e:
        content = None
        Log.error("Could not read file {{filename}}", filename=path, cause=e)

    try:
        new_value = json2value(content, params=ref.query, flexible=True, leaves=True)
    except Exception as e:
        e = Except.wrap(e)
        try:
            new_value = ini2value(content)
        except Exception:
            raise Log.error("Can not read {{file}}", file=path, cause=e)
    new_value = _replace_ref(new_value, ref)
    return new_value
Example #55
0
    def _decode(v):
        output = []
        i = 0
        while i < len(v):
            c = v[i]
            if c == "%":
                d = hex2chr(v[i + 1:i + 3])
                output.append(d)
                i += 3
            else:
                output.append(c)
                i += 1

        output = text_type("".join(output))
        try:
            return json2value(output)
        except Exception:
            pass
        return output
Example #56
0
def command_loop(local):
    DEBUG and Log.note("mo-python process running with {{config|json}}", config=local['config'])
    while not please_stop:
        line = sys.stdin.readline()
        try:
            command = json2value(line.decode('utf8'))
            DEBUG and Log.note("got {{command}}", command=command)

            if "import" in command:
                dummy={}
                if is_text(command['import']):
                    exec ("from " + command['import'] + " import *", dummy, context)
                else:
                    exec ("from " + command['import']['from'] + " import " + ",".join(listwrap(command['import']['vars'])), dummy, context)
                STDOUT.write(DONE)
            elif "set" in command:
                for k, v in command.set.items():
                    context[k] = v
                STDOUT.write(DONE)
            elif "get" in command:
                STDOUT.write(value2json({"out": coalesce(local.get(command['get']), context.get(command['get']))}))
                STDOUT.write('\n')
            elif "stop" in command:
                STDOUT.write(DONE)
                please_stop.go()
            elif "exec" in command:
                if not is_text(command['exec']):
                    Log.error("exec expects only text")
                exec (command['exec'], context, local)
                STDOUT.write(DONE)
            else:
                for k, v in command.items():
                    if is_list(v):
                        exec ("_return = " + k + "(" + ",".join(map(value2json, v)) + ")", context, local)
                    else:
                        exec ("_return = " + k + "(" + ",".join(kk + "=" + value2json(vv) for kk, vv in v.items()) + ")", context, local)
                    STDOUT.write(value2json({"out": local['_return']}))
                    STDOUT.write('\n')
        except Exception as e:
            STDOUT.write(value2json({"err": e}))
            STDOUT.write('\n')
        finally:
            STDOUT.flush()
Example #57
0
    def _decode(v):
        output = []
        i = 0
        while i < len(v):
            c = v[i]
            if c == "%":
                d = (v[i + 1:i + 3]).decode("hex")
                output.append(d)
                i += 3
            else:
                output.append(c)
                i += 1

        output = (b"".join(output)).decode("latin1")
        try:
            return json2value(output)
        except Exception:
            pass
        return output
Example #58
0
 def _daemon(self, please_stop):
     while not please_stop:
         line = self.process.stdout.pop(till=please_stop)
         if line == THREAD_STOP:
             break
         try:
             data = json2value(line.decode('utf8'))
             if "log" in data:
                 Log.main_log.write(*data.log)
             elif "out" in data:
                 with self.lock:
                     self.current_response = data.out
                     self.current_task.go()
             elif "err" in data:
                 with self.lock:
                     self.current_error = data.err
                     self.current_task.go()
         except Exception:
             Log.note("non-json line: {{line}}", line=line)
     DEBUG and Log.note("stdout reader is done")
Example #59
0
def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IN IN JSON
    """
    if 'json' in kwargs:
        kwargs['data'] = unicode2utf8(value2json(kwargs['json']))
        del kwargs['json']
    elif 'data' in kwargs:
        kwargs['data'] = unicode2utf8(value2json(kwargs['data']))
    else:
        Log.error(u"Expecting `json` parameter")
    response = post(url, **kwargs)
    details = json2value(utf82unicode(response.content))
    if response.status_code not in [200, 201, 202]:

        if "template" in details:
            Log.error(u"Bad response code {{code}}", code=response.status_code, cause=Except.wrap(details))
        else:
            Log.error(u"Bad response code {{code}}\n{{details}}", code=response.status_code, details=details)
    else:
        return details
Example #60
0
def post_json(url, **kwargs):
    """
    ASSUME RESPONSE IN IN JSON
    """
    if b"json" in kwargs:
        kwargs[b"data"] = convert.unicode2utf8(convert.value2json(kwargs[b"json"]))
    elif b'data' in kwargs:
        kwargs[b"data"] = convert.unicode2utf8(convert.value2json(kwargs[b"data"]))
    else:
        Log.error("Expecting `json` parameter")

    response = post(url, **kwargs)
    c = response.content
    try:
        details = mo_json.json2value(convert.utf82unicode(c))
    except Exception as e:
        Log.error("Unexpected return value {{content}}", content=c, cause=e)

    if response.status_code not in [200, 201]:
        Log.error("Bad response", cause=Except.wrap(details))

    return details