Example #1
            # process the total times
            for m in re.finditer(self._time_pat, inputstr):
                num = m.group('num')
                time = m.group('time')
                # setdefault keeps a single dict per pipeline id stored in piperesults
                d = piperesults.setdefault(num, {})
                d['pipeline_time'] = time

            # process the starts
            for m in re.finditer(self._start_pat, inputstr):
                num = m.group('num')
                time = m.group('time')
                d = piperesults.setdefault(num, {})
                d['pipeline_start'] = time

            # process the ends
            for m in re.finditer(self._end_pat, inputstr):
                num = m.group('num')
                time = m.group('time')
                d = piperesults.setdefault(num, {})
                d['pipeline_end'] = time

            # combine into records
            for num, d in piperesults.iteritems():
                d['pipeline_id'] = num
                cr = pdict.copy()
                cr.update(d)
                yield cr

if __name__ == '__main__':
    cli(PipelineLogParser())
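
The loops above rely on `self._time_pat`, `self._start_pat`, and `self._end_pat`, which are defined outside this excerpt; whatever their exact form, each must expose the `num` and `time` named groups read here. A minimal sketch, assuming a hypothetical log format (these patterns are illustrative, not the originals):

import re

# Hypothetical patterns (assumed log format); each provides the 'num' and
# 'time' named groups that the loops above depend on.
_time_pat  = re.compile(r"pipeline (?P<num>\d+) total time: (?P<time>\d+\.\d+)")
_start_pat = re.compile(r"pipeline (?P<num>\d+) start: (?P<time>\d+\.\d+)")
_end_pat   = re.compile(r"pipeline (?P<num>\d+) end: (?P<time>\d+\.\d+)")
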
Example #2
import re

# `Parser` and `cli` are defined elsewhere in the project (not shown in this excerpt).
class ImpalaLogParser(Parser):
    def recorditer(self, inputstr):
        querypat = re.compile(
            r"Running query: (?P<query>q\d+)[_a-z]+, "
            r"no_codegen: (?P<ncodegen>\d+), scale: (?P<scale>\d+)\n"
            r"Time:(?P<preptime>\d+[.]\d+)\n"
            r"Time:(?P<runtime1>\d+[.]\d+)\n"
            r"Time:(?P<runtime2>\d+[.]\d+)\n"
            r"(?P<failmsg>(ABOVE QUERY FAILED:1)?)"
        )

        for m in re.finditer(querypat, inputstr):
            # params
            r = {
                "machine": "bigdata",
                "system": "impala",
                "nnode": 16,
                "codegen": 1 - int(m.group("ncodegen")),
                "scale": m.group("scale"),
            }

            # measures
            for k in ["query", "runtime1", "runtime2", "preptime"]:
                r[k] = m.group(k)

            if m.group("failmsg") != "":
                print "failed query {0}; not saving".format(r["query"])
                continue

            yield r


if __name__ == "__main__":
    cli(ImpalaLogParser())
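
Given a hypothetical log fragment matching `querypat` above (the real Impala log format is only implied by the pattern), the parser would yield a record like the one sketched below; the no-argument constructor is taken from the `__main__` block.

# Hypothetical usage; the sample text is a guess at the format implied by querypat.
sample = ("Running query: q17_parquet, no_codegen: 1, scale: 100\n"
          "Time:1.23\nTime:45.60\nTime:44.95\n")
for record in ImpalaLogParser().recorditer(sample):
    print record
    # one record with, among other fields:
    #   query='q17', codegen=0, scale='100',
    #   preptime='1.23', runtime1='45.60', runtime2='44.95'
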
Example #3
        self.includes_params = includes_params

    def recorditer(self, inputstr):
        jparams = JSONParamsParser(PARAMS_TAG)
        sparams = JSONParamsParser('STATS')

        if self.includes_params:
            assert jparams.count(inputstr) == sparams.count(inputstr), \
                "different numbers of STATS and PARAMS; " \
                "check your log file for errors"
            # concurrently search for adjacent pairs of PARAMS and STATS
            for pdict, sdict in itertools.izip(
                    jparams.idict_from_json(inputstr),
                    sparams.idict_from_json(inputstr)):

                result = {}
                result.update(pdict)
                result.update(sdict)
                yield result
        else:
            # no PARAMS records in the log; emit each STATS record on its own
            for sdict in sparams.idict_from_json(inputstr):
                result = {}
                result.update(sdict)
                yield result


if __name__ == '__main__':
    cli(GrappaLogParser())
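
Example #3 depends on `JSONParamsParser` and the module constant `PARAMS_TAG`, both defined outside the excerpt. The code above only needs `count(inputstr)` and `idict_from_json(inputstr)` from that class. A minimal stand-in, under the assumption that tagged JSON objects appear in the log as lines such as `STATS{ ... }` (the real tag format is not shown):

import json
import re

class JSONParamsParser(object):
    """Hypothetical stand-in: find 'TAG{...}' blobs and parse the JSON."""

    def __init__(self, tag):
        # assume one JSON object per tagged line, e.g. STATS{"runtime": 1.2}
        self._pat = re.compile(re.escape(tag) + r'(\{.*\})')

    def count(self, inputstr):
        # number of tagged JSON records in the log text
        return len(self._pat.findall(inputstr))

    def idict_from_json(self, inputstr):
        # generator of dicts, one per tagged JSON record, in file order
        for m in self._pat.finditer(inputstr):
            yield json.loads(m.group(1))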