Code example #1
 def extend(d):
     try:
         db.insert_list(table_name, d)
         db.flush()
         Log.note("added {{num}} records", {"num":len(d)})
     except Exception as e:
         Log.warning("Can not insert into database", e)
Code example #2
 def __init__(self, pushlog_settings):
     with Timer("get pushlog"):
         if pushlog_settings.disable:
             all_pushlogs = []
         else:
             with DB(pushlog_settings) as db:
                 all_pushlogs = db.query("""
                     SELECT
                         pl.`date`,
                         left(ch.node, 12) revision,
                         coalesce(bm.alt_name, br.name) branch
                     FROM
                         changesets ch
                     LEFT JOIN
                         pushlogs pl ON pl.id = ch.pushlog_id
                     LEFT JOIN
                         branches br ON br.id = pl.branch_id
                     LEFT JOIN
                         branch_map bm ON br.id = bm.id
                     WHERE
                         pl.date > {{oldest_date}}
                 """, {"oldest_date": TOO_OLD})
         Log.note("Got pushlog, now indexing...")
         self.pushlog = wrap(Q.index(all_pushlogs, ["branch", "revision"])._data)
         self.locker = Lock()
         self.unknown_branches = set()
Code example #3
def main():
    try:
        settings = startup.read_settings(filename="file2db_settings.json")
        Log.start(settings.debug)
        with DB(settings.db) as db:
            db.execute("""
                DROP TABLE IF EXISTS b2g_tests
            """)
            db.execute("""
                CREATE TABLE b2g_tests (
                    id INTEGER PRIMARY KEY NOT NULL,
                    branch VARCHAR(100),
                    name VARCHAR(100),
                    version VARCHAR(100),
                    suite varchar(200),
                    revision varchar(100),
                    `date` LONG
                )
            """)

            file2db(db, "b2g_tests", settings.source_file)
    except Exception as e:
        Log.error("Can not seem to start up", e)
Code example #4
File: util.py  Project: klahnakoski/MoDevMetrics
 def check_for_errors(self, logs, path):
     try:
         errors = [l for l in logs if l.type == "ERROR"]
         if errors:
             Log.error("Problem found in {{page}}:\n{{error|indent}}", {
                 "page": path,
                 "error": errors[0]
             })
     finally:
         self.close()
Code example #5
def extract_from_datazilla_using_id(es, settings, transformer):

    existing_ids = get_existing_ids(es, settings, transformer.pushlog.keys())
    max_existing_id = nvl(MAX(existing_ids), settings.production.min)
    holes = set(range(settings.production.min, max_existing_id)) - existing_ids
    missing_ids = set(range(settings.production.min, max_existing_id+nvl(settings.production.step, NUM_PER_BATCH))) - existing_ids

    Log.note("Number missing: {{num}}", {"num": len(missing_ids)})
    Log.note("Number in holes: {{num}}", {"num": len(holes)})
    #FASTER IF NO INDEXING IS ON
    es.set_refresh_interval(-1)

    #FILE IS FASTER THAN NETWORK
    if (len(holes) > 10000 or settings.args.scan_file or settings.args.restart) and File(settings.param.output_file).exists:
        #ASYNCH PUSH TO ES IN BLOCKS OF 1000
        with Timer("Scan file for missing ids"):
            with ThreadedQueue(es, size=nvl(es.settings.batch_size, 100)) as json_for_es:
                num = 0
                for line in File(settings.param.output_file):
                    try:
                        if len(line.strip()) == 0:
                            continue
                        col = line.split("\t")
                        id = int(col[0])
                        if id < settings.production.min:
                            continue
                        if id in existing_ids:
                            continue

                        if num > settings.production.step:
                            return
                        num += 1

                        with Profiler("decode and transform"):
                            data = CNV.JSON2object(col[-1])
                            if data.test_run_id:
                                with Profiler("transform"):
                                    data = transformer.transform(id, data)
                                json_for_es.extend({"value": d} for d in data)
                                Log.note("Added {{id}} from file", {"id": id})

                                existing_ids.add(id)
                            else:
                                Log.note("Skipped {{id}} from file (no test_run_id)", {"id": id})
                                num -= 1

                    except Exception as e:
                        Log.warning("Bad line id={{id}} ({{length}}bytes):\n\t{{prefix}}", {
                            "id": id,
                            "length": len(CNV.object2JSON(line)),
                            "prefix": CNV.object2JSON(line)[0:130]
                        }, e)
        missing_ids = missing_ids - existing_ids
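The holes/missing_ids bookkeeping above is plain set arithmetic; a self-contained illustration with made-up numbers:

existing_ids = {1, 2, 4, 7}
minimum, step = 1, 5
max_existing_id = max(existing_ids)                              # 7

# ids below the known maximum that were never loaded
holes = set(range(minimum, max_existing_id)) - existing_ids      # {3, 5, 6}

# same gaps, plus one batch past the maximum to probe for new data
missing_ids = set(range(minimum, max_existing_id + step)) - existing_ids
# {3, 5, 6, 8, 9, 10, 11}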
Code example #6
def get_existing_ids(es, settings, branches):
    #FIND WHAT'S IN ES
    bad_ids = []
    int_ids = set()

    demand_pushlog = {"match_all":{}}
    if branches:
        demand_pushlog = {"or": [
            {"not": {"missing": {"field": "test_build.push_date"}}},
            {"not": {"missing": {"field": "test_build.no_pushlog"}}}
        ]}

    if settings.elasticsearch.debug and settings.production.step < 10:
        # SIMPLY RELOAD THIS SMALL NUMBER
        return set([])

    with ESQuery(es) as esq:
        max_id = esq.query({
            "from": es.settings.alias,
            "select": {"value": "datazilla.id", "aggregate": "max"}
        })

        interval_size = 200000
        for mini, maxi in Q.intervals(settings.production.min, max_id+interval_size, interval_size):
            existing_ids = es.search({
                "query": {
                    "filtered": {
                        "query": {"match_all": {}},
                        "filter": {"and": [
                            {"range": {"datazilla.id": {"gte": mini, "lt": maxi}}},
                            demand_pushlog
                        ]}
                    }
                },
                "from": 0,
                "size": 0,
                "sort": [],
                "facets": {
                    "ids": {"terms": {"field": "datazilla.id", "size": interval_size}}
                }
            })

            for t in existing_ids.facets.ids.terms:
                try:
                    int_ids.add(int(t.term))
                except Exception:
                    bad_ids.append(t.term)

        existing_ids = int_ids
        Log.println("Number of ids in ES: " + str(len(existing_ids)))
        Log.println("BAD ids in ES: " + str(bad_ids))
        return existing_ids
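Q.intervals() evidently yields windows over the id range so each facet request stays bounded; a hand-rolled equivalent of that windowing, assuming half-open [start, stop) semantics (hypothetical helper, not the pyLibrary API):

def intervals(start, stop, size):
    # yield half-open (lo, hi) windows covering [start, stop)
    lo = start
    while lo < stop:
        yield lo, min(lo + size, stop)
        lo += size

# list(intervals(0, 500000, 200000))
# -> [(0, 200000), (200000, 400000), (400000, 500000)]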
Code example #7
def arrays_add(id, path, r):
    try:
        if isinstance(r, dict):
            for k, v in list(r.items()):
                new_path = path + "[" + k + "]"
                arrays_add(id, new_path, v)
        elif isinstance(r, list):
            try:
                values = map(float, r)  #FAILS UNLESS EVERY ELEMENT IS NUMERIC
                arrays.append([id, path, len(values), 1])  #arrays IS A MODULE-LEVEL ACCUMULATOR
            except Exception:
                #NOT A PURELY NUMERIC LIST; RECURSE INTO EACH ELEMENT
                for i, v in enumerate(r):
                    arrays_add(id, path + "[" + str(i) + "]", v)
    except Exception as e:
        Log.warning("Can not summarize: {{json}}", {"json": CNV.object2JSON(r)}, e)
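A usage sketch of the corrected function above, assuming arrays is a module-level list (made-up data):

arrays = []
arrays_add(42, "blob", {"suite": {"times": [1.0, 2.0, 3.0], "name": "tp5"}})
# arrays -> [[42, "blob[suite][times]", 3, 1]]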
Code example #8
def main():
    try:
        settings = startup.read_settings(defs=[{
            "name": ["--no_restart", "--no_reset", "--no_redo", "--norestart", "--noreset", "--noredo"],
            "help": "do not allow creation of new index (for debugging rouge resets)",
            "action": "store_true",
            "dest": "no_restart"
        }, {
            "name": ["--restart", "--reset", "--redo"],
            "help": "force a reprocessing of all data",
            "action": "store_true",
            "dest": "restart"
        }, {
            "name": ["--file", "--scan_file", "--scanfile", "--use_file", "--usefile"],
            "help": "scan file for missing ids",
            "action": "store_true",
            "dest": "scan_file"
        }, {
            "name": ["--nofile", "--no_file", "--no-file"],
            "help": "do not scan file for missing ids",
            "action": "store_false",
            "dest": "scan_file"
        }])
        Log.start(settings.debug)

        with startup.SingleInstance(flavor_id=settings.args.filename):
            settings.production.threads = nvl(settings.production.threads, 1)
            settings.param.output_file = nvl(settings.param.output_file, "./results/raw_json_blobs.tab")

            transformer = DZ_to_ES(settings.pushlog)

            #RESET ONLY IF NEW Transform IS USED
            if settings.args.restart:
                es = Cluster(settings.elasticsearch).create_index(settings.elasticsearch)
                es.add_alias()
                es.delete_all_but_self()
                extract_from_datazilla_using_id(es, settings, transformer)
            else:
                es = Cluster(settings.elasticsearch).get_or_create_index(settings.elasticsearch)
                extract_from_datazilla_using_id(es, settings, transformer)
    except Exception as e:
        Log.error("Problem with ETL", e)
Code example #9
File: util.py  Project: iSC-Host/charts.mozilla.org
    def _wait_for_stable(self, detect_function, timeout):
        """
        WAIT FOR RESULTS OF detect_function TO BE STABLE
        """
        if not isinstance(timeout, timedelta):
            Log.error("Expecting a timeout as a timedelta")

        detectTime = Date.now()
        newValue = detect_function()
        while True:
            now = Date.now()
            potentialValue = detect_function()
            if potentialValue != newValue:
                newValue = potentialValue
                detectTime = now
            if now - detectTime > timeout:
                return
            Thread.sleep(seconds=0.5)
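The same debounce idea in self-contained form, using only the standard library (hypothetical names, not the project's API):

import time

def wait_for_stable(detect, timeout_seconds, poll=0.5):
    # return once detect() has reported the same value for timeout_seconds
    detect_time = time.time()
    value = detect()
    while True:
        now = time.time()
        candidate = detect()
        if candidate != value:
            value = candidate
            detect_time = now
        if now - detect_time > timeout_seconds:
            return value
        time.sleep(poll)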
Code example #10
File: util.py  Project: iSC-Host/charts.mozilla.org
    def wait_for_logs(self, timeout=None):
        if not timeout:
            timeout = timedelta(seconds=10)

        def logs():
            return self.find("#" + LOG_DIV + " p")

        def status():
            s = self.find("#status")
            if not s:
                return None
            return s[0].text

        # IF THE MESSAGE KEEPS CHANGING OR THE LOGS KEEP INCREASING WE CAN BE
        # CONFIDENT SOMETHING IMPORTANT IS STILL HAPPENING
        self._wait_for_stable(lambda: (status(), len(logs())), timeout)

        output = [
            CNV.JSON2object(CNV.html2unicode(e.get_attribute('innerHTML')))
            for e in logs()
        ]
        Log.note("Logs:\n{{logs|indent}}", {"logs": output})
        return output
Code example #11
def etl(es_sink, file_sink, settings, transformer, max_id, id):
    """
    PULL FROM DZ AND PUSH TO es AND file_sink
    """

    url = settings.production.blob_url + "/" + str(id)
    try:
        with Timer("read {{id}} from DZ", {"id": id}):
            content = requests.get(url, timeout=nvl(settings.production.timeout, 30)).content
    except Exception as e:
        Log.warning("Failure to read from {{url}}", {"url": url}, e)
        return False
Code example #12
    url = settings.production.blob_url + "/" + str(id)
    try:
        with Timer("read {{id}} from DZ", {"id": id}):
            content = requests.get(url, timeout=nvl(settings.production.timeout, 30)).content
    except Exception as e:
        Log.warning("Failure to read from {{url}}", {"url": url}, e)
        return False

    try:
        if content.startswith("Id not found"):
            Log.note("{{id}} not found {{url}}", {"id": id, "url": url})
            if id < max_id:
                return True
            else:
                return False

        data = CNV.JSON2object(content.decode('utf-8'))
        content = CNV.object2JSON(data)  #ENSURE content HAS NO crlf

        if data.test_run_id:
            Log.println("Add {{id}} for revision {{revision}} ({{bytes}} bytes)", {
                "id": id,
                "revision": data.json_blob.test_build.revision,
                "bytes": len(content)
            })
            with Profiler("transform"):
Code example #13
                new_path = path + "[" + k + "]"
                arrays_add(id, new_path, v)
        elif isinstance(r, list):
            try:
                values = map(float, r)  #FAILS UNLESS EVERY ELEMENT IS NUMERIC
                arrays.append([id, path, len(values), 1])
            except Exception:
                #NOT A PURELY NUMERIC LIST; RECURSE INTO EACH ELEMENT
                for i, v in enumerate(r):
                    arrays_add(id, path + "[" + str(i) + "]", v)
    except Exception as e:
        Log.warning("Can not summarize: {{json}}", {"json": CNV.object2JSON(r)}, e)


settings = startup.read_settings()
Log.start(settings.debug)
all = set()

with open(settings.output_file, "r") as input_file:
    with open("good_talos.tab", "w") as output_file:
        for line in input_file:
            try:
                if len(line.strip()) == 0:
                    continue

                col = line.split("\t")
                id = int(col[0])
                if id < MINIMUM_ID:
                    continue

                json = col[1]
Code example #14
File: util.py  Project: klahnakoski/MoDevMetrics
 def check_if_still_loading(self, path):
     # IF SPINNER STILL SHOWS, THEN WE GOT LOADING ISSUES
     isLoading = OR([e.is_displayed() for e in self.find(".loading")])
     if isLoading:
         Log.error("page still loading: {{page}}", {"page": path})
Code example #15
                id = int(col[0])
                if id in added:
                    continue
                added.add(id)

                data = CNV.JSON2object(col[1])
                records_for_db.add({
                    "id": nvl(data.test_run_id, id),
                    "branch": data.json_blob.test_build.branch,
                    "name": data.json_blob.test_build.name,
                    "version": data.json_blob.test_build.version,
                    "suite": data.json_blob.testrun.suite,
                    "revision": data.json_blob.test_build.revision,
                    "date": data.json_blob.testrun.date
                })
                Log.note("Added {{id}} from file", {"id": data.test_run_id})
            except Exception as e:
                Log.warning("Bad line ({{length}}bytes):\n\t{{prefix}}", {
                    "length": len(CNV.object2JSON(line)),
                    "prefix": CNV.object2JSON(line)[0:130]
                }, e)
Code example #16
                        test_machine=r.test_machine,
                        datazilla=r.datazilla,
                        testrun=r.testrun,
                        test_build=r.test_build,
                        result={
                            "test_name": test_name,
                            "ordering": i,
                            "samples": replicates
                        }
                    )
                    try:
                        s = stats(replicates)
                        new_record.result.stats = s
                        total.append(s)
                    except Exception as e:
                        Log.warning("can not reduce series to moments", e)
                    new_records.append(new_record)

            if len(total) > 1:
                # ADD RECORD FOR GEOMETRIC MEAN SUMMARY

                new_record = Struct(
                    test_machine=r.test_machine,
                    datazilla=r.datazilla,
                    testrun=r.testrun,
                    test_build=r.test_build,
                    result={
                        "test_name": "SUMMARY",
                        "ordering": -1,
                        "stats": geo_mean(total)
                    }
Code example #17
    def transform(self, id, datazilla):
        try:
            r = datazilla.json_blob

            #ADD DATAZILLA MARKUP
            r.datazilla = {
                "id": id,
                "date_loaded": datazilla.date_loaded * 1000,
                "error_flag": datazilla.error_flag,
                "test_run_id": datazilla.test_run_id,
                "processed_flag": datazilla.processed_flag,
                "error_msg": datazilla.error_msg
            }

            #CONVERT UNIX TIMESTAMP TO MILLISECOND TIMESTAMP
            r.testrun.date *= 1000

            def mainthread_transform(r):
                if r == None:
                    return None

                output = Struct()

                for i in r.mainthread_readbytes:
                    output[literal_field(i[1])].name = i[1]
                    output[literal_field(i[1])].readbytes = i[0]
                r.mainthread_readbytes = None

                for i in r.mainthread_writebytes:
                    output[literal_field(i[1])].name = i[1]
                    output[literal_field(i[1])].writebytes = i[0]
                r.mainthread_writebytes = None

                for i in r.mainthread_readcount:
                    output[literal_field(i[1])].name = i[1]
                    output[literal_field(i[1])].readcount = i[0]
                r.mainthread_readcount = None

                for i in r.mainthread_writecount:
                    output[literal_field(i[1])].name = i[1]
                    output[literal_field(i[1])].writecount = i[0]
                r.mainthread_writecount = None

                r.mainthread = output.values()

            mainthread_transform(r.results_aux)
            mainthread_transform(r.results_xperf)

            #ADD PUSH LOG INFO
            try:
                branch = r.test_build.branch
                if branch.endswith("-Non-PGO"):
                    r.test_build.branch = branch
                    r.test_build.pgo = False
                    branch = branch[0:-8]
                else:
                    r.test_build.pgo = True

                with Profiler("get from pushlog"):
                    if not self.pushlog:
                        #NO PUSHLOG MEANS WE DO NOTHING TO MARKUP TEST RESULTS
                        pass
                    elif self.pushlog[branch]:
                        possible_dates = self.pushlog[branch][r.test_build.revision]
                        if possible_dates:
                            r.test_build.push_date = int(Math.round(possible_dates[0].date * 1000))
                        else:
                            if r.test_build.revision == 'NULL':
                                r.test_build.no_pushlog = True  # OOPS! SOMETHING BROKE
                            elif CNV.milli2datetime(Math.min(r.testrun.date, r.datazilla.date_loaded)) < PUSHLOG_TOO_OLD:
                                Log.note("{{branch}} @ {{revision}} has no pushlog, transforming anyway", r.test_build)
                                r.test_build.no_pushlog = True
                            else:
                                Log.note("{{branch}} @ {{revision}} has no pushlog, try again later", r.test_build)
                                return []  # TRY AGAIN LATER
                    else:
                        with self.locker:
                            if branch not in self.unknown_branches:
                                Log.note("Whole branch {{branch}} has no pushlog", {"branch":branch})
                                self.unknown_branches.add(branch)
                            if CNV.milli2datetime(Math.min(r.testrun.date, r.datazilla.date_loaded)) < PUSHLOG_TOO_OLD:
                                r.test_build.no_pushlog = True
                            else:
                                r.test_build.no_pushlog = True
                                #return [r]  #TODO: DO THIS IF WE FIGURE OUT HOW TO HANDLE THE VERY LARGE NUMBER OF RESULTS WITH NO PUSHLOG

            except Exception as e:
                Log.warning("{{branch}} @ {{revision}} has no pushlog", r.test_build, e)

            new_records = []

            # RECORD THE UNKNOWN PART OF THE TEST RESULTS
            remainder = r.copy()
            remainder.results = None
            if len(remainder.keys()) > 4:
                new_records.append(remainder)

            #RECORD TEST RESULTS
            total = StructList()
            if r.testrun.suite in ["dromaeo_css", "dromaeo_dom"]:
                #dromaeo IS SPECIAL, REPLICATES ARE IN SETS OF FIVE
                #RECORD ALL RESULTS
                for i, (test_name, replicates) in enumerate(r.results.items()):
                    for g, sub_results in Q.groupby(replicates, size=5):
                        new_record = Struct(
                            test_machine=r.test_machine,
                            datazilla=r.datazilla,
                            testrun=r.testrun,
                            test_build=r.test_build,
                            result={
                                "test_name": unicode(test_name) + "." + unicode(g),
                                "ordering": i,
                                "samples": sub_results
                            }
                        )
                        try:
                            s = stats(sub_results)
                            new_record.result.stats = s
                            total.append(s)
                        except Exception as e:
                            Log.warning("can not reduce series to moments", e)
                        new_records.append(new_record)
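Q.groupby(replicates, size=5) evidently chunks the replicate list into consecutive groups of five; a hand-rolled equivalent of that slicing (hypothetical helper; the real group key may differ from a simple index):

def groupby_size(values, size):
    # yield (group_index, chunk) pairs over consecutive slices
    for g in range(0, len(values), size):
        yield g // size, values[g:g + size]

# list(groupby_size([10, 11, 12, 13, 14, 15, 16], 5))
# -> [(0, [10, 11, 12, 13, 14]), (1, [15, 16])]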
Code example #18
 def __del__(self):
     try:
         Log.println("Branches missing from pushlog:\n{{list}}", {"list": self.unknown_branches})
     except Exception:
         pass