def extract_from_datazilla_using_id(es, settings, transformer):

    existing_ids = get_existing_ids(es, settings, transformer.pushlog.keys())
    max_existing_id = nvl(MAX(existing_ids), settings.production.min)
    holes = set(range(settings.production.min, max_existing_id)) - existing_ids
    missing_ids = set(range(settings.production.min, max_existing_id+nvl(settings.production.step, NUM_PER_BATCH))) - existing_ids

    Log.note("Number missing: {{num}}", {"num": len(missing_ids)})
    Log.note("Number in holes: {{num}}", {"num": len(holes)})
    # FASTER WITH INDEX REFRESH TURNED OFF
    es.set_refresh_interval(-1)

    # SCANNING THE LOCAL FILE IS FASTER THAN PULLING OVER THE NETWORK
    if (len(holes) > 10000 or settings.args.scan_file or settings.args.restart) and File(settings.param.output_file).exists:
        # ASYNCH PUSH TO ES IN BATCHES (DEFAULT 100)
        with Timer("Scan file for missing ids"):
            with ThreadedQueue(es, size=nvl(es.settings.batch_size, 100)) as json_for_es:
                num = 0
                id = None  # INITIALIZED SO THE except BELOW CAN REPORT FIRST-LINE FAILURES
                for line in File(settings.param.output_file):
                    try:
                        if len(line.strip()) == 0:
                            continue
                        col = line.split("\t")
                        id = int(col[0])
                        if id < settings.production.min:
                            continue
                        if id in existing_ids:
                            continue

                        if num > settings.production.step:
                            return
                        num += 1

                        with Profiler("decode and transform"):
                            data = CNV.JSON2object(col[-1])
                            if data.test_run_id:
                                with Profiler("transform"):
                                    data = transformer.transform(id, data)
                                json_for_es.extend({"value": d} for d in data)
                                Log.note("Added {{id}} from file", {"id": id})

                                existing_ids.add(id)
                            else:
                                Log.note("Skipped {{id}} from file (no test_run_id)", {"id": id})
                                num -= 1

                    except Exception, e:
                        Log.warning("Bad line id={{id}} ({{length}}bytes):\n\t{{prefix}}", {
                            "id": id,
                            "length": len(CNV.object2JSON(line)),
                            "prefix": CNV.object2JSON(line)[0:130]
                        }, e)
        missing_ids = missing_ids - existing_ids
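
# NOTE: nvl IS pyLibrary's NULL-COALESCING HELPER. A MINIMAL SKETCH OF ITS
# BEHAVIOR (NOT THE LIBRARY'S ACTUAL CODE), FOR READERS UNFAMILIAR WITH IT:
def nvl(*values):
    # RETURN THE FIRST VALUE THAT IS NOT None (SQL-STYLE coalesce)
    for v in values:
        if v != None:  # ==/!= SO pyLibrary's Null SINGLETON ALSO MATCHES
            return v
    return None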
Example #2
    def wait_for_logs(self):
        old_length = -1
        elements = self.find("#" + LOG_DIV + " p")
        while len(elements) != old_length:
            Thread.sleep(seconds=10)
            old_length = len(elements)
            elements = self.find("#" + LOG_DIV + " p")

        return [
            CNV.JSON2object(CNV.html2unicode(e.get_attribute('innerHTML')))
            for e in elements
        ]
Example #3
    def wait_for_logs(self, timeout=None):
        if not timeout:
            timeout = timedelta(seconds=10)

        def logs():
            return self.find("#" + LOG_DIV + " p")

        def status():
            s = self.find("#status")
            if not s:
                return None
            return s[0].text

        # IF THE MESSAGE KEEPS CHANGING OR THE LOGS KEEP INCREASING WE CAN BE
        # CONFIDENT SOMETHING IMPORTANT IS STILL HAPPENING
        self._wait_for_stable(lambda: (status(), len(logs())), timeout)

        output = [
            CNV.JSON2object(CNV.html2unicode(e.get_attribute('innerHTML')))
            for e in logs()
        ]
        Log.note("Logs:\n{{logs|indent}}", {"logs": output})
        return output
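
# NOTE: _wait_for_stable IS NOT SHOWN IN THIS LISTING. A PLAUSIBLE SKETCH,
# ASSUMING IT RE-SAMPLES THE PROBE UNTIL THE VALUE SURVIVES ONE FULL TIMEOUT
# WINDOW UNCHANGED (METHOD SKETCH; self SHOWN TO MATCH THE CALL SITE ABOVE):
import time

def _wait_for_stable(self, detect_function, timeout):
    last = detect_function()
    while True:
        time.sleep(timeout.total_seconds())
        current = detect_function()
        if current == last:
            return  # TWO CONSECUTIVE SAMPLES AGREE: STABLE
        last = current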
def arrays_add(id, path, r):
    try:
        if isinstance(r, dict):
            for k, v in r.items():
                new_path = path + "[" + k + "]"
                arrays_add(id, new_path, v)
        elif isinstance(r, list):
            try:
                values = map(float, r)  # RAISES IF ANY ELEMENT IS NOT NUMERIC
                arrays.append([id, path, len(values), 1])
            except Exception, e:
                for i, v in enumerate(r):
                    arrays_add(id, path + "[" + str(i) + "]", v)
    except Exception, e:
        Log.warning("Can not summarize: {{json}}", {"json": CNV.object2JSON(r)})
Example #5
import os
import sys
import ConfigParser
from optparse import OptionParser

from pyLibrary.cnv import CNV
from pyLibrary.times.dates import Date  # IMPORT PATH ASSUMED


def get_config():
    op = OptionParser()
    op.add_option("--config",
        action="store", type="string", dest="config",
        default=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.ini'),
        help="path to the config file [config.ini]")

    options, args = op.parse_args()

    if not os.path.exists(options.config):
        print "ERROR: %s doesn't exist" % (os.path.abspath(options.config))
        sys.exit(1)

    parser = ConfigParser.RawConfigParser(defaults={'debug': 'false'})
    parser.read(options.config)

    def now():
        return Date.eod().value

    def today():
        return Date.today().value

    try:
        const = CNV.string2datetime(parser.get('alerts', 'now'))

        def now():
            return const

        today = now
    except Exception:
        pass


    return {
        'username': parser.get('alerts', 'username'),
        'password': parser.get('alerts', 'password'),
        'host': parser.get('alerts', 'host'),
        'database': parser.get('alerts', 'database'),
        'maildir': parser.get('alerts', 'maildir'),
        'now': now,
        'today': today,
        'DEBUG': parser.getboolean('alerts', 'debug'),
    }
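
# USAGE SKETCH (HYPOTHETICAL CALLER; KEY NAMES TAKEN FROM THE DICT ABOVE).
# now() IS FROZEN ONLY WHEN THE OPTIONAL [alerts] now SETTING IS IN config.ini:
config = get_config()
print "connecting to %s@%s/%s" % (config['username'], config['host'], config['database'])
if config['DEBUG']:
    print "now() =", config['now']()
    print "today() =", config['today']()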
Example #6
    def wait_for_logs(self, timeout=None):
        if not timeout:
            timeout = timedelta(seconds=10)

        def logs():
            return self.find("#" + LOG_DIV + " p")

        def status():
            s = self.find("#status")
            if not s:
                return None
            return s[0].text

        # IF THE MESSAGE KEEPS CHANGING OR THE LOGS KEEP INCREASING WE CAN BE
        # CONFIDENT SOMETHING IMPORTANT IS STILL HAPPENING
        self._wait_for_stable(lambda: (status(), len(logs())), timeout)

        output = [CNV.JSON2object(CNV.html2unicode(e.get_attribute('innerHTML'))) for e in logs()]
        Log.note("Logs:\n{{logs|indent}}", {"logs": output})
        return output
    def transform(self, id, datazilla):
        try:
            r = datazilla.json_blob

            #ADD DATAZILLA MARKUP
            r.datazilla = {
                "id": id,
                "date_loaded": datazilla.date_loaded * 1000,
                "error_flag": datazilla.error_flag,
                "test_run_id": datazilla.test_run_id,
                "processed_flag": datazilla.processed_flag,
                "error_msg": datazilla.error_msg
            }

            #CONVERT UNIX TIMESTAMP TO MILLISECOND TIMESTAMP
            r.testrun.date *= 1000

            def mainthread_transform(r):
                if r == None:
                    return None

                output = Struct()

                for i in r.mainthread_readbytes:
                    output[literal_field(i[1])].name = i[1]
                    output[literal_field(i[1])].readbytes = i[0]
                r.mainthread_readbytes = None

                for i in r.mainthread_writebytes:
                    output[literal_field(i[1])].name = i[1]
                    output[literal_field(i[1])].writebytes = i[0]
                r.mainthread_writebytes = None

                for i in r.mainthread_readcount:
                    output[literal_field(i[1])].name = i[1]
                    output[literal_field(i[1])].readcount = i[0]
                r.mainthread_readcount = None

                for i in r.mainthread_writecount:
                    output[literal_field(i[1])].name = i[1]
                    output[literal_field(i[1])].writecount = i[0]
                r.mainthread_writecount = None

                r.mainthread = output.values()

            mainthread_transform(r.results_aux)
            mainthread_transform(r.results_xperf)

            #ADD PUSH LOG INFO
            try:
                branch = r.test_build.branch
                if branch.endswith("-Non-PGO"):
                    r.test_build.branch = branch
                    r.test_build.pgo = False
                    branch = branch[0:-8]
                else:
                    r.test_build.pgo = True

                with Profiler("get from pushlog"):
                    if not self.pushlog:
                        #NO PUSHLOG MEANS WE DO NOTHING TO MARKUP TEST RESULTS
                        pass
                    elif self.pushlog[branch]:
                        possible_dates = self.pushlog[branch][r.test_build.revision]
                        if possible_dates:
                            r.test_build.push_date = int(Math.round(possible_dates[0].date * 1000))
                        else:
                            if r.test_build.revision == 'NULL':
                                r.test_build.no_pushlog = True  # OOPS! SOMETHING BROKE
                            elif CNV.milli2datetime(Math.min(r.testrun.date, r.datazilla.date_loaded)) < PUSHLOG_TOO_OLD:
                                Log.note("{{branch}} @ {{revision}} has no pushlog, transforming anyway", r.test_build)
                                r.test_build.no_pushlog = True
                            else:
                                Log.note("{{branch}} @ {{revision}} has no pushlog, try again later", r.test_build)
                                return []  # TRY AGAIN LATER
                    else:
                        with self.locker:
                            if branch not in self.unknown_branches:
                                Log.note("Whole branch {{branch}} has no pushlog", {"branch":branch})
                                self.unknown_branches.add(branch)
                            if CNV.milli2datetime(Math.min(r.testrun.date, r.datazilla.date_loaded)) < PUSHLOG_TOO_OLD:
                                r.test_build.no_pushlog = True
                            else:
                                r.test_build.no_pushlog = True
                                #return [r]  #TODO: DO THIS IF WE FIGURE OUT HOW TO HANDLE THE VERY LARGE NUMBER OF RESULTS WITH NO PUSHLOG

            except Exception, e:
                Log.warning("{{branch}} @ {{revision}} has no pushlog", r.test_build, e)

            new_records = []

            # RECORD THE UNKNOWN PART OF THE TEST RESULTS
            remainder = r.copy()
            remainder.results = None
            if len(remainder.keys()) > 4:
                new_records.append(remainder)

            #RECORD TEST RESULTS
            total = StructList()
            if r.testrun.suite in ["dromaeo_css", "dromaeo_dom"]:
                #dromaeo IS SPECIAL, REPLICATES ARE IN SETS OF FIVE
                #RECORD ALL RESULTS
                for i, (test_name, replicates) in enumerate(r.results.items()):
                    for g, sub_results in Q.groupby(replicates, size=5):
                        new_record = Struct(
                            test_machine=r.test_machine,
                            datazilla=r.datazilla,
                            testrun=r.testrun,
                            test_build=r.test_build,
                            result={
                                "test_name": unicode(test_name) + "." + unicode(g),
                                "ordering": i,
                                "samples": sub_results
                            }
                        )
                        try:
                            s = stats(sub_results)
                            new_record.result.stats = s
                            total.append(s)
                        except Exception, e:
                            Log.warning("can not reduce series to moments", e)
                        new_records.append(new_record)
                added.add(id)

                data = CNV.JSON2object(col[1])
                records_for_db.add({
                    "id": nvl(data.test_run_id, id),
                    "branch": data.json_blob.test_build.branch,
                    "name": data.json_blob.test_build.name,
                    "version": data.json_blob.test_build.version,
                    "suite": data.json_blob.testrun.suite,
                    "revision": data.json_blob.test_build.revision,
                    "date": data.json_blob.testrun.date
                })
                Log.note("Added {{id}} from file", {"id": data.test_run_id})
            except Exception, e:
                Log.warning("Bad line ({{length}}bytes):\n\t{{prefix}}", {
                    "length": len(CNV.object2JSON(line)),
                    "prefix": CNV.object2JSON(line)[0:130]
                }, e)



def main():
    try:
        settings = startup.read_settings(filename="file2db_settings.json")
        Log.start(settings.debug)


        with DB(settings.db) as db:
            db.execute("""
                DROP TABLE IF EXISTS b2g_tests
            """)
#
# Author: Kyle Lahnakoski ([email protected])
#
from __future__ import unicode_literals
from pyLibrary.maths import Math
from pyLibrary.env import startup
from pyLibrary.cnv import CNV
from pyLibrary.env.logs import Log
from pyLibrary.times.timer import Timer


with Timer("load pandas"):
    import pandas
    from pandas.core.frame import DataFrame

MINIMUM_DATE = CNV.string2datetime("20130720", "%Y%m%d")
MINIMUM_ID = 0
parts = [0, 1, 2, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000, 10000, 25000, 50000, 100000, 250000]

arrays = []

#TRAVERSE THE JSON GRAPH AND REPORT THE float() ARRAY POPULATIONS
def arrays_add(id, path, r):
    try:
        if isinstance(r, dict):
            for k, v in r.items():
                new_path = path + "[" + k + "]"
                arrays_add(id, new_path, v)
        elif isinstance(r, list):
            try:
                values = map(float, r)  # RAISES IF ANY ELEMENT IS NOT NUMERIC
                arrays.append([id, path, len(values), 1])
            except Exception, e:
                for i, v in enumerate(r):
                    arrays_add(id, path + "[" + str(i) + "]", v)
    except Exception, e:
        Log.warning("Can not summarize: {{json}}", {"json": CNV.object2JSON(r)})
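
# HYPOTHETICAL AGGREGATION: THE LISTING NEVER SHOWS HOW arrays AND parts
# COMBINE; THIS SKETCH ASSUMES ROWS ARE BUCKETED BY ARRAY LENGTH USING THE
# pandas ALREADY IMPORTED ABOVE:
def summarize():
    frame = DataFrame(arrays, columns=["id", "path", "length", "count"])
    frame["bucket"] = pandas.cut(frame["length"], parts)
    print frame.groupby("bucket")["count"].sum()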
Example #10
        with Timer("read {{id}} from DZ", {"id": id}):
            content = requests.get(url, timeout=nvl(settings.production.timeout, 30)).content
    except Exception, e:
        Log.warning("Failure to read from {{url}}", {"url": url}, e)
        return False

    try:
        if content.startswith("Id not found"):
            Log.note("{{id}} not found {{url}}", {"id": id, "url": url})
            return id < max_id  # KEEP SCANNING ONLY WHILE MORE IDS MAY EXIST

        data = CNV.JSON2object(content.decode('utf-8'))
        content = CNV.object2JSON(data)  #ENSURE content HAS NO crlf

        if data.test_run_id:
            Log.println("Add {{id}} for revision {{revision}} ({{bytes}} bytes)", {
                "id": id,
                "revision": data.json_blob.test_build.revision,
                "bytes": len(content)
            })
            with Profiler("transform"):
                result = transformer.transform(id, data)

            if result:
                Log.println("{{num}} records to add", {
                    "num": len(result)
                })
                es_sink.extend({"value": d} for d in result)
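
# HYPOTHETICAL DRIVER (NOT FROM THE SOURCE): THE FRAGMENT ABOVE READS ONE
# BLOB BY id AND RETURNS True WHEN THE SCAN SHOULD CONTINUE PAST A NOT-FOUND
# id; THE FUNCTION NAME, SIGNATURE, AND URL SCHEME BELOW ARE ALL ASSUMPTIONS
for id in sorted(missing_ids):
    url = settings.production.blob_url + "/" + unicode(id)  # URL SCHEME ASSUMED
    if not read_blob(id, url):
        break  # FAILED READ, OR id NOT FOUND AT/PAST max_id: STOP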