예제 #1
0
def main(settings):
    file = File(settings.param.alias_file)
    aliases = CNV.JSON2object(file.read())

    for v in aliases.values():
        v.candidates = CNV.dict2Multiset(v.candidates)

    data = [
        {
            "lost": n,
            "found": d.canonical
        }
        for n, d in aliases.items()
        if d.canonical != None and n != d.canonical
    ]

    sorted = Q.sort(data, "found")
    for s in sorted:
        Log.note("{{found}} == {{lost}}", s)

    clean = {
        n: d.canonical
        for n, d in aliases.items()
        if d.canonical != None and n != d.canonical and n != ""
    }

    rev_clean = struct.inverse(clean)
    Log.note(CNV.object2JSON(rev_clean, pretty=True))

    for k, v in rev_clean.items():
        if len(v) > 3:
            Log.note(CNV.object2JSON({k: v}, pretty=True))
예제 #2
0
def main(settings):
    file = File(settings.param.alias_file)
    aliases = CNV.JSON2object(file.read())

    for v in aliases.values():
        v.candidates = CNV.dict2Multiset(v.candidates)

    data = [{
        "lost": n,
        "found": d.canonical
    } for n, d in aliases.items() if d.canonical != None and n != d.canonical]

    sorted = Q.sort(data, "found")
    for s in sorted:
        Log.note("{{found}} == {{lost}}", s)

    clean = {
        n: d.canonical
        for n, d in aliases.items()
        if d.canonical != None and n != d.canonical and n != ""
    }

    rev_clean = struct.inverse(clean)
    Log.note(CNV.object2JSON(rev_clean, pretty=True))

    for k, v in rev_clean.items():
        if len(v) > 3:
            Log.note(CNV.object2JSON({k: v}, pretty=True))
    def random_sample_of_bugs(self):
        """
        I USE THIS TO FIND BUGS THAT CAUSE MY CODE PROBLEMS.  OF COURSE, IT ONLY WORKS
        WHEN I HAVE A REFERENCE TO COMPARE TO
        """
        NUM_TO_TEST = 100
        MAX_BUG_ID = 900000

        with DB(self.settings.bugzilla) as db:
            candidate = elasticsearch.make_test_instance(
                "candidate", self.settings.candidate)
            reference = ElasticSearch(self.settings.private_bugs_reference)

            #GO FASTER BY STORING LOCAL FILE
            local_cache = File(self.settings.param.temp_dir +
                               "/private_bugs.json")
            if local_cache.exists:
                private_bugs = set(CNV.JSON2object(local_cache.read()))
            else:
                with Timer("get private bugs"):
                    private_bugs = compare_es.get_private_bugs(reference)
                    local_cache.write(CNV.object2JSON(private_bugs))

            while True:
                some_bugs = [
                    b for b in
                    [Random.int(MAX_BUG_ID) for i in range(NUM_TO_TEST)]
                    if b not in private_bugs
                ]

                Log.note("Test with the following bug_ids: {{bugs}}",
                         {"bugs": some_bugs})

                #SETUP RUN PARAMETERS
                param = Struct()
                param.end_time = CNV.datetime2milli(get_current_time(db))
                param.start_time = 0
                param.start_time_str = extract_bugzilla.milli2string(db, 0)
                param.alias_file = self.settings.param.alias_file

                try:
                    with ThreadedQueue(candidate, 100) as output:
                        etl(db, output, param, please_stop=None)

                    #COMPARE ALL BUGS
                    Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
                    found_errors = compare_both(candidate, reference,
                                                self.settings, some_bugs)
                    if found_errors:
                        Log.note("Errors found")
                        break
                    else:
                        pass
                except Exception, e:
                    Log.warning(
                        "Total failure during compare of bugs {{bugs}}",
                        {"bugs": some_bugs}, e)
예제 #4
0
    def random_sample_of_bugs(self):
        """
        I USE THIS TO FIND BUGS THAT CAUSE MY CODE PROBLEMS.  OF COURSE, IT ONLY WORKS
        WHEN I HAVE A REFERENCE TO COMPARE TO
        """
        NUM_TO_TEST = 100
        MAX_BUG_ID = 900000

        with DB(self.settings.bugzilla) as db:
            candidate = elasticsearch.make_test_instance("candidate", self.settings.candidate)
            reference = ElasticSearch(self.settings.private_bugs_reference)

            #GO FASTER BY STORING LOCAL FILE
            local_cache = File(self.settings.param.temp_dir + "/private_bugs.json")
            if local_cache.exists:
                private_bugs = set(CNV.JSON2object(local_cache.read()))
            else:
                with Timer("get private bugs"):
                    private_bugs = compare_es.get_private_bugs(reference)
                    local_cache.write(CNV.object2JSON(private_bugs))

            while True:
                some_bugs = [b for b in [Random.int(MAX_BUG_ID) for i in range(NUM_TO_TEST)] if b not in private_bugs]

                Log.note("Test with the following bug_ids: {{bugs}}", {"bugs":some_bugs})

                #SETUP RUN PARAMETERS
                param = Struct()
                param.end_time = CNV.datetime2milli(get_current_time(db))
                param.start_time = 0
                param.start_time_str = extract_bugzilla.milli2string(db, 0)
                param.alias_file = self.settings.param.alias_file

                try:
                    with ThreadedQueue(candidate, 100) as output:
                        etl(db, output, param, please_stop=None)

                    #COMPARE ALL BUGS
                    Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
                    found_errors = compare_both(candidate, reference, self.settings, some_bugs)
                    if found_errors:
                        Log.note("Errors found")
                        break
                    else:
                        pass
                except Exception, e:
                    Log.warning("Total failure during compare of bugs {{bugs}}", {"bugs": some_bugs}, e)
def main(settings):
    #USE A FILE
    if settings.source.filename != None:
        settings.destination.alias = settings.destination.index
        settings.destination.index = ElasticSearch.proto_name(settings.destination.alias)
        schema = CNV.JSON2object(File(settings.source.schema_filename).read())
        if transform_bugzilla.USE_ATTACHMENTS_DOT:
            schema = CNV.JSON2object(CNV.object2JSON(schema).replace("attachments_", "attachments."))

        dest = ElasticSearch.create_index(settings.destination, schema, limit_replicas=True)
        dest.set_refresh_interval(-1)
        extract_from_file(settings.source, dest)
        dest.set_refresh_interval(1)

        dest.delete_all_but(settings.destination.alias, settings.destination.index)
        dest.add_alias(settings.destination.alias)
        return

    # SYNCH WITH source ES INDEX
    source=ElasticSearch(settings.source)
    destination=get_or_create_index(settings["destination"], source)

    # GET LAST UPDATED
    time_file = File(settings.param.last_replication_time)
    from_file = None
    if time_file.exists:
        from_file = CNV.milli2datetime(CNV.value2int(time_file.read()))
    from_es = get_last_updated(destination)
    last_updated = nvl(MIN(from_file, from_es), CNV.milli2datetime(0))
    current_time = datetime.utcnow()

    pending = get_pending(source, last_updated)
    with ThreadedQueue(destination, size=1000) as data_sink:
        replicate(source, data_sink, pending, last_updated)

    # RECORD LAST UPDATED
    time_file.write(unicode(CNV.datetime2milli(current_time)))