def search(self, query):
     query=wrap(query)
     f = CNV.esfilter2where(query.query.filtered.filter)
     filtered=wrap([{"_id": i, "_source": d} for i, d in self.data.items() if f(d)])
     if query.fields:
         return wrap({"hits": {"total":len(filtered), "hits": [{"_id":d._id, "fields":unwrap(Q.select([unwrap(d._source)], query.fields)[0])} for d in filtered]}})
     else:
         return wrap({"hits": {"total":len(filtered), "hits": filtered}})
コード例 #2
0
 def search(self, query):
     filter = parse_filter(wrap(query).query.filtered.filter)
     return wrap({
         "hits": {
             "hits": [{
                 "_id": i,
                 "_source": d
             } for i, d in self.data.items() if filter(d)]
         }
     })
コード例 #3
0
ファイル: test_etl.py プロジェクト: klahnakoski/Bugzilla-ETL
    def test_ambiguous_whiteboard_screened(self):
        GOOD_BUG_TO_TEST=1046

        database.make_test_instance(self.settings.bugzilla)

        with DB(self.settings.bugzilla) as db:
            es = elasticsearch.make_test_instance("candidate", self.settings.candidate)

            #MARK BUG AS ONE OF THE SCREENED GROUPS
            database.add_bug_group(db, GOOD_BUG_TO_TEST, SCREENED_WHITEBOARD_BUG_GROUPS[0])
            #MARK BUG AS ONE OF THE *NOT* SCREENED GROUPS
            database.add_bug_group(db, GOOD_BUG_TO_TEST, "not screened")
            db.flush()

            #SETUP RUN PARAMETERS
            param = Struct()
            param.end_time = CNV.datetime2milli(get_current_time(db))
            param.start_time = 0
            param.start_time_str = extract_bugzilla.milli2string(db, 0)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = struct.wrap([GOOD_BUG_TO_TEST]) # bug 1046 sees lots of whiteboard, and other field, changes
            param.allow_private_bugs = True

            with ThreadedQueue(es, size=1000) as output:
                etl(db, output, param, please_stop=None)

            Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
            versions = get_all_bug_versions(es, GOOD_BUG_TO_TEST)

            for v in versions:
                if v.status_whiteboard not in (None, "", "[screened]"):
                    Log.error("Expecting whiteboard to be screened")
 def __init__(self, settings):
     self.settings = wrap({"host":"fake", "index":"fake"})
     self.filename = settings.filename
     try:
         self.data = CNV.JSON2object(File(self.filename).read())
     except IOError:
         self.data = Struct()
    def test_whiteboard_screened(self):
        GOOD_BUG_TO_TEST = 1046

        database.make_test_instance(self.settings.bugzilla)

        with DB(self.settings.bugzilla) as db:
            es = elasticsearch.make_test_instance("candidate",
                                                  self.settings.candidate)

            #MARK BUG AS ONE OF THE SCREENED GROUPS
            database.add_bug_group(db, GOOD_BUG_TO_TEST,
                                   SCREENED_WHITEBOARD_BUG_GROUPS[0])
            db.flush()

            #SETUP RUN PARAMETERS
            param = Struct()
            param.end_time = CNV.datetime2milli(get_current_time(db))
            param.start_time = 0
            param.start_time_str = extract_bugzilla.milli2string(db, 0)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = struct.wrap([
                GOOD_BUG_TO_TEST
            ])  # bug 1046 sees lots of whiteboard, and other field, changes
            param.allow_private_bugs = True

            with ThreadedQueue(es, size=1000) as output:
                etl(db, output, param, please_stop=None)

            Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
            versions = get_all_bug_versions(es, GOOD_BUG_TO_TEST)

            for v in versions:
                if v.status_whiteboard not in (None, "", "[screened]"):
                    Log.error("Expecting whiteboard to be screened")
コード例 #6
0
ファイル: test_etl.py プロジェクト: klahnakoski/Bugzilla-ETL
    def test_incremental_etl_catches_tracking_flags(self):
        database.make_test_instance(self.settings.bugzilla)

        with DB(self.settings.bugzilla) as db:
            es = elasticsearch.make_test_instance("candidate", self.settings.candidate)

            #SETUP RUN PARAMETERS
            param = Struct()
            param.end_time = CNV.datetime2milli(get_current_time(db))
            # FLAGS ADDED TO BUG 813650 ON 18/12/2012 2:38:08 AM (PDT), SO START AT SOME LATER TIME
            param.start_time = CNV.datetime2milli(CNV.string2datetime("02/01/2013 10:09:15", "%d/%m/%Y %H:%M:%S"))
            param.start_time_str = extract_bugzilla.milli2string(db, param.start_time)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = struct.wrap([813650])
            param.allow_private_bugs = self.settings.param.allow_private_bugs

            with ThreadedQueue(es, size=1000) as output:
                etl(db, output, param, please_stop=None)

            Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
            versions = get_all_bug_versions(es, 813650)

            flags = ["cf_status_firefox18", "cf_status_firefox19", "cf_status_firefox_esr17", "cf_status_b2g18"]
            for v in versions:
                if v.modified_ts>param.start_time:
                    for f in flags:
                        if v[f] != "fixed":
                            Log.error("813650 should have {{flag}}=='fixed'", {"flag": f})
コード例 #7
0
def loadAliases(settings):
    try:
        try:
            with Timer("load alias file at {{filename}}", {"filename":nvl(settings.param.alias_file.path, settings.param.alias_file)}):
                alias_json = File(settings.param.alias_file).read()
        except Exception, e:
            Log.warning("No alias file found (looking at {{filename}}", {"filename":nvl(settings.param.alias_file.path, settings.param.alias_file)})
            alias_json = "{}"
            #self.aliases IS A dict POINTING TO structs
        for k, v in CNV.JSON2object(alias_json).iteritems():
            aliases[k] = struct.wrap(v)

        Log.note("{{num}} aliases loaded", {"num": len(aliases.keys())})
コード例 #8
0
ファイル: test_etl.py プロジェクト: klahnakoski/Bugzilla-ETL
    def test_incremental_has_correct_expires_on(self):
        # 813650, 726635 BOTH HAVE CHANGES IN 2013
        bugs = struct.wrap([813650, 726635])
        start_incremental=CNV.datetime2milli(CNV.string2datetime("2013-01-01", "%Y-%m-%d"))

        es = elasticsearch.make_test_instance("candidate", self.settings.candidate)
        with DB(self.settings.bugzilla) as db:
            #SETUP FIRST RUN PARAMETERS
            param = Struct()
            param.end_time = start_incremental
            param.start_time = 0
            param.start_time_str = extract_bugzilla.milli2string(db, param.start_time)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = bugs
            param.allow_private_bugs = False

            with ThreadedQueue(es, size=1000) as output:
                etl(db, output, param, please_stop=None)

            #SETUP INCREMENTAL RUN PARAMETERS
            param = Struct()
            param.end_time = CNV.datetime2milli(datetime.utcnow())
            param.start_time = start_incremental
            param.start_time_str = extract_bugzilla.milli2string(db, param.start_time)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = bugs
            param.allow_private_bugs = False

            with ThreadedQueue(es, size=1000) as output:
                etl(db, output, param, please_stop=None)

        for b in bugs:
            results = es.search({
                "query": {"filtered": {
                    "query": {"match_all": {}},
                    "filter": {"and":[
                        {"term":{"bug_id":b}},
                        {"range":{"expires_on":{"gte":CNV.datetime2milli(datetime.utcnow())}}}
                    ]}
                }},
                "from": 0,
                "size": 200000,
                "sort": [],
                "fields": ["bug_id"]
            })

            if results.hits.total>1:
                Log.error("Expecting only one active bug_version record")
コード例 #9
0
def get(es, esfilter, fields=None, limit=None):
    query = struct.wrap({
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": esfilter
        }},
        "from": 0,
        "size": nvl(limit, 200000),
        "sort": []
    })

    if fields:
        query.fields=fields
        results = es.search(query)
        return Q.select(results.hits.hits, "fields")
    else:
        results = es.search(query)
        return Q.select(results.hits.hits, "_source")
コード例 #10
0
def etl(db, output_queue, param, please_stop):
    """
    PROCESS RANGE, AS SPECIFIED IN param AND PUSH
    BUG VERSION RECORDS TO output_queue
    """

    # CONNECTIONS ARE EXPENSIVE, CACHE HERE
    with db_cache_lock:
        if not db_cache:
            with Timer("open connections to db"):
                for f in get_stuff_from_bugzilla:
                    db = DB(db)
                    db_cache.append(db)

    db_results = Queue(max=2**30)
    with db_cache_lock:
        # ASYMMETRIC MULTI THREADING TO GET RECORDS FROM DB
        with AllThread() as all:
            for i, f in enumerate(get_stuff_from_bugzilla):

                def process(target, db, param, please_stop):
                    db_results.extend(target(db, param))

                all.add(process, f, db_cache[i], param.copy())
    db_results.add(Thread.STOP)

    sorted = Q.sort(db_results, [
        "bug_id", "_merge_order", {
            "field": "modified_ts",
            "sort": -1
        }, "modified_by"
    ])

    process = BugHistoryParser(param, output_queue)
    for s in sorted:
        process.processRow(s)
    process.processRow(
        struct.wrap({
            "bug_id": parse_bug_history.STOP_BUG,
            "_merge_order": 1
        }))
def get(es, esfilter, fields=None, limit=None):
    query = struct.wrap({
        "query": {
            "filtered": {
                "query": {
                    "match_all": {}
                },
                "filter": esfilter
            }
        },
        "from": 0,
        "size": nvl(limit, 200000),
        "sort": []
    })

    if fields:
        query.fields = fields
        results = es.search(query)
        return Q.select(results.hits.hits, "fields")
    else:
        results = es.search(query)
        return Q.select(results.hits.hits, "_source")
def loadAliases(settings):
    try:
        try:
            with Timer(
                    "load alias file at {{filename}}", {
                        "filename":
                        nvl(settings.param.alias_file.path,
                            settings.param.alias_file)
                    }):
                alias_json = File(settings.param.alias_file).read()
        except Exception, e:
            Log.warning(
                "No alias file found (looking at {{filename}}", {
                    "filename":
                    nvl(settings.param.alias_file.path,
                        settings.param.alias_file)
                })
            alias_json = "{}"
            #self.aliases IS A dict POINTING TO structs
        for k, v in CNV.JSON2object(alias_json).iteritems():
            aliases[k] = struct.wrap(v)

        Log.note("{{num}} aliases loaded", {"num": len(aliases.keys())})
    def test_incremental_etl_catches_tracking_flags(self):
        database.make_test_instance(self.settings.bugzilla)

        with DB(self.settings.bugzilla) as db:
            es = elasticsearch.make_test_instance("candidate",
                                                  self.settings.candidate)

            #SETUP RUN PARAMETERS
            param = Struct()
            param.end_time = CNV.datetime2milli(get_current_time(db))
            # FLAGS ADDED TO BUG 813650 ON 18/12/2012 2:38:08 AM (PDT), SO START AT SOME LATER TIME
            param.start_time = CNV.datetime2milli(
                CNV.string2datetime("02/01/2013 10:09:15",
                                    "%d/%m/%Y %H:%M:%S"))
            param.start_time_str = extract_bugzilla.milli2string(
                db, param.start_time)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = struct.wrap([813650])
            param.allow_private_bugs = self.settings.param.allow_private_bugs

            with ThreadedQueue(es, size=1000) as output:
                etl(db, output, param, please_stop=None)

            Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
            versions = get_all_bug_versions(es, 813650)

            flags = [
                "cf_status_firefox18", "cf_status_firefox19",
                "cf_status_firefox_esr17", "cf_status_b2g18"
            ]
            for v in versions:
                if v.modified_ts > param.start_time:
                    for f in flags:
                        if v[f] != "fixed":
                            Log.error("813650 should have {{flag}}=='fixed'",
                                      {"flag": f})
コード例 #14
0
ファイル: bz_etl.py プロジェクト: klahnakoski/Bugzilla-ETL
def etl(db, output_queue, param, please_stop):
    """
    PROCESS RANGE, AS SPECIFIED IN param AND PUSH
    BUG VERSION RECORDS TO output_queue
    """

    # CONNECTIONS ARE EXPENSIVE, CACHE HERE
    with db_cache_lock:
        if not db_cache:
            with Timer("open connections to db"):
                for f in get_stuff_from_bugzilla:
                    db = DB(db)
                    db_cache.append(db)

    db_results = Queue(max=2**30)
    with db_cache_lock:
        # ASYMMETRIC MULTI THREADING TO GET RECORDS FROM DB
        with AllThread() as all:
            for i, f in enumerate(get_stuff_from_bugzilla):
                def process(target, db, param, please_stop):
                    db_results.extend(target(db, param))

                all.add(process, f, db_cache[i], param.copy())
    db_results.add(Thread.STOP)

    sorted = Q.sort(db_results, [
        "bug_id",
        "_merge_order",
        {"field": "modified_ts", "sort": -1},
        "modified_by"
    ])

    process = BugHistoryParser(param, output_queue)
    for s in sorted:
        process.processRow(s)
    process.processRow(struct.wrap({"bug_id": parse_bug_history.STOP_BUG, "_merge_order": 1}))
 def delete_record(self, filter):
     f = CNV.esfilter2where(filter)
     self.data = wrap({k: v for k, v in self.data.items() if not f(v)})
コード例 #16
0
def saveAliases(settings):
    compressed = {
        email: details
        for email, details in aliases.iteritems()
        if details.canonical
    }

    #COMPARE WITH PREVIOUS ALIAS VERSION
    try:
        old_alias_json = File(settings.param.alias_file).read()
    except Exception, e:
        old_alias_json = "{}"
    old_aliases = {}
    for k, v in CNV.JSON2object(old_alias_json).iteritems():
        old_aliases[k] = struct.wrap(v)

    added = set(compressed.keys()) - set(old_aliases.keys())
    removed = set(old_aliases.keys()) - set(compressed.keys())
    common = set(compressed.keys()) & set(old_aliases.keys())

    changed = set()
    for c in common:
        if CNV.object2JSON(compressed[c], pretty=True) != CNV.object2JSON(old_aliases[c], pretty=True):
            changed.add(c)

    if added or removed or changed:
        alias_json = CNV.object2JSON(compressed, pretty=True)
        file = File(settings.param.alias_file)
        file.write(alias_json)

def saveAliases(settings):
    compressed = {
        email: details
        for email, details in aliases.iteritems() if details.canonical
    }

    #COMPARE WITH PREVIOUS ALIAS VERSION
    try:
        old_alias_json = File(settings.param.alias_file).read()
    except Exception, e:
        old_alias_json = "{}"
    old_aliases = {}
    for k, v in CNV.JSON2object(old_alias_json).iteritems():
        old_aliases[k] = struct.wrap(v)

    added = set(compressed.keys()) - set(old_aliases.keys())
    removed = set(old_aliases.keys()) - set(compressed.keys())
    common = set(compressed.keys()) & set(old_aliases.keys())

    changed = set()
    for c in common:
        if CNV.object2JSON(compressed[c], pretty=True) != CNV.object2JSON(
                old_aliases[c], pretty=True):
            changed.add(c)

    if added or removed or changed:
        alias_json = CNV.object2JSON(compressed, pretty=True)
        file = File(settings.param.alias_file)
        file.write(alias_json)
    def test_incremental_has_correct_expires_on(self):
        # 813650, 726635 BOTH HAVE CHANGES IN 2013
        bugs = struct.wrap([813650, 726635])
        start_incremental = CNV.datetime2milli(
            CNV.string2datetime("2013-01-01", "%Y-%m-%d"))

        es = elasticsearch.make_test_instance("candidate",
                                              self.settings.candidate)
        with DB(self.settings.bugzilla) as db:
            #SETUP FIRST RUN PARAMETERS
            param = Struct()
            param.end_time = start_incremental
            param.start_time = 0
            param.start_time_str = extract_bugzilla.milli2string(
                db, param.start_time)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = bugs
            param.allow_private_bugs = False

            with ThreadedQueue(es, size=1000) as output:
                etl(db, output, param, please_stop=None)

            #SETUP INCREMENTAL RUN PARAMETERS
            param = Struct()
            param.end_time = CNV.datetime2milli(datetime.utcnow())
            param.start_time = start_incremental
            param.start_time_str = extract_bugzilla.milli2string(
                db, param.start_time)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = bugs
            param.allow_private_bugs = False

            with ThreadedQueue(es, size=1000) as output:
                etl(db, output, param, please_stop=None)

        for b in bugs:
            results = es.search({
                "query": {
                    "filtered": {
                        "query": {
                            "match_all": {}
                        },
                        "filter": {
                            "and": [{
                                "term": {
                                    "bug_id": b
                                }
                            }, {
                                "range": {
                                    "expires_on": {
                                        "gte":
                                        CNV.datetime2milli(datetime.utcnow())
                                    }
                                }
                            }]
                        }
                    }
                },
                "from": 0,
                "size": 200000,
                "sort": [],
                "fields": ["bug_id"]
            })

            if results.hits.total > 1:
                Log.error("Expecting only one active bug_version record")
コード例 #19
0
ファイル: fake_es.py プロジェクト: markrcote/Bugzilla-ETL
 def search(self, query):
     filter=parse_filter(wrap(query).query.filtered.filter)
     return wrap({"hits":{"hits":[{"_id":i, "_source":d} for i,d in self.data.items() if filter(d)]}})