コード例 #1
0
ファイル: test_etl.py プロジェクト: klahnakoski/Bugzilla-ETL
    def test_public_etl(self):
        """
        ENSURE ETL GENERATES WHAT'S IN THE REFERENCE FILE
        """
        File(self.settings.param.first_run_time).delete()
        File(self.settings.param.last_run_time).delete()
        self.settings.param.allow_private_bugs = Null

        database.make_test_instance(self.settings.bugzilla)
        es = elasticsearch.make_test_instance("candidate", self.settings.fake.bugs)
        es_comments = elasticsearch.make_test_instance("candidate_comments", self.settings.fake.comments)
        bz_etl.main(self.settings, es, es_comments)

        ref = elasticsearch.open_test_instance("reference", self.settings.public_bugs_reference)
        compare_both(es, ref, self.settings, self.settings.param.bugs)

        #DIRECT COMPARE THE FILE JSON
        can = File(self.settings.fake.comments.filename).read()
        ref = File(self.settings.public_comments_reference.filename).read()
        if can != ref:
            found = -1
            for i, c in enumerate(can):
                if can[i] != ref[i]:
                    found = i
                    break
            Log.error("Comments do not match reference\n{{sample}}", {"sample": can[MIN(0, found - 100):found + 100:]})
コード例 #2
0
ファイル: test_etl.py プロジェクト: klahnakoski/Bugzilla-ETL
    def test_private_comments_do_not_show(self):
        self.settings.param.allow_private_bugs = False
        database.make_test_instance(self.settings.bugzilla)

        #MARK SOME COMMENTS PRIVATE
        with DB(self.settings.bugzilla) as db:
            private_comments = db.query("""
                SELECT
                    bug_id,
                    comment_id
                FROM
                    longdescs
                ORDER BY
                    mod(comment_id, 7),
                    comment_id
                LIMIT
                    5
            """)

            for c in private_comments:
                database.mark_comment_private(db, c.comment_id, 1)

        es = elasticsearch.make_test_instance("candidate", self.settings.real.bugs)
        es_c = elasticsearch.make_test_instance("candidate_comments", self.settings.real.comments)
        bz_etl.main(self.settings, es, es_c)

        Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
        verify_no_private_comments(es, private_comments)
    def test_private_comments_do_not_show(self):
        self.settings.param.allow_private_bugs = False
        database.make_test_instance(self.settings.bugzilla)

        #MARK SOME COMMENTS PRIVATE
        with DB(self.settings.bugzilla) as db:
            private_comments = db.query("""
                SELECT
                    bug_id,
                    comment_id
                FROM
                    longdescs
                ORDER BY
                    mod(comment_id, 7),
                    comment_id
                LIMIT
                    5
            """)

            for c in private_comments:
                database.mark_comment_private(db, c.comment_id, 1)

        es = elasticsearch.make_test_instance("candidate",
                                              self.settings.real.bugs)
        es_c = elasticsearch.make_test_instance("candidate_comments",
                                                self.settings.real.comments)
        bz_etl.main(self.settings, es, es_c)

        Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
        verify_no_private_comments(es, private_comments)
コード例 #4
0
ファイル: test_etl.py プロジェクト: klahnakoski/Bugzilla-ETL
    def test_private_etl(self):
        """
        ENSURE IDENTIFIABLE INFORMATION DOES NOT EXIST ON ANY BUGS
        """
        File(self.settings.param.first_run_time).delete()
        File(self.settings.param.last_run_time).delete()
        self.settings.param.allow_private_bugs = True

        database.make_test_instance(self.settings.bugzilla)
        es = elasticsearch.make_test_instance("candidate", self.settings.fake.bugs)
        es_comments = elasticsearch.make_test_instance("candidate_comments", self.settings.fake.comments)
        bz_etl.main(self.settings, es, es_comments)

        ref = elasticsearch.open_test_instance("reference", self.settings.private_bugs_reference)
        compare_both(es, ref, self.settings, self.settings.param.bugs)

        #DIRECT COMPARE THE FILE JSON
        can = File(self.settings.fake.comments.filename).read()
        ref = File(self.settings.private_comments_reference.filename).read()
        if can != ref:
            for i, c in enumerate(can):
                found = -1
                if can[i] != ref[i]:
                    found = i
                    break
            Log.error("Comments do not match reference\n{{sample}}", {"sample": can[MIN([0, found - 100]):found + 100]})
    def test_public_etl(self):
        """
        ENSURE ETL GENERATES WHAT'S IN THE REFERENCE FILE
        """
        File(self.settings.param.first_run_time).delete()
        File(self.settings.param.last_run_time).delete()
        self.settings.param.allow_private_bugs = Null

        database.make_test_instance(self.settings.bugzilla)
        es = elasticsearch.make_test_instance("candidate",
                                              self.settings.fake.bugs)
        es_comments = elasticsearch.make_test_instance(
            "candidate_comments", self.settings.fake.comments)
        bz_etl.main(self.settings, es, es_comments)

        ref = elasticsearch.open_test_instance(
            "reference", self.settings.public_bugs_reference)
        compare_both(es, ref, self.settings, self.settings.param.bugs)

        #DIRECT COMPARE THE FILE JSON
        can = File(self.settings.fake.comments.filename).read()
        ref = File(self.settings.public_comments_reference.filename).read()
        if can != ref:
            found = -1
            for i, c in enumerate(can):
                if can[i] != ref[i]:
                    found = i
                    break
            Log.error("Comments do not match reference\n{{sample}}",
                      {"sample": can[MIN(0, found - 100):found + 100:]})
    def test_private_etl(self):
        """
        ENSURE IDENTIFIABLE INFORMATION DOES NOT EXIST ON ANY BUGS
        """
        File(self.settings.param.first_run_time).delete()
        File(self.settings.param.last_run_time).delete()
        self.settings.param.allow_private_bugs = True

        database.make_test_instance(self.settings.bugzilla)
        es = elasticsearch.make_test_instance("candidate",
                                              self.settings.fake.bugs)
        es_comments = elasticsearch.make_test_instance(
            "candidate_comments", self.settings.fake.comments)
        bz_etl.main(self.settings, es, es_comments)

        ref = elasticsearch.open_test_instance(
            "reference", self.settings.private_bugs_reference)
        compare_both(es, ref, self.settings, self.settings.param.bugs)

        #DIRECT COMPARE THE FILE JSON
        can = File(self.settings.fake.comments.filename).read()
        ref = File(self.settings.private_comments_reference.filename).read()
        if can != ref:
            for i, c in enumerate(can):
                found = -1
                if can[i] != ref[i]:
                    found = i
                    break
            Log.error("Comments do not match reference\n{{sample}}",
                      {"sample": can[MIN([0, found - 100]):found + 100]})
コード例 #7
0
ファイル: test_etl.py プロジェクト: klahnakoski/Bugzilla-ETL
    def test_private_bugs_do_not_show(self):
        self.settings.param.allow_private_bugs = False
        File(self.settings.param.first_run_time).delete()
        File(self.settings.param.last_run_time).delete()

        private_bugs = set(Random.sample(self.settings.param.bugs, 3))
        Log.note("The private bugs for this test are {{bugs}}", {"bugs": private_bugs})

        database.make_test_instance(self.settings.bugzilla)

        #MARK SOME BUGS PRIVATE
        with DB(self.settings.bugzilla) as db:
            for b in private_bugs:
                database.add_bug_group(db, b, BUG_GROUP_FOR_TESTING)

        es = elasticsearch.make_test_instance("candidate", self.settings.real.bugs)
        es_c = elasticsearch.make_test_instance("candidate_comments", self.settings.real.comments)
        bz_etl.main(self.settings, es, es_c)

        Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
        verify_no_private_bugs(es, private_bugs)
    def test_private_bugs_do_not_show(self):
        self.settings.param.allow_private_bugs = False
        File(self.settings.param.first_run_time).delete()
        File(self.settings.param.last_run_time).delete()

        private_bugs = set(Random.sample(self.settings.param.bugs, 3))
        Log.note("The private bugs for this test are {{bugs}}",
                 {"bugs": private_bugs})

        database.make_test_instance(self.settings.bugzilla)

        #MARK SOME BUGS PRIVATE
        with DB(self.settings.bugzilla) as db:
            for b in private_bugs:
                database.add_bug_group(db, b, BUG_GROUP_FOR_TESTING)

        es = elasticsearch.make_test_instance("candidate",
                                              self.settings.real.bugs)
        es_c = elasticsearch.make_test_instance("candidate_comments",
                                                self.settings.real.comments)
        bz_etl.main(self.settings, es, es_c)

        Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
        verify_no_private_bugs(es, private_bugs)
コード例 #9
0
ファイル: test_etl.py プロジェクト: klahnakoski/Bugzilla-ETL
    def test_changes_to_private_bugs_still_have_bug_group(self):
        self.settings.param.allow_private_bugs = True
        File(self.settings.param.first_run_time).delete()
        File(self.settings.param.last_run_time).delete()

        private_bugs = set(Random.sample(self.settings.param.bugs, 3))

        Log.note("The private bugs for this test are {{bugs}}", {"bugs": private_bugs})

        database.make_test_instance(self.settings.bugzilla)

        #MARK SOME BUGS PRIVATE
        with DB(self.settings.bugzilla) as db:
            for b in private_bugs:
                database.add_bug_group(db, b, BUG_GROUP_FOR_TESTING)

        es = elasticsearch.make_test_instance("candidate", self.settings.real.bugs)
        es_c = elasticsearch.make_test_instance("candidate_comments", self.settings.real.comments)
        bz_etl.main(self.settings, es, es_c)

        # MAKE A CHANGE TO THE PRIVATE BUGS
        with DB(self.settings.bugzilla) as db:
            for b in private_bugs:
                old_bug = db.query("SELECT * FROM bugs WHERE bug_id={{bug_id}}", {"bug_id": b})[0]
                new_bug = old_bug.copy()

                new_bug.bug_status = "NEW STATUS"
                diff(db, "bugs", old_bug, new_bug)


        #RUN INCREMENTAL
        bz_etl.main(self.settings, es, es_c)

        #VERIFY BUG GROUP STILL EXISTS
        Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
        now = datetime.utcnow()
        results = es.search({
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": {"and": [
                    {"terms": {"bug_id": private_bugs}},
                    {"range": {"expires_on": {"gte": CNV.datetime2milli(now)}}}
                ]}
            }},
            "from": 0,
            "size": 200000,
            "sort": []
        })
        latest_bugs = Q.select(results.hits.hits, "_source")
        latest_bugs_index = Q.unique_index(latest_bugs, "bug_id")  # IF NOT UNIQUE, THEN ETL IS WRONG

        for bug_id in private_bugs:
            if latest_bugs_index[bug_id] == None:
                Log.error("Expecting to find the private bug {{bug_id}}", {"bug_id": bug_id})

            bug_group = latest_bugs_index[bug_id].bug_group
            if not bug_group:
                Log.error("Expecting private bug ({{bug_id}}) to have a bug group", {"bug_id": bug_id})
            if BUG_GROUP_FOR_TESTING not in bug_group:
                Log.error("Expecting private bug ({{bug_id}}) to have a \"{{bug_group}}\" bug group", {
                    "bug_id": bug_id,
                    "bug_group": BUG_GROUP_FOR_TESTING
                })
コード例 #10
0
ファイル: test_etl.py プロジェクト: klahnakoski/Bugzilla-ETL
    def test_recent_private_stuff_does_not_show(self):
        self.settings.param.allow_private_bugs = False
        File(self.settings.param.first_run_time).delete()
        File(self.settings.param.last_run_time).delete()

        database.make_test_instance(self.settings.bugzilla)

        es = elasticsearch.make_test_instance("candidate", self.settings.real.bugs)
        es_c = elasticsearch.make_test_instance("candidate_comments", self.settings.real.comments)
        bz_etl.main(self.settings, es, es_c)

        #MARK SOME STUFF PRIVATE
        with DB(self.settings.bugzilla) as db:
            #BUGS
            private_bugs = set(Random.sample(self.settings.param.bugs, 3))
            Log.note("The private bugs are {{bugs}}", {"bugs": private_bugs})
            for b in private_bugs:
                database.add_bug_group(db, b, BUG_GROUP_FOR_TESTING)

            #COMMENTS
            comments = db.query("SELECT comment_id FROM longdescs").comment_id
            marked_private_comments = Random.sample(comments, 5)
            for c in marked_private_comments:
                database.mark_comment_private(db, c, isprivate=1)

            #INCLUDE COMMENTS OF THE PRIVATE BUGS
            implied_private_comments = db.query("""
                SELECT comment_id FROM longdescs WHERE {{where}}
            """, {
                "where": esfilter2sqlwhere(db, {"terms":{"bug_id":private_bugs}})
            }).comment_id
            private_comments = marked_private_comments + implied_private_comments
            Log.note("The private comments are {{comments}}", {"comments": private_comments})

            #ATTACHMENTS
            attachments = db.query("SELECT bug_id, attach_id FROM attachments")
            private_attachments = Random.sample(attachments, 5)
            Log.note("The private attachments are {{attachments}}", {"attachments": private_attachments})
            for a in private_attachments:
                database.mark_attachment_private(db, a.attach_id, isprivate=1)

        if not File(self.settings.param.last_run_time).exists:
            Log.error("last_run_time should exist")
        bz_etl.main(self.settings, es, es_c)

        Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
        verify_no_private_bugs(es, private_bugs)
        verify_no_private_attachments(es, private_attachments)
        verify_no_private_comments(es_c, private_comments)

        #MARK SOME STUFF PUBLIC

        with DB(self.settings.bugzilla) as db:
            for b in private_bugs:
                database.remove_bug_group(db, b, BUG_GROUP_FOR_TESTING)

        bz_etl.main(self.settings, es, es_c)

        #VERIFY BUG IS PUBLIC, BUT PRIVATE ATTACHMENTS AND COMMENTS STILL NOT
        Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
        verify_public_bugs(es, private_bugs)
        verify_no_private_attachments(es, private_attachments)
        verify_no_private_comments(es_c, marked_private_comments)
    def test_changes_to_private_bugs_still_have_bug_group(self):
        self.settings.param.allow_private_bugs = True
        File(self.settings.param.first_run_time).delete()
        File(self.settings.param.last_run_time).delete()

        private_bugs = set(Random.sample(self.settings.param.bugs, 3))

        Log.note("The private bugs for this test are {{bugs}}",
                 {"bugs": private_bugs})

        database.make_test_instance(self.settings.bugzilla)

        #MARK SOME BUGS PRIVATE
        with DB(self.settings.bugzilla) as db:
            for b in private_bugs:
                database.add_bug_group(db, b, BUG_GROUP_FOR_TESTING)

        es = elasticsearch.make_test_instance("candidate",
                                              self.settings.real.bugs)
        es_c = elasticsearch.make_test_instance("candidate_comments",
                                                self.settings.real.comments)
        bz_etl.main(self.settings, es, es_c)

        # MAKE A CHANGE TO THE PRIVATE BUGS
        with DB(self.settings.bugzilla) as db:
            for b in private_bugs:
                old_bug = db.query(
                    "SELECT * FROM bugs WHERE bug_id={{bug_id}}",
                    {"bug_id": b})[0]
                new_bug = old_bug.copy()

                new_bug.bug_status = "NEW STATUS"
                diff(db, "bugs", old_bug, new_bug)

        #RUN INCREMENTAL
        bz_etl.main(self.settings, es, es_c)

        #VERIFY BUG GROUP STILL EXISTS
        Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
        now = datetime.utcnow()
        results = es.search({
            "query": {
                "filtered": {
                    "query": {
                        "match_all": {}
                    },
                    "filter": {
                        "and": [{
                            "terms": {
                                "bug_id": private_bugs
                            }
                        }, {
                            "range": {
                                "expires_on": {
                                    "gte": CNV.datetime2milli(now)
                                }
                            }
                        }]
                    }
                }
            },
            "from": 0,
            "size": 200000,
            "sort": []
        })
        latest_bugs = Q.select(results.hits.hits, "_source")
        latest_bugs_index = Q.unique_index(
            latest_bugs, "bug_id")  # IF NOT UNIQUE, THEN ETL IS WRONG

        for bug_id in private_bugs:
            if latest_bugs_index[bug_id] == None:
                Log.error("Expecting to find the private bug {{bug_id}}",
                          {"bug_id": bug_id})

            bug_group = latest_bugs_index[bug_id].bug_group
            if not bug_group:
                Log.error(
                    "Expecting private bug ({{bug_id}}) to have a bug group",
                    {"bug_id": bug_id})
            if BUG_GROUP_FOR_TESTING not in bug_group:
                Log.error(
                    "Expecting private bug ({{bug_id}}) to have a \"{{bug_group}}\" bug group",
                    {
                        "bug_id": bug_id,
                        "bug_group": BUG_GROUP_FOR_TESTING
                    })
    def test_recent_private_stuff_does_not_show(self):
        self.settings.param.allow_private_bugs = False
        File(self.settings.param.first_run_time).delete()
        File(self.settings.param.last_run_time).delete()

        database.make_test_instance(self.settings.bugzilla)

        es = elasticsearch.make_test_instance("candidate",
                                              self.settings.real.bugs)
        es_c = elasticsearch.make_test_instance("candidate_comments",
                                                self.settings.real.comments)
        bz_etl.main(self.settings, es, es_c)

        #MARK SOME STUFF PRIVATE
        with DB(self.settings.bugzilla) as db:
            #BUGS
            private_bugs = set(Random.sample(self.settings.param.bugs, 3))
            Log.note("The private bugs are {{bugs}}", {"bugs": private_bugs})
            for b in private_bugs:
                database.add_bug_group(db, b, BUG_GROUP_FOR_TESTING)

            #COMMENTS
            comments = db.query("SELECT comment_id FROM longdescs").comment_id
            marked_private_comments = Random.sample(comments, 5)
            for c in marked_private_comments:
                database.mark_comment_private(db, c, isprivate=1)

            #INCLUDE COMMENTS OF THE PRIVATE BUGS
            implied_private_comments = db.query(
                """
                SELECT comment_id FROM longdescs WHERE {{where}}
            """, {
                    "where":
                    esfilter2sqlwhere(db, {"terms": {
                        "bug_id": private_bugs
                    }})
                }).comment_id
            private_comments = marked_private_comments + implied_private_comments
            Log.note("The private comments are {{comments}}",
                     {"comments": private_comments})

            #ATTACHMENTS
            attachments = db.query("SELECT bug_id, attach_id FROM attachments")
            private_attachments = Random.sample(attachments, 5)
            Log.note("The private attachments are {{attachments}}",
                     {"attachments": private_attachments})
            for a in private_attachments:
                database.mark_attachment_private(db, a.attach_id, isprivate=1)

        if not File(self.settings.param.last_run_time).exists:
            Log.error("last_run_time should exist")
        bz_etl.main(self.settings, es, es_c)

        Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
        verify_no_private_bugs(es, private_bugs)
        verify_no_private_attachments(es, private_attachments)
        verify_no_private_comments(es_c, private_comments)

        #MARK SOME STUFF PUBLIC

        with DB(self.settings.bugzilla) as db:
            for b in private_bugs:
                database.remove_bug_group(db, b, BUG_GROUP_FOR_TESTING)

        bz_etl.main(self.settings, es, es_c)

        #VERIFY BUG IS PUBLIC, BUT PRIVATE ATTACHMENTS AND COMMENTS STILL NOT
        Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
        verify_public_bugs(es, private_bugs)
        verify_no_private_attachments(es, private_attachments)
        verify_no_private_comments(es_c, marked_private_comments)