Example #1
def get_recent_private_comments(db, param):
    """
    GET COMMENTS THAT HAVE HAD THEIR PRIVACY INDICATOR CHANGED
    """
    if param.allow_private_bugs:
        return []

    param.field_id = PRIVATE_COMMENTS_FIELD_ID

    try:
        comments = db.query(
            """
            SELECT
                a.comment_id,
                a.bug_id
            FROM
                bugs_activity a
            WHERE
                bug_when >= {{start_time_str}} AND
                fieldid={{field_id}}
            """, param)

        return comments
    except Exception, e:
        Log.error("problem getting recent private attachments", e)
Example #2
def start():
    try:
        settings = startup.read_settings()
        Log.start(settings.debug)
        main(settings)
    except Exception, e:
        Log.error("Problems exist", e)
Example #3
    def test_private_etl(self):
        """
        ENSURE IDENTIFIABLE INFORMATION DOES NOT EXIST ON ANY BUGS
        """
        File(self.settings.param.first_run_time).delete()
        File(self.settings.param.last_run_time).delete()
        self.settings.param.allow_private_bugs = True

        database.make_test_instance(self.settings.bugzilla)
        es = elasticsearch.make_test_instance("candidate",
                                              self.settings.fake.bugs)
        es_comments = elasticsearch.make_test_instance(
            "candidate_comments", self.settings.fake.comments)
        bz_etl.main(self.settings, es, es_comments)

        ref = elasticsearch.open_test_instance(
            "reference", self.settings.private_bugs_reference)
        compare_both(es, ref, self.settings, self.settings.param.bugs)

        #DIRECT COMPARE THE FILE JSON
        can = File(self.settings.fake.comments.filename).read()
        ref = File(self.settings.private_comments_reference.filename).read()
        if can != ref:
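            #FIND FIRST DIFFERING CHARACTER, TO SHOW CONTEXT AROUND THE MISMATCH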
            found = -1
            for i, c in enumerate(can):
                if can[i] != ref[i]:
                    found = i
                    break
            Log.error("Comments do not match reference\n{{sample}}",
                      {"sample": can[max(0, found - 100):found + 100]})
Example #4
def get_max_bug_id(es):
    try:
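        #STATISTICAL FACET RETURNS count AND max OF bug_id WITHOUT RETURNING DOCUMENTS (size=0)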
        results = es.search({
            "query": {
                "filtered": {
                    "query": {
                        "match_all": {}
                    },
                    "filter": {
                        "script": {
                            "script": "true"
                        }
                    }
                }
            },
            "from": 0,
            "size": 0,
            "sort": [],
            "facets": {
                "0": {
                    "statistical": {
                        "field": "bug_id"
                    }
                }
            }
        })

        if results.facets["0"].count == 0:
            return 0
        return results.facets["0"].max
    except Exception, e:
        Log.error("Can not get_max_bug from {{host}}/{{index}}", {
            "host": es.settings.host,
            "index": es.settings.index
        }, e)
Example #5
    def test_public_etl(self):
        """
        ENSURE ETL GENERATES WHAT'S IN THE REFERENCE FILE
        """
        File(self.settings.param.first_run_time).delete()
        File(self.settings.param.last_run_time).delete()
        self.settings.param.allow_private_bugs = Null

        database.make_test_instance(self.settings.bugzilla)
        es = elasticsearch.make_test_instance("candidate", self.settings.fake.bugs)
        es_comments = elasticsearch.make_test_instance("candidate_comments", self.settings.fake.comments)
        bz_etl.main(self.settings, es, es_comments)

        ref = elasticsearch.open_test_instance("reference", self.settings.public_bugs_reference)
        compare_both(es, ref, self.settings, self.settings.param.bugs)

        #DIRECT COMPARE THE FILE JSON
        can = File(self.settings.fake.comments.filename).read()
        ref = File(self.settings.public_comments_reference.filename).read()
        if can != ref:
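            #FIND FIRST DIFFERING CHARACTER, TO SHOW CONTEXT AROUND THE MISMATCH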
            found = -1
            for i, c in enumerate(can):
                if can[i] != ref[i]:
                    found = i
                    break
            Log.error("Comments do not match reference\n{{sample}}", {"sample": can[MIN(0, found - 100):found + 100:]})
Example #6
    def test_public_etl(self):
        """
        ENSURE ETL GENERATES WHAT'S IN THE REFERENCE FILE
        """
        File(self.settings.param.first_run_time).delete()
        File(self.settings.param.last_run_time).delete()
        self.settings.param.allow_private_bugs = Null

        database.make_test_instance(self.settings.bugzilla)
        es = elasticsearch.make_test_instance("candidate",
                                              self.settings.fake.bugs)
        es_comments = elasticsearch.make_test_instance(
            "candidate_comments", self.settings.fake.comments)
        bz_etl.main(self.settings, es, es_comments)

        ref = elasticsearch.open_test_instance(
            "reference", self.settings.public_bugs_reference)
        compare_both(es, ref, self.settings, self.settings.param.bugs)

        #DIRECT COMPARE THE FILE JSON
        can = File(self.settings.fake.comments.filename).read()
        ref = File(self.settings.public_comments_reference.filename).read()
        if can != ref:
            found = -1
            for i, c in enumerate(can):
                if can[i] != ref[i]:
                    found = i
                    break
            Log.error("Comments do not match reference\n{{sample}}",
                      {"sample": can[MIN(0, found - 100):found + 100:]})
Example #7
def verify_public_bugs(es, private_bugs):
    #VERIFY BUGS ARE IN OUTPUT
    for b in private_bugs:
        versions = compare_es.get_all_bug_versions(es, b)
        if not versions:
            Log.error("Expecting versions for public bug {{bug_id}}",
                      {"bug_id": b})
Example #8
    def test_whiteboard_screened(self):
        GOOD_BUG_TO_TEST = 1046

        database.make_test_instance(self.settings.bugzilla)

        with DB(self.settings.bugzilla) as db:
            es = elasticsearch.make_test_instance("candidate",
                                                  self.settings.candidate)

            #MARK BUG AS ONE OF THE SCREENED GROUPS
            database.add_bug_group(db, GOOD_BUG_TO_TEST,
                                   SCREENED_WHITEBOARD_BUG_GROUPS[0])
            db.flush()

            #SETUP RUN PARAMETERS
            param = Struct()
            param.end_time = CNV.datetime2milli(get_current_time(db))
            param.start_time = 0
            param.start_time_str = extract_bugzilla.milli2string(db, 0)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = struct.wrap([
                GOOD_BUG_TO_TEST
            ])  # bug 1046 sees lots of whiteboard, and other field, changes
            param.allow_private_bugs = True

            with ThreadedQueue(es, size=1000) as output:
                etl(db, output, param, please_stop=None)

            Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
            versions = get_all_bug_versions(es, GOOD_BUG_TO_TEST)

            for v in versions:
                if v.status_whiteboard not in (None, "", "[screened]"):
                    Log.error("Expecting whiteboard to be screened")
Example #9
def diff(db, table, old_record, new_record):
    """
    UPDATE bugs_activity WITH THE CHANGES IN RECORDS
    """
    now = milli2string(db, CNV.datetime2milli(get_current_time(db)))
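    #FIELDS PRESENT IN ONLY ONE RECORD, PLUS FIELDS WHOSE VALUES DIFFER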
    changed = set(old_record.keys()) ^ set(new_record.keys())
    changed |= set([k for k, v in old_record.items() if v != new_record[k]])

    if table != u"bugs":
        prefix = table + u"."
    else:
        prefix = u""

    for c in changed:
        fieldid = db.query(
            "SELECT id FROM fielddefs WHERE name={{field_name}}",
            {"field_name": prefix + c})[0].id

        if fieldid == None:
            Log.error("Expecting a valid field name")

        activity = Struct(
            bug_id=old_record.bug_id,
            who=1,
            bug_when=now,
            fieldid=fieldid,
            removed=old_record[c],
            added=new_record[c],
            attach_id=old_record.attach_id,
            comment_id=old_record.comment_id
        )
        db.insert("bugs_activity", activity)

    db.execute("UPDATE bugs SET delta_ts={{now}} WHERE {{where}}", {
        "now":now,
        "where":esfilter2sqlwhere(db, {"term":{"bug_id":old_record.bug_id}})
    })
Example #10
def get_comments_by_id(db, comments, param):
    """
    GET SPECIFIC COMMENTS
    """
    if param.allow_private_bugs:
        return []

    param.comments_filter = esfilter2sqlwhere(db, {"and": [
        {"term": {"isprivate": 0}},
        {"terms": {"c.comment_id": comments}}
    ]})

    try:
        comments = db.query("""
            SELECT
                c.comment_id,
                c.bug_id,
                p.login_name modified_by,
                UNIX_TIMESTAMP(CONVERT_TZ(bug_when, 'US/Pacific','UTC'))*1000 AS modified_ts,
                c.thetext comment,
                c.isprivate
            FROM
                longdescs c
            LEFT JOIN
                profiles p ON c.who = p.userid
            LEFT JOIN
                longdescs_tags t ON t.comment_id=c.comment_id AND t.tag <> 'deleted'
            WHERE
                {{comments_filter}}
            """, param)

        return comments
    except Exception, e:
        Log.error("can not get comment data", e)
Example #11
def start():
    try:
        settings = startup.read_settings()
        Log.start(settings.debug)
        main(settings, restart=True)
    except Exception, e:
        Log.error("Can not start", e)
Example #12
def make_test_instance(db_settings):
    if not db_settings.filename:
        Log.note("Database schema will not be touched")
        return

    with Timer("Make database instance"):
        try:
            #CLEAR SCHEMA
            Log.note("Make empty {{schema}} schema", {"schema":db_settings.schema})
            no_schema=db_settings.copy()
            no_schema.schema = None
            with DB(no_schema) as db:
                db.execute("DROP DATABASE IF EXISTS {{schema}}", {"schema":db.quote_column(db_settings.schema)})
                db.execute("CREATE DATABASE {{schema}}", {"schema":db.quote_column(db_settings.schema)})

            #FILL SCHEMA
            Log.note("Fill {{schema}} schema with data", {"schema":db_settings.schema})
            DB.execute_file(db_settings, db_settings.filename)

            #ADD MISSING TABLES
            with DB(db_settings) as db:
                db.execute("""
                CREATE TABLE `longdescs_tags` (
                  `id` mediumint(9) NOT NULL AUTO_INCREMENT,
                  `comment_id` int(11) DEFAULT NULL,
                  `tag` varchar(24) NOT NULL,
                  PRIMARY KEY (`id`),
                  UNIQUE KEY `longdescs_tags_idx` (`comment_id`,`tag`),
                  CONSTRAINT `fk_longdescs_tags_comment_id_longdescs_comment_id` FOREIGN KEY (`comment_id`) REFERENCES `longdescs` (`comment_id`) ON DELETE CASCADE ON UPDATE CASCADE
                ) DEFAULT CHARSET=utf8""")
        except Exception, e:
            Log.error("Can not setup test database", e)
Example #13
def start():
    try:
        settings = startup.read_settings()
        Log.start(settings.debug)
        main(settings, restart=True)
    except Exception, e:
        Log.error("Can not start", e)
Example #14
    def test_ambiguous_whiteboard_screened(self):
        GOOD_BUG_TO_TEST=1046

        database.make_test_instance(self.settings.bugzilla)

        with DB(self.settings.bugzilla) as db:
            es = elasticsearch.make_test_instance("candidate", self.settings.candidate)

            #MARK BUG AS ONE OF THE SCREENED GROUPS
            database.add_bug_group(db, GOOD_BUG_TO_TEST, SCREENED_WHITEBOARD_BUG_GROUPS[0])
            #MARK BUG AS ONE OF THE *NOT* SCREENED GROUPS
            database.add_bug_group(db, GOOD_BUG_TO_TEST, "not screened")
            db.flush()

            #SETUP RUN PARAMETERS
            param = Struct()
            param.end_time = CNV.datetime2milli(get_current_time(db))
            param.start_time = 0
            param.start_time_str = extract_bugzilla.milli2string(db, 0)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = struct.wrap([GOOD_BUG_TO_TEST]) # bug 1046 sees lots of whiteboard, and other field, changes
            param.allow_private_bugs = True

            with ThreadedQueue(es, size=1000) as output:
                etl(db, output, param, please_stop=None)

            Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
            versions = get_all_bug_versions(es, GOOD_BUG_TO_TEST)

            for v in versions:
                if v.status_whiteboard not in (None, "", "[screened]"):
                    Log.error("Expecting whiteboard to be screened")
Example #15
    def test_private_etl(self):
        """
        ENSURE IDENTIFIABLE INFORMATION DOES NOT EXIST ON ANY BUGS
        """
        File(self.settings.param.first_run_time).delete()
        File(self.settings.param.last_run_time).delete()
        self.settings.param.allow_private_bugs = True

        database.make_test_instance(self.settings.bugzilla)
        es = elasticsearch.make_test_instance("candidate", self.settings.fake.bugs)
        es_comments = elasticsearch.make_test_instance("candidate_comments", self.settings.fake.comments)
        bz_etl.main(self.settings, es, es_comments)

        ref = elasticsearch.open_test_instance("reference", self.settings.private_bugs_reference)
        compare_both(es, ref, self.settings, self.settings.param.bugs)

        #DIRECT COMPARE THE FILE JSON
        can = File(self.settings.fake.comments.filename).read()
        ref = File(self.settings.private_comments_reference.filename).read()
        if can != ref:
            found = -1
            for i, c in enumerate(can):
                if can[i] != ref[i]:
                    found = i
                    break
            Log.error("Comments do not match reference\n{{sample}}", {"sample": can[max(0, found - 100):found + 100]})
Example #16
    def test_incremental_etl_catches_tracking_flags(self):
        database.make_test_instance(self.settings.bugzilla)

        with DB(self.settings.bugzilla) as db:
            es = elasticsearch.make_test_instance("candidate", self.settings.candidate)

            #SETUP RUN PARAMETERS
            param = Struct()
            param.end_time = CNV.datetime2milli(get_current_time(db))
            # FLAGS ADDED TO BUG 813650 ON 18/12/2012 2:38:08 AM (PDT), SO START AT SOME LATER TIME
            param.start_time = CNV.datetime2milli(CNV.string2datetime("02/01/2013 10:09:15", "%d/%m/%Y %H:%M:%S"))
            param.start_time_str = extract_bugzilla.milli2string(db, param.start_time)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = struct.wrap([813650])
            param.allow_private_bugs = self.settings.param.allow_private_bugs

            with ThreadedQueue(es, size=1000) as output:
                etl(db, output, param, please_stop=None)

            Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
            versions = get_all_bug_versions(es, 813650)

            flags = ["cf_status_firefox18", "cf_status_firefox19", "cf_status_firefox_esr17", "cf_status_b2g18"]
            for v in versions:
                if v.modified_ts>param.start_time:
                    for f in flags:
                        if v[f] != "fixed":
                            Log.error("813650 should have {{flag}}=='fixed'", {"flag": f})
Example #17
def get_pending(es, since):
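    #terms FACET COUNTS BUG VERSIONS PER bug_id SINCE GIVEN TIME; QUERY size=0 RETURNS FACETS ONLY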
    result = es.search({
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": {
            "range": {"modified_ts": {"gte": CNV.datetime2milli(since)}}}
        }},
        "from": 0,
        "size": 0,
        "sort": [],
        "facets": {"default": {"terms": {"field": "bug_id", "size": 200000}}}
    })

    if len(result.facets.default.terms) >= 200000:
        Log.error("Can not handle more than 200K bugs changed")

    pending_bugs = Multiset(
        result.facets.default.terms,
        key_field="term",
        count_field="count"
    )
    Log.note("Source has {{num}} bug versions for updating", {
        "num": len(pending_bugs)
    })
    return pending_bugs
Example #18
def verify_no_private_bugs(es, private_bugs):
    #VERIFY BUGS ARE NOT IN OUTPUT
    for b in private_bugs:
        versions = compare_es.get_all_bug_versions(es, b)

        if versions:
            Log.error("Expecting no version for private bug {{bug_id}}",
                      {"bug_id": b})
Example #19
    def tearDown(self):
        #CLOSE THE CACHED DB CONNECTIONS
        bz_etl.close_db_connections()

        if all_db:
            Log.error("not all db connections are closed")

        Log.stop()
Example #20
def verify_public_bugs(es, private_bugs):
    #VERIFY BUGS ARE IN OUTPUT
    for b in private_bugs:
        versions = compare_es.get_all_bug_versions(es, b)
        if not versions:
            Log.error("Expecting versions for public bug {{bug_id}}", {
                "bug_id": b
            })
Example #21
    def tearDown(self):
        #CLOSE THE CACHED DB CONNECTIONS
        bz_etl.close_db_connections()

        if all_db:
            Log.error("not all db connections are closed")

        Log.stop()
Example #22
def verify_no_private_bugs(es, private_bugs):
    #VERIFY BUGS ARE NOT IN OUTPUT
    for b in private_bugs:
        versions = compare_es.get_all_bug_versions(es, b)

        if versions:
            Log.error("Expecting no version for private bug {{bug_id}}", {
                "bug_id": b
            })
Example #23
def verify_no_private_attachments(es, private_attachments):
    #VERIFY ATTACHMENTS ARE NOT IN OUTPUT
    for b in Q.select(private_attachments, "bug_id"):
        versions = compare_es.get_all_bug_versions(es, b)
        #WE ASSUME THE ATTACHMENT, IF IT EXISTS, WILL BE SOMEWHERE IN THE BUG IT
        #BELONGS TO, IF AT ALL
        for v in versions:
            for a in v.attachments:
                if a.attach_id in Q.select(private_attachments, "attach_id"):
                    Log.error("Private attachment should not exist")
Example #24
def get_private_bugs_for_delete(db, param):
    if param.allow_private_bugs:
        return {0}  # NO BUGS TO DELETE

    try:
        with Timer("get all private bug ids"):
            private_bugs = db.query("SELECT DISTINCT bug_id FROM bug_group_map")
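            #bug_id 0 NEVER MATCHES A REAL BUG; IT KEEPS THE RETURNED SET NON-EMPTY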
            return set(private_bugs.bug_id) | {0}
    except Exception, e:
        Log.error("problem getting private bugs", e)
Example #25
def verify_no_private_attachments(es, private_attachments):
    #VERIFY ATTACHMENTS ARE NOT IN OUTPUT
    for b in Q.select(private_attachments, "bug_id"):
        versions = compare_es.get_all_bug_versions(es, b)
        #WE ASSUME THE ATTACHMENT, IF IT EXISTS, WILL BE SOMEWHERE IN THE BUG IT
        #BELONGS TO, IF AT ALL
        for v in versions:
            for a in v.attachments:
                if a.attach_id in Q.select(private_attachments, "attach_id"):
                    Log.error("Private attachment should not exist")
Example #26
def incremental_etl(settings, param, db, es, es_comments, output_queue):
    ####################################################################
    ## ES TAKES TIME TO DELETE RECORDS, DO DELETE FIRST WITH HOPE THE
    ## INDEX GETS A REWRITE DURING ADD OF NEW RECORDS
    ####################################################################

    #REMOVE PRIVATE BUGS
    private_bugs = get_private_bugs_for_delete(db, param)
    Log.note(
        "Ensure the following private bugs are deleted:\n{{private_bugs|indent}}",
        {"private_bugs": private_bugs})
    for g, delete_bugs in Q.groupby(private_bugs, size=1000):
        still_existing = get_bug_ids(es, {"terms": {"bug_id": delete_bugs}})
        if still_existing:
            Log.note(
                "Ensure the following private bugs are deleted:\n{{private_bugs|indent}}",
                {"private_bugs": still_existing})
        es.delete_record({"terms": {"bug_id": delete_bugs}})
        es_comments.delete_record({"terms": {"bug_id": delete_bugs}})

    #RECENT PUBLIC BUGS
    possible_public_bugs = get_recent_private_bugs(db, param)
    if param.allow_private_bugs:
        #PRIVATE BUGS
        #    A CHANGE IN PRIVACY INDICATOR MEANS THE WHITEBOARD IS AFFECTED, REDO
        es.delete_record({"terms": {"bug_id": possible_public_bugs}})
    else:
        #PUBLIC BUGS
        #    IF ADDING GROUP THEN private_bugs ALREADY DID THIS
        #    IF REMOVING GROUP THEN NO RECORDS TO DELETE
        pass

    #REMOVE **RECENT** PRIVATE ATTACHMENTS
    private_attachments = get_recent_private_attachments(db, param)
    bugs_to_refresh = set(Q.select(private_attachments, "bug_id"))
    es.delete_record({"terms": {"bug_id": bugs_to_refresh}})

    #REBUILD BUGS THAT GOT REMOVED
    bug_list = (possible_public_bugs
                | bugs_to_refresh) - private_bugs  # REMOVE PRIVATE BUGS
    if bug_list:
        refresh_param = param.copy()
        refresh_param.bug_list = bug_list
        refresh_param.start_time = 0
        refresh_param.start_time_str = extract_bugzilla.milli2string(db, 0)

        try:
            etl(db, output_queue, refresh_param.copy(), please_stop=None)
            etl_comments(db,
                         es_comments,
                         refresh_param.copy(),
                         please_stop=None)
        except Exception, e:
            Log.error("Problem with etl using parameters {{parameters}}",
                      {"parameters": refresh_param}, e)
Example #27
def run_both_etl(db, output_queue, es_comments, param):
    comment_thread = Thread.run("etl comments", etl_comments, db, es_comments, param)
    process_thread = Thread.run("etl", etl, db, output_queue, param)

    result = comment_thread.join()
    if result.exception:
        Log.error("etl_comments had problems", result.exception)

    result = process_thread.join()
    if result.exception:
        Log.error("etl had problems", result.exception)
Example #28
def get_private_bugs_for_delete(db, param):
    if param.allow_private_bugs:
        return {0}  # NO BUGS TO DELETE

    try:
        with Timer("get all private bug ids"):
            private_bugs = db.query(
                "SELECT DISTINCT bug_id FROM bug_group_map")
            return set(private_bugs.bug_id) | {0}
    except Exception, e:
        Log.error("problem getting private bugs", e)
Example #29
def main(settings, es=None, es_comments=None):
    if not settings.param.allow_private_bugs and es and not es_comments:
        Log.error("Must have ES for comments")

    resume_from_last_run = File(
        settings.param.first_run_time).exists and not File(
            settings.param.last_run_time).exists

    #MAKE HANDLES TO CONTAINERS
    try:
        with DB(settings.bugzilla, readonly=True) as db:
            current_run_time, es, es_comments, last_run_time = setup_es(
                settings, db, es, es_comments)

            with ThreadedQueue(es, size=500, silent=True) as output_queue:
                #SETUP RUN PARAMETERS
                param = Struct()
                param.end_time = CNV.datetime2milli(get_current_time(db))
                # DB WRITES ARE DELAYED, RESULTING IN UNORDERED bug_when IN bugs_activity (AS IS ASSUMED FOR bugs.delta_ts)
                # THIS JITTER IS USUALLY NO MORE THAN ONE SECOND, BUT WE LOOK BACK FURTHER, JUST IN CASE.
                # THERE ARE OCCASIONAL WRITES THAT ARE IN GMT, BUT SINCE THEY LOOK LIKE THE FUTURE, WE CAPTURE THEM
                param.start_time = last_run_time - nvl(
                    settings.param.look_back,
                    5 * 60 * 1000)  # 5 MINUTE LOOK_BACK
                param.start_time_str = extract_bugzilla.milli2string(
                    db, param.start_time)
                param.alias_file = settings.param.alias_file
                param.allow_private_bugs = settings.param.allow_private_bugs

                if last_run_time > 0:
                    with Timer("run incremental etl"):
                        incremental_etl(settings, param, db, es, es_comments,
                                        output_queue)
                else:
                    with Timer("run full etl"):
                        full_etl(resume_from_last_run, settings, param, db, es,
                                 es_comments, output_queue)

                output_queue.add(Thread.STOP)

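        #DROP ALL OTHER INDEXES UNDER EACH ALIAS, THEN POINT THE ALIAS AT THE NEW INDEX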
        if settings.es.alias:
            es.delete_all_but(settings.es.alias, settings.es.index)
            es.add_alias(settings.es.alias)

        if settings.es_comments.alias:
            es.delete_all_but(settings.es_comments.alias,
                              settings.es_comments.index)
            es_comments.add_alias(settings.es_comments.alias)

        File(settings.param.last_run_time).write(
            unicode(CNV.datetime2milli(current_run_time)))
    except Exception, e:
        Log.error("Problem with main ETL loop", e)
Example #30
def run_both_etl(db, output_queue, es_comments, param):
    comment_thread = Thread.run("etl comments", etl_comments, db, es_comments,
                                param)
    process_thread = Thread.run("etl", etl, db, output_queue, param)

    result = comment_thread.join()
    if result.exception:
        Log.error("etl_comments had problems", result.exception)

    result = process_thread.join()
    if result.exception:
        Log.error("etl had problems", result.exception)
Example #31
    def test_confidential_whiteboard_is_screened(self):
        leaked_whiteboard = get(
            self.private,
            {
                "and": [
                    {
                        "terms": {
                            "bug_group": SCREENED_WHITEBOARD_BUG_GROUPS
                        }
                    },
                    {
                        "exists": {
                            "field": "status_whiteboard"
                        }
                    },
                    {
                        "not": {
                            "terms": {
                                "status_whiteboard": ["", "[screened]"]
                            }
                        }
                    },
                    {
                        "range": {
                            "expires_on": {
                                "gte": NOW
                            }
                        }
                    },  #CURRENT RECORDS
                    {
                        "range": {
                            "modified_ts": {
                                "lt": A_WHILE_AGO
                            }
                        }
                    },  #OF A MINIMUM AGE
                ]
            },
            fields=[
                "bug_id", "product", "component", "status_whiteboard",
                "bug_group", "modified_ts"
            ],
            limit=100)

        if leaked_whiteboard:
            for l in leaked_whiteboard:
                l.modified_ts = CNV.datetime2string(
                    CNV.milli2datetime(l.modified_ts))

            Log.error("Whiteboard leaking:\b{{leak}}",
                      {"leak": leaked_whiteboard})
Example #32
def get_comments(db, param):
    if not param.bug_list:
        return []

    if param.allow_private_bugs:
        param.comment_field = SQL("'[screened]' comment")
        param.bug_filter = esfilter2sqlwhere(
            db, {"and": [{
                "terms": {
                    "bug_id": param.bug_list
                }
            }]})
    else:
        param.comment_field = SQL("c.thetext comment")
        param.bug_filter = esfilter2sqlwhere(
            db, {
                "and": [{
                    "terms": {
                        "bug_id": param.bug_list
                    }
                }, {
                    "term": {
                        "isprivate": 0
                    }
                }]
            })

    try:
        comments = db.query(
            """
            SELECT
                c.comment_id,
                c.bug_id,
                p.login_name modified_by,
                UNIX_TIMESTAMP(CONVERT_TZ(bug_when, 'US/Pacific','UTC'))*1000 AS modified_ts,
                {{comment_field}},
                c.isprivate
            FROM
                longdescs c
            LEFT JOIN
                profiles p ON c.who = p.userid
            LEFT JOIN
                longdescs_tags t ON t.comment_id=c.comment_id AND t.tag <> 'deleted'
            WHERE
                {{bug_filter}} AND
                bug_when >= {{start_time_str}}
            """, param)

        return comments
    except Exception, e:
        Log.error("can not get comment data", e)
Example #33
    def test_incremental_has_correct_expires_on(self):
        # 813650, 726635 BOTH HAVE CHANGES IN 2013
        bugs = struct.wrap([813650, 726635])
        start_incremental=CNV.datetime2milli(CNV.string2datetime("2013-01-01", "%Y-%m-%d"))

        es = elasticsearch.make_test_instance("candidate", self.settings.candidate)
        with DB(self.settings.bugzilla) as db:
            #SETUP FIRST RUN PARAMETERS
            param = Struct()
            param.end_time = start_incremental
            param.start_time = 0
            param.start_time_str = extract_bugzilla.milli2string(db, param.start_time)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = bugs
            param.allow_private_bugs = False

            with ThreadedQueue(es, size=1000) as output:
                etl(db, output, param, please_stop=None)

            #SETUP INCREMENTAL RUN PARAMETERS
            param = Struct()
            param.end_time = CNV.datetime2milli(datetime.utcnow())
            param.start_time = start_incremental
            param.start_time_str = extract_bugzilla.milli2string(db, param.start_time)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = bugs
            param.allow_private_bugs = False

            with ThreadedQueue(es, size=1000) as output:
                etl(db, output, param, please_stop=None)

        for b in bugs:
            results = es.search({
                "query": {"filtered": {
                    "query": {"match_all": {}},
                    "filter": {"and":[
                        {"term":{"bug_id":b}},
                        {"range":{"expires_on":{"gte":CNV.datetime2milli(datetime.utcnow())}}}
                    ]}
                }},
                "from": 0,
                "size": 200000,
                "sort": [],
                "fields": ["bug_id"]
            })

            if results.hits.total>1:
                Log.error("Expecting only one active bug_version record")
Example #34
def incremental_etl(settings, param, db, es, es_comments, output_queue):
    ####################################################################
    ## ES TAKES TIME TO DELETE RECORDS, DO DELETE FIRST WITH HOPE THE
    ## INDEX GETS A REWRITE DURING ADD OF NEW RECORDS
    ####################################################################

    #REMOVE PRIVATE BUGS
    private_bugs = get_private_bugs_for_delete(db, param)
    Log.note("Ensure the following private bugs are deleted:\n{{private_bugs|indent}}", {"private_bugs": private_bugs})
    for g, delete_bugs in Q.groupby(private_bugs, size=1000):
        still_existing = get_bug_ids(es, {"terms": {"bug_id": delete_bugs}})
        if still_existing:
            Log.note("Ensure the following private bugs are deleted:\n{{private_bugs|indent}}", {"private_bugs": still_existing})
        es.delete_record({"terms": {"bug_id": delete_bugs}})
        es_comments.delete_record({"terms": {"bug_id": delete_bugs}})


    #RECENT PUBLIC BUGS
    possible_public_bugs = get_recent_private_bugs(db, param)
    if param.allow_private_bugs:
        #PRIVATE BUGS
        #    A CHANGE IN PRIVACY INDICATOR MEANS THE WHITEBOARD IS AFFECTED, REDO
        es.delete_record({"terms": {"bug_id": possible_public_bugs}})
    else:
        #PUBLIC BUGS
        #    IF ADDING GROUP THEN private_bugs ALREADY DID THIS
        #    IF REMOVING GROUP THEN NO RECORDS TO DELETE
        pass

    #REMOVE **RECENT** PRIVATE ATTACHMENTS
    private_attachments = get_recent_private_attachments(db, param)
    bugs_to_refresh = set(Q.select(private_attachments, "bug_id"))
    es.delete_record({"terms": {"bug_id": bugs_to_refresh}})

    #REBUILD BUGS THAT GOT REMOVED
    bug_list = (possible_public_bugs | bugs_to_refresh) - private_bugs # REMOVE PRIVATE BUGS
    if bug_list:
        refresh_param = param.copy()
        refresh_param.bug_list = bug_list
        refresh_param.start_time = 0
        refresh_param.start_time_str = extract_bugzilla.milli2string(db, 0)

        try:
            etl(db, output_queue, refresh_param.copy(), please_stop=None)
            etl_comments(db, es_comments, refresh_param.copy(), please_stop=None)
        except Exception, e:
            Log.error("Problem with etl using parameters {{parameters}}", {
                "parameters": refresh_param
            }, e)
Example #35
def verify_no_private_comments(es, private_comments):
    data = es.search({
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": {"and": [
                {"terms": {"comment_id": private_comments}}
            ]}
        }},
        "from": 0,
        "size": 200000,
        "sort": []
    })

    if Q.select(data.hits.hits, "_source"):
        Log.error("Expecting no comments")
Example #36
    def test_private_comments_not_leaking(self):
        leaked_comments = get(
            self.public_comments,
            {"term": {"isprivate": "1"}},
            limit=20
        )
        if leaked_comments:
            if self.settings.param.delete:
                self.public_comments.delete_record(
                    {"terms":{"bug_id":leaked_comments.bug_id}}
                )

            Log.error("{{num}} comments marked private have leaked!\n{{comments|indent}}", {
                "num": len(leaked_comments),
                "comments": leaked_comments
            })
Example #37
def main(settings, es=None, es_comments=None):
    if not settings.param.allow_private_bugs and es and not es_comments:
        Log.error("Must have ES for comments")

    resume_from_last_run = File(settings.param.first_run_time).exists and not File(settings.param.last_run_time).exists

    #MAKE HANDLES TO CONTAINERS
    try:
        with DB(settings.bugzilla, readonly=True) as db:
            current_run_time, es, es_comments, last_run_time = setup_es(settings, db, es, es_comments)

            with ThreadedQueue(es, size=500, silent=True) as output_queue:
                #SETUP RUN PARAMETERS
                param = Struct()
                param.end_time = CNV.datetime2milli(get_current_time(db))
                # DB WRITES ARE DELAYED, RESULTING IN UNORDERED bug_when IN bugs_activity (AS IS ASSUMED FOR bugs.delta_ts)
                # THIS JITTER IS USUALLY NO MORE THAN ONE SECOND, BUT WE LOOK BACK FURTHER, JUST IN CASE.
                # THERE ARE OCCASIONAL WRITES THAT ARE IN GMT, BUT SINCE THEY LOOK LIKE THE FUTURE, WE CAPTURE THEM
                param.start_time = last_run_time - nvl(settings.param.look_back, 5 * 60 * 1000)  # 5 MINUTE LOOK_BACK
                param.start_time_str = extract_bugzilla.milli2string(db, param.start_time)
                param.alias_file = settings.param.alias_file
                param.allow_private_bugs = settings.param.allow_private_bugs

                if last_run_time > 0:
                    with Timer("run incremental etl"):
                        incremental_etl(settings, param, db, es, es_comments, output_queue)
                else:
                    with Timer("run full etl"):
                        full_etl(resume_from_last_run, settings, param, db, es, es_comments, output_queue)

                output_queue.add(Thread.STOP)

        if settings.es.alias:
            es.delete_all_but(settings.es.alias, settings.es.index)
            es.add_alias(settings.es.alias)

        if settings.es_comments.alias:
            es.delete_all_but(settings.es_comments.alias, settings.es_comments.index)
            es_comments.add_alias(settings.es_comments.alias)

        File(settings.param.last_run_time).write(unicode(CNV.datetime2milli(current_run_time)))
    except Exception, e:
        Log.error("Problem with main ETL loop", e)
Example #38
    def test_private_comments_not_leaking(self):
        leaked_comments = get(self.public_comments,
                              {"term": {
                                  "isprivate": "1"
                              }},
                              limit=20)
        if leaked_comments:
            if self.settings.param.delete:
                self.public_comments.delete_record(
                    {"terms": {
                        "bug_id": leaked_comments.bug_id
                    }})

            Log.error(
                "{{num}} comments marked private have leaked!\n{{comments|indent}}",
                {
                    "num": len(leaked_comments),
                    "comments": leaked_comments
                })
Example #39
def get_or_create_index(destination_settings, source):
    #CHECK IF INDEX, OR ALIAS, EXISTS
    es = ElasticSearch(destination_settings)
    aliases = es.get_aliases()

    indexes = [a for a in aliases if a.alias == destination_settings.index]
    if not indexes:
        #CREATE INDEX
        schema = source.get_schema()
        assert schema.settings
        assert schema.mappings
        ElasticSearch.create_index(destination_settings, schema, limit_replicas=True)
    elif len(indexes) > 1:
        Log.error("do not know how to replicate to more than one index")
    elif indexes[0].alias != None:
        destination_settings.alias = destination_settings.index
        destination_settings.index = indexes[0].index

    return ElasticSearch(destination_settings)
Example #40
def get_bug_ids(es, filter):
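    #RETURN SET OF bug_id MATCHING THE GIVEN ES FILTER (FIRST 200K HITS)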
    try:
        results = es.search({
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": filter
            }},
            "from": 0,
            "size": 200000,
            "sort": [],
            "fields": ["bug_id"]
        })

        return set(results.hits.hits.fields.bug_id)
    except Exception, e:
        Log.error("Can not get_max_bug from {{host}}/{{index}}", {
            "host": es.settings.host,
            "index": es.settings.index
        }, e)
Example #41
def get_comments_by_id(db, comments, param):
    """
    GET SPECIFIC COMMENTS
    """
    if param.allow_private_bugs:
        return []

    param.comments_filter = esfilter2sqlwhere(db, {
        "and": [{
            "term": {
                "isprivate": 0
            }
        }, {
            "terms": {
                "c.comment_id": comments
            }
        }]
    })

    try:
        comments = db.query(
            """
            SELECT
                c.comment_id,
                c.bug_id,
                p.login_name modified_by,
                UNIX_TIMESTAMP(CONVERT_TZ(bug_when, 'US/Pacific','UTC'))*1000 AS modified_ts,
                c.thetext comment,
                c.isprivate
            FROM
                longdescs c
            LEFT JOIN
                profiles p ON c.who = p.userid
            LEFT JOIN
                longdescs_tags t ON t.comment_id=c.comment_id AND t.tag <> 'deleted'
            WHERE
                {{comments_filter}}
            """, param)

        return comments
    except Exception, e:
        Log.error("can not get comment data", e)
Example #42
    def test_confidential_whiteboard_is_screened(self):
        leaked_whiteboard = get(
            self.private,
            {"and": [
                {"terms": {"bug_group": SCREENED_WHITEBOARD_BUG_GROUPS}},
                {"exists": {"field": "status_whiteboard"}},
                {"not": {"terms": {"status_whiteboard": ["", "[screened]"]}}},
                {"range": {"expires_on": {"gte": NOW}}}, #CURRENT RECORDS
                {"range": {"modified_ts": {"lt": A_WHILE_AGO}}}, #OF A MINIMUM AGE
            ]},
            fields=["bug_id", "product", "component", "status_whiteboard", "bug_group", "modified_ts"],
            limit=100
        )

        if leaked_whiteboard:
            for l in leaked_whiteboard:
                l.modified_ts = CNV.datetime2string(CNV.milli2datetime(l.modified_ts))

            Log.error("Whiteboard leaking:\n{{leak}}", {"leak": leaked_whiteboard})
Example #43
def compare_both(candidate, reference, settings, some_bugs):
    File(settings.param.errors).delete()
    try_dir = settings.param.errors + "/try/"
    ref_dir = settings.param.errors + "/ref/"

    with Timer("Comparing to reference"):
        found_errors = False
        for bug_id in some_bugs:
            try:
                versions = Q.sort(
                    get_all_bug_versions(candidate, bug_id, datetime.utcnow()),
                    "modified_ts")
                # WE CAN NOT EXPECT candidate TO BE UP TO DATE BECAUSE IT IS USING AN OLD IMAGE
                if not versions:
                    max_time = CNV.milli2datetime(settings.bugzilla.expires_on)
                else:
                    max_time = CNV.milli2datetime(versions.last().modified_ts)

                pre_ref_versions = get_all_bug_versions(
                    reference, bug_id, max_time)
                ref_versions = \
                    Q.sort(
                        #ADDED TO FIX OLD PRODUCTION BUG VERSIONS
                        [compare_es.old2new(x, settings.bugzilla.expires_on) for x in pre_ref_versions],
                        "modified_ts"
                    )

                can = CNV.object2JSON(versions, pretty=True)
                ref = CNV.object2JSON(ref_versions, pretty=True)
                if can != ref:
                    found_errors = True
                    File(try_dir + unicode(bug_id) + ".txt").write(can)
                    File(ref_dir + unicode(bug_id) + ".txt").write(ref)
            except Exception, e:
                found_errors = True
                Log.warning("Problem ETL'ing bug {{bug_id}}",
                            {"bug_id": bug_id}, e)

        if found_errors:
            Log.error("DIFFERENCES FOUND (Differences shown in {{path}})",
                      {"path": [try_dir, ref_dir]})
Example #44
def get_max_bug_id(es):
    try:
        results = es.search({
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": {"script": {"script": "true"}}
            }},
            "from": 0,
            "size": 0,
            "sort": [],
            "facets": {"0": {"statistical": {"field": "bug_id"}}}
        })

        if results.facets["0"].count == 0:
            return 0
        return results.facets["0"].max
    except Exception, e:
        Log.error("Can not get_max_bug from {{host}}/{{index}}", {
            "host": es.settings.host,
            "index": es.settings.index
        }, e)
Example #45
def compare_both(candidate, reference, settings, some_bugs):
    File(settings.param.errors).delete()
    try_dir = settings.param.errors + "/try/"
    ref_dir = settings.param.errors + "/ref/"

    with Timer("Comparing to reference"):
        found_errors = False
        for bug_id in some_bugs:
            try:
                versions = Q.sort(
                    get_all_bug_versions(candidate, bug_id, datetime.utcnow()),
                    "modified_ts")
                # WE CAN NOT EXPECT candidate TO BE UP TO DATE BECAUSE IT IS USING AN OLD IMAGE
                if not versions:
                    max_time = CNV.milli2datetime(settings.bugzilla.expires_on)
                else:
                    max_time = CNV.milli2datetime(versions.last().modified_ts)

                pre_ref_versions = get_all_bug_versions(reference, bug_id, max_time)
                ref_versions = \
                    Q.sort(
                        #ADDED TO FIX OLD PRODUCTION BUG VERSIONS
                        [compare_es.old2new(x, settings.bugzilla.expires_on) for x in pre_ref_versions],
                        "modified_ts"
                    )

                can = CNV.object2JSON(versions, pretty=True)
                ref = CNV.object2JSON(ref_versions, pretty=True)
                if can != ref:
                    found_errors = True
                    File(try_dir + unicode(bug_id) + ".txt").write(can)
                    File(ref_dir + unicode(bug_id) + ".txt").write(ref)
            except Exception, e:
                found_errors = True
                Log.warning("Problem ETL'ing bug {{bug_id}}", {"bug_id": bug_id}, e)

        if found_errors:
            Log.error("DIFFERENCES FOUND (Differences shown in {{path}})", {
                "path": [try_dir, ref_dir]}
            )
Example #46
def diff(db, table, old_record, new_record):
    """
    UPDATE bugs_activity WITH THE CHANGES IN RECORDS
    """
    now = milli2string(db, CNV.datetime2milli(get_current_time(db)))
    changed = set(old_record.keys()) ^ set(new_record.keys())
    changed |= set([k for k, v in old_record.items() if v != new_record[k]])

    if table != u"bugs":
        prefix = table + u"."
    else:
        prefix = u""

    for c in changed:
        fieldid = db.query(
            "SELECT id FROM fielddefs WHERE name={{field_name}}",
            {"field_name": prefix + c})[0].id

        if fieldid == None:
            Log.error("Expecting a valid field name")

        activity = Struct(bug_id=old_record.bug_id,
                          who=1,
                          bug_when=now,
                          fieldid=fieldid,
                          removed=old_record[c],
                          added=new_record[c],
                          attach_id=old_record.attach_id,
                          comment_id=old_record.comment_id)
        db.insert("bugs_activity", activity)

    db.execute(
        "UPDATE bugs SET delta_ts={{now}} WHERE {{where}}", {
            "now": now,
            "where": esfilter2sqlwhere(db,
                                       {"term": {
                                           "bug_id": old_record.bug_id
                                       }})
        })
Example #47
def get_recent_private_bugs(db, param):
    """
    GET ONLY BUGS THAT HAVE SWITCHED PRIVACY INDICATOR
    THIS LIST IS USED TO SIGNAL BUGS THAT NEED TOTAL RE-ETL
    """
    param.field_id = PRIVATE_BUG_GROUP_FIELD_ID

    try:
        output = db.query("""
        SELECT
            a.bug_id
        FROM
            bugs_activity a
        WHERE
            bug_when >= {{start_time_str}} AND
            fieldid={{field_id}}
        """, param)

        return set(output.bug_id)

    except Exception, e:
        Log.error("problem getting recent private attachments", e)
Example #48
def get_comments(db, param):
    if not param.bug_list:
        return []

    if param.allow_private_bugs:
        param.comment_field = SQL("'[screened]' comment")
        param.bug_filter = esfilter2sqlwhere(db, {"and": [
            {"terms": {"bug_id": param.bug_list}}
        ]})
    else:
        param.comment_field = SQL("c.thetext comment")
        param.bug_filter = esfilter2sqlwhere(db, {"and": [
            {"terms": {"bug_id": param.bug_list}},
            {"term": {"isprivate": 0}}
        ]})

    try:
        comments = db.query("""
            SELECT
                c.comment_id,
                c.bug_id,
                p.login_name modified_by,
                UNIX_TIMESTAMP(CONVERT_TZ(bug_when, 'US/Pacific','UTC'))*1000 AS modified_ts,
                {{comment_field}},
                c.isprivate
            FROM
                longdescs c
            LEFT JOIN
                profiles p ON c.who = p.userid
            LEFT JOIN
                longdescs_tags t ON t.comment_id=c.comment_id AND t.tag <> 'deleted'
            WHERE
                {{bug_filter}} AND
                bug_when >= {{start_time_str}}
            """, param)

        return comments
    except Exception, e:
        Log.error("can not get comment data", e)
Example #49
def get_last_updated(es):
    try:
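        #STATISTICAL FACET GIVES max(modified_ts) FOR RECORDS NEWER THAN far_back (DEFINED ELSEWHERE IN THE MODULE)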
        results = es.search({
            "query": {"filtered": {
                "query": {"match_all": {}},
                "filter": {
                    "range": {
                    "modified_ts": {"gte": CNV.datetime2milli(far_back)}}}
            }},
            "from": 0,
            "size": 0,
            "sort": [],
            "facets": {"0": {"statistical": {"field": "modified_ts"}}}
        })

        if results.facets["0"].count == 0:
            return datetime.min
        return CNV.milli2datetime(results.facets["0"].max)
    except Exception, e:
        Log.error("Can not get_last_updated from {{host}}/{{index}}",{
            "host": es.settings.host,
            "index": es.settings.index
        }, e)
Example #50
def get_recent_private_bugs(db, param):
    """
    GET ONLY BUGS THAT HAVE SWITCHED PRIVACY INDICATOR
    THIS LIST IS USED TO SIGNAL BUGS THAT NEED TOTAL RE-ETL
    """
    param.field_id = PRIVATE_BUG_GROUP_FIELD_ID

    try:
        output = db.query(
            """
        SELECT
            a.bug_id
        FROM
            bugs_activity a
        WHERE
            bug_when >= {{start_time_str}} AND
            fieldid={{field_id}}
        """, param)

        return set(output.bug_id)

    except Exception, e:
        Log.error("problem getting recent private attachments", e)
Example #51
def get_bug_ids(es, filter):
    try:
        results = es.search({
            "query": {
                "filtered": {
                    "query": {
                        "match_all": {}
                    },
                    "filter": filter
                }
            },
            "from": 0,
            "size": 200000,
            "sort": [],
            "fields": ["bug_id"]
        })

        return set(results.hits.hits.fields.bug_id)
    except Exception, e:
        Log.error("Can not get_max_bug from {{host}}/{{index}}", {
            "host": es.settings.host,
            "index": es.settings.index
        }, e)
Example #52
def get_recent_private_attachments(db, param):
    """
    GET ONLY RECENT ATTACHMENTS THAT HAVE SWITCHED PRIVACY INDICATOR
    THIS LIST IS USED TO SIGNAL BUGS THAT NEED TOTAL RE-ETL
    """
    if param.allow_private_bugs:
        return []

    param.field_id = PRIVATE_ATTACHMENT_FIELD_ID

    try:
        return db.query("""
        SELECT
            a.attach_id,
            a.bug_id
        FROM
            bugs_activity a
        WHERE
            bug_when >= {{start_time_str}} AND
            fieldid={{field_id}}
        """, param)
    except Exception, e:
        Log.error("problem getting recent private attachments", e)
Example #53
def verify_no_private_comments(es, private_comments):
    data = es.search({
        "query": {
            "filtered": {
                "query": {
                    "match_all": {}
                },
                "filter": {
                    "and": [{
                        "terms": {
                            "comment_id": private_comments
                        }
                    }]
                }
            }
        },
        "from": 0,
        "size": 200000,
        "sort": []
    })

    if Q.select(data.hits.hits, "_source"):
        Log.error("Expecting no comments")
Example #54
    def test_incremental_etl_catches_tracking_flags(self):
        database.make_test_instance(self.settings.bugzilla)

        with DB(self.settings.bugzilla) as db:
            es = elasticsearch.make_test_instance("candidate",
                                                  self.settings.candidate)

            #SETUP RUN PARAMETERS
            param = Struct()
            param.end_time = CNV.datetime2milli(get_current_time(db))
            # FLAGS ADDED TO BUG 813650 ON 18/12/2012 2:38:08 AM (PDT), SO START AT SOME LATER TIME
            param.start_time = CNV.datetime2milli(
                CNV.string2datetime("02/01/2013 10:09:15",
                                    "%d/%m/%Y %H:%M:%S"))
            param.start_time_str = extract_bugzilla.milli2string(
                db, param.start_time)

            param.alias_file = self.settings.param.alias_file
            param.bug_list = struct.wrap([813650])
            param.allow_private_bugs = self.settings.param.allow_private_bugs

            with ThreadedQueue(es, size=1000) as output:
                etl(db, output, param, please_stop=None)

            Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
            versions = get_all_bug_versions(es, 813650)

            flags = [
                "cf_status_firefox18", "cf_status_firefox19",
                "cf_status_firefox_esr17", "cf_status_b2g18"
            ]
            for v in versions:
                if v.modified_ts > param.start_time:
                    for f in flags:
                        if v[f] != "fixed":
                            Log.error("813650 should have {{flag}}=='fixed'",
                                      {"flag": f})
Example #55
def get_recent_private_attachments(db, param):
    """
    GET ONLY RECENT ATTACHMENTS THAT HAVE SWITCHED PRIVACY INDICATOR
    THIS LIST IS USED TO SIGNAL BUGS THAT NEED TOTAL RE-ETL
    """
    if param.allow_private_bugs:
        return []

    param.field_id = PRIVATE_ATTACHMENT_FIELD_ID

    try:
        return db.query(
            """
        SELECT
            a.attach_id,
            a.bug_id
        FROM
            bugs_activity a
        WHERE
            bug_when >= {{start_time_str}} AND
            fieldid={{field_id}}
        """, param)
    except Exception, e:
        Log.error("problem getting recent private attachments", e)
Example #56
def make_test_instance(db_settings):
    if not db_settings.filename:
        Log.note("Database schema will not be touched")
        return

    with Timer("Make database instance"):
        try:
            #CLEAR SCHEMA
            Log.note("Make empty {{schema}} schema",
                     {"schema": db_settings.schema})
            no_schema = db_settings.copy()
            no_schema.schema = None
            with DB(no_schema) as db:
                db.execute("DROP DATABASE IF EXISTS {{schema}}",
                           {"schema": db.quote_column(db_settings.schema)})
                db.execute("CREATE DATABASE {{schema}}",
                           {"schema": db.quote_column(db_settings.schema)})

            #FILL SCHEMA
            Log.note("Fill {{schema}} schema with data",
                     {"schema": db_settings.schema})
            DB.execute_file(db_settings, db_settings.filename)

            #ADD MISSING TABLES
            with DB(db_settings) as db:
                db.execute("""
                CREATE TABLE `longdescs_tags` (
                  `id` mediumint(9) NOT NULL AUTO_INCREMENT,
                  `comment_id` int(11) DEFAULT NULL,
                  `tag` varchar(24) NOT NULL,
                  PRIMARY KEY (`id`),
                  UNIQUE KEY `longdescs_tags_idx` (`comment_id`,`tag`),
                  CONSTRAINT `fk_longdescs_tags_comment_id_longdescs_comment_id` FOREIGN KEY (`comment_id`) REFERENCES `longdescs` (`comment_id`) ON DELETE CASCADE ON UPDATE CASCADE
                ) DEFAULT CHARSET=utf8""")
        except Exception, e:
            Log.error("Can not setup test database", e)