Example #1
0
def get_all_jobs(KibbleBit, source, joblist, creds):
    """Recursively expand a Jenkins job list into individual, concrete jobs.

    Folder-type entries (organization folders, multi-branch projects) are
    expanded by fetching their child job list and recursing; plain jobs are
    annotated with their full URL and parent folder name.

    :param KibbleBit: scanner context, used for logging via pprint
    :param source: source dict; `sourceURL` is the Jenkins base URL and
                   `folder` (optional) the accumulated parent folder name
    :param joblist: list of job dicts from the Jenkins JSON API (each has at
                    least `name`, optionally `_class` and `color`)
    :param creds: credentials passed through to the JSON API helper, or None
    :return: tuple (real_jobs, building) — the flat list of concrete job
             dicts and the count of currently running jobs
    """
    # Job classes that contain child jobs rather than builds themselves
    FOLDER_CLASSES = (
        "jenkins.branch.OrganizationFolder",
        "org.jenkinsci.plugins.workflow.multibranch.WorkflowMultiBranchProject",
    )
    real_jobs = []
    building = 0
    for job in joblist:
        # Is this a job folder?
        if job.get("_class") in FOLDER_CLASSES:
            KibbleBit.pprint("%s is a jobs folder, expanding..." % job["name"])
            csURL = "%s/job/%s" % (
                source["sourceURL"],
                urllib.parse.quote(job["name"].replace("/", "%2F")),
            )
            try:
                child_jobs = jsonapi.get(
                    "%s/api/json?tree=jobs[name,color]&depth=1" % csURL,
                    auth=creds)
                csource = dict(source)
                csource["sourceURL"] = csURL
                # Accumulate the folder path so nested jobs get unique names
                if not csource.get("folder"):
                    csource["folder"] = job["name"]
                else:
                    csource["folder"] += "-" + job["name"]
                cjobs, cbuilding = get_all_jobs(KibbleBit, csource,
                                                child_jobs.get("jobs", []),
                                                creds)
                building += cbuilding
                real_jobs.extend(cjobs)
            except Exception:  # narrowed from bare except: keep Ctrl-C working
                KibbleBit.pprint("Couldn't get child jobs, bailing")
                print("%s/api/json?tree=jobs[name,color]&depth=1" % csURL)
        # Or standard job?
        else:
            # A running job will have foo_anime as its color
            if "anime" in job.get("color", ""):
                building += 1
            job["fullURL"] = "%s/job/%s" % (
                source["sourceURL"],
                urllib.parse.quote(job["name"].replace("/", "%2F")),
            )
            job["folder"] = source.get("folder")
            real_jobs.append(job)
    return real_jobs, building
Example #2
0
def scanJob(KibbleBit, source, job, creds):
    """Scan a single Buildbot builder for build activity.

    Fetches every build of the builder via Buildbot's JSON API and appends a
    `ci_build` document for each build that is new or still pending.

    :param KibbleBit: scanner context (exists/get/append/pprint)
    :param source: source dict with organisation, sourceID and sourceURL
    :param job: builder name to scan
    :param creds: credentials for the JSON API, or None
    :return: True when job data was fetched and parsed, False on failure
    """
    dhash = hashlib.sha224(
        ("%s-%s-%s" %
         (source["organisation"], source["sourceID"], job)).encode(
             "ascii", errors="replace")).hexdigest()
    # NOTE(review): `found`/`dhash` are not used below; the cijob lookup is
    # kept for parity with the original behavior — confirm it can be dropped.
    found = KibbleBit.exists("cijob", dhash)

    jobURL = "%s/json/builders/%s/builds/_all" % (source["sourceURL"], job)
    KibbleBit.pprint(jobURL)
    jobjson = jsonapi.get(jobURL, auth=creds)

    # If valid JSON, walk every build in the response
    if jobjson:
        for buildno, data in jobjson.items():
            buildhash = hashlib.sha224(
                ("%s-%s-%s-%s" % (source["organisation"], source["sourceID"],
                                  job, buildno)).encode(
                                      "ascii", errors="replace")).hexdigest()
            builddoc = None
            try:
                builddoc = KibbleBit.get("ci_build", buildhash)
            except Exception:  # narrowed from bare except: doc may not exist yet
                pass

            # If this build already completed, no need to parse it again
            if builddoc and builddoc.get("completed", False):
                continue

            KibbleBit.pprint("[%s-%s] This is new or pending, analyzing..." %
                             (job, buildno))

            # NOTE(review): presence of "currentStep" is treated as completed
            # here; confirm against the Buildbot API (it usually marks a
            # build that is still running).
            completed = "currentStep" in data

            # Get build status (success, failed, canceled etc)
            status = "building"
            if "successful" in data.get("text", []):
                status = "success"
            if "failed" in data.get("text", []):
                status = "failed"
            if "exception" in data.get("text", []):
                status = "aborted"

            DUR = 0
            # Calc when the build finished (times = [start, end] in seconds)
            if completed and len(data.get("times",
                                          [])) == 2 and data["times"][1]:
                FIN = data["times"][1]
                DUR = FIN - data["times"][0]
            else:
                FIN = 0

            doc = {
                # Build specific data
                "id": buildhash,
                "date": time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(FIN)),
                "buildID": buildno,
                "completed": completed,
                "duration": DUR * 1000,  # Buildbot does seconds, not milis
                "job": job,
                "jobURL": "%s/builders/%s" % (source["sourceURL"], job),
                "status": status,
                "started": int(data["times"][0]),
                "ci": "buildbot",
                # Standard docs values
                "sourceID": source["sourceID"],
                "organisation": source["organisation"],
                "upsert": True,
            }
            KibbleBit.append("ci_build", doc)
        # Yay, it worked!
        return True

    # Boo, it failed!
    KibbleBit.pprint("Fetching job data failed!")
    return False
Example #3
0
def scan(KibbleBit, source):
    """Scan a Buildbot master: snapshot the build queue and scan all builders.

    Validates the source URL, records progress in source["steps"]["ci"],
    downloads the builder list, writes a `ci_queue` snapshot document, then
    scans every builder using 4 worker threads (buildbotThread).
    """
    # Simple URL check
    buildbot = re.match(r"(https?://.+)", source["sourceURL"])
    if buildbot:

        source["steps"]["ci"] = {
            "time": time.time(),
            "status": "Parsing Buildbot job changes...",
            "running": True,
            "good": True,
        }
        KibbleBit.updateSource(source)

        KibbleBit.pprint("Parsing Buildbot activity at %s" %
                         source["sourceURL"])
        source["steps"]["ci"] = {
            "time": time.time(),
            "status": "Downloading changeset",
            "running": True,
            "good": True,
        }
        KibbleBit.updateSource(source)

        # Buildbot may need credentials (basic auth, "user:pass")
        creds = None
        if (source["creds"] and "username" in source["creds"]
                and source["creds"]["username"]
                and len(source["creds"]["username"]) > 0):
            creds = "%s:%s" % (source["creds"]["username"],
                               source["creds"]["password"])

        # Get the job list
        sURL = source["sourceURL"]
        KibbleBit.pprint("Getting job list...")
        builders = jsonapi.get("%s/json/builders" % sURL, auth=creds)

        # Save queue snapshot (hash keyed by org, source ID and current time)
        NOW = int(datetime.datetime.utcnow().timestamp())
        queuehash = hashlib.sha224(
            ("%s-%s-queue-%s" %
             (source["organisation"], source["sourceID"], int(
                 time.time()))).encode("ascii", errors="replace")).hexdigest()

        # Scan queue items
        blocked = 0
        stuck = 0
        queueSize = 0
        actualQueueSize = 0
        building = 0
        jobs = []

        for builder, data in builders.items():
            jobs.append(builder)
            if data["state"] == "building":
                building += 1
            if data.get("pendingBuilds", 0) > 0:
                # All queued items, even offlined builders
                # NOTE(review): actualQueueSize is never written to the queue
                # doc below — confirm whether it should be included.
                actualQueueSize += data.get("pendingBuilds", 0)
                # Only queues with an online builder (actually waiting stuff)
                if data["state"] == "building":
                    queueSize += data.get("pendingBuilds", 0)
                    blocked += data.get("pendingBuilds",
                                        0)  # Blocked by running builds
                # Stuck builds (iow no builder available)
                if data["state"] == "offline":
                    stuck += data.get("pendingBuilds", 0)

        # Write up a queue doc
        queuedoc = {
            "id": queuehash,
            "date": time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(NOW)),
            "time": NOW,
            "size": queueSize,
            "blocked": blocked,
            "stuck": stuck,
            "building": building,
            "ci": "buildbot",
            # Standard docs values
            "sourceID": source["sourceID"],
            "organisation": source["organisation"],
            "upsert": True,
        }
        KibbleBit.append("ci_queue", queuedoc)

        KibbleBit.pprint("Found %u builders in Buildbot" % len(jobs))

        # Fan the builder list out to 4 worker threads sharing one lock
        threads = []
        block = threading.Lock()
        KibbleBit.pprint("Scanning jobs using 4 sub-threads")
        for i in range(0, 4):
            t = buildbotThread(block, KibbleBit, source, creds, jobs)
            threads.append(t)
            t.start()

        for t in threads:
            t.join()

        # We're all done, yaay
        KibbleBit.pprint("Done scanning %s" % source["sourceURL"])

        source["steps"]["ci"] = {
            "time":
            time.time(),
            "status":
            "Buildbot successfully scanned at " +
            time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(time.time())),
            "running":
            False,
            "good":
            True,
        }
        KibbleBit.updateSource(source)
Example #4
0
def scanJob(KibbleBit, source, job, creds):
    """Scan a single Jenkins job for build activity.

    Fetches the job's recent builds via the Jenkins JSON API and appends a
    `ci_build` document for each build that is new or still pending.

    :param KibbleBit: scanner context (exists/get/append/pprint)
    :param source: source dict with organisation, sourceID and sourceURL
    :param job: job dict with `name`, `fullURL` and optionally `folder`
    :param creds: credentials for the JSON API, or None
    :return: True when job data was fetched and parsed, False on failure
    """
    NOW = int(datetime.datetime.utcnow().timestamp())
    # Prefix the folder path so nested jobs get unique names
    jname = job["name"]
    if job.get("folder"):
        jname = job.get("folder") + "-" + job["name"]
    dhash = hashlib.sha224(
        ("%s-%s-%s" %
         (source["organisation"], source["sourceURL"], jname)).encode(
             "ascii", errors="replace")).hexdigest()
    # NOTE(review): `found`/`dhash` are not used below; the cijob lookup is
    # kept for parity with the original behavior — confirm it can be dropped.
    found = KibbleBit.exists("cijob", dhash)

    # Get $jenkins/job/$job-name/json...
    jobURL = (
        "%s/api/json?depth=2&tree=builds[number,status,timestamp,id,result,duration]"
        % job["fullURL"])
    KibbleBit.pprint(jobURL)
    jobjson = jsonapi.get(jobURL, auth=creds)

    # If valid JSON, walk every build in the response
    if jobjson:
        for build in jobjson.get("builds", []):
            buildhash = hashlib.sha224(
                ("%s-%s-%s-%s" % (source["organisation"], source["sourceURL"],
                                  jname, build["id"])).encode(
                                      "ascii", errors="replace")).hexdigest()
            builddoc = None
            try:
                builddoc = KibbleBit.get("ci_build", buildhash)
            except Exception:  # narrowed from bare except: doc may not exist yet
                pass

            # If this build already completed, no need to parse it again
            if builddoc and builddoc.get("completed", False):
                continue

            KibbleBit.pprint("[%s-%s] This is new or pending, analyzing..." %
                             (jname, build["id"]))

            # Jenkins leaves `result` empty while the build is running
            completed = bool(build["result"])

            # Estimate time spent in queue (timestamps are in milliseconds)
            queuetime = 0
            TS = int(build["timestamp"] / 1000)
            if builddoc:
                queuetime = builddoc.get("queuetime", 0)
            if not completed:
                queuetime = NOW - TS

            # Get build status (success, failed, canceled etc)
            status = "building"
            if build["result"] in ["SUCCESS", "STABLE"]:
                status = "success"
            if build["result"] in ["FAILURE", "UNSTABLE"]:
                status = "failed"
            if build["result"] in ["ABORTED"]:
                status = "aborted"

            # Calc when the build finished (jenkins doesn't show this)
            if completed:
                FIN = int(build["timestamp"] + build["duration"]) / 1000
            else:
                FIN = 0

            doc = {
                # Build specific data
                "id": buildhash,
                "date": time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(FIN)),
                "buildID": build["id"],
                "completed": completed,
                "duration": build["duration"],
                "job": jname,
                "jobURL": jobURL,
                "status": status,
                "started": int(build["timestamp"] / 1000),
                "ci": "jenkins",
                "queuetime": queuetime,
                # Standard docs values
                "sourceID": source["sourceID"],
                "organisation": source["organisation"],
                "upsert": True,
            }
            KibbleBit.append("ci_build", doc)
        # Yay, it worked!
        return True

    # Boo, it failed!
    KibbleBit.pprint("Fetching job data failed!")
    return False
Example #5
0
def scan(KibbleBit, source):
    """Scan a Jenkins instance: snapshot the build queue and scan all jobs.

    Validates the source URL, records progress in source["steps"], downloads
    the job list and the current queue, writes a `ci_queue` snapshot document,
    then scans every job using 4 worker threads (jenkinsThread).
    """
    # Simple URL check
    jenkins = re.match(r"(https?://.+)", source["sourceURL"])
    if jenkins:

        source["steps"]["jenkins"] = {
            "time": time.time(),
            "status": "Parsing Jenkins job changes...",
            "running": True,
            "good": True,
        }
        KibbleBit.updateSource(source)

        pendingJobs = []
        KibbleBit.pprint("Parsing Jenkins activity at %s" %
                         source["sourceURL"])
        # NOTE(review): this writes the "issues" step while the block above
        # writes "jenkins" — looks inconsistent; confirm which key the UI reads.
        source["steps"]["issues"] = {
            "time": time.time(),
            "status": "Downloading changeset",
            "running": True,
            "good": True,
        }
        KibbleBit.updateSource(source)

        # Jenkins may need credentials (basic auth, "user:pass")
        creds = None
        if (source["creds"] and "username" in source["creds"]
                and source["creds"]["username"]
                and len(source["creds"]["username"]) > 0):
            creds = "%s:%s" % (source["creds"]["username"],
                               source["creds"]["password"])

        # Get the job list
        sURL = source["sourceURL"]
        KibbleBit.pprint("Getting job list...")
        jobsjs = jsonapi.get("%s/api/json?tree=jobs[name,color]&depth=1" %
                             sURL,
                             auth=creds)

        # Get the current queue
        KibbleBit.pprint("Getting job queue...")
        queuejs = jsonapi.get("%s/queue/api/json?depth=1" % sURL, auth=creds)

        # Save queue snapshot (hash keyed by org, source URL and current time)
        NOW = int(datetime.datetime.utcnow().timestamp())
        queuehash = hashlib.sha224(
            ("%s-%s-queue-%s" %
             (source["organisation"], source["sourceURL"], int(
                 time.time()))).encode("ascii", errors="replace")).hexdigest()

        # Scan queue items
        blocked = 0
        stuck = 0
        totalqueuetime = 0
        items = queuejs.get("items", [])

        for item in items:
            if item["blocked"]:
                blocked += 1
            if item["stuck"]:
                stuck += 1
            if "inQueueSince" in item:
                # inQueueSince is in milliseconds; convert to seconds
                totalqueuetime += NOW - int(item["inQueueSince"] / 1000)

        # max(1, ...) guards against division by zero on an empty queue
        avgqueuetime = totalqueuetime / max(1, len(items))

        # Count how many jobs are building, find any folders...
        actual_jobs, building = get_all_jobs(KibbleBit, source,
                                             jobsjs.get("jobs", []), creds)

        # Write up a queue doc
        queuedoc = {
            "id": queuehash,
            "date": time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(NOW)),
            "time": NOW,
            "building": building,
            "size": len(items),
            "blocked": blocked,
            "stuck": stuck,
            "avgwait": avgqueuetime,
            "ci": "jenkins",
            # Standard docs values
            "sourceID": source["sourceID"],
            "organisation": source["organisation"],
            "upsert": True,
        }
        KibbleBit.append("ci_queue", queuedoc)

        pendingJobs = actual_jobs
        KibbleBit.pprint("Found %u jobs in Jenkins" % len(pendingJobs))

        # Fan the job list out to 4 worker threads sharing one lock
        threads = []
        block = threading.Lock()
        KibbleBit.pprint("Scanning jobs using 4 sub-threads")
        for i in range(0, 4):
            t = jenkinsThread(block, KibbleBit, source, creds, pendingJobs)
            threads.append(t)
            t.start()

        for t in threads:
            t.join()

        # We're all done, yaay
        KibbleBit.pprint("Done scanning %s" % source["sourceURL"])

        source["steps"]["issues"] = {
            "time":
            time.time(),
            "status":
            "Jenkins successfully scanned at " +
            time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(time.time())),
            "running":
            False,
            "good":
            True,
        }
        KibbleBit.updateSource(source)
def scan(KibbleBit, source):
    """Run key-phrase extraction (KPE) on recent Pony Mail emails.

    Validates the Pony Mail source URL, fetches up to MAX_COUNT recent
    non-bot emails that lack a "kpe" field, sends their bodies to the
    configured KPE backend (Azure or picoAPI) and writes the extracted key
    phrases back onto each email document.
    """
    # Validate URL first
    url = re.match(r"(https?://.+)/list\.html\?(.+)@(.+)", source["sourceURL"])
    if not url:
        KibbleBit.pprint(
            "Malformed or invalid Pony Mail URL passed to scanner: %s"
            % source["sourceURL"]
        )
        source["steps"]["mail"] = {
            "time": time.time(),
            "status": "Could not parse Pony Mail URL!",
            "running": False,
            "good": False,
        }
        KibbleBit.updateSource(source)
        return

    if not "azure" in KibbleBit.config and not "picoapi" in KibbleBit.config:
        KibbleBit.pprint(
            "No Azure/picoAPI creds configured, skipping key phrase extraction"
        )
        return

    cookie = None
    if "creds" in source and source["creds"]:
        cookie = source["creds"].get("cookie", None)

    rootURL = re.sub(r"list.html.+", "", source["sourceURL"])
    query = {
        "query": {"bool": {"must": [{"term": {"sourceID": source["sourceID"]}}]}},
        "sort": [{"ts": "desc"}],
    }

    # Oversample (x4) so there are enough emails left after bot filtering
    res = KibbleBit.broker.DB.search(
        index=KibbleBit.dbname, doc_type="email", body=query, size=MAX_COUNT * 4
    )
    ec = 0
    hits = []
    for hit in res["hits"]["hits"]:
        eml = hit["_source"]
        if not re.search(ROBITS, eml["sender"]):
            ec += 1
            if ec > MAX_COUNT:
                break
            if "kpe" not in eml:
                emlurl = "%s/api/email.lua?id=%s" % (rootURL, eml["id"])
                KibbleBit.pprint("Fetching %s" % emlurl)
                try:
                    rv = jsonapi.get(emlurl, cookie=cookie)
                    if rv and "body" in rv:
                        hits.append([hit["_id"], rv["body"], eml])
                except Exception as err:
                    KibbleBit.pprint(f"Server error: {err}, skipping this email")

    bodies = [hit[1] for hit in hits]
    if bodies:
        # Bug fix: KPEs was previously unbound on the watson branch (NameError)
        KPEs = None
        if "watson" in KibbleBit.config:
            pass  # Haven't written this yet
        elif "azure" in KibbleBit.config:
            KPEs = kpe.azureKPE(KibbleBit, bodies)
        elif "picoapi" in KibbleBit.config:
            KPEs = kpe.picoKPE(KibbleBit, bodies)
        if KPEs is False:
            KibbleBit.pprint("Hit rate limit, not trying further emails for now.")
            # Bug fix: previously fell through and crashed on KPEs[a] below
            return
        if not KPEs:
            KibbleBit.pprint("No key phrases extracted, skipping indexing")
            return

        for a, hit in enumerate(hits):
            kpe_ = KPEs[a]
            bid = hit[0]
            eml = hit[2]
            if not kpe_:
                kpe_ = ["_NULL_"]
            eml["kpe"] = kpe_
            print("Key phrases for %s: %s" % (bid, ", ".join(kpe_)))
            KibbleBit.index("email", bid, eml)
    else:
        KibbleBit.pprint("No emails to analyze")
    KibbleBit.pprint("Done with key phrase extraction")
def scan(KibbleBit, source):
    """Run tone (mood) analysis on recent Pony Mail emails.

    Validates the Pony Mail source URL, fetches up to MAX_COUNT recent
    non-bot emails that lack a "mood" field, sends their bodies to the
    configured tone backend (Watson, Azure or picoAPI) and writes the
    resulting mood scores back onto each email document.
    """
    # Validate URL first
    url = re.match(r"(https?://.+)/list\.html\?(.+)@(.+)", source["sourceURL"])
    if not url:
        KibbleBit.pprint(
            "Malformed or invalid Pony Mail URL passed to scanner: %s"
            % source["sourceURL"]
        )
        source["steps"]["mail"] = {
            "time": time.time(),
            "status": "Could not parse Pony Mail URL!",
            "running": False,
            "good": False,
        }
        KibbleBit.updateSource(source)
        return

    if (
        not "watson" in KibbleBit.config
        and not "azure" in KibbleBit.config
        and not "picoapi" in KibbleBit.config
    ):
        KibbleBit.pprint(
            "No Watson/Azure/picoAPI creds configured, skipping tone analyzer"
        )
        return

    cookie = None
    if "creds" in source and source["creds"]:
        cookie = source["creds"].get("cookie", None)

    rootURL = re.sub(r"list.html.+", "", source["sourceURL"])
    query = {
        "query": {"bool": {"must": [{"term": {"sourceID": source["sourceID"]}}]}},
        "sort": [{"ts": "desc"}],
    }

    # Oversample (x4) so there are enough emails left after bot filtering
    res = KibbleBit.broker.DB.search(
        index=KibbleBit.dbname, doc_type="email", body=query, size=MAX_COUNT * 4
    )
    ec = 0
    hits = []
    for hit in res["hits"]["hits"]:
        eml = hit["_source"]
        if not re.search(ROBITS, eml["sender"]):
            ec += 1
            if ec > MAX_COUNT:
                break
            if "mood" not in eml:
                emlurl = "%s/api/email.lua?id=%s" % (rootURL, eml["id"])
                KibbleBit.pprint("Fetching %s" % emlurl)
                try:
                    rv = jsonapi.get(emlurl, cookie=cookie)
                    if rv and "body" in rv:
                        hits.append([hit["_id"], rv["body"], eml])
                except Exception as err:
                    KibbleBit.pprint(f"Server error: {err}, skipping this email")

    bodies = [hit[1] for hit in hits]
    if bodies:
        # The config guard above ensures exactly one backend is selected
        if "watson" in KibbleBit.config:
            moods = tone.watsonTone(KibbleBit, bodies)
        elif "azure" in KibbleBit.config:
            moods = tone.azureTone(KibbleBit, bodies)
        elif "picoapi" in KibbleBit.config:
            moods = tone.picoTone(KibbleBit, bodies)
        if moods is False:
            KibbleBit.pprint("Hit rate limit, not trying further emails for now.")
            # Bug fix: previously fell through and crashed on moods[a] below
            return

        for a, hit in enumerate(hits):
            mood = moods[a]
            bid = hit[0]
            eml = hit[2]
            eml["mood"] = mood
            # Pick the highest-scoring mood for the log line
            hm = [0, "unknown"]
            for m, s in mood.items():
                if s > hm[0]:
                    hm = [s, m]
            print("Likeliest overall mood for %s: %s" % (bid, hm[1]))
            KibbleBit.index("email", bid, eml)
    else:
        KibbleBit.pprint("No emails to analyze")
    KibbleBit.pprint("Done with tone analysis")
Example #8
0
def scan(KibbleBit, source):
    """Scan a BugZilla instance for ticket activity.

    Parses the source URL into a JSON-RPC endpoint and product name, pages
    through Bug.search results (10000 at a time), then scans every collected
    ticket using 4 worker threads (bzThread).
    """
    url = source["sourceURL"]

    source["steps"]["issues"] = {
        "time": time.time(),
        "status": "Parsing BugZilla changes...",
        "running": True,
        "good": True,
    }
    KibbleBit.updateSource(source)

    bz = re.match(r"(https?://\S+?)(/jsonrpc\.cgi)?[\s:?]+(.+)", url)
    if bz:
        # Bug fix: creds was unbound (NameError at the fetch below) when no
        # username was configured; default to anonymous access.
        creds = None
        if (
            source["creds"]
            and "username" in source["creds"]
            and source["creds"]["username"]
            and len(source["creds"]["username"]) > 0
        ):
            creds = "%s:%s" % (source["creds"]["username"], source["creds"]["password"])
        pendingTickets = []
        openTickets = []

        # Get base URL, list and domain to parse
        dom = bz.group(1)
        dom = re.sub(r"/+$", "", dom)
        u = "%s/jsonrpc.cgi" % dom
        instance = bz.group(3)

        params = {
            "product": [instance],
            "status": [
                "RESOLVED",
                "CLOSED",
                "NEW",
                "UNCOMFIRMED",
                "ASSIGNED",
                "REOPENED",
                "VERIFIED",
            ],
            "include_fields": ["id", "creation_time", "status", "summary", "creator"],
            "limit": 10000,
            "offset": 1,
        }
        # If * is requested, just omit the product name
        if instance == "*":
            params = {
                "status": [
                    "RESOLVED",
                    "CLOSED",
                    "NEW",
                    "UNCOMFIRMED",
                    "ASSIGNED",
                    "REOPENED",
                    "VERIFIED",
                ],
                "include_fields": [
                    "id",
                    "creation_time",
                    "status",
                    "summary",
                    "creator",
                ],
                "limit": 10000,
                "offset": 1,
            }

        ticketsURL = "%s?method=Bug.search&params=[%s]" % (
            u,
            urllib.parse.quote(json.dumps(params)),
        )

        # Page through all search results, 10000 tickets per request
        while True:
            try:
                js = jsonapi.get(ticketsURL, auth=creds)
            except:
                KibbleBit.pprint("Couldn't fetch more tickets, bailing")
                break

            if len(js["result"]["bugs"]) > 0:
                KibbleBit.pprint(
                    "%s: Found %u tickets..."
                    % (
                        source["sourceURL"],
                        ((params.get("offset", 1) - 1) + len(js["result"]["bugs"])),
                    )
                )
                for bug in js["result"]["bugs"]:
                    pendingTickets.append(bug)
                    if not bug["status"] in ["RESOLVED", "CLOSED"]:
                        openTickets.append(bug["id"])
                params["offset"] += 10000
                ticketsURL = "%s?method=Bug.search&params=[%s]" % (
                    u,
                    urllib.parse.quote(json.dumps(params)),
                )
            else:
                KibbleBit.pprint("No more tickets left to scan")
                break

        KibbleBit.pprint(
            "Found %u open tickets, %u closed."
            % (len(openTickets), len(pendingTickets) - len(openTickets))
        )

        # Fan the ticket list out to 4 worker threads sharing one lock
        block = Lock()
        threads = []
        # TODO: Fix this loop
        for i in range(0, 4):
            t = bzThread(KibbleBit, source, block, pendingTickets, openTickets, u, dom)
            threads.append(t)
            t.start()

        for t in threads:
            t.join()

        source["steps"]["issues"] = {
            "time": time.time(),
            "status": "Issue tracker (BugZilla) successfully scanned at "
            + time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(time.time())),
            "running": False,
            "good": True,
        }
        KibbleBit.updateSource(source)
Example #9
0
def scanTicket(bug, KibbleBit, source, openTickets, u, dom):
    """Fetch and index a single BugZilla ticket.

    Decides whether the ticket needs (re)parsing by comparing its indexed
    status against the current open-ticket list, then pulls the bug details,
    close history and comment count via the BugZilla JSON-RPC API, indexes
    `person` documents for the creator/closer as needed and finally appends
    the `issue` document.

    :param bug: bug stub dict with at least id, creator and summary
    :param KibbleBit: scanner context (exists/get/index/append/pprint)
    :param source: source dict with organisation, sourceID, sourceURL, creds
    :param openTickets: list of bug IDs currently open on the server
    :param u: base jsonrpc.cgi URL
    :param dom: BugZilla domain root, used to build the issue URL
    :return: True on success (including "nothing to do"), False on any error
    """
    try:
        key = bug["id"]
        dhash = hashlib.sha224(
            ("%s-%s-%s" % (source["organisation"], source["sourceURL"], key)).encode(
                "ascii", errors="replace"
            )
        ).hexdigest()
        found = KibbleBit.exists("issue", dhash)
        parseIt = False
        if not found:
            parseIt = True
        else:
            # Re-parse only when the indexed status disagrees with live state
            ticket = KibbleBit.get("issue", dhash)
            if ticket["status"] == "closed" and key in openTickets:
                KibbleBit.pprint("Ticket was reopened, reparsing")
                parseIt = True
            elif ticket["status"] == "open" and not key in openTickets:
                KibbleBit.pprint("Ticket was recently closed, parsing it")
                parseIt = True
            else:
                pass
                # print("Ticket hasn't changed, ignoring...")

        if parseIt:
            KibbleBit.pprint("Parsing data from BugZilla for #%s" % key)

            params = {"ids": [int(key)], "limit": 0}
            if (
                source["creds"]
                and "username" in source["creds"]
                and source["creds"]["username"]
                and len(source["creds"]["username"]) > 0
            ):
                params["Bugzilla_login"] = source["creds"]["username"]
                params["Bugzilla_password"] = source["creds"]["password"]
            ticketsURL = "%s?method=Bug.get&params=[%s]" % (
                u,
                urllib.parse.quote(json.dumps(params)),
            )

            js = jsonapi.get(ticketsURL)
            js = js["result"]["bugs"][0]
            creator = {"name": bug["creator"], "email": js["creator"]}
            closer = {}
            cd = getTime(js["creation_time"])
            rd = None
            status = "open"
            if js["status"] in ["CLOSED", "RESOLVED"]:
                status = "closed"
                KibbleBit.pprint("%s was closed, finding out who did that" % key)
                ticketsURL = "%s?method=Bug.history&params=[%s]" % (
                    u,
                    urllib.parse.quote(json.dumps(params)),
                )
                hjs = jsonapi.get(ticketsURL)
                history = hjs["result"]["bugs"][0]["history"]
                # Walk the history to find who flipped the status to closed
                for item in history:
                    for change in item["changes"]:
                        if (
                            change["field_name"] == "status"
                            and "added" in change
                            and change["added"] in ["CLOSED", "RESOLVED"]
                        ):
                            rd = getTime(item["when"])
                            closer = {"name": item["who"], "email": item["who"]}
                            break
            KibbleBit.pprint("Counting comments for %s..." % key)
            ticketsURL = "%s?method=Bug.comments&params=[%s]" % (
                u,
                urllib.parse.quote(json.dumps(params)),
            )
            hjs = jsonapi.get(ticketsURL)
            comments = len(hjs["result"]["bugs"][str(key)]["comments"])

            title = bug["summary"]
            # The remaining API calls are keyed by user names, not bug ids
            del params["ids"]
            if closer:

                pid = hashlib.sha1(
                    ("%s%s" % (source["organisation"], closer["email"])).encode(
                        "ascii", errors="replace"
                    )
                ).hexdigest()
                found = KibbleBit.exists("person", pid)
                if not found:
                    params["names"] = [closer["email"]]
                    ticketsURL = "%s?method=User.get&params=[%s]" % (
                        u,
                        urllib.parse.quote(json.dumps(params)),
                    )

                    try:
                        ujs = jsonapi.get(ticketsURL)
                        displayName = ujs["result"]["users"][0]["real_name"]
                    except Exception:  # narrowed from bare except; fall back
                        displayName = closer["email"]
                    if displayName and len(displayName) > 0:
                        # Add to people db

                        jsp = {
                            "name": displayName,
                            "email": closer["email"],
                            "organisation": source["organisation"],
                            "id": pid,
                        }
                        # print("Updating person DB for closer: %s (%s)" % (displayName, closerEmail))
                        KibbleBit.index("person", pid, jsp)

            if creator:
                pid = hashlib.sha1(
                    ("%s%s" % (source["organisation"], creator["email"])).encode(
                        "ascii", errors="replace"
                    )
                ).hexdigest()
                found = KibbleBit.exists("person", pid)
                if not found:
                    if not creator["name"]:
                        params["names"] = [creator["email"]]
                        ticketsURL = "%s?method=User.get&params=[%s]" % (
                            u,
                            urllib.parse.quote(json.dumps(params)),
                        )
                        try:
                            ujs = jsonapi.get(ticketsURL)
                            creator["name"] = ujs["result"]["users"][0]["real_name"]
                        except Exception:  # narrowed from bare except; fall back
                            creator["name"] = creator["email"]
                    if creator["name"] and len(creator["name"]) > 0:
                        # Add to people db

                        jsp = {
                            "name": creator["name"],
                            "email": creator["email"],
                            "organisation": source["organisation"],
                            "id": pid,
                        }
                        KibbleBit.index("person", pid, jsp)

            jso = {
                "id": dhash,
                "key": key,
                "organisation": source["organisation"],
                "sourceID": source["sourceID"],
                "url": "%s/show_bug.cgi?id=%s" % (dom, key),
                "status": status,
                "created": cd,
                "closed": rd,
                "issuetype": "issue",
                "issueCloser": closer["email"] if "email" in closer else None,
                "createdDate": time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(cd)),
                "closedDate": time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(rd))
                if rd
                else None,
                "changeDate": time.strftime(
                    "%Y/%m/%d %H:%M:%S", time.gmtime(rd if rd else cd)
                ),
                "assignee": None,
                "issueCreator": creator["email"],
                "comments": comments,
                "title": title,
            }
            KibbleBit.append("issue", jso)
            time.sleep(0.5)  # BugZilla is notoriously slow. Maybe remove this later
        return True
    except Exception as err:
        KibbleBit.pprint(err)
        return False
Example #10
0
def scan(KibbleBit, source):
    """Scan a Pony Mail list archive and index its activity.

    Validates the source URL and cookie, then walks backwards month by
    month — starting from the current month — fetching the stats.lua
    JSON for each month and indexing:
      * per-month mail statistics ("mailstats")
      * the month's top-10 threads by reply count ("mailtop")
      * every email ("email") and every sender ("person")

    The walk stops once ``firstYear`` (reported by the stats API) is
    passed. Months already present in the index are skipped, except the
    two most recent months which are always re-parsed.

    Progress and final status are written back to
    ``source["steps"]["mail"]`` via ``KibbleBit.updateSource``.
    """
    # Validate URL first: must look like <base>/list.html?<list>@<domain>
    url = re.match(r"(https?://.+)/list\.html\?(.+)@(.+)", source["sourceURL"])
    if not url:
        KibbleBit.pprint(
            "Malformed or invalid Pony Mail URL passed to scanner: %s" %
            source["sourceURL"])
        source["steps"]["mail"] = {
            "time": time.time(),
            "status": "Could not parse Pony Mail URL!",
            "running": False,
            "good": False,
        }
        KibbleBit.updateSource(source)
        return

    # Pony Mail requires a UI cookie in order to work. Make sure we have one!
    cookie = None
    if "creds" in source and source["creds"]:
        cookie = source["creds"].get("cookie", None)
    if not cookie:
        KibbleBit.pprint(
            "Pony Mail instance at %s requires an authorized cookie, none found! Bailing."
            % source["sourceURL"])
        source["steps"]["mail"] = {
            "time": time.time(),
            "status": "No authorized cookie found in source object.",
            "running": False,
            "good": False,
        }
        KibbleBit.updateSource(source)
        return

    # Notify scanner and DB that this is valid and we've begun parsing
    KibbleBit.pprint("%s is a valid Pony Mail address, parsing" %
                     source["sourceURL"])
    source["steps"]["mail"] = {
        "time": time.time(),
        "status": "Downloading Pony Mail statistics",
        "running": True,
        "good": True,
    }
    KibbleBit.updateSource(source)

    # Get base URL, list and domain to parse
    u = url.group(1)  # base URL of the Pony Mail instance
    l = url.group(2)  # list name (local part)
    d = url.group(3)  # list domain

    # Get this month
    dt = time.gmtime(time.time())
    firstYear = 1970  # placeholder until the API tells us the real first year
    year = dt[0]
    month = dt[1]
    # NOTE(review): tm_mon is always 1-12, so this guard never fires here;
    # it mirrors the wrap-around logic used when decrementing below.
    if month <= 0:
        month += 12
        year -= 1
    months = 0  # how many months we have parsed in this run

    # Hash for keeping records of who we know
    # (sender email -> True once the person is in the DB)
    knowns = {}

    # While we have older archives, continue to parse
    while firstYear <= year:
        statsurl = "%s/api/stats.lua?list=%s&domain=%s&d=%s" % (
            u,
            l,
            d,
            "%04u-%02u" % (year, month),
        )
        # One unique mailstats document per organisation + month URL
        dhash = hashlib.sha224(
            (("%s %s") % (source["organisation"], statsurl)).encode(
                "ascii", errors="replace")).hexdigest()
        found = False
        if KibbleBit.exists("mailstats", dhash):
            found = True
        if months <= 1 or not found:  # Always parse this month's stats :)
            months += 1
            KibbleBit.pprint("Parsing %04u-%02u" % (year, month))
            KibbleBit.pprint(statsurl)
            # pd = first day of the month, used as the timestamp for
            # everything indexed for this month
            pd = datetime.date(year, month, 1).timetuple()
            try:
                js = jsonapi.get(statsurl, cookie=cookie)
            except Exception as err:
                # Skip this month on server error and move one month back
                KibbleBit.pprint(f"Server error: {err}, skipping this month")
                month -= 1
                if month <= 0:
                    month += 12
                    year -= 1
                continue
            if "firstYear" in js:
                firstYear = js["firstYear"]
                # print("First Year is %u" % firstYear)
            else:
                KibbleBit.pprint("JSON was missing fields, aborting!")
                break
            # Map of email id -> id of the email it replies to
            replyList = repliedTo(js["emails"], js["thread_struct"])
            topics = js["no_threads"]
            posters = {}
            no_posters = 0
            emails = len(js["emails"])
            # Collect [id, subject, reply-count] per thread, then keep top 10
            top10 = []
            for eml in js["thread_struct"]:
                count = countSubs(eml, 0)
                subject = ""
                # Find the subject of the thread's root email
                for reml in js["emails"]:
                    if reml["id"] == eml["tid"]:
                        subject = reml["subject"]
                        break
                if len(subject) > 0 and count > 0:
                    # Strip leading Re:/Fwd:/Aw:/Fw: prefixes and control chars
                    # so identical threads hash to the same id
                    subject = re.sub(r"^((re|fwd|aw|fw):\s*)+",
                                     "",
                                     subject,
                                     flags=re.IGNORECASE)
                    subject = re.sub(r"[\r\n\t]+", "", subject, count=20)
                    emlid = hashlib.sha1(
                        subject.encode("ascii", errors="replace")).hexdigest()
                    top10.append([emlid, subject, count])
            # Index the 10 busiest threads of the month
            i = 0
            for top in reversed(sorted(top10, key=lambda x: x[2])):
                i += 1
                if i > 10:
                    break
                KibbleBit.pprint("Found top 10: %s (%s emails)" %
                                 (top[1], top[2]))
                md = time.strftime("%Y/%m/%d %H:%M:%S", pd)
                mlhash = hashlib.sha224(
                    (("%s%s%s%s") %
                     (top[0], source["sourceURL"], source["organisation"],
                      md)).encode("ascii", errors="replace")).hexdigest(
                      )  # one unique id per month per mail thread
                jst = {
                    "organisation": source["organisation"],
                    "sourceURL": source["sourceURL"],
                    "sourceID": source["sourceID"],
                    "date": md,
                    "emails": top[2],
                    "shash": top[0],
                    "subject": top[1],
                    "ts": time.mktime(pd),
                    "id": mlhash,
                }
                KibbleBit.index("mailtop", mlhash, jst)

            # Index every email of the month, plus its sender as a person
            for email in js["emails"]:
                sender = email["from"]
                name = sender
                # Split "Display Name <addr>" into name + address
                m = re.match(r"(.+)\s*<(.+)>", email["from"], flags=re.UNICODE)
                if m:
                    name = m.group(1).replace('"', "").strip()
                    sender = m.group(2)
                if not sender in posters:
                    posters[sender] = {"name": name, "email": sender}
                if not sender in knowns:
                    sid = hashlib.sha1(
                        ("%s%s" % (source["organisation"], sender)).encode(
                            "ascii", errors="replace")).hexdigest()
                    if KibbleBit.exists("person", sid):
                        knowns[sender] = True
                # Upsert the person if unseen, or if we have a display name
                # that differs from the bare address
                if not sender in knowns or name != sender:
                    KibbleBit.append(
                        "person",
                        {
                            "upsert":
                            True,
                            "name":
                            name,
                            "email":
                            sender,
                            "organisation":
                            source["organisation"],
                            "id":
                            hashlib.sha1(
                                ("%s%s" %
                                 (source["organisation"], sender)).encode(
                                     "ascii", errors="replace")).hexdigest(),
                        },
                    )
                    knowns[sender] = True
                # Resolve who this email replied to, if anyone
                replyTo = None
                if email["id"] in replyList:
                    rt = replyList[email["id"]]
                    for eml in js["emails"]:
                        if eml["id"] == rt:
                            replyTo = getSender(eml)
                            print("Email was reply to %s" % sender)
                jse = {
                    "organisation":
                    source["organisation"],
                    "sourceURL":
                    source["sourceURL"],
                    "sourceID":
                    source["sourceID"],
                    "date":
                    time.strftime("%Y/%m/%d %H:%M:%S",
                                  time.gmtime(email["epoch"])),
                    "sender":
                    sender,
                    "address":
                    sender,
                    "subject":
                    email["subject"],
                    "replyto":
                    replyTo,
                    "ts":
                    email["epoch"],
                    "id":
                    email["id"],
                    "upsert":
                    True,
                }
                KibbleBit.append("email", jse)
            # Count distinct posters for the month's stats document
            for sender in posters:
                no_posters += 1

            jso = {
                "organisation": source["organisation"],
                "sourceURL": source["sourceURL"],
                "sourceID": source["sourceID"],
                "date": time.strftime("%Y/%m/%d %H:%M:%S", pd),
                "authors": no_posters,
                "emails": emails,
                "topics": topics,
            }
            # print("Indexing as %s" % dhash)
            KibbleBit.index("mailstats", dhash, jso)
        # Move one month back, wrapping the year when needed
        month -= 1
        if month <= 0:
            month += 12
            year -= 1

    # All done — record success in the source status
    source["steps"]["mail"] = {
        "time":
        time.time(),
        "status":
        "Mail archives successfully scanned at " +
        time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(time.time())),
        "running":
        False,
        "good":
        True,
    }
    KibbleBit.updateSource(source)
Example #11
0
def scan(KibbleBit, source):
    """Scan a JIRA instance for ticket activity.

    Expects ``source["sourceURL"]`` of the form
    ``https://host/browse/PROJECT``; any other URL is silently ignored.

    Requires username/password credentials in ``source["creds"]``. When
    they are missing the source status is marked as failed (previously a
    copy-pasted "running/good" status was written here, which made a
    skipped scan look like a healthy one) and the scan is skipped.

    Otherwise the newest ticket number and the set of currently open
    tickets are fetched, and every ticket id from newest to oldest is
    handed to four jiraThread workers for parsing.
    """
    jira = re.match(r"(https?://.+)/browse/([A-Z0-9]+)", source["sourceURL"])
    if not jira:
        return

    # JIRA NEEDS credentials to do a proper scan!
    creds = None
    # .get() instead of ["creds"]: a source without a creds key should
    # fall through to the "no credentials" branch, not raise KeyError.
    if (source.get("creds") and "username" in source["creds"]
            and source["creds"]["username"]
            and len(source["creds"]["username"]) > 0):
        creds = "%s:%s" % (source["creds"]["username"],
                           source["creds"]["password"])
    if not creds:
        KibbleBit.pprint(
            "JIRA at %s requires authentication, but none was found! Bailing."
            % source["sourceURL"])
        # Record the failure honestly: not running, not good.
        source["steps"]["issues"] = {
            "time": time.time(),
            "status": "JIRA requires authentication, but no credentials were found!",
            "running": False,
            "good": False,
        }
        KibbleBit.updateSource(source)
        return

    source["steps"]["issues"] = {
        "time": time.time(),
        "status": "Parsing JIRA changes...",
        "running": True,
        "good": True,
    }
    KibbleBit.updateSource(source)

    pendingTickets = []
    KibbleBit.pprint("Parsing JIRA activity at %s" % source["sourceURL"])
    source["steps"]["issues"] = {
        "time": time.time(),
        "status": "Downloading changeset",
        "running": True,
        "good": True,
    }
    KibbleBit.updateSource(source)

    # Get base URL and project key to parse
    u = jira.group(1)
    instance = jira.group(2)

    # Find the highest ticket number by asking for the newest ticket.
    lastTicket = 0
    latestURL = (
        "%s/rest/api/2/search?jql=project=%s+order+by+createdDate+DESC&fields=id,key&maxResults=1"
        % (u, instance))
    js = jsonapi.get(latestURL, auth=creds)
    if "issues" in js and len(js["issues"]) == 1:
        key = js["issues"][0]["key"]
        m = re.search(r"-(\d+)$", key)
        if m:
            lastTicket = int(m.group(1))

    # Page through all currently-open tickets, 100 at a time; give up
    # after 10 failed requests.
    openTickets = []
    startAt = 0
    badTries = 0
    while badTries < 10:
        openURL = (
            "%s/rest/api/2/search?jql=project=%s+and+status=open+order+by+createdDate+ASC&fields=id,key&maxResults=100&startAt=%u"
            % (u, instance, startAt))
        try:
            ojs = jsonapi.get(openURL, auth=creds)
            if "issues" not in ojs or len(ojs["issues"]) == 0:
                break
            for item in ojs["issues"]:
                openTickets.append(item["key"])
            KibbleBit.pprint("Found %u open tickets" % len(openTickets))
            startAt += 100
        except Exception:
            KibbleBit.pprint("JIRA borked, retrying")
            badTries += 1
    KibbleBit.pprint("Found %u open tickets" % len(openTickets))

    # Queue every possible ticket id from newest to oldest for the workers.
    for i in reversed(range(1, lastTicket + 1)):
        key = "%s-%u" % (instance, i)
        pendingTickets.append([key, u, source])

    threads = []
    block = threading.Lock()
    KibbleBit.pprint("Scanning tickets using 4 sub-threads")
    for i in range(0, 4):
        t = jiraThread(block, KibbleBit, source, creds, pendingTickets,
                       openTickets)
        threads.append(t)
        t.start()

    # Wait for all workers to drain the pending-ticket queue.
    for t in threads:
        t.join()

    KibbleBit.pprint("Done scanning %s" % source["sourceURL"])

    source["steps"]["issues"] = {
        "time": time.time(),
        "status": "Issue tracker (JIRA) successfully scanned at " +
        time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(time.time())),
        "running": False,
        "good": True,
    }
    KibbleBit.updateSource(source)
Example #12
0
def scanTicket(KibbleBit, key, u, source, creds, openTickets):
    """Scan a single JIRA ticket for activity and people.

    key:         ticket key, e.g. "FOO-123"
    u:           JIRA base URL
    creds:       "user:password" string for authenticated API calls
    openTickets: keys of currently open tickets, used to detect
                 reopen/close transitions since the last scan

    Indexes the ticket as an "issue" document and upserts the
    creator/closer as "person" documents. Returns True when the ticket
    was handled (parsed, or skipped as unchanged), False when the fetch
    failed or the response was missing field data.
    """

    # One unique issue document per organisation + source + ticket key
    dhash = hashlib.sha224(
        ("%s-%s-%s" %
         (source["organisation"], source["sourceURL"], key)).encode(
             "ascii", errors="replace")).hexdigest()
    found = True
    parseIt = False

    # the 'domain' var we try to figure out here is used
    # for faking email addresses and keep them unique,
    # in case JIRA has email visibility turned off.
    domain = "jira"
    m = re.search(r"https?://([^/]+)", u)
    if m:
        domain = m.group(1)

    # Decide whether this ticket needs (re)parsing:
    #  - never seen before
    #  - state changed (reopened / recently closed)
    #  - previous scan stored placeholder "unknown@kibble" people
    found = KibbleBit.exists("issue", dhash)
    if not found:
        KibbleBit.pprint(
            "[%s] We've never seen this ticket before, parsing..." % key)
        parseIt = True
    else:
        ticket = KibbleBit.get("issue", dhash)
        if ticket["status"] == "closed" and key in openTickets:
            KibbleBit.pprint("[%s] Ticket was reopened, reparsing" % key)
            parseIt = True
        elif ticket["status"] == "open" and not key in openTickets:
            KibbleBit.pprint("[%s] Ticket was recently closed, parsing it" %
                             key)
            parseIt = True
        else:
            if (ticket["issueCreator"] == "unknown@kibble"
                    or ticket["issueCloser"]
                    == "unknown@kibble"):  # Gotta redo these!
                parseIt = True
                KibbleBit.pprint(
                    "[%s] Ticket contains erroneous data from a previous scan, reparsing"
                    % key)
            # This is just noise!
            # KibbleBit.pprint("[%s] Ticket hasn't changed, ignoring..." % key)

    if parseIt:
        KibbleBit.pprint("[%s] Parsing data from JIRA at %s..." %
                         (key, domain))
        queryURL = (
            "%s/rest/api/2/issue/%s?fields=creator,reporter,status,issuetype,summary,assignee,resolutiondate,created,priority,changelog,comment,resolution,votes&expand=changelog"
            % (u, key))
        jiraURL = "%s/browse/%s" % (u, key)
        try:
            tjson = jsonapi.get(queryURL, auth=creds)
            if not tjson:
                KibbleBit.pprint("%s does not exist (404'ed)" % key)
                return False
        except requests.exceptions.ConnectionError as err:
            KibbleBit.pprint(
                f"Connection error: {err}, skipping this ticket for now!")
            return False
        # st is truthy when the ticket was closed; closer is who closed it
        st, closer = wasclosed(tjson)
        if st and not closer:
            KibbleBit.pprint("Closed but no closer??")
        closerEmail = None
        status = "closed" if st else "open"

        # Make sure we actually have field data to work with
        if not tjson.get("fields") or not tjson["fields"].get("created"):
            KibbleBit.pprint(
                "[%s] JIRA response is missing field data, ignoring ticket." %
                key)
            return False

        # cd/rd: creation and resolution timestamps (rd is None while open)
        cd = getTime(tjson["fields"]["created"])
        rd = (getTime(tjson["fields"]["resolutiondate"])
              if "resolutiondate" in tjson["fields"]
              and tjson["fields"]["resolutiondate"] else None)
        comments = 0
        if "comment" in tjson["fields"] and tjson["fields"]["comment"]:
            comments = tjson["fields"]["comment"]["total"]
        assignee = (
            tjson["fields"]["assignee"].get(
                "emailAddress",  # Try email, fall back to username
                tjson["fields"]["assignee"].get("name"),
            ) if tjson["fields"].get("assignee") else None)
        creator = (
            tjson["fields"]["reporter"].get(
                "emailAddress",  # Try email, fall back to username
                tjson["fields"]["reporter"].get("name"),
            ) if tjson["fields"].get("reporter") else None)
        title = tjson["fields"]["summary"]
        if closer:
            # print("Parsing closer")
            # De-obfuscate " dot "/" at " spellings into a real address.
            # NOTE(review): if the closer has neither emailAddress nor
            # name, .get(...) yields None and .replace raises — confirm
            # whether wasclosed() guarantees one of the two.
            closerEmail = (closer.get("emailAddress",
                                      closer.get("name")).replace(
                                          " dot ", ".",
                                          10).replace(" at ", "@", 1))
            if not "@" in closerEmail:
                # Bare username: fake a unique address using the JIRA host
                closerEmail = "%s@%s" % (closerEmail, domain)
            displayName = closer.get("displayName", "Unkown")
            if displayName and len(displayName) > 0:
                # Add to people db
                pid = hashlib.sha1(
                    ("%s%s" % (source["organisation"], closerEmail)).encode(
                        "ascii", errors="replace")).hexdigest()
                jsp = {
                    "name": displayName,
                    "email": closerEmail,
                    "organisation": source["organisation"],
                    "id": pid,
                    "upsert": True,
                }
                KibbleBit.append("person", jsp)

        if creator:
            # Same de-obfuscation + fake-domain treatment as the closer
            creator = creator.replace(" dot ", ".", 10).replace(" at ", "@", 1)
            if not "@" in creator:
                creator = "%s@%s" % (creator, domain)
            displayName = (tjson["fields"]["reporter"]["displayName"]
                           if tjson["fields"]["reporter"] else None)
            if displayName and len(displayName) > 0:
                # Add to people db
                pid = hashlib.sha1(
                    ("%s%s" % (source["organisation"], creator)).encode(
                        "ascii", errors="replace")).hexdigest()
                jsp = {
                    "name": displayName,
                    "email": creator,
                    "organisation": source["organisation"],
                    "id": pid,
                    "upsert": True,
                }
                KibbleBit.append("person", jsp)
        if assignee and not "@" in assignee:
            assignee = "%s@%s" % (assignee, domain)
        # Finally, upsert the issue document itself
        jso = {
            "id":
            dhash,
            "key":
            key,
            "organisation":
            source["organisation"],
            "sourceID":
            source["sourceID"],
            "url":
            jiraURL,
            "status":
            status,
            "created":
            cd,
            "closed":
            rd,
            "issuetype":
            "issue",
            "issueCloser":
            closerEmail,
            "createdDate":
            time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(cd)),
            "closedDate":
            time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(rd))
            if rd else None,
            "changeDate":
            time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(rd if rd else cd)),
            "assignee":
            assignee,
            "issueCreator":
            creator,
            "comments":
            comments,
            "title":
            title,
        }
        KibbleBit.append("issue", jso)
    return True