def get_all_jobs(KibbleBit, source, joblist, creds):
    real_jobs = []
    building = 0
    for job in joblist:
        # Is this a jobs folder that needs expanding?
        jclass = job.get("_class")
        if jclass in [
                "jenkins.branch.OrganizationFolder",
                "org.jenkinsci.plugins.workflow.multibranch.WorkflowMultiBranchProject",
        ]:
            KibbleBit.pprint("%s is a jobs folder, expanding..." % job["name"])
            csURL = "%s/job/%s" % (
                source["sourceURL"],
                urllib.parse.quote(job["name"].replace("/", "%2F")),
            )
            try:
                child_jobs = jsonapi.get(
                    "%s/api/json?tree=jobs[name,color]&depth=1" % csURL,
                    auth=creds)
                csource = dict(source)
                csource["sourceURL"] = csURL
                # Remember the folder path so child job names stay unique
                if not csource.get("folder"):
                    csource["folder"] = job["name"]
                else:
                    csource["folder"] += "-" + job["name"]
                # Recurse into the folder and collect its jobs
                cjobs, cbuilding = get_all_jobs(KibbleBit, csource,
                                                child_jobs.get("jobs", []),
                                                creds)
                building += cbuilding
                real_jobs.extend(cjobs)
            except Exception:
                KibbleBit.pprint("Couldn't get child jobs, bailing")
                print("%s/api/json?tree=jobs[name,color]&depth=1" % csURL)
        # Or a standard job?
        else:
            # Is it building? A running job has foo_anime as its color.
            if "anime" in job.get("color", ""):
                building += 1
            job["fullURL"] = "%s/job/%s" % (
                source["sourceURL"],
                urllib.parse.quote(job["name"].replace("/", "%2F")),
            )
            job["folder"] = source.get("folder")
            real_jobs.append(job)
    return real_jobs, building
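# A minimal sketch of how get_all_jobs() flattens a plain job list (no folders,
# so no HTTP calls are made). The stub class, source dict and job names below
# are hypothetical test data, not part of the scanner itself.
class _StubKibbleBit:
    def pprint(self, *args):
        print(*args)

def _demo_get_all_jobs():
    source = {"sourceURL": "https://ci.example.org"}
    joblist = [
        {"name": "build-trunk", "color": "blue"},
        {"name": "build-branch", "color": "blue_anime"},  # a running job
    ]
    jobs, building = get_all_jobs(_StubKibbleBit(), source, joblist, creds=None)
    print("Found %u jobs, %u building" % (len(jobs), building))  # 2 jobs, 1 building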
def scanJob(KibbleBit, source, job, creds):
    """ Scans a single Buildbot job (builder) for activity """
    dhash = hashlib.sha224(
        ("%s-%s-%s" % (source["organisation"], source["sourceID"],
                       job)).encode("ascii", errors="replace")).hexdigest()
    found = KibbleBit.exists("cijob", dhash)

    jobURL = "%s/json/builders/%s/builds/_all" % (source["sourceURL"], job)
    KibbleBit.pprint(jobURL)
    jobjson = jsonapi.get(jobURL, auth=creds)

    # If valid JSON, ...
    if jobjson:
        for buildno, data in jobjson.items():
            buildhash = hashlib.sha224(
                ("%s-%s-%s-%s" % (source["organisation"], source["sourceID"],
                                  job, buildno)).encode(
                                      "ascii", errors="replace")).hexdigest()
            builddoc = None
            try:
                builddoc = KibbleBit.get("ci_build", buildhash)
            except Exception:  # not indexed yet, treat as a new build
                pass

            # If this build already completed, no need to parse it again
            if builddoc and builddoc.get("completed", False):
                continue

            KibbleBit.pprint("[%s-%s] This is new or pending, analyzing..." %
                             (job, buildno))
            # A running build reports its current step; a finished one doesn't
            completed = data.get("currentStep") is None

            # Get build status (success, failed, aborted etc)
            status = "building"
            if "successful" in data.get("text", []):
                status = "success"
            if "failed" in data.get("text", []):
                status = "failed"
            if "exception" in data.get("text", []):
                status = "aborted"

            # Calculate when the build finished and how long it took
            DUR = 0
            if completed and len(data.get("times", [])) == 2 and data["times"][1]:
                FIN = data["times"][1]
                DUR = FIN - data["times"][0]
            else:
                FIN = 0

            doc = {
                # Build specific data
                "id": buildhash,
                "date": time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(FIN)),
                "buildID": buildno,
                "completed": completed,
                "duration": DUR * 1000,  # Buildbot reports seconds, not millis
                "job": job,
                "jobURL": "%s/builders/%s" % (source["sourceURL"], job),
                "status": status,
                "started": int(data["times"][0]),
                "ci": "buildbot",

                # Standard docs values
                "sourceID": source["sourceID"],
                "organisation": source["organisation"],
                "upsert": True,
            }
            KibbleBit.append("ci_build", doc)
        # Yay, it worked!
        return True

    # Boo, it failed!
    KibbleBit.pprint("Fetching job data failed!")
    return False
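# The scanners key every document by a SHA-224 of organisation + source + item,
# so re-scans upsert the same document instead of duplicating it. A standalone
# illustration of the scheme (the values below are made up):
import hashlib

def _demo_build_hash():
    org, source_id, job, buildno = "myorg", "src123", "build-trunk", "42"
    buildhash = hashlib.sha224(
        ("%s-%s-%s-%s" % (org, source_id, job, buildno)).encode(
            "ascii", errors="replace")).hexdigest()
    # Stable across runs, so KibbleBit.get("ci_build", buildhash) finds it again
    print(buildhash)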
def scan(KibbleBit, source):
    # Simple URL check
    buildbot = re.match(r"(https?://.+)", source["sourceURL"])
    if buildbot:
        source["steps"]["ci"] = {
            "time": time.time(),
            "status": "Parsing Buildbot job changes...",
            "running": True,
            "good": True,
        }
        KibbleBit.updateSource(source)

        KibbleBit.pprint("Parsing Buildbot activity at %s" % source["sourceURL"])
        source["steps"]["ci"] = {
            "time": time.time(),
            "status": "Downloading changeset",
            "running": True,
            "good": True,
        }
        KibbleBit.updateSource(source)

        # Buildbot may need credentials
        creds = None
        if (source["creds"] and "username" in source["creds"]
                and source["creds"]["username"]
                and len(source["creds"]["username"]) > 0):
            creds = "%s:%s" % (source["creds"]["username"],
                               source["creds"]["password"])

        # Get the job list
        sURL = source["sourceURL"]
        KibbleBit.pprint("Getting job list...")
        builders = jsonapi.get("%s/json/builders" % sURL, auth=creds)

        # Save a queue snapshot
        NOW = int(datetime.datetime.utcnow().timestamp())
        queuehash = hashlib.sha224(
            ("%s-%s-queue-%s" % (source["organisation"], source["sourceID"],
                                 int(time.time()))).encode(
                                     "ascii", errors="replace")).hexdigest()

        # Scan queue items
        blocked = 0
        stuck = 0
        queueSize = 0
        actualQueueSize = 0
        building = 0
        jobs = []

        for builder, data in builders.items():
            jobs.append(builder)
            if data["state"] == "building":
                building += 1
            if data.get("pendingBuilds", 0) > 0:
                # All queued items, even those on offlined builders
                actualQueueSize += data.get("pendingBuilds", 0)
                # Only queues with an online builder (stuff actually waiting)
                if data["state"] == "building":
                    queueSize += data.get("pendingBuilds", 0)
                    blocked += data.get("pendingBuilds", 0)  # Blocked by running builds
                # Stuck builds (in other words, no builder available)
                if data["state"] == "offline":
                    stuck += data.get("pendingBuilds", 0)

        # Write up a queue doc
        queuedoc = {
            "id": queuehash,
            "date": time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(NOW)),
            "time": NOW,
            "size": queueSize,
            "blocked": blocked,
            "stuck": stuck,
            "building": building,
            "ci": "buildbot",

            # Standard docs values
            "sourceID": source["sourceID"],
            "organisation": source["organisation"],
            "upsert": True,
        }
        KibbleBit.append("ci_queue", queuedoc)

        KibbleBit.pprint("Found %u builders in Buildbot" % len(jobs))

        threads = []
        block = threading.Lock()
        KibbleBit.pprint("Scanning jobs using 4 sub-threads")
        for i in range(0, 4):
            t = buildbotThread(block, KibbleBit, source, creds, jobs)
            threads.append(t)
            t.start()

        for t in threads:
            t.join()

        # We're all done, yay
        KibbleBit.pprint("Done scanning %s" % source["sourceURL"])

        source["steps"]["ci"] = {
            "time": time.time(),
            "status": "Buildbot successfully scanned at " +
                      time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(time.time())),
            "running": False,
            "good": True,
        }
        KibbleBit.updateSource(source)
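# buildbotThread itself is not shown in this file. A plausible minimal worker
# would pop builder names off the shared list under the lock and hand each one
# to scanJob(); this is a sketch under that assumption, not the actual Kibble
# class.
import threading

class _SketchBuildbotThread(threading.Thread):
    def __init__(self, block, KibbleBit, source, creds, jobs):
        super().__init__()
        self.block = block          # shared threading.Lock
        self.KibbleBit = KibbleBit
        self.source = source
        self.creds = creds
        self.jobs = jobs            # shared work list, guarded by self.block

    def run(self):
        while True:
            with self.block:
                if not self.jobs:
                    return          # no work left, thread exits
                job = self.jobs.pop(0)
            scanJob(self.KibbleBit, self.source, job, self.creds)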
def scanJob(KibbleBit, source, job, creds):
    """ Scans a single Jenkins job for activity """
    NOW = int(datetime.datetime.utcnow().timestamp())
    jname = job["name"]
    if job.get("folder"):
        jname = job.get("folder") + "-" + job["name"]
    dhash = hashlib.sha224(
        ("%s-%s-%s" % (source["organisation"], source["sourceURL"],
                       jname)).encode("ascii", errors="replace")).hexdigest()
    found = KibbleBit.exists("cijob", dhash)

    # Get $jenkins/job/$job-name/json...
    jobURL = (
        "%s/api/json?depth=2&tree=builds[number,status,timestamp,id,result,duration]"
        % job["fullURL"])
    KibbleBit.pprint(jobURL)
    jobjson = jsonapi.get(jobURL, auth=creds)

    # If valid JSON, ...
    if jobjson:
        for build in jobjson.get("builds", []):
            buildhash = hashlib.sha224(
                ("%s-%s-%s-%s" % (source["organisation"], source["sourceURL"],
                                  jname, build["id"])).encode(
                                      "ascii", errors="replace")).hexdigest()
            builddoc = None
            try:
                builddoc = KibbleBit.get("ci_build", buildhash)
            except Exception:  # not indexed yet, treat as a new build
                pass

            # If this build already completed, no need to parse it again
            if builddoc and builddoc.get("completed", False):
                continue

            KibbleBit.pprint("[%s-%s] This is new or pending, analyzing..." %
                             (jname, build["id"]))
            completed = bool(build["result"])  # result is null while building

            # Estimate time spent in queue
            queuetime = 0
            TS = int(build["timestamp"] / 1000)
            if builddoc:
                queuetime = builddoc.get("queuetime", 0)
            if not completed:
                queuetime = NOW - TS

            # Get build status (success, failed, aborted etc)
            status = "building"
            if build["result"] in ["SUCCESS", "STABLE"]:
                status = "success"
            if build["result"] in ["FAILURE", "UNSTABLE"]:
                status = "failed"
            if build["result"] in ["ABORTED"]:
                status = "aborted"

            # Calculate when the build finished (Jenkins doesn't show this)
            if completed:
                FIN = int(build["timestamp"] + build["duration"]) / 1000
            else:
                FIN = 0

            doc = {
                # Build specific data
                "id": buildhash,
                "date": time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(FIN)),
                "buildID": build["id"],
                "completed": completed,
                "duration": build["duration"],
                "job": jname,
                "jobURL": jobURL,
                "status": status,
                "started": int(build["timestamp"] / 1000),
                "ci": "jenkins",
                "queuetime": queuetime,

                # Standard docs values
                "sourceID": source["sourceID"],
                "organisation": source["organisation"],
                "upsert": True,
            }
            KibbleBit.append("ci_build", doc)
        # Yay, it worked!
        return True

    # Boo, it failed!
    KibbleBit.pprint("Fetching job data failed!")
    return False
def scan(KibbleBit, source):
    # Simple URL check
    jenkins = re.match(r"(https?://.+)", source["sourceURL"])
    if jenkins:
        source["steps"]["ci"] = {
            "time": time.time(),
            "status": "Parsing Jenkins job changes...",
            "running": True,
            "good": True,
        }
        KibbleBit.updateSource(source)

        pendingJobs = []
        KibbleBit.pprint("Parsing Jenkins activity at %s" % source["sourceURL"])
        source["steps"]["ci"] = {
            "time": time.time(),
            "status": "Downloading changeset",
            "running": True,
            "good": True,
        }
        KibbleBit.updateSource(source)

        # Jenkins may need credentials
        creds = None
        if (source["creds"] and "username" in source["creds"]
                and source["creds"]["username"]
                and len(source["creds"]["username"]) > 0):
            creds = "%s:%s" % (source["creds"]["username"],
                               source["creds"]["password"])

        # Get the job list
        sURL = source["sourceURL"]
        KibbleBit.pprint("Getting job list...")
        jobsjs = jsonapi.get("%s/api/json?tree=jobs[name,color]&depth=1" % sURL,
                             auth=creds)

        # Get the current queue
        KibbleBit.pprint("Getting job queue...")
        queuejs = jsonapi.get("%s/queue/api/json?depth=1" % sURL, auth=creds)

        # Save a queue snapshot
        NOW = int(datetime.datetime.utcnow().timestamp())
        queuehash = hashlib.sha224(
            ("%s-%s-queue-%s" % (source["organisation"], source["sourceURL"],
                                 int(time.time()))).encode(
                                     "ascii", errors="replace")).hexdigest()

        # Scan queue items
        blocked = 0
        stuck = 0
        totalqueuetime = 0
        items = queuejs.get("items", [])
        for item in items:
            if item["blocked"]:
                blocked += 1
            if item["stuck"]:
                stuck += 1
            if "inQueueSince" in item:
                totalqueuetime += NOW - int(item["inQueueSince"] / 1000)

        avgqueuetime = totalqueuetime / max(1, len(items))

        # Count how many jobs are building, and expand any folders we find
        actual_jobs, building = get_all_jobs(KibbleBit, source,
                                             jobsjs.get("jobs", []), creds)

        # Write up a queue doc
        queuedoc = {
            "id": queuehash,
            "date": time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(NOW)),
            "time": NOW,
            "building": building,
            "size": len(items),
            "blocked": blocked,
            "stuck": stuck,
            "avgwait": avgqueuetime,
            "ci": "jenkins",

            # Standard docs values
            "sourceID": source["sourceID"],
            "organisation": source["organisation"],
            "upsert": True,
        }
        KibbleBit.append("ci_queue", queuedoc)

        pendingJobs = actual_jobs
        KibbleBit.pprint("Found %u jobs in Jenkins" % len(pendingJobs))

        threads = []
        block = threading.Lock()
        KibbleBit.pprint("Scanning jobs using 4 sub-threads")
        for i in range(0, 4):
            t = jenkinsThread(block, KibbleBit, source, creds, pendingJobs)
            threads.append(t)
            t.start()

        for t in threads:
            t.join()

        # We're all done, yay
        KibbleBit.pprint("Done scanning %s" % source["sourceURL"])

        source["steps"]["ci"] = {
            "time": time.time(),
            "status": "Jenkins successfully scanned at " +
                      time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(time.time())),
            "running": False,
            "good": True,
        }
        KibbleBit.updateSource(source)
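# How the queue snapshot derives its numbers: blocked/stuck are counted per
# queue item, and the average wait is total wait over item count. The sample
# items below are hypothetical; Jenkins reports inQueueSince in milliseconds.
import time

def _demo_queue_stats():
    now = int(time.time())
    items = [
        {"blocked": True, "stuck": False, "inQueueSince": (now - 120) * 1000},
        {"blocked": False, "stuck": True, "inQueueSince": (now - 600) * 1000},
    ]
    blocked = sum(1 for i in items if i["blocked"])
    stuck = sum(1 for i in items if i["stuck"])
    total_wait = sum(now - int(i["inQueueSince"] / 1000) for i in items)
    avgwait = total_wait / max(1, len(items))
    print(blocked, stuck, avgwait)  # 1 1 360.0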
def scan(KibbleBit, source):
    # Validate URL first
    url = re.match(r"(https?://.+)/list\.html\?(.+)@(.+)", source["sourceURL"])
    if not url:
        KibbleBit.pprint(
            "Malformed or invalid Pony Mail URL passed to scanner: %s"
            % source["sourceURL"])
        source["steps"]["mail"] = {
            "time": time.time(),
            "status": "Could not parse Pony Mail URL!",
            "running": False,
            "good": False,
        }
        KibbleBit.updateSource(source)
        return

    if "azure" not in KibbleBit.config and "picoapi" not in KibbleBit.config:
        KibbleBit.pprint(
            "No Azure/picoAPI creds configured, skipping key phrase extraction")
        return

    cookie = None
    if "creds" in source and source["creds"]:
        cookie = source["creds"].get("cookie", None)

    rootURL = re.sub(r"list.html.+", "", source["sourceURL"])
    query = {
        "query": {"bool": {"must": [{"term": {"sourceID": source["sourceID"]}}]}},
        "sort": [{"ts": "desc"}],
    }

    # Get the most recent emails, skipping bots and anything already analyzed
    res = KibbleBit.broker.DB.search(
        index=KibbleBit.dbname, doc_type="email", body=query, size=MAX_COUNT * 4)
    ec = 0
    hits = []
    for hit in res["hits"]["hits"]:
        eml = hit["_source"]
        if not re.search(ROBITS, eml["sender"]):
            ec += 1
            if ec > MAX_COUNT:
                break
            if "kpe" not in eml:
                emlurl = "%s/api/email.lua?id=%s" % (rootURL, eml["id"])
                KibbleBit.pprint("Fetching %s" % emlurl)
                try:
                    rv = jsonapi.get(emlurl, cookie=cookie)
                    if rv and "body" in rv:
                        hits.append([hit["_id"], rv["body"], eml])
                except Exception as err:
                    KibbleBit.pprint(f"Server error: {err}, skipping this email")

    bodies = [hit[1] for hit in hits]
    if bodies:
        KPEs = None
        if "watson" in KibbleBit.config:
            pass  # Haven't written this yet
        elif "azure" in KibbleBit.config:
            KPEs = kpe.azureKPE(KibbleBit, bodies)
        elif "picoapi" in KibbleBit.config:
            KPEs = kpe.picoKPE(KibbleBit, bodies)
        if not KPEs:
            KibbleBit.pprint("Hit rate limit, not trying further emails for now.")
            return

        for a, hit in enumerate(hits):
            kpe_ = KPEs[a]
            bid = hit[0]
            eml = hit[2]
            if not kpe_:
                kpe_ = ["_NULL_"]
            eml["kpe"] = kpe_
            print("Key phrases for %s: %s" % (bid, ", ".join(kpe_)))
            KibbleBit.index("email", bid, eml)
    else:
        KibbleBit.pprint("No emails to analyze")
    KibbleBit.pprint("Done with key phrase extraction")
def scan(KibbleBit, source):
    # Validate URL first
    url = re.match(r"(https?://.+)/list\.html\?(.+)@(.+)", source["sourceURL"])
    if not url:
        KibbleBit.pprint(
            "Malformed or invalid Pony Mail URL passed to scanner: %s"
            % source["sourceURL"])
        source["steps"]["mail"] = {
            "time": time.time(),
            "status": "Could not parse Pony Mail URL!",
            "running": False,
            "good": False,
        }
        KibbleBit.updateSource(source)
        return

    if ("watson" not in KibbleBit.config
            and "azure" not in KibbleBit.config
            and "picoapi" not in KibbleBit.config):
        KibbleBit.pprint(
            "No Watson/Azure/picoAPI creds configured, skipping tone analyzer")
        return

    cookie = None
    if "creds" in source and source["creds"]:
        cookie = source["creds"].get("cookie", None)

    rootURL = re.sub(r"list.html.+", "", source["sourceURL"])
    query = {
        "query": {"bool": {"must": [{"term": {"sourceID": source["sourceID"]}}]}},
        "sort": [{"ts": "desc"}],
    }

    # Get the most recent emails, skipping bots and anything already analyzed
    res = KibbleBit.broker.DB.search(
        index=KibbleBit.dbname, doc_type="email", body=query, size=MAX_COUNT * 4)
    ec = 0
    hits = []
    for hit in res["hits"]["hits"]:
        eml = hit["_source"]
        if not re.search(ROBITS, eml["sender"]):
            ec += 1
            if ec > MAX_COUNT:
                break
            if "mood" not in eml:
                emlurl = "%s/api/email.lua?id=%s" % (rootURL, eml["id"])
                KibbleBit.pprint("Fetching %s" % emlurl)
                try:
                    rv = jsonapi.get(emlurl, cookie=cookie)
                    if rv and "body" in rv:
                        hits.append([hit["_id"], rv["body"], eml])
                except Exception as err:
                    KibbleBit.pprint(f"Server error: {err}, skipping this email")

    bodies = [hit[1] for hit in hits]
    if bodies:
        moods = None
        if "watson" in KibbleBit.config:
            moods = tone.watsonTone(KibbleBit, bodies)
        elif "azure" in KibbleBit.config:
            moods = tone.azureTone(KibbleBit, bodies)
        elif "picoapi" in KibbleBit.config:
            moods = tone.picoTone(KibbleBit, bodies)
        if not moods:
            KibbleBit.pprint("Hit rate limit, not trying further emails for now.")
            return

        for a, hit in enumerate(hits):
            mood = moods[a]
            bid = hit[0]
            eml = hit[2]
            eml["mood"] = mood
            # Find the highest-scoring mood for this email
            hm = [0, "unknown"]
            for m, s in mood.items():
                if s > hm[0]:
                    hm = [s, m]
            print("Likeliest overall mood for %s: %s" % (bid, hm[1]))
            KibbleBit.index("email", bid, eml)
    else:
        KibbleBit.pprint("No emails to analyze")
    KibbleBit.pprint("Done with tone analysis")
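# The dominant-tone pick above can also be written with max(); this is the
# same logic in isolation, using an illustrative score dict.
def _demo_likeliest_mood():
    mood = {"joy": 0.1, "anger": 0.7, "sadness": 0.2}
    score, name = max((s, m) for m, s in mood.items())
    print("Likeliest overall mood: %s (%.1f)" % (name, score))  # anger (0.7)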
def scan(KibbleBit, source): url = source["sourceURL"] source["steps"]["issues"] = { "time": time.time(), "status": "Parsing BugZilla changes...", "running": True, "good": True, } KibbleBit.updateSource(source) bz = re.match(r"(https?://\S+?)(/jsonrpc\.cgi)?[\s:?]+(.+)", url) if bz: if ( source["creds"] and "username" in source["creds"] and source["creds"]["username"] and len(source["creds"]["username"]) > 0 ): creds = "%s:%s" % (source["creds"]["username"], source["creds"]["password"]) pendingTickets = [] openTickets = [] # Get base URL, list and domain to parse dom = bz.group(1) dom = re.sub(r"/+$", "", dom) u = "%s/jsonrpc.cgi" % dom instance = bz.group(3) params = { "product": [instance], "status": [ "RESOLVED", "CLOSED", "NEW", "UNCOMFIRMED", "ASSIGNED", "REOPENED", "VERIFIED", ], "include_fields": ["id", "creation_time", "status", "summary", "creator"], "limit": 10000, "offset": 1, } # If * is requested, just omit the product name if instance == "*": params = { "status": [ "RESOLVED", "CLOSED", "NEW", "UNCOMFIRMED", "ASSIGNED", "REOPENED", "VERIFIED", ], "include_fields": [ "id", "creation_time", "status", "summary", "creator", ], "limit": 10000, "offset": 1, } ticketsURL = "%s?method=Bug.search¶ms=[%s]" % ( u, urllib.parse.quote(json.dumps(params)), ) while True: try: js = jsonapi.get(ticketsURL, auth=creds) except: KibbleBit.pprint("Couldn't fetch more tickets, bailing") break if len(js["result"]["bugs"]) > 0: KibbleBit.pprint( "%s: Found %u tickets..." % ( source["sourceURL"], ((params.get("offset", 1) - 1) + len(js["result"]["bugs"])), ) ) for bug in js["result"]["bugs"]: pendingTickets.append(bug) if not bug["status"] in ["RESOLVED", "CLOSED"]: openTickets.append(bug["id"]) params["offset"] += 10000 ticketsURL = "%s?method=Bug.search¶ms=[%s]" % ( u, urllib.parse.quote(json.dumps(params)), ) else: KibbleBit.pprint("No more tickets left to scan") break KibbleBit.pprint( "Found %u open tickets, %u closed." % (len(openTickets), len(pendingTickets) - len(openTickets)) ) block = Lock() threads = [] # TODO: Fix this loop for i in range(0, 4): t = bzThread(KibbleBit, source, block, pendingTickets, openTickets, u, dom) threads.append(t) t.start() for t in threads: t.join() source["steps"]["issues"] = { "time": time.time(), "status": "Issue tracker (BugZilla) successfully scanned at " + time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(time.time())), "running": False, "good": True, } KibbleBit.updateSource(source)
def scanTicket(bug, KibbleBit, source, openTickets, u, dom):
    try:
        key = bug["id"]
        dhash = hashlib.sha224(
            ("%s-%s-%s" % (source["organisation"], source["sourceURL"],
                           key)).encode("ascii", errors="replace")).hexdigest()
        found = KibbleBit.exists("issue", dhash)
        parseIt = False
        if not found:
            parseIt = True
        else:
            ticket = KibbleBit.get("issue", dhash)
            if ticket["status"] == "closed" and key in openTickets:
                KibbleBit.pprint("Ticket was reopened, reparsing")
                parseIt = True
            elif ticket["status"] == "open" and key not in openTickets:
                KibbleBit.pprint("Ticket was recently closed, parsing it")
                parseIt = True
            else:
                pass  # Ticket hasn't changed, ignoring...

        if parseIt:
            KibbleBit.pprint("Parsing data from BugZilla for #%s" % key)

            params = {"ids": [int(key)], "limit": 0}
            if (source["creds"] and "username" in source["creds"]
                    and source["creds"]["username"]
                    and len(source["creds"]["username"]) > 0):
                params["Bugzilla_login"] = source["creds"]["username"]
                params["Bugzilla_password"] = source["creds"]["password"]
            ticketsURL = "%s?method=Bug.get&params=[%s]" % (
                u,
                urllib.parse.quote(json.dumps(params)),
            )

            js = jsonapi.get(ticketsURL)
            js = js["result"]["bugs"][0]
            creator = {"name": bug["creator"], "email": js["creator"]}
            closer = {}
            cd = getTime(js["creation_time"])
            rd = None
            status = "open"
            if js["status"] in ["CLOSED", "RESOLVED"]:
                status = "closed"
                KibbleBit.pprint("%s was closed, finding out who did that" % key)
                ticketsURL = "%s?method=Bug.history&params=[%s]" % (
                    u,
                    urllib.parse.quote(json.dumps(params)),
                )
                hjs = jsonapi.get(ticketsURL)
                history = hjs["result"]["bugs"][0]["history"]
                # Walk the history to find who set the closing status
                for item in history:
                    for change in item["changes"]:
                        if (change["field_name"] == "status"
                                and "added" in change
                                and change["added"] in ["CLOSED", "RESOLVED"]):
                            rd = getTime(item["when"])
                            closer = {"name": item["who"], "email": item["who"]}
                            break

            KibbleBit.pprint("Counting comments for %s..." % key)
            ticketsURL = "%s?method=Bug.comments&params=[%s]" % (
                u,
                urllib.parse.quote(json.dumps(params)),
            )
            hjs = jsonapi.get(ticketsURL)
            comments = len(hjs["result"]["bugs"][str(key)]["comments"])

            title = bug["summary"]
            del params["ids"]

            if closer:
                pid = hashlib.sha1(
                    ("%s%s" % (source["organisation"],
                               closer["email"])).encode(
                                   "ascii", errors="replace")).hexdigest()
                found = KibbleBit.exists("person", pid)
                if not found:
                    params["names"] = [closer["email"]]
                    ticketsURL = "%s?method=User.get&params=[%s]" % (
                        u,
                        urllib.parse.quote(json.dumps(params)),
                    )
                    try:
                        ujs = jsonapi.get(ticketsURL)
                        displayName = ujs["result"]["users"][0]["real_name"]
                    except Exception:  # fall back to the email address
                        displayName = closer["email"]
                    if displayName and len(displayName) > 0:
                        # Add to people db
                        jsp = {
                            "name": displayName,
                            "email": closer["email"],
                            "organisation": source["organisation"],
                            "id": pid,
                        }
                        KibbleBit.index("person", pid, jsp)

            if creator:
                pid = hashlib.sha1(
                    ("%s%s" % (source["organisation"],
                               creator["email"])).encode(
                                   "ascii", errors="replace")).hexdigest()
                found = KibbleBit.exists("person", pid)
                if not found:
                    if not creator["name"]:
                        params["names"] = [creator["email"]]
                        ticketsURL = "%s?method=User.get&params=[%s]" % (
                            u,
                            urllib.parse.quote(json.dumps(params)),
                        )
                        try:
                            ujs = jsonapi.get(ticketsURL)
                            creator["name"] = ujs["result"]["users"][0]["real_name"]
                        except Exception:  # fall back to the email address
                            creator["name"] = creator["email"]
                    if creator["name"] and len(creator["name"]) > 0:
                        # Add to people db
                        jsp = {
                            "name": creator["name"],
                            "email": creator["email"],
                            "organisation": source["organisation"],
                            "id": pid,
                        }
                        KibbleBit.index("person", pid, jsp)

            jso = {
                "id": dhash,
                "key": key,
                "organisation": source["organisation"],
                "sourceID": source["sourceID"],
                "url": "%s/show_bug.cgi?id=%s" % (dom, key),
                "status": status,
                "created": cd,
                "closed": rd,
                "issuetype": "issue",
                "issueCloser": closer["email"] if "email" in closer else None,
                "createdDate": time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(cd)),
                "closedDate": time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(rd))
                              if rd else None,
                "changeDate": time.strftime("%Y/%m/%d %H:%M:%S",
                                            time.gmtime(rd if rd else cd)),
                "assignee": None,
                "issueCreator": creator["email"],
                "comments": comments,
                "title": title,
            }
            KibbleBit.append("issue", jso)
            time.sleep(0.5)  # BugZilla is notoriously slow. Maybe remove this later
        return True
    except Exception as err:
        KibbleBit.pprint(err)
        return False
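# The BugZilla JSON-RPC GET convention used throughout this scanner: the
# method name goes in the query string and the params object is sent as a
# JSON-encoded, URL-quoted array. A standalone illustration with made-up
# values:
import json
import urllib.parse

def _demo_bugzilla_url():
    u = "https://bz.example.org/jsonrpc.cgi"
    params = {"ids": [1234], "limit": 0}
    url = "%s?method=Bug.get&params=[%s]" % (
        u, urllib.parse.quote(json.dumps(params)))
    print(url)  # .../jsonrpc.cgi?method=Bug.get&params=[%7B%22ids%22...%5D]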
def scan(KibbleBit, source):
    # Validate URL first
    url = re.match(r"(https?://.+)/list\.html\?(.+)@(.+)", source["sourceURL"])
    if not url:
        KibbleBit.pprint(
            "Malformed or invalid Pony Mail URL passed to scanner: %s"
            % source["sourceURL"])
        source["steps"]["mail"] = {
            "time": time.time(),
            "status": "Could not parse Pony Mail URL!",
            "running": False,
            "good": False,
        }
        KibbleBit.updateSource(source)
        return

    # Pony Mail requires a UI cookie in order to work. Make sure we have one!
    cookie = None
    if "creds" in source and source["creds"]:
        cookie = source["creds"].get("cookie", None)
    if not cookie:
        KibbleBit.pprint(
            "Pony Mail instance at %s requires an authorized cookie, none found! Bailing."
            % source["sourceURL"])
        source["steps"]["mail"] = {
            "time": time.time(),
            "status": "No authorized cookie found in source object.",
            "running": False,
            "good": False,
        }
        KibbleBit.updateSource(source)
        return

    # Notify scanner and DB that this is valid and we've begun parsing
    KibbleBit.pprint("%s is a valid Pony Mail address, parsing" % source["sourceURL"])
    source["steps"]["mail"] = {
        "time": time.time(),
        "status": "Downloading Pony Mail statistics",
        "running": True,
        "good": True,
    }
    KibbleBit.updateSource(source)

    # Get base URL, list and domain to parse
    u = url.group(1)
    l = url.group(2)
    d = url.group(3)

    # Start from the current month and walk backwards
    dt = time.gmtime(time.time())
    firstYear = 1970
    year = dt[0]
    month = dt[1]
    if month <= 0:
        month += 12
        year -= 1
    months = 0

    # Hash for keeping records of who we know
    knowns = {}

    # While we have older archives, continue to parse
    while firstYear <= year:
        statsurl = "%s/api/stats.lua?list=%s&domain=%s&d=%s" % (
            u,
            l,
            d,
            "%04u-%02u" % (year, month),
        )
        dhash = hashlib.sha224(
            ("%s %s" % (source["organisation"], statsurl)).encode(
                "ascii", errors="replace")).hexdigest()
        found = KibbleBit.exists("mailstats", dhash)

        if months <= 1 or not found:  # Always parse this month's stats :)
            months += 1
            KibbleBit.pprint("Parsing %04u-%02u" % (year, month))
            KibbleBit.pprint(statsurl)
            pd = datetime.date(year, month, 1).timetuple()
            try:
                js = jsonapi.get(statsurl, cookie=cookie)
            except Exception as err:
                KibbleBit.pprint(f"Server error: {err}, skipping this month")
                month -= 1
                if month <= 0:
                    month += 12
                    year -= 1
                continue
            if "firstYear" in js:
                firstYear = js["firstYear"]
            else:
                KibbleBit.pprint("JSON was missing fields, aborting!")
                break

            replyList = repliedTo(js["emails"], js["thread_struct"])
            topics = js["no_threads"]
            posters = {}
            emails = len(js["emails"])

            # Find the ten biggest threads this month
            top10 = []
            for eml in js["thread_struct"]:
                count = countSubs(eml, 0)
                subject = ""
                for reml in js["emails"]:
                    if reml["id"] == eml["tid"]:
                        subject = reml["subject"]
                        break
                if len(subject) > 0 and count > 0:
                    subject = re.sub(r"^((re|fwd|aw|fw):\s*)+", "", subject,
                                     flags=re.IGNORECASE)
                    subject = re.sub(r"[\r\n\t]+", "", subject, count=20)
                    emlid = hashlib.sha1(
                        subject.encode("ascii", errors="replace")).hexdigest()
                    top10.append([emlid, subject, count])
            i = 0
            for top in reversed(sorted(top10, key=lambda x: x[2])):
                i += 1
                if i > 10:
                    break
                KibbleBit.pprint("Found top 10: %s (%s emails)" % (top[1], top[2]))
                md = time.strftime("%Y/%m/%d %H:%M:%S", pd)
                # One unique id per month per mail thread
                mlhash = hashlib.sha224(
                    ("%s%s%s%s" % (top[0], source["sourceURL"],
                                   source["organisation"], md)).encode(
                                       "ascii", errors="replace")).hexdigest()
                jst = {
                    "organisation": source["organisation"],
                    "sourceURL": source["sourceURL"],
                    "sourceID": source["sourceID"],
                    "date": md,
                    "emails": top[2],
                    "shash": top[0],
                    "subject": top[1],
                    "ts": time.mktime(pd),
                    "id": mlhash,
                }
                KibbleBit.index("mailtop", mlhash, jst)

            for email in js["emails"]:
                sender = email["from"]
                name = sender
                m = re.match(r"(.+)\s*<(.+)>", email["from"], flags=re.UNICODE)
                if m:
                    name = m.group(1).replace('"', "").strip()
                    sender = m.group(2)
                if sender not in posters:
                    posters[sender] = {"name": name, "email": sender}
                if sender not in knowns:
                    sid = hashlib.sha1(
                        ("%s%s" % (source["organisation"], sender)).encode(
                            "ascii", errors="replace")).hexdigest()
                    if KibbleBit.exists("person", sid):
                        knowns[sender] = True
                if sender not in knowns or name != sender:
                    KibbleBit.append(
                        "person",
                        {
                            "upsert": True,
                            "name": name,
                            "email": sender,
                            "organisation": source["organisation"],
                            "id": hashlib.sha1(
                                ("%s%s" % (source["organisation"],
                                           sender)).encode(
                                               "ascii",
                                               errors="replace")).hexdigest(),
                        },
                    )
                    knowns[sender] = True
                replyTo = None
                if email["id"] in replyList:
                    rt = replyList[email["id"]]
                    for eml in js["emails"]:
                        if eml["id"] == rt:
                            replyTo = getSender(eml)
                            print("Email was reply to %s" % replyTo)
                jse = {
                    "organisation": source["organisation"],
                    "sourceURL": source["sourceURL"],
                    "sourceID": source["sourceID"],
                    "date": time.strftime("%Y/%m/%d %H:%M:%S",
                                          time.gmtime(email["epoch"])),
                    "sender": sender,
                    "address": sender,
                    "subject": email["subject"],
                    "replyto": replyTo,
                    "ts": email["epoch"],
                    "id": email["id"],
                    "upsert": True,
                }
                KibbleBit.append("email", jse)

            no_posters = len(posters)
            jso = {
                "organisation": source["organisation"],
                "sourceURL": source["sourceURL"],
                "sourceID": source["sourceID"],
                "date": time.strftime("%Y/%m/%d %H:%M:%S", pd),
                "authors": no_posters,
                "emails": emails,
                "topics": topics,
            }
            KibbleBit.index("mailstats", dhash, jso)

        month -= 1
        if month <= 0:
            month += 12
            year -= 1

    source["steps"]["mail"] = {
        "time": time.time(),
        "status": "Mail archives successfully scanned at " +
                  time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(time.time())),
        "running": False,
        "good": True,
    }
    KibbleBit.updateSource(source)
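# The archive walk above steps one month backwards per iteration, borrowing
# from the previous year when the month underflows. The same stepping in
# isolation, with a hypothetical step count:
import time

def _demo_month_walk(steps=14):
    dt = time.gmtime(time.time())
    year, month = dt[0], dt[1]
    for _ in range(steps):
        print("%04u-%02u" % (year, month))
        month -= 1
        if month <= 0:
            month += 12
            year -= 1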
def scan(KibbleBit, source):
    jira = re.match(r"(https?://.+)/browse/([A-Z0-9]+)", source["sourceURL"])
    if jira:
        # JIRA NEEDS credentials to do a proper scan!
        creds = None
        if (source["creds"] and "username" in source["creds"]
                and source["creds"]["username"]
                and len(source["creds"]["username"]) > 0):
            creds = "%s:%s" % (source["creds"]["username"],
                               source["creds"]["password"])
        if not creds:
            KibbleBit.pprint(
                "JIRA at %s requires authentication, but none was found! Bailing."
                % source["sourceURL"])
            source["steps"]["issues"] = {
                "time": time.time(),
                "status": "No JIRA credentials configured, skipping scan.",
                "running": False,
                "good": False,
            }
            KibbleBit.updateSource(source)
            return

        source["steps"]["issues"] = {
            "time": time.time(),
            "status": "Parsing JIRA changes...",
            "running": True,
            "good": True,
        }
        KibbleBit.updateSource(source)

        pendingTickets = []
        KibbleBit.pprint("Parsing JIRA activity at %s" % source["sourceURL"])
        source["steps"]["issues"] = {
            "time": time.time(),
            "status": "Downloading changeset",
            "running": True,
            "good": True,
        }
        KibbleBit.updateSource(source)

        # Get base URL and project key to parse
        u = jira.group(1)
        instance = jira.group(2)

        # Find the highest ticket number, so we know how far back to go
        lastTicket = 0
        latestURL = (
            "%s/rest/api/2/search?jql=project=%s+order+by+createdDate+DESC&fields=id,key&maxResults=1"
            % (u, instance))
        js = jsonapi.get(latestURL, auth=creds)
        if "issues" in js and len(js["issues"]) == 1:
            key = js["issues"][0]["key"]
            m = re.search(r"-(\d+)$", key)
            if m:
                lastTicket = int(m.group(1))

        # Page through all open tickets, 100 at a time
        openTickets = []
        startAt = 0
        badTries = 0
        while badTries < 10:
            openURL = (
                "%s/rest/api/2/search?jql=project=%s+and+status=open+order+by+createdDate+ASC&fields=id,key&maxResults=100&startAt=%u"
                % (u, instance, startAt))
            try:
                ojs = jsonapi.get(openURL, auth=creds)
                if "issues" not in ojs or len(ojs["issues"]) == 0:
                    break
                for item in ojs["issues"]:
                    openTickets.append(item["key"])
                KibbleBit.pprint("Found %u open tickets" % len(openTickets))
                startAt += 100
            except Exception:
                KibbleBit.pprint("JIRA borked, retrying")
                badTries += 1
        KibbleBit.pprint("Found %u open tickets" % len(openTickets))

        # Queue every ticket number from newest to oldest
        for i in reversed(range(1, lastTicket + 1)):
            key = "%s-%u" % (instance, i)
            pendingTickets.append([key, u, source])

        threads = []
        block = threading.Lock()
        KibbleBit.pprint("Scanning tickets using 4 sub-threads")
        for i in range(0, 4):
            t = jiraThread(block, KibbleBit, source, creds, pendingTickets,
                           openTickets)
            threads.append(t)
            t.start()

        for t in threads:
            t.join()

        KibbleBit.pprint("Done scanning %s" % source["sourceURL"])

        source["steps"]["issues"] = {
            "time": time.time(),
            "status": "Issue tracker (JIRA) successfully scanned at " +
                      time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(time.time())),
            "running": False,
            "good": True,
        }
        KibbleBit.updateSource(source)
def scanTicket(KibbleBit, key, u, source, creds, openTickets):
    """ Scans a single ticket for activity and people """
    dhash = hashlib.sha224(
        ("%s-%s-%s" % (source["organisation"], source["sourceURL"],
                       key)).encode("ascii", errors="replace")).hexdigest()
    parseIt = False

    # The 'domain' var we try to figure out here is used for faking email
    # addresses and keeping them unique, in case JIRA has email visibility
    # turned off.
    domain = "jira"
    m = re.search(r"https?://([^/]+)", u)
    if m:
        domain = m.group(1)

    found = KibbleBit.exists("issue", dhash)
    if not found:
        KibbleBit.pprint("[%s] We've never seen this ticket before, parsing..." % key)
        parseIt = True
    else:
        ticket = KibbleBit.get("issue", dhash)
        if ticket["status"] == "closed" and key in openTickets:
            KibbleBit.pprint("[%s] Ticket was reopened, reparsing" % key)
            parseIt = True
        elif ticket["status"] == "open" and key not in openTickets:
            KibbleBit.pprint("[%s] Ticket was recently closed, parsing it" % key)
            parseIt = True
        else:
            if (ticket["issueCreator"] == "unknown@kibble"
                    or ticket["issueCloser"] == "unknown@kibble"):
                # Gotta redo these!
                parseIt = True
                KibbleBit.pprint(
                    "[%s] Ticket contains erroneous data from a previous scan, reparsing"
                    % key)
            # This is just noise!
            # KibbleBit.pprint("[%s] Ticket hasn't changed, ignoring..." % key)

    if parseIt:
        KibbleBit.pprint("[%s] Parsing data from JIRA at %s..." % (key, domain))
        queryURL = (
            "%s/rest/api/2/issue/%s?fields=creator,reporter,status,issuetype,summary,assignee,resolutiondate,created,priority,changelog,comment,resolution,votes&expand=changelog"
            % (u, key))
        jiraURL = "%s/browse/%s" % (u, key)
        try:
            tjson = jsonapi.get(queryURL, auth=creds)
            if not tjson:
                KibbleBit.pprint("%s does not exist (404'ed)" % key)
                return False
        except requests.exceptions.ConnectionError as err:
            KibbleBit.pprint(f"Connection error: {err}, skipping this ticket for now!")
            return False

        st, closer = wasclosed(tjson)
        if st and not closer:
            KibbleBit.pprint("Closed but no closer??")
        closerEmail = None
        status = "closed" if st else "open"

        # Make sure we actually have field data to work with
        if not tjson.get("fields") or not tjson["fields"].get("created"):
            KibbleBit.pprint(
                "[%s] JIRA response is missing field data, ignoring ticket." % key)
            return False

        cd = getTime(tjson["fields"]["created"])
        rd = (getTime(tjson["fields"]["resolutiondate"])
              if "resolutiondate" in tjson["fields"]
              and tjson["fields"]["resolutiondate"] else None)

        comments = 0
        if "comment" in tjson["fields"] and tjson["fields"]["comment"]:
            comments = tjson["fields"]["comment"]["total"]

        assignee = (
            tjson["fields"]["assignee"].get(
                "emailAddress",  # Try email, fall back to username
                tjson["fields"]["assignee"].get("name"),
            ) if tjson["fields"].get("assignee") else None)
        creator = (
            tjson["fields"]["reporter"].get(
                "emailAddress",  # Try email, fall back to username
                tjson["fields"]["reporter"].get("name"),
            ) if tjson["fields"].get("reporter") else None)
        title = tjson["fields"]["summary"]

        if closer:
            closerEmail = (closer.get("emailAddress", closer.get("name"))
                           .replace(" dot ", ".", 10)
                           .replace(" at ", "@", 1))
            if "@" not in closerEmail:
                closerEmail = "%s@%s" % (closerEmail, domain)
            displayName = closer.get("displayName", "Unknown")
            if displayName and len(displayName) > 0:
                # Add to people db
                pid = hashlib.sha1(
                    ("%s%s" % (source["organisation"], closerEmail)).encode(
                        "ascii", errors="replace")).hexdigest()
                jsp = {
                    "name": displayName,
                    "email": closerEmail,
                    "organisation": source["organisation"],
                    "id": pid,
                    "upsert": True,
                }
                KibbleBit.append("person", jsp)

        if creator:
            creator = creator.replace(" dot ", ".", 10).replace(" at ", "@", 1)
            if "@" not in creator:
                creator = "%s@%s" % (creator, domain)
            displayName = (tjson["fields"]["reporter"]["displayName"]
                           if tjson["fields"]["reporter"] else None)
            if displayName and len(displayName) > 0:
                # Add to people db
                pid = hashlib.sha1(
                    ("%s%s" % (source["organisation"], creator)).encode(
                        "ascii", errors="replace")).hexdigest()
                jsp = {
                    "name": displayName,
                    "email": creator,
                    "organisation": source["organisation"],
                    "id": pid,
                    "upsert": True,
                }
                KibbleBit.append("person", jsp)

        if assignee and "@" not in assignee:
            assignee = "%s@%s" % (assignee, domain)

        jso = {
            "id": dhash,
            "key": key,
            "organisation": source["organisation"],
            "sourceID": source["sourceID"],
            "url": jiraURL,
            "status": status,
            "created": cd,
            "closed": rd,
            "issuetype": "issue",
            "issueCloser": closerEmail,
            "createdDate": time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(cd)),
            "closedDate": time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(rd))
                          if rd else None,
            "changeDate": time.strftime("%Y/%m/%d %H:%M:%S",
                                        time.gmtime(rd if rd else cd)),
            "assignee": assignee,
            "issueCreator": creator,
            "comments": comments,
            "title": title,
        }
        KibbleBit.append("issue", jso)
    return True
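# JIRA instances with email visibility turned off return obfuscated addresses
# ("jane dot doe at example dot org") or bare usernames; the scanner
# de-obfuscates them and falls back to user@<jira-host>. The same rule in
# isolation (the sample inputs and domain are hypothetical):
def _demo_normalise_address(raw, domain="jira.example.org"):
    addr = raw.replace(" dot ", ".", 10).replace(" at ", "@", 1)
    if "@" not in addr:
        addr = "%s@%s" % (addr, domain)
    return addr

# _demo_normalise_address("jane dot doe at example dot org") -> "jane.doe@example.org"
# _demo_normalise_address("jdoe") -> "jdoe@jira.example.org"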