def analyze_bug(self, bug_id, url):
    # Retrieve the main bug information (XML)
    bug_url = url + "show_bug.cgi?id=" + bug_id + "&ctype=xml"
    printdbg(bug_url)

    handler = BugsHandler()
    parser = xml.sax.make_parser()
    parser.setContentHandler(handler)
    f = urllib.urlopen(bug_url)
    try:
        parser.feed(f.read())
    except Exception:
        printerr("Error parsing URL: %s" % bug_url)
        raise
    f.close()
    parser.close()
    # handler.print_debug_data()
    issue = handler.get_issue()

    # Retrieve the changes of the bug
    bug_activity_url = url + "show_activity.cgi?id=" + bug_id
    printdbg(bug_activity_url)
    data_activity = urllib.urlopen(bug_activity_url).read()
    parser = SoupHtmlParser(data_activity, bug_id)
    changes = parser.parse_changes()
    for c in changes:
        issue.add_change(c)
    return issue
def getReviews(self, limit, start):
    args_gerrit = "gerrit query "
    args_gerrit += "project:" + Config.gerrit_project
    args_gerrit += " limit:" + str(limit)
    if start != "":
        args_gerrit += " resume_sortkey:" + start
    args_gerrit += " --all-approvals --format=JSON"

    if 'backend_user' in vars(Config):
        cmd = ["ssh", "-p 29418", Config.backend_user + "@" + Config.url, args_gerrit]
        printdbg("Gerrit cmd: ssh -p 29418 " + Config.backend_user + "@" + Config.url + " " + args_gerrit)
    else:
        cmd = ["ssh", "-p 29418", Config.url, args_gerrit]
        printdbg("Gerrit cmd: ssh -p 29418 " + Config.url + " " + args_gerrit)

    # gerrit query returns one JSON object per line; wrap the lines into a
    # JSON list before decoding
    tickets_raw = subprocess.check_output(cmd)
    tickets_raw = "[" + tickets_raw.replace("\n", ",") + "]"
    tickets_raw = tickets_raw.replace(",]", "]")
    tickets = json.loads(tickets_raw)
    return tickets
def __parse_issue_attachments(self, soup):
    """Parse the attachments of a SourceForge issue."""
    attachments = []
    try:
        files = soup.find('h4', {'id': 'filebar'}).findNext('tbody').findAll('tr')
        for f in files:
            attch = {}
            # Each entry contains three fields (td tags) in the
            # following order: filename, description and URL.
            aux = f.findAll('td')
            attch['filename'] = self.__clean_str(u''.join(aux[0].contents))
            attch['desc'] = self.__clean_str(u''.join(aux[1].contents))
            attch['url'] = SOURCEFORGE_DOMAIN + aux[2].a.get('href')
            attachments.append(attch)
    except AttributeError:
        # There is no attachment
        pass
    except:
        raise SourceForgeParserError("Error parsing issue's attachments")

    printdbg("%s attachments" % len(attachments))
    return attachments
def parse_changes(self, activity):
    changesList = []
    for entry in activity['entries']:
        # print "changed_by:" + entry['author']
        by = People(entry['author'])
        # print "changed_on:" + entry['updated']
        description = entry['description'].split('updated:')
        changes = description.pop(0)
        field = changes.rpartition('\n')[2].strip()
        while description:
            changes = description.pop(0).split('\n')
            values = changes[0].split('=>')
            if len(values) != 2:
                printdbg(field + " not supported in changes analysis")
                old_value = new_value = ""
            else:
                # u'in-progress' => u'closed'
                old_value = self.remove_unicode(values[0].strip())
                if old_value == "''":
                    old_value = ""
                new_value = self.remove_unicode(values[1].strip())
                if new_value == "''":
                    new_value = ""
            update = parse(entry['updated'])
            change = Change(unicode(field), unicode(old_value),
                            unicode(new_value), by, update)
            changesList.append(change)
            if len(changes) > 1:
                field = changes[1].strip()
    return changesList
def __parse_issue_comments(self, soup):
    """Parse the comments of a SourceForge issue."""
    comments = []
    try:
        artifacts = soup.findAll('tr', {'class': ISSUE_COMMENT_CLASS_PATTERN})
        for art in artifacts:
            comment = {}
            rawsub, rawdesc = art.findAll('p')

            # Date and sender are contained in the first 'p'
            a = rawsub.find('a')
            if a:
                comment['by'] = {'name': a.get('title'), 'id': a.string}
            else:
                comment['by'] = {'name': 'nobody', 'id': 'nobody'}

            # The time stamp is the first value of the 'p' contents
            d = self.__clean_str(rawsub.contents[0])
            comment['date'] = self.__str_to_date(
                ISSUE_COMMENT_DATE_PATTERN.match(d).group(1))

            # The description is contained in the second 'p'
            comment['desc'] = self.__clean_str(u''.join(rawdesc.contents))

            comments.append(comment)
    except:
        raise SourceForgeParserError('Error parsing issue comments')

    printdbg("%s comments" % len(comments))
    return comments
def analyze_bug(self, bug_url):
    # Retrieve the main bug information (JSON)
    printdbg(bug_url)
    bug_number = bug_url.split('/')[-1]

    try:
        f = urllib.urlopen(bug_url)
        json_ticket = f.read()
        # print json_ticket
        try:
            issue_allura = json.loads(json_ticket)["ticket"]
            issue = self.parse_bug(issue_allura)
            changes = self.analyze_bug_changes(bug_url)
            for c in changes:
                issue.add_change(c)
            return issue
        except Exception as e:
            print "Problems with Ticket format: " + bug_number
            print e
            return None
    except Exception as e:
        printerr("Error in bug analysis: " + bug_url)
        print(e)
        raise
def __parse_issue_changes(self, soup):
    """Parse the changes history of a SourceForge issue."""
    changes = []
    try:
        entries = soup.find('h4', {'id': 'changebar'}).findNext('tbody').findAll('tr')
        for e in entries:
            change = {}
            # Each change contains four fields (td tags) in the
            # following order: field, old value, date, by.
            aux = e.findAll('td')
            change['field'] = self.__clean_str(aux[0].string)
            change['old_value'] = self.__clean_str(aux[1].string)
            change['date'] = self.__str_to_date(self.__clean_str(aux[2].string))

            if aux[3].a:
                change['by'] = {'name': self.__clean_str(aux[3].a.get('title')),
                                'id': self.__clean_str(aux[3].a.string)}
            else:
                change['by'] = {'name': 'nobody', 'id': 'nobody'}

            changes.append(change)
    except AttributeError:
        # There are no changes
        pass
    except:
        raise SourceForgeParserError('Error parsing issue changes')

    printdbg("%s changes" % len(changes))
    return changes
def _retrieve_issues_ids(self, base_url, version, from_date, not_retrieved=True):
    url = self._get_issues_list_url(base_url, version, from_date)
    printdbg("Getting bugzilla issues from %s" % url)

    f = self._urlopen_auth(url)

    # Problems using the csv library: not all the fields are delimited by
    # the '"' character, so it is easier to use split(). Moreover, we drop
    # the first line of the CSV because it contains the headers.
    ids = []
    csv = f.read().split('\n')[1:]
    for line in csv:
        # 0: bug_id, 7: changeddate
        values = line.split(',')
        id = values[0]
        change_ts = values[7].strip('"')

        # Filter issues already retrieved
        if not_retrieved:
            if id not in self.retrieved or self.retrieved[id] != change_ts:
                ids.append(id)
        else:
            ids.append(id)
    return ids
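# For reference, a sketch of the same id/changeddate extraction using the standard
# csv module, which tolerates fields that are quoted only when they contain commas.
# The helper name extract_ids and its standalone form are assumptions; whether the
# csv module copes better with this tracker's output than split(',') is not
# verified here.
import csv
from StringIO import StringIO  # Python 2, matching the surrounding code

def extract_ids(buglist_csv):
    """Return (bug_id, changeddate) pairs from a Bugzilla buglist CSV dump."""
    reader = csv.reader(StringIO(buglist_csv))
    next(reader)  # drop the header row
    pairs = []
    for row in reader:
        if len(row) > 7:
            pairs.append((row[0], row[7]))  # 0: bug_id, 7: changeddate
    return pairs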
def _login(self):
    """Authenticates a user in a bugzilla tracker."""
    if not (self.backend_user and self.backend_password):
        printdbg("No account data provided. Not logged in bugzilla")
        return

    import cookielib

    cookie_j = cookielib.CookieJar()
    cookie_h = urllib2.HTTPCookieProcessor(cookie_j)

    url = self._get_login_url(self.url)
    values = {'Bugzilla_login': self.backend_user,
              'Bugzilla_password': self.backend_password}

    opener = urllib2.build_opener(cookie_h)
    urllib2.install_opener(opener)
    data = urllib.urlencode(values)
    request = urllib2.Request(url, data)
    urllib2.urlopen(request)

    for c in cookie_j:
        self.cookies[c.name] = c.value

    printout("Logged in bugzilla as %s" % self.backend_user)
    printdbg("Bugzilla session cookies: %s" % self.cookies)
def bugsNumber(self, url):
    oneBug = self.basic_jira_url()
    oneBug += "&tempMax=1"
    printdbg("Getting number of issues: " + oneBug)
    data_url = urllib.urlopen(oneBug).read()
    # The number of issues is reported in the 'total' attribute of the
    # first <issue ...> tag of the XML view
    bugs = data_url.split("<issue")[1].split('"/>')[0].split('total="')[1]
    return int(bugs)
def run(self):
    printout("Running Bicho with delay of %s seconds" % str(self.delay))

    issues_per_xml_query = 500

    bugsdb = get_database(DBJiraBackend())
    bugsdb.insert_supported_traker("jira", "4.1.2")
    trk = Tracker(self.url.split("-")[0], "jira", "4.1.2")
    dbtrk = bugsdb.insert_tracker(trk)

    serverUrl = self.url.split("/browse/")[0]
    query = "/si/jira.issueviews:issue-xml/"
    project = self.url.split("/browse/")[1]

    if len(project.split("-")) > 1:
        # The URL points to a single issue (e.g. PROJECT-123)
        bug_key = project
        project = project.split("-")[0]
        bugs_number = 1

        printdbg(serverUrl + query + bug_key + "/" + bug_key + ".xml")
        parser = xml.sax.make_parser()
        handler = BugsHandler()
        parser.setContentHandler(handler)
        try:
            parser.parse(serverUrl + query + bug_key + "/" + bug_key + ".xml")
            issue = handler.getIssues()[0]
            bugsdb.insert_issue(issue, dbtrk.id)
        except Exception as e:
            # printerr(e)
            print(e)
def _retrieve_issues(self, ids, base_url, trk_id):
    # We want to use pop() to get the oldest first, so we must reverse
    # the order
    ids.reverse()
    while ids:
        query_issues = []
        while len(query_issues) < MAX_ISSUES_PER_XML_QUERY and ids:
            query_issues.append(ids.pop())

        # Retrieve the main bug information
        url = self._get_issues_info_url(base_url, query_issues)
        printdbg("Issues to retrieve from: %s" % url)
        handler = BugsHandler()
        self._safe_xml_parse(url, handler)
        issues = handler.get_issues()

        # Retrieve the changes of each issue
        for issue in issues:
            changes = self._retrieve_issue_activity(base_url, issue.issue)
            for c in changes:
                issue.add_change(c)

            # We store the issue once its complete retrieval is done
            self._store_issue(issue, trk_id)
            self.retrieved[issue.issue] = self._timestamp_to_str(issue.delta_ts)

            time.sleep(self.delay)
def _safe_xml_parse(self, bugs_url, handler):
    f = self._urlopen_auth(bugs_url)
    parser = xml.sax.make_parser()
    parser.setContentHandler(handler)
    try:
        contents = f.read()
    except Exception:
        printerr("Error retrieving URL: %s" % bugs_url)
        raise

    try:
        parser.feed(contents)
        parser.close()
    except Exception:
        # Retry after removing the invalid XML characters
        try:
            parser2 = xml.sax.make_parser()
            parser2.setContentHandler(handler)
            printdbg("Cleaning dirty XML")
            cleaned_contents = ''.join(
                c for c in contents if valid_XML_char_ordinal(ord(c)))
            parser2.feed(cleaned_contents)
            parser2.close()
        except Exception:
            printerr("Error parsing URL: %s" % bugs_url)
            raise
    f.close()
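# valid_XML_char_ordinal is referenced above but not shown here. A minimal sketch
# of such a predicate, assuming it simply follows the XML 1.0 character ranges:
def valid_XML_char_ordinal(i):
    """Return True if the code point i is allowed in an XML 1.0 document."""
    return (0x20 <= i <= 0xD7FF
            or i in (0x9, 0xA, 0xD)
            or 0xE000 <= i <= 0xFFFD
            or 0x10000 <= i <= 0x10FFFF)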
def insert_new_bugs_created(self, date_from, date_to):
    """This method inserts an entry with the data of the creation time."""
    if (not date_from) and (not date_to):
        issues = self.store.find(DBIssue)
    elif not date_from:
        issues = self.store.find(DBIssue, DBIssue.submitted_on < date_to)
    elif not date_to:
        issues = self.store.find(DBIssue, DBIssue.submitted_on > date_from)
    else:
        issues = self.store.find(DBIssue,
                                 And(DBIssue.submitted_on <= date_to,
                                     DBIssue.submitted_on > date_from))
    issues = issues.order_by(Asc(DBIssue.submitted_on))

    # We store the initial data for each bug found
    for i in issues:
        db_ilog = self.get_last_values(i)  # from the issues and changes tables
        db_ilog = self.build_initial_state(db_ilog)
        self.store.add(db_ilog)
        printdbg("Issue #%s created at %s - date_from = %s - date_to = %s"
                 % (db_ilog.issue, db_ilog.date, date_from, date_to))
def _store_issue(self, issue, trk_id):
    try:
        self.bugsdb.insert_issue(issue, trk_id)
        printdbg("Issue #%s stored" % issue.issue)
    except UnicodeEncodeError:
        printerr("UnicodeEncodeError: the issue %s couldn't be stored"
                 % issue.issue)
def _process_issues(self):
    if self._is_issue_url(self.url):
        # FIXME: this only works for one issue; if more id parameters
        # are set, those issues will not be processed
        ids = [self.url.split("show_bug.cgi?id=")[1]]
        printdbg("Issue #%s URL found" % ids[0])
        url = self._get_domain(self.url)
        self._retrieve_issues(ids, url, self.tracker.id)
    else:
        i = 0
        max_rounds = 50  # 50*10000
        url = self._get_domain(self.url)
        last_date, next_date = self._get_last_and_next_dates()

        # Some Bugzillas limit the number of results a query can return.
        # Because of this, Bicho keeps asking for new issues/changes until
        # it finds none.
        ids = self._retrieve_issues_ids(self.url, self.version, next_date)

        while ids:
            if i >= max_rounds:
                break
            printout("Round #%d - Total issues to retrieve: %d" % (i, len(ids)))
            self._retrieve_issues(ids, url, self.tracker.id)
            i += 1

            # Search for new ids, but first check whether they are
            # already stored or not
            last_date, next_date = self._get_last_and_next_dates()
            ids = self._retrieve_issues_ids(self.url, self.version, last_date)

            # If there aren't new issues for the same date, ask for the next one
            if not ids:
                printdbg("No issues found for date %s. Trying with %s"
                         % (last_date, next_date))
                ids = self._retrieve_issues_ids(self.url, self.version, next_date)

        if i > 0:
            printout("No more issues to retrieve")
def run(self):
    """Run the Google Code backend."""
    printout("Running Bicho with delay of %s seconds" % str(self.delay))

    issues_per_query = 250
    start_issue = 1

    bugs = []
    bugsdb = get_database(DBGoogleCodeBackend())

    # still useless
    bugsdb.insert_supported_traker("googlecode", "beta")
    trk = Tracker(Config.url, "googlecode", "beta")
    dbtrk = bugsdb.insert_tracker(trk)

    self.url = Config.url

    # https://code.google.com/feeds/issues/p/mobile-time-care
    self.url_issues = Config.url + "/issues/full?max-results=1"
    printdbg("URL for getting metadata " + self.url_issues)

    d = feedparser.parse(self.url_issues)
    total_issues = int(d['feed']['opensearch_totalresults'])
    print "Total bugs: ", total_issues
    if total_issues == 0:
        printout("No bugs found. Did you provide the correct url?")
        sys.exit(0)

    remaining = total_issues
    print "ETA ", (total_issues * Config.delay) / 60, "m (", (total_issues * Config.delay) / (60 * 60), "h)"

    while start_issue < total_issues:
        self.url_issues = Config.url + "/issues/full?max-results=" + str(issues_per_query)
        self.url_issues += "&start-index=" + str(start_issue)
        printdbg("URL for next issues " + self.url_issues)

        d = feedparser.parse(self.url_issues)

        for entry in d['entries']:
            try:
                issue = self.analyze_bug(entry)
                if issue is None:
                    continue
                bugsdb.insert_issue(issue, dbtrk.id)
                remaining -= 1
                print "Remaining time: ", remaining * Config.delay / 60, "m", " issues ", str(remaining)
                time.sleep(Config.delay)
            except UnicodeEncodeError:
                printerr("UnicodeEncodeError: the issue %s couldn't be stored"
                         % issue.issue)
            except Exception:
                printerr("Error in function analyze_bug")
                pprint.pprint(entry)
                traceback.print_exc(file=sys.stdout)

        # Move on to the next page of results
        start_issue += issues_per_query
def _retrieve_issue_activity(self, base_url, id):
    activity_url = self._get_issue_activity_url(base_url, id)
    printdbg("Retrieving activity of issue #%s from %s" % (id, activity_url))
    data = self._urlopen_auth(activity_url).read()
    parser = SoupHtmlParser(data, id)
    changes = parser.parse_changes()
    return changes
def analyze_bug_changes(self, bug_url):
    bug_number = bug_url.split('/')[-1]
    changes_url = bug_url.replace("rest/", "") + "/feed.atom"
    printdbg("Analyzing issue changes " + changes_url)
    d = feedparser.parse(changes_url)
    changes = self.parse_changes(d)
    return changes
def _get_issue_changeset(self, bug_id):
    aux = Config.url.rfind('/')
    bug_url = Config.url[:aux + 1]
    bug_url = bug_url + "issues/" + unicode(bug_id) + ".atom"
    # changes_url = bug_url.replace("rest/", "") + "/feed.atom"
    printdbg("Analyzing issue changes " + bug_url)
    d = feedparser.parse(bug_url)
    changes = self.parse_changes(d)
    return changes
def run(self):
    """Run the Gerrit backend."""
    printout("Running Bicho with delay of %s seconds" % str(self.delay))

    bugs = []
    bugsdb = get_database(DBGerritBackend())

    # still useless in gerrit
    bugsdb.insert_supported_traker("gerrit", "beta")
    trk = Tracker(Config.url + "_" + Config.gerrit_project, "gerrit", "beta")
    dbtrk = bugsdb.insert_tracker(trk)

    last_mod_time = 0
    last_mod_date = bugsdb.get_last_modification_date(dbtrk.id)
    if last_mod_date:
        printdbg("Last reviews analyzed were modified on date: %s" % last_mod_date)
        last_mod_time = time.mktime(time.strptime(last_mod_date, '%Y-%m-%d %H:%M:%S'))

    limit = 500  # gerrit default 500
    last_item = ""
    # last_item = "001f672c00002f80"
    number_results = limit
    total_reviews = 0

    # wikimedia gerrit returns limit+1
    while number_results == limit or number_results == limit + 1:
        # ordered by lastUpdated
        tickets = self.getReviews(limit, last_item)
        number_results = 0

        reviews = []
        for entry in tickets:
            if 'project' in entry.keys():
                if entry['lastUpdated'] < last_mod_time:
                    break
                reviews.append(entry["number"])
                review_data = self.analyze_review(entry)
                last_item = entry['sortKey']

                # extra changes not included in gerrit changes
                # self.add_merged_abandoned_changes_from_comments(entry, review_data)
                self.add_merged_abandoned_changes(entry, review_data)
                self.add_new_change(review_data)

                bugsdb.insert_issue(review_data, dbtrk.id)
                number_results += 1
            elif 'rowCount' in entry.keys():
                pprint.pprint(entry)

        printdbg("CONTINUE FROM: " + last_item)
        total_reviews = total_reviews + int(number_results)

    self.check_merged_abandoned_changes(bugsdb.store, dbtrk.id)

    print("Done. Number of reviews: " + str(total_reviews))
def store_final_relationships(self):
    """Store the relationships between issues kept in the temporary table."""
    temp_rels = self.store.find(DBIssueTempRelationship)
    for tr in temp_rels:
        aux_issue_id = self._get_db_issue(tr.issue_id, tr.tracker_id)
        aux_related_to = self._get_db_issue(tr.related_to, tr.tracker_id)
        if aux_related_to != -1 and aux_issue_id != -1:
            self._insert_relationship(aux_issue_id.id, tr.type, aux_related_to.id)
        else:
            printdbg("Issue %s belongs to a different tracker and won't be stored"
                     % tr.related_to)
def _get_author_email(self, author_id):
    root = self._get_redmine_root(Config.url)
    author_url = root + "users/" + str(author_id) + ".json"
    # print author_url
    res = None
    try:
        f = urllib2.urlopen(author_url)
        person = json.loads(f.read())
        res = person['user']['mail']
    except (urllib2.HTTPError, KeyError):
        printdbg("User with id %s has no account information" % author_id)
        res = author_id
    return res
def _get_last_and_next_dates(self):
    last_ts = self.bugsdb.get_last_modification_date(self.tracker.id)

    if not last_ts:
        return None, None

    printdbg("Last issues cached were modified on: %s" % last_ts)

    last_ts_str = self._timestamp_to_str(last_ts)

    # We add one second to the last date to avoid retrieving again the
    # changes modified at that date.
    next_ts = last_ts + timedelta(seconds=1)
    next_ts_str = self._timestamp_to_str(next_ts)
    return last_ts_str, next_ts_str
def parse_changes(self):
    soup = BeautifulSoup(self.html)
    self.remove_comments(soup)

    remove_tags = ['a', 'span', 'i']
    try:
        [i.replaceWith(i.contents[0]) for i in soup.findAll(remove_tags)]
    except Exception:
        pass

    changes = []
    # FIXME: the ids of the changes are not stored
    tables = soup.findAll("div", {"class": "actionContainer"})

    for table in tables:
        change_author = table.find("div", {"class": "action-details"})
        if change_author is None or len(change_author) < 3:
            self.changes_lost += 1
            printerr("Change author format not supported. Change lost!")
            continue

        if isinstance(change_author.contents[2], Tag):
            change_author_str = change_author.contents[2]['rel']
        elif isinstance(change_author.contents[2], NavigableString):
            change_author_str = change_author.contents[2]
        else:
            printerr("Change author format not supported")
            printdbg(change_author)
            continue

        author = People(change_author_str.strip())
        author.set_email(BugsHandler.getUserEmail(change_author_str.strip()))

        if isinstance(change_author.contents[4], Tag):
            date_str = change_author.contents[4].find('time')['datetime']
        elif isinstance(change_author.contents[4], NavigableString):
            date_str = change_author.contents[4]
        else:
            printerr("Change date format not supported")
            continue
        date = parse(date_str).replace(tzinfo=None)

        rows = list(table.findAll('tr'))
        for row in rows:
            cols = list(row.findAll('td'))
            if len(cols) == 3:
                field = unicode(cols[0].contents[0].strip())
                old = unicode(cols[1].contents[0].strip())
                new = unicode(cols[2].contents[0].strip())

                change = Change(field, old, new, author, date)
                changes.append(change)

    return changes
def analyze_bug_list(self, nissues, offset, bugsdb, dbtrk_id):
    url_issues = self.basic_jira_url()
    url_issues += "&tempMax=" + str(nissues) + "&pager/start=" + str(offset)
    printdbg(url_issues)

    handler = BugsHandler()
    self.safe_xml_parse(url_issues, handler)

    try:
        issues = handler.getIssues()
        for issue in issues:
            bugsdb.insert_issue(issue, dbtrk_id)
    except Exception:
        import traceback
        traceback.print_exc()
        sys.exit(0)
def run(self, url):
    """Run the SourceForge backend."""
    printout("Running Bicho with delay of %s seconds" % str(self.delay))

    self.url = url
    ids = []
    self.parser = SourceForgeParser()

    # First we take the bug ids
    if url.find("aid=") > 0:
        aux = url.split("aid=")[1].split("&")[0]
        ids.append(aux)
    else:
        ids = self.__get_issues_list(self.url)

    self.__check_tracker_url(self.url)

    # Order the parameters in the url to add the same tracker url
    # to the database without the aid parameter
    self.__order_query(self.url)

    self.db = get_database(DBSourceForgeBackend())
    self.db.insert_supported_traker(SUPPORTED_SF_TRACKERS[0],
                                    SUPPORTED_SF_TRACKERS[1])
    self.__insert_tracker(self.url)

    nbugs = len(ids)
    if nbugs == 0:
        printout("No bugs found. Did you provide the correct url?")
        sys.exit(0)

    for id in ids:
        url = self.url + "&func=detail&aid=%s" % id  # FIXME: urls!!!
        printdbg(url)
        issue = self.__get_issue(url)
        self.__insert_issue(issue)
        time.sleep(self.delay)

    printout("Done. %s bugs analyzed" % nbugs)
def _get_author_identity(self, author_id):
    if author_id in self.identities:
        return self.identities[author_id]

    root = self._get_redmine_root(Config.url)
    author_url = root + "users/" + str(author_id) + ".json"
    # print author_url
    identity = None
    try:
        f = urllib2.urlopen(author_url)
        person = json.loads(f.read())
        identity = person['user']['mail']
    except (urllib2.HTTPError, KeyError):
        printdbg("User with id %s has no account information" % author_id)
        identity = author_id

    self.identities[author_id] = identity
    return identity
def _get_people_id(self, email):
    """Get the id of a user."""
    try:
        p = self.store.find(DBPeople, DBPeople.email == email).one()
        return p.id
    except (AttributeError, NotOneError):
        p = self.store.find(DBPeople, DBPeople.user_id == email).one()
        try:
            return p.id
        except AttributeError:
            # No person was found in People with the email above,
            # so we insert it
            printdbg("Person not found. Inserted with email %s" % email)
            dp = DBPeople(email)
            self.store.add(dp)
            self.store.commit()
            return dp.id
def getUserEmail(username):
    # The email lookup below is currently disabled by this early return
    return ""

    # http://issues.liferay.com/activity?maxResults=1&streams=user+IS+kalman.vincze
    if "_emails" not in vars(BugsHandler):
        BugsHandler._emails = {}

    if username in BugsHandler._emails:
        email = BugsHandler._emails[username]
    else:
        serverUrl = Config.url.split("/browse/")[0]
        user_url = serverUrl + "/activity?maxResults=1&streams=user+IS+" + username
        email = ""
        d = feedparser.parse(user_url)
        if 'entries' in d:
            if len(d['entries']) > 0:
                email = d['entries'][0]['author_detail']['email']
                email = BugsHandler.remove_unicode(email)
        printdbg(username + " " + email)
        BugsHandler._emails[username] = email
    return email
def _set_version(self):
    if self.version:
        printdbg("Bugzilla version: %s" % self.version)
        return

    info_url = self._get_info_url(self.url)

    f = self._urlopen_auth(info_url)
    try:
        printdbg("Getting bugzilla version from %s" % info_url)
        contents = f.read()
    except Exception:
        printerr("Error retrieving URL %s" % info_url)
        raise
    f.close()

    handler = BugzillaHandler()
    parser = xml.sax.make_parser()
    parser.setContentHandler(handler)
    try:
        cleaned_contents = ''.join(
            c for c in contents if valid_XML_char_ordinal(ord(c)))
        parser.feed(cleaned_contents)
    except Exception:
        printerr("Error parsing URL %s" % info_url)
        raise
    parser.close()

    self.version = handler.get_version()
    printdbg("Bugzilla version: %s" % self.version)
def _urlopen_auth(self, url):
    """Open a URL using an authenticated session."""
    keep_trying = True
    while keep_trying:
        if self._is_auth_session():
            opener = urllib2.build_opener()
            for c in self.cookies:
                q = str(c) + '=' + self.cookies[c]
                opener.addheaders.append(('Cookie', q))
        keep_trying = False
        try:
            aux = urllib2.urlopen(url)
        except urllib2.HTTPError as e:
            printerr("The server couldn't fulfill the request.")
            printerr("Error code: %s" % e.code)
        except urllib2.URLError as e:
            printdbg("Bicho failed to reach the Bugzilla server")
            printdbg("Reason: %s" % e.reason)
            printdbg("Bicho goes into hibernation for %s seconds"
                     % HIBERNATION_LENGTH)
            time.sleep(HIBERNATION_LENGTH)
            keep_trying = True
    return aux
def get_people_id(self, email, tracker_id):
    """Get the id of a user."""
    p = self.store.find(DBPeople, DBPeople.email == email).one()
    ##
    ## the code below was created ad hoc for KDE Solid
    ##
    try:
        return p.id
    except AttributeError:
        p = self.store.find(DBPeople, DBPeople.user_id == email).one()
        try:
            return p.id
        except AttributeError:
            # No person was found in People with the email above,
            # so we insert it
            printdbg("Person not found. Inserted with email %s" % email)
            dp = DBPeople(email, tracker_id)
            self.store.add(dp)
            self.store.commit()
            return dp.id
def analyze_bug(self, entry):
    people = People(entry['author_detail']['href'])
    people.set_name(entry['author_detail']['name'])

    issue = GoogleCodeIssue(entry['id'],
                            'issue',
                            entry['title'],
                            entry['content'],
                            people,
                            self._convert_to_datetime(entry['published']))

    # Strange how the parser renames these fields
    if 'issues_uri' in entry.keys():
        people = People(entry['issues_uri'])
        people.set_name(entry['issues_username'])
        issue.assigned_to = people

    issue.status = entry['issues_status']
    issue.resolution = entry['issues_state']
    issue.priority = entry['issues_label']

    # Extended attributes
    # issue.labels = str(issue_googlecode["labels"])
    issue.star = entry['issues_stars']
    issue.ticket_num = entry['issues_id']

    issue.mod_date = self._convert_to_datetime(entry['updated'])
    issue.closed_date = None
    if 'issues_closeddate' in entry.keys():
        issue.closed_date = self._convert_to_datetime(entry['issues_closeddate'])

    changes_url = Config.url + "/issues/" + issue.ticket_num + "/comments/full"
    printdbg("Analyzing issue " + changes_url)
    d = feedparser.parse(changes_url)
    changes = self.parse_changes(d, issue.ticket_num)

    for c in changes:
        issue.add_change(c)
    return issue