def _set_version(self):
    if self.version:
        printdbg("Bugzilla version: %s" % self.version)
        return

    info_url = self._get_info_url(self.url)

    f = self._urlopen_auth(info_url)
    try:
        printdbg("Getting bugzilla version from %s" % info_url)
        contents = f.read()
    except Exception:
        printerr("Error retrieving URL %s" % info_url)
        raise
    f.close()

    handler = BugzillaHandler()
    parser = xml.sax.make_parser()
    parser.setContentHandler(handler)
    try:
        # strip characters that are not legal in XML 1.0 before parsing
        cleaned_contents = ''.join(
            c for c in contents if valid_XML_char_ordinal(ord(c)))
        parser.feed(cleaned_contents)
    except Exception:
        printerr("Error parsing URL %s" % info_url)
        raise
    parser.close()

    self.version = handler.get_version()
    printdbg("Bugzilla version: %s" % self.version)

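# `valid_XML_char_ordinal` is used above but not defined in this excerpt.
# A minimal sketch following the XML 1.0 `Char` production
# (#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]):
def valid_XML_char_ordinal(i):
    """Return True if code point `i` is a legal XML 1.0 character."""
    return (i == 0x9 or i == 0xA or i == 0xD
            or 0x20 <= i <= 0xD7FF
            or 0xE000 <= i <= 0xFFFD
            or 0x10000 <= i <= 0x10FFFF)
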
def analyze_bug(self, bug_id, url):
    # Retrieving main bug information
    bug_url = url + "show_bug.cgi?id=" + bug_id + "&ctype=xml"
    printdbg(bug_url)

    handler = BugsHandler()
    parser = xml.sax.make_parser()
    parser.setContentHandler(handler)
    f = urllib.urlopen(bug_url)
    try:
        parser.feed(f.read())
    except Exception:
        printerr("Error parsing URL: %s" % bug_url)
        raise
    f.close()
    parser.close()
    #handler.print_debug_data()
    issue = handler.get_issue()

    # Retrieving changes
    bug_activity_url = url + "show_activity.cgi?id=" + bug_id
    printdbg(bug_activity_url)
    data_activity = urllib.urlopen(bug_activity_url).read()
    parser = SoupHtmlParser(data_activity, bug_id)
    changes = parser.parse_changes()
    for c in changes:
        issue.add_change(c)

    return issue

def _safe_xml_parse(self, bugs_url, handler):
    f = self._urlopen_auth(bugs_url)
    parser = xml.sax.make_parser()
    parser.setContentHandler(handler)
    try:
        contents = f.read()
    except Exception:
        printerr("Error retrieving URL: %s" % bugs_url)
        raise

    try:
        parser.feed(contents)
        parser.close()
    except Exception:
        # Clean only the invalid XML and feed a fresh parser with it
        try:
            parser2 = xml.sax.make_parser()
            parser2.setContentHandler(handler)
            printdbg("Cleaning dirty XML")
            cleaned_contents = ''.join(
                c for c in contents if valid_XML_char_ordinal(ord(c)))
            parser2.feed(cleaned_contents)
            parser2.close()
        except Exception:
            printerr("Error parsing URL: %s" % bugs_url)
            raise
    f.close()

def analyze_bug(self, bug_url):
    # Retrieving main bug information
    printdbg(bug_url)
    bug_number = bug_url.split('/')[-1]

    try:
        f = urllib.urlopen(bug_url)
        json_ticket = f.read()
        try:
            issue_allura = json.loads(json_ticket)["ticket"]
            issue = self.parse_bug(issue_allura)
            changes = self.analyze_bug_changes(bug_url)
            for c in changes:
                issue.add_change(c)
            return issue
        except Exception as e:
            printerr("Problems with Ticket format: " + bug_number)
            printerr(str(e))
            return None
    except Exception as e:
        printerr("Error in bug analysis: " + bug_url)
        printerr(str(e))
        raise

def _store_issue(self, issue, trk_id):
    try:
        self.bugsdb.insert_issue(issue, trk_id)
        printdbg("Issue #%s stored" % issue.issue)
    except UnicodeEncodeError:
        printerr("UnicodeEncodeError: the issue %s couldn't be stored"
                 % issue.issue)

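# `printout`, `printerr` and `printdbg` come from Bicho's utils module and
# are not shown in this excerpt. A minimal sketch of plausible equivalents;
# the module-level `DEBUG` flag is an assumption, not Bicho's real switch:
import sys

DEBUG = False  # hypothetical toggle; Bicho reads this from its configuration

def printout(text):
    # informational messages go to stdout
    sys.stdout.write(text + '\n')

def printerr(text):
    # error messages go to stderr
    sys.stderr.write(text + '\n')

def printdbg(text):
    # debug messages are printed only when debugging is enabled
    if DEBUG:
        sys.stdout.write("DBG: " + text + '\n')
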
def _urlopen_auth(self, url):
    """Opens an URL using an authenticated session"""
    keep_trying = True
    while keep_trying:
        if self._is_auth_session():
            # forward the session cookies with every request
            opener = urllib2.build_opener()
            for c in self.cookies:
                q = str(c) + '=' + self.cookies[c]
                opener.addheaders.append(('Cookie', q))
            urllib2.install_opener(opener)
        keep_trying = False
        try:
            aux = urllib2.urlopen(url)
        except urllib2.HTTPError as e:
            printerr("The server couldn't fulfill the request.")
            printerr("Error code: %s" % e.code)
            raise
        except urllib2.URLError as e:
            printdbg("Bicho failed to reach the Bugzilla server")
            printdbg("Reason: %s" % e.reason)
            printdbg("Bicho goes into hibernation for %s seconds"
                     % HIBERNATION_LENGTH)
            time.sleep(HIBERNATION_LENGTH)
            keep_trying = True
    return aux

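# `_is_auth_session` is called above but not shown in this excerpt. A
# minimal sketch, assuming a session counts as authenticated once login
# cookies have been stored on the backend object:
def _is_auth_session(self):
    """Return True when an authenticated (cookie-based) session exists."""
    return bool(getattr(self, 'cookies', None))
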
def run(self):
    """
    """
    printout("Running Bicho with delay of %s seconds" % str(self.delay))

    issues_per_query = 250
    start_issue = 1

    bugsdb = get_database(DBGoogleCodeBackend())

    # still useless
    bugsdb.insert_supported_traker("googlecode", "beta")
    trk = Tracker(Config.url, "googlecode", "beta")
    dbtrk = bugsdb.insert_tracker(trk)

    self.url = Config.url

    # https://code.google.com/feeds/issues/p/mobile-time-care
    self.url_issues = Config.url + "/issues/full?max-results=1"
    printdbg("URL for getting metadata " + self.url_issues)

    d = feedparser.parse(self.url_issues)
    total_issues = int(d['feed']['opensearch_totalresults'])
    print "Total bugs: ", total_issues
    if total_issues == 0:
        printout("No bugs found. Did you provide the correct url?")
        sys.exit(0)
    remaining = total_issues

    print "ETA ", (total_issues * Config.delay) / 60, "m (", \
        (total_issues * Config.delay) / (60 * 60), "h)"

    while start_issue < total_issues:
        self.url_issues = Config.url + "/issues/full?max-results=" \
            + str(issues_per_query)
        self.url_issues += "&start-index=" + str(start_issue)
        printdbg("URL for next issues " + self.url_issues)

        d = feedparser.parse(self.url_issues)

        for entry in d['entries']:
            try:
                issue = self.analyze_bug(entry)
                if issue is None:
                    continue
                bugsdb.insert_issue(issue, dbtrk.id)
                remaining -= 1
                print "Remaining time: ", remaining * Config.delay / 60, \
                    "m", " issues ", str(remaining)
                time.sleep(Config.delay)
            except UnicodeEncodeError:
                printerr("UnicodeEncodeError: the issue %s couldn't be stored"
                         % issue.issue)
            except Exception:
                printerr("Error in function analyze_bug ")
                pprint.pprint(entry)
                traceback.print_exc(file=sys.stdout)

        # move the index to the first issue of the next page
        start_issue += issues_per_query

def parse_changes(self):
    soup = BeautifulSoup(self.html)
    self.remove_comments(soup)
    remove_tags = ['a', 'span', 'i']
    changes = []
    tables = soup.findAll('table')

    # We look for the first table with 5 columns
    table = None
    for table in tables:
        if len(table.tr.findAll('th')) == 5:
            try:
                for i in table.findAll(remove_tags):
                    i.replaceWith(i.text)
            except:
                printerr("error removing HTML tags")
            break

    if table is None:
        return changes

    rows = list(table.findAll('tr'))
    for row in rows[1:]:
        cols = list(row.findAll('td'))
        if len(cols) == 5:
            person_email = cols[0].contents[0].strip()
            person_email = unicode(person_email.replace('@', '@'))
            date = self._to_datetime_with_secs(cols[1].contents[0].strip())
            # when the field contains an Attachment, the list has more
            # than one element. For example:
            #
            #   [u'\n', u'Attachment #12723', u'\n Flag\n ']
            #
            if len(cols[2].contents) > 1:
                aux_c = unicode(" ".join(cols[2].contents))
                field = unicode(aux_c.replace("\n", "").strip())
            else:
                field = unicode(cols[2].contents[0].replace("\n", "").strip())
            removed = unicode(cols[3].contents[0].strip())
            added = unicode(cols[4].contents[0].strip())
        else:
            # same as above with the Attachment example
            if len(cols[0].contents) > 1:
                aux_c = unicode(" ".join(cols[0].contents))
                field = aux_c.replace("\n", "").strip()
            else:
                field = cols[0].contents[0].strip()
            removed = cols[1].contents[0].strip()
            added = cols[2].contents[0].strip()

        field, removed, added = self.sanityze_change(field, removed, added)
        by = People(person_email)
        by.set_email(person_email)
        change = Change(field, removed, added, by, date)
        changes.append(change)

    return changes

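# Usage sketch for the parser above: analyze_bug feeds it the raw HTML of
# Bugzilla's show_activity.cgi page. A minimal standalone driver, assuming
# urllib is imported as in analyze_bug and the tracker URL is illustrative:
#
#   activity_html = urllib.urlopen(
#       "http://bugs.example.org/show_activity.cgi?id=1234").read()
#   changes = SoupHtmlParser(activity_html, "1234").parse_changes()
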
def __parse_issue_category(self, soup):
    """Returns the category of the issue."""
    try:
        category = soup.find({"label": True},
                             text=ISSUE_CATEGORY_PATTERN).findNext("p")
        return category.contents[0]
    except:
        printerr("Error parsing issue category")
        raise SourceForgeParserError("Error parsing issue category")

def __parse_issue_summary(self, soup):
    """Returns the summary of the issue."""
    try:
        m = ISSUE_SUMMARY_PATTERN.match(unicode(soup.title.string))
        return m.group(1)
    except:
        printerr("Error parsing issue summary")
        raise SourceForgeParserError("Error parsing issue summary")

def __parse_issue_group(self, soup):
    """Returns the group of the issue."""
    try:
        group = soup.find({"label": True},
                          text=ISSUE_GROUP_PATTERN).findNext("p")
        return group.contents[0]
    except:
        printerr("Error parsing issue group")
        raise SourceForgeParserError("Error parsing issue group")

def __parse_issue_resolution(self, soup):
    """Returns the resolution of the issue."""
    try:
        resolution = soup.find({"label": True},
                               text=ISSUE_RESOLUTION_PATTERN).findNext("p")
        return resolution.contents[0]
    except:
        printerr("Error parsing issue resolution")
        raise SourceForgeParserError("Error parsing issue resolution")

def __parse_issue_assigned_to(self, soup):
    """Returns who the issue is assigned to."""
    try:
        assigned = soup.find({"label": True},
                             text=ISSUE_ASSIGNED_TO_PATTERN).findNext("p")
        return assigned.contents[0]
    except:
        printerr("Error parsing issue assigned to")
        raise SourceForgeParserError("Error parsing issue assigned to")

def __parse_issue_priority(self, soup):
    """Returns the priority of the issue."""
    try:
        priority = soup.find({"label": True},
                             text=ISSUE_PRIORITY_PATTERN).findNext("p")
        return priority.contents[0]
    except:
        printerr("Error parsing issue priority")
        raise SourceForgeParserError("Error parsing issue priority")

def __parse_issue_status(self, soup):
    """Returns the status of the issue."""
    try:
        status = soup.find({"label": True},
                           text=ISSUE_STATUS_PATTERN).findNext("p")
        return status.contents[0]
    except:
        printerr("Error parsing issue status")
        raise SourceForgeParserError("Error parsing issue status")

def __parse_issue_id(self, soup):
    """Returns the identifier of the issue."""
    try:
        m = ISSUE_ID_PATTERN.match(unicode(soup.title.string))
        return m.group(1)
    except:
        printerr("Error parsing issue id")
        raise SourceForgeParserError("Error parsing issue id")

def __parse_issue_visibility(self, soup):
    """Returns the visibility of the issue."""
    try:
        visibility = soup.find({"label": True},
                               text=ISSUE_VISIBILITY_PATTERN).findNext("p")
        return visibility.contents[0]
    except:
        printerr("Error parsing issue visibility")
        raise SourceForgeParserError("Error parsing issue visibility")

def __init__(self):
    self.url = Config.url
    self.delay = Config.delay

    try:
        self.backend_password = Config.backend_password
        self.backend_user = Config.backend_user
    except AttributeError:
        printerr("\n--backend-user and --backend-password are mandatory "
                 "to download bugs from Github\n")
        sys.exit(1)

    self.remaining_ratelimit = 0

def parse_changes(self):
    soup = BeautifulSoup(self.html)
    self.remove_comments(soup)
    remove_tags = ['i']
    try:
        [i.replaceWith(i.contents[0]) for i in soup.findAll(remove_tags)]
    except Exception:
        pass

    changes = []
    # FIXME: the ids of the changes are not stored
    tables = soup.findAll("div", {"class": "actionContainer"})
    for table in tables:
        author_date_text = table.find("div", {"class": "action-details"})

        if author_date_text is None:
            # no changes have been performed on the issue
            continue
        elif len(author_date_text) < 3:
            self.changes_lost += 1
            printerr("Change author format not supported. Change lost!")
            continue

        a_link = author_date_text.findAll('a')[1]
        # at this point a_link will be similar to the line below:
        # <a class="user-hover user-avatar" rel="kiyoshi.lee"
        rel = a_link.attrs[1]
        author_url = rel[1]
        author = People(author_url)

        # we look for a string similar to:
        # <time datetime="2011-11-19T00:27-0800">19/Nov/11 12:27 AM</time>
        raw_date = author_date_text.findAll('time')[0].attrs[0][1]
        date = parse(raw_date).replace(tzinfo=None)

        rows = list(table.findAll('tr'))
        for row in rows:
            cols = list(row.findAll('td'))
            if len(cols) == 3:
                field = unicode(cols[0].contents[0].strip())
                if field == "Assignee":
                    old = unicode(self._get_identifier(cols[1]))
                    new = unicode(self._get_identifier(cols[2]))
                else:
                    old = unicode(cols[1].contents[0].strip())
                    new = unicode(cols[2].contents[0].strip())
                change = Change(field, old, new, author, date)
                changes.append(change)

    return changes

def __parse_issue_description(self, soup):
    """Returns the description of the issue."""
    try:
        # Details is a list of unicode strings, so the strings are
        # joined into a single string to build the description field.
        details = soup.find({"label": True},
                            text=ISSUE_DETAILS_PATTERN).findNext("p")
        desc = u"".join(details.contents)
        return desc
    except:
        printerr("Error parsing issue description")
        raise SourceForgeParserError("Error parsing issue description")

def safe_xml_parse(self, url_issues, handler):
    f = urllib.urlopen(url_issues)
    parser = xml.sax.make_parser()
    parser.setContentHandler(handler)
    try:
        contents = f.read()
        parser.feed(contents)
        parser.close()
    except Exception:
        # Clean only the invalid XML and feed a fresh parser with it
        try:
            parser2 = xml.sax.make_parser()
            parser2.setContentHandler(handler)
            printdbg("Cleaning dirty XML")
            cleaned_contents = ''.join(
                c for c in contents if self.valid_XML_char_ordinal(ord(c)))
            parser2.feed(cleaned_contents)
            parser2.close()
        except Exception:
            printerr("Error parsing URL: %s" % url_issues)
            raise
    f.close()

def parse_changes(self):
    soup = BeautifulSoup(self.html)
    self.remove_comments(soup)
    remove_tags = ['a', 'span', 'i']
    try:
        [i.replaceWith(i.contents[0]) for i in soup.findAll(remove_tags)]
    except Exception:
        pass

    changes = []
    # FIXME: the ids of the changes are not stored
    tables = soup.findAll("div", {"class": "actionContainer"})
    for table in tables:
        change_author = table.find("div", {"class": "action-details"})
        if change_author is None or len(change_author) < 3:
            self.changes_lost += 1
            printerr("Change author format not supported. Change lost!")
            continue

        if isinstance(change_author.contents[2], Tag):
            change_author_str = change_author.contents[2]['rel']
        elif isinstance(change_author.contents[2], NavigableString):
            change_author_str = change_author.contents[2]
        else:
            printerr("Change author format not supported")
            printdbg(change_author)
            continue

        author = People(change_author_str.strip())
        author.set_email(BugsHandler.getUserEmail(change_author_str.strip()))

        if isinstance(change_author.contents[4], Tag):
            date_str = change_author.contents[4].find('time')['datetime']
        elif isinstance(change_author.contents[4], NavigableString):
            date_str = change_author.contents[4]
        else:
            printerr("Change date format not supported")
            continue
        date = parse(date_str).replace(tzinfo=None)

        rows = list(table.findAll('tr'))
        for row in rows:
            cols = list(row.findAll('td'))
            if len(cols) == 3:
                field = unicode(cols[0].contents[0].strip())
                old = unicode(cols[1].contents[0].strip())
                new = unicode(cols[2].contents[0].strip())
                change = Change(field, old, new, author, date)
                changes.append(change)

    return changes

def run(self):
    print("Running Bicho with delay of %s seconds" % str(self.delay))

    url = self.url
    pname = self.__get_project_from_url()
    bugsdb = get_database(DBLaunchpadBackend())
    printdbg(url)

    # launchpad needs a temp directory to store cached data
    homedir = pwd.getpwuid(os.getuid()).pw_dir
    cachedir = os.path.join(homedir, ".cache/bicho/")
    if not os.path.exists(cachedir):
        os.makedirs(cachedir)
    cre_file = os.path.join(cachedir + 'launchpad-credential')
    self.lp = Launchpad.login_with('Bicho', 'production',
                                   credentials_file=cre_file)

    aux_status = ["New", "Incomplete", "Opinion", "Invalid", "Won't Fix",
                  "Expired", "Confirmed", "Triaged", "In Progress",
                  "Fix Committed", "Fix Released",
                  "Incomplete (with response)",
                  "Incomplete (without response)"]

    last_mod_date = bugsdb.get_last_modification_date()

    if last_mod_date:
        bugs = self.lp.projects[pname].searchTasks(
            status=aux_status, omit_duplicates=False,
            order_by='date_last_updated', modified_since=last_mod_date)
    else:
        bugs = self.lp.projects[pname].searchTasks(
            status=aux_status, omit_duplicates=False,
            order_by='date_last_updated')
    printdbg("Last bug already cached: %s" % last_mod_date)

    nbugs = len(bugs)

    # still useless
    bugsdb.insert_supported_traker("launchpad", "x.x")
    trk = Tracker(url, "launchpad", "x.x")
    dbtrk = bugsdb.insert_tracker(trk)

    if nbugs == 0:
        printout("No bugs found. Did you provide the correct url?")
        sys.exit(0)

    analyzed = []

    for bug in bugs:
        if bug.web_link in analyzed:
            continue  # for the bizarre error #338

        try:
            issue_data = self.analyze_bug(bug)
        except Exception:
            # FIXME: the exception is not handled, just re-raised
            printerr("Error in function analyzeBug with URL: %s and Bug: %s"
                     % (url, bug))
            raise

        try:
            # we can have meta-trackers, but we want to store the
            # original tracker name
            tr_url = self.__get_tracker_url_from_bug(bug)
            if tr_url != url:
                aux_trk = Tracker(tr_url, "launchpad", "x.x")
                dbtrk = bugsdb.insert_tracker(aux_trk)
            bugsdb.insert_issue(issue_data, dbtrk.id)
        except UnicodeEncodeError:
            printerr("UnicodeEncodeError: the issue %s couldn't be stored"
                     % issue_data.issue)
        except NotFoundError:
            printerr("NotFoundError: the issue %s couldn't be stored"
                     % issue_data.issue)
        except Exception as e:
            printerr("Unexpected Error: the issue %s couldn't be stored"
                     % issue_data.issue)
            print e

        analyzed.append(bug.web_link)  # for the bizarre error #338
        time.sleep(self.delay)

def parse_issue(self, html):
    """Parses a SourceForge bug page and returns the issue it describes."""
    soup = BeautifulSoup.BeautifulSoup(
        html, convertEntities=BeautifulSoup.BeautifulSoup.XHTML_ENTITIES)
    self.__prepare_soup(soup)

    try:
        id = self.__parse_issue_id(soup)
        summary = self.__parse_issue_summary(soup)
        desc = self.__parse_issue_description(soup)
        submission = self.__parse_issue_submission(soup)
        priority = self.__parse_issue_priority(soup)
        status = self.__parse_issue_status(soup)
        resolution = self.__parse_issue_resolution(soup)
        asignation = self.__parse_issue_assigned_to(soup)
        category = self.__parse_issue_category(soup)
        group = self.__parse_issue_group(soup)
        # FIXME the visibility var below is never used!!
        #visibility = self.__parse_issue_visibility(soup)

        try:
            comments = self.__parse_issue_comments(soup)
        except SourceForgeParserError:
            printerr("Error parsing issue's comments")
            comments = None

        try:
            attachments = self.__parse_issue_attachments(soup)
        except SourceForgeParserError:
            printerr("Error parsing issue's attachments")
            attachments = None

        try:
            changes = self.__parse_issue_changes(soup)
        except SourceForgeParserError:
            printerr("Error parsing issue's changes")
            changes = None
    except:
        raise

    submitted_by = People(submission['id'])
    submitted_by.set_name(submission['name'])
    submitted_on = submission['date']

    #assigned_to = People(asignation)
    assigned_to = People('')
    assigned_to.set_name(asignation)

    issue = SourceForgeIssue(id, 'bug', summary, desc,
                             submitted_by, submitted_on)
    issue.set_priority(priority)
    issue.set_status(status, resolution)
    issue.set_assigned(assigned_to)
    issue.set_category(category)
    issue.set_group(group)

    if comments:
        for comment in comments:
            submitted_by = People(comment['by']['id'])
            submitted_by.set_name(comment['by']['name'])
            issue.add_comment(Comment(comment['desc'], submitted_by,
                                      comment['date']))

    if attachments:
        for attachment in attachments:
            a = Attachment(attachment['url'])
            a.set_name(attachment['filename'])
            a.set_description(attachment['desc'])
            issue.add_attachment(a)

    if changes:
        for change in changes:
            changed_by = People(change['by']['id'])
            changed_by.set_name(change['by']['name'])
            issue.add_change(Change(change['field'], change['old_value'],
                                    'unknown', changed_by, change['date']))

    return issue

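# Usage sketch for parse_issue: it expects the full HTML of a SourceForge
# bug page. The parser class name is not shown in this excerpt, so
# `SourceForgeParser` below is an assumption, and the URL is illustrative:
#
#   html = urllib.urlopen(
#       "http://sourceforge.net/support/tracker.php?aid=1234").read()
#   issue = SourceForgeParser().parse_issue(html)
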
def run(self, url):
    print("Running Bicho with delay of %s seconds" % str(self.delay))

    # retrieving data in csv format
    if not self.url:
        self.url = url

    bugsdb = get_database(DBBugzillaBackend())

    url = self.url + "&ctype=csv"
    printdbg(url)

    # The url is a bug
    if url.find("show_bug.cgi") > 0:
        bugs = []
        bugs.append(self.url.split("show_bug.cgi?id=")[1])
    else:
        f = urllib.urlopen(url)

        # Problems using the csv library, not all the fields are delimited
        # by the '"' character. Easier using split.
        bugList_csv = f.read().split('\n')
        bugs = []
        # Ignoring the first row
        for bug_csv in bugList_csv[1:]:
            # The first field is the id field, necessary to later create
            # the url to retrieve bug information
            bugs.append(bug_csv.split(',')[0])

    nbugs = len(bugs)

    url = self.url
    url = self.get_domain(url)
    if url.find("apache") > 0:
        url = url + "bugzilla/"

    # still useless
    bugsdb.insert_supported_traker("bugzilla", "3.2.3")
    trk = Tracker(url, "bugzilla", "3.2.3")
    dbtrk = bugsdb.insert_tracker(trk)

    if nbugs == 0:
        printout("No bugs found. Did you provide the correct url?")
        sys.exit(0)

    for bug in bugs:
        # The URLs from bugzilla (so far KDE and GNOME) are like:
        # http://<domain>/show_bug.cgi?id=<bugid>&ctype=xml
        try:
            issue_data = self.analyze_bug(bug, url)
        except Exception:
            # FIXME: the exception is not handled, just re-raised
            printerr("Error in function analyzeBug with URL: %s and Bug: %s"
                     % (url, bug))
            raise

        try:
            bugsdb.insert_issue(issue_data, dbtrk.id)
        except UnicodeEncodeError:
            printerr("UnicodeEncodeError: the issue %s couldn't be stored"
                     % issue_data.issue)

        time.sleep(self.delay)

    printout("Done. %s bugs analyzed" % nbugs)

def run(self):
    print("Running Bicho with delay of %s seconds" % str(self.delay))

    bugsdb = get_database(DBGithubBackend())

    url = self.url
    pname = self.__get_project_from_url()
    printdbg(url)

    self.bugs_state = "open"
    self.pagecont = 1

    self.mod_date_open = None
    self.mod_date_closed = None

    # FIXME: tracker must be also checked!!!
    aux_date_open = bugsdb.get_last_modification_date(state="open")
    if aux_date_open:
        self.mod_date_open = aux_date_open.isoformat()
    aux_date_closed = bugsdb.get_last_modification_date(state="closed")
    if aux_date_closed:
        self.mod_date_closed = aux_date_closed.isoformat()

    printdbg("Last open bug already cached: %s" % self.mod_date_open)
    printdbg("Last closed bug already cached: %s" % self.mod_date_closed)
    bugs = self.__get_batch_bugs()
    nbugs = len(bugs)

    # still useless
    bugsdb.insert_supported_traker("github", "v3")
    trk = Tracker(url, "github", "v3")
    dbtrk = bugsdb.insert_tracker(trk)

    if len(bugs) == 0:
        if aux_date_open or aux_date_closed:
            printout("Bicho database up to date")
        else:
            printout("No bugs found. Did you provide the correct url?")
        sys.exit(0)

    while len(bugs) > 0:
        for bug in bugs:
            try:
                issue_data = self.analyze_bug(bug)
            except Exception:
                # FIXME: the exception is not handled, just re-raised
                printerr("Error in function analyzeBug with URL: %s and "
                         "Bug: %s" % (url, bug))
                raise

            try:
                # we can have meta-trackers, but we want to store the
                # original tracker name
                tr_url = self.__get_tracker_url_from_bug(bug)
                if tr_url != url:
                    aux_trk = Tracker(tr_url, "github", "v3")
                    dbtrk = bugsdb.insert_tracker(aux_trk)
                bugsdb.insert_issue(issue_data, dbtrk.id)
            except UnicodeEncodeError:
                printerr("UnicodeEncodeError: the issue %s couldn't be stored"
                         % issue_data.issue)
            except Exception as e:
                printerr("ERROR: ")
                print e

            time.sleep(self.delay)

        self.pagecont += 1
        bugs = self.__get_batch_bugs()
        nbugs = nbugs + len(bugs)

def run(self):
    """
    """
    printout("Running Bicho with delay of %s seconds" % str(self.delay))

    # limit=-1 is NOT recognized as 'all'. 500 is a reasonable limit.
    # - allura code
    issues_per_query = 500
    start_page = 0

    bugsdb = get_database(DBAlluraBackend())

    # still useless in allura
    bugsdb.insert_supported_traker("allura", "beta")
    trk = Tracker(Config.url, "allura", "beta")
    dbtrk = bugsdb.insert_tracker(trk)

    last_mod_date = bugsdb.get_last_modification_date()

    # Date before the first ticket
    time_window_start = "1900-01-01T00:00:00Z"
    time_window_end = datetime.now().isoformat() + "Z"

    if last_mod_date:
        time_window_start = last_mod_date
        printdbg("Last bugs analyzed were modified on: %s" % last_mod_date)

    time_window = time_window_start + " TO " + time_window_end

    self.url_issues = Config.url + "/search/?limit=1"
    self.url_issues += "&q="
    # A time range with all the tickets
    self.url_issues += urllib.quote("mod_date_dt:[" + time_window + "]")
    printdbg("URL for getting metadata " + self.url_issues)

    f = urllib.urlopen(self.url_issues)
    ticketTotal = json.loads(f.read())

    total_issues = int(ticketTotal['count'])
    total_pages = total_issues / issues_per_query
    print("Number of tickets: " + str(total_issues))

    if total_issues == 0:
        printout("No bugs found. Did you provide the correct url?")
        sys.exit(0)

    remaining = total_issues
    print "ETA ", (total_issues * Config.delay) / 60, "m (", \
        (total_issues * Config.delay) / (60 * 60), "h)"

    while start_page <= total_pages:
        self.url_issues = Config.url + "/search/?limit=" \
            + str(issues_per_query)
        self.url_issues += "&page=" + str(start_page) + "&q="
        # A time range with all the tickets
        self.url_issues += urllib.quote("mod_date_dt:[" + time_window + "]")
        # Order by mod_date_dt asc
        self.url_issues += "&sort=mod_date_dt+asc"
        printdbg("URL for next issues " + self.url_issues)

        f = urllib.urlopen(self.url_issues)
        ticketList = json.loads(f.read())

        bugs = []
        for ticket in ticketList["tickets"]:
            bugs.append(ticket["ticket_num"])

        for bug in bugs:
            try:
                issue_url = Config.url + "/" + str(bug)
                issue_data = self.analyze_bug(issue_url)
                if issue_data is None:
                    continue
                bugsdb.insert_issue(issue_data, dbtrk.id)
                remaining -= 1
                print "Remaining time: ", remaining * Config.delay / 60, "m"
                time.sleep(self.delay)
            except UnicodeEncodeError:
                printerr("UnicodeEncodeError: the issue %s couldn't be stored"
                         % issue_data.issue)
            except Exception:
                printerr("Error in function analyze_bug " + issue_url)
                traceback.print_exc(file=sys.stdout)

        # move on to the next page of results
        start_page += 1
