def analyze_bug(self, bug_url):
    # Retrieving main bug information
    printdbg(bug_url)
    bug_number = bug_url.split('/')[-1]

    try:
        f = urllib.urlopen(bug_url)
        json_ticket = f.read()

        try:
            issue_allura = json.loads(json_ticket)["ticket"]
            issue = self.parse_bug(issue_allura)
            changes = self.analyze_bug_changes(bug_url)
            for c in changes:
                issue.add_change(c)
            return issue
        except Exception, e:
            print "Problems with Ticket format: " + bug_number
            print e
            return None
    except Exception, e:
        printerr("Error in bug analysis: " + bug_url)
        print(e)
        raise
def _login(self):
    """
    Authenticates a user in a bugzilla tracker
    """
    if not (self.backend_user and self.backend_password):
        printdbg("No account data provided. Not logged in bugzilla")
        return

    import cookielib

    cookie_j = cookielib.CookieJar()
    cookie_h = urllib2.HTTPCookieProcessor(cookie_j)

    url = self._get_login_url(self.url)
    values = {'Bugzilla_login': self.backend_user,
              'Bugzilla_password': self.backend_password}

    opener = urllib2.build_opener(cookie_h)
    urllib2.install_opener(opener)
    data = urllib.urlencode(values)
    request = urllib2.Request(url, data)
    urllib2.urlopen(request)

    for c in cookie_j:
        self.cookies[c.name] = c.value

    printout("Logged in bugzilla as %s" % self.backend_user)
    printdbg("Bugzilla session cookies: %s" % self.cookies)
def __parse_issue_changes(self, soup):
    """
    Parses the list of changes from the issue's HTML page
    """
    changes = []

    try:
        entries = soup.find('h4', {'id': 'changebar'}).findNext('tbody').findAll('tr')
        for e in entries:
            change = {}
            # Each change contains four fields (td tags) that
            # follow the next order: field, old value, date, by.
            aux = e.findAll('td')
            change['field'] = self.__clean_str(aux[0].string)
            change['old_value'] = self.__clean_str(aux[1].string)
            change['date'] = self.__str_to_date(self.__clean_str(aux[2].string))

            if aux[3].a:
                change['by'] = {'name': self.__clean_str(aux[3].a.get('title')),
                                'id': self.__clean_str(aux[3].a.string)}
            else:
                change['by'] = {'name': 'nobody', 'id': 'nobody'}

            changes.append(change)
    except AttributeError:
        # There are no changes
        pass
    except:
        raise SourceForgeParserError('Error parsing issue changes')

    printdbg("%s changes" % str(len(changes)))
    return changes
def __parse_issue_comments(self, soup):
    """
    Parses the list of comments from the issue's HTML page
    """
    comments = []

    try:
        artifacts = soup.findAll('tr', {'class': ISSUE_COMMENT_CLASS_PATTERN})
        for art in artifacts:
            comment = {}
            rawsub, rawdesc = art.findAll('p')

            # Date and sender are content on the first 'p'
            a = rawsub.find('a')
            if a:
                comment['by'] = {'name': a.get('title'), 'id': a.string}
            else:
                comment['by'] = {'name': 'nobody', 'id': 'nobody'}

            # Time stamp is the first value of the 'p' contents
            d = self.__clean_str(rawsub.contents[0])
            comment['date'] = self.__str_to_date(
                ISSUE_COMMENT_DATE_PATTERN.match(d).group(1))

            # Description is content on the second 'p'.
            comment['desc'] = self.__clean_str(u''.join(rawdesc.contents))

            comments.append(comment)
    except:
        raise SourceForgeParserError('Error parsing issue comments')

    printdbg("%s comments" % str(len(comments)))
    return comments
def call(self, method, params):
    # Conduit parameters
    params['__conduit__'] = {'token': self.token}

    # POST parameters
    data = {'params': json.dumps(params),
            'output': 'json',
            '__conduit__': True}

    req = requests.post('%s/api/%s' % (self.url, method),
                        headers=self.HEADERS, data=data)
    printdbg("Conduit %s method called: %s" % (method, req.url))

    # Raise HTTP errors, if any
    req.raise_for_status()

    # Check for possible Conduit API errors
    result = req.json()

    if result['error_code']:
        raise ConduitError(result['error_code'], result['error_info'])

    return result['result']
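# A minimal standalone sketch of the form payload that call() above posts
# to Conduit. The method name 'maniphest.search', the constraints and the
# token value are invented for illustration; only the payload shape
# ('params' as a JSON string plus 'output' and '__conduit__') comes from
# the code above.
import json

params = {'constraints': {'statuses': ['open']},
          '__conduit__': {'token': 'api-XXXXXXXXXXXXXXXX'}}
data = {'params': json.dumps(params),
        'output': 'json',
        '__conduit__': True}
# 'data' is what requests.post() would send as form fields to
# <url>/api/maniphest.search
print data['params']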
def get_events_from_transactions(self, phtrans):
    comments = []
    changes = []

    for phtr in phtrans:
        printdbg("Parsing transaction %s - date: %s"
                 % (phtr['transactionPHID'], phtr['dateCreated']))

        field = phtr['transactionType']
        dt = unix_to_datetime(phtr['dateCreated'])
        author = self.get_identity(phtr['authorPHID'])
        ov = phtr['oldValue']
        nv = phtr['newValue']
        text = phtr['comments']

        if field == 'core:comment':
            comment = Comment(text, author, dt)
            comments.append(comment)
        else:
            old_value = unicode(ov) if ov is not None else None
            new_value = unicode(nv) if nv is not None else None
            change = Change(field, old_value, new_value, author, dt)
            changes.append(change)

    return comments, changes
def _retrieve_issues(self, ids, base_url, trk_id):
    # We want to use pop() to get the oldest first so we must reverse the
    # order
    ids.reverse()

    while ids:
        query_issues = []
        while len(query_issues) < self.max_issues and ids:
            query_issues.append(ids.pop())

        # Retrieving main bug information
        url = self._get_issues_info_url(base_url, query_issues)
        printdbg("Issues to retrieve from: %s" % url)

        handler = BugsHandler()
        self._safe_xml_parse(url, handler)
        issues = handler.get_issues()

        # Retrieving changes
        for issue in issues:
            changes = self._retrieve_issue_activity(base_url, issue.issue)
            for c in changes:
                issue.add_change(c)

            # We store here the issue once the complete retrieval
            # for each bug is done
            self._store_issue(issue, trk_id)
            self.retrieved[issue.issue] = self._timestamp_to_str(issue.delta_ts)
            time.sleep(self.delay)
def insert_issue_ext(self, store, issue, issue_id):
    """
    Inserts or updates the Trac-specific fields of an issue
    """
    newissue = False

    try:
        db_issue_ext = store.find(DBTracIssueExt,
                                  DBTracIssueExt.issue_id == issue_id).one()
        if not db_issue_ext:
            newissue = True
            printdbg("This is a new issue")
            db_issue_ext = DBTracIssueExt(issue_id)

        db_issue_ext.cc = self.__return_unicode(issue.cc)
        db_issue_ext.component = self.__return_unicode(issue.component)
        db_issue_ext.keywords = self.__return_unicode(issue.keywords)
        db_issue_ext.milestone = self.__return_unicode(issue.milestone)
        db_issue_ext.priority = self.__return_unicode(issue.priority)
        db_issue_ext.status = self.__return_unicode(issue.status)
        db_issue_ext.severity = self.__return_unicode(issue.severity)
        db_issue_ext.version = self.__return_unicode(issue.version)
        db_issue_ext.modified_at = issue.modified_at
        db_issue_ext.closed_at = issue.closed_at

        if newissue:
            store.add(db_issue_ext)

        store.flush()
        return db_issue_ext
    except:
        store.rollback()
        raise
def bugsNumber(self, url):
    oneBug = self.basic_jira_url()
    oneBug += "&tempMax=1"
    printdbg("Getting number of issues: " + oneBug)

    data_url = urllib.urlopen(oneBug).read()
    # The issue count is read from the total="..." attribute of the
    # <issue> tag in the XML response.
    bugs = data_url.split("<issue")[1].split('"/>')[0].split('total="')[1]
    return int(bugs)
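# A runnable standalone sketch of the string-splitting above, applied to a
# hypothetical response fragment; the tag attributes and count are invented
# for illustration.
data_url = '<rss><channel><issue start="0" end="1" total="5661"/></channel></rss>'
total = data_url.split("<issue")[1].split('"/>')[0].split('total="')[1]
print int(total)  # -> 5661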
def _safe_xml_parse(self, bugs_url, handler):
    f = self._urlopen_auth(bugs_url)
    parser = xml.sax.make_parser()
    parser.setContentHandler(handler)

    try:
        contents = f.read()
    except Exception:
        printerr("Error retrieving URL: %s" % bugs_url)
        raise

    try:
        parser.feed(contents)
        parser.close()
    except Exception:
        # Clean only the invalid XML
        try:
            parser2 = xml.sax.make_parser()
            parser2.setContentHandler(handler)
            printdbg("Cleaning dirty XML")
            cleaned_contents = ''.join(
                c for c in contents if valid_XML_char_ordinal(ord(c)))
            parser2.feed(cleaned_contents)
            parser2.close()
        except Exception:
            printerr("Error parsing URL: %s" % bugs_url)
            raise

    f.close()
def run(self):
    printout("Running Bicho with delay of %s seconds" % str(self.delay))

    issues_per_xml_query = 500

    bugsdb = get_database(DBJiraBackend())
    bugsdb.insert_supported_traker("jira", "4.1.2")
    trk = Tracker(self.url.split("-")[0], "jira", "4.1.2")
    dbtrk = bugsdb.insert_tracker(trk)

    serverUrl = self.url.split("/browse/")[0]
    query = "/si/jira.issueviews:issue-xml/"
    project = self.url.split("/browse/")[1]

    if len(project.split("-")) > 1:
        # The URL points to a single issue key, so only that bug is fetched
        bug_key = project
        project = project.split("-")[0]
        bugs_number = 1

        printdbg(serverUrl + query + bug_key + "/" + bug_key + ".xml")

        parser = xml.sax.make_parser()
        handler = BugsHandler()
        parser.setContentHandler(handler)
        try:
            parser.parse(serverUrl + query + bug_key + "/" + bug_key + ".xml")
            issue = handler.getIssues()[0]
            bugsdb.insert_issue(issue, dbtrk.id)
        except Exception, e:
            print(e)
def insert_new_bugs_created(self, date_from, date_to):
    """
    This method inserts an entry with the data of the creation time
    """
    if (not date_from) and (not date_to):
        issues = self.store.find(DBIssue)
    elif not date_from:
        issues = self.store.find(DBIssue, DBIssue.submitted_on < date_to)
    elif not date_to:
        issues = self.store.find(DBIssue, DBIssue.submitted_on > date_from)
    else:
        issues = self.store.find(
            DBIssue, And(DBIssue.submitted_on <= date_to,
                         DBIssue.submitted_on > date_from))

    issues = issues.order_by(Asc(DBIssue.submitted_on))

    # We store the initial data for each bug found
    for i in issues:
        db_ilog = self.get_last_values(i)  # from the issues and changes tables
        db_ilog = self.build_initial_state(db_ilog)
        self.store.add(db_ilog)
        printdbg("Issue #%s created at %s - date_from = %s - date_to = %s"
                 % (db_ilog.issue, db_ilog.date, date_from, date_to))
def analyze_resolution(self, arg2):
    printdbg("We're analyzing resolution")
    # Get the time-date of the event
    td = self.__get_time(arg2)
    # Get the submitter
    by = self.__get_submitter(arg2)

    rr = re.search('\s+<em>(.*)</em>\s(?=deleted)', str(arg2))
    rs = re.search('(?<=\sset\sto\s)<em>(.*)</em>', str(arg2))

    # If a resolution is deleted, added = ''
    # If a resolution is added, removed = ''
    removed = added = u''
    if rr is not None:
        removed = rr.group(1).decode('UTF-8')
    elif rs is not None:
        added = rs.group(1).decode('UTF-8')
    else:
        printdbg("Resolution unknown case. Error ?")

    ch = Change(u'resolution', removed, added, by, td)
    return ch
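# A runnable sketch of the two resolution regexes above, applied to
# invented HTML fragments of the kind the parser expects; the <em> values
# are illustrative only.
import re

set_html = 'resolution set to <em>fixed</em>'
del_html = 'resolution <em>fixed</em> deleted'
print re.search('(?<=\sset\sto\s)<em>(.*)</em>', set_html).group(1)  # -> fixed
print re.search('\s+<em>(.*)</em>\s(?=deleted)', del_html).group(1)  # -> fixed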
def parse_changes(self, activity):
    changesList = []

    for entry in activity['entries']:
        by = People(entry['author'])

        description = entry['description'].split('updated:')
        changes = description.pop(0)
        field = changes.rpartition('\n')[2].strip()

        while description:
            changes = description.pop(0).split('\n')
            values = changes[0].split('=>')

            if len(values) != 2:
                printdbg(field + " not supported in changes analysis")
                old_value = new_value = ""
            else:
                # u'in-progress' => u'closed'
                old_value = self.remove_unicode(values[0].strip())
                if old_value == "''":
                    old_value = ""
                new_value = self.remove_unicode(values[1].strip())
                if new_value == "''":
                    new_value = ""

            update = parse(entry['updated'])
            change = Change(unicode(field), unicode(old_value),
                            unicode(new_value), by, update)
            changesList.append(change)

            if len(changes) > 1:
                field = changes[1].strip()

    return changesList
def _retrieve_issues_ids(self, base_url, version, from_date, not_retrieved=True):
    url = self._get_issues_list_url(base_url, version, from_date)
    printdbg("Getting bugzilla issues from %s" % url)

    f = self._urlopen_auth(url)

    # Problems using the csv library, not all the fields are delimited by
    # the '"' character. Easier using split.
    # Moreover, we drop the first line of the CSV because it contains
    # the headers.
    ids = []
    csv = f.read().split('\n')[1:]

    for line in csv:
        # 0: bug_id, 7: changeddate
        values = line.split(',')

        id = values[0]
        change_ts = values[7].strip('"')

        # Filter issues already retrieved
        if not_retrieved:
            if (id not in self.retrieved) or (self.retrieved[id] != change_ts):
                ids.append(id)
        else:
            ids.append(id)

    return ids
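# A runnable sketch of the split() parsing above on one hypothetical
# Bugzilla CSV row. Only the positions asserted in the code are grounded
# (bug_id at column 0, changeddate at column 7); the intermediate column
# values are invented, and none of them contains an embedded comma, which
# is the case the split(',') approach assumes.
line = '1010,"acme","core","dev@example.org","NEW","---","A bug","2011-03-21 14:05:30"'
values = line.split(',')
print values[0]              # -> 1010
print values[7].strip('"')   # -> 2011-03-21 14:05:30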
def call(self, method, params):
    # POST parameters
    data = {'method': method, 'params': params}
    data = json.dumps(data)

    auth = None
    if self.backend_user and self.backend_password:
        auth = (self.backend_user, self.backend_password)
        url = '%s/login/jsonrpc' % self.url
    else:
        url = '%s/jsonrpc' % self.url

    res = requests.post(url, headers=self.HEADERS, data=data, auth=auth)
    printdbg("Trac RPC %s method called: %s" % (method, res.url))

    # Raise HTTP errors, if any
    res.raise_for_status()

    # Check for possible Trac RPC errors
    result = res.json()

    if result['error']:
        raise TracRPCError(result['error']['code'],
                           result['error']['message'])

    return result['result']
def login(self, url, user=None, password=None):
    """
    Authenticates a user in a Jira tracker
    """
    if not (user and password):
        printout("No account data provided. Not logged in Jira")
        return

    import cookielib

    cookie_j = cookielib.CookieJar()
    cookie_h = urllib2.HTTPCookieProcessor(cookie_j)

    auth_info = user + ':' + password
    auth_info = auth_info.replace('\n', '')
    base64string = base64.encodestring(auth_info)

    request = urllib2.Request(url)
    request.add_header("Authorization", "Basic %s" % base64string)

    opener = urllib2.build_opener(cookie_h)
    urllib2.install_opener(opener)
    urllib2.urlopen(request)

    for c in cookie_j:
        self.cookies[c.name] = c.value

    printout("Logged in Jira as %s" % user)
    printdbg("Jira session cookies: %s" % self.cookies)
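# A runnable sketch of the Basic auth value built above, with invented
# credentials. base64.encodestring() appends a trailing newline to its
# output, which is one reason the code above strips newlines from the
# credential string before encoding.
import base64

auth_info = ('jsmith' + ':' + 'secret').replace('\n', '')
base64string = base64.encodestring(auth_info)
print "Authorization: Basic %s" % base64string.strip()  # -> Basic anNtaXRoOnNlY3JldA==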
def run(self):
    printout("Running Bicho with delay of %s seconds" % str(self.delay))

    self.conn.login(self.url, self.backend_user, self.backend_password)

    bugsdb = get_database(DBJiraBackend())
    bugsdb.insert_supported_traker("jira", "4.1.2")
    trk = Tracker(self.url.split("-")[0], "jira", "4.1.2")
    dbtrk = bugsdb.insert_tracker(trk)

    serverUrl = self.url.split("/browse/")[0]
    query = "/si/jira.issueviews:issue-xml/"
    project = self.url.split("/browse/")[1]

    if len(project.split("-")) > 1:
        # The URL points to a single issue key, so only that bug is fetched
        bug_key = project
        project = project.split("-")[0]
        bugs_number = 1

        printdbg(serverUrl + query + bug_key + "/" + bug_key + ".xml")

        parser = xml.sax.make_parser()
        handler = BugsHandler()
        parser.setContentHandler(handler)
        try:
            parser.parse(serverUrl + query + bug_key + "/" + bug_key + ".xml")
            issue = handler.getIssues(self.conn)[0]
            bugsdb.insert_issue(issue, dbtrk.id)
        except Exception, e:
            print(e)
def analyze_version(self, arg2):
    printdbg("We're analyzing version")
    # Get the time-date of the event
    td = self.__get_time(arg2)
    # Get the submitter
    by = self.__get_submitter(arg2)

    vr = re.search('\s+<em>(.*)</em>\s(?=deleted)', str(arg2))
    vc = re.search('(?<=\schanged\sfrom\s)<em>(.*)</em>\sto\s<em>(.*)</em>\s', str(arg2))
    vs = re.search('(?<=\sset\sto\s)<em>(.*)</em>', str(arg2))

    # If a version is deleted, added = ''
    # If a version is added, removed = ''
    removed = added = u''
    if vr is not None:
        removed = vr.group(1).decode('UTF-8')
    elif vc is not None:
        removed = vc.group(1).decode('UTF-8')
        added = vc.group(2).decode('UTF-8')
    elif vs is not None:
        added = vs.group(1).decode('UTF-8')
    else:
        printdbg("Version unknown case. Error ?")

    ch = Change(u'version', removed, added, by, td)
    return ch
def _store_issue(self, issue, trk_id):
    try:
        self.bugsdb.insert_issue(issue, trk_id)
        printdbg("Issue #%s stored" % issue.issue)
    except UnicodeEncodeError:
        printerr("UnicodeEncodeError: the issue %s couldn't be stored"
                 % issue.issue)
def analyze_cc(self, arg2):
    printdbg("We're analyzing cc")
    # Get the time-date of the event
    td = self.__get_time(arg2)
    # Get the submitter
    by = self.__get_submitter(arg2)

    # We check which form of cc modification we have.
    # Possible cases are:
    #   1) tag1 tag2 tag3 added
    #   2) tag1 tag2 tag3 removed
    #   3) tag4 tag5 tag6 added; tag1 tag2 tag3 removed
    cc_a = re.search('(<em>.*)\sadded', str(arg2))
    cc_r = re.search('(<em>.*)\sremoved', str(arg2))

    removed = added = u''
    if cc_r is not None:
        removed = cc_r.group(1).replace('<em>', '').replace('</em>', '').decode('UTF-8')
    elif cc_a is not None:
        added = cc_a.group(1).replace('<em>', '').replace('</em>', '').decode('UTF-8')
    else:
        printdbg("CC unknown case. Error ?")

    ch = Change(u'cc', removed, added, by, td)
    return ch
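# A runnable sketch of the cc regex and tag stripping above, applied to an
# invented HTML fragment; the addresses are illustrative only.
import re

cc_html = '<em>alice@example.org</em> <em>bob@example.org</em> added'
m = re.search('(<em>.*)\sadded', cc_html)
print m.group(1).replace('<em>', '').replace('</em>', '')
# -> alice@example.org bob@example.org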
def getReviews(self, limit, start, version_mayor, version_minor):
    args_gerrit = "gerrit query "
    args_gerrit += "project:" + Config.gerrit_project
    args_gerrit += " limit:" + str(limit)

    if start:
        if version_mayor == 2 and version_minor >= 9:
            args_gerrit += " --start=" + str(start)
        else:
            args_gerrit += " resume_sortkey:" + start

    args_gerrit += " --all-approvals --comments --format=JSON"

    if 'backend_user' in vars(Config):
        cmd = ["ssh", "-p 29418", Config.backend_user + "@" + Config.url,
               args_gerrit]
        printdbg("Gerrit cmd: " + "ssh -p 29418 " + Config.backend_user +
                 "@" + Config.url + " " + args_gerrit)
    else:
        cmd = ["ssh", "-p 29418", Config.url, args_gerrit]
        printdbg("Gerrit cmd: " + "ssh -p 29418 " + Config.url + " " +
                 args_gerrit)

    tickets_raw = self.run_ssh_command(cmd)

    # Gerrit returns a stream of newline-delimited JSON objects;
    # wrap them into a JSON list before parsing.
    tickets_raw = "[" + tickets_raw.replace("\n", ",") + "]"
    tickets_raw = tickets_raw.replace(",]", "]")
    tickets = json.loads(tickets_raw)

    return tickets
def __parse_issue_attachments(self, soup):
    """
    Parses the list of attachments from the issue's HTML page
    """
    attachments = []

    try:
        files = soup.find('h4', {'id': 'filebar'}).findNext('tbody').findAll('tr')
        for f in files:
            attch = {}
            # Each entry contains three fields (td tags) that
            # follow the next order: filename, description and URL.
            aux = f.findAll('td')
            attch['filename'] = self.__clean_str(u''.join(aux[0].contents))
            attch['desc'] = self.__clean_str(u''.join(aux[1].contents))
            attch['url'] = SOURCEFORGE_DOMAIN + aux[2].a.get('href')

            attachments.append(attch)
    except AttributeError:
        # There is no attachment
        pass
    except:
        raise SourceForgeParserError("Error parsing issue's attachments")

    printdbg("%s attachments" % str(len(attachments)))
    return attachments
def getVersion(self):
    args_gerrit = "gerrit version"

    if 'backend_user' in vars(Config):
        cmd = ["ssh", "-p 29418", Config.backend_user + "@" + Config.url,
               args_gerrit]
        printdbg("Gerrit cmd: " + "ssh -p 29418 " + Config.backend_user +
                 "@" + Config.url + " " + args_gerrit)
    else:
        cmd = ["ssh", "-p 29418", Config.url, args_gerrit]
        printdbg("Gerrit cmd: " + "ssh -p 29418 " + Config.url + " " +
                 args_gerrit)

    version_raw = subprocess.check_output(cmd)
    # output: gerrit version 2.10-rc1-988-g333a9dd
    m = re.match("gerrit version (\d+)\.(\d+).*", version_raw)

    if not m:
        raise Exception("Invalid gerrit version %s" % version_raw)

    try:
        mayor = int(m.group(1))
        minor = int(m.group(2))
    except Exception, e:
        raise Exception("Invalid gerrit version %s. Error: %s"
                        % (version_raw, str(e)))

    return mayor, minor
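# A runnable sketch of the version parsing above, using the sample output
# quoted in the code's own comment.
import re

version_raw = "gerrit version 2.10-rc1-988-g333a9dd"
m = re.match("gerrit version (\d+)\.(\d+).*", version_raw)
print int(m.group(1)), int(m.group(2))  # -> 2 10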
def getReviews(self, limit, start):
    args_gerrit = "gerrit query "
    args_gerrit += "project:" + Config.gerrit_project
    args_gerrit += " limit:" + str(limit)

    if start != "":
        args_gerrit += " resume_sortkey:" + start

    args_gerrit += " --all-approvals --comments --format=JSON"

    if 'backend_user' in vars(Config):
        cmd = ["ssh", "-p 29418", Config.backend_user + "@" + Config.url,
               args_gerrit]
        printdbg("Gerrit cmd: " + "ssh -p 29418 " + Config.backend_user +
                 "@" + Config.url + " " + args_gerrit)
    else:
        cmd = ["ssh", "-p 29418", Config.url, args_gerrit]
        printdbg("Gerrit cmd: " + "ssh -p 29418 " + Config.url + " " +
                 args_gerrit)

    tickets_raw = subprocess.check_output(cmd)

    # Gerrit returns a stream of newline-delimited JSON objects;
    # wrap them into a JSON list before parsing.
    tickets_raw = "[" + tickets_raw.replace("\n", ",") + "]"
    tickets_raw = tickets_raw.replace(",]", "]")
    tickets = json.loads(tickets_raw)

    return tickets
def analyze_milestone(self, arg2):
    """
    :type arg2: basestring
    """
    printdbg("We're analyzing milestones")
    # Get the time-date of submission
    td = self.__get_time(arg2)
    # Get the submitter
    by = self.__get_submitter(arg2)

    mr = re.search('\s+<em>(.*)</em>\s(?=deleted)', str(arg2))
    mc = re.search('(?<=\schanged\sfrom\s)<em>(.*)</em>\sto\s<em>(.*)</em>\s', str(arg2))
    ms = re.search('(?<=\sset\sto\s)<em>(.*)</em>', str(arg2))

    # If a milestone is deleted, added = ''
    # If a milestone is added, removed = ''
    removed = added = u''
    if mr is not None:
        removed = mr.group(1).decode('UTF-8')
    elif mc is not None:
        removed = mc.group(1).decode('UTF-8')
        added = mc.group(2).decode('UTF-8')
    elif ms is not None:
        added = ms.group(1).decode('UTF-8')
    else:
        printdbg("Milestone unknown case. Error ?")

    ch = Change(u'milestone', removed, added, by, td)
    return ch
def run(self):
    """
    Fetches and stores the reviews of a Gerrit project
    """
    printout("Running Bicho with delay of %s seconds" % str(self.delay))

    bugsdb = get_database(DBGerritBackend())

    # Still useless in gerrit
    bugsdb.insert_supported_traker("gerrit", "beta")
    trk = Tracker(Config.url + "_" + Config.gerrit_project, "gerrit", "beta")
    dbtrk = bugsdb.insert_tracker(trk)

    last_mod_time = 0
    last_mod_date = bugsdb.get_last_modification_date(tracker_id=dbtrk.id)
    if last_mod_date:
        printdbg("Last reviews analyzed were modified on date: %s"
                 % last_mod_date)
        last_mod_time = time.mktime(time.strptime(last_mod_date,
                                                  '%Y-%m-%d %H:%M:%S'))

    limit = 500  # gerrit default 500
    last_item = ""
    number_results = limit
    total_reviews = 0

    # Wikimedia gerrit returns limit+1
    while number_results == limit or number_results == limit + 1:
        # Ordered by lastUpdated
        tickets = self.getReviews(limit, last_item)
        number_results = 0

        reviews = []
        for entry in tickets:
            if 'project' in entry.keys():
                if entry['lastUpdated'] < last_mod_time:
                    break
                reviews.append(entry["number"])
                review_data = self.analyze_review(entry)

                if review_data is None:
                    pprint.pprint("ERROR in review. Ignoring it.")
                    continue

                last_item = entry['sortKey']

                # Extra changes not included in gerrit changes
                self.add_merged_abandoned_changes(entry, review_data)
                self.add_uploaded_patchset_from_comments(entry, review_data)
                self.add_new_change(review_data)

                bugsdb.insert_issue(review_data, dbtrk.id)
                number_results += 1
            elif 'rowCount' in entry.keys():
                pprint.pprint(entry)

        printdbg("CONTINUE FROM: " + last_item)
        total_reviews = total_reviews + int(number_results)

    self.check_merged_abandoned_changes(bugsdb.store, dbtrk.id)

    print("Done. Number of reviews: " + str(total_reviews))
def bugsNumber(self, url):
    oneBug = self.basic_jira_url()
    oneBug += "&tempMax=1"
    printdbg("Getting number of issues: " + oneBug)

    f = self.conn.urlopen_auth(oneBug)
    data_url = f.read()
    # The issue count is read from the total="..." attribute of the
    # <issue> tag in the XML response.
    bugs = data_url.split("<issue")[1].split('"/>')[0].split('total="')[1]
    return int(bugs)
def _retrieve_issue_activity(self, base_url, id):
    activity_url = self._get_issue_activity_url(base_url, id)
    printdbg("Retrieving activity of issue #%s from %s" % (id, activity_url))

    data = self._urlopen_auth(activity_url).read()
    parser = SoupHtmlParser(data, id)
    changes = parser.parse_changes()
    return changes
def run(self):
    """
    Fetches and stores the issues of a Google Code project
    """
    printout("Running Bicho with delay of %s seconds" % str(self.delay))

    issues_per_query = 250
    start_issue = 1

    bugsdb = get_database(DBGoogleCodeBackend())

    # Still useless
    bugsdb.insert_supported_traker("googlecode", "beta")
    trk = Tracker(Config.url, "googlecode", "beta")
    dbtrk = bugsdb.insert_tracker(trk)

    self.url = Config.url

    # https://code.google.com/feeds/issues/p/mobile-time-care
    self.url_issues = Config.url + "/issues/full?max-results=1"
    printdbg("URL for getting metadata " + self.url_issues)

    d = feedparser.parse(self.url_issues)
    total_issues = int(d['feed']['opensearch_totalresults'])
    print "Total bugs: ", total_issues

    if total_issues == 0:
        printout("No bugs found. Did you provide the correct url?")
        sys.exit(0)

    remaining = total_issues
    print "ETA ", (total_issues * Config.delay) / 60, "m (", \
        (total_issues * Config.delay) / (60 * 60), "h)"

    while start_issue < total_issues:
        self.url_issues = Config.url + "/issues/full?max-results=" + str(issues_per_query)
        self.url_issues += "&start-index=" + str(start_issue)
        printdbg("URL for next issues " + self.url_issues)

        d = feedparser.parse(self.url_issues)

        for entry in d['entries']:
            try:
                issue = self.analyze_bug(entry)
                if issue is None:
                    continue
                bugsdb.insert_issue(issue, dbtrk.id)
                remaining -= 1
                print "Remaining time: ", remaining * Config.delay / 60, \
                    "m", " issues ", str(remaining)
                time.sleep(Config.delay)
            except UnicodeEncodeError:
                printerr("UnicodeEncodeError: the issue %s couldn't be stored"
                         % issue.issue)
            except Exception, e:
                printerr("Error in function analyze_bug ")
                pprint.pprint(entry)
                traceback.print_exc(file=sys.stdout)

        start_issue += issues_per_query
def analyze_bug_changes(self, bug_url):
    bug_number = bug_url.split('/')[-1]
    changes_url = bug_url.replace("rest/", "") + "/feed.atom"

    printdbg("Analyzing issue changes " + changes_url)

    d = feedparser.parse(changes_url)
    changes = self.parse_changes(d)
    return changes
def check_auth(self):
    # Check conduit credentials
    try:
        printdbg("Checking conduit credentials")
        self.conduit.whoami()
        printdbg("Credentials checked")
        return True
    except (requests.exceptions.HTTPError, ConduitError), e:
        printerr("Error: %s" % e)
        return False
def __fetch_data(self, url):
    request = urllib2.Request(url)
    self.__set_request_auth(request)

    try:
        result = urllib2.urlopen(request)
        content = result.read()
    except urllib2.HTTPError, e:
        if e.code == 403:
            raise GitHubRateLimitReached()
        printdbg("Error raised on %s" % url)
        raise e

    return content
def __get_batch_bugs_state(self, state=ALL_STATES, since=None,
                           direction='asc'):
    url = self.url + "?state=" + state + "&page=" + str(self.pagecont) \
        + "&per_page=100&sort=updated&direction=" + direction

    if since:
        url = url + "&since=" + str(since)

    printdbg(url)
    bugs = self.__fetch_data(url)
    return bugs
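# A runnable sketch of the issues URL built above, with invented values for
# self.url, the page counter, state, direction and the since timestamp.
base = "https://api.github.com/repos/acme/widget/issues"
url = base + "?state=" + "all" + "&page=" + str(2) \
    + "&per_page=100&sort=updated&direction=" + "asc"
url = url + "&since=" + "2014-01-01T00:00:00Z"
print url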
def store_final_relationships(self):
    """
    Stores the issue relationships kept in the temporal table
    """
    temp_rels = self.store.find(DBIssueTempRelationship)

    for tr in temp_rels:
        aux_issue_id = self._get_db_issue(tr.issue_id, tr.tracker_id)
        aux_related_to = self._get_db_issue(tr.related_to, tr.tracker_id)

        if aux_related_to != -1 and aux_issue_id != -1:
            self._insert_relationship(aux_issue_id.id, tr.type,
                                      aux_related_to.id)
        else:
            printdbg("Issue %s belongs to a different tracker and "
                     "won't be stored" % tr.related_to)
def _get_last_and_next_dates(self):
    last_ts = self.bugsdb.get_last_modification_date(
        tracker_id=self.tracker.id)

    if not last_ts:
        return None, None

    printdbg("Last issues cached were modified on: %s" % last_ts)

    last_ts_str = self._timestamp_to_str(last_ts)

    # We add one second to the last date to avoid retrieving the same
    # changes modified at that date.
    next_ts = last_ts + timedelta(seconds=1)
    next_ts_str = self._timestamp_to_str(next_ts)

    return last_ts_str, next_ts_str
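# A runnable sketch of the one-second bump above, with an invented
# last-modification timestamp.
from datetime import datetime, timedelta

last_ts = datetime(2014, 6, 1, 12, 0, 0)
next_ts = last_ts + timedelta(seconds=1)
print next_ts  # -> 2014-06-01 12:00:01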
def fetch_and_store(self):
    printdbg("Fetching reviews from %s" % self.base_url)

    total_rqs = 0
    nrqs = 0
    offset = 0

    # Insert tracker information
    dbtrk = self.insert_tracker(self.base_url, self.group)

    last_mod_date = self.db.get_last_modification_date(tracker_id=dbtrk.id)
    if last_mod_date:
        printdbg("Last modification date stored: %s" % last_mod_date)

    printout("Fetching review requests from %s to %s"
             % (offset, offset + self.max_issues))
    result = self.api_client.review_requests(offset=offset,
                                             limit=self.max_issues,
                                             group=self.group,
                                             last_date=last_mod_date)
    raw_rqs = result['review_requests']

    while raw_rqs:
        total_rqs += len(raw_rqs)

        for raw_rq in raw_rqs:
            rq = self.get_review_request(raw_rq)

            # Insert review request
            self.db.insert_issue(rq, dbtrk.id)
            nrqs += 1

        time.sleep(self.delay)

        offset += self.max_issues
        printout("Fetching review requests from %s to %s"
                 % (offset, offset + self.max_issues))
        result = self.api_client.review_requests(offset=offset,
                                                 limit=self.max_issues,
                                                 group=self.group,
                                                 last_date=last_mod_date)
        raw_rqs = result['review_requests']

    printout("Done. %s review requests analyzed from %s" % (nrqs, total_rqs))
def analyze_bug_list(self, nissues, offset, bugsdb, dbtrk_id):
    url_issues = self.basic_jira_url()
    url_issues += "&tempMax=" + str(nissues) + "&pager/start=" + str(offset)
    printdbg(url_issues)

    handler = BugsHandler()
    self.safe_xml_parse(url_issues, handler)

    try:
        issues = handler.getIssues(self.conn)
        for issue in issues:
            bugsdb.insert_issue(issue, dbtrk_id)
    except Exception, e:
        import traceback
        traceback.print_exc()
        sys.exit(0)
def call(self, method, params):
    url = self.URL % {'base': self.url, 'method': method}

    req = requests.get(url, params=params, headers=self.HEADERS)
    printdbg("Review Board %s method called: %s" % (method, req.url))

    # Raise HTTP errors, if any
    req.raise_for_status()

    result = req.json()

    # Check for possible API errors
    if 'err' in result:
        raise ReviewBoardAPIError(result['err']['code'], result['message'])

    return result
def _get_author_identity(self, author_id):
    if author_id in self.identities:
        return self.identities[author_id]

    root = self._get_redmine_root(Config.url)
    author_url = root + "users/" + str(author_id) + ".json"

    identity = None
    try:
        f = urllib2.urlopen(author_url)
        person = json.loads(f.read())
        identity = person['user']['mail']
    except (urllib2.HTTPError, KeyError):
        printdbg("User with id %s has no account information" % author_id)
        identity = author_id

    self.identities[author_id] = identity
    return identity