def parse_changes(self, activity):
    """Extract the field changes described by an activity feed.

    Every entry's 'description' text is cut at each 'updated:' marker.
    The last line of the text preceding the first marker names the first
    changed field. Each following chunk is expected to carry
    ``old => new`` on its first line and, when it has a second line, the
    name of the next changed field on that line.

    :param activity: mapping with an 'entries' list; each entry provides
        'author', 'description' and 'updated'
    :returns: list of Change objects, one per 'updated:' marker found
    """
    collected = []

    for entry in activity['entries']:
        author = People(entry['author'])

        chunks = entry['description'].split('updated:')
        # The field name sits on the last line before the first marker
        field = chunks[0].rpartition('\n')[2].strip()

        for chunk in chunks[1:]:
            lines = chunk.split('\n')
            sides = lines[0].split('=>')

            if len(sides) == 2:
                # e.g. u'in-progress' => u'closed'
                old_value = self.remove_unicode(sides[0].strip())
                if old_value == "''":
                    old_value = ""
                new_value = self.remove_unicode(sides[1].strip())
                if new_value == "''":
                    new_value = ""
            else:
                # Anything that is not a plain "old => new" pair is
                # recorded with empty values
                printdbg(field + " not supported in changes analysis")
                old_value = new_value = ""

            when = parse(entry['updated'])
            collected.append(Change(unicode(field), unicode(old_value),
                                    unicode(new_value), author, when))

            # The second line of the chunk, when present, names the field
            # of the next change
            if len(lines) > 1:
                field = lines[1].strip()

    return collected
def parse_changes(self, review):
    """Turn the approvals of every patch set of a review into changes.

    Patch sets without an 'approvals' key are skipped. For each approval
    the reviewer is identified by the first available of 'username',
    'email' or 'name' (an empty identifier when none is present); the
    name and email are additionally stored on the person when available.

    :param review: mapping with a 'patchSets' list
    :returns: list of Change objects whose field is the approval type,
        whose old value is the patch set number and whose new value is
        the approval value
    """
    collected = []

    for patch_set in review['patchSets']:
        if "approvals" not in patch_set:
            continue

        number = patch_set['number']

        for approval in patch_set['approvals']:
            who = approval["by"]

            # Pick the identifier in order of preference
            for key in ("username", "email", "name"):
                if key in who:
                    by = People(who[key])
                    break
            else:
                by = People(unicode(''))

            if "name" in who:
                by.set_name(who["name"])
            if "email" in who:
                by.set_email(who["email"])

            field = approval['type']
            new_value = approval['value']
            granted_on = self._convert_to_datetime(approval["grantedOn"])

            collected.append(Change(field, number, new_value, by,
                                    granted_on))

    return collected
def parse_changes(self):
    """Parse the changes table found in ``self.html``.

    The first table whose first row has exactly five header cells is
    taken as the changes table. Rows with five cells carry, in order,
    the author's e-mail, the date, the field, the removed value and the
    added value. Rows with any other number of cells are treated as
    continuation rows: they reuse the author and date of the preceding
    five-cell row and carry field, removed and added values in their
    first three cells.

    :returns: list of Change objects; empty when no table with a
        five-column header exists
    """
    soup = BeautifulSoup(self.html)
    self.remove_comments(soup)
    remove_tags = ['a', 'span', 'i']
    changes = []

    # We look for the first table with 5 cols. The match is kept in its
    # own variable: a for-loop variable stays bound to the last item
    # after an unsuccessful search, which would make the code below parse
    # an unrelated table.
    table = None
    for candidate in soup.findAll('table'):
        header = candidate.tr
        # Tables without any row have no header to inspect
        if header is not None and len(header.findAll('th')) == 5:
            table = candidate
            break

    if table is None:
        return changes

    # Best effort: flatten inline tags so that cell contents are plain text
    try:
        for tag in table.findAll(remove_tags):
            tag.replaceWith(tag.text)
    except Exception:
        printerr("error removing HTML tags")

    rows = list(table.findAll('tr'))
    for row in rows[1:]:
        cols = list(row.findAll('td'))
        if len(cols) == 5:
            person_email = cols[0].contents[0].strip()
            # NOTE(review): as written this replace() is a no-op; the
            # first argument was presumably an HTML entity for '@' --
            # confirm against the upstream file
            person_email = unicode(person_email.replace('@', '@'))
            date = self._to_datetime_with_secs(cols[1].contents[0].strip())
            # when the field contains an Attachment, the list has more
            # than a field. For example:
            #
            # [u'\n', u'Attachment #12723', u'\n Flag\n ']
            #
            if len(cols[2].contents) > 1:
                aux_c = unicode(" ".join(cols[2].contents))
                field = unicode(aux_c.replace("\n", "").strip())
            else:
                field = unicode(
                    cols[2].contents[0].replace("\n", "").strip())
            removed = unicode(cols[3].contents[0].strip())
            added = unicode(cols[4].contents[0].strip())
        else:
            # same as above with the Attachment example
            if len(cols[0].contents) > 1:
                aux_c = unicode(" ".join(cols[0].contents))
                field = aux_c.replace("\n", "").strip()
            else:
                field = cols[0].contents[0].strip()
            removed = cols[1].contents[0].strip()
            added = cols[2].contents[0].strip()

        field, removed, added = self.sanityze_change(field, removed, added)
        by = People(person_email)
        by.set_email(person_email)
        change = Change(field, removed, added, by, date)
        changes.append(change)

    return changes
def parse_changes(self):
    """Parse the change history contained in ``self.html``.

    Every ``div`` of class "actionContainer" is inspected. Its
    "action-details" ``div`` supplies the author (taken from the second
    attribute of the second link) and the date (taken from the first
    attribute of the first ``time`` element, with the timezone dropped).
    Each table row with exactly three cells then yields one change:
    field, old value and new value.

    Containers whose details element has a length below 3 are counted
    in ``self.changes_lost`` and skipped.

    :returns: list of Change objects
    """
    soup = BeautifulSoup(self.html)
    self.remove_comments(soup)

    # Unwrap <i> tags. Best effort: on any failure the remaining tags
    # are left untouched.
    try:
        for italic in soup.findAll(['i']):
            italic.replaceWith(italic.contents[0])
    except Exception:
        pass

    changes = []

    #FIXME The id of the changes are not stored
    for container in soup.findAll("div", {"class": "actionContainer"}):
        details = container.find("div", {"class": "action-details"})

        if details is None:
            # no changes have been performed on the issue
            continue
        if len(details) < 3:
            self.changes_lost += 1
            printerr("Change author format not supported. Change lost!")
            continue

        # The second link looks like:
        #<a class="user-hover user-avatar" rel="kiyoshi.lee"
        # and the value of its second attribute identifies the author
        author = People(details.findAll('a')[1].attrs[1][1])

        # we look for a string similar to:
        #<time datetime="2011-11-19T00:27-0800">19/Nov/11 12:27 AM</time>
        raw_date = details.findAll('time')[0].attrs[0][1]
        date = parse(raw_date).replace(tzinfo=None)

        for row in container.findAll('tr'):
            cols = list(row.findAll('td'))
            if len(cols) != 3:
                continue

            field = unicode(cols[0].contents[0].strip())
            if field == "Assignee":
                old = unicode(self._get_identifier(cols[1]))
                new = unicode(self._get_identifier(cols[2]))
            else:
                old = unicode(cols[1].contents[0].strip())
                new = unicode(cols[2].contents[0].strip())

            changes.append(Change(field, old, new, author, date))

    return changes
def parse_changes(self, activity):
    """Build the list of changes described by an activity feed.

    The author of an entry is identified by the e-mail found in its
    'author_detail', falling back to the name when a KeyError is raised.
    The entry's 'summary' is handed to ``self._parse_html_change`` and
    every item it yields ('what', 'old_value', 'new_value') becomes one
    change dated with the entry's 'updated' value.

    :param activity: mapping with an 'entries' list
    :returns: list of Change objects
    """
    collected = []

    for entry in activity['entries']:
        try:
            author = People(entry['author_detail']['email'])
        except KeyError:
            author = People(entry['author_detail']['name'])

        when = parse(entry['updated'])

        for item in self._parse_html_change(entry['summary']):
            collected.append(Change(item["what"], item["old_value"],
                                    item["new_value"], author, when))

    return collected
def parse_changes(self, activity, bug_id):
    """Collect the status changes listed in an activity feed.

    Only entries carrying an 'issues_status' key produce a change. The
    change is always on the 'Status' field, with an empty old value and
    the entry's 'issues_status' as new value.

    :param activity: mapping with an 'entries' list
    :param bug_id: accepted but not used by this method
    :returns: list of Change objects
    """
    status_changes = []

    for entry in activity['entries']:
        if 'issues_status' not in entry:
            continue

        author = People(entry['author_detail']['href'])
        when = parse(entry['updated'])
        status_changes.append(
            Change(unicode('Status'), unicode(''),
                   unicode(entry['issues_status']), author, when))

    return status_changes
def parse_issue(self, html):
    """Build a SourceForgeIssue from the HTML of an issue page.

    The mandatory parts (id, summary, description, submission, priority,
    status, resolution, assignee, category and group) are parsed first;
    any exception raised while parsing them propagates to the caller.
    Comments, attachments and changes are optional: a
    SourceForgeParserError while parsing one of them is reported with
    printerr and that part is left out of the issue.

    :param html: HTML text of the issue page
    :returns: the populated SourceForgeIssue
    """
    soup = BeautifulSoup.BeautifulSoup(
        html,
        convertEntities=BeautifulSoup.BeautifulSoup.XHTML_ENTITIES)
    self.__prepare_soup(soup)

    # Mandatory sections
    issue_id = self.__parse_issue_id(soup)
    summary = self.__parse_issue_summary(soup)
    desc = self.__parse_issue_description(soup)
    submission = self.__parse_issue_submission(soup)
    priority = self.__parse_issue_priority(soup)
    status = self.__parse_issue_status(soup)
    resolution = self.__parse_issue_resolution(soup)
    asignation = self.__parse_issue_assigned_to(soup)
    category = self.__parse_issue_category(soup)
    group = self.__parse_issue_group(soup)
    # FIXME the visibility var below is never used!!
    #visibility = self.__parse_issue_visibility(soup)

    # Optional sections
    comments = None
    try:
        comments = self.__parse_issue_comments(soup)
    except SourceForgeParserError:
        printerr("Error parsing issue's comments")

    attachments = None
    try:
        attachments = self.__parse_issue_attachments(soup)
    except SourceForgeParserError:
        printerr("Error parsing issue's attachments")

    changes = None
    try:
        changes = self.__parse_issue_changes(soup)
    except SourceForgeParserError:
        printerr("Error parsing issue's changes")

    submitter = People(submission['id'])
    submitter.set_name(submission['name'])
    submitted_on = submission['date']

    # The assignee is only known by name, so the identifier is left empty
    #assigned_to = People(asignation)
    assigned_to = People('')
    assigned_to.set_name(asignation)

    issue = SourceForgeIssue(issue_id, 'bug', summary, desc,
                             submitter, submitted_on)
    issue.set_priority(priority)
    issue.set_status(status, resolution)
    issue.set_assigned(assigned_to)
    issue.set_category(category)
    issue.set_group(group)

    for comment in comments or ():
        commenter = People(comment['by']['id'])
        commenter.set_name(comment['by']['name'])
        issue.add_comment(Comment(comment['desc'], commenter,
                                  comment['date']))

    for attachment in attachments or ():
        item = Attachment(attachment['url'])
        item.set_name(attachment['filename'])
        item.set_description(attachment['desc'])
        issue.add_attachment(item)

    for change in changes or ():
        changed_by = People(change['by']['id'])
        changed_by.set_name(change['by']['name'])
        # The new value is always stored as 'unknown'
        issue.add_change(Change(change['field'], change['old_value'],
                                'unknown', changed_by, change['date']))

    return issue
def analyze_bug(self, bug):
    """Build a GithubIssue from the mapping describing one bug.

    Besides the main fields, the milestone data (when present), the
    comments returned by ``__get_batch_comments`` and the activity
    returned by ``__get_batch_activities`` are attached to the issue.

    :param bug: mapping providing, among others, 'id', 'number', 'url',
        'html_url', 'state', 'title', 'body', 'labels', 'user',
        'assignee', 'milestone', 'created_at', 'updated_at' and
        'closed_at'
    :returns: the populated GithubIssue
    """
    #Retrieving main bug information
    printdbg(bug['url'] + " " + bug['state'] + " updated_at " +
             bug['updated_at'] + ' (ratelimit = ' +
             str(self.remaining_ratelimit) + ")")

    bug_id = bug['id']
    # FIXME (kept from the original code): only the first label is used
    bug_type = bug['labels'][0]['name'] if bug['labels'] else unicode('')
    summary = bug['title']
    desc = bug['body']
    ## FIXME send petition to bug['user']['url']
    submitted_by = People(bug['user']['login'])
    submitted_on = self.__to_datetime(bug['created_at'])

    ## FIXME get name from bug['assignee']['url']
    if not bug['assignee']:
        assignee = People(unicode("nobody"))
    else:
        assignee = People(bug['assignee']['login'])

    issue = GithubIssue(bug_id, bug_type, summary, desc,
                        submitted_by, submitted_on)
    issue.set_assigned(assignee)
    issue.set_status(bug['state'])
    issue.set_description(bug['body'])
    issue.set_web_link(bug['html_url'])

    try:
        closed_at = bug['closed_at']
        if closed_at:
            issue.set_closed_at(self.__to_datetime(bug['closed_at']))
    except AttributeError:
        pass

    # updated_at offers ISO 8601 format: YYYY-MM-DDTHH:MM:SSZ
    # MySQL doesn't support timezone, we remove it
    issue.set_updated_at(self.__to_datetime(bug['updated_at']))

    if bug['milestone']:
        issue.set_milestone_name(bug['milestone']['id'])
        issue.set_milestone_summary(bug['milestone']['description'])
        issue.set_milestone_title(bug['milestone']['title'])
        issue.set_milestone_web_link(bug['milestone']['url'])

    # comments
    for raw_comment in self.__get_batch_comments(bug['number']):
        ## by.setname() FIXME - to be done
        commenter = People(raw_comment['user']['login'])
        commented_on = self.__to_datetime(raw_comment['created_at'])
        issue.add_comment(Comment(raw_comment['body'], commenter,
                                  commented_on))

    # activity
    for event in self.__get_batch_activities(bug['number']):
        field = event['event']
        added = event['commit_id']
        removed = unicode('')
        ## by.setname() FIXME - to be done
        if event['actor']:
            actor = People(event['actor']['login'])
        else:
            actor = People(u"nobody")
        changed_on = self.__to_datetime(event['created_at'])
        issue.add_change(Change(field, removed, added, actor, changed_on))

    return issue
def analyze_bug(self, bug):
    """Build a LaunchpadIssue from a bug object.

    Copies the main attributes, every available ``date_*`` value (with
    the timezone dropped), the milestone, the duplicate relationship,
    the comments (all messages but the first), the activity entries and
    the attachments into the issue.

    :param bug: object exposing the attributes read below, both directly
        (e.g. ``web_link``, ``status``) and through ``bug.bug``
    :returns: the populated LaunchpadIssue
    """
    #Retrieving main bug information
    ##
    ## all the retrieval can be improved. The method bug.lp_attributes
    ## offers a list of the available attributes for the object
    ##
    printdbg(bug.web_link + " updated at " +
             bug.bug.date_last_updated.isoformat())

    # The identifier is the last path component of the web link
    bug_id = bug.web_link[bug.web_link.rfind('/') + 1:]
    bug_type = bug.importance
    summary = bug.bug.title
    desc = bug.bug.description
    submitted_by = self._get_person(bug.owner)
    submitted_on = self.__drop_timezone(bug.date_created)

    if bug.assignee:
        assignee = self._get_person(bug.assignee)
    else:
        assignee = People("nobody")

    issue = LaunchpadIssue(bug_id, bug_type, summary, desc,
                           submitted_by, submitted_on)
    issue.set_assigned(assignee)
    issue.set_status(bug.status)
    issue.set_description(bug.bug.description)
    issue.set_web_link(bug.web_link)
    issue.set_target_display_name(bug.bug_target_display_name)
    issue.set_target_name(bug.bug_target_name)

    # Each date is stored through the setter named 'set_' + attribute.
    # A missing attribute (AttributeError) simply leaves the date unset.
    date_attributes = (
        'date_assigned',
        'date_closed',
        'date_confirmed',
        'date_created',
        'date_fix_committed',
        'date_fix_released',
        'date_in_progress',
        'date_incomplete',
        'date_left_closed',
        'date_left_new',
        'date_triaged',
        'date_last_message',
    )
    for attribute in date_attributes:
        try:
            stamp = getattr(bug, attribute)
            if stamp:
                setter = getattr(issue, 'set_' + attribute)
                setter(self.__drop_timezone(stamp))
        except AttributeError:
            pass

    # The last update date is read from bug.bug instead of bug
    try:
        if bug.bug.date_last_updated:
            issue.set_date_last_updated(
                self.__drop_timezone(bug.bug.date_last_updated))
    except AttributeError:
        pass

    if bug.milestone:
        issue.set_milestone_code_name(bug.milestone.code_name)
        issue.set_milestone_data_targeted(bug.milestone.date_targeted)
        issue.set_milestone_name(bug.milestone.name)
        issue.set_milestone_summary(bug.milestone.summary)
        issue.set_milestone_title(bug.milestone.title)
        issue.set_milestone_web_link(bug.milestone.web_link)

    if bug.bug.duplicate_of:
        issue.add_temp_relationship(
            TempRelationship(bug.bug.id, unicode('duplicate_of'),
                             unicode(bug.bug.duplicate_of.id)))

    issue.set_heat(bug.bug.heat)
    issue.set_linked_branches(bug.bug.linked_branches)

    # storing the comments:
    # first message of the bugs contains the description
    if bug.bug.messages and len(bug.bug.messages) > 1:
        for position, message in enumerate(bug.bug.messages):
            if position == 0:
                # we skip the first comment which is the description
                continue
            commenter = self._get_person(message.owner)
            issue.add_comment(Comment(message.content, commenter,
                                      message.date_created))

    issue.set_tags(bug.bug.tags)
    issue.set_title(bug.bug.title)
    issue.set_users_affected_count(bug.bug.users_affected_count)
    issue.set_web_link_standalone(bug.bug.web_link)

    # activity
    for entry in bug.bug.activity.entries:
        field = entry['whatchanged']
        removed = entry['oldvalue']
        added = entry['newvalue']
        changed_by = self.__get_people_from_uri(entry['person_link'])
        changed_on = self.__to_datetime(entry['datechanged'])
        issue.add_change(Change(field, removed, added, changed_by,
                                changed_on))

    # attachments
    for raw_attachment in bug.bug.attachments.entries:
        a_url = raw_attachment['data_link']
        a_name = raw_attachment['title']
        # author and date are stored in the comment object, which is
        # located through the last path component of 'message_link'
        link = raw_attachment['message_link']
        comment_id = int(link[link.rfind('/') + 1:])
        comment = bug.bug.messages[comment_id]
        a_by = self._get_person(comment.owner)
        a_on = self.__drop_timezone(comment.date_created)
        attachment = Attachment(a_url, a_by, a_on)
        attachment.set_name(a_name)
        #attachment.set_description()
        issue.add_attachment(attachment)

    return issue