def _string_contents(self, soup): # WebKit's bugzilla instance uses UTF-8. # BeautifulStoneSoup always returns Unicode strings, however # the .string method returns a (unicode) NavigableString. # NavigableString can confuse other parts of the code, so we # convert from NavigableString to a real unicode() object using unicode(). return unicode(soup.string)
def _parse_attachment_ids_request_query(self, page, since=None):
    """Extract attachment ids from the links on a request-queue page.

    Args:
        page: Markup of the page; it is handed to BeautifulSoup for parsing.
        since: Optional ``datetime``. When given, only the table with class
            "requests" is parsed, and rows whose date cell is older than
            ``since`` are dropped. Rows without a recognizable date cell are
            kept.

    Returns:
        A list of ints, one per matching attachment review link, in
        document order.
    """
    # Formats. Raw strings keep the regex escapes (\d, \?, \.) away from
    # Python's own string-escape processing, and the dot in "attachment.cgi"
    # is escaped so that it only matches a literal '.'.
    digits = re.compile(r"\d+")
    attachment_href = re.compile(r"attachment\.cgi\?id=\d+&action=review")

    # If no date is given, return all ids.
    if not since:
        attachment_links = SoupStrainer("a", href=attachment_href)
        return [
            int(digits.search(tag["href"]).group(0))
            for tag in BeautifulSoup(page, parseOnlyThese=attachment_links)
        ]

    # Parse the main table only.
    date_format = re.compile(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}")
    mtab = SoupStrainer("table", {"class": "requests"})
    soup = BeautifulSoup(page, parseOnlyThese=mtab)
    patch_ids = []

    for row in soup.findAll("tr"):
        patch_tag = row.find("a", {"href": attachment_href})
        if not patch_tag:
            continue
        # The first run of digits in the href is the attachment id.
        patch_id = int(digits.search(patch_tag["href"]).group(0))
        date_tag = row.find("td", text=date_format)
        if date_tag and datetime.strptime(
                date_format.search(unicode(date_tag)).group(0),
                "%Y-%m-%d %H:%M") < since:
            # Older than the cut-off: skip this row.
            continue
        patch_ids.append(patch_id)
    return patch_ids
def message_with_output(self, output_limit=500):
    """Return this object's text, followed by its output when there is any.

    Args:
        output_limit: Maximum number of trailing characters of
            ``self.output`` to include. A falsy value disables truncation.

    Returns:
        The unicode form of ``self`` alone when ``self.output`` is empty;
        otherwise that text plus either the full output or only its last
        ``output_limit`` characters.
    """
    captured = self.output
    if not captured:
        return unicode(self)

    needs_truncation = output_limit and len(captured) > output_limit
    if not needs_truncation:
        return u"%s\n\n%s" % (self, captured)

    tail = captured[-output_limit:]
    return u"%s\n\nLast %s characters of output:\n%s" % \
        (self, output_limit, tail)
def user_dict_from_edit_user_page(self, page):
    """Build a dictionary of user fields from an edit-user page.

    Every labelled row of the table with class "main" contributes one entry
    keyed by the label's ``for`` attribute. Rows whose key contains "group"
    are collected under the single key ``"groups"`` as a set holding the
    names of the groups whose checkbox is checked.
    """
    soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
    main_table = soup.find("table", {'class': 'main'})
    parsed = {}
    for table_row in main_table('tr'):
        label = table_row.find('label')
        # A row without a label is not one we know how to parse, and the
        # <tr> holding the groups table is skipped as a whole.
        if not label or table_row.find('table'):
            continue

        field = label['for']
        if "group" not in field:
            parsed[field] = unicode(table_row.find('td').string).strip()
            continue

        # We must be parsing a "tr" inside the inner group table.
        groups = parsed.get('groups', set())
        group_name, _ = self._group_name_and_string_from_row(table_row)
        checked_box = table_row.find(
            'input', {'type': 'checkbox', 'checked': 'checked'})
        if checked_box:
            groups.add(group_name)
        parsed["groups"] = groups
    return parsed
def _parse_bugs_from_xml(self, page):
    """Return one ``Bug`` for every ``bug`` element found in ``page``."""
    soup = BeautifulSoup(page)
    bugs = []
    for bug_element in soup('bug'):
        # Without the unicode() call, BeautifulSoup occasionally complains of
        # being passed None for no apparent reason.
        bug_dictionary = self._parse_bug_dictionary_from_xml(
            unicode(bug_element))
        bugs.append(Bug(bug_dictionary, self))
    return bugs
def create_bug(self, bug_title, bug_description, component=None, diff=None,
               patch_description=None, cc=None, blocked=None, assignee=None,
               mark_for_review=False, mark_for_commit_queue=False):
    """Fill in and submit the "Create" bug form, returning the new bug id.

    Args:
        bug_title: Value for the form's "short_desc" field.
        bug_description: Value for the form's "comment" field.
        component: Component name. Defaults to "New Bugs"; if the name is
            not among the form's component items, the user is prompted to
            pick one.
        diff: Optional patch contents (must be ``str``) to attach.
        patch_description: Description passed along with ``diff``.
        cc: Optional value for the "cc" field.
        blocked: Optional value for the "blocked" field (converted with
            ``unicode()``).
        assignee: Optional assignee; defaults to ``self.username``. Only
            applied when the "assigned_to" control is not disabled.
        mark_for_review: Forwarded to the attachment form when ``diff`` is
            given.
        mark_for_commit_queue: When true (and ``diff`` is given) the
            attachment is flagged for the commit queue.

    Returns:
        The bug id extracted from the submit response by
        ``_check_create_bug_response``.

    Raises:
        AssertionError: if ``diff`` is given but is not a ``str``.
    """
    self.authenticate()

    _log.info('Creating bug with title "%s"' % bug_title)
    self.open_url(config_urls.bug_server_url + "enter_bug.cgi?product=WebKit")
    self.browser.select_form(name="Create")
    component_items = self.browser.find_control('component').items
    # Materialize a real list: on Python 3 map() yields a one-shot iterator,
    # which the membership test below would exhaust before the names are
    # handed to the prompt.
    component_names = [item.name for item in component_items]
    if not component:
        component = "New Bugs"
    if component not in component_names:
        component = User.prompt_with_list("Please pick a component:",
                                          component_names)
    self.browser["component"] = [component]
    if cc:
        self.browser["cc"] = cc
    if blocked:
        self.browser["blocked"] = unicode(blocked)
    if not assignee:
        assignee = self.username
    if assignee and not self.browser.find_control("assigned_to").disabled:
        self.browser["assigned_to"] = assignee
    self.browser["short_desc"] = bug_title
    self.browser["comment"] = bug_description

    if diff:
        # _fill_attachment_form expects a file-like object.
        # Patch files are already binary, so no encoding needed.
        assert (isinstance(diff, str))
        patch_file_object = StringIO(diff)
        commit_flag = CommitQueueFlag.mark_for_nothing
        if mark_for_commit_queue:
            commit_flag = CommitQueueFlag.mark_for_commit_queue

        self._fill_attachment_form(patch_description,
                                   patch_file_object,
                                   mark_for_review=mark_for_review,
                                   commit_flag=commit_flag,
                                   is_patch=True)

    response = self.browser.submit()

    bug_id = self._check_create_bug_response(response.read())
    _log.info("Bug %s created." % bug_id)
    _log.info("%sshow_bug.cgi?id=%s" % (config_urls.bug_server_url, bug_id))
    return bug_id
def test_unicode(self):
    """unicode() of a Contributor with a non-ASCII name yields 'name <email>'."""
    actual = unicode(Contributor(u'Michael Br\u00fcning', ['*****@*****.**']))
    expected = string_utils.encode(
        u'Michael Br\u00fcning <*****@*****.**>', target_type=unicode)
    self.assertEqual(actual, expected)
def _parse_twisted_file_row(self, file_row): string_or_empty = lambda string: unicode(string) if string else u"" file_cells = file_row.findAll('td') return { "filename": string_or_empty(self._file_cell_text(file_cells[0])), "size": string_or_empty(self._file_cell_text(file_cells[1])), "type": string_or_empty(self._file_cell_text(file_cells[2])), "encoding": string_or_empty(self._file_cell_text(file_cells[3])), }
def _parse_builder_status_from_row(self, status_row): status_cells = status_row.findAll('td') builder = {} # First cell is the name name_link = status_cells[0].find('a') builder["name"] = unicode(name_link.string) self._parse_last_build_cell(builder, status_cells[1]) self._parse_current_build_cell(builder, status_cells[2]) return builder
def _full_record_and_nick(self, contributor): result = '' if contributor.irc_nicknames: result += ' (:%s)' % ', :'.join(contributor.irc_nicknames) if contributor.can_review: result += ' (r)' elif contributor.can_commit: result += ' (c)' return unicode(contributor) + result
def execute(self, nick, args, tool, sheriff):
    """Answer a "who is ...?" style query about contributors.

    The words in ``args`` are joined into a search string and looked up via
    ``CommitterList().contributors_by_search_string``. The reply, addressed
    to ``nick``, depends on how many contributors matched: none, exactly
    one, two to five, or more than five. With no ``args`` the usage text is
    returned instead.
    """
    if not args:
        return self.usage(nick)

    search_string = unicode(" ".join(args))
    # FIXME: We should get the ContributorList off the tool somewhere.
    matches = CommitterList().contributors_by_search_string(search_string)

    if not matches:
        reply = unicode(
            "%s: Sorry, I don't know any contributors matching '%s'.")
        return reply % (nick, search_string)

    match_count = len(matches)
    if match_count > 5:
        reply = unicode(
            "%s: More than 5 contributors match '%s', could you be more specific?"
        )
        return reply % (nick, search_string)

    if match_count != 1:
        # Several candidates: list all of them and let the asker choose.
        candidates = list(map(self._full_record_and_nick, matches))
        candidates_string = join_with_separators(candidates,
                                                 only_two_separator=" or ",
                                                 last_separator=', or ')
        reply = unicode("%s: I'm not sure who you mean?  %s could be '%s'.")
        return reply % (nick, candidates_string, search_string)

    only_match = matches[0]
    if not only_match.irc_nicknames:
        reply = unicode("%s: %s hasn't told me their nick. Boo hoo :-(")
        return reply % (nick, only_match)
    reply = unicode("%s: %s is %s. Why do you ask?")
    return reply % (nick, search_string,
                    self._full_record_and_nick(only_match))
def _login_and_uid_from_row(self, row): first_cell = row.find("td") # The first row is just headers, we skip it. if not first_cell: return None # When there were no results, we have a fake "<none>" entry in the table. if first_cell.find(text="<none>"): return None # Otherwise the <td> contains a single <a> which contains the login name or a single <i> with the string "<none>". anchor_tag = first_cell.find("a") login = unicode(anchor_tag.string).strip() user_id = int(re.search(r"userid=(\d+)", str(anchor_tag['href'])).group(1)) return (login, user_id)
def strip_r_from_svn_revision(self, svn_revision):
    """Drop a leading "r" from a revision such as "r1234".

    When the text form of ``svn_revision`` starts with "r" followed by
    digits, those digits are returned as a string; otherwise the argument is
    returned unchanged.
    """
    found = re.match(r"^r(?P<svn_revision>\d+)", unicode(svn_revision))
    return found.group('svn_revision') if found else svn_revision
def _post_patch_to_ews(self, attachment_id): submit_to_ews_url = '{}/submit-to-ews'.format(self._server_url()) self._browser.open(submit_to_ews_url) self._browser.select_form(name='submit_to_ews') self._browser['patch_id'] = unicode(attachment_id) self._browser.submit()
def __str__(self):
    """Return '"full name" <first email>' encoded to ``str``."""
    name = unicode(self.full_name)
    primary_email = unicode(self.emails[0])
    text = u'"{}" <{}>'.format(name, primary_email)
    return string_utils.encode(text, target_type=str)
def _group_name_and_string_from_row(self, row): label_element = row.find('label') group_string = unicode(label_element['for']) group_name = unicode(label_element.find('strong').string).rstrip(':') return (group_name, group_string)
def __unicode__(self): return u'"{}" <{}>'.format(unicode(self.full_name), unicode(self.emails[0]))
def test_unicode(self):
    """unicode() leaves an already-unicode, non-ASCII string unchanged."""
    converted = unicode(u'unicode: \u00E9')
    self.assertEqual(converted, u'unicode: \u00E9')
def _parse_quips(self, page):
    """Return the text of every <li> in the list after "Existing quips:"."""
    soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
    heading = soup.find(text=re.compile(r"Existing quips:"))
    quip_items = heading.findNext("ul").findAll("li")
    quips = []
    for item in quip_items:
        quips.append(unicode(item.string))
    return quips