def _do_almost_everything(self, localpath):
  page_data = super(OneBoxAction, self)._do_almost_everything(
      localpath + '/' + ONE_BOX_URL)
  if page_data:
    builders = self.request.GET.getall('builder')
    if builders:
      one_box = BeautifulSoup(page_data['content'])
      all_tds = one_box.findAll('td')
      for td in all_tds:
        if td.a and td.a['title'] not in builders:
          td.extract()
      page_data['content'] = self.ContentsToHtml(one_box)
  return page_data
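# A standalone sketch (not part of the handler above) of the filtering idiom
# _do_almost_everything uses: drop every one-box <td> whose link title is not
# in the requested builder list. The markup below is made up for illustration.
from BeautifulSoup import BeautifulSoup

one_box = BeautifulSoup(
    '<table><tr>'
    '<td><a title="Linux" href="#">ok</a></td>'
    '<td><a title="Mac" href="#">ok</a></td>'
    '</tr></table>')
builders = ['Linux']
for td in one_box.findAll('td'):
  if td.a and td.a['title'] not in builders:
    td.extract()  # removes the cell from the parse tree in place
print unicode(one_box)  # only the Linux cell remains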
def test_html_format(self):
  import gatekeeper_mailer
  template = gatekeeper_mailer.MailTemplate(
      self.build_data['waterfall_url'],
      self.build_data['build_url'],
      self.build_data['project_name'],
      '*****@*****.**')
  _, html_content, _ = template.genMessageContent(self.build_data)
  expected_html = ' '.join(self.read_file('expected.html').splitlines())
  saw = str(BeautifulSoup(html_content)).split()
  expected = str(BeautifulSoup(expected_html)).split()
  self.assertEqual(saw, expected)
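# Why the tests here compare str(BeautifulSoup(...)).split(): parsing and
# re-serializing through BeautifulSoup 3 normalizes tag case, and split()
# discards whitespace differences, so two HTML strings compare equal when
# they differ only in formatting. A minimal illustration:
from BeautifulSoup import BeautifulSoup

assert (str(BeautifulSoup('<p>hello   <b>world</b></p>')).split() ==
        str(BeautifulSoup('<P>hello <B>world</B></P>')).split())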
def AddRow(self, row):
  self.SawRevision(row['rev'], row['rev_number'])
  revlink = BeautifulSoup(row['rev']).a['href']
  self.SetLink(revlink)
  name = BeautifulSoup(row['name'])
  self.SetName(self.ContentsToHtml(name))
  status = BeautifulSoup(row['status']).findAll('table')
  for i, stat in enumerate(status):
    self.SetStatus(self.category_order[self.lastMasterSeen][i],
                   unicode(stat))
  comment = BeautifulSoup(row['comment'])
  self.SetComment(self.ContentsToHtml(comment))
  if row['details']:
    details = BeautifulSoup(row['details'])
    self.SetDetail(self.ContentsToHtml(details))
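# The shape of the row dict AddRow consumes, inferred from the key lookups
# above and from what parse_master (below) saves: every value is an HTML
# fragment except rev_number. The values shown are illustrative only.
example_row = {
    'rev': u'<a href="http://src.chromium.org/...">100000</a>',
    'rev_number': u'100000',
    'name': u'someone@chromium.org',
    'status': u'<table>...</table>',
    'comment': u'Fix a flaky test.',
    'details': u'',
}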
def update_status(master, status_html, status_dict):
  """Parses build status information and saves it to a status dictionary."""
  builder_soup = BeautifulSoup(status_html)
  builders_by_category = builder_soup.findAll('table')
  for i, c in enumerate(data.ordered_categories[master]):
    status_dict[master].setdefault(c, {})
    statuses_by_builder = builders_by_category[i].findAll('td',
                                                          'DevStatusBox')
    # If we didn't get anything, it's because we're parsing the overall
    # summary, so look for Slave boxes instead of Status boxes.
    if not statuses_by_builder:
      statuses_by_builder = builders_by_category[i].findAll('td',
                                                            'DevSlaveBox')
    for j, b in enumerate(data.ordered_builders[master][c]):
      # Save the whole link as the status to keep ETA and build number info.
      status = unicode(statuses_by_builder[j].a)
      status_dict[master][c][b] = status
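# The nesting update_status fills in, with illustrative values: the leaf is
# the builder's whole <a> status link, kept intact to preserve ETA and
# build-number info.
#   status_dict[master][category][builder] -> unicode(<a ...>)
status_dict_example = {
    'chromium': {
        u'linux': {
            u'Linux Builder':
                u'<a href="..." class="DevStatusBox success">4242</a>',
        },
    },
}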
def test_html_format_status(self):
  import gatekeeper_mailer
  status_header = 'Perf alert for "%(steps)s" on "%(builder_name)s"'
  template = gatekeeper_mailer.MailTemplate(
      self.build_data['waterfall_url'],
      self.build_data['build_url'],
      self.build_data['project_name'],
      '*****@*****.**',
      status_header=status_header)
  _, html_content, _ = template.genMessageContent(self.build_data)
  expected_html = ' '.join(
      self.read_file('expected_status.html').splitlines())
  saw = str(BeautifulSoup(html_content)).split()
  expected = str(BeautifulSoup(expected_html)).split()
  self.assertEqual(saw, expected)
def bootstrap(self):
  """Fills an empty MergerData with 100 rows of data."""
  # Populate the categories, masters, status, and failures data.
  for m in self.ordered_masters:
    for d in (self.ordered_builders,
              self.ordered_categories,
              self.status,
              self.failures):
      d.setdefault(m, {})
    # Get the category data and construct the list of categories
    # for this master.
    category_data = app.get_and_cache_pagedata('%s/console/categories' % m)
    if not category_data['content']:
      category_list = [u'default']
    else:
      category_soup = BeautifulSoup(category_data['content'])
      category_list = [tag.string.strip()
                       for tag in category_soup.findAll('td', 'DevStatus')]
    self.ordered_categories[m] = category_list
    # Get the builder status data.
    builder_data = app.get_and_cache_pagedata('%s/console/summary' % m)
    if not builder_data['content']:
      continue
    builder_soup = BeautifulSoup(builder_data['content'])
    builders_by_category = builder_soup.tr.findAll('td', 'DevSlave',
                                                   recursive=False)
    # Construct the list of builders for this category.
    for i, c in enumerate(self.ordered_categories[m]):
      self.ordered_builders[m].setdefault(c, {})
      builder_list = [tag['title']
                      for tag in builders_by_category[i].findAll(
                          'a', 'DevSlaveBox')]
      self.ordered_builders[m][c] = builder_list
    # Fill in the status data for all of this master's builders.
    update_status(m, builder_data['content'], self.status)
    # Copy that status data over into the failures dictionary too.
    for c in self.ordered_categories[m]:
      self.failures[m].setdefault(c, {})
      for b in self.ordered_builders[m][c]:
        if self.status[m][c][b] not in ('success', 'running', 'notstarted'):
          self.failures[m][c][b] = True
        else:
          self.failures[m][c][b] = False
  # Populate the individual row data, saving status info in the same
  # master/category/builder tree format constructed above.
  latest_rev = int(app.get_and_cache_rowdata('latest_rev')['rev_number'])
  if not latest_rev:
    logging.error("MergerData.bootstrap(): Didn't get latest_rev. Aborting.")
    return
  n = latest_rev
  num_rows_saved = num_rows_skipped = 0
  while num_rows_saved < self.SIZE and num_rows_skipped < 10:
    curr_row = RowData()
    for m in self.ordered_masters:
      update_row(n, m, curr_row)
    # If we didn't get any data, that revision doesn't exist, so skip on.
    if not curr_row.revision:
      num_rows_skipped += 1
      n -= 1
      continue
    self.rows[n] = curr_row
    num_rows_skipped = 0
    num_rows_saved += 1
    n -= 1
  self.latest_rev = max(self.rows.keys())
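# The lookup structures bootstrap builds for each master, shown with
# illustrative values; failures mirrors the status tree with a boolean
# per builder.
ordered_categories_example = {'chromium': [u'linux', u'mac']}
ordered_builders_example = {'chromium': {u'linux': [u'Linux Builder']}}
failures_example = {'chromium': {u'linux': {u'Linux Builder': False}}}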
def notstarted(status):
  """Converts a DevSlave status box to a notstarted DevStatus box."""
  status_soup = BeautifulSoup(status)
  # Set the class on the outermost parsed tag: attributes assigned to the
  # BeautifulSoup document root itself are dropped on serialization.
  tag = status_soup.find(True)
  if tag:
    tag['class'] = 'DevStatusBox notstarted'
  return unicode(status_soup)
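# Illustrative input/output for notstarted, assuming the status fragment is
# the <a> link saved by update_status (the markup here is made up):
#   notstarted(u'<a class="DevSlaveBox" href="#">42</a>')
#   -> u'<a class="DevStatusBox notstarted" href="#">42</a>'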
def parse_master(localpath, remoteurl, page_data=None):
  """Parses the master data into a set of rows and stores them individually.

  Part of the new pipeline to store individual rows rather than whole pages
  of html. Writes the rows out to the datastore in an easily retrievable
  format. Does not modify the page_data dict.
  """
  ts = datetime.datetime.now()
  page_data = page_data or {}
  content = page_data.get('content')
  if not content:
    return page_data
  content = content.decode('utf-8', 'replace')
  # Split page into surroundings (announce, legend, footer) and data (rows).
  surroundings = BeautifulSoup(content)
  data = surroundings.find('table', 'ConsoleData')
  if data is None:
    raise Exception('parse_master: data cannot be None')
  new_data = Tag(surroundings, 'table', [('class', 'ConsoleData'),
                                         ('width', '96%')])
  data.replaceWith(new_data)
  surroundings_page = get_or_create_page(localpath + '/surroundings',
                                         None, maxage=30)
  surroundings_data = {}
  surroundings_data['title'] = 'Surroundings for ' + localpath
  surroundings_data['content'] = utf8_convert(surroundings)
  save_page(surroundings_page, localpath + '/surroundings', ts,
            surroundings_data)
  rows = data.findAll('tr', recursive=False)
  # The first table row can be special: the list of categories.
  categories = None
  # If the first row contains a DevStatus cell...
  if rows[0].find('td', 'DevStatus') is not None:
    # ...extract it into the categories...
    categories = rows[0]
    # ...and get rid of the next (spacer) row too.
    rows = rows[2:]
  if categories:
    category_page = get_or_create_page(localpath + '/categories',
                                       None, maxage=30)
    category_data = {}
    category_data['title'] = 'Categories for ' + localpath
    category_data['content'] = utf8_convert(categories)
    save_page(category_page, localpath + '/categories', ts, category_data)
  # The next table row is special, it's the summary one-box-per-builder.
  summary = rows[0]
  rows = rows[1:]
  summary_page = get_or_create_page(localpath + '/summary', None, maxage=30)
  summary_data = {}
  summary_data['title'] = 'Summary for ' + localpath
  summary_data['content'] = utf8_convert(summary)
  save_page(summary_page, localpath + '/summary', ts, summary_data)
  curr_row = {}
  # Each table row is either a status row with a revision, name, and status,
  # a comment row with the commit message, a details row with flakiness
  # info, or a spacer row (in which case we finalize the row and save it).
  for row in rows:
    if row.find('td', 'DevComment'):
      curr_row['comment'] = ''.join(utf8_convert(tag).strip()
                                    for tag in row.td.contents)
    elif row.find('td', 'DevDetails'):
      curr_row['details'] = ''.join(utf8_convert(tag).strip()
                                    for tag in row.td.contents)
    elif row.find('td', 'DevStatus'):
      curr_row['rev'] = ''.join(
          utf8_convert(tag).strip()
          for tag in row.find('td', 'DevRev').contents)
      curr_row['name'] = ''.join(
          utf8_convert(tag).strip()
          for tag in row.find('td', 'DevName').contents)
      curr_row['status'] = ''.join(
          utf8_convert(box.table).strip()
          for box in row.findAll('td', 'DevStatus'))
    else:
      if 'details' not in curr_row:
        curr_row['details'] = ''
      curr_row['fetch_timestamp'] = ts
      curr_row['rev_number'] = get_position_number(curr_row['comment'])
      save_row(curr_row, localpath + '/' + curr_row['rev_number'])
      curr_row = {}
  return page_data
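# How the pages parse_master saves line up with the keys read back by
# console_merger and MergerData.bootstrap, assuming localpath is
# '<master>/console' (inferred from the '%s/console/...' lookups there):
parse_master_outputs = {
    'chromium/console/surroundings':
        'page with the ConsoleData table stubbed out',
    'chromium/console/categories':
        'the DevStatus category header row, if any',
    'chromium/console/summary':
        'the one-box-per-builder summary row',
    'chromium/console/100000':
        'one saved row dict per revision number',
}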
def console_merger(localpath, remoteurl, page_data,
                   masters_to_merge=None, num_rows_to_merge=None):
  masters_to_merge = masters_to_merge or DEFAULT_MASTERS_TO_MERGE
  num_rows_to_merge = num_rows_to_merge or 25
  console_data = ConsoleData()
  surroundings = get_and_cache_pagedata(
      '%s/console/surroundings' % masters_to_merge[0])
  merged_page = BeautifulSoup(surroundings['content'])
  merged_tag = merged_page.find('table', 'ConsoleData')
  if merged_tag is None:
    msg = 'console_merger("%s", "%s", "%s"): merged_tag cannot be None.' % (
        localpath, remoteurl, page_data)
    logging.error(msg)
    raise Exception(msg)
  latest_rev = int(get_and_cache_rowdata('latest_rev')['rev_number'])
  if not latest_rev:
    logging.error('console_merger(\'%s\', \'%s\', \'%s\'): cannot get '
                  'latest revision number.' % (
                      localpath, remoteurl, page_data))
    return
  fetch_timestamp = datetime.datetime.now()
  for master in masters_to_merge:
    # Fetch the summary one-box-per-builder for the master.
    # If we don't get it, something is wrong, skip the master entirely.
    master_summary = get_and_cache_pagedata('%s/console/summary' % master)
    if not master_summary['content']:
      continue
    console_data.SawMaster(master)
    # Get the categories for this builder. If the builder doesn't have any
    # categories, just use the default empty-string category.
    category_list = []
    master_categories = get_and_cache_pagedata(
        '%s/console/categories' % master)
    if not master_categories['content']:
      category_list.append('')
    else:
      category_row = BeautifulSoup(master_categories['content'])
      category_list = [c.text
                       for c in category_row.findAll('td', 'DevStatus')]
    # Get the corresponding summary box(es).
    summary_row = BeautifulSoup(master_summary['content'])
    summary_list = summary_row.findAll('table')
    for category, summary in zip(category_list, summary_list):
      console_data.AddCategory(category, summary)
    # Fetch all of the rows that we need.
    rows_fetched = 0
    revs_skipped = 0
    current_rev = latest_rev
    while rows_fetched < num_rows_to_merge and current_rev >= 0:
      # Don't get stuck looping backwards forever into data we don't have.
      # How hard we try scales with how many rows the person wants.
      if revs_skipped > max(num_rows_to_merge, 10):
        break
      row_data = get_and_cache_rowdata('%s/console/%s' % (master,
                                                          current_rev))
      if not row_data:
        current_rev -= 1
        revs_skipped += 1
        continue
      console_data.AddRow(row_data)
      current_rev -= 1
      revs_skipped = 0
      rows_fetched += 1
  # Convert the merged content into console content.
  console_data.Finish()
  template_environment = Environment()
  template_environment.loader = FileSystemLoader('.')

  def notstarted(builder_status):
    """Convert a BeautifulSoup Tag from builder status to a notstarted
    line."""
    builder_status = re.sub(r'DevSlaveBox', 'DevStatusBox',
                            str(builder_status))
    builder_status = re.sub(r'class=\'([^\']*)\' target=',
                            'class=\'DevStatusBox notstarted\' target=',
                            builder_status)
    builder_status = re.sub(r'class="([^"]*)" target=',
                            'class="DevStatusBox notstarted" target=',
                            builder_status)
    return builder_status

  template_environment.filters['notstarted'] = notstarted
  merged_template = template_environment.from_string(console_template)
  merged_console = merged_template.render(data=console_data)
  # For debugging:
  # logging.info('%r' % merged_console)
  # import code
  # code.interact(local=locals())
  # Place merged console at |merged_tag|'s location in |merged_page|, and
  # put the result in |merged_content|.
  merged_tag.replaceWith(merged_console)
  merged_content = utf8_convert(merged_page)
  merged_content = re.sub(r'\'\<a href="\'',
                          '\'<a \' + attributes + \' href="\'',
                          merged_content)
  merged_content = re.sub(r'\'\<table\>\'',
                          r"'<table ' + attributes + '>'",
                          merged_content)
  merged_content = re.sub(r'\'\<div\>\'',
                          r"'<div ' + attributes + '>'",
                          merged_content)
  merged_content = re.sub(r'\'\<td\>\'',
                          r"'<td ' + attributes + '>'",
                          merged_content)
  merged_content = re.sub(
      r'\<iframe\>\</iframe\>',
      '<iframe \' + attributes + \' src="\' + url + \'"></iframe>',
      merged_content)
  # Update the merged console page.
  merged_page = get_or_create_page(localpath, None, maxage=30)
  logging.info('console_merger: saving merged console')
  page_data = get_and_cache_pagedata(localpath)
  page_data['title'] = 'BuildBot: Chromium'
  page_data['offsite_base'] = 'http://build.chromium.org/p/chromium'
  page_data['body_class'] = 'interface'
  page_data['content'] = merged_content
  save_page(merged_page, localpath, fetch_timestamp, page_data)
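# A minimal sketch of how a Jinja2 filter like the notstarted one above is
# wired up and applied; the template string here is hypothetical, not the
# real console_template.
from jinja2 import Environment

env = Environment()
env.filters['notstarted'] = notstarted
print env.from_string(u'{{ box|notstarted }}').render(
    box=u'<a class="DevSlaveBox" target="_blank" href="#">42</a>')
# -> <a class="DevStatusBox notstarted" target="_blank" href="#">42</a>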