def _parse():
    """Yield one ``BugInfo`` per row of the ``buglisting`` table.

    Uses ``xmldoc``, ``debug_url`` and ``all_tasks`` from the enclosing
    scope.  Nothing is yielded when the page shows the "There are currently
    no open bugs." or "No results for search" notice.

    Raises:
        AssertionError: if the table header does not add up to six or
            seven columns.
    """
    if (xmldoc.xpathEval('//div/p[contains(.,"There are currently no open bugs.")]')
            or xmldoc.xpathEval('//div/p[contains(.,"No results for search")]')):
        # End the generator with a plain return: an explicit
        # ``raise StopIteration`` inside a generator is converted into
        # RuntimeError by PEP 479 (default since Python 3.7).
        return

    # count number of columns
    # Bug-Pages have seven columns: icon|nr|summary(url)|icon(milestone|branch|blueprint)|package|importance|status
    # personal Bug-Pages have six columns: icon|nr|summary(url)|package|importance|status
    # TODO: look for more simple XPath-statements
    col = int(xmldoc.xpathEval('count(//table[@id="buglisting"]//thead//tr//th[not(@*)])'))
    for span in xmldoc.xpathEval('//table[@id="buglisting"]//thead//tr//@colspan'):
        col += int(span.content)
    if col not in (6, 7):
        # Raised explicitly (instead of ``assert``) so the check survives
        # ``python -O``; the exception type is unchanged for callers.
        raise AssertionError(
            "Parsing of this page (%s) is not "
            "supported by python-launchpad-bugs" % debug_url)

    bug_table_rows = xmldoc.xpathEval('//table[@id="buglisting"]//tbody//tr')
    for row in bug_table_rows:
        out = []
        # td[1] is skipped; td[3] holds the summary link and contributes
        # two values (href, then link text), every other cell one value.
        for i in range(2, col + 1):
            if i == 3:
                expr = 'td[' + str(i) + ']//a'
            else:
                expr = 'td[' + str(i) + ']/text()'
            res = row.xpathEval(expr)
            parse_error(res, "BugPage.parse_html_bugpage._parse.row[%s]" % i,
                        xml=row, url=debug_url)
            if i == 3:
                out.append(res[0].prop("href"))
            out.append(res[0].content)
        # out is [nr, url, summary, td4, ...]; drop td4 (the icon cell on
        # seven-column pages).
        out.pop(3)
        # package is optional, move package to the end of the list
        if len(out) == 6:
            out.append(out.pop(3))
        else:
            out.append(None)
        # out is now [nr, url, summary, importance, status, package]
        yield BugInfo(out[0], out[1], out[4], out[3], out[2], out[5], all_tasks)
def parse_html_blueprintpage(xmldoc, all_tasks, url):
    """Parse one page of a blueprint ("speclisting") table.

    Args:
        xmldoc: parsed document offering ``xpathEval`` (and ``freeDoc``).
        all_tasks: accepted for signature parity with the bug-page parsers;
            not used by this function.
        url: URL of the page, passed to ``parse_error`` for diagnostics.

    Returns:
        A 4-tuple ``(rows, next_url, batchsize, length)``: ``rows`` is a
        generator of ``BPInfo`` objects, ``next_url`` is the href of the
        ``rel="next"`` link or ``False`` when there is none, ``batchsize``
        is the number of entries on this page and ``length`` the total
        number of results (both ``0`` when the page has no batch
        navigation index).

    Raises:
        AssertionError: while iterating ``rows``, if a row lacks one of the
            required cells (priority, link, status, delivery).
    """

    def _required(row, expr):
        # Return the first node matching ``expr`` below ``row``.  Raised
        # explicitly rather than via ``assert`` so that the check is not
        # stripped under ``python -O``.
        matches = row.xpathEval(expr)
        if not matches:
            raise AssertionError(
                "blueprint row has no node matching %r" % expr)
        return matches[0]

    def _parse():
        if not xmldoc.xpathEval('//table[@id="speclisting"]'):
            xmldoc.freeDoc()
            return
        blueprinttable = xmldoc.xpathEval('//table[@id="speclisting"]//tbody//tr')
        for row in blueprinttable:
            priority = _required(row, 'td[1]//span[not(@class="sortkey")]').prop("class")

            link = _required(row, 'td[2]//a')
            spec_url = link.prop("href")
            title = link.prop("title")
            spec = link.content
            mentorship = bool(row.xpathEval('td[2]//img[@alt="mentoring"]'))

            #add INFORMATIONAL
            status = _required(row, 'td[3]//span[not(@class="sortkey")]').prop("class")
            delivery = _required(row, 'td[4]//span[not(@class="sortkey")]').prop("class")

            m = row.xpathEval('td[5]//a')
            if m:
                assignee = user.parse_html_user(m[0])
            else:
                assignee = user(None)

            m = row.xpathEval('td[6]//a')
            # on personal blueprint pages this column does not exist
            if m:
                project = Project(m[0].prop("href"), m[0].content)
            else:
                project = None

            yield BPInfo(priority, spec, title, spec_url, status, delivery,
                         assignee, project, mentorship)

    next_link = xmldoc.xpathEval('//div[@class="lesser"]//a[@rel="next"]//@href')
    m = xmldoc.xpathEval('//td[@class="batch-navigation-index"]')
    if m:
        m = m.pop()
        n = re.search(r'(\d+)\s+results?', m.content)
        # The label is kept verbatim; it is the same one the bug-page
        # parsers pass for this check.
        parse_error(n, "BugPage.parse_html_bugpage.length", url=url)
        length = n.group(1)
        # The two <strong> elements are read as first and last index of
        # the current batch.
        n = m.xpathEval("strong")
        batchsize = int(n[1].content) - int(n[0].content) + 1
    else:
        length = batchsize = 0
    if next_link:
        return _parse(), next_link[0].content, batchsize, int(length)
    return _parse(), False, batchsize, int(length)
def parse_html_bugpage(xmldoc, all_tasks, debug_url):
    """Parse one page of a bug listing (``buglisting`` table).

    Args:
        xmldoc: parsed document offering ``xpathEval``.
        all_tasks: passed through unchanged to every ``BugInfo``.
        debug_url: URL of the page, used in error messages and passed to
            ``parse_error`` for diagnostics.

    Returns:
        A 4-tuple ``(rows, next_url, batchsize, length)``: ``rows`` is a
        generator of ``BugInfo`` objects, ``next_url`` is the href of the
        ``rel="next"`` link or ``False`` when there is none, ``batchsize``
        is the number of entries on this page and ``length`` the total
        number of results (both ``0`` when the page has no batch
        navigation index).

    Raises:
        AssertionError: while iterating ``rows``, if the table header does
            not add up to six or seven columns.
    """

    def _parse():
        if (xmldoc.xpathEval('//div/p[contains(.,"There are currently no open bugs.")]')
                or xmldoc.xpathEval('//div/p[contains(.,"No results for search")]')):
            # End the generator with a plain return: an explicit
            # ``raise StopIteration`` inside a generator is converted into
            # RuntimeError by PEP 479 (default since Python 3.7).
            return

        # count number of columns
        # Bug-Pages have seven columns: icon|nr|summary(url)|icon(milestone|branch|blueprint)|package|importance|status
        # personal Bug-Pages have six columns: icon|nr|summary(url)|package|importance|status
        # TODO: look for more simple XPath-statements
        col = int(xmldoc.xpathEval('count(//table[@id="buglisting"]//thead//tr//th[not(@*)])'))
        for span in xmldoc.xpathEval('//table[@id="buglisting"]//thead//tr//@colspan'):
            col += int(span.content)
        if col not in (6, 7):
            # Raised explicitly (instead of ``assert``) so the check
            # survives ``python -O``; the exception type is unchanged.
            raise AssertionError(
                "Parsing of this page (%s) is not "
                "supported by python-launchpad-bugs" % debug_url)

        bug_table_rows = xmldoc.xpathEval('//table[@id="buglisting"]//tbody//tr')
        for row in bug_table_rows:
            out = []
            # td[1] is skipped; td[3] holds the summary link and
            # contributes two values (href, then link text), every other
            # cell one value.
            for i in range(2, col + 1):
                if i == 3:
                    expr = 'td[' + str(i) + ']//a'
                else:
                    expr = 'td[' + str(i) + ']/text()'
                res = row.xpathEval(expr)
                parse_error(res, "BugPage.parse_html_bugpage._parse.row[%s]" % i,
                            xml=row, url=debug_url)
                if i == 3:
                    out.append(res[0].prop("href"))
                out.append(res[0].content)
            # out is [nr, url, summary, td4, ...]; drop td4 (the icon cell
            # on seven-column pages).
            out.pop(3)
            # package is optional, move package to the end of the list
            if len(out) == 6:
                out.append(out.pop(3))
            else:
                out.append(None)
            # out is now [nr, url, summary, importance, status, package]
            yield BugInfo(out[0], out[1], out[4], out[3], out[2], out[5], all_tasks)

    next_link = xmldoc.xpathEval('//div[@class="lesser"]//a[@rel="next"]//@href')
    m = xmldoc.xpathEval('//td[@class="batch-navigation-index"]')
    if m:
        m = m.pop()
        n = re.search(r'(\d+)\s+results?', m.content)
        parse_error(n, "BugPage.parse_html_bugpage.length", url=debug_url)
        length = n.group(1)
        # The two <strong> elements are read as first and last index of
        # the current batch.
        n = m.xpathEval("strong")
        batchsize = int(n[1].content) - int(n[0].content) + 1
    else:
        length = batchsize = 0
    if next_link:
        return _parse(), next_link[0].content, batchsize, int(length)
    return _parse(), False, batchsize, int(length)
def parse_html_expirable_bugpage(xmldoc, all_tasks, debug_url):
    """Parse one page of the expirable-bugs listing.

    Args:
        xmldoc: parsed document offering ``xpathEval``.
        all_tasks: passed through unchanged to every ``ExpBugInfo``.
        debug_url: URL of the page, passed to ``parse_error``.

    Returns:
        ``(rows, next_url, batchsize, length)`` where ``rows`` is a
        generator of ``ExpBugInfo`` objects, ``next_url`` is the href of
        the ``rel="next"`` link or ``False``, and ``batchsize`` / ``length``
        come from the batch navigation index (``0`` when it is absent).
    """

    def _checked(row, expr, label):
        # Evaluate ``expr`` below ``row`` and report the result to
        # parse_error under ``label`` before handing it back.
        nodes = row.xpathEval(expr)
        parse_error(nodes, label, xml=row, url=debug_url)
        return nodes

    def _parse():
        listing = xmldoc.xpathEval('//table[@class="listing" and @id="buglisting"]//tbody//tr')
        for row in listing:
            col_count = len(row.xpathEval("td"))
            parse_error(4 < col_count < 7, "BugPage.parse_html_expirable_bugpage.col_count", xml=row, url=debug_url)

            icon = _checked(row, "td[1]/img", "BugPage.parse_html_expirable_bugpage.importance")
            # first word of the icon title is used as the importance
            importance = icon[0].prop("title").split()[0]

            number_cell = _checked(row, "td[2]", "BugPage.parse_html_expirable_bugpage.bugnumber")
            bugnumber = int(number_cell[0].content)

            link = _checked(row, "td[3]/a", "BugPage.parse_html_expirable_bugpage.url")
            url = link[0].prop("href")
            summary = link[0].content

            lock = row.xpathEval("td[4]/img")
            private = bool(lock) and lock[0].prop("alt").lower() == "private"

            if col_count == 6:
                package_cell = _checked(row, "td[5]", "BugPage.parse_html_expirable_bugpage.package")
                package = package_cell[0].content
                # a cell holding only these three bytes means "no package"
                if package == '\xe2\x80\x94':
                    package = None
                updated = _checked(row, "td[6]", "BugPage.parse_html_expirable_bugpage.date_last_update.1")
                date_last_update = LPTime(updated[0].content)
            elif col_count == 5:
                package = None #this should be the package related to given url
                updated = _checked(row, "td[5]", "BugPage.parse_html_expirable_bugpage.date_last_update.2")
                date_last_update = LPTime(updated[0].content)

            yield ExpBugInfo(bugnumber, url, importance, summary, private,
                             package, date_last_update, all_tasks)

    next_href = xmldoc.xpathEval('//td[@class="batch-navigation-links"]//a[@rel="next"]//@href')
    index_cells = xmldoc.xpathEval('//td[@class="batch-navigation-index"]')
    if not index_cells:
        length = batchsize = 0
    else:
        index_cell = index_cells.pop()
        found = re.search(r'(\d+)\s+results?', index_cell.content)
        parse_error(found, "BugPage.parse_html_bugpage.length", url=debug_url)
        length = found.group(1)
        # the two <strong> elements are read as first/last index of the batch
        bounds = index_cell.xpathEval("strong")
        batchsize = int(bounds[1].content) - int(bounds[0].content) + 1

    next_url = next_href[0].content if next_href else False
    return _parse(), next_url, batchsize, int(length)
def parse_html_bugtracker_bugpage(xmldoc, all_tasks, debug_url):
    """Parse one page of a bug tracker's "latestwatches" table.

    Args:
        xmldoc: parsed document offering ``xpathEval``.
        all_tasks: passed through unchanged to every ``BugInfoWatches``.
        debug_url: URL of the page, passed to ``parse_error``.

    Returns:
        ``(rows, next_url, batchsize, length)`` where ``rows`` is a
        generator of ``BugInfoWatches`` objects, ``next_url`` is the href
        of the ``rel="next"`` link or ``False``, and ``batchsize`` /
        ``length`` come from the batch navigation index (``0`` when it is
        absent).
    """

    def _parse():
        watch_rows = xmldoc.xpathEval('//table[@class="sortable listing" and @id="latestwatches"]/tbody//tr')
        for row in watch_rows:
            cells = row.xpathEval("td")
            parse_error(len(cells) == 3, "BugPage.parse_html_bugtracker_bugpage.len_td=%s" %len(cells), xml=row, url=debug_url)

            lp_link = cells[0].xpathEval("a")
            if lp_link:
                # first cell links to the Launchpad bug
                is_private = False
                lp_url = lp_link[0].prop("href")
                lp_bugnr = int(lp_url.split("/")[-1])
                lp_title = lp_link[0].content.split(":", 1)[-1].strip("\n ")

                watch_link = cells[1].xpathEval("a")
                parse_error(watch_link, "BugPage.parse_html_bugtracker_bugpage.watch_url", xml=row, url=debug_url)
                watch_url = watch_link[0].prop("href")
                watch_bugnr = watch_link[0].content
                watch_status = cells[2].content
            else:
                # no link: the cell text must carry the "(Private)" marker;
                # only the bug number (between '#' and ':') is extracted
                text = cells[0].content
                parse_error("(Private)" in text, "BugPage.parse_html_bugtracker_bugpage.private", xml=row, url=debug_url)
                is_private = True
                lp_title = watch_url = watch_bugnr = watch_status = None
                after_hash = text.split("#")[-1]
                lp_bugnr = int(after_hash.split(":")[0])
                lp_url = bugnumber_to_url(lp_bugnr)

            yield BugInfoWatches(
                lp_url, lp_bugnr, lp_title,
                watch_url, watch_bugnr, watch_status,
                is_private, all_tasks)

    next_href = xmldoc.xpathEval('//td[@class="batch-navigation-links"]//a[@rel="next"]//@href')
    index_cells = xmldoc.xpathEval('//td[@class="batch-navigation-index"]')
    if not index_cells:
        length = batchsize = 0
    else:
        index_cell = index_cells.pop()
        found = re.search(r'(\d+)\s+results?', index_cell.content)
        parse_error(found, "BugPage.parse_html_bugpage.length", url=debug_url)
        length = found.group(1)
        # the two <strong> elements are read as first/last index of the batch
        bounds = index_cell.xpathEval("strong")
        batchsize = int(bounds[1].content) - int(bounds[0].content) + 1

    next_url = next_href[0].content if next_href else False
    return _parse(), next_url, batchsize, int(length)
def _parse():
    """Yield a ``BugInfoWatches`` for every row of the "latestwatches" table.

    Uses ``xmldoc``, ``debug_url`` and ``all_tasks`` from the enclosing
    scope.  Rows whose first cell contains a link describe a visible bug
    and its remote watch; rows without a link are handled as private bugs,
    for which only the bug number and URL are filled in.
    """
    watch_rows = xmldoc.xpathEval('//table[@class="sortable listing" and @id="latestwatches"]/tbody//tr')
    for row in watch_rows:
        cells = row.xpathEval("td")
        parse_error(len(cells) == 3, "BugPage.parse_html_bugtracker_bugpage.len_td=%s" %len(cells), xml=row, url=debug_url)

        lp_link = cells[0].xpathEval("a")
        if lp_link:
            # first cell links to the Launchpad bug
            is_private = False
            lp_url = lp_link[0].prop("href")
            lp_bugnr = int(lp_url.split("/")[-1])
            lp_title = lp_link[0].content.split(":", 1)[-1].strip("\n ")

            watch_link = cells[1].xpathEval("a")
            parse_error(watch_link, "BugPage.parse_html_bugtracker_bugpage.watch_url", xml=row, url=debug_url)
            watch_url = watch_link[0].prop("href")
            watch_bugnr = watch_link[0].content
            watch_status = cells[2].content
        else:
            # no link: the cell text must carry the "(Private)" marker;
            # only the bug number (between '#' and ':') is extracted
            text = cells[0].content
            parse_error("(Private)" in text, "BugPage.parse_html_bugtracker_bugpage.private", xml=row, url=debug_url)
            is_private = True
            lp_title = watch_url = watch_bugnr = watch_status = None
            after_hash = text.split("#")[-1]
            lp_bugnr = int(after_hash.split(":")[0])
            lp_url = bugnumber_to_url(lp_bugnr)

        yield BugInfoWatches(
            lp_url, lp_bugnr, lp_title,
            watch_url, watch_bugnr, watch_status,
            is_private, all_tasks)
def _parse():
    """Yield a ``BugInfo`` for every row of the ``milestone_bugtasks`` table.

    Uses ``xmldoc``, ``debug_url`` and ``all_tasks`` from the enclosing
    scope.  The package slot of each ``BugInfo`` is ``None`` and the parsed
    assignee is attached afterwards as the ``assignee`` attribute.
    """
    for row in xmldoc.xpathEval('//table[@id="milestone_bugtasks"]//tbody//tr'):
        icon = row.xpathEval('td[1]//span/img')
        parse_error(icon, "BugPage.parse_html_milestone_bugpage.importance", xml=row, url=debug_url)
        # alt text with surrounding parentheses removed, then title-cased
        importance = icon[0].prop("alt").strip("()").title()

        number_cell = row.xpathEval('td[2]')
        parse_error(number_cell, "BugPage.parse_html_milestone_bugpage.nr", xml=row, url=debug_url)
        nr = number_cell[0].content

        link = row.xpathEval('td[3]/a')
        parse_error(link, "BugPage.parse_html_milestone_bugpage.url", xml=row, url=debug_url)
        url = link[0].prop("href")
        summary = link[0].content

        # the assignee link is optional
        assignee_link = row.xpathEval('td[5]//a')
        usr = user.parse_html_user(assignee_link[0]) if assignee_link else user(None)

        status_span = row.xpathEval('td[6]/span[2]')
        parse_error(status_span, "BugPage.parse_html_milestone_bugpage.status", xml=row, url=debug_url)
        status = status_span[0].content

        info = BugInfo(nr, url, status, importance, summary, None, all_tasks)
        info.assignee = usr
        yield info
def _parse():
    """Yield an ``ExpBugInfo`` for every row of the expirable ``buglisting``.

    Uses ``xmldoc``, ``debug_url`` and ``all_tasks`` from the enclosing
    scope.  Rows with six cells carry a package cell before the
    last-update date; rows with five cells carry the date only, and the
    package is reported as ``None``.
    """

    def _checked(row, expr, label):
        # Evaluate ``expr`` below ``row`` and report the result to
        # parse_error under ``label`` before handing it back.
        nodes = row.xpathEval(expr)
        parse_error(nodes, label, xml=row, url=debug_url)
        return nodes

    listing = xmldoc.xpathEval('//table[@class="listing" and @id="buglisting"]//tbody//tr')
    for row in listing:
        col_count = len(row.xpathEval("td"))
        parse_error(4 < col_count < 7, "BugPage.parse_html_expirable_bugpage.col_count", xml=row, url=debug_url)

        icon = _checked(row, "td[1]/img", "BugPage.parse_html_expirable_bugpage.importance")
        # first word of the icon title is used as the importance
        importance = icon[0].prop("title").split()[0]

        number_cell = _checked(row, "td[2]", "BugPage.parse_html_expirable_bugpage.bugnumber")
        bugnumber = int(number_cell[0].content)

        link = _checked(row, "td[3]/a", "BugPage.parse_html_expirable_bugpage.url")
        url = link[0].prop("href")
        summary = link[0].content

        lock = row.xpathEval("td[4]/img")
        private = bool(lock) and lock[0].prop("alt").lower() == "private"

        if col_count == 6:
            package_cell = _checked(row, "td[5]", "BugPage.parse_html_expirable_bugpage.package")
            package = package_cell[0].content
            # a cell holding only these three bytes means "no package"
            if package == '\xe2\x80\x94':
                package = None
            updated = _checked(row, "td[6]", "BugPage.parse_html_expirable_bugpage.date_last_update.1")
            date_last_update = LPTime(updated[0].content)
        elif col_count == 5:
            package = None #this should be the package related to given url
            updated = _checked(row, "td[5]", "BugPage.parse_html_expirable_bugpage.date_last_update.2")
            date_last_update = LPTime(updated[0].content)

        yield ExpBugInfo(bugnumber, url, importance, summary, private,
                         package, date_last_update, all_tasks)