def get_raw_disputes(url):
    """Fetch *url* and return up to 4 top dispute tuples with no repeated parties.

    NOTE(review): this def is immediately shadowed by an identical one below;
    consider deleting one of the two.

    Returns a list of dispute tuples as produced by r.get_sorted_claims
    (index 0 is a score; indexes 3 and 4 appear to identify the two sides
    of the dispute — TODO confirm against r's implementation).
    On any failure (network, parse) returns [].
    """
    try:
        # 2-second timeout; cap the download at 200 KB.
        htmlcontent = urllib2.urlopen(url, None, 2).read(200000)
        text = h.html_to_text(htmlcontent)
        matches = r.get_sorted_claims(text)
        # Keep only positively-scored disputes, at most the top 4.
        disputes = [d for d in matches if d[0] > 0][:4]
        unique = []
        used = set()
        for dispute in disputes:
            # Drop a dispute if either of its parties already appeared.
            if dispute[3] not in used and dispute[4] not in used:
                used.add(dispute[3])
                used.add(dispute[4])
                unique.append(dispute)
        return unique
    except Exception:
        # Best-effort scraper: any failure yields an empty result,
        # but no longer swallows SystemExit/KeyboardInterrupt.
        return []
def get_raw_disputes(url):
    """Fetch *url* and return up to 4 top dispute tuples with no repeated parties.

    NOTE(review): duplicate of the def directly above — this one wins at
    import time; the pair should be collapsed to a single definition.

    Returns a list of dispute tuples as produced by r.get_sorted_claims
    (index 0 is a score; indexes 3 and 4 appear to identify the two sides
    of the dispute — TODO confirm against r's implementation).
    On any failure (network, parse) returns [].
    """
    try:
        # 2-second timeout; cap the download at 200 KB.
        htmlcontent = urllib2.urlopen(url, None, 2).read(200000)
        text = h.html_to_text(htmlcontent)
        matches = r.get_sorted_claims(text)
        # Keep only positively-scored disputes, at most the top 4.
        disputes = [d for d in matches if d[0] > 0][:4]
        unique = []
        used = set()
        for dispute in disputes:
            # Drop a dispute if either of its parties already appeared.
            if dispute[3] not in used and dispute[4] not in used:
                used.add(dispute[3])
                used.add(dispute[4])
                unique.append(dispute)
        return unique
    except Exception:
        # Best-effort scraper: any failure yields an empty result,
        # but no longer swallows SystemExit/KeyboardInterrupt.
        return []
def get_page_disputes(url, pages):
    """Render "disputed_box" templates for up to 4 unique disputes in pages[url].

    NOTE(review): shadowed by the more general def below (which makes
    *pages* optional); the two should be merged.

    pages -- mapping from url to pre-fetched HTML content.
    Returns the rendered boxes joined by a single space, or "" on any failure
    (missing url key, parse error, template error).
    """
    try:
        htmlcontent = pages[url]
        text = h.html_to_text(htmlcontent)
        matches = r.get_sorted_claims(text)
        # Keep only positively-scored disputes, at most the top 4.
        disputes = [d for d in matches if d[0] > 0][:4]
        unique = []
        used = set()
        for dispute in disputes:
            # Drop a dispute if either of its parties (indexes 3 and 4 —
            # TODO confirm semantics) already appeared in a kept dispute.
            if dispute[3] not in used and dispute[4] not in used:
                used.add(dispute[3])
                used.add(dispute[4])
                unique.append(dispute)
        boxes = [template("disputed_box", dispute=d[1]) for d in unique]
        return " ".join(boxes)
    except Exception:
        # Best-effort: any failure yields an empty string,
        # but no longer swallows SystemExit/KeyboardInterrupt.
        return ""
def get_page_disputes(url, pages=None):
    """Render "disputed_box" templates for up to 4 unique disputes found at *url*.

    pages -- optional mapping from url to pre-fetched HTML; when falsy (None
             or empty), the page is fetched live with a 2-second timeout and
             a 200 KB size cap.
    Returns the rendered boxes joined by a single space, or "" on any failure
    (network error, missing url key, parse error, template error).
    """
    try:
        if pages:
            htmlcontent = pages[url]
        else:
            # Live fetch: 2-second timeout, at most 200 KB of content.
            htmlcontent = urllib2.urlopen(url, None, 2).read(200000)
        text = h.html_to_text(htmlcontent)
        matches = r.get_sorted_claims(text)
        # Keep only positively-scored disputes, at most the top 4.
        disputes = [d for d in matches if d[0] > 0][:4]
        unique = []
        used = set()
        for dispute in disputes:
            # Drop a dispute if either of its parties (indexes 3 and 4 —
            # TODO confirm semantics) already appeared in a kept dispute.
            if dispute[3] not in used and dispute[4] not in used:
                used.add(dispute[3])
                used.add(dispute[4])
                unique.append(dispute)
        boxes = [template("disputed_box", dispute=d[1]) for d in unique]
        return " ".join(boxes)
    except Exception:
        # Best-effort: any failure yields an empty string,
        # but no longer swallows SystemExit/KeyboardInterrupt.
        return ""