コード例 #1
0
def getText(url):
    """Download *url* and return its extracted text, stripped of outer whitespace.

    The response body is decoded as UTF-8 and handed to ``extMainText``
    (defined elsewhere; presumably it extracts the page's main text --
    confirm against its definition).

    Args:
        url: Address of the page to download.

    Returns:
        The result of ``extMainText`` with leading and trailing whitespace
        removed.

    Raises:
        UnicodeDecodeError: If the downloaded bytes are not valid UTF-8.
    """
    from contextlib import closing

    # Close the response explicitly instead of relying on garbage collection,
    # so the underlying connection is released even if read() fails.
    with closing(urllib.urlopen(url)) as response:
        raw = response.read()

    page = raw.decode("utf-8")
    # NOTE: a former post-processing step that cut the text off at its longest
    # run of two or more whitespace characters is intentionally disabled.
    return extMainText(page).strip()
コード例 #2
0
def getText(url):
    """Fetch the page at *url* and return its extracted, stripped text.

    The downloaded bytes are decoded as UTF-8 and passed to ``extMainText``
    (defined elsewhere; presumably a main-content extractor -- confirm
    against its definition).

    Args:
        url: Address of the page to fetch.

    Returns:
        The value returned by ``extMainText`` with surrounding whitespace
        stripped.

    Raises:
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    from contextlib import closing

    # Ensure the response is closed once the body has been read; the
    # original left the connection open until garbage collection.
    with closing(urllib.urlopen(url)) as response:
        raw = response.read()

    page = raw.decode("utf-8")
    return extMainText(page).strip()