def main():
    """Collect the personal-website links of cnbloggercon2007 attendees.

    Fetches the event page, keeps every anchor whose href starts with
    "/people/", loads each attendee's profile page and takes the href of
    the first anchor found after the personal-website marker text in the
    page content.  The collected links are written to "abcde.txt", one per
    line, each followed by a CR LF pair.

    Anchors that carry no href attribute, and profiles with no usable
    anchor after the marker, are skipped rather than aborting the run.
    """
    base_url = "http://events.cnbloggercon.org"
    mainUrl = base_url + "/event/cnbloggercon2007/"
    content = Page(mainUrl).Content
    soup = BeautifulSoup(content)

    peopleList = []
    for item in soup("a"):
        # item["href"] raises KeyError for anchors without an href
        # (e.g. named anchors), so look the attribute up defensively.
        href = item.get("href")
        if href and href.lower()[:8] == "/people/":
            peopleList.append(base_url + href + "profile/")

    pageLink = []
    for link in peopleList:
        print(link)
        # NOTE(review): the encode/str/lower chain relies on Python 2
        # string semantics (encode() yielding a plain str) -- confirm
        # before porting to Python 3.
        ct = Page(str(link.encode("utf-8")).lower()).Content
        pos = ct.find("个人网站")
        if pos == -1:
            # Profile has no personal-website section.
            continue
        # Only parse from the marker onwards so the first anchor found is
        # the one that follows it.
        anchors = BeautifulSoup(ct[pos:])("a")
        if not anchors:
            continue
        fLink = anchors[0].get("href")
        if fLink is None:
            continue
        pageLink.append(fLink)

    # The context manager guarantees the file is flushed and closed even
    # if a write fails.
    with open("abcde.txt", "w") as f:
        for i in pageLink:
            f.write(i)
            f.write("\r\n")
def get(self, slug):
    """Overwrite the content of the first page matching ``slug``.

    Looks the page up with ``Page.find(slug=slug)``, takes the first
    result, sets its ``content`` to a fixed placeholder string, saves it
    and returns ``repr()`` of the page.

    Indexing the first result raises if the lookup yields nothing.
    """
    matches = Page.find(slug=slug)
    target = matches[0]
    target.content = "This is the content"
    target.save()
    return repr(target)
def get_page(year, month, day, slug):
    """Find pages dated ``year``/``month``/``day`` whose slug equals ``slug``.

    Builds a predicate over a page and hands it to ``Page.find``; whatever
    ``Page.find`` returns is passed straight back to the caller.
    """

    def _is_requested(candidate):
        # Date components are checked in year, month, day order, and the
        # slug is only consulted once the whole date has matched.
        stamp = candidate.date()
        on_requested_day = (
            stamp.year == year
            and stamp.month == month
            and stamp.day == day
        )
        return on_requested_day and candidate.slug() == slug

    return Page.find(_is_requested)