Example #1
import sys


def main():
    # The command-line arguments pick which site's config to use
    try:
        website = sys.argv[1]
        url = sys.argv[2]
    except IndexError:
        print("please choose a website and a URL")
        sys.exit(1)

    # Map the website argument to its config class and instantiate it
    dic = {
        "qidian": Qidian,
        "heiyan": Heiyan,
    }
    if website not in dic:
        print("unknown website:", website)
        sys.exit(1)
    config = dic[website]()

    # Spider extracts the key information from each chapter page
    handler = Spider(config.title, config.content, config.next)

    chapters = config.getList(url)

    # Write every chapter into a single text file
    with open("text.txt", "w") as book:
        for item in chapters:
            print("Downloading ->", item["title"])
            content = handler.getContent(item["href"])

            book.write(item["title"] + "\n")
            book.write(content["content"] + "\n")


if __name__ == "__main__":
    main()
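Both examples lean on a Spider class (and, in Example #1, per-site config classes such as Qidian) that the snippets do not show. The sketch below is a guess at those interfaces, inferred purely from how they are called above: the constructor takes three attribute filters, getContent() returns a dict with "title", "content" and "nextUrl" (or "error" when there is no next chapter), and getList() returns the table of contents as dicts with "title" and "href". The requests/BeautifulSoup calls and the CSS selector in getList() are assumptions, not the original implementation.

import requests
from bs4 import BeautifulSoup


class Spider(object):
    def __init__(self, titleKlass, contentKlass, nextKlass):
        # Attribute filters such as {"class": "j_chapterName"}
        self.titleKlass = titleKlass
        self.contentKlass = contentKlass
        self.nextKlass = nextKlass

    def getContent(self, url):
        html = requests.get(url, timeout=10).text
        soup = BeautifulSoup(html, "html.parser")

        title = soup.find(attrs=self.titleKlass)
        content = soup.find(attrs=self.contentKlass)
        nextLink = soup.find(attrs=self.nextKlass)

        if title is None or content is None:
            return {"error": "selectors matched nothing on " + url}

        result = {
            "title": title.get_text(strip=True),
            "content": content.get_text("\n", strip=True),
        }
        if nextLink is not None and nextLink.has_attr("href"):
            result["nextUrl"] = nextLink["href"]
        else:
            # No next-chapter link: the missing "nextUrl" key is what
            # ends the crawl loop in Example #2
            result["error"] = "no next chapter link on " + url
        return result


class Qidian(object):
    # Filters reused from Example #2; the getList() selector is a guess
    title = {"class": "j_chapterName"}
    content = {"class": "j_readContent"}
    next = {"id": "j_chapterNext"}

    def getList(self, url):
        html = requests.get(url, timeout=10).text
        soup = BeautifulSoup(html, "html.parser")
        return [
            {"title": a.get_text(strip=True), "href": a["href"]}
            for a in soup.select(".volume li a")
        ]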
Example #2
	def startCB(self):
		# Requires at module level: import tkinter.messagebox as tkMessageBox
		page = self.entryUrl.get()

		if page == "" or self.filePath == "":
			tkMessageBox.showerror("woolson", "Novel name or link is missing!")
			return

		# Scraping rules: attribute filters for the title, body and next link
		titleKlass = {"class": "j_chapterName"}
		contentKlass = {"class": "j_readContent"}
		nextKlass = {"id": "j_chapterNext"}

		spider = Spider(titleKlass, contentKlass, nextKlass)

		# File that receives the downloaded chapters
		with open(self.filePath, "w") as file:
			# Follow the next-chapter link until it runs out
			while page != "":
				result = spider.getContent(page)

				try:
					page = result["nextUrl"]
					file.write(result["title"] + "\n")
					file.write(result["content"] + "\n\n")

					print("Writing ->", result["title"])
				except KeyError:
					page = ""
					print("Finished:", result.get("error", ""))