Python SpiderTools.getRes Examples

Programming Language: Python

Namespace/Package Name: NovelSpider

Class/Type: SpiderTools

Method/Function: getRes

Examples at hotexamples.com: 4

Python SpiderTools.getRes - 4 examples found. These are the top-rated real-world Python examples of NovelSpider.SpiderTools.getRes, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

get_pyquery_content(7)

get_html(5)

getRes(4)

save_to_file(3)

addRes(1)

deal_with_status_500(1)

Example #1

Show file

 def get_chapters_save(self, url, novel_id):
     """Download the chapter index at ``url`` and bulk-insert its chapters.

     Rows go into the table ``SpiderTools.table_name[SpiderTools.sourceid]``
     and the running counter ``SpiderTools.total[SpiderTools.sourceid]`` is
     advanced by the number of rows inserted.

     Args:
         url: Address of the chapter-index page.
         novel_id: Id of the novel the chapters belong to.

     Returns:
         ``0`` when the index page could not be fetched, ``None`` otherwise.
     """
     # Handed to get_html as its network-error handler; the arguments suggest
     # it records "url,novel_id" in novel_mulu.bak -- TODO confirm.
     novel_mulu_fn = SpiderTools.save_to_file(file_name="novel_mulu.bak",
                                              save_text=url + "," +
                                              str(novel_id))
     res = SpiderTools.getRes()
     html = SpiderTools.get_html(url,
                                 encoding=res.encoding,
                                 network_err_fn=novel_mulu_fn)
     if html is None:
         return 0
     chapters = SpiderTools.get_pyquery_content(html, res.select_chapter)

     # Indexes 0..2000 are kept, the same cut-off the original loop applied.
     row_count = min(len(chapters), 2001)
     params = []
     for chapter_id in range(row_count):
         link = chapters.eq(chapter_id)
         title = link.text()
         # chapter_url is called with the resource object passed explicitly.
         source = res.chapter_url(res, link.attr("href"), url)
         params.extend((novel_id, chapter_id + 1, str(title), source,
                        SpiderTools.sourceid))

     self.init_chapter_table(novel_id)

     if row_count:
         # Scraped titles and links are untrusted text: bind them as
         # parameters instead of splicing their repr() into the statement.
         # The table name is an identifier and cannot be bound, so it is
         # still formatted in. No "%" doubling is needed for bound values.
         table = SpiderTools.table_name[SpiderTools.sourceid]
         placeholders = ",".join(["(%s,%s,%s,%s,%s)"] * row_count)
         sql = ("INSERT into {} (novelId,chapterId,title,source,sourceid) "
                "VALUES {}").format(table, placeholders)
         default_dbhelper.update(sql, tuple(params))

     SpiderTools.total[SpiderTools.sourceid] = SpiderTools.total[
         SpiderTools.sourceid] + row_count
     # Once the counter passes five million rows, record this novel id as the
     # end of the source's still-open router range.
     if SpiderTools.total[SpiderTools.sourceid] > 5000000:
         default_dbhelper.update(
             "update router set novel_id_end = %s where sourceid = %s and novel_id_end is null",
             (novel_id, SpiderTools.sourceid))

Example #2

Show file

def novel_chapter_detail_save_by_tablename(table_name):
    """Fetch and store the text of every unprocessed chapter in ``table_name``.

    Rows with ``flag = 0`` are read 100 at a time. For each row the chapter
    page is downloaded, ``<script>`` elements are removed, and the remaining
    text is UTF-8 encoded, zlib-compressed and written to ``content`` while
    ``flag`` is set to 1. When chapter 1 of a novel is stored, the novel's
    ``status`` is set to 2.

    Args:
        table_name: Name of the chapter table to process. It is formatted
            directly into the SQL text, so it must be a trusted identifier.
    """
    dbhelper = DBhelper(host="localhost",
                        user='******',
                        password='******',
                        database='novels')
    sql = "select novelId,chapterId,source,sourceid from {} where flag = 0 limit 100".format(
        table_name)
    # Loop-invariant, so built once rather than on every batch.
    updatesql = "update {} set flag = 1 ,content = %s where novelId = %s and chapterId = %s".format(
        table_name)
    updateNovelSql = "update novel set status = 2 where id =%s"

    result = dbhelper.query(sql)
    while result:
        for item in result:
            novelId, chapterId, source, sourceid = item[0], item[1], item[
                2], item[3]
            # getRes() takes no argument; the source is selected through
            # this class attribute -- presumably what getRes reads.
            SpiderTools.sourceid = sourceid
            html = SpiderTools.get_html(
                source,
                encoding=SpiderTools.getRes().encoding,
                header_host=SpiderTools.getRes().host,
                network_err_fn=SpiderTools.deal_with_status_500(
                    table_name, novelId, chapterId, dbhelper))
            if html is None:
                # NOTE(review): this row keeps flag = 0 unless the error
                # handler changes it, so the next batch may select it again.
                time.sleep(random.uniform(0.2, 0.4))
                continue
            content = SpiderTools.get_pyquery_content(
                html,
                SpiderTools.getRes().select_chapter_content)
            content.remove("script")
            text = content.text().encode("utf-8", errors="ignore")
            zlib_chapter_text = zlib.compress(text)
            dbhelper.update(updatesql, (zlib_chapter_text, novelId, chapterId))
            if chapterId == 1:
                # A one-element tuple needs the trailing comma; "(novelId)"
                # is just the bare value.
                dbhelper.update(updateNovelSql, (novelId,))
            # Random pause between requests.
            time.sleep(random.uniform(0.2, 0.4))
        result = dbhelper.query(sql)

Example #3

Show file

 def novel_detail_save(self):
     """Fill in tag, introduction, cover and chapters for pending novels.

     Novels whose ``tagid`` is NULL are read 500 at a time. For each one the
     home page is fetched, the cover URL, a two-character tag prefix and the
     introduction are extracted, the cover is downloaded as binary data, the
     chapter list is saved via ``get_chapters_save``, and finally the novel
     row is updated. The loop repeats while the count query reports rows
     with a NULL ``tagid``.
     """
     tag_list = default_dbhelper.query(
         "SELECT id,`name` from dictionary where type = 'tag'")
     # Map tag name -> tag id; tolerate a None result the same way the
     # novel query below is checked for None.
     tags = {row[1]: row[0] for row in tag_list or ()}

     # Fetch the novels that still need updating.
     count_sql = "select count(1) from novel where tagid is null limit 1"
     count = default_dbhelper.query_one(count_sql)
     while int(count[0]) > 0:
         values = default_dbhelper.query(
             "select source,id,sourceid from novel where tagid is null limit 0,500"
         )
         if values is None or len(values) == 0:
             break
         for item in values:
             novel_home_url = item[0]
             novel_id = item[1]
             SpiderTools.sourceid = item[2]
             # Save the novel's detail information.
             # NOTE(review): a novel that is skipped below keeps tagid NULL
             # and will be selected again by the next batch.
             html = SpiderTools.get_html(
                 novel_home_url, encoding=SpiderTools.getRes().encoding)
             if html is None:
                 continue
             # Cover image URL.
             cover = SpiderTools.get_pyquery_content(
                 html,
                 SpiderTools.getRes().select_novel_cover).attr("src")
             # Genre: only the first two characters are used for matching.
             tag = str(
                 SpiderTools.get_pyquery_content(
                     html,
                     SpiderTools.getRes().select_novel_tag).text())[0:2]
             # Introduction text.
             introduction = SpiderTools.get_pyquery_content(
                 html,
                 SpiderTools.getRes().select_novel_introduction).text()
             bconver = None
             if cover is not None:
                 # Handler used when fetching the cover fails; the arguments
                 # suggest it records "cover,novel_id" in img.bak.
                 img_fn = SpiderTools.save_to_file(
                     "img.bak", cover + "," + str(novel_id))
                 bconver = SpiderTools.get_html(cover,
                                                return_type="binary",
                                                network_err_fn=img_fn)
             # Resolve the tag id. An empty tag must not match: the empty
             # string is a substring of every name, which would silently
             # assign the first dictionary tag.
             tag_id = 0
             if tag:
                 for tag_name, candidate_id in tags.items():
                     if tag in str(tag_name):
                         tag_id = candidate_id
                         break
             time.sleep(random.uniform(0.5, 1.5))
             # Fetch and save the chapter list, then update the novel row.
             is_update = self.get_chapters_save(
                 novel_home_url + SpiderTools.getRes().list_url_template,
                 novel_id)
             if is_update == 0:
                 continue
             default_dbhelper.update(
                 " update novel set tagid = %s,introduction = %s,cover = %s where id = %s ",
                 (tag_id, introduction, bconver, novel_id))
             time.sleep(random.uniform(0.5, 1.5))
         count = default_dbhelper.query_one(count_sql)

Example #4

Show file

 def novel_simple_save(self):
     """Walk every category listing and upsert name/source/author rows.

     For each key in ``self.category_list`` the first listing page is
     fetched (when the resource defines a ``category_template``) and passed
     to ``__get_category_page_count``. The listing pages are then visited in
     turn; each page's novels are inserted into ``novel`` with
     ``ON DUPLICATE KEY UPDATE`` on ``source``. A page with no novel
     elements, or with no usable rows, ends the current category.
     """
     self.__get_category_list()
     # Loop over the categories.
     for key in self.category_list:
         html = None
         templet = self.templet_format(self, key)
         if SpiderTools.getRes().category_template is not None:
             index_url = templet.format(1)
             html = SpiderTools.get_html(
                 url=index_url,
                 header_host=SpiderTools.getRes().host,
                 encoding=SpiderTools.getRes().encoding,
                 network_err_fn=SpiderTools.save_to_file(
                     "pageList.bak", index_url))
             if html is None:
                 continue
             self.__get_category_page_count(html)
         # Loop over the listing pages; page 1 reuses the html fetched above.
         # NOTE(review): range() stops at category_page_count - 1 -- confirm
         # whether the last page is meant to be excluded.
         for i in range(1, SpiderTools.getRes().category_page_count, 1):
             if i != 1:
                 index_url = templet.format(i)
                 html = SpiderTools.get_html(
                     url=index_url,
                     header_host=SpiderTools.getRes().host,
                     encoding=SpiderTools.getRes().encoding,
                     network_err_fn=SpiderTools.save_to_file(
                         "pageList.bak", index_url))
                 if html is None:
                     continue
             novels = SpiderTools.get_pyquery_content(
                 html,
                 SpiderTools.getRes().select_novel_line)
             if len(novels) == 0:
                 break
             # Assemble the novel information as a flat parameter list,
             # four values per row.
             params = []
             for item in novels:
                 name_node = SpiderTools.get_pyquery_content(
                     item,
                     SpiderTools.getRes().select_novel_name)
                 novelname = name_node.text()
                 if novelname is None or novelname == '':
                     continue
                 url = SpiderTools.getRes().host + name_node.attr("href")
                 author_text = SpiderTools.get_pyquery_content(
                     item,
                     SpiderTools.getRes().select_novel_author).text()
                 # Only the part before the first space is kept.
                 author = author_text.split(" ")[0]
                 params.extend((novelname, url, author,
                                SpiderTools.getRes().source_id))
             # Save the brief novel information to the database.
             if not params:
                 break
             # Scraped names, links and authors are untrusted text: bind them
             # as parameters instead of splicing their repr() into the SQL.
             placeholders = ",".join(["(%s,%s,%s,%s)"] * (len(params) // 4))
             sql = "insert into novel (`name`,`source`,`author`,`sourceid`) values "
             sql = sql + placeholders + " on DUPLICATE key update source = values(source)"
             default_dbhelper.update(sql, tuple(params))
             time.sleep(random.uniform(1, 3))