Ejemplo n.º 1
0
def reptileNovelByClassify(classify):
    """Scrape the novel listing of one category page and store it.

    Downloads ``https://www.biquge5200.cc/<classify['path']>``, reads every
    ``<li>`` under ``div.r`` and bulk-inserts one
    ``(book_url, book_name, author_name, classify_id)`` row per entry into
    the ``gysw_novel`` table.

    Args:
        classify: Mapping that provides ``'path'`` (appended to the site
            root to build the page URL) and ``'id'`` (stored as
            ``classify_id`` on every row).

    Returns:
        None. Errors raised while fetching, parsing or saving are printed
        and swallowed (best effort); a missing key in ``classify`` still
        raises ``KeyError``.
    """
    # Read the inputs before opening the connection so that a malformed
    # ``classify`` cannot leave an open database handle behind.
    target_url = 'https://www.biquge5200.cc/' + classify['path']
    classify_id = classify['id']

    db = Db()
    try:
        # Without a timeout a stalled server would block this call forever.
        r = requests.get(target_url, timeout=10)
        # Do not parse an HTTP error page as if it were a listing.
        r.raise_for_status()
        root = etree.HTML(r.text)

        arr = []
        for novel in root.xpath('//div[@class="r"]//li'):
            urls = novel.xpath('span[@class="s2"]/a/@href')
            book_names = novel.xpath('span[@class="s2"]/a/text()')
            author_names = novel.xpath('span[@class="s5"]/text()')
            # Skip entries missing any field instead of letting an
            # IndexError throw away the whole category.
            if not (urls and book_names and author_names):
                continue
            arr.append((urls[0], book_names[0], author_names[0], classify_id))
            print('抓取 %s' % book_names[0])

        # Nothing scraped means nothing to insert.
        if arr:
            print('开始保存数据....')
            db.insertMany(
                'insert into gysw_novel (`book_url`, `book_name`, `author_name`, `classify_id`) values (%s, %s, %s, %s)',
                tuple(arr))
    except Exception as e:
        print(e)
    finally:
        # Release the connection on every exit path.
        db.close()

    print('操作结束')
Ejemplo n.º 2
0
def novel():
    """Scrape the novel listing of every stored category and save it.

    Reads all rows of ``gysw_classify``; for each one downloads
    ``https://www.biquge5200.cc/<path>``, reads every ``<li>`` under
    ``div.r`` and bulk-inserts one
    ``(book_url, book_name, author_name, classify_id)`` row per entry into
    ``gysw_novel``.

    A failure on one category is printed and does not stop the remaining
    categories from being processed.

    Returns:
        None.
    """
    db = Db()
    try:
        classifies = db.selectAll('select * from gysw_classify')

        for classify in classifies:
            try:
                target_url = 'https://www.biquge5200.cc/' + classify['path']
                # Without a timeout a stalled server would block the whole
                # run on a single category.
                r = requests.get(target_url, timeout=10)
                # Do not parse an HTTP error page as if it were a listing.
                r.raise_for_status()
                root = etree.HTML(r.text)

                arr = []
                # ``item`` rather than ``novel`` so the loop variable does
                # not shadow this function's own name.
                for item in root.xpath('//div[@class="r"]//li'):
                    urls = item.xpath('span[@class="s2"]/a/@href')
                    book_names = item.xpath('span[@class="s2"]/a/text()')
                    author_names = item.xpath('span[@class="s5"]/text()')
                    # Skip entries missing any field instead of letting an
                    # IndexError throw away the whole category.
                    if not (urls and book_names and author_names):
                        continue
                    arr.append(
                        (urls[0], book_names[0], author_names[0], classify['id']))

                # Nothing scraped for this category: nothing to insert.
                if not arr:
                    continue

                print('开始保存数据....')
                db.insertMany(
                    'insert into gysw_novel (`book_url`, `book_name`, `author_name`, `classify_id`) values (%s, %s, %s, %s)',
                    tuple(arr))
            except Exception as e:
                print('cuowu  chu xian ', e)

        print('操作结束')
    finally:
        # Release the connection even if reading the category list fails.
        db.close()
Ejemplo n.º 3
0
    def reptileIndexClassify(self):
        """Scrape the site navigation bar and store its categories.

        Downloads the search page, takes every ``<li>`` of ``div.nav`` after
        the first two, and for each one records the second-to-last ``/``
        separated segment of its link as ``path`` and the link text as
        ``desc``. The pairs are bulk-inserted into ``gysw_classify`` with
        ``insert ignore``.

        Returns:
            None. Any error while fetching, parsing or saving is printed
            after the failure banner and swallowed.
        """
        print('爬取首页分类数据:开始:(classify/reptileIndexClassify)...')
        target_url = 'https://www.biquge5200.com/modules/article/search.php'
        try:
            # Without a timeout a stalled server would block this call forever.
            r = requests.get(target_url, timeout=10)
            # Do not parse an HTTP error page as if it were the real page.
            r.raise_for_status()
            root = etree.HTML(r.text)
            classifies = root.xpath('//div[@class="nav"]//li[position()>2]')

            arr1 = []
            for classify in classifies:
                hrefs = classify.xpath('a/@href')
                descs = classify.xpath('a/text()')
                # Skip entries without a link or a label.
                if not (hrefs and descs):
                    continue
                segments = hrefs[0].split('/')
                # ``[-2]`` needs at least one '/' in the href.
                if len(segments) < 2:
                    continue
                arr1.append((segments[-2], descs[0]))

            # Only open a connection when there is something to store.
            if arr1:
                db = Db()
                try:
                    db.insertMany(
                        'insert ignore into gysw_classify (`path`, `desc`) values (%s, %s)',
                        tuple(arr1))
                finally:
                    # Previously a failed insert jumped straight to the
                    # ``except`` below and leaked the connection.
                    db.close()
            print('爬取首页分类数据:成功:(classify/reptileIndexClassify)...')
        except Exception as e:
            print('爬取首页分类数据:失败:(classify/reptileIndexClassify)...')
            print(e)