Python BaseRequest.GetUrlContent Examples

Programming Language: Python

Namespace/Package Name: request

Class/Type: BaseRequest

Method/Function: GetUrlContent

Examples at hotexamples.com: 3

Python BaseRequest.GetUrlContent - 3 examples found. These are the top-rated real-world Python examples of request.BaseRequest.GetUrlContent, extracted from open-source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

GetUrlSoup(11)

DownloadData(6)

PostUrlSoup(4)

GetUrlContent(3)

new(2)

SaveData(1)

Example #1

Show file

File: Comics.py Project: txz19881104/python

    def _GetContentByKeyword(self, keyword, mode, download_path=None):
        """Search the site for ``keyword`` and queue every chapter of each hit.

        The search endpoint under ``self._url`` is queried, the JSON answer is
        decoded, and every entry of ``['data']['topic']`` is processed in turn:
        its topic page is fetched, the chapter links are collected, and one
        ``base.Job`` per chapter is put on ``priority_queue`` (oldest chapter
        first, numbered from 1).

        Parameters
        ----------
        keyword : str or unicode
            Text to search for.
        mode : str or unicode
            "download" : topics whose title is already present in the
                         ComicName table are skipped; for the others the cover
                         is downloaded (when ``download_path`` is given), a
                         ComicName row is inserted and its ID is stored in
                         ``self.id``.
            "update"   : the ``Time`` column of the ComicName row with ID
                         ``self.id`` is refreshed.
        download_path : str, optional
            Base directory for the cover download; also stored in
            ``self.download_path``.

        Returns
        -------
        bool
            True once all topics were processed. False when the search request
            or a topic page request returns None, or when the decoded JSON
            compares equal to False.
        """

        # Request the search page for the keyword.
        self.keyword       = keyword
        self.download_path = download_path
        # urlencode({"": keyword}) yields "=<quoted keyword>", which completes
        # the "...search?keyword" prefix into "keyword=<quoted keyword>".
        url_keyword        = self._url + '/web/topic/search?keyword' +  parse.urlencode({"": keyword})
        content_keyword    = BaseRequest.GetUrlContent(url_keyword)
        if content_keyword == None:
            return False

        # Parse the returned content.
        content_keyword_json = json.loads(content_keyword.decode("utf8"))
        if content_keyword_json == False:
            return False

        # Take the id of every search hit and visit the corresponding topic page.
        for data in content_keyword_json['data']['topic']:

            if mode == "download":
                # Check whether this comic has already been downloaded.
                # NOTE(review): the title is interpolated directly into the SQL
                # text; consider a parameterized query if the helper supports it.
                sql = "SELECT * FROM EntertainmentDB.ComicName WHERE Name=\"%s\";" %(data['title'])
                if self._EntertainmentSelect(sql):
                    print(data['title'])
                    continue

            # Wait for the previous comic to finish downloading: poll once a
            # minute until priority_queue is empty.
            while True:
                if not priority_queue.empty():
                    print("threads conunt :%d" %threading.active_count())
                    # NOTE(review): this reads `queue`, not `priority_queue`;
                    # confirm which object is intended.
                    print("queue size : %d" %(queue.qsize()))
                    # Start worker threads again when fewer than 10 threads are active.
                    if threading.active_count() < 10:
                        StartComicThread(10)  
                    time.sleep(60)
                    continue
                else:
                    break

            self.keyword         = data['title']
            url_keyword_content  = self._url + '/web/topic/' + str(data['id'])
            soup_keyword_content = BaseRequest.GetUrlSoup(url_keyword_content)
            if soup_keyword_content == None:
                return False

            # Find the links of all chapters. According to the original author
            # the page lists them from the last chapter to the first, which is
            # why they are iterated in reverse further down.
            a_book = soup_keyword_content.find_all('a',{'class':'article-img'})

            if mode == "download":

                a_author    = soup_keyword_content.find('div', {'class':'author-nickname'})
                a_introduce = soup_keyword_content.find('div', {'class':'switch-content'})
                a_img       = soup_keyword_content.find('img', {'class':'kk-img'})

                # Download the comic cover, trying up to 5 times.
                # When download_path is None every iteration is a no-op.
                for i in range(5):
                    if download_path != None:
                        path = '%s/Comics/%s/' %(download_path, self.keyword)
                        if not BaseRequest.DownloadData(a_img['src'], path, "封面.jpg"):
                            print("download %s failed %d time" % ("封面.jpg", i))
                        else:
                            print("download %s%s success" % (path,"封面.jpg"))
                            break

                # Cover URL recorded in the database (fixed storage bucket prefix).
                src = "https://txz-1256783950.cos.ap-beijing.myqcloud.com/Comics/" + self.keyword + "/" + "封面.jpg"

                # Store the comic information in the database.
                # String values are wrapped in double quotes here.
                sql_dict = collections.OrderedDict()
                sql_dict['Name']      = "\"" + data['title'] + "\""         # name
                sql_dict['WatchNum']  = 0                                   # serial number (per original comment)
                sql_dict['Website']   = "\"" + self._url + "\""             # website
                sql_dict['ChapterNum']= len(a_book)                         # total number of chapters
                sql_dict['IsFinish']  = 0                                   # whether the comic is finished
                sql_dict['Introduce'] = "\"" + a_introduce.p.contents[0].replace('\"', '') + "\""   # comic introduction (double quotes stripped)
                sql_dict['Author']    = "\"" + a_author.contents[0] + "\""  # author
                sql_dict['Img']       = "\"" + src + "\""                   # cover image
                sql_dict['Type']      = "\"" + self.type + "\""             # comic type
                sql_dict['Time']      = "\"" + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + "\"" # download time

                if not self._EntertainmentInsert('ComicName', sql_dict):
                    print("inster ComicName table failed!")
                    continue

                # Read back the ID of the comic by its name.
                sql = "SELECT ID FROM EntertainmentDB.ComicName WHERE Name=\"%s\";" %(data['title'])
                max_id = self._EntertainmentSelect(sql)
                if max_id:
                    self.id = max_id[0][0]
                else:
                    print("get max_id failed!")
                    continue

            elif mode == "update":
                # Refresh the timestamp of the row identified by self.id.
                now_Time = "\"" + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + "\"" # download time
                sql = "update EntertainmentDB.ComicName set Time = %s  where ID = %d;" %(now_Time, self.id)
                if not self._EntertainmentUpdate(sql):
                    print("%s update failed!" %(sql))

            # Chapters are numbered from 1 in reversed page order.
            count = 1
            for book in reversed(a_book):
                href  = book['href']
                title = book['title']
                src   = book.img['src']

                # Put the current chapter on the queue.
                url_a_book  = self._url + href
                # NOTE(review): this rebinds `data`, the outer loop variable;
                # the outer value is not read again in this iteration.
                data = {"url": url_a_book, "title":title, "src": src, "href":href, "count": count}
                # NOTE(review): dic_queue is only bound for "download" and
                # "update"; any other mode fails at the put() below.
                if mode == "download":
                    dic_queue = {"type": "download", "subtype": "download", "self":self, "data":data}
                elif mode == "update":
                    dic_queue = {"type": "download", "subtype": "update", "self":self, "data":data}

                priority_queue.put(base.Job(2,dic_queue,self._url))

                count += 1


                # `p` and `run` are defined outside this method; one spawn is
                # issued per queued chapter.
                p.spawn(run)
            
            # Called once per topic, after all of its chapters were queued.
            p.join()

        return True

Example #2

Show file

File: Comics.py Project: txz19881104/python

    def _UpdataChapter(self, result, download_path=None):
        """Yield the chapters of one comic that are newer than the stored count.

        The site is searched for the comic title taken from ``result``. For
        every search hit whose title matches exactly, the topic page is
        fetched and its chapter links are walked in page order while a chapter
        number counts down from the total number of links. Each chapter page
        is fetched, its image URLs are collected, and the chapter record is
        appended to ``self.lst_kkmh_content`` and yielded. The generator stops
        as soon as the running chapter number is not greater than the stored
        chapter count, or when any request fails.

        Parameters
        ----------
        result : sequence
            Record describing the comic (presumably a database row; verify
            against the caller). Only three positions are read:
            ``result[0]`` is stored in ``self.id``, ``result[1]`` is the title
            used as search keyword, ``result[4]`` is the number of chapters
            already known.
        download_path : str, optional
            Stored in ``self.download_path``; not otherwise used here.

        Yields
        ------
        dict
            ``{'href', 'title', 'chapter', 'download_url'}`` where
            ``download_url`` is the list of image URLs of that chapter.
        """
        keyword     = result[1]
        chapter_num = result[4]
        self.id     = result[0]

        self.keyword       = keyword
        self.download_path = download_path

        # Request the search page for the keyword.
        url_keyword     = self._url + '/web/topic/search?' + parse.urlencode({"keyword": keyword})
        content_keyword = BaseRequest.GetUrlContent(url_keyword)
        # Other call sites test this helper against None, so a comparison with
        # False alone lets a None result through to .decode(); treat every
        # falsy result (None, False, empty body) as failure.
        if not content_keyword:
            return

        # Parse the returned content; an empty/false/null document has no
        # 'data' entry to iterate over.
        content_keyword_json = json.loads(content_keyword.decode("utf8"))
        if not content_keyword_json:
            return

        for data in content_keyword_json['data']['topic']:
            # Only the hit whose title is exactly the stored comic title counts.
            if data['title'] != keyword:
                continue

            url_keyword_content  = self._url + '/web/topic/' + str(data['id'])
            soup_keyword_content = BaseRequest.GetUrlSoup(url_keyword_content)
            if soup_keyword_content is None or soup_keyword_content is False:
                return

            # Links of all chapters, in the order they appear on the page.
            a_book = soup_keyword_content.find_all('a', {'class': 'article-img'})

            # The first link gets the highest number and the number decreases
            # from there; iteration ends at the first chapter already stored.
            now_chapter_num = len(a_book)
            for book in a_book:
                print(now_chapter_num, chapter_num)
                if now_chapter_num <= chapter_num:
                    return

                href  = book['href']
                title = book['title']

                # Fetch the page of the current chapter.
                url_a_book  = self._url + href
                soup_a_book = BaseRequest.GetUrlSoup(url_a_book)
                if soup_a_book is None or soup_a_book is False:
                    return

                # Collect the image address of every picture of this chapter.
                content_img_book = soup_a_book.find_all('img', {'class': 'kklazy', 'title': title})
                lst_img_book = [
                    img_book['data-kksrc'].replace('amp;', '')
                    for img_book in content_img_book
                ]

                # Record the chapter for later saving and hand it to the caller.
                dct_img_book = {'href': href, 'title': title, 'chapter': now_chapter_num, 'download_url': lst_img_book}
                self.lst_kkmh_content.append(dct_img_book)

                now_chapter_num -= 1

                yield dct_img_book

Example #3

Show file

File: Comics.py Project: SherryHolmes/Entertainment

    def _GetContentByKeyword(self, keyword):
        """Yield every chapter of the first search hit for ``keyword``.

        The site is searched for ``keyword`` and the first entry of
        ``['data']['topic']`` is opened. A row for the comic is inserted into
        the ComicName table and its ID is read back into ``self.id``. The
        chapter links of the topic page are then visited in reversed page
        order; for each one the chapter page is fetched, its image URLs are
        collected, and the chapter record is appended to
        ``self.lst_kkmh_content`` and yielded.

        The generator ends without yielding (or stops early) when a request
        fails, when the search has no hits, or when the comic ID cannot be
        read back.

        Parameters
        ----------
        keyword : str or unicode
            Text to search for; also used as the comic name in the database.

        Yields
        ------
        dict
            ``{'href', 'title', 'src', 'download_url'}`` where ``src`` is the
            chapter thumbnail and ``download_url`` the list of image URLs.
        """
        # Request the search page for the keyword.
        self.keyword    = keyword
        url_keyword     = self._url + '/web/topic/search?' + parse.urlencode({"keyword": keyword})
        content_keyword = BaseRequest.GetUrlContent(url_keyword)
        # A comparison with False alone lets a None result through to
        # .decode(); treat every falsy result as failure.
        if not content_keyword:
            return

        # Parse the returned content.
        content_keyword_json = json.loads(content_keyword.decode("utf8"))
        if not content_keyword_json:
            return

        # Use the id of the first hit to reach the topic page; an empty hit
        # list means there is nothing to fetch.
        topics = content_keyword_json['data']['topic']
        if not topics:
            return

        url_keyword_content  = self._url + '/web/topic/' + str(topics[0]['id'])
        soup_keyword_content = BaseRequest.GetUrlSoup(url_keyword_content)
        if soup_keyword_content is None or soup_keyword_content is False:
            return

        # Store the comic information in the database.
        # String values are wrapped in double quotes here.
        sql_dict = collections.OrderedDict()
        sql_dict['Name']    = "\"" + self.keyword + "\""      # name
        sql_dict['Num']     = 0                               # serial number (per original comment)
        sql_dict['Website'] = "\"" + self._url + "\""         # website
        sql_dict['Time']    = "\"" + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + "\""  # download time
        if not self._ComicInsert('ComicName', sql_dict):
            # Reported only; the ID lookup below is still attempted.
            print("inster ComicName table failed!")

        # Read back the ID of the comic by its name.
        # NOTE(review): the keyword is interpolated directly into the SQL
        # text; switch to a parameterized query if _ComicSelect supports one.
        sql = "SELECT ID FROM EntertainmentDB.ComicName WHERE Name=\"%s\";" %(self.keyword)
        max_id = self._ComicSelect(sql)
        if not max_id:
            # Without a row there is no ID to index into.
            print("get max_id failed!")
            return
        self.id = max_id[0][0]

        # Links of all chapters. According to the original author the page
        # lists them from the last chapter to the first, hence the reversal.
        a_book = soup_keyword_content.find_all('a', {'class': 'article-img'})
        for book in reversed(a_book):
            href  = book['href']
            title = book['title']
            src   = book.img['src']

            # Fetch the page of the current chapter.
            url_a_book  = self._url + href
            soup_a_book = BaseRequest.GetUrlSoup(url_a_book)
            if soup_a_book is None or soup_a_book is False:
                return

            # Collect the image address of every picture of this chapter.
            content_img_book = soup_a_book.find_all('img', {'class': 'kklazy', 'title': title})
            lst_img_book = [
                img_book['data-kksrc'].replace('amp;', '')
                for img_book in content_img_book
            ]

            # Record the chapter for later saving and hand it to the caller.
            dct_img_book = {'href': href, 'title': title, 'src': src, 'download_url': lst_img_book}
            self.lst_kkmh_content.append(dct_img_book)

            yield dct_img_book