コード例 #1
0
 def __init__(self, savePath, parseOnly):
     """Store the configuration and create the helper and MongoDB handles.

     Args:
         savePath: Kept as ``self.savePath`` and handed to
             ``CWebSpiderUtils``.
         parseOnly: Kept unchanged as ``self.parseOnly``.
     """
     self.savePath = savePath
     self.parseOnly = parseOnly
     self.utils = CWebSpiderUtils(savePath)

     # Local MongoDB instance; database "HegreHunter", collection "datas".
     mongo_client = pymongo.MongoClient("mongodb://localhost:27017/")
     database = mongo_client["HegreHunter"]
     self.dbclient = mongo_client
     # NOTE: despite its name, ``dbname`` holds the object returned by
     # indexing the client, not a plain name string.
     self.dbname = database
     self.dbcol = database["datas"]
コード例 #2
0
    def __init__(self, url, savePath, start, len, parseOnly):
        """Record the crawl window and build the shared helper objects.

        Args:
            url: Accepted but not stored or used by this method.
            savePath: Kept as ``self.savePath``; also passed to both helper
                constructors.
            start: Kept as ``self.start``.
            len: Kept as ``self.len`` (the name shadows the builtin inside
                this method only).
            parseOnly: Kept as ``self.parseOnly``; also passed to
                ``CWebParserHunterCommon``.
        """
        # NOTE(review): no base-class __init__ is invoked here (an earlier
        # super() call was disabled) -- confirm this is intentional.
        self.savePath = savePath
        self.parseOnly = parseOnly
        self.start, self.len = start, len

        self.utils = CWebSpiderUtils(savePath)
        self.common = CWebParserHunterCommon(savePath, parseOnly)
コード例 #3
0
    def parse_video(self, url):
        """Walk the paginated listing starting at ``url`` and yield videos.

        This is a generator. Every ``.listThumbs li`` entry on each listing
        page is turned into a dict with the keys ``name``, ``url``, ``video``
        and ``stills``. Pagination follows the ``a.nav`` link located after
        ``span.numbers`` until no such link (or no ``href`` on it) is found.

        A Chrome helper is started before crawling and is always shut down
        again, including when fetching/parsing raises or when the consumer
        stops iterating early.

        Args:
            url: Address of the first listing page.

        Yields:
            ``([video_item], False)`` for each entry as soon as it is parsed,
            then ``(all_items, True)`` once after the last page, and finally
            ``(None, False)``. For an entry whose title link has no ``href``
            the item carries ``url=None``, ``video=None`` and ``stills=[]``.
        """
        base_url = 'http://www.pornvidhub.com/'
        videos_dict = []

        browser = CWebSpiderUtils(None)
        browser.init_chrome()
        try:
            page_url = url
            while True:
                page = pq(self.utils.get_page(page_url))

                for item in page('.listThumbs li').items():
                    title_link = item('a.title')
                    # Test the raw href: urljoin() returns the base URL for
                    # an empty/missing href, so checking the joined value
                    # could never detect a missing link.
                    href = title_link.attr['href']
                    if href:
                        detail_url = urljoin(base_url, href)
                        video, still = self.parse_video_detail(
                            detail_url, browser)
                    else:
                        detail_url = None
                        video = None
                        still = []

                    video_item = {
                        'name':
                        self.utils.format_name(title_link.attr['title']),
                        'url': detail_url,
                        'video': video,
                        'stills': still
                    }
                    yield [video_item], False
                    videos_dict.append(video_item)

                # A missing "next" link, or one without an href, ends the
                # crawl; joining an empty href would loop on the site root.
                next_href = page('span.numbers').nextAll('a.nav').attr['href']
                if not next_href:
                    break
                page_url = urljoin(base_url, next_href)
        finally:
            # Runs on normal completion, on errors, and when the generator
            # is closed early, so the browser is never left running.
            browser.close_chrome()

        yield videos_dict, True

        yield None, False
コード例 #4
0
ファイル: Models.py プロジェクト: povillechan/Python
 def __init__(self, **kwArgs):
     """Initialise the base class, then attach the helper objects.

     Args:
         **kwArgs: Forwarded unchanged to the base-class ``__init__``. The
             optional ``'database'`` entry (``None`` when absent) is passed
             to ``CWebDataDbUtis``.
     """
     super().__init__(**kwArgs)

     # ``self.savePath`` is read here without being assigned in this method;
     # presumably the base-class __init__ sets it -- confirm.
     save_path = self.savePath
     self.utils = CWebSpiderUtils(save_path)
     self.common = CWebParserSiteCommon(self)

     database = kwArgs.get('database')
     self.dbUtils = CWebDataDbUtis(database)
コード例 #5
0
 def __init__(self, url, start, end, savePath):
     """Initialise the parent parser and attach a spider helper.

     Args:
         url: Forwarded to the parent ``__init__``.
         start: Forwarded to the parent ``__init__``.
         end: Forwarded to the parent ``__init__``.
         savePath: Kept as ``self.savePath`` and used to build
             ``self.utils``.
     """
     # The explicit two-argument super() is kept on purpose: it resolves
     # relative to CWebParserMultiUrl, which only matches zero-argument
     # super() if this method is defined on that very class.
     parent = super(CWebParserMultiUrl, self)
     parent.__init__(url, start, end)

     self.savePath = savePath
     self.utils = CWebSpiderUtils(savePath)
コード例 #6
0
 def __init__(self, url, savePath):
     """Initialise the parent parser for a single URL and attach a helper.

     Args:
         url: Forwarded to the parent ``__init__``.
         savePath: Kept as ``self.savePath`` and used to build
             ``self.utils``.
     """
     # The explicit two-argument super() is kept on purpose: it resolves
     # relative to CWebParserSingleUrl, which only matches zero-argument
     # super() if this method is defined on that very class.
     parent = super(CWebParserSingleUrl, self)
     parent.__init__(url)

     self.savePath = savePath
     self.utils = CWebSpiderUtils(savePath)
コード例 #7
0
 def __init__(self, url, start, end, savePath, parseOnly):
     """Initialise the parent parser and build the shared helper objects.

     Args:
         url: Forwarded to the parent ``__init__``.
         start: Forwarded to the parent ``__init__``.
         end: Forwarded to the parent ``__init__``.
         savePath: Kept as ``self.savePath``; also passed to both helper
             constructors.
         parseOnly: Kept as ``self.parseOnly``; also passed to
             ``CWebParserHunterCommon``.
     """
     # The explicit two-argument super() is kept on purpose: it resolves
     # relative to CWebParserMultiUrl, which only matches zero-argument
     # super() if this method is defined on that very class.
     parent = super(CWebParserMultiUrl, self)
     parent.__init__(url, start, end)

     self.savePath = savePath
     self.parseOnly = parseOnly

     self.utils = CWebSpiderUtils(savePath)
     self.common = CWebParserHunterCommon(savePath, parseOnly)