def __init__(self, savePath, parseOnly):
    """Store the save path and create the spider-utility and MongoDB handles.

    Args:
        savePath: Kept as ``self.savePath`` and passed to ``CWebSpiderUtils``.
        parseOnly: Stored unchanged as ``self.parseOnly``.
    """
    self.savePath = savePath
    self.parseOnly = parseOnly
    self.utils = CWebSpiderUtils(savePath)

    # Walk from the client down to the collection:
    # client -> "HegreHunter" database -> "datas" collection.
    # NOTE: despite its name, ``dbname`` holds the object obtained by
    # indexing the client, not a string.
    mongo_client = pymongo.MongoClient("mongodb://localhost:27017/")
    hunter_db = mongo_client["HegreHunter"]
    self.dbclient = mongo_client
    self.dbname = hunter_db
    self.dbcol = hunter_db["datas"]
def __init__(self, url, savePath, start, len, parseOnly):
    """Record the crawl settings and build the helper objects.

    Args:
        url: Not used by this initializer.
        savePath: Kept as ``self.savePath`` and passed to both helpers.
        start: Stored unchanged as ``self.start``.
        len: Stored unchanged as ``self.len`` (the parameter name shadows
            the builtin inside this method).
        parseOnly: Stored as ``self.parseOnly`` and passed to the common
            helper.
    """
    # NOTE: no base-class initializer is invoked here; an earlier call to
    # one was left commented out in the original code.

    # Plain settings.
    self.savePath = savePath
    self.parseOnly = parseOnly
    self.start = start
    self.len = len

    # Helper objects, both built from the save path.
    self.utils = CWebSpiderUtils(savePath)
    self.common = CWebParserHunterCommon(savePath, parseOnly)
def parse_video(self, url):
    """Walk the paginated video listing starting at *url*, yielding results.

    This is a generator. For every entry found on a listing page it yields
    ``([video_item], False)``. Once no further "next" link is found it
    yields ``(all_items, True)`` with every item collected so far, followed
    by a final ``(None, False)``.

    Each ``video_item`` is a dict with the keys ``name``, ``url``,
    ``video`` and ``stills``. When a listing entry has no link, ``url`` and
    ``video`` are ``None`` and ``stills`` is an empty list.

    The Chrome helper started here is closed in a ``finally`` clause, so it
    is released even when parsing raises or the caller stops iterating
    before the generator is exhausted.

    Args:
        url: URL of the first listing page.

    Yields:
        tuple: ``(items, is_complete)`` as described above.
    """
    base_url = 'http://www.pornvidhub.com/'
    collected = []

    browser = CWebSpiderUtils(None)
    browser.init_chrome()
    try:
        while True:
            html = self.utils.get_page(url)
            page = pq(html)

            for item in page('.listThumbs li').items():
                title_link = item('a.title')
                href = title_link.attr['href']

                # Test the raw href *before* joining: urljoin() returns the
                # base URL when the relative part is empty or None, so a
                # check on the joined value could never detect a missing
                # link and the fallback branch would be unreachable.
                detail_url = urljoin(base_url, href) if href else None

                if detail_url:
                    video, still = self.parse_video_detail(detail_url, browser)
                else:
                    video, still = None, []

                video_item = {
                    'name': self.utils.format_name(title_link.attr['title']),
                    'url': detail_url,
                    'video': video,
                    'stills': still
                }
                yield [video_item], False
                collected.append(video_item)

            # Follow the pager link after the page numbers, if there is one.
            next_btn = page('span.numbers').nextAll('a.nav')
            if not next_btn:
                break
            url = urljoin(base_url, next_btn.attr['href'])
    finally:
        # Always release the browser, including on errors and early close.
        browser.close_chrome()

    yield collected, True
    yield None, False
def __init__(self, **kwArgs):
    """Initialise the parser from keyword arguments.

    Every keyword argument is forwarded to the parent initializer. The
    ``database`` entry (``None`` when absent) is additionally handed to
    ``CWebDataDbUtis``.

    Args:
        **kwArgs: Configuration values; only ``database`` is read directly
            in this method.
    """
    super().__init__(**kwArgs)

    # ``self.savePath`` is read but never assigned in this method --
    # presumably the parent initializer sets it; confirm against the base
    # class.
    self.utils = CWebSpiderUtils(self.savePath)
    self.common = CWebParserSiteCommon(self)

    database = kwArgs.get('database')
    self.dbUtils = CWebDataDbUtis(database)
def __init__(self, url, start, end, savePath):
    """Initialise the multi-URL parser and its page utilities.

    Args:
        url: Forwarded to the parent initializer.
        start: Forwarded to the parent initializer.
        end: Forwarded to the parent initializer.
        savePath: Kept as ``self.savePath`` and passed to
            ``CWebSpiderUtils``.
    """
    # The explicit two-argument super() form is kept as written: it starts
    # the initializer lookup after CWebParserMultiUrl in the MRO.
    parent = super(CWebParserMultiUrl, self)
    parent.__init__(url, start, end)

    self.savePath = savePath
    self.utils = CWebSpiderUtils(savePath)
def __init__(self, url, savePath):
    """Initialise the single-URL parser and its page utilities.

    Args:
        url: Forwarded to the parent initializer.
        savePath: Kept as ``self.savePath`` and passed to
            ``CWebSpiderUtils``.
    """
    # The explicit two-argument super() form is kept as written: it starts
    # the initializer lookup after CWebParserSingleUrl in the MRO.
    parent = super(CWebParserSingleUrl, self)
    parent.__init__(url)

    self.savePath = savePath
    self.utils = CWebSpiderUtils(savePath)
def __init__(self, url, start, end, savePath, parseOnly):
    """Initialise the multi-URL parser together with its helper objects.

    Args:
        url: Forwarded to the parent initializer.
        start: Forwarded to the parent initializer.
        end: Forwarded to the parent initializer.
        savePath: Kept as ``self.savePath`` and passed to both helpers.
        parseOnly: Stored as ``self.parseOnly`` and passed to the common
            helper.
    """
    # The explicit two-argument super() form is kept as written: it starts
    # the initializer lookup after CWebParserMultiUrl in the MRO.
    parent = super(CWebParserMultiUrl, self)
    parent.__init__(url, start, end)

    self.savePath = savePath
    self.utils = CWebSpiderUtils(savePath)
    self.common = CWebParserHunterCommon(savePath, parseOnly)
    self.parseOnly = parseOnly