def _get_title(self, html):
    """Return the video title embedded in a pornhub.com page.

    The title is taken from the first ``video_title":"..."`` fragment
    found in ``html``; the text between the quotes is returned as-is.

    Raises:
        VideoParserError: if ``html`` contains no such fragment.
    """
    title_match = re.search(r'video_title":"(.*?)"', html)
    if title_match:
        return title_match.group(1)
    raise VideoParserError(
        'Cannot find video title in pornhub.com video')
def get_video_stats(self, html, **kwargs):
    """Collect metadata for a drtuber video page.

    The page is parsed with ``self.etree`` / ``self.parser`` and queried
    with XPath; the video id is read from the inline ``var video_id``
    script assignment and used to build the embed ``<iframe>``.

    Returns:
        dict with the keys ``author``, ``categories``, ``embed_code``,
        ``thumbnail``, ``uploaded_date`` and ``title``.

    Raises:
        VideoParserError: if the video id cannot be found in ``html``.
        IndexError: if one of the expected page elements is missing.
    """
    page = self.etree.fromstring(html, self.parser)

    def first(expression):
        # First XPath result; IndexError when nothing matches.
        return page.xpath(expression)[0]

    title = first('//p[@class="title_substrate"]').text
    author = first('//span[@class="autor text_t"]/a[@href]').text
    # The upload date is the third text node inside the author span.
    author_texts = page.xpath('//span[@class="autor text_t"]//text()')
    uploaded_date = author_texts[2].strip()

    # NOTE(review): the uploader's website is decoded here but is not part
    # of the returned dict -- confirm whether it should be exposed.
    site_href = first('//span[@class="autor_web"]/a[@href]').attrib['href']
    website = site_href.replace('/user_site_out.php?link=', '')
    website = urllib.unquote(website)

    categories_node = first('//div[@class="categories_list"]')
    categories = self._get_categories(html=self.tostring(categories_node))
    thumbnail = first('//video[@controls="controls"]/@poster')

    id_match = re.search(r'var video_id = "(.*?)"', html)
    if not id_match:
        raise VideoParserError('Could not find video id for drtuber video')
    embed_template = (
        '<iframe src=http://www.drtuber.com/embed/{video_id}'
        ' width=608 height=454 frameborder=0 '
        ' scrolling=no></iframe>'
    )
    embed_code = embed_template.format(video_id=id_match.group(1))

    return {
        'author': author,
        'categories': categories,
        'embed_code': embed_code,
        'thumbnail': thumbnail,
        'uploaded_date': uploaded_date,
        'title': title,
    }
def get_download_options(self, html, **kwargs):
    """Decrypt the per-quality download URLs of a pornhub.com video.

    The page carries four encrypted URLs (``quality_720p``,
    ``quality_240p``, ``quality_180p``, ``quality_480p``, in that order).
    Each non-empty one is decrypted with ``aes_decrypt_text`` using the
    video title as the password, then decoded as UTF-8.

    Returns:
        dict mapping ``'720'``, ``'240'``, ``'180'`` and ``'480'`` to the
        decrypted URL, or to ``None`` when the page has no URL for that
        quality.

    Raises:
        VideoParserError: if the quality block is not found in ``html``
            (or, via ``_get_title``, if the title is missing).
    """
    pattern = ('"quality_720p":"(.*?)","quality_240p":"(.*?)",'
               '"quality_180p":"(.*?)","quality_480p":"(.*?)",')
    match = re.search(pattern, html)
    if not match:
        raise VideoParserError(
            'Could not parse video download options for pornhub.com video')

    password = self._get_title(html=html)
    # Groups come back in the same order as the labels below.
    # 32 is passed straight through to aes_decrypt_text
    # (presumably the key size in bytes -- TODO confirm).
    decrypted = [
        aes_decrypt_text(token, password, 32).decode('utf-8')
        if token else None
        for token in match.groups()
    ]
    return dict(zip(('720', '240', '180', '480'), decrypted))
def get_download_url(self, html, **kwargs):
    """Return the FLV download URL embedded in an xvideos page.

    The URL is the value of the ``flv_url=`` parameter (everything up to
    the next ``&``) and is returned percent-decoded, with ``+`` turned
    into spaces.

    Raises:
        VideoParserError: if ``html`` contains no ``flv_url=...&`` fragment.
    """
    # ``urllib.unquote_plus`` only exists on Python 2; Python 3 moved it to
    # ``urllib.parse``.  Resolve it locally so the method runs on both.
    try:
        from urllib import unquote_plus
    except ImportError:
        from urllib.parse import unquote_plus

    flv_match = re.search(r'flv_url=(.*?)&', html)
    if not flv_match:
        raise VideoParserError(
            'Missing video download url for xvideos video')
    return unquote_plus(flv_match.group(1))
def get_download_url(self, html, **kwargs):
    """Return the download URL of a motherless video page.

    The URL is read from the first ``__fileurl = '...';`` assignment in
    ``html`` and returned unchanged.

    Raises:
        VideoParserError: if no such assignment is present.
    """
    url_match = re.search(r"__fileurl = '(.*?)';", html)
    if url_match:
        return url_match.group(1)
    raise VideoParserError(
        'Could not find video download url for motherless'
        ' video')
def get_download_url(self,html,**kwargs):
    """Pick a download URL for a youporn.com video.

    Keyword Args:
        download_quality: ``'default'`` (the default) returns the first
            non-empty URL among 720, 480, 240 and 180, in that order.
            Any other value is looked up directly in the download
            options and its entry is returned as-is (which may be
            ``None``).

    Raises:
        VideoParserError: if no URL is available in default mode, or if
            the requested quality is not a key of the download options.
    """
    requested = kwargs.get('download_quality','default')
    available = self._get_download_options(html=html)

    if requested != 'default':
        if requested in available:
            return available[requested]
        raise VideoParserError('Invalid download quality, only available options are 720,480,240,180 or default')

    # Default mode: best quality first.
    for candidate in ('720','480','240','180'):
        url = available[candidate]
        if url:
            return url
    raise VideoParserError('Could not find video download url for youporn.com video')
def get_download_options(self,html):
    """Extract the download URLs offered by a redtube.com video page.

    The page embeds a ``{"hd":...,"480p":...,"mobile":...}]}`` fragment;
    its three values are returned under the keys ``'720'``, ``'480'`` and
    ``'240'`` respectively.  An empty value is reported as ``None``.

    Raises:
        VideoParserError: if the fragment is not found in ``html``.
    """
    match = re.search(r'{"hd":"(.*?)","480p":"(.*?)","mobile":"(.*?)"}\]}',html)
    if not match:
        raise VideoParserError('Could not parse video download options for redtube.com video')

    hd_url, sd_url, mobile_url = match.groups()
    return {'720': hd_url or None,
            '480': sd_url or None,
            '240': mobile_url or None}
def get_download_url(self, html, **kwargs):
    """Return the download URL of a drtuber video page.

    The URL is the ``src`` attribute of the first ``<source>`` element
    that has one.

    Raises:
        VideoParserError: if the page has no ``<source src=...>`` element.
    """
    page = self.etree.fromstring(html, self.parser)
    sources = page.xpath('//source[@src]/@src')
    if sources:
        return sources[0]
    raise VideoParserError(
        'Could not find video download url for drtuber video')
def get_video_stats(self,html):
    """Collect metadata for a redtube.com video page.

    Most fields come from the ``video-details`` block: the author, and a
    table whose first row holds the categories, second row the upload
    date and view count, and third row the tags.  The video id is read
    from the inline ``var iVideoID`` script assignment and used to build
    the embed ``<iframe>``.

    Returns:
        dict with the keys ``author``, ``tags``, ``categories``,
        ``embed_code``, ``thumbnail``, ``uploaded_date``,
        ``ratings_percentage``, ``title`` and ``views``.

    Raises:
        VideoParserError: if the video id cannot be found in ``html``.
        IndexError: if one of the expected page elements is missing.
    """
    document = self.etree.fromstring(html,self.parser)
    details = document.xpath('//div[@class="video-details"]')
    # Re-parse the details block as its own document so that the '//'
    # queries below only search inside that block.
    details_doc = self.etree.fromstring(self.tostring(details[0]),self.parser)

    # The author is rendered either as a span or as a nofollow link.
    user_spans = details_doc.xpath('//span[@class="linkImitation"]')
    if user_spans:
        author = user_spans[0].text
    else:
        author = details_doc.xpath('//a[@rel="nofollow"]')[0].text

    rows = details_doc.xpath('//table//tr')
    # The title used to be looked up twice with the same expression; one
    # lookup is enough.
    title = document.xpath('//h1[@class="videoTitle"]')[0].text
    categories = self._get_categories(html=self.tostring(rows[0]))
    # Upload date and view count share the second table row.
    uploaded_date = self._get_uploaded_date(html=self.tostring(rows[1]))
    views = self._get_views(html=self.tostring(rows[1]))
    tags = self._get_tags(html=self.tostring(rows[2]))
    ratings_percentage = document.xpath('//span[@class="percent-likes"]')[0].text
    thumbnail = document.xpath('//meta[@property="og:image"]/@content')[0]

    found_video_id = re.search(r'var iVideoID = (.*?);',html)
    if not found_video_id:
        raise VideoParserError('Could not find video id for redtube.com video')
    embed_code = '<iframe src=http://www.embed.redtube.com?id={video_id}' \
                 '&bgcolor=000000 width=640 height=360 frameborder=0 ' \
                 'scrolling=no></iframe>'.format(video_id=found_video_id.group(1))

    return {'author':author,
            'tags':tags,
            'categories':categories,
            'embed_code':embed_code,
            'thumbnail':thumbnail,
            'uploaded_date':uploaded_date,
            'ratings_percentage':ratings_percentage,
            'title':title,
            'views':views}
def get_download_options(self,html,**kwargs):
    """Map each known quality to its download link on a youporn.com page.

    Every ``<li>`` of the ``downloadList`` list is inspected; the ``href``
    of its first link is assigned to the first quality whose marker
    (``/720p``, ``/480p``, ``/240p``, ``/180p``) occurs in that ``href``.

    Returns:
        dict with the keys ``'720'``, ``'480'``, ``'240'`` and ``'180'``;
        a quality with no matching link stays ``None``.

    Raises:
        VideoParserError: if the page has no ``downloadList`` items.
    """
    markers = (('/720p','720'), ('/480p','480'), ('/240p','240'), ('/180p','180'))
    options = dict.fromkeys(('720', '480', '240', '180'))

    page = self.etree.fromstring(html,self.parser)
    items = page.xpath('//ul[@class="downloadList"]/li')
    if not items:
        raise VideoParserError('Could not find all the video download '
                               'options for youporn.com video')

    for item in items:
        # Re-parse the item on its own so '//a' only sees links inside it.
        item_doc = self.etree.fromstring(self.tostring(item),self.parser)
        href = item_doc.xpath('//a')[0].attrib['href']
        for marker, quality in markers:
            if marker in href:
                options[quality] = href
                break
    return options
def get_video_stats(self, html):
    """Collect metadata for a pornhub.com video page.

    XPath queries supply the thumbnail, author and the info rows
    (pornstars, categories, production type, tags, upload date) plus the
    rating block; regular expressions supply the embed code and the
    duration; ``_get_title`` supplies the title.

    Returns:
        dict with the keys ``author``, ``tags``, ``categories``,
        ``embed_code``, ``thumbnail``, ``uploaded_date``, ``production``,
        ``pornstars``, ``ratings_percentage``, ``thumbs_up``,
        ``thumbs_down``, ``title``, ``views`` and ``duration_seconds``.

    Raises:
        VideoParserError: if the embed code or the duration cannot be
            found (helpers called here may raise it as well).
        IndexError: if one of the expected page elements is missing.
    """
    document = self.etree.fromstring(html, self.parser)
    thumbnail = document.xpath('//meta[@property="og:image"]/@content')[0]
    author = document.xpath(
        '//div[@class="usernameWrap clearfix"]//a[@class="bolded"]'
    )[0].text

    vid_row = document.xpath('//div[@class="video-info-row"]')
    pornstars = self._get_porn_stars(self.tostring(vid_row[0]))

    # The "showLess" rows are read positionally: categories, production
    # type, tags, upload date.
    vid_show_less = document.xpath(
        '//div[@class="video-info-row showLess"]')
    categories = self._get_categories(html=self.tostring(vid_show_less[0]))
    production = self._get_production_type(
        html=self.tostring(vid_show_less[1]))
    tags = self._get_tags(html=self.tostring(vid_show_less[2]))
    uploaded_date = self._get_uploaded_date(
        html=self.tostring(vid_show_less[3]))

    stats_count = document.xpath('//div[@class="rating-info-container"]')
    (views,
     ratings_percentage,
     thumbs_up,
     thumbs_down) = self._get_stats_count(html=self.tostring(stats_count[0]))

    found_embed_code = re.search(r'"embedCode":"(.*?)\<\\/iframe>', html)
    if not found_embed_code:
        raise VideoParserError(
            'Cannot get embed code for pornhub.com video')
    # Drop the escaping backslashes and quotes, then restore the closing
    # tag that the pattern consumed.
    embed_code = found_embed_code.group(1).replace('\\', '').replace('"', '')
    embed_code = '{e}</iframe>'.format(e=embed_code)

    found_duration = re.search(r'"video_duration":"(.*?)",', html)
    if not found_duration:
        raise VideoParserError(
            'Cannot find video duration in pornhub.com video')
    duration_seconds = found_duration.group(1)

    title = self._get_title(html=html)

    # NOTE(review): the result is not used (the dead ``options`` list built
    # from it has been removed).  The call itself is kept because it can
    # raise -- confirm whether it is still wanted here.
    self._get_download_options(html=html)

    return {
        'author': author,
        'tags': tags,
        'categories': categories,
        'embed_code': embed_code,
        'thumbnail': thumbnail,
        'uploaded_date': uploaded_date,
        'production': production,
        'pornstars': pornstars,
        'ratings_percentage': ratings_percentage,
        'thumbs_up': thumbs_up,
        'thumbs_down': thumbs_down,
        'title': title,
        'views': views,
        'duration_seconds': duration_seconds,
    }
def get_video_stats(self,html,**kwargs):
    """Collect metadata for a youporn.com video page.

    XPath queries supply ratings, views, upload date, title, author,
    comment count, description and the categorised links (categories,
    tags, porn stars); regular expressions supply the thumbnail and the
    duration values from the page's inline data.

    Returns:
        dict with the keys ``total_comments`` (int), ``author``
        (``YouPornAuthor``), ``porn_stars`` (list of ``(name, href)``
        tuples), ``categories`` (list of ``YouPornCategory``), ``tags``
        (list of ``YouPornTag``), ``uploaded_date``, ``views``,
        ``ratings``, ``ratings_percentage``, ``title``, ``thumbnail``,
        ``duration_seconds``, ``duration_text``, ``description`` and
        ``embed_code``.

    Raises:
        VideoParserError: if the thumbnail or either duration value
            cannot be found in ``html``.
        IndexError: if one of the expected page elements is missing.
    """
    categories = []
    tags = []
    porn_stars = []
    document = self.etree.fromstring(html,self.parser)

    ratings = document.xpath('//div[@class="rating-count"]')[0].text \
        .replace(' ratings)','').replace('(','')
    # NOTE(review): this is the element itself, not its text, unlike the
    # other fields -- confirm whether ``.text`` was intended.
    ratings_percentage = document.xpath('//div[@class="rating-percentage"]')[0]
    views = document.xpath('//div[@id="stats-views"]/text()')[0].replace(',','')
    uploaded_date = document.xpath('//div[@id="stats-date"]/text()')[0]
    title = document.xpath('//div[@class="container_15"]/h1[@class="grid_9"]')[0].text

    for a in document.xpath('//ul[@class="info-list-content"]//a'):
        href = a.attrib['href']
        get_type = re.match(r'/(.*?)/(.*?)/',href,re.I|re.M)
        if not get_type:
            continue
        # The first path segment tells whether the link is a category, a
        # tag or a porn star name.
        item_type = get_type.group(1)
        if item_type == 'category':
            categories.append(YouPornCategory(name=a.text,href=href))
        elif item_type == 'porntags':
            tags.append(YouPornTag(name=a.text,href=href))
        elif item_type == 'pornstar':
            porn_stars.append((a.text,href))

    video_url = document.xpath('//link[@href]/@href')[0]
    embed_code = "<iframe src={url}" \
                 " frameborder=0 height=481 width=608" \
                 " scrolling=no name=yp_embed_video>" \
                 "</iframe>".format(url=video_url)

    author_name = document.xpath('//button[@data-name]//@data-name')[0]
    author_href = document.xpath('//div[@class="author-block--line"]//a')[0].attrib['href']
    author = YouPornAuthor(name=author_name,href=author_href)

    # Previously ``total_comments`` stayed unassigned on one branch, which
    # surfaced as an UnboundLocalError at ``int(total_comments)``.  Now the
    # count is 0 when the "psi" header says so, and is otherwise read from
    # the comment list header.
    zero_header = document.xpath('//div[@id="tab-comments"]//h2[@class="psi"]')
    if zero_header and zero_header[0].text == 'All Comments (0)':
        total_comments = 0
    else:
        total_comments = document.xpath('//div[@id="tab-comments"]/ul/h2')[0] \
            .text.replace('All Comments (','').replace(')','') \
            .replace(',','')  # tolerate thousands separators before int()

    description_paragraphs = document.xpath('//div[@id="videoDescription"]')
    if description_paragraphs:
        description = document.xpath('//div[@id="videoDescription"]//p')[0].text
    else:
        description = ''

    found_default_thumbnail = re.search(r'"default_thumbnail_url":"(.*?)"',html)
    if not found_default_thumbnail:
        raise VideoParserError('Cannot get thumbnail image for youporn video')
    thumbnail = found_default_thumbnail.group(1)

    # A lazy group at the very end of a pattern always matches the empty
    # string, so the old pattern could never capture the value.  Anchor the
    # group on the delimiter that follows the value instead.
    found_duration_seconds = re.search(r'"duration_in_seconds":\s*(.*?)\s*[,}]',html)
    if not found_duration_seconds:
        raise VideoParserError('Cannot get duration in seconds for youporn video')
    duration_seconds = found_duration_seconds.group(1)

    found_duration_text = re.search(r'"duration_f":"(.*?)"',html)
    if not found_duration_text:
        raise VideoParserError('Cannot get duration text for youporn video')
    duration_text = found_duration_text.group(1)

    return {'total_comments':int(total_comments),
            'author':author,
            'porn_stars':porn_stars,
            'categories':categories,
            'tags':tags,
            'uploaded_date':uploaded_date,
            'views':views,
            'ratings':ratings,
            'ratings_percentage':ratings_percentage,
            'title':title,
            'thumbnail':thumbnail,
            'duration_seconds':duration_seconds,
            'duration_text':duration_text,
            'description':description,
            'embed_code':embed_code}