Example #1
0
    def _get_title(self, html):

        found_title = re.search(r'video_title":"(.*?)"', html)
        if not found_title:
            raise VideoParserError(
                'Cannot find video title in pornhub.com video')
        return found_title.group(1)
Example #2
0
    def get_video_stats(self, html, **kwargs):
        """Collect the metadata of a drtuber video page.

        Args:
            html: page source of the video page.
            **kwargs: accepted but not used by this method.

        Returns:
            dict with the keys ``author``, ``categories``, ``embed_code``,
            ``thumbnail``, ``uploaded_date`` and ``title``.

        Raises:
            VideoParserError: if the ``var video_id = "..."`` assignment is
                missing from ``html``.
            IndexError: if one of the expected elements is absent, because
                the first XPath hit is indexed without a check.
        """
        # ``unquote`` lives in ``urllib`` on Python 2 and in ``urllib.parse``
        # on Python 3; resolve it locally so this method works on both.
        try:
            from urllib import unquote
        except ImportError:
            from urllib.parse import unquote

        document = self.etree.fromstring(html, self.parser)
        title = document.xpath('//p[@class="title_substrate"]')[0].text
        author = document.xpath(
            '//span[@class="autor text_t"]/a[@href]')[0].text

        # The upload date is taken from the third text node inside the
        # author span.
        uploaded_date = document.xpath(
            '//span[@class="autor text_t"]//text()')[2].strip()

        # NOTE(review): the author's website is extracted and URL-decoded but
        # never returned. The lookup is kept so that a page without the
        # "autor_web" link still fails exactly as before; confirm whether
        # 'website' should be added to the result or the lookup dropped.
        website_link = document.xpath('//span[@class="autor_web"]/a[@href]')[0]
        website = website_link.attrib['href'].replace(
            '/user_site_out.php?link=', '')
        website = unquote(website)

        c_list = document.xpath('//div[@class="categories_list"]')[0]
        categories = self._get_categories(html=self.tostring(c_list))

        thumbnail = document.xpath('//video[@controls="controls"]/@poster')[0]
        found_video_id = re.search(r'var video_id = "(.*?)"', html)

        if not found_video_id:
            raise VideoParserError('Could not find video id for drtuber video')

        embed_code = '<iframe src=http://www.drtuber.com/embed/{video_id}' \
                    ' width=608 height=454 frameborder=0 ' \
                    ' scrolling=no></iframe>'.format(video_id=found_video_id.group(1))

        return {
            'author': author,
            'categories': categories,
            'embed_code': embed_code,
            'thumbnail': thumbnail,
            'uploaded_date': uploaded_date,
            'title': title
        }
Example #3
0
    def get_download_options(self, html, **kwargs):
        """Return the decrypted download URL for each quality of a video.

        The four ``quality_*`` values found in ``html`` are passed through
        ``aes_decrypt_text`` with the video title as password; empty values
        are reported as ``None``.

        Returns:
            dict mapping ``'720'``, ``'240'``, ``'180'`` and ``'480'`` to a
            decoded URL or ``None``.

        Raises:
            VideoParserError: if the quality block (or the title needed as
                password) cannot be found in ``html``.
        """
        pattern = '"quality_720p":"(.*?)","quality_240p":"(.*?)","quality_180p":"(.*?)","quality_480p":"(.*?)",'
        match = re.search(pattern, html)
        if not match:
            raise VideoParserError(
                'Could not parse video download options for pornhub.com video')

        # The video title doubles as the decryption password.
        password = self._get_title(html=html)

        def decrypt(encrypted_url):
            # An empty capture means that quality is not offered.
            if not encrypted_url:
                return None
            return aes_decrypt_text(encrypted_url, password,
                                    32).decode('utf-8')

        # Labels follow the order of the capture groups in the pattern.
        labels = ('720', '240', '180', '480')
        urls = [decrypt(match.group(index)) for index in range(1, 5)]
        return dict(zip(labels, urls))
Example #4
0
    def get_download_url(self, html, **kwargs):
        """Return the URL-decoded flv download link of an xvideos page.

        Args:
            html: page source of the video page.
            **kwargs: accepted but not used by this method.

        Returns:
            The value of the ``flv_url=`` parameter (up to the following
            ``&amp;``) with percent-escapes and ``+`` signs decoded.

        Raises:
            VideoParserError: if ``html`` contains no ``flv_url=`` parameter.
        """
        # ``unquote_plus`` lives in ``urllib`` on Python 2 and in
        # ``urllib.parse`` on Python 3; resolve it locally so the method
        # does not fail with AttributeError on Python 3.
        try:
            from urllib import unquote_plus
        except ImportError:
            from urllib.parse import unquote_plus

        found_flv_url = re.search(r'flv_url=(.*?)\&amp;', html)
        if not found_flv_url:
            raise VideoParserError(
                'Missing video download url for xvideos video')
        return unquote_plus(found_flv_url.group(1))
Example #5
0
    def get_download_url(self, html, **kwargs):
        """Return the file URL assigned to ``__fileurl`` in the page source.

        Args:
            html: page source of the video page.
            **kwargs: accepted but not used by this method.

        Raises:
            VideoParserError: if no ``__fileurl = '...';`` assignment exists.
        """
        match = re.search(r"__fileurl = '(.*?)';", html)
        if match:
            return match.group(1)

        raise VideoParserError('Could not find video download url for motherless' \
                               ' video')
Example #6
0
    def get_download_url(self,html,**kwargs):
        """Pick one download URL from the options parsed out of ``html``.

        Keyword Args:
            download_quality: ``'default'`` (the default) returns the first
                available URL trying 720, 480, 240, 180 in that order; any
                other value is looked up directly in the parsed options and
                its entry is returned as-is (which may be ``None``).

        Raises:
            VideoParserError: if no URL is available in default mode, or the
                requested quality is not a key of the parsed options.
        """
        requested = kwargs.get('download_quality','default')
        available = self._get_download_options(html=html)

        if requested != 'default':
            if requested not in available:
                raise VideoParserError('Invalid download quality, only available options are 720,480,240,180 or default')
            return available[requested]

        # Default mode: best quality first.
        for candidate in ('720','480','240','180'):
            url = available[candidate]
            if url:
                return url

        raise VideoParserError('Could not find video download url for youporn.com video')
Example #7
0
    def get_download_options(self,html):
        """Parse the hd / 480p / mobile source URLs out of the page source.

        Returns:
            dict mapping ``'720'`` (hd), ``'480'`` (480p) and ``'240'``
            (mobile) to the captured URL, or ``None`` when the captured
            value is empty.

        Raises:
            VideoParserError: if the sources object is not found in ``html``.
        """
        match = re.search(r'{"hd":"(.*?)","480p":"(.*?)","mobile":"(.*?)"}\]}',html)
        if not match:
            raise VideoParserError('Could not parse video download options for redtube.com video')

        hd_url, sd_url, mobile_url = match.groups()
        # Empty captures are normalised to None.
        return {'720': hd_url or None,
                '480': sd_url or None,
                '240': mobile_url or None}
Example #8
0
    def get_download_url(self, html, **kwargs):
        """Return the ``src`` of the first ``<source>`` element in the page.

        Args:
            html: page source of the video page.
            **kwargs: accepted but not used by this method.

        Raises:
            VideoParserError: if no ``<source>`` element with a ``src``
                attribute is found.
        """
        tree = self.etree.fromstring(html, self.parser)
        sources = tree.xpath('//source[@src]/@src')
        if sources:
            return sources[0]

        raise VideoParserError(
            'Could not find video download url for drtuber video')
Example #9
0
    def get_video_stats(self,html):
        """Collect the metadata of a redtube video page.

        Args:
            html: page source of the video page.

        Returns:
            dict with the keys ``author``, ``tags``, ``categories``,
            ``embed_code``, ``thumbnail``, ``uploaded_date``,
            ``ratings_percentage``, ``title`` and ``views``.

        Raises:
            VideoParserError: if the ``var iVideoID = ...;`` assignment is
                missing from ``html``.
            IndexError: if one of the expected elements is absent, because
                XPath hits are indexed without a check.
        """
        document = self.etree.fromstring(html,self.parser)
        details = document.xpath('//div[@class="video-details"]')
        # The details block is serialised and parsed again, so the '//'
        # queries below run against that fragment only.
        details_doc = self.etree.fromstring(self.tostring(details[0]),self.parser)
        has_user_link = details_doc.xpath('//span[@class="linkImitation"]')

        # Prefer the "linkImitation" span for the author; otherwise fall back
        # to the first rel="nofollow" link in the details block.
        if has_user_link:
            author = has_user_link[0].text
        else:
            author = details_doc.xpath('//a[@rel="nofollow"]')[0].text

        trs = details_doc.xpath('//table//tr')

        # Looked up once here; the original repeated this query further down.
        title = document.xpath('//h1[@class="videoTitle"]')[0].text
        categories = self._get_categories(html=self.tostring(trs[0]))
        # The second table row is handed to both the date and the views helper.
        uploaded_date = self._get_uploaded_date(html=self.tostring(trs[1]))
        views = self._get_views(html=self.tostring(trs[1]))
        tags = self._get_tags(html=self.tostring(trs[2]))

        ratings_percentage = document.xpath('//span[@class="percent-likes"]')[0].text
        thumbnail = document.xpath('//meta[@property="og:image"]/@content')[0]
        found_video_id = re.search(r'var iVideoID = (.*?);',html)

        if not found_video_id:
            raise VideoParserError('Could not find video id for redtube.com video')

        embed_code = '<iframe src=http://www.embed.redtube.com?id={video_id}' \
                    '&bgcolor=000000 width=640 height=360 frameborder=0 ' \
                    'scrolling=no></iframe>'.format(video_id=found_video_id.group(1))

        return {'author':author,
                'tags':tags,
                'categories':categories,
                'embed_code':embed_code,
                'thumbnail':thumbnail,
                'uploaded_date':uploaded_date,
                'ratings_percentage':ratings_percentage,
                'title':title,
                'views':views}
Example #10
0
    def get_download_options(self,html,**kwargs):
        """Map each known quality to its download link from the page.

        Every ``<li>`` of the ``downloadList`` is inspected; the href of its
        first link is stored under the first quality (720, 480, 240, 180)
        whose ``/<quality>p`` marker occurs in that href.

        Returns:
            dict with the keys ``'720'``, ``'480'``, ``'240'`` and ``'180'``;
            qualities that are not offered stay ``None``.

        Raises:
            VideoParserError: if the download list has no entries.
        """
        labels = ('720','480','240','180')
        options = dict.fromkeys(labels)

        tree = self.etree.fromstring(html,self.parser)
        items = tree.xpath('//ul[@class="downloadList"]/li')
        if not items:
            raise VideoParserError('Could not find all the video download '\
                                   'options for youporn.com video')

        for item in items:
            # Each entry is parsed again on its own, so '//a' only sees the
            # links of this list item.
            item_tree = self.etree.fromstring(self.tostring(item),self.parser)
            href = item_tree.xpath('//a')[0].attrib['href']

            for label in labels:
                if '/' + label + 'p' in href:
                    options[label] = href
                    break

        return options
Example #11
0
    def get_video_stats(self, html):
        """Collect the metadata of a pornhub video page.

        Args:
            html: page source of the video page.

        Returns:
            dict with the keys ``author``, ``tags``, ``categories``,
            ``embed_code``, ``thumbnail``, ``uploaded_date``, ``production``,
            ``pornstars``, ``ratings_percentage``, ``thumbs_up``,
            ``thumbs_down``, ``title``, ``views`` and ``duration_seconds``.

        Raises:
            VideoParserError: if the embed code or the video duration cannot
                be found in ``html`` (the helpers called here may raise it
                as well).
            IndexError: if one of the expected elements is absent, because
                XPath hits are indexed without a check.
        """
        document = self.etree.fromstring(html, self.parser)

        thumbnail = document.xpath('//meta[@property="og:image"]/@content')[0]
        author = document.xpath(
            '//div[@class="usernameWrap clearfix"]//a[@class="bolded"]'
        )[0].text

        vid_row = document.xpath('//div[@class="video-info-row"]')
        pornstars = self._get_porn_stars(self.tostring(vid_row[0]))

        # The four "showLess" rows are consumed by position:
        # categories, production type, tags, upload date.
        vid_show_less = document.xpath(
            '//div[@class="video-info-row showLess"]')
        categories = self._get_categories(html=self.tostring(vid_show_less[0]))
        production = self._get_production_type(
            html=self.tostring(vid_show_less[1]))
        tags = self._get_tags(html=self.tostring(vid_show_less[2]))
        uploaded_date = self._get_uploaded_date(
            html=self.tostring(vid_show_less[3]))

        stats_count = document.xpath('//div[@class="rating-info-container"]')
        (views, ratings_percentage,
         thumbs_up, thumbs_down) = self._get_stats_count(
             html=self.tostring(stats_count[0]))

        found_embed_code = re.search(r'"embedCode":"(.*?)\<\\/iframe>', html)
        if not found_embed_code:
            raise VideoParserError(
                'Cannot get embed code for pornhub.com video')

        # Strip the backslashes and double quotes from the captured markup,
        # then re-append the closing tag the pattern stopped in front of.
        embed_code = found_embed_code.group(1).replace('\\',
                                                       '').replace('"', '')
        embed_code = '{e}</iframe>'.format(e=embed_code)

        found_duration = re.search(r'"video_duration":"(.*?)",', html)
        if not found_duration:
            raise VideoParserError(
                'Cannot find video duration in pornhub.com video')

        duration_seconds = found_duration.group(1)

        title = self._get_title(html=html)

        # NOTE(review): the download options are not part of the result. The
        # call is kept because it may raise when the options cannot be
        # parsed; confirm whether that validation is intended. The unused
        # list that used to be built from its return value was removed.
        self._get_download_options(html=html)

        return {
            'author': author,
            'tags': tags,
            'categories': categories,
            'embed_code': embed_code,
            'thumbnail': thumbnail,
            'uploaded_date': uploaded_date,
            'production': production,
            'pornstars': pornstars,
            'ratings_percentage': ratings_percentage,
            'thumbs_up': thumbs_up,
            'thumbs_down': thumbs_down,
            'title': title,
            'views': views,
            'duration_seconds': duration_seconds
        }
Example #12
0
    def get_video_stats(self,html,**kwargs):
        """Collect the metadata of a youporn video page.

        Args:
            html: page source of the video page.
            **kwargs: accepted but not used by this method.

        Returns:
            dict with the keys ``total_comments`` (int), ``author``,
            ``porn_stars`` (list of ``(name, href)`` tuples), ``categories``,
            ``tags``, ``uploaded_date``, ``views``, ``ratings``,
            ``ratings_percentage``, ``title``, ``thumbnail``,
            ``duration_seconds``, ``duration_text``, ``description`` and
            ``embed_code``.

        Raises:
            VideoParserError: if the default thumbnail, the duration in
                seconds or the duration text cannot be found in ``html``.
            IndexError: if one of the expected elements is absent, because
                XPath hits are indexed without a check.
        """
        categories = []
        tags = []
        porn_stars = []

        document = self.etree.fromstring(html,self.parser)
        ratings = document.xpath('//div[@class="rating-count"]')[0].text.replace(' ratings)','').replace('(','')
        # NOTE(review): unlike the other fields this is the matched element
        # itself, not its text; confirm what callers expect before changing.
        ratings_percentage = document.xpath('//div[@class="rating-percentage"]')[0]
        views = document.xpath('//div[@id="stats-views"]/text()')[0].replace(',','')
        uploaded_date = document.xpath('//div[@id="stats-date"]/text()')[0]
        title = document.xpath('//div[@class="container_15"]/h1[@class="grid_9"]')[0].text

        get_video_details = document.xpath('//ul[@class="info-list-content"]//a')

        for a in get_video_details:
            href = a.attrib['href']
            get_type = re.match(r'/(.*?)/(.*?)/',href,re.I|re.M)
            if get_type:
                # The first path segment of the link tells whether it is a
                # category, a tag or a performer name.
                item_type = get_type.group(1)
                if item_type == 'category':
                    categories.append(YouPornCategory(name=a.text,href=href))
                elif item_type == 'porntags':
                    tags.append(YouPornTag(name=a.text,href=href))
                elif item_type == 'pornstar':
                    porn_stars.append((a.text,href))

        # The href of the first <link> element is used as the embed source.
        video_url = document.xpath('//link[@href]/@href')[0]
        embed_code = "<iframe src={url}" \
        " frameborder=0 height=481 width=608" \
        " scrolling=no name=yp_embed_video>" \
        "</iframe>".format(url=video_url)

        author_name = document.xpath('//button[@data-name]//@data-name')[0]
        author_href = document.xpath('//div[@class="author-block--line"]//a')[0].attrib['href']
        author = YouPornAuthor(name=author_name,href=author_href)

        # Fix: total_comments used to stay unbound (UnboundLocalError at the
        # int() call below) when the "psi" header existed but did not read
        # 'All Comments (0)'. Every non-zero case now falls through to the
        # list header lookup.
        has_zero_comments = document.xpath('//div[@id="tab-comments"]//h2[@class="psi"]')
        if has_zero_comments and has_zero_comments[0].text == 'All Comments (0)':
            total_comments = 0
        else:
            total_comments = document.xpath('//div[@id="tab-comments"]/ul/h2')[0] \
                         .text.replace('All Comments (','').replace(')','')

        has_description = document.xpath('//div[@id="videoDescription"]')
        if has_description:
            description = document.xpath('//div[@id="videoDescription"]//p')[0].text
        else:
            description = ''

        found_default_thumbnail = re.search(r'"default_thumbnail_url":"(.*?)"',html)
        if not found_default_thumbnail:
            raise VideoParserError('Cannot get thumbnail image for youporn video')

        thumbnail = found_default_thumbnail.group(1)

        # Fix: the former pattern ended in a lazy '(.*?)' with nothing after
        # it, which always captured an empty string. Capture the digits
        # explicitly, tolerating an optional opening quote around the value.
        found_duration_seconds = re.search(r'"duration_in_seconds":"?(\d+)',html)
        if not found_duration_seconds:
            raise VideoParserError('Cannot get duration in seconds for youporn video')
        duration_seconds = found_duration_seconds.group(1)

        found_duration_text = re.search(r'"duration_f":"(.*?)"',html)
        if not found_duration_text:
            raise VideoParserError('Cannot get duration text for youporn video')
        duration_text = found_duration_text.group(1)

        return {'total_comments':int(total_comments),
                'author':author,
                'porn_stars':porn_stars,
                'categories':categories,
                'tags':tags,
                'uploaded_date':uploaded_date,
                'views':views,
                'ratings':ratings,
                'ratings_percentage':ratings_percentage,
                'title':title,
                'thumbnail':thumbnail,
                'duration_seconds':duration_seconds,
                'duration_text':duration_text,
                'description':description,
                'embed_code':embed_code}