Python get_ua Examples, salts_lib.scraper_utils.get_ua Python Examples

Example #1

0

Show file

File: maksi_scraper.py Project: EPiC-APOC/repository.xvbmc

 def get_sources(self, video):
     """Collect hoster entries for *video* from the links inside the page's 'videos' div.

     Returns a list of hoster dicts. The list is empty when no usable URL is
     known for the video or the page contains no 'videos' div.
     """
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     page_url = urlparse.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=.5)
     fragment = dom_parser.parse_dom(html, 'div', {'class': 'videos'})
     if not fragment:
         return hosters

     for link in re.finditer('href="([^"]+)[^>]*>([^<]+)', fragment[0]):
         link_url, label = link.groups()
         label = label.lower()
         # Only links whose lower-cased text is a key of ALLOWED_LABELS are followed.
         if label not in ALLOWED_LABELS:
             continue

         sources = self.__get_sources(link_url, ALLOWED_LABELS[label])
         for source in sources:
             host = self._get_direct_hostname(source)
             if host == 'gvideo':
                 # gvideo links derive their quality from the URL itself.
                 quality = scraper_utils.gv_get_quality(source)
                 direct = True
                 stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
             elif sources[source]['direct']:
                 quality = sources[source]['quality']
                 direct = True
                 stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
             else:
                 # Non-direct links are reported under their real hostname, without headers.
                 quality = sources[source]['quality']
                 direct = False
                 host = urlparse.urlparse(source).hostname
                 stream_url = source

             hoster = {
                 'multi-part': False,
                 'host': host,
                 'class': self,
                 'quality': quality,
                 'views': None,
                 'rating': None,
                 'url': stream_url,
                 'direct': direct,
             }
             if sources[source]['subs']:
                 hoster['subs'] = 'Turkish Subtitles'
             hosters.append(hoster)

     return hosters

Example #2

0

Show file

File: watchhd_scraper.py Project: dsjh/tknorris-beta-repo

    def resolve_link(self, link):
        """Resolve a scraped link into a playable stream URL.

        *link* may end with a ``|key=value&key=value`` suffix holding request
        headers (values URL-quoted). The page is fetched, its player iframe is
        followed (solving a reCAPTCHA when one is present), simple
        ``.replace()`` / ``window.atob`` obfuscation is undone, and the
        ``<source>`` tags are collected.

        Returns the selected stream URL with a ``|User-Agent=...`` suffix, or
        None when no stream is found or the user cancels the selection dialog.
        """
        # Separate the URL from the optional header suffix. Each item is split
        # on its first '=' only, so a value that itself contains '=' no longer
        # raises, which previously discarded every header and left the
        # '|...' suffix attached to the URL that was then requested.
        parts = link.split('|')
        link = parts[0]
        headers = {}
        if len(parts) > 1:
            for item in parts[1].split('&'):
                key, sep, value = item.partition('=')
                if sep:
                    headers[key] = urllib.unquote(value)

        if not link.startswith('http'):
            link = urlparse.urljoin(self.base_url, link)
        html = self._http_get(link, headers=headers, cache_limit=0)

        fragment = dom_parser.parse_dom(html, 'div', {'class': 'player'})
        if not fragment:
            return None

        iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
        if not iframe_url:
            return None

        iframe_url = iframe_url[0]
        headers = {'Referer': link}
        html = self._http_get(iframe_url, headers=headers, cache_limit=0)
        sitekey = dom_parser.parse_dom(html, 'div', {'class': 'g-recaptcha'}, ret='data-sitekey')
        if sitekey:
            token = recaptcha_v2.UnCaptchaReCaptcha().processCaptcha(sitekey[0], lang='en')
            if token:
                # Re-request the iframe with the solved captcha token.
                data = {'g-recaptcha-response': token}
                html = self._http_get(iframe_url, data=data, cache_limit=0)
                log_utils.log(html)

        # Apply the first literal .replace('a', 'b') call found in the page to the page itself.
        match = re.search("\.replace\(\s*'([^']+)'\s*,\s*'([^']*)'\s*\)", html, re.I)
        if match:
            html = html.replace(match.group(1), match.group(2))

        # The payload is base64-decoded once per occurrence of window.atob in the page.
        match = re.search("window\.atob[\([]+'([^']+)", html)
        if match:
            func_count = len(re.findall('window\.atob', html))
            html = match.group(1)
            for _i in xrange(func_count):
                html = base64.decodestring(html)

        streams = [
            found.groups()
            for found in re.finditer('''<source[^>]+src=["']([^'"]+)[^>]+label=['"]([^'"]+)''', html)
        ]
        if not streams:
            return None

        if len(streams) == 1:
            return streams[0][0] + '|User-Agent=%s' % (scraper_utils.get_ua())

        if not self.auto_pick:
            # Let the user choose by label; a negative result means the dialog was cancelled.
            result = xbmcgui.Dialog().select(i18n('choose_stream'), [e[1] for e in streams])
            if result > -1:
                return streams[result][0] + '|User-Agent=%s' % (scraper_utils.get_ua())
            return None

        # Auto-pick: keep the first stream with the strictly highest Q_ORDER rank.
        best_stream = ''
        best_q = 0
        for stream_url, label in streams:
            rank = Q_ORDER[scraper_utils.height_get_quality(label)]
            if rank > best_q:
                best_q = rank
                best_stream = stream_url

        if best_stream:
            return best_stream + '|User-Agent=%s' % (scraper_utils.get_ua())
        return None

Example #3

0

Show file

File: dizimag_scraper.py Project: enursha101/xbmc-addon

    def __create_source(self, stream_url, height, page_url, subs=False, direct=True):
        """Build one hoster dict for *stream_url*.

        For direct links the escaped slashes are unescaped and request headers
        are appended to the URL; links containing this scraper's lower-cased
        name are first probed with a HEAD request that does not follow
        redirects. For non-direct links the URL's hostname is used as host.
        """
        if not direct:
            host = urlparse.urlparse(stream_url).hostname
        else:
            stream_url = stream_url.replace('\\/', '/')
            if self.get_name().lower() not in stream_url:
                extra = {'User-Agent': scraper_utils.get_ua(), 'Referer': page_url}
            else:
                redir_url = self._http_get(stream_url, headers={'Referer': page_url}, method='HEAD',
                                           allow_redirect=False, cache_limit=.25)
                if redir_url.startswith('http'):
                    # The probe returned a URL: play that one instead.
                    stream_url = redir_url
                    extra = {'User-Agent': scraper_utils.get_ua()}
                else:
                    extra = {'User-Agent': scraper_utils.get_ua(),
                             'Referer': page_url,
                             'Cookie': self._get_stream_cookies()}

            stream_url += scraper_utils.append_headers(extra)
            host = scraper_utils.get_direct_hostname(self, stream_url)

        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        else:
            quality = scraper_utils.height_get_quality(height)

        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                  'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
        if subs:
            hoster['subs'] = 'Turkish Subtitles'
        return hoster

Example #4

0

Show file

File: pubfilmto_scraper.py Project: enursha101/xbmc-addon

    def get_sources(self, video):
        """Return direct mp4 sources found inside the page's 'myiframe' iframe.

        Returns a list of hoster dicts; empty when there is no usable URL for
        *video* or the page has no matching iframe.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        iframes = dom_parser2.parse_dom(html, 'iframe', {'id': 'myiframe'}, req='src', exclude_comments=True)
        if not iframes:
            return hosters

        html = self._http_get(iframes[0].attrs['src'], headers={'Referer': page_url}, cache_limit=.5)

        for tag in dom_parser2.parse_dom(html, 'source', {'type': 'video/mp4'}, req=['src', 'data-res']):
            stream_url = tag.attrs['src']
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
                extra = {'User-Agent': scraper_utils.get_ua()}
            else:
                # Other hosts take their quality from the tag's data-res attribute
                # and additionally get the page as Referer.
                quality = scraper_utils.height_get_quality(tag.attrs['data-res'])
                extra = {'User-Agent': scraper_utils.get_ua(), 'Referer': page_url}
            stream_url += scraper_utils.append_headers(extra)

            hosters.append({'multi-part': False, 'url': stream_url, 'host': host, 'class': self,
                            'quality': quality, 'views': None, 'rating': None, 'direct': True})

        return hosters

Example #5

0

Show file

File: onlinedizi_scraper.py Project: Stevie-Bs/repository.xvbmc

 def get_sources(self, video):
     """Follow the 'Altyaz..s..z' dropdown option through two iframes to a stream URL.

     Returns a list holding at most one hoster dict. The list is empty as
     soon as any step of the chain (dropdown, option link, player div,
     iframes, final URL response) is missing.
     """
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     page_url = urlparse.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=.25)
     menu = dom_parser.parse_dom(html, 'ul', {'class': 'dropdown-menu'})
     if not menu:
         return hosters

     option = re.search('''href=['"]([^'"]+)[^>]*>(?:Altyaz.{1,3}s.{1,3}z)<''', menu[0])
     if not option:
         return hosters

     option_url = urlparse.urljoin(self.base_url, option.group(1))
     html = self._http_get(option_url, cache_limit=2)
     player = dom_parser.parse_dom(html, 'div', {'class': 'video-player'})
     if not player:
         return hosters

     outer_iframe = dom_parser.parse_dom(player[0], 'iframe', ret='src')
     if not outer_iframe:
         return hosters

     html = self._http_get(outer_iframe[0], cache_limit=.25)
     inner_iframe = dom_parser.parse_dom(html, 'iframe', {'id': 'ifr'}, ret='src')
     if not inner_iframe:
         return hosters

     # Requested without following redirects; only a response that is itself a URL is used.
     html = self._http_get(inner_iframe[0], allow_redirect=False, cache_limit=.25)
     if not html.startswith('http'):
         return hosters

     stream_url = html
     host = urlparse.urlparse(stream_url).hostname
     stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
     hosters.append({
         'multi-part': False,
         'host': host,
         'class': self,
         'quality': QUALITIES.HIGH,
         'views': None,
         'rating': None,
         'url': stream_url,
         'direct': False,
     })
     return hosters

Example #6

0

Show file

File: m4ufree_scraper.py Project: kevintone/tdbaddon

    def get_sources(self, video):
        """Gather direct sources from the video page and from its linked button page.

        Returns a list of hoster dicts, empty when no usable URL is known for
        *video*. The view count is the first run of digits in the tag that
        carries the view icon, when that tag exists.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)

        views = None
        icon = dom_parser.parse_dom(html, 'img', {'src': '[^"]*view_icon.png'})
        if icon:
            digits = re.search('(\d+)', icon[0])
            if digits:
                views = digits.group(1)

        # Switch to the "-full-movie-" page when the current page links to one.
        full_movie = re.search('href="([^"]+-full-movie-[^"]+)', html)
        if full_movie:
            url = full_movie.group(1)
            html = self._http_get(url, cache_limit=.5)

        sources = self.__get_sources(html, url)

        # A link directly followed by a <button> leads to a further page of sources.
        button_link = re.search('href="([^"]+)[^>]*>\s*<button', html)
        if button_link:
            html = self._http_get(button_link.group(1), cache_limit=.5)
            sources.update(self.__get_sources(html, url))

        for source in sources:
            host = self._get_direct_hostname(source)
            stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': sources[source]['quality'],
                'views': views,
                'rating': None,
                'url': stream_url,
                'direct': True,
            })

        return hosters

Example #7

0

Show file

File: moviehubs_scraper.py Project: EPiC-APOC/repository.xvbmc

    def get_sources(self, video):
        """Pair server names with play links on the video page and build hoster entries.

        Servers named 'google' are expanded through ``__get_gvideo_links``;
        every other server yields a single non-direct link. Returns a list of
        hoster dicts, empty when no usable URL is known for *video*.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=8)
        hosts = dom_parser.parse_dom(html, 'p', {'class': 'server_servername'})
        links = dom_parser.parse_dom(html, 'p', {'class': 'server_play'})
        for host, link_frag in zip(hosts, links):
            stream_url = dom_parser.parse_dom(link_frag, 'a', ret='href')
            if not stream_url:
                continue

            stream_url = stream_url[0]
            # re.I must be passed as `flags`: the 4th positional argument of
            # re.sub is `count`, so the original call was never case-insensitive.
            host = re.sub('^Server\s*', '', host, flags=re.I)
            host = re.sub('\s*Link\s+\d+', '', host)
            if host.lower() == 'google':
                sources = self.__get_gvideo_links(stream_url)
            else:
                sources = [{'host': host, 'link': stream_url}]

            for source in sources:
                stream_url = source['link']
                host = self._get_direct_hostname(stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                    stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
                    direct = True
                else:
                    # Map the displayed server name through HOST_SUB, falling back to the name itself.
                    host = HOST_SUB.get(source['host'].lower(), source['host'])
                    quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
                    direct = False
                hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                hosters.append(hoster)

        return hosters

Example #8

0

Show file

File: dizibox_scraper.py Project: EPiC-APOC/repository.xvbmc

 def __get_king_links(self, iframe_url):
     """Fetch the 'VideoSources' list for the id in *iframe_url* and build hoster entries.

     The id is everything after ``v=`` in *iframe_url*; it is posted as ``ID``
     to the same URL with its query replaced by ``?p=GetVideoSources``.
     Returns a list of hoster dicts (all flagged direct, with Turkish
     subtitles); empty when the id is missing or the response has no usable
     'VideoSources' list.
     """
     hosters = []
     match = re.search('v=(.*)', iframe_url)
     if not match:
         return hosters

     data = {'ID': match.group(1)}
     headers = {'Referer': iframe_url}
     headers.update(XHR)
     xhr_url = iframe_url.split('?')[0] + '?p=GetVideoSources'
     html = self._http_get(xhr_url, data=data, headers=headers, cache_limit=.5)
     js_data = scraper_utils.parse_json(html, xhr_url)

     # Best effort: a response without a usable source list yields no hosters.
     try:
         video_sources = list(js_data['VideoSources'])
     except Exception:
         return hosters

     for source in video_sources:
         # Handle each entry on its own so one malformed entry no longer
         # discards every entry after it. `except Exception` (rather than a
         # bare except) lets KeyboardInterrupt/SystemExit propagate.
         try:
             stream_url = source['file'] + '|User-Agent=%s' % (scraper_utils.get_ua())
             host = self._get_direct_hostname(source['file'])
             label = source.get('label', '')
             if host == 'gvideo':
                 quality = scraper_utils.gv_get_quality(source['file'])
             elif label.isdigit():
                 quality = scraper_utils.height_get_quality(label)
             else:
                 quality = QUALITY_MAP.get(label, QUALITIES.HIGH)
         except Exception:
             continue

         hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True, 'subs': 'Turkish Subtitles'}
         hosters.append(hoster)

     return hosters

Example #9

0

Show file

    def get_sources(self, video):
        """Build direct hoster entries from the source list parsed out of the page.

        When the page has a 'playex' div, only that div's content is parsed.
        Links containing this scraper's base URL are probed with a HEAD
        request that does not follow redirects, and a returned URL replaces
        the link. Returns a list of hoster dicts.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=8)
        playex = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
        if playex:
            html = playex[0].content

        links = scraper_utils.parse_sources_list(self, html)
        for link in links:
            stream_url = link
            if self.base_url in link:
                target = self._http_get(link, headers={'Referer': url}, allow_redirect=False, method='HEAD')
                if target.startswith('http'):
                    stream_url = target

            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host != 'gvideo':
                # Non-gvideo streams use the parsed quality and get request headers appended.
                quality = links[link]['quality']
                stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': url})
            else:
                quality = scraper_utils.gv_get_quality(stream_url)

            hosters.append({
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'direct': True,
            })

        return hosters

Example #10

0

Show file

 def get_sources(self, video):
     """Look up the page's ``video_id`` and turn the JSON source map into hoster entries.

     The id is posted as ``v`` to VIDEO_URL; each value of the returned
     mapping is searched for ``url=<stream>``, and the mapping key is used as
     the height for non-gvideo hosts. Returns a list of direct hoster dicts,
     empty when no usable URL is known or the page has no ``video_id``.
     """
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     page_url = urlparse.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=.5)
     match = re.search('var\s*video_id="([^"]+)', html)
     if not match:
         return hosters

     video_id = match.group(1)
     url = urlparse.urljoin(self.base_url, VIDEO_URL)
     data = {'v': video_id}
     # Copy XHR before adding the Referer: assigning into the shared
     # module-level dict would leak this page's Referer into every later
     # request that passes XHR as its headers.
     headers = dict(XHR)
     headers['Referer'] = page_url
     html = self._http_get(url, data=data, headers=headers, cache_limit=.5)
     sources = scraper_utils.parse_json(html, url)
     for source in sources:
         match = re.search('url=(.*)', sources[source])
         if not match:
             continue

         stream_url = urllib.unquote(match.group(1))
         host = self._get_direct_hostname(stream_url)
         if host == 'gvideo':
             quality = scraper_utils.gv_get_quality(stream_url)
         else:
             quality = scraper_utils.height_get_quality(source)
         stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
         hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
         hosters.append(hoster)
     return hosters

Example #11

0

Show file

    def get_sources(self, video):
        """Return the single direct stream named by the page's ``"file"`` entry.

        Movies are reported as HD720 and everything else as HIGH. The stream
        URL carries User-Agent and (URL-quoted) Referer headers. Returns a
        list with at most one hoster dict.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=1)
        found = re.search('"file"\s*:\s*"([^"]+)', html)
        if not found:
            return hosters

        is_movie = video.video_type == VIDEO_TYPES.MOVIE
        quality = QUALITIES.HD720 if is_movie else QUALITIES.HIGH
        header_suffix = '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(url))
        stream_url = found.group(1) + header_suffix
        host = self._get_direct_hostname(stream_url)
        hosters.append({'multi-part': False, 'host': host, 'class': self, 'url': stream_url,
                        'quality': quality, 'views': None, 'rating': None, 'direct': True})
        return hosters

Example #12

0

Show file

File: ororotv_scraper.py Project: kevintone/tdbaddon

    def get_sources(self, video):
        """Return one HD720 direct hoster per ``<source src=... type='video/...'>`` tag.

        For movies, the page is first replaced by the one named in its
        ``data-href`` anchor when such an anchor exists. Returns a list of
        hoster dicts, empty when no usable URL is known for *video*.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, headers=XHR, cache_limit=.5)
        quality = QUALITIES.HD720

        if video.video_type == VIDEO_TYPES.MOVIE:
            anchor = re.search('<a\s+data-href="([^"]+)', html)
            if anchor:
                source_url = anchor.group(1)
                url = urlparse.urljoin(self.base_url, source_url)
                html = self._http_get(url, headers=XHR, cache_limit=.5)

        for tag in re.finditer("source src='([^']+)'\s+type='video/([^']+)", html):
            # Undo HTML entity escaping of '&' before appending the header suffix.
            raw_url = tag.group(1).replace('&amp;', '&')
            stream_url = raw_url + '|User-Agent=%s' % (scraper_utils.get_ua())
            host = self._get_direct_hostname(stream_url)
            hosters.append({'multi-part': False, 'host': host, 'class': self, 'url': stream_url,
                            'quality': quality, 'views': None, 'rating': None, 'direct': True})
        return hosters

Example #13

0

Show file

File: dizibox_scraper.py Project: c0ns0le/YCBuilds

    def get_sources(self, video):
        """Follow the 'Altyaz..s..z' option to its player iframe and list its file/label streams.

        Each distinct ``file`` URL with a numeric ``label`` becomes one direct
        hoster. Returns a list of hoster dicts, empty as soon as any step
        (option, wrapper span, iframe) is missing.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.25)
        match = re.search('''<option[^>]+value\s*=\s*["']([^"']+)[^>]*>(?:Altyaz.{1,3}s.{1,3}z)<''', html)
        if not match:
            return hosters

        option_url = urlparse.urljoin(self.base_url, match.group(1))
        html = self._http_get(option_url, cache_limit=.25)
        fragment = dom_parser.parse_dom(html, 'span', {'class': 'object-wrapper'})
        if not fragment:
            return hosters

        iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
        if not iframe_url:
            return hosters

        html = self._http_get(iframe_url[0], cache_limit=.25)

        # A set is enough here: only membership of already-emitted URLs matters.
        seen_urls = set()
        for match in re.finditer('"?file"?\s*:\s*"([^"]+)"\s*,\s*"?label"?\s*:\s*"(\d+)p?[^"]*"', html):
            stream_url, height = match.groups()
            if stream_url in seen_urls:
                continue

            seen_urls.add(stream_url)
            stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
            # Look the hostname up once and reuse it for both the quality
            # decision and the hoster entry (it was previously computed twice).
            host = self._get_direct_hostname(stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = scraper_utils.height_get_quality(height)
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
            hosters.append(hoster)

        return hosters

Example #14

0

Show file

File: xmovies8_scraper.py Project: assli100/kodi-openelec

 def get_sources(self, video):
     """Look up the page's ``video_id`` and turn the JSON source map into hoster entries.

     The id is posted as ``v`` to VIDEO_URL; each value of the returned
     mapping is searched for ``url=<stream>``, and the mapping key is used as
     the height for non-gvideo hosts. Returns a list of direct hoster dicts,
     empty when no usable URL is known or the page has no ``video_id``.
     """
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     page_url = urlparse.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=.5)
     match = re.search('var\s*video_id="([^"]+)', html)
     if not match:
         return hosters

     video_id = match.group(1)
     url = urlparse.urljoin(self.base_url, VIDEO_URL)
     data = {'v': video_id}
     # Work on a copy: writing the Referer into the module-level XHR dict
     # would make every later request that reuses XHR send this page's
     # Referer as well.
     headers = dict(XHR)
     headers['Referer'] = page_url
     html = self._http_get(url, data=data, headers=headers, cache_limit=.5)
     sources = scraper_utils.parse_json(html, url)
     for source in sources:
         match = re.search('url=(.*)', sources[source])
         if not match:
             continue

         stream_url = urllib.unquote(match.group(1))
         host = self._get_direct_hostname(stream_url)
         if host == 'gvideo':
             quality = scraper_utils.gv_get_quality(stream_url)
         else:
             quality = scraper_utils.height_get_quality(source)
         stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
         hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
         hosters.append(hoster)
     return hosters

Example #15

0

Show file

    def get_sources(self, video):
        """Turn every anchor on the page whose href contains MOVIE_URL into a direct hoster.

        Quality comes from the height parsed out of the link; a parsed
        'format' value is copied onto the hoster when present. Returns a list
        of hoster dicts.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        for attrs, _content in dom_parser2.parse_dom(html, 'a', req='href'):
            href = attrs['href']
            if MOVIE_URL not in href:
                continue

            meta = scraper_utils.parse_movie_link(href)
            pathified = scraper_utils.pathify_url(href)
            stream_url = pathified + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            quality = scraper_utils.height_get_quality(meta['height'])
            host = scraper_utils.get_direct_hostname(self, stream_url)
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                      'views': None, 'rating': None, 'url': stream_url, 'direct': True}
            if 'format' in meta:
                hoster['format'] = meta['format']
            hosters.append(hoster)

        return hosters

Example #16

0

Show file

File: farda_scraper.py Project: EPiC-APOC/repository.xvbmc

 def get_sources(self, video):
     """List non-dubbed files in the source directory that match *video*.

     Movies match when the normalized video title is contained in the
     normalized title parsed from the file link; episodes match through
     ``__episode_match``. Returns a list of direct hoster dicts.
     """
     source_url = self.get_url(video)
     hosters = []
     norm_title = scraper_utils.normalize_title(video.title)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     source_url = urlparse.urljoin(self.base_url, source_url)
     for line in self._get_files(source_url, cache_limit=24):
         if line['directory']:
             continue

         match = {}
         if video.video_type == VIDEO_TYPES.MOVIE:
             meta = scraper_utils.parse_movie_link(line['link'])
             if norm_title in scraper_utils.normalize_title(meta['title']):
                 match = line
         elif self.__episode_match(line, video):
             match = line
             meta = scraper_utils.parse_episode_link(line['link'])

         # `meta` is only read when this entry matched, in which case it was set above.
         if not match:
             continue
         if meta['dubbed']:
             continue

         stream_url = match['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
         stream_url = stream_url.replace(self.base_url, '')
         hoster = {
             'multi-part': False,
             'host': self._get_direct_hostname(stream_url),
             'class': self,
             'quality': scraper_utils.height_get_quality(meta['height']),
             'views': None,
             'rating': None,
             'url': stream_url,
             'direct': True,
         }
         if 'format' in meta:
             hoster['format'] = meta['format']
         if 'size' in match:
             hoster['size'] = scraper_utils.format_size(int(match['size']))
         hosters.append(hoster)

     return hosters

Example #17

0

Show file

File: sezonlukdizi_scraper.py Project: assli100/kodi-openelec

    def __get_links(self, url):
        """Extract file/label stream entries from the page embedded in *url*.

        *url* is searched for a ``src="..."`` attribute; that page (or the
        script page it assembles, with a random cache-busting suffix) is
        scanned for ``file``/``label`` pairs. Returns a list of direct hoster
        dicts, empty when *url* contains no ``src`` attribute.
        """
        sources = []
        match = re.search('src="([^"]+)', url)
        if not match:
            return sources

        url = match.group(1).replace('\\/', '/')
        html = self._http_get(url, cache_limit=0)
        match = re.search('<script\s+src="([^\']+)\'\+(\d+)\+\'([^\']+)', html)
        if match:
            page_url = ''.join(match.groups())
            page_url += str(random.random())
            html = self._http_get(page_url, cache_limit=0)

        for match in re.finditer('"?file"?\s*:\s*"([^"]+)"\s*,\s*"?label"?\s*:\s*"(\d+)p?"', html):
            stream_url, height = match.groups()
            stream_url = stream_url.replace('\\&', '&').replace('\\/', '/')
            if 'v.asp' in stream_url and 'ok.ru' not in url:
                stream_redirect = self._http_get(stream_url, allow_redirect=False, cache_limit=0)
                # Only adopt the response when it is actually a URL; a
                # non-redirect response body must not replace the stream URL.
                if stream_redirect and stream_redirect.startswith('http'):
                    stream_url = stream_redirect

            # Resolve the hostname once, before the header suffix is appended.
            host = self._get_direct_hostname(stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = scraper_utils.height_get_quality(height)

            stream_url += '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(url))
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
            sources.append(hoster)
        return sources

Example #18

0

Show file

    def get_sources(self, video):
        """Merge sources from posts, ajax and embedded players into direct hoster entries.

        The three lookups are merged in that order, so later ones override
        earlier ones for the same URL; the mapped value is used as quality.
        Returns a list of hoster dicts.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)

        sources = self.__get_posts(html)
        for extra in (self.__get_ajax(html, url), self.__get_embedded(html, url)):
            sources.update(extra)

        for source in sources:
            stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            # The hostname is taken from the bare URL, not the header-suffixed one.
            host = scraper_utils.get_direct_hostname(self, source)
            hosters.append({'multi-part': False, 'host': host, 'class': self, 'quality': sources[source],
                            'views': None, 'rating': None, 'url': stream_url, 'direct': True,
                            'subs': 'Turkish subtitles'})

        return hosters

Example #19

0

Show file

File: farda_scraper.py Project: henry73/salts

 def get_sources(self, video):
     """List non-dubbed files in the source directory that match *video*.

     Movies match when the normalized video title is contained in the
     normalized title parsed from the file link; episodes match on equal
     season and episode numbers. Returns a list of direct hoster dicts.
     """
     source_url = self.get_url(video)
     hosters = []
     norm_title = scraper_utils.normalize_title(video.title)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     source_url = urlparse.urljoin(self.base_url, source_url)
     for line in self.__get_files(source_url, cache_limit=24):
         if line['directory']:
             continue

         match = {}
         if video.video_type == VIDEO_TYPES.MOVIE:
             match_title, _match_year, height, extra = scraper_utils.parse_movie_link(line['link'])
             if norm_title in scraper_utils.normalize_title(match_title):
                 match = line
         else:
             _show_title, season, episode, height, extra = scraper_utils.parse_episode_link(line['link'])
             same_season = int(video.season) == int(season)
             if same_season and int(video.episode) == int(episode):
                 match = line

         # The dubbed check runs for every file, matched or not.
         if 'dubbed' in extra.lower():
             continue
         if not match:
             continue

         stream_url = match['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
         hoster = {
             'multi-part': False,
             'host': self._get_direct_hostname(stream_url),
             'class': self,
             'quality': scraper_utils.height_get_quality(height),
             'views': None,
             'rating': None,
             'url': stream_url,
             'direct': True,
         }
         if 'x265' in extra:
             hoster['format'] = 'x265'
         if 'size' in match:
             hoster['size'] = scraper_utils.format_size(int(match['size']))
         hosters.append(hoster)

     return hosters

Example #20

0

Show file

File: dizilab_scraper.py Project: EPiC-APOC/repository.xvbmc

 def __get_cloud_links(self, html, page_url, sub):
     """Resolve the cloud-hosted stream variants embedded in ``html``.

     Args:
         html: page markup; escaped quotes and slashes are unescaped before
             it is searched.
         page_url: URL of the page, sent as Referer for the link request and
             quoted into the final stream URL.
         sub: value stored under the 'subs' key of every hoster returned.

     Returns:
         A list of hoster dicts, one per variant that names at least one
         host. Empty when the episode id, the script variables or the
         variants cannot be found.
     """
     hosters = []
     html = html.replace('\\"', '"').replace('\\/', '/')
     match = re.search(r"dizi_kapak_getir\('([^']+)", html)
     if not match:
         return hosters

     ep_id = match.group(1)
     for script_url in dom_parser.parse_dom(html, 'script', {'data-cfasync': 'false'}, ret='src'):
         script = self._http_get(script_url, cache_limit=24)
         match1 = re.search(r"var\s+kapak_url\s*=\s*'([^']+)", script)
         match2 = re.search(r"var\s+aCtkp\s*=\s*'([^']+)", script)
         if not (match1 and match2):
             continue

         link_url = '%s?fileid=%s&access_token=%s' % (match1.group(1), ep_id, match2.group(1))
         headers = {'Referer': page_url}
         link_html = self._http_get(link_url, headers=headers, cache_limit=.5)
         js_data = scraper_utils.parse_json(link_html, link_url)
         for variant in js_data.get('variants', {}):
             # random.choice() raises IndexError on an empty sequence, so a
             # variant with no (or an empty) 'hosts' list is skipped here
             # instead of aborting the whole scrape.
             hosts = variant.get('hosts', [])
             if not hosts:
                 continue

             stream_host = random.choice(hosts)
             if not stream_host:
                 continue

             stream_url = STREAM_URL % (stream_host, variant['path'], scraper_utils.get_ua(), urllib.quote(page_url))
             if not stream_url.startswith('http'):
                 stream_url = 'http://' + stream_url
             host = self._get_direct_hostname(stream_url)
             # Prefer width, then height; fall back to a fixed default.
             if 'width' in variant:
                 quality = scraper_utils.width_get_quality(variant['width'])
             elif 'height' in variant:
                 quality = scraper_utils.height_get_quality(variant['height'])
             else:
                 quality = QUALITIES.HIGH
             hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
             hoster['subs'] = sub
             hosters.append(hoster)
     return hosters

Example #21

0

Show file

File: 123movies_scraper.py Project: freeworldxbmc/KAOSbox-Repo

 def get_sources(self, video):
     """Return hoster dicts for every playable link of ``video``.

     Scans the source page for ``loadEpisode(type, id, 'hash')`` calls and
     their button labels. For episodes, only buttons whose label matches
     ``Episode <n>`` are followed. Link types '12', '13' and '14' are
     resolved through the JSON playlist, all others through the XML
     playlist. Links that do not start with 'http' are ignored.
     """
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     html = self.__get_source_page(source_url)
     page_url = urlparse.urljoin(self.base_url, source_url)
     sources = {}
     for match in re.finditer('''loadEpisode\(\s*(\d+)\s*,\s*(\d+)\s*,\s*'([^']+)'\s*\).*?class="btn-eps[^>]*>([^<]+)''', html, re.DOTALL):
         link_type, link_id, hash_id, q_str = match.groups()
         pattern = 'Episode\s+%s(:|$| )' % (video.episode)
         if video.video_type == VIDEO_TYPES.EPISODE and not re.search(pattern, q_str):
             continue

         if link_type in ('12', '13', '14'):
             playlist_url = urlparse.urljoin(self.base_url, PLAYLIST_URL1 % (link_id))
             found = self.__get_link_from_json(playlist_url, q_str)
         else:
             playlist_url = urlparse.urljoin(self.base_url, PLAYLIST_URL2 % (link_id, hash_id))
             found = self.__get_links_from_xml(playlist_url, video, page_url)
         sources.update(found)

     for source in sources:
         if not source.lower().startswith('http'):
             continue

         info = sources[source]
         stream_url = source
         if not info['direct']:
             host = urlparse.urlparse(source).hostname
         else:
             host = self._get_direct_hostname(source)
             # Only non-gvideo direct links get the header suffix.
             if host != 'gvideo':
                 stream_url = source + '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), page_url)

         hosters.append({
             'multi-part': False,
             'host': host,
             'class': self,
             'quality': info['quality'],
             'views': None,
             'rating': None,
             'url': stream_url,
             'direct': info['direct']
         })
     return hosters

Example #22

0

Show file

File: vivoto_scraper.py Project: freeworldxbmc/KAOSbox-Repo

    def get_sources(self, video):
        """Return hoster dicts for the links found on the page of ``video``.

        The page-level quality comes from the first ``dd.status`` element,
        mapped through ``QUALITY_MAP`` (default ``QUALITIES.HIGH``). Links
        identified as 'gvideo' are direct and keep the quality reported for
        them; every other link is non-direct, uses the URL's hostname as
        host and gets its quality from ``scraper_utils.get_quality``. Links
        without a hostname are dropped.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        status = dom_parser.parse_dom(html, 'dd', {'class': 'status'})
        page_quality = QUALITY_MAP.get(status[0], QUALITIES.HIGH) if status else QUALITIES.HIGH

        sources = self.__get_gk_links(html, url, page_quality)
        for source in sources:
            host = self._get_direct_hostname(source)
            direct = host == 'gvideo'
            if direct:
                quality = sources[source]
            else:
                host = urlparse.urlparse(source).hostname
                quality = scraper_utils.get_quality(video, host, sources[source])

            if host is None:
                continue

            stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': direct
            })

        return hosters

Example #23

0

Show file

    def get_sources(self, video):
        """Return hoster dicts for ``video`` from the gk and ht link helpers.

        For episodes, only the anchors labelled with the episode number or
        ``Server <n>`` are handed to ``scraper_utils.get_gk_links``; for any
        other video type the whole page is used. Links from
        ``__get_ht_links`` are merged on top. 'gvideo' links are marked
        direct; others use the URL hostname and are skipped when it is
        missing.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        gk_html = html
        if video.video_type == VIDEO_TYPES.EPISODE:
            anchor_re = '<a[^>]*>(%s|Server \d+)</a>' % (video.episode)
            gk_html = ''.join([anchor.group(0) for anchor in re.finditer(anchor_re, html, re.I)])

        link_url = scraper_utils.urljoin(self.base_url, LINK_URL)
        player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
        sources = scraper_utils.get_gk_links(self, gk_html, page_url, QUALITIES.HIGH, link_url, player_url)
        sources.update(self.__get_ht_links(html, page_url))

        for stream_url, quality in sources.iteritems():
            host = scraper_utils.get_direct_hostname(self, stream_url)
            direct = host == 'gvideo'
            if not direct:
                host = urlparse.urlparse(stream_url).hostname

            if host is None:
                continue

            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()}),
                'direct': direct
            })

        return hosters

Example #24

0

Show file

File: fmovie_scraper.py Project: CYBERxNUKE/xbmc-addon

 def get_sources(self, video):
     """Return direct hoster dicts for ``video`` via the info endpoint.

     Reads ``video_id`` from the page script, sends it to ``INFO_URL`` and
     parses the JSON reply. Each value must contain ``url=<quoted url>``;
     entries without it are skipped. Quality comes from the stream URL for
     'gvideo' hosts and from the entry's key otherwise.
     """
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     page_url = scraper_utils.urljoin(self.base_url, source_url)
     page_html = self._http_get(page_url, cache_limit=.5)
     id_match = re.search('var\s*video_id="([^"]+)', page_html)
     if not id_match:
         return hosters

     headers = {'Referer': page_url}
     headers.update(XHR)
     info_html = self._http_get(INFO_URL, data={'v': id_match.group(1)}, headers=headers, cache_limit=.5)
     sources = scraper_utils.parse_json(info_html, INFO_URL)
     for label in sources:
         url_match = re.search('url=(.*)', sources[label])
         if not url_match:
             continue

         stream_url = urllib.unquote(url_match.group(1))
         host = scraper_utils.get_direct_hostname(self, stream_url)
         if host != 'gvideo':
             quality = scraper_utils.height_get_quality(label)
         else:
             quality = scraper_utils.gv_get_quality(stream_url)

         hosters.append({
             'multi-part': False,
             'host': host,
             'class': self,
             'quality': quality,
             'views': None,
             'rating': None,
             'url': stream_url + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()}),
             'direct': True
         })
     return hosters

Example #25

0

Show file

    def get_sources(self, video):
        """Return direct hoster dicts from the player iframe of ``video``.

        Looks for an iframe (inside ``div.playex`` when that exists),
        fetches it with the page as Referer, and reads the streams either
        from an ``<object data=...>`` element via
        ``scraper_utils.parse_google`` or from the page's sources list.
        Non-gvideo streams get User-Agent and Referer headers appended;
        gvideo URLs are left untouched.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        player = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
        if player:
            html = player[0].content

        iframes = dom_parser2.parse_dom(html, 'iframe', req='src')
        if not iframes:
            return hosters

        iframe_url = iframes[0].attrs['src']
        if iframe_url.startswith('/'):
            iframe_url = scraper_utils.urljoin(self.base_url, iframe_url)

        html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
        obj = dom_parser2.parse_dom(html, 'object', req='data')
        if not obj:
            streams = scraper_utils.parse_sources_list(self, html)
        else:
            streams = {}
            for google_url in scraper_utils.parse_google(self, obj[0].attrs['data']):
                streams[google_url] = {'quality': scraper_utils.gv_get_quality(google_url), 'direct': True}

        for stream_url, values in streams.iteritems():
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host != 'gvideo':
                quality = values['quality']
                stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
            else:
                quality = scraper_utils.gv_get_quality(stream_url)

            hosters.append({
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'direct': True
            })

        return hosters

Example #26

0

Show file

    def get_sources(self, video):
        """Return direct hoster dicts decoded from the page's ``sources`` list.

        Each ``file:`` expression inside the ``sources: [...]`` block is
        passed to ``__decode``; entries that decode to nothing are skipped.
        Movies are reported as ``QUALITIES.HD720`` and everything else as
        ``QUALITIES.HIGH``. User-Agent, Referer and Cookie headers are
        appended to every stream URL.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=1)
        block = re.search('''["']sources['"]\s*:\s*\[(.*?)\]''', html, re.DOTALL)
        if not block:
            return hosters

        for file_match in re.finditer('''['"]*file['"]*\s*:\s*([^\(]+)''', block.group(1), re.DOTALL):
            stream_url = self.__decode(file_match.group(1), html)
            if not stream_url:
                continue

            quality = QUALITIES.HD720 if video.video_type == VIDEO_TYPES.MOVIE else QUALITIES.HIGH
            header_suffix = '|User-Agent=%s&Referer=%s&Cookie=%s' % (
                scraper_utils.get_ua(), urllib.quote(url), self._get_stream_cookies())
            stream_url = stream_url + header_suffix
            hosters.append({
                'multi-part': False,
                'host': self._get_direct_hostname(stream_url),
                'class': self,
                'url': stream_url,
                'quality': quality,
                'views': None,
                'rating': None,
                'direct': True
            })

        return hosters

Example #27

0

Show file

File: filmovizjia_scraper.py Project: EPiC-APOC/repository.xvbmc

 def __get_direct_links(self, html, page_url):
     """Return direct hoster dicts for the 'rektab' links found in ``html``.

     Needs the ``&u=`` token and a ``ul`` whose class contains 'rektab'.
     The id of the first anchor in that list selects the anchors whose
     class values are combined with the token into ``YT_URL`` requests;
     every response is parsed as a sources list. Sources without a quality
     default to ``QUALITIES.HIGH``.
     """
     hosters = []
     token = re.search("&u=([^']+)", html)
     if not token:
         return hosters

     u = token.group(1)
     fragment = dom_parser.parse_dom(html, 'ul', {'class': '[^"]*rektab[^"]*'})
     if not fragment:
         return hosters

     ids = dom_parser.parse_dom(fragment[0], 'a', ret='id')
     if not ids:
         return hosters

     page = ids[0]
     for s in dom_parser.parse_dom(fragment[0], 'a', {'id': page}, ret='class'):
         yt_url = YT_URL % (page, s, u)
         url = urlparse.urljoin(self.base_url, yt_url)
         list_html = self._http_get(url, headers={'Referer': page_url}, cache_limit=.5)
         sources = self._parse_sources_list(list_html)
         for source in sources:
             host = self._get_direct_hostname(source)
             quality = sources[source]['quality'] or QUALITIES.HIGH
             stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
             hosters.append({
                 'multi-part': False,
                 'host': host,
                 'class': self,
                 'quality': quality,
                 'views': None,
                 'rating': None,
                 'url': stream_url,
                 'direct': True
             })
     return hosters

Example #28

0

Show file

File: rainierland_scraper.py Project: c0ns0le/YCBuilds

 def get_sources(self, video):
     """Return direct hoster dicts for the ``<source>`` tags of ``video``.

     The tags are read from the script referenced inside the 'screen' div
     when it has a ``src``, otherwise from the div itself. gvideo hosts and
     URLs containing 'blogspot' take their quality from the URL and stay
     unmodified; any other URL gets its quality from the height parsed out
     of the link and has a User-Agent header appended.
     """
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     url = urlparse.urljoin(self.base_url, source_url)
     html = self._http_get(url, cache_limit=.5)
     fragment = dom_parser.parse_dom(html, 'div', {'class': '[^"]*screen[^"]*'})
     if not fragment:
         return hosters

     js_src = dom_parser.parse_dom(fragment[0], 'script', ret='src')
     if not js_src:
         html = fragment[0]
     else:
         html = self._http_get(urlparse.urljoin(self.base_url, js_src[0]), cache_limit=.5)

     for tag in re.finditer('<source[^>]+src="([^"]+)', html):
         stream_url = tag.group(1)
         host = self._get_direct_hostname(stream_url)
         if host == 'gvideo' or 'blogspot' in stream_url:
             quality = scraper_utils.gv_get_quality(stream_url)
         else:
             _, _, height, _ = scraper_utils.parse_movie_link(stream_url)
             quality = scraper_utils.height_get_quality(height)
             stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())

         hosters.append({
             'multi-part': False,
             'host': host,
             'class': self,
             'quality': quality,
             'views': None,
             'rating': None,
             'url': stream_url,
             'direct': True
         })
     return hosters

Example #29

0

Show file

File: dizimag_scraper.py Project: azumimuo/family-xbmc-addon

    def get_sources(self, video):
        """Return direct hoster dicts from the page's AJAX video list.

        Returns nothing for pages carrying the trailer marker. Otherwise the
        AJAX endpoint and its ``id=<n>`` payload are read from the page,
        posted with the ``XHR`` headers, and every ``videolink`` /
        ``videokalite`` pair in the reply becomes one hoster. Non-gvideo
        links get User-Agent and Referer headers appended.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        # A page matching this marker is a trailer: nothing to scrape.
        if re.search('Şu an fragman*', html, re.I):
            return hosters

        ajax = re.search('''url\s*:\s*"([^"]+)"\s*,\s*data:\s*["'](id=\d+)''', html)
        if not ajax:
            return hosters

        ajax_path, post_data = ajax.groups()
        ajax_url = urlparse.urljoin(self.base_url, ajax_path)
        result = self._http_get(ajax_url, data=post_data, headers=XHR, cache_limit=.5)
        for link in re.finditer('"videolink\d*"\s*:\s*"([^"]+)","videokalite\d*"\s*:\s*"?(\d+)p?', result):
            stream_url, height = link.groups()
            stream_url = stream_url.replace('\\/', '/')
            host = self._get_direct_hostname(stream_url)
            if host != 'gvideo':
                quality = scraper_utils.height_get_quality(height)
                stream_url += '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(page_url))
            else:
                quality = scraper_utils.gv_get_quality(stream_url)

            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': True
            })

        return hosters

Example #30

0

Show file

File: afdah_scraper.py Project: normico21/repository.xvbmc

 def __get_links(self, html):
     """Return direct hoster dicts decoded from the ``tlas("...")`` payload.

     The payload is passed through ``__caesar(..., 13)``, ``__get_f`` and
     ``__caesar(..., 13)`` again, then parsed as a sources list. Every
     stream URL carries User-Agent and Cookie headers; the host is derived
     from the URL after the headers have been appended.
     """
     hosters = []
     payload = re.search('tlas\("([^"]+)', html)
     if not payload:
         return hosters

     shifted = self.__caesar(payload.group(1), 13)
     plaintext = self.__caesar(self.__get_f(shifted), 13)
     sources = scraper_utils.parse_sources_list(self, plaintext)
     for source in sources:
         header_suffix = scraper_utils.append_headers({
             'User-Agent': scraper_utils.get_ua(),
             'Cookie': self._get_stream_cookies()
         })
         stream_url = source + header_suffix
         hosters.append({
             'multi-part': False,
             'url': stream_url,
             'host': scraper_utils.get_direct_hostname(self, stream_url),
             'class': self,
             'quality': sources[source]['quality'],
             'rating': None,
             'views': None,
             'direct': True
         })
     return hosters

Example #31

0

Show file

File: tunemovie_scraper.py Project: kevintone/tdbaddon

    def get_sources(self, video):
        """Return hoster dicts for the gk and iframe links of ``video``.

        ``__get_gk_links2`` is only consulted when ``__get_gk_links`` finds
        nothing; iframe links are always merged in. gvideo links are direct
        and get a User-Agent header. For other links, a URL containing
        ``self.base_url`` treats the stored value as the host name, while an
        external URL treats it as the quality.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        sources = self.__get_gk_links(html, url) or self.__get_gk_links2(html)
        sources.update(self.__get_iframe_links(html))

        for source in sources:
            host = self._get_direct_hostname(source)
            direct = host == 'gvideo'
            if direct:
                quality = sources[source]
                stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
            elif self.base_url in source:
                stream_url = source
                host = sources[source]
                quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
            else:
                stream_url = source
                host = urlparse.urlparse(source).hostname
                quality = sources[source]

            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': direct
            })

        return hosters

Example #32

0

Show file

 def get_sources(self, video):
     """Return at most one hoster dict by following the nested iframes.

     Steps: pick the dropdown-menu option whose label matches
     ``Altyaz.{1,3}s.{1,3}z``, open it, follow the iframe inside
     ``div.video-player``, then the iframe with id 'ifr'. That last URL is
     requested with HEAD and redirects disabled; when the result starts
     with 'http' it is used as the stream URL. Any missing step yields an
     empty list.
     """
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     page_url = urlparse.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=.25)
     menu = dom_parser.parse_dom(html, 'ul', {'class': 'dropdown-menu'})
     if not menu:
         return hosters

     option = re.search('''href=['"]([^'"]+)[^>]*>(?:Altyaz.{1,3}s.{1,3}z)<''', menu[0])
     if not option:
         return hosters

     option_url = urlparse.urljoin(self.base_url, option.group(1))
     html = self._http_get(option_url, cache_limit=2)
     player = dom_parser.parse_dom(html, 'div', {'class': 'video-player'})
     if not player:
         return hosters

     outer_iframe = dom_parser.parse_dom(player[0], 'iframe', ret='src')
     if not outer_iframe:
         return hosters

     html = self._http_get(outer_iframe[0], cache_limit=.25)
     inner_iframe = dom_parser.parse_dom(html, 'iframe', {'id': 'ifr'}, ret='src')
     if not inner_iframe:
         return hosters

     target = self._http_get(inner_iframe[0], allow_redirect=False, method='HEAD', cache_limit=.25)
     if not target.startswith('http'):
         return hosters

     host = urlparse.urlparse(target).hostname
     stream_url = target + '|User-Agent=%s' % (scraper_utils.get_ua())
     hosters.append({
         'multi-part': False,
         'host': host,
         'class': self,
         'quality': QUALITIES.HIGH,
         'views': None,
         'rating': None,
         'url': stream_url,
         'direct': False
     })
     return hosters

Example #33

0

Show file

File: moviego_scraper.py Project: EPiC-APOC/repository.xvbmc

    def get_sources(self, video):
        """Return direct source dicts for every ``tab_box`` of ``video``.

        The page quality is read from the 'poster-qulabel' div (spaces
        removed, upper-cased, mapped through ``Q_MAP``; default
        ``QUALITIES.HIGH``). Each tab yields a stream URL either from an
        inline ``file: "..."`` entry or from ``__get_ajax_sources``. gvideo
        streams derive their quality from the URL; others use the page
        quality.
        """
        sources = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return sources

        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        label = dom_parser.parse_dom(html, 'div', {'class': 'poster-qulabel'})
        page_quality = QUALITIES.HIGH
        if label:
            page_quality = Q_MAP.get(label[0].replace(' ', '').upper(), QUALITIES.HIGH)

        for fragment in dom_parser.parse_dom(html, 'div', {'class': 'tab_box'}):
            inline = re.search('file\s*:\s*"([^"]+)', fragment)
            stream_url = inline.group(1) if inline else self.__get_ajax_sources(fragment, page_url)
            if not stream_url:
                continue

            host = self._get_direct_hostname(stream_url)
            quality = scraper_utils.gv_get_quality(stream_url) if host == 'gvideo' else page_quality
            stream_url += '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(page_url))
            sources.append({
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'direct': True
            })

        return sources

Example #34

0

Show file

 def get_sources(self, video):
     """Return hoster dicts for the server links listed for ``video``.

     Every anchor in the ``css_server_new`` list gives a link URL and a
     server label; the label is lower-cased, stripped of ``<img>`` tags and
     mapped through ``HOSTS``. Labels in ``GVIDEO_NAMES`` are expanded via
     ``__get_links`` and marked direct; any other link is kept as a single
     non-direct source.

     Returns:
         A list of hoster dicts; empty when the URL or the server list is
         missing.
     """
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     url = urlparse.urljoin(self.base_url, source_url)
     html = self._http_get(url, cache_limit=.5)
     fragment = dom_parser.parse_dom(html, 'ul', {'class': 'css_server_new'})
     if not fragment:
         return hosters

     for match in re.finditer(r'href="([^"]+)[^>]*>(.*?)(?:-\d+)?</a>', fragment[0]):
         url, host = match.groups()
         host = host.lower()
         host = re.sub('<img.*?/>', '', host)
         host = HOSTS.get(host, host)
         log_utils.log('%s - %s' % (url, host))
         if host in GVIDEO_NAMES:
             sources = self.__get_links(urlparse.urljoin(self.base_url, url))
             direct = True
         else:
             sources = {url: host}
             direct = False

         for source in sources:
             # Keep the dict key (`source`) untouched: the header-suffixed
             # URL lives in its own variable so `sources[source]` below
             # still finds its entry instead of raising KeyError.
             if self._get_direct_hostname(source) == 'gvideo':
                 quality = scraper_utils.gv_get_quality(source)
                 stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
             else:
                 # NOTE(review): the URL, not the host label, is passed as
                 # the host argument here — confirm this is intended.
                 quality = scraper_utils.get_quality(video, source, QUALITIES.HIGH)
                 stream_url = source

             hoster = {'multi-part': False, 'host': sources[source], 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
             hosters.append(hoster)
     return hosters

Example #35

0

Show file

File: diziay_scraper.py Project: enursha101/xbmc-addon

 def get_sources(self, video):
     """Return direct gvideo hoster dicts from the player iframe of ``video``.

     The iframe inside ``div.player`` is fetched and handed to
     ``__get_embedded_sources`` and ``__get_linked_sources``. From each
     result's 'sources' only URLs identified as 'gvideo' are kept; each
     hoster's 'subs' is the result's 'subs' value (``True`` when absent).
     """
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=1)
     player = dom_parser2.parse_dom(html, 'div', {'class': 'player'})
     if not player:
         return hosters

     iframes = dom_parser2.parse_dom(player[0].content, 'iframe', req='src')
     if not iframes:
         return hosters

     html = self._http_get(iframes[0].attrs['src'], cache_limit=.25)
     sources = [self.__get_embedded_sources(html), self.__get_linked_sources(html)]
     for source in sources:
         for stream_url in source['sources']:
             host = scraper_utils.get_direct_hostname(self, stream_url)
             if host != 'gvideo':
                 continue

             # Quality is read from the URL after the header suffix is added.
             stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
             quality = scraper_utils.gv_get_quality(stream_url)
             hosters.append({
                 'multi-part': False,
                 'host': host,
                 'class': self,
                 'quality': quality,
                 'views': None,
                 'rating': None,
                 'url': stream_url,
                 'direct': True,
                 'subs': source.get('subs', True)
             })

     return hosters

Example #36

0

Show file

File: moviehdmax_scraper.py Project: henry73/salts

    def get_sources(self, video):
        """Return direct source dicts for ``video``.

        Streams come from ``__get_movie_sources`` for movies and from
        ``__get_episode_sources`` otherwise; the value stored per stream URL
        is used as its quality. The host is derived from the bare URL,
        while the returned URL carries a User-Agent header.
        """
        sources = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return sources

        if video.video_type != VIDEO_TYPES.MOVIE:
            streams = self.__get_episode_sources(source_url)
        else:
            streams = self.__get_movie_sources(source_url)

        for stream_url in streams:
            playable_url = stream_url + '|User-Agent=%s' % (scraper_utils.get_ua())
            sources.append({
                'multi-part': False,
                'url': playable_url,
                'host': self._get_direct_hostname(stream_url),
                'class': self,
                'quality': streams[stream_url],
                'views': None,
                'rating': None,
                'direct': True
            })

        return sources

Example #37

0

Show file

    def get_sources(self, video):
        """Return direct hoster dicts for the download links of ``video``.

        Reads the anchors inside the first element whose class contains
        'download' on the embedded page. The anchor text is used as the
        height label for the quality, and User-Agent plus Referer headers
        are appended to each link before the host is derived from it.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        html = self.__get_embedded_page(source_url)
        fragment = dom_parser.parse_dom(html, 'div', {'class': '[^"]*download[^"]*'})
        if not fragment:
            return hosters

        page_url = urlparse.urljoin(self.base_url, source_url)
        for anchor in re.finditer('href="([^"]+)[^>]+>([^<]+)', fragment[0]):
            stream_url, label = anchor.groups()
            quality = scraper_utils.height_get_quality(label)
            header_suffix = '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(page_url))
            stream_url += header_suffix
            hosters.append({
                'multi-part': False,
                'host': self._get_direct_hostname(stream_url),
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': True
            })

        return hosters

Example #38

0

Show file

File: xmovies8_scraper.py Project: CYBERxNUKE/xbmc-addon

    def get_sources(self, video):
        """Return direct hoster dicts for ``video`` via the video endpoint.

        Reads ``video_id`` from the page script, issues a POST to the 'av'
        path (its response is not used), then posts the id to ``VIDEO_URL``.
        Each JSON value must contain ``url=<quoted url>``; entries without
        it are skipped. gvideo streams take their quality from the URL,
        others from the JSON key.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        page_html = self._http_get(page_url, cache_limit=0)
        id_match = re.search('var\s*video_id\s*=\s*"([^"]+)', page_html)
        if not id_match:
            return hosters

        headers = {'Referer': page_url}
        headers.update(XHR)
        # Response body is discarded; presumably the request matters for
        # its side effects on the session — TODO confirm.
        self._http_get(scraper_utils.urljoin(self.base_url, 'av'), headers=headers, method='POST', cache_limit=0)

        vid_url = scraper_utils.urljoin(self.base_url, VIDEO_URL)
        json_html = self._http_get(vid_url, data={'v': id_match.group(1)}, headers=headers, cache_limit=0)
        for label, value in scraper_utils.parse_json(json_html, vid_url).iteritems():
            url_match = re.search('url=(.*)', value)
            if not url_match:
                continue

            stream_url = urllib.unquote(url_match.group(1))
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host != 'gvideo':
                quality = scraper_utils.height_get_quality(label)
            else:
                quality = scraper_utils.gv_get_quality(stream_url)

            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()}),
                'direct': True
            })
        return hosters

Example #39

0

Show file

    def get_sources(self, video):
        """Build the direct hoster list for ``video`` from the JSON video map.

        The page's ``video_id`` is extracted first. A POST to the 'av' path
        is made and its result ignored, after which the id is posted to
        ``VIDEO_URL``. JSON values lacking a ``url=...`` part are skipped;
        the rest are unquoted into stream URLs. Quality is taken from the
        URL for gvideo hosts and from the JSON key for all others.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=0)
        found_id = re.search('var\s*video_id\s*=\s*"([^"]+)', html)
        if not found_id:
            return hosters

        video_id = found_id.group(1)
        headers = {'Referer': page_url}
        headers.update(XHR)
        # The reply is unused; presumably only the request itself is
        # needed — TODO confirm.
        av_url = scraper_utils.urljoin(self.base_url, 'av')
        self._http_get(av_url, headers=headers, method='POST', cache_limit=0)

        vid_url = scraper_utils.urljoin(self.base_url, VIDEO_URL)
        html = self._http_get(vid_url, data={'v': video_id}, headers=headers, cache_limit=0)
        streams = scraper_utils.parse_json(html, vid_url)
        for source, value in streams.iteritems():
            found_url = re.search('url=(.*)', value)
            if not found_url:
                continue

            stream_url = urllib.unquote(found_url.group(1))
            host = scraper_utils.get_direct_hostname(self, stream_url)
            quality = (scraper_utils.gv_get_quality(stream_url) if host == 'gvideo'
                       else scraper_utils.height_get_quality(source))
            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': True
            })
        return hosters

Example #40

0

Show file

File: farda_scraper.py Project: monicarero/repository.xvbmc

 def get_sources(self, video):
     """Return direct file links from the site's listing that match ``video``.

     Every non-directory entry returned by ``self._get_files`` is parsed as
     a movie or an episode link (depending on ``video.video_type``). Entries
     whose parsed extra info contains ``dubbed`` are skipped. Matching
     entries become hoster dicts, optionally annotated with ``format`` and
     ``size``.
     """
     source_url = self.get_url(video)
     hosters = []
     norm_title = scraper_utils.normalize_title(video.title)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     listing_url = urlparse.urljoin(self.base_url2, source_url)
     for entry in self._get_files(listing_url, cache_limit=24):
         if entry['directory']:
             continue

         if video.video_type == VIDEO_TYPES.MOVIE:
             match_title, _match_year, height, extra = scraper_utils.parse_movie_link(entry['link'])
             wanted = norm_title in scraper_utils.normalize_title(match_title)
         else:
             _show_title, season, episode, height, extra = scraper_utils.parse_episode_link(entry['link'])
             wanted = int(video.season) == int(season) and int(video.episode) == int(episode)

         chosen = entry if wanted else {}
         if 'dubbed' in extra.lower():
             continue
         if not chosen:
             continue

         stream_url = chosen['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
         host = self._get_direct_hostname(stream_url)
         quality = scraper_utils.height_get_quality(height)
         hoster = {
             'multi-part': False,
             'host': host,
             'class': self,
             'quality': quality,
             'views': None,
             'rating': None,
             'url': stream_url,
             'direct': True,
         }
         if 'x265' in extra:
             hoster['format'] = 'x265'
         if 'size' in chosen:
             hoster['size'] = scraper_utils.format_size(int(chosen['size']))
         hosters.append(hoster)

     return hosters

Example #41

0

Show file

File: tvwtvs_scraper.py Project: AMOboxTV/AMOBox.LegoBuild

    def get_sources(self, video):
        """Return direct stream entries gathered from the video's page.

        Links are collected by the two private helpers (gk links and iframe
        links), each mapping a stream URL to its quality. Every collected URL
        is emitted with a ``User-Agent`` header suffix.
        """
        source_url = self.get_url(video)
        hosters = []
        sources = {}
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        sources.update(self.__get_gk_links(html, page_url))
        sources.update(self.__get_iframe_links(html, page_url))

        for source, quality in sources.items():
            host = self._get_direct_hostname(source)
            stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
            hosters.append({'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True})

        return hosters

Example #42

0

Show file

File: firemovies_scraper.py Project: freeworldxbmc/KAOSbox-Repo

 def get_sources(self, video):
     """Return direct stream entries scraped from the page's embedded player.

     The ``meta-media`` div of the video page is searched for an iframe. The
     iframe document is fetched and every ``window.location.href = "..."``
     target found in it is recorded with a quality (derived from the URL for
     gvideo hosts, ``QUALITIES.HIGH`` otherwise). Each recorded URL is then
     emitted as a hoster dict whose ``url`` carries a ``User-Agent`` suffix.

     Returns an empty list when no URL is known for the video or nothing
     was found.
     """
     source_url = self.get_url(video)
     hosters = []
     sources = {}
     if source_url and source_url != FORCE_NO_MATCH:
         url = urlparse.urljoin(self.base_url, source_url)
         html = self._http_get(url, cache_limit=.5)
         fragment = dom_parser.parse_dom(html, 'div', {'class': 'meta-media'})
         if fragment:
             iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
             if iframe_url:
                 iframe_url = urlparse.urljoin(self.base_url, iframe_url[0])
                 html = self._http_get(iframe_url, cache_limit=.5)
                 for match in re.finditer(r'window.location.href\s*=\s*"([^"]+)', html):
                     stream_url = match.group(1)
                     host = self._get_direct_hostname(stream_url)
                     if host == 'gvideo':
                         sources[stream_url] = scraper_utils.gv_get_quality(stream_url)
                     else:
                         # Key on the stream itself; keying on source_url (the
                         # page path) recorded a non-playable URL and collapsed
                         # all non-gvideo streams into one entry.
                         sources[stream_url] = QUALITIES.HIGH

     for source in sources:
         # Derive host and final URL from the source being emitted rather
         # than from whatever stream_url the scan above left behind.
         host = self._get_direct_hostname(source)
         stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
         hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': sources[source], 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
         hosters.append(hoster)
     return hosters

Example #43

0

Show file

 def get_sources(self, video):
     """Return direct stream entries found in the page's player fragment.

     The first div whose class contains ``screen`` is located. If it
     references an external script, that script is fetched and scanned;
     otherwise the fragment itself is scanned. Every ``<source src="...">``
     found becomes a hoster dict. Only non-gvideo URLs get the
     ``User-Agent`` suffix appended.
     """
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     url = urlparse.urljoin(self.base_url, source_url)
     page_html = self._http_get(url, cache_limit=.5)
     fragment = dom_parser.parse_dom(page_html, 'div', {'class': '[^"]*screen[^"]*'})
     if not fragment:
         return hosters

     js_src = dom_parser.parse_dom(fragment[0], 'script', ret='src')
     if js_src:
         js_url = urlparse.urljoin(self.base_url, js_src[0])
         player_html = self._http_get(js_url, cache_limit=.5)
     else:
         player_html = fragment[0]

     for tag in re.finditer('<source[^>]+src="([^"]+)', player_html):
         stream_url = tag.group(1)
         host = self._get_direct_hostname(stream_url)
         if host == 'gvideo':
             quality = scraper_utils.gv_get_quality(stream_url)
         else:
             _, _, height, _ = scraper_utils.parse_movie_link(stream_url)
             quality = scraper_utils.height_get_quality(height)
             stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())

         hosters.append({
             'multi-part': False,
             'host': host,
             'class': self,
             'quality': quality,
             'views': None,
             'rating': None,
             'url': stream_url,
             'direct': True,
         })
     return hosters

Example #44

0

Show file

    def get_sources(self, video):
        """Return the hosters listed on the video's page.

        Server names (``p.server_servername``) are paired positionally with
        their play links (``p.server_play``). Servers whose name contains
        ``google`` are expanded into direct streams through the private
        google-link helper; all other servers are reported as indirect links.
        The page's view count, when present, is attached to every entry.
        """
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)

        views_match = re.search(r'<li>\s*Views\s*:\s*(.*?)</li>', html)
        # Keep only the digits of the views text (drops separators etc.).
        views = re.sub('[^0-9]', '', views_match.group(1)) if views_match else None

        server_names = dom_parser.parse_dom(html, 'p', {'class': 'server_servername'})
        server_links = dom_parser.parse_dom(html, 'p', {'class': 'server_play'})
        for server_name, link_text in zip(server_names, server_links):
            host = server_name.lower().replace('server', '').strip()
            href = re.search('href="([^"]+)', link_text)
            if not href:
                continue

            link = href.group(1)
            if 'google' not in host:
                quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
                hosters.append({'multi-part': False, 'url': link, 'class': self, 'quality': quality, 'host': host, 'rating': None, 'views': views, 'direct': False})
                continue

            for source in self.__get_google_links(link):
                source += '|User-Agent=%s' % (scraper_utils.get_ua())
                quality = scraper_utils.gv_get_quality(source)
                direct_host = self._get_direct_hostname(source)
                hosters.append({'multi-part': False, 'url': source, 'class': self, 'quality': quality, 'host': direct_host, 'rating': None, 'views': views, 'direct': True})

        return hosters

Example #45

0

Show file

File: piratejunkies_scraper.py Project: CYBERxNUKE/xbmc-addon

 def get_sources(self, video):
     """Return the stream(s) registered for ``video`` in the site's movies.js.

     The script at ``/javascript/movies.js`` is searched for the
     ``getElementById("<id>") ... play('<url>')`` entry belonging to the
     video's URL (leading slash removed). Google Drive/Docs links are
     expanded through ``scraper_utils.parse_google``; any other link is used
     as is. YouTube links are reported as indirect, everything else as
     direct.
     """
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     js_url = scraper_utils.urljoin(self.base_url, '/javascript/movies.js')
     js_html = self._http_get(js_url, cache_limit=48)
     if source_url.startswith('/'):
         source_url = source_url[1:]

     pattern = r'''getElementById\(\s*"%s".*?play\(\s*'([^']+)''' % (source_url)
     found = re.search(pattern, js_html, re.I)
     if not found:
         return hosters

     target = found.group(1)
     if 'drive.google' in target or 'docs.google' in target:
         sources = scraper_utils.parse_google(self, target)
     else:
         sources = [target]

     for source in sources:
         stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
         host = scraper_utils.get_direct_hostname(self, source)
         if host == 'gvideo':
             quality, direct = scraper_utils.gv_get_quality(source), True
         elif 'youtube' in stream_url:
             quality, direct, host = QUALITIES.HD720, False, 'youtube.com'
         else:
             quality, direct = QUALITIES.HIGH, True

         hosters.append({
             'multi-part': False,
             'host': host,
             'class': self,
             'quality': quality,
             'views': None,
             'rating': None,
             'url': stream_url,
             'direct': direct,
         })
     return hosters

Example #46

0

Show file

File: watchhd_scraper.py Project: assli100/kodi-openelec

 def get_sources(self, video):
     """Return direct stream entries from the video's "watch" page.

     The detail page supplies the view count and the watch button's href.
     The page behind that button is split into per-server sections
     (``span.svname`` headers), and every ``ep_<n>`` anchor in a section
     becomes a hoster dict titled with the server name. The first number in
     the anchor text, if any, is used as the height for the quality.
     """
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     url = urlparse.urljoin(self.base_url, source_url)
     detail_html = self._http_get(url, cache_limit=.5)

     views_match = re.search(r'<b>Views:.*?([\d,]+)', detail_html)
     views = int(views_match.group(1).replace(',', '')) if views_match else None

     button = dom_parser.parse_dom(detail_html, 'a', {'class': '[^"]*btn_watch_detail[^"]*'}, ret='href')
     if not button:
         return hosters

     watch_html = self._http_get(button[0], cache_limit=.5)
     section_re = r'<span class="svname">\s*(.*?)\s*:?\s*</span>(.*?)(?=<span class="svname">|</div>)'
     anchor_re = r'<a[^>]+id="ep_\d+"[^>]+href="([^"]+)[^>]+>\s*([^<]+)'
     for section in re.finditer(section_re, watch_html):
         title, fragment = section.groups()
         for anchor in re.finditer(anchor_re, fragment):
             stream_url, name = anchor.groups()
             digits = re.search(r'(\d+)', name)
             if digits:
                 quality = scraper_utils.height_get_quality(digits.group(1))
             else:
                 quality = QUALITIES.HIGH

             stream_url += '|User-Agent=%s&Referer=%s&Cookie=%s' % (scraper_utils.get_ua(), url, self._get_stream_cookies())
             hosters.append({
                 'multi-part': False,
                 'host': self._get_direct_hostname(stream_url),
                 'class': self,
                 'quality': quality,
                 'views': views,
                 'rating': None,
                 'url': stream_url,
                 'direct': True,
                 'title': title,
             })
     return hosters

Example #47

0

Show file

File: watchhd_scraper.py Project: c0ns0le/YCBuilds

 def get_sources(self, video):
     """Return direct stream entries from the video's "watch now" page.

     The detail page supplies the view count. The HTML returned by the
     private watch-now helper is split into per-server sections
     (``span.svname`` headers), and every ``ep_<n>`` anchor in a section is
     considered:

     * movies: every anchor is kept; the first number in its text, if any,
       is used as the height for the quality.
     * episodes: only anchors whose first number equals ``video.episode``
       are kept, with ``QUALITIES.HIGH``.

     Each kept anchor becomes a hoster dict titled with the server name.
     """
     source_url = self.get_url(video)
     hosters = []
     if source_url and source_url != FORCE_NO_MATCH:
         url = urlparse.urljoin(self.base_url, source_url)
         html = self._http_get(url, cache_limit=8)
         views_match = re.search(r'<b>Views:.*?([\d,]+)', html)
         if views_match:
             views = int(views_match.group(1).replace(',', ''))
         else:
             views = None

         html = self.__get_watch_now(html)
         section_re = r'<span class="svname">\s*(.*?)\s*:?\s*</span>(.*?)(?=<span class="svname">|</div>)'
         anchor_re = r'<a[^>]+id="ep_\d+"[^>]+href="([^"]+)[^>]+>\s*([^<]+)'
         for section in re.finditer(section_re, html):
             title, fragment = section.groups()
             for anchor in re.finditer(anchor_re, fragment):
                 stream_url, name = anchor.groups()
                 digits = re.search(r'(\d+)', name)
                 if video.video_type == VIDEO_TYPES.MOVIE:
                     if digits:
                         quality = scraper_utils.height_get_quality(digits.group(1))
                     else:
                         quality = QUALITIES.HIGH
                 else:
                     # Compare the captured digits, not the whole label:
                     # int(name) raised ValueError for labels such as "Ep 5"
                     # even though a number had been found in them.
                     if not digits or int(digits.group(1)) != int(video.episode):
                         continue

                     quality = QUALITIES.HIGH
                 stream_url += '|User-Agent=%s&Referer=%s&Cookie=%s' % (scraper_utils.get_ua(), url, self._get_stream_cookies())
                 hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': quality, 'views': views, 'rating': None, 'url': stream_url, 'direct': True}
                 hoster['title'] = title
                 hosters.append(hoster)
     return hosters

Example #48

0

Show file

File: snagfilms_scraper.py Project: EPiC-APOC/repository.xvbmc

 def get_sources(self, video):
     """Return direct stream entries from the film page's embedded player.

     The iframe inside the ``film-container`` div is fetched (with the film
     page as referer) and its source list is parsed. Each source becomes a
     hoster dict; when the URL contains a bitrate token such as ``800kbps``
     it is stored under ``extra``. The result is sorted by ``extra`` in
     descending string order.
     """
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     page_url = urlparse.urljoin(self.base_url, source_url)
     page_html = self._http_get(page_url, cache_limit=.5)
     container = dom_parser.parse_dom(page_html, 'div', {'class': 'film-container'})
     if not container:
         return hosters

     iframe_src = dom_parser.parse_dom(container[0], 'iframe', ret='src')
     if not iframe_src:
         return hosters

     iframe_url = urlparse.urljoin(self.base_url, iframe_src[0])
     player_html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)
     sources = self._parse_sources_list(player_html)
     for source in sources:
         quality = sources[source]['quality']
         host = self._get_direct_hostname(source)
         stream_url = source + '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(iframe_url))
         hoster = {
             'multi-part': False,
             'host': host,
             'class': self,
             'quality': quality,
             'views': None,
             'rating': None,
             'url': stream_url,
             'direct': True,
         }
         bitrate = re.search(r'(\d+[a-z]bps)', source)
         if bitrate:
             hoster['extra'] = bitrate.group(1)
         hosters.append(hoster)

     hosters.sort(key=lambda x: x.get('extra', ''), reverse=True)
     return hosters

Example #49

0

Show file

File: dizigold_scraper.py Project: henry73/salts

    def get_sources(self, video):
        """Return the stream entries the site's ajax endpoint offers for ``video``.

        The ``view_id`` found on the episode page is posted to
        ``self.ajax_url``. The reply is either a ``var sources = [...]``
        script (its array is parsed) or plain JSON:

        * JSON with a ``data`` key: the iframe in ``data`` is fetched. A
          ``url=`` parameter or a nested iframe on that page yields a single
          indirect ``720p`` entry; otherwise its ``"file"``/``"label"`` pairs
          yield direct entries.
        * anything else: the parsed JSON itself is used as the list of
          direct sources.

        Every entry's URL gets a ``User-Agent`` suffix.
        """
        source_url = self.get_url(video)
        hosters = []
        sources = []
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = urlparse.urljoin(self.base_url, source_url)
        page_html = self._http_get(page_url, cache_limit=.25)
        view_match = re.search(r'var\s+view_id\s*=\s*"([^"]+)', page_html)
        if not view_match:
            return hosters

        view_data = {'id': view_match.group(1), 'tip': 'view', 'dil': 'or'}
        ajax_html = self._http_get(self.ajax_url, data=view_data, headers=XHR, cache_limit=.25)
        ajax_html = re.sub(r'\\n|\\t', '', ajax_html.strip())
        array_match = re.search(r'var\s+sources\s*=\s*(\[.*?\])', ajax_html)
        if array_match:
            # Strip the backslash escaping from the embedded array literal.
            raw_data = array_match.group(1).replace('\\', '')
        else:
            raw_data = ajax_html

        js_data = scraper_utils.parse_json(raw_data, self.ajax_url)
        if 'data' in js_data:
            src = dom_parser.parse_dom(js_data['data'], 'iframe', ret='src')
            if src:
                embed_html = self._http_get(src[0], cache_limit=.25)
                url_match = re.search('url=([^"]+)', embed_html)
                if url_match:
                    sources.append({'label': '720p', 'file': url_match.group(1).replace('&gt;', '')})
                    direct = False
                else:
                    nested = dom_parser.parse_dom(embed_html, 'iframe', ret='src')
                    if nested:
                        sources.append({'label': '720p', 'file': nested[0]})
                        direct = False
                    else:
                        for pair in re.finditer(r'"file"\s*:\s*"([^"]+)"\s*,\s*"label"\s*:\s*"([^"]+)', embed_html):
                            sources.append({'label': pair.group(2), 'file': pair.group(1)})
                        direct = True
        else:
            sources = js_data
            direct = True

        # ``direct`` is only unset when ``sources`` is empty, in which case
        # the loop body never runs.
        for source in sources:
            stream_url = source['file'] + '|User-Agent=%s' % (scraper_utils.get_ua())
            if direct:
                host = self._get_direct_hostname(stream_url)
            else:
                host = urlparse.urlparse(stream_url).hostname

            if direct and host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = scraper_utils.height_get_quality(source['label'])

            hosters.append({'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct})

        return hosters

Example #50

0

Show file

    def get_sources(self, video):
        """Return direct stream entries for every embed listed on the page.

        Each ``embeds[n] = '...'`` assignment on the page is inspected for an
        iframe ``src``:

        * ``play-en.php`` iframes carry an encrypted id which is decrypted
          with ``scraper_utils.gk_decrypt`` and expanded through
          ``self._parse_google``.
        * any other iframe is fetched and its source list parsed; sources
          going through ``download.php`` are replaced by the target of a
          non-following HEAD request when that target is an http(s) URL.

        All collected sources are emitted with a ``User-Agent`` suffix and a
        quality looked up in ``QUALITY_MAP`` (default ``QUALITIES.HIGH``).
        """
        source_url = self.get_url(video)
        sources = {}
        hosters = []
        if source_url and source_url != FORCE_NO_MATCH:
            url = urlparse.urljoin(self.base_url, source_url)
            html = self._http_get(url, cache_limit=.5)
            for embed in re.finditer(r"embeds\[(\d+)\]\s*=\s*'([^']+)", html):
                src_match = re.search('src="([^"]+)', embed.group(2))
                if not src_match:
                    continue

                iframe_url = src_match.group(1)
                if 'play-en.php' in iframe_url:
                    id_match = re.search('id=([^"&]+)', iframe_url)
                    if id_match:
                        # Only the part after the first '*' is the payload.
                        proxy_link = id_match.group(1).split('*', 1)[-1]
                        picasa_url = scraper_utils.gk_decrypt(self.get_name(), GK_KEY, proxy_link)
                        for stream_url in self._parse_google(picasa_url):
                            sources[stream_url] = {
                                'quality': scraper_utils.gv_get_quality(stream_url),
                                'direct': True,
                            }
                else:
                    iframe_html = self._http_get(iframe_url, cache_limit=0)
                    temp_sources = self._parse_sources_list(iframe_html)
                    # Iterate over a snapshot of the keys: entries are
                    # re-keyed below, and changing a dict while iterating it
                    # can skip or revisit entries.
                    for source in list(temp_sources):
                        if 'download.php' not in source:
                            continue
                        redir_url = self._http_get(source, allow_redirect=False, method='HEAD', cache_limit=0)
                        if redir_url.startswith('http'):
                            temp_sources[redir_url] = temp_sources.pop(source)
                    sources.update(temp_sources)

        for source in sources:
            host = self._get_direct_hostname(source)
            stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
            quality = QUALITY_MAP.get(sources[source]['quality'], QUALITIES.HIGH)
            hoster = {
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'direct': True,
            }
            hosters.append(hoster)

        return hosters

Example #51

0

Show file

File: moviexk_scraper.py Project: kevintone/tdbaddon

    def get_sources(self, video):
        """Return direct stream entries from the ``<source>`` tags of the player page.

        For movies the poster link is followed first, and the best page chosen
        by the private episode helpers is loaded; if no such page exists an
        empty list is returned. ``video.php`` sources are replaced by the
        target of a non-following HEAD request when that target is an http(s)
        URL. Quality comes from the URL for gvideo hosts, otherwise from the
        tag's ``data-res`` attribute (default ``QUALITIES.HIGH``).
        """
        source_url = self.get_url(video)
        sources = []
        if not source_url or source_url == FORCE_NO_MATCH:
            return sources

        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        if video.video_type == VIDEO_TYPES.MOVIE:
            poster = dom_parser.parse_dom(html, 'div', {'class': 'poster'})
            movie_url = dom_parser.parse_dom(poster[0], 'a', ret='href') if poster else None
            if movie_url:
                url = urlparse.urljoin(self.base_url, movie_url[0])
                html = self._http_get(url, cache_limit=.5)
                episodes = self.__get_episodes(html)
                url = self.__get_best_page(episodes)
                if not url:
                    return sources
                url = urlparse.urljoin(self.base_url, url)
                html = self._http_get(url, cache_limit=.5)

        for tag in re.finditer(r'''<source[^>]+src=['"]([^'"]+)([^>]+)''', html):
            stream_url, extra = tag.groups()
            if 'video.php' in stream_url:
                redir_url = self._http_get(stream_url, allow_redirect=False, method='HEAD', cache_limit=.25)
                if redir_url.startswith('http'):
                    stream_url = redir_url

            host = self._get_direct_hostname(stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                res = re.search(r'''data-res\s*=\s*["']([^"']+)''', extra)
                if res:
                    # Drop the "hd"/"px" decorations to leave the bare height.
                    quality = scraper_utils.height_get_quality(re.sub('(hd|px)', '', res.group(1)))
                else:
                    quality = QUALITIES.HIGH

            stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
            sources.append({'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True})

        return sources

Example #52

0

Show file

File: afdah_scraper.py Project: kevintone/tdbaddon

 def _get_links(self, html):
     """Build one direct hoster dict per ``file: "..." ... label: "..."`` pair in ``html``.

     Each URL is suffixed with ``User-Agent`` and ``Cookie`` headers, and the
     label is used as the height when deriving the quality.
     """
     links = []
     for pair in re.finditer(r'file\s*:\s*"([^"]+).*?label\s*:\s*"([^"]+)', html):
         stream_url, resolution = pair.groups()
         stream_url += '|User-Agent=%s&Cookie=%s' % (scraper_utils.get_ua(), self._get_stream_cookies())
         host = self._get_direct_hostname(stream_url)
         quality = scraper_utils.height_get_quality(resolution)
         links.append({
             'multi-part': False,
             'url': stream_url,
             'host': host,
             'class': self,
             'quality': quality,
             'rating': None,
             'views': None,
             'direct': True,
         })
     return links

Example #53

0

Show file

 def resolve_link(self, link):
     """Return the final URL reached when requesting ``link`` on this site.

     ``link`` is joined onto ``self.base_url`` and requested with the
     scraper's User-Agent, using the request URL itself as the Referer. The
     URL reported by the response (after any redirects) is returned.

     Errors raised by ``urllib2.urlopen`` propagate to the caller.
     """
     url = urlparse.urljoin(self.base_url, link)
     request = urllib2.Request(url)
     request.add_header('User-Agent', scraper_utils.get_ua())
     # Unredirected headers are sent on the initial request only.
     request.add_unredirected_header('Host', request.get_host())
     request.add_unredirected_header('Referer', url)
     response = urllib2.urlopen(request)
     try:
         return response.geturl()
     finally:
         # Only the final URL is needed; release the connection explicitly
         # instead of relying on garbage collection.
         response.close()

Example #54

0

Show file

    def get_sources(self, video):
        """Return the stream entries found on the video's player page.

        For movies the poster link is followed first, and the best page chosen
        by the private episode helpers is loaded; if no such page exists an
        empty list is returned. Iframes on the player page are reported as
        indirect links with height 480. When there are none, ``<source>`` tags
        are reported as direct links using their ``data-res`` (default 480).

        ``video.php``/``moviexk.php`` URLs are resolved with a non-following
        HEAD request and dropped when that does not yield an http(s) URL.
        """
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, headers={'Referer': page_url}, cache_limit=.5)
        if video.video_type == VIDEO_TYPES.MOVIE:
            poster = dom_parser2.parse_dom(html, 'div', {'class': 'poster'})
            if poster:
                movie_url = dom_parser2.parse_dom(poster[0].content, 'a', req='href')
                if movie_url:
                    page_url = scraper_utils.urljoin(self.base_url, movie_url[0].attrs['href'])
                    html = self._http_get(page_url, cache_limit=.5)
                    episodes = self.__get_episodes(html)
                    page_url = self.__get_best_page(episodes)
                    if not page_url:
                        return hosters
                    page_url = scraper_utils.urljoin(self.base_url, page_url)
                    html = self._http_get(page_url, cache_limit=.5)

        iframes = dom_parser2.parse_dom(html, 'iframe', req='src')
        direct = not iframes
        if direct:
            source_tags = dom_parser2.parse_dom(html, 'source', req=['src'])
            streams = [(attrs['src'], attrs.get('data-res', 480)) for attrs, _content in source_tags]
        else:
            streams = [(attrs['src'], 480) for attrs, _content in iframes]

        headers = {'User-Agent': scraper_utils.get_ua(), 'Referer': page_url}
        for stream_url, height in streams:
            if 'video.php' in stream_url or 'moviexk.php' in stream_url:
                if 'title=' in stream_url:
                    # Percent-encode the title parameter before requesting.
                    title = stream_url.split('title=')[-1]
                    stream_url = stream_url.replace(title, urllib.quote(title))
                redir_url = self._http_get(stream_url, headers=headers, allow_redirect=False, method='HEAD', cache_limit=0)
                if not redir_url.startswith('http'):
                    continue
                stream_url = redir_url.replace(' ', '').split(';codec')[0]

            if direct:
                host = scraper_utils.get_direct_hostname(self, stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                else:
                    quality = scraper_utils.height_get_quality(height)
                stream_url += scraper_utils.append_headers(headers)
            else:
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.height_get_quality(height)

            hosters.append({
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'direct': direct,
            })

        return hosters

Example #55

0

Show file

    def get_sources(self, video):
        """Return direct stream entries from the page's embedded player.

        The iframe inside the ``embed`` div is fetched and its
        ``file``/``label`` pairs are turned into hoster dicts, one per
        distinct file URL. ``v.asp`` URLs are replaced by the result of a
        non-following HEAD request when it returns something. ``subs`` is True
        when the player declares no ``captions`` track.
        """
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = urlparse.urljoin(self.base_url, source_url)
        page_html = self._http_get(page_url, cache_limit=2)
        embed = dom_parser.parse_dom(page_html, 'div', {'id': 'embed'})
        if not embed:
            return hosters

        iframe_url = dom_parser.parse_dom(embed[0], 'iframe', ret='src')
        if not iframe_url:
            return hosters

        player_html = self._http_get(iframe_url[0], cache_limit=.25)
        seen = set()
        # if captions exist, then they aren't hardcoded
        subs = not re.search(r'kind\s*:\s*"captions"', player_html)

        pair_re = r'"?file"?\s*:\s*"([^"]+)"\s*,\s*"?label"?\s*:\s*"(\d+)p?[^"]*"'
        for pair in re.finditer(pair_re, player_html):
            stream_url, height = pair.groups()
            if stream_url in seen:
                continue
            seen.add(stream_url)

            if 'v.asp' in stream_url:
                stream_redirect = self._http_get(stream_url, allow_redirect=False, method='HEAD', cache_limit=0)
                if stream_redirect:
                    stream_url = stream_redirect

            stream_url += '|User-Agent=%s' % (scraper_utils.get_ua())
            host = self._get_direct_hostname(stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = scraper_utils.height_get_quality(height)

            hosters.append({
                'multi-part': False,
                'host': self._get_direct_hostname(stream_url),
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': True,
                'subs': subs,
            })
        return hosters