Code example #1
    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=0)
        match = re.search('var\s*video_id\s*=\s*"([^"]+)', html)
        if not match: return hosters
        
        video_id = match.group(1)
        headers = {'Referer': page_url}
        headers.update(XHR)
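        # fire-and-forget POST to the /av endpoint; the response is discarded, so it appears to only prime the session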
        _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'), headers=headers, method='POST', cache_limit=0)
        
        vid_url = scraper_utils.urljoin(self.base_url, VIDEO_URL)
        html = self._http_get(vid_url, data={'v': video_id}, headers=headers, cache_limit=0)
        for source, value in scraper_utils.parse_json(html, vid_url).iteritems():
            match = re.search('url=(.*)', value)
            if not match: continue
            stream_url = urllib.unquote(match.group(1))

            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = scraper_utils.height_get_quality(source)
            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
            hosters.append(hoster)
        return hosters
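All of these get_sources implementations return a list of "hoster" dicts with the same shape. A minimal sketch of that contract, inferred from the snippets on this page (the literal values below are placeholders, not taken from any one scraper):

    hoster = {
        'multi-part': False,                     # none of these scrapers emit multi-part streams
        'host': 'example.com',                   # hostname, or 'gvideo' for direct Google Video links
        'class': self,                           # the scraper instance that produced the source
        'quality': None,                         # a QUALITIES constant such as QUALITIES.HD720
        'views': None,                           # view count where the site exposes one (see example #20)
        'rating': None,
        'url': 'http://example.com/stream.mp4',
        'direct': True,                          # True when the URL plays without a hoster resolver
    }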
Code example #2
 def get_sources(self, video):
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=1)
     pages = self.__get_alt_pages(html, page_url)
     sources = self.__get_sources(html, page_url, pages.get(page_url, True))
     for page in pages:
         if page == page_url: continue
         page_url = scraper_utils.urljoin(self.base_url, page)
         html = self._http_get(page_url, cache_limit=1)
         sources.update(self.__get_sources(html, page, pages[page]))
         
     for stream_url, values in sources.iteritems():
         host = scraper_utils.get_direct_hostname(self, stream_url)
         if host == 'gvideo':
             quality = scraper_utils.gv_get_quality(stream_url)
             direct = True
         elif values['direct']:
             quality = values['quality']
             direct = True
         else:
             quality = values['quality']
             direct = False
             host = urlparse.urlparse(stream_url).hostname
         
         hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
         if values['subs']: hoster['subs'] = 'Turkish Subtitles'
         hosters.append(hoster)
             
     return hosters
Code example #3
 def get_sources(self, video):
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=.5)
     fragment = dom_parser2.parse_dom(html, 'div', {'class': 'film-container'})
     if fragment:
         iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
         if iframe_url:
             iframe_url = scraper_utils.urljoin(self.base_url, iframe_url[0].attrs['src'])
             headers = {'Referer': page_url}
             html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
             sources = scraper_utils.parse_sources_list(self, html)
             for source in sources:
                 quality = sources[source]['quality']
                 host = scraper_utils.get_direct_hostname(self, source)
                 stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': iframe_url})
                 hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                 match = re.search('(\d+[a-z]bps)', source)
                 if match:
                     hoster['extra'] = match.group(1)
                 hosters.append(hoster)
                     
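     # note: 'extra' holds bitrate strings like '320kbps', so this sort is lexicographic, not numeric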
     hosters.sort(key=lambda x: x.get('extra', ''), reverse=True)
     return hosters
Code example #4
File: tvhd_scraper.py Project: CYBERxNUKE/xbmc-addon
 def get_sources(self, video):
     hosters = []
     sources = {}
     today = datetime.datetime.today().date()
     max_age = today - datetime.timedelta(days=self.filter)
     if video.ep_airdate and max_age < video.ep_airdate:
         day_after = video.ep_airdate + datetime.timedelta(days=1)
         for day in [day_after, video.ep_airdate]:
             if day < today:
                 page_url = EP_PAGE % (day.strftime('%Y.%m.%d'))
                 page_url = scraper_utils.urljoin(self.base_url, page_url)
                 html = self._http_get(page_url, require_debrid=True, cache_limit=30 * 24)
                 sources.update(self.__get_sources(video, html))
             if sources: break
             
         if not sources and kodi.get_setting('scraper_url'):
             page_url = scraper_utils.urljoin(self.base_url, '/index.html')
             html = self._http_get(page_url, require_debrid=True, cache_limit=2)
             sources.update(self.__get_sources(video, html))
         
     for source in sources:
         host = urlparse.urlparse(source).hostname
         hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': source, 'rating': None, 'quality': sources[source], 'direct': False}
         hosters.append(hoster)
     return hosters
Code example #5
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/results')
        params = {'q': title}
        referer = search_url + '?' + urllib.urlencode(params)
        headers = {'Referer': referer}
        headers.update(XHR)
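        # same discarded POST to /av as in example #1, presumably to prime the session before searching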
        _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'), headers=headers, method='POST', cache_limit=0)

        cookies = {'begin_referer': referer, 'prounder': 1}
        html = self._http_get(search_url, params=params, cookies=cookies, cache_limit=8)
        if any('jquery.js' in match.attrs['src'] for match in dom_parser2.parse_dom(html, 'script', req='src')):
            html = self._http_get(search_url, params=params, cookies=cookies, cache_limit=0)
            
        for _attrs, result in dom_parser2.parse_dom(html, 'div', {'class': 'cell'}):
            title_frag = dom_parser2.parse_dom(result, 'div', {'class': 'video_title'})
            year_frag = dom_parser2.parse_dom(result, 'div', {'class': 'video_quality'})
            if not title_frag: continue
            match = dom_parser2.parse_dom(title_frag[0].content, 'a', req='href')
            if not match: continue
            match_url = match[0].attrs['href']
            match_title = match[0].content
            try:
                match = re.search('\s+(\d{4})\s+', year_frag[0].content)
                match_year = match.group(1)
            except:
                match_year = ''

            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
        return results
Code example #6
 def __get_source_page(self, video_type, page_url):
     match = re.search('/movie/(.*?)-(\d+)\.html', page_url)
     if not match: return '', '', ''
     slug, movie_id = match.groups()
     
     vid_type = 'movie' if video_type == VIDEO_TYPES.MOVIE else 'series'
     qp_url = QP_URL.format(slug=slug, movie_id=movie_id, vid_type=vid_type)
     qp_url = scraper_utils.urljoin(self.base_url, qp_url)
     headers = {'Referer': scraper_utils.urljoin(self.base_url, page_url)}
     headers.update(XHR)
     html = self._http_get(qp_url, headers=headers, cache_limit=8)
     watching_url = dom_parser2.parse_dom(html, 'a', {'title': re.compile('View all episodes')}, req='href')
     if not watching_url: return '', '', ''
     
     watching_url = watching_url[0].attrs['href']
     page_html = self._http_get(watching_url, headers={'Referer': scraper_utils.urljoin(self.base_url, page_url)}, cache_limit=8)
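     # fetch the hidden tracker images; the responses are discarded, so this appears to exist only to make the session look like a real browser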
     for attrs, _content in dom_parser2.parse_dom(page_html, 'img', {'class': 'hidden'}, req='src'):
         _img = self._http_get(attrs['src'], headers={'Referer': watching_url}, cache_limit=8)
     
     sl_url = SL_URL.format(movie_id=movie_id)
     sl_url = scraper_utils.urljoin(self.base_url, sl_url)
     html = self._http_get(sl_url, headers=headers, cache_limit=8)
     js_data = scraper_utils.parse_json(html, sl_url)
     try: html = js_data['html']
     except: html = ''
     return movie_id, watching_url, html
Code example #7
    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        if video.video_type == VIDEO_TYPES.EPISODE:
            gk_html = ''.join(match.group(0) for match in re.finditer('<a[^>]*>(%s|Server \d+)</a>' % (video.episode), html, re.I))
        else:
            gk_html = html
        link_url = scraper_utils.urljoin(self.base_url, LINK_URL)
        player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
        sources = scraper_utils.get_gk_links(self, gk_html, page_url, QUALITIES.HIGH, link_url, player_url)
        sources.update(self.__get_ht_links(html, page_url))
        
        for stream_url, quality in sources.iteritems():
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                direct = True
            else:
                host = urlparse.urlparse(stream_url).hostname
                direct = False
            
            if host is None: continue
            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
            hosters.append(hoster)

        return hosters
Code example #8
    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        headers = {'Referer': page_url}
        html = self._http_get(page_url, headers=headers, cache_limit=.5)
        if video.video_type == VIDEO_TYPES.MOVIE:
            fragment = dom_parser2.parse_dom(html, 'div', {'class': 'poster'})
            if fragment:
                movie_url = dom_parser2.parse_dom(fragment[0].content, 'a', req='href')
                if movie_url:
                    page_url = scraper_utils.urljoin(self.base_url, movie_url[0].attrs['href'])
                    html = self._http_get(page_url, cache_limit=.5)
                    episodes = self.__get_episodes(html)
                    page_url = self.__get_best_page(episodes)
                    if not page_url:
                        return hosters
                    else:
                        page_url = scraper_utils.urljoin(self.base_url, page_url)
                        html = self._http_get(page_url, cache_limit=.5)
        
        streams = dom_parser2.parse_dom(html, 'iframe', req='src')
        if streams:
            streams = [(attrs['src'], 480) for attrs, _content in streams]
            direct = False
        else:
            streams = [(attrs['src'], attrs.get('data-res', 480)) for attrs, _content in dom_parser2.parse_dom(html, 'source', req=['src'])]
            direct = True
            
        headers = {'User-Agent': scraper_utils.get_ua(), 'Referer': page_url}
        for stream_url, height in streams:
            if 'video.php' in stream_url or 'moviexk.php' in stream_url:
                if 'title=' in stream_url:
                    title = stream_url.split('title=')[-1]
                    stream_url = stream_url.replace(title, urllib.quote(title))
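                # HEAD request with allow_redirect=False; _http_get appears to return the redirect Location here rather than a body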
                redir_url = self._http_get(stream_url, headers=headers, allow_redirect=False, method='HEAD', cache_limit=0)
                if redir_url.startswith('http'):
                    redir_url = redir_url.replace(' ', '').split(';codec')[0]
                    stream_url = redir_url
                else:
                    continue
            
            if direct:
                host = scraper_utils.get_direct_hostname(self, stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                else:
                    quality = scraper_utils.height_get_quality(height)
                stream_url += scraper_utils.append_headers(headers)
            else:
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.height_get_quality(height)
            
            source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
            hosters.append(source)

        return hosters
Code example #9
 def __login(self):
     url = scraper_utils.urljoin(self.base_url, '/apis/v2/user/login.json')
     data = {'email': self.username, 'password': self.password, 'rememberMe': True}
     referer = scraper_utils.urljoin(self.base_url, '/login')
     headers = {'Content-Type': 'application/json', 'Referer': referer}
     headers.update(XHR)
     html = super(self.__class__, self)._http_get(url, data=json.dumps(data), headers=headers, cache_limit=0)
     js_data = scraper_utils.parse_json(html, url)
     return js_data.get('status') == 'success'
Code example #10
 def _get_episode_url(self, show_url, video):
     episode_pattern = 'href="([^"]+/season/%s/episode/%s/?)"' % (video.season, video.episode)
     title_pattern = 'href="(?P<url>[^"]+)"[^>]+title="(?:S\d+\s*E\d+:\s*)?(?P<title>[^"]+)'
     headers = {'Referer': scraper_utils.urljoin(self.base_url, show_url)}
     season_url = scraper_utils.urljoin(show_url, '/season/%s' % (video.season))
     season_url = scraper_utils.urljoin(self.base_url, season_url)
     html = self._http_get(season_url, headers=headers, cache_limit=2)
     fragment = dom_parser2.parse_dom(html, 'div', {'id': 'episodes'})
     return self._default_get_episode_url(fragment, video, episode_pattern, title_pattern)
Code example #11
 def _get_episode_url(self, show_url, video):
     url = scraper_utils.urljoin(self.base_url, show_url)
     html = self._http_get(url, cache_limit=24)
     show_id = dom_parser2.parse_dom(html, 'div', {'id': 'icerikid'}, req='value')
     if show_id:
         episode_pattern = 'href="([^"]*-%s-sezon-%s-bolum[^"]*)"' % (video.season, video.episode)
         title_pattern = 'href="(?P<url>[^"]+)[^>]*class="realcuf".*?class="realcuf">(?P<title>[^<]*)'
         season_url = scraper_utils.urljoin(self.base_url, SEASON_URL)
         data = {'sezon_id': video.season, 'dizi_id': show_id[0].attrs['value'], 'tip': 'dizi', 'bolumid': ''}
         html = self._http_get(season_url, data=data, headers=XHR, cache_limit=2)
         return self._default_get_episode_url(html, video, episode_pattern, title_pattern)
Code example #12
 def _get_episode_url(self, show_url, video):
     url = scraper_utils.urljoin(self.base_url, show_url)
     html = self._http_get(url, cache_limit=24)
     fragment = dom_parser2.parse_dom(html, 'div', {'class': 'poster'})
     if not fragment: return
     show_url = dom_parser2.parse_dom(fragment[0].content, 'a', req='href')
     if not show_url: return
     show_url = scraper_utils.urljoin(self.base_url, show_url[0].attrs['href'])
     html = self._http_get(show_url, cache_limit=2)
     fragment = dom_parser2.parse_dom(html, 'div', {'id': 'servers'})
     episode_pattern = 'href="([^"]+)[^>]+>[Ee][Pp]\s*(?:[Ss]0*%s-)?E?p?0*%s(?!\d)' % (video.season, video.episode)
     return self._default_get_episode_url(fragment or html, video, episode_pattern)
Code example #13
File: 2ddl_scraper.py Project: CYBERxNUKE/xbmc-addon
 def _http_get(self, url, params=None, data=None, multipart_data=None, headers=None, cookies=None, allow_redirect=True, method=None, require_debrid=False, read_error=False, cache_limit=8):
     real_url = scraper_utils.urljoin(self.base_url, url)
     html = super(self.__class__, self)._http_get(real_url, params=params, data=data, multipart_data=multipart_data, headers=headers, cookies=cookies,
                                                  allow_redirect=allow_redirect, method=method, require_debrid=require_debrid, read_error=read_error,
                                                  cache_limit=cache_limit)
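     # if the response shows the site has moved, rebuild the URL against the updated base_url and retry once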
     if self.__update_base_url(html):
         real_url = scraper_utils.urljoin(self.base_url, url)
         html = super(self.__class__, self)._http_get(real_url, params=params, data=data, multipart_data=multipart_data, headers=headers,
                                                      cookies=cookies, allow_redirect=allow_redirect, method=method, require_debrid=require_debrid,
                                                      read_error=read_error, cache_limit=cache_limit)
     
     return html
Code example #14
    def _get_episode_url(self, show_url, video):
        show_url = scraper_utils.urljoin(self.base_url, show_url)
        html = self._http_get(show_url, headers={'Referer': self.base_url}, cache_limit=24 * 7)
        match = re.search('href="([^"]*season=0*%s(?!\d))[^"]*' % (video.season), html)
        if not match: return

        episode_pattern = 'href="([^"]*/0*%s-0*%s/[^"]*)' % (video.season, video.episode)
        title_pattern = 'href="(?P<url>[^"]+)[^>]*>\s*(?P<title>.*?)\s*</a>'
        season_url = scraper_utils.urljoin(show_url, match.group(1))
        html = self._http_get(season_url, headers={'Referer': show_url}, cache_limit=2)
        episodes = dom_parser2.parse_dom(html, 'div', {'class': 'episodeDetail'})
        fragment = '\n'.join(ep.content for ep in episodes)
        return self._default_get_episode_url(fragment, video, episode_pattern, title_pattern)
Code example #15
    def get_sources(self, video):
        source_url = self.get_url(video)
        sources = []
        if not source_url or source_url == FORCE_NO_MATCH: return sources
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        for attrs, _content in dom_parser2.parse_dom(html, 'img', req=['data-id', 'data-name']):
            film_id, data_name = attrs['data-id'], attrs['data-name']
            data = {'id': film_id, 'n': data_name}
            server_url = scraper_utils.urljoin(self.base_url, SERVER_URL)
            server_url = server_url % (film_id)
            headers = {'Referer': page_url}
            headers.update(XHR)
            html = self._http_get(server_url, data=data, headers=headers, cache_limit=.5)
            for attrs, _content in dom_parser2.parse_dom(html, 'a', req='data-id'):
                data = {'epid': attrs['data-id']}
                ep_url = scraper_utils.urljoin(self.base_url, EP_URL)
                ep_url = ep_url % (attrs['data-id'])
                headers = {'Referer': page_url}
                headers.update(XHR)
                html = self._http_get(ep_url, data=data, headers=headers, cache_limit=.5)
                js_data = scraper_utils.parse_json(html, ep_url)
                try:
                    links = [r.attrs['src'] for r in dom_parser2.parse_dom(js_data['link']['embed'], 'iframe', req='src')]
                except:
                    try: links = js_data['link']['l']
                    except: links = []
                try: heights = js_data['link']['q']
                except: heights = []
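                # Py2 idiom: map(None, a, b) pairs the lists like itertools.izip_longest, padding the shorter one with None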
                for stream_url, height in map(None, links, heights):
                    match = re.search('movie_url=(.*)', stream_url)
                    if match:
                        stream_url = match.group(1)
                        
                    host = scraper_utils.get_direct_hostname(self, stream_url)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(stream_url)
                        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
                        direct = True
                    else:
                        host = urlparse.urlparse(stream_url).hostname
                        if height:
                            quality = scraper_utils.height_get_quality(height)
                        else:
                            quality = QUALITIES.HD720
                        direct = False
                    source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                    sources.append(source)

        return sources
Code example #16
 def __add_torrent(self, hash_id):
     list_url = scraper_utils.urljoin(self.base_url, LIST_URL)
     js_data = self._json_get(list_url, cache_limit=0)
     for transfer in js_data.get('transfers', []):
         if transfer['hash'].lower() == hash_id:
             return True
      
     add_url = scraper_utils.urljoin(self.base_url, ADD_URL)
     data = {'src': MAGNET_LINK % hash_id}
     js_data = self._json_get(add_url, data=data, cache_limit=0)
     if js_data.get('status') == 'success':
         return True
     else:
         return False
Code example #17
 def resolve_link(self, link):
     url = scraper_utils.urljoin(self.base_url, link)
     html = self._http_get(url, cache_limit=.5)
     match = re.search('"file"\s*:\s*"([^"]+)', html)
     if match:
         file_link = match.group(1)
         stream_url = scraper_utils.urljoin(self.base_url, file_link)
         cj = self._set_cookies(self.base_url, {})
         request = urllib2.Request(stream_url)
         request.add_header('User-Agent', scraper_utils.get_ua())
         request.add_unredirected_header('Host', request.get_host())
         request.add_unredirected_header('Referer', url)
         cj.add_cookie_header(request)
         response = urllib2.urlopen(request)
         return response.geturl()
Code example #18
    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=8)
        hosters += self.__get_sources(html, url)
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'parts-middle'})
        if fragment:
            for attrs, _content in dom_parser2.parse_dom(fragment[0].content, 'a', req='href'):
                url = scraper_utils.urljoin(self.base_url, attrs['href'])
                html = self._http_get(url, cache_limit=8)
                hosters += self.__get_sources(html, url)

        return hosters
Code example #19
    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, require_debrid=False, cache_limit=.5)

        q_str = ''
        match = re.search('class="entry-title">([^<]+)', html)
        if match:
            q_str = match.group(1)

        pattern = 'href="?([^" ]+)(?:[^>]+>){2}\s+\|'
        for match in re.finditer(pattern, html, re.DOTALL):
            url = match.group(1)
            if 'adf.ly' in url:
                continue

            hoster = {
                'multi-part': False,
                'class': self,
                'views': None,
                'url': url,
                'rating': None,
                'quality': None,
                'direct': False
            }
            hoster['host'] = urlparse.urlsplit(url).hostname
            hoster['quality'] = scraper_utils.blog_get_quality(
                video, q_str, hoster['host'])
            hosters.append(hoster)

        return hosters
Code example #20
File: cmz_scraper.py Project: enursha101/xbmc-addon
    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)

        match = re.search('Views?\s*:\s*(\d+)', html, re.I)
        if match:
            views = match.group(1)
        else:
            views = None

        pattern = 'href="[^"]+/rd\.html\?url=([^"]+)'
        for match in re.finditer(pattern, html):
            url = match.group(1)
            host = urlparse.urlsplit(url).hostname
            hoster = {
                'multi-part': False,
                'host': host,
                'url': url,
                'class': self,
                'rating': None,
                'views': views,
                'quality': scraper_utils.get_quality(video, host,
                                                     QUALITIES.HIGH),
                'direct': False
            }
            hosters.append(hoster)

        return hosters
Code example #21
 def get_sources(self, video):
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(url,
                           headers=self.headers,
                           require_debrid=False,
                           cache_limit=.5)
     sources = self.__get_post_links(html, video)
     for source in sources:
         if scraper_utils.excluded_link(source): continue
         host = urlparse.urlparse(source).hostname
         hoster = {
             'multi-part': False,
             'host': host,
             'class': self,
             'views': None,
             'url': source,
             'rating': None,
             'quality': sources[source],
             'direct': False
         }
         hosters.append(hoster)
     return hosters
Code example #22
 def get_sources(self, video):
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(url, require_debrid=True, cache_limit=.5)
     sources = self.__get_post_links(html)
     for source, value in sources.iteritems():
         if scraper_utils.excluded_link(source): continue
         host = urlparse.urlparse(source).hostname
         if video.video_type == VIDEO_TYPES.MOVIE:
             meta = scraper_utils.parse_movie_link(value['release'])
         else:
             meta = scraper_utils.parse_episode_link(value['release'])
         quality = scraper_utils.height_get_quality(meta['height'])
         hoster = {
             'multi-part': False,
             'host': host,
             'class': self,
             'views': None,
             'url': source,
             'rating': None,
             'quality': quality,
             'direct': False
         }
         if 'format' in meta: hoster['format'] = meta['format']
         hosters.append(hoster)
     return hosters
Code example #23
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_in = 'm' if video_type == VIDEO_TYPES.MOVIE else 't'
        search_url = scraper_utils.urljoin(self.base_url, '/search')
        html = self._http_get(search_url,
                              data={
                                  'searchquery': title,
                                  'searchin': search_in
                              },
                              cache_limit=8)
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'search-page'})
        if not fragment: return results
        fragment = dom_parser2.parse_dom(fragment[0].content, 'table')
        if not fragment: return results
        for attrs, match_title_year in dom_parser2.parse_dom(
                fragment[0].content, 'a', req='href'):
            match_url = attrs['href']
            match_title, match_year = scraper_utils.extra_year(
                match_title_year)
            if not year or not match_year or year == match_year:
                result = {
                    'url': scraper_utils.pathify_url(match_url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)

        return results
Code example #24
 def get_sources(self, video):
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(url, require_debrid=False, cache_limit=.5)
     for source, values in self.__get_post_links(html).iteritems():
         if scraper_utils.excluded_link(source): continue
         host = urlparse.urlparse(source).hostname
         release = values['release']
         quality = scraper_utils.blog_get_quality(video, release, host)
         hoster = {
             'multi-part': False,
             'host': host,
             'class': self,
             'views': None,
             'url': source,
             'rating': None,
             'quality': quality,
             'direct': False
         }
         if 'X265' in release or 'HEVC' in release:
             hoster['format'] = 'x265'
         hosters.append(hoster)
     return hosters
Code example #25
    def __find_episode(self, show_url, video):
        url = scraper_utils.urljoin(self.tv_base_url, show_url)
        html = self._http_get(url, cache_limit=2)
        hashes = []
        for attrs, _magnet in dom_parser2.parse_dom(html,
                                                    'a', {'class': 'magnet'},
                                                    req=['href', 'title']):
            magnet_link, magnet_title = attrs['href'], attrs['title']
            match = re.search('urn:btih:(.*?)(?:&|$)', magnet_link, re.I)
            if match:
                magnet_title = re.sub(re.compile('\s+magnet\s+link', re.I), '',
                                      magnet_title)
                hashes.append((match.group(1), magnet_title))

        episode_pattern = 'S%02d\s*E%02d' % (int(
            video.season), int(video.episode))
        if video.ep_airdate:
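            # two-stage template: .format() fills in {delim}, then the % operator fills in the date parts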
            airdate_pattern = '%d{delim}%02d{delim}%02d'.format(delim=DELIM)
            airdate_pattern = airdate_pattern % (video.ep_airdate.year,
                                                 video.ep_airdate.month,
                                                 video.ep_airdate.day)
        else:
            airdate_pattern = ''

        matches = [
            link for link in hashes
            if re.search(episode_pattern, link[1], re.I)
        ]
        if not matches and airdate_pattern:
            matches = [
                link for link in hashes if re.search(airdate_pattern, link[1])
            ]
        return matches
Code example #26
 def get_sources(self, video):
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     query = scraper_utils.parse_query(source_url)
     if 'id' in query:
         vid_type = 'movies' if video.video_type == VIDEO_TYPES.MOVIE else 'episodes'
         url = scraper_utils.urljoin(
             self.base_url, '/api/v2/%s/%s' % (vid_type, query['id']))
         js_data = self._http_get(url, cache_limit=.5)
         if 'url' in js_data:
             stream_url = js_data['url']
             quality = QUALITIES.HD720
             hoster = {
                 'multi-part': False,
                 'host':
                 scraper_utils.get_direct_hostname(self, stream_url),
                 'class': self,
                 'url': stream_url,
                 'quality': quality,
                 'views': None,
                 'rating': None,
                 'direct': True
             }
             hosters.append(hoster)
     return hosters
Code example #27
 def _get_episode_url(self, show_url, video):
     episode_pattern = 'href="([^"]+s0*%s-?e0*%s[^"]+)' % (video.season, video.episode)
     title_pattern = 'href="(?P<url>[^"]+-s\d+-?e\d+-(?P<title>[^/"]*)[^"]*)'
     show_url = scraper_utils.urljoin(self.base_url, show_url)
     html = self._http_get(show_url, cache_limit=2)
     fragment = dom_parser2.parse_dom(html, 'div', {'class': 'entry'})
     return self._default_get_episode_url(fragment or html, video, episode_pattern, title_pattern)
Code example #28
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        if video_type == VIDEO_TYPES.MOVIE:
            url = '/api/v2/movies'
            key = 'movies'
        else:
            url = '/api/v2/shows'
            key = 'shows'
        url = scraper_utils.urljoin(self.base_url, url)
        js_data = self._http_get(url, cache_limit=8)
        norm_title = scraper_utils.normalize_title(title)
        if key in js_data:
            for item in js_data[key]:
                match_title = item['name']
                match_year = item.get('year', '')
                match_url = '?id=%s' % (item['id'])
                if norm_title in scraper_utils.normalize_title(
                        match_title) and (not year or not match_year
                                          or year == match_year):
                    result = {
                        'url': scraper_utils.pathify_url(match_url),
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': match_year
                    }
                    results.append(result)

        return results
Code example #29
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/index.php')
        data = {'subaction': 'search', 'story': title, 'do': 'search'}
        headers = {'Referer': search_url}
        html = self._http_get(search_url,
                              params={'do': 'search'},
                              data=data,
                              headers=headers,
                              cache_limit=1)
        fragment = dom_parser2.parse_dom(html, 'div', {'id': 'dle-content'})
        if not fragment: return results

        for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'div',
                                                  {'class': 'short-film'}):
            match = re.search('<h5><a\s+href="([^"]+)[^>]+title="([^"]+)',
                              item)
            if not match: continue

            url, match_title = match.groups('')
            result = {
                'url': scraper_utils.pathify_url(url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': ''
            }
            results.append(result)

        return results
Code example #30
    def get_sources(self, video):
        hosters = []
        sources = {}
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, require_debrid=True, cache_limit=.5)
        fragment = dom_parser2.parse_dom(html, 'div',
                                         {'class': 'entry-content'})
        if fragment:
            for _attrs, td in dom_parser2.parse_dom(fragment[0].content, 'td'):
                for attrs, _content in dom_parser2.parse_dom(td,
                                                             'a',
                                                             req='href'):
                    meta = scraper_utils.parse_episode_link(attrs['href'])
                    sources[attrs['href']] = scraper_utils.height_get_quality(
                        meta['height'])

        for source, values in sources.iteritems():
            if scraper_utils.excluded_link(source): continue
            host = urlparse.urlparse(source).hostname
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'views': None,
                'url': source,
                'rating': None,
                'quality': values,
                'direct': False
            }
            hosters.append(hoster)
        return hosters
Code example #31
    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)

        pattern = "/w\.php\?q=([^']+)"
        seen_hosts = {}
        for match in re.finditer(pattern, html, re.DOTALL):
            url = match.group(1)
            hoster = {
                'multi-part': False,
                'url': url.decode('base-64'),
                'class': self,
                'quality': None,
                'views': None,
                'rating': None,
                'direct': False
            }
            hoster['host'] = urlparse.urlsplit(hoster['url']).hostname
            # top list is HD, bottom list is SD
            if hoster['host'] in seen_hosts:
                quality = QUALITIES.HIGH
            else:
                quality = QUALITIES.HD720
                seen_hosts[hoster['host']] = True
            hoster['quality'] = scraper_utils.get_quality(
                video, hoster['host'], quality)
            hosters.append(hoster)
        return hosters
Code example #32
    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)

        quality = None
        match = re.search('Links\s+-\s+Quality\s*([^<]*)</h1>', html, re.DOTALL | re.I)
        if match:
            quality = QUALITY_MAP.get(match.group(1).strip().upper())

        fragment = dom_parser2.parse_dom(html, 'div', {'id': 'links'})
        if not fragment: return hosters
        
        for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'ul'):
            stream_url = dom_parser2.parse_dom(item, 'a', req='href')
            host = dom_parser2.parse_dom(item, 'li', {'id': 'download'})
            if not stream_url or not host: continue
            
            stream_url = stream_url[0].attrs['href']
            host = host[-1].content
            hoster = {'multi-part': False, 'host': host, 'class': self, 'url': stream_url, 'quality': scraper_utils.get_quality(video, host, quality), 'rating': None, 'views': None, 'direct': False}
            hosters.append(hoster)

        return hosters
Code example #33
    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        for _attrs, fragment in dom_parser2.parse_dom(
                html, 'div', {'class': 'videoPlayer'}):
            for attrs, _content in dom_parser2.parse_dom(fragment,
                                                         'iframe',
                                                         req='src'):
                html = self._http_get(attrs['src'],
                                      headers={'Referer': page_url},
                                      cache_limit=.5)
                match = re.search('downloadUrl\s*=\s*"([^"]+)', html)
                if match:
                    stream_url = match.group(1)
                    host = scraper_utils.get_direct_hostname(self, stream_url)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(stream_url)
                    else:
                        quality = QUALITIES.HIGH
                    hoster = {
                        'multi-part': False,
                        'host': host,
                        'class': self,
                        'quality': quality,
                        'views': None,
                        'rating': None,
                        'url': stream_url,
                        'direct': True
                    }
                    hosters.append(hoster)

        return hosters
Code example #34
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        data = {
            'hash': 'indexert',
            'do': 'search',
            'subaction': 'search',
            'search_start': 0,
            'full_search': 0,
            'result_from': 1,
            'story': title
        }
        search_url = scraper_utils.urljoin(self.base_url, 'index.php')
        html = self._http_get(search_url,
                              params={'do': 'search'},
                              data=data,
                              cache_limit=8)
        if dom_parser2.parse_dom(html, 'div', {'class': 'sresult'}):
            for _attrs, item in dom_parser2.parse_dom(
                    html, 'div', {'class': 'short_content'}):
                match_url = dom_parser2.parse_dom(item, 'a', req='href')
                match_title_year = dom_parser2.parse_dom(
                    item, 'div', {'class': 'short_header'})
                if match_url and match_title_year:
                    match_url = match_url[0].attrs['href']
                    match_title, match_year = scraper_utils.extra_year(
                        match_title_year[0].content)
                    if not year or not match_year or year == match_year:
                        result = {
                            'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year,
                            'url': scraper_utils.pathify_url(match_url)
                        }
                        results.append(result)

        return results
Code example #35
    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)

        fragment = dom_parser2.parse_dom(html, 'tbody')
        if fragment:
            fragment = fragment[0].content
            for attrs, content in dom_parser2.parse_dom(fragment,
                                                        'a',
                                                        req='href'):
                stream_url = attrs['href']
                match = dom_parser2.parse_dom(content, 'img')
                if not match: continue
                host = match[0].content.strip()
                quality = scraper_utils.get_quality(video, host,
                                                    QUALITIES.HIGH)
                hoster = {
                    'multi-part': False,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'url': stream_url,
                    'direct': False
                }
                hosters.append(hoster)

        return hosters
Code example #36
    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        for attrs, _content in dom_parser2.parse_dom(html, 'a', req='href'):
            stream_url = attrs['href']
            if MOVIE_URL in stream_url:
                meta = scraper_utils.parse_movie_link(stream_url)
                stream_url = scraper_utils.pathify_url(
                    stream_url) + scraper_utils.append_headers(
                        {'User-Agent': scraper_utils.get_ua()})
                quality = scraper_utils.height_get_quality(meta['height'])
                hoster = {
                    'multi-part': False,
                    'host':
                    scraper_utils.get_direct_hostname(self, stream_url),
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'url': stream_url,
                    'direct': True
                }
                if 'format' in meta: hoster['format'] = meta['format']
                hosters.append(hoster)

        return hosters
Code example #37
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        html = self._http_get(self.base_url, cache_limit=48)
        norm_title = scraper_utils.normalize_title(title)
        fragment = dom_parser2.parse_dom(html, 'select', {'id': 'cat'})
        if fragment:
            for attrs, label in dom_parser2.parse_dom(fragment[0].content,
                                                      'option',
                                                      {'class': 'level-0'},
                                                      req='value'):
                label = scraper_utils.cleanse_title(label)
                label = re.sub('\s+\(\d+\)$', '', label)
                if norm_title in scraper_utils.normalize_title(label):
                    cat_url = scraper_utils.urljoin(
                        self.base_url, '/?cat=%s' % (attrs['value']))
                    html = self._http_get(cat_url,
                                          allow_redirect=False,
                                          cache_limit=8)
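                    # with allow_redirect=False, the category request appears to come back as a bare redirect URL instead of a page body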
                    if html.startswith('http'):
                        cat_url = html
                    result = {
                        'url': scraper_utils.pathify_url(cat_url),
                        'title': label,
                        'year': ''
                    }
                    results.append(result)

        return results
Code example #38
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        if not self.include_paid and video_type != VIDEO_TYPES.MOVIE: return []
        search_url = scraper_utils.urljoin(self.base_url, '/search.php')
        html = self._http_get(search_url, params={'q': title}, cache_limit=.25)
        results = []
        if video_type == VIDEO_TYPES.MOVIE:
            pattern = '<i>\s*Movies\s*</i>(.*)'
        else:
            pattern = '<i>\s*TV Series\s*</i>(.*)'

        match = re.search(pattern, html)
        if not match: return results

        container = match.group(1)
        pattern = "href='([^']+)'>([^<]+)\s*</a>\s*(?:\((\d{4})\))?"
        for match in re.finditer(pattern, container):
            url, match_title, match_year = match.groups('')
            if not year or not match_year or year == match_year:
                result = {
                    'url': scraper_utils.pathify_url(url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)

        return results
Code example #39
 def __login(self):
     url = scraper_utils.urljoin(self.base_url, '/api/v1/user/login')
     data = {'user': self.username, 'password': self.password}
     headers = {'Content-Type': 'application/json'}
     html = self._http_get(url, data=json.dumps(data), headers=headers, cache_limit=0)
     js_data = scraper_utils.parse_json(html, url)
     if 'user' not in js_data: raise Exception('sit2play login failed')
Code example #40
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(
            self.base_url, '/search/%s' % (urllib.quote(title)))
        html = self._http_get(search_url, cache_limit=8)
        fragment = dom_parser2.parse_dom(html, 'div', {'id': 'who-likes'})
        if not fragment: return results

        fragment = fragment[0].content
        match_url = dom_parser2.parse_dom(fragment, 'a', req='href')
        match_title_year = dom_parser2.parse_dom(fragment, 'img', req='alt')
        if match_url and match_title_year:
            match_url = match_url[0].attrs['href']
            match_title_year = match_title_year[0].attrs['alt']
            match_title, match_year = scraper_utils.extra_year(
                match_title_year)
            if not year or not match_year or year == match_year:
                result = {
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year,
                    'url': scraper_utils.pathify_url(match_url)
                }
                results.append(result)

        return results
Code example #41
 def _get_episode_url(self, show_url, video):
     episode_pattern = 'href="([^"]*-[Ss]0*%s[Ee]0*%s-[^"]+)' % (video.season, video.episode)
     title_pattern = 'class="head".*?</span>(?P<title>.*?)</a>.*?href="(?P<url>[^"]+)'
     show_url = scraper_utils.urljoin(self.base_url, show_url)
     html = self._http_get(show_url, cache_limit=2)
     fragment = dom_parser2.parse_dom(html, 'div', {'id': 'season-wrapper'})
     return self._default_get_episode_url(fragment, video, episode_pattern, title_pattern)
Code example #42
    def __get_direct_links(self, iframe_url, page_url):
        sources = []
        headers = {'Referer': page_url}
        html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
        
        # if captions exist, then they aren't hardcoded
        subs = '' if re.search('kind\s*:\s*"captions"', html) else 'Turkish subtitles'
         
        streams = scraper_utils.parse_sources_list(self, html, key='VideoSources')
        streams.update(scraper_utils.parse_sources_list(self, html, var='video'))
        for stream_url in streams:
            quality = streams[stream_url]['quality']
            if 'v.asp' in stream_url:
                stream_url = scraper_utils.urljoin(self.base_url, stream_url)
                stream_redirect = self._http_get(stream_url, allow_redirect=False, method='HEAD', cache_limit=0)
                if stream_redirect.startswith('http'):
                    stream_url = stream_redirect

            sources.append({'stream_url': stream_url, 'subs': subs, 'quality': quality, 'direct': True})
        
        if sources: return sources
        iframe_url = dom_parser2.parse_dom(html, 'iframe', req='src')
        if not iframe_url: return sources
        sources.append({'stream_url': iframe_url[0].attrs['src'], 'subs': subs, 'quality': QUALITIES.HD720, 'direct': False})
                
        return sources
Code example #43
 def get_sources(self, video):
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     js_url = scraper_utils.urljoin(self.base_url, '/javascript/movies.js')
     html = self._http_get(js_url, cache_limit=48)
     if source_url.startswith('/'):
         source_url = source_url[1:]
     pattern = '''getElementById\(\s*"%s".*?play\(\s*'([^']+)''' % (source_url)
     match = re.search(pattern, html, re.I)
     if match:
         stream_url = match.group(1)
         if 'drive.google' in stream_url or 'docs.google' in stream_url:
             sources = scraper_utils.parse_google(self, stream_url)
         else:
             sources = [stream_url]
         
         for source in sources:
             stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
             host = scraper_utils.get_direct_hostname(self, source)
             if host == 'gvideo':
                 quality = scraper_utils.gv_get_quality(source)
                 direct = True
             elif 'youtube' in stream_url:
                 quality = QUALITIES.HD720
                 direct = False
                 host = 'youtube.com'
             else:
                 quality = QUALITIES.HIGH
                 direct = True
             hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
             hosters.append(hoster)
     return hosters
Code example #44
 def get_sources(self, video):
     source_url = self.get_url(video)
     sources = []
     if not source_url or source_url == FORCE_NO_MATCH: return sources
     url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(url, cache_limit=.5)
     is_3d = False
     page_quality = QUALITIES.HD720
     title = dom_parser2.parse_dom(html, 'title')
     if title:
         title = title[0].content
         match = re.search('(\d{3,})p', title)
         if match:
             page_quality = scraper_utils.height_get_quality(match.group(1))
         
         is_3d = True if re.search('\s+3D\s+', title) else False
     
     fragment = dom_parser2.parse_dom(html, 'div', {'class': 'entry'})
     if fragment:
         for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'h3'):
             for attrs, _content in dom_parser2.parse_dom(item, 'a', req='href'):
                 stream_url = attrs['href']
                 host = urlparse.urlparse(stream_url).hostname
                 source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': page_quality, 'views': None, 'rating': None, 'direct': False}
                 source['format'] = 'x265'
                 source['3D'] = is_3d
                 sources.append(source)
                 
     return sources
Code example #45
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/movies.php')
        cookies = {'onlylanguage': 'en', 'lang': 'en'}
        params = {'list': 'search', 'search': title}
        html = self._http_get(search_url, params=params, cookies=cookies, cache_limit=8)
        for _attrs, content in dom_parser2.parse_dom(html, 'TR', {'id': re.compile('coverPreview\d+')}):
            match = dom_parser2.parse_dom(content, 'a', req='href')
            if not match: continue
            
            match_url, match_title = match[0].attrs['href'], match[0].content
            is_show = re.search('\(tvshow\)', match_title, re.I)
            if (video_type == VIDEO_TYPES.MOVIE and is_show) or (video_type == VIDEO_TYPES.TVSHOW and not is_show):
                continue

            match_title = match_title.replace('(TVshow)', '')
            match_title = match_title.strip()
            
            match_year = ''
            for _attrs, div in dom_parser2.parse_dom(content, 'div'):
                match = re.match('\s*(\d{4})\s*', div)
                if match:
                    match_year = match.group(1)

            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
        return results
Code example #46
    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        if video.video_type == VIDEO_TYPES.EPISODE:
            html = self.__get_episode_fragment(html, video)
        for _attrs, item in dom_parser2.parse_dom(html, 'div',
                                                  {'class': 'linkTr'}):
            stream_url = dom_parser2.parse_dom(item, 'div',
                                               {'class': 'linkHiddenUrl'})
            q_str = dom_parser2.parse_dom(item, 'div',
                                          {'class': 'linkQualityText'})
            if stream_url and q_str:
                stream_url = stream_url[0].content
                q_str = q_str[0].content
                host = urlparse.urlparse(stream_url).hostname
                base_quality = QUALITY_MAP.get(q_str, QUALITIES.HIGH)
                quality = scraper_utils.get_quality(video, host, base_quality)
                source = {
                    'multi-part': False,
                    'url': stream_url,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'direct': False
                }
                hosters.append(source)

        return hosters
Code Example #47
 def _get_episode_url(self, show_url, video):
     query = scraper_utils.parse_query(show_url)
     if 'id' in query:
         url = scraper_utils.urljoin(self.base_url, '/api/v2/shows/%s' % (query['id']))
         js_data = self._http_get(url, cache_limit=.5)
         if 'episodes' in js_data:
             force_title = scraper_utils.force_title(video)
             if not force_title:
                 for episode in js_data['episodes']:
                     if int(video.season) == int(episode['season']) and int(video.episode) == int(episode['number']):
                         return scraper_utils.pathify_url('?id=%s' % (episode['id']))
                 
                 if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                     for episode in js_data['episodes']:
                         if 'airdate' in episode:
                             ep_airdate = scraper_utils.to_datetime(episode['airdate'], "%Y-%m-%d").date()
                             if video.ep_airdate == (ep_airdate - datetime.timedelta(days=1)):
                                 return scraper_utils.pathify_url('?id=%s' % (episode['id']))
             else:
                 logger.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
             
             if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
                 norm_title = scraper_utils.normalize_title(video.ep_title)
                 for episode in js_data['episodes']:
                     if 'name' in episode and norm_title in scraper_utils.normalize_title(episode['name']):
                         return scraper_utils.pathify_url('?id=%s' % (episode['id']))
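
The airdate fallback above matches against ep_airdate minus one day, which suggests this site dates episodes one day after the canonical airdate. A minimal sketch of that comparison, with hypothetical dates:

import datetime

site_airdate = datetime.datetime.strptime('2017-03-02', '%Y-%m-%d').date()  # date as listed by the site (hypothetical)
video_airdate = datetime.date(2017, 3, 1)  # canonical airdate from metadata (hypothetical)
print(video_airdate == site_airdate - datetime.timedelta(days=1))  # True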
Code Example #48
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        seen_urls = set()
        search_url = scraper_utils.urljoin(self.base_url, '/yabanci-diziler/')
        html = self._http_get(search_url, cache_limit=48)
        norm_title = scraper_utils.normalize_title(title)
        for _attrs, item in dom_parser2.parse_dom(html, 'div',
                                                  {'class': 'category-post'}):
            match_url = dom_parser2.parse_dom(item, 'a', req='href')
            match_title = dom_parser2.parse_dom(item, 'h3')
            if match_url and match_title:
                match_url = scraper_utils.pathify_url(
                    match_url[0].attrs['href'])
                match_title = match_title[0].content
                if match_url in seen_urls: continue
                seen_urls.add(match_url)
                if norm_title in scraper_utils.normalize_title(match_title):
                    result = {
                        'url': match_url,
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': ''
                    }
                    results.append(result)

        return results
Code Example #49
 def _get_episode_url(self, show_url, video):
     episode_pattern = 'href="([^"]+[sS]%s-?[eE]%s(?!\d)[^"]*)"' % (video.season, video.episode)
     title_pattern = 'href="(?P<url>[^"]+)(?:[^>]*>){3}\s*S\d+\s+Episode\s+\d+\s*:\s*(?P<title>[^<]+)'
     show_url = scraper_utils.urljoin(self.base_url, show_url)
     html = self._http_get(show_url, cache_limit=2)
     fragment = dom_parser2.parse_dom(html, 'div', {'id': 'accordion'})
     return self._default_get_episode_url(fragment, video, episode_pattern, title_pattern)
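
The episode pattern is the interesting part of this helper: -? tolerates both s1e2 and s1-e2 slugs, and the (?!\d) lookahead keeps episode 2 from matching episode 20. A standalone check against hypothetical markup:

import re

season, episode = 1, 2
episode_pattern = 'href="([^"]+[sS]%s-?[eE]%s(?!\d)[^"]*)"' % (season, episode)
print(re.search(episode_pattern, '<a href="/show-s1-e2-watch">').group(1))  # /show-s1-e2-watch
print(re.search(episode_pattern, '<a href="/show-s1-e20-watch">'))  # None: the lookahead rejects e20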
Code Example #50
    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=8)
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
        if fragment: html = fragment[0].content
        links = scraper_utils.parse_sources_list(self, html)
        for link in links:
            stream_url = link
            if self.base_url in link:
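                # links on the scraper's own domain appear to be redirects; a HEAD request
                # with redirects disabled retrieves the real stream URL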
                redir_url = self._http_get(link, headers={'Referer': url}, allow_redirect=False, method='HEAD')
                if redir_url.startswith('http'):
                    stream_url = redir_url
            
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = links[link]['quality']
                stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': url})
                
            source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
            hosters.append(source)

        return hosters
Code Example #51
 def get_sources(self, video):
     source_url = self.get_url(video)
     hosters = []
     sources = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=1)
     fragment = dom_parser2.parse_dom(html, 'div', {'class': 'player'})
     if not fragment: return hosters
     
     iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
     if not iframe_url: return hosters
     
     html = self._http_get(iframe_url[0].attrs['src'], cache_limit=.25)
     sources.append(self.__get_embedded_sources(html))
     sources.append(self.__get_linked_sources(html))
     for source in sources:
         for stream_url in source['sources']:
             host = scraper_utils.get_direct_hostname(self, stream_url)
             if host == 'gvideo':
                 stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
                 quality = scraper_utils.gv_get_quality(stream_url)
                 hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                 hoster['subs'] = source.get('subs', True)
                 hosters.append(hoster)
 
     return hosters
Code Example #52
    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters

        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        # the page flags low-quality rips in its text; use that as the base quality for all links
        if re.search('This movie is of poor quality', html, re.I):
            page_quality = QUALITIES.LOW
        else:
            page_quality = QUALITIES.HIGH

        for match in re.finditer('href="([^"]+/embed\d*/[^"]+)', html):
            url = match.group(1)
            embed_html = self._http_get(url, cache_limit=.5)
            hosters += self.__get_links(embed_html)

        pattern = 'href="([^"]+)[^>]*>\s*<[^>]+play_video.gif'
        for match in re.finditer(pattern, html, re.I):
            stream_url = match.group(1)
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.get_quality(video, host, page_quality)  # keep the page-level base quality stable across links
            hoster = {
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': quality,
                'rating': None,
                'views': None,
                'direct': False
            }
            hosters.append(hoster)
        return hosters
Code Example #53
    def get_sources(self, video):
        sources = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return sources
        object_id = self.__extract_id(source_url)
        if object_id is None: return sources
        source_url = TITLE_URL.format(id=object_id)
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._authed_http_get(page_url, cache_limit=.5)
        js_data = scraper_utils.parse_json(html, page_url)
        if video.video_type == VIDEO_TYPES.MOVIE:
            links = js_data.get('links', {})
        else:
            links = self.__episode_match(js_data, video)

        prefix = js_data.get('domain', {}).get('prefix')
        suffix = js_data.get('domain', {}).get('suffix')
        for key, path in links.get('links', {}).iteritems():
            for mirror in sorted(list(set(links.get('mirrors', [])))):
                stream_url = TEMPLATE.format(prefix=prefix, mirror=mirror, suffix=suffix, path=path)
                host = scraper_utils.get_direct_hostname(self, stream_url)
                quality = Q_MAP.get(key, QUALITIES.HIGH)
                source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
                source['version'] = '(Mirror %d)' % (mirror)
                sources.append(source)

        return sources
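
TITLE_URL, TEMPLATE, and Q_MAP are constants defined elsewhere in this scraper and are not shown here. Assuming TEMPLATE has roughly the shape below (an assumption, not the real constant), the loop expands each quality link once per deduplicated mirror:

TEMPLATE = 'https://{prefix}{mirror}.{suffix}{path}'  # assumed shape only; the real constant lives elsewhere in the scraper

for mirror in sorted(set([3, 1, 3])):  # the dedup/sort from the loop above
    print(TEMPLATE.format(prefix='s', mirror=mirror, suffix='example.com', path='/video.mp4'))
# https://s1.example.com/video.mp4
# https://s3.example.com/video.mp4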
Code Example #54
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/search-movies/%s.html' % (urllib.quote_plus(title)))
        html = self._http_get(search_url, cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'li', {'class': 'item'}):
            match_url = dom_parser2.parse_dom(item, 'a', req='href')
            match_title_year = re.search('onmouseover="([^"]+)', item)
            if match_url and match_title_year:
                match_url = match_url[0].attrs['href']
                match_title_year = match_title_year.group(1)
                match = re.search('<b>(?:<i>)?\s*(.*?)\s*(?:</i>)?</b>', match_title_year)
                if not match: continue
                
                match_title, match_year = scraper_utils.extra_year(match.group(1))
                is_season = re.search('season\s+(\d+)', match_title_year, re.I)
                if (is_season and video_type == VIDEO_TYPES.MOVIE) or (not is_season and video_type == VIDEO_TYPES.SEASON): continue
                
                if video_type == VIDEO_TYPES.MOVIE:
                    if not match_year:
                        match_year = re.search('>Release:\s*(\d{4})', match_title_year)
                        match_year = match_year.group(1) if match_year else ''
                else:
                    if season and int(season) != int(is_season.group(1)):
                        continue
                                
                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                    results.append(result)

        return results
Code Example #55
    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, headers=XHR, cache_limit=8)
        js_data = scraper_utils.parse_json(html, url)
        quality = Q_MAP.get(
            js_data.get('Key', {}).get('MovieDefinition'), QUALITIES.HIGH)
        value = js_data.get('Value', {})
        stream_url = value.get('VideoLink')
        if stream_url and value.get('ProviderSource', '').lower() == 'youtube':
            host = 'youtube.com'
            source = {
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'direct': False
            }
            hosters.append(source)

        return hosters
Code Example #56
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url,
                                           '/bestmatch-fund-movies-%s.html')
        search_title = title.replace(' ', '-')
        search_title = re.sub('[^A-Za-z0-9-]', '', search_title).lower()
        search_url = search_url % (search_title)
        html = self._http_get(search_url, cache_limit=1)
        for _attrs, item in dom_parser2.parse_dom(html, 'div',
                                                  {'class': 'thumbsTitle'}):
            match = dom_parser2.parse_dom(item, 'a', req='href')
            if not match: continue

            match_url, match_title_year = match[0].attrs['href'], match[0].content
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if (not year or not match_year or year == match_year):
                result = {
                    'url': scraper_utils.pathify_url(match_url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)

        return results
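
The search slug built above is just the title with spaces hyphenated, anything outside [A-Za-z0-9-] stripped, and the result lower-cased. For example:

import re

title = 'The Big Short'  # hypothetical search title
search_title = re.sub('[^A-Za-z0-9-]', '', title.replace(' ', '-')).lower()
print(search_title)  # the-big-short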
Code Example #57
 def _get_episode_url(self, show_url, video):
     season_url = show_url
     if video.season != 1:
         show_url = scraper_utils.urljoin(self.base_url, show_url)
         html = self._http_get(show_url, cache_limit=24)
         fragment = dom_parser2.parse_dom(html, 'div', {'class': 'page-numbers'})
         if fragment:
             match = re.search('href="([^"]+-%s-sezon[^"]*)' % (video.season), fragment[0].content)
             if match:
                 season_url = match.group(1)
         
     episode_pattern = '''href=['"]([^'"]+-%s-sezon-%s-bolum[^'"]*)''' % (video.season, video.episode)
     season_url = scraper_utils.urljoin(self.base_url, season_url)
     html = self._http_get(season_url, cache_limit=2)
     fragment = dom_parser2.parse_dom(html, 'ul', {'class': 'posts-list'})
     return self._default_get_episode_url(fragment or html, video, episode_pattern)
Code Example #58
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url,
                                           '/search/searchBoxSuggestion')
        html = self._http_get(search_url,
                              params={
                                  'top': 8,
                                  'query': title
                              },
                              cache_limit=8)
        js_data = scraper_utils.parse_json(html, search_url)
        for item in js_data:
            entityName = match_title_year = item.get('Value', '')
            if entityName:
                match_title, match_year2 = scraper_utils.extra_year(
                    match_title_year)
                match_year = str(item.get('ReleaseYear', ''))
                if not match_year: match_year = match_year2

                match_url = '/ontology/EntityDetails?' + urllib.urlencode(
                    {
                        'entityName': entityName,
                        'ignoreMediaLinkError': 'false'
                    })
                if not year or not match_year or year == match_year:
                    result = {
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': match_year,
                        'url': scraper_utils.pathify_url(match_url)
                    }
                    results.append(result)

        return results
Code Example #59
 def _get_episode_url(self, show_url, video):
     force_title = scraper_utils.force_title(video)
     title_fallback = kodi.get_setting('title-fallback') == 'true'
     norm_title = scraper_utils.normalize_title(video.ep_title)
     page_url = [show_url]
     too_old = False
     while page_url and not too_old:
         url = scraper_utils.urljoin(self.base_url, page_url[0])
         html = self._http_get(url, require_debrid=True, cache_limit=1)
         headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
         posts = [r.content for r in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
         for heading, post in zip(headings, posts):
             if self.__too_old(post):
                 too_old = True
                 break
             if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                 url, title = heading
                 if not force_title:
                     if scraper_utils.release_check(video, title, require_title=False):
                         return scraper_utils.pathify_url(url)
                 else:
                     if title_fallback and norm_title:
                         match = re.search('<strong>(.*?)</strong>', post)
                         if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                             return scraper_utils.pathify_url(url)
             
         page_url = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
         if page_url: page_url = [page_url[0].attrs['href']]
Code Example #60
    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        headers = {'User-Agent': LOCAL_UA}
        html = self._http_get(url,
                              require_debrid=False,
                              headers=headers,
                              cache_limit=.5)
        for match in re.finditer(
                "<span\s+class='info2'(.*?)(<span\s+class='info|<hr\s*/>)",
                html, re.DOTALL):
            for match2 in re.finditer('href="([^"]+)', match.group(1)):
                stream_url = match2.group(1)
                meta = scraper_utils.parse_episode_link(stream_url)
                quality = scraper_utils.height_get_quality(meta['height'])
                host = urlparse.urlparse(stream_url).hostname
                hoster = {
                    'multi-part': False,
                    'host': host,
                    'class': self,
                    'views': None,
                    'url': stream_url,
                    'rating': None,
                    'quality': quality,
                    'direct': False
                }
                hosters.append(hoster)

        return hosters
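
scraper_utils.parse_episode_link is not shown here; the code above relies only on it returning a dict with a 'height' key parsed from the release-style link. A minimal, self-contained approximation of that single behavior (the real helper handles many more cases):

import re

def height_from_link(stream_url):  # hypothetical stand-in, not the real scraper_utils API
    match = re.search('(\d{3,4})p', stream_url)
    return int(match.group(1)) if match else 480  # assume SD when no height token appears

print(height_from_link('http://host/Show.S01E02.720p.x264.mkv'))  # 720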