Python urljoin Exemples, deaths_lib.scraper_utils.urljoin Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : dizibox_scraper.py Projet : uguer30/Project

    def _get_episode_url(self, show_url, video):
        """Locate the page URL for ``video``'s episode of the given show.

        The show page is searched for a season link labelled
        "<season>. Sezon". When one is found, the season page is fetched and
        handed to ``_default_get_episode_url``. If that yields nothing, the
        site's front page is searched for a link of the form
        ``<slug>-<season>-sezon-<episode>-bolum``.

        :param show_url: show URL; it is joined onto ``self.base_url``.
        :param video: object exposing ``season`` and ``episode``.
        :return: the episode URL, or ``None`` when no matching link is found.
        """
        show_url = scraper_utils.urljoin(self.base_url, show_url)
        html = self._http_get(show_url, cache_limit=8)
        season_pattern = r'''href=['"]([^'"]+)[^>]+>\s*%s\.\s*Sezon<''' % (
            video.season)
        match = re.search(season_pattern, html)
        if match:
            episode_pattern = r'''href=['"]([^'"]+-%s-sezon-%s-bolum[^'"]*)''' % (
                video.season, video.episode)
            season_url = scraper_utils.urljoin(self.base_url, match.group(1))
            html = self._http_get(season_url, cache_limit=2)
            ep_url = self._default_get_episode_url(html, video,
                                                   episode_pattern)
            if ep_url:
                return ep_url

        # front page fallback
        html = self._http_get(self.base_url, cache_limit=2)

        # Last non-empty path segment of the show URL ('' if there is none).
        slug = ''
        for segment in reversed(show_url.split('/')):
            if segment:
                slug = segment
                break

        # The slug comes from a URL and may contain regex metacharacters
        # (e.g. '.' or '+'); escape it so it is matched literally.
        ep_url_frag = r'href="([^"]+/{slug}-{season}-sezon-{episode}-bolum[^"]*)'.format(
            slug=re.escape(slug), season=video.season, episode=video.episode)
        match = re.search(ep_url_frag, html)
        if match:
            return scraper_utils.pathify_url(match.group(1))
        return None

Exemple #2

0

Afficher le fichier

    def get_sources(self, video):
        """Return hoster dicts for the links found on ``video``'s page.

        The page is requested from ``self.base_url`` and, when that returns
        nothing, from ``self.old_base_url``. Links come from
        ``__get_post_links`` and, when the ``<name>-include_comments``
        setting equals ``'true'``, also from each ``commentbody-<n>`` div via
        ``__get_comment_links``. Links rejected by
        ``scraper_utils.excluded_link`` are dropped.

        :param video: video object passed to ``self.get_url``.
        :return: list of hoster dicts (empty when there is no usable URL).
        """
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, require_debrid=True, cache_limit=.5)
        if not html:
            # Nothing came back from the current base URL; retry on the old one.
            url = scraper_utils.urljoin(self.old_base_url, source_url)
            html = self._http_get(url, require_debrid=True, cache_limit=.5)

        # Maps link -> quality.
        sources = {}
        sources.update(self.__get_post_links(html, video))

        setting_id = '%s-include_comments' % (self.get_name())
        if kodi.get_setting(setting_id) == 'true':
            comment_divs = dom_parser2.parse_dom(
                html, 'div', {'id': re.compile('commentbody-\d+')})
            for _attrs, comment in comment_divs:
                sources.update(self.__get_comment_links(comment, video))

        for link, quality in sources.items():
            if scraper_utils.excluded_link(link):
                continue
            hosters.append({
                'multi-part': False,
                'host': urlparse.urlparse(link).hostname,
                'class': self,
                'views': None,
                'url': link,
                'rating': None,
                'quality': quality,
                'direct': False
            })
        return hosters

Exemple #3

0

Afficher le fichier

Fichier : snagfilms_scraper.py Projet : uguer30/Project

 def search(self, video_type, title, year, season=''):  # @UnusedVariable
     """Query the site's search endpoint and return the matching entries.

     The request is sent as an XHR with a Referer pointing at the site's
     ``/search/?q=<title>`` page. An entry is kept when either ``year`` or
     the entry's year is empty, or when the two are equal.

     :param video_type: key into ``SEARCH_TYPES``.
     :param title: title to search for.
     :param year: year to filter on; a falsy value disables the filter.
     :param season: unused.
     :return: list of dicts with ``title``, ``year`` and ``url`` keys.
     """
     search_url = scraper_utils.urljoin(self.base_url, SEARCH_URL)
     referer = scraper_utils.urljoin(self.base_url, '/search/?q=%s') % (
         urllib.quote_plus(title))
     headers = {'Referer': referer}
     headers.update(XHR)
     query = {
         'searchTerm': title,
         'type': SEARCH_TYPES[video_type],
         'limit': 500
     }
     html = self._http_get(search_url, params=query, headers=headers,
                           auth=False, cache_limit=2)
     js_data = scraper_utils.parse_json(html, search_url)

     results = []
     if 'results' not in js_data:
         return results

     for entry in js_data['results']:
         match_year = str(entry.get('year', ''))
         match_url = entry.get('permalink', '')
         match_title = entry.get('title', '')
         # Reject only when both years are known and they disagree.
         if year and match_year and year != match_year:
             continue
         results.append({
             'title': scraper_utils.cleanse_title(match_title),
             'year': match_year,
             'url': scraper_utils.pathify_url(match_url)
         })
     return results

Exemple #4

0

Afficher le fichier

Fichier : dayt_scraper.py Projet : uguer30/Project

    def get_sources(self, video):
        """Return direct hoster dicts found by following the page's iframes.

        Iframes are walked in the order they are discovered: the first one
        whose ``src`` contains ``docs.google.com`` is handed to
        ``scraper_utils.parse_google`` and the walk stops; any other iframe is
        fetched and the iframes it contains are queued. Each iframe URL is
        fetched at most once, so pages that embed each other cannot cause an
        endless loop.

        :param video: video object passed to ``self.get_url``.
        :return: list of hoster dicts (empty when nothing is found).
        """
        hosters = []
        sources = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=1)

        pending = list(dom_parser2.parse_dom(html, 'iframe', req='src'))
        visited = set([page_url])
        position = 0
        # ``pending`` grows while it is consumed, hence the index-based loop.
        while position < len(pending):
            attrs, _content = pending[position]
            position += 1
            iframe_url = attrs['src']
            if 'docs.google.com' in iframe_url:
                sources = scraper_utils.parse_google(self, iframe_url)
                break

            iframe_url = scraper_utils.urljoin(self.base_url, iframe_url)
            if iframe_url in visited:
                # Already fetched; following it again could loop forever.
                continue
            visited.add(iframe_url)
            html = self._http_get(iframe_url, cache_limit=1)
            pending += dom_parser2.parse_dom(html, 'iframe', req='src')

        for source in sources:
            hosters.append({
                'multi-part': False,
                'host': scraper_utils.get_direct_hostname(self, source),
                'class': self,
                'quality': scraper_utils.gv_get_quality(source),
                'views': None,
                'rating': None,
                'url': source,
                'direct': True
            })

        return hosters

Exemple #5

0

Afficher le fichier

Fichier : dizilab_scraper.py Projet : uguer30/Project

    def get_sources(self, video):
        """Return hosters gathered from each ``loadVideo`` id on the page.

        Every video id found in an ``onclick="loadVideo('<id>'..."`` attribute
        is posted to ``AJAX_URL``; the response is passed to the cloud,
        embedded, iframe and JSON link helpers. Only the first id is processed
        unless the ``scraper_url`` setting is truthy.

        :param video: video object passed to ``self.get_url``.
        :return: list of hosters (empty when there is no usable URL).
        """
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        page_html = self._http_get(page_url, cache_limit=.5)
        video_ids = re.findall('''onclick\s*=\s*"loadVideo\('([^']+)''', page_html)
        sub_icons = self.__get_subs(page_html)

        # Python 2 map(None, a, b) pairs the lists, padding the shorter with None.
        for v_id, icon in map(None, video_ids, sub_icons):
            ajax_url = scraper_utils.urljoin(self.base_url, AJAX_URL)
            post_data = {'vid': v_id, 'tip': 1, 'type': 'loadVideo'}
            headers = {'Referer': page_url}
            headers.update(XHR)
            ajax_html = self._http_get(ajax_url, data=post_data,
                                       headers=headers, cache_limit=.5)
            sub = ICONS.get(icon, DEFAULT_SUB)
            hosters += self.__get_cloud_links(ajax_html, page_url, sub)
            hosters += self.__get_embedded_links(ajax_html, sub)
            hosters += self.__get_iframe_links(ajax_html, sub)
            hosters += self.__get_json_links(ajax_html, sub)
            if not kodi.get_setting('scraper_url'):
                break

        return hosters

Exemple #6

0

Afficher le fichier

 def __get_source_page(self, video_type, page_url):
     """Fetch the server-list HTML for the movie referenced by ``page_url``.

     ``page_url`` must contain ``/movie/<slug>-<id>.html``. The quick-preview
     page (``QP_URL``) is requested to find the "View all episodes" link,
     that link is fetched, every ``img.hidden`` on it is requested, and
     finally ``SL_URL`` is requested and its JSON ``html`` field extracted.

     :param video_type: compared with ``VIDEO_TYPES.MOVIE`` to choose between
         ``'movie'`` and ``'series'`` in the quick-preview URL.
     :param page_url: site-relative URL of the movie page.
     :return: ``(movie_id, watching_url, html)``; ``('', '', '')`` when the
         URL does not match or no "View all episodes" link exists. ``html``
         is ``''`` when the JSON response has no ``html`` entry.
     """
     match = re.search(r'/movie/(.*?)-(\d+)\.html', page_url)
     if not match:
         return '', '', ''
     slug, movie_id = match.groups()

     vid_type = 'movie' if video_type == VIDEO_TYPES.MOVIE else 'series'
     qp_url = QP_URL.format(slug=slug, movie_id=movie_id, vid_type=vid_type)
     qp_url = scraper_utils.urljoin(self.base_url, qp_url)
     referer = scraper_utils.urljoin(self.base_url, page_url)
     headers = {'Referer': referer}
     headers.update(XHR)
     html = self._http_get(qp_url, headers=headers, cache_limit=8)
     watching_url = dom_parser2.parse_dom(html, 'a', {'title': re.compile('View all episodes')}, req='href')
     if not watching_url:
         return '', '', ''

     watching_url = watching_url[0].attrs['href']
     page_html = self._http_get(watching_url, headers={'Referer': referer}, cache_limit=8)
     # The responses are discarded; the requests are made only for their
     # side effects. NOTE(review): presumably the site expects these hidden
     # images to be loaded before it serves the server list - confirm.
     for attrs, _content in dom_parser2.parse_dom(page_html, 'img', {'class': 'hidden'}, req='src'):
         self._http_get(attrs['src'], headers={'Referer': watching_url}, cache_limit=8)

     sl_url = SL_URL.format(movie_id=movie_id)
     sl_url = scraper_utils.urljoin(self.base_url, sl_url)
     html = self._http_get(sl_url, headers=headers, cache_limit=8)
     js_data = scraper_utils.parse_json(html, sl_url)
     try:
         html = js_data['html']
     except (KeyError, TypeError):
         # Missing key or a non-mapping JSON payload: treat as "no html".
         html = ''
     return movie_id, watching_url, html

Exemple #7

0

Afficher le fichier

    def search(self, video_type, title, year, season=''):
        """Search the site for ``title`` and return the matching entries.

        Each ``div.item_movie`` result is classified as a season entry when
        its title contains ``S<n>`` / ``Season <n>``. Movie searches keep only
        non-season entries; season searches keep season entries (filtered by
        ``season`` when given) and, for season 1 or no season filter on a
        title/year match, entries without a season marker.

        :param video_type: ``VIDEO_TYPES.MOVIE`` or ``VIDEO_TYPES.SEASON``.
        :param title: title to search for.
        :param year: year to filter on; a falsy value disables the filter.
        :param season: season number to filter on (season searches only).
        :return: list of dicts with ``url``, ``title`` and ``year`` keys.
        """
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/movies/search')
        html = self._http_get(search_url, params={'s': title}, cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'item_movie'}):
            match = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
            if not match: continue

            match_title_year = match[0].attrs['title']
            match_url = match[0].attrs['href']
            is_season = re.search(r'S(?:eason\s+)?(\d+)', match_title_year, re.I)
            # Does the entry's kind (movie vs. season) agree with the request?
            # The comparison must be grouped per video type; grouping it as
            # ``video_type == (MOVIE and not is_season)`` compares the type
            # with a boolean and can never match a movie.
            match_vt = (video_type == VIDEO_TYPES.MOVIE and not is_season) or (video_type == VIDEO_TYPES.SEASON and is_season)
            match_year = ''
            if video_type == VIDEO_TYPES.SEASON:
                if not season and not match_vt: continue
                if match_vt:
                    if season and int(is_season.group(1)) != int(season): continue
                else:
                    # Entry has no season marker: only acceptable as season 1
                    # of a show whose title and year match.
                    if season and int(season) != 1: continue
                    site_title, site_year = scraper_utils.extra_year(match_title_year)
                    if scraper_utils.normalize_title(site_title) not in scraper_utils.normalize_title(title) or year != site_year: continue

                match_title = match_title_year
            else:
                if not match_vt: continue
                match_title, match_year = scraper_utils.extra_year(match_title_year)

            match_url = scraper_utils.urljoin(match_url, 'watching.html')
            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
        return results

Exemple #8

0

Afficher le fichier

 def _get_episode_url(self, show_url, video):
     """Look up ``video``'s episode URL on the show's season page.

     The season page (``<show_url>/season/<season>``) is requested with the
     show page as Referer, and its ``div#episodes`` fragment is handed to
     ``_default_get_episode_url`` together with an episode pattern and a
     title pattern.

     :param show_url: site-relative show URL.
     :param video: object exposing ``season`` and ``episode``.
     :return: whatever ``_default_get_episode_url`` returns.
     """
     referer = scraper_utils.urljoin(self.base_url, show_url)
     season_path = '/season/%s' % (video.season)
     season_url = scraper_utils.urljoin(
         self.base_url, scraper_utils.urljoin(show_url, season_path))
     html = self._http_get(season_url, headers={'Referer': referer}, cache_limit=2)
     episodes_div = dom_parser2.parse_dom(html, 'div', {'id': 'episodes'})

     episode_pattern = 'href="([^"]+/season/%s/episode/%s/?)"' % (video.season, video.episode)
     title_pattern = 'href="(?P<url>[^"]+)"[^>]+title="(?:S\d+\s*E\d+:\s*)?(?P<title>[^"]+)'
     return self._default_get_episode_url(episodes_div, video, episode_pattern, title_pattern)

Exemple #9

0

Afficher le fichier

Fichier : watch5s_scraper.py Projet : uguer30/Project

    def get_sources(self, video):
        """Return hoster dicts scraped from the video page and related pages.

        For movies the main page is scraped, followed by every ``a.btn-eps``
        link that is not also an ``a.active`` link. For other video types the
        pages yielded by ``__match_episode`` are scraped instead. Sources
        whose URL does not start with ``http`` are ignored. Direct sources
        that are not ``gvideo`` get User-Agent and Referer headers appended
        to their URL.

        :param video: video object; ``video_type`` selects the branch.
        :return: list of hoster dicts (empty when there is no usable URL).
        """
        source_url = self.get_url(video)
        hosters = []
        sources = {}
        headers = {'Accept-Language': 'en-US,en;q=0.5'}
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, headers=headers, cache_limit=2)
        if video.video_type == VIDEO_TYPES.MOVIE:
            sources.update(self.__scrape_sources(html, page_url))
            all_pages = {
                r.attrs['href'] for r in dom_parser2.parse_dom(
                    html, 'a', {'class': 'btn-eps'}, req='href')
            }
            active_pages = {
                r.attrs['href'] for r in dom_parser2.parse_dom(
                    html, 'a', {'class': 'active'}, req='href')
            }
            extra_pages = list(all_pages - active_pages)
        else:
            extra_pages = self.__match_episode(video, html)

        # page_url is deliberately rebound: the Referer appended below is the
        # URL of the last page that was fetched.
        for page in extra_pages:
            page_url = scraper_utils.urljoin(self.base_url, page)
            html = self._http_get(page_url, headers=headers, cache_limit=2)
            sources.update(self.__scrape_sources(html, page_url))

        for source, values in sources.items():
            if not source.lower().startswith('http'):
                continue

            stream_url = source
            if values['direct']:
                host = scraper_utils.get_direct_hostname(self, source)
                if host != 'gvideo':
                    stream_url = source + scraper_utils.append_headers({
                        'User-Agent': scraper_utils.get_ua(),
                        'Referer': page_url
                    })
            else:
                host = urlparse.urlparse(source).hostname

            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': values['quality'],
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': values['direct']
            })

        return hosters

Exemple #10

0

Afficher le fichier

Fichier : m4ufree_scraper.py Projet : uguer30/Project

    def get_sources(self, video):
        """Return hoster dicts for the embedded and per-server sources.

        A view count is read from the content of the ``view_icon.png`` image
        element when present. If the page links to a ``-full-movie-`` URL,
        that page replaces the current one. Sources come from
        ``__get_embedded`` plus one ``AJAX_URL`` request per ``span.btn-eps``
        element. Every stream URL gets a User-Agent header appended.

        :param video: video object passed to ``self.get_url``.
        :return: list of hoster dicts (empty when there is no usable URL).
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)

        views = None
        view_icon = dom_parser2.parse_dom(
            html, 'img', {'src': re.compile('[^"]*view_icon.png')})
        if view_icon:
            count = re.search('(\d+)', view_icon[0].content)
            if count:
                views = count.group(1)

        movie_link = re.search('href="([^"]+-full-movie-[^"]+)', html)
        if movie_link:
            url = movie_link.group(1)
            html = self._http_get(url, cache_limit=.5)

        sources = self.__get_embedded(html)
        server_buttons = dom_parser2.parse_dom(
            html, 'span', {'class': 'btn-eps'}, req='link')
        for button in server_buttons:
            link_id = button.attrs['link']
            ajax_url = scraper_utils.urljoin(self.base_url, AJAX_URL)
            headers = {'Referer': url}
            headers.update(XHR)
            ajax_html = self._http_get(ajax_url, params={'v': link_id},
                                       headers=headers, cache_limit=.5)
            sources.update(self.__get_sources(ajax_html))

        for source in sources:
            info = sources[source]
            direct = info['direct']
            if direct:
                host = scraper_utils.get_direct_hostname(self, source)
            else:
                host = urlparse.urlparse(source).hostname
            stream_url = source + scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua()})
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': info['quality'],
                'views': views,
                'rating': None,
                'url': stream_url,
                'direct': direct
            })

        return hosters

Exemple #11

0

Afficher le fichier

Fichier : watch5s_scraper.py Projet : uguer30/Project

    def search(self, video_type, title, year, season=''):
        """Search the site for ``title`` and return the matching entries.

        Each ``div.ml-item`` is treated as a season entry when it contains a
        ``span.mli-eps`` element and as a movie otherwise; only entries of the
        requested kind are kept. Season entries must end in
        ``Season <season>`` when ``season`` is given. The year (movies only)
        is read from the end of the image's ``alt`` text. An entry is kept
        when one normalized title contains the other and the years do not
        conflict.

        :param video_type: ``VIDEO_TYPES.MOVIE`` or ``VIDEO_TYPES.SEASON``.
        :param title: title to search for.
        :param year: year to filter on; a falsy value disables the filter.
        :param season: season number to filter on (season searches only).
        :return: list of dicts with ``title``, ``year`` and ``url`` keys.
        """
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/search/')
        headers = {'Accept-Language': 'en-US,en;q=0.5'}
        html = self._http_get(search_url, params={'q': title},
                              headers=headers, cache_limit=8)
        norm_title = scraper_utils.normalize_title(title)
        wants_movie = video_type == VIDEO_TYPES.MOVIE
        wants_season = video_type == VIDEO_TYPES.SEASON

        for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'ml-item'}):
            title_frag = dom_parser2.parse_dom(item, 'span', {'class': 'mli-info'})
            url_frag = dom_parser2.parse_dom(item, 'a', req='href')
            year_frag = dom_parser2.parse_dom(item, 'img', req='alt')
            is_episodes = dom_parser2.parse_dom(item, 'span', {'class': 'mli-eps'})

            kind_matches = (wants_movie and not is_episodes) or (wants_season and is_episodes)
            if not kind_matches:
                continue
            if not (title_frag and url_frag):
                continue

            match_url = url_frag[0].attrs['href']
            match_title = title_frag[0].content
            match_title = re.sub('</?h2>', '', match_title)
            match_title = re.sub('\s+\d{4}$', '', match_title)
            if wants_season and season:
                if not re.search('Season\s+0*%s$' % (season), match_title):
                    continue

            if not match_url.endswith('/'):
                match_url += '/'
            match_url = scraper_utils.urljoin(match_url, 'watch/')

            match_year = ''
            if wants_movie and year_frag:
                year_match = re.search('\s*-\s*(\d{4})$', year_frag[0].attrs['alt'])
                if year_match:
                    match_year = year_match.group(1)

            match_norm_title = scraper_utils.normalize_title(match_title)
            title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
            if not title_match:
                continue
            # Reject only when both years are known and they disagree.
            if year and match_year and year != match_year:
                continue

            results.append({
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url)
            })

        return results

Exemple #12

0

Afficher le fichier

    def get_sources(self, video):
        """Return hoster dicts for the streams behind the page's player.

        The id passed to ``load_player('<id>'`` on the page is sent to
        ``PLAYER_URL``; the JSON reply's ``value`` (or ``download``) URL is
        then fetched without following redirects. A response starting with
        ``http`` is treated as a single stream URL, otherwise it is parsed as
        JSON and the first playlist entry's sources are used. ``gvideo``
        streams are marked direct; others take their quality from the
        stream's label, defaulting to ``QUALITIES.HIGH``.

        :param video: video object passed to ``self.get_url``.
        :return: list of hoster dicts (empty when any step finds nothing).
        """
        hosters = []
        sources = {}
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        match = re.search(r"load_player\('([^']+)", html)
        if not match: return hosters

        headers = {'Referer': page_url, 'Server': 'cloudflare-nginx', 'Accept': 'text/html, */*; q=0.01',
                   'Accept-Language': 'en-US,en;q=0.5', 'Accept-Formating': 'application/json, text/javascript', 'Accept-Encoding': 'gzip, deflate'}
        headers.update(XHR)
        params = {'id': match.group(1)}
        player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
        html = self._http_get(player_url, params=params, headers=headers, cache_limit=1)
        js_data = scraper_utils.parse_json(html, player_url)
        pl_url = js_data.get('value') or js_data.get('download')
        if not pl_url: return hosters

        headers = {'Referer': page_url}
        # Protocol-relative URL: assume https.
        if pl_url.startswith('//'): pl_url = 'https:' + pl_url
        html = self._http_get(pl_url, headers=headers, allow_redirect=False, cache_limit=0)
        if html.startswith('http'):
            streams = [(html, '')]
        else:
            js_data = scraper_utils.parse_json(html, pl_url)
            try:
                streams = [(source['file'], source.get('label', '')) for source in js_data['playlist'][0]['sources']]
            except (KeyError, IndexError, TypeError, AttributeError):
                # Playlist missing or not shaped as expected: no streams.
                streams = []

        # Keyed by stream URL, so duplicate URLs collapse into one entry.
        for stream_url, label in streams:
            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                sources[stream_url] = {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True}
            else:
                if label:
                    quality = scraper_utils.height_get_quality(label)
                else:
                    quality = QUALITIES.HIGH
                sources[stream_url] = {'quality': quality, 'direct': False}

        for source, value in sources.items():
            direct = value['direct']
            quality = value['quality']
            if direct:
                host = scraper_utils.get_direct_hostname(self, source)
            else:
                host = urlparse.urlparse(source).hostname

            stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct}
            hosters.append(hoster)

        return hosters

Exemple #13

0

Afficher le fichier

Fichier : mehliz_scraper.py Projet : uguer30/Project

    def get_sources(self, video):
        """Return direct hoster dicts for the player iframe on the page.

        The first iframe inside ``div.playex`` (or anywhere on the page when
        that div is absent) is fetched with the page as Referer. If the
        iframe document contains an ``object`` with a ``data`` attribute, the
        streams come from ``scraper_utils.parse_google``; otherwise from
        ``scraper_utils.parse_sources_list``. Non-``gvideo`` stream URLs get
        User-Agent and Referer headers appended.

        :param video: video object passed to ``self.get_url``.
        :return: list of hoster dicts (empty when no iframe is found).
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        player_div = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
        if player_div:
            html = player_div[0].content

        iframes = dom_parser2.parse_dom(html, 'iframe', req='src')
        if not iframes:
            return hosters

        iframe_url = iframes[0].attrs['src']
        if iframe_url.startswith('/'):
            iframe_url = scraper_utils.urljoin(self.base_url, iframe_url)
        html = self._http_get(iframe_url, headers={'Referer': page_url},
                              cache_limit=.5)

        obj = dom_parser2.parse_dom(html, 'object', req='data')
        if obj:
            streams = {
                stream_url: {
                    'quality': scraper_utils.gv_get_quality(stream_url),
                    'direct': True
                }
                for stream_url in scraper_utils.parse_google(
                    self, obj[0].attrs['data'])
            }
        else:
            streams = scraper_utils.parse_sources_list(self, html)

        for stream_url, values in streams.iteritems():
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = values['quality']
                extra_headers = {
                    'User-Agent': scraper_utils.get_ua(),
                    'Referer': page_url
                }
                stream_url += scraper_utils.append_headers(extra_headers)

            hosters.append({
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'direct': True
            })

        return hosters

Exemple #14

0

Afficher le fichier

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the site's ``/results`` page and return the matching entries.

        A POST to ``av`` is issued first (its response is discarded), then the
        results page is requested with ``begin_referer`` and ``prounder``
        cookies. When the returned page references ``jquery.js`` in a script
        ``src``, the results page is requested once more without caching.
        The year is read from the ``video_quality`` div when it contains a
        four-digit number surrounded by whitespace.

        :param video_type: unused.
        :param title: title to search for.
        :param year: year to filter on; a falsy value disables the filter.
        :param season: unused.
        :return: list of dicts with ``url``, ``title`` and ``year`` keys.
        """
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/results')
        params = {'q': title}
        referer = search_url + '?' + urllib.urlencode(params)
        headers = {'Referer': referer}
        headers.update(XHR)
        # NOTE(review): response unused; presumably this request primes the
        # session before searching - confirm.
        _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'),
                               headers=headers,
                               method='POST',
                               cache_limit=0)

        cookies = {'begin_referer': referer, 'prounder': 1}
        html = self._http_get(search_url,
                              params=params,
                              cookies=cookies,
                              cache_limit=8)
        if any('jquery.js' in match.attrs['src']
               for match in dom_parser2.parse_dom(html, 'script', req='src')):
            html = self._http_get(search_url,
                                  params=params,
                                  cookies=cookies,
                                  cache_limit=0)

        for _attrs, result in dom_parser2.parse_dom(html, 'div',
                                                    {'class': 'cell'}):
            title_frag = dom_parser2.parse_dom(result, 'div',
                                               {'class': 'video_title'})
            year_frag = dom_parser2.parse_dom(result, 'div',
                                              {'class': 'video_quality'})
            if not title_frag: continue
            match = dom_parser2.parse_dom(title_frag[0].content,
                                          'a',
                                          req='href')
            if not match: continue
            match_url = match[0].attrs['href']
            match_title = match[0].content
            try:
                match_year = re.search(r'\s+(\d{4})\s+',
                                       year_frag[0].content).group(1)
            except (IndexError, AttributeError, TypeError):
                # No quality div, or no year inside it.
                match_year = ''

            if not year or not match_year or year == match_year:
                result = {
                    'url': scraper_utils.pathify_url(match_url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)
        return results

Exemple #15

0

Afficher le fichier

    def get_sources(self, video):
        """Return hoster dicts for the links ``get_gk_links`` finds.

        The page quality is looked up in ``QUALITY_MAP`` from the content of
        ``dd.status`` and defaults to ``QUALITIES.HIGH``. For episodes only
        the ``div#servers-list`` fragment is searched for links; otherwise
        the whole page is. ``gvideo`` links are direct; other links use
        their URL hostname and are skipped when it is ``None``. Every stream
        URL gets a User-Agent header appended.

        :param video: video object; ``video_type`` selects the searched HTML.
        :return: list of hoster dicts (empty when there is no usable URL).
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)

        status = dom_parser2.parse_dom(html, 'dd', {'class': 'status'})
        page_quality = QUALITIES.HIGH
        if status:
            page_quality = QUALITY_MAP.get(status[0].content, QUALITIES.HIGH)

        gk_html = html
        if video.video_type == VIDEO_TYPES.EPISODE:
            servers = dom_parser2.parse_dom(html, 'div', {'id': 'servers-list'})
            gk_html = servers[0].content if servers else ''

        link_url = scraper_utils.urljoin(self.base_url, LINK_URL)
        player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
        gk_links = scraper_utils.get_gk_links(
            self, gk_html, page_url, page_quality, link_url, player_url)
        for stream_url, quality in gk_links.iteritems():
            host = scraper_utils.get_direct_hostname(self, stream_url)
            direct = host == 'gvideo'
            if not direct:
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.get_quality(video, host, quality)

            if host is None:
                continue

            stream_url += scraper_utils.append_headers(
                {'User-Agent': scraper_utils.get_ua()})
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': direct
            })

        return hosters

Exemple #16

0

Afficher le fichier

 def __add_torrent(self, hash_id):
     """Ensure the torrent identified by ``hash_id`` is in the transfer list.

     The current transfers are fetched from ``LIST_URL``; if one of them has
     the same hash (compared case-insensitively) nothing is added. Otherwise
     a magnet link built from ``MAGNET_LINK`` is posted to ``ADD_URL``.

     :param hash_id: torrent info-hash.
     :return: ``True`` when the torrent is already listed or the add request
         reports ``status == 'success'``; ``False`` otherwise.
     """
     list_url = scraper_utils.urljoin(self.base_url, LIST_URL)
     js_data = self._json_get(list_url, cache_limit=0)
     # Normalize both sides: the listed hash is lowercased, so an upper-case
     # hash_id would otherwise never match and the torrent would be re-added.
     wanted_hash = hash_id.lower()
     for transfer in js_data.get('transfers', []):
         if transfer['hash'].lower() == wanted_hash:
             return True

     add_url = scraper_utils.urljoin(self.base_url, ADD_URL)
     data = {'src': MAGNET_LINK % hash_id}
     js_data = self._json_get(add_url, data=data, cache_limit=0)
     return js_data.get('status') == 'success'

Exemple #17

0

Afficher le fichier

    def get_sources(self, video):
        """Return source dicts for every server/episode entry on the page.

        For each ``img`` carrying ``data-id`` and ``data-name`` the server
        list (``SERVER_URL``) is requested; for each ``a[data-id]`` in it the
        episode JSON (``EP_URL``) is requested. Links come from the iframes in
        ``link.embed`` or, failing that, from ``link.l``; heights come from
        ``link.q``. ``gvideo`` links are direct and get User-Agent/Referer
        headers appended; other links take their quality from the paired
        height, defaulting to ``QUALITIES.HD720``.

        :param video: video object passed to ``self.get_url``.
        :return: list of source dicts (empty when there is no usable URL).
        """
        source_url = self.get_url(video)
        sources = []
        if not source_url or source_url == FORCE_NO_MATCH: return sources
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        for attrs, _content in dom_parser2.parse_dom(html, 'img', req=['data-id', 'data-name']):
            film_id, data_name = attrs['data-id'], attrs['data-name']
            data = {'id': film_id, 'n': data_name}
            server_url = scraper_utils.urljoin(self.base_url, SERVER_URL)
            server_url = server_url % (film_id)
            headers = {'Referer': page_url}
            headers.update(XHR)
            html = self._http_get(server_url, data=data, headers=headers, cache_limit=.5)
            for attrs, _content in dom_parser2.parse_dom(html, 'a', req='data-id'):
                data = {'epid': attrs['data-id']}
                ep_url = scraper_utils.urljoin(self.base_url, EP_URL)
                ep_url = ep_url % (attrs['data-id'])
                headers = {'Referer': page_url}
                headers.update(XHR)
                html = self._http_get(ep_url, data=data, headers=headers, cache_limit=.5)
                js_data = scraper_utils.parse_json(html, ep_url)
                try:
                    links = [r.attrs['src'] for r in dom_parser2.parse_dom(js_data['link']['embed'], 'iframe', req='src')]
                except Exception:
                    # Best effort: any failure to read or parse the embed
                    # falls back to the plain link list.
                    try: links = js_data['link']['l']
                    except (KeyError, TypeError): links = []
                try: heights = js_data['link']['q']
                except (KeyError, TypeError): heights = []
                # Python 2 map(None, a, b) pads the shorter list with None.
                for stream_url, height in map(None, links, heights):
                    if not stream_url:
                        # Padding entry (more heights than links): no URL.
                        continue

                    match = re.search('movie_url=(.*)', stream_url)
                    if match:
                        stream_url = match.group(1)

                    host = scraper_utils.get_direct_hostname(self, stream_url)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(stream_url)
                        stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
                        direct = True
                    else:
                        host = urlparse.urlparse(stream_url).hostname
                        if height:
                            quality = scraper_utils.height_get_quality(height)
                        else:
                            quality = QUALITIES.HD720
                        direct = False
                    source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                    sources.append(source)

        return sources

Exemple #18

0

Afficher le fichier

    def resolve_link(self, link):
        """Resolve a site-relative link to the URL it finally redirects to.

        Links starting with ``http`` are returned unchanged. Otherwise the
        link's page is fetched, its "Continue to video" href is followed, and
        the URL from that page's ``meta http-equiv="refresh"`` tag is
        returned. If any step finds nothing, ``link`` is returned as given.

        :param link: absolute or site-relative link.
        :return: the resolved URL, or ``link`` itself.
        """
        if link.startswith('http'):
            return link

        first_url = scraper_utils.urljoin(self.base_url, link)
        first_html = self._http_get(first_url, cache_limit=.5)
        continue_link = re.search('href="([^"]+).*?value="Continue to video"', first_html)
        if not continue_link:
            return link

        next_url = scraper_utils.urljoin(self.base_url, continue_link.group(1))
        next_html = self._http_get(next_url, cache_limit=.5)
        redirect = dom_parser2.parse_dom(next_html, 'meta', {'http-equiv': 'refresh'}, req='content')
        if not redirect:
            return link

        target = re.search('url=([^"]+)', redirect[0].attrs['content'])
        if target:
            return target.group(1)
        return link

Exemple #19

0

Afficher le fichier

Fichier : noobroom_scraper.py Projet : uguer30/Project

 def resolve_link(self, link):
     """Resolve *link* to the URL its stream request finally lands on.

     The page at *link* (joined onto ``self.base_url``) is fetched and
     searched for a ``"file": "<path>"`` entry. That path is then requested
     with the cookie jar returned by ``self._set_cookies`` plus
     User-Agent, Host and Referer headers, and the URL reported by the
     response (after any redirects) is returned.

     Returns None when the page contains no ``"file"`` entry.
     """
     url = scraper_utils.urljoin(self.base_url, link)
     html = self._http_get(url, cache_limit=.5)
     match = re.search('"file"\s*:\s*"([^"]+)', html)
     if not match:
         return None

     stream_url = scraper_utils.urljoin(self.base_url, match.group(1))
     cj = self._set_cookies(self.base_url, {})
     request = urllib2.Request(stream_url)
     request.add_header('User-Agent', scraper_utils.get_ua())
     # Unredirected headers are not re-sent when the server redirects.
     request.add_unredirected_header('Host', request.get_host())
     request.add_unredirected_header('Referer', url)
     cj.add_cookie_header(request)
     response = urllib2.urlopen(request)
     try:
         # Only the post-redirect URL is needed; the body is never read.
         return response.geturl()
     finally:
         # Close explicitly so the underlying connection is not leaked.
         response.close()

Exemple #20

0

Afficher le fichier

 def _get_episode_url(self, show_url, video):
     """Find the URL of ``video``'s episode starting from a show page.

     The show page is fetched, the first link inside its ``poster`` div is
     followed, and the resulting page (narrowed to the ``servers`` div when
     one is found) is handed to ``self._default_get_episode_url`` together
     with a pattern built from ``video.season`` and ``video.episode``.

     Returns None when the poster div or its link cannot be found;
     otherwise returns whatever ``_default_get_episode_url`` returns.
     """
     landing_url = scraper_utils.urljoin(self.base_url, show_url)
     landing_html = self._http_get(landing_url, cache_limit=24)

     poster = dom_parser2.parse_dom(landing_html, 'div', {'class': 'poster'})
     if not poster:
         return

     anchors = dom_parser2.parse_dom(poster[0].content, 'a', req='href')
     if not anchors:
         return

     watch_url = scraper_utils.urljoin(self.base_url, anchors[0].attrs['href'])
     watch_html = self._http_get(watch_url, cache_limit=2)
     servers = dom_parser2.parse_dom(watch_html, 'div', {'id': 'servers'})

     season_episode = (video.season, video.episode)
     episode_pattern = 'href="([^"]+)[^>]+>[Ee][Pp]\s*(?:[Ss]0*%s-)?E?p?0*%s(?!\d)' % season_episode
     # Fall back to the whole page when no servers div was parsed.
     search_in = servers or watch_html
     return self._default_get_episode_url(search_in, video, episode_pattern)

Exemple #21

0

Afficher le fichier

    def get_sources(self, video):
        """Collect hoster entries from the ``info2`` sections of a video page.

        The page for ``video`` is fetched with a ``LOCAL_UA`` User-Agent and
        ``require_debrid=True``. Every ``href`` inside each ``info2`` span
        section becomes one hoster dict whose quality is derived from the
        ``height`` reported by ``scraper_utils.parse_episode_link``.

        Returns an empty list when ``self.get_url`` yields nothing or
        ``FORCE_NO_MATCH``.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, require_debrid=True,
                              headers={'User-Agent': LOCAL_UA}, cache_limit=.5)

        # A section runs from an info2 span to the next info span or <hr/>.
        section_re = "<span\s+class='info2'(.*?)(<span\s+class='info|<hr\s*/>)"
        for section in re.finditer(section_re, html, re.DOTALL):
            for href in re.finditer('href="([^"]+)', section.group(1)):
                stream_url = href.group(1)
                meta = scraper_utils.parse_episode_link(stream_url)
                quality = scraper_utils.height_get_quality(meta['height'])
                hosters.append({
                    'multi-part': False,
                    'host': urlparse.urlparse(stream_url).hostname,
                    'class': self,
                    'views': None,
                    'url': stream_url,
                    'rating': None,
                    'quality': quality,
                    'direct': False
                })

        return hosters

Exemple #22

0

Afficher le fichier

Fichier : project.py Projet : Lhse44/repository.deallen

    def get_sources(self, video):
        """Build hoster entries from the link table cells of a video page.

        Each ``<td><a href=...>`` match contributes one hoster dict: the
        href is the stream URL and the captured text after it is used as
        the host name. Quality comes from ``scraper_utils.get_quality``
        with ``QUALITIES.HIGH`` passed as its third argument.

        Returns an empty list when ``self.get_url`` yields nothing or
        ``FORCE_NO_MATCH``.
        """
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)

        link_re = '<td>\s*<a\s+href="([^"]+)(?:[^>]+>){2}\s*(?:&nbsp;)*\s*([^<]+)'
        for found in re.finditer(link_re, html):
            stream_url, host = found.group(1, 2)
            quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': False
            })

        return hosters

Exemple #23

0

Afficher le fichier

    def get_sources(self, video):
        """Build hoster entries from the rows of every ``class="W"`` table.

        For each table row that has both a ``<td>`` and a link, the first
        cell's text (with HTML comments and paired tags stripped) is used as
        the host name and the first link's href as the stream URL.

        Returns an empty list when ``self.get_url`` yields nothing or
        ``FORCE_NO_MATCH``.
        """
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        # The HTTP header is spelled 'Referer'; the previous 'Refer' key sent
        # a header servers do not recognise.
        headers = {'Referer': self.base_url}
        html = self._http_get(page_url, headers=headers, cache_limit=.5)
        for _attrs, table in dom_parser2.parse_dom(html, 'table',
                                                   {'class': 'W'}):
            for _attrs, row in dom_parser2.parse_dom(table, 'tr'):
                td = dom_parser2.parse_dom(row, 'td')
                stream_url = dom_parser2.parse_dom(row, 'a', req='href')
                if not td or not stream_url: continue

                # Reduce the first cell to bare text: drop comments, then
                # any element that has a matching closing tag.
                host = td[0].content
                host = re.sub('<!--.*?-->', '', host)
                host = re.sub('<([^\s]+)[^>]*>.*?</\\1>', '', host)
                host = host.strip()
                stream_url = stream_url[0].attrs['href']
                quality = scraper_utils.get_quality(video, host,
                                                    QUALITIES.HIGH)
                hoster = {
                    'multi-part': False,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'url': stream_url,
                    'direct': False
                }
                hosters.append(hoster)
        return hosters

Exemple #24

0

Afficher le fichier

Fichier : moviesplanet_scraper.py Projet : uguer30/Project

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Query ``/ajax/search.php`` for *title* and return matching items.

        The parsed JSON response is filtered on each item's ``meta`` field:
        it must start (case-insensitively) with ``TV SHOW`` for show and
        episode searches, or ``MOVIE`` otherwise. ``year`` and ``season``
        are not used, and every result carries an empty ``year``.

        Returns a list of dicts with ``title``, ``url`` and ``year`` keys.
        """
        search_url = scraper_utils.urljoin(self.base_url, '/ajax/search.php')
        query = {
            'q': title,
            'limit': 100,
            # Millisecond timestamp.
            'timestamp': int(time.time() * 1000),
            'verifiedCheck': ''
        }
        html = self._http_get(search_url, data=query, headers=XHR, cache_limit=1)

        wants_tv = video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]
        media_type = 'TV SHOW' if wants_tv else 'MOVIE'

        results = []
        for item in scraper_utils.parse_json(html, search_url):
            if item['meta'].upper().startswith(media_type):
                results.append({
                    'title': scraper_utils.cleanse_title(item['title']),
                    'url': scraper_utils.pathify_url(item['permalink']),
                    'year': ''
                })

        return results

Exemple #25

0

Afficher le fichier

Fichier : moviehut_scraper.py Projet : uguer30/Project

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the ``bestmatch-fund-movies`` page built from *title*.

        The title is slugified (spaces to hyphens, everything other than
        ASCII letters, digits and hyphens removed, lower-cased) and
        substituted into the search path. Each ``thumbsTitle`` div's first
        link supplies a URL and a title/year string, which is split by
        ``scraper_utils.extra_year``.

        A match is kept when either year is missing or the two are equal.
        Returns a list of dicts with ``url``, ``title`` and ``year`` keys.
        """
        results = []
        url_template = scraper_utils.urljoin(self.base_url,
                                             '/bestmatch-fund-movies-%s.html')
        slug = re.sub('[^A-Za-z0-9-]', '', title.replace(' ', '-')).lower()
        search_url = url_template % (slug)
        html = self._http_get(search_url, cache_limit=1)

        for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'thumbsTitle'}):
            anchors = dom_parser2.parse_dom(item, 'a', req='href')
            if not anchors:
                continue

            first = anchors[0]
            match_url = first.attrs['href']
            match_title, match_year = scraper_utils.extra_year(first.content)
            # Skip only when both years are known and they disagree.
            if year and match_year and year != match_year:
                continue

            results.append({
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            })

        return results

Exemple #26

0

Afficher le fichier

Fichier : snagfilms_scraper.py Projet : uguer30/Project

 def __login(self):
     """Submit the stored credentials to the login API.

     ``self.username`` and ``self.password`` are sent as a JSON body to
     ``/apis/v2/user/login.json`` with the ``/login`` page as Referer and
     the ``XHR`` headers merged in; caching is disabled (``cache_limit=0``).

     Returns True when the parsed response's ``status`` is ``'success'``.
     """
     login_url = scraper_utils.urljoin(self.base_url, '/apis/v2/user/login.json')
     credentials = {
         'email': self.username,
         'password': self.password,
         'rememberMe': True
     }
     headers = {
         'Content-Type': 'application/json',
         'Referer': scraper_utils.urljoin(self.base_url, '/login')
     }
     headers.update(XHR)

     # Calls the parent class's _http_get rather than any override here.
     # NOTE(review): super(self.__class__, self) resolves against the
     # runtime class, so it can recurse if this class is ever subclassed;
     # naming the class explicitly would avoid that -- confirm.
     parent = super(self.__class__, self)
     html = parent._http_get(login_url,
                             data=json.dumps(credentials),
                             headers=headers,
                             cache_limit=0)
     reply = scraper_utils.parse_json(html, login_url)
     return reply.get('status') == 'success'

Exemple #27

0

Afficher le fichier

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search for shows matching *title* via the site's ajax search.

        The title is sent in the ``s`` query parameter together with
        ``ajax=1`` and ``type=TVShows``. Every link in the response becomes
        a candidate; a candidate is kept when either year is missing or the
        two are equal.

        Returns a list of dicts with ``url``, ``title`` and ``year`` keys.
        """
        results = []
        # NOTE(review): the '%s' placeholder is never filled in, so it stays
        # in the URL as written; the query itself travels in ``params``.
        search_url = scraper_utils.urljoin(self.base_url,
                                           'http://dwatchseries.to/search/%s')
        headers = {'Referer': self.base_url}
        headers.update(XHR)
        params = {'ajax': 1, 's': title, 'type': 'TVShows'}
        # The Referer/XHR headers were previously built but never sent.
        html = self._http_get(search_url,
                              params=params,
                              headers=headers,
                              cache_limit=8)
        for attrs, match_title in dom_parser2.parse_dom(html, 'a', req='href'):
            match_url = attrs['href']
            # Strip any markup nested inside the link text.
            match_title = re.sub('</?[^>]*>', '', match_title)
            # NOTE(review): the "(YYYY)" suffix is looked for in the URL,
            # not the title -- confirm that is where the site puts it.
            match = re.search('\((\d{4})\)$', match_url)
            if match:
                match_year = match.group(1)
            else:
                match_year = ''

            if not year or not match_year or year == match_year:
                result = {
                    'url': scraper_utils.pathify_url(match_url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)

        return results

Exemple #28

0

Afficher le fichier

Fichier : cmz_scraper.py Projet : uguer30/Project

    def get_sources(self, video):
        """Build hoster entries from the ``rd.html?url=`` links on a page.

        The page-wide ``View(s): N`` count, when present, is attached to
        every hoster as ``views`` (otherwise None). Each redirect link's
        ``url=`` value is used as the stream URL and its hostname as the
        host.

        Returns an empty list when ``self.get_url`` yields nothing or
        ``FORCE_NO_MATCH``.
        """
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)

        views_match = re.search('Views?\s*:\s*(\d+)', html, re.I)
        views = views_match.group(1) if views_match else None

        for link in re.finditer('href="[^"]+/rd\.html\?url=([^"]+)', html):
            stream_url = link.group(1)
            host = urlparse.urlsplit(stream_url).hostname
            hosters.append({
                'multi-part': False,
                'host': host,
                'url': stream_url,
                'class': self,
                'rating': None,
                'views': views,
                'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
                'direct': False
            })

        return hosters

Exemple #29

0

Afficher le fichier

Fichier : dizimag_scraper.py Projet : Lhse44/repository.deallen

    def __get_ajax_sources(self, html, page_url):
        """Gather sources by querying the page's ajax endpoint per source id.

        The endpoint URL is read from the ``url: "...", data:'id=`` snippet
        in *html*; without it an empty list is returned. For every id
        passed to ``kaynakdegis('...')`` in *html*, the endpoint is queried
        with that id and the JSON reply is dispatched:

        * an ``iframe`` containing ``self.base_url`` goes to
          ``__get_iframe_sources``;
        * any other ``iframe`` becomes a single non-direct source created
          with 720 as the second argument to ``__create_source``;
        * a reply without ``iframe`` goes to ``__get_js_sources``.
        """
        hosters = []
        endpoint = re.search('''url\s*:\s*"([^"]+)"\s*,\s*data:'id=''', html)
        if not endpoint:
            return hosters

        ajax_url = endpoint.group(1)
        for data_id in re.findall("kaynakdegis\('([^']+)", html):
            request_url = scraper_utils.urljoin(self.base_url, ajax_url)
            headers = {'Referer': page_url}
            headers.update(XHR)
            reply = self._http_get(request_url,
                                   data={'id': data_id},
                                   headers=headers,
                                   cache_limit=.5)
            js_data = scraper_utils.parse_json(reply, request_url)

            if 'iframe' not in js_data:
                hosters += self.__get_js_sources(js_data, page_url)
                continue

            iframe = js_data['iframe']
            if self.base_url in iframe:
                hosters += self.__get_iframe_sources(iframe, page_url)
            else:
                hosters.append(
                    self.__create_source(iframe, 720, page_url, direct=False))

        return hosters

Exemple #30

0

Afficher le fichier

    def get_sources(self, video):
        """Build hoster entries from the links in the page's first ``tbody``.

        Each link that contains an ``<img>`` contributes one hoster dict:
        the link's href is the stream URL and the stripped content of the
        first parsed ``img`` result is used as the host name. Links without
        an ``img`` are skipped.

        Returns an empty list when ``self.get_url`` yields nothing or
        ``FORCE_NO_MATCH``, or when the page has no ``tbody``.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)

        bodies = dom_parser2.parse_dom(html, 'tbody')
        if not bodies:
            return hosters

        for attrs, content in dom_parser2.parse_dom(bodies[0].content, 'a', req='href'):
            images = dom_parser2.parse_dom(content, 'img')
            if not images:
                continue

            host = images[0].content.strip()
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
                'views': None,
                'rating': None,
                'url': attrs['href'],
                'direct': False
            })

        return hosters