Python urljoin Examples, sigsaur_lib.scraper_utils.urljoin Python Examples

Example #1

0

Show file

 def __get_source_page(self, video_type, page_url):
     """Fetch the server-list HTML that belongs to a movie/series page.

     The slug and numeric id are parsed out of ``page_url``; the QP_URL
     endpoint is then queried to find the "View all episodes" link, that
     watch page is loaded (together with every ``img.hidden`` it
     references), and finally the SL_URL endpoint is requested.

     Returns a ``(movie_id, watching_url, html)`` tuple.  All three values
     are empty strings when ``page_url`` does not match the expected
     pattern or no watch link is found; ``html`` alone is empty when the
     SL_URL response has no usable ``'html'`` entry.
     """
     match = re.search(r'/movie/(.*?)-(\d+)\.html', page_url)
     if not match: return '', '', ''
     slug, movie_id = match.groups()

     vid_type = 'movie' if video_type == VIDEO_TYPES.MOVIE else 'series'
     qp_url = QP_URL.format(slug=slug, movie_id=movie_id, vid_type=vid_type)
     qp_url = scraper_utils.urljoin(self.base_url, qp_url)
     referer = scraper_utils.urljoin(self.base_url, page_url)
     headers = {'Referer': referer}
     headers.update(XHR)
     html = self._http_get(qp_url, headers=headers, cache_limit=8)
     watching_url = dom_parser2.parse_dom(html, 'a', {'title': re.compile('View all episodes')}, req='href')
     if not watching_url: return '', '', ''

     watching_url = watching_url[0].attrs['href']
     page_html = self._http_get(watching_url, headers={'Referer': referer}, cache_limit=8)
     # The hidden images are requested but their responses are discarded.
     # NOTE(review): presumably this imitates a browser loading the page - confirm.
     for attrs, _content in dom_parser2.parse_dom(page_html, 'img', {'class': 'hidden'}, req='src'):
         self._http_get(attrs['src'], headers={'Referer': watching_url}, cache_limit=8)

     sl_url = SL_URL.format(movie_id=movie_id)
     sl_url = scraper_utils.urljoin(self.base_url, sl_url)
     html = self._http_get(sl_url, headers=headers, cache_limit=8)
     js_data = scraper_utils.parse_json(html, sl_url)
     try:
         html = js_data['html']
     except (KeyError, TypeError):
         # missing key, or the parsed JSON is not a mapping
         html = ''
     return movie_id, watching_url, html

Example #2

0

Show file

File: snagfilms_scraper.py Project: idaviesfmts/hmdsm.repository

 def search(self, video_type, title, year, season=''):  # @UnusedVariable
     """Query the SEARCH_URL endpoint for ``title`` and return the hits.

     The request is sent with XHR headers and a search-page referer, asks
     for up to 500 entries of the type mapped from ``video_type``, and
     every hit is kept unless both ``year`` and the hit's year are set and
     differ.  Returns a list of dicts with ``title``, ``year`` and ``url``.
     """
     endpoint = scraper_utils.urljoin(self.base_url, SEARCH_URL)
     referer_template = scraper_utils.urljoin(self.base_url, '/search/?q=%s')
     headers = {'Referer': referer_template % (urllib.quote_plus(title))}
     headers.update(XHR)
     query = {'searchTerm': title, 'type': SEARCH_TYPES[video_type], 'limit': 500}
     html = self._http_get(endpoint, params=query, headers=headers, auth=False, cache_limit=2)
     js_data = scraper_utils.parse_json(html, endpoint)

     results = []
     if 'results' not in js_data:
         return results

     for item in js_data['results']:
         match_year = str(item.get('year', ''))
         match_url = item.get('permalink', '')
         match_title = item.get('title', '')
         # skip only when both years are known and disagree
         if year and match_year and year != match_year:
             continue
         results.append({
             'title': scraper_utils.cleanse_title(match_title),
             'year': match_year,
             'url': scraper_utils.pathify_url(match_url)
         })
     return results

Example #3

0

Show file

File: rlsbb_scraper.py Project: idaviesfmts/hmdsm.repository

    def get_sources(self, video):
        """Build hoster entries from the links found on a release post.

        The post is fetched from ``base_url`` and, when that yields nothing,
        from ``old_base_url``.  Links come from the post itself and, if the
        '<name>-include_comments' setting is 'true', from every
        'commentbody-N' div as well.  Excluded links are dropped.  Returns a
        list of hoster dicts (empty when the video has no usable URL).
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        html = self._http_get(scraper_utils.urljoin(self.base_url, source_url), require_debrid=True, cache_limit=.5)
        if not html:
            # empty response from the current domain: retry on the old one
            html = self._http_get(scraper_utils.urljoin(self.old_base_url, source_url), require_debrid=True, cache_limit=.5)

        links = {}
        links.update(self.__get_post_links(html, video))

        include_comments = kodi.get_setting('%s-include_comments' % (self.get_name())) == 'true'
        if include_comments:
            comment_divs = dom_parser2.parse_dom(html, 'div', {'id': re.compile('commentbody-\d+')})
            for _attrs, comment in comment_divs:
                links.update(self.__get_comment_links(comment, video))

        for link, quality in links.items():
            if scraper_utils.excluded_link(link):
                continue
            hosters.append({
                'multi-part': False,
                'host': urlparse.urlparse(link).hostname,
                'class': self,
                'views': None,
                'url': link,
                'rating': None,
                'quality': quality,
                'direct': False
            })
        return hosters

Example #4

0

Show file

 def get_sources(self, video):
     """Collect hoster entries for an episode from the dated listing pages.

     Nothing is looked up unless the episode has an air date newer than
     ``today - self.filter`` days.  The page for the day after the air
     date is tried first, then the air-date page itself (only days before
     today are fetched); the first page that yields links wins.  When
     neither does and the 'scraper_url' setting is set, '/index.html' is
     scanned instead.  Returns a list of hoster dicts.
     """
     found = {}
     today = datetime.datetime.today().date()
     oldest_allowed = today - datetime.timedelta(days=self.filter)
     if video.ep_airdate and oldest_allowed < video.ep_airdate:
         candidates = (video.ep_airdate + datetime.timedelta(days=1), video.ep_airdate)
         for day in candidates:
             if day < today:
                 day_url = scraper_utils.urljoin(self.base_url, EP_PAGE % (day.strftime('%Y.%m.%d')))
                 day_html = self._http_get(day_url, require_debrid=True, cache_limit=30 * 24)
                 found.update(self.__get_sources(video, day_html))
             if found:
                 break

         if not found and kodi.get_setting('scraper_url'):
             index_url = scraper_utils.urljoin(self.base_url, '/index.html')
             index_html = self._http_get(index_url, require_debrid=True, cache_limit=2)
             found.update(self.__get_sources(video, index_html))

     return [
         {'multi-part': False, 'host': urlparse.urlparse(link).hostname, 'class': self, 'views': None,
          'url': link, 'rating': None, 'quality': quality, 'direct': False}
         for link, quality in found.items()
     ]

Example #5

0

Show file

File: dayt_scraper.py Project: idaviesfmts/hmdsm.repository

    def get_sources(self, video):
        """Resolve direct streams by following the page's iframes.

        Iframes are visited breadth-first, starting with those on the video
        page.  The first iframe whose ``src`` contains 'docs.google.com' is
        handed to ``scraper_utils.parse_google`` and ends the walk; any other
        iframe is fetched and its own iframes are queued.  Each URL is
        fetched at most once, so pages that embed each other cannot keep the
        walk going forever.

        Returns a list of direct hoster dicts; empty when the video has no
        usable URL or no Google Docs iframe is reachable.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=1)

        sources = []
        # Explicit work queue (list + cursor) rather than extending the list
        # that a ``for`` loop is currently iterating over.
        pending = list(dom_parser2.parse_dom(html, 'iframe', req='src'))
        visited = set([page_url])
        cursor = 0
        while cursor < len(pending):
            attrs, _content = pending[cursor]
            cursor += 1
            iframe_url = attrs['src']
            if 'docs.google.com' in iframe_url:
                sources = scraper_utils.parse_google(self, iframe_url)
                break

            iframe_url = scraper_utils.urljoin(self.base_url, iframe_url)
            if iframe_url in visited:
                # already fetched: its iframes are in the queue already
                continue
            visited.add(iframe_url)
            html = self._http_get(iframe_url, cache_limit=1)
            pending.extend(dom_parser2.parse_dom(html, 'iframe', req='src'))

        for source in sources:
            host = scraper_utils.get_direct_hostname(self, source)
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': scraper_utils.gv_get_quality(source),
                'views': None,
                'rating': None,
                'url': source,
                'direct': True
            }
            hosters.append(hoster)

        return hosters

Example #6

0

Show file

    def get_sources(self, video):
        """Gather hoster entries for every ``loadVideo(...)`` id on the page.

        Each video id found in the page is paired positionally with the
        entries from ``__get_subs`` and posted to AJAX_URL; the response is
        scanned for cloud, embedded, iframe and JSON links.  Only the first
        video is processed unless the 'scraper_url' setting is set.
        Returns the accumulated list of hosters.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        page_html = self._http_get(page_url, cache_limit=.5)
        video_ids = re.findall('''onclick\s*=\s*"loadVideo\('([^']+)''', page_html)
        icons = self.__get_subs(page_html)
        # map(None, a, b) pairs the two lists, padding the shorter with None
        for video_id, icon in map(None, video_ids, icons):
            ajax_url = scraper_utils.urljoin(self.base_url, AJAX_URL)
            payload = {'vid': video_id, 'tip': 1, 'type': 'loadVideo'}
            xhr_headers = {'Referer': page_url}
            xhr_headers.update(XHR)
            player_html = self._http_get(ajax_url, data=payload, headers=xhr_headers, cache_limit=.5)
            sub = ICONS.get(icon, DEFAULT_SUB)
            hosters += self.__get_cloud_links(player_html, page_url, sub)
            hosters += self.__get_embedded_links(player_html, sub)
            hosters += self.__get_iframe_links(player_html, sub)
            hosters += self.__get_json_links(player_html, sub)
            if not kodi.get_setting('scraper_url'):
                break

        return hosters

Example #7

0

Show file

File: dizibox_scraper.py Project: idaviesfmts/hmdsm.repository

    def _get_episode_url(self, show_url, video):
        """Locate the URL of ``video``'s episode for the given show.

        First the show page is searched for a "<season>. Sezon" link and the
        season page behind it is handed to ``_default_get_episode_url``.  If
        that produces nothing, the site's front page is searched for a link
        of the form ``<slug>-<season>-sezon-<episode>-bolum``, where the slug
        is the last non-empty path segment of the show URL.  Returns the
        episode URL, or None when neither lookup matches.
        """
        show_url = scraper_utils.urljoin(self.base_url, show_url)
        show_html = self._http_get(show_url, cache_limit=8)
        season_pattern = '''href=['"]([^'"]+)[^>]+>\s*%s\.\s*Sezon<''' % (video.season)
        season_match = re.search(season_pattern, show_html)
        if season_match:
            episode_pattern = '''href=['"]([^'"]+-%s-sezon-%s-bolum[^'"]*)''' % (video.season, video.episode)
            season_url = scraper_utils.urljoin(self.base_url, season_match.group(1))
            season_html = self._http_get(season_url, cache_limit=2)
            ep_url = self._default_get_episode_url(season_html, video, episode_pattern)
            if ep_url:
                return ep_url

        # front page fallback
        front_html = self._http_get(self.base_url, cache_limit=2)
        # last non-empty path segment of the show URL ('' if there is none)
        slug = next((part for part in reversed(show_url.split('/')) if part), '')
        ep_url_frag = 'href="([^"]+/{slug}-{season}-sezon-{episode}-bolum[^"]*)'.format(
            slug=slug, season=video.season, episode=video.episode)
        front_match = re.search(ep_url_frag, front_html)
        if front_match:
            return scraper_utils.pathify_url(front_match.group(1))

Example #8

0

Show file

File: flixanity_scraper.py Project: idaviesfmts/hmdsm.repository

 def _get_episode_url(self, show_url, video):
     """Find the episode URL on the show's season page.

     Loads ``<show_url>/season/<season>`` (with the show page as referer)
     and lets ``_default_get_episode_url`` search the 'episodes' div using
     a season/episode link pattern and a title pattern.
     """
     referer = scraper_utils.urljoin(self.base_url, show_url)
     season_path = scraper_utils.urljoin(show_url, '/season/%s' % (video.season))
     season_url = scraper_utils.urljoin(self.base_url, season_path)
     season_html = self._http_get(season_url, headers={'Referer': referer}, cache_limit=2)
     episodes_div = dom_parser2.parse_dom(season_html, 'div', {'id': 'episodes'})

     episode_pattern = 'href="([^"]+/season/%s/episode/%s/?)"' % (video.season, video.episode)
     title_pattern = 'href="(?P<url>[^"]+)"[^>]+title="(?:S\d+\s*E\d+:\s*)?(?P<title>[^"]+)'
     return self._default_get_episode_url(episodes_div, video, episode_pattern, title_pattern)

Example #9

0

Show file

    def get_sources(self, video):
        """Scrape stream sources from a video page and its sibling pages.

        For movies the page itself is scraped, followed by every 'btn-eps'
        link that is not also marked 'active'.  For anything else the pages
        returned by ``__match_episode`` are scraped.  Sources whose URL does
        not start with 'http' are ignored.  Direct, non-gvideo streams get
        User-Agent/Referer headers appended to their URL.  Returns a list of
        hoster dicts.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        headers = {'Accept-Language': 'en-US,en;q=0.5'}
        found = {}
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, headers=headers, cache_limit=2)
        if video.video_type == VIDEO_TYPES.MOVIE:
            found.update(self.__scrape_sources(html, page_url))
            all_pages = {r.attrs['href'] for r in dom_parser2.parse_dom(html, 'a', {'class': 'btn-eps'}, req='href')}
            active_pages = {r.attrs['href'] for r in dom_parser2.parse_dom(html, 'a', {'class': 'active'}, req='href')}
            extra_pages = all_pages - active_pages
        else:
            extra_pages = self.__match_episode(video, html)

        for page in extra_pages:
            page_url = scraper_utils.urljoin(self.base_url, page)
            html = self._http_get(page_url, headers=headers, cache_limit=2)
            found.update(self.__scrape_sources(html, page_url))

        for source, values in found.iteritems():
            if not source.lower().startswith('http'):
                continue

            stream_url = source
            if values['direct']:
                host = scraper_utils.get_direct_hostname(self, source)
                if host != 'gvideo':
                    # page_url is whichever page was fetched last above
                    stream_url = source + scraper_utils.append_headers({
                        'User-Agent': scraper_utils.get_ua(),
                        'Referer': page_url
                    })
            else:
                host = urlparse.urlparse(source).hostname

            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': values['quality'],
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': values['direct']
            })

        return hosters

Example #10

0

Show file

    def search(self, video_type, title, year, season=''):
        """Search the site for ``title`` and return matching movies/seasons.

        Every 'ml-item' div of the result page is inspected.  Movie searches
        keep items without an 'mli-eps' span, season searches keep items
        with one (and, when ``season`` is given, whose title ends in that
        season number).  A hit must share a normalized title substring with
        the query and must not contradict ``year``.  The returned URL points
        at the item's 'watch/' page.  Returns a list of dicts with
        ``title``, ``year`` and ``url``.
        """
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/search/')
        headers = {'Accept-Language': 'en-US,en;q=0.5'}
        html = self._http_get(search_url, params={'q': title}, headers=headers, cache_limit=8)
        norm_title = scraper_utils.normalize_title(title)
        for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'ml-item'}):
            title_frag = dom_parser2.parse_dom(item, 'span', {'class': 'mli-info'})
            url_frag = dom_parser2.parse_dom(item, 'a', req='href')
            year_frag = dom_parser2.parse_dom(item, 'img', req='alt')
            is_episodes = dom_parser2.parse_dom(item, 'span', {'class': 'mli-eps'})

            is_movie_hit = video_type == VIDEO_TYPES.MOVIE and not is_episodes
            is_season_hit = video_type == VIDEO_TYPES.SEASON and is_episodes
            if not (is_movie_hit or is_season_hit):
                continue
            if not (title_frag and url_frag):
                continue

            match_url = url_frag[0].attrs['href']
            match_title = re.sub('</?h2>', '', title_frag[0].content)
            match_title = re.sub('\s+\d{4}$', '', match_title)
            if video_type == VIDEO_TYPES.SEASON and season:
                if not re.search('Season\s+0*%s$' % (season), match_title):
                    continue

            if not match_url.endswith('/'):
                match_url += '/'
            match_url = scraper_utils.urljoin(match_url, 'watch/')

            # the year is only read for movies, from the tail of the img alt text
            match_year = ''
            if video_type == VIDEO_TYPES.MOVIE and year_frag:
                year_match = re.search('\s*-\s*(\d{4})$', year_frag[0].attrs['alt'])
                if year_match:
                    match_year = year_match.group(1)

            match_norm_title = scraper_utils.normalize_title(match_title)
            titles_overlap = (norm_title in match_norm_title) or (match_norm_title in norm_title)
            year_ok = not year or not match_year or year == match_year
            if titles_overlap and year_ok:
                results.append({
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year,
                    'url': scraper_utils.pathify_url(match_url)
                })

        return results

Example #11

0

Show file

 def _get_episode_url(self, show_url, video):
     """Find the episode URL through the site's season endpoint.

     The show id is read from the ``value`` attribute of the 'icerikid'
     div on the show page and posted, together with the season number, to
     SEASON_URL.  The response is searched by ``_default_get_episode_url``.
     Returns None when the show page carries no show id.
     """
     show_page = self._http_get(scraper_utils.urljoin(self.base_url, show_url), cache_limit=24)
     id_nodes = dom_parser2.parse_dom(show_page, 'div', {'id': 'icerikid'}, req='value')
     if not id_nodes:
         return None

     episode_pattern = 'href="([^"]*-%s-sezon-%s-bolum[^"]*)"' % (video.season, video.episode)
     title_pattern = 'href="(?P<url>[^"]+)[^>]*class="realcuf".*?class="realcuf">(?P<title>[^<]*)'
     season_url = scraper_utils.urljoin(self.base_url, SEASON_URL)
     form = {'sezon_id': video.season, 'dizi_id': id_nodes[0].attrs['value'], 'tip': 'dizi', 'bolumid': ''}
     season_html = self._http_get(season_url, data=form, headers=XHR, cache_limit=2)
     return self._default_get_episode_url(season_html, video, episode_pattern, title_pattern)

Example #12

0

Show file

File: mehliz_scraper.py Project: idaviesfmts/hmdsm.repository

    def get_sources(self, video):
        """Return direct stream entries found behind the page's player iframe.

        The first iframe (inside the 'playex' div when that exists) is
        fetched with the video page as referer.  If the iframe page holds an
        ``<object data=...>`` it is resolved via ``parse_google``; otherwise
        ``parse_sources_list`` is applied to the iframe HTML.  Non-gvideo
        streams get User-Agent/Referer headers appended to their URL.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        player = dom_parser2.parse_dom(html, 'div', {'class': 'playex'})
        if player:
            html = player[0].content

        iframes = dom_parser2.parse_dom(html, 'iframe', req='src')
        if not iframes:
            return hosters

        iframe_url = iframes[0].attrs['src']
        if iframe_url.startswith('/'):
            iframe_url = scraper_utils.urljoin(self.base_url, iframe_url)
        html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.5)

        objects = dom_parser2.parse_dom(html, 'object', req='data')
        if objects:
            streams = {
                google_url: {'quality': scraper_utils.gv_get_quality(google_url), 'direct': True}
                for google_url in scraper_utils.parse_google(self, objects[0].attrs['data'])
            }
        else:
            streams = scraper_utils.parse_sources_list(self, html)

        for stream_url, values in streams.iteritems():
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            else:
                quality = values['quality']
                stream_url += scraper_utils.append_headers({
                    'User-Agent': scraper_utils.get_ua(),
                    'Referer': page_url
                })

            hosters.append({
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'direct': True
            })

        return hosters

Example #13

0

Show file

File: putlocker_scraper.py Project: idaviesfmts/hmdsm.repository

    def _get_episode_url(self, show_url, video):
        """Find the episode URL via the show's season page.

        The show page is searched for a link containing ``season=<season>``
        (leading zeros allowed, no further digit after it); returns None if
        there is none.  Otherwise the season page is loaded and the joined
        content of its 'episodeDetail' divs is passed to
        ``_default_get_episode_url``.
        """
        show_url = scraper_utils.urljoin(self.base_url, show_url)
        show_html = self._http_get(show_url, headers={'Referer': self.base_url}, cache_limit=24 * 7)
        season_match = re.search('href="([^"]*season=0*%s(?!\d))[^"]*' % (video.season), show_html)
        if not season_match:
            return None

        season_url = scraper_utils.urljoin(show_url, season_match.group(1))
        season_html = self._http_get(season_url, headers={'Referer': show_url}, cache_limit=2)
        details = dom_parser2.parse_dom(season_html, 'div', {'class': 'episodeDetail'})
        fragment = '\n'.join(detail.content for detail in details)

        episode_pattern = 'href="([^"]*/0*%s-0*%s/[^"]*)' % (video.season, video.episode)
        title_pattern = 'href="(?P<url>[^"]+)[^>]*>\s*(?P<title>.*?)\s*</a>'
        return self._default_get_episode_url(fragment, video, episode_pattern, title_pattern)

Example #14

0

Show file

    def get_sources(self, video):
        """Return hoster entries for the links ``get_gk_links`` extracts.

        The page quality comes from the 'status' ``<dd>`` mapped through
        QUALITY_MAP (QUALITIES.HIGH when absent or unmapped).  For episodes
        only the 'servers-list' div is scanned, otherwise the whole page.
        gvideo streams are marked direct and keep the reported quality; for
        all others the host is taken from the URL and the quality is
        recomputed by ``get_quality``.  Links without a host are skipped.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)

        status = dom_parser2.parse_dom(html, 'dd', {'class': 'status'})
        page_quality = QUALITY_MAP.get(status[0].content, QUALITIES.HIGH) if status else QUALITIES.HIGH

        if video.video_type == VIDEO_TYPES.EPISODE:
            servers = dom_parser2.parse_dom(html, 'div', {'id': 'servers-list'})
            gk_html = servers[0].content if servers else ''
        else:
            gk_html = html

        link_url = scraper_utils.urljoin(self.base_url, LINK_URL)
        player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
        gk_links = scraper_utils.get_gk_links(self, gk_html, page_url, page_quality, link_url, player_url)
        for stream_url, quality in gk_links.iteritems():
            host = scraper_utils.get_direct_hostname(self, stream_url)
            direct = host == 'gvideo'
            if not direct:
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.get_quality(video, host, quality)

            if host is None:
                continue

            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': direct
            })

        return hosters

Example #15

0

Show file

 def __add_torrent(self, hash_id):
     """Make sure the torrent with ``hash_id`` is among the transfers.

     Returns True when LIST_URL already reports a transfer whose
     lower-cased hash equals ``hash_id``; otherwise a magnet link is
     posted to ADD_URL and the result is True only if the response status
     is 'success'.
     """
     list_url = scraper_utils.urljoin(self.base_url, LIST_URL)
     listing = self._json_get(list_url, cache_limit=0)
     if any(transfer['hash'].lower() == hash_id for transfer in listing.get('transfers', [])):
         return True

     add_url = scraper_utils.urljoin(self.base_url, ADD_URL)
     payload = {'src': MAGNET_LINK % hash_id}
     response = self._json_get(add_url, data=payload, cache_limit=0)
     return response.get('status') == 'success'

Example #16

0

Show file

 def _get_episode_url(self, show_url, video):
     """Find the episode URL by following the show page's poster link.

     Returns None when the show page has no 'poster' div or that div has
     no link.  Otherwise the linked page is loaded and its 'servers' div
     (or the whole page when the div is missing) is searched by
     ``_default_get_episode_url`` for an "Ep <n>" style link.
     """
     landing_html = self._http_get(scraper_utils.urljoin(self.base_url, show_url), cache_limit=24)
     poster = dom_parser2.parse_dom(landing_html, 'div', {'class': 'poster'})
     if not poster:
         return None

     poster_links = dom_parser2.parse_dom(poster[0].content, 'a', req='href')
     if not poster_links:
         return None

     watch_url = scraper_utils.urljoin(self.base_url, poster_links[0].attrs['href'])
     watch_html = self._http_get(watch_url, cache_limit=2)
     servers = dom_parser2.parse_dom(watch_html, 'div', {'id': 'servers'})
     episode_pattern = 'href="([^"]+)[^>]+>[Ee][Pp]\s*(?:[Ss]0*%s-)?E?p?0*%s(?!\d)' % (video.season, video.episode)
     return self._default_get_episode_url(servers or watch_html, video, episode_pattern)

Example #17

0

Show file

 def __get_movie_sources(self, source_url):
     """Return hoster entries for a movie's torrents that are 'finished'.

     The movie id comes from the ``movie_id`` query parameter of
     ``source_url`` or, failing that, from ``__get_movie_id``.  The
     torrent list is read from MOVIE_DETAILS_URL and each torrent's
     status is looked up in the result of ``__get_hash_data``; torrents
     whose status is not 'finished' are skipped.  Returns a list of
     hoster dicts, empty when no movie id can be determined.
     """
     hosters = []
     query = kodi.parse_query(urlparse.urlparse(source_url).query)
     movie_id = query.get('movie_id') or self.__get_movie_id(source_url)
     if not movie_id:
         return hosters

     details_url = scraper_utils.urljoin(self.movie_base_url, MOVIE_DETAILS_URL)
     detail_data = self._json_get(details_url, params={'movie_id': movie_id}, cache_limit=24)
     try:
         torrents = detail_data['data']['movie']['torrents']
     except KeyError:
         torrents = []

     try:
         hashes = [entry['hash'].lower() for entry in torrents]
     except KeyError:
         hashes = []

     hash_data = self.__get_hash_data(hashes)
     for torrent in torrents:
         hash_id = torrent['hash'].lower()
         try:
             status = hash_data['hashes'][hash_id]['status']
         except KeyError:
             status = ''
         if status.lower() != 'finished':
             continue

         stream_url = 'hash_id=%s' % (hash_id)
         hoster = {
             'multi-part': False,
             'class': self,
             'views': None,
             'url': stream_url,
             'rating': None,
             'host': scraper_utils.get_direct_hostname(self, stream_url),
             'quality': QUALITY_MAP.get(torrent['quality'], QUALITIES.HD720),
             'direct': True
         }
         if 'size_bytes' in torrent:
             hoster['size'] = scraper_utils.format_size(torrent['size_bytes'], 'B')
         if torrent['quality'] == '3D':
             hoster['3D'] = True
         hosters.append(hoster)
     return hosters

Example #18

0

Show file

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Post a search for ``title`` and return the links in the result table.

        'searchin' is 'm' for movies and 't' otherwise.  Every link inside
        the first table of the 'search-page' div is split into title and
        year by ``scraper_utils.extra_year`` and kept unless both ``year``
        and the link's year are set and differ.  Returns a list of dicts
        with ``url``, ``title`` and ``year``.
        """
        results = []
        form = {
            'searchquery': title,
            'searchin': 'm' if video_type == VIDEO_TYPES.MOVIE else 't'
        }
        search_url = scraper_utils.urljoin(self.base_url, '/search')
        html = self._http_get(search_url, data=form, cache_limit=8)

        page = dom_parser2.parse_dom(html, 'div', {'class': 'search-page'})
        if not page:
            return results
        tables = dom_parser2.parse_dom(page[0].content, 'table')
        if not tables:
            return results

        for attrs, match_title_year in dom_parser2.parse_dom(tables[0].content, 'a', req='href'):
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if year and match_year and year != match_year:
                continue
            results.append({
                'url': scraper_utils.pathify_url(attrs['href']),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            })

        return results

Example #19

0

Show file

    def search(self, video_type, title, year, season=''):
        """Search ``/search/<title>.html`` and return matching movies/seasons.

        ``title`` is reduced to ASCII letters, digits and spaces before it is
        URL-quoted.  Movie searches keep 'ml-item' entries without an
        'mli-eps' span, season searches keep entries with one (and, when
        ``season`` is given, whose title ends in that season number).  The
        year is read from the item's 'jt-info' element; a hit is dropped
        only when both years are set and differ.
        """
        results = []
        title = re.sub('[^A-Za-z0-9 ]', '', title)
        search_url = scraper_utils.urljoin(self.base_url, '/search/') + '%s.html' % (urllib.quote_plus(title))
        html = self._http_get(search_url, cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'ml-item'}):
            title_frag = dom_parser2.parse_dom(item, 'span', {'class': 'mli-info'})
            url_frag = dom_parser2.parse_dom(item, 'a', req='href')
            year_hit = re.search('class="jt-info">(\d{4})<', item)
            is_episodes = dom_parser2.parse_dom(item, 'span', {'class': 'mli-eps'})

            is_movie_hit = video_type == VIDEO_TYPES.MOVIE and not is_episodes
            is_season_hit = video_type == VIDEO_TYPES.SEASON and is_episodes
            if not (is_movie_hit or is_season_hit):
                continue
            if not title_frag or not url_frag:
                continue

            match_url = url_frag[0].attrs['href']
            match_title = re.sub('</?h2>', '', title_frag[0].content)
            match_title = re.sub('\s+\d{4}$', '', match_title)
            if video_type == VIDEO_TYPES.SEASON and season:
                if not re.search('Season\s+0*%s$' % (season), match_title):
                    continue

            match_year = year_hit.group(1) if year_hit else ''
            if year and match_year and year != match_year:
                continue
            results.append({
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url)
            })

        return results

Example #20

0

Show file

    def get_sources(self, video):
        """Return embedded and download links found on a post page.

        Embedded links are the first iframe ``src`` of each 'stb-container-N'
        div and are always rated QUALITIES.HIGH.  Download links are the
        'download now' anchors of the first 'stb-download-body_box' div,
        paired positionally with the ``href="#"`` anchors of that div, whose
        tag-stripped text is passed to ``blog_get_quality`` as the label.
        """
        sources = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return sources

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, require_debrid=True, cache_limit=8)
        for container in dom_parser2.parse_dom(html, 'div', {'id': re.compile('stb-container-\d+')}):
            iframes = dom_parser2.parse_dom(container.content, 'iframe', req='src')
            if not iframes:
                continue
            embed_url = iframes[0].attrs['src']
            sources.append({
                'multi-part': False, 'url': embed_url, 'host': urlparse.urlparse(embed_url).hostname,
                'class': self, 'quality': QUALITIES.HIGH, 'views': None, 'rating': None, 'direct': False
            })

        download_box = dom_parser2.parse_dom(html, 'div', {'class': "stb-download-body_box"})
        if not download_box:
            return sources

        box_html = download_box[0].content
        labels = dom_parser2.parse_dom(box_html, 'a', {'href': '#'})
        download_links = [
            anchor for anchor in dom_parser2.parse_dom(box_html, 'a', req='href')
            if anchor.content.lower() == 'download now'
        ]
        for label_node, link_node in zip(labels, download_links):
            download_url = link_node.attrs['href']
            label = re.sub('</?[^>]*>', '', label_node.content)
            host = urlparse.urlparse(download_url).hostname
            quality = scraper_utils.blog_get_quality(video, label, host)
            sources.append({
                'multi-part': False, 'url': download_url, 'host': host, 'class': self,
                'quality': quality, 'views': None, 'rating': None, 'direct': False
            })

        return sources

Example #21

0

Show file

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the site's '/series.xml' catalogue for ``title``.

        Every ``<dizi>`` element whose ``<adi>`` text contains the normalized
        query title and which has a ``<url>`` child becomes a result with an
        empty year.  XML parse errors are logged as warnings and whatever
        was collected so far is returned.
        """
        results = []
        xml = self._http_get(scraper_utils.urljoin(self.base_url, '/series.xml'), cache_limit=24)
        if not xml:
            return results

        try:
            norm_title = scraper_utils.normalize_title(title)
            # the catalogue carries no year, so the year check below never rejects
            match_year = ''
            for element in ET.fromstring(xml).findall('.//dizi'):
                name = element.find('adi')
                if name is None:
                    continue
                if norm_title not in scraper_utils.normalize_title(name.text):
                    continue

                url = element.find('url')
                if url is None:
                    continue
                if year and match_year and year != match_year:
                    continue

                results.append({
                    'url': scraper_utils.pathify_url(url.text),
                    'title': scraper_utils.cleanse_title(name.text),
                    'year': ''
                })
        except (ParseError, ExpatError) as e:
            logger.log('Dizilab Search Parse Error: %s' % (e), log_utils.LOGWARNING)

        return results

Example #22

0

Show file

File: filmovizjia_scraper.py Project: idaviesfmts/hmdsm.repository

    def _get_episode_url(self, show_url, video):
        """Pick the episode id for ``video`` from the show's season dropdown.

        The season's block of the show page is cut out with a regex, then
        its 'epiloader' links are paired positionally with its 'airdate'
        spans.  Three candidate ids are tracked: the episode whose number
        matches, the one whose air-date text equals the video's air date,
        and the one whose title contains the normalized episode title.

        Unless ``scraper_utils.force_title(video)`` is true, the number
        match is used, replaced by the air-date match when the
        'airdate-fallback' setting is 'true', and that in turn by the title
        match when 'title-fallback' is 'true'.  With forced titles only the
        title match counts.

        Returns ``EP_URL % best_id``, or None when the season block is
        missing or no candidate was selected.
        """
        url = scraper_utils.urljoin(self.base_url, show_url)
        html = self._http_get(url, cache_limit=8)
        pattern = r"<a[^>]*class='dropdown-toggle'[^>]*>Season\s+%s<(.*?)<li\s+class='divider'>" % (video.season)
        match = re.search(pattern, html, re.DOTALL)
        if not match: return

        fragment = match.group(1)
        episodes = dom_parser2.parse_dom(fragment, 'a', {'id': 'epiloader'}, req='class')
        airdates = dom_parser2.parse_dom(fragment, 'span', {'class': 'airdate'})
        ep_airdate = video.ep_airdate.strftime('%Y-%m-%d') if isinstance(video.ep_airdate, datetime.date) else ''
        norm_title = scraper_utils.normalize_title(video.ep_title)
        num_id, airdate_id, title_id = '', '', ''
        for episode, airdate in zip(episodes, airdates):
            ep_id = episode.attrs['class']
            episode = episode.content

            # parse_dom results are (attrs, content) objects, so the date has
            # to be compared with the span's text; comparing the result
            # object itself to a string can never be equal.
            if ep_airdate and ep_airdate == airdate.content.strip(): airdate_id = ep_id
            match = re.search(r'(?:<span[^>]*>)?(\d+)\.\s*([^<]+)', episode)
            if match:
                ep_num, ep_title = match.groups()
                if int(ep_num) == int(video.episode): num_id = ep_id
                if norm_title and norm_title in scraper_utils.normalize_title(ep_title): title_id = ep_id

        best_id = ''
        if not scraper_utils.force_title(video):
            # later assignments deliberately override earlier ones
            if num_id: best_id = num_id
            if kodi.get_setting('airdate-fallback') == 'true' and airdate_id: best_id = airdate_id
            if kodi.get_setting('title-fallback') == 'true' and title_id: best_id = title_id
        else:
            if title_id: best_id = title_id

        if best_id:
            return EP_URL % (best_id)

Example #23

0

Show file

File: treasureen_scraper.py Project: idaviesfmts/hmdsm.repository

 def get_sources(self, video):
     """Collect hoster entries from the page's download message.

     Returns an empty list when no usable source URL exists for ``video`` or
     when the page has no ``<p class="download_message">`` element. Release
     metadata is parsed from the ``og:title`` meta tag when present; the
     quality of every link is derived from its ``height`` (480 when the
     metadata has no height). Links rejected by
     ``scraper_utils.excluded_link`` are skipped.
     """
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, require_debrid=True, cache_limit=.5)

     og_title = dom_parser2.parse_dom(html, 'meta', {'property': 'og:title'}, req='content')
     if og_title:
         meta = scraper_utils.parse_movie_link(og_title[0].attrs['content'])
     else:
         meta = {}

     message = dom_parser2.parse_dom(html, 'p', {'class': 'download_message'})
     if not message:
         return hosters

     for attrs, _content in dom_parser2.parse_dom(message[0].content, 'a', req='href'):
         link = attrs['href']
         if scraper_utils.excluded_link(link):
             continue

         hoster = {
             'multi-part': False,
             'host': urlparse.urlparse(link).hostname,
             'class': self,
             'views': None,
             'url': link,
             'rating': None,
             'quality': scraper_utils.height_get_quality(meta.get('height', 480)),
             'direct': False
         }
         # The container format is optional in the parsed metadata.
         if 'format' in meta:
             hoster['format'] = meta['format']
         hosters.append(hoster)

     return hosters

Example #24

0

Show file

File: onlinedizi_scraper.py Project: idaviesfmts/hmdsm.repository

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the '/yabanci-diziler/' listing page for shows matching ``title``.

        Every ``<div class="category-post">`` that has both a link and an
        ``<h3>`` heading is considered once per distinct URL. An entry matches
        when the normalized ``title`` is contained in its normalized heading.
        Returns a list of ``{'url', 'title', 'year'}`` dicts; ``year`` is
        always the empty string.
        """
        listing_url = scraper_utils.urljoin(self.base_url, '/yabanci-diziler/')
        html = self._http_get(listing_url, cache_limit=48)
        wanted = scraper_utils.normalize_title(title)

        found = []
        visited = set()
        for _attrs, post in dom_parser2.parse_dom(html, 'div', {'class': 'category-post'}):
            links = dom_parser2.parse_dom(post, 'a', req='href')
            headings = dom_parser2.parse_dom(post, 'h3')
            if not (links and headings):
                continue

            show_url = scraper_utils.pathify_url(links[0].attrs['href'])
            show_title = headings[0].content
            # Only the first post seen for a given URL is evaluated.
            if show_url in visited:
                continue
            visited.add(show_url)

            if norm_in(wanted, show_title) if False else wanted in scraper_utils.normalize_title(show_title):
                found.append({
                    'url': show_url,
                    'title': scraper_utils.cleanse_title(show_title),
                    'year': ''
                })

        return found

Example #25

0

Show file

File: filmovizjia_scraper.py Project: idaviesfmts/hmdsm.repository

    def get_sources(self, video):
        """Build hoster entries from the ``<tr id="linktr">`` rows of the page.

        For each row the stream URL is the anchor's ``href`` when it starts
        with 'http', otherwise the ``id`` of the row's span (if any). The host
        is the URL's hostname for http URLs, otherwise the content of the
        row's ``<h9>`` element. Rows lacking either a stream URL or a host are
        skipped. Returns an empty list when ``video`` has no usable source URL.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        for _attrs, row in dom_parser2.parse_dom(html, 'tr', {'id': 'linktr'}):
            spans = dom_parser2.parse_dom(row, 'span', req='id')
            anchors = dom_parser2.parse_dom(row, 'a', req='href')

            stream_url = ''
            if anchors and anchors[0].attrs['href'].startswith('http'):
                stream_url = anchors[0].attrs['href']
            elif spans:
                stream_url = spans[0].attrs['id']

            if stream_url.startswith('http'):
                host = urlparse.urlparse(stream_url).hostname
            else:
                # Non-http values carry no hostname; use the row's <h9> text.
                labels = dom_parser2.parse_dom(row, 'h9')
                host = labels[0].content if labels else ''

            if not (stream_url and host):
                continue

            hosters.append({
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': False
            })

        return hosters

Example #26

0

Show file

File: moviehut_scraper.py Project: idaviesfmts/hmdsm.repository

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search via the site's '/bestmatch-fund-movies-<slug>.html' page.

        The slug is ``title`` with spaces turned into hyphens, every character
        other than ASCII letters, digits and hyphens removed, and lowercased.
        Each ``<div class="thumbsTitle">`` link is split into title and year
        with ``scraper_utils.extra_year``; an entry is kept unless both
        ``year`` and the parsed year are non-empty and differ. Returns a list
        of ``{'url', 'title', 'year'}`` dicts.
        """
        found = []
        url_template = scraper_utils.urljoin(self.base_url, '/bestmatch-fund-movies-%s.html')
        slug = re.sub('[^A-Za-z0-9-]', '', title.replace(' ', '-')).lower()
        search_url = url_template % (slug)
        html = self._http_get(search_url, cache_limit=1)

        for _attrs, thumb in dom_parser2.parse_dom(html, 'div', {'class': 'thumbsTitle'}):
            links = dom_parser2.parse_dom(thumb, 'a', req='href')
            if not links:
                continue

            link = links[0]
            match_url = link.attrs['href']
            match_title, match_year = scraper_utils.extra_year(link.content)
            # Reject only when both years are known and disagree.
            if year and match_year and year != match_year:
                continue

            found.append({
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            })

        return found

Example #27

0

Show file

    def get_sources(self, video):
        """Return direct-stream hoster entries for ``video``.

        Sources are gathered from three private helpers (posts, ajax and
        embedded) into one mapping of stream URL to quality; later helpers
        overwrite earlier entries for the same URL. Each stream URL gets a
        User-Agent header suffix appended via ``scraper_utils.append_headers``.
        Returns an empty list when ``video`` has no usable source URL.
        """
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH:
            return hosters

        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)

        sources = self.__get_posts(html)
        for extra in (self.__get_ajax(html, page_url), self.__get_embedded(html, page_url)):
            sources.update(extra)

        for source in sources:
            header_suffix = scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            hosters.append({
                'multi-part': False,
                'host': scraper_utils.get_direct_hostname(self, source),
                'class': self,
                'quality': sources[source],
                'views': None,
                'rating': None,
                'url': source + header_suffix,
                'direct': True,
                'subs': 'Turkish subtitles'
            })

        return hosters

Example #28

0

Show file

File: flixanity_scraper.py Project: idaviesfmts/hmdsm.repository

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Query the site's search API and return entries matching the media type.

        A token is obtained first; when none is available an empty list is
        returned. The request carries the title, a millisecond timestamp, the
        token and several values produced by private helpers. Items whose
        ``meta`` field does not start with 'TV SHOW' (for TV show / episode
        searches) or 'MOVIE' (otherwise) are ignored, as are items whose year
        is known and differs from a non-empty ``year``. '/show/' in an item's
        permalink is replaced with '/tv-show/' in the returned URL.
        """
        found = []
        self.__get_token()
        if self.__token is None:
            return found

        api_path, u = self.__get_search_url()
        search_url = scraper_utils.urljoin(API_BASE_URL, api_path)
        timestamp = int(time.time() * 1000)
        s = self.__get_s()
        query = {
            'q': title,
            'limit': '100',
            'timestamp': timestamp,
            'verifiedCheck': self.__token,
            'set': s,
            'rt': self.__get_rt(self.__token + s),
            'sl': self.__get_sl(u)
        }
        html = self._http_get(search_url, data=query, headers={'Referer': self.base_url}, cache_limit=1)

        wants_tv = video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]
        media_type = 'TV SHOW' if wants_tv else 'MOVIE'

        for item in scraper_utils.parse_json(html, search_url):
            if not item['meta'].upper().startswith(media_type):
                continue

            match_year = ''
            if 'year' in item and item['year']:
                match_year = str(item['year'])
            # Reject only when both years are known and disagree.
            if year and match_year and year != match_year:
                continue

            match_title = scraper_utils.cleanse_title(item['title'])
            match_url = scraper_utils.pathify_url(item['permalink'].replace('/show/', '/tv-show/'))
            found.append({'title': match_title, 'url': match_url, 'year': match_year})

        return found

Example #29

0

Show file

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        """Search the '/arsiv' page for entries whose title contains ``title``.

        Inside the first ``<div class="ts-list-content">`` the
        ``<h1 class="ts-list-name">`` headings are paired positionally with the
        ``<ul>`` elements; the heading supplies link and title, the list
        supplies a four-digit year taken from a ``<span>`` when present.
        Matching is by normalized-title containment only; results are not
        filtered by ``year``. Returns a list of ``{'url', 'title', 'year'}``
        dicts, empty when the container div is missing.
        """
        found = []
        archive_url = scraper_utils.urljoin(self.base_url, '/arsiv')
        html = self._http_get(archive_url, cache_limit=48)
        wanted = scraper_utils.normalize_title(title)

        container = dom_parser2.parse_dom(html, 'div', {'class': 'ts-list-content'})
        if not container:
            return found

        listing = container[0].content
        headings = dom_parser2.parse_dom(listing, 'h1', {'class': 'ts-list-name'})
        detail_lists = dom_parser2.parse_dom(listing, 'ul')
        for heading, detail in zip(headings, detail_lists):
            links = dom_parser2.parse_dom(heading.content, 'a', req='href')
            year_match = re.search('<span>(\d{4})</span>', detail.content)
            if not links:
                continue

            link = links[0]
            match_title = link.content
            if wanted not in scraper_utils.normalize_title(match_title):
                continue

            found.append({
                'url': scraper_utils.pathify_url(link.attrs['href']),
                'title': scraper_utils.cleanse_title(match_title),
                'year': year_match.group(1) if year_match else ''
            })

        return found

Example #30

0

Show file

 def get_sources(self, video):
     """Return hoster entries for the links found in the page's posts.

     The links come from a private helper as a mapping of link URL to a dict
     holding a ``release`` name. The release name is parsed as a movie or an
     episode link depending on ``video.video_type``, and the parsed ``height``
     determines the quality. Links rejected by ``scraper_utils.excluded_link``
     are skipped. Returns an empty list when ``video`` has no usable source
     URL.
     """
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH:
         return hosters

     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, require_debrid=True, cache_limit=.5)
     for link, info in self.__get_post_links(html).iteritems():
         if scraper_utils.excluded_link(link):
             continue

         hostname = urlparse.urlparse(link).hostname
         is_movie = video.video_type == VIDEO_TYPES.MOVIE
         parse_release = scraper_utils.parse_movie_link if is_movie else scraper_utils.parse_episode_link
         meta = parse_release(info['release'])

         hoster = {
             'multi-part': False,
             'host': hostname,
             'class': self,
             'views': None,
             'url': link,
             'rating': None,
             'quality': scraper_utils.height_get_quality(meta['height']),
             'direct': False
         }
         # The container format is optional in the parsed metadata.
         if 'format' in meta:
             hoster['format'] = meta['format']
         hosters.append(hoster)

     return hosters