Code example #1
 def get_sources(self, video):
     source_url = self.get_url(video)
     sources = []
     if not source_url or source_url == FORCE_NO_MATCH: return sources
     url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(url, cache_limit=.5)
     is_3d = False
     page_quality = QUALITIES.HD720
     title = dom_parser2.parse_dom(html, 'title')
     if title:
         title = title[0].content
         match = re.search('(\d{3,})p', title)
         if match:
             page_quality = scraper_utils.height_get_quality(match.group(1))
         
         is_3d = True if re.search('\s+3D\s+', title) else False
     
     fragment = dom_parser2.parse_dom(html, 'div', {'class': 'entry'})
     if fragment:
         for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'h3'):
             for attrs, _content in dom_parser2.parse_dom(item, 'a', req='href'):
                 stream_url = attrs['href']
                 host = urlparse.urlparse(stream_url).hostname
                 source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': page_quality, 'views': None, 'rating': None, 'direct': False}
                 source['format'] = 'x265'
                 source['3D'] = is_3d
                 sources.append(source)
                 
     return sources
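Note: every scraper in these examples returns source/hoster dictionaries of the same shape. The field list below is inferred from the examples themselves; optional keys such as 'format', '3D', 'extra', 'subs', and 'size' appear only where a site exposes the extra data:

     # shape of a source/hoster entry as used throughout these examples
     source = {'multi-part': False,   # always False in these scrapers
               'url': stream_url,     # hoster page or direct media URL
               'host': host,          # hostname, used for quality/host ranking
               'class': self,         # back-reference to the scraper instance
               'quality': quality,    # a QUALITIES constant
               'views': None,         # view count when the site exposes one
               'rating': None,        # percentage rating when available
               'direct': False}       # True when 'url' is directly playable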
Code example #2
    def search(self, video_type, title, year, season=''):
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/movie/search/')
        title = re.sub('[^A-Za-z0-9 ]', '', title)
        title = re.sub('\s+', '-', title)
        search_url += title
        html = self._http_get(search_url, cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'ml-item'}):
            match_title = dom_parser2.parse_dom(item, 'span', {'class': 'mli-info'})
            match_url = dom_parser2.parse_dom(item, 'a', req='href')
            match_year = ''  # never populated here, so the year check below always passes

            if not match_title or not match_url: continue
            match_url = match_url[0].attrs['href']
            match_title = match_title[0].content
            is_season = re.search('season\s+(\d+)', match_title, re.I)
            if (video_type == VIDEO_TYPES.MOVIE and not is_season) or (video_type == VIDEO_TYPES.SEASON and is_season):
                match_title = re.sub('</?h\d+>', '', match_title)
                if video_type == VIDEO_TYPES.SEASON:
                    if season and int(is_season.group(1)) != int(season): continue
                
                match_url += '/watching.html'
                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                    results.append(result)

        return results
Code example #3
 def get_sources(self, video):
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=.5)
     fragment = dom_parser2.parse_dom(html, 'div', {'class': 'film-container'})
     if fragment:
         iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
         if iframe_url:
             iframe_url = scraper_utils.urljoin(self.base_url, iframe_url[0].attrs['src'])
             headers = {'Referer': page_url}
             html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
             sources = scraper_utils.parse_sources_list(self, html)
             for source in sources:
                 quality = sources[source]['quality']
                 host = scraper_utils.get_direct_hostname(self, source)
                 stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': iframe_url})
                 hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                 match = re.search('(\d+[a-z]bps)', source)
                 if match:
                     hoster['extra'] = match.group(1)
                 hosters.append(hoster)
                     
     hosters.sort(key=lambda x: x.get('extra', ''), reverse=True)
     return hosters
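Note: the source + scraper_utils.append_headers({...}) idiom above builds a Kodi-style URL in which playback headers ride along after a pipe ('url|Header1=v1&Header2=v2'). A minimal sketch of such a helper under that assumption (the addon's actual implementation may differ in detail):

     import urllib

     def append_headers(headers):
         # render {'User-Agent': ua, 'Referer': ref} as '|User-Agent=...&Referer=...'
         return '|%s' % ('&'.join(['%s=%s' % (key, urllib.quote_plus(str(headers[key]))) for key in headers]))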
Code example #4
    def _get_episode_url(self, show_url, video):
        url = scraper_utils.urljoin(self.base_url, show_url)
        html = self._http_get(url, cache_limit=2)
        if html:
            force_title = scraper_utils.force_title(video)
            episodes = dom_parser2.parse_dom(html, 'div', {'class': 'el-item'})
            if not force_title:
                episode_pattern = 'href="([^"]*-[sS]%02d[eE]%02d(?!\d)[^"]*)' % (int(video.season), int(video.episode))
                match = re.search(episode_pattern, html)
                if match:
                    return scraper_utils.pathify_url(match.group(1))
                
                if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                    airdate_pattern = '%02d-%02d-%d' % (video.ep_airdate.day, video.ep_airdate.month, video.ep_airdate.year)
                    for episode in episodes:
                        episode = episode.content
                        ep_url = dom_parser2.parse_dom(episode, 'a', req='href')
                        ep_airdate = dom_parser2.parse_dom(episode, 'div', {'class': 'date'})
                        if ep_url and ep_airdate:
                            ep_airdate = ep_airdate[0].content.strip()
                            if airdate_pattern == ep_airdate:
                                return scraper_utils.pathify_url(ep_url[0].attrs['href'])

            if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
                norm_title = scraper_utils.normalize_title(video.ep_title)
                for episode in episodes:
                    episode = episode.content
                    ep_url = dom_parser2.parse_dom(episode, 'a', req='href')
                    ep_title = dom_parser2.parse_dom(episode, 'div', {'class': 'e-name'})
                    if ep_url and ep_title and norm_title == scraper_utils.normalize_title(ep_title[0].content):
                        return scraper_utils.pathify_url(ep_url[0].attrs['href'])
Code example #5
 def _get_episode_url(self, show_url, video):
     force_title = scraper_utils.force_title(video)
     title_fallback = kodi.get_setting('title-fallback') == 'true'
     norm_title = scraper_utils.normalize_title(video.ep_title)
     page_url = [show_url]
     too_old = False
     while page_url and not too_old:
         url = scraper_utils.urljoin(self.base_url, page_url[0])
         html = self._http_get(url, require_debrid=True, cache_limit=1)
         headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
         posts = [r.content for r in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
         for heading, post in zip(headings, posts):
             if self.__too_old(post):
                 too_old = True
                 break
             if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                 url, title = heading
                 if not force_title:
                     if scraper_utils.release_check(video, title, require_title=False):
                         return scraper_utils.pathify_url(url)
                 else:
                     if title_fallback and norm_title:
                         match = re.search('<strong>(.*?)</strong>', post)
                         if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                             return scraper_utils.pathify_url(url)
             
         page_url = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
         if page_url: page_url = [page_url[0].attrs['href']]
Code example #6
 def __get_source_page(self, video_type, page_url):
     match = re.search('/movie/(.*?)-(\d+)\.html', page_url)
     if not match: return '', '', ''
     slug, movie_id = match.groups()
     
     vid_type = 'movie' if video_type == VIDEO_TYPES.MOVIE else 'series'
     qp_url = QP_URL.format(slug=slug, movie_id=movie_id, vid_type=vid_type)
     qp_url = scraper_utils.urljoin(self.base_url, qp_url)
     headers = {'Referer': scraper_utils.urljoin(self.base_url, page_url)}
     headers.update(XHR)
     html = self._http_get(qp_url, headers=headers, cache_limit=8)
     watching_url = dom_parser2.parse_dom(html, 'a', {'title': re.compile('View all episodes')}, req='href')
     if not watching_url: return '', '', ''
     
     watching_url = watching_url[0].attrs['href']
     page_html = self._http_get(watching_url, headers={'Referer': scraper_utils.urljoin(self.base_url, page_url)}, cache_limit=8)
     for attrs, _content in dom_parser2.parse_dom(page_html, 'img', {'class': 'hidden'}, req='src'):
         _img = self._http_get(attrs['src'], headers={'Referer': watching_url}, cache_limit=8)
     
     sl_url = SL_URL.format(movie_id=movie_id)
     sl_url = scraper_utils.urljoin(self.base_url, sl_url)
     html = self._http_get(sl_url, headers=headers, cache_limit=8)
     js_data = scraper_utils.parse_json(html, sl_url)
     try: html = js_data['html']
     except: html = ''
     return movie_id, watching_url, html
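Note: the bare try/except above tolerates parse_json() handing back a payload without an 'html' key. A plausible sketch of that helper, assuming it is just a logging wrapper around json.loads (not verified against the addon):

     import json

     def parse_json(html, url=''):
         # parse a JSON response, logging and returning {} on a bad payload
         try:
             return json.loads(html)
         except ValueError:
             logger.log('Invalid JSON returned for: %s' % (url), log_utils.LOGWARNING)
             return {}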
Code example #7
    def search(self, video_type, title, year, season=''):
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/search/')
        title = re.sub('[^A-Za-z0-9 ]', '', title)
        search_url += '%s.html' % (urllib.quote_plus(title))
        html = self._http_get(search_url, cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'ml-item'}):
            match_title = dom_parser2.parse_dom(item, 'span', {'class': 'mli-info'})
            match_url = dom_parser2.parse_dom(item, 'a', req='href')
            match_year = re.search('class="jt-info">(\d{4})<', item)
            is_episodes = dom_parser2.parse_dom(item, 'span', {'class': 'mli-eps'})
            
            if (video_type == VIDEO_TYPES.MOVIE and not is_episodes) or (video_type == VIDEO_TYPES.SEASON and is_episodes):
                if not match_title or not match_url: continue
                
                match_url = match_url[0].attrs['href']
                match_title = match_title[0].content
                match_title = re.sub('</?h2>', '', match_title)
                match_title = re.sub('\s+\d{4}$', '', match_title)
                if video_type == VIDEO_TYPES.SEASON:
                    if season and not re.search('Season\s+0*%s$' % (season), match_title): continue
                    
                match_year = match_year.group(1) if match_year else ''
                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                    results.append(result)

        return results
Code example #8
    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'repro'})
        if not fragment: return hosters
        
        iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
        if not iframe_url: return hosters
        iframe_url = iframe_url[0].attrs['src']
        
        html = self._http_get(iframe_url, cache_limit=.5)
        for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'id': 'botones'}):
            for attrs, _content in dom_parser2.parse_dom(fragment, 'a', req='href'):
                media_url = attrs['href']
                media_url = media_url.replace(' ', '')
                if self.get_name().lower() in media_url:
                    headers = {'Referer': iframe_url}  # iframe_url is already a string here; [0] would take only its first character
                    html = self._http_get(media_url, headers=headers, cache_limit=.5)
                    hosters += self.__get_page_links(html)
                    hosters += self.__get_pk_links(html)
#                     hosters += self.__get_gk_links(html, iframe_url)
                else:
                    host = urlparse.urlparse(media_url).hostname
                    hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': QUALITIES.HD720, 'views': None, 'rating': None, 'url': media_url, 'direct': False}
                    hosters.append(hoster)
            
        return hosters
Code example #9
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/search')
        search_type = 'movies' if video_type == VIDEO_TYPES.MOVIE else 'series'
        html = self._http_get(search_url, params={'query': title.lower(), 'type': search_type}, cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'one_movie-item'}):
            match_url = dom_parser2.parse_dom(item, 'a', req='href')
            match_title = dom_parser2.parse_dom(item, 'img', req='alt')
            media_type = dom_parser2.parse_dom(item, 'div', {'class': 'movie-series'})
            if not media_type:
                media_type = VIDEO_TYPES.MOVIE
            elif media_type[0].content == 'TV SERIE':
                media_type = VIDEO_TYPES.TVSHOW
                
            if match_url and match_title and video_type == media_type:
                match_url = match_url[0].attrs['href']
                match_title = match_title[0].attrs['alt']
                
                match_year = re.search('-(\d{4})-', match_url)
                if match_year:
                    match_year = match_year.group(1)
                else:
                    match_year = ''
        
                if not year or not match_year or year == match_year:
                    result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)

        return results
Code example #10
 def get_sources(self, video):
     source_url = self.get_url(video)
     hosters = []
     sources = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=1)
     fragment = dom_parser2.parse_dom(html, 'div', {'class': 'player'})
     if not fragment: return hosters
     
     iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
     if not iframe_url: return hosters
     
     html = self._http_get(iframe_url[0].attrs['src'], cache_limit=.25)
     sources.append(self.__get_embedded_sources(html))
     sources.append(self.__get_linked_sources(html))
     for source in sources:
         for stream_url in source['sources']:
             host = scraper_utils.get_direct_hostname(self, stream_url)
             if host == 'gvideo':
                 stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
                 quality = scraper_utils.gv_get_quality(stream_url)
                 hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                 hoster['subs'] = source.get('subs', True)
                 hosters.append(hoster)
 
     return hosters
Code example #11
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/search/')
        search_url += urllib.quote_plus(title)
        html = self._http_get(search_url, cache_limit=1)
        for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'inner'}):
            name = dom_parser2.parse_dom(fragment, 'div', {'class': 'name'})
            if not name: continue
            
            match = dom_parser2.parse_dom(name[0].content, 'a', req='href')
            if not match: continue
            
            match_url, match_title_year = match[0].attrs['href'], match[0].content
            if 'tv-series' in match_url and video_type == VIDEO_TYPES.MOVIE: continue
            
            match_title_year = re.sub('</?[^>]*>', '', match_title_year)
            match_title_year = re.sub('[Ww]atch\s+[Mm]ovie\s*', '', match_title_year)
            match_title_year = match_title_year.replace('&#8217;', "'")
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if not match_year:
                year_span = dom_parser2.parse_dom(fragment, 'span', {'class': 'year'})
                if year_span:
                    year_text = dom_parser2.parse_dom(year_span[0].content, 'a')
                    if year_text:
                        match_year = year_text[0].content.strip()

            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title), 'url': scraper_utils.pathify_url(match_url), 'year': match_year}
                results.append(result)

        return results
Code example #12
 def get_sources(self, video):
     hosters = []
     source_url = self.get_url(video)
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=.5)
     
     best_quality = QUALITIES.HIGH
     fragment = dom_parser2.parse_dom(html, 'div', {'class': 'entry'})
     if fragment:
         for match in re.finditer('href="[^"]*/movies-quality/[^"]*[^>]*>([^<]+)', fragment[0].content, re.I):
             quality = Q_MAP.get(match.group(1).upper(), QUALITIES.HIGH)
             if Q_ORDER[quality] > Q_ORDER[best_quality]:
                 best_quality = quality
                 
     sources = []
     for attrs, _content in dom_parser2.parse_dom(html, 'a', req='data-vid'):
         try:
             vid_url = dom_parser2.parse_dom(scraper_utils.cleanse_title(attrs['data-vid']), 'iframe', req='src')
             sources.append(vid_url[0])
         except:
             pass
         
     fragment = dom_parser2.parse_dom(html, 'table', {'class': 'additional-links'})
     if fragment:
         sources += dom_parser2.parse_dom(fragment[0].content, 'a', req='href')
             
     for stream_url in sources:
         stream_url = stream_url.attrs.get('href') or stream_url.attrs.get('src')
         host = urlparse.urlparse(stream_url).hostname
         quality = scraper_utils.get_quality(video, host, best_quality)
         hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': False}
         hosters.append(hoster)
     return hosters
Code example #13
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/results')
        params = {'q': title}
        referer = search_url + '?' + urllib.urlencode(params)
        headers = {'Referer': referer}
        headers.update(XHR)
        _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'), headers=headers, method='POST', cache_limit=0)

        cookies = {'begin_referer': referer, 'prounder': 1}
        html = self._http_get(search_url, params=params, cookies=cookies, cache_limit=8)
        if any('jquery.js' in match.attrs['src'] for match in dom_parser2.parse_dom(html, 'script', req='src')):
            html = self._http_get(search_url, params=params, cookies=cookies, cache_limit=0)
            
        for _attrs, result in dom_parser2.parse_dom(html, 'div', {'class': 'cell'}):
            title_frag = dom_parser2.parse_dom(result, 'div', {'class': 'video_title'})
            year_frag = dom_parser2.parse_dom(result, 'div', {'class': 'video_quality'})
            if not title_frag: continue
            match = dom_parser2.parse_dom(title_frag[0].content, 'a', req='href')
            if not match: continue
            match_url = match[0].attrs['href']
            match_title = match[0].content
            try:
                match = re.search('\s+(\d{4})\s+', year_frag[0].content)
                match_year = match.group(1)
            except:
                match_year = ''

            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
        return results
Code example #14
File: 2ddl_scraper.py Project: CYBERxNUKE/xbmc-addon
 def _get_episode_url(self, show_url, video):
     force_title = scraper_utils.force_title(video)
     title_fallback = kodi.get_setting('title-fallback') == 'true'
     norm_title = scraper_utils.normalize_title(video.ep_title)
     page_url = [show_url]
     too_old = False
     while page_url and not too_old:
         html = self._http_get(page_url[0], require_debrid=True, cache_limit=1)
         for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
             if self.__too_old(post):
                 too_old = True
                 break
             if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                 match = dom_parser2.parse_dom(post, 'a', req='href')
                 if match:
                     url, title = match[0].attrs['href'], match[0].content
                     if not force_title:
                         if scraper_utils.release_check(video, title, require_title=False):
                             return scraper_utils.pathify_url(url)
                     else:
                         if title_fallback and norm_title:
                             match = re.search('</strong>(.*?)</p>', post)
                             if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                 return scraper_utils.pathify_url(url)
             
         page_url = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
         if page_url: page_url = [page_url[0].attrs['href']]
Code example #15
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/movies.php')
        cookies = {'onlylanguage': 'en', 'lang': 'en'}
        params = {'list': 'search', 'search': title}
        html = self._http_get(search_url, params=params, cookies=cookies, cache_limit=8)
        for _attrs, content in dom_parser2.parse_dom(html, 'TR', {'id': re.compile('coverPreview\d+')}):
            match = dom_parser2.parse_dom(content, 'a', req='href')
            if not match: continue
            
            match_url, match_title = match[0].attrs['href'], match[0].content
            is_show = re.search('\(tvshow\)', match_title, re.I)
            if (video_type == VIDEO_TYPES.MOVIE and is_show) or (video_type == VIDEO_TYPES.TVSHOW and not is_show):
                continue

            match_title = match_title.replace('(TVshow)', '')
            match_title = match_title.strip()
            
            match_year = ''
            for _attrs, div in dom_parser2.parse_dom(content, 'div'):
                match = re.match('\s*(\d{4})\s*', div)
                if match:
                    match_year = match.group(1)

            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
        return results
Code example #16
 def _get_episode_url(self, show_url, video):
     url = scraper_utils.urljoin(self.base_url, show_url)
     html = self._http_get(url, cache_limit=2)
     episode_pattern = 'href="([^"]+-s0*%se0*%s(?!\d)[^"]*)' % (video.season, video.episode)
     parts = dom_parser2.parse_dom(html, 'ul', {'class': 'episode_list'})
     fragment = '\n'.join(part.content for part in parts)
     result = self._default_get_episode_url(fragment, video, episode_pattern)
     if result: return result
     
     ep_urls = [r.attrs['href'] for r in dom_parser2.parse_dom(fragment, 'a', req='href')]
     ep_dates = [r.content for r in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_air_d'})]
     ep_titles = [r.content for r in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_name'})]
     force_title = scraper_utils.force_title(video)
     if not force_title and kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
         for ep_url, ep_date in zip(ep_urls, ep_dates):
             logger.log('Quikr Ep Airdate Matching: %s - %s - %s' % (ep_url, ep_date, video.ep_airdate), log_utils.LOGDEBUG)
             if video.ep_airdate == scraper_utils.to_datetime(ep_date, '%Y-%m-%d').date():
                 return scraper_utils.pathify_url(ep_url)
 
     if force_title or kodi.get_setting('title-fallback') == 'true':
         norm_title = scraper_utils.normalize_title(video.ep_title)
         for ep_url, ep_title in zip(ep_urls, ep_titles):
             ep_title = re.sub('<span>.*?</span>\s*', '', ep_title)
             logger.log('Quikr Ep Title Matching: %s - %s - %s' % (ep_url.encode('utf-8'), ep_title.encode('utf-8'), video.ep_title), log_utils.LOGDEBUG)
             if norm_title == scraper_utils.normalize_title(ep_title):
                 return scraper_utils.pathify_url(ep_url)
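Note: scraper_utils.to_datetime(ep_date, '%Y-%m-%d') parses the airdate text before the date comparison above. A minimal sketch, assuming it wraps datetime.strptime with the common workaround for the TypeError strptime can raise inside Kodi's embedded interpreter:

     import datetime
     import time

     def to_datetime(dt_str, date_format):
         try:
             return datetime.datetime.strptime(dt_str, date_format)
         except TypeError:
             # strptime can break after thread use in embedded Python
             return datetime.datetime(*(time.strptime(dt_str, date_format)[0:6]))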
Code example #17
    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.25)
        for _attrs, link in dom_parser2.parse_dom(html, 'div', {'class': 'ldr-item'}):
            stream_url = dom_parser2.parse_dom(link, 'a', req='data-actuallink')
            
            try:
                watched = dom_parser2.parse_dom(link, 'div', {'class': 'click-count'})
                match = re.search(' (\d+) ', watched[0].content)
                views = match.group(1)
            except:
                views = None
                    
            try:
                score = dom_parser2.parse_dom(link, 'div', {'class': 'point'})
                score = int(score[0].content)
                rating = score * 10 if score else None
            except:
                rating = None
            
            if stream_url:
                stream_url = stream_url[0].attrs['data-actuallink'].strip()
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': views, 'rating': rating, 'url': stream_url, 'direct': False}
                hosters.append(hoster)

        return hosters
Code example #18
    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=8)
        for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'movieplay'}):
            iframe_src = dom_parser2.parse_dom(fragment, 'iframe', req='src')
            if iframe_src:
                iframe_src = iframe_src[0].attrs['src']
                if re.search('o(pen)?load', iframe_src, re.I):
                    meta = scraper_utils.parse_movie_link(iframe_src)
                    quality = scraper_utils.height_get_quality(meta['height'])
                    links = {iframe_src: {'quality': quality, 'direct': False}}
                else:
                    links = self.__get_links(iframe_src, url)

                for link in links:
                    direct = links[link]['direct']
                    quality = links[link]['quality']
                    if direct:
                        host = scraper_utils.get_direct_hostname(self, link)
                        if host == 'gvideo':
                            quality = scraper_utils.gv_get_quality(link)
                        stream_url = link + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': url})
                    else:
                        host = urlparse.urlparse(link).hostname
                        stream_url = link
                        
                    source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                    hosters.append(source)

        return hosters
Code example #19
    def _get_episode_url(self, show_url, video):
        url = scraper_utils.urljoin(self.base_url, show_url)
        html = self._http_get(url, cache_limit=8)
        pattern = "<a[^>]*class='dropdown-toggle'[^>]*>Season\s+%s<(.*?)<li\s+class='divider'>" % (video.season)
        match = re.search(pattern, html, re.DOTALL)
        if not match: return
        
        fragment = match.group(1)
        episodes = dom_parser2.parse_dom(fragment, 'a', {'id': 'epiloader'}, req='class')
        airdates = dom_parser2.parse_dom(fragment, 'span', {'class': 'airdate'})
        ep_airdate = video.ep_airdate.strftime('%Y-%m-%d') if isinstance(video.ep_airdate, datetime.date) else ''
        norm_title = scraper_utils.normalize_title(video.ep_title)
        num_id, airdate_id, title_id = '', '', ''
        for episode, airdate in zip(episodes, airdates):
            ep_id = episode.attrs['class']
            episode = episode.content
            
            if ep_airdate and ep_airdate == airdate.content.strip(): airdate_id = ep_id  # compare against the span's text, not the DomMatch object
            match = re.search('(?:<span[^>]*>)?(\d+)\.\s*([^<]+)', episode)
            if match:
                ep_num, ep_title = match.groups()
                if int(ep_num) == int(video.episode): num_id = ep_id
                if norm_title and norm_title in scraper_utils.normalize_title(ep_title): title_id = ep_id

        best_id = ''
        if not scraper_utils.force_title(video):
            if num_id: best_id = num_id
            if kodi.get_setting('airdate-fallback') == 'true' and airdate_id: best_id = airdate_id
            if kodi.get_setting('title-fallback') == 'true' and title_id: best_id = title_id
        else:
            if title_id: best_id = title_id
        
        if best_id:
            return EP_URL % (best_id)
Code example #20
File: 2ddl_scraper.py Project: CYBERxNUKE/xbmc-addon
 def search(self, video_type, title, year, season=''):  # @UnusedVariable
     results = []
     search_url = '/search/' + urllib.quote_plus(title)
     html = self._http_get(search_url, require_debrid=True, cache_limit=1)
     if video_type == VIDEO_TYPES.TVSHOW:
         seen_urls = {}
         for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
             if CATEGORIES[video_type] not in post: continue
             match = re.search('<span>\s*TAGS:\s*</span>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', post, re.I)
             if match:
                 show_url, match_title = match.groups()
                 if show_url in seen_urls: continue
                 result = {'url': scraper_utils.pathify_url(show_url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                 seen_urls[show_url] = result
                 results.append(result)
     elif video_type == VIDEO_TYPES.MOVIE:
         norm_title = scraper_utils.normalize_title(title)
         headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
         posts = [result.content for result in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
         for heading, post in zip(headings, posts):
             if CATEGORIES[video_type] not in post or self.__too_old(post): continue
             post_url, post_title = heading
             meta = scraper_utils.parse_movie_link(post_title)
             full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
             match_year = meta['year']
             
             match_norm_title = scraper_utils.normalize_title(meta['title'])
             if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                 result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                 results.append(result)
         
     return results
Code example #21
    def search(self, video_type, title, year, season=''):
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/search/%s.html' % (urllib.quote_plus(title)))
        html = self._http_get(search_url, cache_limit=1)
        fragment = dom_parser2.parse_dom(html, 'ul', {'class': 'cfv'})
        if not fragment: return results
        
        norm_title = scraper_utils.normalize_title(title)
        for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'li'):
            is_season = dom_parser2.parse_dom(item, 'div', {'class': 'status'})
            if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
                match = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
                if not match: continue
                
                match_title = match[0].attrs['title']
                match_url = match[0].attrs['href']
                match_year = ''
                if video_type == VIDEO_TYPES.SEASON:
                    if season and not re.search('Season\s+%s$' % (season), match_title, re.I):
                        continue
                else:
                    match = re.search('-(\d{4})[-.]', match_url)
                    if match:
                        match_year = match.group(1)
                
                match_norm_title = scraper_utils.normalize_title(match_title)
                title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
                if title_match and (not year or not match_year or year == match_year):
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                    results.append(result)

        return results
Code example #22
    def __proc_results(self, url, title, year):
        results = []
        url = scraper_utils.urljoin(self.base_url, url)
        html = self._http_get(url, cache_limit=48)
        norm_title = scraper_utils.normalize_title(title)
        for _attrs, item in dom_parser2.parse_dom(html, 'li', {'class': 'bpM12'}):
            title_frag = dom_parser2.parse_dom(item, 'h2')
            year_frag = dom_parser2.parse_dom(item, 'div', {'class': 'sectionDetail'})
            match_url = dom_parser2.parse_dom(item, 'a', req='href')
            if title_frag and match_url:
                match_url = match_url[0].attrs['href']
                match = re.search('(.*?)<br>', title_frag[0].content)
                if match:
                    match_title = match.group(1)
                else:
                    match_title = title_frag[0].content  # take the tag's text, not the DomMatch object
                    
                match_year = ''
                if year_frag:
                    match = re.search('(\d{4})', year_frag[0].content)
                    if match:
                        match_year = match.group(1)

                if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
        
        return results
Code example #23
    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        for _attrs, row in dom_parser2.parse_dom(html, 'tr', {'id': 'linktr'}):
            redirect = dom_parser2.parse_dom(row, 'span', req='id')
            link = dom_parser2.parse_dom(row, 'a', req='href')
            if link and link[0].attrs['href'].startswith('http'):
                stream_url = link[0].attrs['href']
            elif redirect:
                stream_url = redirect[0].attrs['id']
            else:
                stream_url = ''

            if stream_url.startswith('http'):
                host = urlparse.urlparse(stream_url).hostname
            else:
                host = dom_parser2.parse_dom(row, 'h9')
                host = host[0].content if host else ''
                
            if stream_url and host:
                quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': False}
                hosters.append(hoster)
            
        return hosters
Code example #24
    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'video-embed'})
        if not fragment: return hosters
        
        iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
        if not iframe_url: return hosters
        
        stream_url = iframe_url[0].attrs['src']
        host = urlparse.urlparse(stream_url).hostname
        q_str = 'HDRIP'
        match = re.search('>Quality(.*?)<br\s*/>', html, re.I)
        if match:
            q_str = match.group(1)
            q_str = q_str.decode('utf-8').encode('ascii', 'ignore')
            q_str = re.sub('(</?strong[^>]*>|:|\s)', '', q_str, flags=re.I | re.U)  # flags must be named; the 4th positional arg of re.sub is 'count'
            
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': scraper_utils.blog_get_quality(video, q_str, host), 'views': None, 'rating': None, 'url': stream_url, 'direct': False}
        
        match = re.search('class="views-infos">(\d+).*?class="rating">(\d+)%', html, re.DOTALL)
        if match:
            hoster['views'] = int(match.group(1))
            hoster['rating'] = match.group(2)

        hosters.append(hoster)
        return hosters
Code example #25
    def get_sources(self, video):
        sources = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return sources
        
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, require_debrid=True, cache_limit=8)
        for div in dom_parser2.parse_dom(html, 'div', {'id': re.compile('stb-container-\d+')}):
            stream_url = dom_parser2.parse_dom(div.content, 'iframe', req='src')
            if not stream_url: continue
            stream_url = stream_url[0].attrs['src']
            host = urlparse.urlparse(stream_url).hostname
            source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': QUALITIES.HIGH, 'views': None, 'rating': None, 'direct': False}
            sources.append(source)
                
        fragment = dom_parser2.parse_dom(html, 'div', {'class': "stb-download-body_box"})
        if not fragment: return sources
        
        labels = dom_parser2.parse_dom(fragment[0].content, 'a', {'href': '#'})
        stream_urls = [result for result in dom_parser2.parse_dom(fragment[0].content, 'a', req='href') if result.content.lower() == 'download now']
        for label, stream_url in zip(labels, stream_urls):
            stream_url = stream_url.attrs['href']
            label = re.sub('</?[^>]*>', '', label.content)
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.blog_get_quality(video, label, host)
            source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': False}
            sources.append(source)

        return sources
Code example #26
    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        page_quality = QUALITIES.HD720 if video.video_type == VIDEO_TYPES.MOVIE else QUALITIES.HIGH
        for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'embed-responsive'}):
            sources = {}  # reset per fragment; otherwise a fragment without an iframe reuses (or never defines) sources
            iframe_url = dom_parser2.parse_dom(fragment, 'iframe', req='data-src')
            if iframe_url:
                iframe_url = iframe_url[0].attrs['data-src']
                iframe_host = urlparse.urlparse(iframe_url).hostname
                if iframe_host in DIRECT_HOSTS:
                    sources = self.__parse_streams(iframe_url, url)
                else:
                    sources = {iframe_url: {'quality': scraper_utils.get_quality(video, iframe_host, page_quality), 'direct': False}}
            
            for source in sources:
                quality = sources[source]['quality']
                direct = sources[source]['direct']
                if direct:
                    host = scraper_utils.get_direct_hostname(self, source)
                    stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
                else:
                    host = urlparse.urlparse(source).hostname
                    stream_url = source
                
                hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct}
                hosters.append(hoster)

        return hosters
Code example #27
    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)

        quality = None
        match = re.search('Links\s+-\s+Quality\s*([^<]*)</h1>', html, re.DOTALL | re.I)
        if match:
            quality = QUALITY_MAP.get(match.group(1).strip().upper())

        fragment = dom_parser2.parse_dom(html, 'div', {'id': 'links'})
        if not fragment: return hosters
        
        for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'ul'):
            stream_url = dom_parser2.parse_dom(item, 'a', req='href')
            host = dom_parser2.parse_dom(item, 'li', {'id': 'download'})
            if not stream_url or not host: continue
            
            stream_url = stream_url[0].attrs['href']
            host = host[-1].content
            hoster = {'multi-part': False, 'host': host, 'class': self, 'url': stream_url, 'quality': scraper_utils.get_quality(video, host, quality), 'rating': None, 'views': None, 'direct': False}
            hosters.append(hoster)

        return hosters
Code example #28
 def get_sources(self, video):
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=1)
     for _attrs, item in dom_parser2.parse_dom(html, 'a', {'class': 'full-torrent1'}):
         stream_url = dom_parser2.parse_dom(item, 'span', req='onclick')
         host = dom_parser2.parse_dom(item, 'div', {'class': 'small_server'})
         match = re.search('Views:\s*(?:</[^>]*>)?\s*(\d+)', item, re.I)
         views = match.group(1) if match else None
         match = re.search('Size:\s*(?:</[^>]*>)?\s*(\d+)', item, re.I)
         size = int(match.group(1)) * 1024 * 1024 if match else None
         if not stream_url or not host: continue
         
         stream_url = stream_url[0].attrs['onclick']
         host = host[0].content.lower()
         host = host.replace('stream server: ', '')
         match = re.search("'(/redirect/[^']+)", stream_url)
         if match: stream_url = match.group(1)
         quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
         hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': views, 'rating': None, 'url': stream_url, 'direct': False}
         if size is not None: hoster['size'] = scraper_utils.format_size(size, 'B')
         hosters.append(hoster)
     return hosters
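Note: scraper_utils.format_size(size, 'B') turns the byte count computed above into a human-readable label. A sketch under the assumption that the second argument is the unit suffix and the helper climbs the usual 1024 ladder:

     def format_size(size, suffix='B'):
         # e.g. 1572864 -> '1.5MB'
         for unit in ['', 'K', 'M', 'G', 'T']:
             if size < 1024.0:
                 return '%.1f%s%s' % (size, unit, suffix)
             size /= 1024.0
         return '%.1f%s%s' % (size, 'P', suffix)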
Code example #29
    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        q_str = 'HDRIP'
        match = re.search('>Quality(.*?)<br\s*/?>', html, re.I)
        if match:
            q_str = match.group(1)
            q_str = re.sub('(</?strong[^>]*>|:|\s)', '', q_str, flags=re.I | re.U)  # flags must be named; the 4th positional arg of re.sub is 'count'

        for _attr, content in dom_parser2.parse_dom(html, 'div', {'class': 'tab_content'}):
            for attrs, _content in dom_parser2.parse_dom(content, 'iframe', req='src'):
                source = attrs['src']
                host = urlparse.urlparse(source).hostname
                quality = scraper_utils.blog_get_quality(video, q_str, host)
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': source, 'direct': False}
                match = re.search('class="views-infos">(\d+).*?class="rating">(\d+)%', html, re.DOTALL)
                if match:
                    hoster['views'] = int(match.group(1))
                    hoster['rating'] = match.group(2)
                hosters.append(hoster)  # append inside the loop so every iframe source is kept, with views/rating as optional extras

        return hosters
Code example #30
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/search-movies/%s.html' % (urllib.quote_plus(title)))
        html = self._http_get(search_url, cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'li', {'class': 'item'}):
            match_url = dom_parser2.parse_dom(item, 'a', req='href')
            match_title_year = re.search('onmouseover="([^"]+)', item)
            if match_url and match_title_year:
                match_url = match_url[0].attrs['href']
                match_title_year = match_title_year.group(1)
                match = re.search('<b>(?:<i>)?\s*(.*?)\s*(?:</i>)?</b>', match_title_year)
                if not match: continue
                
                match_title, match_year = scraper_utils.extra_year(match.group(1))
                is_season = re.search('season\s+(\d+)', match_title_year, re.I)
                if (is_season and video_type == VIDEO_TYPES.MOVIE) or (not is_season and video_type == VIDEO_TYPES.SEASON): continue
                
                if video_type == VIDEO_TYPES.MOVIE:
                    if not match_year:
                        match_year = re.search('>Release:\s*(\d{4})', match_title_year)
                        match_year = match_year.group(1) if match_year else ''
                else:
                    if season and int(season) != int(is_season.group(1)):
                        continue
                                
                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                    results.append(result)

        return results
Code example #31
    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters

        sources = []
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        for _attrs, div in dom_parser2.parse_dom(html, 'div', {'class': 'tab-content'}):
            for attrs, _content in dom_parser2.parse_dom(div, 'iframe', req='src'):
                sources.append(attrs['src'])
        
        sources += [match.group(1) for match in re.finditer("window\.open\('([^']+)", html)]
        
        for source in sources:
            host = urlparse.urlparse(source).hostname
            quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': source, 'direct': False}
            hosters.append(hoster)
                    
        return hosters
Code example #32
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url, 'http://www.movie25.me/keywords/%s/' % (title))
        html = self._http_get(search_url, cache_limit=4)
        for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'movie_about'}):
            match = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
            if not match: continue

            match_url, match_title, extra = match[0].attrs['href'], match[0].attrs['title'], match[0].content
            _match_title, match_year = scraper_utils.extra_year(extra)
            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
        return results
Code example #33
    def _http_get(self, url, params=None, data=None, headers=None, auth=True, method=None, cache_limit=8):
        # return a blank (uncached) page when no username or password is configured
        if not self.username or not self.password:
            return ''

        html = super(self.__class__, self)._http_get(url, params=params, data=data, headers=headers, method=method, cache_limit=cache_limit)
        if auth and not dom_parser2.parse_dom(html, 'span', {'class': 'user-name'}):
            logger.log('Logging in for url (%s)' % (url), log_utils.LOGDEBUG)
            self.__login()
            html = super(self.__class__, self)._http_get(url, params=params, data=data, headers=headers, method=method, cache_limit=0)

        return html
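Note: this wrapper makes login lazy; it serves the cached page first and only calls __login() and refetches uncached when the logged-in marker (a span with class 'user-name') is missing. Hypothetical usage, with the class name and URL purely illustrative:

    scraper = SomeDebridScraper()  # hypothetical subclass holding username/password settings
    html = scraper._http_get('http://example.com/account', cache_limit=8)
    # returns '' when no credentials are configured; otherwise logs in on demand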
Code example #34
    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'videoPlayer'}):
            for attrs, _content in dom_parser2.parse_dom(fragment, 'iframe', req='src'):
                html = self._http_get(attrs['src'], headers={'Referer': page_url}, cache_limit=.5)
                match = re.search('downloadUrl\s*=\s*"([^"]+)', html)
                if match:
                    stream_url = match.group(1)
                    host = scraper_utils.get_direct_hostname(self, stream_url)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(stream_url)
                    else:
                        quality = QUALITIES.HIGH
                    hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                    hosters.append(hoster)

        return hosters
Code example #35
    def search(self, video_type, title, year, season=''):
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/wp-content/plugins/ajax-search-pro/ajax_search.php')
        data = {'action': 'ajaxsearchpro_search', 'aspp': title, 'asid': '1', 'asp_inst_id': '1_1',
                'options': 'current_page_id=7&qtranslate_lang=0&set_intitle=None&customset%5B%5D=post'}
        html = self._http_get(search_url, data=data, headers=XHR, cache_limit=8)
        norm_title = scraper_utils.normalize_title(title)
        for attrs, match_title_year in dom_parser2.parse_dom(html, 'a', {'class': 'asp_res_url'}):
            match_url = attrs['href']
            match_title_year = re.sub('</?[^>]*>', '', match_title_year)
            is_season = re.search('Season\s+(\d+)\s*', match_title_year, re.I)
            if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
                match_year = ''
                if video_type == VIDEO_TYPES.SEASON:
                    if season and int(is_season.group(1)) != int(season):
                        continue

                    match_title = re.sub('\s*\d{4}', '', match_title_year)
                else:
                    match_title, match_year = scraper_utils.extra_year(match_title_year)

                match_norm_title = scraper_utils.normalize_title(match_title)
                title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
                if title_match and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
        return results
Code example #36
 def _get_episode_url(self, show_url, video):
     episode_pattern = 'href="([^"]+/season/%s/episode/%s/?)"' % (video.season, video.episode)
     title_pattern = 'href="(?P<url>[^"]+)"[^>]+title="(?:S\d+\s*E\d+:\s*)?(?P<title>[^"]+)'
     headers = {'Referer': scraper_utils.urljoin(self.base_url, show_url)}
     season_url = scraper_utils.urljoin(show_url, '/season/%s' % (video.season))
     season_url = scraper_utils.urljoin(self.base_url, season_url)
     html = self._http_get(season_url, headers=headers, cache_limit=2)
     fragment = dom_parser2.parse_dom(html, 'div', {'id': 'episodes'})
     return self._default_get_episode_url(fragment, video, episode_pattern, title_pattern)
Code example #37
File: diziay_scraper.py Project: uguer30/Project
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        html = self._http_get(self.base_url, cache_limit=8)
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'dizis'})
        if not fragment: return results

        norm_title = scraper_utils.normalize_title(title)
        for attrs, match_title in dom_parser2.parse_dom(fragment[0].content, 'a', req='href'):
            match_url = attrs['href']
            if norm_title in scraper_utils.normalize_title(match_title):
                match_title = re.sub('<div[^>]*>.*?</div>', '', match_title)
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                results.append(result)

        return results
Code example #38
 def resolve_link(self, link):
     html = self._http_get(link, cache_limit=.5)
     iframe_url = dom_parser2.parse_dom(html, 'iframe', req='src')
     if iframe_url:
         return iframe_url[0].attrs['src']
     else:
         match = re.search('href="([^"]+)[^>]*>Click Here To Play<', html,
                           re.I)
         if match:
             return match.group(1)
         else:
             return link
Code Example #39
File: pftv_scraper.py Project: enursha101/xbmc-addon
 def _get_episode_url(self, show_url, video):
     episode_pattern = 'href="([^"]+season-%s-episode-%s/)' % (
         video.season, video.episode)
     airdate_pattern = '{day} {short_month} {year}\s*<a\s+href="([^"]+)'
     show_url = scraper_utils.urljoin(self.base_url, show_url)
     html = self._http_get(show_url, cache_limit=2)
     fragment = dom_parser2.parse_dom(html, 'table',
                                      {'class': 'alternate_color'})
     return self._default_get_episode_url(fragment or html,
                                          video,
                                          episode_pattern,
                                          airdate_pattern=airdate_pattern)
Code Example #40
 def get_sources(self, video):
     source_url = self.get_url(video)
     hosters = []
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     page_url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(page_url, cache_limit=.25)
     for _attrs, button in dom_parser2.parse_dom(html, 'li', {'class': 'playing_button'}):
         try:
             link = dom_parser2.parse_dom(button, 'a', req='href')
             match = re.search('php\?.*?=?([^"]+)', link[0].attrs['href'])
             stream_url = base64.b64decode(match.group(1))
             match = re.search('(https?://.*)', stream_url)
             stream_url = match.group(1)
             host = urlparse.urlparse(stream_url).hostname
             quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
             hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': False}
             hosters.append(hoster)
         except Exception as e:
             logger.log('Exception during tvonline source: %s - |%s|' % (e, button), log_utils.LOGDEBUG)
 
     return hosters
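The decode step in the snippet above can be exercised in isolation: the regex grabs everything after php? up to a closing quote and treats it as base64, then keeps the result only if it decoded to an absolute URL. A minimal sketch with a hypothetical href; the example URL and payload are made up:

import base64
import re

# hypothetical href of the kind the snippet above expects
href = 'play.php?aHR0cDovL2V4YW1wbGUuY29tL3ZpZGVvLm1wNA=='
match = re.search('php\?.*?=?([^"]+)', href)
if match:
    stream_url = base64.b64decode(match.group(1))
    # keep the payload only if it decoded to an absolute URL
    match = re.search('(https?://.*)', stream_url)
    if match:
        print(match.group(1))  # http://example.com/video.mp4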
Code Example #41
File: pftv_scraper.py Project: enursha101/xbmc-addon
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        norm_title = scraper_utils.normalize_title(title)
        url = scraper_utils.urljoin(self.base_url, '/watch-series/')
        headers = {'Referer': self.base_url}
        html = self._http_get(url, headers=headers, cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'li'):
            for attrs, _content in dom_parser2.parse_dom(item,
                                                         'a',
                                                         req=['title',
                                                              'href']):
                match_title, match_url = attrs['title'], attrs['href']
                if norm_title in scraper_utils.normalize_title(match_title):
                    result = {
                        'url': scraper_utils.pathify_url(match_url),
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': ''
                    }
                    results.append(result)

        return results
Code Example #42
 def __get_iframe_links(self, html, sub):
     hosters = []
     html = html.replace('\\"', '"').replace('\\/', '/')
     iframe_urls = dom_parser2.parse_dom(html, 'iframe', {'id': 'episode_player'}, req='src')
     if iframe_urls:
         stream_url = iframe_urls[0].attrs['src']
         host = urlparse.urlparse(stream_url).hostname
         quality = QUALITIES.HD720
         hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': False}
         hoster['subs'] = sub
         hosters.append(hoster)
     return hosters
Code Example #43
    def _get_episode_url(self, show_url, video):
        season_url = show_url
        if video.season != 1:
            show_url = scraper_utils.urljoin(self.base_url, show_url)
            html = self._http_get(show_url, cache_limit=24)
            fragment = dom_parser2.parse_dom(html, 'div',
                                             {'class': 'page-numbers'})
            if fragment:
                match = re.search(
                    'href="([^"]+-%s-sezon[^"]*)' % (video.season),
                    fragment[0].content)
                if match:
                    season_url = match.group(1)

        episode_pattern = '''href=['"]([^'"]+-%s-sezon-%s-bolum[^'"]*)''' % (
            video.season, video.episode)
        season_url = scraper_utils.urljoin(self.base_url, season_url)
        html = self._http_get(season_url, cache_limit=2)
        fragment = dom_parser2.parse_dom(html, 'ul', {'class': 'posts-list'})
        return self._default_get_episode_url(fragment or html, video,
                                             episode_pattern)
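With the corrected pattern, the season and episode numbers are interpolated directly into the regex, mirroring the '-%s-sezon' pattern used earlier in the same method. A quick standalone check with hypothetical values and markup:

import re

episode_pattern = '''href=['"]([^'"]+-%s-sezon-%s-bolum[^'"]*)''' % (2, 5)
html = '<a href="/dizi/ornek-dizi-2-sezon-5-bolum-izle.html">izle</a>'  # hypothetical markup
match = re.search(episode_pattern, html)
if match:
    print(match.group(1))  # /dizi/ornek-dizi-2-sezon-5-bolum-izle.html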
Code Example #44
 def search(self, video_type, title, year, season=''):  # @UnusedVariable
     results = []
     html = self._http_get(self.base_url,
                           params={'s': title},
                           cache_limit=8)
     for _attrs, item in dom_parser2.parse_dom(html, 'article',
                                               {'class': 'item-list'}):
         match = dom_parser2.parse_dom(item, 'a', req='href')
         if not match: continue
         match_title_year = match[0].content
         match_url = match[0].attrs['href']
         match_title, match_year = scraper_utils.extra_year(
             match_title_year)
         if not year or not match_year or year == match_year:
             result = {
                 'title': scraper_utils.cleanse_title(match_title),
                 'year': match_year,
                 'url': scraper_utils.pathify_url(match_url)
             }
             results.append(result)
     return results
Code Example #45
File: bestmoviez_scraper.py Project: uguer30/Project
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        posts = []
        html = self._http_get(self.base_url,
                              params={
                                  's': title,
                                  'submit': 'Search'
                              },
                              require_debrid=True,
                              cache_limit=2)
        for _attr, post in dom_parser2.parse_dom(
                html, 'article', {'id': re.compile('post-\d+')}):
            if self.__too_old(post): continue
            posts += [
                post.content for post in dom_parser2.parse_dom(
                    post, 'h1', {'class': 'entry-title'})
            ]

        return self._blog_proc_results(
            '\n'.join(posts),
            'href="(?P<url>[^"]+)[^>]+>(?P<post_title>.*?)</a>', '',
            video_type, title, year)
Code Example #46
 def __too_old(self, post):
     filter_days = datetime.timedelta(days=int(kodi.get_setting('%s-filter' % (self.get_name()))))
     post_date = dom_parser2.parse_dom(post, 'time', {'class': 'entry-date'}, req='datetime')
     if filter_days and post_date:
         today = datetime.date.today()
         try:
             # read the machine-readable ISO timestamp from the datetime
             # attribute (required via req='datetime'), not the display text
             post_date = datetime.date.fromtimestamp(utils.iso_2_utc(post_date[0].attrs['datetime']))
             if today - post_date > filter_days:
                 return True
         except ValueError:
             return False
     return False
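The age filter above reduces to simple date arithmetic. A minimal sketch of the same comparison, with the window hard-coded instead of read from the addon setting:

import datetime

def too_old(post_date, filter_days):
    # a post is stale when it falls outside the rolling window
    return datetime.date.today() - post_date > filter_days

filter_days = datetime.timedelta(days=30)  # hypothetical setting value
post_date = datetime.date(2017, 1, 15)     # hypothetical parsed post date
print(too_old(post_date, filter_days))     # True for any recent "today"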
Code Example #47
def menu():

    try:
        url = urlparse.urljoin(base_domain, 'categories.php')
        c = client.request(url)
        r = dom_parser2.parse_dom(c, 'a', req='href')
        r = [i for i in r if len(i.content) > 2 and 'pages' in i.attrs['href'] \
             and '0-9' not in i.attrs['href'] and '&nbsp;' not in i.content]
        r = [(urlparse.urljoin(base_domain, i.attrs['href']), i.content)
             for i in r]
        if (not r):
            log_utils.log(
                'Scraping Error in %s:: Content of request: %s' %
                (base_name.title(), str(c)), log_utils.LOGERROR)
            kodi.notify(msg='Scraping Error: Info Added To Log File',
                        duration=6000,
                        sound=True)
            quit()
    except Exception as e:
        log_utils.log(
            'Fatal Error in %s:: Error: %s' % (base_name.title(), str(e)),
            log_utils.LOGERROR)
        kodi.notify(msg='Fatal Error', duration=4000, sound=True)
        quit()

    dirlst = []

    for i in r:
        try:
            name = kodi.sortX(i[1].encode('utf-8'))
            icon = xbmc.translatePath(
                os.path.join('special://home/addons/script.wankbank.artwork',
                             'resources/art/%s/icon.png' % filename))
            fanarts = xbmc.translatePath(
                os.path.join('special://home/addons/script.wankbank.artwork',
                             'resources/art/%s/fanart.jpg' % filename))
            dirlst.append({
                'name': name,
                'url': i[0],
                'mode': content_mode,
                'icon': icon,
                'fanart': fanarts,
                'folder': True
            })
        except Exception as e:
            log_utils.log(
                'Error adding menu item %s in %s:: Error: %s' %
                (i[1].title(), base_name.title(), str(e)), log_utils.LOGERROR)

    if dirlst: buildDirectory(dirlst)
    else:
        kodi.notify(msg='No Menu Items Found')
        quit()
Code Example #48
def get_gk_links(scraper, html, page_url, page_quality, link_url, player_url):
    def get_real_gk_url(scraper, player_url, params):
        html = scraper._http_get(player_url,
                                 params=params,
                                 headers=XHR,
                                 cache_limit=.25)
        js_data = parse_json(html, player_url)
        data = js_data.get('data', {})
        if data is not None and 'files' in data:
            return data['files']
        else:
            return data

    sources = {}
    for attrs, _content in dom_parser2.parse_dom(
            html, 'a', req=['data-film', 'data-name', 'data-server']):
        data = {
            'ipplugins': 1,
            'ip_film': attrs['data-film'],
            'ip_server': attrs['data-server'],
            'ip_name': attrs['data-name']
        }
        headers = {'Referer': page_url}
        headers.update(XHR)
        html = scraper._http_get(link_url,
                                 data=data,
                                 headers=headers,
                                 cache_limit=.25)
        js_data = parse_json(html, link_url)
        params = {
            'u': js_data.get('s'),
            'w': '100%',
            'h': 420,
            's': js_data.get('v'),
            'n': 0
        }
        stream_urls = get_real_gk_url(scraper, player_url, params)
        if stream_urls is None: continue

        if isinstance(stream_urls, basestring):
            sources[stream_urls] = page_quality
        else:
            for item in stream_urls:
                stream_url = item['files']
                if get_direct_hostname(scraper, stream_url) == 'gvideo':
                    quality = gv_get_quality(stream_url)
                elif 'quality' in item:
                    quality = height_get_quality(item['quality'])
                else:
                    quality = page_quality
                sources[stream_url] = quality

    return sources
Code Example #49
    def get_sources(self, video):
        hosters = []
        source_url = self.get_url(video)
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        
        views = None
        fragment = dom_parser2.parse_dom(html, 'img', {'src': re.compile('[^"]*view_icon.png')})
        if fragment:
            match = re.search('(\d+)', fragment[0].content)
            if match:
                views = match.group(1)
            
        match = re.search('href="([^"]+-full-movie-[^"]+)', html)
        if match:
            url = match.group(1)
            html = self._http_get(url, cache_limit=.5)
        
        sources = self.__get_embedded(html)
        for link in dom_parser2.parse_dom(html, 'span', {'class': 'btn-eps'}, req='link'):
            link = link.attrs['link']
            ajax_url = scraper_utils.urljoin(self.base_url, AJAX_URL)
            headers = {'Referer': url}
            headers.update(XHR)
            html = self._http_get(ajax_url, params={'v': link}, headers=headers, cache_limit=.5)
            sources.update(self.__get_sources(html))
        
        for source in sources:
            if sources[source]['direct']:
                host = scraper_utils.get_direct_hostname(self, source)
            else:
                host = urlparse.urlparse(source).hostname
            stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            direct = sources[source]['direct']
            quality = sources[source]['quality']
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': views, 'rating': None, 'url': stream_url, 'direct': direct}
            hosters.append(hoster)

        return hosters
Code Example #50
 def get_sources(self, video):
     source_url = self.get_url(video)
     hosters = []
     sources = {}
     if not source_url or source_url == FORCE_NO_MATCH: return hosters
     url = scraper_utils.urljoin(self.base_url, source_url)
     html = self._http_get(url, cache_limit=.5)
     for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'movieplay'}):
         for attrs, _content in dom_parser2.parse_dom(fragment, 'iframe', req='src') + dom_parser2.parse_dom(fragment, 'iframe', req='data-lazy-src'):
             iframe_url = attrs.get('src', '')
             if not iframe_url.startswith('http'):
                 iframe_url = attrs.get('data-lazy-src', '')
                 if not iframe_url.startswith('http'): continue
                 
             if 'miradetodo' in iframe_url:
                 html = self._http_get(iframe_url, cache_limit=.5)
                 fragment = dom_parser2.parse_dom(html, 'nav', {'class': 'nav'})
                 if fragment:
                     stream_url = dom_parser2.parse_dom(fragment[0].content, 'a', req='href')
                     if stream_url:
                         html = self._http_get(stream_url[0].attrs['href'], cache_limit=.5)
                         
                 sources.update(self.__get_gk_links(html))
                 sources.update(self.__get_gk_links2(html))
                 sources.update(self.__get_amazon_links(html))
                 sources.update(scraper_utils.parse_sources_list(self, html))
             else:
                 host = urlparse.urlparse(iframe_url).hostname
                 source = {'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH), 'direct': False}
                 sources.update({iframe_url: source})
                 
     for source in sources:
         stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
         direct = sources[source]['direct']
         quality = sources[source]['quality']
         host = scraper_utils.get_direct_hostname(self, source) if direct else urlparse.urlparse(source).hostname
         hoster = {'multi-part': False, 'url': stream_url, 'class': self, 'quality': quality, 'host': host, 'rating': None, 'views': None, 'direct': direct}
         hosters.append(hoster)
         
     return hosters
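Both get_sources variants above append request headers to the stream URL using Kodi's pipe syntax (url|Header1=val1&Header2=val2). A minimal sketch of that convention, as a hypothetical re-implementation of what scraper_utils.append_headers is assumed to do; the User-Agent string is illustrative:

import urllib

def append_headers(url, headers):
    # Kodi players accept 'url|Header1=val1&Header2=val2';
    # header values are URL-encoded
    return url + '|' + '&'.join('%s=%s' % (key, urllib.quote(str(value)))
                                for key, value in headers.items())

print(append_headers('http://example.com/video.mp4',
                     {'User-Agent': 'Mozilla/5.0'}))
# http://example.com/video.mp4|User-Agent=Mozilla/5.0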
Code Example #51
def menu():

    lover.checkupdates()

    try:
        url = urlparse.urljoin(base_domain, 'video')
        c = client.request(url)
        r = dom_parser2.parse_dom(c, 'a', {'class': 'sidebar_section_item'})
        r = [i for i in r if 'channels' in i.attrs['href']]
        r = [(urlparse.urljoin(base_domain, i.attrs['href']),
              i.content + ' - [ Professional ]') for i in r]
        url = urlparse.urljoin(base_domain, 'amateur/videos/')
        c = client.request(url)
        e = dom_parser2.parse_dom(c, 'a', {'class': 'sidebar_section_item'})
        e = [i for i in e if 'channels' in i.attrs['href']]
        r += [(urlparse.urljoin(base_domain, i.attrs['href']),
               i.content + ' - [ Amateur ]') for i in e]
        r = sorted(r, key=lambda x: x[1])
        if not r:
            log_utils.log(
                'Scraping Error in %s:: Content of request: %s' %
                (base_name.title(), str(c)), log_utils.LOGERROR)
            kodi.notify(msg='Scraping Error: Info Added To Log File',
                        duration=6000,
                        sound=True)
            quit()
    except Exception as e:
        log_utils.log(
            'Fatal Error in %s:: Error: %s' % (base_name.title(), str(e)),
            log_utils.LOGERROR)
        kodi.notify(msg='Fatal Error', duration=4000, sound=True)
        quit()

    dirlst = []
    urls = []
    for i in r:
        try:
            name = i[1]
            icon = xbmc.translatePath(
                os.path.join('special://home/addons/script.xxxodus.artwork',
                             'resources/art/%s/icon.png' % filename))
            fanarts = xbmc.translatePath(
                os.path.join('special://home/addons/script.xxxodus.artwork',
                             'resources/art/%s/fanart.jpg' % filename))
            dirlst.append({
                'name': name,
                'url': i[0],
                'mode': content_mode,
                'icon': icon,
                'fanart': fanarts,
                'folder': True
            })
        except Exception as e:
            log_utils.log(
                'Error adding menu item %s in %s:: Error: %s' %
                (i[1].title(), base_name.title(), str(e)), log_utils.LOGERROR)

    if dirlst: buildDirectory(dirlst)
    else:
        kodi.notify(msg='No Menu Items Found')
        quit()
Code Example #52
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/search')
        search_type = 'movies' if video_type == VIDEO_TYPES.MOVIE else 'series'
        html = self._http_get(search_url,
                              params={
                                  'query': title.lower(),
                                  'type': search_type
                              },
                              cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'div',
                                                  {'class': 'one_movie-item'}):
            match_url = dom_parser2.parse_dom(item, 'a', req='href')
            match_title = dom_parser2.parse_dom(item, 'img', req='alt')
            media_type = dom_parser2.parse_dom(item, 'div',
                                               {'class': 'movie-series'})
            if not media_type:
                media_type = VIDEO_TYPES.MOVIE
            elif media_type[0].content == 'TV SERIE':
                media_type = VIDEO_TYPES.TVSHOW

            if match_url and match_title and video_type == media_type:
                match_url = match_url[0].attrs['href']
                match_title = match_title[0].attrs['alt']

                match_year = re.search('-(\d{4})-', match_url)
                if match_year:
                    match_year = match_year.group(1)
                else:
                    match_year = ''

                if not year or not match_year or year == match_year:
                    result = {
                        'url': scraper_utils.pathify_url(match_url),
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': match_year
                    }
                    results.append(result)

        return results
Code Example #53
    def __get_source_page(self, video_type, page_url):
        match = re.search('/movie/(.*?)-(\d+)\.html', page_url)
        if not match: return '', '', ''
        slug, movie_id = match.groups()

        vid_type = 'movie' if video_type == VIDEO_TYPES.MOVIE else 'series'
        qp_url = QP_URL.format(slug=slug, movie_id=movie_id, vid_type=vid_type)
        qp_url = scraper_utils.urljoin(self.base_url, qp_url)
        headers = {'Referer': scraper_utils.urljoin(self.base_url, page_url)}
        headers.update(XHR)
        html = self._http_get(qp_url, headers=headers, cache_limit=8)
        watching_url = dom_parser2.parse_dom(
            html, 'a', {'title': re.compile('View all episodes')}, req='href')
        if not watching_url: return '', '', ''

        watching_url = watching_url[0].attrs['href']
        page_html = self._http_get(watching_url,
                                   headers={
                                       'Referer':
                                       scraper_utils.urljoin(
                                           self.base_url, page_url)
                                   },
                                   cache_limit=8)
        for attrs, _content in dom_parser2.parse_dom(page_html,
                                                     'img',
                                                     {'class': 'hidden'},
                                                     req='src'):
            _img = self._http_get(attrs['src'],
                                  headers={'Referer': watching_url},
                                  cache_limit=8)

        sl_url = SL_URL.format(movie_id=movie_id)
        sl_url = scraper_utils.urljoin(self.base_url, sl_url)
        html = self._http_get(sl_url, headers=headers, cache_limit=8)
        js_data = scraper_utils.parse_json(html, sl_url)
        try:
            html = js_data['html']
        except:
            html = ''
        return movie_id, watching_url, html
Code Example #54
def menu():

    lover.checkupdates()

    try:
        url = urlparse.urljoin(base_domain, 'extreme-videos/')
        c = client.request(url)
        r = dom_parser2.parse_dom(c, 'a', {'class': 'url16'})
        r = [(i.attrs['href'], i.attrs['title']) for i in r if i]
        if (not r):
            log_utils.log(
                'Scraping Error in %s:: Content of request: %s' %
                (base_name.title(), str(c)), log_utils.LOGERROR)
            kodi.notify(msg='Scraping Error: Info Added To Log File',
                        duration=6000,
                        sound=True)
            quit()
    except Exception as e:
        log_utils.log(
            'Fatal Error in %s:: Error: %s' % (base_name.title(), str(e)),
            log_utils.LOGERROR)
        kodi.notify(msg='Fatal Error', duration=4000, sound=True)
        quit()

    dirlst = []

    for i in r:
        try:
            name = kodi.sortX(i[1].encode('utf-8'))
            icon = xbmc.translatePath(
                os.path.join('special://home/addons/script.xxxodus.artwork',
                             'resources/art/%s/icon.png' % filename))
            fanarts = xbmc.translatePath(
                os.path.join('special://home/addons/script.xxxodus.artwork',
                             'resources/art/%s/fanart.jpg' % filename))
            dirlst.append({
                'name': name,
                'url': i[0],
                'mode': content_mode,
                'icon': icon,
                'fanart': fanarts,
                'folder': True
            })
        except Exception as e:
            log_utils.log(
                'Error adding menu item %s in %s:: Error: %s' %
                (i[1].title(), base_name.title(), str(e)), log_utils.LOGERROR)

    if dirlst: buildDirectory(dirlst)
    else:
        kodi.notify(msg='No Menu Items Found')
        quit()
Code Example #55
    def get_sources(self, video):
        source_url = self.get_url(video)
        hosters = []
        sources = []
        if not source_url or source_url == FORCE_NO_MATCH: return hosters
        page_url = scraper_utils.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=1)
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'player'})
        if not fragment: return hosters

        iframe_url = dom_parser2.parse_dom(fragment[0].content,
                                           'iframe',
                                           req='src')
        if not iframe_url: return hosters

        html = self._http_get(iframe_url[0].attrs['src'], cache_limit=.25)
        sources.append(self.__get_embedded_sources(html))
        sources.append(self.__get_linked_sources(html))
        for source in sources:
            for stream_url in source['sources']:
                host = scraper_utils.get_direct_hostname(self, stream_url)
                if host == 'gvideo':
                    stream_url += scraper_utils.append_headers(
                        {'User-Agent': scraper_utils.get_ua()})
                    quality = scraper_utils.gv_get_quality(stream_url)
                    hoster = {
                        'multi-part': False,
                        'host': host,
                        'class': self,
                        'quality': quality,
                        'views': None,
                        'rating': None,
                        'url': stream_url,
                        'direct': True
                    }
                    hoster['subs'] = source.get('subs', True)
                    hosters.append(hoster)

        return hosters
Code Example #56
 def __get_iframe_sources(self, iframe_url, page_url):
     hosters = []
     headers = {'Referer': page_url}
     html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
     sources = dom_parser2.parse_dom(html, 'div', {'class': 'dzst-player'}, req='data-dzst-player')
     if sources:
         sources = scraper_utils.cleanse_title(sources[0].attrs['data-dzst-player'].replace('&#x3D;', '='))
         js_data = scraper_utils.parse_json(scraper_utils.cleanse_title(sources), iframe_url)
         sources = js_data.get('tr', {})
         for key in sources:
             hosters.append(self.__create_source(sources[key], key, page_url, subs=True))
         
     return hosters
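The data-dzst-player attribute carries HTML-entity-escaped JSON, which is why the snippet replaces &#x3D; before parsing. A standalone sketch of the unescape-then-parse step with a made-up payload; the &quot; handling here is an assumption about what scraper_utils.cleanse_title takes care of:

import json

# hypothetical entity-escaped attribute value
attr = '{&quot;tr&quot;:{&quot;720p&quot;:&quot;http://example.com/v.mp4&quot;}}'
decoded = attr.replace('&quot;', '"').replace('&#x3D;', '=')
js_data = json.loads(decoded)
for key, value in js_data.get('tr', {}).items():
    print('%s -> %s' % (key, value))  # 720p -> http://example.com/v.mp4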
Code Example #57
    def search_movie_name(self, name):

        meta = {}

        name = name.replace('!', '')
        url = self.SEARCH_MOVIES % (quote_plus(name))
        html = client.request(url)
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'movie_box'})

        if not fragment:
            meta = {}

        if fragment:
            movie_url = dom_parser2.parse_dom(fragment, 'a', req='href')
            tvtsvod_id = (movie_url[0].attrs['href']).split('/', 5)[4]
            meta = self.search_movie_id(tvtsvod_id)

        xbmc.log('DATA URL: %s' % (str(url)), xbmc.LOGNOTICE)
        xbmc.log('MOVIE NAME: %s' % (str(name)), xbmc.LOGNOTICE)
        # xbmc.log('MOVIE ID: %s' % (str(tvtsvod_id)), xbmc.LOGNOTICE)

        return meta
Code Example #58
    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/search')
        params = {'q': title, 's': 't'}
        html = self._http_get(search_url, params=params, cache_limit=1)
        for _attrs, content in dom_parser2.parse_dom(html, 'span',
                                                     {'class': 'title_list'}):
            match = dom_parser2.parse_dom(content, 'a', req=['href', 'title'])
            if match:
                attrs = match[0].attrs
                match_url, match_title_year = attrs['href'], attrs['title']
                match_title, match_year = scraper_utils.extra_year(
                    match_title_year)
                if not year or not match_year or year == match_year:
                    result = {
                        'url': scraper_utils.pathify_url(match_url),
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': match_year
                    }
                    results.append(result)

        return results
Code Example #59
 def search(self, video_type, title, year, season=''):
     results = []
     search_url = scraper_utils.urljoin(self.base_url, '/search-movies/%s.html' % urllib.quote_plus(title))
     html = self._http_get(search_url, cache_limit=8)
     for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'movie_about'}):
         match = dom_parser2.parse_dom(item, 'a', req='href')
         if match:
             match_url = match[0].attrs['href']
             match_title_year = match[0].content
             is_season = re.search('Season\s+(\d+)\s*', match_title_year, re.I)
             if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
                 match_title, match_year = scraper_utils.extra_year(match_title_year)
                 if video_type == VIDEO_TYPES.SEASON:
                     match_year = ''
                     if season and int(season) != int(is_season.group(1)):
                         continue
                             
                 if (not year or not match_year or year == match_year):
                     result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                     results.append(result)
     
     return results
Code Example #60
 def __search(self, video_type, title, year, page):
     results = []
     url = scraper_utils.urljoin(self.base_url, page['url'])
     params = page['params'] if 'params' in page else None
     html = self._http_get(url, params=params, cache_limit=24)
     norm_title = scraper_utils.normalize_title(title)
     match_year = ''
     for _attrs, item in dom_parser2.parse_dom(html, 'div', {'id': re.compile('movie-+\d+')}):
         is_tvshow = dom_parser2.parse_dom(item, 'div', {'class': 'movieTV'})
         if (is_tvshow and video_type == VIDEO_TYPES.TVSHOW) or (not is_tvshow and video_type == VIDEO_TYPES.MOVIE):
             fragment = dom_parser2.parse_dom(item, 'h4', {'class': 'showRowName'})
             if fragment:
                 match = dom_parser2.parse_dom(fragment[0].content, 'a', req='href')
                 if match:
                     match_url, match_title = match[0].attrs['href'], match[0].content
                     if re.search('/-?\d{7,}/', match_url): continue
                     
                     match_norm_title = scraper_utils.normalize_title(match_title)
                     if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                         result = {'title': scraper_utils.cleanse_title(match_title), 'url': scraper_utils.pathify_url(match_url), 'year': match_year}
                         results.append(result)
     return results