def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=2)
        urls = dom_parser.parse_dom(html, "iframe", ret="src")
        for iframe_url in urls:
            if "/ads/" in iframe_url:
                continue
            elif "/watch/" in iframe_url:
                url = urlparse.urljoin(self.base_url, iframe_url)
                html = self._http_get(url, cache_limit=2)
                urls += dom_parser.parse_dom(html, "iframe", ret="src")
                match = re.search("""location.href=['"]([^'"]+)""", html)
                if match:
                    urls.append(match.group(1))
            else:
                stream_url = iframe_url
                host = urlparse.urlparse(stream_url).hostname
                hoster = {"multi-part": False, "host": host, "class": self, "url": stream_url, "quality": QUALITIES.HIGH, "views": None, "rating": None, "direct": False}
                hosters.append(hoster)
    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    headers = {'Referer': self.base_url}
    params = {'search': title}
    html = self._http_get(self.base_url, params=params, headers=headers, cache_limit=8)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'listCard'}):
        match_title = dom_parser.parse_dom(item, 'p', {'class': 'extraTitle'})
        match_url = dom_parser.parse_dom(item, 'a', ret='href')
        match_year = dom_parser.parse_dom(item, 'p', {'class': 'cardYear'})
        if match_url and match_title:
            match_url = match_url[0]
            match_title = match_title[0]
            match_year = match_year[0] if match_year else ''
            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    results = []
    search_url = urlparse.urljoin(self.base_url, SEARCH_URL)
    if video_type == VIDEO_TYPES.MOVIE:
        search = 'search'
    else:
        search = 'searchshow'
    search_url = search_url % (search, urllib.quote(title))
    html = self._http_get(search_url, cache_limit=8)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'movie'}):
        match_url = dom_parser.parse_dom(item, 'a', {'class': 'poster'}, ret='href')
        match_title = dom_parser.parse_dom(item, 'div', {'class': 'title'})
        match_year = dom_parser.parse_dom(item, 'div', {'class': 'year'})
        if match_url and match_title:
            match_url = match_url[0]
            match_title = match_title[0]
            match_year = match_year[0] if match_year else ''
            if match_title and (not year or not match_year or year == match_year):
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def get_sources(self, video):
    source_url = self.get_url(video)
    sources = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        q_str = dom_parser.parse_dom(html, 'div', {'class': 'poster-qulabel'})
        if q_str:
            q_str = q_str[0].replace(' ', '').upper()
            page_quality = Q_MAP.get(q_str, QUALITIES.HIGH)
        else:
            page_quality = QUALITIES.HIGH
        for fragment in dom_parser.parse_dom(html, 'div', {'class': 'tab_box'}):
            match = re.search('file\s*:\s*"([^"]+)', fragment)
            if match:
                stream_url = match.group(1)
            else:
                stream_url = self.__get_ajax_sources(fragment, page_url)
            if stream_url:
                host = self._get_direct_hostname(stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                else:
                    quality = page_quality
                stream_url += '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(page_url))
                source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True}
                sources.append(source)
    return sources
def search(self, video_type, title, year, season=''):
    results = []
    search_url = urlparse.urljoin(self.base_url, '/movies/search?s=%s' % urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=8)
    for item in dom_parser.parse_dom(html, 'div', {'class': '[^"]*c-content-product-2[^"]*'}):
        match_title_year = dom_parser.parse_dom(item, 'p', {'class': '[^"]*c-title[^"]*'})
        match_url = dom_parser.parse_dom(item, 'a', ret='href')
        is_season = dom_parser.parse_dom(item, 'div', {'class': '[^"]*c-bg-red[^"]*'})
        if match_title_year and match_url:
            match_title_year = match_title_year[0]
            match_url = match_url[0]
            if (video_type == VIDEO_TYPES.MOVIE and not is_season) or (video_type == VIDEO_TYPES.SEASON and is_season):
                match_year = ''
                if video_type == VIDEO_TYPES.SEASON:
                    match_title = match_title_year
                    if season and not re.search('Season\s+(%s)\s+' % (season), match_title_year, re.I):
                        continue
                else:
                    match = re.search('(.*?)\s+(\d{4})$', match_title_year)
                    if match:
                        match_title, match_year = match.groups()
                    else:
                        match_title = match_title_year
                        match_year = ''
                match_url = urlparse.urljoin(match_url, 'watching.html')
                if not year or not match_year or year == match_year:
                    result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
    return results
def __get_sources(self, html, page_url): sources = [] fragment = dom_parser.parse_dom(html, 'div', {'class': 'video-content'}) if fragment: iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src') if iframe_url: iframe_url = iframe_url[0] if self.base_url in iframe_url: headers = {'Referer': page_url} html = self._http_get(iframe_url, headers=headers, cache_limit=.5) referer = iframe_url iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src') if iframe_url: iframe_url = iframe_url[0] headers = {'Referer': referer} html = self._http_get(iframe_url, headers=headers, cache_limit=.5) links = self._parse_sources_list(html) for link in links: host = self._get_direct_hostname(link) if host == 'gvideo': quality = scraper_utils.gv_get_quality(link) else: quality = links[link]['quality'] source = {'multi-part': False, 'url': link, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True} sources.append(source) else: host = urlparse.urlparse(iframe_url).hostname source = {'multi-part': False, 'url': iframe_url, 'host': host, 'class': self, 'quality': QUALITIES.HD720, 'views': None, 'rating': None, 'direct': False} sources.append(source) return sources
def get_sources(self, video): hosters = [] source_url = self.get_url(video) if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=8) sources = dom_parser.parse_dom(html, 'source', ret='src') iframes = dom_parser.parse_dom(html, 'iframe', {'class': 'movieframe'}, ret='src') headers = {'Referer': url} for iframe_url in iframes: html = self._http_get(iframe_url, headers=headers, allow_redirect=False, method='HEAD', cache_limit=0) if html.startswith('http'): sources.append(html) else: iframe_url = urlparse.urljoin(self.base_url, iframe_url) html = self._http_get(iframe_url, headers=headers, cache_limit=1) sources += dom_parser.parse_dom(html, 'source', ret='src') iframes += dom_parser.parse_dom(html, 'iframe', ret='src') for source in sources: host = self._get_direct_hostname(source) if host == 'gvideo': quality = scraper_utils.gv_get_quality(source) direct = True else: quality = QUALITIES.HIGH direct = False host = urlparse.urlparse(source).hostname source = {'multi-part': False, 'url': source, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct} hosters.append(source) return hosters
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if source_url and source_url != FORCE_NO_MATCH: page_url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(page_url, cache_limit=.5) hosters += self.__add_sources( dom_parser.parse_dom(html, 'a', {'rel': 'nofollow'}, ret='href'), video) sources = [] for match in re.finditer('''\$\.get\('([^']+)'\s*,\s*(\{.*?\})''', html): ajax_url, params = match.groups() ajax_url = ajax_url + '?' + urllib.urlencode( scraper_utils.parse_params(params)) ajax_url = urlparse.urljoin(self.base_url, ajax_url) headers = {'Referer': page_url} headers.update(XHR) html = self._http_get(ajax_url, headers=headers, auth=False, cache_limit=.5) sources += dom_parser.parse_dom(html, 'source', {'type': '''video[^'"]*'''}, ret='src') sources += dom_parser.parse_dom(html, 'iframe', ret='src') hosters += self.__add_sources(sources, video, QUALITIES.HD720) return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.25)
        for button in dom_parser.parse_dom(html, 'li', {'class': 'playing_button'}):
            try:
                link = dom_parser.parse_dom(button, 'a', ret='href')
                match = re.search('php\?.*?=?([^"]+)', link[0])
                stream_url = base64.b64decode(match.group(1))
                match = re.search('(http://.*)', stream_url)
                stream_url = match.group(1)
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': False}
                hosters.append(hoster)
            except Exception as e:
                log_utils.log('Exception during tvonline source: %s - |%s|' % (e, button), log_utils.LOGDEBUG)
    return hosters
def search(self, video_type, title, year, season=''):
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search/%s.html' % (urllib.quote_plus(title)))
    html = self._http_get(search_url, cache_limit=1)
    fragment = dom_parser.parse_dom(html, 'ul', {'class': 'cfv'})
    if fragment:
        for item in dom_parser.parse_dom(fragment[0], 'li'):
            is_season = dom_parser.parse_dom(item, 'div', {'class': 'status'})
            if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
                match_url = dom_parser.parse_dom(item, 'a', ret='href')
                match_title = dom_parser.parse_dom(item, 'a', ret='title')
                if match_url and match_title:
                    match_title = match_title[0]
                    match_url = match_url[0]
                    match_year = ''
                    if video_type == VIDEO_TYPES.SEASON:
                        if season and not re.search('Season\s+%s$' % (season), match_title, re.I):
                            continue
                    else:
                        match = re.search('-(\d{4})\.html', match_url)
                        if match:
                            match_year = match.group(1)
                    if not year or not match_year or year == match_year:
                        result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                        results.append(result)
    return results
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=.5) fragment = dom_parser.parse_dom(html, 'table', {'id': 'streamlinks'}) if fragment: max_age = 0 now = min_age = int(time.time()) for row in dom_parser.parse_dom(fragment[0], 'tr', {'id': 'pt\d+'}): if video.video_type == VIDEO_TYPES.MOVIE: pattern = 'href="([^"]+).*?/>([^<]+).*?(?:<td>.*?</td>\s*){1}<td>(.*?)</td>\s*<td>(.*?)</td>' else: pattern = 'href="([^"]+).*?/>([^<]+).*?(<span class="linkdate">.*?)</td>\s*<td>(.*?)</td>' match = re.search(pattern, row, re.DOTALL) if match: url, host, age, quality = match.groups() age = self.__get_age(now, age) quality = quality.upper() if age > max_age: max_age = age if age < min_age: min_age = age host = host.strip() hoster = {'multi-part': False, 'class': self, 'url': scraper_utils.pathify_url(url), 'host': host, 'age': age, 'views': None, 'rating': None, 'direct': False} hoster['quality'] = scraper_utils.get_quality(video, host, QUALITY_MAP.get(quality, QUALITIES.HIGH)) hosters.append(hoster) unit = (max_age - min_age) / 100 if unit > 0: for hoster in hosters: hoster['rating'] = (hoster['age'] - min_age) / unit return hosters
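# Worked example for the age-based rating computed above (a sketch; 'age' is in whatever
# units __get_age returns): with min_age = 1000 and max_age = 11000, unit = (11000 - 1000) / 100 = 100,
# so a link whose age is 6000 gets rating = (6000 - 1000) / 100 = 50, i.e. a value on a 0-100 scale.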
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'div', {'class': 'film-container'})
        if fragment:
            iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src')
            if iframe_url:
                iframe_url = urlparse.urljoin(self.base_url, iframe_url[0])
                headers = {'Referer': page_url}
                html = self._http_get(iframe_url, headers=headers, cache_limit=.5)
                sources = self._parse_sources_list(html)
                for source in sources:
                    quality = sources[source]['quality']
                    host = self._get_direct_hostname(source)
                    stream_url = source + '|User-Agent=%s&Referer=%s' % (scraper_utils.get_ua(), urllib.quote(iframe_url))
                    hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                    match = re.search('(\d+[a-z]bps)', source)
                    if match:
                        hoster['extra'] = match.group(1)
                    hosters.append(hoster)
    hosters.sort(key=lambda x: x.get('extra', ''), reverse=True)
    return hosters
def search(self, video_type, title, year, season=""):
    results = []
    search_url = urlparse.urljoin(self.base_url, "/search?query=%s")
    search_url = search_url % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=8)
    for item in dom_parser.parse_dom(html, "div", {"class": "one_movie-item"}):
        match_url = dom_parser.parse_dom(item, "a", ret="href")
        match_title = dom_parser.parse_dom(item, "img", ret="alt")
        media_type = dom_parser.parse_dom(item, "div", {"class": "movie-series"})
        if not media_type:
            media_type = VIDEO_TYPES.MOVIE
        elif media_type[0] == "TV SERIE":
            media_type = VIDEO_TYPES.TVSHOW
        if match_url and match_title and video_type == media_type:
            match_url = match_url[0]
            match_title = match_title[0]
            match_year = re.search("-(\d{4})-", match_url)
            if match_year:
                match_year = match_year.group(1)
            else:
                match_year = ""
            if not year or not match_year or year == match_year:
                result = {"url": scraper_utils.pathify_url(match_url), "title": scraper_utils.cleanse_title(match_title), "year": match_year}
                results.append(result)
    return results
def __tv_search(self, title, year):
    results = []
    if title:
        norm_title = scraper_utils.normalize_title(title)
        url = '/series/letra/%s/' % (title[0])
        url = urlparse.urljoin(self.base_url, url)
        html = self._http_get(url, cache_limit=48)
        for item in dom_parser.parse_dom(html, 'li', {'class': '[^"]*bpM12[^"]*'}):
            title_frag = dom_parser.parse_dom(item, 'h2')
            year_frag = dom_parser.parse_dom(item, 'div', {'class': '[^"]*sectionDetail[^"]*'})
            match_url = dom_parser.parse_dom(item, 'a', ret='href')
            if title_frag and match_url:
                match_url = match_url[0]
                match = re.search('(.*?)<br>', title_frag[0])
                if match:
                    match_title = match.group(1)
                else:
                    match_title = title_frag[0]
                match_year = ''
                if year_frag:
                    match = re.search('(\d{4})', year_frag[0])
                    if match:
                        match_year = match.group(1)
                if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    search_url = urlparse.urljoin(self.base_url, '/index.php?menu=search&query=')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=.25)
    results = []
    sections = {VIDEO_TYPES.MOVIE: 'movies', VIDEO_TYPES.TVSHOW: 'series'}
    fragment = dom_parser.parse_dom(html, 'div', {'id': sections[video_type]})
    if fragment:
        for item in dom_parser.parse_dom(fragment[0], 'figcaption'):
            match = re.search('title="([^"]+)[^>]+href="([^"]+)', item)
            if match:
                match_title_year, url = match.groups()
                match = re.search('(.*?)\s+\(?(\d{4})\)?', match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    match_title = match_title_year
                    match_year = ''
                if match_title.startswith('Watch '):
                    match_title = match_title.replace('Watch ', '')
                if match_title.endswith(' Online'):
                    match_title = match_title.replace(' Online', '')
                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title), 'url': scraper_utils.pathify_url(url), 'year': match_year}
                    results.append(result)
    return results
def __movie_search(self, title, year):
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search1.php?keywords=%s&ser=506')
    search_url = search_url % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=8)
    fragment = dom_parser.parse_dom(html, 'ul', {'class': 'cbp-rfgrid'})
    if fragment:
        for item in dom_parser.parse_dom(fragment[0], 'li'):
            match_url = dom_parser.parse_dom(item, 'a', ret='href')
            match_title_year = ''
            link_frag = dom_parser.parse_dom(item, 'a')
            if link_frag:
                match_title_year = dom_parser.parse_dom(link_frag[0], 'div')
            if match_url and match_title_year:
                match_url = match_url[0]
                match_title_year = match_title_year[0]
                match = re.search('(.*?)\s+\((\d{4})\)', match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    match_title = match_title_year
                    match_year = ''
                if not year or not match_year or year == match_year:
                    result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
    return results
def _get_episode_url(self, show_url, video): url = urlparse.urljoin(self.base_url, show_url) html = self._http_get(url, cache_limit=8) pattern = "<a[^>]*class='dropdown-toggle'[^>]*>Season\s+%s<(.*?)<li\s+class='divider'>" % (video.season) match = re.search(pattern, html, re.DOTALL) if match: fragment = match.group(1) ep_ids = dom_parser.parse_dom(fragment, 'a', {'id': 'epiloader'}, ret='class') episodes = dom_parser.parse_dom(fragment, 'a', {'id': 'epiloader'}) airdates = dom_parser.parse_dom(fragment, 'span', {'class': 'airdate'}) ep_airdate = video.ep_airdate.strftime('%Y-%m-%d') if isinstance(video.ep_airdate, datetime.date) else '' norm_title = scraper_utils.normalize_title(video.ep_title) num_id, airdate_id, title_id = '', '', '' for episode, airdate, ep_id in zip(episodes, airdates, ep_ids): if ep_airdate and ep_airdate == airdate: airdate_id = ep_id match = re.search('(?:<span[^>]*>)?(\d+)\.\s*([^<]+)', episode) if match: ep_num, ep_title = match.groups() if int(ep_num) == int(video.episode): num_id = ep_id if norm_title and norm_title in scraper_utils.normalize_title(ep_title): title_id = ep_id best_id = '' if not scraper_utils.force_title(video): if num_id: best_id = num_id if kodi.get_setting('airdate-fallback') == 'true' and airdate_id: best_id = airdate_id if kodi.get_setting('title-fallback') == 'true' and title_id: best_id = title_id else: if title_id: best_id = title_id if best_id: return EP_URL % (best_id)
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'div', {'class': 'tab_container'})
        if fragment:
            q_str = 'HDRIP'
            match = re.search('>Quality(.*?)<br\s*/?>', html, re.I)
            if match:
                q_str = match.group(1)
                q_str = re.sub('(</?strong[^>]*>|:|\s)', '', q_str, flags=re.I | re.U)
            for source in dom_parser.parse_dom(fragment[0], 'iframe', ret='src'):
                host = urlparse.urlparse(source).hostname
                quality = scraper_utils.blog_get_quality(video, q_str, host)
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': source, 'direct': False}
                match = re.search('class="views-infos">(\d+).*?class="rating">(\d+)%', html, re.DOTALL)
                if match:
                    hoster['views'] = int(match.group(1))
                    hoster['rating'] = match.group(2)
                hosters.append(hoster)
    return hosters
def get_sources(self, video):
    source_url = self.get_url(video)
    sources = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=0.5)
        if video.video_type == VIDEO_TYPES.EPISODE:
            html = self.__get_episode_fragment(html, video)
        for item in dom_parser.parse_dom(html, "div", {"class": "linkTr"}):
            stream_url = dom_parser.parse_dom(item, "div", {"class": '[^"]*linkHiddenUrl[^"]*'})
            q_str = dom_parser.parse_dom(item, "div", {"class": '[^"]*linkQualityText[^"]*'})
            if stream_url and q_str:
                stream_url = stream_url[0]
                q_str = q_str[0]
                host = urlparse.urlparse(stream_url).hostname
                base_quality = QUALITY_MAP.get(q_str, QUALITIES.HIGH)
                quality = scraper_utils.get_quality(video, host, base_quality)
                source = {"multi-part": False, "url": stream_url, "host": host, "class": self, "quality": quality, "views": None, "rating": None, "direct": False}
                sources.append(source)
    return sources
def search(self, video_type, title, year, season=''): results = [] if title and title[0].isalpha(): page_url = ['/list/?char=%s' % (title[0])] while page_url: page_url = urlparse.urljoin(self.base_url, page_url[0]) html = self._http_get(page_url, cache_limit=48) fragment = dom_parser.parse_dom(html, 'ul', {'class': 'list-film-char'}) if fragment: norm_title = scraper_utils.normalize_title(title) for match in re.finditer('href="([^"]+)[^>]+>(.*?)</a>', fragment[0]): match_url, match_title = match.groups() match_title = re.sub('</?strong>', '', match_title) match = re.search('Season\s+(\d+)', match_title, re.I) if match: if season and int(season) != int(match.group(1)): continue if norm_title in scraper_utils.normalize_title(match_title): result = {'title': scraper_utils.cleanse_title(match_title), 'year': '', 'url': scraper_utils.pathify_url(match_url)} results.append(result) if results: break page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href') return results
def search(self, video_type, title, year, season=''): results = [] search_url = urlparse.urljoin(self.base_url, '/search/') search_url += urllib.quote_plus(title) html = self._http_get(search_url, cache_limit=1) for fragment in dom_parser.parse_dom(html, 'div', {'class': 'inner'}): name = dom_parser.parse_dom(fragment, 'div', {'class': 'name'}) if name: match = re.search('href="([^"]+)[^>]+>(.*?)</a>', name[0]) if match: match_url, match_title_year = match.groups() if 'tv-series' in match_url and video_type == VIDEO_TYPES.MOVIE: continue match_title_year = re.sub('</?[^>]*>', '', match_title_year) match_title_year = re.sub('[Ww]atch\s+[Mm]ovie\s*', '', match_title_year) match_title_year = match_title_year.replace('’', "'") match_title, match_year = scraper_utils.extra_year(match_title_year) if not match_year: year_span = dom_parser.parse_dom(fragment, 'span', {'class': 'year'}) if year_span: year_text = dom_parser.parse_dom(year_span[0], 'a') if year_text: match_year = year_text[0].strip() if not year or not match_year or year == match_year: result = {'title': scraper_utils.cleanse_title(match_title), 'url': scraper_utils.pathify_url(match_url), 'year': match_year} results.append(result) return results
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=.5) quality = None match = re.search('Links\s+-\s+Quality\s*([^<]*)</h1>', html, re.DOTALL | re.I) if match: quality = QUALITY_MAP.get(match.group(1).strip().upper()) seen_links = {} for item in dom_parser.parse_dom(html, 'li', {'id': 'playing_button'}): stream_url = dom_parser.parse_dom(item, 'a', ret='href') if stream_url: stream_url = stream_url[0] match = re.search('url=([^&"]+)', stream_url) if match: stream_url = base64.b64decode(match.group(1)) else: match = re.search('stream\.php\?([^"]+)', stream_url) if match: stream_url = base64.b64decode(match.group(1)) i = stream_url.rfind('&&') if i > -1: stream_url = stream_url[i + 2:] if stream_url in seen_links: continue seen_links[stream_url] = True host = urlparse.urlparse(stream_url).hostname hoster = {'multi-part': False, 'host': host, 'class': self, 'url': stream_url, 'quality': scraper_utils.get_quality(video, host, quality), 'rating': None, 'views': None, 'direct': False} hosters.append(hoster) return hosters
def search(self, video_type, title, year, season=''):
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=8)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'item'}):
        match = re.search('href="([^"]+)', item)
        match_title = dom_parser.parse_dom(item, 'span', {'class': 'tt'})
        year_frag = dom_parser.parse_dom(item, 'span', {'class': 'year'})
        if match and match_title:
            url = match.group(1)
            match_title = match_title[0]
            if re.search('\d+\s*x\s*\d+', match_title):
                continue  # exclude episodes
            match_title, match_year = scraper_utils.extra_year(match_title)
            if not match_year and year_frag:
                match_year = year_frag[0]
            match = re.search('(.*?)\s+\d{3,}p', match_title)
            if match:
                match_title = match.group(1)
            extra = dom_parser.parse_dom(item, 'span', {'class': 'calidad2'})
            if extra:
                match_title += ' [%s]' % (extra[0])
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(url)}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    search_url = urlparse.urljoin(self.base_url, '/movie/search/')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=.25)
    results = []
    sections = {VIDEO_TYPES.MOVIE: 'movies', VIDEO_TYPES.TVSHOW: 'series'}
    fragment = dom_parser.parse_dom(html, 'div', {'id': sections[video_type]})
    if fragment:
        for item in dom_parser.parse_dom(fragment[0], 'figcaption'):
            match = re.search('title="([^"]+)[^>]+href="([^"]+)', item)
            if match:
                match_title_year, url = match.groups()
                match = re.search('(.*?)\s+\(?(\d{4})\)?', match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    match_title = match_title_year
                    match_year = ''
                url = urlparse.urljoin(url, 'watching.html')
                if match_title.startswith('Watch '):
                    match_title = match_title.replace('Watch ', '')
                if match_title.endswith(' Online'):
                    match_title = match_title.replace(' Online', '')
                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(url)}
                    results.append(result)
    return results
def _get_episode_url(self, show_url, video): episode_pattern = 'href="([^"]+-s0*%se0*%s(?!\d)[^"]*)' % (video.season, video.episode) result = self._default_get_episode_url(show_url, video, episode_pattern) if result: return result url = urlparse.urljoin(self.base_url, show_url) html = self._http_get(url, cache_limit=2) fragment = dom_parser.parse_dom(html, "ul", {"class": "episode_list"}) if fragment: ep_urls = dom_parser.parse_dom(fragment[0], "a", ret="href") ep_dates = dom_parser.parse_dom(fragment[0], "span", {"class": "episode_air_d"}) ep_titles = dom_parser.parse_dom(fragment[0], "span", {"class": "episode_name"}) force_title = scraper_utils.force_title(video) if not force_title and kodi.get_setting("airdate-fallback") == "true" and video.ep_airdate: for ep_url, ep_date in zip(ep_urls, ep_dates): log_utils.log( "Quikr Ep Airdate Matching: %s - %s - %s" % (ep_url, ep_date, video.ep_airdate), log_utils.LOGDEBUG, ) if video.ep_airdate == scraper_utils.to_datetime(ep_date, "%Y-%m-%d").date(): return scraper_utils.pathify_url(ep_url) if force_title or kodi.get_setting("title-fallback") == "true": norm_title = scraper_utils.normalize_title(video.ep_title) for ep_url, ep_title in zip(ep_urls, ep_titles): ep_title = re.sub("<span>.*?</span>\s*", "", ep_title) log_utils.log( "Quikr Ep Title Matching: %s - %s - %s" % (ep_url, norm_title, video.ep_title), log_utils.LOGDEBUG, ) if norm_title == scraper_utils.normalize_title(ep_title): return scraper_utils.pathify_url(ep_url)
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=1)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'item'}):
        match = re.search('href="([^"]+).*?alt="([^"]+)', item, re.DOTALL)
        if match:
            url, match_title_year = match.groups()
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if not match_year:
                year_fragment = dom_parser.parse_dom(item, 'span', {'class': 'year'})
                if year_fragment:
                    match_year = year_fragment[0]
                else:
                    match_year = ''
            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        q_str = dom_parser.parse_dom(html, 'span', {'class': 'calidad\d*'})
        if q_str:
            if q_str[0].upper() == 'COMING SOON':
                return hosters
            try:
                quality = scraper_utils.height_get_quality(q_str[0])
            except:
                quality = QUALITIES.HIGH
        else:
            quality = QUALITIES.HIGH
        fragment = dom_parser.parse_dom(html, 'div', {'id': 'player\d+'})
        if fragment:
            for match in re.finditer('<iframe[^>]+src="([^"]+)', fragment[0], re.I):
                stream_url = match.group(1)
                host = urlparse.urlparse(stream_url).hostname
                hoster = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': False}
                hosters.append(hoster)
    return hosters
def search(self, video_type, title, year, season=''): results = [] search_url = urlparse.urljoin(self.base_url, '/?s=%s' % (urllib.quote_plus(title))) html = self._http_get(search_url, cache_limit=8) for movie in dom_parser.parse_dom(html, 'div', {'class': 'movie'}): match = re.search('href="([^"]+)', movie) if match: match_url = match.group(1) match_title_year = dom_parser.parse_dom(movie, 'img', ret='alt') if match_title_year: match_title_year = match_title_year[0] match = re.search('(.*?)\s+\((\d{4})\)', match_title_year) if match: match_title, match_year = match.groups() else: match_title = match_title_year match_year = dom_parser.parse_dom(movie, 'div', {'class': 'year'}) try: match_year = match_year[0] except: match_year = '' if not year or not match_year or year == match_year: result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year} results.append(result) return results
def search(self, video_type, title, year, season=''): search_url = urlparse.urljoin(self.base_url, '/search/%s.html') search_url = search_url % (urllib.quote_plus(title)) html = self._http_get(search_url, cache_limit=8) results = [] for thumb in dom_parser.parse_dom(html, 'div', {'class': 'thumb'}): match_title = dom_parser.parse_dom(thumb, 'a', {'class': 'clip-link'}, ret='title') url = dom_parser.parse_dom(thumb, 'a', {'class': 'clip-link'}, ret='href') if match_title and url: match_title, url = match_title[0], url[0] is_season = re.search('Season\s+(\d+)$', match_title, re.I) if (not is_season and video_type == VIDEO_TYPES.MOVIE) or ( is_season and video_type == VIDEO_TYPES.SEASON): match_year = '' if video_type == VIDEO_TYPES.MOVIE: match_year = dom_parser.parse_dom( thumb, 'div', {'class': '[^"]*status-year[^"]*'}) if match_year: match_year = match_year[0] else: if season and int(is_season.group(1)) != int(season): continue if not year or not match_year or year == match_year: result = { 'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year } results.append(result) return results
def search(self, video_type, title, year, season=''): search_url = urlparse.urljoin(self.base_url, '/search/%s.html') search_url = search_url % (urllib.quote_plus(title)) html = self._http_get(search_url, cache_limit=1) results = [] fragment = dom_parser.parse_dom(html, 'div', {'class': 'movie'}) if fragment: for item in dom_parser.parse_dom(fragment[0], 'li'): match_url = dom_parser.parse_dom(item, 'a', ret='href') match_title = dom_parser.parse_dom(item, 'span', {'class': 'text'}) match_year = dom_parser.parse_dom(item, 'span', {'class': 'year'}) if match_url and match_title: match_url = match_url[0] match_title = re.sub('</?strong>', '', match_title[0]) is_season = re.search('Season\s+(\d+)$', match_title, re.I) if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON): if video_type == VIDEO_TYPES.MOVIE: if match_year: match_year = match_year[0] else: match_year = '' else: if season and int(is_season.group(1)) != int(season): continue match_year = '' if not year or not match_year or year == match_year: result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)} results.append(result) return results
def __get_gk_links(self, html, page_url): sources = {} for link in dom_parser.parse_dom(html, 'div', {'class': '[^"]*server_line[^"]*'}): film_id = dom_parser.parse_dom(link, 'a', ret='data-film') name_id = dom_parser.parse_dom(link, 'a', ret='data-name') server_id = dom_parser.parse_dom(link, 'a', ret='data-server') if film_id and name_id and server_id: data = {'ipplugins': 1, 'ip_film': film_id[0], 'ip_server': server_id[0], 'ip_name': name_id[0]} headers = XHR headers['Referer'] = page_url url = urlparse.urljoin(self.base_url, LINK_URL) html = self._http_get(url, data=data, headers=headers, cache_limit=.25) js_data = scraper_utils.parse_json(html, url) if 's' in js_data: url = urlparse.urljoin(self.base_url, LINK_URL2) params = {'u': js_data['s'], 'w': '100%', 'h': 420} html = self._http_get(url, params=params, data=data, headers=headers, cache_limit=.25) js_data = scraper_utils.parse_json(html, url) if 'data' in js_data and js_data['data']: if isinstance(js_data['data'], basestring): sources[js_data['data']] = QUALITIES.HIGH else: for link in js_data['data']: stream_url = link['files'] if self._get_direct_hostname(stream_url) == 'gvideo': quality = scraper_utils.gv_get_quality(stream_url) elif 'quality' in link: quality = scraper_utils.height_get_quality(link['quality']) else: quality = QUALITIES.HIGH sources[stream_url] = quality return sources
def get_sources(self, video): source_url = self.get_url(video) sources = [] if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=.5) is_3d = False page_quality = QUALITIES.HD720 title = dom_parser.parse_dom(html, 'title') if title: title = title[0] match = re.search('(\d{3,})p', title) if match: page_quality = scraper_utils.height_get_quality(match.group(1)) is_3d = True if re.search('\s+3D\s+', title) else False fragments = dom_parser.parse_dom(html, 'div', {'class': 'txt-block'}) + dom_parser.parse_dom(html, 'li', {'class': 'elemento'}) for fragment in fragments: for match in re.finditer('href="([^"]+)', fragment): stream_url = match.group(1) host = urlparse.urlparse(stream_url).hostname q_str = dom_parser.parse_dom(fragment, 'span', {'class': 'd'}) q_str = q_str[0].upper() if q_str else '' base_quality = QUALITY_MAP.get(q_str, page_quality) quality = scraper_utils.get_quality(video, host, base_quality) source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': False} source['format'] = 'x265' source['3D'] = is_3d sources.append(source) return sources
def search(self, video_type, title, year, season=''):
    search_url = urlparse.urljoin(self.base_url, '/?s=')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=1)
    results = []
    for item in dom_parser.parse_dom(html, 'div', {'class': 'item'}):
        match = re.search('href="([^"]+)', item)
        match_title = dom_parser.parse_dom(item, 'span', {'class': 'tt'})
        year_frag = dom_parser.parse_dom(item, 'span', {'class': 'year'})
        if match and match_title:
            url = match.group(1)
            match_title = match_title[0]
            if re.search('\d+\s*x\s*\d+', match_title):
                continue  # exclude episodes
            match = re.search('(.*?)\s+\((\d{4})\)', match_title)
            if match:
                match_title, match_year = match.groups()
            else:
                match_year = ''
                if year_frag:
                    match_year = year_frag[0]
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(url)}
                results.append(result)
    return results
def get_sources(self, video):
    source_url = self.get_url(video)
    sources = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=8)
        for fragment in dom_parser.parse_dom(html, 'div', {'class': 'video'}):
            for source in dom_parser.parse_dom(fragment, 'iframe', ret='src') + dom_parser.parse_dom(fragment, 'script', ret='src'):
                if 'validateemb' in source:
                    continue
                host = urlparse.urlparse(source).hostname
                source = {'multi-part': False, 'url': source, 'host': host, 'class': self, 'quality': QUALITIES.HD720, 'views': None, 'rating': None, 'direct': False}
                sources.append(source)
    return sources
def __get_direct_links(self, html, page_url): hosters = [] match = re.search("&u=([^']+)", html) if match: u = match.group(1) fragment = dom_parser.parse_dom(html, 'ul', {'class': '[^"]*rektab[^"]*'}) if fragment: page = dom_parser.parse_dom(fragment[0], 'a', ret='id') if page: page = page[0] ss = dom_parser.parse_dom(fragment[0], 'a', {'id': page}, ret='class') for s in ss: yt_url = YT_URL % (page, s, u) url = urlparse.urljoin(self.base_url, yt_url) headers = {'Referer': page_url} html = self._http_get(url, headers=headers, cache_limit=.5) sources = self._parse_sources_list(html) for source in sources: host = self._get_direct_hostname(source) if sources[source]['quality']: quality = sources[source]['quality'] else: quality = QUALITIES.HIGH stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua()) hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True} hosters.append(hoster) return hosters
def search(self, video_type, title, year, season=''): search_url = urlparse.urljoin(self.base_url, '/search/%s.html') search_url = search_url % (urllib.quote_plus(title)) html = self._http_get(search_url, cache_limit=8) results = [] for thumb in dom_parser.parse_dom(html, 'div', {'class': 'thumb'}): match_title = dom_parser.parse_dom(thumb, 'a', {'class': 'clip-link'}, ret='title') url = dom_parser.parse_dom(thumb, 'a', {'class': 'clip-link'}, ret='href') if match_title and url: match_title, url = match_title[0], url[0] is_season = re.search('Season\s+(\d+)$', match_title, re.I) if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON): match_year = '' if video_type == VIDEO_TYPES.MOVIE: match_year = dom_parser.parse_dom(thumb, 'div', {'class': '[^"]*status-year[^"]*'}) if match_year: match_year = match_year[0] else: if season and int(is_season.group(1)) != int(season): continue if not year or not match_year or year == match_year: result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year} results.append(result) return results
def search(self, video_type, title, year, season=''):
    results = []
    page_url = urlparse.urljoin(self.base_url, '/tvseries/index.php?&page=1')
    while page_url:
        html = self._http_get(page_url, cache_limit=48)
        html = re.sub('<!--.*?-->', '', html)
        norm_title = scraper_utils.normalize_title(title)
        for td in dom_parser.parse_dom(html, 'td', {'class': 'topic_content'}):
            match_url = re.search('href="([^"]+)', td)
            match_title_year = dom_parser.parse_dom(td, 'img', ret='alt')
            if match_url and match_title_year:
                match_url = match_url.group(1)
                if not match_url.startswith('/'):
                    match_url = '/tvseries/' + match_url
                match_title, match_year = scraper_utils.extra_year(match_title_year[0])
                if norm_title in scraper_utils.normalize_title(match_title):
                    result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
        match = re.search('href="([^"]+)[^>]*>>', html)
        if match:
            page_url = urlparse.urljoin(self.base_url, match.group(1))
        else:
            page_url = ''
    return results
def _get_episode_url(self, show_url, video): force_title = scraper_utils.force_title(video) title_fallback = kodi.get_setting('title-fallback') == 'true' norm_title = scraper_utils.normalize_title(video.ep_title) page_url = [show_url] too_old = False while page_url and not too_old: url = urlparse.urljoin(self.base_url, page_url[0]) html = self._http_get(url, require_debrid=True, cache_limit=1) posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'}) for post in posts: if self.__too_old(post): too_old = True break if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post: match = re.search('<a\s+href="([^"]+)[^>]+>(.*?)</a>', post) if match: url, title = match.groups() if not force_title: if scraper_utils.release_check(video, title, require_title=False): return scraper_utils.pathify_url(url) else: if title_fallback and norm_title: match = re.search('</strong>(.*?)</p>', post) if match and norm_title == scraper_utils.normalize_title(match.group(1)): return scraper_utils.pathify_url(url) page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=.5) fragment = dom_parser.parse_dom(html, 'div', {'class': 'repro'}) if fragment: iframe_url = dom_parser.parse_dom(fragment[0], 'iframe', ret='src') if iframe_url: html = self._http_get(iframe_url[0], cache_limit=.5) fragment = dom_parser.parse_dom(html, 'div', {'id': 'botones'}) if fragment: for media_url in dom_parser.parse_dom(fragment[0], 'a', ret='href'): media_url = media_url.replace(' ', '') if self.base_url in media_url or 'pelispedia.biz' in media_url: headers = {'Referer': iframe_url[0]} html = self._http_get(media_url, headers=headers, cache_limit=.5) hosters += self.__get_page_links(html) hosters += self.__get_pk_links(html) hosters += self.__get_gk_links(html, url) else: host = urlparse.urlparse(media_url).hostname hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': QUALITIES.HD720, 'views': None, 'rating': None, 'url': media_url, 'direct': False} hosters.append(hoster) return hosters
def search(self, video_type, title, year, season=''):
    search_url = urlparse.urljoin(self.base_url, '/search?keyword=%s' % (urllib.quote_plus(title)))
    html = self._http_get(search_url, cache_limit=1)
    results = []
    match_year = ''
    fragment = dom_parser.parse_dom(html, 'div', {'class': '[^"]*movie-list[^"]*'})
    if fragment:
        for item in dom_parser.parse_dom(fragment[0], 'div', {'class': 'item'}):
            links = dom_parser.parse_dom(item, 'a', {'class': 'name'}, ret='href')
            titles = dom_parser.parse_dom(item, 'a', {'class': 'name'})
            is_season = dom_parser.parse_dom(item, 'div', {'class': 'status'})
            for match_url, match_title in zip(links, titles):
                if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
                    if video_type == VIDEO_TYPES.SEASON:
                        if season and not re.search('\s+%s$' % (season), match_title):
                            continue
                    if not year or not match_year or year == match_year:
                        result = {'title': scraper_utils.cleanse_title(match_title), 'year': '', 'url': scraper_utils.pathify_url(match_url)}
                        results.append(result)
    return results
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, require_debrid=True, cache_limit=.5) title = dom_parser.parse_dom(html, 'title') if title: title = re.sub('^\[ST\]\s*–\s*', '', title[0]) meta = scraper_utils.parse_episode_link(title) page_quality = scraper_utils.height_get_quality(meta['height']) else: page_quality = QUALITIES.HIGH fragment = dom_parser.parse_dom(html, 'section', {'class': '[^"]*entry-content[^"]*'}) if fragment: for section in dom_parser.parse_dom(fragment[0], 'p'): match = re.search('([^<]*)', section) meta = scraper_utils.parse_episode_link(match.group(1)) if meta['episode'] != '-1' or meta['airdate']: section_quality = scraper_utils.height_get_quality(meta['height']) else: section_quality = page_quality if Q_ORDER[section_quality] < Q_ORDER[page_quality]: quality = section_quality else: quality = page_quality for stream_url in dom_parser.parse_dom(section, 'a', ret='href'): host = urlparse.urlparse(stream_url).hostname hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'quality': quality, 'direct': False} hosters.append(hoster) return hosters
def search(self, video_type, title, year, season=''):
    results = []
    html = self._http_get(self.base_url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    for series in dom_parser.parse_dom(html, 'div', {'class': 'series-item'}):
        match_url = dom_parser.parse_dom(series, 'a', ret='href')
        match_title = dom_parser.parse_dom(series, 'h3')
        match_year = dom_parser.parse_dom(series, 'p')
        if match_url and match_title:
            match_url = match_url[0]
            match_title = match_title[0]
            if match_year:
                match = re.search('\s*(\d{4})\s+', match_year[0])
                if match:
                    match_year = match.group(1)
                else:
                    match_year = ''
            else:
                match_year = ''
            if norm_title in scraper_utils.normalize_title(match_title):
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if source_url and source_url != FORCE_NO_MATCH: url = urlparse.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=.5) for server_list in dom_parser.parse_dom(html, 'ul', {'class': 'episodes'}): labels = dom_parser.parse_dom(server_list, 'a') hash_ids = dom_parser.parse_dom(server_list, 'a', ret='data-id') for label, hash_id in zip(labels, hash_ids): if video.video_type == VIDEO_TYPES.EPISODE and not self.__episode_match( label, video.episode): continue hash_url = urlparse.urljoin(self.base_url, HASH_URL) query = {'id': hash_id, 'update': '0'} query.update(self.__get_token(query)) hash_url = hash_url + '?' + urllib.urlencode(query) headers = XHR headers['Referer'] = url html = self._http_get(hash_url, headers=headers, cache_limit=.5) js_data = scraper_utils.parse_json(html, hash_url) sources = {} link_type = js_data.get('type') target = js_data.get('target') grabber = js_data.get('grabber') params = js_data.get('params') if link_type == 'iframe' and target: sources[target] = { 'direct': False, 'quality': QUALITIES.HD720 } elif grabber and params: sources = self.__grab_links(grabber, params, url) for source in sources: direct = sources[source]['direct'] quality = sources[source]['quality'] if direct: host = self._get_direct_hostname(source) else: host = urlparse.urlparse(source).hostname hoster = { 'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': source, 'direct': direct } hosters.append(hoster) return hosters
def _get_episode_url(self, season_url, video):
    url = urlparse.urljoin(self.base_url, season_url)
    html = self._http_get(url, cache_limit=8)
    fragment = dom_parser.parse_dom(html, 'ul', {'class': 'episodes'})
    if fragment:
        for link in dom_parser.parse_dom(fragment[0], 'a'):
            if self.__episode_match(link, video.episode):
                return season_url
def __get_pages(self, url):
    pages = []
    url = urlparse.urljoin(self.base_url, url)
    html = self._http_get(url, cache_limit=2)
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'pagination'})
    if fragment:
        pages = dom_parser.parse_dom(fragment[0], 'a', ret='href')
    return pages
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = urlparse.urljoin(self.base_url, '/movies/')
    html = self._http_get(search_url, params={'q': title}, cache_limit=4)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'movie_about'}):
        match_url = dom_parser.parse_dom(item, 'a', ret='href')
        match_title_year = dom_parser.parse_dom(item, 'a')
        if match_url and match_title_year:
            match_url = match_url[0]
            match_title, match_year = scraper_utils.extra_year(match_title_year[0])
            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search')
    data = {'search': title, 'type': 'title'}
    html = self._http_get(search_url, data=data, headers=XHR, cache_limit=1)
    for item in dom_parser.parse_dom(html, 'li'):
        match_url = dom_parser.parse_dom(item, 'a', ret='href')
        match_title_year = dom_parser.parse_dom(item, 'a')
        if match_url and match_title_year:
            match_url = match_url[0]
            match_title, match_year = scraper_utils.extra_year(match_title_year[0])
            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results
def __get_source_page(self, source_url):
    html = ''
    url = urlparse.urljoin(self.base_url, source_url)
    page_html = self._http_get(url, cache_limit=8)
    movie_id = dom_parser.parse_dom(page_html, 'div', {'id': 'media-player'}, 'movie-id')
    token = dom_parser.parse_dom(page_html, 'div', {'id': 'media-player'}, 'player-token')
    if movie_id and token:
        server_url = SL_URL % (movie_id[0], token[0])
        headers = XHR
        headers['Referer'] = url
        url = urlparse.urljoin(self.base_url, server_url)
        html = self._http_get(url, headers=headers, cache_limit=8)
    return html
def __get_gk_links(self, html, page_url): sources = {} for link in dom_parser.parse_dom(html, 'div', {'class': '[^"]*server_line[^"]*'}): film_id = dom_parser.parse_dom(link, 'a', ret='data-film') name_id = dom_parser.parse_dom(link, 'a', ret='data-name') server_id = dom_parser.parse_dom(link, 'a', ret='data-server') if film_id and name_id and server_id: data = { 'ipplugins': 1, 'ip_film': film_id[0], 'ip_server': server_id[0], 'ip_name': name_id[0] } headers = XHR headers['Referer'] = page_url url = urlparse.urljoin(self.base_url, LINK_URL) html = self._http_get(url, data=data, headers=headers, cache_limit=.25) js_data = scraper_utils.parse_json(html, url) if 's' in js_data: url = urlparse.urljoin(self.base_url, LINK_URL2) params = {'u': js_data['s'], 'w': '100%', 'h': 420} html = self._http_get(url, params=params, data=data, headers=headers, cache_limit=.25) js_data = scraper_utils.parse_json(html, url) if 'data' in js_data and js_data['data']: if isinstance(js_data['data'], basestring): sources[js_data['data']] = QUALITIES.HIGH else: for link in js_data['data']: stream_url = link['files'] if self._get_direct_hostname( stream_url) == 'gvideo': quality = scraper_utils.gv_get_quality( stream_url) elif 'quality' in link: quality = scraper_utils.height_get_quality( link['quality']) else: quality = QUALITIES.HIGH sources[stream_url] = quality return sources
def search(self, video_type, title, year, season=''): search_url = urlparse.urljoin( self.base_url, '/results?q=%s' % urllib.quote_plus(title)) html = self._http_get(search_url, cache_limit=.25) results = [] for result in dom_parser.parse_dom(html, 'div', {'class': 'cell'}): match = re.search( 'class="video_title".*?href="([^"]+)"[^>]*>\s*([^<]+)', result, re.DOTALL) if match: url, match_title_year = match.groups() match = re.search('(.*?)\s+\((\d{4})\)', match_title_year) if match: match_title, match_year = match.groups() else: match_title = match_title_year match = re.search( 'class="video_quality".*?Year\s*(?:</b>)?\s*:\s*(\d{4})', result, re.DOTALL) if match: match_year = match.group(1) else: match_year = '' if not year or not match_year or year == match_year: result = { 'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year } results.append(result) return results
def search(self, video_type, title, year):
    search_url = urlparse.urljoin(self.base_url, '/?s=')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=.25)
    elements = dom_parser.parse_dom(html, 'li', {'class': '[^"]*%s[^"]*' % (CATEGORIES[video_type])})
    results = []
    for element in elements:
        match = re.search('href="([^"]+)[^>]+>\s*([^<]+)', element, re.DOTALL)
        if match:
            url, match_title_year = match.groups()
            match = re.search('(.*?)(?:\s+\(?(\d{4})\)?)', match_title_year)
            if match:
                match_title, match_year = match.groups()
            else:
                match_title = match_title_year
                match_year = ''
            if not year or not match_year or year == match_year:
                result = {'title': match_title, 'year': match_year, 'url': url.replace('https', 'http').replace(self.base_url, '')}
                results.append(result)
    return results
def __get_post_links(self, html, video):
    sources = {}
    post = dom_parser.parse_dom(html, 'div', {'class': 'postContent'})
    if post:
        post = post[0].content
        results = re.findall('<p\s+style="text-align:\s*center;">(?:\s*<strong>)*(.*?)<br(.*?)</p>', post, re.DOTALL)
        if not results:
            match = re.search('>Release Name\s*:(.*?)<br', post, re.I)
            release = match.group(1) if match else ''
            match = re.search('>Download\s*:(.*?)</p>', post, re.DOTALL | re.I)
            links = match.group(1) if match else ''
            results = [(release, links)]
        for result in results:
            release, links = result
            release = re.sub('</?[^>]*>', '', release)
            for match in re.finditer('href="([^"]+)">([^<]+)', links):
                stream_url, hostname = match.groups()
                if hostname.upper() in ['TORRENT SEARCH', 'VIP FILE']:
                    continue
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.blog_get_quality(video, release, host)
                sources[stream_url] = quality
    return sources
def _http_get(self, url, data=None, headers=None, auth=True, method=None, cache_limit=8):
    # return an uncached blank page if no username or password is configured
    if not self.username or not self.password:
        return ''
    html = super(self.__class__, self)._http_get(url, data=data, headers=headers, method=method, cache_limit=cache_limit)
    if auth and not dom_parser.parse_dom(html, 'a', {'title': 'My Account'}, ret='href'):
        log_utils.log('Logging in for url (%s)' % (url), log_utils.LOGDEBUG)
        self.__login()
        html = super(self.__class__, self)._http_get(url, data=data, headers=headers, method=method, cache_limit=0)
    return html
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=.5)
    if not html:
        url = scraper_utils.urljoin(self.old_base_url, source_url)
        html = self._http_get(url, require_debrid=True, cache_limit=.5)
    sources.update(self.__get_post_links(html, video))
    if kodi.get_setting('%s-include_comments' % (self.get_name())) == 'true':
        for _attrs, comment in dom_parser.parse_dom(html, 'div', {'id': re.compile('commentbody-\d+')}):
            sources.update(self.__get_comment_links(comment, video))
    for source in sources:
        if scraper_utils.excluded_link(source):
            continue
        host = urlparse.urlparse(source).hostname
        hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': source, 'rating': None, 'quality': sources[source], 'direct': False}
        hosters.append(hoster)
    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    if title:
        first_letter = title[:1].lower()
        if first_letter.isdigit():
            first_letter = '0-9'
        search_url = '/search.php/%s/' % (first_letter)
        search_url = urlparse.urljoin(self.base_url, search_url)
        html = self._http_get(search_url, cache_limit=24)
        fragment = dom_parser.parse_dom(html, 'div', {'class': 'home'})
        if fragment:
            norm_title = scraper_utils.normalize_title(title)
            for match in re.finditer('''href=["']([^'"]+)[^>]+>([^<]+)''', fragment[0]):
                url, match_title_year = match.groups()
                match_title, match_year = scraper_utils.extra_year(match_title_year)
                if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''): search_url = self.base_url if video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]: search_url += '/?tv' search_url += '/index.php?advanced_search=' search_url += urllib.quote_plus(title) search_url += '&year=' + urllib.quote_plus(str(year)) search_url += '&advanced_search=Search' html = self._http_get(search_url, cache_limit=.25) results = [] for element in dom_parser.parse_dom(html, 'div', {'class': 'list_box_title'}): match = re.search('href="([^"]+)"\s+title="(?:Watch )?([^"]+)', element) if match: url, match_title_year = match.groups() match = re.search('(.*?)(?:\s+\(?\s*(\d{4})\s*\)?)', match_title_year) if match: match_title, match_year = match.groups() else: match_title = match_title_year match_year = '' if not year or not match_year or year == match_year: result = { 'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year } results.append(result) return results
def __alt_search(self, video_type, title, year, season=''): results = [] params = title.lower() if year: params += ' %s' % (year) if video_type == VIDEO_TYPES.SEASON and season: params += ' Season %s' % (season) params = {'key': params} search_url = urlparse.urljoin(self.base_url, '/search') html = self._http_get(search_url, params=params, cache_limit=1) norm_title = scraper_utils.normalize_title(title) for item in dom_parser.parse_dom(html, 'div', {'class': 'caption'}): match = re.search('href="([^"]+)[^>]+>(.*?)<span[^>]*>', item) if match: match_url, match_title = match.groups() is_season = re.search('-season-\d+', match_url) if (video_type == VIDEO_TYPES.MOVIE and not is_season) or (video_type == VIDEO_TYPES.SEASON and is_season): if video_type == VIDEO_TYPES.SEASON: if season and not re.search('season-0*%s$' % (season), match_url): continue match_title = re.sub('</?[^>]*>', '', match_title) match_title = re.sub('\s+Full\s+Movie', '', match_title) match = re.search('-(\d{4})(?:$|-)', match_url) if match: match_year = match.group(1) else: match_year = '' if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year): result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)} results.append(result) return results
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        q_str = ''
        match = re.search('>quality(.*?)<br\s*/>', html, re.I)
        if match:
            q_str = match.group(1)
            q_str = q_str.decode('utf-8').encode('ascii', 'ignore')
            q_str = re.sub('(</?strong[^>]*>|:|\s)', '', q_str, flags=re.I | re.U)
        fragment = dom_parser.parse_dom(html, 'div', {'class': 'video-embed'})
        if fragment:
            for match in re.finditer('<iframe[^>]+src="([^"]+)', fragment[0], re.I):
                stream_url = match.group(1)
                host = urlparse.urlparse(stream_url).hostname
                hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': scraper_utils.blog_get_quality(video, q_str, host), 'views': None, 'rating': None, 'url': stream_url, 'direct': False}
                match = re.search('class="views-infos">(\d+).*?class="rating">(\d+)%', html, re.DOTALL)
                if match:
                    hoster['views'] = int(match.group(1))
                    hoster['rating'] = match.group(2)
                hosters.append(hoster)
    return hosters
def parseDOM(html, name='', attrs=None, ret=False):
    if attrs:
        attrs = dict((key, re.compile(value + ('$' if value else ''))) for key, value in attrs.iteritems())
    results = dom_parser.parse_dom(html, name, attrs, ret)
    if ret:
        results = [result.attrs[ret.lower()] for result in results]
    else:
        results = [result.content for result in results]
    return results
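# Usage sketch for the compatibility wrapper above (hypothetical HTML snippet; assumes dom_parser
# is the dom_parser2-style module whose results expose .attrs and .content, as the wrapper relies on):
#   sample = '<a class="name" href="/watch/123">Example Title</a>'
#   parseDOM(sample, 'a', attrs={'class': 'name'}, ret='href')  # attribute values, e.g. ['/watch/123']
#   parseDOM(sample, 'a', attrs={'class': 'name'})              # inner content, e.g. ['Example Title']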
def resolve_link(self, link):
    if not link.startswith('http'):
        stream_url = urlparse.urljoin(self.base_url, link)
        html = self._http_get(stream_url, cache_limit=0)
        iframe_url = dom_parser.parse_dom(html, 'iframe', ret='src')
        if iframe_url:
            return iframe_url[0]
    else:
        return link