Python cleanse_titleの例、salts_lib.scraper_utils.cleanse_title Pythonの例

コード例 #1

0

ファイルを表示

ファイル: 2ddl_scraper.py プロジェクト: CYBERxNUKE/xbmc-addon

 def search(self, video_type, title, year, season=''):  # @UnusedVariable
     results = []
     search_url = '/search/' + urllib.quote_plus(title)
     html = self._http_get(search_url, require_debrid=True, cache_limit=1)
     if video_type == VIDEO_TYPES.TVSHOW:
         seen_urls = {}
         for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
             if CATEGORIES[video_type] not in post: continue
             match = re.search('<span>\s*TAGS:\s*</span>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', post, re.I)
             if match:
                 show_url, match_title = match.groups()
                 if show_url in seen_urls: continue
                 result = {'url': scraper_utils.pathify_url(show_url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                 seen_urls[show_url] = result
                 results.append(result)
     elif video_type == VIDEO_TYPES.MOVIE:
         norm_title = scraper_utils.normalize_title(title)
         headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
         posts = [result.content for result in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
         for heading, post in zip(headings, posts):
             if CATEGORIES[video_type] not in post or self.__too_old(post): continue
             post_url, post_title = heading
             meta = scraper_utils.parse_movie_link(post_title)
             full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
             match_year = meta['year']
             
             match_norm_title = scraper_utils.normalize_title(meta['title'])
             if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                 result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                 results.append(result)
         
     return results

コード例 #2

0

ファイルを表示

ファイル: filmikz_scraper.py プロジェクト: AMOboxTV/AMOBox.LegoBuild

    def search(self, video_type, title, year, season=''):
        search_url = urlparse.urljoin(self.base_url, '/index.php?search=%s&image.x=0&image.y=0')
        search_url = search_url % (urllib.quote_plus(title))
        html = self._http_get(search_url, cache_limit=.25)

        results = []
        # Are we on a results page?
        if not re.search('window\.location', html):
            pattern = '<td[^>]+class="movieText"[^>]*>(.*?)</p>.*?href="(/watch/[^"]+)'
            for match in re.finditer(pattern, html, re.DOTALL):
                match_title_year, match_url = match.groups('')
                # skip p**n
                if '-XXX-' in match_url.upper() or ' XXX:' in match_title_year: continue
                
                match_title_year = re.sub('</?.*?>', '', match_title_year)
                match = re.search('(.*?)\s+\(?(\d{4})\)?', match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    match_title = match_title_year
                    match_year = ''
                
                if not year or not match_year or year == match_year:
                    result = {'url': match_url, 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
        else:
            match = re.search('window\.location\s+=\s+"([^"]+)', html)
            if match:
                url = match.group(1)
                if url != 'movies.php':
                    result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(title), 'year': year}
                    results.append(result)
        return results

コード例 #3

0

ファイルを表示

ファイル: filmikz_scraper.py プロジェクト: EPiC-APOC/repository.xvbmc

    def search(self, video_type, title, year, season=''):
        results = []
        search_url = urlparse.urljoin(self.base_url, '/index.php')
        params = {'search': title, 'image.x': 0, 'image.y': 0}
        html = self._http_get(search_url, params=params, cache_limit=1)

        # Are we on a results page?
        if not re.search('window\.location', html):
            pattern = '<td[^>]+class="movieText"[^>]*>(.*?)</p>.*?href="(/watch/[^"]+)'
            for match in re.finditer(pattern, html, re.DOTALL):
                match_title_year, match_url = match.groups('')
                # skip p**n
                if '-XXX-' in match_url.upper() or ' XXX:' in match_title_year: continue
                match_title_year = re.sub('</?.*?>', '', match_title_year)
                match_title, match_year = scraper_utils.extra_year(match_title_year)
                if not year or not match_year or year == match_year:
                    result = {'url': match_url, 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
        else:
            match = re.search('window\.location\s+=\s+"([^"]+)', html)
            if match:
                url = match.group(1)
                if url != 'movies.php':
                    result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(title), 'year': year}
                    results.append(result)
        return results

コード例 #4

0

ファイルを表示

ファイル: dizibox_scraper.py プロジェクト: CYBERxNUKE/xbmc-addon

 def __get_ok(self, embed, flashvars):
     hosters = []
     link = flashvars[0].attrs['value']
     match = re.search('metadataUrl=([^"]+)', link)
     if match:
         referer = scraper_utils.cleanse_title(urllib.unquote(embed[0].attrs['data']))
         ok_url = scraper_utils.cleanse_title(urllib.unquote(match.group(1)))
         html = self._http_get(ok_url, data='ok', headers={'Referer': referer}, cache_limit=.25)
         js_data = scraper_utils.parse_json(html, ok_url)
         stream_url = js_data.get('movie', {}).get('url')
         if stream_url is not None:
             host = urlparse.urlparse(stream_url).hostname
             hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': QUALITIES.HD720, 'views': None, 'rating': None, 'url': stream_url, 'direct': False, 'subs': 'Turkish Subtitles'}
             hosters.append(hoster)
     return hosters

コード例 #5

0

ファイルを表示

ファイル: torba_scraper.py プロジェクト: Stevie-Bs/repository.xvbmc

    def search(self, video_type, title, year, season=''):
        results = []
        search_url = urlparse.urljoin(self.base_url, SEARCH_URL)
        search_url = search_url % (urllib.quote_plus(title))
        html = self._http_get(search_url, headers=XHR, cache_limit=1)
        for film in dom_parser.parse_dom(html, 'li', {'class': 'films-item'}):
            match_url = dom_parser.parse_dom(film, 'a', ret='href')
            match_title = dom_parser.parse_dom(film, 'div', {'class': 'films-item-title'})
            match_year = dom_parser.parse_dom(film, 'div', {'class': 'films-item-year'})
            if match_url and match_title:
                match_url = match_url[0]
                match_title = match_title[0]
                match_title = re.sub('</?span>', '', match_title)
                if match_year:
                    match = re.search('(\d+)', match_year[0])
                    if match:
                        match_year = match.group(1)
                    else:
                        match_year = ''
                else:
                    match_year = ''
                    
                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': match_url}
                    results.append(result)

        return results

コード例 #6

0

ファイルを表示

ファイル: hdmovie14_scraper.py プロジェクト: EPiC-APOC/repository.xvbmc

    def __alt_search(self, video_type, title, year, season=''):
        results = []
        params = title.lower()
        if year: params += ' %s' % (year)
        if video_type == VIDEO_TYPES.SEASON and season:
            params += ' Season %s' % (season)
        params = {'key': params}
        search_url = urlparse.urljoin(self.base_url, '/search')
        html = self._http_get(search_url, params=params, cache_limit=1)
        norm_title = scraper_utils.normalize_title(title)
        for item in dom_parser.parse_dom(html, 'div', {'class': 'caption'}):
            match = re.search('href="([^"]+)[^>]+>(.*?)<span[^>]*>', item)
            if match:
                match_url, match_title = match.groups()
                is_season = re.search('-season-\d+', match_url)
                if (video_type == VIDEO_TYPES.MOVIE and not is_season) or (video_type == VIDEO_TYPES.SEASON and is_season):
                    if video_type == VIDEO_TYPES.SEASON:
                        if season and not re.search('season-0*%s$' % (season), match_url): continue
                        
                    match_title = re.sub('</?[^>]*>', '', match_title)
                    match_title = re.sub('\s+Full\s+Movie', '', match_title)
                    match = re.search('-(\d{4})(?:$|-)', match_url)
                    if match:
                        match_year = match.group(1)
                    else:
                        match_year = ''
                    
                    if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                        result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                        results.append(result)

        return results

コード例 #7

0

ファイルを表示

ファイル: hdmovie14_scraper.py プロジェクト: EPiC-APOC/repository.xvbmc

    def __search(self, video_type, title, year, season=''):
        results = []
        search_url = base64.decodestring(SEARCH_URL) % (urllib.quote_plus(title))
        html = self._http_get(search_url, cache_limit=1)
        js_data = scraper_utils.parse_json(html)
        norm_title = scraper_utils.normalize_title(title)
        for item in js_data.get('results', []):
            if '/watch/' not in item['url'].lower(): continue
            is_season = re.search('Season\s+(\d+)', item['titleNoFormatting'], re.IGNORECASE)
            if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
                match_title_year = item['titleNoFormatting']
                match_title_year = re.sub('^Watch\s+', '', match_title_year)
                match_url = item['url']
                match_year = ''
                if video_type == VIDEO_TYPES.MOVIE:
                    match = re.search('(.*?)(?:\s+\(?(\d{4})\)?)', match_title_year)
                    if match:
                        match_title, match_year = match.groups()
                    else:
                        match_title = match_title_year
                else:
                    if season and int(is_season.group(1)) != int(season):
                        continue
                    match = re.search('(.*?)\s+\(\d{4}\)', match_title_year)
                    if match:
                        match_title = match.group(1)
                    else:
                        match_title = match_title_year
                
                if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                    results.append(result)

        return results

コード例 #8

0

ファイルを表示

ファイル: merdb_scraper.py プロジェクト: AMOboxTV/AMOBox.LegoBuild

    def search(self, video_type, title, year, season=''):
        search_url = self.base_url
        if video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]:
            search_url += '/tvshow'

        search_url += '/advanced-search.php?search='
        search_url += urllib.quote_plus(title)
        search_url += '&year=' + urllib.quote_plus(str(year))
        search_url += '&advanced_search=Search'

        html = self._http_get(search_url, cache_limit=.25)
        results = []
        for element in dom_parser.parse_dom(html, 'div', {'class': 'list_box_title'}):
            match = re.search('href="([^"]+)"\s+title="(?:Watch )?([^"]+)', element)
            if match:
                url, match_title_year = match.groups()
                match = re.search('(.*?)(?:\s+\(?\s*(\d{4})\s*\)?)', match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    match_title = match_title_year
                    match_year = ''
                
                if not year or not match_year or year == match_year:
                    result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
        return results

コード例 #9

0

ファイルを表示

ファイル: dizifilmhd_scraper.py プロジェクト: kevintone/tdbaddon

    def search(self, video_type, title, year, season=''):
        results = []
        search_url = urlparse.urljoin(self.base_url, '/?s=')
        search_url += urllib.quote_plus(title)
        html = self._http_get(search_url, cache_limit=8)
        title_strip = [word.decode('utf-8') for word in TITLE_STRIP]
        for item in dom_parser.parse_dom(html, 'div', {'class': 'item'}):
            match_url = re.search('href="([^"]+)', item)
            match_title = dom_parser.parse_dom(item, 'span', {'class': 'tt'})
            if match_url and match_title:
                item_type = dom_parser.parse_dom(item, 'span', {'class': 'calidad2'})
                if item_type and item_type[0] in SEARCH_EXCLUDE: continue
                match_url = match_url.group(1)
                match_title = match_title[0]
                if 'SEZON' in match_title.upper(): continue

                year_frag = dom_parser.parse_dom(item, 'span', {'class': 'year'})
                if year_frag:
                    match_year = year_frag[0]
                else:
                    match_year = ''
                        
                match_title = ' '.join([word for word in match_title.split() if word.upper() not in title_strip])
                if (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
        
        return results

コード例 #10

0

ファイルを表示

ファイル: moviewatcher_scraper.py プロジェクト: freeworldxbmc/KAOSbox-Repo

    def __movie_search(self, title, year):
        results = []
        search_url = urlparse.urljoin(self.base_url, '/search?q=')
        search_url += urllib.quote_plus(title)
        html = self._http_get(search_url, cache_limit=1)
        norm_title = scraper_utils.normalize_title(title)
        for item in dom_parser.parse_dom(html, 'div', {'class': 'video_item'}):
            match_url = dom_parser.parse_dom(item, 'a', ret='href')
            match_title = dom_parser.parse_dom(item, 'img', ret='alt')
            match_year = ''
            if match_url and match_title:
                match_url = match_url[0]
                match_title = match_title[0]
                
                if match_year:
                    match_year = match_year[0]
                else:
                    match_year = ''
        
                if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)

        return results

コード例 #11

0

ファイルを表示

ファイル: xmovies8_scraper.py プロジェクト: CYBERxNUKE/xbmc-addon

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/results')
        params = {'q': title}
        referer = search_url + '?' + urllib.urlencode(params)
        headers = {'Referer': referer}
        headers.update(XHR)
        _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'), headers=headers, method='POST', cache_limit=0)

        cookies = {'begin_referer': referer, 'prounder': 1}
        html = self._http_get(search_url, params=params, cookies=cookies, cache_limit=8)
        if any('jquery.js' in match.attrs['src'] for match in dom_parser2.parse_dom(html, 'script', req='src')):
            html = self._http_get(search_url, params=params, cookies=cookies, cache_limit=0)
            
        for _attrs, result in dom_parser2.parse_dom(html, 'div', {'class': 'cell'}):
            title_frag = dom_parser2.parse_dom(result, 'div', {'class': 'video_title'})
            year_frag = dom_parser2.parse_dom(result, 'div', {'class': 'video_quality'})
            if not title_frag: continue
            match = dom_parser2.parse_dom(title_frag[0].content, 'a', req='href')
            if not match: continue
            match_url = match[0].attrs['href']
            match_title = match[0].content
            try:
                match = re.search('\s+(\d{4})\s+', year_frag[0].content)
                match_year = match.group(1)
            except:
                match_year = ''

            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
        return results

コード例 #12

0

ファイルを表示

ファイル: noobroom_scraper.py プロジェクト: EPiC-APOC/repository.xvbmc

    def search(self, video_type, title, year, season=""):
        if not self.include_paid and video_type != VIDEO_TYPES.MOVIE:
            return []
        search_url = urlparse.urljoin(self.base_url, "/search.php")
        html = self._http_get(search_url, params={"q": title}, cache_limit=0.25)
        results = []
        if video_type == VIDEO_TYPES.MOVIE:
            pattern = "<i>\s*Movies\s*</i>(.*)"
        else:
            pattern = "<i>\s*TV Series\s*</i>(.*)"

        match = re.search(pattern, html)
        if match:
            container = match.group(1)
            pattern = "href='([^']+)'>([^<]+)\s*</a>\s*(?:\((\d{4})\))?"
            for match in re.finditer(pattern, container):
                url, match_title, match_year = match.groups("")
                if not year or not match_year or year == match_year:
                    result = {
                        "url": scraper_utils.pathify_url(url),
                        "title": scraper_utils.cleanse_title(match_title),
                        "year": match_year,
                    }
                    results.append(result)

        return results

コード例 #13

0

ファイルを表示

ファイル: moviehubs_scraper.py プロジェクト: CYBERxNUKE/xbmc-addon

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/search-movies/%s.html' % (urllib.quote_plus(title)))
        html = self._http_get(search_url, cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'li', {'class': 'item'}):
            match_url = dom_parser2.parse_dom(item, 'a', req='href')
            match_title_year = re.search('onmouseover="([^"]+)', item)
            if match_url and match_title_year:
                match_url = match_url[0].attrs['href']
                match_title_year = match_title_year.group(1)
                match = re.search('<b>(?:<i>)?\s*(.*?)\s*(?:</i>)?</b>', match_title_year)
                if not match: continue
                
                match_title, match_year = scraper_utils.extra_year(match.group(1))
                is_season = re.search('season\s+(\d+)', match_title_year, re.I)
                if (is_season and video_type == VIDEO_TYPES.MOVIE) or (not is_season and video_type == VIDEO_TYPES.SEASON): continue
                
                if video_type == VIDEO_TYPES.MOVIE:
                    if not match_year:
                        match_year = re.search('>Release:\s*(\d{4})', match_title_year)
                        match_year = match_year.group(1) if match_year else ''
                else:
                    if season and int(season) != int(is_season.group(1)):
                        continue
                                
                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                    results.append(result)

        return results

コード例 #14

0

ファイルを表示

ファイル: vivoto_scraper.py プロジェクト: freeworldxbmc/KAOSbox-Repo

 def search(self, video_type, title, year, season=''):
     search_url = urlparse.urljoin(self.base_url, '/search/%s.html')
     search_url = search_url % (urllib.quote_plus(title))
     html = self._http_get(search_url, cache_limit=1)
     results = []
     fragment = dom_parser.parse_dom(html, 'div', {'class': 'movie'})
     if fragment:
         for item in dom_parser.parse_dom(fragment[0], 'li'):
             match_url = dom_parser.parse_dom(item, 'a', ret='href')
             match_title = dom_parser.parse_dom(item, 'span', {'class': 'text'})
             match_year = dom_parser.parse_dom(item, 'span', {'class': 'year'})
             if match_url and match_title:
                 match_url = match_url[0]
                 match_title = re.sub('</?strong>', '', match_title[0])
                 is_season = re.search('Season\s+(\d+)$', match_title, re.I)
                 if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
                     if video_type == VIDEO_TYPES.MOVIE:
                         if match_year:
                             match_year = match_year[0]
                         else:
                             match_year = ''
                     else:
                         if season and int(is_season.group(1)) != int(season):
                             continue
                         match_year = ''
                 
                     result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                     results.append(result)
     return results

コード例 #15

0

ファイルを表示

ファイル: watchhd_scraper.py プロジェクト: c0ns0le/YCBuilds

    def search(self, video_type, title, year, season=''):
        search_url = urlparse.urljoin(self.base_url, '/search/%s' % (urllib.quote_plus(title)))
        html = self._http_get(search_url, cache_limit=.25)
        results = []
        for item in dom_parser.parse_dom(html, 'div', {'class': 'name_top'}):
            match = re.search('href="([^"]+)[^>]+>([^<]+)', item)
            if match:
                url, match_title_year = match.groups()
                is_season = re.search('Season\s+(\d+)', match_title_year, re.IGNORECASE)
                if not is_season and video_type == VIDEO_TYPES.MOVIE or is_season and VIDEO_TYPES.SEASON:
                    match_year = ''
                    if video_type == VIDEO_TYPES.MOVIE:
                        match = re.search('(.*?)(?:\s+\(?(\d{4})\)?)', match_title_year)
                        if match:
                            match_title, match_year = match.groups()
                        else:
                            match_title = match_title_year
                    else:
                        match_title = match_title_year
                        if season and int(is_season.group(1)) != int(season):
                            continue
                    
                    if not year or not match_year or year == match_year:
                        result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(url)}
                        results.append(result)

        return results

コード例 #16

0

ファイルを表示

ファイル: hevcbluray_scraper.py プロジェクト: EPiC-APOC/repository.xvbmc

    def search(self, video_type, title, year, season=''):
        results = []
        html = self._http_get(self.base_url, params={'s': title}, cache_limit=8)
        for item in dom_parser.parse_dom(html, 'div', {'class': 'item'}):
            match = re.search('href="([^"]+)', item)
            match_title = dom_parser.parse_dom(item, 'span', {'class': 'tt'})
            year_frag = dom_parser.parse_dom(item, 'span', {'class': 'year'})
            if match and match_title:
                url = match.group(1)
                match_title = match_title[0]
                if re.search('\d+\s*x\s*\d+', match_title): continue  # exclude episodes
                match_title, match_year = scraper_utils.extra_year(match_title)
                if not match_year and year_frag:
                    match_year = year_frag[0]

                match = re.search('(.*?)\s+\d{3,}p', match_title)
                if match:
                    match_title = match.group(1)
                
                extra = dom_parser.parse_dom(item, 'span', {'class': 'calidad2'})
                if extra:
                    match_title += ' [%s]' % (extra[0])
                    
                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(url)}
                    results.append(result)

        return results

コード例 #17

0

ファイルを表示

ファイル: 123movies_scraper.py プロジェクト: freeworldxbmc/KAOSbox-Repo

    def search(self, video_type, title, year, season=''):
        search_url = urlparse.urljoin(self.base_url, '/movie/search/')
        title = re.sub('[^A-Za-z0-9 ]', '', title)
        search_url += urllib.quote_plus(title)
        html = self._http_get(search_url, cache_limit=1)
        results = []
        for item in dom_parser.parse_dom(html, 'div', {'class': 'ml-item'}):
            match_title = dom_parser.parse_dom(item, 'span', {'class': 'mli-info'})
            match_url = re.search('href="([^"]+)', item, re.DOTALL)
            match_year = re.search('class="jt-info">(\d{4})<', item)
            is_episodes = dom_parser.parse_dom(item, 'span', {'class': 'mli-eps'})
            
            if (video_type == VIDEO_TYPES.MOVIE and not is_episodes) or (video_type == VIDEO_TYPES.SEASON and is_episodes):
                if match_title and match_url:
                    match_title = match_title[0]
                    match_title = re.sub('</?h2>', '', match_title)
                    match_title = re.sub('\s+\d{4}$', '', match_title)
                    if video_type == VIDEO_TYPES.SEASON:
                        if season and not re.search('Season\s+%s$' % (season), match_title): continue
                        
                    url = urlparse.urljoin(match_url.group(1), 'watching.html')
                    match_year = match_year.group(1) if match_year else ''
    
                    if not year or not match_year or year == match_year:
                        result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(url)}
                        results.append(result)

        return results

コード例 #18

0

ファイルを表示

ファイル: movie4k_scraper.py プロジェクト: CYBERxNUKE/xbmc-addon

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/movies.php')
        cookies = {'onlylanguage': 'en', 'lang': 'en'}
        params = {'list': 'search', 'search': title}
        html = self._http_get(search_url, params=params, cookies=cookies, cache_limit=8)
        for _attrs, content in dom_parser2.parse_dom(html, 'TR', {'id': re.compile('coverPreview\d+')}):
            match = dom_parser2.parse_dom(content, 'a', req='href')
            if not match: continue
            
            match_url, match_title = match[0].attrs['href'], match[0].content
            is_show = re.search('\(tvshow\)', match_title, re.I)
            if (video_type == VIDEO_TYPES.MOVIE and is_show) or (video_type == VIDEO_TYPES.TVSHOW and not is_show):
                continue

            match_title = match_title.replace('(TVshow)', '')
            match_title = match_title.strip()
            
            match_year = ''
            for _attrs, div in dom_parser2.parse_dom(content, 'div'):
                match = re.match('\s*(\d{4})\s*', div)
                if match:
                    match_year = match.group(1)

            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
        return results

コード例 #19

0

ファイルを表示

ファイル: tunemovie_scraper.py プロジェクト: ScriptUp/salts

 def search(self, video_type, title, year, season=''):
     search_url = urlparse.urljoin(self.base_url, '/search-movies/%s.html')
     search_url = search_url % (urllib.quote_plus(title))
     html = self._http_get(search_url, cache_limit=0)
     results = []
     for thumb in dom_parser.parse_dom(html, 'div', {'class': 'thumb'}):
         match_title = dom_parser.parse_dom(thumb, 'a', {'class': 'clip-link'}, ret='title')
         url = dom_parser.parse_dom(thumb, 'a', {'class': 'clip-link'}, ret='href')
         if match_title and url:
             match_title, url = match_title[0], url[0]
             is_season = re.search('Season\s+(\d+)$', match_title, re.I)
             if not is_season and video_type == VIDEO_TYPES.MOVIE or is_season and VIDEO_TYPES.SEASON:
                 match_year = ''
                 if video_type == VIDEO_TYPES.MOVIE:
                     match_year = dom_parser.parse_dom(thumb, 'div', {'class': '[^"]*status-year[^"]*'})
                     if match_year:
                         match_year = match_year[0]
                 else:
                     if season and int(is_season.group(1)) != int(season):
                         continue
                 
                 if not year or not match_year or year == match_year:
                     result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                     results.append(result)
     return results

コード例 #20

0

ファイルを表示

ファイル: moviesub_scraper.py プロジェクト: CYBERxNUKE/xbmc-addon

    def search(self, video_type, title, year, season=''):
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/search/%s.html' % (urllib.quote_plus(title)))
        html = self._http_get(search_url, cache_limit=1)
        fragment = dom_parser2.parse_dom(html, 'ul', {'class': 'cfv'})
        if not fragment: return results
        
        norm_title = scraper_utils.normalize_title(title)
        for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'li'):
            is_season = dom_parser2.parse_dom(item, 'div', {'class': 'status'})
            if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
                match = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
                if not match: continue
                
                match_title = match[0].attrs['title']
                match_url = match[0].attrs['href']
                match_year = ''
                if video_type == VIDEO_TYPES.SEASON:
                    if season and not re.search('Season\s+%s$' % (season), match_title, re.I):
                        continue
                else:
                    match = re.search('-(\d{4})[-.]', match_url)
                    if match:
                        match_year = match.group(1)
                
                match_norm_title = scraper_utils.normalize_title(match_title)
                title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
                if title_match and (not year or not match_year or year == match_year):
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                    results.append(result)

        return results

コード例 #21

0

ファイルを表示

ファイル: moviesub_scraper.py プロジェクト: EPiC-APOC/repository.xvbmc

    def search(self, video_type, title, year, season=''):
        results = []
        search_url = urlparse.urljoin(self.base_url, '/search/%s.html' % (urllib.quote_plus(title)))
        html = self._http_get(search_url, cache_limit=1)
        fragment = dom_parser.parse_dom(html, 'ul', {'class': 'cfv'})
        if fragment:
            for item in dom_parser.parse_dom(fragment[0], 'li'):
                is_season = dom_parser.parse_dom(item, 'div', {'class': 'status'})
                if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
                    match_url = dom_parser.parse_dom(item, 'a', ret='href')
                    match_title = dom_parser.parse_dom(item, 'a', ret='title')
                    if match_url and match_title:
                        match_title = match_title[0]
                        match_url = match_url[0]
                        match_year = ''
                        if video_type == VIDEO_TYPES.SEASON:
                            if season and not re.search('Season\s+%s$' % (season), match_title, re.I):
                                continue
                        else:
                            match = re.search('-(\d{4})\.html', match_url)
                            if match:
                                match_year = match.group(1)
                        
                        if not year or not match_year or year == match_year:
                            result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                            results.append(result)

        return results

コード例 #22

0

ファイルを表示

ファイル: uflix_scraper.py プロジェクト: EPiC-APOC/repository.xvbmc

 def search(self, video_type, title, year, season=''):
     search_url = urlparse.urljoin(self.base_url, '/index.php?menu=search&query=')
     search_url += urllib.quote_plus(title)
     html = self._http_get(search_url, cache_limit=.25)
     results = []
     sections = {VIDEO_TYPES.MOVIE: 'movies', VIDEO_TYPES.TVSHOW: 'series'}
     
     fragment = dom_parser.parse_dom(html, 'div', {'id': sections[video_type]})
     if fragment:
         for item in dom_parser.parse_dom(fragment[0], 'figcaption'):
             match = re.search('title="([^"]+)[^>]+href="([^"]+)', item)
             if match:
                 match_title_year, url = match.groups()
                 match = re.search('(.*?)\s+\(?(\d{4})\)?', match_title_year)
                 if match:
                     match_title, match_year = match.groups()
                 else:
                     match_title = match_title_year
                     match_year = ''
                 if match_title.startswith('Watch '): match_title = match_title.replace('Watch ', '')
                 if match_title.endswith(' Online'): match_title = match_title.replace(' Online', '')
                 
                 if not year or not match_year or year == match_year:
                     result = {'title': scraper_utils.cleanse_title(match_title), 'url': scraper_utils.pathify_url(url), 'year': match_year}
                     results.append(result)
     return results

コード例 #23

0

ファイルを表示

ファイル: pw_scraper.py プロジェクト: EPiC-APOC/repository.xvbmc

    def search(self, video_type, title, year, season=''):
        search_url = urlparse.urljoin(self.base_url, '/index.php?search_keywords=')
        search_url += urllib.quote_plus(title)
        search_url += '&year=' + urllib.quote_plus(str(year))
        if video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]:
            search_url += '&search_section=2'
        else:
            search_url += '&search_section=1'

        results = []
        html = self. _http_get(self.base_url, cache_limit=0)
        match = re.search('input type="hidden" name="key" value="([0-9a-f]*)"', html)
        if match:
            key = match.group(1)
            search_url += '&key=' + key

            html = self._http_get(search_url, cache_limit=.25)
            pattern = r'class="index_item.+?href="(.+?)" title="Watch (.+?)"?\(?([0-9]{4})?\)?"?>'
            for match in re.finditer(pattern, html):
                url, title, year = match.groups('')
                result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(title), 'year': year}
                results.append(result)
        else:
            log_utils.log('Unable to locate PW search key', log_utils.LOGWARNING)
        return results

コード例 #24

0

ファイルを表示

ファイル: view47_scraper.py プロジェクト: AMOboxTV/AMOBox.LegoBuild

 def search(self, video_type, title, year, season=''):
     search_url = urlparse.urljoin(self.base_url, '/search.php?q=%s&limit=20&timestamp=%s' % (urllib.quote_plus(title), int(time.time())))
     html = self._http_get(search_url, cache_limit=.25)
     results = []
     items = dom_parser.parse_dom(html, 'li')
     if len(items) >= 2:
         items = items[1:]
         for item in items:
             match_url = dom_parser.parse_dom(item, 'a', ret='href')
             match_title_year = dom_parser.parse_dom(item, 'strong')
             if match_url and match_title_year:
                 match_url = match_url[0]
                 match_title_year = re.sub('</?strong>', '', match_title_year[0])
                 is_season = re.search('S(?:eason\s+)?(\d+)$', match_title_year, re.I)
                 if not is_season and video_type == VIDEO_TYPES.MOVIE or is_season and VIDEO_TYPES.SEASON:
                     if video_type == VIDEO_TYPES.MOVIE:
                         match = re.search('(.*?)(?:\s+\(?(\d{4})\)?)', match_title_year)
                         if match:
                             match_title, match_year = match.groups()
                         else:
                             match_title = match_title_year
                             match_year = ''
                     else:
                         if season and int(is_season.group(1)) != int(season):
                             continue
                         match_title = match_title_year
                         match_year = ''
                 
                     result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                     results.append(result)
     return results

コード例 #25

0

ファイルを表示

ファイル: moviexk_scraper.py プロジェクト: AMOboxTV/AMOBox.LegoBuild

    def search(self, video_type, title, year, season=''):
        results = []
        search_url = urlparse.urljoin(self.base_url, '/search/')
        search_url += urllib.quote_plus(title)
        html = self._http_get(search_url, cache_limit=1)
        for fragment in dom_parser.parse_dom(html, 'div', {'class': 'inner'}):
            name = dom_parser.parse_dom(fragment, 'div', {'class': 'name'})
            if name:
                match = re.search('href="([^"]+)[^>]+>(.*?)</a>', name[0])
                if match:
                    match_url, match_title_year = match.groups()
                    if 'tv-series' in match_url and video_type == VIDEO_TYPES.MOVIE: continue
                    
                    match_title_year = re.sub('</?[^>]*>', '', match_title_year)
                    match_title_year = re.sub('[Ww]atch\s+[Mm]ovie\s*', '', match_title_year)
                    match_title_year = match_title_year.replace('&#8217;', "'")
                    match = re.search('(.*?)\s+\((\d{4})[^)]*\)$', match_title_year)
                    if match:
                        match_title, match_year = match.groups()
                    else:
                        match_title = match_title_year
                        match_year = ''
    
                    if not match_year:
                        year_span = dom_parser.parse_dom(fragment, 'span', {'class': 'year'})
                        if year_span:
                            year_text = dom_parser.parse_dom(year_span[0], 'a')
                            if year_text:
                                match_year = year_text[0].strip()
    
                    if not year or not match_year or year == match_year:
                        result = {'title': scraper_utils.cleanse_title(match_title), 'url': scraper_utils.pathify_url(match_url), 'year': match_year}
                        results.append(result)

        return results

コード例 #26

0

ファイルを表示

ファイル: watch5s_scraper.py プロジェクト: freeworldxbmc/KAOSbox-Repo

    def search(self, video_type, title, year, season=''):
        search_url = urlparse.urljoin(self.base_url, '/search/?q=')
        search_url += urllib.quote_plus(title)
        html = self._http_get(search_url, cache_limit=8)
        results = []
        for item in dom_parser.parse_dom(html, 'div', {'class': 'ml-item'}):
            match_title = dom_parser.parse_dom(item, 'span', {'class': 'mli-info'})
            match_url = re.search('href="([^"]+)', item, re.DOTALL)
            year_frag = dom_parser.parse_dom(item, 'img', ret='alt')
            is_episodes = dom_parser.parse_dom(item, 'span', {'class': 'mli-eps'})
            
            if (video_type == VIDEO_TYPES.MOVIE and not is_episodes) or (video_type == VIDEO_TYPES.SEASON and is_episodes):
                if match_title and match_url:
                    match_url = match_url.group(1)
                    match_title = match_title[0]
                    match_title = re.sub('</?h2>', '', match_title)
                    match_title = re.sub('\s+\d{4}$', '', match_title)
                    if video_type == VIDEO_TYPES.SEASON:
                        if season and not re.search('Season\s+%s$' % (season), match_title): continue
                        
                    if not match_url.endswith('/'): match_url += '/'
                    match_url = urlparse.urljoin(match_url, 'watch/')
                    match_year = ''
                    if video_type == VIDEO_TYPES.MOVIE and year_frag:
                        match = re.search('\s*-\s*(\d{4})$', year_frag[0])
                        if match:
                            match_year = match.group(1)
    
                    if not year or not match_year or year == match_year:
                        result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                        results.append(result)

        return results

コード例 #27

0

ファイルを表示

ファイル: moviestorm_scraper.py プロジェクト: freeworldxbmc/KAOSbox-Repo

    def search(self, video_type, title, year, season=''):
        results = []
        if video_type == VIDEO_TYPES.TVSHOW:
            url = urlparse.urljoin(self.base_url, '/series/all/')
            html = self._http_get(url, cache_limit=8)
    
            links = dom_parser.parse_dom(html, 'a', {'class': 'underilne'}, 'href')
            titles = dom_parser.parse_dom(html, 'a', {'class': 'underilne'})
            items = zip(links, titles)
        else:
            url = urlparse.urljoin(self.base_url, '/search?=%s' % urllib.quote_plus(title))
            data = {'q': title, 'go': 'Search'}
            html = self._http_get(url, data=data, cache_limit=8)
            match = re.search('you can search again in (\d+) seconds', html, re.I)
            if match:
                wait = int(match.group(1))
                if wait > self.timeout: wait = self.timeout
                time.sleep(wait)
                html = self._http_get(url, data=data, cache_limit=0)
                
            pattern = 'class="movie_box.*?href="([^"]+).*?<h1>([^<]+)'
            items = re.findall(pattern, html, re.DOTALL)

        norm_title = scraper_utils.normalize_title(title)
        for item in items:
            url, match_title = item
            if norm_title in scraper_utils.normalize_title(match_title):
                result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                results.append(result)

        return results

コード例 #28

0

ファイルを表示

ファイル: movie4k_scraper.py プロジェクト: ScriptUp/salts

    def search(self, video_type, title, year, season=''):
        search_url = urlparse.urljoin(self.base_url, '/movies.php?list=search&search=')
        search_url += urllib.quote_plus(title)
        cookies = {'onlylanguage': 'en', 'lang': 'en'}
        html = self._http_get(search_url, cookies=cookies, cache_limit=.25)
        results = []
        pattern = 'id="tdmovies">\s*<a\s+href="([^"]+)">([^<]+).*?id="f7">(.*?)</TD>'
        for match in re.finditer(pattern, html, re.DOTALL):
            url, title, extra = match.groups('')
            if (video_type == VIDEO_TYPES.MOVIE and '(TVshow)' in title) or (video_type == VIDEO_TYPES.TVSHOW and '(TVshow)' not in title):
                continue

            title = title.replace('(TVshow)', '')
            title = title.strip()

            r = re.search('>(\d{4})<', extra)
            if r:
                match_year = r.group(1)
            else:
                match_year = ''

            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(title), 'year': match_year}
                results.append(result)
        return results

コード例 #29

0

ファイルを表示

ファイル: moviewatcher_scraper.py プロジェクト: EPiC-APOC/repository.xvbmc

    def search(self, video_type, title, year, season=""):
        results = []
        search_url = urlparse.urljoin(self.base_url, "/search?query=%s")
        search_url = search_url % (urllib.quote_plus(title))
        html = self._http_get(search_url, cache_limit=8)
        for item in dom_parser.parse_dom(html, "div", {"class": "one_movie-item"}):
            match_url = dom_parser.parse_dom(item, "a", ret="href")
            match_title = dom_parser.parse_dom(item, "img", ret="alt")
            media_type = dom_parser.parse_dom(item, "div", {"class": "movie-series"})
            if not media_type:
                media_type = VIDEO_TYPES.MOVIE
            elif media_type[0] == "TV SERIE":
                media_type = VIDEO_TYPES.TVSHOW

            if match_url and match_title and video_type == media_type:
                match_url = match_url[0]
                match_title = match_title[0]

                match_year = re.search("-(\d{4})-", match_url)
                if match_year:
                    match_year = match_year.group(1)
                else:
                    match_year = ""

                if not year or not match_year or year == match_year:
                    result = {
                        "url": scraper_utils.pathify_url(match_url),
                        "title": scraper_utils.cleanse_title(match_title),
                        "year": match_year,
                    }
                    results.append(result)

        return results

コード例 #30

0

ファイルを表示

ファイル: scenedown_scraper.py プロジェクト: CYBERxNUKE/xbmc-addon

 def search(self, video_type, title, year, season=''):  # @UnusedVariable
     results = []
     search_url = base64.decodestring(SEARCH_URL) % (urllib.quote_plus(title))
     html = self._http_get(search_url, cache_limit=2)
     if html:
         js_data = scraper_utils.parse_json(html)
         search_meta = scraper_utils.parse_episode_link(title)
         for item in js_data.get('results', []):
             metatags = item.get('richSnippet', {}).get('metatags', {})
             post_date = metatags.get('articlePublishedTime')
             if post_date:
                 post_date = re.sub('[+-]\d+:\d+$', '', post_date)
                 post_date = scraper_utils.to_datetime(post_date, '%Y-%m-%dT%H:%M:%S').date()
                 if self.__too_old(post_date): continue
             
             match_title = metatags.get('ogTitle', '')
             if not match_title:
                 match_title = item['titleNoFormatting']
                 match_title = re.sub(re.compile('\s*-\s*Scene\s*Down$', re.I), '', match_title)
             match_url = item['url']
             match_year = ''
             item_meta = scraper_utils.parse_episode_link(match_title)
             if scraper_utils.meta_release_check(video_type, search_meta, item_meta):
                 result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                 results.append(result)
     
     if not results:
         results = self.__site_search(video_type, title, year)
         
     return results

コード例 #31

0

ファイルを表示

 def search(self, video_type, title, year, season=''):
     search_url = urlparse.urljoin(self.base_url, '/search_ajax')
     data = {'query': title}
     html = self._http_get(search_url,
                           data=data,
                           headers=XHR,
                           cache_limit=1)
     results = []
     for match in re.finditer(
             'class="list-group-item"\s+href="([^"]+)">([^<]+)', html):
         url, match_title = match.groups()
         result = {
             'url': scraper_utils.pathify_url(url),
             'title': scraper_utils.cleanse_title(match_title),
             'year': ''
         }
         results.append(result)
     return results

コード例 #32

0

ファイルを表示

    def search(self, video_type, title, year, season=''):
        search_url = urlparse.urljoin(self.base_url, '/search?q=')
        search_url += urllib.quote_plus(title)
        html = self._http_get(search_url, cache_limit=8)
        results = []
        for item in dom_parser.parse_dom(html, 'h4',
                                         {'class': 'media-heading'}):
            match = re.search('href="([^"]+)">([^<]+)', item)
            if match:
                url, match_title = match.groups()
                result = {
                    'url': scraper_utils.pathify_url(url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': ''
                }
                results.append(result)

        return results

コード例 #33

0

ファイルを表示

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        html = self._http_get(self.base_url, params={'s': title}, cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'browse-movie-top'}):
            match = dom_parser2.parse_dom(item, 'a', req='href')
            if match:
                match_url, match_title_year = match[0].attrs['href'], match[0].content
                match_title, match_year = scraper_utils.extra_year(match_title_year)
                if not match_year:
                    div = dom_parser2.parse_dom(item, 'div', {'class': 'browse-movie-year'})
                    if div: match_year = div[0].content.strip()
                        
                match_url += '?watching'
                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                    results.append(result)

        return results

コード例 #34

0

ファイルを表示

ファイル: watch8now_scraper.py プロジェクト: yam4me/plugin.video.blamo

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        search_url = scraper_utils.urljoin(self.base_url, '/search')
        html = self._http_get(search_url, params={'q': title}, cache_limit=8)
        results = []
        for _attrs, item in dom_parser2.parse_dom(html, 'td',
                                                  {'class': 'col-md-10'}):
            match = dom_parser2.parse_dom(item, 'a', req='href')
            if match:
                match_url, match_title = match[0].attrs['href'], match[
                    0].content
                result = {
                    'url': scraper_utils.pathify_url(match_url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': ''
                }
                results.append(result)

        return results

コード例 #35

0

ファイルを表示

    def search(self, video_type, title, year, season=''):
        results = []
        search_url = urlparse.urljoin(self.base_url, '/?s=%s' % (urllib.quote_plus(title)))
        html = self._http_get(search_url, cache_limit=.25)
        for match in re.finditer('class="home_post_cont.*?href="([^"]+).*?/&quot;&gt;(.*?)&lt;', html, re.DOTALL):
            link, match_title_year = match.groups()
            match = re.search('(.*?)(?:\s+\(?(\d{4})\)?)', match_title_year)
            if match:
                match_title, match_year = match.groups()
            else:
                match_title = match_title_year
                match_year = ''

            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(link), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)

        return results

コード例 #36

0

ファイルを表示

ファイル: moviexk_scraper.py プロジェクト: kevintone/tdbaddon

    def search(self, video_type, title, year, season=''):
        results = []
        search_url = urlparse.urljoin(self.base_url, '/search/')
        search_url += urllib.quote_plus(title)
        html = self._http_get(search_url, cache_limit=1)
        for fragment in dom_parser.parse_dom(html, 'div', {'class': 'inner'}):
            name = dom_parser.parse_dom(fragment, 'div', {'class': 'name'})
            if name:
                match = re.search('href="([^"]+)[^>]+>(.*?)</a>', name[0])
                if match:
                    match_url, match_title_year = match.groups()
                    if 'tv-series' in match_url and video_type == VIDEO_TYPES.MOVIE:
                        continue

                    match_title_year = re.sub('</?[^>]*>', '',
                                              match_title_year)
                    match_title_year = re.sub('[Ww]atch\s+[Mm]ovie\s*', '',
                                              match_title_year)
                    match_title_year = match_title_year.replace('&#8217;', "'")
                    match = re.search('(.*?)\s+\((\d{4})[^)]*\)$',
                                      match_title_year)
                    if match:
                        match_title, match_year = match.groups()
                    else:
                        match_title = match_title_year
                        match_year = ''

                    if not match_year:
                        year_span = dom_parser.parse_dom(
                            fragment, 'span', {'class': 'year'})
                        if year_span:
                            year_text = dom_parser.parse_dom(year_span[0], 'a')
                            if year_text:
                                match_year = year_text[0].strip()

                    if not year or not match_year or year == match_year:
                        result = {
                            'title': scraper_utils.cleanse_title(match_title),
                            'url': scraper_utils.pathify_url(match_url),
                            'year': match_year
                        }
                        results.append(result)

        return results

コード例 #37

0

ファイルを表示

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        folders = ['/addons/real-movies/base.xml']
        norm_title = scraper_utils.normalize_title(title)
        for page_url in folders:
            xml_file = os.path.basename(page_url)
            page_url = scraper_utils.urljoin(self.base_url, page_url)
            xml = self._http_get(page_url,
                                 require_debrid=False,
                                 cache_limit=48)
            new_folders = re.findall('<folder>(.*?)</folder>', xml, re.I)
            if new_folders:
                folders += [folder for folder in new_folders if folder]

            for match in re.finditer('<item>(.*?)</item>', xml,
                                     re.I | re.DOTALL):
                item = match.group(1)
                match_title_year = re.search('<title>(.*?)</title>', item,
                                             re.I)
                match_url = re.search('<link>(.*?)</link>', item, re.I)
                if match_title_year and match_url:
                    match_title_year = match_title_year.group(1)
                    match_url = match_url.group(1)
                    if match_title_year and match_url:
                        match_title, match_year = scraper_utils.extra_year(
                            match_title_year)
                        xml_file = xml_file.replace(' ', '').lower()
                        match_url = 'xml_file=%s&link=%s' % (xml_file,
                                                             match_url)
                        if norm_title in scraper_utils.normalize_title(
                                match_title) and (not year or not match_year
                                                  or year == match_year):
                            if 'format' in XML_META.get(xml_file, {}):
                                match_title += ' (%s)' % (
                                    XML_META[xml_file]['format'])
                            result = {
                                'title':
                                scraper_utils.cleanse_title(match_title),
                                'year': match_year,
                                'url': match_url
                            }
                            results.append(result)

        return results

コード例 #38

0

ファイルを表示

    def search(self, video_type, title, year, season=''):
        results = []
        url = urlparse.urljoin(self.base_url, '/forum/forum.php')
        html = self._http_get(url, cache_limit=48)
        norm_title = scraper_utils.normalize_title(title)
        for span in dom_parser.parse_dom(html, 'span',
                                         {'class': 'sectiontitle'}):
            match = re.search('href="([^"]+)[^>]+>([^<]+)', span)
            if match:
                url, match_title = match.groups()
                if norm_title in scraper_utils.normalize_title(match_title):
                    result = {
                        'url': scraper_utils.pathify_url(url),
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': ''
                    }
                    results.append(result)

        return results

コード例 #39

0

ファイルを表示

ファイル: dizimag_scraper.py プロジェクト: enursha101/xbmc-addon

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        html = self._http_get(self.base_url, cache_limit=48)
        fragment = dom_parser2.parse_dom(html, 'div', {'id': 'fil'})
        if not fragment: return results

        norm_title = scraper_utils.normalize_title(title)
        for match in re.finditer('href="([^"]+)"\s+title="([^"]+)',
                                 fragment[0].content):
            url, match_title = match.groups()
            if norm_title in scraper_utils.normalize_title(match_title):
                result = {
                    'url': scraper_utils.pathify_url(url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': ''
                }
                results.append(result)

        return results

コード例 #40

0

ファイルを表示

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        show_list_url = scraper_utils.urljoin(self.base_url, '/tv-lists/')
        html = self._http_get(show_list_url, cache_limit=8)
        results = []
        seen_urls = set()
        norm_title = scraper_utils.normalize_title(title)
        for _attrs, item in dom_parser2.parse_dom(html, 'li'):
            match = dom_parser2.parse_dom(item, 'a', req='href')
            if match:
                match_url = scraper_utils.pathify_url(match[0].attrs['href'])
                match_title = match[0].content
                if match_url in seen_urls: continue
                seen_urls.add(match_url)
                match_title = re.sub('</?strong[^>]*>', '', match_title)
                if norm_title in scraper_utils.normalize_title(match_title):
                    result = {'url': match_url, 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                    results.append(result)

        return results

コード例 #41

0

ファイルを表示

ファイル: losmovies_scraper.py プロジェクト: normico21/repository.xvbmc

 def search(self, video_type, title, year, season=''):  # @UnusedVariable
     results = []
     search_url = scraper_utils.urljoin(self.base_url, '/search')
     params = {'type': 'movies', 'q': title}
     html = self._http_get(search_url, params=params, cache_limit=8)
     for _attrs, item in dom_parser2.parse_dom(html, 'div', {'id': re.compile('movie-\d+')}):
         is_tvshow = dom_parser2.parse_dom(item, 'div', {'class': 'movieTV'})
         if (video_type == VIDEO_TYPES.MOVIE and is_tvshow) or (video_type == VIDEO_TYPES.TVSHOW and not is_tvshow): continue
         
         match_url = dom_parser2.parse_dom(item, 'a', req='href')
         match_title = dom_parser2.parse_dom(item, 'h4')
         if match_url and match_title:
             match_title = match_title[0].content
             match_url = match_url[0].attrs['href']
             match_year = ''
             if not year or not match_year or year == match_year:
                 result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                 results.append(result)
     return results

コード例 #42

0

ファイルを表示

ファイル: zumvo_scraper.py プロジェクト: AMOboxTV/AMOBox.LegoBuild

 def search(self, video_type, title, year, season=''):
     search_url = urlparse.urljoin(self.base_url, '/search/')
     search_url += urllib.quote_plus(title)
     html = self._http_get(search_url, cache_limit=0)
     results = []
     match = re.search('ul class="list-film"(.*?)</ul>', html, re.DOTALL)
     if match:
         result_fragment = match.group(1)
         pattern = 'class="name">\s*<a\s+href="([^"]+)"\s+title="Watch\s+(.*?)\s+\((\d{4})\)'
         for match in re.finditer(pattern, result_fragment, re.DOTALL):
             url, title, match_year = match.groups('')
             if not year or not match_year or year == match_year:
                 result = {
                     'url': scraper_utils.pathify_url(url),
                     'title': scraper_utils.cleanse_title(title),
                     'year': match_year
                 }
                 results.append(result)
     return results

コード例 #43

0

ファイルを表示

 def __list(self, title):
     results = []
     search_url = scraper_utils.urljoin(self.base_url, 'index.php')
     params = {'do': 'charmap', 'name': 'series-list', 'args': '/' + title[0]}
     html = self._http_get(search_url, params=params, require_debrid=True, cache_limit=48)
     
     fragment = dom_parser2.parse_dom(html, 'div', {'class': 'downpara-list'})
     if not fragment: return results
     
     for match in dom_parser2.parse_dom(fragment[0].content, 'a', req='href'):
         match_url = match.attrs['href']
         match_title_extra = match.content
         match_title, match_season, q_str, is_pack = self.__get_title_parts(match_title_extra)
         if is_pack: continue
         quality = QUALITY_MAP.get(q_str, QUALITIES.HIGH)
         result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': '', 'quality': quality,
                   'season': match_season, 'q_str': q_str}
         results.append(result)
     return results

コード例 #44

0

ファイルを表示

    def search(self, video_type, title, year, season=''):
        search_url = urlparse.urljoin(
            self.base_url, '/search/%s.html' % urllib.quote_plus(title))
        html = self._http_get(search_url, cache_limit=.25)
        results = []
        fragment = dom_parser.parse_dom(html, 'div', {'class': 'list-movie'})
        if fragment:
            for item in dom_parser.parse_dom(fragment[0], 'div',
                                             {'class': 'movie'}):
                match = re.search(
                    'class="movie-name".*?href="([^"]+)[^>]+>([^<]+)', item)
                if match:
                    url, match_title = match.groups()
                    is_season = re.search('\s+-\s+[Ss](\d+)$', match_title)
                    if not is_season and video_type == VIDEO_TYPES.MOVIE or is_season and VIDEO_TYPES.SEASON:
                        match_year = ''
                        if video_type == VIDEO_TYPES.MOVIE:
                            for info_frag in dom_parser.parse_dom(
                                    item, 'p', {'class': 'info'}):
                                match = re.search('(\d{4})', info_frag)
                                if match:
                                    match_year = match.group(1)
                                    break

                            if not match_year:
                                match = re.search('(\d{4})$', url)
                                if match:
                                    match_year = match.group(1)
                        else:
                            if season and int(
                                    is_season.group(1)) != int(season):
                                continue

                        if (not year or not match_year or year == match_year):
                            result = {
                                'url': scraper_utils.pathify_url(url),
                                'title':
                                scraper_utils.cleanse_title(match_title),
                                'year': match_year
                            }
                            results.append(result)

        return results

コード例 #45

0

ファイルを表示

    def search(self, video_type, title, year, season=''):
        html = self._http_get(self.base_url, cache_limit=8)
        results = []
        fragment = dom_parser.parse_dom(html, 'div',
                                        {'class': '[^"]*dizis[^"]*'})
        norm_title = scraper_utils.normalize_title(title)
        if fragment:
            for match in re.finditer('href="([^"]+)[^>]*>([^<]+)',
                                     fragment[0]):
                url, match_title = match.groups()
                if norm_title in scraper_utils.normalize_title(match_title):
                    result = {
                        'url': scraper_utils.pathify_url(url),
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': ''
                    }
                    results.append(result)

        return results

コード例 #46

0

ファイルを表示

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/ajax/search.php')
        timestamp = int(time.time() * 1000)
        query = {'q': title, 'limit': 100, 'timestamp': timestamp, 'verifiedCheck': ''}
        html = self._http_get(search_url, data=query, headers=XHR, cache_limit=1)
        if video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]:
            media_type = 'TV SHOW'
        else:
            media_type = 'MOVIE'

        js_data = scraper_utils.parse_json(html, search_url)
        for item in js_data:
            if not item['meta'].upper().startswith(media_type): continue
            
            result = {'title': scraper_utils.cleanse_title(item['title']), 'url': scraper_utils.pathify_url(item['permalink']), 'year': ''}
            results.append(result)

        return results

コード例 #47

0

ファイルを表示

ファイル: rmz_scraper.py プロジェクト: yam4me/plugin.video.blamo

 def search(self, video_type, title, year, season=''):  # @UnusedVariable
     results = []
     search_url = scraper_utils.urljoin(self.base_url, '/search/')
     search_url = scraper_utils.urljoin(search_url, urllib.quote_plus(title))
     html = self._http_get(search_url, require_debrid=False, cache_limit=8)
     for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'list'}):
         if not dom_parser2.parse_dom(fragment, 'div', {'class': 'lists_titles'}): continue
         for attrs, match_title_year in dom_parser2.parse_dom(fragment, 'a', {'class': 'title'}, req='href'):
             match_url = attrs['href']
             match_title_year = re.sub('</?[^>]*>', '', match_title_year)
             is_show = re.search('\(d{4|-\)', match_title_year)
             if (is_show and video_type == VIDEO_TYPES.MOVIE) or (not is_show and video_type == VIDEO_TYPES.TVSHOW): continue
             
             match_title, match_year = scraper_utils.extra_year(match_title_year)
             if not year or not match_year or year == match_year:
                 result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                 results.append(result)
         
     return results

コード例 #48

0

ファイルを表示

 def search(self, video_type, title, year, season=''):
     results = []
     norm_title = scraper_utils.normalize_title(title)
     html = self._http_get(self.base_url, cache_limit=48)
     links = dom_parser.parse_dom(html,
                                  'a', {'rollapp-href': '[^"]*'},
                                  ret='href')
     titles = dom_parser.parse_dom(html, 'a', {'rollapp-href': '[^"]*'})
     for match_url, match_title in zip(links, titles):
         if norm_title in scraper_utils.normalize_title(match_title):
             show_dir = self.__get_show_dir(match_url)
             if show_dir:
                 result = {
                     'url': scraper_utils.pathify_url(show_dir),
                     'title': scraper_utils.cleanse_title(match_title),
                     'year': ''
                 }
                 results.append(result)
     return results

コード例 #49

0

ファイルを表示

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(
            self.base_url,
            '/search-movies/%s.html' % (urllib.quote_plus(title)))
        html = self._http_get(search_url, cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'li',
                                                  {'class': 'item'}):
            match_url = dom_parser2.parse_dom(item, 'a', req='href')
            match_title_year = re.search('onmouseover="([^"]+)', item)
            if match_url and match_title_year:
                match_url = match_url[0].attrs['href']
                match_title_year = match_title_year.group(1)
                match = re.search('<b>(?:<i>)?\s*(.*?)\s*(?:</i>)?</b>',
                                  match_title_year)
                if not match: continue

                match_title, match_year = scraper_utils.extra_year(
                    match.group(1))
                is_season = re.search('season\s+(\d+)', match_title_year, re.I)
                if (is_season and video_type == VIDEO_TYPES.MOVIE) or (
                        not is_season and video_type == VIDEO_TYPES.SEASON):
                    continue

                if video_type == VIDEO_TYPES.MOVIE:
                    if not match_year:
                        match_year = re.search('>Release:\s*(\d{4})',
                                               match_title_year)
                        match_year = match_year.group(1) if match_year else ''
                else:
                    if season and int(season) != int(is_season.group(1)):
                        continue

                if not year or not match_year or year == match_year:
                    result = {
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': match_year,
                        'url': scraper_utils.pathify_url(match_url)
                    }
                    results.append(result)

        return results

コード例 #50

0

ファイルを表示

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/search/')
        search_url += urllib.quote_plus(title)
        html = self._http_get(search_url, cache_limit=1)
        for _attrs, fragment in dom_parser2.parse_dom(html, 'div',
                                                      {'class': 'inner'}):
            name = dom_parser2.parse_dom(fragment, 'div', {'class': 'name'})
            if not name: continue

            match = dom_parser2.parse_dom(name[0].content, 'a', req='href')
            if not match: continue

            match_url, match_title_year = match[0].attrs['href'], match[
                0].content
            if 'tv-series' in match_url and video_type == VIDEO_TYPES.MOVIE:
                continue

            match_title_year = re.sub('</?[^>]*>', '', match_title_year)
            match_title_year = re.sub('[Ww]atch\s+[Mm]ovie\s*', '',
                                      match_title_year)
            match_title_year = match_title_year.replace('&#8217;', "'")
            match_title, match_year = scraper_utils.extra_year(
                match_title_year)
            if not match_year:
                year_span = dom_parser2.parse_dom(fragment, 'span',
                                                  {'class': 'year'})
                if year_span:
                    year_text = dom_parser2.parse_dom(year_span[0].content,
                                                      'a')
                    if year_text:
                        match_year = year_text[0].content.strip()

            if not year or not match_year or year == match_year:
                result = {
                    'title': scraper_utils.cleanse_title(match_title),
                    'url': scraper_utils.pathify_url(match_url),
                    'year': match_year
                }
                results.append(result)

        return results

コード例 #51

0

ファイルを表示

    def search(self, video_type, title, year, season=''):
        results = []
        test_url = title.replace("'", '')
        test_url = re.sub(r'[^a-zA-Z0-9\s]+', ' ', test_url).lower().strip()
        test_url = re.sub('\s+', ' ', test_url)
        test_url = test_url.replace(' ', '-')
        if year:
            test_url += '-%s' % (year)
        test_url += '/'

        test_url = urlparse.urljoin(self.base_url, test_url)
        if self._http_get(test_url, cache_limit=1):
            result = {
                'title': scraper_utils.cleanse_title(title),
                'year': year,
                'url': scraper_utils.pathify_url(test_url)
            }
            results.append(result)

        return results

コード例 #52

0

ファイルを表示

ファイル: hevcbluray_scraper.py プロジェクト: enursha101/xbmc-addon

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        html = self._http_get(self.base_url, params={'s': title}, cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'cover'}):
            match = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
            if not match: continue
            
            match_url, match_title_year = match[0].attrs['href'], match[0].attrs['title']
            if re.search('S\d+E\d+', match_title_year, re.I): continue
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if not match_year:
                match = re.search('-(\d{4})-', match_url)
                if match:
                    match_year = match.group(1)
                    
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)

        return results

コード例 #53

0

ファイルを表示

 def __movie_search(self, title, year):
     results = []
     norm_title = scraper_utils.normalize_title(title)
     html = self._http_get(self.base_url, cache_limit=48)
     for item in self._parse_directory(html):
         if not item['directory']:
             match_title, match_year, height, extra = scraper_utils.parse_movie_link(
                 item['title'])
             if 'dubbed' in extra.lower(): continue
             if (norm_title in scraper_utils.normalize_title(match_title)
                 ) and (not year or not match_year or year == match_year):
                 match_title = match_title.replace('.', ' ')
                 match_title += ' [%sp.%s]' % (height, extra)
                 result = {
                     'url': scraper_utils.pathify_url(item['link']),
                     'title': scraper_utils.cleanse_title(match_title),
                     'year': match_year
                 }
                 results.append(result)
     return results

コード例 #54

0

ファイルを表示

ファイル: miradetodo_scraper.py プロジェクト: enursha101/xbmc-addon

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        html = self._http_get(self.base_url, params={'s': title}, cache_limit=1)
        results = []
        for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'item'}):
            match_url = dom_parser2.parse_dom(item, 'a', req='href')
            match_title = dom_parser2.parse_dom(item, 'span', {'class': 'tt'})
            year_frag = dom_parser2.parse_dom(item, 'span', {'class': 'year'})
            if match_url and match_title:
                match_url = match_url[0].attrs['href']
                match_title = match_title[0].content
                if re.search('\d+\s*x\s*\d+', match_title): continue  # exclude episodes
                match_title, match_year = scraper_utils.extra_year(match_title)
                if not match_year and year_frag:
                    match_year = year_frag[0].content

                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                    results.append(result)

        return results

コード例 #55

0

ファイルを表示

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        page_url = scraper_utils.urljoin(self.base_url, '/tvseries/search.php')
        html = self._http_get(page_url, params={'dayq': title}, cache_limit=48)
        html = re.sub('<!--.*?-->', '', html)
        norm_title = scraper_utils.normalize_title(title)
        for _attrs, td in dom_parser2.parse_dom(html, 'td', {'class': 'topic_content'}):
            match_url = dom_parser2.parse_dom(td, 'a', req='href')
            match_title_year = dom_parser2.parse_dom(td, 'img', req='alt')
            if not match_url or not match_title_year: continue

            match_url = match_url[0].attrs['href']
            match_title_year = match_title_year[0].attrs['alt']
            if not match_url.startswith('/'): match_url = '/tvseries/' + match_url
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if (norm_title in scraper_utils.normalize_title(match_title)) and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)

        return results

コード例 #56

0

ファイルを表示

ファイル: pubfilmto_scraper.py プロジェクト: enursha101/xbmc-addon

    def _get_episode_url(self, show_url, video):
        episode_pattern = 'href="([^"]+season-0*%s-episode-0*%s-[^"]*)' % (
            video.season, video.episode)
        page_url = show_url
        pages = 0
        while page_url and pages < MAX_PAGES:
            page_url = scraper_utils.urljoin(self.base_url, page_url)
            html = self._http_get(page_url, cache_limit=2)
            ep_url = self._default_get_episode_url(html, video,
                                                   episode_pattern)
            if ep_url: return ep_url

            fragment = dom_parser2.parse_dom(html, 'div',
                                             {'class': 'pagination'})
            if not fragment: break
            match = re.search('href="([^"]+)[^>]+>\s*&gt;\s*<',
                              fragment[0].content)
            if not match: break
            page_url = scraper_utils.cleanse_title(match.group(1))
            pages += 1

コード例 #57

0

ファイルを表示

 def __search(self, video_type, title, year, season=''):
     results = []
     search_url = urlparse.urljoin(self.base_url, '/search/')
     search_url += urllib.quote(title)
     html = self._http_get(search_url, cache_limit=2)
     for item in dom_parser.parse_dom(html, 'div', {'class': 'name_top'}):
         match = re.search('href="([^"]+)[^>]+>([^<]+)', item)
         if match:
             match_url, match_title_year = match.groups()
             match = re.search('(.*?)(?:\s+\(?(\d{4})\)?)', match_title_year)
             if match:
                 match_title, match_year = match.groups()
             else:
                 match_title = match_title_year
                 match_year = ''
             
             if not year or not match_year or year == match_year:
                 result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                 results.append(result)
     return results

コード例 #58

0

ファイルを表示

    def search(self, video_type, title, year, season=''):
        search_url = '/Category-FilmsAndTV/Genre-Any/Letter-Any/ByPopularity/1/Search-%s.htm' % (title)
        search_url = urlparse.urljoin(self.base_url, search_url)
        html = self._http_get(search_url, cache_limit=.25)

        results = []
        for result in dom_parser.parse_dom(html, 'div', {'class': 'searchResult'}):
            url = dom_parser.parse_dom(result, 'a', {'itemprop': 'url'}, ret='href')
            match_title = dom_parser.parse_dom(result, 'span', {'itemprop': 'name'})
            match_year = dom_parser.parse_dom(result, 'span', {'itemprop': 'copyrightYear'})
            if match_year:
                match_year = match_year[0]
            else:
                match_year = ''
            
            if url and match_title and (not year or not match_year or year == match_year):
                if FRAGMENTS[video_type] not in url[0].lower(): continue
                result = {'url': scraper_utils.pathify_url(url[0]), 'title': scraper_utils.cleanse_title(match_title[0]), 'year': match_year}
                results.append(result)
        return results

コード例 #59

0

ファイルを表示

ファイル: view47_scraper.py プロジェクト: AMOboxTV/AMOBox.LegoBuild

    def search(self, video_type, title, year, season=''):
        search_url = urlparse.urljoin(
            self.base_url, '/search.php?q=%s&limit=20&timestamp=%s' %
            (urllib.quote_plus(title), int(time.time())))
        html = self._http_get(search_url, cache_limit=.25)
        results = []
        items = dom_parser.parse_dom(html, 'li')
        if len(items) >= 2:
            items = items[1:]
            for item in items:
                match_url = dom_parser.parse_dom(item, 'a', ret='href')
                match_title_year = dom_parser.parse_dom(item, 'strong')
                if match_url and match_title_year:
                    match_url = match_url[0]
                    match_title_year = re.sub('</?strong>', '',
                                              match_title_year[0])
                    is_season = re.search('S(?:eason\s+)?(\d+)$',
                                          match_title_year, re.I)
                    if not is_season and video_type == VIDEO_TYPES.MOVIE or is_season and VIDEO_TYPES.SEASON:
                        if video_type == VIDEO_TYPES.MOVIE:
                            match = re.search('(.*?)(?:\s+\(?(\d{4})\)?)',
                                              match_title_year)
                            if match:
                                match_title, match_year = match.groups()
                            else:
                                match_title = match_title_year
                                match_year = ''
                        else:
                            if season and int(
                                    is_season.group(1)) != int(season):
                                continue
                            match_title = match_title_year
                            match_year = ''

                        result = {
                            'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year,
                            'url': scraper_utils.pathify_url(match_url)
                        }
                        results.append(result)
        return results

コード例 #60

0

ファイルを表示

    def search(self, video_type, title, year, season=''):
        search_url = urlparse.urljoin(self.base_url,
                                      '/movies-list.php?b=search&v=%s')
        search_url = search_url % (urllib.quote_plus(title))
        html = self._http_get(search_url, headers=XHR, cache_limit=0)
        results = []
        for movie in dom_parser.parse_dom(html, 'li',
                                          {'class': '[^"]*movie[^"]*'}):
            href = dom_parser.parse_dom(movie, 'a', ret='href')
            title = dom_parser.parse_dom(movie, 'h4')

            if href and title:
                match = re.search('movie-detail/(.*?)/', href[0])
                if match:
                    result = {
                        'url': DETAIL_URL % (match.group(1)),
                        'title': scraper_utils.cleanse_title(title[0]),
                        'year': ''
                    }
                    results.append(result)
        return results