def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the site's /search/ endpoint for shows or movies.

    Returns a list of {'url', 'title', 'year'} dicts. TV shows are matched
    via the TAGS link inside each post; movies via <h2> headings paired with
    their post bodies.
    """
    results = []
    search_url = '/search/' + urllib.quote_plus(title)
    # NOTE(review): require_debrid=False here, but the otherwise-identical
    # search() later in this file passes require_debrid=True — confirm which
    # is intended for this scraper.
    html = self._http_get(search_url, require_debrid=False, cache_limit=1)
    if video_type == VIDEO_TYPES.TVSHOW:
        seen_urls = {}  # de-dupes shows that appear in multiple posts
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            # skip posts from the wrong category (movie post in a TV search, etc.)
            if CATEGORIES[video_type] not in post: continue
            # the show link lives in the "TAGS:" span of the post footer
            match = re.search('<span>\s*TAGS:\s*</span>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', post, re.I)
            if match:
                show_url, match_title = match.groups()
                if show_url in seen_urls: continue
                result = {'url': scraper_utils.pathify_url(show_url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                seen_urls[show_url] = result
                results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        norm_title = scraper_utils.normalize_title(title)
        # headings and post bodies appear in the same document order, so zip()
        # pairs each <h2> link with its post content
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = [result.content for result in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
        for heading, post in zip(headings, posts):
            if CATEGORIES[video_type] not in post or self.__too_old(post): continue
            post_url, post_title = heading
            meta = scraper_utils.parse_movie_link(post_title)
            full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
            match_year = meta['year']
            match_norm_title = scraper_utils.normalize_title(meta['title'])
            # substring match in either direction; year must agree when both known
            if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                results.append(result)
    return results
def _get_episode_url(self, show_url, video):
    """Resolve |show_url| to the episode page for |video|.

    Order of attempts: SxxEyy href pattern (via the default matcher), then
    airdate fallback, then episode-title fallback. Returns a pathified URL,
    or None implicitly when nothing matches.
    """
    url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    # e.g. matches "...-s01e02" but not "...-s01e023" thanks to (?!\d)
    episode_pattern = 'href="([^"]+-s0*%se0*%s(?!\d)[^"]*)' % (video.season, video.episode)
    parts = dom_parser2.parse_dom(html, 'ul', {'class': 'episode_list'})
    fragment = '\n'.join(part.content for part in parts)
    result = self._default_get_episode_url(fragment, video, episode_pattern)
    if result: return result
    # fallback data: parallel lists of hrefs, air dates, and episode names
    ep_urls = [r.attrs['href'] for r in dom_parser2.parse_dom(fragment, 'a', req='href')]
    ep_dates = [r.content for r in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_air_d'})]
    ep_titles = [r.content for r in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_name'})]
    force_title = scraper_utils.force_title(video)
    if not force_title and kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
        for ep_url, ep_date in zip(ep_urls, ep_dates):
            logger.log('Quikr Ep Airdate Matching: %s - %s - %s' % (ep_url, ep_date, video.ep_airdate), log_utils.LOGDEBUG)
            if video.ep_airdate == scraper_utils.to_datetime(ep_date, '%Y-%m-%d').date():
                return scraper_utils.pathify_url(ep_url)
    if force_title or kodi.get_setting('title-fallback') == 'true':
        norm_title = scraper_utils.normalize_title(video.ep_title)
        for ep_url, ep_title in zip(ep_urls, ep_titles):
            # strip the leading "<span>...</span>" episode-number prefix
            ep_title = re.sub('<span>.*?</span>\s*', '', ep_title)
            logger.log('Quikr Ep Title Matching: %s - %s - %s' % (ep_url.encode('utf-8'), ep_title.encode('utf-8'), video.ep_title), log_utils.LOGDEBUG)
            if norm_title == scraper_utils.normalize_title(ep_title):
                return scraper_utils.pathify_url(ep_url)
def _get_episode_url(self, show_url, video):
    """Find the episode URL on an 'el-item' style show page.

    Attempts SxxEyy href match, then airdate match (DD-MM-YYYY), then
    episode-title match. Returns a pathified URL or None implicitly.
    """
    url = urlparse.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    if html:
        force_title = scraper_utils.force_title(video)
        episodes = dom_parser.parse_dom(html, 'div', {'class': '\s*el-item\s*'})
        if not force_title:
            # zero-padded SxxEyy; (?!\d) stops E02 matching E023
            episode_pattern = 'href="([^"]*-[sS]%02d[eE]%02d(?!\d)[^"]*)' % (int(video.season), int(video.episode))
            match = re.search(episode_pattern, html)
            if match:
                return scraper_utils.pathify_url(match.group(1))
            if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                # the site renders air dates as DD-MM-YYYY
                airdate_pattern = '%02d-%02d-%d' % (video.ep_airdate.day, video.ep_airdate.month, video.ep_airdate.year)
                for episode in episodes:
                    ep_url = dom_parser.parse_dom(episode, 'a', ret='href')
                    ep_airdate = dom_parser.parse_dom(episode, 'div', {'class': 'date'})
                    if ep_url and ep_airdate:
                        ep_airdate = ep_airdate[0].strip()
                        if airdate_pattern == ep_airdate:
                            return scraper_utils.pathify_url(ep_url[0])
        if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
            norm_title = scraper_utils.normalize_title(video.ep_title)
            for episode in episodes:
                ep_url = dom_parser.parse_dom(episode, 'a', ret='href')
                ep_title = dom_parser.parse_dom(episode, 'div', {'class': 'e-name'})
                if ep_url and ep_title and norm_title == scraper_utils.normalize_title(ep_title[0]):
                    return scraper_utils.pathify_url(ep_url[0])
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Scan the site's listing pages for entries matching |title| and |year|.

    The site has no search endpoint, so three browse pages are scraped and
    de-duplicated by pathified URL.
    """
    results = []
    seen_urls = set()
    for page in ('/latest-added/', '/popular-today/', '/most-popular/'):
        page_url = scraper_utils.urljoin(self.base_url, page)
        html = self._http_get(page_url, cache_limit=24)
        fragment = dom_parser2.parse_dom(html, 'div', {'class': 'home'})
        if not fragment:
            continue
        norm_title = scraper_utils.normalize_title(title)
        for attrs, match_title_year in dom_parser2.parse_dom(fragment[0].content, 'a', req='href'):
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if norm_title not in scraper_utils.normalize_title(match_title):
                continue
            if year and match_year and year != match_year:
                continue
            match_url = scraper_utils.pathify_url(attrs['href'])
            if match_url in seen_urls:
                continue
            seen_urls.add(match_url)
            results.append({'url': match_url,
                            'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year})
    return results
def search(self, video_type, title, year, season=''):
    """Search via /search?q=<title> and scrape the 'video_item' results.

    Returns a list of {'url', 'title', 'year'} dicts. The site exposes no
    year metadata here, so 'year' is always ''.
    """
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search?q=')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=1)
    norm_title = scraper_utils.normalize_title(title)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'video_item'}):
        match_url = dom_parser.parse_dom(item, 'a', ret='href')
        match_title = dom_parser.parse_dom(item, 'img', ret='alt')
        # no year is available from this markup
        match_year = ''
        if match_url and match_title:
            match_url = match_url[0]
            match_title = match_title[0]
            # BUG FIX: was `VIDEO_TYPES == VIDEO_TYPES.TVSHOW`, comparing the
            # enum container to one of its members (always False), so movie
            # results leaked into TV-show searches. Compare the requested type.
            if video_type == VIDEO_TYPES.TVSHOW and '/tv-series/' not in match_url:
                continue
            # (removed a dead `if match_year:` branch — match_year is always '')
            if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(match_url),
                          'title': scraper_utils.cleanse_title(match_title),
                          'year': match_year}
                results.append(result)
    return results
def __alt_search(self, video_type, title, year, season=''):
    """Alternate search using the site's /search endpoint with a 'key' param.

    Season searches append "Season N" to the query and require the result URL
    to end in that season number. Returns {'url', 'title', 'year'} dicts.
    """
    results = []
    params = title.lower()
    if year: params += ' %s' % (year)
    if video_type == VIDEO_TYPES.SEASON and season: params += ' Season %s' % (season)
    params = {'key': params}
    search_url = urlparse.urljoin(self.base_url, '/search')
    html = self._http_get(search_url, params=params, cache_limit=1)
    norm_title = scraper_utils.normalize_title(title)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'caption'}):
        match = re.search('href="([^"]+)[^>]+>(.*?)<span[^>]*>', item)
        if match:
            match_url, match_title = match.groups()
            # "-season-N" in the URL distinguishes season pages from movie pages
            is_season = re.search('-season-\d+', match_url)
            if (video_type == VIDEO_TYPES.MOVIE and not is_season) or (video_type == VIDEO_TYPES.SEASON and is_season):
                if video_type == VIDEO_TYPES.SEASON:
                    # URL must end with the exact requested season number
                    if season and not re.search('season-0*%s$' % (season), match_url): continue
                match_title = re.sub('</?[^>]*>', '', match_title)  # strip inline tags
                match_title = re.sub('\s+Full\s+Movie', '', match_title)
                # the year, when present, is embedded in the URL slug
                match = re.search('-(\d{4})(?:$|-)', match_url)
                if match:
                    match_year = match.group(1)
                else:
                    match_year = ''
                if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Search a directory-listing style site (/Film/<year>/ and /Serial/).

    Movies: requires |year|; walks the year directory and matches file names.
    Shows: matches directory names under /Serial/. Returns result dicts.
    """
    results = []
    norm_title = scraper_utils.normalize_title(title)
    if video_type == VIDEO_TYPES.MOVIE:
        # movies are organized in per-year directories, so a year is required
        if year:
            base_url = urlparse.urljoin(self.base_url, '/Film/')
            html = self._http_get(base_url, cache_limit=48)
            for link in self.__parse_directory(html):
                if year == link['title']:
                    url = urlparse.urljoin(base_url, link['link'])
                    for movie in self.__get_files(url, cache_limit=24):
                        match_title, match_year, _height, _extra = scraper_utils.parse_movie_link(movie['link'])
                        # only files (not subdirectories) whose name matches
                        if not movie['directory'] and norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                            result = {'url': scraper_utils.pathify_url(url), 'title': match_title, 'year': year}
                            results.append(result)
    else:
        base_url = urlparse.urljoin(self.base_url, '/Serial/')
        html = self._http_get(base_url, cache_limit=48)
        for link in self.__parse_directory(html):
            # shows are directories named after the show
            if link['directory'] and norm_title in scraper_utils.normalize_title(link['title']):
                url = urlparse.urljoin(base_url, link['link'])
                result = {'url': scraper_utils.pathify_url(url), 'title': link['title'], 'year': ''}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Match |title| against the debrid service's transfer list.

    Movies and seasons are distinguished by a "season N" marker in the
    transfer name; episodes (SxxEyy) are always skipped. Result 'url' is a
    'hash=<hash>' pseudo-URL consumed elsewhere in this scraper.
    """
    url = urlparse.urljoin(self.base_url, LIST_URL)
    js_data = self._http_get(url, cache_limit=0)
    norm_title = scraper_utils.normalize_title(title)
    results = []
    if 'transfers' in js_data:
        for item in js_data['transfers']:
            is_season = re.search('(.*?[._ ]season[._ ]+(\d+))[._ ](.*)', item['name'], re.I)
            # BUG FIX: the original test ended with `... or is_season and
            # VIDEO_TYPES.SEASON` — the `video_type ==` was missing, and since
            # the enum member is truthy, every season item was accepted even
            # for MOVIE searches. Parenthesized and compared correctly here.
            if (video_type == VIDEO_TYPES.MOVIE and not is_season) or (video_type == VIDEO_TYPES.SEASON and is_season):
                if re.search('[._ ]S\d+E\d+[._ ]', item['name']): continue  # skip episodes
                if video_type == VIDEO_TYPES.SEASON:
                    match_title, match_season, extra = is_season.groups()
                    if season and int(match_season) != int(season): continue
                    match_year = ''
                    match_title = re.sub('[._]', ' ', match_title)
                else:
                    # movie names usually carry a (YYYY) year
                    match = re.search('(.*?)\(?(\d{4})\)?(.*)', item['name'])
                    if match:
                        match_title, match_year, extra = match.groups()
                    else:
                        match_title, match_year, extra = item['name'], '', ''
                match_title = match_title.strip()
                extra = extra.strip()
                if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                    result_title = match_title
                    if extra: result_title += ' [%s]' % (extra)
                    result = {'title': result_title, 'year': match_year, 'url': 'hash=%s' % (item['hash'])}
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the site's /search/ endpoint for shows or movies (debrid required).

    Returns a list of {'url', 'title', 'year'} dicts. TV shows are matched via
    the TAGS link in each post; movies via <h2> headings paired with posts.
    """
    results = []
    search_url = '/search/' + urllib.quote_plus(title)
    html = self._http_get(search_url, require_debrid=True, cache_limit=1)
    if video_type == VIDEO_TYPES.TVSHOW:
        seen_urls = {}  # de-dupes shows appearing in multiple posts
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            # skip posts from the wrong category
            if CATEGORIES[video_type] not in post: continue
            # the show link lives in the "TAGS:" span of the post footer
            match = re.search('<span>\s*TAGS:\s*</span>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', post, re.I)
            if match:
                show_url, match_title = match.groups()
                if show_url in seen_urls: continue
                result = {'url': scraper_utils.pathify_url(show_url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                seen_urls[show_url] = result
                results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        norm_title = scraper_utils.normalize_title(title)
        # headings and post bodies appear in document order, so zip() pairs them
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = [result.content for result in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
        for heading, post in zip(headings, posts):
            if CATEGORIES[video_type] not in post or self.__too_old(post): continue
            post_url, post_title = heading
            meta = scraper_utils.parse_movie_link(post_title)
            full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
            match_year = meta['year']
            match_norm_title = scraper_utils.normalize_title(meta['title'])
            # substring match in either direction; year must agree when both known
            if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                results.append(result)
    return results
def _get_episode_url(self, show_url, video):
    """Walk the show's paginated post list looking for |video|'s episode.

    Stops when a post is older than the configured cutoff (__too_old).
    Matching: release-name check first, then episode-title fallback.
    """
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]  # list so the pagination loop below can reassign it
    too_old = False
    while page_url and not too_old:
        # NOTE(review): page_url[0] is passed without joining to base_url,
        # unlike the sibling implementation — presumably _http_get resolves
        # relative paths; confirm.
        html = self._http_get(page_url[0], require_debrid=True, cache_limit=1)
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                match = dom_parser2.parse_dom(post, 'a', req='href')
                if match:
                    url, title = match[0].attrs['href'], match[0].content
                    if not force_title:
                        if scraper_utils.release_check(video, title, require_title=False):
                            return scraper_utils.pathify_url(url)
                    else:
                        if title_fallback and norm_title:
                            # episode title appears after the closing </strong>
                            match = re.search('</strong>(.*?)</p>', post)
                            if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                return scraper_utils.pathify_url(url)
        # advance to the next page, if any
        page_url = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
        if page_url: page_url = [page_url[0].attrs['href']]
def _get_episode_url(self, show_url, video):
    """Resolve |show_url| to an episode URL (older dom_parser variant).

    Tries the default SxxEyy matcher against the show page, then airdate and
    episode-title fallbacks scraped from the 'episode_list' fragment.
    """
    # e.g. matches "...-s1e2" with optional zero padding; (?!\d) avoids e23 for e2
    episode_pattern = 'href="([^"]+-s0*%se0*%s(?!\d)[^"]*)' % (video.season, video.episode)
    result = self._default_get_episode_url(show_url, video, episode_pattern)
    if result: return result
    url = urlparse.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    fragment = dom_parser.parse_dom(html, "ul", {"class": "episode_list"})
    if fragment:
        # parallel lists: hrefs, air dates, and episode names in document order
        ep_urls = dom_parser.parse_dom(fragment[0], "a", ret="href")
        ep_dates = dom_parser.parse_dom(fragment[0], "span", {"class": "episode_air_d"})
        ep_titles = dom_parser.parse_dom(fragment[0], "span", {"class": "episode_name"})
        force_title = scraper_utils.force_title(video)
        if not force_title and kodi.get_setting("airdate-fallback") == "true" and video.ep_airdate:
            for ep_url, ep_date in zip(ep_urls, ep_dates):
                log_utils.log(
                    "Quikr Ep Airdate Matching: %s - %s - %s" % (ep_url, ep_date, video.ep_airdate),
                    log_utils.LOGDEBUG,
                )
                if video.ep_airdate == scraper_utils.to_datetime(ep_date, "%Y-%m-%d").date():
                    return scraper_utils.pathify_url(ep_url)
        if force_title or kodi.get_setting("title-fallback") == "true":
            norm_title = scraper_utils.normalize_title(video.ep_title)
            for ep_url, ep_title in zip(ep_urls, ep_titles):
                # strip the "<span>...</span>" episode-number prefix
                ep_title = re.sub("<span>.*?</span>\s*", "", ep_title)
                log_utils.log(
                    "Quikr Ep Title Matching: %s - %s - %s" % (ep_url, norm_title, video.ep_title),
                    log_utils.LOGDEBUG,
                )
                if norm_title == scraper_utils.normalize_title(ep_title):
                    return scraper_utils.pathify_url(ep_url)
def search(self, video_type, title, year, season=''):
    """Browse the alphabetical /tv-listings/<letter>/ index for |title|.

    Year is taken from a trailing "(YYYY)" in the link text when present.
    """
    results = []
    if not title:
        return results
    search_url = urlparse.urljoin(self.base_url, '/tv-listings/%s/' % (title[:1].lower()))
    html = self._http_get(search_url, cache_limit=8)
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'home'})
    if not fragment:
        return results
    norm_title = scraper_utils.normalize_title(title)
    for link in re.finditer('''href=["']([^'"]+)[^>]+>([^<]+)''', fragment[0]):
        url, match_title_year = link.groups()
        year_match = re.search('(.*?)\s+\((\d{4})\)', match_title_year)
        if year_match:
            match_title, match_year = year_match.groups()
        else:
            match_title, match_year = match_title_year, ''
        title_ok = norm_title in scraper_utils.normalize_title(match_title)
        year_ok = not year or not match_year or year == match_year
        if title_ok and year_ok:
            results.append({'url': scraper_utils.pathify_url(url),
                            'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year})
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search via the site's WordPress ?s= parameter and scrape result tiles.

    Episode links (SxxEyy) and TV-series links are skipped; only movie
    entries are returned.
    """
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=1)
    if re.search('Sorry, but nothing matched', html, re.I):
        return results
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'li', {'class': 'box-shadow'}):
        for attrs, _content in dom_parser2.parse_dom(item, 'a', req=['href', 'title']):
            match_url = attrs['href']
            match_title_year = attrs['title']
            if re.search('S\d{2}E\d{2}', match_title_year):
                continue  # skip episodes
            if re.search('TV\s*SERIES', match_title_year, re.I):
                continue  # skip shows
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if year and match_year and year != match_year:
                continue
            if norm_title in scraper_utils.normalize_title(match_title):
                results.append({'title': scraper_utils.cleanse_title(match_title),
                                'year': match_year,
                                'url': scraper_utils.pathify_url(match_url)})
    return results
def search(self, video_type, title, year):
    """Search a directory-listing site (no season parameter in this variant).

    Movies: requires |year|; walks /Film/<year>/ and matches file names.
    Shows: matches directory names under /Serial/.
    """
    results = []
    norm_title = scraper_utils.normalize_title(title)
    if video_type == VIDEO_TYPES.MOVIE:
        # movies are organized in per-year directories, so a year is required
        if year:
            base_url = urlparse.urljoin(self.base_url, '/Film/')
            html = self._http_get(base_url, cache_limit=48)
            for link in self.__parse_directory(html):
                if year == link['title']:
                    url = urlparse.urljoin(base_url, link['link'])
                    for movie in self.__get_files(url, cache_limit=24):
                        match_title, match_year, _height, _extra = scraper_utils.parse_movie_link(movie['link'])
                        # only files (not subdirectories) whose name matches
                        if not movie['directory'] and norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                            result = {'url': scraper_utils.pathify_url(url), 'title': match_title, 'year': year}
                            results.append(result)
    else:
        base_url = urlparse.urljoin(self.base_url, '/Serial/')
        html = self._http_get(base_url, cache_limit=48)
        for link in self.__parse_directory(html):
            # shows are directories named after the show
            if link['directory'] and norm_title in scraper_utils.normalize_title(link['title']):
                url = urlparse.urljoin(base_url, link['link'])
                result = {'url': scraper_utils.pathify_url(url), 'title': link['title'], 'year': ''}
                results.append(result)
    return results
def get_sources(self, video):
    """Build direct-stream hoster dicts from a file-listing page.

    Movies match by normalized title; episodes by season/episode number.
    Dubbed releases are skipped. Stream URLs carry a User-Agent header hint.
    """
    source_url = self.get_url(video)
    hosters = []
    norm_title = scraper_utils.normalize_title(video.title)
    if source_url and source_url != FORCE_NO_MATCH:
        source_url = urlparse.urljoin(self.base_url, source_url)
        for line in self.__get_files(source_url, cache_limit=24):
            if not line['directory']:
                match = {}
                if video.video_type == VIDEO_TYPES.MOVIE:
                    match_title, _match_year, height, extra = scraper_utils.parse_movie_link(line['link'])
                    if norm_title in scraper_utils.normalize_title(match_title):
                        match = line
                else:
                    _show_title, season, episode, height, extra = scraper_utils.parse_episode_link(line['link'])
                    if int(video.season) == int(season) and int(video.episode) == int(episode):
                        match = line
                # skip dubbed releases regardless of match
                if 'dubbed' in extra.lower(): continue
                if match:
                    # direct link; player needs the UA appended after the pipe
                    stream_url = match['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
                    hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': scraper_utils.height_get_quality(height), 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                    if 'x265' in extra: hoster['format'] = 'x265'
                    if 'size' in match: hoster['size'] = scraper_utils.format_size(int(match['size']))
                    hosters.append(hoster)
    return hosters
def search(self, video_type, title, year, season=''):
    """Search the site's A-Z index pages.

    Leading articles ("The ", "A ") are skipped when picking the index
    letter; digits all live under '1'. The page is windows-1252 encoded and
    HTML-entity escaped (Python 2 unicode/HTMLParser handling).
    """
    if video_type == VIDEO_TYPES.MOVIE:
        url = urlparse.urljoin(self.base_url, '/movies/a-z/')
    else:
        url = urlparse.urljoin(self.base_url, '/tv/a-z/')
    # index letter: skip leading article, bucket digits under '1'
    if title.upper().startswith('THE '):
        first_letter = title[4:5]
    elif title.upper().startswith('A '):
        first_letter = title[2:3]
    elif title[:1] in string.digits:
        first_letter = '1'
    else:
        first_letter = title[:1]
    url = url + first_letter.upper()
    html = self._http_get(url, cache_limit=.25)
    h = HTMLParser.HTMLParser()
    html = unicode(html, 'windows-1252')  # site serves cp1252, not utf-8
    html = h.unescape(html)
    norm_title = scraper_utils.normalize_title(title)
    # link text is "Title (year)" with the year optional
    pattern = 'class=star.*?href=([^>]+)>(.*?)(?:\s*\((\d+)\))?</a>'
    results = []
    for match in re.finditer(pattern, html, re.DOTALL):
        # groups('') defaults the optional year group to ''
        url, match_title, match_year = match.groups('')
        if norm_title in scraper_utils.normalize_title(match_title) and (
                not year or not match_year or year == match_year):
            result = {'url': url, 'title': match_title, 'year': match_year}
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Page through the /list/?char=<letter> index until matches are found.

    Entries labelled "Season N" are filtered against |season| when given.
    Pagination stops as soon as any results are collected.
    """
    results = []
    # index is alphabetic only; non-letter titles can't be searched here
    if title and title[0].isalpha():
        page_url = ['/list/?char=%s' % (title[0])]  # list so the loop can reassign
        while page_url:
            page_url = urlparse.urljoin(self.base_url, page_url[0])
            html = self._http_get(page_url, cache_limit=48)
            fragment = dom_parser.parse_dom(html, 'ul', {'class': 'list-film-char'})
            if fragment:
                norm_title = scraper_utils.normalize_title(title)
                for match in re.finditer('href="([^"]+)[^>]+>(.*?)</a>', fragment[0]):
                    match_url, match_title = match.groups()
                    match_title = re.sub('</?strong>', '', match_title)
                    match = re.search('Season\s+(\d+)', match_title, re.I)
                    if match:
                        # wrong season for a season search: skip
                        if season and int(season) != int(match.group(1)):
                            continue
                    if norm_title in scraper_utils.normalize_title(match_title):
                        result = {'title': scraper_utils.cleanse_title(match_title), 'year': '', 'url': scraper_utils.pathify_url(match_url)}
                        results.append(result)
            if results:
                break  # found something on this page; no need to paginate further
            page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
    return results
def _get_episode_url(self, show_url, video):
    """Walk the show's paginated posts matching SxxEyy or the air date.

    Release names use '.', '_' or ' ' as separators, hence the (\.|_| )
    delimiters. Stops when a post is older than the cutoff (__too_old).
    """
    sxe = '(\.|_| )S%02dE%02d(\.|_| )' % (int(video.season), int(video.episode))
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    try:
        airdate_pattern = video.ep_airdate.strftime('(\.|_| )%Y(\.|_| )%m(\.|_| )%d(\.|_| )')
    except:
        # no/invalid airdate on the video object; disable airdate matching
        airdate_pattern = ''
    page_url = [show_url]  # list so the pagination loop can reassign it
    too_old = False
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
        for post in posts:
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                match = re.search('<a\s+href="([^"]+)[^>]+>(.*?)</a>', post)
                if match:
                    url, title = match.groups()
                    if not force_title:
                        if re.search(sxe, title) or (airdate_pattern and re.search(airdate_pattern, title)):
                            return scraper_utils.pathify_url(url)
                    else:
                        if title_fallback and norm_title:
                            # episode title appears after the closing </strong>
                            match = re.search('</strong>(.*?)</p>', post)
                            if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                return scraper_utils.pathify_url(url)
        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
def get_sources(self, video):
    """Build direct-stream hoster dicts from a file listing (meta-dict variant).

    Movies match by normalized title; episodes via __episode_match. Dubbed
    releases are skipped. The base_url is stripped from stream URLs.
    """
    source_url = self.get_url(video)
    hosters = []
    norm_title = scraper_utils.normalize_title(video.title)
    if source_url and source_url != FORCE_NO_MATCH:
        source_url = urlparse.urljoin(self.base_url, source_url)
        for line in self._get_files(source_url, cache_limit=24):
            if not line['directory']:
                match = {}
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(line['link'])
                    if norm_title in scraper_utils.normalize_title(meta['title']):
                        match = line
                elif self.__episode_match(line, video):
                    match = line
                    meta = scraper_utils.parse_episode_link(line['link'])
                if match:
                    if meta['dubbed']: continue  # skip dubbed releases
                    # direct link; player needs the UA appended after the pipe
                    stream_url = match['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
                    stream_url = stream_url.replace(self.base_url, '')  # keep path-relative
                    quality = scraper_utils.height_get_quality(meta['height'])
                    hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                    if 'format' in meta: hoster['format'] = meta['format']
                    if 'size' in match: hoster['size'] = scraper_utils.format_size(int(match['size']))
                    hosters.append(hoster)
    return hosters
def search(self, video_type, title, year, season=''):
    """Search for shows via the full series index, or movies via /search.

    Movie searches honor the site's rate-limit message ("you can search
    again in N seconds") by sleeping (capped at self.timeout) and retrying
    once with caching disabled.
    """
    results = []
    if video_type == VIDEO_TYPES.TVSHOW:
        # the full A-Z series list is small enough to scrape directly
        url = urlparse.urljoin(self.base_url, '/series/all/')
        html = self._http_get(url, cache_limit=8)
        links = dom_parser.parse_dom(html, 'a', {'class': 'underilne'}, 'href')  # sic: site's class is misspelled
        titles = dom_parser.parse_dom(html, 'a', {'class': 'underilne'})
        items = zip(links, titles)
    else:
        url = urlparse.urljoin(self.base_url, '/search?=%s' % urllib.quote_plus(title))
        data = {'q': title, 'go': 'Search'}
        html = self._http_get(url, data=data, cache_limit=8)
        # the site rate-limits searches; wait out the penalty and retry once
        match = re.search('you can search again in (\d+) seconds', html, re.I)
        if match:
            wait = int(match.group(1))
            if wait > self.timeout: wait = self.timeout
            time.sleep(wait)
            html = self._http_get(url, data=data, cache_limit=0)
        pattern = 'class="movie_box.*?href="([^"]+).*?<h1>([^<]+)'
        items = re.findall(pattern, html, re.DOTALL)
    norm_title = scraper_utils.normalize_title(title)
    for item in items:
        url, match_title = item
        if norm_title in scraper_utils.normalize_title(match_title):
            result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Browse the /alphabet/<letter>/ index for |title|.

    Titles starting with a digit are grouped under '0-9'. Year comes from a
    trailing "(YYYY)" in the link text when present.
    """
    results = []
    if not title:
        return results
    first_letter = title[:1].lower()
    if first_letter.isdigit():
        first_letter = '0-9'
    search_url = urlparse.urljoin(self.base_url, '/alphabet/%s/' % (first_letter))
    html = self._http_get(search_url, cache_limit=24)
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'home'})
    if not fragment:
        return results
    norm_title = scraper_utils.normalize_title(title)
    for link in re.finditer('''href=["']([^'"]+)[^>]+>([^<]+)''', fragment[0]):
        url, match_title_year = link.groups()
        year_match = re.search('(.*?)\s+\((\d{4})\)', match_title_year)
        if year_match:
            match_title, match_year = year_match.groups()
        else:
            match_title, match_year = match_title_year, ''
        title_ok = norm_title in scraper_utils.normalize_title(match_title)
        year_ok = not year or not match_year or year == match_year
        if title_ok and year_ok:
            results.append({'url': scraper_utils.pathify_url(url),
                            'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year})
    return results
def search(self, video_type, title, year):
    """Search the site's A-Z index pages (no season parameter in this variant).

    Leading articles ("The ", "A ") are skipped when picking the index
    letter; digits all live under '1'. The page is windows-1252 encoded and
    HTML-entity escaped (Python 2 unicode/HTMLParser handling).
    """
    if video_type == VIDEO_TYPES.MOVIE:
        url = urlparse.urljoin(self.base_url, '/movies/a-z/')
    else:
        url = urlparse.urljoin(self.base_url, '/tv/a-z/')
    # index letter: skip leading article, bucket digits under '1'
    if title.upper().startswith('THE '):
        first_letter = title[4:5]
    elif title.upper().startswith('A '):
        first_letter = title[2:3]
    elif title[:1] in string.digits:
        first_letter = '1'
    else:
        first_letter = title[:1]
    url = url + first_letter.upper()
    html = self._http_get(url, cache_limit=.25)
    h = HTMLParser.HTMLParser()
    html = unicode(html, 'windows-1252')  # site serves cp1252, not utf-8
    html = h.unescape(html)
    norm_title = scraper_utils.normalize_title(title)
    # link text is "Title (year)" with the year optional
    pattern = 'class=star.*?href=([^>]+)>(.*?)(?:\s*\((\d+)\))?</a>'
    results = []
    for match in re.finditer(pattern, html, re.DOTALL):
        # groups('') defaults the optional year group to ''
        url, match_title, match_year = match.groups('')
        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            result = {'url': url, 'title': match_title, 'year': match_year}
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Search via the site's ajax-search-pro WordPress plugin endpoint.

    Season pages carry "Season N" in the title; movies get a year extracted.
    Title matching is substring in either direction.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/wp-content/plugins/ajax-search-pro/ajax_search.php')
    # POST body mimics the plugin's own AJAX request
    data = {'action': 'ajaxsearchpro_search', 'aspp': title, 'asid': '1', 'asp_inst_id': '1_1', 'options': 'current_page_id=7&qtranslate_lang=0&set_intitle=None&customset%5B%5D=post'}
    html = self._http_get(search_url, data=data, headers=XHR, cache_limit=8)
    norm_title = scraper_utils.normalize_title(title)
    for attrs, match_title_year in dom_parser2.parse_dom(html, 'a', {'class': 'asp_res_url'}):
        match_url = attrs['href']
        match_title_year = re.sub('</?[^>]*>', '', match_title_year)  # strip inline tags
        is_season = re.search('Season\s+(\d+)\s*', match_title_year, re.I)
        if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
            match_year = ''
            if video_type == VIDEO_TYPES.SEASON:
                if season and int(is_season.group(1)) != int(season): continue
                match_title = match_title_year
                match_title = re.sub('\s*\d{4}', '', match_title)  # drop stray year from season titles
            else:
                match_title, match_year = scraper_utils.extra_year(match_title_year)
            match_norm_title = scraper_utils.normalize_title(match_title)
            # substring match in either direction
            title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
            if title_match and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Match |title|/|year| against the debrid service's /torrent/list JSON.

    Episode torrents (SxxEyy) are skipped. Result 'url' is a 'hash=<hash>'
    pseudo-URL consumed elsewhere in this scraper.
    """
    results = []
    list_url = urlparse.urljoin(self.base_url, '/torrent/list')
    js_data = self._http_get(list_url, cache_limit=0)
    norm_title = scraper_utils.normalize_title(title)
    if 'torrents' in js_data:
        for item in js_data['torrents']:
            name = item['name']
            if re.search('[._ ]S\d+E\d+[._ ]', name):
                continue  # skip episodes for movies
            year_match = re.search('(.*?)\(?(\d{4})\)?(.*)', name)
            if year_match:
                match_title, match_year, extra = year_match.groups()
            else:
                match_title, match_year, extra = name, '', ''
            match_title = match_title.strip()
            extra = extra.strip()
            title_ok = norm_title in scraper_utils.normalize_title(match_title)
            year_ok = not year or not match_year or year == match_year
            if title_ok and year_ok:
                result_title = match_title
                if extra:
                    result_title += ' [%s]' % (extra)
                results.append({'title': result_title,
                                'year': match_year,
                                'url': 'hash=%s' % (item['hash'])})
    return results
def __alt_search(self, video_type, title, year, season=''):
    """Alternate search via an external JSON search API.

    SEARCH_URL is stored base64-encoded (presumably to obscure the third-party
    endpoint) and takes the quoted title as a format argument. Titles come
    back with a leading "Watch " that is stripped.
    """
    search_url = base64.decodestring(SEARCH_URL) % (
        urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=1)
    results = []
    js_data = scraper_utils.parse_json(html)
    if 'results' in js_data:
        norm_title = scraper_utils.normalize_title(title)
        for item in js_data['results']:
            match_title_year = item['titleNoFormatting']
            match_title_year = re.sub('^Watch\s+', '', match_title_year)
            match_url = item['url']
            # optional trailing "(YYYY)" year
            match = re.search('(.*?)(?:\s+\(?(\d{4})\)?)', match_title_year)
            if match:
                match_title, match_year = match.groups()
            else:
                match_title = match_title_year
                match_year = ''
            if norm_title in scraper_utils.normalize_title(
                    match_title) and (not year or not match_year or year == match_year):
                result = {
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year,
                    'url': scraper_utils.pathify_url(match_url)
                }
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the site's /tvseries/search.php endpoint (dayq parameter).

    The title/year are read from the thumbnail's alt text; relative result
    URLs are rooted under /tvseries/.
    """
    results = []
    page_url = scraper_utils.urljoin(self.base_url, '/tvseries/search.php')
    html = self._http_get(page_url, params={'dayq': title}, cache_limit=48)
    html = re.sub('<!--.*?-->', '', html)  # strip HTML comments that confuse parsing
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, td in dom_parser2.parse_dom(html, 'td', {'class': 'topic_content'}):
        match_url = dom_parser2.parse_dom(td, 'a', req='href')
        match_title_year = dom_parser2.parse_dom(td, 'img', req='alt')
        if not match_url or not match_title_year: continue
        match_url = match_url[0].attrs['href']
        match_title_year = match_title_year[0].attrs['alt']
        # site emits links relative to /tvseries/
        if not match_url.startswith('/'): match_url = '/tvseries/' + match_url
        match_title, match_year = scraper_utils.extra_year(
            match_title_year)
        if (norm_title in scraper_utils.normalize_title(match_title)) and (
                not year or not match_year or year == match_year):
            result = {
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            }
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the /search/<title>/ pages built from 'movie-details' articles.

    The query title is stripped to alphanumerics/space/dot before quoting,
    presumably to match the site's URL scheme.
    """
    results = []
    search_title = re.sub('[^A-Za-z0-9. ]', '', title)
    url = '/search/%s/' % (urllib.quote(search_title))
    url = scraper_utils.urljoin(self.base_url, url)
    html = self._http_get(url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'article', {'class': 'movie-details'}):
        match_url = dom_parser2.parse_dom(item, 'a', req='href')
        match_title = dom_parser2.parse_dom(item, 'h2', {'class': 'movie-title'})
        match_year = dom_parser2.parse_dom(item, 'div', {'class': 'movie-year'})
        if match_url and match_title:
            match_url = match_url[0].attrs['href']
            match_title = match_title[0].content
            # the year div is optional
            match_year = match_year[0].content if match_year else ''
            if norm_title in scraper_utils.normalize_title(
                    match_title) and (not match_year or not year or year == match_year):
                result = {
                    'url': scraper_utils.pathify_url(match_url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search by scanning the site's category <select id="cat"> dropdown.

    Each category option is a show; matching options are probed for a
    redirect so the stored URL points at the final location.
    """
    results = []
    html = self._http_get(self.base_url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    fragment = dom_parser2.parse_dom(html, 'select', {'id': 'cat'})
    if fragment:
        for attrs, label in dom_parser2.parse_dom(fragment[0].content, 'option', {'class': 'level-0'}, req='value'):
            label = scraper_utils.cleanse_title(label)
            # Drop the trailing post-count, e.g. "Show Name (12)".
            label = re.sub('\s+\(\d+\)$', '', label)
            if norm_title in scraper_utils.normalize_title(label):
                cat_url = scraper_utils.urljoin(self.base_url, '/?cat=%s' % (attrs['value']))
                # With allow_redirect=False, _http_get returns the redirect
                # target (a URL string) instead of page HTML when one occurs.
                html = self._http_get(cat_url, allow_redirect=False, cache_limit=8)
                if html.startswith('http'):
                    cat_url = html
                result = {
                    'url': scraper_utils.pathify_url(cat_url),
                    'title': label,
                    'year': ''
                }
                results.append(result)
    return results
def _get_episode_url(self, show_url, video):
    """Walk the show's paginated post list looking for the requested episode.

    Stops when a post is older than the cutoff (__too_old) or when pages run
    out. Returns a pathified episode URL, or None implicitly if not found.
    """
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
        for post in posts:
            # Posts are newest-first; once one is too old, stop paging entirely.
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                match = re.search('<a\s+href="([^"]+)[^>]+>(.*?)</a>', post)
                if match:
                    url, title = match.groups()
                    if not force_title:
                        # Season/episode (or airdate) release-name check.
                        if scraper_utils.release_check(video, title, require_title=False):
                            return scraper_utils.pathify_url(url)
                    else:
                        # Title-only matching when forced and enabled.
                        if title_fallback and norm_title:
                            match = re.search('</strong>(.*?)</p>', post)
                            if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                return scraper_utils.pathify_url(url)
        # Follow the "next page" link, if any.
        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
def _get_episode_url(self, show_url, video):
    """Resolve an episode via the site's JSON API (/api/v2/shows/<id>).

    Matching priority: exact season/episode number, then airdate fallback,
    then episode-title fallback. Returns a '?id=<episode id>' query URL.
    """
    query = scraper_utils.parse_query(show_url)
    if 'id' in query:
        url = scraper_utils.urljoin(self.base_url, '/api/v2/shows/%s' % (query['id']))
        js_data = self._http_get(url, cache_limit=.5)
        if 'episodes' in js_data:
            force_title = scraper_utils.force_title(video)
            if not force_title:
                for episode in js_data['episodes']:
                    if int(video.season) == int(episode['season']) and int(video.episode) == int(episode['number']):
                        return scraper_utils.pathify_url('?id=%s' % (episode['id']))
                if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                    for episode in js_data['episodes']:
                        if 'airdate' in episode:
                            ep_airdate = scraper_utils.to_datetime(episode['airdate'], "%Y-%m-%d").date()
                            # NOTE(review): deliberate one-day offset — the API's
                            # airdate appears to be one day ahead of the tracked
                            # airdate; confirm against the site before changing.
                            if video.ep_airdate == (ep_airdate - datetime.timedelta(days=1)):
                                return scraper_utils.pathify_url('?id=%s' % (episode['id']))
            else:
                logger.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
            if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
                norm_title = scraper_utils.normalize_title(video.ep_title)
                for episode in js_data['episodes']:
                    if 'name' in episode and norm_title in scraper_utils.normalize_title(episode['name']):
                        return scraper_utils.pathify_url('?id=%s' % (episode['id']))
def search(self, video_type, title, year, season=''):
    """Scan the homepage's series listing and return entries matching title.

    Each result is a dict with 'url', 'title' and 'year' keys; the year is
    pulled from the item's <p> tag when a 4-digit value is present.
    """
    matches = []
    page = self._http_get(self.base_url, cache_limit=48)
    wanted = scraper_utils.normalize_title(title)
    for entry in dom_parser.parse_dom(page, 'div', {'class': 'series-item'}):
        links = dom_parser.parse_dom(entry, 'a', ret='href')
        names = dom_parser.parse_dom(entry, 'h3')
        paras = dom_parser.parse_dom(entry, 'p')
        # Skip malformed items lacking a link or a name.
        if not links or not names:
            continue
        link, name = links[0], names[0]
        found_year = ''
        if paras:
            # A 4-digit year may appear inside the first <p> tag.
            year_match = re.search('\s*(\d{4})\s+', paras[0])
            if year_match:
                found_year = year_match.group(1)
        if wanted in scraper_utils.normalize_title(name):
            matches.append({
                'url': scraper_utils.pathify_url(link),
                'title': scraper_utils.cleanse_title(name),
                'year': found_year,
            })
    return matches
def search(self, video_type, title, year, season=''):
    """Search the site: WordPress ?s= search for movies, category list for shows.

    Returns a list of dicts with 'title' (cleansed), 'year' and 'url' keys.
    """
    results = []
    if video_type == VIDEO_TYPES.MOVIE:
        search_url = urlparse.urljoin(self.base_url, '/?s=')
        search_url += urllib.quote_plus('%s' % (title))
        html = self._http_get(search_url, cache_limit=1)
        links = dom_parser.parse_dom(html, 'a', {'class': 'clip-link'}, 'href')
        titles = dom_parser.parse_dom(html, 'a', {'class': 'clip-link'}, 'title')
        matches = zip(links, titles)
    else:
        # TV shows are listed as WP category items on the front page.
        html = self._http_get(self.base_url, cache_limit=8)
        matches = re.findall('<li\s+class="cat-item[^>]+>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', html)
    norm_title = scraper_utils.normalize_title(title)
    for item in matches:
        url, match_title_year = item
        # Split "Title (2015)" / "Title 2015" into title + year when possible.
        match = re.search('(.*?)\s+\(?(\d{4})\)?', match_title_year)
        if match:
            match_title, match_year = match.groups()
        else:
            match_title = match_title_year
            match_year = ''
        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(url)}
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the site's /arsiv (archive) listing for matching shows.

    Titles come from h1.ts-list-name entries; years from the paired <ul>
    detail blocks. Returns a list of result dicts.
    """
    results = []
    url = scraper_utils.urljoin(self.base_url, '/arsiv')
    html = self._http_get(url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'ts-list-content'})
    if not fragment: return results
    items = dom_parser2.parse_dom(fragment[0].content, 'h1', {'class': 'ts-list-name'})
    details = dom_parser2.parse_dom(fragment[0].content, 'ul')
    # Name headers and detail lists are parallel sequences in the markup.
    for item, detail in zip(items, details):
        match = dom_parser2.parse_dom(item.content, 'a', req='href')
        match_year = re.search('<span>(\d{4})</span>', detail.content)
        if not match: continue
        match_url = match[0].attrs['href']
        match_title = match[0].content
        match_year = match_year.group(1) if match_year else ''
        if norm_title in scraper_utils.normalize_title(match_title):
            result = {
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            }
            results.append(result)
    return results
def get_sources(self, video):
    """Build direct-stream hosters from a directory-style file listing.

    Movies match on normalized title; episodes match on season/episode
    numbers parsed from the file name. Dubbed releases are skipped.
    """
    source_url = self.get_url(video)
    hosters = []
    norm_title = scraper_utils.normalize_title(video.title)
    if source_url and source_url != FORCE_NO_MATCH:
        source_url = urlparse.urljoin(self.base_url2, source_url)
        for line in self._get_files(source_url, cache_limit=24):
            if not line['directory']:
                match = {}
                if video.video_type == VIDEO_TYPES.MOVIE:
                    match_title, _match_year, height, extra = scraper_utils.parse_movie_link(line['link'])
                    if norm_title in scraper_utils.normalize_title(match_title):
                        match = line
                else:
                    _show_title, season, episode, height, extra = scraper_utils.parse_episode_link(line['link'])
                    if int(video.season) == int(season) and int(video.episode) == int(episode):
                        match = line
                # 'extra' carries release tags (x265, dubbed, ...) from either branch.
                if 'dubbed' in extra.lower(): continue
                if match:
                    # Append the UA so the player sends the same header we scraped with.
                    stream_url = match['url'] + '|User-Agent=%s' % (scraper_utils.get_ua())
                    hoster = {'multi-part': False, 'host': self._get_direct_hostname(stream_url), 'class': self, 'quality': scraper_utils.height_get_quality(height), 'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                    if 'x265' in extra: hoster['format'] = 'x265'
                    if 'size' in match: hoster['size'] = scraper_utils.format_size(int(match['size']))
                    hosters.append(hoster)
    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search: probe a slugged /tv-show/ URL for shows; WP search for movies.

    For TV the existence of any post on the slug page counts as a hit.
    Movie results carry a synthetic title: "Title [extra] (720p)".
    """
    results = []
    if video_type == VIDEO_TYPES.TVSHOW and title:
        # Shows live at predictable slug URLs; a page with posts means it exists.
        test_url = '/tv-show/%s/' % (scraper_utils.to_slug(title))
        test_url = scraper_utils.urljoin(self.base_url, test_url)
        html = self._http_get(test_url, require_debrid=True, cache_limit=24)
        posts = dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})
        if posts:
            result = {'url': scraper_utils.pathify_url(test_url), 'title': scraper_utils.cleanse_title(title), 'year': ''}
            results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        search_title = re.sub('[^A-Za-z0-9 ]', '', title.lower())
        html = self._http_get(self.base_url, params={'s': search_title}, require_debrid=True, cache_limit=1)
        norm_title = scraper_utils.normalize_title(title)
        for _attrs, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            match = re.search('<h\d+[^>]*>\s*<a\s+href="([^"]+)[^>]*>(.*?)</a>', post)
            if match:
                post_url, post_title = match.groups()
                # Skip TV posts and posts past the age cutoff.
                if '/tv-show/' in post or self.__too_old(post): continue
                post_title = re.sub('<[^>]*>', '', post_title)
                meta = scraper_utils.parse_movie_link(post_title)
                full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
                match_year = meta['year']
                match_norm_title = scraper_utils.normalize_title(meta['title'])
                # Substring match in either direction tolerates release-name noise.
                if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                    results.append(result)
    return results
def _default_get_episode_url(self, html, video, episode_pattern, title_pattern='', airdate_pattern=''):
    """Shared episode-URL matcher used by concrete scrapers.

    Tries, in order: the season/episode regex, an airdate regex built from
    {year}/{month}/{day}-style placeholders, then an episode-title regex
    (whose groups must be named 'title' and 'url'). Returns a pathified
    URL or None implicitly.
    """
    logger.log('Default Episode Url: |%s|%s|' % (self.get_name(), video), log_utils.LOGDEBUG)
    if not html: return
    try:
        # Accept either raw HTML or a dom_parser2 result list.
        html = html[0].content
    except AttributeError:
        pass
    force_title = scraper_utils.force_title(video)
    if not force_title:
        if episode_pattern:
            match = re.search(episode_pattern, html, re.DOTALL | re.I)
            if match:
                return scraper_utils.pathify_url(match.group(1))
        if kodi.get_setting('airdate-fallback') == 'true' and airdate_pattern and video.ep_airdate:
            # Substitute placeholders; {p_*} variants are zero-padded.
            airdate_pattern = airdate_pattern.replace('{year}', str(video.ep_airdate.year))
            airdate_pattern = airdate_pattern.replace('{month}', str(video.ep_airdate.month))
            airdate_pattern = airdate_pattern.replace('{p_month}', '%02d' % (video.ep_airdate.month))
            airdate_pattern = airdate_pattern.replace('{month_name}', MONTHS[video.ep_airdate.month - 1])
            airdate_pattern = airdate_pattern.replace('{short_month}', SHORT_MONS[video.ep_airdate.month - 1])
            airdate_pattern = airdate_pattern.replace('{day}', str(video.ep_airdate.day))
            airdate_pattern = airdate_pattern.replace('{p_day}', '%02d' % (video.ep_airdate.day))
            logger.log('Air Date Pattern: %s' % (airdate_pattern), log_utils.LOGDEBUG)
            match = re.search(airdate_pattern, html, re.DOTALL | re.I)
            if match:
                return scraper_utils.pathify_url(match.group(1))
    else:
        logger.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
    if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title and title_pattern:
        norm_title = scraper_utils.normalize_title(video.ep_title)
        for match in re.finditer(title_pattern, html, re.DOTALL | re.I):
            episode = match.groupdict()
            if norm_title == scraper_utils.normalize_title(episode['title']):
                return scraper_utils.pathify_url(episode['url'])
def search(self, video_type, title, year, season=''):
    """Search the site: WordPress ?s= search for movies, category list for shows.

    Returns a list of dicts with 'title' (cleansed), 'year' and 'url' keys.
    """
    results = []
    if video_type == VIDEO_TYPES.MOVIE:
        search_url = urlparse.urljoin(self.base_url, '/?s=')
        search_url += urllib.quote_plus('%s' % (title))
        html = self._http_get(search_url, cache_limit=1)
        links = dom_parser.parse_dom(html, 'a', {'class': 'clip-link'}, 'href')
        titles = dom_parser.parse_dom(html, 'a', {'class': 'clip-link'}, 'title')
        matches = zip(links, titles)
    else:
        # TV shows are listed as WP category items on the front page.
        html = self._http_get(self.base_url, cache_limit=8)
        matches = re.findall('<li\s+class="cat-item[^>]+>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', html)
    norm_title = scraper_utils.normalize_title(title)
    for item in matches:
        url, match_title_year = item
        # Split "Title (2015)" / "Title 2015" into title + year when possible.
        match = re.search('(.*?)\s+\(?(\d{4})\)?', match_title_year)
        if match:
            match_title, match_year = match.groups()
        else:
            match_title = match_title_year
            match_year = ''
        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            # Cleanse the scraped title (HTML entities etc.) like the other
            # search() implementations in this file do.
            result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(url)}
            results.append(result)
    return results
def _get_episode_url(self, show_url, video):
    """Page through show posts matching on SxxExx or airdate in the heading.

    Falls back to an episode-title comparison (text after </strong>) when
    title matching is forced or enabled. Stops at the age cutoff.
    """
    # Separators in release names can be '.', '_' or ' '.
    sxe = '(\.|_| )S%02dE%02d(\.|_| )' % (int(video.season), int(video.episode))
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    try:
        airdate_pattern = video.ep_airdate.strftime('(\.|_| )%Y(\.|_| )%m(\.|_| )%d(\.|_| )')
    except:
        # No airdate available on the video object.
        airdate_pattern = ''
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, cache_limit=1)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
        # Headings and post divs are parallel sequences on the page.
        for heading, post in zip(headings, posts):
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                url, title = heading
                if not force_title:
                    if re.search(sxe, title) or (airdate_pattern and re.search(airdate_pattern, title)):
                        return scraper_utils.pathify_url(url)
                else:
                    if title_fallback and norm_title:
                        match = re.search('</strong>(.*?)</p>', post)
                        if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                            return scraper_utils.pathify_url(url)
        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
def _get_episode_url(self, show_url, video):
    """Pick an episode id from the show page's season dropdown.

    Collects candidate ids by episode number, airdate, and episode title,
    then chooses by priority: number < airdate < title when the respective
    fallbacks are enabled (later assignments win); title only when forced.
    """
    url = urlparse.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=8)
    pattern = "<a[^>]*class='dropdown-toggle'[^>]*>Season\s+%s<(.*?)<li\s+class='divider'>" % (video.season)
    match = re.search(pattern, html, re.DOTALL)
    if match:
        fragment = match.group(1)
        # The episode id is stashed in the anchor's class attribute.
        ep_ids = dom_parser.parse_dom(fragment, 'a', {'id': 'epiloader'}, ret='class')
        episodes = dom_parser.parse_dom(fragment, 'a', {'id': 'epiloader'})
        airdates = dom_parser.parse_dom(fragment, 'span', {'class': 'airdate'})
        ep_airdate = video.ep_airdate.strftime('%Y-%m-%d') if isinstance(video.ep_airdate, datetime.date) else ''
        norm_title = scraper_utils.normalize_title(video.ep_title)
        num_id, airdate_id, title_id = '', '', ''
        for episode, airdate, ep_id in zip(episodes, airdates, ep_ids):
            if ep_airdate and ep_airdate == airdate: airdate_id = ep_id
            # Entries look like "<span>...</span>3. Episode Title".
            match = re.search('(?:<span[^>]*>)?(\d+)\.\s*([^<]+)', episode)
            if match:
                ep_num, ep_title = match.groups()
                if int(ep_num) == int(video.episode): num_id = ep_id
                if norm_title and norm_title in scraper_utils.normalize_title(ep_title): title_id = ep_id
        best_id = ''
        if not scraper_utils.force_title(video):
            if num_id: best_id = num_id
            if kodi.get_setting('airdate-fallback') == 'true' and airdate_id: best_id = airdate_id
            if kodi.get_setting('title-fallback') == 'true' and title_id: best_id = title_id
        else:
            if title_id: best_id = title_id
        if best_id:
            return EP_URL % (best_id)
def search(self, video_type, title, year, season=''):
    """Crawl the paginated /tvseries/ index, collecting shows matching title.

    Follows the '>>' next-page link until no more pages remain.
    """
    results = []
    page_url = urlparse.urljoin(self.base_url, '/tvseries/index.php?&page=1')
    while page_url:
        html = self._http_get(page_url, cache_limit=48)
        # Strip HTML comments so commented-out markup can't produce false matches.
        html = re.sub('<!--.*?-->', '', html)
        norm_title = scraper_utils.normalize_title(title)
        for td in dom_parser.parse_dom(html, 'td', {'class': 'topic_content'}):
            match_url = re.search('href="([^"]+)', td)
            match_title_year = dom_parser.parse_dom(td, 'img', ret='alt')
            if match_url and match_title_year:
                match_url = match_url.group(1)
                # Relative links are relative to the /tvseries/ section.
                if not match_url.startswith('/'): match_url = '/tvseries/' + match_url
                match_title, match_year = scraper_utils.extra_year(match_title_year[0])
                if norm_title in scraper_utils.normalize_title(match_title):
                    result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
        # The next-page link is the anchor whose text is '>>'.
        match = re.search('href="([^"]+)[^>]*>>', html)
        if match:
            page_url = urlparse.urljoin(self.base_url, match.group(1))
        else:
            page_url = ''
    return results
def __tv_search(self, title, year):
    """Search the site's per-letter series index for shows matching title.

    Uses the first character of the title to pick the index page, then
    filters entries by normalized-title containment and optional year.
    """
    results = []
    if title:
        norm_title = scraper_utils.normalize_title(title)
        # Index is organized alphabetically by first letter of the title.
        url = '/series/letra/%s/' % (title[0])
        url = urlparse.urljoin(self.base_url, url)
        html = self._http_get(url, cache_limit=48)
        for item in dom_parser.parse_dom(html, 'li', {'class': '[^"]*bpM12[^"]*'}):
            title_frag = dom_parser.parse_dom(item, 'h2')
            year_frag = dom_parser.parse_dom(item, 'div', {'class': '[^"]*sectionDetail[^"]*'})
            match_url = dom_parser.parse_dom(item, 'a', ret='href')
            if title_frag and match_url:
                match_url = match_url[0]
                # The show name precedes a <br>; fall back to the whole fragment.
                match = re.search('(.*?)<br>', title_frag[0])
                if match:
                    match_title = match.group(1)
                else:
                    match_title = title_frag[0]
                match_year = ''
                if year_frag:
                    match = re.search('(\d{4})', year_frag[0])
                    if match:
                        match_year = match.group(1)
                if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Search the site's listing page (movies or shows) for matching titles.

    Premium-only entries are skipped unless the include_premium setting is
    enabled. Returns a list of result dicts ('url', 'title', 'year').
    """
    # Join a *relative* path so a user-configured base_url is honored.
    # (Previously the absolute 'http://ororo.tv/en' was passed to urljoin,
    # which made it ignore self.base_url entirely.)
    url = urlparse.urljoin(self.base_url, '/en')
    if video_type == VIDEO_TYPES.MOVIE:
        url += '/movies'
    html = self._http_get(url, cache_limit=.25)
    results = []
    norm_title = scraper_utils.normalize_title(title)
    include_paid = kodi.get_setting('%s-include_premium' % (self.get_name())) == 'true'
    for match in re.finditer('''<span class='value'>(\d{4})(.*?)href="([^"]+)[^>]+>([^<]+)''', html, re.DOTALL):
        match_year, middle, url, match_title = match.groups()
        # 'middle' holds the markup between year and link; a 'paid accounts'
        # note there marks premium-only titles.
        if not include_paid and video_type == VIDEO_TYPES.MOVIE and 'paid accounts' in middle:
            continue
        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            result = {
                'url': scraper_utils.pathify_url(url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            }
            results.append(result)
    return results
def __search(self, video_type, title, year, season=''):
    """Query a (base64-obfuscated) search API and filter JSON results.

    Only '/watch/' URLs are considered. 'Season N' in the title marks a
    season result; otherwise the entry is treated as a movie.
    """
    results = []
    # SEARCH_URL is stored base64-encoded; decode then fill in the query.
    search_url = base64.decodestring(SEARCH_URL) % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=1)
    js_data = scraper_utils.parse_json(html)
    norm_title = scraper_utils.normalize_title(title)
    for item in js_data.get('results', []):
        if '/watch/' not in item['url'].lower(): continue
        is_season = re.search('Season\s+(\d+)', item['titleNoFormatting'], re.IGNORECASE)
        if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
            match_title_year = item['titleNoFormatting']
            # Titles are prefixed with "Watch " by the site.
            match_title_year = re.sub('^Watch\s+', '', match_title_year)
            match_url = item['url']
            match_year = ''
            if video_type == VIDEO_TYPES.MOVIE:
                match = re.search('(.*?)(?:\s+\(?(\d{4})\)?)', match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    match_title = match_title_year
            else:
                # Season results: optionally filter to the requested season.
                if season and int(is_season.group(1)) != int(season):
                    continue
                match = re.search('(.*?)\s+\(\d{4}\)', match_title_year)
                if match:
                    match_title = match.group(1)
                else:
                    match_title = match_title_year
            if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """POST the site's search form and filter links by a type prefix.

    Links are 'watch-movie-...' or 'watch-tvshow-...' slugs; the display
    title is reconstructed from the slug via __make_title.
    """
    results = []
    url = urlparse.urljoin(self.base_url, '/search.html')
    data = {'search': title}
    headers = {'Referer': self.base_url}
    html = self._http_get(url, data=data, headers=headers, cache_limit=2)
    if video_type == VIDEO_TYPES.MOVIE:
        query_type = 'watch-movie-'
    else:
        query_type = 'watch-tvshow-'
    norm_title = scraper_utils.normalize_title(title)
    for item in dom_parser.parse_dom(html, 'a', {'href': '#'}):
        match = re.search('href="(%s[^"]+)' % (query_type), item)
        if match:
            link = match.group(1)
            match_title = self.__make_title(link, query_type)
            # NOTE(review): match_year is never populated here, so the year
            # comparison below can never run (the 'not match_year' arm always
            # short-circuits) — presumably the site exposes no year.
            match_year = ''
            if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or int(year) == int(match_year)):
                result = {
                    'url': scraper_utils.pathify_url(link),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)
    return results
def __movie_search(self, title, year):
    """Search the site's /search?q= page for movies matching title.

    The listing exposes no year, so match_year is always empty and the
    year comparison is effectively a no-op kept for symmetry with the
    other search helpers. Returns a list of result dicts.
    """
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search?q=')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=1)
    norm_title = scraper_utils.normalize_title(title)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'video_item'}):
        match_url = dom_parser.parse_dom(item, 'a', ret='href')
        match_title = dom_parser.parse_dom(item, 'img', ret='alt')
        # No year is available in this listing.
        # (Removed a dead `if match_year:` branch — match_year was initialized
        # to '' and never populated, so the branch could never execute, and
        # would have mis-indexed a string if it ever had.)
        match_year = ''
        if match_url and match_title:
            match_url = match_url[0]
            match_title = match_title[0]
            if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Search /search/<title>.html; a status div marks season results.

    Movie years are pulled from the URL slug ('...-2015.' / '...-2015-').
    Title matching is substring containment in either direction.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/%s.html' % (urllib.quote_plus(title)))
    html = self._http_get(search_url, cache_limit=1)
    fragment = dom_parser2.parse_dom(html, 'ul', {'class': 'cfv'})
    if not fragment: return results
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'li'):
        # Presence of a status div distinguishes shows/seasons from movies.
        is_season = dom_parser2.parse_dom(item, 'div', {'class': 'status'})
        if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
            match = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
            if not match: continue
            match_title = match[0].attrs['title']
            match_url = match[0].attrs['href']
            match_year = ''
            if video_type == VIDEO_TYPES.SEASON:
                # Optionally restrict to the requested season number.
                if season and not re.search('Season\s+%s$' % (season), match_title, re.I):
                    continue
            else:
                match = re.search('-(\d{4})[-.]', match_url)
                if match:
                    match_year = match.group(1)
            match_norm_title = scraper_utils.normalize_title(match_title)
            title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
            if title_match and (not year or not match_year or year == match_year):
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def _get_episode_url(self, video):
    """Scan the torrent-list API for a torrent matching this episode.

    A torrent matches when its name carries the SxxExx (or, with the
    airdate fallback enabled, the airdate) and the leading text matches
    the show title. Returns a 'hash=<infohash>' query string.
    """
    url = urlparse.urljoin(self.base_url, '/torrent/list')
    # cache_limit=0: the torrent list changes constantly, never cache.
    js_data = self._http_get(url, cache_limit=0)
    norm_title = scraper_utils.normalize_title(video.title)
    if 'torrents' in js_data:
        airdate_fallback = kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate
        show_title = ''
        if not scraper_utils.force_title(video):
            for item in js_data['torrents']:
                # Capture the show-name prefix before the SxxExx token.
                sxe_pattern = '(.*?)[. ][Ss]%02d[Ee]%02d[. ]' % (int(video.season), int(video.episode))
                match = re.search(sxe_pattern, item['name'])
                if match:
                    show_title = match.group(1)
                elif airdate_fallback:
                    airdate_pattern = '(.*?)[. ]%s[. ]%02d[. ]%02d[. ]' % (video.ep_airdate.year, video.ep_airdate.month, video.ep_airdate.day)
                    match = re.search(airdate_pattern, item['name'])
                    if match:
                        show_title = match.group(1)
                if show_title and norm_title in scraper_utils.normalize_title(show_title):
                    return 'hash=%s' % (item['hash'])
def search(self, video_type, title, year, season=''):
    """Search the site's series.xml feed for shows matching title.

    Parses <dizi> elements ('adi' = name, 'url' = link). Parse failures
    are logged and swallowed so a bad feed yields an empty result set.
    """
    results = []
    xml_url = urlparse.urljoin(self.base_url, '/series.xml')
    xml = self._http_get(xml_url, cache_limit=24)
    if xml:
        norm_title = scraper_utils.normalize_title(title)
        # NOTE(review): match_year is never assigned after this, so the year
        # comparison below always passes — the feed exposes no year.
        match_year = ''
        try:
            for element in ET.fromstring(xml).findall('.//dizi'):
                name = element.find('adi')
                if name is not None and norm_title in scraper_utils.normalize_title(name.text):
                    url = element.find('url')
                    if url is not None and (not year or not match_year or year == match_year):
                        result = {
                            'url': scraper_utils.pathify_url(url.text),
                            'title': name.text,
                            'year': ''
                        }
                        results.append(result)
        except (ParseError, ExpatError) as e:
            log_utils.log('Dizilab Search Parse Error: %s' % (e), log_utils.LOGWARNING)
    return results
def _get_episode_url(self, show_url, video):
    """Page through show posts matching '.SxxExx.' or '.YYYY.MM.DD.' in headings.

    Falls back to an episode-title comparison (text inside <strong>) when
    title matching is forced and enabled. Stops at the age cutoff.
    """
    sxe = '.S%02dE%02d.' % (int(video.season), int(video.episode))
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    try:
        ep_airdate = video.ep_airdate.strftime('.%Y.%m.%d.')
    except:
        # No airdate available on the video object.
        ep_airdate = ''
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = urlparse.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = dom_parser.parse_dom(html, 'div', {'id': 'post-\d+'})
        # Headings and post divs are parallel sequences on the page.
        for heading, post in zip(headings, posts):
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                url, title = heading
                if not force_title:
                    # Plain substring tests; the dots double as separators.
                    if (sxe in title) or (ep_airdate and ep_airdate in title):
                        return scraper_utils.pathify_url(url)
                else:
                    if title_fallback and norm_title:
                        match = re.search('<strong>(.*?)</strong>', post)
                        if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                            return scraper_utils.pathify_url(url)
        page_url = dom_parser.parse_dom(html, 'a', {'class': 'nextpostslink'}, ret='href')
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Query the site's /advanced-search/ page and filter by media type.

    '/tvshows/' in the link distinguishes shows from movies. Note the
    reversed containment test: the scraped title must be contained in
    the query title (opposite of most other scrapers in this file).
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/advanced-search/')
    headers = {'Referer': self.base_url}
    params = {'search_query': title, 'orderby': '', 'order': '', 'wpas': 1}
    html = self._http_get(search_url, params=params, headers=headers, cache_limit=8)
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'datos'}):
        match = dom_parser2.parse_dom(item, 'a', req='href')
        if not match: continue
        match_url = match[0].attrs['href']
        is_tvshow = '/tvshows/' in match_url
        # Skip results whose type doesn't match the requested video_type.
        if is_tvshow and video_type == VIDEO_TYPES.MOVIE or not is_tvshow and video_type == VIDEO_TYPES.TVSHOW:
            continue
        match_title = match[0].content
        match_title, match_year = scraper_utils.extra_year(match_title)
        if scraper_utils.normalize_title(match_title) in norm_title and (not year or not match_year or year == match_year):
            result = {
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url)
            }
            results.append(result)
    return results
def _get_episode_url(self, show_url, video):
    """Page through show posts, release-checking each heading for the episode.

    Falls back to an episode-title comparison (text inside <strong>) when
    title matching is forced and enabled. Stops at the age cutoff.
    """
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = scraper_utils.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = [r.content for r in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
        # Headings and post divs are parallel sequences on the page.
        for heading, post in zip(headings, posts):
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                url, title = heading
                if not force_title:
                    if scraper_utils.release_check(video, title, require_title=False):
                        return scraper_utils.pathify_url(url)
                else:
                    if title_fallback and norm_title:
                        match = re.search('<strong>(.*?)</strong>', post)
                        if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                            return scraper_utils.pathify_url(url)
        page_url = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
        if page_url: page_url = [page_url[0].attrs['href']]
def search(self, video_type, title, year, season=''):
    """Scan the site's A-Z series list for entries matching title.

    Entries look like "Title (2015)"; the year is optional. Returns a
    list of dicts with 'url', 'title' (cleansed) and 'year' keys.
    """
    listing_url = urlparse.urljoin(self.base_url, '/tv-series-a-z-list')
    page = self._http_get(listing_url, cache_limit=8)
    found = []
    wanted = scraper_utils.normalize_title(title)
    link_pattern = '<li>\s*<a.*?href="([^"]+)[^>]*>([^<]+)'
    for hit in re.finditer(link_pattern, page, re.DOTALL):
        link, raw_title = hit.groups()
        # Peel a trailing "(YYYY)" off the display title when present.
        split = re.search('(.*?)\s+\((\d{4})\)', raw_title)
        name, found_year = split.groups() if split else (raw_title, '')
        if wanted not in scraper_utils.normalize_title(name):
            continue
        if year and found_year and year != found_year:
            continue
        found.append({
            'url': scraper_utils.pathify_url(link),
            'title': scraper_utils.cleanse_title(name),
            'year': found_year,
        })
    return found
def search(self, video_type, title, year):
    """WordPress ?s= search for movies, skipping episode and TV-series hits.

    The query includes the year to narrow results; a "nothing matched"
    page short-circuits to an empty list.
    """
    search_url = urlparse.urljoin(self.base_url, '/?s=')
    search_url += urllib.quote_plus('%s %s' % (title, year))
    html = self._http_get(search_url, cache_limit=.25)
    results = []
    if not re.search('Sorry, but nothing matched', html):
        norm_title = scraper_utils.normalize_title(title)
        for item in dom_parser.parse_dom(html, 'li', {'class': '[^"]*box-shadow[^"]*'}):
            match = re.search('href="([^"]+)"\s+title="([^"]+)', item)
            if match:
                url, match_title_year = match.groups()
                if re.search('S\d{2}E\d{2}', match_title_year): continue  # skip episodes
                if re.search('TV\s*SERIES', match_title_year, re.I): continue  # skip shows
                # Split "Title (2015)" / "Title 2015" into title + year.
                match = re.search('(.*?)\s+\(?(\d{4})\)?', match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    match_title = match_title_year
                    match_year = ''
                if (not year or not match_year or year == match_year) and norm_title in scraper_utils.normalize_title(match_title):
                    result = {
                        'title': match_title,
                        'year': match_year,
                        'url': scraper_utils.pathify_url(url)
                    }
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Match cached torrents against a movie or season search.

    Season torrents are recognized by 'season N' in the name; episode
    torrents (SxxExx) are always skipped. Returns result dicts whose
    'url' is a 'hash=<infohash>' query string.
    """
    results = []
    norm_title = scraper_utils.normalize_title(title)
    for item in self.__get_torrents():
        is_season = re.search('(.*?[._ ]season[._ ]+(\d+))[._ ](.*)', item['name'], re.I)
        # BUGFIX: the second clause previously read `is_season and
        # VIDEO_TYPES.SEASON` (missing `video_type ==`); since the constant
        # is truthy, every season pack leaked into non-season searches and
        # was then mis-handled by the movie branch below.
        if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
            if re.search('[._ ]S\d+E\d+[._ ]', item['name']): continue  # skip episodes
            if video_type == VIDEO_TYPES.SEASON:
                match_title, match_season, extra = is_season.groups()
                # Optionally restrict to the requested season number.
                if season and int(match_season) != int(season): continue
                match_year = ''
                match_title = re.sub('[._]', ' ', match_title)
            else:
                match = re.search('(.*?)\(?(\d{4})\)?(.*)', item['name'])
                if match:
                    match_title, match_year, extra = match.groups()
                else:
                    match_title, match_year, extra = item['name'], '', ''
            match_title = match_title.strip()
            extra = extra.strip()
            if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                result_title = match_title
                if extra: result_title += ' [%s]' % (extra)
                result = {'title': result_title, 'year': match_year, 'url': 'hash=%s' % (item['hash'])}
                results.append(result)
    return results
def _get_episode_url(self, show_url, video):
    """Page through show posts, release-checking each post's first link.

    Falls back to an episode-title comparison (text after </strong>) when
    title matching is forced and enabled. Stops at the age cutoff.
    """
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        html = self._http_get(page_url[0], require_debrid=False, cache_limit=1)
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            # Posts are newest-first; once one is too old, stop paging entirely.
            if self.__too_old(post):
                too_old = True
                break
            if CATEGORIES[VIDEO_TYPES.TVSHOW] in post and show_url in post:
                match = dom_parser2.parse_dom(post, 'a', req='href')
                if match:
                    url, title = match[0].attrs['href'], match[0].content
                    if not force_title:
                        if scraper_utils.release_check(video, title, require_title=False):
                            return scraper_utils.pathify_url(url)
                    else:
                        if title_fallback and norm_title:
                            match = re.search('</strong>(.*?)</p>', post)
                            if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                                return scraper_utils.pathify_url(url)
        page_url = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
        if page_url: page_url = [page_url[0].attrs['href']]
def search(self, video_type, title, year, season=''):
    """Search: scrape /series/all/ for shows; POST the search form for movies.

    Honors the site's rate limit by sleeping (capped at self.timeout) and
    retrying once when a "search again in N seconds" message appears.
    """
    results = []
    if video_type == VIDEO_TYPES.TVSHOW:
        url = urlparse.urljoin(self.base_url, '/series/all/')
        html = self._http_get(url, cache_limit=8)
        links = dom_parser.parse_dom(html, 'a', {'class': 'underilne'}, 'href')
        titles = dom_parser.parse_dom(html, 'a', {'class': 'underilne'})
        items = zip(links, titles)
    else:
        url = urlparse.urljoin(self.base_url, '/search?=%s' % urllib.quote_plus(title))
        data = {'q': title, 'go': 'Search'}
        html = self._http_get(url, data=data, cache_limit=8)
        match = re.search('you can search again in (\d+) seconds', html, re.I)
        if match:
            # Rate-limited: wait out the (capped) delay and retry uncached.
            wait = int(match.group(1))
            if wait > self.timeout: wait = self.timeout
            time.sleep(wait)
            html = self._http_get(url, data=data, cache_limit=0)
        pattern = 'class="movie_box.*?href="([^"]+).*?<h1>([^<]+)'
        items = re.findall(pattern, html, re.DOTALL)
    norm_title = scraper_utils.normalize_title(title)
    for item in items:
        url, match_title = item
        if norm_title in scraper_utils.normalize_title(match_title):
            result = {'url': scraper_utils.pathify_url(url), 'title': match_title, 'year': ''}
            results.append(result)
    return results