def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    if title:
        first_letter = title[:1].lower()
        if first_letter.isdigit():
            first_letter = '0-9'
        search_url = '/search.php/%s/' % (first_letter)
        search_url = urlparse.urljoin(self.base_url, search_url)
        html = self._http_get(search_url, cache_limit=24)
        fragment = dom_parser.parse_dom(html, 'div', {'class': 'home'})
        if fragment:
            norm_title = scraper_utils.normalize_title(title)
            for match in re.finditer('''href=["']([^'"]+)[^>]+>([^<]+)''', fragment[0]):
                url, match_title_year = match.groups()
                match_title, match_year = scraper_utils.extra_year(match_title_year)
                if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                    results.append(result)
    return results
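# A minimal sketch of the loose title matching these search methods share:
# normalize both sides, then test substring containment. _norm_example is a
# hypothetical stand-in for scraper_utils.normalize_title, whose real rules
# may differ (it likely also strips articles and punctuation).
import re

def _norm_example(s):
    return re.sub('[^a-z0-9]', '', (s or '').lower())

assert _norm_example('The Office') in _norm_example('Watch The Office (US) Online')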
def search(self, video_type, title, year, season=''):
    results = []
    norm_title = scraper_utils.normalize_title(title)
    for item in self.__get_torrents():
        if title or year or season:
            is_season = re.search('(.*?{delim}season{delim}+(\d+)){delim}?(.*)'.format(delim=DELIM), item['name'], re.I)
            if (not is_season and video_type == VIDEO_TYPES.SEASON) or (is_season and video_type == VIDEO_TYPES.MOVIE):
                continue
            if re.search('{delim}S\d+E\d+{delim}'.format(delim=DELIM), item['name'], re.I):
                continue  # skip episodes

            if video_type == VIDEO_TYPES.SEASON:
                match_title, match_season, extra = is_season.groups()
                if season and int(match_season) != int(season):
                    continue
                match_year = ''
                match_title = re.sub(DELIM, ' ', match_title)
            else:
                match = re.search('(.*?)\(?(\d{4})\)?(.*)', item['name'])
                if match:
                    match_title, match_year, extra = match.groups()
                else:
                    match_title, match_year, extra = item['name'], '', ''
        else:
            match_title, match_year, extra = item['name'], '', ''

        match_title = match_title.strip()
        extra = extra.strip()
        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            result_title = match_title
            if extra:
                result_title += ' [%s]' % (extra)
            result = {'title': result_title, 'year': match_year, 'url': 'hash=%s' % (item['hash'])}
            results.append(result)
    return results
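# Hedged sketch of the season regex above. DELIM is assumed to be a separator
# character class like '[._ -]'; the real constant is defined elsewhere.
import re

_DELIM_EXAMPLE = '[._ -]'
_pattern = '(.*?{delim}season{delim}+(\d+)){delim}?(.*)'.format(delim=_DELIM_EXAMPLE)
_m = re.search(_pattern, 'Some.Show.Season.3.720p.x264', re.I)
assert _m and _m.group(2) == '3' and _m.group(3) == '720p.x264'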
def _get_episode_url(self, show_url, video):
    url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=8)
    pattern = "<a[^>]*class='dropdown-toggle'[^>]*>Season\s+%s<(.*?)<li\s+class='divider'>" % (video.season)
    match = re.search(pattern, html, re.DOTALL)
    if not match:
        return

    fragment = match.group(1)
    episodes = dom_parser2.parse_dom(fragment, 'a', {'id': 'epiloader'}, req='class')
    airdates = dom_parser2.parse_dom(fragment, 'span', {'class': 'airdate'})
    ep_airdate = video.ep_airdate.strftime('%Y-%m-%d') if isinstance(video.ep_airdate, datetime.date) else ''
    norm_title = scraper_utils.normalize_title(video.ep_title)
    num_id, airdate_id, title_id = '', '', ''
    for episode, airdate in zip(episodes, airdates):
        ep_id = episode.attrs['class']
        episode = episode.content
        airdate = airdate.content.strip()  # compare the date text, not the dom node
        if ep_airdate and ep_airdate == airdate:
            airdate_id = ep_id
        match = re.search('(?:<span[^>]*>)?(\d+)\.\s*([^<]+)', episode)
        if match:
            ep_num, ep_title = match.groups()
            if int(ep_num) == int(video.episode):
                num_id = ep_id
            if norm_title and norm_title in scraper_utils.normalize_title(ep_title):
                title_id = ep_id

    # start from the episode-number match; enabled airdate/title fallbacks override it
    best_id = ''
    if not scraper_utils.force_title(video):
        if num_id:
            best_id = num_id
        if kodi.get_setting('airdate-fallback') == 'true' and airdate_id:
            best_id = airdate_id
        if kodi.get_setting('title-fallback') == 'true' and title_id:
            best_id = title_id
    else:
        if title_id:
            best_id = title_id

    if best_id:
        return EP_URL % (best_id)
def _get_episode_url(self, show_url, video):
    url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    if html:
        force_title = scraper_utils.force_title(video)
        episodes = dom_parser2.parse_dom(html, 'div', {'class': 'el-item'})
        if not force_title:
            episode_pattern = 'href="([^"]*-[sS]%02d[eE]%02d(?!\d)[^"]*)' % (int(video.season), int(video.episode))
            match = re.search(episode_pattern, html)
            if match:
                return scraper_utils.pathify_url(match.group(1))

            if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
                airdate_pattern = '%02d-%02d-%d' % (video.ep_airdate.day, video.ep_airdate.month, video.ep_airdate.year)
                for episode in episodes:
                    episode = episode.content
                    ep_url = dom_parser2.parse_dom(episode, 'a', req='href')
                    ep_airdate = dom_parser2.parse_dom(episode, 'div', {'class': 'date'})
                    if ep_url and ep_airdate:
                        ep_airdate = ep_airdate[0].content.strip()
                        if airdate_pattern == ep_airdate:
                            return scraper_utils.pathify_url(ep_url[0].attrs['href'])

        if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title:
            norm_title = scraper_utils.normalize_title(video.ep_title)
            for episode in episodes:
                episode = episode.content
                ep_url = dom_parser2.parse_dom(episode, 'a', req='href')
                ep_title = dom_parser2.parse_dom(episode, 'div', {'class': 'e-name'})
                if ep_url and ep_title and norm_title == scraper_utils.normalize_title(ep_title[0].content):
                    return scraper_utils.pathify_url(ep_url[0].attrs['href'])
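# The airdate fallback above compares a day-month-year string (not ISO order)
# against the page's date text; a quick check of that formatting:
import datetime

_d = datetime.date(2017, 3, 5)
assert '%02d-%02d-%d' % (_d.day, _d.month, _d.year) == '05-03-2017'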
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    xml_url = scraper_utils.urljoin(self.base_url, '/series.xml')
    xml = self._http_get(xml_url, cache_limit=24)
    if not xml:
        return results

    try:
        norm_title = scraper_utils.normalize_title(title)
        match_year = ''
        for element in ET.fromstring(xml).findall('.//dizi'):
            name = element.find('adi')
            if name is not None and norm_title in scraper_utils.normalize_title(name.text):
                url = element.find('url')
                if url is not None and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(url.text), 'title': scraper_utils.cleanse_title(name.text), 'year': ''}
                    results.append(result)
    except (ParseError, ExpatError) as e:
        logger.log('Dizilab Search Parse Error: %s' % (e), log_utils.LOGWARNING)

    return results
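# Assumed shape of the series.xml feed the loop above consumes (element names
# taken from the code; the sample document itself is illustrative only):
import xml.etree.ElementTree as ET

_xml = '<dizis><dizi><adi>Example Show</adi><url>/dizi/example</url></dizi></dizis>'
for _dizi in ET.fromstring(_xml).findall('.//dizi'):
    assert _dizi.find('adi').text == 'Example Show'
    assert _dizi.find('url').text == '/dizi/example'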
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    seen_urls = set()
    search_url = scraper_utils.urljoin(self.base_url, '/search/')
    html = self._http_get(search_url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'category-post'}):
        match_url = dom_parser2.parse_dom(item, 'a', req='href')
        match_title = dom_parser2.parse_dom(item, 'h3')
        if match_url and match_title:
            match_url = scraper_utils.pathify_url(match_url[0].attrs['href'])
            match_title = match_title[0].content
            if match_url in seen_urls:
                continue
            seen_urls.add(match_url)
            if norm_title in scraper_utils.normalize_title(match_title):
                result = {'url': match_url, 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_title = re.sub('[^A-Za-z0-9. ]', '', title)
    url = '/search/%s/' % (urllib.quote(search_title))
    url = scraper_utils.urljoin(self.base_url, url)
    html = self._http_get(url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'article', {'class': 'movie-details'}):
        match_url = dom_parser2.parse_dom(item, 'a', req='href')
        match_title = dom_parser2.parse_dom(item, 'h2', {'class': 'movie-title'})
        match_year = dom_parser2.parse_dom(item, 'div', {'class': 'movie-year'})
        if match_url and match_title:
            match_url = match_url[0].attrs['href']
            match_title = match_title[0].content
            match_year = match_year[0].content if match_year else ''
            if norm_title in scraper_utils.normalize_title(match_title) and (not match_year or not year or year == match_year):
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
    return results
def _get_episode_url(self, show_url, video):
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)
    page_url = [show_url]
    too_old = False
    while page_url and not too_old:
        url = scraper_utils.urljoin(self.base_url, page_url[0])
        html = self._http_get(url, require_debrid=True, cache_limit=1)
        for _attrs, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            if self.__too_old(post):
                too_old = True
                break
            if show_url not in post:
                continue
            match = dom_parser2.parse_dom(post, 'a', req='href')
            if match:
                url, title = match[0].attrs['href'], match[0].content
                if not force_title:
                    if scraper_utils.release_check(video, title, require_title=False):
                        return scraper_utils.pathify_url(url)
                else:
                    if title_fallback and norm_title:
                        match = re.search('</strong>(.*?)</p>', post)
                        if match and norm_title == scraper_utils.normalize_title(match.group(1)):
                            return scraper_utils.pathify_url(url)

        page_url = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
        if page_url:
            page_url = [page_url[0].attrs['href']]
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=1)
    if re.search('Sorry, but nothing matched', html, re.I):
        return results

    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'li', {'class': 'box-shadow'}):
        for attrs, _content in dom_parser2.parse_dom(item, 'a', req=['href', 'title']):
            match_url, match_title_year = attrs['href'], attrs['title']
            if re.search('S\d{2}E\d{2}', match_title_year):
                continue  # skip individual episode links
            if re.search('TV\s*SERIES', match_title_year, re.I):
                continue  # skip show-level links
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if (not year or not match_year or year == match_year) and norm_title in scraper_utils.normalize_title(match_title):
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def _default_get_episode_url(self, html, video, episode_pattern, title_pattern='', airdate_pattern=''):
    logger.log('Default Episode Url: |%s|%s|' % (self.get_name(), video), log_utils.LOGDEBUG)
    if not html:
        return

    try:
        html = html[0].content
    except AttributeError:
        pass

    force_title = scraper_utils.force_title(video)
    if not force_title:
        if episode_pattern:
            match = re.search(episode_pattern, html, re.DOTALL | re.I)
            if match:
                return scraper_utils.pathify_url(match.group(1))

        if kodi.get_setting('airdate-fallback') == 'true' and airdate_pattern and video.ep_airdate:
            airdate_pattern = airdate_pattern.replace('{year}', str(video.ep_airdate.year))
            airdate_pattern = airdate_pattern.replace('{month}', str(video.ep_airdate.month))
            airdate_pattern = airdate_pattern.replace('{p_month}', '%02d' % (video.ep_airdate.month))
            airdate_pattern = airdate_pattern.replace('{month_name}', MONTHS[video.ep_airdate.month - 1])
            airdate_pattern = airdate_pattern.replace('{short_month}', SHORT_MONS[video.ep_airdate.month - 1])
            airdate_pattern = airdate_pattern.replace('{day}', str(video.ep_airdate.day))
            airdate_pattern = airdate_pattern.replace('{p_day}', '%02d' % (video.ep_airdate.day))
            logger.log('Air Date Pattern: %s' % (airdate_pattern), log_utils.LOGDEBUG)
            match = re.search(airdate_pattern, html, re.DOTALL | re.I)
            if match:
                return scraper_utils.pathify_url(match.group(1))
    else:
        logger.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)

    if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title and title_pattern:
        norm_title = scraper_utils.normalize_title(video.ep_title)
        for match in re.finditer(title_pattern, html, re.DOTALL | re.I):
            episode = match.groupdict()
            if norm_title == scraper_utils.normalize_title(episode['title']):
                return scraper_utils.pathify_url(episode['url'])
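# Sketch of the {placeholder} expansion above on a fixed date. The month-name
# lists stand in for the module-level MONTHS / SHORT_MONS constants.
import datetime

_MONTHS = ['January', 'February', 'March', 'April', 'May', 'June', 'July',
           'August', 'September', 'October', 'November', 'December']
_SHORT_MONS = [m[:3] for m in _MONTHS]

def _expand_airdate(pattern, d):
    for token, value in [('{year}', str(d.year)), ('{month_name}', _MONTHS[d.month - 1]),
                         ('{short_month}', _SHORT_MONS[d.month - 1]), ('{p_month}', '%02d' % d.month),
                         ('{month}', str(d.month)), ('{p_day}', '%02d' % d.day), ('{day}', str(d.day))]:
        pattern = pattern.replace(token, value)
    return pattern

assert _expand_airdate('href="([^"]+{year}/{p_month}/{p_day}[^"]*)"', datetime.date(2017, 3, 5)) == 'href="([^"]+2017/03/05[^"]*)"'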
def search(self, video_type, title, year, season=''):
    html = self._http_get(self.base_url, cache_limit=8)
    results = []
    norm_title = scraper_utils.normalize_title(title)
    pattern = 'class="[^"]*cat-item.*?href="([^"]+)[^>]+>([^<]+)'
    for match in re.finditer(pattern, html):
        url, match_title = match.groups()
        if norm_title in scraper_utils.normalize_title(match_title):
            result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    url = scraper_utils.urljoin(self.base_url, '/index')
    html = self._http_get(url, cache_limit=24)
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'ddmcc'}):
        for attrs, match_title in dom_parser2.parse_dom(fragment, 'a', req='href'):
            if norm_title in scraper_utils.normalize_title(match_title):
                result = {'url': scraper_utils.pathify_url(attrs['href']), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    if video_type == VIDEO_TYPES.TVSHOW and title:
        test_url = '/category/tv-shows/%s/' % (scraper_utils.to_slug(title))
        test_url = scraper_utils.urljoin(self.base_url, test_url)
        html = self._http_get(test_url, require_debrid=True, cache_limit=24)
        posts = dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})
        if posts:
            result = {'url': scraper_utils.pathify_url(test_url), 'title': scraper_utils.cleanse_title(title), 'year': ''}
            results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        search_title = re.sub('[^A-Za-z0-9 ]', '', title.lower())  # strip punctuation before searching
        html = self._http_get(self.base_url, params={'s': search_title}, require_debrid=True, cache_limit=1)
        norm_title = scraper_utils.normalize_title(title)
        for _attrs, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            match = re.search('<h\d+[^>]*>\s*<a\s+href="([^"]+)[^>]*>(.*?)</a>', post)
            if match:
                post_url, post_title = match.groups()
                if '/tv-show/' in post or self.__too_old(post):
                    continue
                post_title = re.sub('<[^>]*>', '', post_title)
                meta = scraper_utils.parse_movie_link(post_title)
                full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
                match_year = meta['year']
                match_norm_title = scraper_utils.normalize_title(meta['title'])
                if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                    result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                    results.append(result)
    return results
def __episode_match(self, video, label):
    episode_pattern = 'Episode\s+0*%s(?!\d)' % (video.episode)
    if re.search(episode_pattern, label, re.I):
        return True

    if video.ep_title:
        match = re.search('Episode\s+\d+: (.*)', label)
        if match:
            label = match.group(1)
        if scraper_utils.normalize_title(video.ep_title) in scraper_utils.normalize_title(label):
            return True

    return False
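# Quick check of the zero-padded episode pattern used by __episode_match: the
# trailing (?!\d) keeps episode 7 from matching 70, 71, etc.
import re

def _ep_match(label, episode):
    return bool(re.search('Episode\s+0*%s(?!\d)' % (episode), label, re.I))

assert _ep_match('Episode 07: Pilot', 7)
assert not _ep_match('Episode 70: Finale', 7)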
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = '/search/' + urllib.quote_plus(title)
    search_url = scraper_utils.urljoin(self.base_url, search_url)
    html = self._http_get(search_url, require_debrid=False, cache_limit=1)
    if video_type == VIDEO_TYPES.TVSHOW:
        seen_urls = {}
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            if CATEGORIES[video_type] not in post:
                continue
            match = re.search('<span>\s*TAGS:\s*</span>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', post, re.I)
            if match:
                show_url, match_title = match.groups()
                if show_url in seen_urls:
                    continue
                result = {'url': scraper_utils.pathify_url(show_url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                seen_urls[show_url] = result
                results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        norm_title = scraper_utils.normalize_title(title)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = [result.content for result in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
        for heading, post in zip(headings, posts):
            if CATEGORIES[video_type] not in post or self.__too_old(post):
                continue

            post_url, post_title = heading
            meta = scraper_utils.parse_movie_link(post_title)
            full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
            match_year = meta['year']
            match_norm_title = scraper_utils.normalize_title(meta['title'])
            if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(post_url), 'title': scraper_utils.cleanse_title(full_title), 'year': match_year}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    norm_title = scraper_utils.normalize_title(title)
    url = scraper_utils.urljoin(self.base_url, '/search/')
    headers = {'Referer': self.base_url}
    html = self._http_get(url, headers=headers, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'li'):
        for attrs, _content in dom_parser2.parse_dom(item, 'a', req=['title', 'href']):
            match_title, match_url = attrs['title'], attrs['href']
            if norm_title in scraper_utils.normalize_title(match_title):
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    html = self._http_get(self.base_url, cache_limit=48)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'dizis'})
    if not fragment:
        return results

    norm_title = scraper_utils.normalize_title(title)
    for attrs, match_title in dom_parser2.parse_dom(fragment[0].content, 'a', req='href'):
        match_url = attrs['href']
        match_title = re.sub('<div[^>]*>.*?</div>', '', match_title)
        if norm_title in scraper_utils.normalize_title(match_title):
            result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    url = scraper_utils.urljoin(self.base_url, AJAX_URL)
    data = {'type': 'getDizi'}
    headers = {'Referer': scraper_utils.urljoin(self.base_url, '/arsiv')}
    headers.update(XHR)
    html = self._http_get(url, data=data, headers=headers, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    js_data = scraper_utils.parse_json(html, url)
    for item in js_data.get('data', []):
        match_title = item.get('adi', '')
        if 'url' in item and norm_title in scraper_utils.normalize_title(match_title):
            result = {'url': scraper_utils.pathify_url(item['url']), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
            results.append(result)
    return results
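# Assumed shape of the getDizi AJAX response consumed above ('adi' is the show
# name, per the code); the sample payload is illustrative only:
import json

_js_data = json.loads('{"data": [{"adi": "Example Show", "url": "/dizi/example"}]}')
assert [item['adi'] for item in _js_data.get('data', [])] == ['Example Show']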
def search(self, video_type, title, year, season=''):
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/%s.html')
    search_url = search_url % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=1)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'movie'})
    if not fragment:
        return results

    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'li'):
        match_url = dom_parser2.parse_dom(item, 'a', req='href')
        match_title = dom_parser2.parse_dom(item, 'span', {'class': 'text'})
        match_year = dom_parser2.parse_dom(item, 'span', {'class': 'year'})
        if not match_url or not match_title:
            continue

        match_url = match_url[0].attrs['href']
        match_title = re.sub('</?strong>', '', match_title[0].content)
        is_season = re.search('Season\s+(\d+)$', match_title, re.I)
        if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
            if video_type == VIDEO_TYPES.MOVIE:
                match_year = match_year[0].content if match_year else ''
            else:
                if season and int(is_season.group(1)) != int(season):
                    continue
                match_year = ''

            match_norm_title = scraper_utils.normalize_title(match_title)
            title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
            if title_match and (not year or not match_year or year == match_year):
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def _get_episode_url(self, show_url, video):
    url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(url, cache_limit=2)
    episode_pattern = 'href="([^"]+-s0*%se0*%s(?!\d)[^"]*)' % (video.season, video.episode)
    parts = dom_parser2.parse_dom(html, 'ul', {'class': 'episode_list'})
    fragment = '\n'.join(part.content for part in parts)
    result = self._default_get_episode_url(fragment, video, episode_pattern)
    if result:
        return result

    ep_urls = [r.attrs['href'] for r in dom_parser2.parse_dom(fragment, 'a', req='href')]
    ep_dates = [r.content for r in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_air_d'})]
    ep_titles = [r.content for r in dom_parser2.parse_dom(fragment, 'span', {'class': 'episode_name'})]
    force_title = scraper_utils.force_title(video)
    if not force_title and kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
        for ep_url, ep_date in zip(ep_urls, ep_dates):
            logger.log('Quikr Ep Airdate Matching: %s - %s - %s' % (ep_url, ep_date, video.ep_airdate), log_utils.LOGDEBUG)
            if video.ep_airdate == scraper_utils.to_datetime(ep_date, '%Y-%m-%d').date():
                return scraper_utils.pathify_url(ep_url)

    if force_title or kodi.get_setting('title-fallback') == 'true':
        norm_title = scraper_utils.normalize_title(video.ep_title)
        for ep_url, ep_title in zip(ep_urls, ep_titles):
            ep_title = re.sub('<span>.*?</span>\s*', '', ep_title)
            logger.log('Quikr Ep Title Matching: %s - %s - %s' % (ep_url.encode('utf-8'), ep_title.encode('utf-8'), video.ep_title), log_utils.LOGDEBUG)
            if norm_title == scraper_utils.normalize_title(ep_title):
                return scraper_utils.pathify_url(ep_url)
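# The airdate fallback above parses the page's 'YYYY-MM-DD' text and compares
# dates, not strings; strptime stands in for scraper_utils.to_datetime here.
import datetime

assert datetime.datetime.strptime('2016-09-25', '%Y-%m-%d').date() == datetime.date(2016, 9, 25)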
def __search(self, video_type, title, year, season=''):
    results = []
    search_url = SEARCH_URL % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=1)
    js_data = scraper_utils.parse_json(html)
    norm_title = scraper_utils.normalize_title(title)
    for item in js_data.get('results', []):
        if '/watch/' not in item['url'].lower():
            continue

        is_season = re.search('Season\s+(\d+)', item['titleNoFormatting'], re.IGNORECASE)
        if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
            match_title_year = item['titleNoFormatting']
            match_title_year = re.sub('^Watch\s+', '', match_title_year)
            match_url = item['url']
            match_year = ''
            if video_type == VIDEO_TYPES.MOVIE:
                match = re.search('(.*?)(?:\s+\(?(\d{4})\)?)', match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    match_title = match_title_year
            else:
                if season and int(is_season.group(1)) != int(season):
                    continue
                match = re.search('(.*?)\s+\(\d{4}\)', match_title_year)
                if match:
                    match_title = match.group(1)
                else:
                    match_title = match_title_year

            if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    try:
        season = int(season)
    except (TypeError, ValueError):
        season = 0

    # try the cached listing first; fall back to a live search
    results = self.__list(title)
    if not results:
        results = self.__search(title, season)

    filtered_results = []
    norm_title = scraper_utils.normalize_title(title)
    for result in results:
        if norm_title in scraper_utils.normalize_title(result['title']) and (not season or season == int(result['season'])):
            result['title'] = '%s - Season %s [%s]' % (result['title'], result['season'], result['q_str'])
            if Q_ORDER[result['quality']] <= self.max_qorder:
                filtered_results.append(result)

    filtered_results.sort(key=lambda x: Q_ORDER[x['quality']], reverse=True)
    return filtered_results
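# Hedged sketch of the Q_ORDER gate above: drop results above the configured
# quality ceiling, then sort best-first. The mapping is an assumed placeholder.
_Q_ORDER_EXAMPLE = {'SD': 1, 'HD720': 2, 'HD1080': 3}
_max_qorder = 2
_hits = [{'title': 'A', 'quality': 'HD1080'}, {'title': 'B', 'quality': 'SD'}, {'title': 'C', 'quality': 'HD720'}]
_kept = [h for h in _hits if _Q_ORDER_EXAMPLE[h['quality']] <= _max_qorder]
_kept.sort(key=lambda h: _Q_ORDER_EXAMPLE[h['quality']], reverse=True)
assert [h['title'] for h in _kept] == ['C', 'B']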
def __alt_search(self, video_type, title, year, season=''):
    results = []
    params = title.lower()
    if year:
        params += ' %s' % (year)
    if video_type == VIDEO_TYPES.SEASON and season:
        params += ' Season %s' % (season)
    params = {'key': params}
    search_url = urlparse.urljoin(self.base_url, '/search')
    html = self._http_get(search_url, params=params, cache_limit=1)
    norm_title = scraper_utils.normalize_title(title)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'caption'}):
        match = re.search('href="([^"]+)[^>]+>(.*?)<span[^>]*>', item)
        if match:
            match_url, match_title = match.groups()
            is_season = re.search('-season-\d+', match_url)
            if (video_type == VIDEO_TYPES.MOVIE and not is_season) or (video_type == VIDEO_TYPES.SEASON and is_season):
                if video_type == VIDEO_TYPES.SEASON:
                    if season and not re.search('season-0*%s$' % (season), match_url):
                        continue

                match_title = re.sub('</?[^>]*>', '', match_title)
                match_title = re.sub('\s+Full\s+Movie', '', match_title)
                match = re.search('-(\d{4})(?:$|-)', match_url)
                match_year = match.group(1) if match else ''

                if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/%s.html' % (urllib.quote_plus(title)))
    html = self._http_get(search_url, cache_limit=1)
    fragment = dom_parser2.parse_dom(html, 'ul', {'class': 'cfv'})
    if not fragment:
        return results

    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'li'):
        is_season = dom_parser2.parse_dom(item, 'div', {'class': 'status'})
        if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
            match = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
            if not match:
                continue

            match_title = match[0].attrs['title']
            match_url = match[0].attrs['href']
            match_year = ''
            if video_type == VIDEO_TYPES.SEASON:
                if season and not re.search('Season\s+%s$' % (season), match_title, re.I):
                    continue
            else:
                match = re.search('-(\d{4})[-.]', match_url)
                if match:
                    match_year = match.group(1)

            match_norm_title = scraper_utils.normalize_title(match_title)
            title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
            if title_match and (not year or not match_year or year == match_year):
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    page_url = scraper_utils.urljoin(self.base_url, '/search.php')
    html = self._http_get(page_url, params={'dayq': title}, cache_limit=48)
    html = re.sub('<!--.*?-->', '', html)  # strip HTML comments before parsing
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, td in dom_parser2.parse_dom(html, 'td', {'class': 'topic_content'}):
        match_url = dom_parser2.parse_dom(td, 'a', req='href')
        match_title_year = dom_parser2.parse_dom(td, 'img', req='alt')
        if not match_url or not match_title_year:
            continue

        match_url = match_url[0].attrs['href']
        match_title_year = match_title_year[0].attrs['alt']
        if not match_url.startswith('/'):
            match_url = '/tvseries/' + match_url
        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if (norm_title in scraper_utils.normalize_title(match_title)) and (not year or not match_year or year == match_year):
            result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
            results.append(result)
    return results
def __tv_search(self, title, year):
    results = []
    search_url = scraper_utils.urljoin(self.tv_base_url, '/showlist/')
    html = self._http_get(search_url, cache_limit=48)
    match_year = ''
    norm_title = scraper_utils.normalize_title(title)
    for attrs, match_title in dom_parser2.parse_dom(html, 'a', {'class': 'thread_link'}, req='href'):
        match_url = attrs['href']
        if match_title.upper().endswith(', THE'):
            match_title = 'The ' + match_title[:-5]  # move a trailing article back to the front
        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
            results.append(result)
    return results
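# The ', THE' rewrite above handles listing-style titles; for example:
_t = 'Office, The'
if _t.upper().endswith(', THE'):
    _t = 'The ' + _t[:-5]
assert _t == 'The Office'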
def search(self, video_type, title, year, season=''):
    results = []
    if video_type == VIDEO_TYPES.MOVIE:
        search_url = urlparse.urljoin(self.base_url, '/?s=')
        search_url += urllib.quote_plus('%s' % (title))
        html = self._http_get(search_url, cache_limit=1)
        links = dom_parser.parse_dom(html, 'a', {'class': 'clip-link'}, 'href')
        titles = dom_parser.parse_dom(html, 'a', {'class': 'clip-link'}, 'title')
        matches = zip(links, titles)
    else:
        html = self._http_get(self.base_url, cache_limit=8)
        matches = re.findall('<li\s+class="cat-item[^>]+>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', html)

    norm_title = scraper_utils.normalize_title(title)
    for item in matches:
        url, match_title_year = item  # both branches yield (url, title) pairs
        match = re.search('(.*?)\s+\(?(\d{4})\)?', match_title_year)
        if match:
            match_title, match_year = match.groups()
        else:
            match_title = match_title_year
            match_year = ''

        if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
            log_utils.log('Rainierland - search - Match Found: ' + str(norm_title))
            result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(url)}
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search')
    html = self._http_get(search_url, cache_limit=48)
    norm_title = scraper_utils.normalize_title(title)
    for item in dom_parser.parse_dom(html, 'li'):
        match = re.search('''href=["']([^"']+)[^>]+>([^<]+)''', item)
        if match:
            url, match_title = match.groups()
            match = re.search('(.*?)\s*\(Season\s+\d+', match_title)
            if match:
                match_title = match.group(1)
            if norm_title in scraper_utils.normalize_title(match_title):
                result = {'url': scraper_utils.pathify_url(url), 'title': scraper_utils.cleanse_title(match_title), 'year': ''}
                results.append(result)
    return results
def __search(self, video_type, title, year, page):
    results = []
    url = scraper_utils.urljoin(self.base_url, page['url'])
    params = page.get('params')
    html = self._http_get(url, params=params, cache_limit=24)
    norm_title = scraper_utils.normalize_title(title)
    match_year = ''
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'id': re.compile('movie-+\d+')}):
        is_tvshow = dom_parser2.parse_dom(item, 'div', {'class': 'movieTV'})
        if (is_tvshow and video_type == VIDEO_TYPES.TVSHOW) or (not is_tvshow and video_type == VIDEO_TYPES.MOVIE):
            fragment = dom_parser2.parse_dom(item, 'h4', {'class': 'showRowName'})
            if fragment:
                match = dom_parser2.parse_dom(fragment[0].content, 'a', req='href')
                if match:
                    match_url, match_title = match[0].attrs['href'], match[0].content
                    if re.search('/-?\d{7,}/', match_url):
                        continue
                    match_norm_title = scraper_utils.normalize_title(match_title)
                    if (match_norm_title in norm_title or norm_title in match_norm_title) and (not year or not match_year or year == match_year):
                        result = {'title': scraper_utils.cleanse_title(match_title), 'url': scraper_utils.pathify_url(match_url), 'year': match_year}
                        results.append(result)
    return results
def __search(self, video_type, title, year):
    url = urlparse.urljoin(self.base_url, '/advanced-search/menu-id-111.html?view=buscador')
    html = self._http_get(url, cache_limit=48)
    results = []
    norm_title = scraper_utils.normalize_title(title)
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'tagindex'})
    if fragment:
        for match in re.finditer('href="([^"]+)[^>]+>(.*?)</a>', fragment[0]):
            url, match_title = match.groups()
            match_title = re.sub('\s+\(\d+\)$', '', match_title)  # drop trailing item counts, e.g. "Title (12)"
            match_title = match_title.replace('&amp;', '&')  # unescape HTML-encoded ampersands
            if norm_title in scraper_utils.normalize_title(match_title):
                result = {'url': scraper_utils.pathify_url(url), 'title': match_title, 'year': ''}
                results.append(result)
    return results