def search(self, video_type, title, year, season=''):
    """Crawl the site's paginated /tvseries index and return entries whose
    normalized title contains the normalized search title.

    Returns a list of {'url', 'title', 'year'} result dicts.
    """
    results = []
    page_url = urlparse.urljoin(self.base_url, '/tvseries/index.php?&page=1')
    while page_url:
        html = self._http_get(page_url, cache_limit=48)
        # strip HTML comments so commented-out markup can't produce matches below
        html = re.sub('<!--.*?-->', '', html)
        norm_title = scraper_utils.normalize_title(title)
        for td in dom_parser.parse_dom(html, 'td', {'class': 'topic_content'}):
            match_url = re.search('href="([^"]+)', td)
            # the show title/year lives in the poster image's alt attribute
            match_title_year = dom_parser.parse_dom(td, 'img', ret='alt')
            if match_url and match_title_year:
                match_url = match_url.group(1)
                # listing links are relative to the /tvseries/ section
                if not match_url.startswith('/'):
                    match_url = '/tvseries/' + match_url
                match_title, match_year = scraper_utils.extra_year(match_title_year[0])
                if norm_title in scraper_utils.normalize_title(match_title):
                    result = {'url': scraper_utils.pathify_url(match_url),
                              'title': scraper_utils.cleanse_title(match_title),
                              'year': match_year}
                    results.append(result)
        # follow the ">>" next-page link; stop when there isn't one
        match = re.search('href="([^"]+)[^>]*>>', html)
        if match:
            page_url = urlparse.urljoin(self.base_url, match.group(1))
        else:
            page_url = ''
    return results
def search(self, video_type, title, year, season=''):
    """Query the site's /search/ page and return {'title', 'url', 'year'} dicts.

    Skips TV-series links when a movie is requested; filters on *year* only
    when both the requested and scraped years are known.
    """
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search/')
    search_url += urllib.quote_plus(title)
    html = self._http_get(search_url, cache_limit=1)
    for fragment in dom_parser.parse_dom(html, 'div', {'class': 'inner'}):
        name = dom_parser.parse_dom(fragment, 'div', {'class': 'name'})
        if name:
            match = re.search('href="([^"]+)[^>]+>(.*?)</a>', name[0])
            if match:
                match_url, match_title_year = match.groups()
                if 'tv-series' in match_url and video_type == VIDEO_TYPES.MOVIE:
                    continue
                # drop any markup and the "Watch Movie" boilerplate from the label
                match_title_year = re.sub('</?[^>]*>', '', match_title_year)
                match_title_year = re.sub('[Ww]atch\s+[Mm]ovie\s*', '', match_title_year)
                # repair UTF-8-read-as-Latin-1 mojibake for a right single quote
                match_title_year = match_title_year.replace('&#146;', "'")
                match_title, match_year = scraper_utils.extra_year(match_title_year)
                if not match_year:
                    # fall back to the separate year span when the label has no year
                    year_span = dom_parser.parse_dom(fragment, 'span', {'class': 'year'})
                    if year_span:
                        year_text = dom_parser.parse_dom(year_span[0], 'a')
                        if year_text:
                            match_year = year_text[0].strip()
                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title),
                              'url': scraper_utils.pathify_url(match_url),
                              'year': match_year}
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Search via the site's Ajax Search Pro endpoint (XHR POST).

    Distinguishes season results (label contains "Season N") from movies;
    title matching is bidirectional substring on normalized titles.
    Returns a list of {'url', 'title', 'year'} dicts.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/wp-content/plugins/ajax-search-pro/ajax_search.php')
    # fixed plugin parameters observed from the site's own search widget
    data = {'action': 'ajaxsearchpro_search', 'aspp': title, 'asid': '1', 'asp_inst_id': '1_1',
            'options': 'current_page_id=7&qtranslate_lang=0&set_intitle=None&customset%5B%5D=post'}
    html = self._http_get(search_url, data=data, headers=XHR, cache_limit=8)
    norm_title = scraper_utils.normalize_title(title)
    for attrs, match_title_year in dom_parser2.parse_dom(html, 'a', {'class': 'asp_res_url'}):
        match_url = attrs['href']
        match_title_year = re.sub('</?[^>]*>', '', match_title_year)
        is_season = re.search('Season\s+(\d+)\s*', match_title_year, re.I)
        # only keep entries whose kind matches the requested video type
        if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
            match_year = ''
            if video_type == VIDEO_TYPES.SEASON:
                if season and int(is_season.group(1)) != int(season):
                    continue
                match_title = match_title_year
                # strip a stray 4-digit year from season labels
                match_title = re.sub('\s*\d{4}', '', match_title)
            else:
                match_title, match_year = scraper_utils.extra_year(match_title_year)
            match_norm_title = scraper_utils.normalize_title(match_title)
            # accept either direction of substring containment
            title_match = (norm_title in match_norm_title) or (match_norm_title in norm_title)
            if title_match and (not year or not match_year or year == match_year):
                result = {'url': scraper_utils.pathify_url(match_url),
                          'title': scraper_utils.cleanse_title(match_title),
                          'year': match_year}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Search the site's /search/<title> listing of recent items.

    Season entries are detected by "Season N" in the heading; movies get a
    title/year split via extra_year. Returns {'url', 'title', 'year'} dicts.
    """
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search/%s' % (urllib.quote(title)))
    headers = {'Referer': self.base_url}
    html = self._http_get(search_url, headers=headers, cache_limit=8)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'recent-item'}):
        # the title link lives inside an <h1>..<h6> heading
        fragment = dom_parser.parse_dom(item, 'h\d+')
        if not fragment: continue
        match_title_year = dom_parser.parse_dom(fragment[0], 'a', {'rel': 'bookmark'})
        match_url = dom_parser.parse_dom(fragment[0], 'a', {'rel': 'bookmark'}, ret='href')
        if match_title_year and match_url:
            match_title_year = match_title_year[0]
            match_url = match_url[0]
            match_title_year = re.sub('</?span[^>]*>', '', match_title_year)
            is_season = re.search('Season\s+(\d+)\s*', match_title_year, re.I)
            # only keep entries whose kind matches the requested video type
            if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
                match_year = ''
                if video_type == VIDEO_TYPES.SEASON:
                    match_title = match_title_year
                    if season and int(is_season.group(1)) != int(season):
                        continue
                else:
                    match_title, match_year = scraper_utils.extra_year(match_title_year)
                if not year or not match_year or year == match_year:
                    result = {'url': scraper_utils.pathify_url(match_url),
                              'title': scraper_utils.cleanse_title(match_title),
                              'year': match_year}
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Search via the site's ?s= query and scrape 'item' divs.

    Episode entries ("1x05"-style labels) are skipped; a trailing resolution
    suffix (e.g. "720p") is trimmed and a quality tag ('calidad2' span) is
    appended in brackets. Returns {'title', 'year', 'url'} dicts.
    """
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=8)
    for item in dom_parser.parse_dom(html, 'div', {'class': 'item'}):
        match = re.search('href="([^"]+)', item)
        match_title = dom_parser.parse_dom(item, 'span', {'class': 'tt'})
        year_frag = dom_parser.parse_dom(item, 'span', {'class': 'year'})
        if match and match_title:
            url = match.group(1)
            match_title = match_title[0]
            if re.search('\d+\s*x\s*\d+', match_title):
                continue  # exclude episodes
            match_title, match_year = scraper_utils.extra_year(match_title)
            if not match_year and year_frag:
                match_year = year_frag[0]
            # trim a trailing resolution marker (e.g. " 720p") from the title
            match = re.search('(.*?)\s+\d{3,}p', match_title)
            if match:
                match_title = match.group(1)
            extra = dom_parser.parse_dom(item, 'span', {'class': 'calidad2'})
            if extra:
                match_title += ' [%s]' % (extra[0])
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title),
                          'year': match_year,
                          'url': scraper_utils.pathify_url(url)}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Search via /index.php; the site either renders a results page or
    redirects with a window.location script on an exact match.

    Adult entries are skipped. Returns {'url', 'title', 'year'} dicts.
    """
    results = []
    search_url = urlparse.urljoin(self.base_url, '/index.php')
    params = {'search': title, 'image.x': 0, 'image.y': 0}
    html = self._http_get(search_url, params=params, cache_limit=1)
    # Are we on a results page?
    if not re.search('window\.location', html):
        pattern = '<td[^>]+class="movieText"[^>]*>(.*?)</p>.*?href="(/watch/[^"]+)'
        for match in re.finditer(pattern, html, re.DOTALL):
            match_title_year, match_url = match.groups('')
            # skip p**n
            if '-XXX-' in match_url.upper() or ' XXX:' in match_title_year:
                continue
            match_title_year = re.sub('</?.*?>', '', match_title_year)
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if not year or not match_year or year == match_year:
                result = {'url': match_url,
                          'title': scraper_utils.cleanse_title(match_title),
                          'year': match_year}
                results.append(result)
    else:
        # exact match: the page redirects straight to the video
        match = re.search('window\.location\s+=\s+"([^"]+)', html)
        if match:
            url = match.group(1)
            # 'movies.php' is the generic no-result redirect, not a video page
            if url != 'movies.php':
                result = {'url': scraper_utils.pathify_url(url),
                          'title': scraper_utils.cleanse_title(title),
                          'year': year}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search /search-movies/<title>.html; the real title/year is embedded
    in each item's onmouseover tooltip markup.

    Filters movie vs season entries; for movies the year can fall back to a
    "Release:" line in the tooltip. Returns {'title', 'year', 'url'} dicts.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search-movies/%s.html' % (urllib.quote_plus(title)))
    html = self._http_get(search_url, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'li', {'class': 'item'}):
        match_url = dom_parser2.parse_dom(item, 'a', req='href')
        match_title_year = re.search('onmouseover="([^"]+)', item)
        if match_url and match_title_year:
            match_url = match_url[0].attrs['href']
            match_title_year = match_title_year.group(1)
            # the bolded (possibly italicized) text inside the tooltip is the title
            match = re.search('<b>(?:<i>)?\s*(.*?)\s*(?:</i>)?</b>', match_title_year)
            if not match: continue
            match_title, match_year = scraper_utils.extra_year(match.group(1))
            is_season = re.search('season\s+(\d+)', match_title_year, re.I)
            # drop entries whose kind doesn't match the requested video type
            if (is_season and video_type == VIDEO_TYPES.MOVIE) or (not is_season and video_type == VIDEO_TYPES.SEASON):
                continue
            if video_type == VIDEO_TYPES.MOVIE:
                if not match_year:
                    # fall back to the "Release: YYYY" line in the tooltip
                    match_year = re.search('>Release:\s*(\d{4})', match_title_year)
                    match_year = match_year.group(1) if match_year else ''
            else:
                if season and int(season) != int(is_season.group(1)):
                    continue
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title),
                          'year': match_year,
                          'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the site's article listing for *title*.

    Returns a list of {'title', 'year', 'url'} dicts; a hit is dropped only
    when both the requested and scraped years are known and disagree.
    """
    hits = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=8)
    for _attrs, article in dom_parser2.parse_dom(html, 'article', {'class': 'item-list'}):
        anchors = dom_parser2.parse_dom(article, 'a', req='href')
        if not anchors:
            continue
        link = anchors[0].attrs['href']
        label = anchors[0].content
        found_title, found_year = scraper_utils.extra_year(label)
        if year and found_year and year != found_year:
            continue
        hits.append({'title': scraper_utils.cleanse_title(found_title),
                     'year': found_year,
                     'url': scraper_utils.pathify_url(link)})
    return hits
def search(self, video_type, title, year, season=''):
    """Search via ?s=&search= and scrape movie_poster divs.

    Returns a list of {'url', 'title', 'year'} dicts filtered by *year*
    only when both years are known.
    """
    found = []
    html = self._http_get(self.base_url, params={'s': title, 'search': ''}, cache_limit=8)
    for poster in dom_parser.parse_dom(html, 'div', {'class': '[^"]*movie_poster[^"]*'}):
        link = re.search('href="([^"]+)[^>]+title="([^"]+)', poster)
        if not link:
            continue
        page_url, label = link.groups()
        item_title, item_year = scraper_utils.extra_year(label)
        if year and item_year and year != item_year:
            continue
        found.append({'url': scraper_utils.pathify_url(page_url),
                      'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year})
    return found
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search via ?s=&search= and scrape movie_poster divs (dom_parser2).

    Returns a list of {'url', 'title', 'year'} dicts filtered by *year*
    only when both years are known.
    """
    found = []
    html = self._http_get(self.base_url, params={'s': title, 'search': ''}, cache_limit=8)
    for _attrs, poster in dom_parser2.parse_dom(html, 'div', {'class': 'movie_poster'}):
        anchors = dom_parser2.parse_dom(poster, 'a', req=['href', 'title'])
        if not anchors:
            continue
        link = anchors[0].attrs
        item_title, item_year = scraper_utils.extra_year(link['title'])
        if year and item_year and year != item_year:
            continue
        found.append({'url': scraper_utils.pathify_url(link['href']),
                      'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year})
    return found
def search(self, video_type, title, year, season=''):
    """Search via ?s= and scrape box-shadow list items.

    Returns a list of {'title', 'year', 'url'} dicts; the site's "nothing
    matched" message short-circuits the scrape.
    """
    found = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=1)
    if re.search('nothing matched your search criteria', html, re.I):
        return found
    for entry in dom_parser.parse_dom(html, 'li', {'class': '[^"]*box-shadow[^"]*'}):
        link = re.search('href="([^"]+)[^>]*title="([^"]+)', entry)
        if not link:
            continue
        page_url, label = link.groups()
        item_title, item_year = scraper_utils.extra_year(label)
        if year and item_year and year != item_year:
            continue
        found.append({'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year,
                      'url': scraper_utils.pathify_url(page_url)})
    return found
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the /keywords/<title>/ listing.

    The anchor's title attribute supplies the result title; its text content
    supplies the year (via extra_year). Returns {'url', 'title', 'year'} dicts.
    """
    found = []
    search_url = scraper_utils.urljoin(self.base_url, '/keywords/%s/' % (title))
    html = self._http_get(search_url, cache_limit=4)
    for _attrs, about in dom_parser2.parse_dom(html, 'div', {'class': 'movie_about'}):
        anchors = dom_parser2.parse_dom(about, 'a', req=['href', 'title'])
        if not anchors:
            continue
        anchor = anchors[0]
        # year comes from the anchor text, title from the title attribute
        _ignored, item_year = scraper_utils.extra_year(anchor.content)
        if year and item_year and year != item_year:
            continue
        found.append({'url': scraper_utils.pathify_url(anchor.attrs['href']),
                      'title': scraper_utils.cleanse_title(anchor.attrs['title']),
                      'year': item_year})
    return found
def search(self, video_type, title, year, season=''):
    """Query /search.php?q=<title> and scrape movie_about divs.

    Returns a list of {'url', 'title', 'year'} dicts filtered by *year*
    only when both years are known.
    """
    found = []
    endpoint = urlparse.urljoin(self.base_url, '/search.php')
    html = self._http_get(endpoint, params={'q': title}, cache_limit=4)
    for about in dom_parser.parse_dom(html, 'div', {'class': 'movie_about'}):
        hrefs = dom_parser.parse_dom(about, 'a', ret='href')
        labels = dom_parser.parse_dom(about, 'a')
        if not (hrefs and labels):
            continue
        item_title, item_year = scraper_utils.extra_year(labels[0])
        if year and item_year and year != item_year:
            continue
        found.append({'url': scraper_utils.pathify_url(hrefs[0]),
                      'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year})
    return found
def search(self, video_type, title, year, season=''):
    """Search via ?s= and scrape the listing-videos <ul> fragment.

    Pairs each anchor's href with its (tag-stripped) text label.
    Returns a list of {'title', 'year', 'url'} dicts.
    """
    found = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=8)
    listing = dom_parser.parse_dom(html, 'ul', {'class': '[^"]*listing-videos[^"]*'})
    if not listing:
        return found
    hrefs = dom_parser.parse_dom(listing[0], 'a', ret='href')
    labels = dom_parser.parse_dom(listing[0], 'a')
    for page_url, label in zip(hrefs, labels):
        clean_label = re.sub('</?[^>]*>', '', label)
        item_title, item_year = scraper_utils.extra_year(clean_label)
        if year and item_year and year != item_year:
            continue
        found.append({'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year,
                      'url': scraper_utils.pathify_url(page_url)})
    return found
def search(self, video_type, title, year, season=''):
    """POST the theme's ajax-search endpoint and scrape the returned <li> list.

    Returns a list of {'url', 'title', 'year'} dicts filtered by *year*
    only when both years are known.
    """
    found = []
    endpoint = urlparse.urljoin(self.base_url, '/wp-content/themes/afdah/ajax-search.php')
    payload = {'search': title, 'type': 'title'}
    html = self._http_get(endpoint, data=payload, headers=XHR, cache_limit=1)
    for entry in dom_parser.parse_dom(html, 'li'):
        hrefs = dom_parser.parse_dom(entry, 'a', ret='href')
        labels = dom_parser.parse_dom(entry, 'a')
        if not (hrefs and labels):
            continue
        item_title, item_year = scraper_utils.extra_year(labels[0])
        if year and item_year and year != item_year:
            continue
        found.append({'url': scraper_utils.pathify_url(hrefs[0]),
                      'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year})
    return found
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """POST the theme's ajax-search endpoint (test1/test2 parameter variant).

    Scrapes the returned <li> anchors; returns {'url', 'title', 'year'} dicts
    filtered by *year* only when both years are known.
    """
    found = []
    endpoint = scraper_utils.urljoin(self.base_url, '/wp-content/themes/afdah/ajax-search.php')
    payload = {'test1': title, 'test2': 'title'}
    html = self._http_get(endpoint, data=payload, headers=XHR, cache_limit=1)
    for _attrs, entry in dom_parser2.parse_dom(html, 'li'):
        anchors = dom_parser2.parse_dom(entry, 'a', req='href')
        if not anchors:
            continue
        item_title, item_year = scraper_utils.extra_year(anchors[0].content)
        if year and item_year and year != item_year:
            continue
        found.append({'url': scraper_utils.pathify_url(anchors[0].attrs['href']),
                      'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year})
    return found
def search(self, video_type, title, year, season=''):
    """Search via ?s=&submit= (with Referer header) and scrape aaa_item divs.

    The anchor's title attribute carries the title/year label.
    Returns a list of {'url', 'title', 'year'} dicts.
    """
    found = []
    req_headers = {'Referer': self.base_url}
    query = {'s': title, 'submit': 'Search Now!'}
    html = self._http_get(self.base_url, params=query, headers=req_headers, cache_limit=8)
    for entry in dom_parser.parse_dom(html, 'div', {'class': 'aaa_item'}):
        labels = dom_parser.parse_dom(entry, 'a', ret='title')
        hrefs = dom_parser.parse_dom(entry, 'a', ret='href')
        if not (labels and hrefs):
            continue
        item_title, item_year = scraper_utils.extra_year(labels[0])
        if year and item_year and year != item_year:
            continue
        found.append({'url': scraper_utils.pathify_url(hrefs[0]),
                      'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year})
    return found
def search(self, video_type, title, year, season=''):
    """Search the site; TV shows are delegated to the dedicated __tv_search
    helper, everything else uses the generic ?s= query.

    Skips "Season ... Episode ..." entries so only whole titles are returned.
    Returns a list of {'url', 'title', 'year'} dicts.
    """
    if video_type == VIDEO_TYPES.TVSHOW:
        return self.__tv_search(title, year)
    else:
        results = []
        html = self._http_get(self.base_url, params={'s': title}, cache_limit=1)
        titles = dom_parser.parse_dom(html, 'a', {'class': 'coverImage'}, ret='title')
        links = dom_parser.parse_dom(html, 'a', {'class': 'coverImage'}, ret='href')
        for match_title_year, match_url in zip(titles, links):
            # individual episodes are not valid search results here
            if 'Season' in match_title_year and 'Episode' in match_title_year:
                continue
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(match_url),
                          'title': scraper_utils.cleanse_title(match_title),
                          'year': match_year}
                results.append(result)
        return results
def search(self, video_type, title, year, season=''):
    """POST a DLE-style site search (subaction=search) and scrape results.

    Only scrapes when an 'sresult' marker div is present on the page.
    Returns a list of {'title', 'year', 'url'} dicts.
    """
    results = []
    data = {'subaction': 'search', 'do': 'search', 'story': urllib.quote(title)}
    html = self._http_get(self.base_url, data=data, cache_limit=8)
    # the 'sresult' div only appears when the search actually ran
    if dom_parser.parse_dom(html, 'div', {'class': 'sresult'}):
        for item in dom_parser.parse_dom(html, 'div', {'class': 'short_content'}):
            match = re.search('href="([^"]+)', item)
            match_title_year = dom_parser.parse_dom(item, 'div', {'class': 'short_header'})
            if match and match_title_year:
                url = match.group(1)
                match_title, match_year = scraper_utils.extra_year(match_title_year[0])
                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title),
                              'year': match_year,
                              'url': scraper_utils.pathify_url(url)}
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """POST a DLE-style site search against index.php and scrape results.

    Only scrapes when an 'sresult' marker div is present on the page.
    Returns a list of {'title', 'year', 'url'} dicts.
    """
    results = []
    # full DLE search form payload; 'story' carries the query text
    data = {'hash': 'indexert', 'do': 'search', 'subaction': 'search', 'search_start': 0,
            'full_search': 0, 'result_from': 1, 'story': title}
    search_url = scraper_utils.urljoin(self.base_url, 'index.php')
    html = self._http_get(search_url, params={'do': 'search'}, data=data, cache_limit=8)
    # the 'sresult' div only appears when the search actually ran
    if dom_parser2.parse_dom(html, 'div', {'class': 'sresult'}):
        for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'short_content'}):
            match_url = dom_parser2.parse_dom(item, 'a', req='href')
            match_title_year = dom_parser2.parse_dom(item, 'div', {'class': 'short_header'})
            if match_url and match_title_year:
                match_url = match_url[0].attrs['href']
                match_title, match_year = scraper_utils.extra_year(match_title_year[0].content)
                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title),
                              'year': match_year,
                              'url': scraper_utils.pathify_url(match_url)}
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Search the site's /bestmatch-search-<slug>.html page.

    The title is slugified (spaces to dashes, non-alphanumerics stripped,
    lowercased) before substitution into the URL template.
    Returns a list of {'url', 'title', 'year'} dicts.
    """
    found = []
    template = urlparse.urljoin(self.base_url, '/bestmatch-search-%s.html')
    slug = re.sub('[^A-Za-z0-9-]', '', title.replace(' ', '-')).lower()
    html = self._http_get(template % (slug), cache_limit=1)
    for thumb in dom_parser.parse_dom(html, 'div', {'class': 'thumbsTitle'}):
        link = re.search('href="([^"]+)[^>]*>(.*?)</a>', thumb)
        if not link:
            continue
        page_url, label = link.groups('')
        item_title, item_year = scraper_utils.extra_year(label)
        if year and item_year and year != item_year:
            continue
        found.append({'url': scraper_utils.pathify_url(page_url),
                      'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year})
    return found
def search(self, video_type, title, year, season=''):
    """Search the site's /search/<slug>.html page and scrape slideposter divs.

    The title/year label comes from the poster image's alt attribute.
    Returns a list of {'title', 'year', 'url'} dicts.

    BUG FIX: the original unwrapped the alt list with match_title_year[0]
    and then indexed it AGAIN when calling extra_year, so extra_year only
    ever saw the first character of the title; pass the whole string.
    """
    results = []
    search_url = urlparse.urljoin(self.base_url, '/search/%s.html')
    search_url = search_url % (self.__to_slug(title))
    html = self._http_get(search_url, cache_limit=8)
    for item in dom_parser.parse_dom(html, 'div', {'class': '[^"]*slideposter[^"]*'}):
        match_url = dom_parser.parse_dom(item, 'a', ret='href')
        match_title_year = dom_parser.parse_dom(item, 'img', ret='alt')
        if match_url and match_title_year:
            match_url = match_url[0]
            match_title_year = match_title_year[0]
            # pass the full alt text, not its first character
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title),
                          'year': match_year,
                          'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the site's /tag/<slug> page for matching top-item anchors.

    TV-show links ('-tvshow-' in the URL) are skipped; a leading "Watch"
    and any markup are stripped from labels before the title/year split.
    Returns a list of {'title', 'year', 'url'} dicts.
    """
    found = []
    # slugify: strip punctuation, collapse whitespace runs to dashes
    title = re.sub('[^A-Za-z0-9 ]', '', title)
    title = re.sub('\s+', '-', title)
    tag_url = scraper_utils.urljoin(self.base_url, '/tag/%s' % (title))
    html = self._http_get(tag_url, cache_limit=1)
    for attrs, label in dom_parser2.parse_dom(html, 'a', {'class': 'top-item'}, req='href'):
        page_url = attrs['href']
        if '-tvshow-' in page_url:
            continue
        label = re.sub('</?[^>]*>', '', label)
        label = re.sub('^Watch\s*', '', label)
        item_title, item_year = scraper_utils.extra_year(label)
        if year and item_year and year != item_year:
            continue
        found.append({'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year,
                      'url': scraper_utils.pathify_url(page_url)})
    return found
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search via ?s= and scrape anchors inside the listing-videos <ul>.

    Returns early on the site's "Sorry, but nothing matched" message or
    when the listing fragment is absent. Returns {'title', 'year', 'url'}.
    """
    found = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=1)
    if re.search('Sorry, but nothing matched', html, re.I):
        return found
    listing = dom_parser2.parse_dom(html, 'ul', {'class': 'listing-videos'})
    if not listing:
        return found
    for attrs, label in dom_parser2.parse_dom(listing[0].content, 'a', req='href'):
        clean_label = re.sub('</?[^>]*>', '', label)
        item_title, item_year = scraper_utils.extra_year(clean_label)
        if year and item_year and year != item_year:
            continue
        found.append({'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year,
                      'url': scraper_utils.pathify_url(attrs['href'])})
    return found
def search(self, video_type, title, year, season=''):
    """Search via ?s= and scrape box-shadow list items.

    Skips episode entries (SxxEyy labels) and TV-series entries, and only
    keeps hits whose normalized title contains the normalized query.
    Returns a list of {'title', 'year', 'url'} dicts.
    """
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=1)
    results = []
    if not re.search('Sorry, but nothing matched', html):
        norm_title = scraper_utils.normalize_title(title)
        for item in dom_parser.parse_dom(html, 'li', {'class': '[^"]*box-shadow[^"]*'}):
            match = re.search('href="([^"]+)"\s+title="([^"]+)', item)
            if match:
                url, match_title_year = match.groups()
                if re.search('S\d{2}E\d{2}', match_title_year):
                    continue  # skip episodes
                if re.search('TV\s*SERIES', match_title_year, re.I):
                    continue  # skip shows
                match_title, match_year = scraper_utils.extra_year(match_title_year)
                if (not year or not match_year or year == match_year) and norm_title in scraper_utils.normalize_title(match_title):
                    result = {'title': scraper_utils.cleanse_title(match_title),
                              'year': match_year,
                              'url': scraper_utils.pathify_url(url)}
                    results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search via ?s= and scrape anchors inside box-shadow list items.

    Skips episode entries (SxxEyy labels) and TV-series entries, and only
    keeps hits whose normalized title contains the normalized query.
    Returns a list of {'title', 'year', 'url'} dicts.
    """
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=1)
    if re.search('Sorry, but nothing matched', html, re.I):
        return results
    norm_title = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'li', {'class': 'box-shadow'}):
        # each list item may contain several anchors; examine them all
        for attrs, _content in dom_parser2.parse_dom(item, 'a', req=['href', 'title']):
            match_url, match_title_year = attrs['href'], attrs['title']
            if re.search('S\d{2}E\d{2}', match_title_year):
                continue  # skip episodes
            if re.search('TV\s*SERIES', match_title_year, re.I):
                continue  # skip shows
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if (not year or not match_year or year == match_year) and norm_title in scraper_utils.normalize_title(match_title):
                result = {'title': scraper_utils.cleanse_title(match_title),
                          'year': match_year,
                          'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """POST the site's /search form, scoped to movies ('m') or TV ('t'),
    and scrape anchors from the results table.

    Returns a list of {'url', 'title', 'year'} dicts.
    """
    found = []
    scope = 'm' if video_type == VIDEO_TYPES.MOVIE else 't'
    endpoint = scraper_utils.urljoin(self.base_url, '/search')
    html = self._http_get(endpoint, data={'searchquery': title, 'searchin': scope}, cache_limit=8)
    page = dom_parser2.parse_dom(html, 'div', {'class': 'search-page'})
    if not page:
        return found
    table = dom_parser2.parse_dom(page[0].content, 'table')
    if not table:
        return found
    for attrs, label in dom_parser2.parse_dom(table[0].content, 'a', req='href'):
        item_title, item_year = scraper_utils.extra_year(label)
        if year and item_year and year != item_year:
            continue
        found.append({'url': scraper_utils.pathify_url(attrs['href']),
                      'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year})
    return found
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search via ?s= and scrape browse-movie-bottom items.

    Falls back to the browse-movie-year div when the label has no year, and
    appends '?watching' to every result URL (the site's direct-watch link).
    Returns a list of {'title', 'year', 'url'} dicts.
    """
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'browse-movie-bottom'}):
        match = dom_parser2.parse_dom(item, 'a', req='href')
        if match:
            match_url, match_title_year = match[0].attrs['href'], match[0].content
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if not match_year:
                # fall back to the dedicated year div next to the link
                div = dom_parser2.parse_dom(item, 'div', {'class': 'browse-movie-year'})
                if div:
                    match_year = div[0].content.strip()
            # always request the watch variant of the page
            match_url += '?watching'
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title),
                          'year': match_year,
                          'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Query the site's JSON suggestion endpoint and build result dicts.

    Each suggestion's 'Value' is both the display label and the entityName
    used to build the details URL; 'ReleaseYear' is preferred over any year
    parsed out of the label. Returns {'title', 'year', 'url'} dicts.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/searchBoxSuggestion')
    html = self._http_get(search_url, params={'top': 8, 'query': title}, cache_limit=8)
    js_data = scraper_utils.parse_json(html, search_url)
    for item in js_data:
        entityName = match_title_year = item.get('Value', '')
        if entityName:
            match_title, match_year2 = scraper_utils.extra_year(match_title_year)
            # prefer the explicit ReleaseYear field; fall back to the parsed year
            match_year = str(item.get('ReleaseYear', ''))
            if not match_year:
                match_year = match_year2
            match_url = '/ontology/EntityDetails?' + urllib.urlencode({'entityName': entityName, 'ignoreMediaLinkError': 'false'})
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title),
                          'year': match_year,
                          'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):
    """Query the WordPress dwls_search AJAX action and parse its JSON reply.

    A Referer mimicking the normal search page is sent along with the XHR
    headers. Returns a list of {'title', 'year', 'url'} dicts.
    """
    results = []
    search_url = urlparse.urljoin(self.base_url, '/wp-admin/admin-ajax.php')
    # mimic the browser: Referer set to the equivalent on-site search page
    referer = self.base_url + '/?' + urllib.urlencode({'s': title, 'submit': 'Search'})
    headers = {'Referer': referer}
    headers.update(XHR)
    params = {'s': title, 'action': 'dwls_search'}
    html = self._http_get(search_url, params=params, headers=headers, cache_limit=8)
    js_data = scraper_utils.parse_json(html, search_url)
    for match in js_data.get('results', []):
        match_title_year = match.get('post_title')
        match_url = match.get('permalink')
        if match_url and match_title_year:
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title),
                          'year': match_year,
                          'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """POST the theme's ajax-search endpoint (test1/test2 parameter variant)
    and scrape the returned <li> anchors.

    Returns a list of {'url', 'title', 'year'} dicts filtered by *year*
    only when both years are known.
    """
    found = []
    endpoint = scraper_utils.urljoin(self.base_url, '/wp-content/themes/afdah/ajax-search.php')
    payload = {'test1': title, 'test2': 'title'}
    html = self._http_get(endpoint, data=payload, headers=XHR, cache_limit=1)
    for _attrs, entry in dom_parser2.parse_dom(html, 'li'):
        anchors = dom_parser2.parse_dom(entry, 'a', req='href')
        if not anchors:
            continue
        item_title, item_year = scraper_utils.extra_year(anchors[0].content)
        if year and item_year and year != item_year:
            continue
        found.append({'url': scraper_utils.pathify_url(anchors[0].attrs['href']),
                      'title': scraper_utils.cleanse_title(item_title),
                      'year': item_year})
    return found
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search via /index.php; the site either renders a results page or
    redirects with a window.location script on an exact match.

    Adult entries are skipped. Returns {'url', 'title', 'year'} dicts.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/index.php')
    params = {'search': title, 'image.x': 0, 'image.y': 0}
    html = self._http_get(search_url, params=params, cache_limit=1)
    # Are we on a results page?
    if not re.search('window\.location', html):
        pattern = '<td[^>]+class="movieText"[^>]*>(.*?)</p>.*?href="(/watch/[^"]+)'
        for match in re.finditer(pattern, html, re.DOTALL):
            match_title_year, match_url = match.groups('')
            # skip p**n
            if '-XXX-' in match_url.upper() or ' XXX:' in match_title_year:
                continue
            match_title_year = re.sub('</?.*?>', '', match_title_year)
            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if not year or not match_year or year == match_year:
                result = {'url': match_url,
                          'title': scraper_utils.cleanse_title(match_title),
                          'year': match_year}
                results.append(result)
    else:
        # exact match: the page redirects straight to the video
        match = re.search('window\.location\s+=\s+"([^"]+)', html)
        if not match:
            return results
        url = match.group(1)
        # 'movies.php' is the generic no-result redirect, not a video page
        if url != 'movies.php':
            result = {'url': scraper_utils.pathify_url(url),
                      'title': scraper_utils.cleanse_title(title),
                      'year': year}
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search via ?s= and scrape result-item divs.

    The 'movies'/'tvshows' badge spans are used to drop entries of the wrong
    kind; a dedicated year span provides a fallback when the title lacks one.
    Returns a list of {'title', 'year', 'url'} dicts.
    """
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'result-item'}):
        match = dom_parser2.parse_dom(item, 'div', {'class': 'title'})
        is_movie = dom_parser2.parse_dom(item, 'span', {'class': 'movies'})
        is_show = dom_parser2.parse_dom(item, 'span', {'class': 'tvshows'})
        # drop entries whose badge contradicts the requested type (or with no title div)
        if (video_type == VIDEO_TYPES.TVSHOW and is_movie) or (video_type == VIDEO_TYPES.MOVIE and is_show) or not match:
            continue
        match = dom_parser2.parse_dom(match[0].content, 'a', req='href')
        if not match:
            continue
        match_url, match_title_year = match[0].attrs['href'], match[0].content
        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if not match_year:
            # fall back to the dedicated year span
            match_year = dom_parser2.parse_dom(item, 'span', {'class': 'year'})
            match_year = match_year[0].content if match_year else ''
        if not year or not match_year or year == match_year:
            result = {'title': scraper_utils.cleanse_title(match_title),
                      'year': match_year,
                      'url': scraper_utils.pathify_url(match_url)}
            results.append(result)
    return results