Python cleanse_title 예제들, deaths_lib.scraper_utils.cleanse_title Python 예제들

예제 #1

0

파일 보기

파일: dizibox_scraper.py 프로젝트: uguer30/Project

 def __get_ok(self, embed, flashvars):
     hosters = []
     link = flashvars[0].attrs['value']
     match = re.search('metadataUrl=([^"]+)', link)
     if match:
         referer = scraper_utils.cleanse_title(
             urllib.unquote(embed[0].attrs['data']))
         ok_url = scraper_utils.cleanse_title(urllib.unquote(
             match.group(1)))
         html = self._http_get(ok_url,
                               data='ok',
                               headers={'Referer': referer},
                               cache_limit=.25)
         js_data = scraper_utils.parse_json(html, ok_url)
         stream_url = js_data.get('movie', {}).get('url')
         if stream_url is not None:
             host = urlparse.urlparse(stream_url).hostname
             hoster = {
                 'multi-part': False,
                 'host': host,
                 'class': self,
                 'quality': QUALITIES.HD720,
                 'views': None,
                 'rating': None,
                 'url': stream_url,
                 'direct': False,
                 'subs': 'Turkish Subtitles'
             }
             hosters.append(hoster)
     return hosters

예제 #2

0

파일 보기

파일: myddl_scraper.py 프로젝트: uguer30/Project

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        if video_type == VIDEO_TYPES.TVSHOW and title:
            test_url = '/tvshow/' % (scraper_utils.to_slug(title))
            test_url = scraper_utils.urljoin(self.base_url, test_url)
            html = self._http_get(test_url,
                                  require_debrid=True,
                                  cache_limit=24)
            posts = dom_parser2.parse_dom(html, 'div',
                                          {'id': re.compile('post-\d+')})
            if posts:
                result = {
                    'url': scraper_utils.pathify_url(test_url),
                    'title': scraper_utils.cleanse_title(title),
                    'year': ''
                }
                results.append(result)
        elif video_type == VIDEO_TYPES.MOVIE:
            search_title = re.sub('[^A-Za-z0-9 ]', '', title.lower())
            html = self._http_get(self.base_url,
                                  params={'s': search_title},
                                  require_debrid=True,
                                  cache_limit=1)
            norm_title = scraper_utils.normalize_title(title)
            for _attrs, post in dom_parser2.parse_dom(
                    html, 'div', {'id': re.compile('post-\d+')}):
                match = re.search(
                    '<h\d+[^>]*>\s*<a\s+href="([^"]+)[^>]*>(.*?)</a>', post)
                if match:
                    post_url, post_title = match.groups()
                    if '/tv-show/' in post or self.__too_old(post): continue
                    post_title = re.sub('<[^>]*>', '', post_title)
                    meta = scraper_utils.parse_movie_link(post_title)
                    full_title = '%s [%s] (%sp)' % (
                        meta['title'], meta['extra'], meta['height'])
                    match_year = meta['year']

                    match_norm_title = scraper_utils.normalize_title(
                        meta['title'])
                    if (match_norm_title in norm_title or norm_title
                            in match_norm_title) and (not year
                                                      or not match_year
                                                      or year == match_year):
                        result = {
                            'url': scraper_utils.pathify_url(post_url),
                            'title': scraper_utils.cleanse_title(full_title),
                            'year': match_year
                        }
                        results.append(result)

        return results

예제 #3

0

파일 보기

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = '/search/' + urllib.quote_plus(title)
        html = self._http_get(search_url, require_debrid=True, cache_limit=1)
        if video_type == VIDEO_TYPES.TVSHOW:
            seen_urls = {}
            for _attr, post in dom_parser2.parse_dom(
                    html, 'div', {'id': re.compile('post-\d+')}):
                if CATEGORIES[video_type] not in post: continue
                match = re.search(
                    '<span>\s*TAGS:\s*</span>\s*<a\s+href="([^"]+)[^>]+>([^<]+)',
                    post, re.I)
                if match:
                    show_url, match_title = match.groups()
                    if show_url in seen_urls: continue
                    result = {
                        'url': scraper_utils.pathify_url(show_url),
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': ''
                    }
                    seen_urls[show_url] = result
                    results.append(result)
        elif video_type == VIDEO_TYPES.MOVIE:
            norm_title = scraper_utils.normalize_title(title)
            headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>',
                                  html)
            posts = [
                result.content for result in dom_parser2.parse_dom(
                    html, 'div', {'id': re.compile('post-\d+')})
            ]
            for heading, post in zip(headings, posts):
                if CATEGORIES[video_type] not in post or self.__too_old(post):
                    continue
                post_url, post_title = heading
                meta = scraper_utils.parse_movie_link(post_title)
                full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'],
                                                meta['height'])
                match_year = meta['year']

                match_norm_title = scraper_utils.normalize_title(meta['title'])
                if (match_norm_title in norm_title or norm_title
                        in match_norm_title) and (not year or not match_year
                                                  or year == match_year):
                    result = {
                        'url': scraper_utils.pathify_url(post_url),
                        'title': scraper_utils.cleanse_title(full_title),
                        'year': match_year
                    }
                    results.append(result)

        return results

예제 #4

0

파일 보기

파일: dizipas_scraper.py 프로젝트: uguer30/Project

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        xml_url = scraper_utils.urljoin(self.base_url, '/series.xml')
        xml = self._http_get(xml_url, cache_limit=24)
        if not xml: return results
        try:
            norm_title = scraper_utils.normalize_title(title)
            match_year = ''
            for element in ET.fromstring(xml).findall('.//dizi'):
                name = element.find('adi')
                if name is not None and norm_title in scraper_utils.normalize_title(
                        name.text):
                    url = element.find('url')
                    if url is not None and (not year or not match_year
                                            or year == match_year):
                        result = {
                            'url': scraper_utils.pathify_url(url.text),
                            'title': scraper_utils.cleanse_title(name.text),
                            'year': ''
                        }
                        results.append(result)
        except (ParseError, ExpatError) as e:
            logger.log('Dizilab Search Parse Error: %s' % (e),
                       log_utils.LOGWARNING)

        return results

예제 #5

0

파일 보기

파일: movienight_scraper.py 프로젝트: uguer30/Project

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        html = self._http_get(self.base_url,
                              params={'s': title},
                              cache_limit=4)
        for _attrs, movie in dom_parser2.parse_dom(html, 'div',
                                                   {'class': 'movie'}):
            match_url = dom_parser2.parse_dom(movie, 'a', req='href')
            match_title_year = dom_parser2.parse_dom(movie, 'img', req='alt')
            if match_url and match_title_year:
                match_url = match_url[0].attrs['href']
                if re.search('season-\d+-episode\d+', match_url): continue
                match_title_year = match_title_year[0].attrs['alt']

                match_title, match_year = scraper_utils.extra_year(
                    match_title_year)
                if not match_year:
                    match_year = dom_parser2.parse_dom(movie, 'div',
                                                       {'class': 'year'})
                    try:
                        match_year = match_year[0].content
                    except:
                        match_year = ''

                if not year or not match_year or year == match_year:
                    result = {
                        'url': scraper_utils.pathify_url(match_url),
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': match_year
                    }
                    results.append(result)

        return results

예제 #6

0

파일 보기

    def search(self, video_type, title, year, season=''):
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/movies/search')
        html = self._http_get(search_url, params={'s': title}, cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'item_movie'}):
            match = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
            if not match: continue
            
            match_title_year = match[0].attrs['title']
            match_url = match[0].attrs['href']
            is_season = re.search('S(?:eason\s+)?(\d+)', match_title_year, re.I)
            match_vt = video_type == (VIDEO_TYPES.MOVIE and not is_season) or (video_type == VIDEO_TYPES.SEASON and is_season)
            match_year = ''
            if video_type == VIDEO_TYPES.SEASON:
                if not season and not match_vt: continue
                if match_vt:
                    if season and int(is_season.group(1)) != int(season): continue
                else:
                    if season and int(season) != 1: continue
                    site_title, site_year = scraper_utils.extra_year(match_title_year)
                    if scraper_utils.normalize_title(site_title) not in scraper_utils.normalize_title(title) or year != site_year: continue
                    
                match_title = match_title_year
            else:
                if not match_vt: continue
                match_title, match_year = scraper_utils.extra_year(match_title_year)

            match_url = scraper_utils.urljoin(match_url, 'watching.html')
            if not year or not match_year or year == match_year:
                result = {'url': scraper_utils.pathify_url(match_url), 'title': scraper_utils.cleanse_title(match_title), 'year': match_year}
                results.append(result)
        return results

예제 #7

0

파일 보기

파일: moviehut_scraper.py 프로젝트: uguer30/Project

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url,
                                           '/bestmatch-fund-movies-%s.html')
        search_title = title.replace(' ', '-')
        search_title = re.sub('[^A-Za-z0-9-]', '', search_title).lower()
        search_url = search_url % (search_title)
        html = self._http_get(search_url, cache_limit=1)
        for _attrs, item in dom_parser2.parse_dom(html, 'div',
                                                  {'class': 'thumbsTitle'}):
            match = dom_parser2.parse_dom(item, 'a', req='href')
            if not match: continue

            match_url, match_title_year = match[0].attrs['href'], match[
                0].content
            match_title, match_year = scraper_utils.extra_year(
                match_title_year)
            if (not year or not match_year or year == match_year):
                result = {
                    'url': scraper_utils.pathify_url(match_url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)

        return results

예제 #8

0

파일 보기

파일: snagfilms_scraper.py 프로젝트: uguer30/Project

 def search(self, video_type, title, year, season=''):  # @UnusedVariable
     results = []
     search_url = scraper_utils.urljoin(self.base_url, SEARCH_URL)
     referer = scraper_utils.urljoin(self.base_url, '/search/?q=%s')
     referer = referer % (urllib.quote_plus(title))
     headers = {'Referer': referer}
     headers.update(XHR)
     params = {
         'searchTerm': title,
         'type': SEARCH_TYPES[video_type],
         'limit': 500
     }
     html = self._http_get(search_url,
                           params=params,
                           headers=headers,
                           auth=False,
                           cache_limit=2)
     js_data = scraper_utils.parse_json(html, search_url)
     if 'results' in js_data:
         for result in js_data['results']:
             match_year = str(result.get('year', ''))
             match_url = result.get('permalink', '')
             match_title = result.get('title', '')
             if not year or not match_year or year == match_year:
                 result = {
                     'title': scraper_utils.cleanse_title(match_title),
                     'year': match_year,
                     'url': scraper_utils.pathify_url(match_url)
                 }
                 results.append(result)
     return results

예제 #9

0

파일 보기

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        self.__get_token()
        if self.__token is None: return results
        
        search_url, u = self.__get_search_url()
        search_url = scraper_utils.urljoin(API_BASE_URL, search_url)
        timestamp = int(time.time() * 1000)
        s = self.__get_s()
        query = {'q': title, 'limit': '100', 'timestamp': timestamp, 'verifiedCheck': self.__token, 'set': s, 'rt': self.__get_rt(self.__token + s),
                 'sl': self.__get_sl(u)}
        headers = {'Referer': self.base_url}
        html = self._http_get(search_url, data=query, headers=headers, cache_limit=1)
        if video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]:
            media_type = 'TV SHOW'
        else:
            media_type = 'MOVIE'

        for item in scraper_utils.parse_json(html, search_url):
            if not item['meta'].upper().startswith(media_type): continue
            
            match_year = str(item['year']) if 'year' in item and item['year'] else ''
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(item['title']), 'url': scraper_utils.pathify_url(item['permalink'].replace('/show/', '/tv-show/')), 'year': match_year}
                results.append(result)

        return results

예제 #10

0

파일 보기

    def search(self, video_type, title, year, season=''):
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/search/')
        title = re.sub('[^A-Za-z0-9 ]', '', title)
        search_url += '%s.html' % (urllib.quote_plus(title))
        html = self._http_get(search_url, cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'ml-item'}):
            match_title = dom_parser2.parse_dom(item, 'span', {'class': 'mli-info'})
            match_url = dom_parser2.parse_dom(item, 'a', req='href')
            match_year = re.search('class="jt-info">(\d{4})<', item)
            is_episodes = dom_parser2.parse_dom(item, 'span', {'class': 'mli-eps'})
            
            if (video_type == VIDEO_TYPES.MOVIE and not is_episodes) or (video_type == VIDEO_TYPES.SEASON and is_episodes):
                if not match_title or not match_url: continue
                
                match_url = match_url[0].attrs['href']
                match_title = match_title[0].content
                match_title = re.sub('</?h2>', '', match_title)
                match_title = re.sub('\s+\d{4}$', '', match_title)
                if video_type == VIDEO_TYPES.SEASON:
                    if season and not re.search('Season\s+0*%s$' % (season), match_title): continue
                    
                match_year = match_year.group(1) if match_year else ''
                if not year or not match_year or year == match_year:
                    result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year, 'url': scraper_utils.pathify_url(match_url)}
                    results.append(result)

        return results

예제 #11

0

파일 보기

파일: vidics_scraper.py 프로젝트: uguer30/Project

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        search_url = '/Category-FilmsAndTV/Genre-Any/Letter-Any/ByPopularity/1/Search-%s.htm' % (
            urllib.quote(title))
        search_url = scraper_utils.urljoin(self.base_url, search_url)
        html = self._http_get(search_url, cache_limit=10)

        results = []
        for _attrs, result in dom_parser2.parse_dom(html, 'div',
                                                    {'class': 'searchResult'}):
            match_url = dom_parser2.parse_dom(result,
                                              'a', {'itemprop': 'url'},
                                              req='href')
            match_title = dom_parser2.parse_dom(result, 'span',
                                                {'itemprop': 'name'})
            match_year = dom_parser2.parse_dom(result, 'span',
                                               {'itemprop': 'copyrightYear'})
            match_year = match_year[0].content if match_year else ''

            if match_url and match_title and (not year or not match_year
                                              or year == match_year):
                match_url = match_url[0].attrs['href']
                match_title = match_title[0].content
                if FRAGMENTS[video_type] not in match_url.lower(): continue
                result = {
                    'url': scraper_utils.pathify_url(match_url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)
        return results

예제 #12

0

파일 보기

파일: movietube_scraper.py 프로젝트: uguer30/Project

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/index.php')
        data = {'subaction': 'search', 'story': title, 'do': 'search'}
        headers = {'Referer': search_url}
        html = self._http_get(search_url,
                              params={'do': 'search'},
                              data=data,
                              headers=headers,
                              cache_limit=1)
        fragment = dom_parser2.parse_dom(html, 'div', {'id': 'dle-content'})
        if not fragment: return results

        for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'div',
                                                  {'class': 'short-film'}):
            match = re.search('<h5><a\s+href="([^"]+)[^>]+title="([^"]+)',
                              item)
            if not match: continue

            url, match_title = match.groups('')
            result = {
                'url': scraper_utils.pathify_url(url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': ''
            }
            results.append(result)

        return results

예제 #13

0

파일 보기

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(
            self.base_url, '/search/%s' % (urllib.quote(title)))
        html = self._http_get(search_url, cache_limit=8)
        fragment = dom_parser2.parse_dom(html, 'div', {'id': 'who-likes'})
        if not fragment: return results

        fragment = fragment[0].content
        match_url = dom_parser2.parse_dom(fragment, 'a', req='href')
        match_title_year = dom_parser2.parse_dom(fragment, 'img', req='alt')
        if match_url and match_title_year:
            match_url = match_url[0].attrs['href']
            match_title_year = match_title_year[0].attrs['alt']
            match_title, match_year = scraper_utils.extra_year(
                match_title_year)
            if not year or not match_year or year == match_year:
                result = {
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year,
                    'url': scraper_utils.pathify_url(match_url)
                }
                results.append(result)

        return results

예제 #14

0

파일 보기

파일: onlinemoviespro_scraper.py 프로젝트: Lhse44/repository.deallen

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        html = self._http_get(self.base_url,
                              params={'s': title},
                              cache_limit=1)
        if re.search('Sorry, but nothing matched', html, re.I): return results

        fragment = dom_parser2.parse_dom(html, 'ul',
                                         {'class': 'listing-videos'})
        if not fragment: return results

        for attrs, match_title_year in dom_parser2.parse_dom(
                fragment[0].content, 'a', req='href'):
            match_url = attrs['href']
            match_title_year = re.sub('</?[^>]*>', '', match_title_year)
            match_title, match_year = scraper_utils.extra_year(
                match_title_year)
            if not year or not match_year or year == match_year:
                result = {
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year,
                    'url': scraper_utils.pathify_url(match_url)
                }
                results.append(result)

        return results

예제 #15

0

파일 보기

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        seen_urls = set()
        search_url = scraper_utils.urljoin(self.base_url, '/search/')
        html = self._http_get(search_url, cache_limit=48)
        norm_title = scraper_utils.normalize_title(title)
        for _attrs, item in dom_parser2.parse_dom(html, 'div',
                                                  {'class': 'category-post'}):
            match_url = dom_parser2.parse_dom(item, 'a', req='href')
            match_title = dom_parser2.parse_dom(item, 'h3')
            if match_url and match_title:
                match_url = scraper_utils.pathify_url(
                    match_url[0].attrs['href'])
                match_title = match_title[0].content
                if match_url in seen_urls: continue
                seen_urls.add(match_url)
                if norm_title in scraper_utils.normalize_title(match_title):
                    result = {
                        'url': match_url,
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': ''
                    }
                    results.append(result)

        return results

예제 #16

0

파일 보기

파일: tvonline_scraper.py 프로젝트: uguer30/Project

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        if title:
            first_letter = title[:1].lower()
            if first_letter.isdigit(): first_letter = '0-9'
            search_url = '/alphabet/%s/' % (first_letter)
            search_url = urlparse.urljoin(self.base_url, search_url)
            html = self._http_get(search_url, cache_limit=24)
            fragment = dom_parser.parse_dom(html, 'div', {'class': 'home'})
            if fragment:
                norm_title = scraper_utils.normalize_title(title)
                for match in re.finditer('''href=["']([^'"]+)[^>]+>([^<]+)''',
                                         fragment[0]):
                    url, match_title_year = match.groups()
                    match_title, match_year = scraper_utils.extra_year(
                        match_title_year)
                    if norm_title in scraper_utils.normalize_title(
                            match_title) and (not year or not match_year
                                              or year == match_year):
                        result = {
                            'url': scraper_utils.pathify_url(url),
                            'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year
                        }
                        results.append(result)

        return results

예제 #17

0

파일 보기

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        html = self._http_get(self.base_url,
                              params={'s': title},
                              cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(html, 'div',
                                                  {'class': 'result-item'}):
            match = dom_parser2.parse_dom(item, 'div', {'class': 'title'})
            is_movie = dom_parser2.parse_dom(item, 'span', {'class': 'movies'})
            if not is_movie or not match: return results

            match = dom_parser2.parse_dom(match[0].content, 'a', req='href')
            if not match: return results

            match_url, match_title_year = match[0].attrs['href'], match[
                0].content
            match_title, match_year = scraper_utils.extra_year(
                match_title_year)
            if not match_year:
                match_year = dom_parser2.parse_dom(item, 'span',
                                                   {'class': 'year'})
                match_year = match_year[0].content if match_year else ''

            if not year or not match_year or year == match_year:
                result = {
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year,
                    'url': scraper_utils.pathify_url(match_url)
                }
                results.append(result)

        return results

예제 #18

0

파일 보기

파일: streamdor_scraper.py 프로젝트: uguer30/Project

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url,
                                           '/search/searchBoxSuggestion')
        html = self._http_get(search_url,
                              params={
                                  'top': 8,
                                  'query': title
                              },
                              cache_limit=8)
        js_data = scraper_utils.parse_json(html, search_url)
        for item in js_data:
            entityName = match_title_year = item.get('Value', '')
            if entityName:
                match_title, match_year2 = scraper_utils.extra_year(
                    match_title_year)
                match_year = str(item.get('ReleaseYear', ''))
                if not match_year: match_year = match_year2

                match_url = '/ontology/EntityDetails?' + urllib.urlencode(
                    {
                        'entityName': entityName,
                        'ignoreMediaLinkError': 'false'
                    })
                if not year or not match_year or year == match_year:
                    result = {
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': match_year,
                        'url': scraper_utils.pathify_url(match_url)
                    }
                    results.append(result)

        return results

예제 #19

0

파일 보기

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        html = self._http_get(self.base_url,
                              params={'s': title},
                              cache_limit=1)
        if re.search('Sorry, but nothing matched', html, re.I): return results

        norm_title = scraper_utils.normalize_title(title)
        for _attrs, item in dom_parser2.parse_dom(html, 'li',
                                                  {'class': 'box-shadow'}):
            for attrs, _content in dom_parser2.parse_dom(item,
                                                         'a',
                                                         req=['href',
                                                              'title']):
                match_url, match_title_year = attrs['href'], attrs['title']
                if re.search('S\d{2}E\d{2}', match_title_year):
                    continue  # skip episodes
                if re.search('TV\s*SERIES', match_title_year, re.I):
                    continue  # skip shows
                match_title, match_year = scraper_utils.extra_year(
                    match_title_year)
                if (
                        not year or not match_year or year == match_year
                ) and norm_title in scraper_utils.normalize_title(match_title):
                    result = {
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': match_year,
                        'url': scraper_utils.pathify_url(match_url)
                    }
                    results.append(result)

        return results

예제 #20

0

파일 보기

파일: 9movies_scraper.py 프로젝트: uguer30/Project

    def search(self, video_type, title, year, season=''):
        search_url = urlparse.urljoin(
            self.base_url, '/search?keyword=%s' % (urllib.quote_plus(title)))
        html = self._http_get(search_url, cache_limit=1)
        results = []
        match_year = ''
        fragment = dom_parser.parse_dom(html, 'href',
                                        {'class': '[^"]*movie-list[^"]*'})
        if fragment:
            for item in dom_parser.parse_dom(fragment[0], 'div',
                                             {'class': 'item'}):
                links = dom_parser.parse_dom(item,
                                             'a', {'class': 'name'},
                                             ret='href')
                titles = dom_parser.parse_dom(item, 'a', {'class': 'name'})
                is_season = dom_parser.parse_dom(item, 'div',
                                                 {'class': 'status'})
                for match_url, match_title in zip(links, titles):
                    if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (
                            is_season and video_type == VIDEO_TYPES.SEASON):
                        if video_type == VIDEO_TYPES.SEASON:
                            if season and not re.search(
                                    '\s+%s$' % (season), match_title):
                                continue

                        if not year or not match_year or year == match_year:
                            result = {
                                'title':
                                scraper_utils.cleanse_title(match_title),
                                'year': '',
                                'url': scraper_utils.pathify_url(match_url)
                            }
                            results.append(result)

        return results

예제 #21

0

파일 보기

파일: merb.py 프로젝트: uguer30/Project

    def search(self, video_type, title, year, season=''):
        search_url = self.base_url
        if video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]:
            search_url += '/?tv'

        search_url += '/index.php?advanced_search='
        search_url += urllib.quote_plus(title)
        search_url += '&year=' + urllib.quote_plus(str(year))
        search_url += '&advanced_search=Search'

        html = self._http_get(search_url, cache_limit=.25)
        results = []
        for element in dom_parser.parse_dom(html, 'div',
                                            {'class': 'list_box_title'}):
            match = re.search('href="([^"]+)"\s+title="(?:Watch )?([^"]+)',
                              element)
            if match:
                url, match_title_year = match.groups()
                match = re.search('(.*?)(?:\s+\(?\s*(\d{4})\s*\)?)',
                                  match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    match_title = match_title_year
                    match_year = ''

                if not year or not match_year or year == match_year:
                    result = {
                        'url': scraper_utils.pathify_url(url),
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': match_year
                    }
                    results.append(result)
        return results

예제 #22

0

파일 보기

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        html = self._http_get(self.base_url,
                              params={'s': title},
                              cache_limit=1)
        for item in dom_parser.parse_dom(html, 'div', {'class': 'item'}):
            match = re.search('href="([^"]+).*?alt="([^"]+)', item, re.DOTALL)
            if match:
                url, match_title_year = match.groups()
                match_title, match_year = scraper_utils.extra_year(
                    match_title_year)
                if not match_year:
                    year_fragment = dom_parser.parse_dom(
                        item, 'span', {'class': 'year'})
                    if year_fragment:
                        match_year = year_fragment[0]
                    else:
                        match_year = ''

                if not year or not match_year or year == match_year:
                    result = {
                        'url': scraper_utils.pathify_url(url),
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': match_year
                    }
                    results.append(result)
        return results

예제 #23

0

파일 보기

파일: filmovizjia_scraper.py 프로젝트: uguer30/Project

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/search.php')
        if video_type == VIDEO_TYPES.MOVIE:
            params = {
                'all': 'all',
                'searchin': 'mov',
                'subtitles': '',
                'imdbfrom': '',
                'yearrange': '',
                'keywords': title
            }
        else:
            params = {'all': 'all', 'vselect': 'ser', 'keywords': title}
        html = self._http_get(search_url, params=params, cache_limit=8)
        fragment = dom_parser2.parse_dom(html, 'ul', {'class': 'cbp-rfgrid'})
        if not fragment: return results

        for item in dom_parser2.parse_dom(fragment, 'li'):
            match = dom_parser2.parse_dom(item, 'a', req=['title', 'href'])
            if not match: continue

            match_url = match[0].attrs['href']
            match_title_year = match[0].attrs['title']
            match_title, match_year = scraper_utils.extra_year(
                match_title_year)
            if not year or not match_year or year == match_year:
                result = {
                    'url': scraper_utils.pathify_url(match_url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)

        return results

예제 #24

0

파일 보기

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        html = self._http_get(self.base_url,
                              params={'s': title},
                              cache_limit=8)
        for _attrs, item in dom_parser2.parse_dom(
                html, 'div', {'class': 'browse-movie-top'}):
            match = dom_parser2.parse_dom(item, 'a', req='href')
            if match:
                match_url, match_title_year = match[0].attrs['href'], match[
                    0].content
                match_title, match_year = scraper_utils.extra_year(
                    match_title_year)
                if not match_year:
                    div = dom_parser2.parse_dom(item, 'div',
                                                {'class': 'browse-movie-year'})
                    if div: match_year = div[0].content.strip()

                match_url += '?watching'
                if not year or not match_year or year == match_year:
                    result = {
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': match_year,
                        'url': scraper_utils.pathify_url(match_url)
                    }
                    results.append(result)

        return results

예제 #25

0

파일 보기

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url,
                                           'http://dwatchseries.to/search/%s')
        headers = {'Referer': self.base_url}
        headers.update(XHR)
        params = {'ajax': 1, 's': title, 'type': 'TVShows'}
        html = self._http_get(search_url, params=params, cache_limit=8)
        for attrs, match_title in dom_parser2.parse_dom(html, 'a', req='href'):
            match_url = attrs['href']
            match_title = re.sub('</?[^>]*>', '', match_title)
            match = re.search('\((\d{4})\)$', match_url)
            if match:
                match_year = match.group(1)
            else:
                match_year = ''

            if not year or not match_year or year == match_year:
                result = {
                    'url': scraper_utils.pathify_url(match_url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)

        return results

예제 #26

0

파일 보기

파일: dizist_scraper.py 프로젝트: Lhse44/repository.deallen

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        url = scraper_utils.urljoin(self.base_url, '/arsiv/')
        html = self._http_get(url, cache_limit=48)
        norm_title = scraper_utils.normalize_title(title)
        fragment = dom_parser2.parse_dom(html, 'div',
                                         {'class': 'ts-list-content'})
        if not fragment: return results

        items = dom_parser2.parse_dom(fragment[0].content, 'h1',
                                      {'class': 'ts-list-name'})
        details = dom_parser2.parse_dom(fragment[0].content, 'ul')
        for item, detail in zip(items, details):
            match = dom_parser2.parse_dom(item.content, 'a', req='href')
            match_year = re.search('<span>(\d{4})</span>', detail.content)
            if not match: continue

            match_url = match[0].attrs['href']
            match_title = match[0].content
            match_year = match_year.group(1) if match_year else ''

            if norm_title in scraper_utils.normalize_title(match_title):
                result = {
                    'url': scraper_utils.pathify_url(match_url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)

        return results

예제 #27

0

파일 보기

파일: moviesplanet_scraper.py 프로젝트: uguer30/Project

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/ajax/search.php')
        timestamp = int(time.time() * 1000)
        query = {
            'q': title,
            'limit': 100,
            'timestamp': timestamp,
            'verifiedCheck': ''
        }
        html = self._http_get(search_url,
                              data=query,
                              headers=XHR,
                              cache_limit=1)
        if video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]:
            media_type = 'TV SHOW'
        else:
            media_type = 'MOVIE'

        js_data = scraper_utils.parse_json(html, search_url)
        for item in js_data:
            if not item['meta'].upper().startswith(media_type): continue

            result = {
                'title': scraper_utils.cleanse_title(item['title']),
                'url': scraper_utils.pathify_url(item['permalink']),
                'year': ''
            }
            results.append(result)

        return results

예제 #28

0

파일 보기

파일: vumoo_scraper.py 프로젝트: uguer30/Project

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        search_url = scraper_utils.urljoin(self.base_url, '/videos/search/')
        headers = {'Referer': ''}
        html = self._http_get(search_url,
                              params={
                                  'search': title,
                                  've': 1
                              },
                              headers=headers,
                              cache_limit=8)
        for _attrs, article in dom_parser2.parse_dom(html, 'article',
                                                     {'class': 'movie_item'}):
            match = dom_parser2.parse_dom(article,
                                          'a',
                                          req=['href', 'data-title'])
            if match:
                match_url = match[0].attrs['href']
                match_title = match[0].attrs['data-title']
                match_year = ''
                if not year or not match_year or year == match_year:
                    result = {
                        'title': scraper_utils.cleanse_title(match_title),
                        'year': match_year,
                        'url': scraper_utils.pathify_url(match_url)
                    }
                    results.append(result)

        return results

예제 #29

0

파일 보기

파일: tunemovie_scraper.py 프로젝트: Lhse44/repository.deallen

    def search(self, video_type, title, year, season=''):
        search_url = urlparse.urljoin(self.base_url, '/search/%s.html')
        search_url = search_url % (urllib.quote_plus(title))
        html = self._http_get(search_url, cache_limit=8)
        results = []
        for thumb in dom_parser.parse_dom(html, 'div', {'class': 'thumb'}):
            match_title = dom_parser.parse_dom(thumb,
                                               'a', {'class': 'clip-link'},
                                               ret='title')
            url = dom_parser.parse_dom(thumb,
                                       'a', {'class': 'clip-link'},
                                       ret='href')
            if match_title and url:
                match_title, url = match_title[0], url[0]
                is_season = re.search('Season\s+(\d+)$', match_title, re.I)
                if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (
                        is_season and video_type == VIDEO_TYPES.SEASON):
                    match_year = ''
                    if video_type == VIDEO_TYPES.MOVIE:
                        match_year = dom_parser.parse_dom(
                            thumb, 'div', {'class': '[^"]*status-year[^"]*'})
                        if match_year:
                            match_year = match_year[0]
                    else:
                        if season and int(is_season.group(1)) != int(season):
                            continue

                    if not year or not match_year or year == match_year:
                        result = {
                            'url': scraper_utils.pathify_url(url),
                            'title': scraper_utils.cleanse_title(match_title),
                            'year': match_year
                        }
                        results.append(result)
        return results

예제 #30

0

파일 보기

파일: dayt_scraper.py 프로젝트: uguer30/Project

    def search(self, video_type, title, year, season=''):  # @UnusedVariable
        results = []
        page_url = scraper_utils.urljoin(self.base_url, '/search.php')
        html = self._http_get(page_url, params={'dayq': title}, cache_limit=60)
        html = re.sub('<!--.*?-->', '', html)
        norm_title = scraper_utils.normalize_title(title)
        for _attrs, td in dom_parser2.parse_dom(html, 'td',
                                                {'class': 'topic_content'}):
            match_url = dom_parser2.parse_dom(td, 'a', req='href')
            match_title_year = dom_parser2.parse_dom(td, 'img', req='alt')
            if not match_url or not match_title_year: continue

            match_url = match_url[0].attrs['href']
            match_title_year = match_title_year[0].attrs['alt']
            # if not match_url.startswith('/'): match_url = '/tvseries/' + match_url
            match_title, match_year = scraper_utils.extra_year(
                match_title_year)
            if (norm_title in scraper_utils.normalize_title(match_title)) and (
                    not year or not match_year or year == match_year):
                result = {
                    'url': scraper_utils.pathify_url(match_url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)

        return results