Beispiel #1
0
    def get_info(self, query):
        '''Search the site for a movie and return info on the first match.

        Submits `query` through the search form at `self.url` and walks the
        `#movie_results_ul li` result items in page order.  Items without a
        `.tMeterScore` element, or whose title does not match the query
        regex, are skipped silently; items whose rating cannot be parsed or
        that have no title link are logged and skipped.

        :param query: title to search for; also used to build the regex a
            result title must match.

        :return: dict with `rating` (int), `title`, `url` and, when
            `_get_thumbnail_url` returns something truthy, `url_thumbnail`;
            None if the form submission failed or no item qualified.
        '''
        if not self.browser.submit_form(self.url,
                fields={'search': query}):
            return

        re_q = Title(query).get_search_re()
        for li in self.browser.cssselect('#movie_results_ul li', []):
            # Truncated markup dump, only used in error messages.
            log = html.tostring(li, pretty_print=True)[:1000]

            rating_ = li.cssselect('.tMeterScore')
            if not rating_:
                continue
            # An element without text has `.text` set to None; fall back to
            # an empty string so the item is logged and skipped instead of
            # the regex search raising a TypeError.
            res = RE_RATING.search(rating_[0].text or '')
            if not res:
                logger.error('failed to get rating from "%s"', log)
                continue
            rating = int(res.group(1))

            title_ = li.cssselect('.nomargin a')
            if not title_:
                logger.error('failed to get title from %s', log)
                continue
            title = clean(title_[0].text, 1)
            if not re_q.search(title):
                continue

            # Build the result from scratch for the matching item so that
            # nothing from previously inspected items can leak into it.
            info = {
                'rating': rating,
                'title': title,
                'url': urljoin(self.url, title_[0].get('href')),
            }

            url = self._get_thumbnail_url(info['url'])
            if url:
                info['url_thumbnail'] = url
            return info
Beispiel #2
0
 def get_info(self, artist, album=None):
     '''Return info for an artist, or for one of the artist's albums.

     Without `album`, the result of `self._get_info(artist)` is returned
     unchanged.  With `album`, the first entry under the artist info's
     'albums' key whose 'title' matches the album search regex is
     returned; None when the artist info is empty or no album matches.
     '''
     artist_info = self._get_info(artist)
     if not album:
         return artist_info
     if not artist_info:
         return None

     album_re = Title(album).get_search_re()
     for entry in artist_info['albums']:
         if album_re.search(entry['title']):
             return entry
     return None
Beispiel #3
0
    def get_info(self, query, category, artist=None):
        '''Search the site and return info on the first matching result.

        Submits `query` through the search form at `self.url` and walks the
        `.search_results li.result` items in page order.  A result is
        returned when its type matches the regex registered for `category`
        in `CAT_DEF`, its title matches the query regex and, if `artist` is
        given, the 'artist' value supplied by `_get_media_info` matches the
        artist regex.

        :param query: title to search for.
        :param category: key into `CAT_DEF`; an unknown key is logged and
            ends the search.
        :param artist: optional artist name the result must match.

        :return: dict with `title`, `url`, optionally `rating` (mean of the
            scores that could be parsed: the `.metascore` value and the
            `.textscore` value multiplied by 10) plus whatever
            `_get_media_info` returns; None if the category is unknown, the
            form submission failed or nothing matched.
        '''
        re_cat = CAT_DEF.get(category)
        if not re_cat:
            logger.error('unknown category %s', category)
            return
        if not self.browser.submit_form(self.url,
                fields={'search_term': query}):
            return

        re_q = Title(query).get_search_re()
        re_artist = Title(artist).get_search_re() if artist else None
        for li in self.browser.cssselect('.search_results li.result', []):
            # Truncated markup dump, only used in error messages.
            log = html.tostring(li, pretty_print=True)[:1000]

            type_ = li.cssselect('.result_type')
            if not type_:
                logger.error('failed to get type from %s', log)
                continue
            if not re_cat.search(clean(type_[0][0].text, 1)):
                continue

            title_ = li.cssselect('.product_title a')
            if not title_:
                logger.error('failed to get title from %s', log)
                continue
            title = clean(title_[0].text, 1)
            if not re_q.search(title):
                continue

            # Start from an empty dict for every candidate: a result that
            # is rejected by the artist check below must not leave its
            # rating or media info (notably 'artist') behind for the next
            # candidate to inherit.
            info = {
                'title': title,
                'url': urljoin(self.url, title_[0].get('href')),
            }

            scores = []
            rating_ = li.cssselect('.metascore')
            if rating_:
                # `.text` is None for an element without text; use '' so
                # the failure goes through the ValueError path below.
                text = rating_[0].text or ''
                try:
                    scores.append(int(text))
                except ValueError:
                    if not RE_NA_SCORE.search(text):
                        logger.error('failed to get metascore from "%s"', log)
            rating_ = li.cssselect('.textscore')
            if rating_:
                text = rating_[0].text or ''
                try:
                    # Multiplied by 10 before averaging with the metascore.
                    scores.append(int(float(text) * 10))
                except ValueError:
                    if not RE_NA_SCORE.search(text):
                        logger.error('failed to get user score from %s', html.tostring(rating_[0]))
            if scores:
                info['rating'] = sum(scores) / len(scores)

            info.update(self._get_media_info(info['url']))

            if re_artist and not re_artist.search(info.get('artist', '')):
                continue

            return info
Beispiel #4
0
    def test_search_movies(self):
        '''Check the '__all__' search regex against the movie fixtures.

        Every (query, title) pair in `self.fixtures_movies` must match and
        every pair in `self.fixtures_movies_err` must not.
        '''
        def run_search(query, title):
            # Returns the match result and the values used in the message.
            pattern = Title(query).get_search_re(mode='__all__')
            return pattern.search(title), (query, pattern.pattern, title)

        for query, title in self.fixtures_movies:
            found, details = run_search(query, title)
            self.assertTrue(found, '"%s" (%s) should match "%s"' % details)

        for query, title in self.fixtures_movies_err:
            found, details = run_search(query, title)
            self.assertFalse(found, '"%s" (%s) should not match "%s"' % details)
Beispiel #5
0
    def test_search_tv(self):
        '''Check the default search regex against the TV fixtures.

        Every (query, title) pair in `self.fixtures_tv` must match and
        every pair in `self.fixtures_tv_err` must not.
        '''
        for query, title in self.fixtures_tv:
            search_re = Title(query).get_search_re()
            matched = search_re.search(title)
            message = '"%s" (%s) should match "%s"' % (
                    query, search_re.pattern, title)
            self.assertTrue(matched, message)

        for query, title in self.fixtures_tv_err:
            search_re = Title(query).get_search_re()
            matched = search_re.search(title)
            message = '"%s" (%s) should not match "%s"' % (
                    query, search_re.pattern, title)
            self.assertFalse(matched, message)
Beispiel #6
0
    def get_track(self, artist, album):
        '''Return the first complete search result matching the artist.

        Searches for "<artist> <album>" (both cleaned) and returns the first
        result that has a truthy 'title', 'url_watch' and 'urls_thumbnails'
        and whose title matches the '__all__' search regex built from the
        cleaned artist name.  Returns None when no result qualifies.
        '''
        artist, album = clean(artist), clean(album)
        artist_re = Title(artist).get_search_re(mode='__all__')

        for candidate in self.results('%s %s' % (artist, album)):
            is_complete = (candidate['title'] and candidate['url_watch']
                    and candidate['urls_thumbnails'])
            if is_complete and artist_re.search(candidate['title']):
                return candidate
        return None
Beispiel #7
0
 def get_info(self, artist, album=None, pages_max=MAX_ALBUMS_PAGES):
     '''Return info for an artist, or for one of the artist's albums.

     Returns None immediately when `self.accessible` is falsy.  Without
     `album`, the result of `self._get_info(artist, pages_max)` is
     returned unchanged.  With `album`, the first entry under the artist
     info's 'albums' key whose 'title' matches the album search regex is
     returned; None when the artist info is empty or no album matches.
     '''
     if not self.accessible:
         return None

     artist_info = self._get_info(artist, pages_max)
     if not album:
         return artist_info
     if not artist_info:
         return None

     album_re = Title(album).get_search_re()
     matching = (entry for entry in artist_info['albums']
             if album_re.search(entry['title']))
     return next(matching, None)
Beispiel #8
0
 def _get_artist_url(self, artist):
     '''Return the absolute URL of the artist page, or None.

     Opens the results URL obtained from `_get_results_url(artist)` and
     inspects the first link of each `.artistsWithInfo li` item.  The
     first link whose cleaned text matches the artist search regex wins;
     its href is passed through `_clean_url` and joined with `self.url`.
     Returns None when there is no results URL or no link matches.
     '''
     results_url = self._get_results_url(artist)
     if not results_url:
         return None

     name_re = Title(artist).get_search_re()
     self.browser.open(results_url)
     for item in self.browser.cssselect('.artistsWithInfo li', []):
         anchors = item.cssselect('a')
         if not anchors:
             continue
         first = anchors[0]
         label = clean(self.get_link_text(html.tostring(first)))
         if name_re.search(label):
             href = self._clean_url(first.get('href'))
             return urljoin(self.url, href)
     return None
Beispiel #9
0
    def _get_torrent_url(self, query, url):
        '''Return the first torrent URL whose name matches `query`, or None.

        Iterates over every mirror of `url` and every torrent URL found on
        each mirror.  A torrent URL is accepted when `parse_magnet_url`
        yields a result containing a 'dn' entry and the cleaned first 'dn'
        value matches the '__lazy__' search regex via `match()` (not
        `search()`).
        '''
        query_re = Title(query).get_search_re(mode='__lazy__')

        for mirror in self._mirror_urls(url):
            for candidate in self._torrent_urls(mirror):
                magnet = parse_magnet_url(candidate)
                if magnet and 'dn' in magnet:
                    name = clean(magnet['dn'][0])
                    if query_re.match(name):
                        return candidate
        return None
Beispiel #10
0
    def get_trailer(self, title, date=None):
        '''Return the first search result that looks like the title's trailer.

        Tries up to three queries in order: "<title> <date> trailer" (only
        when `date` is given), "<title> trailer" and "<title>".  The first
        result whose cleaned title matches the '__all__' search regex and
        that has a truthy 'url_watch' and 'urls_thumbnails' is returned.
        Returns None when no result qualifies.
        '''
        title = clean(title)
        title_re = Title(title).get_search_re(mode='__all__')

        # Ordered from the most specific query to the least specific one.
        queries = []
        if date:
            queries.append('%s %s trailer' % (title, date))
        queries.append('%s trailer' % title)
        queries.append(title)

        for query in queries:
            for candidate in self.results(query):
                if (title_re.search(clean(candidate['title']))
                        and candidate['url_watch']
                        and candidate['urls_thumbnails']):
                    return candidate
        return None
Beispiel #11
0
    def _get_urls(self, query, type='title'):
        '''Return the URLs of the given type found for `query`.

        Replaces the browser headers with an English 'Accept-Language'
        header and submits the search form at `self.url`.  If the browser
        ends up on a URL matching `RE_URLS[type]`, that URL is the only
        result.  Otherwise every `.result_text a` link whose cleaned text
        matches the query search regex and whose absolute URL matches
        `RE_URLS[type]` is collected.

        :return: list of URLs; empty when the form submission failed or
            nothing matched.
        '''
        self.browser.addheaders = [('Accept-Language', 'en-US,en')]
        if not self.browser.submit_form(self.url, fields={'q': query}):
            return []

        landing_url = self.browser.geturl()
        if RE_URLS[type].search(landing_url):
            return [landing_url]

        name_re = Title(query).get_search_re()
        matches = []
        for link in self.browser.cssselect('.result_text a', []):
            if not name_re.search(clean(link.text)):
                continue
            candidate = urljoin(self.url, link.get('href'))
            if RE_URLS[type].search(candidate):
                matches.append(candidate)
        return matches
Beispiel #12
0
    def search(cls, name, category, **kwargs):
        '''Get media matching the parameters.

        Builds a query spec (using '$regex' / '$options' operators) and
        returns `list(cls.find(spec))`.

        :param name: media name; matched case-insensitively against
            'info.full_name' (movies), 'info.name' (tv, anime) or
            'info.artist' (music).
        :param category: stored as 'info.subtype'; 'anime' is searched as
            'tv'.  Any other category filters on the subtype only.
        :param kwargs: optional `season` and `episode` (tv, anime; the
            episode matches regardless of leading zeros) and `album`
            (music); falsy values are ignored.
        '''
        def icase(pattern):
            # Case-insensitive regex condition for one field.
            return {'$regex': pattern, '$options': 'i'}

        spec = {'info.subtype': category}
        name_ = Title(name)

        if category == 'movies':
            spec['info.full_name'] = icase(name_.get_search_pattern())

        elif category in ('tv', 'anime'):
            spec['info.subtype'] = 'tv'
            spec['info.name'] = icase(name_.get_search_pattern(category='tv'))
            season = kwargs.get('season')
            if season:
                spec['info.season'] = str(season)
            episode = kwargs.get('episode')
            if episode:
                spec['info.episode'] = {'$regex': '^0*%s$' % episode}

        elif category == 'music':
            spec['info.artist'] = icase(name_.get_search_pattern())
            album = kwargs.get('album')
            if album:
                spec['info.album'] = icase(Title(album).get_search_pattern())

        return list(cls.find(spec))