def get_info(self, query):
    '''Search the site for `query` and return info on the first matching title.

    The query is submitted through the search form at `self.url` and the
    result items are examined in page order. Items without a score element
    are skipped silently; items whose score or title cannot be extracted
    are logged and skipped.

    :param query: title to search for

    :return: dict with 'rating' (int), 'title' and 'url' keys, plus
        'url_thumbnail' when `self._get_thumbnail_url` yields a value;
        None if the form submission fails or no result matches
    '''
    submitted = self.browser.submit_form(self.url, fields={'search': query})
    if not submitted:
        return None

    title_re = Title(query).get_search_re()
    for item in self.browser.cssselect('#movie_results_ul li', []):
        # Truncated markup, only used to give context in the error logs.
        snippet = html.tostring(item, pretty_print=True)[:1000]

        score_tags = item.cssselect('.tMeterScore')
        if not score_tags:
            continue
        score_match = RE_RATING.search(score_tags[0].text)
        if not score_match:
            logger.error('failed to get rating from "%s"', snippet)
            continue
        rating = int(score_match.group(1))

        anchors = item.cssselect('.nomargin a')
        if not anchors:
            logger.error('failed to get title from %s', snippet)
            continue
        anchor = anchors[0]
        title = clean(anchor.text, 1)
        if not title_re.search(title):
            continue

        # First result whose title matches the query wins.
        found = {
            'rating': rating,
            'title': title,
            'url': urljoin(self.url, anchor.get('href')),
        }
        thumbnail = self._get_thumbnail_url(found['url'])
        if thumbnail:
            found['url_thumbnail'] = thumbnail
        return found

    return None
def get_info(self, artist, album=None):
    '''Return info for `artist`, or for one of its albums when `album` is set.

    :param artist: artist name, passed to `self._get_info`
    :param album: optional album title; when falsy the artist info is
        returned as is

    :return: the artist info when no album is requested; otherwise the
        first entry of the artist info's 'albums' list whose 'title'
        matches `album`, or None when there is no artist info or no
        album matches
    '''
    artist_info = self._get_info(artist)
    if not album:
        return artist_info
    if not artist_info:
        return None

    album_re = Title(album).get_search_re()
    matches = (entry for entry in artist_info['albums']
            if album_re.search(entry['title']))
    return next(matches, None)
def get_info(self, query, category, artist=None):
    '''Search the site for `query` and return info on the first matching result.

    Result items are examined in page order. An item is accepted when its
    result type matches the regex registered for `category` in `CAT_DEF`,
    its title matches `query` and, when `artist` is given, the 'artist'
    value of the collected info matches it.

    :param query: title to search for
    :param category: key into `CAT_DEF`; unknown categories are logged and
        yield None
    :param artist: optional artist name used to filter the results

    :return: dict with 'title' and 'url' keys, an optional 'rating' (the
        average of the scores that could be parsed), merged with whatever
        `self._get_media_info` returns for the result url; None if the
        category is unknown, the form submission fails or nothing matches
    '''
    re_cat = CAT_DEF.get(category)
    if not re_cat:
        logger.error('unknown category %s', category)
        return
    if not self.browser.submit_form(self.url, fields={'search_term': query}):
        return

    re_q = Title(query).get_search_re()
    re_artist = Title(artist).get_search_re() if artist else None
    for li in self.browser.cssselect('.search_results li.result', []):
        # Truncated markup, only used to give context in the error logs.
        log = html.tostring(li, pretty_print=True)[:1000]

        type_ = li.cssselect('.result_type')
        if not type_:
            logger.error('failed to get type from %s', log)
            continue
        if not re_cat.search(clean(type_[0][0].text, 1)):
            continue

        title_ = li.cssselect('.product_title a')
        if not title_:
            logger.error('failed to get title from %s', log)
            continue
        title = clean(title_[0].text, 1)
        if not re_q.search(title):
            continue

        # Build a fresh dict for every candidate: with a single shared dict,
        # the rating and media info of a candidate rejected by the artist
        # filter would leak into the next candidate (stale 'rating', or a
        # stale 'artist' value satisfying the filter).
        info = {
            'title': title,
            'url': urljoin(self.url, title_[0].get('href')),
        }

        scores = []
        rating_ = li.cssselect('.metascore')
        if rating_:
            try:
                scores.append(int(rating_[0].text))
            except ValueError:
                # Values matching RE_NA_SCORE are expected non-numeric
                # placeholders (presumably "not available") and not logged.
                if not RE_NA_SCORE.search(rating_[0].text):
                    logger.error('failed to get metascore from "%s"', log)
        rating_ = li.cssselect('.textscore')
        if rating_:
            try:
                # The user score is multiplied by 10 before being averaged
                # with the metascore.
                scores.append(int(float(rating_[0].text) * 10))
            except ValueError:
                if not RE_NA_SCORE.search(rating_[0].text):
                    logger.error('failed to get user score from %s',
                            html.tostring(rating_[0]))
        if scores:
            info['rating'] = sum(scores) / len(scores)

        info.update(self._get_media_info(info['url']))
        if re_artist and not re_artist.search(info.get('artist', '')):
            continue
        return info
def test_search_movies(self):
    # Every (query, title) pair in fixtures_movies must match the search
    # regex built from the query in '__all__' mode; every pair in
    # fixtures_movies_err must not.
    def build_re(query):
        return Title(query).get_search_re(mode='__all__')

    for query, title in self.fixtures_movies:
        regex = build_re(query)
        found = regex.search(title)
        self.assertTrue(found, '"%s" (%s) should match "%s"' % (
                query, regex.pattern, title))

    for query, title in self.fixtures_movies_err:
        regex = build_re(query)
        found = regex.search(title)
        self.assertFalse(found, '"%s" (%s) should not match "%s"' % (
                query, regex.pattern, title))
def test_search_tv(self):
    # Every (query, title) pair in fixtures_tv must match the default search
    # regex built from the query; every pair in fixtures_tv_err must not.
    def build_re(query):
        return Title(query).get_search_re()

    for query, title in self.fixtures_tv:
        regex = build_re(query)
        found = regex.search(title)
        self.assertTrue(found, '"%s" (%s) should match "%s"' % (
                query, regex.pattern, title))

    for query, title in self.fixtures_tv_err:
        regex = build_re(query)
        found = regex.search(title)
        self.assertFalse(found, '"%s" (%s) should not match "%s"' % (
                query, regex.pattern, title))
def get_track(self, artist, album):
    '''Return the first complete search result whose title matches `artist`.

    The search query is "<artist> <album>" (both cleaned first). Results
    with a falsy 'title', 'url_watch' or 'urls_thumbnails' are ignored.
    Note that only the artist is used to build the title regex.

    :param artist: artist name
    :param album: album title, only used in the search query

    :return: the matching result, or None when nothing matches
    '''
    artist = clean(artist)
    album = clean(album)
    artist_re = Title(artist).get_search_re(mode='__all__')

    required_keys = ('title', 'url_watch', 'urls_thumbnails')
    for result in self.results('%s %s' % (artist, album)):
        is_complete = all(result[key] for key in required_keys)
        if is_complete and artist_re.search(result['title']):
            return result
    return None
def get_info(self, artist, album=None, pages_max=MAX_ALBUMS_PAGES):
    '''Return info for `artist`, or for one of its albums when `album` is set.

    :param artist: artist name, passed to `self._get_info`
    :param album: optional album title; when falsy the artist info is
        returned as is
    :param pages_max: passed to `self._get_info` together with `artist`

    :return: None when `self.accessible` is falsy; the artist info when no
        album is requested; otherwise the first entry of the artist info's
        'albums' list whose 'title' matches `album`, or None when there is
        no artist info or no album matches
    '''
    if not self.accessible:
        return None

    artist_info = self._get_info(artist, pages_max)
    if not album:
        return artist_info
    if not artist_info:
        return None

    album_re = Title(album).get_search_re()
    matches = (entry for entry in artist_info['albums']
            if album_re.search(entry['title']))
    return next(matches, None)
def _get_artist_url(self, artist):
    '''Return the url of the first listed artist whose name matches `artist`.

    Opens the results url given by `self._get_results_url` and checks the
    first link of each '.artistsWithInfo li' item.

    :param artist: artist name to look for

    :return: the cleaned link target joined with `self.url`, or None when
        there is no results url or no listed name matches
    '''
    results_url = self._get_results_url(artist)
    if not results_url:
        return None

    name_re = Title(artist).get_search_re()
    self.browser.open(results_url)
    for item in self.browser.cssselect('.artistsWithInfo li', []):
        anchors = item.cssselect('a')
        if not anchors:
            continue
        anchor = anchors[0]
        # The link text is extracted from the serialized anchor markup.
        label = clean(self.get_link_text(html.tostring(anchor)))
        if not name_re.search(label):
            continue
        return urljoin(self.url, self._clean_url(anchor.get('href')))
    return None
def _get_torrent_url(self, query, url):
    '''Return the first torrent url whose 'dn' value matches `query`.

    Walks every url from `self._mirror_urls(url)` and, for each, every url
    from `self._torrent_urls`. Urls for which `parse_magnet_url` returns a
    falsy value or no 'dn' entry are ignored.

    :param query: title to look for
    :param url: starting url passed to `self._mirror_urls`

    :return: the matching torrent url, or None when nothing matches
    '''
    query_re = Title(query).get_search_re(mode='__lazy__')
    for mirror in self._mirror_urls(url):
        for candidate in self._torrent_urls(mirror):
            params = parse_magnet_url(candidate)
            if params and 'dn' in params:
                name = clean(params['dn'][0])
                # match(), not search(): the name must match from its start.
                if query_re.match(name):
                    return candidate
    return None
def get_trailer(self, title, date=None):
    '''Return the first usable search result whose title matches `title`.

    Queries are tried in this order: "<title> <date> trailer" (only when
    `date` is given), "<title> trailer", then "<title>" alone. A result is
    usable when both its 'url_watch' and 'urls_thumbnails' are truthy.

    :param title: title to look for (cleaned before use)
    :param date: optional value added to the first query

    :return: the matching result, or None when nothing matches
    '''
    title = clean(title)
    title_re = Title(title).get_search_re(mode='__all__')

    # Most specific query first.
    queries = []
    if date:
        queries.append('%s %s trailer' % (title, date))
    queries.append('%s trailer' % title)
    queries.append(title)

    for query in queries:
        for result in self.results(query):
            if (title_re.search(clean(result['title']))
                    and result['url_watch']
                    and result['urls_thumbnails']):
                return result
    return None
def _get_urls(self, query, type='title'):
    '''Return the urls of kind `type` found when searching for `query`.

    When the url reached after submitting the search form already matches
    `RE_URLS[type]`, that url alone is returned. Otherwise the
    '.result_text a' links whose text matches `query` are collected,
    keeping only those whose target matches `RE_URLS[type]`.

    :param query: text to search for
    :param type: key into `RE_URLS` selecting the expected url pattern

    :return: list of urls; empty when the form submission fails or nothing
        matches
    '''
    # NOTE(review): presumably set to get English results - confirm.
    self.browser.addheaders = [('Accept-Language', 'en-US,en')]
    if not self.browser.submit_form(self.url, fields={'q': query}):
        return []

    landing_url = self.browser.geturl()
    if RE_URLS[type].search(landing_url):
        return [landing_url]

    name_re = Title(query).get_search_re()
    found = []
    for link in self.browser.cssselect('.result_text a', []):
        if name_re.search(clean(link.text)):
            target = urljoin(self.url, link.get('href'))
            if RE_URLS[type].search(target):
                found.append(target)
    return found
def search(cls, name, category, **kwargs):
    '''Get media matching the parameters.

    Builds a query spec and returns the documents `cls.find` yields for it.
    Name, artist and album are matched with case-insensitive regexes built
    from `Title(...).get_search_pattern()`.

    :param name: movie full name, tv show name or music artist, depending
        on `category`
    :param category: 'movies', 'tv', 'anime' (searched as 'tv') or 'music';
        any other value only filters on 'info.subtype'
    :param kwargs: optional filters: 'season' and 'episode' (tv and anime),
        'album' (music); falsy values are ignored

    :return: list of the results of `cls.find`
    '''
    def icase(pattern):
        # Case-insensitive regex clause for the query spec.
        return {'$regex': pattern, '$options': 'i'}

    spec = {'info.subtype': category}
    title = Title(name)

    if category == 'movies':
        spec['info.full_name'] = icase(title.get_search_pattern())

    elif category in ('tv', 'anime'):
        spec['info.subtype'] = 'tv'
        spec['info.name'] = icase(title.get_search_pattern(category='tv'))
        season = kwargs.get('season')
        if season:
            spec['info.season'] = str(season)
        episode = kwargs.get('episode')
        if episode:
            # Accept any number of leading zeros before the episode value.
            spec['info.episode'] = {'$regex': '^0*%s$' % episode}

    elif category == 'music':
        spec['info.artist'] = icase(title.get_search_pattern())
        album = kwargs.get('album')
        if album:
            spec['info.album'] = icase(Title(album).get_search_pattern())

    return list(cls.find(spec))