def _get_thumbnail_url(self, url):
    '''Return the poster image url found on the page at `url`.

    :param url: address of the page to open and inspect

    :return: the `src` attribute of the first element matching
        '.movie_poster_area img', or None when no such element is found,
        when it has no `src` attribute, or when the src matches
        RE_INVALID_IMG
    '''
    browser = Browser()
    browser.open(url)
    images = browser.cssselect('.movie_poster_area img')
    if not images:
        return None

    src = images[0].get('src')
    # NOTE(review): `.get('src')` presumably returns None when the attribute
    # is absent (lxml-style element - confirm). Passing None to the regex
    # search would raise TypeError, so treat it as "no thumbnail".
    if src is None:
        return None
    if RE_INVALID_IMG.search(src):
        return None
    return src
def _get_media_info(self, url):
    '''Collect artist and genre details from the page at `url`.

    :param url: address of the page to open and inspect

    :return: dict that may hold an 'artist' entry (cleaned text of the
        first '.band_name' element) and a 'genre' entry (list of cleaned
        names taken from the comma separated text of the first
        '.product_genre .data' element); a key is omitted when its
        selector matches nothing
    '''
    browser = Browser()
    browser.open(url)
    details = {}

    artist_nodes = browser.cssselect('.band_name')
    if artist_nodes:
        details['artist'] = clean(artist_nodes[0].text, 1)

    genre_nodes = browser.cssselect('.product_genre .data')
    if genre_nodes:
        genres = []
        for name in genre_nodes[0].text.split(','):
            genres.append(clean(name, 1))
        details['genre'] = genres

    return details
def _torrent_urls(self, url):
    '''Generate the absolute url of each torrent link on the page.

    Links are selected by passing RE_URL_MAGNET as the `url_regex` filter
    of the browser. Nothing is yielded when opening `url` returns a falsy
    value.
    '''
    browser = Browser()
    if not browser.open(url):
        return

    matching_links = browser.links(url_regex=RE_URL_MAGNET)
    for matching_link in matching_links:
        yield matching_link.absolute_url
def _get_collection(url):
    '''Return the checkbox names found in the 'table#r2' rows of a page.

    Rows containing a 'th' element and rows without a checkbox input are
    ignored. A checkbox lacking a usable `name` is logged as an error and
    left out of the result.

    :param url: address of the page to open and inspect

    :return: list of the `name` values, in row order
    '''
    browser = Browser()
    browser.open(url)

    names = []
    # NOTE(review): the second argument looks like the fallback returned
    # when nothing can be selected - confirm against Browser.cssselect.
    rows = browser.cssselect('table#r2 tr', [])
    for row in rows:
        if row.cssselect('th'):
            continue

        checkboxes = row.cssselect('input[type="checkbox"]')
        if checkboxes:
            checkbox = checkboxes[0]
            name = checkbox.get('name')
            if name:
                names.append(name)
            else:
                logger.error('failed to get reference from %s',
                        html.tostring(checkbox, pretty_print=True))

    return names
def _get_download_url(url):
    '''Return the direct download url advertised by a file hosting page.

    Only urls whose host name contains a 'mediafire' label are handled.

    :param url: address of the hosting page

    :return: the first double-quoted 'http://' url found in the markup of
        the first '.download_link' element

    :raises FilestubeError: when the page shows a captcha form or an error
        title, when no download link can be extracted, or when the host is
        not handled
    '''
    browser = Browser()
    host_labels = urlparse(url).netloc.split('.')

    if 'mediafire' in host_labels:
        browser.open(url)

        captcha = browser.cssselect('#form_captcha')
        if captcha:
            raise FilestubeError('failed to get url from %s: captcha' % url)

        error_titles = browser.cssselect('.error_msg_title')
        if error_titles:
            reason = error_titles[0].text
            raise FilestubeError('failed to get download url from %s: %s' % (url, reason))

        download_links = browser.cssselect('.download_link')
        if download_links:
            markup = html.tostring(download_links[0])
            # Only the first quoted url is of interest.
            match = re.search(r'"(http://.*?)"', markup)
            if match is not None:
                return match.group(1)

    raise FilestubeError('failed to get download url from %s' % url)
def _mirror_urls(self, url):
    '''Iterate over mirror urls.

    Each 'div.download dl' entry of the page at `url` is inspected: entries
    whose first 'dd' markup matches RE_SPONSORED_LINK are skipped, and the
    `href` of the first 'dt a' anchor is yielded unless it is missing or
    starts with '/'.

    When the page has no such entries, an error is logged unless the first
    'div.error' element carries text matching RE_ERROR, and nothing is
    yielded.

    :param url: address of the page listing the mirrors
    '''
    browser = Browser()
    browser.open(url)

    results = browser.cssselect('div.download dl')
    if not results:
        errors = browser.cssselect('div.error')
        # The element text may be None (e.g. the message sits in a child
        # element); searching None would raise TypeError.
        error_text = errors[0].text if errors else None
        known_error = error_text is not None and RE_ERROR.search(error_text)
        if not known_error:
            logger.error('failed to get mirror urls from %s', url)
        return

    for result in results:
        # Skip sponsored links
        res = result.cssselect('dd')
        if res and RE_SPONSORED_LINK.search(html.tostring(res[0])):
            continue

        links = result.cssselect('dt a')
        if not links:
            continue
        mirror_url = links[0].get('href')
        # An anchor without href yields None; ignore it instead of failing
        # on `.startswith`.
        if mirror_url is None:
            continue
        if not mirror_url.startswith('/'):
            yield mirror_url
def _get_torrent_url(self, url):
    '''Return the href of the first 'a[title="Magnet Link"]' anchor.

    :param url: address of the page to open and inspect

    :return: the anchor's `href` value, or None when opening `url` returns
        a falsy value or no anchor matches the selector
    '''
    browser = Browser()
    if not browser.open(url):
        return None

    anchors = browser.cssselect('a[title="Magnet Link"]')
    if not anchors:
        return None
    return anchors[0].get('href')
def _get_torrent_url(self, url):
    '''Return the href of the first 'a.btn-magnet' anchor of the page.

    :param url: address of the page to open and inspect

    :return: the anchor's `href` value, or None when opening `url` returns
        a falsy value or no anchor matches the selector
    '''
    browser = Browser()
    opened = browser.open(url)
    if not opened:
        return None

    magnet_anchors = browser.cssselect('a.btn-magnet')
    return magnet_anchors[0].get('href') if magnet_anchors else None