Example #1
    def parse_download_page(self, url):
        if 'newpct1.com' in url:
            log.verbose('Newpct1 URL: %s', url)
            url = url.replace('newpct1.com/', 'newpct1.com/descarga-torrent/')
        else:
            log.verbose('Newpct URL: %s', url)

        try:
            page = requests.get(url)
        except requests.exceptions.RequestException as e:
            raise UrlRewritingError(e)
        try:
            soup = get_soup(page.text)
        except Exception as e:
            raise UrlRewritingError(e)

        if 'newpct1.com' in url:
            torrent_id_prog = re.compile(r'descargar-torrent/(.+)/')
            torrent_ids = soup.findAll(href=torrent_id_prog)
        else:
            torrent_id_prog = re.compile("'(?:torrentID|id)'\s*:\s*'(\d+)'")
            torrent_ids = soup.findAll(text=torrent_id_prog)

        if len(torrent_ids) == 0:
            raise UrlRewritingError('Unable to locate torrent ID from url %s' % url)

        if 'newpct1.com' in url:
            torrent_id = torrent_id_prog.search(torrent_ids[0]['href']).group(1)
            return 'http://www.newpct1.com/download/%s.torrent' % torrent_id
        else:
            torrent_id = torrent_id_prog.search(torrent_ids[0]).group(1)
            return 'http://www.newpct.com/torrents/{:0>6}.torrent'.format(torrent_id)
Example #2
    def url_rewrite(self, task, entry):
        soup = self._get_soup(task, entry['url'])

        link_re = re.compile('rarefile\.net.*\.rar$')

        # grab links from the main entry:
        blog_entry = soup.find('div', class_="entry")
        num_links = 0
        link_list = None
        for paragraph in blog_entry.find_all('p'):
            links = paragraph.find_all('a', href=link_re)
            if len(links) > num_links:
                link_list = links
                num_links = len(links)
        if 'urls' in entry:
            urls = list(entry['urls'])
        else:
            urls = []
        if link_list is not None:
            for link in link_list:
                urls.append(normalize_unicode(link['href']))
        else:
            raise UrlRewritingError('No useable links found at %s' %
                                    entry['url'])

        num_links = len(urls)
        log.verbose('Found %d links at %s.', num_links, entry['url'])
        if num_links:
            entry['urls'] = urls
            entry['url'] = urls[0]
        else:
            raise UrlRewritingError('No useable links found at %s' %
                                    entry['url'])
Example #3
    def url_rewrite(self, task, entry):
        if 'url' not in entry:
            log.error("Didn't actually get a URL...")
        else:
            url = entry['url']
            log.debug("Got the URL: %s" % entry['url'])
            rawdata = ""
            try:
                opener = urllib.request.build_opener()
                opener.addheaders = [('User-agent', 'Mozilla/5.0')]
                response = opener.open(url)
            except Exception as e:
                raise UrlRewritingError("Connection Error for %s : %s" %
                                        (url, e))
            # read() returns bytes under Python 3; decode so the regex below can search it
            rawdata = response.read().decode('utf-8', errors='replace')

            match = re.search(
                r"<a href=\"/torrents/download/\?id=(\d*?)\">.*\.torrent</a>",
                rawdata)
            if match:
                torrent_id = match.group(1)
                log.debug("Got the Torrent ID: %s" % torrent_id)
                entry[
                    'url'] = 'https://www.t411.al/torrents/download/?id=' + torrent_id
                if 'download_auth' in entry:
                    auth_handler = t411Auth(*entry['download_auth'])
                    entry['download_auth'] = auth_handler
            else:
                raise UrlRewritingError("Cannot find torrent ID")
Example #4
File: eztv.py Project: oscarb-se/Flexget
    def url_rewrite(self, task, entry):
        url = entry['url']
        page = None
        for (scheme, netloc) in EZTV_MIRRORS:
            try:
                _, _, path, params, query, fragment = urlparse(url)
                url = urlunparse((scheme, netloc, path, params, query, fragment))
                page = task.requests.get(url).content
            except RequestException as e:
                log.debug('Eztv mirror `%s` seems to be down', url)
                continue
            break

        if not page:
            raise UrlRewritingError('No mirrors found for url %s' % entry['url'])

        log.debug('Eztv mirror `%s` chosen', url)
        try:
            soup = get_soup(page)
            mirrors = soup.find_all('a', attrs={'class': re.compile(r'download_\d')})
        except Exception as e:
            raise UrlRewritingError(e)

        log.debug('%d torrent mirrors found', len(mirrors))

        if not mirrors:
            raise UrlRewritingError('Unable to locate download link from url %s' % url)

        entry['urls'] = [m.get('href') for m in mirrors]
        entry['url'] = mirrors[0].get('href')
Example #5
    def parse_downloads(self, series_url, search_title):
        page = requests.get(series_url).content
        try:
            soup = get_soup(page)
        except Exception as e:
            raise UrlRewritingError(e)

        urls = []
        # find all titles
        episode_titles = self.find_all_titles(search_title)
        if not episode_titles:
            raise UrlRewritingError('Unable to find episode')

        for ep_title in episode_titles:
            # find matching download
            episode_title = soup.find('strong',
                                      text=re.compile(ep_title, re.I))
            if not episode_title:
                continue

            # find download container
            episode = episode_title.parent
            if not episode:
                continue

            # find episode language
            episode_lang = episode.find_previous(
                'strong', text=re.compile('Sprache')).next_sibling
            if not episode_lang:
                log.warning('No language found for: %s', series_url)
                continue

            # filter language
            if not self.check_language(episode_lang):
                log.warning('languages not matching: %s <> %s',
                            self.config['language'], episode_lang)
                continue

            # find download links
            links = episode.find_all('a')
            if not links:
                log.warning('No links found for: %s', series_url)
                continue

            for link in links:
                if not link.has_attr('href'):
                    continue

                url = link['href']
                pattern = r'http://download\.serienjunkies\.org.*%s_.*\.html' % self.config['hoster']

                if re.match(pattern, url) or self.config['hoster'] == 'all':
                    urls.append(url)
                else:
                    continue
        return urls
Example #6
 def _get_soup(self, task, url):
     try:
         page = task.requests.get(url)
     except RequestException as e:
         raise UrlRewritingError(str(e))
     try:
         return get_soup(page.text)
     except Exception as e:
         raise UrlRewritingError(str(e))
Example #7
    def parse_download_page(self, url):
        if 'newpct1.com' in url:
            log.verbose('Newpct1 URL: %s', url)
        else:
            log.verbose('Newpct URL: %s', url)

        try:
            page = requests.get(url)
        except requests.exceptions.RequestException as e:
            raise UrlRewritingError(e)
        try:
            soup = get_soup(page.text)
        except Exception as e:
            raise UrlRewritingError(e)

        torrent_id = None
        if 'newpct1.com' in url:
            url_format = NEWPCT1_TORRENT_FORMAT
            torrent_id_prog = re.compile(
                'function openTorrent.*\n.*\{.*(\n.*)+window\.location\.href =\s*\".*\/(\d+.+)\";'
            )
            torrent_ids = soup.findAll(text=torrent_id_prog)
            log.debug('searching openTorrent script')
            if torrent_ids:
                match = torrent_id_prog.search(torrent_ids[0])
                if match:
                    torrent_id = match.group(2)
        else:
            url_format = NEWPCT_TORRENT_FORMAT

            torrent_id_prog = re.compile(
                "(?:parametros\s*=\s*\n?)\s*{\s*\n(?:\s*'\w+'\s*:.*\n)+\s*'(?:torrentID|id)"
                "'\s*:\s*'(\d+)'")
            torrent_ids = soup.findAll(text=torrent_id_prog)
            if len(torrent_ids):
                match = torrent_id_prog.search(torrent_ids[0])
                if match:
                    torrent_id = match.group(1)
            if not torrent_id:
                torrent_id_prog = re.compile(
                    'function openTorrent.*\n.*\{.*(\n.*)+window\.location\.href =\s*\".*\/(\d+).*\";'
                )
                torrent_ids = soup.findAll(text=torrent_id_prog)
                log.debug('torrent ID not found, searching openTorrent script')
                if torrent_ids:
                    match = torrent_id_prog.search(torrent_ids[0])
                    if match:
                        torrent_id = match.group(2)

        if not torrent_id:
            raise UrlRewritingError('Unable to locate torrent ID from url %s' %
                                    url)

        return url_format.format(torrent_id)
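NEWPCT1_TORRENT_FORMAT and NEWPCT_TORRENT_FORMAT are module-level constants that are not part of this excerpt. Judging from the literal download URLs built in Example #1, they presumably look roughly like the sketch below; the exact strings in the real plugin may differ.

# assumed values, inferred from the URLs built by hand in Example #1
NEWPCT1_TORRENT_FORMAT = 'http://www.newpct1.com/download/{0}.torrent'
NEWPCT_TORRENT_FORMAT = 'http://www.newpct.com/torrents/{:0>6}.torrent'

With these values, url_format.format(torrent_id) yields the same URLs that Example #1 constructs inline.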
Example #8
File: koreus.py Project: cash2one/flexget
 def parse_download_page(self, url, requests):
     txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
     page = requests.get(url, headers=txheaders)
     try:
         soup = get_soup(page.text)
     except Exception as e:
         raise UrlRewritingError(e)
     down_link = soup.find('a', attrs={'href': re.compile(".+mp4")})
     if not down_link:
         raise UrlRewritingError('Unable to locate download link from url %s' % url)
     return down_link.get('href')
Example #9
 def parse_download_page(self, url, requests):
     txheaders = {
         'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
     }
     page = requests.get(url, headers=txheaders)
     try:
         soup = get_soup(page.text)
     except Exception as e:
         raise UrlRewritingError(e)
     tag_a = soup.find('a', attrs={'class': 'download_link'})
     if not tag_a:
         raise UrlRewritingError(
             'Unable to locate download link from url %s' % url)
     torrent_url = 'https://bakabt.me/' + tag_a.get('href')
     return torrent_url
Example #10
File: piratebay.py Project: topy/Flexget
 def parse_download_page(self, url, requests):
     page = requests.get(url).content
     try:
         soup = get_soup(page)
         tag_div = soup.find('div', attrs={'class': 'download'})
         if not tag_div:
             raise UrlRewritingError('Unable to locate download link from url %s' % url)
         tag_a = tag_div.find('a')
         torrent_url = tag_a.get('href')
         # URL is sometimes missing the schema
         if torrent_url.startswith('//'):
             torrent_url = 'http:' + torrent_url
         return torrent_url
     except Exception as e:
         raise UrlRewritingError(e)
Example #11
    def parse_download_page(self, url, requests):
        txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
        page = requests.get(url, headers=txheaders)

        match = re.findall(r"mirror[0-9]_openload','(.*?)'\)", page.text)
        if len(match) == 0:
            raise UrlRewritingError('Unable to locate Openload hash from url %s' % url)
        urlhash = match[0]
        # b64decode returns bytes; decode to text so it can be split on '-' (Python 3)
        data = base64.b64decode(urlhash).decode('ascii')
        down_link = ""
        for char in data.split("-"):
            down_link += chr(int(char))
        if not down_link:
            raise UrlRewritingError('Unable to locate download link from url %s' % url)
        return down_link
Example #12
 def url_from_page(self, url):
     """Parses torrent url from newtorrents download page"""
     try:
         page = requests.get(url)
         data = page.text
     except Exception:
         raise UrlRewritingError('URLerror when retrieving page')
     p = re.compile("copy\(\'(.*)\'\)", re.IGNORECASE)
     f = p.search(data)
     if not f:
         # the link on which the plugin relies is missing!
         raise UrlRewritingError(
             'Failed to get url from download page. Plugin may need an update.'
         )
     else:
         return f.group(1)
Example #13
    def url_rewrite(self, task, entry):
        url = entry['url']
        if (url.startswith('http://www.newtorrents.info/?q=')
                or url.startswith('http://www.newtorrents.info/search')):
            results = self.entries_from_search(entry['title'], url=url)
            if not results:
                raise UrlRewritingError("No matches for %s" % entry['title'])
            url = results[0]['url']
        else:
            url = self.url_from_page(url)

        if url:
            entry['url'] = url
            self.resolved.append(url)
        else:
            raise UrlRewritingError('Bug in newtorrents urlrewriter')
Example #14
    def url_rewrite(self, task, entry):
        log.debug('Requesting %s' % entry['url'])
        page = requests.get(entry['url'])
        soup = get_soup(page.text)

        for link in soup.findAll('a', attrs={'href': re.compile(r'^/url')}):
            # Extract correct url from google internal link
            href = 'http://google.com' + link['href']
            args = parse_qs(urlparse(href).query)
            href = args['q'][0]

            # Test if entry with this url would be recognized by some urlrewriter
            log.trace('Checking if %s is known by some rewriter' % href)
            fake_entry = {'title': entry['title'], 'url': href}
            urlrewriting = plugin.get_plugin_by_name('urlrewriting')
            if urlrewriting['instance'].url_rewritable(task, fake_entry):
                log.debug('--> rewriting %s (known url pattern)' % href)
                entry['url'] = href
                return
            else:
                log.debug('<-- ignoring %s (unknown url pattern)' % href)
        raise UrlRewritingError('Unable to resolve')
Example #15
    def url_rewrite(self, task, entry):
        soup = self._get_soup(task, entry['url'])

        # grab link from filehosters_re
        link_elements = []
        log.debug('Searching %s for a tags where the text matches one of: %s',
                  entry['url'], str(self.config.get('filehosters_re')))
        regexps = self.config.get('filehosters_re', [])
        if self.config.get('parse'):
            link_elements = soup.find_all('div',
                                          class_=re.compile("mag_details"))
            log.debug('parse enabled: found %d link containers.',
                      len(link_elements))
        if 'urls' in entry:
            urls = list(entry['urls'])
            log.debug('Original urls: %s', str(entry['urls']))
        else:
            urls = []
        log.debug('Found %d link elements.', len(link_elements))
        if link_elements and not regexps:
            log.warning('Link elements were found but no filehosters_re filters are configured; '
                        'all matching links will be used.')
        for target in link_elements:
            links = target.find_all('a')
            for link in links:
                if re.search('novafile.com', link['href']):
                    urls.append(link['href'])

        # filter urls:
        filtered_urls = []
        for i, url in enumerate(urls):
            urls[i] = normalize_unicode(url)
            for regexp in regexps:
                if re.search(regexp, urls[i]):
                    filtered_urls.append(urls[i])
                    log.debug('Url: "%s" matched filehoster filter: %s',
                              urls[i], regexp)
                    break
            else:
                if regexps:
                    log.debug(
                        'Url: "%s" was discarded because it does not match any of the given filehoster filters: %s',
                        urls[i], str(regexps))
        if regexps:
            log.debug('Using filehosters_re filters: %s', str(regexps))
            urls = filtered_urls
        else:
            log.debug(
                'No filehoster filters configured, using all found links.')
        num_links = len(urls)
        log.verbose('Found %d links at %s.', num_links, entry['url'])
        if num_links:
            entry['urls'] = urls
            entry['url'] = urls[0]
        else:
            raise UrlRewritingError('No useable links found at %s' %
                                    entry['url'])
Example #16
 def url_rewrite(self, task, entry):
     try:
         page = task.requests.get(entry['url'])
     except RequestException as e:
         raise UrlRewritingError(str(e))
     try:
         soup = get_soup(page.text)
     except Exception as e:
         raise UrlRewritingError(str(e))
     link_elements = soup.find_all('pre', class_='links')
     if 'urls' in entry:
         urls = list(entry['urls'])
     else:
         urls = []
     for element in link_elements:
         urls.extend(element.text.splitlines())
     regexps = self.config.get('filehosters_re', [])
     filtered_urls = []
     for i, url in enumerate(urls):
         urls[i] = normalize_unicode(url)
         for regexp in regexps:
             if re.search(regexp, urls[i]):
                 filtered_urls.append(urls[i])
                 log.debug('Url: "%s" matched filehoster filter: %s',
                           urls[i], regexp)
                 break
         else:
             if regexps:
                 log.debug(
                     'Url: "%s" does not match any of the given filehoster filters: %s',
                     urls[i], str(regexps))
     if regexps:
         log.debug('Using filehosters_re filters: %s', str(regexps))
         urls = filtered_urls
     else:
         log.debug(
             'No filehoster filters configured, using all found links.')
     num_links = len(urls)
     log.verbose('Found %d links at %s.', num_links, entry['url'])
     if num_links:
         entry['urls'] = urls
         entry['url'] = urls[0]
     else:
         raise UrlRewritingError('No useable links found at %s' %
                                 entry['url'])
Example #17
    def parse_download_page(self, url, requests):
        txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
        try:
            page = requests.get(url, headers=txheaders)
        except requests.exceptions.RequestException as e:
            msg = 'Cannot open "%s": %s' % (url, str(e))
            log.error(msg)
            raise UrlRewritingError(msg)

        try:
            soup = get_soup(page.text)
        except Exception as e:
            raise UrlRewritingError(str(e))
        
        down_link = soup.find('a', attrs={'href': re.compile("down\.php\?.*")})
        if not down_link:
            raise UrlRewritingError('Unable to locate download link from url "%s"' % url)
        return 'http://bt.hliang.com/' + down_link.get('href')
Example #18
 def url_rewrite(self, task, entry):
     if 'url' not in entry:
         log.error('Didn\'t actually get a URL...')
     else:
         log.debug('Got the URL: %s', entry['url'])
     if entry['url'].startswith('https://www.torrentday.com/browse'):
         # use search
         results = self.search(task, entry)
         if not results:
             raise UrlRewritingError('No search results found')
         entry['url'] = results[0]['url']
Example #19
 def parse_download_page(self, page_url, requests):
     page = requests.get(page_url)
     try:
         soup = get_soup(page.text)
     except Exception as e:
         raise UrlRewritingError(e)
     tag_a = soup.find("a", {"class": "dl_link"})
     if not tag_a:
         if soup.findAll(text="Connexion ?"):
             raise UrlRewritingError('You are not logged in, check if your '
                                     'cookie for authentication is up to date')
         else:
             raise UrlRewritingError('You have reached your download limit '
                                     'per 24 hours, so I cannot get the torrent')
     torrent_url = ("http://www.frenchtorrentdb.com" + tag_a.get('href') +
                    "&js=1")
     log.debug('TORRENT URL is : %s' % torrent_url)
     return torrent_url
Example #20
 def parse_download_page(self, url, requests):
     txheaders = {
         'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
     }
     page = requests.get(url, headers=txheaders)
     try:
         soup = get_soup(page.text)
     except Exception as e:
         raise UrlRewritingError(e)
     vk_soup = soup.find(
         'a', attrs={'href': re.compile(r'https:.*vk.com.*no_preview=1')})
     if not vk_soup:
         raise UrlRewritingError(
             'Unable to locate download link from url %s' % url)
     vk_link = vk_soup.get('href')
     #txheaders = {'Accept': 'text/html'}
     page = requests.get(vk_link)
     if page.status_code != 200:
         raise UrlRewritingError('File does not exist in VK')
     return page.url
Example #21
 def parse_download_page(self, url):
     try:
         page = requests.get(url).content
         soup = get_soup(page, 'html.parser')
         download_link = soup.findAll(
             href=re.compile('redirect|redirectlink'))
         download_href = download_link[0]['href']
         return download_href
     except Exception:
         raise UrlRewritingError('Unable to locate torrent from url %s' %
                                 url)
Example #22
    def parse_download_page(self, url, task):
        log.verbose('Descargas2020 URL: %s', url)

        try:
            page = requests.get(url)
        except requests.exceptions.RequestException as e:
            raise UrlRewritingError(e)
        try:
            soup = get_soup(page.text)
        except Exception as e:
            raise UrlRewritingError(e)

        torrent_id = None
        url_format = DESCARGAS2020_TORRENT_FORMAT

        torrent_id_prog = re.compile(
            "(?:parametros\s*=\s*\n?)\s*{\s*\n(?:\s*'\w+'\s*:.*\n)+\s*'(?:torrentID|id)"
            "'\s*:\s*'(\d+)'")
        torrent_ids = soup.findAll(text=torrent_id_prog)
        if torrent_ids:
            match = torrent_id_prog.search(torrent_ids[0])
            if match:
                torrent_id = match.group(1)
        if not torrent_id:
            log.debug('torrent ID not found, searching openTorrent script')
            torrent_id_prog = re.compile(
                'function openTorrent.*\n.*\{.*(\n.*)+window\.location\.href =\s*\"(.*\/\d+_-.*[^\/])\/?\";'
            )
            torrent_ids = soup.findAll(text=torrent_id_prog)
            if torrent_ids:
                match = torrent_id_prog.search(torrent_ids[0])
                if match:
                    torrent_id = match.group(2)
                    return torrent_id.replace('descargar-torrent',
                                              'download') + '.torrent'

        if not torrent_id:
            raise UrlRewritingError('Unable to locate torrent ID from url %s' %
                                    url)

        return url_format.format(torrent_id)
Example #23
    def parse_download_page(self, url):
        if 'newpct1.com' in url:
            log.verbose('Newpct1 URL: %s', url)
            url = url.replace('newpct1.com/', 'newpct1.com/descarga-torrent/')
        else:
            log.verbose('Newpct URL: %s', url)

        try:
            page = requests.get(url)
        except requests.exceptions.RequestException as e:
            raise UrlRewritingError(e)
        try:
            soup = get_soup(page.text)
        except Exception as e:
            raise UrlRewritingError(e)

        torrent_id = None
        if 'newpct1.com' in url:
            url_format = NEWPCT1_TORRENT_FORMAT

            torrent_id_prog = re.compile(r'descargar-torrent/(.+)/')
            match = torrent_id_prog.search(soup.text)
            if match:
                torrent_id = match.group(1)
        else:
            url_format = NEWPCT_TORRENT_FORMAT

            torrent_id_prog = re.compile(
                "(?:parametros\s*=\s*\n?)\s*{\s*\n(?:\s*'\w+'\s*:.*\n)+\s*'(?:torrentID|id)"
                "'\s*:\s*'(\d+)'")
            torrent_ids = soup.findAll(text=torrent_id_prog)
            if len(torrent_ids):
                match = torrent_id_prog.search(torrent_ids[0])
                if match:
                    torrent_id = match.group(1)

        if not torrent_id:
            raise UrlRewritingError('Unable to locate torrent ID from url %s' %
                                    url)

        return url_format.format(torrent_id)
Example #24
 def url_rewrite(self, task, entry):
     if 'url' not in entry:
         log.error("Didn't actually get a URL...")
     else:
         log.debug("Got the URL: %s" % entry['url'])
     if entry['url'].startswith('https://www.torrentleech.org/torrents/browse/list/query/'):
         # use search
         results = self.search(task, entry)
         if not results:
             raise UrlRewritingError("No search results found")
         # TODO: Search doesn't enforce close match to title, be more picky
         entry['url'] = results[0]['url']
Example #25
 def url_rewrite(self, task, entry):
     try:
         # need to fake user agent
         txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
         page = task.requests.get(entry['url'], headers=txheaders)
         soup = get_soup(page.text)
         results = soup.find_all('a', attrs={'class': 'l'})
         if not results:
             raise UrlRewritingError('No results')
         for res in results:
             url = res.get('href')
             url = url.replace('/interstitial?url=', '')
             # generate match regexp from google search result title
             regexp = '.*'.join([x.contents[0] for x in res.find_all('em')])
             if re.match(regexp, entry['title']):
                 log.debug('resolved, found with %s' % regexp)
                 entry['url'] = url
                 return
         raise UrlRewritingError('Unable to resolve')
     except Exception as e:
         raise UrlRewritingError(e)
Example #26
 def url_rewrite(self, task, entry):
     if 'url' not in entry:
         log.error("Didn't actually get a URL...")
     else:
         log.debug("Got the URL: %s" % entry['url'])
     if entry['url'].startswith(BASE_URL + '/t?'):
         # use search
         results = self.search(task, entry)
         if not results:
             raise UrlRewritingError("No search results found")
         # TODO: Search doesn't enforce close match to title, be more picky
         entry['url'] = results[0]['url']
Example #27
    def url_rewrite(self, task, entry):
        for name, config in self.resolves.get(task.name, {}).items():
            regexp = config['regexp_compiled']
            format = config['format']
            if regexp.search(entry['url']):
                log.debug('Regexp resolving %s with %s' % (entry['url'], name))

                # run the regexp
                entry['url'] = regexp.sub(format, entry['url'])

                if regexp.match(entry['url']):
                    entry.fail('urlrewriting')
                    raise UrlRewritingError('Regexp %s result should NOT continue to match!' % name)
                return
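The rewriter above applies a configured pattern/replacement pair with regexp.sub, then fails the entry if the result still matches the pattern. A minimal standalone sketch of that mechanism, using a made-up pattern and format rather than anything from a real task config:

import re

# hypothetical pattern/format pair of the kind this plugin reads from its config
regexp = re.compile(r'https?://example\.org/details/(\d+)')
fmt = r'http://example.org/download/\1.torrent'

url = 'http://example.org/details/12345'
rewritten = regexp.sub(fmt, url)
print(rewritten)                    # http://example.org/download/12345.torrent
assert not regexp.match(rewritten)  # mirrors the plugin's "should NOT continue to match" check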
Example #28
    def url_rewrite(self, task, entry):
        soup = self._get_soup(task, entry['url'])

        # grab links from the main post:
        # these hosts are not matched reliably, so they are left out of link_re:
        #   filescdn\.com.*/*$
        #   suprafiles\.net.*/*$
        link_re = re.compile(
            'dailyuploads\.net.*/*$|dropupload\.com.*/*$|cloudyfiles\.com.*/*$|upload4earn\.com.*/*$'
        )

        num_links = 0
        link_list = None
        blog_entry = soup.find('div', class_="box-inner-block")
        for paragraph in blog_entry.find_all('p'):
            links = paragraph.find_all('a', href=link_re)
            if len(links) > num_links:
                link_list = links
                num_links = len(links)
        if 'urls' in entry:
            urls = list(entry['urls'])
        else:
            urls = []
        if link_list is not None:
            for link in link_list:
                urls.append(normalize_unicode(link['href']))
        else:
            raise UrlRewritingError('No useable links found at %s' %
                                    entry['url'])

        num_links = len(urls)
        log.verbose('Found %d links at %s.', num_links, entry['url'])
        if num_links:
            entry['urls'] = urls
            entry['url'] = urls[0]
        else:
            raise UrlRewritingError('No useable links found at %s' %
                                    entry['url'])
Example #29
 def url_rewrite(self, task, entry):
     if 'url' not in entry:
         log.error("Didn't actually get a URL...")
     else:
         log.debug("Got the URL: %s" % entry['url'])
     if URL_SEARCH.match(entry['url']):
         # use search
         results = self.search(task, entry)
         if not results:
             raise UrlRewritingError("No search results found")
         # TODO: Close matching was taken out of search methods, this may need to be fixed to be more picky
         entry['url'] = results[0]['url']
     else:
         # parse download page
         entry['url'] = self.parse_download_page(entry['url'])
Example #30
    def url_rewrite(self, task, entry):
        """
            Gets the download information for 1337x result
        """

        url = entry['url']

        log.info('1337x rewriting download url: %s' % url)

        try:
            page = task.requests.get(url)
            log.debug('requesting: %s', page.url)
        except RequestException as e:
            log.error('1337x request failed: %s', e)
            raise UrlRewritingError('1337x request failed: %s' % e)

        soup = get_soup(page.content)

        magnet_url = str(soup.find('a', id='magnetdl').get('href')).lower()
        torrent_url = str(soup.find('a', id='torrentdl').get('href')).lower()

        entry['url'] = torrent_url
        entry.setdefault('urls', []).append(torrent_url)
        entry['urls'].append(magnet_url)
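All of the excerpts above implement FlexGet's urlrewriter plugin interface: url_rewritable(task, entry) reports whether the plugin handles a given URL, and url_rewrite(task, entry), often delegating to a parse_download_page helper, replaces entry['url'] with a direct download link or raises UrlRewritingError. A minimal skeleton of how such a plugin is typically declared and registered is sketched below; the class body and URL pattern are illustrative only, and the registration keyword (interfaces= vs. the older groups=) depends on the FlexGet version.

from flexget import plugin
from flexget.event import event


class UrlRewriteExample(object):
    """Illustrative urlrewriter skeleton; the URL pattern is made up."""

    def url_rewritable(self, task, entry):
        # claim only URLs this plugin knows how to rewrite
        return entry['url'].startswith('http://example.org/details/')

    def url_rewrite(self, task, entry):
        # replace the details-page URL with a direct link; real plugins raise
        # UrlRewritingError when rewriting fails, as in the excerpts above
        entry['url'] = entry['url'].replace('/details/', '/download/')


@event('plugin.register')
def register_plugin():
    # 'interfaces' is used by current FlexGet; older releases used 'groups'
    plugin.register(UrlRewriteExample, 'example_rewriter', interfaces=['urlrewriter'], api_ver=2)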