Example #1
    def parse_download_page(self, url):
        txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
        req = urllib2.Request(url, None, txheaders)
        page = urlopener(req, log)
        try:
            soup = get_soup(page)
        except Exception as e:
            raise UrlRewritingError(e)

        config = self.config or {}
        config.setdefault('quality', 'hd')

        links = soup.find_all('a', text="Descargar", href=re.compile("/subtitles"))
        if not links:
            raise UrlRewritingError('Unable to locate subtitle download link from url %s' % url)

        subtitle_url = ''
        for link in links:
            sub_url = link['href']
            log.verbose('Found url %s', sub_url)
            if config['quality'] == 'hd' and re.search("720p|1080p", sub_url):
                subtitle_url = 'http://www.argenteam.net' + sub_url
                log.verbose('is a match')
                break
            if config['quality'] == 'sd' and re.search("720p|1080p", sub_url) is None:
                subtitle_url = 'http://www.argenteam.net' + sub_url
                log.verbose('is a match')
                break
        if subtitle_url == '':
            raise UrlRewritingError('Unable to locate download link %s from url %s' % (config['quality'], url))
        return subtitle_url
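The quality check above is easy to exercise on its own. Below is a minimal, standalone sketch of the same HD/SD filter; the base URL and the 720p/1080p pattern come from the snippet, while the helper name and sample hrefs are made up for illustration.

import re

def pick_subtitle_link(hrefs, quality='hd'):
    # Hypothetical helper mirroring the loop above: return the first href whose
    # HD-ness matches the requested quality, or None if nothing fits.
    hd_pattern = re.compile(r'720p|1080p')
    for href in hrefs:
        is_hd = bool(hd_pattern.search(href))
        if (quality == 'hd') == is_hd:
            return 'http://www.argenteam.net' + href
    return None

# pick_subtitle_link(['/subtitles/1/x.720p', '/subtitles/2/x.dvdrip'], 'sd')
# -> 'http://www.argenteam.net/subtitles/2/x.dvdrip'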
Example #2
def get_json(url):
    try:
        log.debug('fetching json at %s' % url)
        data = urlopener(url, log)
    except URLError as e:
        log.warning('Request failed %s' % url)
        return
Example #3
 def nzbid_from_search(self, url, name, query):
     """Parses nzb download url from api results"""
     import time
     import difflib
     matched_results = []
     log.debug(
         "Sleeping to respect nzbmatrix rules about hammering the API")
     time.sleep(10)
     apireturn = self.parse_nzb_matrix_api(
         urlopener(url, log).read(), query)
     if not apireturn:
         return None
     else:
         names = []
         for result in apireturn:
             names.append(result["NZBNAME"])
         matches = difflib.get_close_matches(name, names, 1, 0.3)
         if len(matches) == 0:
             return None
         else:
             for result in apireturn:
                 if result["NZBNAME"] == matches[0]:
                     break
         for match in matches:  # Already sorted
             for result in apireturn:
                 if result.get("NZBNAME") == match:
                     matched_results.append(result)
         return matched_results
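For context, the fuzzy matching above relies on difflib.get_close_matches with a cutoff of 0.3 and at most one result. A quick standalone illustration, with made-up release names:

import difflib

names = ['Some.Show.S01E01.720p-GRP', 'Other.Show.S02E03.x264-GRP']
matches = difflib.get_close_matches('Some Show S01E01', names, 1, 0.3)
# matches == ['Some.Show.S01E01.720p-GRP']  (best candidate at or above the 0.3 cutoff)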
Example #4
def get_http_seeds(url, info_hash):
    url = get_scrape_url(url, info_hash)
    if not url:
        log.debug('if not url is true returning 0')
        return 0
    log.debug('Checking for seeds from %s' % url)
    data = None
    try:
        data = bdecode(urlopener(url, log, retries=1,
                                 timeout=10).read()).get('files')
    except URLError as e:
        log.debug('Error scraping: %s' % e)
        return 0
    except SyntaxError as e:
        log.warning('Error decoding tracker response: %s' % e)
        return 0
    except BadStatusLine as e:
        log.warning('Error BadStatusLine: %s' % e)
        return 0
    except IOError as e:
        log.warning('Server error: %s' % e)
        return 0
    if not data:
        log.debug('No data received from tracker scrape.')
        return 0
    log.debug('get_http_seeds is returning: %s' % data.values()[0]['complete'])
    return data.values()[0]['complete']
Example #5
 def get_file(self, only_cached=False):
     """Makes sure the poster is downloaded to the local cache (in userstatic folder) and
     returns the path split into a list of directory and file components"""
     from flexget.manager import manager
     base_dir = os.path.join(manager.config_base, 'userstatic')
     if self.file and os.path.isfile(os.path.join(base_dir, self.file)):
         return self.file.split(os.sep)
     elif only_cached:
         return
     # If we don't already have a local copy, download one.
     log.debug('Downloading poster %s' % self.url)
     dirname = os.path.join('tmdb', 'posters', str(self.movie_id))
     # Create folders if they don't exist
     fullpath = os.path.join(base_dir, dirname)
     if not os.path.isdir(fullpath):
         os.makedirs(fullpath)
     filename = os.path.join(dirname, posixpath.basename(self.url))
     with open(os.path.join(base_dir, filename), 'wb') as thefile:
         thefile.write(urlopener(self.url, log).read())
     self.file = filename
     # If we are detached from a session, update the db
     if not Session.object_session(self):
         session = Session()
         poster = session.query(TMDBPoster).filter(TMDBPoster.db_id == self.db_id).first()
         if poster:
             poster.file = filename
             session.commit()
         session.close()
     return filename.split(os.sep)
Example #6
 def nzbid_from_search(self, url, name, query):
     """Parses nzb download url from api results"""
     import time
     import difflib
     matched_results = []
     log.debug("Sleeping to respect nzbmatrix rules about hammering the API")
     time.sleep(10)
     apireturn = self.parse_nzb_matrix_api(urlopener(url, log).read(),
                                           query)
     if not apireturn:
         return None
     else:
         names = []
         for result in apireturn:
             names.append(result["NZBNAME"])
         matches = difflib.get_close_matches(name, names, 1, 0.3)
         if len(matches) == 0:
             return None
         else:
             for result in apireturn:
                 if result["NZBNAME"] == matches[0]:
                     break
         for match in matches: # Already sorted
             for result in apireturn:
                 if result.get("NZBNAME") == match:
                     matched_results.append(result)
         return matched_results
Example #7
    def on_task_output(self, task, config):
        for entry in task.accepted:
            if task.options.test:
                log.info('Would add into sabnzbd: %s' % entry['title'])
                continue

            params = self.get_params(config)
            # allow overriding the category
            if 'category' in entry:
                # Dirty hack over the next few lines to strip out non-ascii
                # chars. We're going to urlencode this, which causes
                # serious issues in python2.x if it's not ascii input.
                params['cat'] = ''.join([x for x in entry['category'] if ord(x) < 128])
            params['name'] = ''.join([x for x in entry['url'] if ord(x) < 128])
            # add cleaner nzb name (undocumented api feature)
            params['nzbname'] = ''.join([x for x in entry['title'] if ord(x) < 128])

            request_url = config['url'] + urllib.urlencode(params)
            log.debug('request_url: %s' % request_url)
            try:
                response = urlopener(request_url, log).read()
            except Exception as e:
                log.critical('Failed to use sabnzbd. Requested %s' % request_url)
                log.critical('Result was: %s' % e)
                entry.fail('sabnzbd unreachable')
                if task.options.debug:
                    log.exception(e)
                continue

            if 'error' in response.lower():
                entry.fail(response.replace('\n', ''))
            else:
                log.info('Added `%s` to SABnzbd' % (entry['title']))
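The API call above is just a GET request whose query string is built with urlencode. A minimal sketch of that assembly, assuming an illustrative SABnzbd endpoint and parameter set; the values below are made up, and in the plugin the base params come from get_params(config).

try:
    from urllib import urlencode          # Python 2, as used in the snippet
except ImportError:
    from urllib.parse import urlencode    # Python 3 fallback

base_url = 'http://localhost:8080/sabnzbd/api?'      # assumed config['url']
params = {
    'mode': 'addurl',                                # assumed value from get_params()
    'name': 'http://example.com/some.nzb',           # ascii-stripped entry['url']
    'nzbname': 'Some.Release.Name',                  # ascii-stripped entry['title']
    'cat': 'tv',                                     # optional category override
}
request_url = base_url + urlencode(params)
# e.g. 'http://localhost:8080/sabnzbd/api?mode=addurl&name=...'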
Example #8
 def get_file(self, only_cached=False):
     """Makes sure the poster is downloaded to the local cache (in userstatic folder) and
     returns the path split into a list of directory and file components"""
     from flexget.manager import manager
     base_dir = os.path.join(manager.config_base, 'userstatic')
     if self.file and os.path.isfile(os.path.join(base_dir, self.file)):
         return self.file.split(os.sep)
     elif only_cached:
         return
     # If we don't already have a local copy, download one.
     log.debug('Downloading poster %s' % self.url)
     dirname = os.path.join('tmdb', 'posters', str(self.movie_id))
     # Create folders if they don't exist
     fullpath = os.path.join(base_dir, dirname)
     if not os.path.isdir(fullpath):
         os.makedirs(fullpath)
     filename = os.path.join(dirname, posixpath.basename(self.url))
     with open(os.path.join(base_dir, filename), 'wb') as thefile:
         thefile.write(urlopener(self.url, log).read())
     self.file = filename
     # If we are detached from a session, update the db
     if not Session.object_session(self):
         session = Session()
         poster = session.query(TMDBPoster).filter(
             TMDBPoster.db_id == self.db_id).first()
         if poster:
             poster.file = filename
             session.commit()
         session.close()
     return filename.split(os.sep)
Example #9
    def on_feed_output(self, feed, config):
        for entry in feed.accepted:
            if feed.manager.options.test:
                log.info('Would add into sabnzbd: %s' % entry['title'])
                continue

            params = self.get_params(config)
            # allow overriding the category
            if 'category' in entry:
                # Dirty hack over the next few lines to strip out non-ascii
                # chars. We're going to urlencode this, which causes
                # serious issues in python2.x if it's not ascii input.
                params['cat'] = ''.join([x for x in entry['category'] if ord(x) < 128])
            params['name'] = ''.join([x for x in entry['url'] if ord(x) < 128])
            # add cleaner nzb name (undocumented api feature)
            params['nzbname'] = ''.join([x for x in entry['title'] if ord(x) < 128])

            request_url = config['url'] + urllib.urlencode(params)
            log.debug('request_url: %s' % request_url)
            try:
                response = urlopener(request_url, log).read()
            except Exception as e:
                log.critical('Failed to use sabnzbd. Requested %s' % request_url)
                log.critical('Result was: %s' % e)
                feed.fail(entry, 'sabnzbd unreachable')
                if feed.manager.options.debug:
                    log.exception(e)
                continue

            if 'error' in response.lower():
                feed.fail(entry, response.replace('\n', ''))
            else:
                log.info('Added `%s` to SABnzbd' % (entry['title']))
Example #10
def get_json(url):
    try:
        log.debug("fetching json at %s" % url)
        data = urlopener(url, log)
    except URLError as e:
        log.warning("Request failed %s" % url)
        return
Example #11
    def on_task_output(self, task, config):
        for entry in task.accepted:
            if task.options.test:
                log.info("Would add into sabnzbd: %s" % entry["title"])
                continue

            params = self.get_params(config)
            # allow overriding the category
            if "category" in entry:
                # Dirty hack over the next few lines to strip out non-ascii
                # chars. We're going to urlencode this, which causes
                # serious issues in python2.x if it's not ascii input.
                params["cat"] = "".join([x for x in entry["category"] if ord(x) < 128])
            params["name"] = "".join([x for x in entry["url"] if ord(x) < 128])
            # add cleaner nzb name (undocumented api feature)
            params["nzbname"] = "".join([x for x in entry["title"] if ord(x) < 128])

            request_url = config["url"] + urllib.urlencode(params)
            log.debug("request_url: %s" % request_url)
            try:
                response = urlopener(request_url, log).read()
            except Exception as e:
                log.critical("Failed to use sabnzbd. Requested %s" % request_url)
                log.critical("Result was: %s" % e)
                entry.fail("sabnzbd unreachable")
                if task.options.debug:
                    log.exception(e)
                continue

            if "error" in response.lower():
                entry.fail(response.replace("\n", ""))
            else:
                log.info("Added `%s` to SABnzbd" % (entry["title"]))
Example #12
    def search(self, task, entry, config=None):

        txheaders = {
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-us,en;q=0.5',
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
            'Keep-Alive': '300',
            'Connection': 'keep-alive',
        }
        nzbs = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            query = search_string
            url = u'http://newzleech.com/?%s' % str(urllib.urlencode({'q': query.encode('latin1'),
                                                                      'm': 'search', 'group': '', 'min': 'min',
                                                                      'max': 'max', 'age': '', 'minage': '',
                                                                      'adv': ''}))
            # log.debug('Search url: %s' % url)

            req = urllib2.Request(url, headers=txheaders)
            page = urlopener(req, log)
            soup = get_soup(page)

            for item in soup.find_all('table', attrs={'class': 'contentt'}):
                subject_tag = item.find('td', attrs={'class': 'subject'}).next
                subject = ''.join(subject_tag.find_all(text=True))
                complete = item.find('td', attrs={'class': 'complete'}).contents[0]
                size = item.find('td', attrs={'class': 'size'}).contents[0]
                nzb_url = 'http://newzleech.com/' + item.find('td', attrs={'class': 'get'}).next.get('href')

                # generate regexp from entry title and see if it matches subject
                regexp = query
                wildcardize = [' ', '-']
                for wild in wildcardize:
                    regexp = regexp.replace(wild, '.')
                regexp = '.*' + regexp + '.*'
                # log.debug('Title regexp: %s' % regexp)

                if re.match(regexp, subject):
                    log.debug('%s matches to regexp' % subject)
                    if complete != u'100':
                        log.debug('Match is incomplete %s from newzleech, skipping ..' % query)
                        continue
                    log.info('Found \'%s\'' % query)

                    try:
                        size_num = float(size[:-3])
                    except (ValueError, TypeError):
                        log.error('Failed to parse_size %s' % size)
                        size_num = 0
                    # convert into megabytes
                    if 'GB' in size:
                        size_num *= 1024
                    if 'KB' in size:
                        size_num /= 1024

                    # choose largest file
                    nzbs.add(Entry(title=subject, url=nzb_url, content_size=size_num, search_sort=size_num))

        return nzbs
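The subject matching above turns the query into a crude wildcard pattern before applying re.match. A self-contained sketch of just that step; the function name and sample strings are made up, and note the query is not regex-escaped, exactly as in the snippet.

import re

def title_matches(query, subject):
    regexp = query
    for wild in (' ', '-'):
        regexp = regexp.replace(wild, '.')       # spaces/dashes become one-char wildcards
    return re.match('.*' + regexp + '.*', subject) is not None

# title_matches('Some Show S01E01', 'xxx Some.Show.S01E01.720p yyy') -> True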
Example #13
    def on_feed_input(self, feed, config):
        config = self.build_config(config)

        log.debug('InputPlugin html requesting url %s' % config['url'])

        if config.get('username') and config.get('password'):
            log.debug('Basic auth enabled. User: %s Password: %s' % (config['username'], config['password']))
            passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
            passman.add_password(None, config['url'], config['username'], config['password'])
            handlers = [urllib2.HTTPBasicAuthHandler(passman)]
        else:
            handlers = None
        page = urlopener(config['url'], log, handlers=handlers)
        soup = get_soup(page)
        log.debug('Detected encoding %s' % soup.originalEncoding)

        # dump received content into a file
        if 'dump' in config:
            name = config['dump']
            log.info('Dumping %s into %s' % (config['url'], name))
            data = soup.prettify()
            f = open(name, 'w')
            f.write(data)
            f.close()

        return self.create_entries(config['url'], soup, config)
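The basic-auth wiring above can be reproduced with plain urllib2 outside FlexGet's urlopener helper. A minimal sketch with a made-up URL and credentials:

import urllib2

url = 'http://example.com/protected/feed.html'
passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
passman.add_password(None, url, 'user', 'secret')
opener = urllib2.build_opener(urllib2.HTTPBasicAuthHandler(passman))
# page = opener.open(url)   # would perform the authenticated request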
Example #14
    def on_task_input(self, task, config):
        config = self.build_config(config)

        log.debug('InputPlugin html requesting url %s' % config['url'])

        if config.get('username') and config.get('password'):
            log.debug('Basic auth enabled. User: %s Password: %s' %
                      (config['username'], config['password']))
            passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
            passman.add_password(None, config['url'], config['username'],
                                 config['password'])
            handlers = [urllib2.HTTPBasicAuthHandler(passman)]
        else:
            handlers = None
        page = urlopener(config['url'], log, handlers=handlers)
        soup = get_soup(page)
        log.debug('Detected encoding %s' % soup.originalEncoding)

        # dump received content into a file
        if 'dump' in config:
            name = config['dump']
            log.info('Dumping %s into %s' % (config['url'], name))
            data = soup.prettify()
            f = open(name, 'w')
            f.write(data)
            f.close()

        return self.create_entries(config['url'], soup, config)
Example #15
def get_http_seeds(url, info_hash):
    url = get_scrape_url(url, info_hash)
    if not url:
        log.debug('if not url is true returning 0')
        return 0
    log.debug('Checking for seeds from %s' % url)
    data = None
    try:
        data = bdecode(urlopener(url, log, retries=1, timeout=10).read()).get('files')
    except URLError as e:
        log.debug('Error scraping: %s' % e)
        return 0
    except SyntaxError as e:
        log.warning('Error decoding tracker response: %s' % e)
        return 0
    except BadStatusLine as e:
        log.warning('Error BadStatusLine: %s' % e)
        return 0
    except IOError as e:
        log.warning('Server error: %s' % e)
        return 0
    if not data:
        log.debug('No data received from tracker scrape.')
        return 0
    log.debug('get_http_seeds is returning: %s' % data.values()[0]['complete'])
    return data.values()[0]['complete']
Example #16
  def search(self, entry, config):
    url = "https://tehconnection.eu/torrents.php?searchstr=%s" \
      % entry.get("imdb_id")

    page = urlopener(url, log)
    soup = get_soup(page)

    results = set()

    for row in soup.find_all("tr", class_="group_torrent"):
      link = row.find(title="Download")
      info = row.find(colspan="1").contents[3].contents[0].strip()
      seeders = int(row.find_all("td")[6].contents[0].strip())
      leechers = int(row.find_all("td")[7].contents[0].strip())

      result = Entry()
      result["title"] = entry.get("title") + " / " + info
      result["imdb_id"] = entry.get("imdb_id")
      result["url"] = "https://tehconnection.eu" + link.get("href")
      result["torrent_seeds"] = seeders
      result["torrent_leeches"] = leechers
      result["search_sort"] = torrent_availability(result['torrent_seeds'],
                                                   result['torrent_leeches'])

      results.add(result)

    return results
Example #17
    def search(self, entry, config=None):

        txheaders = {
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-us,en;q=0.5',
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
            'Keep-Alive': '300',
            'Connection': 'keep-alive',
        }
        nzbs = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            query = search_string
            url = u'http://newzleech.com/?%s' % str(urllib.urlencode({'q': query.encode('latin1'),
                                                                      'm': 'search', 'group': '', 'min': 'min',
                                                                      'max': 'max', 'age': '', 'minage': '', 'adv': ''}))
            #log.debug('Search url: %s' % url)

            req = urllib2.Request(url, headers=txheaders)
            page = urlopener(req, log)
            soup = get_soup(page)

            for item in soup.find_all('table', attrs={'class': 'contentt'}):
                subject_tag = item.find('td', attrs={'class': 'subject'}).next
                subject = ''.join(subject_tag.find_all(text=True))
                complete = item.find('td', attrs={'class': 'complete'}).contents[0]
                size = item.find('td', attrs={'class': 'size'}).contents[0]
                nzb_url = 'http://newzleech.com/' + item.find('td', attrs={'class': 'get'}).next.get('href')

                # generate regexp from entry title and see if it matches subject
                regexp = query
                wildcardize = [' ', '-']
                for wild in wildcardize:
                    regexp = regexp.replace(wild, '.')
                regexp = '.*' + regexp + '.*'
                #log.debug('Title regexp: %s' % regexp)

                if re.match(regexp, subject):
                    log.debug('%s matches to regexp' % subject)
                    if complete != u'100':
                        log.debug('Match is incomplete %s from newzleech, skipping ..' % query)
                        continue
                    log.info('Found \'%s\'' % query)

                    try:
                        size_num = float(size[:-3])
                    except (ValueError, TypeError):
                        log.error('Failed to parse_size %s' % size)
                        size_num = 0
                    # convert into megabytes
                    if 'GB' in size:
                        size_num *= 1024
                    if 'KB' in size:
                        size_num /= 1024

                    # choose largest file
                    nzbs.add(Entry(title=subject, url=nzb_url, content_size=size_num, search_sort=size_num))

        return nzbs
Example #18
 def parse_download_page(self, url):
     txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
     req = urllib2.Request(url, None, txheaders)
     page = urlopener(req, log)
     try:
         soup = get_soup(page)
     except Exception as e:
         raise UrlRewritingError(e)
Example #19
 def parse_download_page(self, url):
     page = urlopener(url, log)
     log.debug('%s opened', url)
     try:
         soup = get_soup(page)
         torrent_url = 'http://www.t411.me' + soup.find(text='Télécharger').findParent().get('href')
     except Exception as e:
         raise UrlRewritingError(e)
Example #20
 def parse_download_page(self, url):
     txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
     req = urllib2.Request(url, None, txheaders)
     page = urlopener(req, log)
     try:
         soup = get_soup(page)
     except Exception as e:
         raise UrlRewritingError(e)
Example #21
 def process_invalid_content(self, feed, url):
     """If feedparser reports error, save the received data and log error."""
     log.critical('Invalid XML received from feed %s' % feed.name)
     try:
         req = urlopener(url, log)
     except ValueError as exc:
         log.debug('invalid url `%s` due to %s (ok for a file)' % (url, exc))
         return
Example #22
    def search_title(self, name, comparator=StringComparator(), url=None):
        """
            Search for name from piratebay.
            If optional search :url: is passed it will be used instead of internal search.
        """

        comparator.set_seq1(name)
        name = comparator.search_string()
        if not url:
            # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
            url = 'http://thepiratebay.org/search/' + urllib.quote(name.encode('utf-8'))
            log.debug('Using %s as piratebay search url' % url)
        page = urlopener(url, log)
        soup = get_soup(page)
        entries = []
        for link in soup.findAll('a', attrs={'class': 'detLink'}):
            comparator.set_seq2(link.contents[0])
            log.debug('name: %s' % comparator.a)
            log.debug('found name: %s' % comparator.b)
            log.debug('confidence: %s' % comparator.ratio())
            if not comparator.matches():
                continue
            entry = Entry()
            entry['title'] = link.contents[0]
            entry['url'] = 'http://thepiratebay.org' + link.get('href')
            tds = link.parent.parent.parent.findAll('td')
            entry['torrent_seeds'] = int(tds[-2].contents[0])
            entry['torrent_leeches'] = int(tds[-1].contents[0])
            entry['search_ratio'] = comparator.ratio()
            entry['search_sort'] = torrent_availability(entry['torrent_seeds'], entry['torrent_leeches'])
            # Parse content_size
            size = link.findNext(attrs={'class': 'detDesc'}).contents[0]
            size = re.search('Size ([\.\d]+)\xa0([GMK])iB', size)
            if size:
                if size.group(2) == 'G':
                    entry['content_size'] = int(float(size.group(1)) * 1000 ** 3 / 1024 ** 2)
                elif size.group(2) == 'M':
                    entry['content_size'] = int(float(size.group(1)) * 1000 ** 2 / 1024 ** 2)
                else:
                    entry['content_size'] = int(float(size.group(1)) * 1000 / 1024 ** 2)
            entries.append(entry)

        if not entries:
            dashindex = name.rfind('-')
            if dashindex != -1:
                return self.search_title(name[:dashindex], comparator=comparator)
            else:
                raise PluginWarning('No close matches for %s' % name, log, log_once=True)

        def score(a):
            return torrent_availability(a['torrent_seeds'], a['torrent_leeches'])

        entries.sort(reverse=True, key=lambda x: x.get('search_sort', 0))

        #for torrent in torrents:
        #    log.debug('%s link: %s' % (torrent, torrent['link']))

        return entries
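The content_size handling above pulls a 'Size 1.37 GiB' style fragment (with a non-breaking space) out of the detDesc cell and converts it to megabytes. A standalone sketch of that conversion; the sample string is made up.

import re

def parse_tpb_size(desc):
    match = re.search(u'Size ([\\.\\d]+)\xa0([GMK])iB', desc)
    if not match:
        return None
    value, unit = float(match.group(1)), match.group(2)
    factor = {'G': 1000 ** 3, 'M': 1000 ** 2, 'K': 1000}[unit]
    return int(value * factor / 1024 ** 2)   # same arithmetic as the snippet

# parse_tpb_size(u'Uploaded 03-14\xa02010, Size 1.37\xa0GiB, ULed by x') -> 1306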
Example #23
    def parse_site(self, url, task):
        """Parse configured url and return releases array"""

        page = urlopener(url, log)
        soup = get_soup(page)

        releases = []
        for entry in soup.find_all('div', attrs={'class': 'entry'}):
            release = {}
            title = entry.find('h2')
            if not title:
                log.debug('No h2 entrytitle')
                continue
            release['title'] = title.a.contents[0].strip()

            log.debug('Processing title %s' % (release['title']))

            for link in entry.find_all('a'):
                # no content in the link
                if not link.contents:
                    continue
                link_name = link.contents[0]
                if link_name is None:
                    continue
                if not isinstance(link_name, NavigableString):
                    continue
                link_name = link_name.strip().lower()
                if link.has_attr('href'):
                    link_href = link['href']
                else:
                    continue
                log.debug('found link %s -> %s' % (link_name, link_href))
                # handle imdb link
                if link_name.lower() == 'imdb':
                    log.debug('found imdb link %s' % link_href)
                    release['imdb_url'] = link_href

                # test if entry with this url would be rewritable by known plugins (ie. downloadable)
                temp = {}
                temp['title'] = release['title']
                temp['url'] = link_href
                urlrewriting = plugin.get_plugin_by_name('urlrewriting')
                if urlrewriting['instance'].url_rewritable(task, temp):
                    release['url'] = link_href
                    log.trace('--> accepting %s (resolvable)' % link_href)
                else:
                    log.trace('<-- ignoring %s (non-resolvable)' % link_href)

            # reject if no torrent link
            if not 'url' in release:
                from flexget.utils.log import log_once
                log_once(
                    '%s skipped due to missing or unsupported (unresolvable) download link'
                    % (release['title']), log)
            else:
                releases.append(release)

        return releases
Example #24
    def entries_from_search(self, name, url=None):
        """Parses torrent download url from search results"""
        name = normalize_unicode(name)
        if not url:
            url = 'http://www.newtorrents.info/search/%s' % urllib.quote(
                name.encode('utf-8'), safe=b':/~?=&%')

        log.debug('search url: %s' % url)

        html = urlopener(url, log).read()
        # fix </SCR'+'IPT> so that BS does not crash
        # TODO: should use beautifulsoup massage
        html = re.sub(r'(</SCR.*?)...(.*?IPT>)', r'\1\2', html)

        soup = get_soup(html)
        # saving torrents in dict
        torrents = []
        for link in soup.find_all('a', attrs={'href': re.compile('down.php')}):
            torrent_url = 'http://www.newtorrents.info%s' % link.get('href')
            release_name = link.parent.next.get('title')
            # quick dirty hack
            seed = link.find_next('td', attrs={
                'class': re.compile('s')
            }).renderContents()
            if seed == 'n/a':
                seed = 0
            else:
                try:
                    seed = int(seed)
                except ValueError:
                    log.warning(
                        'Error converting seed value (%s) from newtorrents to integer.'
                        % seed)
                    seed = 0

            #TODO: also parse content_size and peers from results
            torrents.append(
                Entry(title=release_name,
                      url=torrent_url,
                      torrent_seeds=seed,
                      search_sort=torrent_availability(seed, 0)))
        # sort with seed number Reverse order
        torrents.sort(reverse=True, key=lambda x: x.get('search_sort', 0))
        # choose the torrent
        if not torrents:
            dashindex = name.rfind('-')
            if dashindex != -1:
                return self.entries_from_search(name[:dashindex])
            else:
                return torrents
        else:
            if len(torrents) == 1:
                log.debug('found only one matching search result.')
            else:
                log.debug(
                    'search result contains multiple matches, sorted %s by most seeders'
                    % torrents)
            return torrents
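When nothing matches, the search above retries with everything after the last '-' stripped from the name. A tiny illustration of the names that fallback would try; the helper and sample name are made up.

def shortened_search_names(name):
    names = [name]
    while '-' in names[-1]:
        names.append(names[-1][:names[-1].rfind('-')])
    return names

# shortened_search_names('Some.Show-S01E01-720p')
# -> ['Some.Show-S01E01-720p', 'Some.Show-S01E01', 'Some.Show']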
Example #25
def get_first_result(tmdb_function, value):
    if isinstance(value, basestring):
        value = value.replace(' ', '+').encode('utf-8')
    url = '%s/2.1/Movie.%s/%s/json/%s/%s' % (server, tmdb_function, lang, api_key, value)
    try:
        data = urlopener(url, log)
    except URLError as e:
        log.warning('Request failed %s' % url)
        return
Example #26
    def parse_site(self, url, task):
        """Parse configured url and return releases array"""

        page = urlopener(url, log)
        soup = get_soup(page)

        releases = []
        for entry in soup.find_all('div', attrs={'class': 'entry'}):
            release = {}
            title = entry.find('h2')
            if not title:
                log.debug('No h2 entrytitle')
                continue
            release['title'] = title.a.contents[0].strip()

            log.debug('Processing title %s' % (release['title']))

            for link in entry.find_all('a'):
                # no content in the link
                if not link.contents:
                    continue
                link_name = link.contents[0]
                if link_name is None:
                    continue
                if not isinstance(link_name, NavigableString):
                    continue
                link_name = link_name.strip().lower()
                if link.has_attr('href'):
                    link_href = link['href']
                else:
                    continue
                log.debug('found link %s -> %s' % (link_name, link_href))
                # handle imdb link
                if link_name.lower() == 'imdb':
                    log.debug('found imdb link %s' % link_href)
                    release['imdb_url'] = link_href

                # test if entry with this url would be rewritable by known plugins (ie. downloadable)
                temp = {}
                temp['title'] = release['title']
                temp['url'] = link_href
                urlrewriting = plugin.get_plugin_by_name('urlrewriting')
                if urlrewriting['instance'].url_rewritable(task, temp):
                    release['url'] = link_href
                    log.trace('--> accepting %s (resolvable)' % link_href)
                else:
                    log.trace('<-- ignoring %s (non-resolvable)' % link_href)

            # reject if no torrent link
            if not 'url' in release:
                from flexget.utils.log import log_once
                log_once('%s skipped due to missing or unsupported (unresolvable) download link' % (release['title']), log)
            else:
                releases.append(release)

        return releases
Example #27
 def get_tracker_seeds(self, url, info_hash):
     url = self.get_scrape_url(url)
     if not url:
         return 0
     log.debug('Checking for seeds from %s' % url)
     url += '?info_hash=%s' % quote(info_hash.decode('hex'))
     data = bdecode(urlopener(url, log, retries=2).read())['files']
     if not data:
         return 0
     return data.values()[0]['complete']
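The scrape request above URL-encodes the raw 20-byte info_hash (the hex string is converted back to bytes first). A standalone sketch of that URL construction, with a made-up tracker URL and hash:

import binascii
try:
    from urllib import quote        # Python 2, matching the snippet
except ImportError:
    from urllib.parse import quote  # Python 3 fallback

scrape_url = 'http://tracker.example.com/scrape'            # assumed get_scrape_url() result
info_hash = '0123456789abcdef0123456789abcdef01234567'      # made-up 40-char hex digest
full_url = scrape_url + '?info_hash=' + quote(binascii.unhexlify(info_hash))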
Example #28
 def get_tracker_seeds(self, url, info_hash):
     url = self.get_scrape_url(url)
     if not url:
         return 0
     log.debug('Checking for seeds from %s' % url)
     url += '?info_hash=%s' % quote(info_hash.decode('hex'))
     try:
         data = bdecode(urlopener(url, log, retries=2).read()).get('files')
     except SyntaxError as e:
         log.warning('Error bdecoding tracker response: %s' % e)
         return 0
Example #29
def get_tracker_seeds(url, info_hash):
    url = get_scrape_url(url, info_hash)
    if not url:
        log.debug('if not url is true returning 0')
        return 0
    log.debug('Checking for seeds from %s' % url)
    try:
        data = bdecode(urlopener(url, log, retries=1, timeout=10).read()).get('files')
    except SyntaxError as e:
        log.warning('Error decoding tracker response: %s' % e)
        return 0
Example #30
    def on_task_input(self, task, config, session=None):
        account_id = str(config['account_id'])
        # Get the cache for this user
        user_favorites = session.query(ThetvdbFavorites).filter(
            ThetvdbFavorites.account_id == account_id).first()
        if user_favorites and user_favorites.updated > datetime.now(
        ) - timedelta(minutes=10):
            log.debug(
                'Using cached thetvdb favorite series information for account ID %s'
                % account_id)
        else:
            try:
                url = 'http://thetvdb.com/api/User_Favorites.php?accountid=%s' % account_id
                log.debug('requesting %s' % url)
                data = ElementTree.fromstring(urlopener(url, log).read())
                favorite_ids = []
                for i in data.findall('Series'):
                    if i.text:
                        favorite_ids.append(i.text)
            except (urllib2.URLError, IOError, AttributeError):
                import traceback
                # If there are errors getting the favorites or parsing the xml, fall back on cache
                log.error(
                    'Error retrieving favorites from thetvdb, using cache.')
                log.debug(traceback.format_exc())
            else:
                # Successfully updated from tvdb, update the database
                log.debug('Successfully updated favorites from thetvdb.com')
                if not user_favorites:
                    user_favorites = ThetvdbFavorites(account_id, favorite_ids)
                else:
                    user_favorites.series_ids = favorite_ids
                    user_favorites.updated = datetime.now()
                session.merge(user_favorites)
        if not user_favorites.series_ids:
            log.warning('Didn\'t find any thetvdb.com favorites.')
            return

        # Construct list of entries with our series names
        entries = []
        for series_id in user_favorites.series_ids:
            # Lookup the series name from the id
            try:
                series = lookup_series(tvdb_id=series_id)
            except LookupError as e:
                log.error('Error looking up %s from thetvdb: %s' %
                          (series_id, e.message))
            else:
                series_name = series.seriesname
                if config.get('strip_dates'):
                    # Remove year from end of series name if present
                    series_name = re.sub(r'\s+\(\d{4}\)$', '', series_name)
                entries.append(Entry(series_name, '', tvdb_id=series.id))
        return entries
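The favorites XML above only needs the Series children of the response root. A minimal sketch of that parsing step, fed from a literal string instead of urlopener; the XML layout mirrors what the snippet expects and the ids are made up.

from xml.etree import ElementTree

xml = '<Favorites><Series>80348</Series><Series>73739</Series></Favorites>'
data = ElementTree.fromstring(xml)
favorite_ids = [i.text for i in data.findall('Series')]
# favorite_ids == ['80348', '73739']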
Example #31
 def parse_download_page(self, url):
     page = urlopener(url, log)
     try:
         soup = get_soup(page)
         tag_div = soup.find('div', attrs={'class': 'download'})
         if not tag_div:
             raise UrlRewritingError('Unable to locate download link from url %s' % url)
         tag_a = tag_div.find('a')
         torrent_url = tag_a.get('href')
         return torrent_url
     except Exception as e:
         raise UrlRewritingError(e)
Example #32
 def parse_download_page(self, url):
     txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
     req = urllib2.Request(url, None, txheaders)
     page = urlopener(req, log)
     try:
         soup = get_soup(page)
     except Exception as e:
         raise UrlRewritingError(e)
     down_link = soup.find('a', attrs={'href': re.compile("download/\d+/.*\.torrent")})
     if not down_link:
         raise UrlRewritingError('Unable to locate download link from url %s' % url)
     return 'http://www.deadfrog.us/' + down_link.get('href')
Example #33
    def on_task_input(self, task, config):
        # use rss plugin
        rss_config = {'url': self.rss_url}
        rss_entries = super(AppleTrailers,
                            self).on_task_input(task, rss_config)

        # Multiple entries can point to the same movie page (trailer 1, clip
        # 1, etc.)
        entries = {}
        for entry in rss_entries:
            url = entry['original_url']
            if url in entries:
                continue
            else:
                title = entry['title']
                entries[url] = title[:title.rfind('-')].rstrip()

        result = []

        for url, title in entries.iteritems():
            inc_url = url + 'includes/playlists/web.inc'
            try:
                page = urlopener(inc_url, log)
            except HTTPError as err:
                log.warning("HTTPError when opening playlist page: %d %s" %
                            (err.code, err.reason))
                continue

            soup = get_soup(page)
            links = soup.find_all('a',
                                  attrs={
                                      'class': 'target-quicktimeplayer',
                                      'href': re.compile(r'_h?480p\.mov$')
                                  })
            for link in links:
                url = link.get('href')
                url = url[:url.rfind('_')]
                quality = self.quality.lower()

                if quality == 'ipod':
                    url += '_i320.m4v'
                else:
                    url += '_h' + quality + '.mov'

                entry = Entry()
                entry['url'] = url
                entry['title'] = title

                match = re.search(r'.*/([^?#]*)', url)
                entry['filename'] = match.group(1)

                result.append(entry)
                log.debug('found trailer %s', url)
Example #34
    def on_task_input(self, task):
        pageurl = "http://tvtorrents.com/loggedin/recently_aired.do"
        log.debug("InputPlugin tvtorrents requesting url %s" % pageurl)

        page = urlopener(pageurl, log)
        soup = get_soup(page)

        hscript = soup.find('script', src=None).contents[0]
        hlines = hscript.splitlines()
        hash = hlines[15].strip().split("'")[1]
        digest = hlines[16].strip().split("'")[1]
        hurl = hlines[17].strip().split("'")
        hashurl = hurl[1] + "%s" + hurl[3] + digest + hurl[5] + hash

        for link in soup.find_all('a'):
            if not 'href' in link:
                continue
            url = link['href']
            title = link.contents[0]

            if link.has_attr(
                    'onclick') and link['onclick'].find("loadTorrent") != -1:
                infohash = link['onclick'].split("'")[1]
                td = link.parent.parent.contents[4]
                sname = td.contents[0].strip()
                epi = td.contents[2].contents[0].strip()
                title = "%s - %s" % (sname, epi)
                url = hashurl % (infohash, )
            else:
                continue
            if title is None:
                continue

            title = title.strip()
            if not title:
                continue

            # fix broken urls
            if url.startswith('//'):
                url = "http:" + url
            elif not url.startswith(('http://', 'https://')):
                url = urlparse.urljoin(pageurl, url)

            # in case the title contains xxxxxxx.torrent - foooo.torrent clean it a bit (get upto first .torrent)
            if title.lower().find('.torrent') > 0:
                title = title[:title.lower().find(".torrent")]

            entry = Entry()
            entry['url'] = url
            entry['title'] = title

            task.entries.append(entry)
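The URL clean-up above gives protocol-relative links an 'http:' prefix and joins anything that is not absolute against the page URL. A short, self-contained illustration using the same urlparse.urljoin call; the relative link is made up.

try:
    import urlparse                     # Python 2, as in the snippet
except ImportError:
    import urllib.parse as urlparse     # Python 3 fallback

pageurl = 'http://tvtorrents.com/loggedin/recently_aired.do'

def fix_url(url):
    if url.startswith('//'):
        return 'http:' + url
    if not url.startswith(('http://', 'https://')):
        return urlparse.urljoin(pageurl, url)
    return url

# fix_url('//tvtorrents.com/x.torrent')  -> 'http://tvtorrents.com/x.torrent'
# fix_url('FetchTorrent?hash=abc')       -> 'http://tvtorrents.com/loggedin/FetchTorrent?hash=abc'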
Example #35
    def on_task_input(self, task, config):
        account_id = str(config['account_id'])
        favorite_ids = []
        # Get the cache for this user
        with Session() as session:
            user_favorites = session.query(ThetvdbFavorites).filter(ThetvdbFavorites.account_id == account_id).first()
            if user_favorites:
                favorite_ids = user_favorites.series_ids
            if user_favorites and user_favorites.updated > datetime.now() - timedelta(minutes=10):
                log.debug('Using cached thetvdb favorite series information for account ID %s' % account_id)
            else:
                try:
                    url = 'http://thetvdb.com/api/User_Favorites.php?accountid=%s' % account_id
                    log.debug('requesting %s' % url)
                    data = ElementTree.fromstring(urlopener(url, log).read())
                    favorite_ids = []
                    for i in data.findall('Series'):
                        if i.text:
                            favorite_ids.append(i.text)
                except (urllib2.URLError, IOError, AttributeError):
                    import traceback
                    # If there are errors getting the favorites or parsing the xml, fall back on cache
                    log.error('Error retrieving favorites from thetvdb, using cache.')
                    log.debug(traceback.format_exc())
                else:
                    # Successfully updated from tvdb, update the database
                    log.debug('Successfully updated favorites from thetvdb.com')
                    if not user_favorites:
                        user_favorites = ThetvdbFavorites(account_id, favorite_ids)
                    else:
                        user_favorites.series_ids = favorite_ids
                        user_favorites.updated = datetime.now()
                    session.merge(user_favorites)
        if not favorite_ids:
            log.warning('Didn\'t find any thetvdb.com favorites.')
            return

        # Construct list of entries with our series names
        entries = []
        for series_id in favorite_ids:
            # Lookup the series name from the id
            try:
                series = lookup_series(tvdb_id=series_id)
            except LookupError as e:
                log.error('Error looking up %s from thetvdb: %s' % (series_id, e.args[0]))
            else:
                series_name = series.seriesname
                if config.get('strip_dates'):
                    # Remove year from end of series name if present
                    series_name = re.sub(r'\s+\(\d{4}\)$', '', series_name)
                entries.append(Entry(series_name, '', tvdb_id=series.id))
        return entries
Example #36
    def entries_from_search(self, name, url=None, comparator=StringComparator(cutoff=0.9)):
        """Parses torrent download url from search results"""
        comparator.set_seq1(name)
        name = comparator.search_string()
        if not url:
            url = 'http://www.newtorrents.info/search/%s' % urllib.quote(name, safe=':/~?=&%')

        log.debug('search url: %s' % url)

        html = urlopener(url, log).read()
        # fix </SCR'+'IPT> so that BS does not crash
        # TODO: should use beautifulsoup massage
        html = re.sub(r'(</SCR.*?)...(.*?IPT>)', r'\1\2', html)
        
        soup = get_soup(html)
        # saving torrents in dict
        torrents = []
        for link in soup.findAll('a', attrs={'href': re.compile('down.php')}):
            torrent_url = 'http://www.newtorrents.info%s' % link.get('href')
            release_name = link.parent.next.get('title')
            # quick dirty hack
            seed = link.findNext('td', attrs={'class': re.compile('s')}).renderContents()
            if seed == 'n/a':
                seed = 0
            else:
                try:
                    seed = int(seed)
                except ValueError:
                    log.warning('Error converting seed value (%s) from newtorrents to integer.' % seed)
                    seed = 0
            
            #TODO: also parse content_size and peers from results
            if comparator.matches(release_name):
                torrents.append(Entry(title=release_name, url=torrent_url, torrent_seeds=seed,
                                      search_ratio=comparator.ratio(), search_sort=torrent_availability(seed, 0)))
            else:
                log.debug('rejecting search result: %s !~ %s' % (release_name, name))
        # sort with seed number Reverse order
        torrents.sort(reverse=True, key=lambda x: x.get('search_sort', 0))
        # choose the torrent
        if not torrents:
            dashindex = name.rfind('-')
            if dashindex != -1:
                return self.entries_from_search(name[:dashindex], comparator=comparator)
            else:
                raise PluginWarning('No matches for %s' % name, log, log_once=True)
        else:
            if len(torrents) == 1:
                log.debug('found only one matching search result.')
            else:
                log.debug('search result contains multiple matches, sorted %s by most seeders' % torrents)
            return torrents
Example #37
def get_json(url):
    try:
        log.debug('fetching json at %s' % url)
        data = urlopener(url, log)
    except URLError as e:
        log.warning('Request failed %s' % url)
        return
    try:
        result = json.load(data)
    except ValueError:
        log.warning('Rotten Tomatoes returned invalid json at: %s' % url)
        return
    return result
Example #38
 def parse_download_page(self, url):
     txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
     req = urllib2.Request(url, None, txheaders)
     page = urlopener(req, log)
     try:
         soup = get_soup(page)
     except Exception as e:
         raise UrlRewritingError(e)
     tag_a = soup.find('a', attrs={'class': 'download_link'})
     if not tag_a:
         raise UrlRewritingError('Unable to locate download link from url %s' % url)
     torrent_url = 'http://www.bakabt.com' + tag_a.get('href')
     return torrent_url
Example #39
 def parse_download_page(self, url):
     txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
     req = urllib2.Request(url, None, txheaders)
     page = urlopener(req, log)
     try:
         soup = get_soup(page)
     except Exception as e:
         raise UrlRewritingError(e)
     tag_a = soup.find('a', attrs={'class': 'download_link'})
     if not tag_a:
         raise UrlRewritingError('Unable to locate download link from url %s' % url)
     torrent_url = 'http://www.bakabt.com' + tag_a.get('href')
     return torrent_url
Example #40
 def parse_download_page(self, url):
     txheaders = {"User-agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"}
     req = urllib2.Request(url, None, txheaders)
     page = urlopener(req, log)
     try:
         soup = get_soup(page)
     except Exception as e:
         raise UrlRewritingError(e)
     tag_a = soup.find("a", attrs={"class": "download_link"})
     if not tag_a:
         raise UrlRewritingError("Unable to locate download link from url %s" % url)
     torrent_url = "http://www.bakabt.com" + tag_a.get("href")
     return torrent_url
Example #41
def get_json(url):
    try:
        log.debug('fetching json at %s' % url)
        data = urlopener(url, log)
    except URLError as e:
        log.warning('Request failed %s' % url)
        return
    try:
        result = json.load(data)
    except ValueError:
        log.warning('Rotten Tomatoes returned invalid json at: %s' % url)
        return
    return result
Example #42
    def parse_download_page(self, url):
        page = urlopener(url, log)
        log.debug('%s opened', url)
        try:
            soup = get_soup(page)
            torrent_url = 'http://www.t411.me' + soup.find(text='Télécharger').findParent().get('href')
        except Exception as e:
            raise UrlRewritingError(e)

        if not torrent_url:
            raise UrlRewritingError('Unable to locate download link from url %s' % url)

        return torrent_url
Example #43
    def entries_from_search(self, name, url=None):
        """Parses torrent download url from search results"""
        name = normalize_unicode(name)
        if not url:
            url = "http://www.newtorrents.info/search/%s" % urllib.quote(name.encode("utf-8"), safe=b":/~?=&%")

        log.debug("search url: %s" % url)

        html = urlopener(url, log).read()
        # fix </SCR'+'IPT> so that BS does not crash
        # TODO: should use beautifulsoup massage
        html = re.sub(r"(</SCR.*?)...(.*?IPT>)", r"\1\2", html)

        soup = get_soup(html)
        # saving torrents in dict
        torrents = []
        for link in soup.find_all("a", attrs={"href": re.compile("down.php")}):
            torrent_url = "http://www.newtorrents.info%s" % link.get("href")
            release_name = link.parent.next.get("title")
            # quick dirty hack
            seed = link.find_next("td", attrs={"class": re.compile("s")}).renderContents()
            if seed == "n/a":
                seed = 0
            else:
                try:
                    seed = int(seed)
                except ValueError:
                    log.warning("Error converting seed value (%s) from newtorrents to integer." % seed)
                    seed = 0

            # TODO: also parse content_size and peers from results
            torrents.append(
                Entry(
                    title=release_name, url=torrent_url, torrent_seeds=seed, search_sort=torrent_availability(seed, 0)
                )
            )
        # sort with seed number Reverse order
        torrents.sort(reverse=True, key=lambda x: x.get("search_sort", 0))
        # choose the torrent
        if not torrents:
            dashindex = name.rfind("-")
            if dashindex != -1:
                return self.entries_from_search(name[:dashindex])
            else:
                return torrents
        else:
            if len(torrents) == 1:
                log.debug("found only one matching search result.")
            else:
                log.debug("search result contains multiple matches, sorted %s by most seeders" % torrents)
            return torrents
Example #44
    def on_feed_input(self, feed):
        pageurl = "http://tvtorrents.com/loggedin/recently_aired.do"
        log.debug("InputPlugin tvtorrents requesting url %s" % pageurl)

        page = urlopener(pageurl, log)
        soup = get_soup(page)

        hscript = soup.find('script', src=None).contents[0]
        hlines = hscript.splitlines()
        hash = hlines[15].strip().split("'")[1]
        digest = hlines[16].strip().split("'")[1]
        hurl = hlines[17].strip().split("'")
        hashurl = hurl[1] + "%s" + hurl[3] + digest + hurl[5] + hash

        for link in soup.findAll('a'):
            if not 'href' in link:
                continue
            url = link['href']
            title = link.contents[0]

            if link.has_key('onclick') and link['onclick'].find("loadTorrent") != -1:
                infohash = link['onclick'].split("'")[1]
                td = link.parent.parent.contents[4]
                sname = td.contents[0].strip()
                epi = td.contents[2].contents[0].strip()
                title = "%s - %s" % (sname, epi)
                url = hashurl % (infohash,)
            else:
                continue
            if title is None:
                continue

            title = title.strip()
            if not title:
                continue

            # fix broken urls
            if url.startswith('//'):
                url = "http:" + url
            elif not url.startswith(('http://', 'https://')):
                url = urlparse.urljoin(pageurl, url)

            # in case the title contains xxxxxxx.torrent - foooo.torrent clean it a bit (get upto first .torrent)
            if title.lower().find('.torrent') > 0:
                title = title[:title.lower().find(".torrent")]

            entry = Entry()
            entry['url'] = url
            entry['title'] = title

            feed.entries.append(entry)
Example #45
 def on_task_input(self, task, config):
     entries = []
     page = urlopener(config['url'], log)
     for row in csv.reader(page):
         if not row:
             continue
         entry = Entry()
         for name, index in config.get('values', {}).items():
             try:
                 entry[name] = row[index - 1]
             except IndexError:
                 raise Exception('Field `%s` index is out of range' % name)
         entries.append(entry)
     return entries
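The 'values' option above maps entry field names to 1-based CSV column indexes. A self-contained sketch of that mapping with an in-memory CSV; the config and rows are made up.

import csv

config = {'values': {'title': 1, 'url': 2}}
lines = ['Some.Release,http://example.com/some.torrent',
         'Other.Release,http://example.com/other.torrent']

entries = []
for row in csv.reader(lines):
    if not row:
        continue
    entry = {}
    for name, index in config.get('values', {}).items():
        entry[name] = row[index - 1]   # config indexes are 1-based
    entries.append(entry)
# entries[0] == {'title': 'Some.Release', 'url': 'http://example.com/some.torrent'}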
Example #46
 def url_from_page(self, url):
     """Parses torrent url from newtorrents download page"""
     try:
         page = urlopener(url, log)
         data = page.read()
     except urllib2.URLError:
         raise UrlRewritingError('URLerror when retrieving page')
     p = re.compile("copy\(\'(.*)\'\)", re.IGNORECASE)
     f = p.search(data)
     if not f:
         # the link in which plugin relies is missing!
         raise UrlRewritingError('Failed to get url from download page. Plugin may need a update.')
     else:
         return f.group(1)
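The rewrite above scrapes the torrent URL out of a JavaScript copy('...') call on the download page. A standalone sketch of that regex extraction against a made-up page fragment:

import re

data = "<a href=\"javascript:;\" onclick=\"copy('http://www.newtorrents.info/down.php?id=1234')\">"
match = re.search(r"copy\('(.*)'\)", data, re.IGNORECASE)
torrent_url = match.group(1) if match else None
# torrent_url == 'http://www.newtorrents.info/down.php?id=1234'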
Example #47
 def url_from_page(self, url):
     """Parses torrent url from newtorrents download page"""
     try:
         page = urlopener(url, log)
         data = page.read()
     except urllib2.URLError:
         raise UrlRewritingError('URLerror when retrieving page')
     p = re.compile("copy\(\'(.*)\'\)", re.IGNORECASE)
     f = p.search(data)
     if not f:
         # the link in which plugin relies is missing!
         raise UrlRewritingError('Failed to get url from download page. Plugin may need a update.')
     else:
         return f.group(1)
Example #48
    def update_rated(self, task, config):
        """Update my movies list"""
        # set first last_time into past so we trigger update on first run
        next_time = task.simple_persistence.setdefault('next_time',
                                                       datetime.datetime.min)
        log.debug('next_time: %s' % next_time)
        if datetime.datetime.now() <= next_time:
            return
        task.simple_persistence['next_time'] = (datetime.datetime.now() +
                                                datetime.timedelta(hours=4))
        log.debug('updating my movies from %s' % config['url'])

        massage = []

        # fix imdb html, just enough to pass parser
        #
        # <td class=list bgcolor="#CCCCCC"} colspan="4">
        #                                 ^ god damn noobs

        massage.append((re.compile('"}'), lambda match: '"'))

        # onclick="(new Image()).src='/rg/home/navbar/images/b.gif?link=/'"">IMDb</a>
        #                                                                 ^ are you even trying?

        massage.append((re.compile('/\'""'), lambda match: '/\'"'))

        # <table class="footer" id="amazon-affiliates"">
        #                                             ^ ffs, I don't think they are even trying ...

        massage.append((re.compile('amazon-affiliates""'),
                        lambda match: 'amazon-affiliates"'))

        data = urlopener(config['url'], log)
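        # NOTE: the massage list above is built but never handed to BeautifulSoup below;
        # under BeautifulSoup 3 it would be passed via the markupMassage argument.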
        soup = BeautifulSoup(data)

        count = 0
        for a_imdb_link in soup.find_all(
                'a', attrs={'href': re.compile(r'/title/tt\d+')}):
            imdb_url = 'http://www.imdb.com%s' % a_imdb_link.get('href')

            if not task.session.query(ImdbRated).filter(ImdbRated.url == config['url']).\
                    filter(ImdbRated.imdb_url == imdb_url).first():
                rated = ImdbRated(config['url'], imdb_url)
                task.session.add(rated)
                log.debug('adding %s' % rated)
                count += 1

        if count > 0:
            log.info('Added %s new movies' % count)
Example #49
 def parse_download_page(self, url):
     txheaders = {
         'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
     }
     req = urllib2.Request(url, None, txheaders)
     page = urlopener(req, log)
     try:
         soup = get_soup(page)
     except Exception as e:
         raise UrlRewritingError(e)
     down_link = soup.find('a', attrs={'href': re.compile(".+mp4")})
     if not down_link:
         raise UrlRewritingError(
             'Unable to locate download link from url %s' % url)
     return down_link.get('href')
Example #50
 def parse_download_page(self, page_url):
     page = urlopener(page_url, log)
     try:
         soup = get_soup(page)
     except Exception as e:
         raise UrlRewritingError(e)
     tag_a = soup.find("a", {"class": "dl_link"})
     if not tag_a:
         raise UrlRewritingError(
             'FTDB: unable to locate download link from url %s' % page_url)
     torrent_url = "http://www3.frenchtorrentdb.com" + tag_a.get(
         'href') + "&js=1"
     log.debug('TORRENT URL is : %s' % torrent_url)
     return torrent_url
Example #51
    def parse_download_page(self, url):
        page = urlopener(url, log)
        log.debug('%s opened', url)
        try:
            soup = get_soup(page)
            torrent_url = 'http://www.t411.me' + soup.find(
                text='Télécharger').findParent().get('href')
        except Exception as e:
            raise UrlRewritingError(e)

        if not torrent_url:
            raise UrlRewritingError(
                'Unable to locate download link from url %s' % url)

        return torrent_url
Example #52
def get_first_result(tmdb_function, value):
    if isinstance(value, basestring):
        value = quote(value.encode('utf-8'), safe='')
    url = '%s/2.1/Movie.%s/%s/json/%s/%s' % (server, tmdb_function, lang,
                                             api_key, value)
    try:
        data = urlopener(url, log)
    except URLError:
        log.warning('Request failed %s' % url)
        return
    try:
        result = json.load(data)
    except ValueError:
        log.warning('TMDb returned invalid json.')
        return
    # Make sure there is a valid result to return
    if isinstance(result, list) and len(result):
        result = result[0]
        if isinstance(result, dict) and result.get('id'):
            return result
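
A minimal usage sketch (the 'search' method name and the movie title are illustrative assumptions; server, lang and api_key are the module-level values this helper already expects):

# hypothetical call: fetch the first matching result and log its id
result = get_first_result('search', 'The Matrix')
if result:
    log.debug('first result id: %s' % result['id'])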
Example #53
 def parse_download_page(self, page_url):
     page = urlopener(page_url, log)
     try:
         soup = get_soup(page)
     except Exception as e:
         raise UrlRewritingError(e)
     tag_a = soup.find("a", {"class": "dl_link"})
     if not tag_a:
         if soup.findAll(text="Connexion ?"):
             raise UrlRewritingError('You are not logged in,\
                                      check if your cookie for\
                                      authentication is up to date')
         else:
             raise UrlRewritingError('You have reached your download\
                                     limit per 24hours, so I cannot\
                                     get the torrent')
     torrent_url = ("http://www.frenchtorrentdb.com" + tag_a.get('href') +
                    "&js=1")
     log.debug('Torrent URL is: %s' % torrent_url)
     return torrent_url
Example #54
 def url_rewrite(self, task, entry):
     try:
         # need to fake user agent
         txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
         req = urllib2.Request(entry['url'], None, txheaders)
         page = urlopener(req, log)
         soup = get_soup(page)
         results = soup.find_all('a', attrs={'class': 'l'})
         if not results:
             raise UrlRewritingError('No results')
         for res in results:
             url = res.get('href')
             url = url.replace('/interstitial?url=', '')
             # generate match regexp from google search result title
             regexp = '.*'.join(re.escape(x.contents[0]) for x in res.find_all('em'))
             if re.match(regexp, entry['title']):
                 log.debug('resolved, found with %s' % regexp)
                 entry['url'] = url
                 return
         raise UrlRewritingError('Unable to resolve')
     except Exception as e:
         raise UrlRewritingError(e)
Example #55
    def search(self, query, comparator, config=None):
        # TODO: Implement comparator matching
        url = u'http://newzleech.com/?%s' % str(
            urllib.urlencode({
                'q': query.encode('latin1'),
                'm': 'search',
                'group': '',
                'min': 'min',
                'max': 'max',
                'age': '',
                'minage': '',
                'adv': ''
            }))
        #log.debug('Search url: %s' % url)

        txheaders = {
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-us,en;q=0.5',
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
            'Keep-Alive': '300',
            'Connection': 'keep-alive',
        }

        req = urllib2.Request(url, headers=txheaders)
        page = urlopener(req, log)

        soup = get_soup(page)

        nzbs = []

        for item in soup.find_all('table', attrs={'class': 'contentt'}):
            subject_tag = item.find('td', attrs={'class': 'subject'}).next
            subject = ''.join(subject_tag.find_all(text=True))
            complete = item.find('td', attrs={'class': 'complete'}).contents[0]
            size = item.find('td', attrs={'class': 'size'}).contents[0]
            nzb_url = ('http://newzleech.com/' +
                       item.find('td', attrs={'class': 'get'}).next.get('href'))

            #TODO: confidence match
            # generate regexp from entry title and see if it matches subject
            regexp = query
            wildcardize = [' ', '-']
            for wild in wildcardize:
                regexp = regexp.replace(wild, '.')
            regexp = '.*' + regexp + '.*'
            #log.debug('Title regexp: %s' % regexp)

            if re.match(regexp, subject):
                log.debug('%s matches to regexp' % subject)
                if complete != u'100':
                    log.debug(
                        'Match is incomplete %s from newzleech, skipping ..' %
                        query)
                    continue
                log.info('Found \'%s\'' % query)

                def parse_size(value):
                    try:
                        num = float(value[:-3])
                    except (TypeError, ValueError):
                        log.error('Failed to parse_size %s' % value)
                        return 0
                    # convert into megabytes
                    if 'GB' in value:
                        num *= 1024
                    if 'KB' in value:
                        num /= 1024
                    return num

                nzb = Entry(title=subject,
                            url=nzb_url,
                            content_size=parse_size(size))
                nzb['url'] = nzb_url
                nzb['size'] = parse_size(size)

                nzbs.append(nzb)

        if not nzbs:
            log.debug('Unable to find %s' % query)
            return

        # choose largest file
        nzbs.sort(reverse=True, key=lambda x: x.get('content_size', 0))

        return nzbs
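
As a hypothetical check of the size conversion above: parse_size('700 MB') yields 700.0 and parse_size('1.4 GB') yields roughly 1433.6 megabytes, so the final sort on content_size puts the largest matching NZB first.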
Example #56
    def on_task_download(self, task):

        # filter all entries that have IMDB ID set
        try:
            entries = filter(lambda x: x['imdb_url'] is not None,
                             task.accepted)
        except KeyError:
            # No imdb urls on this task, skip it
            # TODO: should do lookup via imdb_lookup plugin?
            return

        try:
            s = ServerProxy("http://api.opensubtitles.org/xml-rpc")
            res = s.LogIn("", "", "en", "FlexGet")
        except Exception:
            log.warning('Error connecting to opensubtitles.org')
            return

        if res['status'] != '200 OK':
            raise Exception(
                "Login to opensubtitles.org XML-RPC interface failed")

        config = self.get_config(task)

        token = res['token']

        # configuration
        languages = config['languages']
        min_sub_rating = config['min_sub_rating']
        match_limit = config['match_limit']  # minimum similarity ratio for release name matching

        # loop through the entries
        for entry in entries:
            # dig out the raw imdb id
            m = re.search(r"tt(\d+)/$", entry['imdb_url'])
            if not m:
                log.debug("no match for %s" % entry['imdb_url'])
                continue

            imdbid = m.group(1)

            query = []
            for language in languages:
                query.append({'sublanguageid': language, 'imdbid': imdbid})

            subtitles = s.SearchSubtitles(token, query)
            subtitles = subtitles['data']

            # nothing found -> continue
            if not subtitles:
                continue

            # filter bad subs
            subtitles = filter(lambda x: x['SubBad'] == '0', subtitles)
            # some quality required (0.0 == not reviewed)
            subtitles = filter(
                lambda x: float(x['SubRating']) >= min_sub_rating or float(x[
                    'SubRating']) == 0.0, subtitles)

            filtered_subs = []

            # find the best rated subs for each language
            for language in languages:
                langsubs = filter(lambda x: x['SubLanguageID'] == language,
                                  subtitles)

                # did we find any subs for this language?
                if langsubs:

                    def seqmatch(subfile):
                        s = difflib.SequenceMatcher(lambda x: x in " ._",
                                                    entry['title'], subfile)
                        #print "matching: ", entry['title'], subfile, s.ratio()
                        return s.ratio() > match_limit

                    # filter only those that have matching release names
                    langsubs = filter(
                        lambda x: seqmatch(x['MovieReleaseName']), langsubs)

                    if langsubs:
                        # find the best one by SubRating
                        langsubs.sort(key=lambda x: float(x['SubRating']))
                        langsubs.reverse()
                        filtered_subs.append(langsubs[0])

            # download
            for sub in filtered_subs:
                log.debug('Subtitle found: %s (rating %s, language %s)',
                          sub['MovieReleaseName'], sub['SubRating'],
                          sub['SubLanguageID'])

                f = urlopener(sub['ZipDownloadLink'], log)
                subfilename = re.match(
                    '^attachment; filename="(.*)"$',
                    f.info()['content-disposition']).group(1)
                outfile = os.path.join(config['output'], subfilename)
                # the subtitle archive is binary data, so write it in 'wb' mode
                with open(outfile, 'wb') as fp:
                    fp.write(f.read())
                f.close()

        s.LogOut(token)
Example #57
 def post_json_to_trakt(self, url, data):
     """Dumps data as json and POSTs it to the specified url."""
     req = urllib2.Request(url, json.dumps(data),
                           {'content-type': 'application/json'})
     return urlopener(req, log)
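
A hedged usage sketch (the endpoint URL and payload keys below are placeholders, not taken from the source):

# illustrative only: POST a small JSON document to a hypothetical endpoint
response = self.post_json_to_trakt('http://example.com/api/endpoint',
                                   {'username': 'user', 'password_hash': 'abc123'})
log.debug('POST returned: %s' % response.read())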