Esempio n. 1
0
    def run(self):
        """ Process the queue (including waits and retries) """
        from sabnzbd.nzbqueue import NzbQueue
        def sleeper(delay):
            for n in range(delay):
                if not self.shutdown:
                    time.sleep(1.05)

        self.shutdown = False
        msgid = None
        while not self.shutdown:
            if not msgid:
                (msgid, nzo) = self.queue.get()
                if self.shutdown or not msgid:
                    break
            logging.debug("Popping msgid %s", msgid)

            filename, data, newzbin_cat, nzo_info = _grabnzb(msgid)
            if filename and data:
                filename = name_fixer(filename)

                pp = nzo.pp
                script = nzo.script
                cat = nzo.cat
                if cat == '*' or not cat:
                    cat = cat_convert(newzbin_cat)

                priority = nzo.priority
                nzbname = nzo.custom_name

                cat, pp, script, priority = cat_to_opts(cat, pp, script, priority)

                try:
                    sabnzbd.nzbqueue.insert_future_nzo(nzo, filename, msgid, data, pp=pp, script=script, cat=cat, priority=priority, nzbname=nzbname, nzo_info=nzo_info)
                except:
                    logging.error(Ta('Failed to update newzbin job %s'), msgid)
                    logging.info("Traceback: ", exc_info = True)

                    NzbQueue.do.remove(nzo.nzo_id, False)
                msgid = None
            else:
                if filename:
                    sleeper(int(filename))
                else:
                    # Fatal error, give up on this one
                    bad_fetch(nzo, msgid, msg=nzo_info, retry=True)
                    msgid = None

            osx.sendGrowlMsg(T('NZB added to queue'),filename,osx.NOTIFICATION['download'])

            # Keep some distance between the grabs
            sleeper(5)

        logging.debug('Stopping MSGIDGrabber')
Esempio n. 2
0
    def run(self):
        """ Process the queue (including waits and retries) """
        from sabnzbd.nzbqueue import NzbQueue
        self.shutdown = False
        while not self.shutdown:
            time.sleep(5)
            (msgid, nzo) = self.queue.get()
            if self.shutdown or not msgid:
                break
            if nzo.wait and nzo.wait > time.time():
                self.grab(msgid, nzo)
                continue
            logging.debug("Popping msgid %s", msgid)

            filename, data, newzbin_cat, nzo_info = _grabnzb(msgid)
            if filename and data:
                filename = name_fixer(filename)

                pp = nzo.pp
                script = nzo.script
                cat = nzo.cat
                if cat == '*' or not cat:
                    cat = cat_convert(newzbin_cat)

                priority = nzo.priority
                nzbname = nzo.custom_name

                cat, pp, script, priority = cat_to_opts(cat, pp, script, priority)

                try:
                    sabnzbd.nzbqueue.insert_future_nzo(nzo, filename, msgid, data, pp=pp, script=script, cat=cat, priority=priority, nzbname=nzbname, nzo_info=nzo_info)
                    nzo.url = format_source_url(str(msgid))
                except:
                    logging.error(Ta('Failed to update newzbin job %s'), msgid)
                    logging.info("Traceback: ", exc_info = True)

                    NzbQueue.do.remove(nzo.nzo_id, False)
                msgid = None
            else:
                if filename:
                    self.grab(msgid, nzo, float(filename))
                else:
                    # Fatal error, give up on this one
                    bad_fetch(nzo, msgid, msg=nzo_info, retry=True)
                msgid = None

            if msgid:
                growler.send_notification(T('NZB added to queue'), filename, 'download')

        logging.debug('Stopping MSGIDGrabber')
Esempio n. 3
0
    def run(self):
        logging.info('URLGrabber starting up')
        self.shutdown = False

        while not self.shutdown:
            # Don't pound the website!
            time.sleep(5.0)

            (url, future_nzo) = self.queue.get()

            if not url:
                # stop signal, go test self.shutdown
                continue
            if future_nzo and future_nzo.wait and future_nzo.wait > time.time():
                # Requeue when too early and still active

                self.add(url, future_nzo)
                continue
            url = url.replace(' ', '')

            try:
                del_bookmark = not future_nzo
                if future_nzo:
                    # If nzo entry deleted, give up
                    try:
                        deleted = future_nzo.deleted
                    except AttributeError:
                        deleted = True
                    if deleted:
                        logging.debug('Dropping URL %s, job entry missing', url)
                        continue

                # Add nzbmatrix credentials if needed
                url, matrix_id = _matrix_url(url)

                # _grab_url cannot reside in a function, because the tempfile
                # would not survive the end of the function
                if del_bookmark:
                    logging.info('Removing nzbmatrix bookmark %s', matrix_id)
                else:
                    logging.info('Grabbing URL %s', url)
                opener = urllib.FancyURLopener({})
                opener.prompt_user_passwd = None
                opener.addheaders = []
                opener.addheader('User-Agent', 'SABnzbd+/%s' % sabnzbd.version.__version__)
                if not [True for item in _BAD_GZ_HOSTS if item in url]:
                    opener.addheader('Accept-encoding','gzip')
                filename = None
                category = None
                length = 0
                nzo_info = {}
                try:
                    fn, header = opener.retrieve(url)
                except:
                    fn = None

                if fn:
                    for tup in header.items():
                        try:
                            item = tup[0].lower()
                            value = tup[1].strip()
                        except:
                            continue
                        if item in ('category_id', 'x-dnzb-category'):
                            category = value
                        elif item in ('x-dnzb-moreinfo',):
                            nzo_info['more_info'] = value
                        elif item in ('x-dnzb-name',):
                            filename = value
                            if not filename.endswith('.nzb'):
                                filename += '.nzb'
                        elif item in ('content-length',):
                            length = misc.int_conv(value)

                        if not filename:
                            for item in tup:
                                if "filename=" in item:
                                    filename = item[item.index("filename=") + 9:].strip(';').strip('"')

                if matrix_id:
                    fn, msg, retry, wait = _analyse_matrix(fn, matrix_id)
                    if not fn:
                        if retry:
                            logging.info(msg)
                            logging.debug('Retry nzbmatrix item %s after waiting %s sec', matrix_id, wait)
                            self.add(url, future_nzo, wait)
                        else:
                            logging.error(msg)
                            misc.bad_fetch(future_nzo, clean_matrix_url(url), msg, retry=True)
                        continue
                    category = _MATRIX_MAP.get(category, category)

                    if del_bookmark:
                        # No retries of nzbmatrix bookmark removals
                        continue

                else:
                    fn, msg, retry, wait = _analyse_others(fn, url)
                    if not fn:
                        if retry:
                            logging.info('Retry URL %s', url)
                            self.add(url, future_nzo, wait)
                        else:
                            misc.bad_fetch(future_nzo, url, msg, retry=True)
                        continue

                if not filename:
                    filename = os.path.basename(url) + '.nzb'
                # Sanitize and trim name, preserving the extension
                filename, ext = os.path.splitext(filename)
                filename = misc.sanitize_foldername(filename)
                filename += '.' + misc.sanitize_foldername(ext)

                pp = future_nzo.pp
                script = future_nzo.script
                cat = future_nzo.cat
                if (cat is None or cat == '*') and category:
                    cat = misc.cat_convert(category)
                priority = future_nzo.priority
                nzbname = future_nzo.custom_name

                # Check if nzb file
                if os.path.splitext(filename)[1].lower() in ('.nzb', '.gz'):
                    res, nzo_ids = dirscanner.ProcessSingleFile(filename, fn, pp=pp, script=script, cat=cat, priority=priority, \
                                                       nzbname=nzbname, nzo_info=nzo_info, url=future_nzo.url)
                    if res == 0:
                        NzbQueue.do.remove(future_nzo.nzo_id, add_to_history=False)
                    else:
                        if res == -2:
                            logging.info('Incomplete NZB, retry after 5 min %s', url)
                            when = 300
                        else:
                            logging.info('Unknown error fetching NZB, retry after 2 min %s', url)
                            when = 120
                        self.add(url, future_nzo, when)
                # Check if a supported archive
                else:
                    if dirscanner.ProcessArchiveFile(filename, fn, pp, script, cat, priority=priority, url=future_nzo.url)[0] == 0:
                        NzbQueue.do.remove(future_nzo.nzo_id, add_to_history=False)
                    else:
                        # Not a supported filetype, not an nzb (text/html ect)
                        try:
                            os.remove(fn)
                        except:
                            pass
                        logging.info('Unknown filetype when fetching NZB, retry after 30s %s', url)
                        self.add(url, future_nzo, 30)
            except:
                logging.error('URLGRABBER CRASHED', exc_info=True)
                logging.debug("URLGRABBER Traceback: ", exc_info=True)
Esempio n. 4
0
File: rss.py Progetto: rivy/sabnzbd
    def run_feed(self, feed=None, download=False, ignoreFirst=False, force=False, readout=True):
        """ Run the query for one URI and apply filters """
        self.shutdown = False

        if not feed:
            return 'No such feed'

        newlinks = []
        new_downloads = []

        # Preparations, get options
        try:
            feeds = config.get_rss()[feed]
        except KeyError:
            logging.error(T('Incorrect RSS feed description "%s"'), feed)
            logging.info("Traceback: ", exc_info=True)
            return T('Incorrect RSS feed description "%s"') % feed

        uris = feeds.uri()
        defCat = feeds.cat()
        import sabnzbd.api
        if not notdefault(defCat) or defCat not in sabnzbd.api.list_cats(default=False):
            defCat = None
        defPP = feeds.pp()
        if not notdefault(defPP):
            defPP = None
        defScript = feeds.script()
        if not notdefault(defScript):
            defScript = None
        defPrio = feeds.priority()
        if not notdefault(defPrio):
            defPrio = None

        # Preparations, convert filters to regex's
        regexes = []
        reTypes = []
        reCats = []
        rePPs = []
        rePrios = []
        reScripts = []
        reEnabled = []
        for filter in feeds.filters():
            reCat = filter[0]
            if defCat in ('', '*'):
                reCat = None
            reCats.append(reCat)
            rePPs.append(filter[1])
            reScripts.append(filter[2])
            reTypes.append(filter[3])
            if filter[3] in ('<', '>', 'F', 'S'):
                regexes.append(filter[4])
            else:
                regexes.append(convert_filter(filter[4]))
            rePrios.append(filter[5])
            reEnabled.append(filter[6] != '0')
        regcount = len(regexes)

        # Set first if this is the very first scan of this URI
        first = (feed not in self.jobs) and ignoreFirst

        # Add sabnzbd's custom User Agent
        feedparser.USER_AGENT = 'SABnzbd+/%s' % sabnzbd.version.__version__

        # Read the RSS feed
        msg = None
        entries = None
        if readout:
            all_entries = []
            for uri in uris:
                uri = uri.replace(' ', '%20')
                logging.debug("Running feedparser on %s", uri)
                feed_parsed = feedparser.parse(uri.replace('feed://', 'http://'))
                logging.debug("Done parsing %s", uri)

                if not feed_parsed:
                    msg = T('Failed to retrieve RSS from %s: %s') % (uri, '?')
                    logging.info(msg)

                status = feed_parsed.get('status', 999)
                if status in (401, 402, 403):
                    msg = T('Do not have valid authentication for feed %s') % feed
                    logging.info(msg)

                if status >= 500 and status <= 599:
                    msg = T('Server side error (server code %s); could not get %s on %s') % (status, feed, uri)
                    logging.info(msg)

                entries = feed_parsed.get('entries')
                if 'bozo_exception' in feed_parsed and not entries:
                    msg = str(feed_parsed['bozo_exception'])
                    if 'CERTIFICATE_VERIFY_FAILED' in msg:
                        msg = T('Server %s uses an untrusted HTTPS certificate') % get_urlbase(uri)
                        msg += ' - https://sabnzbd.org/certificate-errors'
                        logging.error(msg)
                    else:
                        msg = T('Failed to retrieve RSS from %s: %s') % (uri, xml_name(msg))
                    logging.info(msg)

                if not entries:
                    msg = T('RSS Feed %s was empty') % uri
                    logging.info(msg)
                all_entries.extend(entries)
            entries = all_entries

        if feed not in self.jobs:
            self.jobs[feed] = {}
        jobs = self.jobs[feed]
        if readout:
            if not entries:
                return unicoder(msg)
        else:
            entries = jobs.keys()
            # Sort in the order the jobs came from the feed
            entries.sort(lambda x, y: jobs[x].get('order', 0) - jobs[y].get('order', 0))

        order = 0
        # Filter out valid new links
        for entry in entries:
            if self.shutdown:
                return

            if readout:
                try:
                    link, category, size, age, season, episode = _get_link(uri, entry)
                except (AttributeError, IndexError):
                    link = None
                    category = u''
                    size = 0L
                    age = None
                    logging.info(T('Incompatible feed') + ' ' + uri)
                    logging.info("Traceback: ", exc_info=True)
                    return T('Incompatible feed')
                title = entry.title

                # If there's multiple feeds, remove the duplicates based on title and size
                if len(uris) > 1:
                    skip_job = False
                    for job_link, job in jobs.items():
                        # Allow 5% size deviation because indexers might have small differences for same release
                        if job.get('title') == title and link != job_link and (job.get('size')*0.95) < size < (job.get('size')*1.05):
                            logging.info("Ignoring job %s from other feed", title)
                            skip_job = True
                            break
                    if skip_job:
                        continue
            else:
                link = entry
                category = jobs[link].get('orgcat', '')
                if category in ('', '*'):
                    category = None
                title = jobs[link].get('title', '')
                size = jobs[link].get('size', 0L)
                age = jobs[link].get('age')
                season = jobs[link].get('season', 0)
                episode = jobs[link].get('episode', 0)

            if link:
                # Make sure spaces are quoted in the URL
                link = link.strip().replace(' ', '%20')

                newlinks.append(link)

                if link in jobs:
                    jobstat = jobs[link].get('status', ' ')[0]
                else:
                    jobstat = 'N'
                if jobstat in 'NGB' or (jobstat == 'X' and readout):
                    # Match this title against all filters
                    logging.debug('Trying title %s', title)
                    result = False
                    myCat = defCat
                    myPP = defPP
                    myScript = defScript
                    myPrio = defPrio
                    n = 0
                    if ('F' in reTypes or 'S' in reTypes) and (not season or not episode):
                        season, episode = sabnzbd.newsunpack.analyse_show(title)[1:3]
                        season = int_conv(season)
                        episode = int_conv(episode)

                    # Match against all filters until an positive or negative match
                    logging.debug('Size %s for %s', size, title)
                    for n in xrange(regcount):
                        if reEnabled[n]:
                            if category and reTypes[n] == 'C':
                                found = re.search(regexes[n], category)
                                if not found:
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break
                            elif reTypes[n] == '<' and size and from_units(regexes[n]) < size:
                                # "Size at most" : too large
                                logging.debug('Filter rejected on rule %d', n)
                                result = False
                                break
                            elif reTypes[n] == '>' and size and from_units(regexes[n]) > size:
                                # "Size at least" : too small
                                logging.debug('Filter rejected on rule %d', n)
                                result = False
                                break
                            elif reTypes[n] == 'F' and not ep_match(season, episode, regexes[n]):
                                # "Starting from SxxEyy", too early episode
                                logging.debug('Filter requirement match on rule %d', n)
                                result = False
                                break
                            elif reTypes[n] == 'S' and season and episode and ep_match(season, episode, regexes[n], title):
                                logging.debug('Filter matched on rule %d', n)
                                result = True
                                break
                            else:
                                if regexes[n]:
                                    found = re.search(regexes[n], title)
                                else:
                                    found = False
                                if reTypes[n] == 'M' and not found:
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break
                                if found and reTypes[n] == 'A':
                                    logging.debug("Filter matched on rule %d", n)
                                    result = True
                                    break
                                if found and reTypes[n] == 'R':
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break

                    if len(reCats):
                        if not result and defCat:
                            # Apply Feed-category on non-matched items
                            myCat = defCat
                        elif result and notdefault(reCats[n]):
                            # Use the matched info
                            myCat = reCats[n]
                        elif category and not defCat:
                            # No result and no Feed-category
                            myCat = cat_convert(category)

                        if myCat:
                            myCat, catPP, catScript, catPrio = cat_to_opts(myCat)
                        else:
                            myCat = catPP = catScript = catPrio = None
                        if notdefault(rePPs[n]):
                            myPP = rePPs[n]
                        elif not (reCats[n] or category):
                            myPP = catPP
                        if notdefault(reScripts[n]):
                            myScript = reScripts[n]
                        elif not (notdefault(reCats[n]) or category):
                            myScript = catScript
                        if rePrios[n] not in (str(DEFAULT_PRIORITY), ''):
                            myPrio = rePrios[n]
                        elif not ((rePrios[n] != str(DEFAULT_PRIORITY)) or category):
                            myPrio = catPrio


                    if cfg.no_dupes() and self.check_duplicate(title):
                        if cfg.no_dupes() == 1:
                            # Dupe-detection: Discard
                            logging.info("Ignoring duplicate job %s", title)
                            continue
                        elif cfg.no_dupes() == 3:
                            # Dupe-detection: Fail
                            # We accept it so the Queue can send it to the History
                            logging.info("Found duplicate job %s", title)
                        else:
                            # Dupe-detection: Pause
                            myPrio = DUP_PRIORITY

                    act = download and not first
                    if link in jobs:
                        act = act and not jobs[link].get('status', '').endswith('*')
                        act = act or force
                        star = first or jobs[link].get('status', '').endswith('*')
                    else:
                        star = first
                    if result:
                        _HandleLink(jobs, link, title, size, age, season, episode, 'G', category, myCat, myPP, myScript,
                                    act, star, order, priority=myPrio, rule=str(n))
                        if act:
                            new_downloads.append(title)
                    else:
                        _HandleLink(jobs, link, title, size, age, season, episode, 'B', category, myCat, myPP, myScript,
                                    False, star, order, priority=myPrio, rule=str(n))
            order += 1

        # Send email if wanted and not "forced"
        if new_downloads and cfg.email_rss() and not force:
            emailer.rss_mail(feed, new_downloads)

        remove_obsolete(jobs, newlinks)
        return msg
Esempio n. 5
0
    def run(self):
        logging.info('URLGrabber starting up')
        self.shutdown = False

        while not self.shutdown:
            (url, future_nzo) = self.queue.get()

            if not url:
                # stop signal, go test self.shutdown
                continue

            if future_nzo:
                # Re-queue when too early and still active
                if future_nzo.url_wait and future_nzo.url_wait > time.time():
                    self.add(url, future_nzo)
                    time.sleep(1.0)
                    continue
                # Paused
                if future_nzo.status == Status.PAUSED:
                    self.add(url, future_nzo)
                    time.sleep(1.0)
                    continue

            url = url.replace(' ', '')

            try:
                if future_nzo:
                    # If nzo entry deleted, give up
                    try:
                        deleted = future_nzo.deleted
                    except AttributeError:
                        deleted = True
                    if deleted:
                        logging.debug('Dropping URL %s, job entry missing', url)
                        continue

                filename = None
                category = None
                gzipped = False
                nzo_info = {}
                wait = 0
                retry = True
                fetch_request = None

                logging.info('Grabbing URL %s', url)
                try:
                    fetch_request = _build_request(url)
                except Exception, e:
                    # Cannot list exceptions here, because of unpredictability over platforms
                    error0 = str(sys.exc_info()[0]).lower()
                    error1 = str(sys.exc_info()[1]).lower()
                    logging.debug('Error "%s" trying to get the url %s', error1, url)
                    if 'certificate_verify_failed' in error1 or 'certificateerror' in error0:
                        msg = T('Server %s uses an untrusted HTTPS certificate') % ''
                        msg += ' - https://sabnzbd.org/certificate-errors'
                        retry = False
                    elif 'nodename nor servname provided' in error1:
                        msg = T('Server name does not resolve')
                        retry = False
                    elif '401' in error1 or 'unauthorized' in error1:
                        msg = T('Unauthorized access')
                        retry = False
                    elif '404' in error1:
                        msg = T('File not on server')
                        retry = False
                    elif hasattr(e, 'headers') and 'retry-after' in e.headers:
                        # Catch if the server send retry (e.headers is case-INsensitive)
                        wait = misc.int_conv(e.headers['retry-after'])

                # Check if dereference is used
                new_url = dereferring(url, fetch_request)
                if new_url:
                    self.add(new_url, future_nzo)
                    continue

                if fetch_request:
                    for hdr in fetch_request.headers:
                        try:
                            item = hdr.lower()
                            value = fetch_request.headers[hdr]
                        except:
                            continue
                        if item in ('content-encoding',) and value == 'gzip':
                            gzipped = True
                        if item in ('category_id', 'x-dnzb-category'):
                            category = value
                        elif item in ('x-dnzb-moreinfo',):
                            nzo_info['more_info'] = value
                        elif item in ('x-dnzb-name',):
                            filename = value
                            if not filename.endswith('.nzb'):
                                filename += '.nzb'
                        elif item == 'x-dnzb-propername':
                            nzo_info['propername'] = value
                        elif item == 'x-dnzb-episodename':
                            nzo_info['episodename'] = value
                        elif item == 'x-dnzb-year':
                            nzo_info['year'] = value
                        elif item == 'x-dnzb-failure':
                            nzo_info['failure'] = value
                        elif item == 'x-dnzb-details':
                            nzo_info['details'] = value
                        elif item == 'x-dnzb-password':
                            nzo_info['password'] = value
                        elif item == 'retry-after':
                            wait = misc.int_conv(value)

                        # Rating fields
                        if item in _RARTING_FIELDS:
                            nzo_info[item] = value

                        # Get filename from Content-Disposition header
                        if not filename and "filename=" in value:
                            filename = value[value.index("filename=") + 9:].strip(';').strip('"')

                if wait:
                    # For sites that have a rate-limiting attribute
                    msg = ''
                    retry = True
                    fetch_request = None
                elif retry:
                    fetch_request, msg, retry, wait, data = _analyse(fetch_request, future_nzo)

                if not fetch_request:
                    if retry:
                        logging.info('Retry URL %s', url)
                        self.add(url, future_nzo, wait)
                    else:
                        self.fail_to_history(future_nzo, url, msg)
                    continue

                if not filename:
                    filename = os.path.basename(urllib2.unquote(url))

                    # URL was redirected, maybe the redirect has better filename?
                    # Check if the original URL has extension
                    if url != fetch_request.url and misc.get_ext(filename) not in VALID_NZB_FILES:
                        filename = os.path.basename(urllib2.unquote(fetch_request.url))
                elif '&nzbname=' in filename:
                    # Sometimes the filename contains the full URL, duh!
                    filename = filename[filename.find('&nzbname=') + 9:]

                pp = future_nzo.pp
                script = future_nzo.script
                cat = future_nzo.cat
                if (cat is None or cat == '*') and category:
                    cat = misc.cat_convert(category)
                priority = future_nzo.priority
                nzbname = future_nzo.custom_name

                # process data
                if gzipped:
                    filename += '.gz'
                if not data:
                    try:
                        data = fetch_request.read()
                    except (IncompleteRead, IOError):
                        self.fail_to_history(future_nzo, url, T('Server could not complete request'))
                        fetch_request.close()
                        continue
                fetch_request.close()

                if '<nzb' in data and misc.get_ext(filename) != '.nzb':
                    filename += '.nzb'

                # Sanitize filename first (also removing forbidden Windows-names)
                filename = misc.sanitize_filename(filename)

                # Write data to temp file
                path = os.path.join(cfg.admin_dir.get_path(), FUTURE_Q_FOLDER)
                path = os.path.join(path, filename)
                f = open(path, 'wb')
                f.write(data)
                f.close()
                del data

                # Check if nzb file
                if misc.get_ext(filename) in VALID_NZB_FILES:
                    res = dirscanner.ProcessSingleFile(filename, path, pp=pp, script=script, cat=cat, priority=priority,
                                                       nzbname=nzbname, nzo_info=nzo_info, url=future_nzo.url, keep=False,
                                                       nzo_id=future_nzo.nzo_id)[0]
                    if res:
                        if res == -2:
                            logging.info('Incomplete NZB, retry after 5 min %s', url)
                            when = 300
                        elif res == -1:
                            # Error, but no reason to retry. Warning is already given
                            NzbQueue.do.remove(future_nzo.nzo_id, add_to_history=False)
                            continue
                        else:
                            logging.info('Unknown error fetching NZB, retry after 2 min %s', url)
                            when = 120
                        self.add(url, future_nzo, when)

                else:
                    # Check if a supported archive
                    status, zf, exp_ext = dirscanner.is_archive(path)
                    if status == 0:
                        if misc.get_ext(filename) not in ('.rar', '.zip', '.7z'):
                            filename = filename + exp_ext
                            os.rename(path, path + exp_ext)
                            path = path + exp_ext

                        dirscanner.ProcessArchiveFile(filename, path, pp, script, cat, priority=priority,
                                                     nzbname=nzbname, url=future_nzo.url, keep=False,
                                                     nzo_id=future_nzo.nzo_id)
                    else:
                        # Not a supported filetype, not an nzb (text/html ect)
                        try:
                            os.remove(fetch_request)
                        except:
                            pass
                        logging.info('Unknown filetype when fetching NZB, retry after 30s %s', url)
                        self.add(url, future_nzo, 30)
            except:
Esempio n. 6
0
    def run(self):
        self.shutdown = False
        while not self.shutdown:
            # Set NzbObject object to None so reference from this thread
            # does not keep the object alive in the future (see #1628)
            future_nzo = None
            url, future_nzo = self.queue.get()

            if not url:
                # stop signal, go test self.shutdown
                continue

            if future_nzo:
                # Re-queue when too early and still active
                if future_nzo.url_wait and future_nzo.url_wait > time.time():
                    self.add(url, future_nzo)
                    time.sleep(1.0)
                    continue
                # Paused
                if future_nzo.status == Status.PAUSED:
                    self.add(url, future_nzo)
                    time.sleep(1.0)
                    continue

            url = url.replace(" ", "")

            try:
                if future_nzo:
                    # If nzo entry deleted, give up
                    try:
                        deleted = future_nzo.deleted
                    except AttributeError:
                        deleted = True
                    if deleted:
                        logging.debug("Dropping URL %s, job entry missing",
                                      url)
                        continue

                filename = None
                category = None
                nzo_info = {}
                wait = 0
                retry = True
                fetch_request = None

                logging.info("Grabbing URL %s", url)
                try:
                    fetch_request = _build_request(url)
                except Exception as e:
                    # Cannot list exceptions here, because of unpredictability over platforms
                    error0 = str(sys.exc_info()[0]).lower()
                    error1 = str(sys.exc_info()[1]).lower()
                    logging.debug('Error "%s" trying to get the url %s',
                                  error1, url)
                    if "certificate_verify_failed" in error1 or "certificateerror" in error0:
                        msg = T("Server %s uses an untrusted HTTPS certificate"
                                ) % ""
                        msg += " - https://sabnzbd.org/certificate-errors"
                        retry = False
                    elif "nodename nor servname provided" in error1:
                        msg = T("Server name does not resolve")
                        retry = False
                    elif "401" in error1 or "unauthorized" in error1:
                        msg = T("Unauthorized access")
                        retry = False
                    elif "404" in error1:
                        msg = T("File not on server")
                        retry = False
                    elif hasattr(e, "headers") and "retry-after" in e.headers:
                        # Catch if the server send retry (e.headers is case-INsensitive)
                        wait = misc.int_conv(e.headers["retry-after"])

                if fetch_request:
                    for hdr in fetch_request.headers:
                        try:
                            item = hdr.lower()
                            value = fetch_request.headers[hdr]
                        except:
                            continue
                        if item in ("category_id", "x-dnzb-category"):
                            category = value
                        elif item in ("x-dnzb-moreinfo", ):
                            nzo_info["more_info"] = value
                        elif item in ("x-dnzb-name", ):
                            filename = value
                            if not filename.endswith(".nzb"):
                                filename += ".nzb"
                        elif item == "x-dnzb-propername":
                            nzo_info["propername"] = value
                        elif item == "x-dnzb-episodename":
                            nzo_info["episodename"] = value
                        elif item == "x-dnzb-year":
                            nzo_info["year"] = value
                        elif item == "x-dnzb-failure":
                            nzo_info["failure"] = value
                        elif item == "x-dnzb-details":
                            nzo_info["details"] = value
                        elif item == "x-dnzb-password":
                            nzo_info["password"] = value
                        elif item == "retry-after":
                            wait = misc.int_conv(value)

                        # Rating fields
                        if item in _RARTING_FIELDS:
                            nzo_info[item] = value

                        # Get filename from Content-Disposition header
                        if not filename and "filename=" in value:
                            filename = value[value.index("filename=") +
                                             9:].strip(";").strip('"')

                if wait:
                    # For sites that have a rate-limiting attribute
                    msg = ""
                    retry = True
                    fetch_request = None
                elif retry:
                    fetch_request, msg, retry, wait, data = _analyse(
                        fetch_request, future_nzo)

                if not fetch_request:
                    if retry:
                        logging.info("Retry URL %s", url)
                        self.add(url, future_nzo, wait)
                    else:
                        self.fail_to_history(future_nzo, url, msg)
                    continue

                if not filename:
                    filename = os.path.basename(urllib.parse.unquote(url))

                    # URL was redirected, maybe the redirect has better filename?
                    # Check if the original URL has extension
                    if (url != fetch_request.geturl()
                            and sabnzbd.filesystem.get_ext(filename)
                            not in VALID_NZB_FILES + VALID_ARCHIVES):
                        filename = os.path.basename(
                            urllib.parse.unquote(fetch_request.geturl()))
                elif "&nzbname=" in filename:
                    # Sometimes the filename contains the full URL, duh!
                    filename = filename[filename.find("&nzbname=") + 9:]

                pp = future_nzo.pp
                script = future_nzo.script
                cat = future_nzo.cat
                if (cat is None or cat == "*") and category:
                    cat = misc.cat_convert(category)
                priority = future_nzo.priority
                nzbname = future_nzo.custom_name

                # process data
                if not data:
                    try:
                        data = fetch_request.read()
                    except (IncompleteRead, IOError):
                        self.fail_to_history(
                            future_nzo, url,
                            T("Server could not complete request"))
                        fetch_request.close()
                        continue
                fetch_request.close()

                if b"<nzb" in data and sabnzbd.filesystem.get_ext(
                        filename) != ".nzb":
                    filename += ".nzb"

                # Sanitize filename first (also removing forbidden Windows-names)
                filename = sabnzbd.filesystem.sanitize_filename(filename)

                # If no filename, make one
                if not filename:
                    filename = sabnzbd.get_new_id(
                        "url",
                        os.path.join(cfg.admin_dir.get_path(),
                                     FUTURE_Q_FOLDER))

                # Write data to temp file
                path = os.path.join(cfg.admin_dir.get_path(), FUTURE_Q_FOLDER,
                                    filename)
                with open(path, "wb") as temp_nzb:
                    temp_nzb.write(data)

                # Check if nzb file
                if sabnzbd.filesystem.get_ext(
                        filename) in VALID_ARCHIVES + VALID_NZB_FILES:
                    res, _ = sabnzbd.add_nzbfile(
                        path,
                        pp=pp,
                        script=script,
                        cat=cat,
                        priority=priority,
                        nzbname=nzbname,
                        nzo_info=nzo_info,
                        url=future_nzo.url,
                        keep=False,
                        password=future_nzo.password,
                        nzo_id=future_nzo.nzo_id,
                    )
                    # -2==Error/retry, -1==Error, 0==OK, 1==Empty
                    if res == -2:
                        logging.info("Incomplete NZB, retry after 5 min %s",
                                     url)
                        self.add(url, future_nzo, when=300)
                    elif res == -1:
                        # Error already thrown
                        self.fail_to_history(future_nzo, url)
                    elif res == 1:
                        # No NZB-files inside archive
                        self.fail_to_history(future_nzo, url,
                                             T("Empty NZB file %s") % filename)
                else:
                    logging.info(
                        "Unknown filetype when fetching NZB, retry after 30s %s",
                        url)
                    self.add(url, future_nzo, 30)

                # Always clean up what we wrote to disk
                try:
                    sabnzbd.filesystem.remove_file(path)
                except:
                    pass
            except:
                logging.error(T("URLGRABBER CRASHED"), exc_info=True)
                logging.debug("URLGRABBER Traceback: ", exc_info=True)
Esempio n. 7
0
    def run(self):
        logging.info('URLGrabber starting up')
        self.shutdown = False

        while not self.shutdown:
            # Don't pound the website!
            time.sleep(5.0)

            (url, future_nzo, retry_count) = self.queue.get()
            if not url:
                continue
            url = url.replace(' ', '')

            try:
                del_bookmark = not future_nzo
                if future_nzo:
                    # If nzo entry deleted, give up
                    try:
                        deleted = future_nzo.deleted
                    except:
                        deleted = True
                    if deleted:
                        logging.debug('Dropping URL %s, job entry missing',
                                      url)
                        continue

                # Add nzbmatrix credentials if needed
                url, matrix_id = _matrix_url(url)

                # When still waiting for nzbmatrix wait period, requeue
                if matrix_id and self.matrix_wait > time.time():
                    self.queue.put((url, future_nzo, retry_count))
                    continue

                # _grab_url cannot reside in a function, because the tempfile
                # would not survive the end of the function
                if del_bookmark:
                    logging.info('Removing nzbmatrix bookmark %s', matrix_id)
                else:
                    logging.info('Grabbing URL %s', url)
                opener = urllib.FancyURLopener({})
                opener.prompt_user_passwd = None
                opener.addheaders = []
                opener.addheader('User-Agent',
                                 'SABnzbd+/%s' % sabnzbd.version.__version__)
                opener.addheader('Accept-encoding', 'gzip')
                filename = None
                category = None
                length = 0
                nzo_info = {}
                try:
                    fn, header = opener.retrieve(url)
                except:
                    fn = None

                if fn:
                    for tup in header.items():
                        try:
                            item = tup[0].lower()
                            value = tup[1].strip()
                        except:
                            continue
                        if item in ('category_id', 'x-dnzb-category'):
                            category = value
                        elif item in ('x-dnzb-moreinfo', ):
                            nzo_info['more_info'] = value
                        elif item in ('x-dnzb-name', ):
                            filename = value
                            if not filename.endswith('.nzb'):
                                filename += '.nzb'
                        elif item in ('content-length', ):
                            length = misc.int_conv(value)

                        if not filename:
                            for item in tup:
                                if "filename=" in item:
                                    filename = item[item.index("filename=") +
                                                    9:].strip(';').strip('"')

                if matrix_id:
                    fn, msg, retry, wait = _analyse_matrix(fn, matrix_id)
                    if retry and wait > 0:
                        self.matrix_wait = time.time() + wait
                        logging.debug('Retry URL %s after waiting', url)
                        self.queue.put((url, future_nzo, retry_count))
                        continue
                    category = _MATRIX_MAP.get(category, category)
                else:
                    msg = ''
                    retry = True

                # Check if the filepath is specified, if not, check if a retry is allowed.
                if not fn:
                    retry_count -= 1
                    if retry_count > 0 and retry:
                        logging.info('Retry URL %s', url)
                        self.queue.put((url, future_nzo, retry_count))
                    elif not del_bookmark:
                        misc.bad_fetch(future_nzo, url, msg, retry=True)
                    continue

                if del_bookmark:
                    continue

                if not filename:
                    filename = os.path.basename(url) + '.nzb'
                # Sanitize and trim name, preserving the extension
                filename, ext = os.path.splitext(filename)
                filename = misc.sanitize_foldername(filename)
                filename += '.' + misc.sanitize_foldername(ext)

                pp = future_nzo.pp
                script = future_nzo.script
                cat = future_nzo.cat
                if (cat is None or cat == '*') and category:
                    cat = misc.cat_convert(category)
                priority = future_nzo.priority
                nzbname = future_nzo.custom_name

                # Check if nzb file
                if os.path.splitext(filename)[1].lower() in ('.nzb', '.gz'):
                    res = dirscanner.ProcessSingleFile(filename, fn, pp=pp, script=script, cat=cat, priority=priority, \
                                                       nzbname=nzbname, nzo_info=nzo_info, url=future_nzo.url)
                    if res == 0:
                        NzbQueue.do.remove(future_nzo.nzo_id,
                                           add_to_history=False)
                    elif res == -2:
                        retry_count -= 1
                        if retry_count > 0:
                            logging.info('Incomplete NZB, retry %s', url)
                            self.queue.put((url, future_nzo, retry_count))
                        else:
                            misc.bad_fetch(future_nzo,
                                           url,
                                           retry=True,
                                           content=True)
                    else:
                        misc.bad_fetch(future_nzo,
                                       url,
                                       retry=True,
                                       content=True)
                # Check if a supported archive
                else:
                    if dirscanner.ProcessArchiveFile(filename,
                                                     fn,
                                                     pp,
                                                     script,
                                                     cat,
                                                     priority=priority,
                                                     url=future_nzo.url) == 0:
                        NzbQueue.do.remove(future_nzo.nzo_id,
                                           add_to_history=False)
                    else:
                        # Not a supported filetype, not an nzb (text/html ect)
                        try:
                            os.remove(fn)
                        except:
                            pass
                        misc.bad_fetch(future_nzo,
                                       url,
                                       retry=True,
                                       content=True)
            except:
                logging.error('URLGRABBER CRASHED', exc_info=True)
                logging.debug("URLGRABBER Traceback: ", exc_info=True)
Esempio n. 8
0
    def run(self):
        logging.info('URLGrabber starting up')
        self.shutdown = False

        while not self.shutdown:
            # Don't pound the website!
            time.sleep(5.0)

            (url, future_nzo) = self.queue.get()

            if not url:
                # stop signal, go test self.shutdown
                continue
            if future_nzo and future_nzo.wait and future_nzo.wait > time.time(
            ):
                # Requeue when too early and still active

                self.add(url, future_nzo)
                continue
            url = url.replace(' ', '')

            try:
                del_bookmark = not future_nzo
                if future_nzo:
                    # If nzo entry deleted, give up
                    try:
                        deleted = future_nzo.deleted
                    except AttributeError:
                        deleted = True
                    if deleted:
                        logging.debug('Dropping URL %s, job entry missing',
                                      url)
                        continue

                # Add nzbmatrix credentials if needed
                url, matrix_id = _matrix_url(url)

                # _grab_url cannot reside in a function, because the tempfile
                # would not survive the end of the function
                if del_bookmark:
                    logging.info('Removing nzbmatrix bookmark %s', matrix_id)
                else:
                    logging.info('Grabbing URL %s', url)
                if '.nzbsrus.' in url:
                    opener = urllib.URLopener({})
                else:
                    opener = urllib.FancyURLopener({})
                opener.prompt_user_passwd = None
                opener.addheaders = []
                opener.addheader('User-Agent',
                                 'SABnzbd+/%s' % sabnzbd.version.__version__)
                if not [True for item in _BAD_GZ_HOSTS if item in url]:
                    opener.addheader('Accept-encoding', 'gzip')
                filename = None
                category = None
                length = 0
                nzo_info = {}
                wait = 0
                try:
                    fn, header = opener.retrieve(url)
                except:
                    fn = None

                if fn:
                    for tup in header.items():
                        try:
                            item = tup[0].lower()
                            value = tup[1].strip()
                        except:
                            continue
                        if item in ('category_id', 'x-dnzb-category'):
                            category = value
                        elif item in ('x-dnzb-moreinfo', ):
                            nzo_info['more_info'] = value
                        elif item in ('x-dnzb-name', ):
                            filename = value
                            if not filename.endswith('.nzb'):
                                filename += '.nzb'
                        elif item == 'x-dnzb-propername':
                            nzo_info['propername'] = value
                        elif item == 'x-dnzb-episodename':
                            nzo_info['episodename'] = value
                        elif item == 'x-dnzb-year':
                            nzo_info['year'] = value
                        elif item == 'x-dnzb-failure':
                            nzo_info['failure'] = value
                        elif item == 'x-dnzb-details':
                            nzo_info['details'] = value
                        elif item in ('content-length', ):
                            length = misc.int_conv(value)
                        elif item == 'retry-after':
                            # For NZBFinder
                            wait = misc.int_conv(value)

                        if not filename:
                            for item in tup:
                                if "filename=" in item:
                                    filename = item[item.index("filename=") +
                                                    9:].strip(';').strip('"')

                if matrix_id:
                    fn, msg, retry, wait = _analyse_matrix(fn, matrix_id)
                    if not fn:
                        if retry:
                            logging.info(msg)
                            logging.debug(
                                'Retry nzbmatrix item %s after waiting %s sec',
                                matrix_id, wait)
                            self.add(url, future_nzo, wait)
                        else:
                            logging.error(msg)
                            misc.bad_fetch(future_nzo,
                                           clean_matrix_url(url),
                                           msg,
                                           retry=True)
                        continue
                    category = get_matrix_category(url, category)

                    if del_bookmark:
                        # No retries of nzbmatrix bookmark removals
                        continue

                else:
                    if wait:
                        # For sites that have a rate-limiting attribute
                        msg = ''
                        retry = True
                        fn = None
                    else:
                        fn, msg, retry, wait = _analyse_others(fn, url)
                    if not fn:
                        if retry:
                            logging.info('Retry URL %s', url)
                            self.add(url, future_nzo, wait)
                        else:
                            misc.bad_fetch(future_nzo, url, msg, retry=True)
                        continue

                if not filename:
                    filename = os.path.basename(url) + '.nzb'

                pp = future_nzo.pp
                script = future_nzo.script
                cat = future_nzo.cat
                if (cat is None or cat == '*') and category:
                    cat = misc.cat_convert(category)
                priority = future_nzo.priority
                nzbname = future_nzo.custom_name

                # Check if nzb file
                if os.path.splitext(filename)[1].lower() in ('.nzb', '.gz'):
                    res, nzo_ids = dirscanner.ProcessSingleFile(filename, fn, pp=pp, script=script, cat=cat, priority=priority, \
                                                       nzbname=nzbname, nzo_info=nzo_info, url=future_nzo.url)
                    if res == 0:
                        NzbQueue.do.remove(future_nzo.nzo_id,
                                           add_to_history=False)
                    else:
                        if res == -2:
                            logging.info(
                                'Incomplete NZB, retry after 5 min %s', url)
                            when = 300
                        elif res == -1:
                            # Error, but no reason to retry. Warning is already given
                            NzbQueue.do.remove(future_nzo.nzo_id,
                                               add_to_history=False)
                            continue
                        else:
                            logging.info(
                                'Unknown error fetching NZB, retry after 2 min %s',
                                url)
                            when = 120
                        self.add(url, future_nzo, when)
                # Check if a supported archive
                else:
                    if dirscanner.ProcessArchiveFile(
                            filename,
                            fn,
                            pp,
                            script,
                            cat,
                            priority=priority,
                            nzbname=nzbname,
                            url=future_nzo.url)[0] == 0:
                        NzbQueue.do.remove(future_nzo.nzo_id,
                                           add_to_history=False)
                    else:
                        # Not a supported filetype, not an nzb (text/html ect)
                        try:
                            os.remove(fn)
                        except:
                            pass
                        logging.info(
                            'Unknown filetype when fetching NZB, retry after 30s %s',
                            url)
                        self.add(url, future_nzo, 30)
            except:
                logging.error('URLGRABBER CRASHED', exc_info=True)
                logging.debug("URLGRABBER Traceback: ", exc_info=True)
Esempio n. 9
0
    def run(self):
        logging.info('URLGrabber starting up')
        self.shutdown = False

        while not self.shutdown:
            # Don't pound the website!
            time.sleep(5.0)

            (url, future_nzo) = self.queue.get()

            if not url:
                # stop signal, go test self.shutdown
                continue
            if future_nzo and future_nzo.wait and future_nzo.wait > time.time():
                # Re-queue when too early and still active

                self.add(url, future_nzo)
                continue
            url = url.replace(' ', '')

            try:
                if future_nzo:
                    # If nzo entry deleted, give up
                    try:
                        deleted = future_nzo.deleted
                    except AttributeError:
                        deleted = True
                    if deleted:
                        logging.debug('Dropping URL %s, job entry missing', url)
                        continue

                logging.info('Grabbing URL %s', url)
                req = urllib2.Request(url)
                req.add_header('User-Agent', 'SABnzbd+/%s' % sabnzbd.version.__version__)
                if not [True for item in _BAD_GZ_HOSTS if item in url]:
                    req.add_header('Accept-encoding', 'gzip')
                filename = None
                category = None
                gzipped = False
                nzo_info = {}
                wait = 0
                retry = True
                fn = None
                try:
                    fn = urllib2.urlopen(req)
                except:
                    # Cannot list exceptions here, because of unpredictability over platforms
                    error0 = str(sys.exc_info()[0]).lower()
                    error1 = str(sys.exc_info()[1]).lower()
                    logging.debug('Error "%s" trying to get the url %s', error1, url)
                    if 'certificate_verify_failed' in error1 or 'certificateerror' in error0:
                        msg = T('Server %s uses an untrusted HTTPS certificate') % ''
                        retry = False
                    elif 'nodename nor servname provided' in error1:
                        msg = T('Server name does not resolve')
                        retry = False
                    elif '401' in error1 or 'unauthorized' in error1:
                        msg = T('Unauthorized access')
                        retry = False
                    elif '404' in error1:
                        msg = T('File not on server')
                        retry = False

                new_url = dereferring(url, fn)
                if new_url:
                    self.add(new_url, future_nzo)
                    continue

                if fn:
                    for hdr in fn.headers:
                        try:
                            item = hdr.lower()
                            value = fn.headers[hdr]
                        except:
                            continue
                        if item in ('content-encoding',) and value == 'gzip':
                            gzipped = True
                        if item in ('category_id', 'x-dnzb-category'):
                            category = value
                        elif item in ('x-dnzb-moreinfo',):
                            nzo_info['more_info'] = value
                        elif item in ('x-dnzb-name',):
                            filename = value
                            if not filename.endswith('.nzb'):
                                filename += '.nzb'
                        elif item == 'x-dnzb-propername':
                            nzo_info['propername'] = value
                        elif item == 'x-dnzb-episodename':
                            nzo_info['episodename'] = value
                        elif item == 'x-dnzb-year':
                            nzo_info['year'] = value
                        elif item == 'x-dnzb-failure':
                            nzo_info['failure'] = value
                        elif item == 'x-dnzb-details':
                            nzo_info['details'] = value
                        elif item == 'retry-after':
                            # For NZBFinder
                            wait = misc.int_conv(value)

                        if not filename and "filename=" in value:
                            filename = value[value.index("filename=") + 9:].strip(';').strip('"')

                if wait:
                    # For sites that have a rate-limiting attribute
                    msg = ''
                    retry = True
                    fn = None
                elif retry:
                    fn, msg, retry, wait, data = _analyse(fn, url)

                if not fn:
                    if retry:
                        logging.info('Retry URL %s', url)
                        self.add(url, future_nzo, wait)
                    else:
                        bad_fetch(future_nzo, url, msg)
                    continue

                if not filename:
                    filename = os.path.basename(url) + '.nzb'
                elif '&nzbname=' in filename:
                    # Sometimes the filename contains the full URL, duh!
                    filename = filename[filename.find('&nzbname=') + 9:]

                pp = future_nzo.pp
                script = future_nzo.script
                cat = future_nzo.cat
                if (cat is None or cat == '*') and category:
                    cat = misc.cat_convert(category)
                priority = future_nzo.priority
                nzbname = future_nzo.custom_name

                # process data
                if gzipped:
                    filename = filename + '.gz'
                if not data:
                    data = fn.read()
                fn.close()

                # Sanatize filename first
                filename = misc.sanitize_filename(filename)

                # Write data to temp file
                path = os.path.join(cfg.admin_dir.get_path(), FUTURE_Q_FOLDER)
                path = os.path.join(path, filename)
                f = open(path, 'wb')
                f.write(data)
                f.close()
                del data

                # Check if nzb file
                if os.path.splitext(filename)[1].lower() in ('.nzb', '.gz', 'bz2'):
                    res = dirscanner.ProcessSingleFile(filename, path, pp=pp, script=script, cat=cat, priority=priority,
                                                       nzbname=nzbname, nzo_info=nzo_info, url=future_nzo.url, keep=False,
                                                       nzo_id=future_nzo.nzo_id)[0]
                    if res:
                        if res == -2:
                            logging.info('Incomplete NZB, retry after 5 min %s', url)
                            when = 300
                        elif res == -1:
                            # Error, but no reason to retry. Warning is already given
                            NzbQueue.do.remove(future_nzo.nzo_id, add_to_history=False)
                            continue
                        else:
                            logging.info('Unknown error fetching NZB, retry after 2 min %s', url)
                            when = 120
                        self.add(url, future_nzo, when)
                # Check if a supported archive
                else:
                    if dirscanner.ProcessArchiveFile(filename, path, pp, script, cat, priority=priority,
                                                     nzbname=nzbname, url=future_nzo.url, keep=False,
                                                     nzo_id=future_nzo.nzo_id)[0]:
                        # Not a supported filetype, not an nzb (text/html ect)
                        try:
                            os.remove(fn)
                        except:
                            pass
                        logging.info('Unknown filetype when fetching NZB, retry after 30s %s', url)
                        self.add(url, future_nzo, 30)
            except:
                logging.error(T('URLGRABBER CRASHED'), exc_info=True)
                logging.debug("URLGRABBER Traceback: ", exc_info=True)
Esempio n. 10
0
    def run_feed(self, feed=None, download=False, ignoreFirst=False, force=False, readout=True):
        """ Run the query for one URI and apply filters """
        self.shutdown = False

        if not feed:
            return "No such feed"

        newlinks = []
        new_downloads = []

        # Preparations, get options
        try:
            feeds = config.get_rss()[feed]
        except KeyError:
            logging.error(T('Incorrect RSS feed description "%s"'), feed)
            logging.info("Traceback: ", exc_info=True)
            return T('Incorrect RSS feed description "%s"') % feed

        uris = feeds.uri()
        defCat = feeds.cat()
        import sabnzbd.api

        if not notdefault(defCat) or defCat not in sabnzbd.api.list_cats(default=False):
            defCat = None
        defPP = feeds.pp()
        if not notdefault(defPP):
            defPP = None
        defScript = feeds.script()
        if not notdefault(defScript):
            defScript = None
        defPrio = feeds.priority()
        if not notdefault(defPrio):
            defPrio = None

        # Preparations, convert filters to regex's
        regexes = []
        reTypes = []
        reCats = []
        rePPs = []
        rePrios = []
        reScripts = []
        reEnabled = []
        for feed_filter in feeds.filters():
            reCat = feed_filter[0]
            if defCat in ("", "*"):
                reCat = None
            reCats.append(reCat)
            rePPs.append(feed_filter[1])
            reScripts.append(feed_filter[2])
            reTypes.append(feed_filter[3])
            if feed_filter[3] in ("<", ">", "F", "S"):
                regexes.append(feed_filter[4])
            else:
                regexes.append(convert_filter(feed_filter[4]))
            rePrios.append(feed_filter[5])
            reEnabled.append(feed_filter[6] != "0")
        regcount = len(regexes)

        # Set first if this is the very first scan of this URI
        first = (feed not in self.jobs) and ignoreFirst

        # Add SABnzbd's custom User Agent
        feedparser.USER_AGENT = "SABnzbd/%s" % sabnzbd.__version__

        # Read the RSS feed
        msg = None
        entries = None
        if readout:
            all_entries = []
            for uri in uris:
                uri = uri.replace(" ", "%20")
                logging.debug("Running feedparser on %s", uri)
                feed_parsed = feedparser.parse(uri.replace("feed://", "http://"))
                logging.debug("Done parsing %s", uri)

                if not feed_parsed:
                    msg = T("Failed to retrieve RSS from %s: %s") % (uri, "?")
                    logging.info(msg)

                status = feed_parsed.get("status", 999)
                if status in (401, 402, 403):
                    msg = T("Do not have valid authentication for feed %s") % uri
                    logging.info(msg)

                if 500 <= status <= 599:
                    msg = T("Server side error (server code %s); could not get %s on %s") % (status, feed, uri)
                    logging.info(msg)

                entries = feed_parsed.get("entries")
                if "bozo_exception" in feed_parsed and not entries:
                    msg = str(feed_parsed["bozo_exception"])
                    if "CERTIFICATE_VERIFY_FAILED" in msg:
                        msg = T("Server %s uses an untrusted HTTPS certificate") % get_base_url(uri)
                        msg += " - https://sabnzbd.org/certificate-errors"
                        logging.error(msg)
                    elif "href" in feed_parsed and feed_parsed["href"] != uri and "login" in feed_parsed["href"]:
                        # Redirect to login page!
                        msg = T("Do not have valid authentication for feed %s") % uri
                    else:
                        msg = T("Failed to retrieve RSS from %s: %s") % (uri, msg)
                    logging.info(msg)

                if not entries and not msg:
                    msg = T("RSS Feed %s was empty") % uri
                    logging.info(msg)
                all_entries.extend(entries)
            entries = all_entries

        # In case of a new feed
        if feed not in self.jobs:
            self.jobs[feed] = {}
        jobs = self.jobs[feed]

        # Error in readout or now new readout
        if readout:
            if not entries:
                return msg
        else:
            entries = jobs

        # Filter out valid new links
        for entry in entries:
            if self.shutdown:
                return

            if readout:
                try:
                    link, infourl, category, size, age, season, episode = _get_link(entry)
                except (AttributeError, IndexError):
                    logging.info(T("Incompatible feed") + " " + uri)
                    logging.info("Traceback: ", exc_info=True)
                    return T("Incompatible feed")
                title = entry.title

                # If there's multiple feeds, remove the duplicates based on title and size
                if len(uris) > 1:
                    skip_job = False
                    for job_link, job in jobs.items():
                        # Allow 5% size deviation because indexers might have small differences for same release
                        if (
                            job.get("title") == title
                            and link != job_link
                            and (job.get("size") * 0.95) < size < (job.get("size") * 1.05)
                        ):
                            logging.info("Ignoring job %s from other feed", title)
                            skip_job = True
                            break
                    if skip_job:
                        continue
            else:
                link = entry
                infourl = jobs[link].get("infourl", "")
                category = jobs[link].get("orgcat", "")
                if category in ("", "*"):
                    category = None
                title = jobs[link].get("title", "")
                size = jobs[link].get("size", 0)
                age = jobs[link].get("age")
                season = jobs[link].get("season", 0)
                episode = jobs[link].get("episode", 0)

            if link:
                # Make sure spaces are quoted in the URL
                link = link.strip().replace(" ", "%20")

                newlinks.append(link)

                if link in jobs:
                    jobstat = jobs[link].get("status", " ")[0]
                else:
                    jobstat = "N"
                if jobstat in "NGB" or (jobstat == "X" and readout):
                    # Match this title against all filters
                    logging.debug("Trying title %s", title)
                    result = False
                    myCat = defCat
                    myPP = defPP
                    myScript = defScript
                    myPrio = defPrio
                    n = 0
                    if ("F" in reTypes or "S" in reTypes) and (not season or not episode):
                        season, episode = sabnzbd.newsunpack.analyse_show(title)[1:3]

                    # Match against all filters until an positive or negative match
                    logging.debug("Size %s", size)
                    for n in range(regcount):
                        if reEnabled[n]:
                            if category and reTypes[n] == "C":
                                found = re.search(regexes[n], category)
                                if not found:
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break
                            elif reTypes[n] == "<" and size and from_units(regexes[n]) < size:
                                # "Size at most" : too large
                                logging.debug("Filter rejected on rule %d", n)
                                result = False
                                break
                            elif reTypes[n] == ">" and size and from_units(regexes[n]) > size:
                                # "Size at least" : too small
                                logging.debug("Filter rejected on rule %d", n)
                                result = False
                                break
                            elif reTypes[n] == "F" and not ep_match(season, episode, regexes[n]):
                                # "Starting from SxxEyy", too early episode
                                logging.debug("Filter requirement match on rule %d", n)
                                result = False
                                break
                            elif (
                                reTypes[n] == "S"
                                and season
                                and episode
                                and ep_match(season, episode, regexes[n], title)
                            ):
                                logging.debug("Filter matched on rule %d", n)
                                result = True
                                break
                            else:
                                if regexes[n]:
                                    found = re.search(regexes[n], title)
                                else:
                                    found = False
                                if reTypes[n] == "M" and not found:
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break
                                if found and reTypes[n] == "A":
                                    logging.debug("Filter matched on rule %d", n)
                                    result = True
                                    break
                                if found and reTypes[n] == "R":
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break

                    if len(reCats):
                        if not result and defCat:
                            # Apply Feed-category on non-matched items
                            myCat = defCat
                        elif result and notdefault(reCats[n]):
                            # Use the matched info
                            myCat = reCats[n]
                        elif category and not defCat:
                            # No result and no Feed-category
                            myCat = cat_convert(category)

                        if myCat:
                            myCat, catPP, catScript, catPrio = cat_to_opts(myCat)
                        else:
                            myCat = catPP = catScript = catPrio = None
                        if notdefault(rePPs[n]):
                            myPP = rePPs[n]
                        elif not (reCats[n] or category):
                            myPP = catPP
                        if notdefault(reScripts[n]):
                            myScript = reScripts[n]
                        elif not (notdefault(reCats[n]) or category):
                            myScript = catScript
                        if rePrios[n] not in (str(DEFAULT_PRIORITY), ""):
                            myPrio = rePrios[n]
                        elif not ((rePrios[n] != str(DEFAULT_PRIORITY)) or category):
                            myPrio = catPrio

                    if cfg.no_dupes() and self.check_duplicate(title):
                        if cfg.no_dupes() == 1:
                            # Dupe-detection: Discard
                            logging.info("Ignoring duplicate job %s", title)
                            continue
                        elif cfg.no_dupes() == 3:
                            # Dupe-detection: Fail
                            # We accept it so the Queue can send it to the History
                            logging.info("Found duplicate job %s", title)
                        else:
                            # Dupe-detection: Pause
                            myPrio = DUP_PRIORITY

                    act = download and not first
                    if link in jobs:
                        act = act and not jobs[link].get("status", "").endswith("*")
                        act = act or force
                        star = first or jobs[link].get("status", "").endswith("*")
                    else:
                        star = first
                    if result:
                        _HandleLink(
                            jobs,
                            link,
                            infourl,
                            title,
                            size,
                            age,
                            season,
                            episode,
                            "G",
                            category,
                            myCat,
                            myPP,
                            myScript,
                            act,
                            star,
                            priority=myPrio,
                            rule=n,
                        )
                        if act:
                            new_downloads.append(title)
                    else:
                        _HandleLink(
                            jobs,
                            link,
                            infourl,
                            title,
                            size,
                            age,
                            season,
                            episode,
                            "B",
                            category,
                            myCat,
                            myPP,
                            myScript,
                            False,
                            star,
                            priority=myPrio,
                            rule=n,
                        )

        # Send email if wanted and not "forced"
        if new_downloads and cfg.email_rss() and not force:
            emailer.rss_mail(feed, new_downloads)

        remove_obsolete(jobs, newlinks)
        return msg
Esempio n. 11
0
    def run(self):
        """ Process the queue (including waits and retries) """
        from sabnzbd.nzbqueue import NzbQueue
        self.shutdown = False
        while not self.shutdown:
            time.sleep(5)
            (msgid, nzo) = self.queue.get()
            if self.shutdown or not msgid:
                break
            if nzo.wait and nzo.wait > time.time():
                self.grab(msgid, nzo)
                continue
            logging.debug("Popping msgid %s", msgid)

            filename, data, newzbin_cat, nzo_info = _grabnzb(msgid)
            if filename and data:
                filename = name_fixer(filename)

                pp = nzo.pp
                script = nzo.script
                cat = nzo.cat
                if cat == '*' or not cat:
                    cat = cat_convert(newzbin_cat)

                priority = nzo.priority
                nzbname = nzo.custom_name

                cat, pp, script, priority = cat_to_opts(
                    cat, pp, script, priority)

                try:
                    sabnzbd.nzbqueue.insert_future_nzo(nzo,
                                                       filename,
                                                       msgid,
                                                       data,
                                                       pp=pp,
                                                       script=script,
                                                       cat=cat,
                                                       priority=priority,
                                                       nzbname=nzbname,
                                                       nzo_info=nzo_info)
                    nzo.url = format_source_url(str(msgid))
                except:
                    logging.error(Ta('Failed to update newzbin job %s'), msgid)
                    logging.info("Traceback: ", exc_info=True)

                    NzbQueue.do.remove(nzo.nzo_id, False)
                msgid = None
            else:
                if filename:
                    self.grab(msgid, nzo, float(filename))
                else:
                    # Fatal error, give up on this one
                    bad_fetch(nzo, msgid, msg=nzo_info, retry=True)
                msgid = None

            if msgid:
                growler.send_notification(T('NZB added to queue'), filename,
                                          'download')

        logging.debug('Stopping MSGIDGrabber')
Esempio n. 12
0
    def run_feed(self, feed=None, download=False, ignoreFirst=False, force=False, readout=True):
        """ Run the query for one URI and apply filters """
        self.shutdown = False

        def dup_title(title):
            """ Check if this title was in this or other feeds
                Return matching feed name
            """
            title = title.lower()
            for fd in self.jobs:
                for lk in self.jobs[fd]:
                    item = self.jobs[fd][lk]
                    if item.get('status', ' ')[0] == 'D' and \
                       item.get('title', '').lower() == title:
                        return fd
            return ''


        if not feed:
            return 'No such feed'

        newlinks = []
        new_downloads = []

        # Preparations, get options
        try:
            feeds = config.get_rss()[feed]
        except KeyError:
            logging.error(Ta('Incorrect RSS feed description "%s"'), feed)
            logging.info("Traceback: ", exc_info = True)
            return T('Incorrect RSS feed description "%s"') % feed

        uri = feeds.uri()
        defCat = feeds.cat()
        if not notdefault(defCat):
            defCat = None
        defPP = feeds.pp()
        if not notdefault(defPP):
            defPP = None
        defScript = feeds.script()
        if not notdefault(defScript):
            defScript = None
        defPrio = feeds.priority()
        if not notdefault(defPrio):
            defPrio = None

        # Preparations, convert filters to regex's
        regexes = []
        reTypes = []
        reCats = []
        rePPs = []
        rePrios = []
        reScripts = []
        reEnabled = []
        for filter in feeds.filters():
            reCat = filter[0]
            if defCat in ('', '*'):
                reCat = None
            reCats.append(reCat)
            rePPs.append(filter[1])
            reScripts.append(filter[2])
            reTypes.append(filter[3])
            regexes.append(convert_filter(filter[4]))
            rePrios.append(filter[5])
            reEnabled.append(filter[6] != '0')
        regcount = len(regexes)

        # Set first if this is the very first scan of this URI
        first = (feed not in self.jobs) and ignoreFirst

        # Add sabnzbd's custom User Agent
        feedparser.USER_AGENT = 'SABnzbd+/%s' % sabnzbd.version.__version__

        # Check for nzbs.org
        if 'nzbs.org/' in uri and not ('&dl=1' in uri):
            uri += '&dl=1'

        # Read the RSS feed
        msg = None
        entries = None
        if readout:
            uri = uri.replace(' ', '%20')
            logging.debug("Running feedparser on %s", uri)
            d = feedparser.parse(uri.replace('feed://', 'http://'))
            logging.debug("Done parsing %s", uri)
            if not d:
                msg = Ta('Failed to retrieve RSS from %s: %s') % (uri, '?')
                logging.info(msg)
                return unicoder(msg)

            status = d.get('status', 999)
            if status in (401, 402, 403):
                msg = Ta('Do not have valid authentication for feed %s') % feed
                logging.info(msg)
                return unicoder(msg)

            entries = d.get('entries')
            if 'bozo_exception' in d and not entries:
                msg = Ta('Failed to retrieve RSS from %s: %s') % (uri, xml_name(str(d['bozo_exception'])))
                logging.info(msg)
                return unicoder(msg)
            if not entries:
                msg = Ta('RSS Feed %s was empty') % uri
                logging.info(msg)

        if feed not in self.jobs:
            self.jobs[feed] = {}
        jobs = self.jobs[feed]
        if readout:
            if not entries:
                return unicoder(msg)
        else:
            entries = jobs.keys()

        order = 0
        # Filter out valid new links
        for entry in entries:
            if self.shutdown: return

            if readout:
                try:
                    link, category = _get_link(uri, entry)
                except (AttributeError, IndexError):
                    link = None
                    category = ''
                    logging.info(Ta('Incompatible feed') + ' ' + uri)
                    logging.info("Traceback: ", exc_info = True)
                    return T('Incompatible feed')
                category = latin1(category)
                # Make sure only latin-1 encodable characters occur
                atitle = latin1(entry.title)
                title = unicoder(atitle)
            else:
                link = entry
                category = jobs[link].get('orgcat', '')
                if category in ('', '*'):
                    category = None
                atitle = latin1(jobs[link].get('title', ''))
                title = unicoder(atitle)

            if link:
                # Make sure spaces are quoted in the URL
                if 'nzbclub.com' in link:
                    link, path = os.path.split(link.strip())
                    link = '%s/%s' % (link, urllib.quote(path))
                else:
                    link = link.strip().replace(' ','%20')

                newlinks.append(link)

                if link in jobs:
                    jobstat = jobs[link].get('status', ' ')[0]
                else:
                    jobstat = 'N'
                if jobstat in 'NGB' or (jobstat == 'X' and readout):
                    # Match this title against all filters
                    logging.debug('Trying title %s', atitle)
                    result = False
                    myCat = defCat
                    myPP = defPP
                    myScript = defScript
                    myPrio = defPrio
                    n = 0

                    # Match against all filters until an postive or negative match
                    for n in xrange(regcount):
                        if reEnabled[n]:
                            if category and reTypes[n] == 'C':
                                found = re.search(regexes[n], category)
                                if not found:
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break
                            else:
                                if regexes[n]:
                                    found = re.search(regexes[n], title)
                                else:
                                    found = False
                                if reTypes[n] == 'M' and not found:
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break
                                if found and reTypes[n] == 'A':
                                    logging.debug("Filter matched on rule %d", n)
                                    result = True
                                    break
                                if found and reTypes[n] == 'R':
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break

                    if len(reCats):
                        if notdefault(reCats[n]):
                            myCat = reCats[n]
                        elif category and not defCat:
                            myCat = cat_convert(category)
                        if myCat:
                            myCat, catPP, catScript, catPrio = cat_to_opts(myCat)
                        else:
                            myCat = catPP = catScript = catPrio = None
                        if notdefault(rePPs[n]):
                            myPP = rePPs[n]
                        elif not (reCats[n] or category):
                            myPP = catPP
                        if notdefault(reScripts[n]):
                            myScript = reScripts[n]
                        elif not (notdefault(reCats[n]) or category):
                            myScript = catScript
                        if rePrios[n] not in (str(DEFAULT_PRIORITY), ''):
                            myPrio = rePrios[n]
                        elif not ((rePrios[n] != str(DEFAULT_PRIORITY)) or category):
                            myPrio = catPrio

                    if cfg.no_dupes() and dup_title(title):
                        if cfg.no_dupes() == 1:
                            logging.info("Ignoring duplicate job %s", atitle)
                            continue
                        else:
                            myPrio = DUP_PRIORITY

                    act = download and not first
                    if link in jobs:
                        act = act and not jobs[link].get('status', '').endswith('*')
                        act = act or force
                        star = first or jobs[link].get('status', '').endswith('*')
                    else:
                        star = first
                    if result:
                        _HandleLink(jobs, link, title, 'G', category, myCat, myPP, myScript,
                                    act, star, order, priority=myPrio, rule=str(n))
                        if act:
                            new_downloads.append(title)
                    else:
                        _HandleLink(jobs, link, title, 'B', category, myCat, myPP, myScript,
                                    False, star, order, priority=myPrio, rule=str(n))
            order += 1

        # Send email if wanted and not "forced"
        if new_downloads and cfg.email_rss() and not force:
            emailer.rss_mail(feed, new_downloads)

        remove_obsolete(jobs, newlinks)
        return ''
Esempio n. 13
0
    def run_feed(self,
                 feed=None,
                 download=False,
                 ignoreFirst=False,
                 force=False,
                 readout=True):
        """ Run the query for one URI and apply filters """
        self.shutdown = False

        def dup_title(title):
            """ Check if this title was in this or other feeds
                Return matching feed name
            """
            title = title.lower()
            for fd in self.jobs:
                for lk in self.jobs[fd]:
                    item = self.jobs[fd][lk]
                    if item.get('status', ' ')[0] == 'D' and \
                       item.get('title', '').lower() == title:
                        return fd
            return ''

        if not feed:
            return 'No such feed'

        newlinks = []
        new_downloads = []

        # Preparations, get options
        try:
            feeds = config.get_rss()[feed]
        except KeyError:
            logging.error(Ta('Incorrect RSS feed description "%s"'), feed)
            logging.info("Traceback: ", exc_info=True)
            return T('Incorrect RSS feed description "%s"') % feed

        uri = feeds.uri()
        defCat = feeds.cat()
        if not notdefault(defCat):
            defCat = None
        defPP = feeds.pp()
        if not notdefault(defPP):
            defPP = None
        defScript = feeds.script()
        if not notdefault(defScript):
            defScript = None
        defPrio = feeds.priority()
        if not notdefault(defPrio):
            defPrio = None

        # Preparations, convert filters to regex's
        regexes = []
        reTypes = []
        reCats = []
        rePPs = []
        rePrios = []
        reScripts = []
        reEnabled = []
        for filter in feeds.filters():
            reCat = filter[0]
            if defCat in ('', '*'):
                reCat = None
            reCats.append(reCat)
            rePPs.append(filter[1])
            reScripts.append(filter[2])
            reTypes.append(filter[3])
            regexes.append(convert_filter(filter[4]))
            rePrios.append(filter[5])
            reEnabled.append(filter[6] != '0')
        regcount = len(regexes)

        # Set first if this is the very first scan of this URI
        first = (feed not in self.jobs) and ignoreFirst

        # Add sabnzbd's custom User Agent
        feedparser.USER_AGENT = 'SABnzbd+/%s' % sabnzbd.version.__version__

        # Check for nzbs.org
        if 'nzbs.org/' in uri and not ('&dl=1' in uri):
            uri += '&dl=1'

        # Read the RSS feed
        msg = None
        entries = None
        if readout:
            uri = uri.replace(' ', '%20')
            logging.debug("Running feedparser on %s", uri)
            d = feedparser.parse(uri.replace('feed://', 'http://'))
            logging.debug("Done parsing %s", uri)
            if not d:
                msg = Ta('Failed to retrieve RSS from %s: %s') % (uri, '?')
                logging.info(msg)
                return unicoder(msg)

            status = d.get('status', 999)
            if status in (401, 402, 403):
                msg = Ta('Do not have valid authentication for feed %s') % feed
                logging.info(msg)
                return unicoder(msg)

            entries = d.get('entries')
            if 'bozo_exception' in d and not entries:
                msg = Ta('Failed to retrieve RSS from %s: %s') % (
                    uri, xml_name(str(d['bozo_exception'])))
                logging.info(msg)
                return unicoder(msg)
            if not entries:
                msg = Ta('RSS Feed %s was empty') % uri
                logging.info(msg)

        if feed not in self.jobs:
            self.jobs[feed] = {}
        jobs = self.jobs[feed]
        if readout:
            if not entries:
                return unicoder(msg)
        else:
            entries = jobs.keys()

        order = 0
        # Filter out valid new links
        for entry in entries:
            if self.shutdown: return

            if readout:
                try:
                    link, category = _get_link(uri, entry)
                except (AttributeError, IndexError):
                    link = None
                    category = ''
                    logging.info(Ta('Incompatible feed') + ' ' + uri)
                    logging.info("Traceback: ", exc_info=True)
                    return T('Incompatible feed')
                category = latin1(category)
                # Make sure only latin-1 encodable characters occur
                atitle = latin1(entry.title)
                title = unicoder(atitle)
            else:
                link = entry
                category = jobs[link].get('orgcat', '')
                if category in ('', '*'):
                    category = None
                atitle = latin1(jobs[link].get('title', ''))
                title = unicoder(atitle)

            if link:
                # Make sure spaces are quoted in the URL
                if 'nzbclub.com' in link:
                    link, path = os.path.split(link.strip())
                    link = '%s/%s' % (link, urllib.quote(path))
                else:
                    link = link.strip().replace(' ', '%20')

                newlinks.append(link)

                if link in jobs:
                    jobstat = jobs[link].get('status', ' ')[0]
                else:
                    jobstat = 'N'
                if jobstat in 'NGB' or (jobstat == 'X' and readout):
                    # Match this title against all filters
                    logging.debug('Trying title %s', atitle)
                    result = False
                    myCat = defCat
                    myPP = defPP
                    myScript = defScript
                    myPrio = defPrio
                    n = 0

                    # Match against all filters until an postive or negative match
                    for n in xrange(regcount):
                        if reEnabled[n]:
                            if category and reTypes[n] == 'C':
                                found = re.search(regexes[n], category)
                                if not found:
                                    logging.debug("Filter rejected on rule %d",
                                                  n)
                                    result = False
                                    break
                            else:
                                if regexes[n]:
                                    found = re.search(regexes[n], title)
                                else:
                                    found = False
                                if reTypes[n] == 'M' and not found:
                                    logging.debug("Filter rejected on rule %d",
                                                  n)
                                    result = False
                                    break
                                if found and reTypes[n] == 'A':
                                    logging.debug("Filter matched on rule %d",
                                                  n)
                                    result = True
                                    break
                                if found and reTypes[n] == 'R':
                                    logging.debug("Filter rejected on rule %d",
                                                  n)
                                    result = False
                                    break

                    if len(reCats):
                        if notdefault(reCats[n]):
                            myCat = reCats[n]
                        elif category and not defCat:
                            myCat = cat_convert(category)
                        if myCat:
                            myCat, catPP, catScript, catPrio = cat_to_opts(
                                myCat)
                        else:
                            myCat = catPP = catScript = catPrio = None
                        if notdefault(rePPs[n]):
                            myPP = rePPs[n]
                        elif not (reCats[n] or category):
                            myPP = catPP
                        if notdefault(reScripts[n]):
                            myScript = reScripts[n]
                        elif not (notdefault(reCats[n]) or category):
                            myScript = catScript
                        if rePrios[n] not in (str(DEFAULT_PRIORITY), ''):
                            myPrio = rePrios[n]
                        elif not ((rePrios[n] != str(DEFAULT_PRIORITY))
                                  or category):
                            myPrio = catPrio

                    if cfg.no_dupes() and dup_title(title):
                        if cfg.no_dupes() == 1:
                            logging.info("Ignoring duplicate job %s", atitle)
                            continue
                        else:
                            myPrio = DUP_PRIORITY

                    act = download and not first
                    if link in jobs:
                        act = act and not jobs[link].get('status',
                                                         '').endswith('*')
                        act = act or force
                        star = first or jobs[link].get('status',
                                                       '').endswith('*')
                    else:
                        star = first
                    if result:
                        _HandleLink(jobs,
                                    link,
                                    title,
                                    'G',
                                    category,
                                    myCat,
                                    myPP,
                                    myScript,
                                    act,
                                    star,
                                    order,
                                    priority=myPrio,
                                    rule=str(n))
                        if act:
                            new_downloads.append(title)
                    else:
                        _HandleLink(jobs,
                                    link,
                                    title,
                                    'B',
                                    category,
                                    myCat,
                                    myPP,
                                    myScript,
                                    False,
                                    star,
                                    order,
                                    priority=myPrio,
                                    rule=str(n))
            order += 1

        # Send email if wanted and not "forced"
        if new_downloads and cfg.email_rss() and not force:
            emailer.rss_mail(feed, new_downloads)

        remove_obsolete(jobs, newlinks)
        return ''
Esempio n. 14
0
    def run_feed(self, feed=None, download=False, ignoreFirst=False, force=False, readout=True):
        """ Run the query for one URI and apply filters """
        self.shutdown = False

        if not feed:
            return 'No such feed'

        newlinks = []
        new_downloads = []

        # Preparations, get options
        try:
            feeds = config.get_rss()[feed]
        except KeyError:
            logging.error(T('Incorrect RSS feed description "%s"'), feed)
            logging.info("Traceback: ", exc_info=True)
            return T('Incorrect RSS feed description "%s"') % feed

        uris = feeds.uri()
        defCat = feeds.cat()
        import sabnzbd.api
        if not notdefault(defCat) or defCat not in sabnzbd.api.list_cats(default=False):
            defCat = None
        defPP = feeds.pp()
        if not notdefault(defPP):
            defPP = None
        defScript = feeds.script()
        if not notdefault(defScript):
            defScript = None
        defPrio = feeds.priority()
        if not notdefault(defPrio):
            defPrio = None

        # Preparations, convert filters to regex's
        regexes = []
        reTypes = []
        reCats = []
        rePPs = []
        rePrios = []
        reScripts = []
        reEnabled = []
        for filter in feeds.filters():
            reCat = filter[0]
            if defCat in ('', '*'):
                reCat = None
            reCats.append(reCat)
            rePPs.append(filter[1])
            reScripts.append(filter[2])
            reTypes.append(filter[3])
            if filter[3] in ('<', '>', 'F', 'S'):
                regexes.append(filter[4])
            else:
                regexes.append(convert_filter(filter[4]))
            rePrios.append(filter[5])
            reEnabled.append(filter[6] != '0')
        regcount = len(regexes)

        # Set first if this is the very first scan of this URI
        first = (feed not in self.jobs) and ignoreFirst

        # Add sabnzbd's custom User Agent
        feedparser.USER_AGENT = 'SABnzbd+/%s' % sabnzbd.version.__version__

        # Read the RSS feed
        msg = None
        entries = None
        if readout:
            all_entries = []
            for uri in uris:
                uri = uri.replace(' ', '%20')
                logging.debug("Running feedparser on %s", uri)
                feed_parsed = feedparser.parse(uri.replace('feed://', 'http://'))
                logging.debug("Done parsing %s", uri)

                if not feed_parsed:
                    msg = T('Failed to retrieve RSS from %s: %s') % (uri, '?')
                    logging.info(msg)

                status = feed_parsed.get('status', 999)
                if status in (401, 402, 403):
                    msg = T('Do not have valid authentication for feed %s') % feed
                    logging.info(msg)

                if 500 <= status <= 599:
                    msg = T('Server side error (server code %s); could not get %s on %s') % (status, feed, uri)
                    logging.info(msg)

                entries = feed_parsed.get('entries')
                if 'bozo_exception' in feed_parsed and not entries:
                    msg = str(feed_parsed['bozo_exception'])
                    if 'CERTIFICATE_VERIFY_FAILED' in msg:
                        msg = T('Server %s uses an untrusted HTTPS certificate') % get_urlbase(uri)
                        msg += ' - https://sabnzbd.org/certificate-errors'
                        logging.error(msg)
                    else:
                        msg = T('Failed to retrieve RSS from %s: %s') % (uri, xml_name(msg))
                    logging.info(msg)

                if not entries:
                    msg = T('RSS Feed %s was empty') % uri
                    logging.info(msg)
                all_entries.extend(entries)
            entries = all_entries

        # In case of a new feed
        if feed not in self.jobs:
            self.jobs[feed] = {}
        jobs = self.jobs[feed]

        # Error in readout or now new readout
        if readout:
            if not entries:
                return unicoder(msg)
        else:
            entries = jobs.keys()

        # Filter out valid new links
        for entry in entries:
            if self.shutdown:
                return

            if readout:
                try:
                    link, category, size, age, season, episode = _get_link(entry)
                except (AttributeError, IndexError):
                    logging.info(T('Incompatible feed') + ' ' + uri)
                    logging.info("Traceback: ", exc_info=True)
                    return T('Incompatible feed')
                title = entry.title

                # If there's multiple feeds, remove the duplicates based on title and size
                if len(uris) > 1:
                    skip_job = False
                    for job_link, job in jobs.items():
                        # Allow 5% size deviation because indexers might have small differences for same release
                        if job.get('title') == title and link != job_link and (job.get('size')*0.95) < size < (job.get('size')*1.05):
                            logging.info("Ignoring job %s from other feed", title)
                            skip_job = True
                            break
                    if skip_job:
                        continue
            else:
                link = entry
                category = jobs[link].get('orgcat', '')
                if category in ('', '*'):
                    category = None
                title = jobs[link].get('title', '')
                size = jobs[link].get('size', 0L)
                age = jobs[link].get('age')
                season = jobs[link].get('season', 0)
                episode = jobs[link].get('episode', 0)

            if link:
                # Make sure spaces are quoted in the URL
                link = link.strip().replace(' ', '%20')

                newlinks.append(link)

                if link in jobs:
                    jobstat = jobs[link].get('status', ' ')[0]
                else:
                    jobstat = 'N'
                if jobstat in 'NGB' or (jobstat == 'X' and readout):
                    # Match this title against all filters
                    logging.debug('Trying title %s', title)
                    result = False
                    myCat = defCat
                    myPP = defPP
                    myScript = defScript
                    myPrio = defPrio
                    n = 0
                    if ('F' in reTypes or 'S' in reTypes) and (not season or not episode):
                        season, episode = sabnzbd.newsunpack.analyse_show(title)[1:3]

                    # Match against all filters until an positive or negative match
                    logging.debug('Size %s', size)
                    for n in xrange(regcount):
                        if reEnabled[n]:
                            if category and reTypes[n] == 'C':
                                found = re.search(regexes[n], category)
                                if not found:
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break
                            elif reTypes[n] == '<' and size and from_units(regexes[n]) < size:
                                # "Size at most" : too large
                                logging.debug('Filter rejected on rule %d', n)
                                result = False
                                break
                            elif reTypes[n] == '>' and size and from_units(regexes[n]) > size:
                                # "Size at least" : too small
                                logging.debug('Filter rejected on rule %d', n)
                                result = False
                                break
                            elif reTypes[n] == 'F' and not ep_match(season, episode, regexes[n]):
                                # "Starting from SxxEyy", too early episode
                                logging.debug('Filter requirement match on rule %d', n)
                                result = False
                                break
                            elif reTypes[n] == 'S' and season and episode and ep_match(season, episode, regexes[n], title):
                                logging.debug('Filter matched on rule %d', n)
                                result = True
                                break
                            else:
                                if regexes[n]:
                                    found = re.search(regexes[n], title)
                                else:
                                    found = False
                                if reTypes[n] == 'M' and not found:
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break
                                if found and reTypes[n] == 'A':
                                    logging.debug("Filter matched on rule %d", n)
                                    result = True
                                    break
                                if found and reTypes[n] == 'R':
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break

                    if len(reCats):
                        if not result and defCat:
                            # Apply Feed-category on non-matched items
                            myCat = defCat
                        elif result and notdefault(reCats[n]):
                            # Use the matched info
                            myCat = reCats[n]
                        elif category and not defCat:
                            # No result and no Feed-category
                            myCat = cat_convert(category)

                        if myCat:
                            myCat, catPP, catScript, catPrio = cat_to_opts(myCat)
                        else:
                            myCat = catPP = catScript = catPrio = None
                        if notdefault(rePPs[n]):
                            myPP = rePPs[n]
                        elif not (reCats[n] or category):
                            myPP = catPP
                        if notdefault(reScripts[n]):
                            myScript = reScripts[n]
                        elif not (notdefault(reCats[n]) or category):
                            myScript = catScript
                        if rePrios[n] not in (str(DEFAULT_PRIORITY), ''):
                            myPrio = rePrios[n]
                        elif not ((rePrios[n] != str(DEFAULT_PRIORITY)) or category):
                            myPrio = catPrio


                    if cfg.no_dupes() and self.check_duplicate(title):
                        if cfg.no_dupes() == 1:
                            # Dupe-detection: Discard
                            logging.info("Ignoring duplicate job %s", title)
                            continue
                        elif cfg.no_dupes() == 3:
                            # Dupe-detection: Fail
                            # We accept it so the Queue can send it to the History
                            logging.info("Found duplicate job %s", title)
                        else:
                            # Dupe-detection: Pause
                            myPrio = DUP_PRIORITY

                    act = download and not first
                    if link in jobs:
                        act = act and not jobs[link].get('status', '').endswith('*')
                        act = act or force
                        star = first or jobs[link].get('status', '').endswith('*')
                    else:
                        star = first
                    if result:
                        _HandleLink(jobs, feed, link, title, size, age, season, episode, 'G', category, myCat, myPP,
                                     myScript, act, star, priority=myPrio, rule=str(n))
                        if act:
                            new_downloads.append(title)
                    else:
                        _HandleLink(jobs, feed, link, title, size, age, season, episode, 'B', category, myCat, myPP,
                                     myScript, False, star, priority=myPrio, rule=str(n))

        # Send email if wanted and not "forced"
        if new_downloads and cfg.email_rss() and not force:
            emailer.rss_mail(feed, new_downloads)

        remove_obsolete(jobs, newlinks)
        return msg
Esempio n. 15
0
    def run(self):
        logging.info('URLGrabber starting up')
        self.shutdown = False

        while not self.shutdown:
            # Don't pound the website!
            time.sleep(5.0)

            (url, future_nzo, retry_count) = self.queue.get()
            if not url:
                continue
            url = url.replace(' ', '')

            try:
                del_bookmark = not future_nzo
                if future_nzo:
                    # If nzo entry deleted, give up
                    try:
                        deleted = future_nzo.deleted
                    except:
                        deleted = True
                    if deleted:
                        logging.debug('Dropping URL %s, job entry missing', url)
                        continue

                # Add nzbmatrix credentials if needed
                url, matrix_id = _matrix_url(url)

                # When still waiting for nzbmatrix wait period, requeue
                if matrix_id and self.matrix_wait > time.time():
                    self.queue.put((url, future_nzo, retry_count))
                    continue

                # _grab_url cannot reside in a function, because the tempfile
                # would not survive the end of the function
                if del_bookmark:
                    logging.info('Removing nzbmatrix bookmark %s', matrix_id)
                else:
                    logging.info('Grabbing URL %s', url)
                opener = urllib.FancyURLopener({})
                opener.prompt_user_passwd = None
                opener.addheaders = []
                opener.addheader('User-Agent', 'SABnzbd+/%s' % sabnzbd.version.__version__)
                opener.addheader('Accept-encoding','gzip')
                filename = None
                category = None
                length = 0
                nzo_info = {}
                try:
                    fn, header = opener.retrieve(url)
                except:
                    fn = None

                if fn:
                    for tup in header.items():
                        try:
                            item = tup[0].lower()
                            value = tup[1].strip()
                        except:
                            continue
                        if item in ('category_id', 'x-dnzb-category'):
                            category = value
                        elif item in ('x-dnzb-moreinfo',):
                            nzo_info['more_info'] = value
                        elif item in ('x-dnzb-name',):
                            filename = value
                            if not filename.endswith('.nzb'):
                                filename += '.nzb'
                        elif item in ('content-length',):
                            length = misc.int_conv(value)

                        if not filename:
                            for item in tup:
                                if "filename=" in item:
                                    filename = item[item.index("filename=") + 9:].strip(';').strip('"')

                if matrix_id:
                    fn, msg, retry, wait = _analyse_matrix(fn, matrix_id)
                    if retry and wait > 0:
                        self.matrix_wait = time.time() + wait
                        logging.debug('Retry URL %s after waiting', url)
                        self.queue.put((url, future_nzo, retry_count))
                        continue
                    category = _MATRIX_MAP.get(category, category)
                else:
                    msg = ''
                    retry = True

                # Check if the filepath is specified, if not, check if a retry is allowed.
                if not fn:
                    retry_count -= 1
                    if retry_count > 0 and retry:
                        logging.info('Retry URL %s', url)
                        self.queue.put((url, future_nzo, retry_count))
                    elif not del_bookmark:
                        misc.bad_fetch(future_nzo, url, msg, retry=True)
                    continue

                if del_bookmark:
                    continue

                if not filename:
                    filename = os.path.basename(url) + '.nzb'
                # Sanitize and trim name, preserving the extension
                filename, ext = os.path.splitext(filename)
                filename = misc.sanitize_foldername(filename)
                filename += '.' + misc.sanitize_foldername(ext)

                pp = future_nzo.pp
                script = future_nzo.script
                cat = future_nzo.cat
                if (cat is None or cat == '*') and category:
                    cat = misc.cat_convert(category)
                priority = future_nzo.priority
                nzbname = future_nzo.custom_name

                # Check if nzb file
                if os.path.splitext(filename)[1].lower() in ('.nzb', '.gz'):
                    res = dirscanner.ProcessSingleFile(filename, fn, pp=pp, script=script, cat=cat, priority=priority, \
                                                       nzbname=nzbname, nzo_info=nzo_info, url=future_nzo.url)
                    if res == 0:
                        NzbQueue.do.remove(future_nzo.nzo_id, add_to_history=False)
                    elif res == -2:
                        retry_count -= 1
                        if retry_count > 0:
                            logging.info('Incomplete NZB, retry %s', url)
                            self.queue.put((url, future_nzo, retry_count))
                        else:
                            misc.bad_fetch(future_nzo, url, retry=True, content=True)
                    else:
                        misc.bad_fetch(future_nzo, url, retry=True, content=True)
                # Check if a supported archive
                else:
                    if dirscanner.ProcessArchiveFile(filename, fn, pp, script, cat, priority=priority, url=future_nzo.url) == 0:
                        NzbQueue.do.remove(future_nzo.nzo_id, add_to_history=False)
                    else:
                        # Not a supported filetype, not an nzb (text/html ect)
                        try:
                            os.remove(fn)
                        except:
                            pass
                        misc.bad_fetch(future_nzo, url, retry=True, content=True)
            except:
                logging.error('URLGRABBER CRASHED', exc_info=True)
                logging.debug("URLGRABBER Traceback: ", exc_info=True)
Esempio n. 16
0
    def run(self):
        logging.info('URLGrabber starting up')
        self.shutdown = False

        while not self.shutdown:
            (url, future_nzo) = self.queue.get()

            if not url:
                # stop signal, go test self.shutdown
                continue

            if future_nzo:
                # Re-queue when too early and still active
                if future_nzo.wait and future_nzo.wait > time.time():
                    self.add(url, future_nzo)
                    time.sleep(1.0)
                    continue
                # Paused
                if future_nzo.status == Status.PAUSED:
                    self.add(url, future_nzo)
                    time.sleep(1.0)
                    continue

            url = url.replace(' ', '')

            try:
                if future_nzo:
                    # If nzo entry deleted, give up
                    try:
                        deleted = future_nzo.deleted
                    except AttributeError:
                        deleted = True
                    if deleted:
                        logging.debug('Dropping URL %s, job entry missing', url)
                        continue

                filename = None
                category = None
                gzipped = False
                nzo_info = {}
                wait = 0
                retry = True
                fn = None

                logging.info('Grabbing URL %s', url)
                try:
                    fn = _build_request(url)
                except Exception, e:
                    # Cannot list exceptions here, because of unpredictability over platforms
                    error0 = str(sys.exc_info()[0]).lower()
                    error1 = str(sys.exc_info()[1]).lower()
                    logging.debug('Error "%s" trying to get the url %s', error1, url)
                    if 'certificate_verify_failed' in error1 or 'certificateerror' in error0:
                        msg = T('Server %s uses an untrusted HTTPS certificate') % ''
                        msg += ' - https://sabnzbd.org/certificate-errors'
                        retry = False
                    elif 'nodename nor servname provided' in error1:
                        msg = T('Server name does not resolve')
                        retry = False
                    elif '401' in error1 or 'unauthorized' in error1:
                        msg = T('Unauthorized access')
                        retry = False
                    elif '404' in error1:
                        msg = T('File not on server')
                        retry = False
                    elif hasattr(e, 'headers') and 'retry-after' in e.headers:
                        # Catch if the server send retry (e.headers is case-INsensitive)
                        wait = misc.int_conv(e.headers['retry-after'])

                new_url = dereferring(url, fn)
                if new_url:
                    self.add(new_url, future_nzo)
                    continue

                if fn:
                    for hdr in fn.headers:
                        try:
                            item = hdr.lower()
                            value = fn.headers[hdr]
                        except:
                            continue
                        if item in ('content-encoding',) and value == 'gzip':
                            gzipped = True
                        if item in ('category_id', 'x-dnzb-category'):
                            category = value
                        elif item in ('x-dnzb-moreinfo',):
                            nzo_info['more_info'] = value
                        elif item in ('x-dnzb-name',):
                            filename = value
                            if not filename.endswith('.nzb'):
                                filename += '.nzb'
                        elif item == 'x-dnzb-propername':
                            nzo_info['propername'] = value
                        elif item == 'x-dnzb-episodename':
                            nzo_info['episodename'] = value
                        elif item == 'x-dnzb-year':
                            nzo_info['year'] = value
                        elif item == 'x-dnzb-failure':
                            nzo_info['failure'] = value
                        elif item == 'x-dnzb-details':
                            nzo_info['details'] = value
                        elif item == 'x-dnzb-password':
                            nzo_info['password'] = value
                        elif item == 'retry-after':
                            wait = misc.int_conv(value)

                        # Rating fields
                        if item in _RARTING_FIELDS:
                            nzo_info[item] = value

                        if not filename and "filename=" in value:
                            filename = value[value.index("filename=") + 9:].strip(';').strip('"')

                if wait:
                    # For sites that have a rate-limiting attribute
                    msg = ''
                    retry = True
                    fn = None
                elif retry:
                    fn, msg, retry, wait, data = _analyse(fn, url)

                if not fn:
                    if retry:
                        logging.info('Retry URL %s', url)
                        self.add(url, future_nzo, wait)
                    else:
                        bad_fetch(future_nzo, url, msg)
                    continue

                if not filename:
                    filename = os.path.basename(url)
                elif '&nzbname=' in filename:
                    # Sometimes the filename contains the full URL, duh!
                    filename = filename[filename.find('&nzbname=') + 9:]

                pp = future_nzo.pp
                script = future_nzo.script
                cat = future_nzo.cat
                if (cat is None or cat == '*') and category:
                    cat = misc.cat_convert(category)
                priority = future_nzo.priority
                nzbname = future_nzo.custom_name

                # process data
                if gzipped:
                    filename += '.gz'
                if not data:
                    try:
                        data = fn.read()
                    except (IncompleteRead, IOError):
                        bad_fetch(future_nzo, url, T('Server could not complete request'))
                        fn.close()
                        continue
                fn.close()

                if '<nzb' in data and misc.get_ext(filename) != '.nzb':
                    filename += '.nzb'

                # Sanitize filename first (also removing forbidden Windows-names)
                filename = misc.sanitize_filename(filename)

                # Write data to temp file
                path = os.path.join(cfg.admin_dir.get_path(), FUTURE_Q_FOLDER)
                path = os.path.join(path, filename)
                f = open(path, 'wb')
                f.write(data)
                f.close()
                del data

                # Check if nzb file
                if misc.get_ext(filename) in ('.nzb', '.gz', 'bz2'):
                    res = dirscanner.ProcessSingleFile(filename, path, pp=pp, script=script, cat=cat, priority=priority,
                                                       nzbname=nzbname, nzo_info=nzo_info, url=future_nzo.url, keep=False,
                                                       nzo_id=future_nzo.nzo_id)[0]
                    if res:
                        if res == -2:
                            logging.info('Incomplete NZB, retry after 5 min %s', url)
                            when = 300
                        elif res == -1:
                            # Error, but no reason to retry. Warning is already given
                            NzbQueue.do.remove(future_nzo.nzo_id, add_to_history=False)
                            continue
                        else:
                            logging.info('Unknown error fetching NZB, retry after 2 min %s', url)
                            when = 120
                        self.add(url, future_nzo, when)
                # Check if a supported archive
                else:
                    status, zf, exp_ext = dirscanner.is_archive(path)
                    if status == 0:
                        if misc.get_ext(filename) not in ('.rar', '.zip', '.7z'):
                            filename = filename + exp_ext
                            os.rename(path, path + exp_ext)
                            path = path + exp_ext

                        dirscanner.ProcessArchiveFile(filename, path, pp, script, cat, priority=priority,
                                                     nzbname=nzbname, url=future_nzo.url, keep=False,
                                                     nzo_id=future_nzo.nzo_id)
                        # Not a supported filetype, not an nzb (text/html ect)
                        try:
                            os.remove(fn)
                        except:
                            pass
                        logging.info('Unknown filetype when fetching NZB, retry after 30s %s', url)
                        self.add(url, future_nzo, 30)
            except:
Esempio n. 17
0
    def run_feed(self, feed=None, download=False, ignoreFirst=False, force=False, readout=True):
        """ Run the query for one URI and apply filters """
        self.shutdown = False

        def dup_title(title):
            """ Check if this title was in this or other feeds
                Return matching feed name
            """
            title = title.lower()
            for fd in self.jobs:
                for lk in self.jobs[fd]:
                    item = self.jobs[fd][lk]
                    if item.get('status', ' ')[0] == 'D' and \
                       item.get('title', '').lower() == title:
                        return fd
            return ''

        if not feed:
            return 'No such feed'

        newlinks = []
        new_downloads = []

        # Preparations, get options
        try:
            feeds = config.get_rss()[feed]
        except KeyError:
            logging.error(T('Incorrect RSS feed description "%s"'), feed)
            logging.info("Traceback: ", exc_info=True)
            return T('Incorrect RSS feed description "%s"') % feed

        uri = feeds.uri()
        defCat = feeds.cat()
        import sabnzbd.api
        if not notdefault(defCat) or defCat not in sabnzbd.api.list_cats(default=False):
            defCat = None
        defPP = feeds.pp()
        if not notdefault(defPP):
            defPP = None
        defScript = feeds.script()
        if not notdefault(defScript):
            defScript = None
        defPrio = feeds.priority()
        if not notdefault(defPrio):
            defPrio = None

        # Preparations, convert filters to regex's
        regexes = []
        reTypes = []
        reCats = []
        rePPs = []
        rePrios = []
        reScripts = []
        reEnabled = []
        for filter in feeds.filters():
            reCat = filter[0]
            if defCat in ('', '*'):
                reCat = None
            reCats.append(reCat)
            rePPs.append(filter[1])
            reScripts.append(filter[2])
            reTypes.append(filter[3])
            if filter[3] in ('<', '>', 'F'):
                regexes.append(filter[4])
            else:
                regexes.append(convert_filter(filter[4]))
            rePrios.append(filter[5])
            reEnabled.append(filter[6] != '0')
        regcount = len(regexes)

        # Set first if this is the very first scan of this URI
        first = (feed not in self.jobs) and ignoreFirst

        # Add sabnzbd's custom User Agent
        feedparser.USER_AGENT = 'SABnzbd+/%s' % sabnzbd.version.__version__

        # Check for nzbs.org
        if 'nzbs.org/' in uri and '&dl=1' not in uri:
            uri += '&dl=1'

        # Read the RSS feed
        msg = None
        entries = None
        if readout:
            uri = uri.replace(' ', '%20')
            logging.debug("Running feedparser on %s", uri)
            d = feedparser.parse(uri.replace('feed://', 'http://'))
            logging.debug("Done parsing %s", uri)
            if not d:
                msg = T('Failed to retrieve RSS from %s: %s') % (uri, '?')
                logging.info(msg)
                return unicoder(msg)

            status = d.get('status', 999)
            if status in (401, 402, 403):
                msg = T('Do not have valid authentication for feed %s') % feed
                logging.info(msg)
                return unicoder(msg)
            if status >= 500 and status <= 599:
                msg = T('Server side error (server code %s); could not get %s on %s') % (status, feed, uri)
                logging.info(msg)
                return unicoder(msg)

            entries = d.get('entries')
            if 'bozo_exception' in d and not entries:
                msg = str(d['bozo_exception'])
                if 'CERTIFICATE_VERIFY_FAILED' in msg:
                    msg = T('Server %s uses an untrusted HTTPS certificate') % get_urlbase(uri)
                    logging.error(msg)
                else:
                    msg = T('Failed to retrieve RSS from %s: %s') % (uri, xml_name(msg))
                logging.info(msg)
                return unicoder(msg)
            if not entries:
                msg = T('RSS Feed %s was empty') % uri
                logging.info(msg)

        if feed not in self.jobs:
            self.jobs[feed] = {}
        jobs = self.jobs[feed]
        if readout:
            if not entries:
                return unicoder(msg)
        else:
            entries = jobs.keys()
            # Sort in the order the jobs came from the feed
            entries.sort(lambda x, y: jobs[x].get('order', 0) - jobs[y].get('order', 0))

        order = 0
        # Filter out valid new links
        for entry in entries:
            if self.shutdown:
                return

            if readout:
                try:
                    link, category, size = _get_link(uri, entry)
                except (AttributeError, IndexError):
                    link = None
                    category = u''
                    size = 0L
                    logging.info(T('Incompatible feed') + ' ' + uri)
                    logging.info("Traceback: ", exc_info=True)
                    return T('Incompatible feed')
                title = entry.title
            else:
                link = entry
                category = jobs[link].get('orgcat', '')
                if category in ('', '*'):
                    category = None
                title = jobs[link].get('title', '')
                size = jobs[link].get('size', 0L)

            if link:
                # Make sure spaces are quoted in the URL
                link = link.strip().replace(' ', '%20')

                newlinks.append(link)

                if link in jobs:
                    jobstat = jobs[link].get('status', ' ')[0]
                else:
                    jobstat = 'N'
                if jobstat in 'NGB' or (jobstat == 'X' and readout):
                    # Match this title against all filters
                    logging.debug('Trying title %s', title)
                    result = False
                    myCat = defCat
                    myPP = defPP
                    myScript = defScript
                    myPrio = defPrio
                    n = 0
                    if 'F' in reTypes:
                        season, episode = sabnzbd.newsunpack.analyse_show(title)[1:3]
                        season = int_conv(season)
                        episode = int_conv(episode)
                    else:
                        season = episode = 0

                    # Match against all filters until an positive or negative match
                    logging.debug('Size %s for %s', size, title)
                    for n in xrange(regcount):
                        if reEnabled[n]:
                            if category and reTypes[n] == 'C':
                                found = re.search(regexes[n], category)
                                if not found:
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break
                            elif reTypes[n] == '<' and size and from_units(regexes[n]) < size:
                                # "Size at most" : too large
                                logging.debug('Filter rejected on rule %d', n)
                                result = False
                                break
                            elif reTypes[n] == '>' and size and from_units(regexes[n]) > size:
                                # "Size at least" : too small
                                logging.debug('Filter rejected on rule %d', n)
                                result = False
                                break
                            elif reTypes[n] == 'F' and not ep_match(season, episode, regexes[n]):
                                # "Starting from SxxEyy", too early episode
                                logging.debug('Filter requirement match on rule %d', n)
                                result = False
                                break
                            else:
                                if regexes[n]:
                                    found = re.search(regexes[n], title)
                                else:
                                    found = False
                                if reTypes[n] == 'M' and not found:
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break
                                if found and reTypes[n] == 'A':
                                    logging.debug("Filter matched on rule %d", n)
                                    result = True
                                    break
                                if found and reTypes[n] == 'R':
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break

                    if len(reCats):
                        if notdefault(reCats[n]):
                            myCat = reCats[n]
                        elif category and not defCat:
                            myCat = cat_convert(category)
                        if myCat:
                            myCat, catPP, catScript, catPrio = cat_to_opts(myCat)
                        else:
                            myCat = catPP = catScript = catPrio = None
                        if notdefault(rePPs[n]):
                            myPP = rePPs[n]
                        elif not (reCats[n] or category):
                            myPP = catPP
                        if notdefault(reScripts[n]):
                            myScript = reScripts[n]
                        elif not (notdefault(reCats[n]) or category):
                            myScript = catScript
                        if rePrios[n] not in (str(DEFAULT_PRIORITY), ''):
                            myPrio = rePrios[n]
                        elif not ((rePrios[n] != str(DEFAULT_PRIORITY)) or category):
                            myPrio = catPrio

                    if cfg.no_dupes() and dup_title(title):
                        if cfg.no_dupes() == 1:
                            logging.info("Ignoring duplicate job %s", title)
                            continue
                        else:
                            myPrio = DUP_PRIORITY

                    act = download and not first
                    if link in jobs:
                        act = act and not jobs[link].get('status', '').endswith('*')
                        act = act or force
                        star = first or jobs[link].get('status', '').endswith('*')
                    else:
                        star = first
                    if result:
                        _HandleLink(jobs, link, title, size, 'G', category, myCat, myPP, myScript,
                                    act, star, order, priority=myPrio, rule=str(n))
                        if act:
                            new_downloads.append(title)
                    else:
                        _HandleLink(jobs, link, title, size, 'B', category, myCat, myPP, myScript,
                                    False, star, order, priority=myPrio, rule=str(n))
            order += 1

        # Send email if wanted and not "forced"
        if new_downloads and cfg.email_rss() and not force:
            emailer.rss_mail(feed, new_downloads)

        remove_obsolete(jobs, newlinks)
        return ''
Esempio n. 18
0
    def run(self):
        logging.info('URLGrabber starting up')
        self.shutdown = False

        while not self.shutdown:
            # Don't pound the website!
            time.sleep(5.0)

            (url, future_nzo) = self.queue.get()

            if not url:
                # stop signal, go test self.shutdown
                continue
            if future_nzo and future_nzo.wait and future_nzo.wait > time.time(
            ):
                # Re-queue when too early and still active

                self.add(url, future_nzo)
                continue
            url = url.replace(' ', '')

            try:
                if future_nzo:
                    # If nzo entry deleted, give up
                    try:
                        deleted = future_nzo.deleted
                    except AttributeError:
                        deleted = True
                    if deleted:
                        logging.debug('Dropping URL %s, job entry missing',
                                      url)
                        continue

                logging.info('Grabbing URL %s', url)
                req = urllib2.Request(url)
                req.add_header('User-Agent',
                               'SABnzbd+/%s' % sabnzbd.version.__version__)
                if not [True for item in _BAD_GZ_HOSTS if item in url]:
                    req.add_header('Accept-encoding', 'gzip')
                filename = None
                category = None
                gzipped = False
                nzo_info = {}
                wait = 0
                retry = True
                fn = None
                try:
                    fn = urllib2.urlopen(req)
                except:
                    # Cannot list exceptions here, because of unpredictability over platforms
                    error0 = str(sys.exc_info()[0]).lower()
                    error1 = str(sys.exc_info()[1]).lower()
                    logging.debug('Error "%s" trying to get the url %s',
                                  error1, url)
                    if 'certificate_verify_failed' in error1 or 'certificateerror' in error0:
                        msg = T('Server %s uses an untrusted HTTPS certificate'
                                ) % ''
                        retry = False
                    elif 'nodename nor servname provided' in error1:
                        msg = T('Server name does not resolve')
                        retry = False
                    elif '401' in error1 or 'unauthorized' in error1:
                        msg = T('Unauthorized access')
                        retry = False
                    elif '404' in error1:
                        msg = T('File not on server')
                        retry = False

                new_url = dereferring(url, fn)
                if new_url:
                    self.add(new_url, future_nzo)
                    continue

                if fn:
                    for hdr in fn.headers:
                        try:
                            item = hdr.lower()
                            value = fn.headers[hdr]
                        except:
                            continue
                        if item in ('content-encoding', ) and value == 'gzip':
                            gzipped = True
                        if item in ('category_id', 'x-dnzb-category'):
                            category = value
                        elif item in ('x-dnzb-moreinfo', ):
                            nzo_info['more_info'] = value
                        elif item in ('x-dnzb-name', ):
                            filename = value
                            if not filename.endswith('.nzb'):
                                filename += '.nzb'
                        elif item == 'x-dnzb-propername':
                            nzo_info['propername'] = value
                        elif item == 'x-dnzb-episodename':
                            nzo_info['episodename'] = value
                        elif item == 'x-dnzb-year':
                            nzo_info['year'] = value
                        elif item == 'x-dnzb-failure':
                            nzo_info['failure'] = value
                        elif item == 'x-dnzb-details':
                            nzo_info['details'] = value
                        elif item == 'x-dnzb-password':
                            nzo_info['password'] = value
                        elif item == 'retry-after':
                            # For NZBFinder
                            wait = misc.int_conv(value)

                        if not filename and "filename=" in value:
                            filename = value[value.index("filename=") +
                                             9:].strip(';').strip('"')

                if wait:
                    # For sites that have a rate-limiting attribute
                    msg = ''
                    retry = True
                    fn = None
                elif retry:
                    fn, msg, retry, wait, data = _analyse(fn, url)

                if not fn:
                    if retry:
                        logging.info('Retry URL %s', url)
                        self.add(url, future_nzo, wait)
                    else:
                        bad_fetch(future_nzo, url, msg)
                    continue

                if not filename:
                    filename = os.path.basename(url) + '.nzb'
                elif '&nzbname=' in filename:
                    # Sometimes the filename contains the full URL, duh!
                    filename = filename[filename.find('&nzbname=') + 9:]

                pp = future_nzo.pp
                script = future_nzo.script
                cat = future_nzo.cat
                if (cat is None or cat == '*') and category:
                    cat = misc.cat_convert(category)
                priority = future_nzo.priority
                nzbname = future_nzo.custom_name

                # process data
                if gzipped:
                    filename = filename + '.gz'
                if not data:
                    data = fn.read()
                fn.close()

                # Sanatize filename first
                filename = misc.sanitize_filename(filename)

                # Write data to temp file
                path = os.path.join(cfg.admin_dir.get_path(), FUTURE_Q_FOLDER)
                path = os.path.join(path, filename)
                f = open(path, 'wb')
                f.write(data)
                f.close()
                del data

                # Check if nzb file
                if os.path.splitext(filename)[1].lower() in ('.nzb', '.gz',
                                                             'bz2'):
                    res = dirscanner.ProcessSingleFile(
                        filename,
                        path,
                        pp=pp,
                        script=script,
                        cat=cat,
                        priority=priority,
                        nzbname=nzbname,
                        nzo_info=nzo_info,
                        url=future_nzo.url,
                        keep=False,
                        nzo_id=future_nzo.nzo_id)[0]
                    if res:
                        if res == -2:
                            logging.info(
                                'Incomplete NZB, retry after 5 min %s', url)
                            when = 300
                        elif res == -1:
                            # Error, but no reason to retry. Warning is already given
                            NzbQueue.do.remove(future_nzo.nzo_id,
                                               add_to_history=False)
                            continue
                        else:
                            logging.info(
                                'Unknown error fetching NZB, retry after 2 min %s',
                                url)
                            when = 120
                        self.add(url, future_nzo, when)
                # Check if a supported archive
                else:
                    if dirscanner.ProcessArchiveFile(
                            filename,
                            path,
                            pp,
                            script,
                            cat,
                            priority=priority,
                            nzbname=nzbname,
                            url=future_nzo.url,
                            keep=False,
                            nzo_id=future_nzo.nzo_id)[0]:
                        # Not a supported filetype, not an nzb (text/html ect)
                        try:
                            os.remove(fn)
                        except:
                            pass
                        logging.info(
                            'Unknown filetype when fetching NZB, retry after 30s %s',
                            url)
                        self.add(url, future_nzo, 30)
            except:
                logging.error(T('URLGRABBER CRASHED'), exc_info=True)
                logging.debug("URLGRABBER Traceback: ", exc_info=True)