Example #1
 def limit_speed(self, value):
     ''' Set the actual download speed in Bytes/sec
         When 'value' ends with a '%' sign or is within 1-100, it is interpreted as a percentage of the maximum bandwidth
         When no '%' is found, it is interpreted as an absolute speed (including KMGT notation).
     '''
     if value:
         mx = cfg.bandwidth_max.get_int()
         if '%' in str(value) or (from_units(value) > 0
                                  and from_units(value) < 101):
             limit = value.strip(' %')
             self.bandwidth_perc = from_units(limit)
             if mx:
                 self.bandwidth_limit = mx * self.bandwidth_perc / 100
             else:
                 logging.warning(
                     T('You must set a maximum bandwidth before you can set a bandwidth limit'
                       ))
         else:
             self.bandwidth_limit = from_units(value)
             if mx:
                 self.bandwidth_perc = self.bandwidth_limit / mx * 100
             else:
                 self.bandwidth_perc = 100
     else:
         self.speed_set()
     logging.info("Speed limit set to %s B/s", self.bandwidth_limit)
Example #2
 def test_from_units(self):
     assert -1.0 == misc.from_units("-1")
     assert 100.0 == misc.from_units("100")
     assert 1024.0 == misc.from_units("1KB")
     assert 1048576.0 == misc.from_units("1024KB")
     assert 1048576.0 == misc.from_units("1024Kb")
     assert 1048576.0 == misc.from_units("1024kB")
     assert 1048576.0 == misc.from_units("1MB")
     assert 1073741824.0 == misc.from_units("1GB")
     assert 1125899906842624.0 == misc.from_units("1P")
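These assertions pin down the semantics of misc.from_units: suffixes are case-insensitive and binary (1 KB = 1024 bytes), and K/M/G/T/P are all accepted, with or without a trailing 'B'. A minimal sketch that satisfies the tests above (the parsing details of SABnzbd's real misc.from_units are an assumption):

import re

_UNIT_FACTORS = {'K': 1024.0, 'M': 1024.0 ** 2, 'G': 1024.0 ** 3,
                 'T': 1024.0 ** 4, 'P': 1024.0 ** 5}

def from_units(value):
    """Convert '100', '1KB', '1.2 G', '1P', ... to a float number of bytes."""
    m = re.match(r'\s*(-?\d+(?:\.\d+)?)\s*([KMGTP]?)B?\s*$', str(value), re.I)
    if not m:
        return 0.0  # behaviour for unparsable input is a guess; the tests above do not cover it
    return float(m.group(1)) * _UNIT_FACTORS.get(m.group(2).upper(), 1.0)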
Example #3
def _get_link(uri, entry):
    """ Retrieve the post link from this entry
        Returns (link, category, size)
    """
    link = None  # @UnusedVariable -- pep8 bug?
    category = ''
    size = 0L
    uri = uri.lower()

    # Try standard link and enclosures first
    link = entry.link
    if not link:
        link = entry.links[0].href
    if 'enclosures' in entry:
        try:
            link = entry.enclosures[0]['href']
            size = int(entry.enclosures[0]['length'])
            logging.debug('Found size %s for %s', size, uri)
        except:
            pass

    if size == 0L:
        _RE_SIZE1 = re.compile(r'Size:\s*(\d+\.\d+\s*[KMG]{0,1})B\W*', re.I)
        _RE_SIZE2 = re.compile(r'\W*(\d+\.\d+\s*[KMG]{0,1})B\W*', re.I)
        # Try to find size in Description
        try:
            desc = entry.description.replace('\n', ' ')
            m = _RE_SIZE1.search(desc) or _RE_SIZE2.search(desc)
            if m:
                size = from_units(m.group(1))
                logging.debug('Found size %s for %s', size, uri)
        except:
            pass

    if link and 'http' in link.lower():
        try:
            category = entry.cattext
        except:
            try:
                category = entry.category
            except:
                try:  # nzb.su
                    category = entry.tags[0]['term']
                except:
                    try:
                        category = entry.description
                    except:
                        category = ''
        return link, category, size
    else:
        logging.warning(T('Empty RSS entry found (%s)'), link)
        return None, '', 0L
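For illustration, this is what the description fallback catches; the sample description string is made up:

import re

_RE_SIZE1 = re.compile(r'Size:\s*(\d+\.\d+\s*[KMG]{0,1})B\W*', re.I)

desc = 'Some.Show.S01E02.720p - Size: 4.7 GB - Parts: 63'
m = _RE_SIZE1.search(desc)
print(m.group(1))   # '4.7 G', which from_units() turns into 4.7 * 1024 ** 3 bytes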
Example #4
 def limit_speed(self, value):
     ''' Set the actual download speed in Bytes/sec
         When 'value' ends with a '%' sign or is within 1-100, it is interpreted as a percentage of the maximum bandwidth
         When no '%' is found, it is interpreted as an absolute speed (including KMGT notation).
     '''
     if value:
         mx = cfg.bandwidth_max.get_int()
         if '%' in str(value) or (from_units(value) > 0 and from_units(value) < 101):
             limit = value.strip(' %')
             self.bandwidth_perc = from_units(limit)
             if mx:
                 self.bandwidth_limit = mx * self.bandwidth_perc / 100
             else:
                 logging.warning(T('You must set a maximum bandwidth before you can set a bandwidth limit'))
         else:
             self.bandwidth_limit = from_units(value)
             if mx:
                 self.bandwidth_perc = self.bandwidth_limit / mx * 100
             else:
                 self.bandwidth_perc = 100
     else:
         self.speed_set()
     logging.info("Speed limit set to %s B/s", self.bandwidth_limit)
Example #5
    def run_feed(self, feed=None, download=False, ignoreFirst=False, force=False, readout=True):
        """ Run the query for one URI and apply filters """
        self.shutdown = False

        def dup_title(title):
            """ Check if this title was in this or other feeds
                Return matching feed name
            """
            title = title.lower()
            for fd in self.jobs:
                for lk in self.jobs[fd]:
                    item = self.jobs[fd][lk]
                    if item.get('status', ' ')[0] == 'D' and \
                       item.get('title', '').lower() == title:
                        return fd
            return ''

        if not feed:
            return 'No such feed'

        newlinks = []
        new_downloads = []

        # Preparations, get options
        try:
            feeds = config.get_rss()[feed]
        except KeyError:
            logging.error(T('Incorrect RSS feed description "%s"'), feed)
            logging.info("Traceback: ", exc_info=True)
            return T('Incorrect RSS feed description "%s"') % feed

        uri = feeds.uri()
        defCat = feeds.cat()
        import sabnzbd.api
        if not notdefault(defCat) or defCat not in sabnzbd.api.list_cats(default=False):
            defCat = None
        defPP = feeds.pp()
        if not notdefault(defPP):
            defPP = None
        defScript = feeds.script()
        if not notdefault(defScript):
            defScript = None
        defPrio = feeds.priority()
        if not notdefault(defPrio):
            defPrio = None

        # Preparations, convert filters to regex's
        regexes = []
        reTypes = []
        reCats = []
        rePPs = []
        rePrios = []
        reScripts = []
        reEnabled = []
        for filter in feeds.filters():
            reCat = filter[0]
            if defCat in ('', '*'):
                reCat = None
            reCats.append(reCat)
            rePPs.append(filter[1])
            reScripts.append(filter[2])
            reTypes.append(filter[3])
            if filter[3] in ('<', '>', 'F'):
                regexes.append(filter[4])
            else:
                regexes.append(convert_filter(filter[4]))
            rePrios.append(filter[5])
            reEnabled.append(filter[6] != '0')
        regcount = len(regexes)

        # Set first if this is the very first scan of this URI
        first = (feed not in self.jobs) and ignoreFirst

        # Add sabnzbd's custom User Agent
        feedparser.USER_AGENT = 'SABnzbd+/%s' % sabnzbd.version.__version__

        # Check for nzbs.org
        if 'nzbs.org/' in uri and '&dl=1' not in uri:
            uri += '&dl=1'

        # Read the RSS feed
        msg = None
        entries = None
        if readout:
            uri = uri.replace(' ', '%20')
            logging.debug("Running feedparser on %s", uri)
            d = feedparser.parse(uri.replace('feed://', 'http://'))
            logging.debug("Done parsing %s", uri)
            if not d:
                msg = T('Failed to retrieve RSS from %s: %s') % (uri, '?')
                logging.info(msg)
                return unicoder(msg)

            status = d.get('status', 999)
            if status in (401, 402, 403):
                msg = T('Do not have valid authentication for feed %s') % feed
                logging.info(msg)
                return unicoder(msg)
            if status >= 500 and status <= 599:
                msg = T('Server side error (server code %s); could not get %s on %s') % (status, feed, uri)
                logging.info(msg)
                return unicoder(msg)

            entries = d.get('entries')
            if 'bozo_exception' in d and not entries:
                msg = str(d['bozo_exception'])
                if 'CERTIFICATE_VERIFY_FAILED' in msg:
                    msg = T('Server %s uses an untrusted HTTPS certificate') % get_urlbase(uri)
                    logging.error(msg)
                else:
                    msg = T('Failed to retrieve RSS from %s: %s') % (uri, xml_name(msg))
                logging.info(msg)
                return unicoder(msg)
            if not entries:
                msg = T('RSS Feed %s was empty') % uri
                logging.info(msg)

        if feed not in self.jobs:
            self.jobs[feed] = {}
        jobs = self.jobs[feed]
        if readout:
            if not entries:
                return unicoder(msg)
        else:
            entries = jobs.keys()
            # Sort in the order the jobs came from the feed
            entries.sort(lambda x, y: jobs[x].get('order', 0) - jobs[y].get('order', 0))

        order = 0
        # Filter out valid new links
        for entry in entries:
            if self.shutdown:
                return

            if readout:
                try:
                    link, category, size = _get_link(uri, entry)
                except (AttributeError, IndexError):
                    link = None
                    category = u''
                    size = 0L
                    logging.info(T('Incompatible feed') + ' ' + uri)
                    logging.info("Traceback: ", exc_info=True)
                    return T('Incompatible feed')
                title = entry.title
            else:
                link = entry
                category = jobs[link].get('orgcat', '')
                if category in ('', '*'):
                    category = None
                title = jobs[link].get('title', '')
                size = jobs[link].get('size', 0L)

            if link:
                # Make sure spaces are quoted in the URL
                link = link.strip().replace(' ', '%20')

                newlinks.append(link)

                if link in jobs:
                    jobstat = jobs[link].get('status', ' ')[0]
                else:
                    jobstat = 'N'
                if jobstat in 'NGB' or (jobstat == 'X' and readout):
                    # Match this title against all filters
                    logging.debug('Trying title %s', title)
                    result = False
                    myCat = defCat
                    myPP = defPP
                    myScript = defScript
                    myPrio = defPrio
                    n = 0
                    if 'F' in reTypes:
                        season, episode = sabnzbd.newsunpack.analyse_show(title)[1:3]
                        season = int_conv(season)
                        episode = int_conv(episode)
                    else:
                        season = episode = 0

                    # Match against all filters until a positive or negative match
                    logging.debug('Size %s for %s', size, title)
                    for n in xrange(regcount):
                        if reEnabled[n]:
                            if category and reTypes[n] == 'C':
                                found = re.search(regexes[n], category)
                                if not found:
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break
                            elif reTypes[n] == '<' and size and from_units(regexes[n]) < size:
                                # "Size at most" : too large
                                logging.debug('Filter rejected on rule %d', n)
                                result = False
                                break
                            elif reTypes[n] == '>' and size and from_units(regexes[n]) > size:
                                # "Size at least" : too small
                                logging.debug('Filter rejected on rule %d', n)
                                result = False
                                break
                            elif reTypes[n] == 'F' and not ep_match(season, episode, regexes[n]):
                                # "Starting from SxxEyy", too early episode
                                logging.debug('Filter requirement match on rule %d', n)
                                result = False
                                break
                            else:
                                if regexes[n]:
                                    found = re.search(regexes[n], title)
                                else:
                                    found = False
                                if reTypes[n] == 'M' and not found:
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break
                                if found and reTypes[n] == 'A':
                                    logging.debug("Filter matched on rule %d", n)
                                    result = True
                                    break
                                if found and reTypes[n] == 'R':
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break

                    if len(reCats):
                        if notdefault(reCats[n]):
                            myCat = reCats[n]
                        elif category and not defCat:
                            myCat = cat_convert(category)
                        if myCat:
                            myCat, catPP, catScript, catPrio = cat_to_opts(myCat)
                        else:
                            myCat = catPP = catScript = catPrio = None
                        if notdefault(rePPs[n]):
                            myPP = rePPs[n]
                        elif not (reCats[n] or category):
                            myPP = catPP
                        if notdefault(reScripts[n]):
                            myScript = reScripts[n]
                        elif not (notdefault(reCats[n]) or category):
                            myScript = catScript
                        if rePrios[n] not in (str(DEFAULT_PRIORITY), ''):
                            myPrio = rePrios[n]
                        elif not ((rePrios[n] != str(DEFAULT_PRIORITY)) or category):
                            myPrio = catPrio

                    if cfg.no_dupes() and dup_title(title):
                        if cfg.no_dupes() == 1:
                            logging.info("Ignoring duplicate job %s", title)
                            continue
                        else:
                            myPrio = DUP_PRIORITY

                    act = download and not first
                    if link in jobs:
                        act = act and not jobs[link].get('status', '').endswith('*')
                        act = act or force
                        star = first or jobs[link].get('status', '').endswith('*')
                    else:
                        star = first
                    if result:
                        _HandleLink(jobs, link, title, size, 'G', category, myCat, myPP, myScript,
                                    act, star, order, priority=myPrio, rule=str(n))
                        if act:
                            new_downloads.append(title)
                    else:
                        _HandleLink(jobs, link, title, size, 'B', category, myCat, myPP, myScript,
                                    False, star, order, priority=myPrio, rule=str(n))
            order += 1

        # Send email if wanted and not "forced"
        if new_downloads and cfg.email_rss() and not force:
            emailer.rss_mail(feed, new_downloads)

        remove_obsolete(jobs, newlinks)
        return ''
Example #6
File: rss.py Project: rivy/sabnzbd
def _get_link(uri, entry):
    """ Retrieve the post link from this entry
        Returns (link, category, size)
    """
    link = None
    category = ''
    size = 0L
    uri = uri.lower()
    age = datetime.datetime.now()

    # Try standard link and enclosures first
    link = entry.link
    if not link:
        link = entry.links[0].href
    if 'enclosures' in entry:
        try:
            link = entry.enclosures[0]['href']
            size = int(entry.enclosures[0]['length'])
        except:
            pass

    if size == 0L:
        _RE_SIZE1 = re.compile(r'Size:\s*(\d+\.\d+\s*[KMG]{0,1})B\W*', re.I)
        _RE_SIZE2 = re.compile(r'\W*(\d+\.\d+\s*[KMG]{0,1})B\W*', re.I)
        # Try to find size in Description
        try:
            desc = entry.description.replace('\n', ' ').replace('&nbsp;', ' ')
            m = _RE_SIZE1.search(desc) or _RE_SIZE2.search(desc)
            if m:
                size = from_units(m.group(1))
        except:
            pass

    # Try newznab attribute first, this is the correct one
    try:
        # Convert it to format that calc_age understands
        age = datetime.datetime(*entry['newznab']['usenetdate_parsed'][:6])
    except:
        # Date from feed (usually lags behind)
        try:
            # Convert it to format that calc_age understands
            age = datetime.datetime(*entry.published_parsed[:6])
        except:
            pass
    finally:
        # We need to convert it to local timezone, feedparser always returns UTC
        age = age - datetime.timedelta(seconds=time.timezone)

    # Maybe the newznab also provided SxxExx info
    try:
        season = re.findall('\d+', entry['newznab']['season'])[0]
        episode = re.findall('\d+', entry['newznab']['episode'])[0]
    except:
        season = episode = 0

    if link and 'http' in link.lower():
        try:
            category = entry.cattext
        except:
            try:
                category = entry.category
            except:
                try:  # nzb.su
                    category = entry.tags[0]['term']
                except:
                    try:
                        category = entry.description
                    except:
                        category = ''

        return link, category, size, age, season, episode
    else:
        logging.warning(T('Empty RSS entry found (%s)'), link)
        return None, '', 0L, None, 0, 0
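The date handling above converts feedparser's UTC time tuples into a naive local datetime; a stand-alone illustration, using the current time as a stand-in for entry.published_parsed:

import datetime
import time

published_parsed = time.gmtime()                    # feedparser dates are UTC struct_time values
age = datetime.datetime(*published_parsed[:6])      # naive datetime, still in UTC
age -= datetime.timedelta(seconds=time.timezone)    # shift to local time (ignores DST, like the code above)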
Example #7
File: rss.py Project: rivy/sabnzbd
    def run_feed(self, feed=None, download=False, ignoreFirst=False, force=False, readout=True):
        """ Run the query for one URI and apply filters """
        self.shutdown = False

        if not feed:
            return 'No such feed'

        newlinks = []
        new_downloads = []

        # Preparations, get options
        try:
            feeds = config.get_rss()[feed]
        except KeyError:
            logging.error(T('Incorrect RSS feed description "%s"'), feed)
            logging.info("Traceback: ", exc_info=True)
            return T('Incorrect RSS feed description "%s"') % feed

        uris = feeds.uri()
        defCat = feeds.cat()
        import sabnzbd.api
        if not notdefault(defCat) or defCat not in sabnzbd.api.list_cats(default=False):
            defCat = None
        defPP = feeds.pp()
        if not notdefault(defPP):
            defPP = None
        defScript = feeds.script()
        if not notdefault(defScript):
            defScript = None
        defPrio = feeds.priority()
        if not notdefault(defPrio):
            defPrio = None

        # Preparations, convert filters to regex's
        regexes = []
        reTypes = []
        reCats = []
        rePPs = []
        rePrios = []
        reScripts = []
        reEnabled = []
        for filter in feeds.filters():
            reCat = filter[0]
            if defCat in ('', '*'):
                reCat = None
            reCats.append(reCat)
            rePPs.append(filter[1])
            reScripts.append(filter[2])
            reTypes.append(filter[3])
            if filter[3] in ('<', '>', 'F', 'S'):
                regexes.append(filter[4])
            else:
                regexes.append(convert_filter(filter[4]))
            rePrios.append(filter[5])
            reEnabled.append(filter[6] != '0')
        regcount = len(regexes)

        # Set first if this is the very first scan of this URI
        first = (feed not in self.jobs) and ignoreFirst

        # Add sabnzbd's custom User Agent
        feedparser.USER_AGENT = 'SABnzbd+/%s' % sabnzbd.version.__version__

        # Read the RSS feed
        msg = None
        entries = None
        if readout:
            all_entries = []
            for uri in uris:
                uri = uri.replace(' ', '%20')
                logging.debug("Running feedparser on %s", uri)
                feed_parsed = feedparser.parse(uri.replace('feed://', 'http://'))
                logging.debug("Done parsing %s", uri)

                if not feed_parsed:
                    msg = T('Failed to retrieve RSS from %s: %s') % (uri, '?')
                    logging.info(msg)

                status = feed_parsed.get('status', 999)
                if status in (401, 402, 403):
                    msg = T('Do not have valid authentication for feed %s') % feed
                    logging.info(msg)

                if status >= 500 and status <= 599:
                    msg = T('Server side error (server code %s); could not get %s on %s') % (status, feed, uri)
                    logging.info(msg)

                entries = feed_parsed.get('entries')
                if 'bozo_exception' in feed_parsed and not entries:
                    msg = str(feed_parsed['bozo_exception'])
                    if 'CERTIFICATE_VERIFY_FAILED' in msg:
                        msg = T('Server %s uses an untrusted HTTPS certificate') % get_urlbase(uri)
                        msg += ' - https://sabnzbd.org/certificate-errors'
                        logging.error(msg)
                    else:
                        msg = T('Failed to retrieve RSS from %s: %s') % (uri, xml_name(msg))
                    logging.info(msg)

                if not entries:
                    msg = T('RSS Feed %s was empty') % uri
                    logging.info(msg)
                all_entries.extend(entries)
            entries = all_entries

        if feed not in self.jobs:
            self.jobs[feed] = {}
        jobs = self.jobs[feed]
        if readout:
            if not entries:
                return unicoder(msg)
        else:
            entries = jobs.keys()
            # Sort in the order the jobs came from the feed
            entries.sort(lambda x, y: jobs[x].get('order', 0) - jobs[y].get('order', 0))

        order = 0
        # Filter out valid new links
        for entry in entries:
            if self.shutdown:
                return

            if readout:
                try:
                    link, category, size, age, season, episode = _get_link(uri, entry)
                except (AttributeError, IndexError):
                    link = None
                    category = u''
                    size = 0L
                    age = None
                    logging.info(T('Incompatible feed') + ' ' + uri)
                    logging.info("Traceback: ", exc_info=True)
                    return T('Incompatible feed')
                title = entry.title

                # If there's multiple feeds, remove the duplicates based on title and size
                if len(uris) > 1:
                    skip_job = False
                    for job_link, job in jobs.items():
                        # Allow 5% size deviation because indexers might have small differences for same release
                        if job.get('title') == title and link != job_link and (job.get('size')*0.95) < size < (job.get('size')*1.05):
                            logging.info("Ignoring job %s from other feed", title)
                            skip_job = True
                            break
                    if skip_job:
                        continue
            else:
                link = entry
                category = jobs[link].get('orgcat', '')
                if category in ('', '*'):
                    category = None
                title = jobs[link].get('title', '')
                size = jobs[link].get('size', 0L)
                age = jobs[link].get('age')
                season = jobs[link].get('season', 0)
                episode = jobs[link].get('episode', 0)

            if link:
                # Make sure spaces are quoted in the URL
                link = link.strip().replace(' ', '%20')

                newlinks.append(link)

                if link in jobs:
                    jobstat = jobs[link].get('status', ' ')[0]
                else:
                    jobstat = 'N'
                if jobstat in 'NGB' or (jobstat == 'X' and readout):
                    # Match this title against all filters
                    logging.debug('Trying title %s', title)
                    result = False
                    myCat = defCat
                    myPP = defPP
                    myScript = defScript
                    myPrio = defPrio
                    n = 0
                    if ('F' in reTypes or 'S' in reTypes) and (not season or not episode):
                        season, episode = sabnzbd.newsunpack.analyse_show(title)[1:3]
                        season = int_conv(season)
                        episode = int_conv(episode)

                    # Match against all filters until a positive or negative match
                    logging.debug('Size %s for %s', size, title)
                    for n in xrange(regcount):
                        if reEnabled[n]:
                            if category and reTypes[n] == 'C':
                                found = re.search(regexes[n], category)
                                if not found:
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break
                            elif reTypes[n] == '<' and size and from_units(regexes[n]) < size:
                                # "Size at most" : too large
                                logging.debug('Filter rejected on rule %d', n)
                                result = False
                                break
                            elif reTypes[n] == '>' and size and from_units(regexes[n]) > size:
                                # "Size at least" : too small
                                logging.debug('Filter rejected on rule %d', n)
                                result = False
                                break
                            elif reTypes[n] == 'F' and not ep_match(season, episode, regexes[n]):
                                # "Starting from SxxEyy", too early episode
                                logging.debug('Filter requirement match on rule %d', n)
                                result = False
                                break
                            elif reTypes[n] == 'S' and season and episode and ep_match(season, episode, regexes[n], title):
                                logging.debug('Filter matched on rule %d', n)
                                result = True
                                break
                            else:
                                if regexes[n]:
                                    found = re.search(regexes[n], title)
                                else:
                                    found = False
                                if reTypes[n] == 'M' and not found:
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break
                                if found and reTypes[n] == 'A':
                                    logging.debug("Filter matched on rule %d", n)
                                    result = True
                                    break
                                if found and reTypes[n] == 'R':
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break

                    if len(reCats):
                        if not result and defCat:
                            # Apply Feed-category on non-matched items
                            myCat = defCat
                        elif result and notdefault(reCats[n]):
                            # Use the matched info
                            myCat = reCats[n]
                        elif category and not defCat:
                            # No result and no Feed-category
                            myCat = cat_convert(category)

                        if myCat:
                            myCat, catPP, catScript, catPrio = cat_to_opts(myCat)
                        else:
                            myCat = catPP = catScript = catPrio = None
                        if notdefault(rePPs[n]):
                            myPP = rePPs[n]
                        elif not (reCats[n] or category):
                            myPP = catPP
                        if notdefault(reScripts[n]):
                            myScript = reScripts[n]
                        elif not (notdefault(reCats[n]) or category):
                            myScript = catScript
                        if rePrios[n] not in (str(DEFAULT_PRIORITY), ''):
                            myPrio = rePrios[n]
                        elif not ((rePrios[n] != str(DEFAULT_PRIORITY)) or category):
                            myPrio = catPrio


                    if cfg.no_dupes() and self.check_duplicate(title):
                        if cfg.no_dupes() == 1:
                            # Dupe-detection: Discard
                            logging.info("Ignoring duplicate job %s", title)
                            continue
                        elif cfg.no_dupes() == 3:
                            # Dupe-detection: Fail
                            # We accept it so the Queue can send it to the History
                            logging.info("Found duplicate job %s", title)
                        else:
                            # Dupe-detection: Pause
                            myPrio = DUP_PRIORITY

                    act = download and not first
                    if link in jobs:
                        act = act and not jobs[link].get('status', '').endswith('*')
                        act = act or force
                        star = first or jobs[link].get('status', '').endswith('*')
                    else:
                        star = first
                    if result:
                        _HandleLink(jobs, link, title, size, age, season, episode, 'G', category, myCat, myPP, myScript,
                                    act, star, order, priority=myPrio, rule=str(n))
                        if act:
                            new_downloads.append(title)
                    else:
                        _HandleLink(jobs, link, title, size, age, season, episode, 'B', category, myCat, myPP, myScript,
                                    False, star, order, priority=myPrio, rule=str(n))
            order += 1

        # Send email if wanted and not "forced"
        if new_downloads and cfg.email_rss() and not force:
            emailer.rss_mail(feed, new_downloads)

        remove_obsolete(jobs, newlinks)
        return msg
Example #8
def _get_link(entry):
    """ Retrieve the post link from this entry
        Returns (link, category, size)
    """
    size = 0
    age = datetime.datetime.now()

    # Try standard link and enclosures first
    link = entry.link
    if not link:
        link = entry.links[0].href
    if "enclosures" in entry:
        try:
            link = entry.enclosures[0]["href"]
            size = int(entry.enclosures[0]["length"])
        except:
            pass

    # GUID usually has URL to result on page
    infourl = None
    if entry.get("id") and entry.id != link and entry.id.startswith("http"):
        infourl = entry.id

    if size == 0:
        # Try to find size in Description
        try:
            desc = entry.description.replace("\n", " ").replace("&nbsp;", " ")
            m = _RE_SIZE1.search(desc) or _RE_SIZE2.search(desc)
            if m:
                size = from_units(m.group(1))
        except:
            pass

    # Try newznab attribute first, this is the correct one
    try:
        # Convert it to format that calc_age understands
        age = datetime.datetime(*entry["newznab"]["usenetdate_parsed"][:6])
    except:
        # Date from feed (usually lags behind)
        try:
            # Convert it to format that calc_age understands
            age = datetime.datetime(*entry.published_parsed[:6])
        except:
            pass
    finally:
        # We need to convert it to local timezone, feedparser always returns UTC
        age = age - datetime.timedelta(seconds=time.timezone)

    # Maybe the newznab also provided SxxExx info
    try:
        season = re.findall(r"\d+", entry["newznab"]["season"])[0]
        episode = re.findall(r"\d+", entry["newznab"]["episode"])[0]
    except (KeyError, IndexError):
        season = episode = 0

    if link and "http" in link.lower():
        try:
            category = entry.cattext
        except AttributeError:
            try:
                category = entry.category
            except AttributeError:
                try:  # nzb.su
                    category = entry.tags[0]["term"]
                except (AttributeError, KeyError):
                    try:
                        category = entry.description
                    except AttributeError:
                        category = ""

        return link, infourl, category, size, age, season, episode
    else:
        logging.warning(T("Empty RSS entry found (%s)"), link)
        return None, None, "", 0, None, 0, 0
Example #9
    def run_feed(self, feed=None, download=False, ignoreFirst=False, force=False, readout=True):
        """ Run the query for one URI and apply filters """
        self.shutdown = False

        if not feed:
            return "No such feed"

        newlinks = []
        new_downloads = []

        # Preparations, get options
        try:
            feeds = config.get_rss()[feed]
        except KeyError:
            logging.error(T('Incorrect RSS feed description "%s"'), feed)
            logging.info("Traceback: ", exc_info=True)
            return T('Incorrect RSS feed description "%s"') % feed

        uris = feeds.uri()
        defCat = feeds.cat()
        import sabnzbd.api

        if not notdefault(defCat) or defCat not in sabnzbd.api.list_cats(default=False):
            defCat = None
        defPP = feeds.pp()
        if not notdefault(defPP):
            defPP = None
        defScript = feeds.script()
        if not notdefault(defScript):
            defScript = None
        defPrio = feeds.priority()
        if not notdefault(defPrio):
            defPrio = None

        # Preparations, convert filters to regex's
        regexes = []
        reTypes = []
        reCats = []
        rePPs = []
        rePrios = []
        reScripts = []
        reEnabled = []
        for feed_filter in feeds.filters():
            reCat = feed_filter[0]
            if defCat in ("", "*"):
                reCat = None
            reCats.append(reCat)
            rePPs.append(feed_filter[1])
            reScripts.append(feed_filter[2])
            reTypes.append(feed_filter[3])
            if feed_filter[3] in ("<", ">", "F", "S"):
                regexes.append(feed_filter[4])
            else:
                regexes.append(convert_filter(feed_filter[4]))
            rePrios.append(feed_filter[5])
            reEnabled.append(feed_filter[6] != "0")
        regcount = len(regexes)

        # Set first if this is the very first scan of this URI
        first = (feed not in self.jobs) and ignoreFirst

        # Add SABnzbd's custom User Agent
        feedparser.USER_AGENT = "SABnzbd/%s" % sabnzbd.__version__

        # Read the RSS feed
        msg = None
        entries = None
        if readout:
            all_entries = []
            for uri in uris:
                uri = uri.replace(" ", "%20")
                logging.debug("Running feedparser on %s", uri)
                feed_parsed = feedparser.parse(uri.replace("feed://", "http://"))
                logging.debug("Done parsing %s", uri)

                if not feed_parsed:
                    msg = T("Failed to retrieve RSS from %s: %s") % (uri, "?")
                    logging.info(msg)

                status = feed_parsed.get("status", 999)
                if status in (401, 402, 403):
                    msg = T("Do not have valid authentication for feed %s") % uri
                    logging.info(msg)

                if 500 <= status <= 599:
                    msg = T("Server side error (server code %s); could not get %s on %s") % (status, feed, uri)
                    logging.info(msg)

                entries = feed_parsed.get("entries")
                if "bozo_exception" in feed_parsed and not entries:
                    msg = str(feed_parsed["bozo_exception"])
                    if "CERTIFICATE_VERIFY_FAILED" in msg:
                        msg = T("Server %s uses an untrusted HTTPS certificate") % get_base_url(uri)
                        msg += " - https://sabnzbd.org/certificate-errors"
                        logging.error(msg)
                    elif "href" in feed_parsed and feed_parsed["href"] != uri and "login" in feed_parsed["href"]:
                        # Redirect to login page!
                        msg = T("Do not have valid authentication for feed %s") % uri
                    else:
                        msg = T("Failed to retrieve RSS from %s: %s") % (uri, msg)
                    logging.info(msg)

                if not entries and not msg:
                    msg = T("RSS Feed %s was empty") % uri
                    logging.info(msg)
                all_entries.extend(entries)
            entries = all_entries

        # In case of a new feed
        if feed not in self.jobs:
            self.jobs[feed] = {}
        jobs = self.jobs[feed]

        # Error in readout or no new readout
        if readout:
            if not entries:
                return msg
        else:
            entries = jobs

        # Filter out valid new links
        for entry in entries:
            if self.shutdown:
                return

            if readout:
                try:
                    link, infourl, category, size, age, season, episode = _get_link(entry)
                except (AttributeError, IndexError):
                    logging.info(T("Incompatible feed") + " " + uri)
                    logging.info("Traceback: ", exc_info=True)
                    return T("Incompatible feed")
                title = entry.title

                # If there's multiple feeds, remove the duplicates based on title and size
                if len(uris) > 1:
                    skip_job = False
                    for job_link, job in jobs.items():
                        # Allow 5% size deviation because indexers might have small differences for same release
                        if (
                            job.get("title") == title
                            and link != job_link
                            and (job.get("size") * 0.95) < size < (job.get("size") * 1.05)
                        ):
                            logging.info("Ignoring job %s from other feed", title)
                            skip_job = True
                            break
                    if skip_job:
                        continue
            else:
                link = entry
                infourl = jobs[link].get("infourl", "")
                category = jobs[link].get("orgcat", "")
                if category in ("", "*"):
                    category = None
                title = jobs[link].get("title", "")
                size = jobs[link].get("size", 0)
                age = jobs[link].get("age")
                season = jobs[link].get("season", 0)
                episode = jobs[link].get("episode", 0)

            if link:
                # Make sure spaces are quoted in the URL
                link = link.strip().replace(" ", "%20")

                newlinks.append(link)

                if link in jobs:
                    jobstat = jobs[link].get("status", " ")[0]
                else:
                    jobstat = "N"
                if jobstat in "NGB" or (jobstat == "X" and readout):
                    # Match this title against all filters
                    logging.debug("Trying title %s", title)
                    result = False
                    myCat = defCat
                    myPP = defPP
                    myScript = defScript
                    myPrio = defPrio
                    n = 0
                    if ("F" in reTypes or "S" in reTypes) and (not season or not episode):
                        season, episode = sabnzbd.newsunpack.analyse_show(title)[1:3]

                    # Match against all filters until a positive or negative match
                    logging.debug("Size %s", size)
                    for n in range(regcount):
                        if reEnabled[n]:
                            if category and reTypes[n] == "C":
                                found = re.search(regexes[n], category)
                                if not found:
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break
                            elif reTypes[n] == "<" and size and from_units(regexes[n]) < size:
                                # "Size at most" : too large
                                logging.debug("Filter rejected on rule %d", n)
                                result = False
                                break
                            elif reTypes[n] == ">" and size and from_units(regexes[n]) > size:
                                # "Size at least" : too small
                                logging.debug("Filter rejected on rule %d", n)
                                result = False
                                break
                            elif reTypes[n] == "F" and not ep_match(season, episode, regexes[n]):
                                # "Starting from SxxEyy", too early episode
                                logging.debug("Filter requirement match on rule %d", n)
                                result = False
                                break
                            elif (
                                reTypes[n] == "S"
                                and season
                                and episode
                                and ep_match(season, episode, regexes[n], title)
                            ):
                                logging.debug("Filter matched on rule %d", n)
                                result = True
                                break
                            else:
                                if regexes[n]:
                                    found = re.search(regexes[n], title)
                                else:
                                    found = False
                                if reTypes[n] == "M" and not found:
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break
                                if found and reTypes[n] == "A":
                                    logging.debug("Filter matched on rule %d", n)
                                    result = True
                                    break
                                if found and reTypes[n] == "R":
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break

                    if len(reCats):
                        if not result and defCat:
                            # Apply Feed-category on non-matched items
                            myCat = defCat
                        elif result and notdefault(reCats[n]):
                            # Use the matched info
                            myCat = reCats[n]
                        elif category and not defCat:
                            # No result and no Feed-category
                            myCat = cat_convert(category)

                        if myCat:
                            myCat, catPP, catScript, catPrio = cat_to_opts(myCat)
                        else:
                            myCat = catPP = catScript = catPrio = None
                        if notdefault(rePPs[n]):
                            myPP = rePPs[n]
                        elif not (reCats[n] or category):
                            myPP = catPP
                        if notdefault(reScripts[n]):
                            myScript = reScripts[n]
                        elif not (notdefault(reCats[n]) or category):
                            myScript = catScript
                        if rePrios[n] not in (str(DEFAULT_PRIORITY), ""):
                            myPrio = rePrios[n]
                        elif not ((rePrios[n] != str(DEFAULT_PRIORITY)) or category):
                            myPrio = catPrio

                    if cfg.no_dupes() and self.check_duplicate(title):
                        if cfg.no_dupes() == 1:
                            # Dupe-detection: Discard
                            logging.info("Ignoring duplicate job %s", title)
                            continue
                        elif cfg.no_dupes() == 3:
                            # Dupe-detection: Fail
                            # We accept it so the Queue can send it to the History
                            logging.info("Found duplicate job %s", title)
                        else:
                            # Dupe-detection: Pause
                            myPrio = DUP_PRIORITY

                    act = download and not first
                    if link in jobs:
                        act = act and not jobs[link].get("status", "").endswith("*")
                        act = act or force
                        star = first or jobs[link].get("status", "").endswith("*")
                    else:
                        star = first
                    if result:
                        _HandleLink(
                            jobs,
                            link,
                            infourl,
                            title,
                            size,
                            age,
                            season,
                            episode,
                            "G",
                            category,
                            myCat,
                            myPP,
                            myScript,
                            act,
                            star,
                            priority=myPrio,
                            rule=n,
                        )
                        if act:
                            new_downloads.append(title)
                    else:
                        _HandleLink(
                            jobs,
                            link,
                            infourl,
                            title,
                            size,
                            age,
                            season,
                            episode,
                            "B",
                            category,
                            myCat,
                            myPP,
                            myScript,
                            False,
                            star,
                            priority=myPrio,
                            rule=n,
                        )

        # Send email if wanted and not "forced"
        if new_downloads and cfg.email_rss() and not force:
            emailer.rss_mail(feed, new_downloads)

        remove_obsolete(jobs, newlinks)
        return msg
Example #10
 def test_unit_back_and_forth(self):
     assert 100 == misc.from_units(misc.to_units(100))
     assert 1024 == misc.from_units(misc.to_units(1024))
     assert 1024 ** 3 == misc.from_units(misc.to_units(1024 ** 3))
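The round-trip test only holds if to_units renders exact binary multiples without loss. A minimal counterpart to the from_units sketch given earlier (again an assumption, not SABnzbd's actual implementation):

def to_units(size):
    """Render a byte count with a binary K/M/G/T/P suffix."""
    units = ['', 'K', 'M', 'G', 'T', 'P']
    size = float(size)
    n = 0
    while size >= 1024.0 and n < len(units) - 1:
        size /= 1024.0
        n += 1
    return '%.1f %s' % (size, units[n]) if n else '%d' % int(size)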
Example #11
    def run_feed(self, feed=None, download=False, ignoreFirst=False, force=False, readout=True):
        """ Run the query for one URI and apply filters """
        self.shutdown = False

        if not feed:
            return 'No such feed'

        newlinks = []
        new_downloads = []

        # Preparations, get options
        try:
            feeds = config.get_rss()[feed]
        except KeyError:
            logging.error(T('Incorrect RSS feed description "%s"'), feed)
            logging.info("Traceback: ", exc_info=True)
            return T('Incorrect RSS feed description "%s"') % feed

        uris = feeds.uri()
        defCat = feeds.cat()
        import sabnzbd.api
        if not notdefault(defCat) or defCat not in sabnzbd.api.list_cats(default=False):
            defCat = None
        defPP = feeds.pp()
        if not notdefault(defPP):
            defPP = None
        defScript = feeds.script()
        if not notdefault(defScript):
            defScript = None
        defPrio = feeds.priority()
        if not notdefault(defPrio):
            defPrio = None

        # Preparations, convert filters to regex's
        regexes = []
        reTypes = []
        reCats = []
        rePPs = []
        rePrios = []
        reScripts = []
        reEnabled = []
        for filter in feeds.filters():
            reCat = filter[0]
            if defCat in ('', '*'):
                reCat = None
            reCats.append(reCat)
            rePPs.append(filter[1])
            reScripts.append(filter[2])
            reTypes.append(filter[3])
            if filter[3] in ('<', '>', 'F', 'S'):
                regexes.append(filter[4])
            else:
                regexes.append(convert_filter(filter[4]))
            rePrios.append(filter[5])
            reEnabled.append(filter[6] != '0')
        regcount = len(regexes)

        # Set first if this is the very first scan of this URI
        first = (feed not in self.jobs) and ignoreFirst

        # Add sabnzbd's custom User Agent
        feedparser.USER_AGENT = 'SABnzbd+/%s' % sabnzbd.version.__version__

        # Read the RSS feed
        msg = None
        entries = None
        if readout:
            all_entries = []
            for uri in uris:
                uri = uri.replace(' ', '%20')
                logging.debug("Running feedparser on %s", uri)
                feed_parsed = feedparser.parse(uri.replace('feed://', 'http://'))
                logging.debug("Done parsing %s", uri)

                if not feed_parsed:
                    msg = T('Failed to retrieve RSS from %s: %s') % (uri, '?')
                    logging.info(msg)

                status = feed_parsed.get('status', 999)
                if status in (401, 402, 403):
                    msg = T('Do not have valid authentication for feed %s') % feed
                    logging.info(msg)

                if 500 <= status <= 599:
                    msg = T('Server side error (server code %s); could not get %s on %s') % (status, feed, uri)
                    logging.info(msg)

                entries = feed_parsed.get('entries')
                if 'bozo_exception' in feed_parsed and not entries:
                    msg = str(feed_parsed['bozo_exception'])
                    if 'CERTIFICATE_VERIFY_FAILED' in msg:
                        msg = T('Server %s uses an untrusted HTTPS certificate') % get_urlbase(uri)
                        msg += ' - https://sabnzbd.org/certificate-errors'
                        logging.error(msg)
                    else:
                        msg = T('Failed to retrieve RSS from %s: %s') % (uri, xml_name(msg))
                    logging.info(msg)

                if not entries:
                    msg = T('RSS Feed %s was empty') % uri
                    logging.info(msg)
                all_entries.extend(entries)
            entries = all_entries

        # In case of a new feed
        if feed not in self.jobs:
            self.jobs[feed] = {}
        jobs = self.jobs[feed]

        # Error in readout or no new readout
        if readout:
            if not entries:
                return unicoder(msg)
        else:
            entries = jobs.keys()

        # Filter out valid new links
        for entry in entries:
            if self.shutdown:
                return

            if readout:
                try:
                    link, category, size, age, season, episode = _get_link(entry)
                except (AttributeError, IndexError):
                    logging.info(T('Incompatible feed') + ' ' + uri)
                    logging.info("Traceback: ", exc_info=True)
                    return T('Incompatible feed')
                title = entry.title

                # If there's multiple feeds, remove the duplicates based on title and size
                if len(uris) > 1:
                    skip_job = False
                    for job_link, job in jobs.items():
                        # Allow 5% size deviation because indexers might have small differences for same release
                        if job.get('title') == title and link != job_link and (job.get('size')*0.95) < size < (job.get('size')*1.05):
                            logging.info("Ignoring job %s from other feed", title)
                            skip_job = True
                            break
                    if skip_job:
                        continue
            else:
                link = entry
                category = jobs[link].get('orgcat', '')
                if category in ('', '*'):
                    category = None
                title = jobs[link].get('title', '')
                size = jobs[link].get('size', 0L)
                age = jobs[link].get('age')
                season = jobs[link].get('season', 0)
                episode = jobs[link].get('episode', 0)

            if link:
                # Make sure spaces are quoted in the URL
                link = link.strip().replace(' ', '%20')

                newlinks.append(link)

                if link in jobs:
                    jobstat = jobs[link].get('status', ' ')[0]
                else:
                    jobstat = 'N'
                if jobstat in 'NGB' or (jobstat == 'X' and readout):
                    # Match this title against all filters
                    logging.debug('Trying title %s', title)
                    result = False
                    myCat = defCat
                    myPP = defPP
                    myScript = defScript
                    myPrio = defPrio
                    n = 0
                    if ('F' in reTypes or 'S' in reTypes) and (not season or not episode):
                        season, episode = sabnzbd.newsunpack.analyse_show(title)[1:3]

                    # Match against all filters until a positive or negative match
                    logging.debug('Size %s', size)
                    for n in xrange(regcount):
                        if reEnabled[n]:
                            if category and reTypes[n] == 'C':
                                found = re.search(regexes[n], category)
                                if not found:
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break
                            elif reTypes[n] == '<' and size and from_units(regexes[n]) < size:
                                # "Size at most" : too large
                                logging.debug('Filter rejected on rule %d', n)
                                result = False
                                break
                            elif reTypes[n] == '>' and size and from_units(regexes[n]) > size:
                                # "Size at least" : too small
                                logging.debug('Filter rejected on rule %d', n)
                                result = False
                                break
                            elif reTypes[n] == 'F' and not ep_match(season, episode, regexes[n]):
                                # "Starting from SxxEyy", too early episode
                                logging.debug('Filter requirement match on rule %d', n)
                                result = False
                                break
                            elif reTypes[n] == 'S' and season and episode and ep_match(season, episode, regexes[n], title):
                                logging.debug('Filter matched on rule %d', n)
                                result = True
                                break
                            else:
                                if regexes[n]:
                                    found = re.search(regexes[n], title)
                                else:
                                    found = False
                                if reTypes[n] == 'M' and not found:
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break
                                if found and reTypes[n] == 'A':
                                    logging.debug("Filter matched on rule %d", n)
                                    result = True
                                    break
                                if found and reTypes[n] == 'R':
                                    logging.debug("Filter rejected on rule %d", n)
                                    result = False
                                    break

                    if len(reCats):
                        if not result and defCat:
                            # Apply Feed-category on non-matched items
                            myCat = defCat
                        elif result and notdefault(reCats[n]):
                            # Use the matched info
                            myCat = reCats[n]
                        elif category and not defCat:
                            # No result and no Feed-category
                            myCat = cat_convert(category)

                        if myCat:
                            myCat, catPP, catScript, catPrio = cat_to_opts(myCat)
                        else:
                            myCat = catPP = catScript = catPrio = None
                        if notdefault(rePPs[n]):
                            myPP = rePPs[n]
                        elif not (reCats[n] or category):
                            myPP = catPP
                        if notdefault(reScripts[n]):
                            myScript = reScripts[n]
                        elif not (notdefault(reCats[n]) or category):
                            myScript = catScript
                        if rePrios[n] not in (str(DEFAULT_PRIORITY), ''):
                            myPrio = rePrios[n]
                        elif not ((rePrios[n] != str(DEFAULT_PRIORITY)) or category):
                            myPrio = catPrio


                    if cfg.no_dupes() and self.check_duplicate(title):
                        if cfg.no_dupes() == 1:
                            # Dupe-detection: Discard
                            logging.info("Ignoring duplicate job %s", title)
                            continue
                        elif cfg.no_dupes() == 3:
                            # Dupe-detection: Fail
                            # We accept it so the Queue can send it to the History
                            logging.info("Found duplicate job %s", title)
                        else:
                            # Dupe-detection: Pause
                            myPrio = DUP_PRIORITY

                    act = download and not first
                    if link in jobs:
                        act = act and not jobs[link].get('status', '').endswith('*')
                        act = act or force
                        star = first or jobs[link].get('status', '').endswith('*')
                    else:
                        star = first
                    if result:
                        _HandleLink(jobs, feed, link, title, size, age, season, episode, 'G', category, myCat, myPP,
                                     myScript, act, star, priority=myPrio, rule=str(n))
                        if act:
                            new_downloads.append(title)
                    else:
                        _HandleLink(jobs, feed, link, title, size, age, season, episode, 'B', category, myCat, myPP,
                                     myScript, False, star, priority=myPrio, rule=str(n))

        # Send email if wanted and not "forced"
        if new_downloads and cfg.email_rss() and not force:
            emailer.rss_mail(feed, new_downloads)

        remove_obsolete(jobs, newlinks)
        return msg