Example #1
File: scan.py Project: pl77/pynab
def scan_missing(group_name):
    try:
        return pynab.groups.scan_missing_segments(group_name)
    except Exception as e:
        log.error('scan: nntp server is flipping out, hopefully they fix their shit: {}'.format(
            traceback.format_exc()
        ))
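Note: as corrected above, traceback.format_exc() takes no exception argument; it formats whatever exception is currently being handled. The standard logging module can also attach the traceback itself. A minimal, self-contained sketch (the logger name and failure are illustrative, not pynab code):

import logging
import traceback

logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger('scan-example')  # illustrative logger

def scan_missing_sketch(group_name):
    try:
        raise RuntimeError('simulated NNTP failure for {}'.format(group_name))
    except Exception:
        # format_exc() reads the active exception from the interpreter state
        log.error('scan failed:\n%s', traceback.format_exc())
        # equivalent shortcut: log.exception() appends the traceback automatically
        log.exception('scan failed for %s', group_name)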
Example #2
File: tvrage.py Project: shpd/pynab
def search_lxml(show, content):
    """Search TVRage online API for show data."""
    try:
        tree = etree.fromstring(content)
    except Exception as e:
        log.error('Problem parsing XML with lxml: {}'.format(e))
        return None

    matches = defaultdict(list)
    # parse show names in the same order as returned by tvrage, first one is usually the good one
    for xml_show in XPATH_SHOW(tree):
        for name in extract_names(xml_show):
            ratio = int(difflib.SequenceMatcher(None, show['clean_name'], clean_name(name)).ratio() * 100)
            if ratio == 100:
                log.debug('Found 100% xml_match: {}'.format(name))
                return xmltodict.parse(etree.tostring(xml_show))['show']
            matches[ratio].append(xml_show)
                
    # if no 100% is found, check highest ratio matches
    for ratio, xml_matches in sorted(matches.items(), reverse=True):
        for xml_match in xml_matches:
            if ratio >= 80:
                log.debug('Found {:d}% xml_match: {}'.format(ratio, XPATH_NAME(xml_match)[0]))
                return xmltodict.parse(etree.tostring(xml_match))['show']
            elif 80 > ratio > 60:
                if 'country' in show and show['country'] and XPATH_COUNTRY(xml_match):
                    if str.lower(show['country']) == str.lower(XPATH_COUNTRY(xml_match)[0]):
                        log.debug('Found {:d}% xml_match: {}'.format(ratio, XPATH_NAME(xml_match)[0]))
                        return xmltodict.parse(etree.tostring(xml_match))['show']

    if matches:
        ratio = max(matches)
        log.warning('No TVRage match found for {}, highest match was {}%.'.format(show['clean_name'], ratio))
    else:
        log.warning('No TVRage match found for {}.'.format(show['clean_name']))
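The matching above keys on difflib.SequenceMatcher.ratio(), scaled to an integer percentage. A standalone sketch of that scoring (the show names are made up):

import difflib

def match_ratio(a, b):
    """Return a 0-100 similarity score, as used for show-name matching above."""
    return int(difflib.SequenceMatcher(None, a.lower(), b.lower()).ratio() * 100)

print(match_ratio('Game of Thrones', 'Game of Thrones (US)'))  # 85
print(match_ratio('Game of Thrones', 'Game of Thrones'))       # 100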
Example #3
 def publish(self, node, data):
     payload = ET.fromstring("<test xmlns='test'>{}</test>".format(data))
     try:
         self['xep_0060'].publish(self.pubsub_server, node, payload=payload)
     except Exception as e:
         log.error('pubsub: could not publish to: {}'.format(node))
         log.error('Exception "{}" of type {}'.format(e, type(e)))
Example #4
def scan_missing(group_name):
    try:
        return pynab.groups.scan_missing_segments(group_name)
    except Exception as e:
        log.error('scan: nntp server is flipping out, hopefully they fix their shit: {}'.format(
            traceback.format_exc()
        ))
Example #5
 def create(self, node=None):
     if not node:
         node = self.node
     try:
         self['xep_0060'].create_node(self.pubsub_server, node)
     except:
         log.error('pubsub: could not create node: %s' % node)
Example #6
 def get(self):
     try:
         result = self['xep_0060'].get_item(self.pubsub_server, self.node, self.data)
         for item in result['pubsub']['items']['substanzas']:
             print('Retrieved item %s: %s' % (item['id'], tostring(item['payload'])))
     except:
         log.error('pubsub: could not retrieve item %s from node %s' % (self.data, self.node))
Example #7
File: xmpp.py Project: sqw23/pynab
 def create(self, node=None):
     if not node:
         node = self.node
     try:
         self['xep_0060'].create_node(self.pubsub_server, node)
     except:
         log.error('pubsub: could not create node: %s' % node)
Example #8
def save(binary):
    """Save a single binary to the DB, including all
    segments/parts (which takes the longest).
    --
    Note: Much quicker. Hooray!
    """
    log.debug('Saving to binary: ' + binary['name'])

    existing_binary = db.binaries.find_one({'name': binary['name']})
    try:
        if existing_binary:
            merge(existing_binary['parts'], binary['parts'])
            db.binaries.update({'_id': existing_binary['_id']}, {
                '$set': {
                    'parts': existing_binary['parts']
                }
            })
        else:
            db.binaries.insert({
                'name': binary['name'],
                'group_name': binary['group_name'],
                'posted': binary['posted'],
                'posted_by': binary['posted_by'],
                'category_id': binary['category_id'],
                'regex_id': binary['regex_id'],
                'req_id': binary['req_id'],
                'xref': binary['xref'],
                'total_parts': binary['total_parts'],
                'parts': binary['parts']
            })
    except Exception as e:
        log.error('Binary could not be saved to DB (possibly too large): {}'.format(e))
Example #9
File: server.py Project: shpd/pynab
    def get(self, group_name, messages=None):
        """Get a set of messages from the server for the specified group."""
        log.info('{}: Getting {:d} messages...'.format(group_name, len(messages) if messages else 0))
        data = ''
        if messages:
            try:
                _, total, first, last, _ = self.connection.group(group_name)
                log.debug('{}: Total articles in group: {:d}'.format(group_name, total))
                for message in messages:
                    article = '<{}>'.format(message)

                    log.debug('{}: Getting article: {}'.format(group_name, article))

                    response, (number, message_id, lines) = self.connection.body(article)
                    res = pynab.yenc.yenc_decode(lines)
                    if res:
                        data += res
                    else:
                        return None
            except nntplib.NNTPError as nntpe:
                log.error('{}: Problem retrieving messages from server: {}.'.format(group_name, nntpe))
                return None

            return data
        else:
            log.error('{}: No messages were specified.'.format(group_name))
            return None
Example #10
File: xmpp.py Project: sqw23/pynab
 def subscribe(self):
     try:
         result = self['xep_0060'].subscribe(self.pubsub_server, self.node)
         print('Subscribed %s to node %s' % (self.boundjid.bare, self.node))
     except:
         log.error('pubsub: could not subscribe %s to node %s' %
                   (self.boundjid.bare, self.node))
Example #11
def details(dataset=None):
    if auth():
        if request.query.id:
            with db_session() as db:
                release = db.query(Release).filter(
                    Release.id == request.query.id).first()
                if release:
                    dataset['releases'] = [release]
                    dataset['detail'] = True
                    dataset['api_key'] = request.query.apikey

                    try:
                        tmpl = Template(filename=os.path.join(
                            root_dir, 'templates/api/result.mako'))
                        return tmpl.render(**dataset)
                    except:
                        log.error('Failed to deliver page: {0}'.format(
                            exceptions.text_error_template().render()))
                        return None
                else:
                    return api_error(300)
        else:
            return api_error(200)
    else:
        return api_error(100)
Example #12
    def day_to_post(self, group_name, days):
        """Converts a datetime to approximate article number for the specified group."""
        log.debug('{}: Finding post {:d} days old...'.format(group_name, days))

        _, count, first, last, _ = self.connection.group(group_name)
        target_date = datetime.datetime.now(pytz.utc) - datetime.timedelta(days)

        first_date = self.post_date(group_name, first)
        last_date = self.post_date(group_name, last)

        if first_date and last_date:
            if target_date < first_date:
                log.warning(
                    '{}: First available article is newer than target date, starting from first available.'.format(
                        group_name))
                return first
            elif target_date > last_date:
                log.warning(
                    '{}: Target date is more recent than newest article. Try a longer backfill.'.format(group_name))
                return False
            log.debug('{}: Searching for post where goal: {}, first: {}, last: {}'
            .format(group_name, target_date, first_date, last_date)
            )

            upper = last
            lower = first
            interval = math.floor((upper - lower) * 0.5)
            next_date = last_date

            log.debug('{}: Start: {:d} End: {:d} Interval: {:d}'.format(group_name, lower, upper, interval))

            while self.days_old(next_date) < days:
                skip = 1
                temp_date = self.post_date(group_name, upper - interval)
                while temp_date > target_date:
                    upper = upper - interval - (skip - 1)
                    log.debug('{}: New upperbound: {:d} is {:d} days old.'
                    .format(group_name, upper, self.days_old(temp_date))
                    )
                    skip *= 2
                    temp_date = self.post_date(group_name, upper - interval)

                interval = math.ceil(interval / 2)
                if interval <= 0:
                    break
                skip = 1
                log.debug('{}: Set interval to {:d} articles.'.format(group_name, interval))

                next_date = self.post_date(group_name, upper - 1)
                while not next_date:
                    upper = upper - skip
                    skip *= 2
                    log.debug('{}: Article was lost, getting next: {:d}'.format(group_name, upper))
                    next_date = self.post_date(group_name, upper - 1)

            log.debug('{}: Article is {:d} which is {:d} days old.'.format(group_name, upper, self.days_old(next_date)))
            return upper
        else:
            log.error('{}: Could not get group information.'.format(group_name))
            return False
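The loop above halves an article-number interval until the post date crosses the target, which is effectively a binary search over the mapping from article number to post date. A toy sketch of the same idea against an in-memory list (the data is made up, purely illustrative):

import bisect
import datetime

# pretend these are the post dates of consecutive articles, oldest first
posted = [datetime.datetime(2014, 1, 1) + datetime.timedelta(days=d) for d in range(100)]
first_article = 1000

def article_for_date(target):
    """Return the first article number posted on or after target."""
    return first_article + bisect.bisect_left(posted, target)

print(article_for_date(datetime.datetime(2014, 2, 15)))  # 1045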
Example #13
File: xmpp.py Project: sqw23/pynab
 def publish(self, node, data):
     payload = ET.fromstring("<test xmlns='test'>{}</test>".format(data))
     try:
         self['xep_0060'].publish(self.pubsub_server, node, payload=payload)
     except Exception as e:
         log.error('pubsub: could not publish to: {}'.format(node))
         log.error('Exception "{}" of type {}'.format(e, type(e)))
Example #14
def update_blacklist():
    """Check for Blacklist update and load them into Mongo."""
    blacklist_url = config.postprocess.get('blacklist_url')
    if blacklist_url:
        response = requests.get(blacklist_url)
        lines = response.text.splitlines()

        for line in lines:
            elements = line.split('\t\t')
            if len(elements) == 4:
                log.debug('Updating blacklist {}...'.format(elements[1]))
                db.blacklists.update(
                    {
                        'regex': elements[1]
                    },
                    {
                        '$setOnInsert': {
                            'status': 0
                        },
                        '$set': {
                            'group_name': elements[0],
                            'regex': elements[1],
                            'description': elements[3],
                        }
                    },
                    upsert=True
                )
        return True
    else:
        log.error('No blacklist update url in config.')
        return False
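The update above relies on MongoDB upsert semantics: fields under $setOnInsert are written only when no document matches and a new one is inserted, while $set is applied on every run. A minimal sketch with a current pymongo client, where the legacy update() call maps to update_one() (connection, collection and values are illustrative):

from pymongo import MongoClient

db = MongoClient().pynab  # illustrative connection/database name

db.blacklists.update_one(
    {'regex': r'^example\.regex'},                      # match key
    {
        '$setOnInsert': {'status': 0},                  # only written when the upsert inserts
        '$set': {'group_name': 'alt.binaries.example',  # refreshed on every run
                 'description': 'illustrative entry'},
    },
    upsert=True,
)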
Example #15
    def get(self, group_name, messages=None):
        """Get a set of messages from the server for the specified group."""
        self.connect()

        data = ''
        if messages:
            try:
                _, total, first, last, _ = self.connection.group(group_name)
                for message in messages:
                    article = '<{}>'.format(message)
                    response, (number, message_id, lines) = self.connection.body(article)
                    res = pynab.yenc.yenc_decode(lines)
                    if res:
                        data += res
                    else:
                        return None
            except nntplib.NNTPError as nntpe:
                log.error('server: [{}]: problem retrieving messages: {}.'.format(group_name, nntpe))
                self.connection = None
                self.connect()
                return None
            except socket.timeout:
                log.error('server: socket timed out, reconnecting')
                self.connection = None
                self.connect()
                return None

            return data
        else:
            return None
Example #16
File: nfos.py Project: sqw23/pynab
def process(limit=None, category=0):
    """Process releases for NFO parts and download them."""

    with Server() as server:
        with db_session() as db:
            # noinspection PyComparisonWithNone,PyComparisonWithNone
            query = db.query(Release).join(Group).join(NZB).filter(
                Release.nfo == None).filter(Release.nfo_metablack_id == None)
            if category:
                query = query.filter(Release.category_id == int(category))

            if limit:
                releases = query.order_by(Release.posted.desc()).limit(limit)
            else:
                releases = query.order_by(Release.posted.desc()).all()

            for release in releases:
                found = False
                nzb = pynab.nzbs.get_nzb_details(release.nzb)

                if nzb:
                    nfos = []
                    for nfo in nzb['nfos']:
                        for part in nfo['segments']:
                            if int(part['size']) > NFO_MAX_FILESIZE:
                                continue
                            nfos.append(part)

                    for nfo in nfos:
                        try:
                            article = server.get(release.group.name, [
                                nfo['message_id'],
                            ])
                        except Exception as e:
                            # if usenet's not accessible, don't block it forever
                            log.error('nfo: unable to get nfo: {}'.format(e))
                            continue

                        if article:
                            data = gzip.compress(article.encode('utf-8'))
                            nfo = NFO(data=data)
                            db.add(nfo)

                            release.nfo = nfo
                            release.nfo_metablack_id = None
                            db.add(release)

                            log.debug('nfo: [{}] - nfo added'.format(
                                release.search_name))
                            found = True
                            break

                    if not found:
                        log.debug(
                            'nfo: [{}] - [{}] - no nfos in release'.format(
                                release.id, release.search_name))
                        mb = MetaBlack(nfo=release, status='IMPOSSIBLE')
                        db.add(mb)
                db.commit()
Example #17
def copy_file(engine, data, ordering, type):
    """
    Handles a fast-copy, or a slowass one.

    If you're using postgres or a mysql derivative, this should work fine.
    Anything else? Welllllllllllllp. It's gonna be slow. Really slow.

    In fact, I'm going to point out just how slow it is.
    """
    insert_start = time.time()
    if 'mysql' in config.db.get('engine'):
        # ho ho ho
        conn = engine.raw_connection()
        cur = conn.cursor()
        (fd, filename) = tempfile.mkstemp(prefix='pynab')
        filename = filename.replace('\\', '/')
        try:
            file = os.fdopen(fd, 'wb')
            data.seek(0)
            t = data.read(1048576)
            while t:
                file.write(t.encode('utf-8'))
                t = data.read(1048576)
            file.close()
            data.close()

            query = "LOAD DATA LOCAL INFILE '{}' INTO TABLE {} FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"' ({})" \
                .format(filename, type.__tablename__, ','.join(ordering))

            cur.execute(query)
            conn.commit()
            cur.close()

            os.remove(filename)
        except Exception as e:
            log.error(e)
            return False
    elif 'postgre' in config.db.get('engine'):
        conn = engine.raw_connection()
        cur = conn.cursor()
        try:
            cur.copy_expert(
                "COPY {} ({}) FROM STDIN WITH CSV ESCAPE E'\\\\'".format(type.__tablename__, ', '.join(ordering)), data)
        except Exception as e:
            log.error(e)
            return False
        conn.commit()
        cur.close()
    else:
        # this... this is the slow one
        # i don't even want to think about how slow this is
        # it's really slow
        # slower than the github api
        engine.execute(type.__table__.insert(), data)

    insert_end = time.time()
    log.debug('parts: {} insert: {:.2f}s'.format(config.db.get('engine'), insert_end - insert_start))

    return True
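The Postgres branch above streams a CSV file-like object through copy_expert, which is what makes it fast. A self-contained sketch of that path with psycopg2 (the DSN, table and columns are illustrative):

import io
import psycopg2

conn = psycopg2.connect('dbname=pynab')  # illustrative DSN
cur = conn.cursor()

# build an in-memory CSV whose column order matches the COPY column list
buf = io.StringIO()
buf.write('alt.binaries.example,50,"some subject"\n')
buf.write('alt.binaries.example,100,"another subject"\n')
buf.seek(0)

cur.copy_expert("COPY parts (group_name, total_segments, subject) FROM STDIN WITH CSV", buf)
conn.commit()
cur.close()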
Example #18
def process(limit=None, category=0):
    """Process releases for NFO parts and download them."""

    with Server() as server:
        with db_session() as db:
            # noinspection PyComparisonWithNone,PyComparisonWithNone
            query = db.query(Release).join(Group).join(NZB).filter(Release.nfo == None).filter(
                Release.nfo_metablack_id == None)
            if category:
                query = query.filter(Release.category_id == int(category))

            if limit:
                releases = query.order_by(Release.posted.desc()).limit(limit)
            else:
                releases = query.order_by(Release.posted.desc()).all()

            for release in releases:
                found = False
                nzb = pynab.nzbs.get_nzb_details(release.nzb)

                if nzb:
                    nfos = []
                    for nfo in nzb['nfos']:
                        for part in nfo['segments']:
                            if int(part['size']) > NFO_MAX_FILESIZE:
                                continue
                            nfos.append(part)

                    for nfo in nfos:
                        try:
                            article = server.get(release.group.name, [nfo['message_id'], ])
                        except Exception as e:
                            # if usenet's not accessible, don't block it forever
                            log.error('nfo: unable to get nfo: {}'.format(e))
                            continue

                        if article:
                            data = gzip.compress(article.encode('utf-8'))
                            nfo = NFO(data=data)
                            db.add(nfo)

                            release.nfo = nfo
                            release.nfo_metablack_id = None
                            db.add(release)

                            log.debug('nfo: [{}] - nfo added'.format(
                                release.search_name
                            ))
                            found = True
                            break

                    if not found:
                        log.debug('nfo: [{}] - [{}] - no nfos in release'.format(
                            release.id,
                            release.search_name
                        ))
                        mb = MetaBlack(nfo=release, status='IMPOSSIBLE')
                        db.add(mb)
                db.commit()
Example #19
File: xmpp.py Project: sqw23/pynab
    def start(self, event):
        self.get_roster()
        self.send_presence()

        try:
            getattr(self, self.action)()
        except:
            log.error('pubsub: could not execute: %s' % self.action)
Example #20
File: xmpp.py Project: sqw23/pynab
 def retract(self):
     try:
         result = self['xep_0060'].retract(self.pubsub_server, self.node,
                                           self.data)
         print('Retracted item %s from node %s' % (self.data, self.node))
     except:
         log.error('pubsub: could not retract item %s from node %s' %
                   (self.data, self.node))
Example #21
    def start(self, event):
        self.get_roster()
        self.send_presence()

        try:
            getattr(self, self.action)()
        except:
            log.error('pubsub: could not execute: %s' % self.action)
Example #22
 def start(self):
     log.info("nabbot: xmpp bot started")
     if self.xmpp.connect():
         self.xmpp.process(block=False)  # pynab.xmpp is started in its own thread
         # self.create_nodes() #I have autocreate set, don't need to pre-populate
         self.handle_queue()
     else:
         log.error("nabbot: client didn't connect.")
Example #23
File: xmpp.py Project: sqw23/pynab
 def start(self):
     log.info("nabbot: xmpp bot started")
     if self.xmpp.connect():
         self.xmpp.process(
             block=False)  # pynab.xmpp is started in its own thread
         # self.create_nodes() #I have autocreate set, don't need to pre-populate
         self.handle_queue()
     else:
         log.error("nabbot: client didn't connect.")
Example #24
def update(group_name):
    try:
        return pynab.groups.scan(group_name, limit=config.scan.get('group_scan_limit', 2000000))
    except pynab.server.AuthException as e:
        log.error('server: {}'.format(e))
    except Exception as e:
        log.error('scan: nntp server is flipping out, hopefully they fix their shit: {}'.format(
            traceback.format_exc()
        ))
Example #25
File: scan.py Project: pl77/pynab
def update(group_name):
    try:
        return pynab.groups.scan(group_name, limit=config.scan.get('group_scan_limit', 2000000))
    except pynab.server.AuthException as e:
        log.error('server: {}'.format(e))
    except Exception as e:
        log.error('scan: nntp server is flipping out, hopefully they fix their shit: {}'.format(
            traceback.format_exc()
        ))
Example #26
    def group(self, group_name):
        self.connect()

        try:
            response, count, first, last, name = self.connection.group(group_name)
        except Exception as e:
            log.error('server: {}: couldn\'t send group command: {}'.format(group_name, e))
            return None, False, None, None, None

        return response, count, first, last, name
Example #27
File: tvrage.py Project: shpd/pynab
def search(show):
    """Search TVRage's online API for show data."""
    try:
        r = requests.get(TVRAGE_FULL_SEARCH_URL, params={'show': show['clean_name']})
    except:
        log.error('Problem retrieving TVRage XML. The API is probably down.')
        return None
    
    content = r.content
    return search_lxml(show, content)
Example #28
File: xmpp.py Project: sqw23/pynab
 def get(self):
     try:
         result = self['xep_0060'].get_item(self.pubsub_server, self.node,
                                            self.data)
         for item in result['pubsub']['items']['substanzas']:
             print('Retrieved item %s: %s' %
                   (item['id'], tostring(item['payload'])))
     except:
         log.error('pubsub: could not retrieve item %s from node %s' %
                   (self.data, self.node))
Example #29
File: server.py Project: shpd/pynab
    def group(self, group_name):
        self.connect()

        try:
            response, count, first, last, name = self.connection.group(group_name)
        except nntplib.NNTPError:
            log.error('Problem sending group command to server.')
            return False

        return response, count, first, last, name
Example #30
def search(show):
    """Search TVRage's online API for show data."""
    try:
        r = requests.get(TVRAGE_FULL_SEARCH_URL, params={'show': show['clean_name']})
    except Exception as e:
        log.error(e)
        return None
    
    content = r.content
    return search_lxml(show, content)
Example #31
def process(limit=None, category=0):
    """Processes release rarfiles to check for passwords and filecounts."""

    with Server() as server:
        with db_session() as db:
            # noinspection PyComparisonWithNone
            query = db.query(Release).join(Group).join(NZB).filter(~Release.files.any()). \
                filter(Release.passworded == 'UNKNOWN').filter(Release.rar_metablack_id == None)
            if category:
                query = query.filter(Release.category_id == int(category))

            if limit:
                releases = query.order_by(Release.posted.desc()).limit(limit)
            else:
                releases = query.order_by(Release.posted.desc()).all()

            for release in releases:
                log.debug('rar: processing {}'.format(release.search_name))
                nzb = pynab.nzbs.get_nzb_details(release.nzb)

                if nzb and nzb['rars']:
                    try:
                        passworded, info = check_release_files(server, release.group.name, nzb)
                    except Exception as e:
                        # if usenet isn't accessible, we don't want to blacklist it
                        log.error('rar: file info failed: {}'.format(e))
                        continue

                    if info:
                        log.info('rar: file info add [{}]'.format(
                            release.search_name
                        ))
                        release.passworded = passworded

                        size = 0
                        for file in info:
                            f = File(name=file['name'][:512],
                                     size=file['size'])
                            f.release = release
                            size += file['size']
                            db.add(f)

                        if size != 0:
                            release.size = size

                        release.rar_metablack_id = None
                        db.add(release)
                        db.commit()
                        continue
                log.debug('rar: [{}] - file info: no readable rars in release'.format(
                    release.search_name
                ))
                mb = MetaBlack(rar=release, status='IMPOSSIBLE')
                db.add(mb)
                db.commit()
Example #32
def process(limit=None, category=0):
    """Processes release rarfiles to check for passwords and filecounts."""

    with Server() as server:
        with db_session() as db:
            # noinspection PyComparisonWithNone
            query = db.query(Release).join(Group).join(NZB).filter(~Release.files.any()). \
                filter(Release.passworded == 'UNKNOWN').filter(Release.rar_metablack_id == None)
            if category:
                query = query.filter(Release.category_id == int(category))

            if limit:
                releases = query.order_by(Release.posted.desc()).limit(limit)
            else:
                releases = query.order_by(Release.posted.desc()).all()

            for release in releases:
                log.debug('rar: processing {}'.format(release.search_name))
                nzb = pynab.nzbs.get_nzb_details(release.nzb)

                if nzb and nzb['rars']:
                    try:
                        passworded, info = check_release_files(
                            server, release.group.name, nzb)
                    except Exception as e:
                        # if usenet isn't accessible, we don't want to blacklist it
                        log.error('rar: file info failed: {}'.format(e))
                        continue

                    if info:
                        log.info('rar: file info add [{}]'.format(
                            release.search_name))
                        release.passworded = passworded

                        size = 0
                        for file in info:
                            f = File(name=file['name'][:512],
                                     size=file['size'])
                            f.release = release
                            size += file['size']
                            db.add(f)

                        if size != 0:
                            release.size = size

                        release.rar_metablack_id = None
                        db.add(release)
                        db.commit()
                        continue
                log.debug('rar: [{}] - file info: no readable rars in release'.
                          format(release.search_name))
                mb = MetaBlack(rar=release, status='IMPOSSIBLE')
                db.add(mb)
                db.commit()
Example #33
File: scan.py Project: pl77/pynab
def backfill(group_name, date=None, target=None):
    if date:
        date = pytz.utc.localize(dateutil.parser.parse(date))
    else:
        date = pytz.utc.localize(datetime.datetime.now() - datetime.timedelta(config.scan.get('backfill_days', 10)))
    try:
        return pynab.groups.scan(group_name, direction='backward', date=date, target=target,
                                 limit=config.scan.get('group_scan_limit', 2000000))
    except Exception as e:
        log.error('scan: nntp server is flipping out, hopefully they fix their shit: {}'.format(
            traceback.format_exc()
        ))
Example #34
def backfill(group_name, date=None, target=None):
    if date:
        date = pytz.utc.localize(dateutil.parser.parse(date))
    else:
        date = pytz.utc.localize(datetime.datetime.now() - datetime.timedelta(config.scan.get('backfill_days', 10)))
    try:
        return pynab.groups.scan(group_name, direction='backward', date=date, target=target,
                                 limit=config.scan.get('group_scan_limit', 2000000))
    except Exception as e:
        log.error('scan: nntp server is flipping out, hopefully they fix their shit: {}'.format(
            traceback.format_exc()
        ))
Example #35
def process_release(release, online=True):
    name, year = parse_movie(release['search_name'])
    if name and year:
        method = 'local'
        imdb = db.imdb.find_one({'name': clean_name(name), 'year': year})
        if not imdb and online:
            method = 'online'
            movie = search(clean_name(name), year)
            if movie and movie['Type'] == 'movie':
                db.imdb.update(
                    {'_id': movie['imdbID']},
                    {'$set': {
                        'name': movie['Title'],
                        'year': movie['Year']
                    }},
                    upsert=True
                )
                imdb = db.imdb.find_one({'_id': movie['imdbID']})

        if imdb:
            log.info('[{}] - [{}] - imdb added: {}'.format(
                release['_id'], release['search_name'], method))
            db.releases.update({
                '_id': release['_id']
            }, {'$set': {
                'imdb': imdb
            }})
        elif not imdb and online:
            log.warning('[{}] - [{}] - imdb not found: online'.format(
                release['_id'], release['search_name']))
            db.releases.update({
                '_id': release['_id']
            }, {
                '$set': {
                    'imdb': {
                        'attempted': datetime.datetime.now(pytz.utc)
                    }
                }
            })
        else:
            log.warning('[{}] - [{}] - imdb not found: local'.format(
                release['_id'], release['search_name']))
    else:
        log.error(
            '[{}] - [{}] - imdb not found: no suitable regex for movie name'.
            format(release['_id'], release['search_name']))
        db.releases.update({
            '_id': release['_id']
        }, {'$set': {
            'imdb': {
                'possible': False
            }
        }})
Example #36
def save(db, binaries):
    """Helper function to save a set of binaries
    and delete associated parts from the DB. This
    is a lot faster than Newznab's part deletion,
    which routinely took 10+ hours on my server.
    Turns out MySQL kinda sucks at deleting lots
    of shit. If we need more speed, move the parts
    away and drop the temporary table instead."""

    if binaries:
        existing_binaries = dict(
            ((binary.hash, binary)
             for binary in db.query(Binary.id, Binary.hash).filter(
                 Binary.hash.in_(binaries.keys())).all()))

        binary_inserts = []
        for hash, binary in binaries.items():
            existing_binary = existing_binaries.get(hash, None)
            if not existing_binary:
                binary_inserts.append(binary)

        if binary_inserts:
            # this could be optimised slightly with COPY but it's not really worth it
            # there's usually only a hundred or so rows
            db.execute(Binary.__table__.insert(), binary_inserts)
            db.commit()

        existing_binaries = dict(
            ((binary.hash, binary)
             for binary in db.query(Binary.id, Binary.hash).filter(
                 Binary.hash.in_(binaries.keys())).all()))

        update_parts = []
        for hash, binary in binaries.items():
            existing_binary = existing_binaries.get(hash, None)
            if existing_binary:
                for number, part in binary['parts'].items():
                    update_parts.append({
                        '_id': part.id,
                        '_binary_id': existing_binary.id
                    })
            else:
                log.error('something went horribly wrong')

        if update_parts:
            p = Part.__table__.update().where(
                Part.id == bindparam('_id')).values(
                    binary_id=bindparam('_binary_id'))
            db.execute(p, update_parts)
            db.commit()
Example #37
def stats(dataset=None):
    if not dataset:
        dataset = {}

    with db_session() as db:
        tv_totals = db.query(func.count(Release.tvshow_id), func.count(Release.tvshow_metablack_id),
                             func.count(Release.id)).join(Category).filter(Category.parent_id == 5000).one()
        movie_totals = db.query(func.count(Release.movie_id), func.count(Release.movie_metablack_id),
                                func.count(Release.id)).join(Category).filter(Category.parent_id == 2000).one()
        nfo_total = db.query(func.count(Release.nfo_id), func.count(Release.nfo_metablack_id)).one()
        file_total = db.query(Release.id).filter((Release.files.any()) | (Release.passworded != 'UNKNOWN')).count()
        file_failed_total = db.query(func.count(Release.rar_metablack_id)).one()
        release_total = db.query(Release.id).count()

        dataset['totals'] = {
            'TV': {
                'processed': tv_totals[0],
                'failed': tv_totals[1],
                'total': tv_totals[2]
            },
            'Movies': {
                'processed': movie_totals[0],
                'failed': movie_totals[1],
                'total': movie_totals[2]
            },
            'NFOs': {
                'processed': nfo_total[0],
                'failed': nfo_total[1],
                'total': release_total
            },
            'File Info': {
                'processed': file_total,
                'failed': file_failed_total[0],
                'total': release_total
            }
        }

        dataset['categories'] = db.query(Category, func.count(Release.id)).join(Release).group_by(Category).order_by(
            desc(func.count(Release.id))).all()

        dataset['groups'] = db.query(Group, func.min(Release.posted), func.count(Release.id)).join(Release).group_by(Group).order_by(desc(func.count(Release.id))).all()

        try:
            tmpl = Template(
                filename=os.path.join(root_dir, 'templates/api/stats.mako'))
            return tmpl.render(**dataset)
        except:
            log.error('Failed to deliver page: {0}'.format(exceptions.text_error_template().render()))
            return None
Example #38
def save(db, binaries):
    """Helper function to save a set of binaries
    and delete associated parts from the DB. This
    is a lot faster than Newznab's part deletion,
    which routinely took 10+ hours on my server.
    Turns out MySQL kinda sucks at deleting lots
    of shit. If we need more speed, move the parts
    away and drop the temporary table instead."""

    if binaries:
        existing_binaries = dict(
            (
                (binary.hash, binary)
                for binary in db.query(Binary.id, Binary.hash).filter(Binary.hash.in_(binaries.keys())).all()
            )
        )

        binary_inserts = []
        for hash, binary in binaries.items():
            existing_binary = existing_binaries.get(hash, None)
            if not existing_binary:
                binary_inserts.append(binary)

        if binary_inserts:
            # this could be optimised slightly with COPY but it's not really worth it
            # there's usually only a hundred or so rows
            db.execute(Binary.__table__.insert(), binary_inserts)
            db.commit()

        existing_binaries = dict(
            (
                (binary.hash, binary)
                for binary in db.query(Binary.id, Binary.hash).filter(Binary.hash.in_(binaries.keys())).all()
            )
        )

        update_parts = []
        for hash, binary in binaries.items():
            existing_binary = existing_binaries.get(hash, None)
            if existing_binary:
                for number, part in binary["parts"].items():
                    update_parts.append({"_id": part.id, "_binary_id": existing_binary.id})
            else:
                log.error("something went horribly wrong")

        if update_parts:
            p = Part.__table__.update().where(Part.id == bindparam("_id")).values(binary_id=bindparam("_binary_id"))
            db.execute(p, update_parts)
            db.commit()
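The bulk part update above leans on SQLAlchemy's bindparam() so a single UPDATE statement can be executed against a whole list of parameter dicts (an executemany); the leading underscores keep the bind names from colliding with column names. A compact, runnable sketch against an in-memory SQLite database (the table layout is illustrative):

import sqlalchemy as sa

metadata = sa.MetaData()
parts = sa.Table('parts', metadata,
                 sa.Column('id', sa.Integer, primary_key=True),
                 sa.Column('binary_id', sa.Integer))

engine = sa.create_engine('sqlite://')  # illustrative in-memory database
metadata.create_all(engine)

with engine.begin() as conn:
    conn.execute(parts.insert(), [{'id': 1, 'binary_id': None}, {'id': 2, 'binary_id': None}])
    stmt = (parts.update()
            .where(parts.c.id == sa.bindparam('_id'))
            .values(binary_id=sa.bindparam('_binary_id')))
    conn.execute(stmt, [{'_id': 1, '_binary_id': 10}, {'_id': 2, '_binary_id': 10}])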
Example #39
    def post_date(self, group_name, article):
        """Retrieves the date of the specified post."""
        self.connect()

        art_num = 0
        overview = None

        try:
            self.connection.group(group_name)
            art_num, overview = self.connection.head('{0:d}'.format(article))
        except nntplib.NNTPError as e:
            log.debug('server: unable to get date of message {}: {}'.format(article, e))
            # leave this alone - we don't expect any data back
            return None

        if art_num and overview:
            # overview[0] = article number
            # overview[1] = message-id
            # overview[2] = headers
            for header in overview[2]:
                date_header = ''
                head = nntplib.decode_header(header.decode('utf-8', errors='surrogateescape'))

                if 'X-Server-Date:' in head:
                    continue
                elif 'NNTP-Posting-Date:' in head:
                    date_header = head.replace('NNTP-Posting-Date: ', '')
                elif 'Date:' in head:
                    date_header = head.replace('Date: ', '')

                if date_header:
                    try:
                        date = dateutil.parser.parse(date_header)
                    except Exception as e:
                        log.error('server: date parse failed while dating message: {}'.format(e))
                        return None

                    try:
                        date = pytz.utc.localize(date)
                    except ValueError:
                        # no problem, it's already localised
                        pass

                    return date
        else:
            return None
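The try/except around pytz.utc.localize() above exists because dateutil may return either a naive or an already-aware datetime depending on the header, and localize() raises ValueError for aware values. A small sketch of that behaviour:

import dateutil.parser
import pytz

for raw in ('Tue, 01 Apr 2014 12:00:00 GMT', 'Tue, 01 Apr 2014 12:00:00'):
    date = dateutil.parser.parse(raw)
    try:
        date = pytz.utc.localize(date)   # only valid for naive datetimes
    except ValueError:
        pass                             # already timezone-aware, keep it as-is
    print(date.tzinfo)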
Example #40
    def day_to_post(self, group_name, days):
        """Converts a datetime to approximate article number for the specified group."""

        _, count, first, last, _ = self.connection.group(group_name)
        target_date = datetime.datetime.now(pytz.utc) - datetime.timedelta(days)

        first_date = self.post_date(group_name, first)
        last_date = self.post_date(group_name, last)

        if first_date and last_date:
            if target_date < first_date:
                return first
            elif target_date > last_date:
                return False

            upper = last
            lower = first
            interval = math.floor((upper - lower) * 0.5)
            next_date = last_date

            while self.days_old(next_date) < days:
                skip = 1
                temp_date = self.post_date(group_name, upper - interval)
                if temp_date:
                    while temp_date > target_date:
                        upper = upper - interval - (skip - 1)
                        skip *= 2
                        temp_date = self.post_date(group_name, upper - interval)

                interval = math.ceil(interval / 2)
                if interval <= 0:
                    break
                skip = 1

                next_date = self.post_date(group_name, upper - 1)
                if next_date:
                    while not next_date:
                        upper = upper - skip
                        skip *= 2
                        next_date = self.post_date(group_name, upper - 1)

            log.debug('server: {}: article {:d} is {:d} days old.'.format(group_name, upper, self.days_old(next_date)))
            return upper
        else:
            log.error('server: {}: could not get group information.'.format(group_name))
            return False
Example #41
    def post_date(self, group_name, article):
        """Retrieves the date of the specified post."""
        self.connect()

        art_num = 0
        overview = None

        try:
            with nntp_handler(self, group_name):
                self.connection.group(group_name)
                art_num, overview = self.connection.head('{0:d}'.format(article))
        except:
            return None

        if art_num and overview:
            # overview[0] = article number
            # overview[1] = message-id
            # overview[2] = headers
            for header in overview[2]:
                date_header = ''
                head = nntplib.decode_header(header.decode('utf-8', errors='surrogateescape'))

                if 'X-Server-Date:' in head:
                    continue
                elif 'NNTP-Posting-Date:' in head:
                    date_header = head.replace('NNTP-Posting-Date: ', '')
                elif 'Date:' in head:
                    date_header = head.replace('Date: ', '')

                if date_header:
                    try:
                        date = dateutil.parser.parse(date_header)
                    except Exception as e:
                        log.error('server: date parse failed while dating message: {}'.format(e))
                        return None

                    try:
                        date = pytz.utc.localize(date)
                    except ValueError:
                        # no problem, it's already localised
                        pass

                    return date
        else:
            return None
Example #42
    def connect(self, compression=True):
        """Creates a connection to a news server."""
        if not self.connection:
            news_config = config.news.copy()

            # i do this because i'm lazy
            ssl = news_config.pop('ssl', False)

            try:
                if ssl:
                    self.connection = nntplib.NNTP_SSL(compression=compression, **news_config)
                else:
                    self.connection = nntplib.NNTP(compression=compression, **news_config)
            except Exception as e:
                log.error('server: could not connect to news server: {}'.format(e))
                return False

        return True
Example #43
def save_all(parts):
    """Save a set of parts to the DB, in a batch if possible."""

    # if possible, do a quick batch insert
    # rarely possible!
    # TODO: filter this more - batch import if first set in group?
    try:
        if db.parts.count() == 0:
            db.parts.insert([value for key, value in parts.items()])
            return True
        else:
            # otherwise, it's going to be slow
            for key, part in parts.items():
                save(part)
            return True
    except pymongo.errors.PyMongoError as e:
        log.error("parts: could not write to db: {0}".format(e))
        return False
Example #44
def create(gid, name, binary):
    """Create the NZB, store it in GridFS and return the ID
    to be linked to the release."""
    if binary['category_id']:
        category = db.categories.find_one({'id': binary['category_id']})
    else:
        category = None

    xml = ''
    try:
        tpl = Template(filename=os.path.join(root_dir, 'templates/nzb.mako'))
        xml = tpl.render(version=pynab.__version__, name=name, category=category, binary=binary)
    except:
        log.error('nzb: failed to create NZB: {0}'.format(exceptions.text_error_template().render()))
        return None

    data = gzip.compress(xml.encode('utf-8'))
    return fs.put(data, filename='.'.join([gid, 'nzb', 'gz'])), sys.getsizeof(data, 0)
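The NZB XML above is gzip-compressed before being stored in GridFS; reading it back is the mirror operation. A minimal round-trip sketch (the payload is a placeholder, not a real NZB):

import gzip

xml = '<nzb xmlns="http://www.newzbin.com/DTD/2003/nzb"></nzb>'  # placeholder payload

blob = gzip.compress(xml.encode('utf-8'))          # what gets written to GridFS
restored = gzip.decompress(blob).decode('utf-8')   # what a reader gets back
assert restored == xml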
Example #45
File: pre.py Project: pl77/pynab
def orlydb(name, search_name):
    # BeautifulSoup is required
    try:
        from bs4 import BeautifulSoup
    except ImportError:
        log.error(
            "BeautifulSoup is required to use orlydb scraping: pip install beautifulsoup4"
        )
        return False

    try:
        preHTML = requests.get('http://orlydb.com/?q={}'.format(search_name))
    except:
        log.debug("Error connecting to orlydb")
        return False

    soup = BeautifulSoup(preHTML.text, 'html.parser')  # requests responses expose .text, not .read()
    releases = soup.find(id="releases").findAll("div")

    rlsDict = {}
    rlsname = None
    for rls in releases:
        # Try/except used to filter out None types
        # pretime left as may be used later
        try:
            rlsname = rls.find("span", {"class": "release"}).get_text()
            # pretime = rls.find("span", {"class" : "timestamp"}).get_text()
            category = rls.find("span", {
                "class": "section"
            }).find("a").get_text()

            # If the release matches what is passed, return the category in a dict
            # This could be a problem if 2 pre's have the same name but different categories, chances are slim though
            if rlsname == name:
                rlsDict["category"] = category
        except Exception as e:
            log.debug("Error parsing orlydb response: {}".format(e))
            return False

    if rlsDict:
        log.info("Orlydb pre found: {}".format(rlsname))
        return rlsDict
    else:
        return False
Example #46
@contextlib.contextmanager  # requires `import contextlib`; lets callers use `with nntp_handler(...)`
def nntp_handler(conn, group=None):
    def reconn(conn, delay=5, group=None):
        time.sleep(delay)
        conn.reconnect()
        if group:
            conn.group(group)
    try:
        yield
    except (socket.timeout, socket.error, IOError) as e:
        log.warning('server: local socket error ({}), reconnecting in 10s...'.format(e.__repr__().encode('utf-8', 'ignore').decode('utf-8')))
        reconn(conn, 10, group)
        raise e
    except nntplib.NNTPProtocolError as e:
        log.warning('server: unrecoverable nntp error')
        raise e
    except (nntplib.NNTPError, nntplib.NNTPTemporaryError) as e:
        log.warning('server: nntp error: {}'.format(e.__repr__().encode('utf-8', 'ignore').decode('utf-8')))
        raise e
    except Exception as e:
        log.error('server: error: {}'.format(e.__repr__().encode('utf-8', 'ignore').decode('utf-8')))
        raise e
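With the contextmanager decorator in place (see the fix above), callers wrap individual NNTP calls so that socket and protocol errors are logged and re-raised in one spot, as the scan() example further down does with its retry loops. A stripped-down usage sketch (the handler and error are stand-ins, not pynab code):

import contextlib

@contextlib.contextmanager
def handler(label):
    try:
        yield
    except Exception as e:
        print('error during {}: {}'.format(label, e))  # stand-in for log.warning/log.error
        raise

try:
    with handler('group command'):
        raise TimeoutError('simulated socket timeout')
except TimeoutError:
    pass  # the caller decides whether to retry, as scan() does with its 3-attempt loops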
Example #47
def caps(dataset=None):
    if not dataset:
        dataset = {}

    dataset['app_version'] = config.api.get('version', '1.0.0')
    dataset['api_version'] = config.api.get('api_version', '0.2.3')
    dataset['email'] = config.api.get('email', '')
    dataset['result_limit'] = config.api.get('result_limit', 20)
    dataset['result_default'] = config.api.get('result_default', 20)

    with db_session() as db:
        category_alias = aliased(Category)
        dataset['categories'] = db.query(Category).filter(Category.parent_id == None).join(category_alias,
                                                                                           Category.children).all()
        try:
            tmpl = Template(
                filename=os.path.join(root_dir, 'templates/api/caps.mako'))
            return tmpl.render(**dataset)
        except:
            log.error('Failed to deliver page: {0}'.format(exceptions.text_error_template().render()))
            return None
Example #48
def truncate_table(engine, table_type):
    """
    Handles truncate table for given table type.
    """
    query = ''
    if 'mysql' in config.db.get('engine'):
        query = "TRUNCATE {}".format(table_type.__tablename__)
    elif 'postgre' in config.db.get('engine'):
        # RESTART IDENTITY - reset sequences
        # CASCADE - follow FK references
        query = 'TRUNCATE {} RESTART IDENTITY CASCADE'.format(
            table_type.__tablename__)

    try:
        conn = engine.raw_connection()
        cur = conn.cursor()
        cur.execute(query)
        conn.commit()
        cur.close()
    except Exception as e:
        log.error(e)
        return False

    return True
Example #49
def update_blacklist():
    """Check for Blacklist update and load them into db."""
    blacklist_url = config.postprocess.get('blacklist_url')
    if blacklist_url:
        response = requests.get(blacklist_url)
        lines = response.text.splitlines()

        blacklists = []
        for line in lines:
            elements = line.split('\t\t')
            if len(elements) == 4:
                blacklists.append({
                    'group_name': elements[0],
                    'regex': elements[1],
                    'description': elements[3],
                    'status': False
                })

        engine.execute(Blacklist.__table__.insert(), blacklists)

        return True
    else:
        log.error('No blacklist update url in config.')
        return False
Example #50
def caps(dataset=None):
    if not dataset:
        dataset = {}

    dataset['app_version'] = config.api.get('version', '1.0.0')
    dataset['api_version'] = config.api.get('api_version', '0.2.3')
    dataset['email'] = config.api.get('email', '')
    dataset['result_limit'] = config.api.get('result_limit', 20)
    dataset['result_default'] = config.api.get('result_default', 20)

    with db_session() as db:
        category_alias = aliased(Category)
        # noinspection PyComparisonWithNone
        dataset['categories'] = db.query(Category).filter(
            Category.parent_id == None).join(category_alias,
                                             Category.children).all()
        try:
            tmpl = Template(
                filename=os.path.join(root_dir, 'templates/api/caps.mako'))
            return tmpl.render(**dataset)
        except:
            log.error('Failed to deliver page: {0}'.format(
                exceptions.text_error_template().render()))
            return None
Example #51
    def scan(self, group_name, first=None, last=None, message_ranges=None):
        """Scan a group for segments and return a list."""
        self.connect()

        messages_missed = []
        overviews = []

        start = time.time()

        i = 0

        # grab the headers we're after
        check = 0
        while True:
            try:
                check += 1
                if check == 3:
                    return False, None, None, None
                with nntp_handler(self):
                    self.connection.group(group_name)
                    break
            except:
                continue

        if message_ranges:
            for first, last in message_ranges:
                range_overviews = None
                while True:
                    i += 1
                    log.debug('server: {}: getting range {}-{}'.format(group_name, first, last))
                    try:
                        with nntp_handler(self, group_name):
                            status, range_overviews = self.connection.over((first, last))
                    except:
                        # 3 attempts
                        if i == 3:
                            log.warning('server: {}: timed out a bunch, we\'ll try again later'.format(group_name))
                            break
                        continue

                    if range_overviews:
                        overviews += range_overviews
                    else:
                        # we missed them
                        messages_missed += range(first, last + 1)
                    break
        else:
            while True:
                i += 1
                log.debug('server: {}: getting range {}-{}'.format(group_name, first, last))
                try:
                    with nntp_handler(self, group_name):
                        status, overviews = self.connection.over((first, last))
                        break
                except:
                    # 3 attempts
                    if i == 3:
                        log.warning('server: {}: timed out a bunch, we\'ll try again later'.format(group_name))
                        break
                    continue

        parts = {}
        messages = []
        ignored = 0

        if overviews:
            with db_session() as db:
                blacklists = db.query(Blacklist).filter(Blacklist.status == True).all()
                for blacklist in blacklists:
                    db.expunge(blacklist)

            for (id, overview) in overviews:
                # keep track of which messages we received so we can
                # optionally check for ones we missed later
                messages.append(id)

                # some messages don't have subjects? who knew
                if 'subject' not in overview:
                    continue

                # get the current segment number
                results = SEGMENT_REGEX.findall(overview['subject'])

                # it might match twice, so just get the last one
                # the first is generally the part number
                if results:
                    (segment_number, total_segments) = results[-1]
                else:
                    # if there's no match at all, it's probably not a binary
                    ignored += 1
                    continue

                # make sure the header contains everything we need
                try:
                    size = int(overview[':bytes'])
                except:
                    # TODO: cull this later
                    log.debug('server: bad message: {}'.format(overview))
                    continue

                # assuming everything didn't f**k up, continue
                if int(segment_number) > 0 and int(total_segments) > 0:
                    # strip the segment number off the subject so
                    # we can match binary parts together
                    subject = nntplib.decode_header(overview['subject'].replace(
                        '(' + str(segment_number) + '/' + str(total_segments) + ')', ''
                    ).strip()).encode('utf-8', 'replace').decode('latin-1')

                    posted_by = nntplib.decode_header(overview['from']).encode('utf-8', 'replace').decode('latin-1')

                    # generate a hash to perform matching
                    hash = pynab.parts.generate_hash(subject, posted_by, group_name, int(total_segments))

                    # this is spammy as shit, for obvious reasons
                    # pynab.log.debug('Binary part found: ' + subject)

                    # build the segment, make sure segment number and size are ints
                    segment = {
                        'message_id': overview['message-id'][1:-1],
                        'segment': int(segment_number),
                        'size': size
                    }

                    # if we've already got a binary by this name, add this segment
                    if hash in parts:
                        parts[hash]['segments'][segment_number] = segment
                        parts[hash]['available_segments'] += 1
                    else:
                        # dateutil will parse the date as whatever and convert to UTC
                        # some subjects/posters have odd encoding, which will break pymongo
                        # so we make sure it doesn't
                        try:
                            message = {
                                'hash': hash,
                                'subject': subject,
                                'posted': dateutil.parser.parse(overview['date']),
                                'posted_by': posted_by,
                                'group_name': group_name,
                                'xref': pynab.util.smart_truncate(overview['xref'], length=1024),
                                'total_segments': int(total_segments),
                                'available_segments': 1,
                                'segments': {segment_number: segment, },
                            }

                            parts[hash] = message
                        except Exception as e:
                            log.error('server: bad message parse: {}'.format(e))
                            continue
                else:
                    # :getout:
                    ignored += 1

            # instead of checking every single individual segment, package them first
            # so we typically only end up checking the blacklist for ~150 parts instead of thousands
            blacklist = [k for k, v in parts.items() if pynab.parts.is_blacklisted(v, group_name, blacklists)]
            blacklisted_parts = len(blacklist)
            total_parts = len(parts)
            for k in blacklist:
                del parts[k]
        else:
            total_parts = 0
            blacklisted_parts = 0

        # check for missing messages if desired
        # don't do this if we're grabbing ranges, because it won't work
        if not message_ranges:
            messages_missed = list(set(range(first, last)) - set(messages))

        end = time.time()

        log.info('server: {}: retrieved {} - {} in {:.2f}s [{} recv, {} pts, {} ign, {} blk]'.format(
            group_name,
            first, last,
            end - start,
            len(messages),
            total_parts,
            ignored,
            blacklisted_parts
        ))

        # check to see if we at least got some messages - they might've been ignored
        if len(messages) > 0:
            status = True
        else:
            status = False

        return status, parts, messages, messages_missed
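
The grouping above hinges on hashing the subject (with its "(n/m)" counter stripped), the poster, the group and the segment count, so that every segment of one post collapses onto a single key. A minimal, self-contained sketch of that idea; part_hash here is a hypothetical stand-in for pynab.parts.generate_hash, and the overview dicts are made up:

import hashlib
import re

SEGMENT_RE = re.compile(r'\((\d+)/(\d+)\)')

def part_hash(subject, posted_by, group_name, total_segments):
    # hypothetical stand-in for pynab.parts.generate_hash
    key = '{}.{}.{}.{}'.format(subject, posted_by, group_name, total_segments)
    return hashlib.md5(key.encode('utf-8')).hexdigest()

def group_segments(overviews, group_name):
    """Collapse raw overview dicts into one entry per binary."""
    parts = {}
    for overview in overviews:
        matches = SEGMENT_RE.findall(overview['subject'])
        if not matches:
            continue  # no (n/m) counter, so probably not a binary post
        segment_number, total_segments = matches[-1]

        # strip the (n/m) counter so every segment shares the same subject
        subject = overview['subject'].replace(
            '({}/{})'.format(segment_number, total_segments), '').strip()

        key = part_hash(subject, overview['from'], group_name, total_segments)
        segment = {'message_id': overview['message-id'].strip('<>'),
                   'segment': int(segment_number),
                   'size': int(overview[':bytes'])}
        parts.setdefault(key, {'subject': subject, 'segments': {}})
        parts[key]['segments'][int(segment_number)] = segment
    return parts

overviews = [
    {'subject': 'cool.file.rar (1/2)', 'from': 'poster@example.com',
     'message-id': '<a1@example.com>', ':bytes': '750000'},
    {'subject': 'cool.file.rar (2/2)', 'from': 'poster@example.com',
     'message-id': '<a2@example.com>', ':bytes': '750000'},
]
print(group_segments(overviews, 'alt.binaries.test'))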
Example #52
0
    def day_to_post(self, group_name, days):
        """Converts a datetime to approximate article number for the specified group."""
        self.connect()

        log.info('server: {}: finding post {} days old...'.format(group_name, days))

        try:
            with nntp_handler(self, group_name):
                _, count, first, last, _ = self.connection.group(group_name)
        except:
            return None

        # calculate tolerance
        if days <= 50:
            tolerance = 1
        elif days <= 100:
            tolerance = 5
        elif days <= 1000:
            tolerance = 10
        else:
            tolerance = 20

        # get first, last and target dates
        candidate_post = None
        target_date = datetime.datetime.now(pytz.utc) - datetime.timedelta(days)
        bottom_date = self.post_date(group_name, first)

        if not bottom_date:
            log.error('server: {}: can\'t get first date on group, fatal group error. try again later?'.format(
                group_name
            ))
            return None

        # check bottom_date
        if target_date < bottom_date:
            log.info('server: {}: post was before first available, starting from the beginning'.format(
                group_name
            ))
            return first

        top_date = self.post_date(group_name, last)

        if not top_date:
            log.warning('server: {}: can\'t get last date on group, fatal group error. try again later?'.format(
                group_name
            ))
            return None

        if target_date > top_date:
            log.info('server: {}: requested post was newer than most recent, ending'.format(group_name))
            return None

        bottom = first
        top = last

        # Keep track of previously seen candidate posts so that we
        # can adjust and avoid getting into a loop.
        seen_post = {}

        # iterative, obviously
        while True:
            # do something like a binary search
            # find the percentage-point of target date between first and last dates
            # ie. start |-------T---| end = ~70%
            # so we'd find the post number ~70% through the message count
            try:
                target = target_date - bottom_date
                total = top_date - bottom_date
            except:
                log.error('server: {}: nntp server problem while getting first/last article dates'.format(
                    group_name))
                return None

            perc = target.total_seconds() / total.total_seconds()

            while True:
                candidate_post = int(abs(bottom + ((top - bottom) * perc)))
                candidate_date = self.post_date(group_name, candidate_post)
                if candidate_date:
                    break
                else:
                    addition = (random.choice([-1, 1]) / 100) * perc
                    if perc + addition > 1.0:
                        perc -= addition
                    elif perc - addition < 0.0:
                        perc += addition
                    else:
                        perc += addition

            # If we begin to see posts multiple times then we may need to
            # slide our tolerance out a bit to compensate for holes in posts.
            if candidate_post in seen_post:
                tolerance_adjustment = tolerance / 2
                log.debug('server: {}: Seen post more than once, increasing tolerance by {} to compensate.'.format(group_name, tolerance_adjustment))
                tolerance += tolerance_adjustment
            else:
                seen_post[candidate_post] = 1

            # tolerance sliding scale, about 0.1% rounded to the nearest day
            # we don't need a lot of leeway, since this is a lot faster than previously
            if abs(target_date - candidate_date) < datetime.timedelta(days=tolerance):
                break

            if candidate_date > target_date:
                top = candidate_post
                top_date = candidate_date
            else:
                bottom = candidate_post
                bottom_date = candidate_date

            log.debug('server: {}: post {} was {} days old'.format(group_name, candidate_post,
                                                                   Server.days_old(candidate_date)))

        return candidate_post
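
day_to_post is essentially an interpolation search: it probes the article whose position matches the target date's percentage-point between the first and last post dates, then narrows the window until a probe lands within the tolerance. A stripped-down sketch of that search over an in-memory list, assuming post dates grow monotonically with article number:

import datetime

def find_article_near(posts, target_date, tolerance_days=1):
    """posts: list of (article_number, datetime) tuples, oldest first."""
    bottom, top = 0, len(posts) - 1
    bottom_date, top_date = posts[bottom][1], posts[top][1]

    if target_date <= bottom_date:
        return posts[bottom][0]
    if target_date >= top_date:
        return posts[top][0]

    while top - bottom > 1:
        # percentage-point of the target between the two boundary dates
        perc = ((target_date - bottom_date).total_seconds()
                / (top_date - bottom_date).total_seconds())
        candidate = int(bottom + (top - bottom) * perc)
        # keep the probe strictly inside the window so we always make progress
        candidate = max(bottom + 1, min(candidate, top - 1))
        candidate_date = posts[candidate][1]

        if abs(target_date - candidate_date) <= datetime.timedelta(days=tolerance_days):
            return posts[candidate][0]

        if candidate_date > target_date:
            top, top_date = candidate, candidate_date
        else:
            bottom, bottom_date = candidate, candidate_date

    return posts[bottom][0]

base = datetime.datetime(2024, 1, 1)
posts = [(n, base + datetime.timedelta(hours=n)) for n in range(1, 2000)]
print(find_article_near(posts, base + datetime.timedelta(days=30)))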
Example #53
0
File: nzbs.py Project: sqw23/pynab
def import_nzb(name, nzb_data):
    """Import an NZB and directly load it into releases."""

    release = {
        'added': pytz.utc.localize(datetime.datetime.now()),
        'size': None,
        'spotnab_id': None,
        'completion': None,
        'grabs': 0,
        'passworded': None,
        'file_count': None,
        'tvrage': None,
        'tvdb': None,
        'imdb': None,
        'nfo': None,
        'tv': None,
        'total_parts': 0
    }

    try:
        for event, elem in cet.iterparse(io.StringIO(nzb_data)):
            if 'meta' in elem.tag:
                release[elem.attrib['type']] = elem.text
            if 'file' in elem.tag:
                release['total_parts'] += 1
                release['posted'] = elem.get('date')
                release['posted_by'] = elem.get('poster')
            if 'group' in elem.tag and 'groups' not in elem.tag:
                release['group_name'] = elem.text
    except Exception as e:
        log.error('nzb: error parsing NZB file, it appears to be corrupt: {}'.format(e))
        return False

    if 'name' not in release:
        log.error('nzb: failed to import nzb: {0}'.format(name))
        return False

    # check that it doesn't exist first
    with db_session() as db:
        r = db.query(Release).filter(Release.name == release['name']).first()
        if not r:
            r = Release()
            r.name = release['name']
            r.search_name = release['name']

            # 'posted'/'posted_by' are only present if the NZB had <file> elements
            r.posted_by = release.get('posted_by')

            if 'posted' in release:
                r.posted = datetime.datetime.fromtimestamp(
                    int(release['posted']), pytz.utc)
            else:
                r.posted = None

            if 'category' in release:
                parent, child = release['category'].split(' > ')

                # look up the child category under the given parent;
                # filtering Category.name on both values can never match
                category = db.query(Category).filter(
                    Category.name == child).filter(
                        Category.parent.has(Category.name == parent)).first()
                if category:
                    r.category = category
                else:
                    r.category = None
            else:
                r.category = None

            # make sure the release belongs to a group we have in our db
            if 'group_name' in release:
                group = db.query(Group).filter(
                    Group.name == release['group_name']).first()
                if not group:
                    group = Group(name=release['group_name'])
                    db.add(group)
                r.group = group

            # rebuild the nzb, gzipped
            nzb = NZB()
            nzb.data = gzip.compress(nzb_data.encode('utf-8'))
            r.nzb = nzb

            db.merge(r)

            return True
        else:
            log.error('nzb: release already exists: {0}'.format(
                release['name']))
            return False
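
The iterparse loop only needs three element types (meta, file and group) to rebuild the release dict, and the 'in elem.tag' checks sidestep the NZB namespace. A self-contained sketch of the same loop against a tiny inline NZB with made-up values:

import io
import xml.etree.ElementTree as et

NZB = """<nzb xmlns="http://www.newzbin.com/DTD/2003/nzb">
  <head>
    <meta type="name">Example.Release.720p</meta>
    <meta type="category">TV &gt; HD</meta>
  </head>
  <file poster="poster@example.com" date="1577836800" subject="example [1/2]">
    <groups><group>alt.binaries.test</group></groups>
    <segments><segment bytes="750000" number="1">abc@example.com</segment></segments>
  </file>
</nzb>"""

release = {'total_parts': 0}
for _, elem in et.iterparse(io.StringIO(NZB)):
    if 'meta' in elem.tag:
        release[elem.attrib['type']] = elem.text
    elif 'file' in elem.tag:
        release['total_parts'] += 1
        release['posted'] = elem.get('date')
        release['posted_by'] = elem.get('poster')
    elif 'group' in elem.tag and 'groups' not in elem.tag:
        release['group_name'] = elem.text

print(release)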
Example #54
0
def get_rar_info(server, group_name, messages):
    data = server.get(group_name, messages)

    if data:
        # if we got the requested articles, save them to a temp rar
        t = None
        with tempfile.NamedTemporaryFile('wb', suffix='.rar',
                                         delete=False) as t:
            t.write(data.encode('ISO-8859-1'))
            t.flush()

        try:
            files = check_rar(t.name)
        except lib.rar.BadRarFile:
            os.remove(t.name)
            return False, None

        # build a list of files to return
        info = []

        passworded = False
        if files:
            info = [{'size': r.file_size, 'name': r.filename} for r in files]

            unrar_path = config.postprocess.get('unrar_path', '/usr/bin/unrar')
            if not (unrar_path and os.path.isfile(unrar_path)
                    and os.access(unrar_path, os.X_OK)):
                log.error(
                    'rar: skipping archive decompression because unrar_path is not set or incorrect'
                )
                log.error(
                    'rar: if the rar is not password protected, but contains an inner archive that is, we will not know'
                )
            else:
                # make a tempdir to extract rar to
                tmp_dir = tempfile.mkdtemp()
                exe = [
                    '"{}"'.format(unrar_path), 'e', '-ai', '-ep', '-r', '-kb',
                    '-c-', '-id', '-p-', '-y', '-inul', '"{}"'.format(t.name),
                    '"{}"'.format(tmp_dir)
                ]

                try:
                    subprocess.check_call(' '.join(exe),
                                          stderr=subprocess.STDOUT,
                                          shell=True)
                except subprocess.CalledProcessError as cpe:
                    # almost every rar piece we get will throw an error
                    # we're only getting the first segment
                    # log.debug('rar: issue while extracting rar: {}: {} {}'.format(cpe.cmd, cpe.returncode, cpe.output))
                    pass

                inner_passwords = []
                for file in files:
                    fpath = os.path.join(tmp_dir, file.filename)
                    try:
                        inner_files = check_rar(fpath)
                    except lib.rar.BadRarFile:
                        continue

                    if inner_files:
                        inner_passwords += [
                            r.is_encrypted for r in inner_files
                        ]
                    else:
                        passworded = True
                        break

                if not passworded:
                    passworded = any(inner_passwords)

                os.remove(t.name)
                shutil.rmtree(tmp_dir)
        else:
            passworded = True
            os.remove(t.name)

        return passworded, info

    # couldn't get article
    return False, None
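
Because the unrar command above is joined into one string and run with shell=True, every path has to be quoted by hand. Passing an argument list avoids both the quoting and the shell; a sketch of the equivalent call with the same switches and hypothetical paths:

import subprocess

unrar_path = '/usr/bin/unrar'      # assumption: configured unrar binary
rar_file = '/tmp/example.rar'      # hypothetical input archive
tmp_dir = '/tmp/example_extract'   # hypothetical extraction directory

# argument-list form: no shell, no manual quoting needed
cmd = [unrar_path, 'e', '-ai', '-ep', '-r', '-kb',
       '-c-', '-id', '-p-', '-y', '-inul', rar_file, tmp_dir]

try:
    subprocess.check_call(cmd, stderr=subprocess.STDOUT)
except subprocess.CalledProcessError:
    # partial archives will usually fail to extract fully, same as above
    pass
except FileNotFoundError:
    print('unrar not found at {}'.format(unrar_path))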
Example #55
0
def update_regex():
    """Check for NN+ regex update and load them into db."""
    with db_session() as db:
        regex_type = config.postprocess.get('regex_type')
        regex_url = config.postprocess.get('regex_url')
        if regex_url:
            regexes = {}
            response = requests.get(regex_url)
            lines = response.text.splitlines()

            # get the revision or headers by itself
            first_line = lines.pop(0)

            if regex_type == 'nzedb':
                for line in lines:
                    try:
                        id, group, reg, status, desc, ordinal = tuple(
                            line.split('\t'))
                    except ValueError:
                        # broken line
                        continue

                    regexes[int(id)] = {
                        'id': int(id),
                        'group_name': group.replace('^', '').replace(
                            '\\', '').replace('$', ''),
                        'regex': reg.replace('\\\\', '\\'),
                        'ordinal': ordinal,
                        'status': bool(status),
                        'description': desc[:255]
                    }
            else:
                revision = regex.search(r'\$Rev: (\d+) \$', first_line)
                if revision:
                    revision = int(revision.group(1))
                    log.info('Regex at revision: {:d}'.format(revision))

                # and parse the rest of the lines, since they're an sql dump
                for line in lines:
                    reg = regex.search(
                        r"\((\d+), '(.*)', '(.*)', (\d+), (\d+), (.*), (.*)\);$",
                        line)
                    if reg:
                        try:
                            if reg.group(6) == 'NULL':
                                description = ''
                            else:
                                description = reg.group(6).replace('\'', '')

                            regexes[int(reg.group(1))] = {
                                'id': int(reg.group(1)),
                                'group_name': reg.group(2),
                                'regex': reg.group(3).replace('\\\\', '\\'),
                                'ordinal': int(reg.group(4)),
                                'status': bool(reg.group(5)),
                                'description': description
                            }
                        except:
                            log.error('Problem importing regex dump.')
                            return False

            # if the parsing actually worked
            if len(regexes) > 0:
                db.query(Regex).filter(Regex.id < 100000).delete()

                log.info('Retrieved {:d} regexes.'.format(len(regexes)))

                ids = []
                regexes = modify_regex(regexes, regex_type)
                for reg in regexes.values():
                    r = Regex(**reg)
                    ids.append(r.id)
                    db.merge(r)

                log.info('Added/modified {:d} regexes.'.format(len(regexes)))

            # add pynab regex
            for reg in regex_data.additions:
                r = Regex(**reg)
                db.merge(r)

            log.info('Added/modified {:d} Pynab regexes.'.format(
                len(regex_data.additions)))
            db.commit()

            return True
        else:
            log.error(
                'No config item set for regex_url - do you own newznab plus?')
            return False
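
In the nzedb branch, each dump line is just a tab-separated record. A small sketch of that per-line parsing against a made-up sample line (not a real regex_url response):

# hypothetical sample line in the nzedb tab-separated format:
# id \t group \t regex \t status \t description \t ordinal
line = '7\t^alt\\.binaries\\.teevee$\t^(?P<name>.+?)\\\\.S\\\\d{2}\t1\tTV releases\t10'

id_, group, reg, status, desc, ordinal = line.split('\t')

parsed = {
    'id': int(id_),
    'group_name': group.replace('^', '').replace('\\', '').replace('$', ''),
    # the regex column uses doubled backslashes in the dump
    'regex': reg.replace('\\\\', '\\'),
    'ordinal': int(ordinal),
    'status': bool(int(status)),   # '0'/'1' flags; a bare bool('0') would be True
    'description': desc[:255],
}
print(parsed)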
Example #56
0
File: scan.py Project: pl77/pynab
def main(mode='update', group=None, date=None):
    log_init(mode)

    log.info('scan: starting {}...'.format(mode))

    groups = []
    active_groups = {}

    if mode == 'backfill':
        log.info('scan: finding targets for backfill...')
        with pynab.server.Server() as server:
            with db_session() as db:
                if not group:
                    groups = [group.name for group in db.query(Group).filter(Group.active == True).all()]
                else:
                    if db.query(Group).filter(Group.name == group).first():
                        groups = [group]
                for group in groups:
                    target = server.day_to_post(group,
                                                server.days_old(pytz.utc.localize(dateutil.parser.parse(date)))
                                                if date else config.scan.get('backfill_days', 10)
                                                )
                    if target:
                        active_groups[group] = target

    iterations = 0
    while True:
        iterations += 1
        data = []

        # refresh the db session each iteration, just in case
        with db_session() as db:
            if db.query(Segment).count() > config.scan.get('early_process_threshold', 50000000):
                if mode == 'update':
                    log.info('scan: backlog of segments detected, processing first')
                    process()
                else:
                    log.info('scan: backlog of segments detected during backfill, waiting until update has cleared them')
                    time.sleep(config.scan.get('update_wait', 600))
                    continue

            # for scanning, we want to re-check active groups each iteration
            # we don't want to do that for backfilling, though
            if mode == 'update':
                if not group:
                    active_groups = {group.name: None for group in db.query(Group).filter(Group.active == True).all()}
                else:
                    if db.query(Group).filter(Group.name == group).first():
                        active_groups = {group: None}
                    else:
                        log.error('scan: no such group exists')
                        return

            if active_groups:
                with concurrent.futures.ThreadPoolExecutor(config.scan.get('update_threads', None)) as executor:
                    # if maxtasksperchild is more than 1, everything breaks
                    # they're long processes usually, so no problem having one task per child
                    if mode == 'backfill':
                        result = [executor.submit(backfill, active_group, date, target) for active_group, target in active_groups.items()]
                    else:
                        result = [executor.submit(update, active_group) for active_group in active_groups.keys()]

                    for r in concurrent.futures.as_completed(result):
                        data.append(r.result())

                    if mode == 'backfill':
                        if all(data):
                            return

                    # don't retry misses during backfill, it ain't gonna happen
                    if config.scan.get('retry_missed') and not mode == 'backfill':
                        miss_groups = [group_name for group_name, in
                                       db.query(Miss.group_name).group_by(Miss.group_name).all()]
                        miss_result = [executor.submit(scan_missing, miss_group) for miss_group in miss_groups]

                        # no timeout for these, because it could take a while
                        for r in concurrent.futures.as_completed(miss_result):
                            data = r.result()

                db.commit()

                if mode == 'update':
                    process()

                    # clean up dead binaries and parts
                    if config.scan.get('dead_binary_age', 3) != 0:
                        dead_time = pytz.utc.localize(datetime.datetime.now()).replace(
                            tzinfo=None) - datetime.timedelta(days=config.scan.get('dead_binary_age', 3))

                        dead_binaries = db.query(Binary).filter(Binary.posted <= dead_time).delete()
                        db.commit()

                        log.info('scan: deleted {} dead binaries'.format(dead_binaries))
            else:
                log.info('scan: no groups active, cancelling pynab.py...')
                break

            if mode == 'update':
                # vacuum the segments, parts and binaries tables
                log.info('scan: vacuuming relevant tables...')

                if iterations >= config.scan.get('full_vacuum_iterations', 288):
                    # this may look weird, but we want to reset iterations even if full_vacuums are off
                    # so it doesn't count to infinity
                    if config.scan.get('full_vacuum', True):
                        vacuum(mode='scan', full=True)
                    iterations = 0
            else:
                iterations = 0

            db.close()

        # don't bother waiting if we're backfilling, just keep going
        if mode == 'update':
            # wait for the configured amount of time between cycles
            update_wait = config.scan.get('update_wait', 300)
            log.info('scan: sleeping for {:d} seconds...'.format(update_wait))
            time.sleep(update_wait)
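
The per-group fan-out above is a plain submit/as_completed pattern: one future per active group, results collected as they complete. A minimal sketch with a stand-in worker in place of the real update function:

import concurrent.futures
import time

def update(group_name):
    # stand-in for the real per-group update worker
    time.sleep(0.1)
    return '{}: ok'.format(group_name)

active_groups = {'alt.binaries.teevee': None, 'alt.binaries.movies': None}

results = []
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    futures = [executor.submit(update, name) for name in active_groups]
    for future in concurrent.futures.as_completed(futures):
        results.append(future.result())

print(results)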
Example #57
0
def stats(dataset=None):
    if not dataset:
        dataset = {}

    with db_session() as db:
        tv_totals = db.query(func.count(Release.tvshow_id),
                             func.count(Release.tvshow_metablack_id),
                             func.count(Release.id)).join(Category).filter(
                                 Category.parent_id == 5000).one()
        movie_totals = db.query(func.count(Release.movie_id),
                                func.count(Release.movie_metablack_id),
                                func.count(Release.id)).join(Category).filter(
                                    Category.parent_id == 2000).one()
        nfo_total = db.query(func.count(Release.nfo_id),
                             func.count(Release.nfo_metablack_id)).one()
        file_total = db.query(
            Release.id).filter((Release.files.any())
                               | (Release.passworded != 'UNKNOWN')).count()
        file_failed_total = db.query(func.count(
            Release.rar_metablack_id)).one()
        release_total = db.query(Release.id).count()

        dataset['totals'] = {
            'TV': {
                'processed': tv_totals[0],
                'failed': tv_totals[1],
                'total': tv_totals[2]
            },
            'Movies': {
                'processed': movie_totals[0],
                'failed': movie_totals[1],
                'total': movie_totals[2]
            },
            'NFOs': {
                'processed': nfo_total[0],
                'failed': nfo_total[1],
                'total': release_total
            },
            'File Info': {
                'processed': file_total,
                'failed': file_failed_total[0],
                'total': release_total
            }
        }

        dataset['categories'] = db.query(Category, func.count(
            Release.id)).join(Release).group_by(Category).order_by(
                desc(func.count(Release.id))).all()

        dataset['groups'] = db.query(
            Group, func.min(Release.posted),
            func.count(Release.id)).join(Release).group_by(Group).order_by(
                desc(func.count(Release.id))).all()

        try:
            tmpl = Template(
                filename=os.path.join(root_dir, 'templates/api/stats.mako'))
            return tmpl.render(**dataset)
        except:
            log.error('Failed to deliver page: {0}'.format(
                exceptions.text_error_template().render()))
            return None
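
The stats page is just the dataset dict rendered through a Mako template; a sketch of the same render call against an inline template string with dummy totals (the real template is templates/api/stats.mako):

from mako.template import Template

# inline stand-in for templates/api/stats.mako
tmpl = Template("""
% for name, row in totals.items():
${name}: ${row['processed']}/${row['total']} processed
% endfor
""")

dataset = {
    'totals': {
        'TV': {'processed': 120, 'failed': 3, 'total': 150},
        'Movies': {'processed': 80, 'failed': 1, 'total': 90},
    }
}
print(tmpl.render(**dataset))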
Example #58
0
def process():
    """Helper function to begin processing binaries. Checks
    for 100% completion and will create NZBs/releases for
    each complete release. Will also categorise releases,
    and delete old binaries."""

    # TODO: optimise query usage in this, it's using like 10-15 per release

    binary_count = 0
    added_count = 0

    if config.scan.get('publish', False):
        request_session = FuturesSession()
    else:
        request_session = None

    start = time.time()

    with db_session() as db:
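        # select binaries whose parts are all present and whose overall
        # segment completion meets the configured min_completion percentage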
        binary_query = """
            SELECT
                binaries.id, binaries.name, binaries.posted, binaries.total_parts
            FROM binaries
            INNER JOIN (
                SELECT
                    parts.id, parts.binary_id, parts.total_segments, count(*) as available_segments
                FROM parts
                    INNER JOIN segments ON parts.id = segments.part_id
                GROUP BY parts.id
                ) as parts
                ON binaries.id = parts.binary_id
            GROUP BY binaries.id
            HAVING count(*) >= binaries.total_parts AND (sum(parts.available_segments) / sum(parts.total_segments)) * 100 >= {}
            ORDER BY binaries.posted DESC
        """.format(config.postprocess.get('min_completion', 100))

        # pre-cache blacklists and group them
        blacklists = db.query(Blacklist).filter(Blacklist.status == True).all()
        for blacklist in blacklists:
            db.expunge(blacklist)

        # cache categories
        parent_categories = {}
        for category in db.query(Category).all():
            parent_categories[category.id] = (
                category.parent.name if category.parent else category.name)

        # for interest's sakes, memory usage:
        # 38,000 releases uses 8.9mb of memory here
        # no real need to batch it, since this will mostly be run with
        # < 1000 releases per run
        for completed_binary in engine.execute(binary_query).fetchall():
            # some optimisations here. we used to take the binary id and load it
            # then compare binary.name and .posted to any releases
            # in doing so, we loaded the binary into the session
            # this meant that when we deleted it, it didn't cascade
            # we had to submit many, many delete queries - one per segment/part
            # by including name/posted in the big query, we don't load that much data
            # but it lets us check for a release without another query, and means
            # that we cascade delete when we clear the binary

            # first we check if the release already exists
            r = db.query(Release).filter(
                Release.name == completed_binary[1]).filter(
                    Release.posted == completed_binary[2]).first()

            if r:
                # if it does, we have a duplicate - delete the binary
                db.query(Binary).filter(
                    Binary.id == completed_binary[0]).delete()
            else:
                # get an approx size for the binary without loading everything
                # if it's a really big file, we want to deal with it differently
                binary = db.query(Binary).filter(
                    Binary.id == completed_binary[0]).first()

                # get the group early for use in uniqhash
                group = db.query(Group).filter(
                    Group.name == binary.group_name).one()

                # check if the uniqhash already exists too
                dupe_release = db.query(Release).filter(
                    Release.uniqhash == _create_hash(binary.name, group.id,
                                                     binary.posted)).first()
                if dupe_release:
                    db.query(Binary).filter(
                        Binary.id == completed_binary[0]).delete()
                    continue

                # this is an estimate, so it doesn't matter too much
                # 1 part nfo, 1 part sfv or something similar, so ignore two parts
                # take an estimate from the middle parts, since the first/last
                # have a good chance of being something tiny
                # we only care if it's a really big file
                # abs in case it's a 1 part release (abs(1 - 2) = 1)
                # int(/2) works fine (int(1/2) = 0, array is 0-indexed)
                try:
                    est_size = (abs(binary.total_parts - 2) * binary.parts[int(
                        binary.total_parts / 2)].total_segments *
                                binary.parts[int(
                                    binary.total_parts / 2)].segments[0].size)
                except IndexError:
                    log.error(
                        'release: binary [{}] - couldn\'t estimate size - bad regex: {}?'
                        .format(binary.id, binary.regex_id))
                    continue

                oversized = est_size > config.postprocess.get(
                    'max_process_size', 10 * 1024 * 1024 * 1024)

                if oversized and not config.postprocess.get(
                        'max_process_anyway', True):
                    log.debug('release: [{}] - removed (oversized)'.format(
                        binary.name))
                    db.query(Binary).filter(
                        Binary.id == completed_binary[0]).delete()
                    db.commit()
                    continue

                if oversized:
                    # for giant binaries, we do it differently
                    # lazyload the segments in parts and expunge when done
                    # this way we only have to store binary+parts
                    # and one section of segments at one time
                    binary = db.query(Binary).options(
                        subqueryload('parts'),
                        lazyload('parts.segments'),
                    ).filter(Binary.id == completed_binary[0]).first()
                else:
                    # otherwise, start loading all the binary details
                    binary = db.query(Binary).options(
                        subqueryload('parts'),
                        subqueryload('parts.segments'),
                        Load(Part).load_only(Part.id, Part.subject,
                                             Part.segments),
                    ).filter(Binary.id == completed_binary[0]).first()

                blacklisted = False
                for blacklist in blacklists:
                    if regex.search(blacklist.group_name, binary.group_name):
                        # we're operating on binaries, not releases
                        field = 'name' if blacklist.field == 'subject' else blacklist.field
                        if regex.search(blacklist.regex,
                                        getattr(binary, field)):
                            log.debug(
                                'release: [{}] - removed (blacklisted: {})'.
                                format(binary.name, blacklist.id))
                            db.query(Binary).filter(
                                Binary.id == binary.id).delete()
                            db.commit()
                            blacklisted = True
                            break

                if blacklisted:
                    continue

                binary_count += 1

                release = Release()
                release.name = binary.name
                release.original_name = binary.name
                release.posted = binary.posted
                release.posted_by = binary.posted_by
                release.regex_id = binary.regex_id
                release.grabs = 0

                # this counts segment sizes, so we can't use it for large releases
                # use the estimate for min_size and firm it up later during postproc
                if oversized:
                    release.size = est_size
                else:
                    release.size = binary.size()

                # check against minimum size for this group
                undersized = False
                for size, groups in config.postprocess.get('min_size',
                                                           {}).items():
                    if binary.group_name in groups:
                        if release.size < size:
                            undersized = True
                            break

                if undersized:
                    log.debug(
                        'release: [{}] - removed (smaller than minimum size for group)'
                        .format(binary.name))
                    db.query(Binary).filter(Binary.id == binary.id).delete()
                    db.commit()
                    continue

                # check to make sure we have over the configured minimum files
                # this one's okay for big releases, since we're only looking at part-level
                rars = []
                rar_count = 0
                zip_count = 0
                nzb_count = 0

                for part in binary.parts:
                    if pynab.nzbs.rar_part_regex.search(part.subject):
                        rar_count += 1
                    if pynab.nzbs.rar_regex.search(
                            part.subject
                    ) and not pynab.nzbs.metadata_regex.search(part.subject):
                        rars.append(part)
                    if pynab.nzbs.zip_regex.search(
                            part.subject
                    ) and not pynab.nzbs.metadata_regex.search(part.subject):
                        zip_count += 1
                    if pynab.nzbs.nzb_regex.search(part.subject):
                        nzb_count += 1

                # handle min_archives
                # keep, nzb, under
                status = 'keep'
                archive_rules = config.postprocess.get('min_archives', 1)
                if isinstance(archive_rules, dict):
                    # per-group rules; use a separate name so we don't clobber
                    # the Group object that gets assigned to the release below
                    if binary.group_name in archive_rules:
                        rule_group = binary.group_name
                    else:
                        rule_group = '*'

                    # make sure the catchall exists
                    if rule_group not in archive_rules:
                        archive_rules[rule_group] = 1

                    # found a special rule
                    if rar_count + zip_count < archive_rules[rule_group]:
                        if nzb_count > 0:
                            status = 'nzb'
                        else:
                            status = 'under'
                else:
                    # it's an integer, globalise that shit yo
                    if rar_count + zip_count < archive_rules:
                        if nzb_count > 0:
                            status = 'nzb'
                        else:
                            status = 'under'

                # if it's an nzb or we're under, kill it
                if status in ['nzb', 'under']:
                    if status == 'nzb':
                        log.debug('release: [{}] - removed (nzb only)'.format(
                            binary.name))
                    elif status == 'under':
                        log.debug(
                            'release: [{}] - removed (less than minimum archives)'
                            .format(binary.name))

                    db.query(Binary).filter(Binary.id == binary.id).delete()
                    db.commit()
                    continue

                # clean the name for searches
                release.search_name = clean_release_name(binary.name)

                # assign the release group
                release.group = group

                # give the release a category
                release.category_id = pynab.categories.determine_category(
                    binary.name, binary.group_name)

                # create the nzb, store it and link it here
                # no need to do anything special for big releases here
                # if it's set to lazyload, it'll kill rows as they're used
                # if it's a small release, it'll go straight from memory
                nzb = pynab.nzbs.create(release.search_name,
                                        parent_categories[release.category_id],
                                        binary)

                if nzb:
                    added_count += 1

                    log.info(
                        'release: [{}]: added release ({} rars, {} rarparts)'.
                        format(release.search_name, len(rars), rar_count))

                    release.nzb = nzb

                    # save the release
                    db.add(release)

                    try:
                        db.flush()
                    except Exception as e:
                        # this sometimes raises if we get a duplicate
                        # this requires a post of the same name at exactly the same time (down to the second)
                        # pretty unlikely, but there we go
                        log.debug(
                            'release: [{}]: duplicate release, discarded'.
                            format(release.search_name))
                        db.rollback()

                    # delete processed binaries
                    db.query(Binary).filter(Binary.id == binary.id).delete()

                    # publish processed releases?
                    if config.scan.get('publish', False):
                        futures = [
                            request_session.post(host, data=to_json(release))
                            for host in config.scan.get('publish_hosts')
                        ]

            db.commit()

    end = time.time()
    log.info('release: added {} out of {} binaries in {:.2f}s'.format(
        added_count, binary_count, end - start))
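
The duplicate check relies on a uniqhash derived from the binary's name, group id and post date. A hypothetical stand-in for _create_hash (the real helper lives elsewhere in pynab), just to show the shape of it:

import datetime
import hashlib

def create_hash(name, group_id, posted):
    # hypothetical stand-in for pynab's _create_hash
    key = '{}.{}.{}'.format(name, group_id, posted)
    return hashlib.md5(key.encode('utf-8')).hexdigest()

print(create_hash('Example.Release.720p', 42,
                  datetime.datetime(2020, 1, 1, 12, 0, 0)))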
Example #59
0
def search(dataset=None):
    if auth():
        with db_session() as db:
            query = db.query(Release)

            try:
                dbid = None
                dbname = None
                cat_ids = []

                # handle tv/movie searches
                if dataset['function'] in ['tv', 'tvsearch']:
                    # set categories
                    cat_ids.append(5000)

                    query = query.join(TvShow)

                    # edge case for nn compat
                    if request.query.rid:
                        dbid = request.query.rid
                        dbname = 'TVRAGE'

                    # seasons and episodes
                    season = request.query.season or None
                    episode = request.query.ep or None

                    if season or episode:
                        query = query.join(Episode,
                                           Release.episode_id == Episode.id)

                        if season:
                            # 2014, do nothing
                            if season.isdigit() and len(season) <= 2:
                                # 2, convert to S02
                                season = 'S{:02d}'.format(int(season))

                            query = query.filter(Episode.season == season)

                        if episode:
                            # 23/10, do nothing
                            if episode.isdigit() and '/' not in episode:
                                # 15, convert to E15
                                episode = 'E{:02d}'.format(int(episode))

                            query = query.filter(Episode.episode == episode)

                if dataset['function'] in ['m', 'movie']:
                    cat_ids.append(2000)

                    query = query.join(Movie)

                    # edge case for imdb compat
                    if request.query.imdbid:
                        dbid = 'tt' + request.query.imdbid
                        dbname = 'OMDB'

                    genres = request.query.genre or None
                    if genres:
                        for genre in genres.split(','):
                            query = query.filter(
                                or_(Movie.genre.ilike('%{}%'.format(genre))))

                # but if we have a proper set, use them instead
                if request.query.dbname and request.query.dbid:
                    dbid = request.query.dbid
                    dbname = request.query.dbname.upper()

                # filter by id
                if dbid and dbname:
                    query = query.join(DBID).filter((DBID.db == dbname)
                                                    & (DBID.db_id == dbid))

                # get categories
                if not cat_ids:
                    cats = request.query.cat or None
                    if cats:
                        cat_ids = cats.split(',')

                if cat_ids:
                    query = query.join(Category).filter(
                        Category.id.in_(cat_ids)
                        | Category.parent_id.in_(cat_ids))

                # group names
                group_names = request.query.group or None
                if group_names:
                    query = query.join(Group)
                    group_names = group_names.split(',')
                    for group in group_names:
                        query = query.filter(Group.name == group)

                # max age
                max_age = request.query.maxage or None
                if max_age:
                    oldest = datetime.datetime.now() - datetime.timedelta(
                        int(max_age))
                    query = query.filter(Release.posted > oldest)

                # more info?
                extended = request.query.extended or None
                if extended:
                    dataset['extended'] = True
                else:
                    dataset['extended'] = False

                # set limit to request or default
                # this will also match limit == 0, which would be infinite
                limit = request.query.limit or None
                if limit and int(limit) <= int(
                        config.api.get('result_limit', 100)):
                    limit = int(limit)
                else:
                    limit = int(config.api.get('result_default', 20))

                # offset is only available for rss searches and won't work with text
                offset = request.query.offset or None
                if offset and int(offset) > 0:
                    offset = int(offset)
                else:
                    offset = 0

            except Exception as e:
                # normally a try block this long would make me shudder
                # but we don't distinguish between errors, so it's fine
                log.error(
                    'Incorrect API Parameter or parsing error: {}'.format(e))
                return api_error(201)

            search_terms = request.query.q or None
            if search_terms:
                # we're searching specifically for a show or something
                for term in regex.split(r'[ \.]', search_terms):
                    query = query.filter(
                        Release.search_name.ilike('%{}%'.format(term)))

            if config.api.get('postprocessed_only', False):
                query = query.filter(Release.passworded != 'UNKNOWN')

            query = query.order_by(Release.posted.desc())

            # count before applying limit/offset so 'total' reflects
            # the full result set rather than just this page
            total = query.count()

            query = query.limit(limit)
            query = query.offset(offset)

            results = query.all()

            dataset['releases'] = results
            dataset['offset'] = offset
            dataset['total'] = total
            dataset['api_key'] = request.query.apikey

            try:
                return RESULT_TEMPLATE.render(**dataset)
            except:
                log.error('Failed to deliver page: {0}'.format(
                    exceptions.text_error_template().render()))
                return None
    else:
        return api_error(100)
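
The season/episode handling accepts both raw numbers and values that are already formatted or date-based. A short sketch of that normalisation on its own:

def normalise_season(season):
    # '2' -> 'S02', but leave '2014' (a dated season) alone
    if season.isdigit() and len(season) <= 2:
        return 'S{:02d}'.format(int(season))
    return season

def normalise_episode(episode):
    # '15' -> 'E15', but leave '23/10' (a dated episode) alone
    if episode.isdigit() and '/' not in episode:
        return 'E{:02d}'.format(int(episode))
    return episode

print(normalise_season('2'), normalise_season('2014'))
print(normalise_episode('15'), normalise_episode('23/10'))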