Example #1
File: groups.py Project: sqw23/pynab
def scan_missing_segments(group_name):
    """Scan for previously missed segments."""

    log.info('missing: checking for missed segments')

    with db_session() as db:
        # recheck for anything to delete
        expired = db.query(Miss).filter(
            Miss.attempts >= config.scan.get('miss_retry_limit')).filter(
                Miss.group_name == group_name).delete()
        db.commit()
        if expired:
            log.info('missing: deleted {} expired misses'.format(expired))

        # get missing articles for this group
        missing_messages = [
            r for r, in db.query(Miss.message).filter(
                Miss.group_name == group_name).all()
        ]

        if missing_messages:
            # mash it into ranges
            missing_ranges = intspan(missing_messages).ranges()

            server = Server()
            server.connect()

            status, parts, messages, missed = server.scan(
                group_name, message_ranges=missing_ranges)

            # if we got some missing parts, save them
            if parts:
                pynab.parts.save_all(parts)

            # even if they got blacklisted, delete the ones we got from the misses
            if messages:
                db.query(Miss).filter(Miss.message.in_(messages)).filter(
                    Miss.group_name == group_name).delete(False)

            db.commit()

            if missed:
                # clear up those we didn't get
                save_missing_segments(group_name, missed)

            if server.connection:
                try:
                    server.connection.quit()
                except:
                    pass
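
The intspan call above does the interesting work: it collapses the flat list of missed article numbers into contiguous ranges so whole blocks can be re-requested at once. A quick illustration, assuming the third-party intspan package this module imports:

from intspan import intspan

print(intspan([1, 2, 3, 7, 8, 12]).ranges())
# [(1, 3), (7, 8), (12, 12)]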
Example #2
File: nfos.py Project: sqw23/pynab
def process(limit=None, category=0):
    """Process releases for NFO parts and download them."""

    with Server() as server:
        with db_session() as db:
            # noinspection PyComparisonWithNone
            query = db.query(Release).join(Group).join(NZB).filter(
                Release.nfo == None).filter(Release.nfo_metablack_id == None)
            if category:
                query = query.filter(Release.category_id == int(category))

            if limit:
                releases = query.order_by(Release.posted.desc()).limit(limit)
            else:
                releases = query.order_by(Release.posted.desc()).all()

            for release in releases:
                found = False
                nzb = pynab.nzbs.get_nzb_details(release.nzb)

                if nzb:
                    nfos = []
                    for nfo in nzb['nfos']:
                        for part in nfo['segments']:
                            if int(part['size']) > NFO_MAX_FILESIZE:
                                continue
                            nfos.append(part)

                    for nfo in nfos:
                        try:
                            article = server.get(release.group.name, [
                                nfo['message_id'],
                            ])
                        except Exception as e:
                            # if usenet's not accessible, don't block it forever
                            log.error('nfo: unable to get nfo: {}'.format(e))
                            continue

                        if article:
                            data = gzip.compress(article.encode('utf-8'))
                            nfo = NFO(data=data)
                            db.add(nfo)

                            release.nfo = nfo
                            release.nfo_metablack_id = None
                            db.add(release)

                            log.debug('nfo: [{}] - nfo added'.format(
                                release.search_name))
                            found = True
                            break

                    if not found:
                        log.debug(
                            'nfo: [{}] - [{}] - no nfos in release'.format(
                                release.id, release.search_name))
                        mb = MetaBlack(nfo=release, status='IMPOSSIBLE')
                        db.add(mb)
                db.commit()
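
Because the article text is stored gzip-compressed (the gzip.compress call above), anything reading NFO.data back needs the matching decompression step. A minimal read-back sketch, assuming the UTF-8 encoding used at write time:

import gzip

def nfo_text(nfo):
    # reverse of gzip.compress(article.encode('utf-8')) above
    return gzip.decompress(nfo.data).decode('utf-8')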
Example #3
File: groups.py Project: Murodese/pynab
def scan_missing_segments(group_name):
    """Scan for previously missed segments."""

    log.info('missing: checking for missed segments')

    with db_session() as db:
        # recheck for anything to delete
        expired = db.query(Miss).filter(Miss.attempts >= config.scan.get('miss_retry_limit')).filter(
            Miss.group_name == group_name).delete()
        db.commit()
        if expired:
            log.info('missing: deleted {} expired misses'.format(expired))

        # get missing articles for this group
        missing_messages = [r for r, in db.query(Miss.message).filter(Miss.group_name == group_name).all()]

        if missing_messages:
            # mash it into ranges
            missing_ranges = intspan(missing_messages).ranges()

            server = Server()
            server.connect()

            status, parts, messages, missed = server.scan(group_name, message_ranges=missing_ranges)

            # if we got some missing parts, save them
            if parts:
                pynab.parts.save_all(parts)

            # even if they got blacklisted, delete the ones we got from the misses
            if messages:
                db.query(Miss).filter(Miss.message.in_(messages)).filter(Miss.group_name == group_name).delete(False)

            db.commit()

            if missed:
                # clear up those we didn't get
                save_missing_segments(group_name, missed)

            if server.connection:
                try:
                    server.connection.quit()
                except:
                    pass
Example #4
def process(limit=None, category=0):
    """Process releases for SFV parts and download them."""

    with Server() as server:
        with db_session() as db:
            # noinspection PyComparisonWithNone
            query = db.query(Release).join(Group).join(NZB).filter(
                Release.sfv == None).filter(Release.sfv_metablack_id == None)
            if category:
                query = query.filter(Release.category_id == int(category))
            if limit:
                releases = query.order_by(Release.posted.desc()).limit(limit)
            else:
                releases = query.order_by(Release.posted.desc()).all()

            for release in releases:
                found = False

                nzb = pynab.nzbs.get_nzb_details(release.nzb)
                if nzb:
                    sfvs = []
                    for sfv in nzb['sfvs']:
                        for part in sfv['segments']:
                            if int(part['size']) > SFV_MAX_FILESIZE:
                                continue
                            sfvs.append(part)

                    for sfv in sfvs:
                        try:
                            article = server.get(release.group.name, [
                                sfv['message_id'],
                            ])
                        except:
                            article = None

                        if article:
                            data = gzip.compress(article.encode('utf-8'))
                            sfv = SFV(data=data)
                            db.add(sfv)

                            release.sfv = sfv
                            release.sfv_metablack_id = None
                            db.add(release)

                            log.info('sfv: [{}] - sfv added'.format(
                                release.search_name))
                            found = True
                            break

                    if not found:
                        log.debug('sfv: [{}] - no sfvs in release'.format(
                            release.search_name))
                        mb = MetaBlack(sfv=release, status='IMPOSSIBLE')
                        db.add(mb)
                db.commit()
Example #5
def process(limit=None, category=0):
    """Processes release rarfiles to check for passwords and filecounts."""

    with Server() as server:
        with db_session() as db:
            # noinspection PyComparisonWithNone
            query = db.query(Release).join(Group).join(NZB).filter(~Release.files.any()). \
                filter(Release.passworded == 'UNKNOWN').filter(Release.rar_metablack_id == None)
            if category:
                query = query.filter(Release.category_id == int(category))

            if limit:
                releases = query.order_by(Release.posted.desc()).limit(limit)
            else:
                releases = query.order_by(Release.posted.desc()).all()

            for release in releases:
                log.debug('rar: processing {}'.format(release.search_name))
                nzb = pynab.nzbs.get_nzb_details(release.nzb)

                if nzb and nzb['rars']:
                    try:
                        passworded, info = check_release_files(
                            server, release.group.name, nzb)
                    except Exception as e:
                        # if usenet isn't accessible, we don't want to blacklist it
                        log.error('rar: file info failed: {}'.format(e))
                        continue

                    if info:
                        log.info('rar: file info add [{}]'.format(
                            release.search_name))
                        release.passworded = passworded

                        size = 0
                        for file in info:
                            f = File(name=file['name'][:512],
                                     size=file['size'])
                            f.release = release
                            size += file['size']
                            db.add(f)

                        if size != 0:
                            release.size = size

                        release.rar_metablack_id = None
                        db.add(release)
                        db.commit()
                        continue
                log.debug('rar: [{}] - file info: no readable rars in release'.
                          format(release.search_name))
                mb = MetaBlack(rar=release, status='IMPOSSIBLE')
                db.add(mb)
                db.commit()
Example #6
File: test_pynab.py Project: gpmidi/pynab
def test_uncompress(self):
    server = Server()
    server.connect(False)
    server.scan('alt.binaries.teevee', 563011234, 563031234)
Example #7
File: test_pynab.py Project: gpmidi/pynab
def test_connect(self):
    self.server = Server()
    self.server.connect()
    self.assertTrue(self.server)
Example #8
File: test_pynab.py Project: gpmidi/pynab
class TestPynab(unittest.TestCase):
    def setUp(self):
        self.server = None

    def test_connect(self):
        self.server = Server()
        self.server.connect()
        self.assertTrue(self.server)

    def test_capabilities(self):
        self.test_connect()
        print(self.server.connection.getcapabilities())

    def test_fetch_headers(self):
        self.test_connect()
        groups = ['alt.binaries.teevee', 'alt.binaries.e-book', 'alt.binaries.moovee']
        for group in groups:
            (_, _, first, last, _) = self.server.connection.group(group)
            for x in range(0, 20000, 10000):
                y = x + 10000 - 1
                parts = self.server.scan(group, last - y, last - x)
                pynab.parts.save_all(parts)

    def test_process_binaries(self):
        pynab.binaries.process()

    def test_process_releases(self):
        pynab.releases.process()

    def test_all(self):
        self.test_fetch_headers()
        self.test_process_binaries()
        self.test_process_releases()

    def test_print_binaries(self):
        pprint.pprint([b for b in db.binaries.find()])

    def test_day_to_post(self):
        self.test_connect()
        self.server.day_to_post('alt.binaries.teevee', 5)

    def test_group_update(self):
        pynab.groups.update('alt.binaries.teevee')

    def test_group_backfill(self):
        pynab.groups.backfill('alt.binaries.teevee')

    def test_tvrage_process(self):
        pynab.tvrage.process(100)

    def test_omdb_search(self):
        print(pynab.imdb.search('South Park Bigger Longer Uncut', '1999'))

    def test_omdb_get_details(self):
        print(pynab.imdb.get_details('tt1285016'))

    def test_nzb_get(self):
        release = db.releases.find_one()
        pprint.pprint(pynab.nzbs.get_nzb_dict(release['nzb']))

    def test_rar_process(self):
        pynab.rars.process(5)

    def test_nfo_process(self):
        pynab.nfos.process(5)

    def test_compress(self):
        server = Server()
        server.connect()
        server.scan('alt.binaries.teevee', 563011234, 563031234)

    def test_uncompress(self):
        server = Server()
        server.connect(False)
        server.scan('alt.binaries.teevee', 563011234, 563031234)

    def tearDown(self):
        try:
            self.server.connection.quit()
        except:
            pass
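
Most of these tests depend on a live usenet server and a populated database, so they are best run individually rather than as a full suite. Assuming the test_pynab.py file name from the header, a single test can be invoked with:

python -m unittest test_pynab.TestPynab.test_connect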
Example #9
File: groups.py Project: gpmidi/pynab
def backfill(group_name, date=None):
    log.info('{}: Backfilling group...'.format(group_name))

    server = Server()
    _, count, first, last, _ = server.group(group_name)

    if date:
        target_article = server.day_to_post(group_name, server.days_old(date))
    else:
        target_article = server.day_to_post(group_name, config.site['backfill_days'])

    group = db.groups.find_one({'name': group_name})
    if group:
        # if the group hasn't been updated before, quit
        if not group['first']:
            log.error('{}: Need to run a normal update prior to backfilling group.'.format(group_name))
            if server.connection:
                server.connection.quit()
            return False

        log.info('{0}: Server has {1:d} - {2:d} or ~{3:d} days.'.format(
            group_name, first, last,
            server.days_old(server.post_date(group_name, first))))

        # if the first article we have is lower than the target
        if target_article >= group['first']:
            log.info('{}: Nothing to do, we already have the target post.'.format(group_name))
            if server.connection:
                server.connection.quit()
            return True

        # or if the target is below the server's first
        if target_article < first:
            log.warning(
                '{}: Backfill target is older than the server\'s retention. Setting target to the first possible article.'.format(
                    group_name))
            target_article = first

        total = group['first'] - target_article
        end = group['first'] - 1
        start = end - MESSAGE_LIMIT + 1
        if target_article > start:
            start = target_article

        while True:
            messages = server.scan(group_name, start, end)

            if messages:
                if parts.save_all(messages):
                    db.groups.update(
                        {'_id': group['_id']},
                        {'$set': {'first': start}})
                else:
                    log.error('{}: Failed while saving parts.'.format(group_name))
                    if server.connection:
                        server.connection.quit()
                    return False

            if start == target_article:
                if server.connection:
                    server.connection.quit()
                return True
            else:
                end = start - 1
                start = end - MESSAGE_LIMIT + 1
                if target_article > start:
                    start = target_article
    else:
        log.error('{}: Group doesn\'t exist in db.'.format(group_name))
        if server.connection:
            server.connection.quit()
        return False
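
The backfill loop above walks backwards through the group in fixed windows of MESSAGE_LIMIT articles, clamping the last window to the target. A worked sketch of that arithmetic, with an assumed MESSAGE_LIMIT of 20000 and hypothetical article numbers:

MESSAGE_LIMIT = 20000
first, target_article = 1000000, 950000

end = first - 1
start = max(end - MESSAGE_LIMIT + 1, target_article)
while True:
    print(start, end)  # 980000-999999, then 960000-979999, then 950000-959999
    if start == target_article:
        break
    end = start - 1
    start = max(end - MESSAGE_LIMIT + 1, target_article)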
Example #10
File: groups.py Project: gpmidi/pynab
def update(group_name):
    log.info('{}: Updating group...'.format(group_name))

    server = Server()
    _, count, first, last, _ = server.group(group_name)

    group = db.groups.find_one({'name': group_name})
    if group:
        # if the group has been scanned before
        if group['last']:
            # pick up where we left off
            start = group['last'] + 1

            # if our last article is newer than the server's, something's wrong
            if last < group['last']:
                log.error('{}: Server\'s last article {:d} is lower than the local {:d}'.format(
                    group_name, last, group['last']))
                if server.connection:
                    try:
                        server.connection.quit()
                    except:
                        pass
                return False
        else:
            # otherwise, start from x days old
            start = server.day_to_post(group_name, config.site['new_group_scan_days'])
            if not start:
                log.error('{}: Couldn\'t determine a start point for group.'.format(group_name))
                if server.connection:
                    try:
                        server.connection.quit()
                    except:
                        pass
                return False
            else:
                db.groups.update(
                    {'_id': group['_id']},
                    {'$set': {'first': start}})

        # either way, we're going upwards so end is the last available
        end = last

        # if total > 0, we have new parts
        total = end - start + 1

        start_date = server.post_date(group_name, start)
        end_date = server.post_date(group_name, end)
        total_date = end_date - start_date

        log.debug(
            '{}: Start: {:d} ({}) End: {:d} ({}) Total: {:d} ({} days, {} hours, {} minutes)'.format(
                group_name, start, start_date,
                end, end_date,
                total, total_date.days, total_date.seconds // 3600,
                (total_date.seconds // 60) % 60))
        if total > 0:
            if not group['last']:
                log.info('{}: Starting new group with {:d} days and {:d} new parts.'.format(
                    group_name, config.site['new_group_scan_days'], total))
            else:
                log.info('{}: Group has {:d} new parts.'.format(group_name, total))

            retries = 0
            # until we're finished, loop
            while True:
                # break the load into segments
                if total > MESSAGE_LIMIT:
                    if start + MESSAGE_LIMIT > last:
                        end = last
                    else:
                        end = start + MESSAGE_LIMIT - 1

                messages = server.scan(group_name, start, end)
                if messages:
                    if parts.save_all(messages):
                        db.groups.update(
                            {'_id': group['_id']},
                            {'$set': {'last': end}})
                    else:
                        log.error('{}: Failed while saving parts.'.format(group_name))
                        if server.connection:
                            try:
                                server.connection.quit()
                            except:
                                pass
                        return False
                else:
                    log.error('Problem updating group - trying again...')
                    retries += 1
                    # keep trying the same block 3 times, then skip
                    if retries <= 3:
                        continue

                if end == last:
                    if server.connection:
                        try:
                            server.connection.quit()
                        except:
                            pass
                    return True
                else:
                    start = end + 1
                    log.info('{}: {:d} messages to go for this group.'.format(group_name, last - end))
        else:
            log.info('{}: No new records for group.'.format(group_name))
            if server.connection:
                server.connection.quit()
            return True
    else:
        log.error('{}: No such group exists in the db.'.format(group_name))
        if server.connection:
            server.connection.quit()
        return False
Example #11
def test_connect(self):
    self.server = Server()
    self.server.connect()
    self.assertTrue(self.server)
Example #12
class TestPynab(unittest.TestCase):
    def setUp(self):
        self.server = None

    def test_connect(self):
        self.server = Server()
        self.server.connect()
        self.assertTrue(self.server)

    def test_capabilities(self):
        self.test_connect()
        print(self.server.connection.getcapabilities())

    def test_fetch_headers(self):
        self.test_connect()
        groups = ['alt.binaries.teevee']
        for group in groups:
            (_, _, first, last, _) = self.server.connection.group(group)
            for x in range(0, 40000, 20000):
                y = x + 20000 - 1
                parts = self.server.scan(group, last - y, last - x)
                pynab.parts.save_all(parts)

    def test_group_update(self):
        import pynab.groups
        pynab.groups.update('alt.binaries.teevee')

    def test_request_process(self):
        import pynab.requests
        pynab.requests.process()

    def test_update_pres(self):
        from scripts.nzedb_pre_import import largeNzedbPre, nzedbPre
        largeNzedbPre()
        nzedbPre()

    def test_process_binaries(self):
        import pynab.binaries
        pynab.binaries.process()

    def test_process_releases(self):
        import pynab.releases
        pynab.releases.process()

    def test_update_blacklist(self):
        import pynab.util
        pynab.util.update_blacklist()

    def test_update_regex(self):
        import pynab.util
        pynab.util.update_regex()

    def test_process_requests(self):
        import pynab.requests
        pynab.requests.process()

    def test_quick_postproc(self):
        import scripts.quick_postprocess

        scripts.quick_postprocess.local_postprocess()

    def test_process_ids(self):
        import pynab.ids

        pynab.ids.process('movie')

    def test_remove_metablacks(self):
        from pynab.db import MetaBlack
        with db_session() as db:
            db.query(MetaBlack).delete()
            db.commit()

    def test_search_releases(self):
        from sqlalchemy_searchable import search
        from pynab.db import Release

        with db_session() as db:
            q = db.query(Release)
            q = search(q, 'engaged e06')
            print(q.first().search_name)

    def test_nzb_parse(self):
        import pynab.nzbs
        from pynab.db import NZB

        with db_session() as db:
            nzb = db.query(NZB).filter(NZB.id==1).one()
            import pprint
            pprint.pprint(pynab.nzbs.get_nzb_details(nzb))

    def test_scrape_nzbsu(self):
        import requests
        import time
        from bs4 import BeautifulSoup

        url = 'https://api.nzb.su/api?apikey=4d901407e99ae6c942416585c8a44673'
        ua = {'User-agent': 'CouchPotato 3.0.1'}
        results = []

        for category in [5020,5030,5040,5050,5060,5070,5080,2010,2020,2030,2040,2050,2060,2070,4010,4020,4030,1010,1020,1030,1050,1080,1090,1100,4050,3010,3020,3030,3040,3050,7010,7020,7030,6010,6020,6030,6040,6050,6060,6070,8010]:
            data = requests.get(url + '&t=search&cat={}&o=json'.format(category), headers=ua).json()
            if 'item' in data['channel']:
                results.extend(data['channel']['item'])

        with open('dog_releases.csv', 'w', encoding='utf-8') as f:
            f.write('"r","name","name","category_id","name","name"\r\n')
            # turn results into useful data
            for i, result in enumerate(results):
                try:
                    resp = requests.get(url + '&t=details&id={}'.format(result['attr'][3]['@attributes']['value']), headers=ua)
                    soup = BeautifulSoup(resp.text)
                    group = soup.find(attrs={'name':'group'})['value']
                    f.write('"{}","{}","{}","{}","{}","{}"\r\n'.format(i, result['title'], group, result['attr'][1]['@attributes']['value'], *result['category'].split(' > ')))
                    time.sleep(5)
                except:
                    continue

    def test_categorise(self):
        import nltk
        import regex
        import csv
        import random
        import pprint

        #def determine_category(name, group_name=''):

        def load_data(filename):
            with open(filename, encoding='utf-8') as f:
                f.readline()
                csvfile = csv.reader(f, delimiter=',', quotechar='"')
                data = []
                for line in csvfile:
                    features = extract_features(line[1])
                    features['group'] = line[2]
                    features['name'] = line[1]
                    data.append((features, line[3]))

                random.shuffle(data)

            return data

        train_data = load_data('tagged_releases_train.csv')
        test_data = load_data('tagged_releases_test.csv')
        nzbsu_data = load_data('tagged_releases_test_nzbsu.csv')

        train_set = train_data
        test_set = test_data
        nzbsu_set = nzbsu_data

        classifier = nltk.NaiveBayesClassifier.train(train_set)

        from pickle import dump
        with open('release_categoriser.pkl', 'wb') as out:
            dump(classifier, out, -1)

        errors = []
        for features, tag in nzbsu_set:
            guess = classifier.classify(features)
            if guess[:2] != tag[:2]:
                errors.append((tag, guess, features))

        for tag, guess, features in errors:
            print('correct={} guess={} name={}'.format(tag, guess, features['name'].encode('utf-8')))

        print(classifier.show_most_informative_features())
        print('test: {}'.format(nltk.classify.accuracy(classifier, test_set)))
        print('test: {}'.format(nltk.classify.accuracy(classifier, nzbsu_set)))

    def test_load_and_categorise(self):
        from pynab.db import db_session, Release, Group, windowed_query
        from pickle import load

        with open('release_categoriser.pkl', 'rb') as cat_file:
            categoriser = load(cat_file)

        with db_session() as db:
            errors = []
            i = 0
            query = db.query(Release).join(Group)
            count = query.count()
            for result in windowed_query(query, Release.id, 500):
                features = extract_features(result.name)
                features['group'] = result.group.name
                features['name'] = result.name

                guess = categoriser.classify(features)
                if guess[:2] != str(result.category_id)[:2]:
                    errors.append((result.category_id, guess, features))

                i += 1
                if i % 500 == 0:
                    print('{} - {:.3f}%'.format((i/count)*100, (1 - (len(errors) / i)) * 100))

        for tag, guess, features in errors:
            print('correct={} guess={} name={}'.format(tag, guess, features['name'].encode('utf-8')))

        print('accuracy={}'.format(1 - (len(errors)/i)))

    def tearDown(self):
        try:
            self.server.connection.quit()
        except:
            pass
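
Both test_categorise and test_load_and_categorise call extract_features, which is not shown in this example. A hypothetical stand-in with the shape the classifier expects (a dict of feature name to value per release name):

import regex

def extract_features(name):
    # purely illustrative features; not pynab's actual feature set
    return {
        'length': len(name),
        'digits': sum(c.isdigit() for c in name),
        'has_season_ep': bool(regex.search(r'[Ss]\d{1,2}[Ee]\d{1,2}', name)),
    }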
Example #13
File: groups.py Project: sqw23/pynab
def scan(group_name, direction='forward', date=None, target=None, limit=None):
    log.info('group: {}: scanning group'.format(group_name))

    with Server() as server:
        _, count, first, last, _ = server.group(group_name)

        if count:
            with db_session() as db:
                group = db.query(Group).filter(
                    Group.name == group_name).first()

                if group:
                    # sort out missing first/lasts
                    if not group.first and not group.last:
                        group.first = last
                        group.last = last
                        direction = 'backward'
                    elif not group.first:
                        group.first = group.last
                    elif not group.last:
                        group.last = group.first

                    # check that our firsts and lasts are valid
                    if group.first < first:
                        log.error(
                            'group: {}: first article was older than first on server'
                            .format(group_name))
                        return True
                    elif group.last > last:
                        log.error(
                            'group: {}: last article was newer than last on server'
                            .format(group_name))
                        return True

                    db.merge(group)

                    # sort out a target
                    start = 0
                    mult = 0
                    if direction == 'forward':
                        start = group.last
                        target = last
                        mult = 1
                    elif direction == 'backward':
                        start = group.first
                        if not target:
                            target = server.day_to_post(
                                group_name,
                                server.days_old(date) if date else
                                config.scan.get('backfill_days', 10))
                        mult = -1

                    if not target:
                        log.info(
                            'group: {}: unable to continue'.format(group_name))
                        return True

                    if group.first <= target <= group.last:
                        log.info(
                            'group: {}: nothing to do, already have target'.
                            format(group_name))
                        return True

                    if first > target or last < target:
                        log.error(
                            'group: {}: server doesn\'t carry target article'.
                            format(group_name))
                        return True

                    iterations = 0
                    num = config.scan.get('message_scan_limit') * mult
                    for i in range(start, target, num):
                        # set the beginning and ends of the scan to their respective values
                        begin = i + mult
                        end = i + (mult *
                                   config.scan.get('message_scan_limit'))

                        # check if the target is before our end
                        if abs(begin) <= abs(target) <= abs(end):
                            # we don't want to overscan
                            end = target

                        # at this point, we care about order
                        # flip them if one is bigger
                        begin, end = (begin, end) if begin < end else (end,
                                                                       begin)

                        status, parts, messages, missed = server.scan(
                            group_name, first=begin, last=end)

                        try:
                            if direction == 'forward':
                                group.last = max(messages)
                            elif direction == 'backward':
                                group.first = min(messages)
                        except:
                            log.error(
                                'group: {}: problem updating group ({}-{})'.
                                format(group_name, start, end))
                            return False

                        # don't save misses if we're backfilling, there are too many
                        if status and missed and config.scan.get(
                                'retry_missed') and direction == 'forward':
                            save_missing_segments(group_name, missed)

                        if status and parts:
                            if pynab.parts.save_all(parts):
                                db.merge(group)
                                db.commit()
                            else:
                                log.error(
                                    'group: {}: problem saving parts to db, restarting scan'
                                    .format(group_name))
                                return False

                        to_go = abs(target - end)
                        log.info(
                            'group: {}: {:.0f} iterations ({} messages) to go'.
                            format(
                                group_name,
                                to_go / config.scan.get('message_scan_limit'),
                                to_go))

                        parts.clear()
                        del messages[:]
                        del missed[:]

                        iterations += 1

                        if limit and iterations >= 3:  #* config.scan.get('message_scan_limit') >= limit:
                            log.info(
                                'group: {}: scan limit reached, ending early (will continue later)'
                                .format(group_name))
                            return False

                    log.info('group: {}: scan completed'.format(group_name))
                    return True
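
The begin/end arithmetic in scan() is direction-agnostic: mult flips the sign for backward scans and the pair is re-ordered before being sent to the server. A small sketch of just that windowing, assuming a message_scan_limit of 1000 and omitting the target clamp for brevity:

limit, mult = 1000, -1  # backward scan
start, target = 5000, 2000
for i in range(start, target, limit * mult):
    begin = i + mult
    end = i + mult * limit
    begin, end = (begin, end) if begin < end else (end, begin)
    print(begin, end)  # 4000 4999, then 3000 3999, then 2000 2999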
Example #14
class TestPynab(unittest.TestCase):
    def setUp(self):
        self.server = None

    def test_connect(self):
        self.server = Server()
        self.server.connect()
        self.assertTrue(self.server)

    def test_capabilities(self):
        self.test_connect()
        print(self.server.connection.getcapabilities())

    def test_fetch_headers(self):
        self.test_connect()
        groups = ['alt.binaries.teevee']
        for group in groups:
            (_, _, first, last, _) = self.server.connection.group(group)
            for x in range(0, 40000, 20000):
                y = x + 20000 - 1
                parts = self.server.scan(group, last - y, last - x)
                pynab.parts.save_all(parts)

    def test_group_update(self):
        import pynab.groups
        pynab.groups.update('alt.binaries.teevee')

    def test_process_binaries(self):
        import pynab.binaries
        pynab.binaries.process()

    def test_process_releases(self):
        import pynab.releases
        pynab.releases.process()

    def test_update_blacklist(self):
        import pynab.util
        pynab.util.update_blacklist()

    def test_update_regex(self):
        import pynab.util
        pynab.util.update_regex()

    def test_search_releases(self):
        from sqlalchemy_searchable import search
        from pynab.db import Release

        with db_session() as db:
            q = db.query(Release)
            q = search(q, 'engaged e06')
            print(q.first().search_name)

    def test_nzb_parse(self):
        import pynab.nzbs
        from pynab.db import NZB

        with db_session() as db:
            nzb = db.query(NZB).filter(NZB.id==1).one()
            import pprint
            pprint.pprint(pynab.nzbs.get_nzb_details(nzb))


    def tearDown(self):
        try:
            self.server.connection.quit()
        except:
            pass
Example #15
File: groups.py Project: tbetton/pynab
def backfill(group_name, date=None):
    log.info('group: {}: backfilling group'.format(group_name))

    server = Server()
    _, count, first, last, _ = server.group(group_name)

    if date:
        target_article = server.day_to_post(group_name, server.days_old(date))
    else:
        target_article = server.day_to_post(group_name, config.scan.get('backfill_days', 10))

    group = db.groups.find_one({'name': group_name})
    if group:
        # if the group hasn't been updated before, quit
        if not group['first']:
            log.error('group: {}: run a normal update prior to backfilling'.format(group_name))
            if server.connection:
                server.connection.quit()
            return False

        # if the first article we have is lower than the target
        if target_article >= group['first']:
            log.info('group: {}: Nothing to do, we already have the target post.'.format(group_name))
            if server.connection:
                server.connection.quit()
            return True

        # or if the target is below the server's first
        if target_article < first:
            target_article = first

        total = group['first'] - target_article
        end = group['first'] - 1
        start = end - MESSAGE_LIMIT + 1
        if target_article > start:
            start = target_article

        retries = 0
        while True:
            messages = server.scan(group_name, start, end)

            if messages:
                if parts.save_all(messages):
                    db.groups.update(
                        {'_id': group['_id']},
                        {'$set': {'first': start}})
                    retries = 0
                else:
                    log.error('group: {}: failed while saving parts'.format(group_name))
                    if server.connection:
                        server.connection.quit()
                    return False
            else:
                log.error('group: {}: problem updating group - trying again'.format(group_name))
                retries += 1
                # keep trying the same block 3 times, then skip
                if retries <= 3:
                    continue

            if start == target_article:
                if server.connection:
                    server.connection.quit()
                return True
            else:
                end = start - 1
                start = end - MESSAGE_LIMIT + 1
                if target_article > start:
                    start = target_article
    else:
        log.error('group: {}: group doesn\'t exist in db.'.format(group_name))
        if server.connection:
            server.connection.quit()
        return False
Example #16
File: groups.py Project: tbetton/pynab
def update(group_name):
    log.info('group: {}: updating group'.format(group_name))

    server = Server()
    _, count, first, last, _ = server.group(group_name)

    group = db.groups.find_one({'name': group_name})
    if group:
        # if the group has been scanned before
        if group['last']:
            # pick up where we left off
            start = group['last'] + 1

            # if our last article is newer than the server's, something's wrong
            if last < group['last']:
                log.error('group: {}: last article {:d} on server is older than the local {:d}'.format(
                    group_name, last, group['last']))
                if server.connection:
                    try:
                        server.connection.quit()
                    except:
                        pass
                return False
        else:
            # otherwise, start from x days old
            start = server.day_to_post(group_name, config.scan.get('new_group_scan_days', 5))
            if not start:
                log.error('group: {}: couldn\'t determine a start point for group'.format(group_name))
                if server.connection:
                    try:
                        server.connection.quit()
                    except:
                        pass
                return False
            else:
                db.groups.update(
                    {'_id': group['_id']},
                    {'$set': {'first': start}})

        # either way, we're going upwards so end is the last available
        end = last

        # if total > 0, we have new parts
        total = end - start + 1

        start_date = server.post_date(group_name, start)
        end_date = server.post_date(group_name, end)

        if start_date and end_date:
            total_date = end_date - start_date

            log.info('group: {}: pulling {} - {} ({}d, {}h, {}m)'.format(
                group_name,
                start, end,
                total_date.days,
                total_date.seconds // 3600,
                (total_date.seconds // 60) % 60
            ))
        else:
            log.info('group: {}: pulling {} - {}'.format(group_name, start, end))

        if total > 0:
            if not group['last']:
                log.info('group: {}: starting new group with {:d} days and {:d} new parts'.format(
                    group_name, config.scan.get('new_group_scan_days', 5), total))
            else:
                log.info('group: {}: group has {:d} new parts.'.format(group_name, total))

            retries = 0
            # until we're finished, loop
            while True:
                # break the load into segments
                if total > MESSAGE_LIMIT:
                    if start + MESSAGE_LIMIT > last:
                        end = last
                    else:
                        end = start + MESSAGE_LIMIT - 1

                messages = server.scan(group_name, start, end)
                if messages:
                    if parts.save_all(messages):
                        db.groups.update(
                            {'_id': group['_id']},
                            {'$set': {'last': end}})
                        retries = 0
                    else:
                        log.error('group: {}: failed while saving parts'.format(group_name))
                        if server.connection:
                            try:
                                server.connection.quit()
                            except:
                                pass
                        return False

                if end == last:
                    if server.connection:
                        try:
                            server.connection.quit()
                        except:
                            pass
                    return True
                else:
                    start = end + 1
        else:
            log.info('group: {}: no new messages'.format(group_name))
            if server.connection:
                server.connection.quit()
            return True
    else:
        log.error('group: {}: no group in db'.format(group_name))
        if server.connection:
            server.connection.quit()
        return False
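
One pattern worth noting across Examples #9, #10, #15 and #16: every exit path has to remember to quit the connection and swallow any error doing so. The newer examples (#2, #4, #5 and #13) push that into a with-block by using Server as a context manager; a minimal sketch of what such a wrapper needs (hypothetical, not pynab's actual implementation):

class ManagedServer(Server):
    def __enter__(self):
        self.connect()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # mirrors the quit-and-ignore blocks repeated in the examples above
        if self.connection:
            try:
                self.connection.quit()
            except Exception:
                pass
        return False  # do not suppress exceptions from the with-body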