def scan_missing_segments(group_name):
    """Scan for previously missed segments."""

    log.info('missing: checking for missed segments')

    with db_session() as db:
        # recheck for anything to delete
        expired = db.query(Miss).filter(
            Miss.attempts >= config.scan.get('miss_retry_limit')).filter(
            Miss.group_name == group_name).delete()
        db.commit()

        if expired:
            log.info('missing: deleted {} expired misses'.format(expired))

        # get missing articles for this group
        missing_messages = [
            r for r, in db.query(Miss.message).filter(
                Miss.group_name == group_name).all()
        ]

        if missing_messages:
            # mash it into ranges
            missing_ranges = intspan(missing_messages).ranges()

            server = Server()
            server.connect()

            status, parts, messages, missed = server.scan(
                group_name, message_ranges=missing_ranges)

            # if we got some missing parts, save them
            if parts:
                pynab.parts.save_all(parts)

            # even if they got blacklisted, delete the ones we got from the misses
            if messages:
                db.query(Miss).filter(Miss.message.in_(messages)).filter(
                    Miss.group_name == group_name).delete(False)

            db.commit()

            if missed:
                # clear up those we didn't get
                save_missing_segments(group_name, missed)

            if server.connection:
                try:
                    server.connection.quit()
                except:
                    pass
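# A hedged aside: scan_missing_segments leans on the third-party `intspan`
# package to collapse a flat list of missed article numbers into contiguous
# (first, last) ranges before re-scanning, so the server isn't asked for one
# article at a time. A minimal standalone sketch of that behaviour, assuming
# `pip install intspan`:
from intspan import intspan

missed_ids = [100, 101, 102, 250, 251, 400]
# ranges() yields inclusive (low, high) tuples:
# [(100, 102), (250, 251), (400, 400)]
print(intspan(missed_ids).ranges())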
def process(limit=None, category=0):
    """Process releases for NFO parts and download them."""

    with Server() as server:
        with db_session() as db:
            # noinspection PyComparisonWithNone,PyComparisonWithNone
            query = db.query(Release).join(Group).join(NZB).filter(
                Release.nfo == None).filter(Release.nfo_metablack_id == None)
            if category:
                query = query.filter(Release.category_id == int(category))

            if limit:
                releases = query.order_by(Release.posted.desc()).limit(limit)
            else:
                releases = query.order_by(Release.posted.desc()).all()

            for release in releases:
                found = False
                nzb = pynab.nzbs.get_nzb_details(release.nzb)

                if nzb:
                    nfos = []
                    for nfo in nzb['nfos']:
                        for part in nfo['segments']:
                            if int(part['size']) > NFO_MAX_FILESIZE:
                                continue
                            nfos.append(part)

                    for nfo in nfos:
                        try:
                            article = server.get(release.group.name, [
                                nfo['message_id'],
                            ])
                        except Exception as e:
                            # if usenet's not accessible, don't block it forever
                            log.error('nfo: unable to get nfo: {}'.format(e))
                            continue

                        if article:
                            data = gzip.compress(article.encode('utf-8'))
                            nfo = NFO(data=data)
                            db.add(nfo)

                            release.nfo = nfo
                            release.nfo_metablack_id = None
                            db.add(release)

                            log.debug('nfo: [{}] - nfo added'.format(
                                release.search_name))
                            found = True
                            break

                if not found:
                    log.debug(
                        'nfo: [{}] - [{}] - no nfos in release'.format(
                            release.id, release.search_name))
                    mb = MetaBlack(nfo=release, status='IMPOSSIBLE')
                    db.add(mb)
            db.commit()
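# The NFO body is stored gzip-compressed in the NFO row's `data` column. A
# minimal sketch of the round-trip using only the stdlib; `article_text` here
# is a stand-in for whatever server.get() returned:
import gzip

article_text = 'Example NFO contents'
blob = gzip.compress(article_text.encode('utf-8'))   # what gets stored above
restored = gzip.decompress(blob).decode('utf-8')     # what a reader would do
assert restored == article_text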
def process(limit=None, category=0):
    """Process releases for SFV parts and download them."""

    with Server() as server:
        with db_session() as db:
            # noinspection PyComparisonWithNone,PyComparisonWithNone
            query = db.query(Release).join(Group).join(NZB).filter(
                Release.sfv == None).filter(Release.sfv_metablack_id == None)
            if category:
                query = query.filter(Release.category_id == int(category))

            if limit:
                releases = query.order_by(Release.posted.desc()).limit(limit)
            else:
                releases = query.order_by(Release.posted.desc()).all()

            for release in releases:
                found = False
                nzb = pynab.nzbs.get_nzb_details(release.nzb)

                if nzb:
                    sfvs = []
                    for sfv in nzb['sfvs']:
                        for part in sfv['segments']:
                            if int(part['size']) > SFV_MAX_FILESIZE:
                                continue
                            sfvs.append(part)

                    for sfv in sfvs:
                        try:
                            article = server.get(release.group.name, [
                                sfv['message_id'],
                            ])
                        except Exception as e:
                            # if usenet's not accessible, don't block it forever
                            log.error('sfv: unable to get sfv: {}'.format(e))
                            article = None

                        if article:
                            data = gzip.compress(article.encode('utf-8'))
                            sfv = SFV(data=data)
                            db.add(sfv)

                            release.sfv = sfv
                            release.sfv_metablack_id = None
                            db.add(release)

                            log.info('sfv: [{}] - sfv added'.format(
                                release.search_name))
                            found = True
                            break

                if not found:
                    log.debug('sfv: [{}] - no sfvs in release'.format(
                        release.search_name))
                    mb = MetaBlack(sfv=release, status='IMPOSSIBLE')
                    db.add(mb)
            db.commit()
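# Why SFVs are worth fetching at all: an .sfv file is just lines of
# "filename CRC32", so a stored SFV names every archive part in a release.
# A hedged parser sketch (not pynab's own code; lines starting with ';' are
# comments by convention):
def parse_sfv(text):
    entries = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith(';'):
            continue
        # the CRC is the last whitespace-separated token on the line
        name, _, crc = line.rpartition(' ')
        entries[name.strip()] = crc.lower()
    return entries

# {'release.part01.rar': '1a2b3c4d'}
print(parse_sfv('; a comment\nrelease.part01.rar 1A2B3C4D'))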
def process(limit=None, category=0):
    """Processes release rarfiles to check for passwords and filecounts."""

    with Server() as server:
        with db_session() as db:
            # noinspection PyComparisonWithNone
            query = db.query(Release).join(Group).join(NZB).filter(~Release.files.any()). \
                filter(Release.passworded == 'UNKNOWN').filter(Release.rar_metablack_id == None)
            if category:
                query = query.filter(Release.category_id == int(category))

            if limit:
                releases = query.order_by(Release.posted.desc()).limit(limit)
            else:
                releases = query.order_by(Release.posted.desc()).all()

            for release in releases:
                log.debug('rar: processing {}'.format(release.search_name))
                nzb = pynab.nzbs.get_nzb_details(release.nzb)

                if nzb and nzb['rars']:
                    try:
                        passworded, info = check_release_files(
                            server, release.group.name, nzb)
                    except Exception as e:
                        # if usenet isn't accessible, we don't want to blacklist it
                        log.error('rar: file info failed: {}'.format(e))
                        continue

                    if info:
                        log.info('rar: file info add [{}]'.format(
                            release.search_name))
                        release.passworded = passworded

                        size = 0
                        for file in info:
                            f = File(name=file['name'][:512],
                                     size=file['size'])
                            f.release = release
                            size += file['size']
                            db.add(f)

                        if size != 0:
                            release.size = size

                        release.rar_metablack_id = None
                        db.add(release)
                        db.commit()
                        continue

                log.debug('rar: [{}] - file info: no readable rars in release'.
                          format(release.search_name))
                mb = MetaBlack(rar=release, status='IMPOSSIBLE')
                db.add(mb)
                db.commit()
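# check_release_files() isn't shown in this section; from the loop above it is
# expected to return (passworded, info), where `info` is an iterable of dicts
# carrying at least 'name' and 'size' keys. A hypothetical return value, for
# shape only (the file names and sizes are made up):
example_info = [
    {'name': 'release.part01.rar', 'size': 52428800},
    {'name': 'release.part02.rar', 'size': 52428800},
]
# the loop sums sizes into release.size and truncates names to the
# File.name column width (512 characters)
assert sum(f['size'] for f in example_info) == 104857600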
def test_uncompress(self):
    server = Server()
    server.connect(False)
    server.scan('alt.binaries.teevee', 563011234, 563031234)
def test_connect(self):
    self.server = Server()
    self.server.connect()
    self.assertTrue(self.server)
class TestPynab(unittest.TestCase):
    def setUp(self):
        self.server = None

    def test_connect(self):
        self.server = Server()
        self.server.connect()
        self.assertTrue(self.server)

    def test_capabilities(self):
        self.test_connect()
        print(self.server.connection.getcapabilities())

    def test_fetch_headers(self):
        self.test_connect()
        groups = ['alt.binaries.teevee', 'alt.binaries.e-book', 'alt.binaries.moovee']
        for group in groups:
            (_, _, first, last, _) = self.server.connection.group(group)
            for x in range(0, 20000, 10000):
                y = x + 10000 - 1
                parts = self.server.scan(group, last - y, last - x)
                pynab.parts.save_all(parts)

    def test_process_binaries(self):
        pynab.binaries.process()

    def test_process_releases(self):
        pynab.releases.process()

    def test_all(self):
        self.test_fetch_headers()
        self.test_process_binaries()
        self.test_process_releases()

    def test_print_binaries(self):
        pprint.pprint([b for b in db.binaries.find()])

    def test_day_to_post(self):
        self.test_connect()
        self.server.day_to_post('alt.binaries.teevee', 5)

    def test_group_update(self):
        pynab.groups.update('alt.binaries.teevee')

    def test_group_backfill(self):
        pynab.groups.backfill('alt.binaries.teevee')

    def test_tvrage_process(self):
        pynab.tvrage.process(100)

    def test_omdb_search(self):
        print(pynab.imdb.search('South Park Bigger Longer Uncut', '1999'))

    def test_omdb_get_details(self):
        print(pynab.imdb.get_details('tt1285016'))

    def test_nzb_get(self):
        release = db.releases.find_one()
        pprint.pprint(pynab.nzbs.get_nzb_dict(release['nzb']))

    def test_rar_process(self):
        pynab.rars.process(5)

    def test_nfo_process(self):
        pynab.nfos.process(5)

    def test_compress(self):
        server = Server()
        server.connect()
        server.scan('alt.binaries.teevee', 563011234, 563031234)

    def test_uncompress(self):
        server = Server()
        server.connect(False)
        server.scan('alt.binaries.teevee', 563011234, 563031234)

    def tearDown(self):
        try:
            self.server.connection.quit()
        except:
            pass
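# Note: these are really integration tests against a live NNTP server and a
# populated database rather than pure unit tests, so they're normally run one
# at a time. Assuming this class lives in tests/test_pynab.py, something like:
#
#   python -m unittest tests.test_pynab.TestPynab.test_connect
#
# runs a single case; `python -m unittest discover` would attempt all of them,
# which hits usenet and mutates the database.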
def backfill(group_name, date=None):
    log.info('{}: Backfilling group...'.format(group_name))

    server = Server()
    _, count, first, last, _ = server.group(group_name)

    if date:
        target_article = server.day_to_post(group_name, server.days_old(date))
    else:
        target_article = server.day_to_post(group_name, config.site['backfill_days'])

    group = db.groups.find_one({'name': group_name})
    if group:
        # if the group hasn't been updated before, quit
        if not group['first']:
            log.error('{}: Need to run a normal update prior to backfilling group.'.format(group_name))
            if server.connection:
                server.connection.quit()
            return False

        log.info('{0}: Server has {1:d} - {2:d} or ~{3:d} days.'
                 .format(group_name, first, last,
                         server.days_old(server.post_date(group_name, first))))

        # if the first article we have is lower than the target
        if target_article >= group['first']:
            log.info('{}: Nothing to do, we already have the target post.'.format(group_name))
            if server.connection:
                server.connection.quit()
            return True

        # or if the target is below the server's first
        if target_article < first:
            log.warning(
                '{}: Backfill target is older than the server\'s retention. '
                'Setting target to the first possible article.'.format(group_name))
            target_article = first

        total = group['first'] - target_article
        end = group['first'] - 1
        start = end - MESSAGE_LIMIT + 1
        if target_article > start:
            start = target_article

        while True:
            messages = server.scan(group_name, start, end)

            if messages:
                if parts.save_all(messages):
                    db.groups.update({
                        '_id': group['_id']
                    }, {
                        '$set': {
                            'first': start
                        }
                    })
                else:
                    log.error('{}: Failed while saving parts.'.format(group_name))
                    if server.connection:
                        server.connection.quit()
                    return False

            if start == target_article:
                if server.connection:
                    server.connection.quit()
                return True
            else:
                end = start - 1
                start = end - MESSAGE_LIMIT + 1
                if target_article > start:
                    start = target_article
    else:
        log.error('{}: Group doesn\'t exist in db.'.format(group_name))
        if server.connection:
            server.connection.quit()
        return False
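# The backfill loop above walks backwards in fixed windows of MESSAGE_LIMIT
# articles, clamping the final window at target_article. The same arithmetic,
# isolated so it can be eyeballed (the MESSAGE_LIMIT value is illustrative):
MESSAGE_LIMIT = 20000

def backfill_windows(group_first, target_article):
    end = group_first - 1
    start = max(end - MESSAGE_LIMIT + 1, target_article)
    while True:
        yield start, end
        if start == target_article:
            return
        end = start - 1
        start = max(end - MESSAGE_LIMIT + 1, target_article)

# first=100000, target=55000 -> (80000, 99999), (60000, 79999), (55000, 59999)
print(list(backfill_windows(100000, 55000)))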
def update(group_name):
    log.info('{}: Updating group...'.format(group_name))

    server = Server()
    _, count, first, last, _ = server.group(group_name)

    group = db.groups.find_one({'name': group_name})
    if group:
        # if the group has been scanned before
        if group['last']:
            # pick up where we left off
            start = group['last'] + 1

            # if our last article is newer than the server's, something's wrong
            if last < group['last']:
                log.error('{}: Server\'s last article {:d} is lower than the local {:d}'.format(
                    group_name, last, group['last']))
                if server.connection:
                    try:
                        server.connection.quit()
                    except:
                        pass
                return False
        else:
            # otherwise, start from x days old
            start = server.day_to_post(group_name, config.site['new_group_scan_days'])
            if not start:
                log.error('{}: Couldn\'t determine a start point for group.'.format(group_name))
                if server.connection:
                    try:
                        server.connection.quit()
                    except:
                        pass
                return False
            else:
                db.groups.update({
                    '_id': group['_id']
                }, {
                    '$set': {
                        'first': start
                    }
                })

        # either way, we're going upwards so end is the last available
        end = last

        # if total > 0, we have new parts
        total = end - start + 1

        start_date = server.post_date(group_name, start)
        end_date = server.post_date(group_name, end)
        total_date = end_date - start_date

        log.debug('{}: Start: {:d} ({}) End: {:d} ({}) Total: {:d} ({} days, {} hours, {} minutes)'
                  .format(group_name, start, start_date, end, end_date, total,
                          total_date.days, total_date.seconds // 3600,
                          (total_date.seconds // 60) % 60))

        if total > 0:
            if not group['last']:
                log.info('{}: Starting new group with {:d} days and {:d} new parts.'
                         .format(group_name, config.site['new_group_scan_days'], total))
            else:
                log.info('{}: Group has {:d} new parts.'.format(group_name, total))

            retries = 0
            # until we're finished, loop
            while True:
                # break the load into segments
                if total > MESSAGE_LIMIT:
                    if start + MESSAGE_LIMIT > last:
                        end = last
                    else:
                        end = start + MESSAGE_LIMIT - 1

                messages = server.scan(group_name, start, end)

                if messages:
                    if parts.save_all(messages):
                        db.groups.update({
                            '_id': group['_id']
                        }, {
                            '$set': {
                                'last': end
                            }
                        })
                    else:
                        log.error('{}: Failed while saving parts.'.format(group_name))
                        if server.connection:
                            try:
                                server.connection.quit()
                            except:
                                pass
                        return False
                else:
                    log.error('Problem updating group - trying again...')
                    retries += 1
                    # keep trying the same block 3 times, then skip
                    if retries <= 3:
                        continue

                if end == last:
                    if server.connection:
                        try:
                            server.connection.quit()
                        except:
                            pass
                    return True
                else:
                    start = end + 1
                    log.info('{}: {:d} messages to go for this group.'.format(group_name, last - end))
        else:
            log.info('{}: No new records for group.'.format(group_name))
            if server.connection:
                server.connection.quit()
            return True
    else:
        log.error('{}: No such group exists in the db.'.format(group_name))
        if server.connection:
            server.connection.quit()
        return False
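# The forward loop above chunks start..last into MESSAGE_LIMIT-sized windows,
# moving start to end + 1 after each saved block. The same arithmetic,
# isolated and slightly simplified (illustrative MESSAGE_LIMIT):
MESSAGE_LIMIT = 20000

def update_windows(start, last):
    while True:
        end = min(start + MESSAGE_LIMIT - 1, last)
        yield start, end
        if end == last:
            return
        start = end + 1

# start=1, last=45000 -> (1, 20000), (20001, 40000), (40001, 45000)
print(list(update_windows(1, 45000)))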
class TestPynab(unittest.TestCase):
    def setUp(self):
        self.server = None

    def test_connect(self):
        self.server = Server()
        self.server.connect()
        self.assertTrue(self.server)

    def test_capabilities(self):
        self.test_connect()
        print(self.server.connection.getcapabilities())

    def test_fetch_headers(self):
        self.test_connect()
        groups = ['alt.binaries.teevee']
        for group in groups:
            (_, _, first, last, _) = self.server.connection.group(group)
            for x in range(0, 40000, 20000):
                y = x + 20000 - 1
                parts = self.server.scan(group, last - y, last - x)
                pynab.parts.save_all(parts)

    def test_group_update(self):
        import pynab.groups
        pynab.groups.update('alt.binaries.teevee')

    def test_request_process(self):
        import pynab.requests
        pynab.requests.process()

    def test_update_pres(self):
        from scripts.nzedb_pre_import import largeNzedbPre, nzedbPre
        largeNzedbPre()
        nzedbPre()

    def test_process_binaries(self):
        import pynab.binaries
        pynab.binaries.process()

    def test_process_releases(self):
        import pynab.releases
        pynab.releases.process()

    def test_update_blacklist(self):
        import pynab.util
        pynab.util.update_blacklist()

    def test_update_regex(self):
        import pynab.util
        pynab.util.update_regex()

    def test_process_requests(self):
        import pynab.requests
        pynab.requests.process()

    def test_quick_postproc(self):
        import scripts.quick_postprocess
        scripts.quick_postprocess.local_postprocess()

    def test_process_ids(self):
        import pynab.ids
        pynab.ids.process('movie')

    def test_remove_metablacks(self):
        from pynab.db import MetaBlack
        with db_session() as db:
            db.query(MetaBlack).delete()
            db.commit()

    def test_search_releases(self):
        from sqlalchemy_searchable import search
        from pynab.db import Release
        with db_session() as db:
            q = db.query(Release)
            q = search(q, 'engaged e06')
            print(q.first().search_name)

    def test_nzb_parse(self):
        import pynab.nzbs
        from pynab.db import NZB
        with db_session() as db:
            nzb = db.query(NZB).filter(NZB.id == 1).one()
            import pprint
            pprint.pprint(pynab.nzbs.get_nzb_details(nzb))

    def test_scrape_nzbsu(self):
        import requests
        import time
        from bs4 import BeautifulSoup

        url = 'https://api.nzb.su/api?apikey=4d901407e99ae6c942416585c8a44673'
        ua = {'User-agent': 'CouchPotato 3.0.1'}

        results = []
        for category in [5020, 5030, 5040, 5050, 5060, 5070, 5080,
                         2010, 2020, 2030, 2040, 2050, 2060, 2070,
                         4010, 4020, 4030,
                         1010, 1020, 1030, 1050, 1080, 1090, 1100, 4050,
                         3010, 3020, 3030, 3040, 3050,
                         7010, 7020, 7030,
                         6010, 6020, 6030, 6040, 6050, 6060, 6070, 8010]:
            data = requests.get(url + '&t=search&cat={}&o=json'.format(category), headers=ua).json()
            if 'item' in data['channel']:
                results.extend(data['channel']['item'])

        with open('dog_releases.csv', 'w', encoding='utf-8') as f:
            f.write('"r","name","name","category_id","name","name"\r\n')
            # turn results into useful data
            for i, result in enumerate(results):
                try:
                    resp = requests.get(url + '&t=details&id={}'.format(
                        result['attr'][3]['@attributes']['value']), headers=ua)
                    soup = BeautifulSoup(resp.text)
                    group = soup.find(attrs={'name': 'group'})['value']
                    f.write('"{}","{}","{}","{}","{}","{}"\r\n'.format(
                        i, result['title'], group,
                        result['attr'][1]['@attributes']['value'],
                        *result['category'].split(' > ')))
                    time.sleep(5)
                except:
                    continue

    def test_categorise(self):
        import nltk
        import regex
        import csv
        import random
        import pprint

        #def determine_category(name, group_name=''):

        def load_data(filename):
            with open(filename, encoding='utf-8') as f:
                f.readline()
                csvfile = csv.reader(f, delimiter=',', quotechar='"')
                data = []
                for line in csvfile:
                    features = extract_features(line[1])
                    features['group'] = line[2]
                    features['name'] = line[1]
                    data.append((features, line[3]))

            random.shuffle(data)
            return data

        train_data = load_data('tagged_releases_train.csv')
        test_data = load_data('tagged_releases_test.csv')
        nzbsu_data = load_data('tagged_releases_test_nzbsu.csv')

        train_set = train_data
        test_set = test_data
        nzbsu_set = nzbsu_data

        classifier = nltk.NaiveBayesClassifier.train(train_set)

        from pickle import dump
        with open('release_categoriser.pkl', 'wb') as out:
            dump(classifier, out, -1)

        errors = []
        for features, tag in nzbsu_set:
            guess = classifier.classify(features)
            if guess[:2] != tag[:2]:
                errors.append((tag, guess, features))

        for tag, guess, features in errors:
            print('correct={} guess={} name={}'.format(tag, guess, features['name'].encode('utf-8')))

        print(classifier.show_most_informative_features())
        print('test: {}'.format(nltk.classify.accuracy(classifier, test_set)))
        print('test: {}'.format(nltk.classify.accuracy(classifier, nzbsu_set)))

    def test_load_and_categorise(self):
        from pynab.db import db_session, Release, Group, windowed_query
        from pickle import load

        with open('release_categoriser.pkl', 'rb') as cat_file:
            categoriser = load(cat_file)

        with db_session() as db:
            errors = []
            i = 0
            query = db.query(Release).join(Group)
            count = query.count()
            for result in windowed_query(query, Release.id, 500):
                features = extract_features(result.name)
                features['group'] = result.group.name
                features['name'] = result.name

                guess = categoriser.classify(features)
                if guess[:2] != str(result.category_id)[:2]:
                    errors.append((result.category_id, guess, features))

                i += 1
                if i % 500 == 0:
                    print('{} - {:.3f}%'.format((i / count) * 100, (1 - (len(errors) / i)) * 100))

        for tag, guess, features in errors:
            print('correct={} guess={} name={}'.format(tag, guess, features['name'].encode('utf-8')))

        print('accuracy={}'.format(1 - (len(errors) / i)))

    def tearDown(self):
        try:
            self.server.connection.quit()
        except:
            pass
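# Once test_categorise has written release_categoriser.pkl, classifying a
# single release name looks roughly like the sketch below. extract_features()
# is the project's own feature extractor (not shown in this section), so this
# is a shape-only example under that assumption:
from pickle import load

with open('release_categoriser.pkl', 'rb') as f:
    categoriser = load(f)

features = extract_features('Some.Show.S01E06.720p.HDTV.x264-GROUP')
features['group'] = 'alt.binaries.teevee'
features['name'] = 'Some.Show.S01E06.720p.HDTV.x264-GROUP'
# returns a category id as a string, e.g. '5030' for TV > SD
print(categoriser.classify(features))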
def scan(group_name, direction='forward', date=None, target=None, limit=None):
    log.info('group: {}: scanning group'.format(group_name))

    with Server() as server:
        _, count, first, last, _ = server.group(group_name)

        if count:
            with db_session() as db:
                group = db.query(Group).filter(Group.name == group_name).first()

                if group:
                    # sort out missing first/lasts
                    if not group.first and not group.last:
                        group.first = last
                        group.last = last
                        direction = 'backward'
                    elif not group.first:
                        group.first = group.last
                    elif not group.last:
                        group.last = group.first

                    # check that our firsts and lasts are valid
                    if group.first < first:
                        log.error('group: {}: first article was older than first on server'.format(group_name))
                        return True
                    elif group.last > last:
                        log.error('group: {}: last article was newer than last on server'.format(group_name))
                        return True

                    db.merge(group)

                    # sort out a target
                    start = 0
                    mult = 0
                    if direction == 'forward':
                        start = group.last
                        target = last
                        mult = 1
                    elif direction == 'backward':
                        start = group.first
                        if not target:
                            target = server.day_to_post(
                                group_name,
                                server.days_old(date) if date else config.scan.get('backfill_days', 10))
                        mult = -1

                    if not target:
                        log.info('group: {}: unable to continue'.format(group_name))
                        return True

                    if group.first <= target <= group.last:
                        log.info('group: {}: nothing to do, already have target'.format(group_name))
                        return True

                    if first > target or last < target:
                        log.error('group: {}: server doesn\'t carry target article'.format(group_name))
                        return True

                    iterations = 0
                    num = config.scan.get('message_scan_limit') * mult
                    for i in range(start, target, num):
                        # set the beginning and ends of the scan to their respective values
                        begin = i + mult
                        end = i + (mult * config.scan.get('message_scan_limit'))

                        # check if the target is before our end
                        if abs(begin) <= abs(target) <= abs(end):
                            # we don't want to overscan
                            end = target

                        # at this point, we care about order
                        # flip them if one is bigger
                        begin, end = (begin, end) if begin < end else (end, begin)

                        status, parts, messages, missed = server.scan(
                            group_name, first=begin, last=end)

                        try:
                            if direction == 'forward':
                                group.last = max(messages)
                            elif direction == 'backward':
                                group.first = min(messages)
                        except:
                            log.error('group: {}: problem updating group ({}-{})'.format(group_name, start, end))
                            return False

                        # don't save misses if we're backfilling, there are too many
                        if status and missed and config.scan.get('retry_missed') and direction == 'forward':
                            save_missing_segments(group_name, missed)

                        if status and parts:
                            if pynab.parts.save_all(parts):
                                db.merge(group)
                                db.commit()
                            else:
                                log.error('group: {}: problem saving parts to db, restarting scan'.format(group_name))
                                return False

                        to_go = abs(target - end)
                        log.info('group: {}: {:.0f} iterations ({} messages) to go'.format(
                            group_name, to_go / config.scan.get('message_scan_limit'), to_go))

                        parts.clear()
                        del messages[:]
                        del missed[:]

                        iterations += 1

                        if limit and iterations >= 3:  # * config.scan.get('message_scan_limit') >= limit:
                            log.info('group: {}: scan limit reached, ending early (will continue later)'.format(group_name))
                            return False

    log.info('group: {}: scan completed'.format(group_name))
    return True
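# The range arithmetic in scan() is the fiddly part: `mult` flips the walk
# direction, each window is (i + mult) .. (i + mult * scan_limit), and the
# pair is swapped into ascending order before hitting the server. The same
# logic, isolated, with an illustrative scan limit:
scan_limit = 10000

def scan_windows(start, target, direction):
    mult = 1 if direction == 'forward' else -1
    for i in range(start, target, scan_limit * mult):
        begin, end = i + mult, i + mult * scan_limit
        if abs(begin) <= abs(target) <= abs(end):
            end = target  # clamp the last window so we don't overscan
        yield tuple(sorted((begin, end)))

# forward:  (50001, 60000), (60001, 70000)
print(list(scan_windows(50000, 70000, 'forward')))
# backward: (40000, 49999), (30000, 39999)
print(list(scan_windows(50000, 30000, 'backward')))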
class TestPynab(unittest.TestCase):
    def setUp(self):
        self.server = None

    def test_connect(self):
        self.server = Server()
        self.server.connect()
        self.assertTrue(self.server)

    def test_capabilities(self):
        self.test_connect()
        print(self.server.connection.getcapabilities())

    def test_fetch_headers(self):
        self.test_connect()
        groups = ['alt.binaries.teevee']
        for group in groups:
            (_, _, first, last, _) = self.server.connection.group(group)
            for x in range(0, 40000, 20000):
                y = x + 20000 - 1
                parts = self.server.scan(group, last - y, last - x)
                pynab.parts.save_all(parts)

    def test_group_update(self):
        import pynab.groups
        pynab.groups.update('alt.binaries.teevee')

    def test_process_binaries(self):
        import pynab.binaries
        pynab.binaries.process()

    def test_process_releases(self):
        import pynab.releases
        pynab.releases.process()

    def test_update_blacklist(self):
        import pynab.util
        pynab.util.update_blacklist()

    def test_update_regex(self):
        import pynab.util
        pynab.util.update_regex()

    def test_search_releases(self):
        from sqlalchemy_searchable import search
        from pynab.db import Release
        with db_session() as db:
            q = db.query(Release)
            q = search(q, 'engaged e06')
            print(q.first().search_name)

    def test_nzb_parse(self):
        import pynab.nzbs
        from pynab.db import NZB
        with db_session() as db:
            nzb = db.query(NZB).filter(NZB.id == 1).one()
            import pprint
            pprint.pprint(pynab.nzbs.get_nzb_details(nzb))

    def tearDown(self):
        try:
            self.server.connection.quit()
        except:
            pass
def backfill(group_name, date=None):
    log.info('group: {}: backfilling group'.format(group_name))

    server = Server()
    _, count, first, last, _ = server.group(group_name)

    if date:
        target_article = server.day_to_post(group_name, server.days_old(date))
    else:
        target_article = server.day_to_post(group_name, config.scan.get('backfill_days', 10))

    group = db.groups.find_one({'name': group_name})
    if group:
        # if the group hasn't been updated before, quit
        if not group['first']:
            log.error('group: {}: run a normal update prior to backfilling'.format(group_name))
            if server.connection:
                server.connection.quit()
            return False

        # if the first article we have is lower than the target
        if target_article >= group['first']:
            log.info('group: {}: Nothing to do, we already have the target post.'.format(group_name))
            if server.connection:
                server.connection.quit()
            return True

        # or if the target is below the server's first
        if target_article < first:
            target_article = first

        total = group['first'] - target_article
        end = group['first'] - 1
        start = end - MESSAGE_LIMIT + 1
        if target_article > start:
            start = target_article

        retries = 0
        while True:
            messages = server.scan(group_name, start, end)

            if messages:
                if parts.save_all(messages):
                    db.groups.update({
                        '_id': group['_id']
                    }, {
                        '$set': {
                            'first': start
                        }
                    })
                    retries = 0
                else:
                    log.error('group: {}: failed while saving parts'.format(group_name))
                    if server.connection:
                        server.connection.quit()
                    return False
            else:
                log.error('group: {}: problem updating group - trying again'.format(group_name))
                retries += 1
                # keep trying the same block 3 times, then skip
                if retries <= 3:
                    continue

            if start == target_article:
                if server.connection:
                    server.connection.quit()
                return True
            else:
                end = start - 1
                start = end - MESSAGE_LIMIT + 1
                if target_article > start:
                    start = target_article
    else:
        log.error('group: {}: group doesn\'t exist in db.'.format(group_name))
        if server.connection:
            server.connection.quit()
        return False
def update(group_name):
    log.info('group: {}: updating group'.format(group_name))

    server = Server()
    _, count, first, last, _ = server.group(group_name)

    group = db.groups.find_one({'name': group_name})
    if group:
        # if the group has been scanned before
        if group['last']:
            # pick up where we left off
            start = group['last'] + 1

            # if our last article is newer than the server's, something's wrong
            if last < group['last']:
                log.error('group: {}: last article {:d} on server is older than the local {:d}'.format(
                    group_name, last, group['last']))
                if server.connection:
                    try:
                        server.connection.quit()
                    except:
                        pass
                return False
        else:
            # otherwise, start from x days old
            start = server.day_to_post(group_name, config.scan.get('new_group_scan_days', 5))
            if not start:
                log.error('group: {}: couldn\'t determine a start point for group'.format(group_name))
                if server.connection:
                    try:
                        server.connection.quit()
                    except:
                        pass
                return False
            else:
                db.groups.update({
                    '_id': group['_id']
                }, {
                    '$set': {
                        'first': start
                    }
                })

        # either way, we're going upwards so end is the last available
        end = last

        # if total > 0, we have new parts
        total = end - start + 1

        start_date = server.post_date(group_name, start)
        end_date = server.post_date(group_name, end)
        if start_date and end_date:
            total_date = end_date - start_date
            log.info('group: {}: pulling {} - {} ({}d, {}h, {}m)'.format(
                group_name, start, end,
                total_date.days,
                total_date.seconds // 3600,
                (total_date.seconds // 60) % 60))
        else:
            log.info('group: {}: pulling {} - {}'.format(group_name, start, end))

        if total > 0:
            if not group['last']:
                log.info('group: {}: starting new group with {:d} days and {:d} new parts'.format(
                    group_name, config.scan.get('new_group_scan_days', 5), total))
            else:
                log.info('group: {}: group has {:d} new parts.'.format(group_name, total))

            retries = 0
            # until we're finished, loop
            while True:
                # break the load into segments
                if total > MESSAGE_LIMIT:
                    if start + MESSAGE_LIMIT > last:
                        end = last
                    else:
                        end = start + MESSAGE_LIMIT - 1

                messages = server.scan(group_name, start, end)

                if messages:
                    if parts.save_all(messages):
                        db.groups.update({
                            '_id': group['_id']
                        }, {
                            '$set': {
                                'last': end
                            }
                        })
                        retries = 0
                    else:
                        log.error('group: {}: failed while saving parts'.format(group_name))
                        if server.connection:
                            try:
                                server.connection.quit()
                            except:
                                pass
                        return False

                if end == last:
                    if server.connection:
                        try:
                            server.connection.quit()
                        except:
                            pass
                    return True
                else:
                    start = end + 1
        else:
            log.info('group: {}: no new messages'.format(group_name))
            if server.connection:
                server.connection.quit()
            return True
    else:
        log.error('group: {}: no group in db'.format(group_name))
        if server.connection:
            server.connection.quit()
        return False
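# The "{}d, {}h, {}m" log line above decomposes a timedelta by hand; a quick
# worked check of that arithmetic, stdlib only:
from datetime import timedelta

total_date = timedelta(days=3, hours=7, minutes=42)
# .seconds holds only the sub-day remainder, so this prints "3d, 7h, 42m"
print('{}d, {}h, {}m'.format(
    total_date.days,
    total_date.seconds // 3600,
    (total_date.seconds // 60) % 60))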