Example #1
# assumed imports for this snippet, following the pynab project layout
# (module paths are a best guess; update(), backfill(), process() and
# scan_missing() are defined elsewhere in the same module):
import datetime
import time
import concurrent.futures

import dateutil.parser
import pytz

import pynab.server
from pynab import log, log_init
from pynab.db import db_session, vacuum, Group, Segment, Miss, Binary
import config


def main(mode='update', group=None, date=None):
    log_init(mode)

    log.info('scan: starting {}...'.format(mode))

    groups = []
    active_groups = {}

    if mode == 'backfill':
        log.info('scan: finding targets for backfill...')
        with pynab.server.Server() as server:
            with db_session() as db:
                if not group:
                    groups = [group.name for group in db.query(Group).filter(Group.active == True).all()]
                else:
                    if db.query(Group).filter(Group.name == group).first():
                        groups = [group]
                for group in groups:
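                    # day_to_post() presumably maps an age in days back to the
                    # oldest article number to fetch; a falsy target skips the group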
                    target = server.day_to_post(group,
                                                server.days_old(pytz.utc.localize(dateutil.parser.parse(date)))
                                                if date else config.scan.get('backfill_days', 10)
                                                )
                    if target:
                        active_groups[group] = target

    iterations = 0
    while True:
        iterations += 1
        data = []

        # refresh the db session each iteration, just in case
        with db_session() as db:
            if db.query(Segment).count() > config.scan.get('early_process_threshold', 50000000):
                if mode == 'update':
                    log.info('scan: backlog of segments detected, processing first')
                    process()
                else:
                    log.info('scan: backlog of segments detected during backfill, waiting until update has cleared them')
                    time.sleep(config.scan.get('update_wait', 600))
                    continue

            # for scanning, we want to re-check active groups each iteration
            # we don't want to do that for backfilling, though
            if mode == 'update':
                if not group:
                    active_groups = {group.name: None for group in db.query(Group).filter(Group.active == True).all()}
                else:
                    if db.query(Group).filter(Group.name == group).first():
                        active_groups = {group: None}
                    else:
                        log.error('scan: no such group exists')
                        return

            if active_groups:
                with concurrent.futures.ThreadPoolExecutor(config.scan.get('update_threads', None)) as executor:
                    # if maxtasksperchild is more than 1, everything breaks
                    # they're long processes usually, so no problem having one task per child
                    if mode == 'backfill':
                        result = [executor.submit(backfill, active_group, date, target) for active_group, target in active_groups.items()]
                    else:
                        result = [executor.submit(update, active_group) for active_group in active_groups.keys()]

                    for r in concurrent.futures.as_completed(result):
                        data.append(r.result())

                    if mode == 'backfill':
                        if all(data):
                            return

                    # don't retry misses during backfill, it ain't gonna happen
                    if config.scan.get('retry_missed') and mode != 'backfill':
                        miss_groups = [group_name for group_name, in
                                       db.query(Miss.group_name).group_by(Miss.group_name).all()]
                        miss_result = [executor.submit(scan_missing, miss_group) for miss_group in miss_groups]

                        # no timeout for these, because it could take a while
                        for r in concurrent.futures.as_completed(miss_result):
                            data = r.result()

                db.commit()

                if mode == 'update':
                    process()

                    # clean up dead binaries and parts
                    if config.scan.get('dead_binary_age', 3) != 0:
                        dead_time = pytz.utc.localize(datetime.datetime.now()).replace(
                            tzinfo=None) - datetime.timedelta(days=config.scan.get('dead_binary_age', 3))

                        dead_binaries = db.query(Binary).filter(Binary.posted <= dead_time).delete()
                        db.commit()

                        log.info('scan: deleted {} dead binaries'.format(dead_binaries))
            else:
                log.info('scan: no groups active, cancelling pynab.py...')
                break

            if mode == 'update':
                # vacuum the segments, parts and binaries tables
                log.info('scan: vacuuming relevant tables...')

                if iterations >= config.scan.get('full_vacuum_iterations', 288):
                    # this may look weird, but we want to reset iterations even if full_vacuums are off
                    # so it doesn't count to infinity
                    if config.scan.get('full_vacuum', True):
                        vacuum(mode='scan', full=True)
                    iterations = 0
            else:
                iterations = 0

            db.close()

        # don't bother waiting if we're backfilling, just keep going
        if mode == 'update':
            # wait for the configured amount of time between cycles
            update_wait = config.scan.get('update_wait', 300)
            log.info('scan: sleeping for {:d} seconds...'.format(update_wait))
            time.sleep(update_wait)
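
The update/backfill loop above is driven entirely by config.scan. A minimal sketch of the keys it reads, using the fallback values from the config.scan.get() calls in the code (the flat-dict shape is an assumption; the real pynab config may be laid out differently):

# hypothetical config sketch; keys and fallbacks are taken from the
# config.scan.get() calls in the example above
scan = {
    'update_threads': None,               # ThreadPoolExecutor size (None = executor default)
    'update_wait': 300,                   # sleep between cycles (the backlog wait above falls back to 600)
    'backfill_days': 10,                  # how far back to backfill when no date is given
    'early_process_threshold': 50000000,  # segment backlog that triggers an early process()
    'retry_missed': False,                # read with no default in the code; False is illustrative
    'dead_binary_age': 3,                 # days before stale binaries are deleted (0 disables)
    'full_vacuum_iterations': 288,        # loop iterations between vacuum passes
    'full_vacuum': True,                  # run a full vacuum rather than a plain one
}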
Example #2
# assumed imports for this snippet, following the pynab project layout
# (module paths are a best guess; the process_* helpers are defined
# elsewhere in the same module):
import datetime
import time
import concurrent.futures

import pytz

import scripts.rename_bad_releases
from pynab import log, log_init
from pynab.db import db_session, vacuum, Release, Group, Blacklist, MetaBlack, NZB, NFO, SFV
import config


def main():
    log_init('postprocess')

    log.info('postprocess: starting post-processing...')

    # start with a quick post-process
    #log.info('postprocess: starting with a quick post-process to clear out the cruft that\'s available locally...')
    #scripts.quick_postprocess.local_postprocess()

    iterations = 0
    while True:
        with db_session() as db:
            # delete passworded releases first so we don't bother processing them
            if config.postprocess.get('delete_passworded', True):
                query = db.query(Release)
                if config.postprocess.get('delete_potentially_passworded',
                                          True):
                    query = query.filter((Release.passworded == 'MAYBE')
                                         | (Release.passworded == 'YES'))
                else:
                    query = query.filter(Release.passworded == 'YES')
                deleted = query.delete()
                db.commit()
                log.info('postprocess: deleted {} passworded releases'.format(
                    deleted))

            with concurrent.futures.ThreadPoolExecutor(4) as executor:
                threads = []

                if config.postprocess.get('process_tvshows', True):
                    threads.append(executor.submit(process_tvshows))

                if config.postprocess.get('process_movies', True):
                    threads.append(executor.submit(process_movies))

                # grab and append nfo data to all releases
                if config.postprocess.get('process_nfos', True):
                    threads.append(executor.submit(process_nfos))

                # grab and append sfv data to all releases
                if config.postprocess.get('process_sfvs', False):
                    threads.append(executor.submit(process_sfvs))

                # check for passwords, file count and size
                if config.postprocess.get('process_rars', True):
                    threads.append(executor.submit(process_rars))

                # check for requests in local pre table
                if config.postprocess.get('process_requests', True):
                    threads.append(executor.submit(process_requests))

                #for t in concurrent.futures.as_completed(threads):
                #    data = t.result()

            # every 25 iterations (roughly), reset the unwanted status on releases
            """
            if iterations % 25 == 0:
                log.info('postprocess: resetting unwanted status')
                db.query(Release).filter(Release.unwanted==True).update({Release.unwanted: False})
                db.commit()
            """

            # rename misc->other and all ebooks
            scripts.rename_bad_releases.rename_bad_releases(8010)
            scripts.rename_bad_releases.rename_bad_releases(7020)

            # do a postproc deletion of any enabled blacklists
            # assuming it's enabled, of course
            if config.postprocess.get('delete_blacklisted_releases'):
                deleted = 0
                for blacklist in db.query(Blacklist).filter(
                        Blacklist.status == True).all():
                    # remap subject to name, since normal blacklists operate on binaries
                    # this is on releases, and the attribute changes
                    field = 'search_name' if blacklist.field == 'subject' else blacklist.field

                    # filter by:
                    # group_name should match the blacklist's
                    #   <field> should match the blacklist's regex
                    #   <field> is determined by blacklist's field (usually subject/name)
                    #   date (optimisation)
                    query = db.query(Release).filter(
                        Release.group_id.in_(
                            db.query(Group.id).filter(
                                Group.name.op('~*')(
                                    blacklist.group_name)).subquery())).filter(
                                        getattr(Release, field).op('~*')(
                                            blacklist.regex))
                    if config.postprocess.get('delete_blacklisted_days'):
                        query = query.filter(Release.posted >= (
                            datetime.datetime.now(pytz.utc) -
                            datetime.timedelta(days=config.postprocess.get(
                                'delete_blacklisted_days'))))
                    deleted += query.delete(synchronize_session=False)
                log.info('postprocess: deleted {} blacklisted releases'.format(
                    deleted))
                db.commit()

            if config.postprocess.get('delete_bad_releases', False):
                # kill unwanteds
                pass
                """
                deletes = db.query(Release).filter(Release.unwanted==True).delete()
                deletes = 0

                # and also kill other-miscs that we can't retrieve a rar for
                sub = db.query(Release.id).join(MetaBlack, Release.rar_metablack).\
                    filter(Release.category_id==8010).\
                    filter(MetaBlack.status=='IMPOSSIBLE').\
                    subquery()

                deletes += db.query(Release).filter(Release.id.in_(sub)).delete(synchronize_session='fetch')

                log.info('postprocess: deleted {} bad releases'.format(deletes))
                db.commit()
                """

            if config.postprocess.get('release_expiry_days', 0) > 0:
                expire_days = config.postprocess.get('release_expiry_days', 0)
                log.info(
                    'postprocess: expiring releases posted more than {} days ago.'
                    .format(expire_days))
                deleted_releases = db.query(Release).filter(Release.posted < (
                    datetime.datetime.now(pytz.utc) -
                    datetime.timedelta(days=expire_days))).delete(
                        synchronize_session='fetch')
                log.info('postprocess: expired {} releases'.format(
                    deleted_releases))

            # delete any orphan metablacks
            log.info('postprocess: deleting orphan metablacks...')
            # noinspection PyComparisonWithNone
            deleted_metablacks = db.query(MetaBlack).filter(
                (MetaBlack.movie == None) & (MetaBlack.tvshow == None)
                & (MetaBlack.rar == None) & (MetaBlack.nfo == None)
                & (MetaBlack.sfv == None)).delete(synchronize_session='fetch')
            log.info('postprocess: deleted {} orphaned metablacks.'.format(
                deleted_metablacks))

            # delete any orphan nzbs
            log.info('postprocess: deleting orphan nzbs...')
            # noinspection PyComparisonWithNone
            deleted_nzbs = db.query(NZB).filter(NZB.release == None).delete(
                synchronize_session='fetch')
            log.info(
                'postprocess: deleted {} orphaned nzbs.'.format(deleted_nzbs))

            # delete any orphan nfos
            log.info('postprocess: deleting orphan nfos...')
            # noinspection PyComparisonWithNone
            deleted_nfos = db.query(NFO).filter(NFO.release == None).delete(
                synchronize_session='fetch')
            log.info(
                'postprocess: deleted {} orphaned nfos.'.format(deleted_nfos))

            # delete any orphan sfvs
            log.info('postprocess: deleting orphan sfvs...')
            # noinspection PyComparisonWithNone
            deleted_sfvs = db.query(SFV).filter(SFV.release == None).delete(
                synchronize_session='fetch')
            log.info(
                'postprocess: deleted {} orphaned sfvs.'.format(deleted_sfvs))

            db.commit()

            # vacuum the segments, parts and binaries tables
            log.info('postprocess: vacuuming relevant tables...')
            if iterations >= config.scan.get('full_vacuum_iterations', 288):
                # this may look weird, but we want to reset iterations even if full_vacuums are off
                # so it doesn't count to infinity
                if config.scan.get('full_vacuum', True):
                    vacuum(mode='postprocess', full=True)
                else:
                    vacuum(mode='postprocess', full=False)
                iterations = 0

        iterations += 1

        # wait for the configured amount of time between cycles
        postprocess_wait = config.postprocess.get('postprocess_wait', 300)
        log.info('postprocess: sleeping for {:d} seconds...'.format(postprocess_wait))
        time.sleep(postprocess_wait)
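
The executor block above follows a plain stdlib pattern: conditionally submit each enabled stage, then either gather results with as_completed() or simply let the with block join the workers on exit. A self-contained sketch of that pattern, independent of pynab:

import concurrent.futures

def process_stage(name):
    # stand-in for process_tvshows, process_movies, process_nfos, etc.
    return '{}: done'.format(name)

enabled = {'tvshows': True, 'movies': True, 'sfvs': False}

with concurrent.futures.ThreadPoolExecutor(4) as executor:
    threads = []
    for name, on in enabled.items():
        if on:
            threads.append(executor.submit(process_stage, name))

    # as_completed() yields each future as it finishes; result() re-raises
    # any exception from the worker. the pynab loop leaves this collection
    # step commented out and just joins on the executor's exit instead.
    for t in concurrent.futures.as_completed(threads):
        print(t.result())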
Example #3
def main():
    log_init("postprocess")

    log.info("postprocess: starting post-processing...")

    # start with a quick post-process
    # log.info('postprocess: starting with a quick post-process to clear out the cruft that\'s available locally...')
    # scripts.quick_postprocess.local_postprocess()

    iterations = 0
    while True:
        with db_session() as db:
            # delete passworded releases first so we don't bother processing them
            if config.postprocess.get("delete_passworded", True):
                query = db.query(Release)
                if config.postprocess.get("delete_potentially_passworded", True):
                    query = query.filter((Release.passworded == "MAYBE") | (Release.passworded == "YES"))
                else:
                    query = query.filter(Release.passworded == "YES")
                deleted = query.delete()
                db.commit()
                log.info("postprocess: deleted {} passworded releases".format(deleted))

            with concurrent.futures.ThreadPoolExecutor(4) as executor:
                threads = []

                if config.postprocess.get("process_tvshows", True):
                    threads.append(executor.submit(process_tvshows))

                if config.postprocess.get("process_movies", True):
                    threads.append(executor.submit(process_movies))

                # grab and append nfo data to all releases
                if config.postprocess.get("process_nfos", True):
                    threads.append(executor.submit(process_nfos))

                # grab and append sfv data to all releases
                if config.postprocess.get("process_sfvs", False):
                    threads.append(executor.submit(process_sfvs))

                # check for passwords, file count and size
                if config.postprocess.get("process_rars", True):
                    threads.append(executor.submit(process_rars))

                # check for requests in local pre table
                if config.postprocess.get("process_requests", True):
                    threads.append(executor.submit(process_requests))

                # for t in concurrent.futures.as_completed(threads):
                #    data = t.result()

            # every 25 iterations (roughly), reset the unwanted status on releases
            """
            if iterations % 25 == 0:
                log.info('postprocess: resetting unwanted status')
                db.query(Release).filter(Release.unwanted==True).update({Release.unwanted: False})
                db.commit()
            """

            # rename misc->other and all ebooks
            scripts.rename_bad_releases.rename_bad_releases(8010)
            scripts.rename_bad_releases.rename_bad_releases(7020)

            # do a postproc deletion of any enabled blacklists
            # assuming it's enabled, of course
            if config.postprocess.get("delete_blacklisted_releases"):
                deleted = 0
                for blacklist in db.query(Blacklist).filter(Blacklist.status == True).all():
                    # remap subject to name, since normal blacklists operate on binaries
                    # this is on releases, and the attribute changes
                    field = "search_name" if blacklist.field == "subject" else blacklist.field

                    # filter by:
                    # group_name should match the blacklist's
                    #   <field> should match the blacklist's regex
                    #   <field> is determined by blacklist's field (usually subject/name)
                    #   date (optimisation)
                    query = (
                        db.query(Release)
                        .filter(
                            Release.group_id.in_(
                                db.query(Group.id).filter(Group.name.op("~*")(blacklist.group_name)).subquery()
                            )
                        )
                        .filter(getattr(Release, field).op("~*")(blacklist.regex))
                    )
                    if config.postprocess.get("delete_blacklisted_days"):
                        query = query.filter(
                            Release.posted
                            >= (
                                datetime.datetime.now(pytz.utc)
                                - datetime.timedelta(days=config.postprocess.get("delete_blacklisted_days"))
                            )
                        )
                    deleted += query.delete(synchronize_session=False)
                log.info("postprocess: deleted {} blacklisted releases".format(deleted))
                db.commit()

            if config.postprocess.get("delete_bad_releases", False):
                # kill unwanteds
                pass
                """
                deletes = db.query(Release).filter(Release.unwanted==True).delete()
                deletes = 0

                # and also kill other-miscs that we can't retrieve a rar for
                sub = db.query(Release.id).join(MetaBlack, Release.rar_metablack).\
                    filter(Release.category_id==8010).\
                    filter(MetaBlack.status=='IMPOSSIBLE').\
                    subquery()

                deletes += db.query(Release).filter(Release.id.in_(sub)).delete(synchronize_session='fetch')

                log.info('postprocess: deleted {} bad releases'.format(deletes))
                db.commit()
                """

            if config.postprocess.get("release_expiry_days", 0) > 0:
                expire_days = config.postprocess.get("release_expiry_days", 0)
                log.info("postprocess: expiring releases posted more than {} days ago.".format(expire_days))
                deleted_releases = (
                    db.query(Release)
                    .filter(Release.posted < (datetime.datetime.now(pytz.utc) - datetime.timedelta(days=expire_days)))
                    .delete(synchronize_session="fetch")
                )
                log.info("postprocess: expired {} releases".format(deleted_releases))

            # delete any orphan metablacks
            log.info("postprocess: deleting orphan metablacks...")
            # noinspection PyComparisonWithNone
            deleted_metablacks = (
                db.query(MetaBlack)
                .filter(
                    (MetaBlack.movie == None)
                    & (MetaBlack.tvshow == None)
                    & (MetaBlack.rar == None)
                    & (MetaBlack.nfo == None)
                    & (MetaBlack.sfv == None)
                )
                .delete(synchronize_session="fetch")
            )
            log.info("postprocess: deleted {} orphaned metablacks.".format(deleted_metablacks))

            # delete any orphan nzbs
            log.info("postprocess: deleting orphan nzbs...")
            # noinspection PyComparisonWithNone
            deleted_nzbs = db.query(NZB.id).filter(NZB.release == None).delete(synchronize_session="fetch")
            log.info("postprocess: deleted {} orphaned nzbs.".format(deleted_nzbs))

            # delete any orphan nfos
            log.info("postprocess: deleting orphan nfos...")
            # noinspection PyComparisonWithNone
            deleted_nfos = db.query(NFO.id).filter(NFO.release == None).delete(synchronize_session="fetch")
            log.info("postprocess: deleted {} orphaned nfos.".format(deleted_nfos))

            # delete any orphan sfvs
            log.info("postprocess: deleting orphan sfvs...")
            # noinspection PyComparisonWithNone
            deleted_sfvs = db.query(SFV.id).filter(SFV.release == None).delete(synchronize_session="fetch")
            log.info("postprocess: deleted {} orphaned sfvs.".format(deleted_sfvs))

            db.commit()

            # vacuum the segments, parts and binaries tables
            log.info("postprocess: vacuuming relevant tables...")
            if iterations >= config.scan.get("full_vacuum_iterations", 288):
                # this may look weird, but we want to reset iterations even if full_vacuums are off
                # so it doesn't count to infinity
                if config.scan.get("full_vacuum", True):
                    vacuum(mode="postprocess", full=True)
                else:
                    vacuum(mode="postprocess", full=False)
                iterations = 0

        iterations += 1

        # wait for the configured amount of time between cycles
        postprocess_wait = config.postprocess.get("postprocess_wait", 300)
        log.info("sleeping for {:d} seconds...".format(postprocess_wait))
        time.sleep(postprocess_wait)
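
The blacklist filters in Examples #2 and #3 lean on SQLAlchemy's generic .op() to emit ~*, PostgreSQL's case-insensitive POSIX-regex match operator. A minimal, PostgreSQL-only sketch of the same construct (SQLAlchemy 1.4+ import paths; the model and DSN are hypothetical stand-ins, not pynab's):

from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.orm import declarative_base, Session

Base = declarative_base()

class Release(Base):
    # hypothetical stand-in for pynab's Release model
    __tablename__ = 'releases'
    id = Column(Integer, primary_key=True)
    search_name = Column(String)

engine = create_engine('postgresql://user:pass@localhost/db')  # placeholder DSN

with Session(engine) as session:
    # .op('~*') renders as: releases.search_name ~* %(param)s,
    # i.e. a case-insensitive regex match evaluated by PostgreSQL
    q = session.query(Release).filter(Release.search_name.op('~*')(r'\bsample\b'))
    print(q)  # printing compiles the SQL without touching the database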
Example #4
def main(mode='update', group=None, date=None):
    log_init(mode)

    log.info('scan: starting {}...'.format(mode))

    groups = []
    active_groups = {}

    if mode == 'backfill':
        log.info('scan: finding targets for backfill...')
        with pynab.server.Server() as server:
            with db_session() as db:
                if not group:
                    groups = [group.name for group in db.query(Group).filter(Group.active == True).all()]
                else:
                    if db.query(Group).filter(Group.name == group).first():
                        groups = [group]
                for group in groups:
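                    # day_to_post() presumably maps an age in days back to the
                    # oldest article number to fetch; a falsy target skips the group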
                    target = server.day_to_post(group,
                                                server.days_old(pytz.utc.localize(dateutil.parser.parse(date)))
                                                if date else config.scan.get('backfill_days', 10)
                                                )
                    if target:
                        active_groups[group] = target

    iterations = 0
    while True:
        iterations += 1
        data = []

        # refresh the db session each iteration, just in case
        with db_session() as db:
            if db.query(Segment).count() > config.scan.get('early_process_threshold', 50000000):
                if mode == 'update':
                    log.info('scan: backlog of segments detected, processing first')
                    process()
                else:
                    log.info('scan: backlog of segments detected during backfill, waiting until update has cleared them')
                    time.sleep(config.scan.get('update_wait', 600))
                    continue

            # for scanning, we want to re-check active groups each iteration
            # we don't want to do that for backfilling, though
            if mode == 'update':
                if not group:
                    active_groups = {group.name: None for group in db.query(Group).filter(Group.active == True).all()}
                else:
                    if db.query(Group).filter(Group.name == group).first():
                        active_groups = {group: None}
                    else:
                        log.error('scan: no such group exists')
                        return

            if active_groups:
                with concurrent.futures.ThreadPoolExecutor(config.scan.get('update_threads', None)) as executor:
                    # if maxtasksperchild is more than 1, everything breaks
                    # they're long processes usually, so no problem having one task per child
                    if mode == 'backfill':
                        result = [executor.submit(backfill, active_group, date, target) for active_group, target in active_groups.items()]
                    else:
                        result = [executor.submit(update, active_group) for active_group in active_groups.keys()]

                    for r in concurrent.futures.as_completed(result):
                        data.append(r.result())

                    if mode == 'backfill':
                        if all(data):
                            return

                    # don't retry misses during backfill, it ain't gonna happen
                    if config.scan.get('retry_missed') and mode != 'backfill':
                        miss_groups = [group_name for group_name, in
                                       db.query(Miss.group_name).group_by(Miss.group_name).all()]
                        miss_result = [executor.submit(scan_missing, miss_group) for miss_group in miss_groups]

                        # no timeout for these, because it could take a while
                        for r in concurrent.futures.as_completed(miss_result):
                            data = r.result()

                db.commit()

                if mode == 'update':
                    process()

                    # clean up dead binaries and parts
                    if config.scan.get('dead_binary_age', 3) != 0:
                        dead_time = pytz.utc.localize(datetime.datetime.now()).replace(
                            tzinfo=None) - datetime.timedelta(days=config.scan.get('dead_binary_age', 3))

                        dead_binaries = db.query(Binary).filter(Binary.posted <= dead_time).delete()
                        db.commit()

                        log.info('scan: deleted {} dead binaries'.format(dead_binaries))
            else:
                log.info('scan: no groups active, cancelling pynab.py...')
                break

            if mode == 'update':
                # vacuum the segments, parts and binaries tables
                log.info('scan: vacuuming relevant tables...')

                if iterations >= config.scan.get('full_vacuum_iterations', 288):
                    # this may look weird, but we want to reset iterations even if full_vacuums are off
                    # so it doesn't count to infinity
                    if config.scan.get('full_vacuum', True):
                        vacuum(mode='scan', full=True)
                    iterations = 0
            else:
                iterations = 0

            db.close()

        # don't bother waiting if we're backfilling, just keep going
        if mode == 'update':
            # wait for the configured amount of time between cycles
            update_wait = config.scan.get('update_wait', 300)
            log.info('scan: sleeping for {:d} seconds...'.format(update_wait))
            time.sleep(update_wait)
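
For reference, hypothetical ways to drive this entry point (group names and dates are illustrative; in the real project these arguments arrive from a CLI wrapper):

# hypothetical invocations; argument values are illustrative only
main()                                              # continuous update of all active groups
main(group='alt.binaries.teevee')                   # continuous update of a single group
main(mode='backfill', date='2014-01-01')            # backfill all active groups back to a date
main(mode='backfill', group='alt.binaries.teevee')  # backfill one group using backfill_days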