Example #1
def update_blacklist():
    """Check for Blacklist update and load them into db."""
    blacklist_url = config.postprocess.get("blacklist_url")
    if blacklist_url:
        response = requests.get(blacklist_url)
        lines = response.text.splitlines()

        blacklists = []
        for line in lines:
            elements = line.split("\t\t")
            if len(elements) == 4:
                blacklists.append(
                    {"group_name": elements[0], "regex": elements[1], "description": elements[3], "status": False}
                )

        engine.execute(Blacklist.__table__.insert(), blacklists)

        return True
    else:
        log.error("No blacklist update url in config.")
        return False
Example #2
def update_blacklist():
    """Check for Blacklist update and load them into db."""
    blacklist_url = config.postprocess.get('blacklist_url')
    if blacklist_url:
        response = requests.get(blacklist_url)
        lines = response.text.splitlines()

        blacklists = []
        for line in lines:
            elements = line.split('\t\t')
            if len(elements) == 4:
                blacklists.append({
                    'group_name': elements[0],
                    'regex': elements[1],
                    'description': elements[3],
                    'status': False
                })

        engine.execute(Blacklist.__table__.insert(), blacklists)

        return True
    else:
        log.error('No blacklist update url in config.')
        return False
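For reference, both snippets above assume a blacklist feed in which each line carries four fields separated by double tabs: the newsgroup pattern, the regex, a third field the code ignores, and a description. A minimal sketch of that parsing, using a made-up feed line (the field layout is inferred from the code, not documented by the project):

# Hypothetical feed line; real lines come from config.postprocess['blacklist_url'].
line = "alt.binaries.example\t\t^some\\.release\\.regex\t\tORIGIN\t\tBlocks example posts"

elements = line.split("\t\t")
if len(elements) == 4:
    entry = {
        "group_name": elements[0],   # newsgroup pattern the rule applies to
        "regex": elements[1],        # expression matched against binary names
        "description": elements[3],  # human-readable note (elements[2] is ignored)
        "status": False,             # new rules start disabled
    }
    print(entry)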
Example #3
def process():
    """Helper function to begin processing binaries. Checks
    for 100% completion and will create NZBs/releases for
    each complete release. Will also categorise releases,
    and delete old binaries."""

    # TODO: optimise query usage in this, it's using like 10-15 per release

    binary_count = 0
    added_count = 0

    if config.scan.get('publish', False):
        request_session = FuturesSession()
    else:
        request_session = None

    start = time.time()

    with db_session() as db:
        binary_query = """
            SELECT
                binaries.id, binaries.name, binaries.posted, binaries.total_parts
            FROM binaries
            INNER JOIN (
                SELECT
                    parts.id, parts.binary_id, parts.total_segments, count(*) as available_segments
                FROM parts
                    INNER JOIN segments ON parts.id = segments.part_id
                GROUP BY parts.id
                ) as parts
                ON binaries.id = parts.binary_id
            GROUP BY binaries.id
            HAVING count(*) >= binaries.total_parts AND (sum(parts.available_segments) / sum(parts.total_segments)) * 100 >= {}
            ORDER BY binaries.posted DESC
        """.format(config.postprocess.get('min_completion', 100))

        # pre-cache blacklists and group them
        blacklists = db.query(Blacklist).filter(Blacklist.status == True).all()
        for blacklist in blacklists:
            db.expunge(blacklist)

        # cache categories
        parent_categories = {}
        for category in db.query(Category).all():
            parent_categories[category.id] = category.parent.name if category.parent else category.name

        # for interest's sakes, memory usage:
        # 38,000 releases uses 8.9mb of memory here
        # no real need to batch it, since this will mostly be run with
        # < 1000 releases per run
        for completed_binary in engine.execute(binary_query).fetchall():
            # some optimisations here. we used to take the binary id and load it
            # then compare binary.name and .posted to any releases
            # in doing so, we loaded the binary into the session
            # this meant that when we deleted it, it didn't cascade
            # we had to submit many, many delete queries - one per segment/part
            # by including name/posted in the big query, we don't load that much data
            # but it lets us check for a release without another query, and means
            # that we cascade delete when we clear the binary

            # first we check if the release already exists
            r = db.query(Release).filter(Release.name == completed_binary[1]).filter(
                Release.posted == completed_binary[2]
            ).first()
            if r:
                # if it does, we have a duplicate - delete the binary
                db.query(Binary).filter(Binary.id == completed_binary[0]).delete()
            else:
                # get an approx size for the binary without loading everything
                # if it's a really big file, we want to deal with it differently
                binary = db.query(Binary).filter(Binary.id == completed_binary[0]).first()

                # this is an estimate, so it doesn't matter too much
                # 1 part nfo, 1 part sfv or something similar, so ignore two parts
                # take an estimate from the middle parts, since the first/last
                # have a good chance of being something tiny
                # we only care if it's a really big file
                # abs in case it's a 1 part release (abs(1 - 2) = 1)
                # int(/2) works fine (int(1/2) = 0, array is 0-indexed)
                est_size = (abs(binary.total_parts - 2) *
                            binary.parts[int(binary.total_parts / 2)].total_segments *
                            binary.parts[int(binary.total_parts / 2)].segments[0].size)

                oversized = est_size > config.postprocess.get('max_process_size', 10 * 1024 * 1024 * 1024)

                if oversized and not config.postprocess.get('max_process_anyway', True):
                    log.debug('release: [{}] - removed (oversized)'.format(binary.name))
                    db.query(Binary).filter(Binary.id == completed_binary[0]).delete()
                    db.commit()
                    continue

                if oversized:
                    # for giant binaries, we do it differently
                    # lazyload the segments in parts and expunge when done
                    # this way we only have to store binary+parts
                    # and one section of segments at one time
                    binary = db.query(Binary).options(
                        subqueryload('parts'),
                        lazyload('parts.segments'),
                    ).filter(Binary.id == completed_binary[0]).first()
                else:
                    # otherwise, start loading all the binary details
                    binary = db.query(Binary).options(
                        subqueryload('parts'),
                        subqueryload('parts.segments'),
                        Load(Part).load_only(Part.id, Part.subject, Part.segments),
                    ).filter(Binary.id == completed_binary[0]).first()

                blacklisted = False
                for blacklist in blacklists:
                    if regex.search(blacklist.group_name, binary.group_name):
                        # we're operating on binaries, not releases
                        field = 'name' if blacklist.field == 'subject' else blacklist.field
                        if regex.search(blacklist.regex, getattr(binary, field)):
                            log.debug('release: [{}] - removed (blacklisted: {})'.format(binary.name, blacklist.id))
                            db.query(Binary).filter(Binary.id == binary.id).delete()
                            db.commit()
                            blacklisted = True
                            break

                if blacklisted:
                    continue

                binary_count += 1

                release = Release()
                release.name = binary.name
                release.posted = binary.posted
                release.posted_by = binary.posted_by
                release.regex_id = binary.regex_id
                release.grabs = 0

                # this counts segment sizes, so we can't use it for large releases
                # use the estimate for min_size and firm it up later during postproc
                if oversized:
                    release.size = est_size
                else:
                    release.size = binary.size()

                # check against minimum size for this group
                undersized = False
                for size, groups in config.postprocess.get('min_size', {}).items():
                    if binary.group_name in groups:
                        if release.size < size:
                            undersized = True
                            break

                if undersized:
                    log.debug('release: [{}] - removed (smaller than minimum size for group)'.format(
                        binary.name
                    ))
                    db.query(Binary).filter(Binary.id == binary.id).delete()
                    db.commit()
                    continue

                # check to make sure we have over the configured minimum files
                # this one's okay for big releases, since we're only looking at part-level
                rars = []
                rar_count = 0
                zip_count = 0
                nzb_count = 0

                for part in binary.parts:
                    if pynab.nzbs.rar_part_regex.search(part.subject):
                        rar_count += 1
                    if pynab.nzbs.rar_regex.search(part.subject) and not pynab.nzbs.metadata_regex.search(part.subject):
                        rars.append(part)
                    if pynab.nzbs.zip_regex.search(part.subject) and not pynab.nzbs.metadata_regex.search(part.subject):
                        zip_count += 1
                    if pynab.nzbs.nzb_regex.search(part.subject):
                        nzb_count += 1

                # handle min_archives
                # keep, nzb, under
                status = 'keep'
                archive_rules = config.postprocess.get('min_archives', 1)
                if isinstance(archive_rules, dict):
                    # it's a dict
                    if binary.group_name in archive_rules:
                        group = binary.group_name
                    else:
                        group = '*'

                    # make sure the catchall exists
                    if group not in archive_rules:
                        archive_rules[group] = 1

                    # found a special rule
                    if rar_count + zip_count < archive_rules[group]:
                        if nzb_count > 0:
                            status = 'nzb'
                        else:
                            status = 'under'
                else:
                    # it's an integer, globalise that shit yo
                    if rar_count + zip_count < archive_rules:
                        if nzb_count > 0:
                            status = 'nzb'
                        else:
                            status = 'under'

                # if it's an nzb or we're under, kill it
                if status in ['nzb', 'under']:
                    if status == 'nzb':
                        log.debug('release: [{}] - removed (nzb only)'.format(binary.name))
                    elif status == 'under':
                        log.debug('release: [{}] - removed (less than minimum archives)'.format(binary.name))

                    db.query(Binary).filter(Binary.id == binary.id).delete()
                    db.commit()
                    continue

                # clean the name for searches
                release.search_name = clean_release_name(binary.name)

                # assign the release group
                release.group = db.query(Group).filter(Group.name == binary.group_name).one()

                # give the release a category
                release.category_id = pynab.categories.determine_category(binary.name, binary.group_name)

                # create the nzb, store it and link it here
                # no need to do anything special for big releases here
                # if it's set to lazyload, it'll kill rows as they're used
                # if it's a small release, it'll go straight from memory
                nzb = pynab.nzbs.create(release.search_name, parent_categories[release.category_id], binary)

                if nzb:
                    added_count += 1

                    log.info('release: [{}]: added release ({} rars, {} rarparts)'.format(
                        release.search_name,
                        len(rars),
                        rar_count
                    ))

                    release.nzb = nzb

                    # save the release
                    db.add(release)

                    try:
                        db.flush()
                    except Exception as e:
                        # this sometimes raises if we get a duplicate
                        # this requires a post of the same name at exactly the same time (down to the second)
                        # pretty unlikely, but there we go
                        log.debug('release: [{}]: duplicate release, discarded'.format(release.search_name))
                        db.rollback()

                    # delete processed binaries
                    db.query(Binary).filter(Binary.id == binary.id).delete()

                    # publish processed releases?
                    if config.scan.get('publish', False):
                        futures = [request_session.post(host, data=to_json(release)) for host in
                                   config.scan.get('publish_hosts')]

            db.commit()

    end = time.time()
    log.info('release: added {} out of {} binaries in {:.2f}s'.format(
        added_count,
        binary_count,
        end - start
    ))
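The undersized check in process() iterates config.postprocess['min_size'] as a mapping from a minimum byte size to the list of groups it applies to. A small standalone sketch of that check (the group names and thresholds here are illustrative assumptions, not project defaults):

# Hypothetical config fragment: minimum release size (bytes) per set of groups.
min_size = {
    104857600: ['alt.binaries.example.hd'],    # 100 MiB floor for these groups
    1048576: ['alt.binaries.example.ebooks'],  # 1 MiB floor for these
}

group_name = 'alt.binaries.example.hd'
release_size = 50 * 1024 * 1024  # 50 MiB

undersized = False
for size, groups in min_size.items():
    if group_name in groups and release_size < size:
        undersized = True
        break

print(undersized)  # True: below the 100 MiB floor for this group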
Example #4
def process():
    """Helper function to begin processing binaries. Checks
    for 100% completion and will create NZBs/releases for
    each complete release. Will also categorise releases,
    and delete old binaries."""

    # TODO: optimise query usage in this, it's using like 10-15 per release

    binary_count = 0
    added_count = 0

    if config.scan.get('publish', False):
        request_session = FuturesSession()
    else:
        request_session = None

    start = time.time()

    with db_session() as db:
        binary_query = """
            SELECT
                binaries.id, binaries.name, binaries.posted, binaries.total_parts
            FROM binaries
            INNER JOIN (
                SELECT
                    parts.id, parts.binary_id, parts.total_segments, count(*) as available_segments
                FROM parts
                    INNER JOIN segments ON parts.id = segments.part_id
                GROUP BY parts.id
                ) as parts
                ON binaries.id = parts.binary_id
            GROUP BY binaries.id
            HAVING count(*) >= binaries.total_parts AND (sum(parts.available_segments) / sum(parts.total_segments)) * 100 >= {}
            ORDER BY binaries.posted DESC
        """.format(config.postprocess.get('min_completion', 100))

        # pre-cache blacklists and group them
        blacklists = db.query(Blacklist).filter(Blacklist.status == True).all()
        for blacklist in blacklists:
            db.expunge(blacklist)

        # cache categories
        parent_categories = {}
        for category in db.query(Category).all():
            parent_categories[category.id] = (
                category.parent.name if category.parent else category.name)

        # for interest's sakes, memory usage:
        # 38,000 releases uses 8.9mb of memory here
        # no real need to batch it, since this will mostly be run with
        # < 1000 releases per run
        for completed_binary in engine.execute(binary_query).fetchall():
            # some optimisations here. we used to take the binary id and load it
            # then compare binary.name and .posted to any releases
            # in doing so, we loaded the binary into the session
            # this meant that when we deleted it, it didn't cascade
            # we had to submit many, many delete queries - one per segment/part
            # by including name/posted in the big query, we don't load that much data
            # but it lets us check for a release without another query, and means
            # that we cascade delete when we clear the binary

            # first we check if the release already exists
            r = db.query(Release).filter(
                Release.name == completed_binary[1]).filter(
                    Release.posted == completed_binary[2]).first()

            if r:
                # if it does, we have a duplicate - delete the binary
                db.query(Binary).filter(
                    Binary.id == completed_binary[0]).delete()
            else:
                # get an approx size for the binary without loading everything
                # if it's a really big file, we want to deal with it differently
                binary = db.query(Binary).filter(
                    Binary.id == completed_binary[0]).first()

                # get the group early for use in uniqhash
                group = db.query(Group).filter(
                    Group.name == binary.group_name).one()

                # check if the uniqhash already exists too
                dupe_release = db.query(Release).filter(
                    Release.uniqhash == _create_hash(binary.name, group.id,
                                                     binary.posted)).first()
                if dupe_release:
                    db.query(Binary).filter(
                        Binary.id == completed_binary[0]).delete()
                    continue

                # this is an estimate, so it doesn't matter too much
                # 1 part nfo, 1 part sfv or something similar, so ignore two parts
                # take an estimate from the middle parts, since the first/last
                # have a good chance of being something tiny
                # we only care if it's a really big file
                # abs in case it's a 1 part release (abs(1 - 2) = 1)
                # int(/2) works fine (int(1/2) = 0, array is 0-indexed)
                try:
                    est_size = (abs(binary.total_parts - 2) * binary.parts[int(
                        binary.total_parts / 2)].total_segments *
                                binary.parts[int(
                                    binary.total_parts / 2)].segments[0].size)
                except IndexError:
                    log.error(
                        'release: binary [{}] - couldn\'t estimate size - bad regex: {}?'
                        .format(binary.id, binary.regex_id))
                    continue

                oversized = est_size > config.postprocess.get(
                    'max_process_size', 10 * 1024 * 1024 * 1024)

                if oversized and not config.postprocess.get(
                        'max_process_anyway', True):
                    log.debug('release: [{}] - removed (oversized)'.format(
                        binary.name))
                    db.query(Binary).filter(
                        Binary.id == completed_binary[0]).delete()
                    db.commit()
                    continue

                if oversized:
                    # for giant binaries, we do it differently
                    # lazyload the segments in parts and expunge when done
                    # this way we only have to store binary+parts
                    # and one section of segments at one time
                    binary = db.query(Binary).options(
                        subqueryload('parts'),
                        lazyload('parts.segments'),
                    ).filter(Binary.id == completed_binary[0]).first()
                else:
                    # otherwise, start loading all the binary details
                    binary = db.query(Binary).options(
                        subqueryload('parts'),
                        subqueryload('parts.segments'),
                        Load(Part).load_only(Part.id, Part.subject,
                                             Part.segments),
                    ).filter(Binary.id == completed_binary[0]).first()

                blacklisted = False
                for blacklist in blacklists:
                    if regex.search(blacklist.group_name, binary.group_name):
                        # we're operating on binaries, not releases
                        field = 'name' if blacklist.field == 'subject' else blacklist.field
                        if regex.search(blacklist.regex,
                                        getattr(binary, field)):
                            log.debug(
                                'release: [{}] - removed (blacklisted: {})'.
                                format(binary.name, blacklist.id))
                            db.query(Binary).filter(
                                Binary.id == binary.id).delete()
                            db.commit()
                            blacklisted = True
                            break

                if blacklisted:
                    continue

                binary_count += 1

                release = Release()
                release.name = binary.name
                release.original_name = binary.name
                release.posted = binary.posted
                release.posted_by = binary.posted_by
                release.regex_id = binary.regex_id
                release.grabs = 0

                # this counts segment sizes, so we can't use it for large releases
                # use the estimate for min_size and firm it up later during postproc
                if oversized:
                    release.size = est_size
                else:
                    release.size = binary.size()

                # check against minimum size for this group
                undersized = False
                for size, groups in config.postprocess.get('min_size',
                                                           {}).items():
                    if binary.group_name in groups:
                        if release.size < size:
                            undersized = True
                            break

                if undersized:
                    log.debug(
                        'release: [{}] - removed (smaller than minimum size for group)'
                        .format(binary.name))
                    db.query(Binary).filter(Binary.id == binary.id).delete()
                    db.commit()
                    continue

                # check to make sure we have over the configured minimum files
                # this one's okay for big releases, since we're only looking at part-level
                rars = []
                rar_count = 0
                zip_count = 0
                nzb_count = 0

                for part in binary.parts:
                    if pynab.nzbs.rar_part_regex.search(part.subject):
                        rar_count += 1
                    if pynab.nzbs.rar_regex.search(
                            part.subject
                    ) and not pynab.nzbs.metadata_regex.search(part.subject):
                        rars.append(part)
                    if pynab.nzbs.zip_regex.search(
                            part.subject
                    ) and not pynab.nzbs.metadata_regex.search(part.subject):
                        zip_count += 1
                    if pynab.nzbs.nzb_regex.search(part.subject):
                        nzb_count += 1

                # handle min_archives
                # keep, nzb, under
                status = 'keep'
                archive_rules = config.postprocess.get('min_archives', 1)
                if isinstance(archive_rules, dict):
                    # it's a dict - pick the group-specific rule or the '*' catchall
                    # (use a separate name so the Group row loaded above isn't clobbered)
                    if binary.group_name in archive_rules:
                        rule_group = binary.group_name
                    else:
                        rule_group = '*'

                    # make sure the catchall exists
                    if rule_group not in archive_rules:
                        archive_rules[rule_group] = 1

                    # found a special rule
                    if rar_count + zip_count < archive_rules[rule_group]:
                        if nzb_count > 0:
                            status = 'nzb'
                        else:
                            status = 'under'
                else:
                    # it's an integer, globalise that shit yo
                    if rar_count + zip_count < archive_rules:
                        if nzb_count > 0:
                            status = 'nzb'
                        else:
                            status = 'under'

                # if it's an nzb or we're under, kill it
                if status in ['nzb', 'under']:
                    if status == 'nzb':
                        log.debug('release: [{}] - removed (nzb only)'.format(
                            binary.name))
                    elif status == 'under':
                        log.debug(
                            'release: [{}] - removed (less than minimum archives)'
                            .format(binary.name))

                    db.query(Binary).filter(Binary.id == binary.id).delete()
                    db.commit()
                    continue

                # clean the name for searches
                release.search_name = clean_release_name(binary.name)

                # assign the release group
                release.group = group

                # give the release a category
                release.category_id = pynab.categories.determine_category(
                    binary.name, binary.group_name)

                # create the nzb, store it and link it here
                # no need to do anything special for big releases here
                # if it's set to lazyload, it'll kill rows as they're used
                # if it's a small release, it'll go straight from memory
                nzb = pynab.nzbs.create(release.search_name,
                                        parent_categories[release.category_id],
                                        binary)

                if nzb:
                    added_count += 1

                    log.info(
                        'release: [{}]: added release ({} rars, {} rarparts)'.
                        format(release.search_name, len(rars), rar_count))

                    release.nzb = nzb

                    # save the release
                    db.add(release)

                    try:
                        db.flush()
                    except Exception as e:
                        # this sometimes raises if we get a duplicate
                        # this requires a post of the same name at exactly the same time (down to the second)
                        # pretty unlikely, but there we go
                        log.debug(
                            'release: [{}]: duplicate release, discarded'.
                            format(release.search_name))
                        db.rollback()

                    # delete processed binaries
                    db.query(Binary).filter(Binary.id == binary.id).delete()

                    # publish processed releases?
                    if config.scan.get('publish', False):
                        futures = [
                            request_session.post(host, data=to_json(release))
                            for host in config.scan.get('publish_hosts')
                        ]

            db.commit()

    end = time.time()
    log.info('release: added {} out of {} binaries in {:.2f}s'.format(
        added_count, binary_count, end - start))
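Both versions of process() accept min_archives either as a single integer or as a per-group mapping with a '*' catch-all, as the isinstance(dict) branch shows. A standalone sketch of that lookup (the group names and thresholds are illustrative only):

def archive_threshold(archive_rules, group_name):
    # Mirrors the branch in process(): dict -> per-group rule with '*' fallback,
    # anything else -> one global integer threshold.
    if isinstance(archive_rules, dict):
        key = group_name if group_name in archive_rules else '*'
        return archive_rules.get(key, 1)
    return archive_rules

print(archive_threshold(3, 'alt.binaries.example'))                                    # 3
print(archive_threshold({'*': 1, 'alt.binaries.example': 2}, 'alt.binaries.example'))  # 2
print(archive_threshold({'alt.binaries.other': 5}, 'alt.binaries.example'))            # 1 (catch-all default)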
Example #5
import pynab.util
from scripts import nzedb_pre_import

db = Session()
start = time.time()

print('Building tables...')
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)

from alembic.config import Config
from alembic import command

alembic_cfg = Config("alembic.ini")
command.stamp(alembic_cfg, "head")

print('Installing admin user...')
with open('db/initial/users.json', encoding='utf-8', errors='ignore') as f:
    data = json.load(f)
    try:
        engine.execute(User.__table__.insert(), data)
    except Exception as e:
        print('Problem inserting data into database: {}'.format(e))
        sys.exit(0)

print('Copying groups into db...')
with open('db/initial/groups.json', encoding='utf-8', errors='ignore') as f:
    data = json.load(f)
    try:
        engine.execute(Group.__table__.insert(), data)
    except Exception as e:
        print('Problem inserting data into database: {}'.format(e))
        sys.exit(0)

print('Copying categories into db...')
with open('db/initial/categories.json', encoding='utf-8', errors='ignore') as f:
Example #6
    print('Building tables...')
    Base.metadata.drop_all(engine)
    Base.metadata.create_all(engine)

    from alembic.config import Config
    from alembic import command

    alembic_cfg = Config("alembic.ini")
    command.stamp(alembic_cfg, "head")

    print('Installing admin user...')
    with open('db/initial/users.json', encoding='utf-8', errors='ignore') as f:
        data = json.load(f)
        try:
            engine.execute(User.__table__.insert(), data)
        except Exception as e:
            print('Problem inserting data into database: {}'.format(e))
            sys.exit(0)

    print('Copying groups into db...')
    with open('db/initial/groups.json', encoding='utf-8', errors='ignore') as f:
        data = json.load(f)
        try:
            engine.execute(Group.__table__.insert(), data)
        except Exception as e:
            print('Problem inserting data into database: {}'.format(e))
            sys.exit(0)

    print('Copying categories into db...')
    with open('db/initial/categories.json', encoding='utf-8', errors='ignore') as f:
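The install snippets above rely on SQLAlchemy's executemany behaviour: passing a list of row dicts to engine.execute(Table.insert(), data) inserts one row per dict. A minimal, self-contained sketch of that pattern, assuming the legacy SQLAlchemy 1.x engine.execute API these snippets use (the table and columns below are invented for illustration, not pynab's real schema):

import json
from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()


class Group(Base):
    __tablename__ = 'groups'
    id = Column(Integer, primary_key=True)
    name = Column(String, nullable=False)


engine = create_engine('sqlite://')  # throwaway in-memory database
Base.metadata.create_all(engine)

# db/initial/groups.json is assumed to hold a list of row dicts like this:
data = json.loads('[{"name": "alt.binaries.example"}, {"name": "alt.binaries.other"}]')
engine.execute(Group.__table__.insert(), data)

print(engine.execute(Group.__table__.select()).fetchall())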
Example #7
    print('Building tables...')
    Base.metadata.drop_all(engine)
    Base.metadata.create_all(engine)

    from alembic.config import Config
    from alembic import command

    alembic_cfg = Config("alembic.ini")
    command.stamp(alembic_cfg, "head")

    print('Installing admin user...')
    with open('db/initial/users.json', encoding='utf-8', errors='ignore') as f:
        data = json.load(f)
        try:
            engine.execute(User.__table__.insert(), data)
        except Exception as e:
            print('Problem inserting data into database: {}'.format(e))
            sys.exit(0)

    print('Copying groups into db...')
    with open('db/initial/groups.json', encoding='utf-8', errors='ignore') as f:
        data = json.load(f)
        try:
            engine.execute(Group.__table__.insert(), data)
        except Exception as e:
            print('Problem inserting data into database: {}'.format(e))
            sys.exit(0)

    print('Copying categories into db...')
    with open('db/initial/categories.json', encoding='utf-8', errors='ignore') as f:
Example #8
            if arguments[arg]:
                filename = arguments[arg]
                try:
                    if filename.endswith('.gz'):
                        with gzip.open(filename) as infile:
                            data = json.loads(infile.read().decode('utf-8'))
                    else:
                        with open(filename) as infile:
                            data = json.load(infile)
                except Exception as e:
                    # Even though we failed to open the file, other files may be
                    # fine so just issue the warning and continue along.
                    print("Failed to open {}: {}\nSkipping table {}.".format(
                        filename, e, table))
                    continue

                if truncate_table(engine, dbmap[table]):
                    print("Truncated {} table.".format(table))
                else:
                    # database may now be in a bad state, best to exit and let
                    # the user sort things out.
                    print(
                        "Failed to truncate {} table, exiting.".format(table))
                    sys.exit(0)

                try:
                    engine.execute(dbmap[table].__table__.insert(), data)
                except Exception as e:
                    print("Problem inserting data into table {}: {}".format(
                        table, e))
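Example #8's loader branches on the filename: gzip-compressed dumps are read and decoded by hand, while plain files go straight through json.load. A small self-contained sketch of that branch (the file name and contents are made up):

import gzip
import json

filename = 'groups.json.gz'  # hypothetical dump produced elsewhere

# write a tiny compressed dump so the example runs end to end
with gzip.open(filename, 'wb') as outfile:
    outfile.write(json.dumps([{'name': 'alt.binaries.example'}]).encode('utf-8'))

if filename.endswith('.gz'):
    with gzip.open(filename) as infile:
        data = json.loads(infile.read().decode('utf-8'))
else:
    with open(filename) as infile:
        data = json.load(infile)

print(data)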
Example #9
    print('Building tables...')
    Base.metadata.drop_all(engine)
    Base.metadata.create_all(engine)

    from alembic.config import Config
    from alembic import command

    alembic_cfg = Config("alembic.ini")
    command.stamp(alembic_cfg, "head")

    print('Installing admin user...')
    with open('db/initial/users.json', encoding='utf-8', errors='ignore') as f:
        data = json.load(f)
        try:
            engine.execute(User.__table__.insert(), data)
        except Exception as e:
            print('Problem inserting data into database: {}'.format(e))
            sys.exit(0)

    print('Copying groups into db...')
    with open('db/initial/groups.json', encoding='utf-8',
              errors='ignore') as f:
        data = json.load(f)
        try:
            engine.execute(Group.__table__.insert(), data)
        except Exception as e:
            print('Problem inserting data into database: {}'.format(e))
            sys.exit(0)

    print('Copying categories into db...')