def import_data(file_name, model, columns=None):
    """Load the contents of a data file into the table behind ``model``.

    Nothing is imported when the table already holds at least one row, or
    when the data file does not exist; both cases only print a notice.
    Otherwise the file is handed to the raw connection cursor's
    ``copy_from`` and the transaction is committed.

    Args:
        file_name: Path of the data file to read.
        model: Model class. Its ``__tablename__`` names the target table and
            ``model.query.count()`` decides whether the table is empty.
        columns: Names of the columns the file's fields map to. When
            ``None``, ``get_columns(model)`` is used.

    Raises:
        SystemExit: On any ``IOError`` other than "no such file" (ENOENT)
            raised while the file is opened or processed.
    """
    db_connection = db.session.connection().connection
    cursor = db_connection.cursor()
    try:
        with open(file_name) as f:
            # Checking if table already contains any data
            if model.query.count() > 0:
                print("Table %s already contains data. Skipping." % model.__tablename__)
                return
            # and if it doesn't, trying to import data
            print("Importing data into %s table." % model.__tablename__)
            if columns is None:
                columns = get_columns(model)
            # The table name is wrapped in double quotes before being passed on.
            cursor.copy_from(f, '"%s"' % model.__tablename__, columns=columns)
            db_connection.commit()
    except IOError as exception:
        if exception.errno == errno.ENOENT:
            # A missing data file is not fatal: this table is simply skipped.
            print("Can't find data file for %s table. Skipping." % model.__tablename__)
        else:
            sys.exit("Failed to open data file. Error: %s" % exception)
    finally:
        # Release the cursor on every exit path (early return, skip,
        # sys.exit or an unexpected error); it was previously never closed.
        cursor.close()
def import_data(file_name, model, columns=None):
    """Import a data file into the table that ``model`` is mapped to.

    The import only happens when the table is empty and the data file
    exists. In that case the file is streamed into the table through the
    raw connection cursor's ``copy_from`` and the change is committed.
    A non-empty table or a missing file results in a printed notice only.

    Args:
        file_name: Path of the data file to read.
        model: Model class providing ``__tablename__`` (target table) and
            ``query`` (used to count existing rows).
        columns: Column names corresponding to the fields in the file.
            Defaults to ``get_columns(model)`` when ``None``.

    Raises:
        SystemExit: If an ``IOError`` other than ENOENT ("no such file")
            occurs while the file is opened or processed.
    """
    table_name = model.__tablename__
    db_connection = db.session.connection().connection
    cursor = db_connection.cursor()
    try:
        with open(file_name) as f:
            # Checking if table already contains any data
            if model.query.count() > 0:
                print("Table %s already contains data. Skipping." %
                      table_name)
                return
            # and if it doesn't, trying to import data
            print("Importing data into %s table." % table_name)
            if columns is None:
                columns = get_columns(model)
            # The table name is passed wrapped in double quotes.
            cursor.copy_from(f, '"%s"' % table_name, columns=columns)
            db_connection.commit()
    except IOError as exception:
        if exception.errno == errno.ENOENT:
            # No data file for this table: report it and carry on.
            print("Can't find data file for %s table. Skipping." %
                  table_name)
        else:
            sys.exit("Failed to open data file. Error: %s" % exception)
    finally:
        # The cursor used to be left open on every path; always close it,
        # including on early return and on sys.exit().
        cursor.close()
def public(location, rotate=False):
    """Creates a set of archives with public data.

    1. Base archive with license-independent data (users, licenses).
    2. Archive with all reviews and revisions.
    3... Separate archives for each license (contain reviews and revisions
       associated with specific license).

    Only reviews with ``is_hidden = false`` and ``is_draft = false`` (and
    the revisions joined to them) are written to the review archives.

    Args:
        location: Directory in which a new sub-directory named after the
            current time (``%Y%m%d-%H%M%S``) is created for the archives.
        rotate: When true, ``remove_old_archives`` is called on ``location``
            once the dump has been written.
    """
    print("Creating public database dump...")
    time_now = datetime.today()

    cursor = db.session.connection().connection.cursor()

    # Creating a directory where all dumps will go
    dump_dir = os.path.join(location, time_now.strftime('%Y%m%d-%H%M%S'))
    create_path(dump_dir)

    temp_dir = tempfile.mkdtemp()
    try:
        # Preparing meta files
        timestamp_path = os.path.join(temp_dir, 'TIMESTAMP')
        schema_sequence_path = os.path.join(temp_dir, 'SCHEMA_SEQUENCE')
        with open(timestamp_path, 'w') as f:
            f.write(time_now.isoformat(' '))
        with open(schema_sequence_path, 'w') as f:
            f.write(str(model.__version__))

        licenses_dir = os.path.join('critiquebrainz', 'data', 'licenses')

        def add_meta_files(tar, license_file):
            """Add the license text and the two meta files to ``tar``."""
            tar.add(os.path.join(licenses_dir, license_file), arcname='COPYING')
            tar.add(timestamp_path, arcname='TIMESTAMP')
            tar.add(schema_sequence_path, arcname='SCHEMA_SEQUENCE')

        # BASE ARCHIVE
        # Archiving stuff that is independent from licenses (users, licenses)
        with tarfile.open(os.path.join(dump_dir, "cbdump.tar.bz2"), "w:bz2") as tar:
            base_archive_dir = os.path.join(temp_dir, 'cbdump')
            create_path(base_archive_dir)

            # Dumping tables
            base_archive_tables_dir = os.path.join(base_archive_dir, 'cbdump')
            create_path(base_archive_tables_dir)
            with open(os.path.join(base_archive_tables_dir, 'user_sanitised'), 'w') as f:
                # Only this subset of the "user" table's columns is exported.
                cursor.copy_to(f, '"user"',
                               columns=('id', 'created', 'display_name', 'musicbrainz_id'))
            with open(os.path.join(base_archive_tables_dir, 'license'), 'w') as f:
                cursor.copy_to(f, 'license', columns=get_columns(model.License))
            tar.add(base_archive_tables_dir, arcname='cbdump')

            # Including additional information about this archive
            # Copying the most restrictive license there (CC BY-NC-SA 3.0)
            add_meta_files(tar, 'cc-by-nc-sa-30.txt')

            print(" + %s/cbdump.tar.bz2" % dump_dir)

        # REVIEWS
        # Archiving review tables (review, revision)

        # Base queries shared by the combined and the per-license archives.
        review_sql = ("SELECT %s FROM review "
                      "WHERE is_hidden = false AND is_draft = false"
                      % ', '.join(get_columns(model.Review)))
        revision_sql = ("SELECT %s FROM revision JOIN review "
                        "ON review.id = revision.review_id "
                        "WHERE review.is_hidden = false AND review.is_draft = false"
                        % ', '.join(['revision.' + col for col in get_columns(model.Revision)]))

        # 1. COMBINED
        # Archiving all reviews (any license)
        with tarfile.open(os.path.join(dump_dir, "cbdump-reviews-all.tar.bz2"), "w:bz2") as tar:
            # Dumping tables
            reviews_combined_tables_dir = os.path.join(temp_dir, 'cbdump-reviews-all')
            create_path(reviews_combined_tables_dir)
            with open(os.path.join(reviews_combined_tables_dir, 'review'), 'w') as f:
                cursor.copy_to(f, "(%s)" % review_sql)
            with open(os.path.join(reviews_combined_tables_dir, 'revision'), 'w') as f:
                cursor.copy_to(f, "(%s)" % revision_sql)
            tar.add(reviews_combined_tables_dir, arcname='cbdump')

            # Including additional information about this archive
            # Copying the most restrictive license there (CC BY-NC-SA 3.0)
            add_meta_files(tar, 'cc-by-nc-sa-30.txt')

            print(" + %s/cbdump-reviews-all.tar.bz2" % dump_dir)

        # 2. SEPARATE
        # Creating separate archives for each license
        # NOTE(review): license ids are placed into the SQL text as-is; this
        # assumes they never contain a single quote - confirm.
        for lic in model.License.query.all():
            safe_name = slugify(lic.id)
            archive_name = "cbdump-reviews-%s.tar.bz2" % safe_name
            with tarfile.open(os.path.join(dump_dir, archive_name), "w:bz2") as tar:
                # Dumping tables
                tables_dir = os.path.join(temp_dir, safe_name)
                create_path(tables_dir)
                with open(os.path.join(tables_dir, 'review'), 'w') as f:
                    cursor.copy_to(f, "(%s AND license_id = '%s')" % (review_sql, lic.id))
                with open(os.path.join(tables_dir, 'revision'), 'w') as f:
                    cursor.copy_to(f, "(%s AND review.license_id ='%s')" % (revision_sql, lic.id))
                tar.add(tables_dir, arcname='cbdump')

                # Including additional information about this archive;
                # the license text file is looked up by the slugified id.
                add_meta_files(tar, safe_name + ".txt")

            print(" + %s/cbdump-reviews-%s.tar.bz2" % (dump_dir, safe_name))
    finally:
        # Cleanup. Runs even when a dump step fails, so the temporary
        # directory is not left behind and the cursor is released.
        shutil.rmtree(temp_dir)
        cursor.close()

    if rotate:
        print("Removing old dumps (except two latest)...")
        remove_old_archives(location, "[0-9]+-[0-9]+", is_dir=True)

    print("Done!")
def public(
        location=os.path.join(os.getcwd(), 'export', 'public'), rotate=False):
    """Creates a set of archives with public data.

    1. Base archive with license-independent data (users, licenses).
    2. Archive with all reviews and revisions.
    3... Separate archives for each license (contain reviews and revisions
       associated with specific license).

    Args:
        location: Directory in which a new sub-directory named after the
            current time (``%Y%m%d-%H%M%S``) is created for the archives.
            The default is built from ``os.getcwd()`` once, when the
            function is defined, not on each call.
        rotate: When true, ``remove_old_archives`` is called on ``location``
            once the dump has been written.
    """
    print("Creating public database dump...")
    time_now = datetime.today()

    cursor = db.session.connection().connection.cursor()

    # Creating a directory where all dumps will go
    dump_dir = os.path.join(location, time_now.strftime('%Y%m%d-%H%M%S'))
    create_path(dump_dir)

    temp_dir = tempfile.mkdtemp()
    try:
        # Preparing meta files
        with open(os.path.join(temp_dir, 'TIMESTAMP'), 'w') as f:
            f.write(time_now.isoformat(' '))
        with open(os.path.join(temp_dir, 'SCHEMA_SEQUENCE'), 'w') as f:
            f.write(str(model.__version__))

        # BASE ARCHIVE
        # Archiving stuff that is independent from licenses (users, licenses)
        with tarfile.open(os.path.join(dump_dir, "cbdump.tar.bz2"),
                          "w:bz2") as tar:
            base_archive_dir = os.path.join(temp_dir, 'cbdump')
            create_path(base_archive_dir)

            # Dumping tables
            base_archive_tables_dir = os.path.join(base_archive_dir, 'cbdump')
            create_path(base_archive_tables_dir)
            with open(os.path.join(base_archive_tables_dir, 'user_sanitised'),
                      'w') as f:
                # Only this subset of the "user" table's columns is exported.
                cursor.copy_to(f,
                               '"user"',
                               columns=('id', 'created', 'display_name',
                                        'musicbrainz_id'))
            with open(os.path.join(base_archive_tables_dir, 'license'),
                      'w') as f:
                cursor.copy_to(f, 'license',
                               columns=get_columns(model.License))
            tar.add(base_archive_tables_dir, arcname='cbdump')

            # Including additional information about this archive
            # Copying the most restrictive license there (CC BY-NC-SA 3.0)
            tar.add(os.path.join('critiquebrainz', 'data', 'licenses',
                                 'cc-by-nc-sa-30.txt'),
                    arcname='COPYING')
            tar.add(os.path.join(temp_dir, 'TIMESTAMP'), arcname='TIMESTAMP')
            tar.add(os.path.join(temp_dir, 'SCHEMA_SEQUENCE'),
                    arcname='SCHEMA_SEQUENCE')

            print(" + %s/cbdump.tar.bz2" % dump_dir)

        # REVIEWS
        # Archiving review tables (review, revision)
        # NOTE(review): no filter on review state is applied to any of the
        # review/revision dumps below - confirm that every row is meant to
        # be part of a public dump.

        # 1. COMBINED
        # Archiving all reviews (any license)
        with tarfile.open(os.path.join(dump_dir, "cbdump-reviews-all.tar.bz2"),
                          "w:bz2") as tar:
            # Dumping tables
            reviews_combined_tables_dir = os.path.join(temp_dir,
                                                       'cbdump-reviews-all')
            create_path(reviews_combined_tables_dir)
            with open(os.path.join(reviews_combined_tables_dir, 'review'),
                      'w') as f:
                cursor.copy_to(f, 'review', columns=get_columns(model.Review))
            with open(os.path.join(reviews_combined_tables_dir, 'revision'),
                      'w') as f:
                cursor.copy_to(f, 'revision',
                               columns=get_columns(model.Revision))
            tar.add(reviews_combined_tables_dir, arcname='cbdump')

            # Including additional information about this archive
            # Copying the most restrictive license there (CC BY-NC-SA 3.0)
            tar.add(os.path.join('critiquebrainz', 'data', 'licenses',
                                 'cc-by-nc-sa-30.txt'),
                    arcname='COPYING')
            tar.add(os.path.join(temp_dir, 'TIMESTAMP'), arcname='TIMESTAMP')
            tar.add(os.path.join(temp_dir, 'SCHEMA_SEQUENCE'),
                    arcname='SCHEMA_SEQUENCE')

            print(" + %s/cbdump-reviews-all.tar.bz2" % dump_dir)

        # 2. SEPARATE
        # Creating separate archives for each license
        # NOTE(review): license ids are placed into the SQL text as-is; this
        # assumes they never contain a single quote - confirm.
        for license in License.query.all():
            safe_name = slugify(license.id)
            with tarfile.open(
                    os.path.join(dump_dir,
                                 "cbdump-reviews-%s.tar.bz2" % safe_name),
                    "w:bz2") as tar:
                # Dumping tables
                tables_dir = os.path.join(temp_dir, safe_name)
                create_path(tables_dir)
                # The column lists must NOT be wrapped in parentheses:
                # "SELECT (a, b)" is a row constructor in PostgreSQL and
                # yields one composite value per row instead of separate
                # columns, unlike the plain table dumps above.
                with open(os.path.join(tables_dir, 'review'), 'w') as f:
                    cursor.copy_to(
                        f, "(SELECT %s FROM review WHERE license_id = '%s')" %
                        (', '.join(get_columns(model.Review)), license.id))
                with open(os.path.join(tables_dir, 'revision'), 'w') as f:
                    revision_columns = ', '.join(
                        'revision.' + col
                        for col in get_columns(model.Revision))
                    cursor.copy_to(
                        f,
                        "(SELECT %s FROM revision JOIN review "
                        "ON revision.review_id = review.id "
                        "WHERE review.license_id = '%s')"
                        % (revision_columns, license.id))
                tar.add(tables_dir, arcname='cbdump')

                # Including additional information about this archive;
                # the license text file is looked up by the slugified id.
                tar.add(os.path.join("critiquebrainz", "data", "licenses",
                                     safe_name + ".txt"),
                        arcname='COPYING')
                tar.add(os.path.join(temp_dir, 'TIMESTAMP'),
                        arcname='TIMESTAMP')
                tar.add(os.path.join(temp_dir, 'SCHEMA_SEQUENCE'),
                        arcname='SCHEMA_SEQUENCE')

            print(" + %s/cbdump-reviews-%s.tar.bz2" % (dump_dir, safe_name))
    finally:
        # Cleanup. Runs even when a dump step fails, so the temporary
        # directory is not left behind and the cursor is released.
        shutil.rmtree(temp_dir)
        cursor.close()

    if rotate:
        print("Removing old dumps (except two latest)...")
        remove_old_archives(location, "[0-9]+-[0-9]+", is_dir=True)

    print("Done!")