コード例 #1
0
 def test_list_licenses(self):
     """Create one license and check it is the first entry of ``list_licenses``."""
     expected = {
         "id": "test",
         "full_name": "Test license",
         "info_url": "www.example.com",
     }
     # The stored license is created from the same fields we expect back.
     db_license.create(**expected)
     first_license = db_license.list_licenses()[0]
     self.assertDictEqual(expected, first_license)
コード例 #2
0
def public(location, rotate=False):
    """Create a set of archives with public data.

    1. Base archive with license-independent data (users, licenses).
    2. Archive with all reviews and revisions.
    3... Separate archives for each license (contain reviews and revisions
       associated with a specific license).

    All archives are written into a new sub-directory of *location* named
    after the current timestamp. The temporary directory holding the meta
    files is removed even if one of the archive steps fails.

    Args:
        location: Directory under which the timestamped dump directory is
            created.
        rotate: If true, ``remove_old_archives`` is called on *location*
            after the dump has been written.
    """
    print("Creating public database dump...")
    time_now = datetime.today()

    # Creating a directory where all dumps will go
    dump_dir = os.path.join(location, time_now.strftime('%Y%m%d-%H%M%S'))
    create_path(dump_dir)

    # Prepare meta files
    meta_files_dir = tempfile.mkdtemp()
    try:
        prepare_meta_files(meta_files_dir, time_now=time_now)

        # 1. BASE ARCHIVE
        # Contains all license independent data (licenses, users)
        base_archive_path = create_base_archive(
            location=dump_dir,
            meta_files_dir=meta_files_dir,
        )
        print(base_archive_path)

        # 2. COMBINED
        # Archiving all reviews (any license)
        review_dump_path = create_reviews_archive(
            location=dump_dir,
            meta_files_dir=meta_files_dir,
        )
        print(review_dump_path)

        # 3. SEPARATE
        # Creating separate archives for each license
        for license_info in db_license.list_licenses():
            review_dump_path = create_reviews_archive(
                location=dump_dir,
                meta_files_dir=meta_files_dir,
                license_id=license_info['id'],
            )
            print(review_dump_path)
    finally:
        # Cleanup must also happen on failure, otherwise every aborted run
        # leaves a stray temporary directory behind.
        shutil.rmtree(meta_files_dir)

    if rotate:
        print("Removing old dumps (except two latest)...")
        remove_old_archives(location, "[0-9]+-[0-9]+", is_dir=True)

    print("Done!")
コード例 #3
0
def json(location, rotate=False):
    """Create JSON dumps with all reviews.

    This command creates one ``.tar.bz2`` archive per license returned by
    ``db_license.list_licenses()`` and puts it into the *location* directory.
    Each archive contains a ``reviews`` tree with one JSON file per entity
    that has reviews under that license, plus the license text as ``COPYING``.

    Args:
        location: Directory the archives are written to (created if needed).
        rotate: If true, ``remove_old_archives`` is called on *location*
            after all archives have been written.
    """
    create_path(location)

    current_app.json_encoder = DumpJSONEncoder

    print("Creating new archives...")
    for license_info in db_license.list_licenses():
        safe_name = slugify(license_info["id"])
        # Build the archive name once so the file on disk and the progress
        # message cannot disagree if the date changes while the dump runs.
        archive_name = "critiquebrainz-%s-%s-json.tar.bz2" % (
            datetime.today().strftime('%Y%m%d'), safe_name)
        with tarfile.open(os.path.join(location, archive_name), "w:bz2") as tar:
            temp_dir = tempfile.mkdtemp()
            try:
                license_dir = os.path.join(temp_dir, safe_name)
                create_path(license_dir)

                # distinct_entities() is called without a license filter;
                # the per-license selection happens in list_reviews() below,
                # and entities without matching reviews are skipped.
                for entity in db_review.distinct_entities():
                    entity = str(entity)
                    reviews = db_review.list_reviews(
                        entity_id=entity,
                        license_id=license_info["id"],
                        limit=None,
                    )[0]
                    if not reviews:
                        continue

                    # Files are sharded into <1st char>/<first 2 chars>/ of
                    # the entity ID.
                    rg_dir = os.path.join(license_dir, entity[0:1], entity[0:2])
                    create_path(rg_dir)
                    payload = jsonify(
                        reviews=[db_review.to_dict(r) for r in reviews]
                    ).data.decode("utf-8")
                    # Write with the same encoding the payload was decoded
                    # with instead of relying on the locale default.
                    with open(os.path.join(rg_dir, '%s.json' % entity), 'w',
                              encoding='utf-8') as f:
                        f.write(payload)

                tar.add(license_dir, arcname='reviews')

                # Copying legal text
                tar.add(os.path.join("critiquebrainz", "data", "licenses", safe_name + ".txt"),
                        arcname='COPYING')

                print(" + %s/%s" % (location, archive_name))
            finally:
                # Cleanup, also when building the archive failed.
                shutil.rmtree(temp_dir)

    if rotate:
        print("Removing old sets of archives (except two latest)...")
        # Raw string: "\w" is a regex class, not a string escape.
        remove_old_archives(location, r"critiquebrainz-[0-9]+-[-\w]+-json.tar.bz2",
                            is_dir=False, sort_key=os.path.getmtime)

    print("Done!")
コード例 #4
0
def public(location, rotate=False):
    """Create a set of archives with public data.

    1. Base archive with license-independent data (users, licenses).
    2. Archive with all reviews and revisions.
    3... Separate archives for each license (contain reviews and revisions
       associated with a specific license).

    All archives are written into a new sub-directory of *location* named
    after the current timestamp. The raw database connection and the
    temporary working directory are released even if dumping fails part-way.

    Args:
        location: Directory under which the timestamped dump directory is
            created.
        rotate: If true, ``remove_old_archives`` is called on *location*
            after the dump has been written.
    """
    print("Creating public database dump...")
    time_now = datetime.today()

    connection = db.engine.raw_connection()
    try:
        cursor = connection.cursor()

        # Creating a directory where all dumps will go
        dump_dir = os.path.join(location, time_now.strftime('%Y%m%d-%H%M%S'))
        create_path(dump_dir)

        temp_dir = tempfile.mkdtemp()
        try:
            # Preparing meta files
            with open(os.path.join(temp_dir, 'TIMESTAMP'), 'w') as f:
                f.write(time_now.isoformat(' '))
            with open(os.path.join(temp_dir, 'SCHEMA_SEQUENCE'), 'w') as f:
                f.write(str(db.SCHEMA_VERSION))

            # BASE ARCHIVE
            # Archiving stuff that is independent from licenses (users, licenses)
            _dump_public_base(cursor, dump_dir, temp_dir)

            # REVIEWS
            # Archiving review tables (review, revision)
            # Only revisions of reviews that are neither hidden nor drafts
            # are exported; the per-license dumps append one more condition.
            revision_combined_sql = """
        SELECT {columns} FROM revision JOIN review
            ON review.id = revision.review_id
         WHERE review.is_hidden = false AND review.is_draft = false
    """.format(
                columns=', '.join(['revision.' + col for col in _TABLES["revision"]]))

            # 1. COMBINED
            # Archiving all reviews (any license)
            _dump_public_reviews_all(cursor, dump_dir, temp_dir,
                                     revision_combined_sql)

            # 2. SEPARATE
            # Creating separate archives for each license
            for license_info in db_license.list_licenses():
                _dump_public_reviews_for_license(
                    cursor, dump_dir, temp_dir, revision_combined_sql,
                    license_info["id"])
        finally:
            # Cleanup, also when one of the dump steps raised.
            shutil.rmtree(temp_dir)
    finally:
        connection.close()

    if rotate:
        print("Removing old dumps (except two latest)...")
        remove_old_archives(location, "[0-9]+-[0-9]+", is_dir=True)

    print("Done!")


def _add_public_dump_metadata(tar, temp_dir, license_file):
    """Add the COPYING, TIMESTAMP and SCHEMA_SEQUENCE members to *tar*.

    *license_file* is the file name inside ``critiquebrainz/data/licenses``
    that is stored in the archive as ``COPYING``.
    """
    tar.add(os.path.join('critiquebrainz', 'data', 'licenses', license_file),
            arcname='COPYING')
    tar.add(os.path.join(temp_dir, 'TIMESTAMP'), arcname='TIMESTAMP')
    tar.add(os.path.join(temp_dir, 'SCHEMA_SEQUENCE'),
            arcname='SCHEMA_SEQUENCE')


def _dump_public_base(cursor, dump_dir, temp_dir):
    """Write ``cbdump.tar.bz2`` with the license-independent tables."""
    with tarfile.open(os.path.join(dump_dir, "cbdump.tar.bz2"),
                      "w:bz2") as tar:
        base_archive_dir = os.path.join(temp_dir, 'cbdump')
        create_path(base_archive_dir)

        # Dumping tables
        base_archive_tables_dir = os.path.join(base_archive_dir, 'cbdump')
        create_path(base_archive_tables_dir)
        # Only these four columns of the "user" table are exported.
        with open(os.path.join(base_archive_tables_dir, 'user_sanitised'),
                  'w') as f:
            cursor.copy_to(f,
                           '"user"',
                           columns=('id', 'created', 'display_name',
                                    'musicbrainz_id'))
        with open(os.path.join(base_archive_tables_dir, 'license'), 'w') as f:
            cursor.copy_to(f, 'license', columns=_TABLES["license"])
        tar.add(base_archive_tables_dir, arcname='cbdump')

        # Including additional information about this archive
        # Copying the most restrictive license there (CC BY-NC-SA 3.0)
        _add_public_dump_metadata(tar, temp_dir, 'cc-by-nc-sa-30.txt')

        print(" + %s/cbdump.tar.bz2" % dump_dir)


def _dump_public_reviews_all(cursor, dump_dir, temp_dir, revision_sql):
    """Write ``cbdump-reviews-all.tar.bz2`` with reviews of every license."""
    with tarfile.open(os.path.join(dump_dir, "cbdump-reviews-all.tar.bz2"),
                      "w:bz2") as tar:
        # Dumping tables
        reviews_combined_tables_dir = os.path.join(temp_dir,
                                                   'cbdump-reviews-all')
        create_path(reviews_combined_tables_dir)
        # NOTE(review): a parenthesised subquery is passed where copy_to
        # takes a table name; this relies on the driver inserting it into
        # the COPY statement verbatim -- confirm for the installed version.
        with open(os.path.join(reviews_combined_tables_dir, 'review'),
                  'w') as f:
            cursor.copy_to(
                f,
                "(SELECT {columns} FROM review WHERE is_hidden = false AND is_draft = false)"
                .format(columns=', '.join(_TABLES["review"])))
        with open(os.path.join(reviews_combined_tables_dir, 'revision'),
                  'w') as f:
            cursor.copy_to(f, "({sql})".format(sql=revision_sql))
        tar.add(reviews_combined_tables_dir, arcname='cbdump')

        # Including additional information about this archive
        # Copying the most restrictive license there (CC BY-NC-SA 3.0)
        _add_public_dump_metadata(tar, temp_dir, 'cc-by-nc-sa-30.txt')

        print(" + %s/cbdump-reviews-all.tar.bz2" % dump_dir)


def _dump_public_reviews_for_license(cursor, dump_dir, temp_dir, revision_sql,
                                     license_id):
    """Write ``cbdump-reviews-<slug>.tar.bz2`` for a single license."""
    safe_name = slugify(license_id)
    with tarfile.open(
            os.path.join(dump_dir,
                         "cbdump-reviews-%s.tar.bz2" % safe_name),
            "w:bz2") as tar:
        # Dumping tables
        tables_dir = os.path.join(temp_dir, safe_name)
        create_path(tables_dir)
        # NOTE(review): the license id is interpolated straight into the SQL
        # text. It is read from db_license.list_licenses() rather than from
        # user input, but an id containing a quote would break the query.
        with open(os.path.join(tables_dir, 'review'), 'w') as f:
            cursor.copy_to(
                f, """(
                    SELECT {columns}
                      FROM review
                     WHERE is_hidden = false
                       AND is_draft = false
                       AND license_id = '{license_id}'
                )""".format(columns=', '.join(_TABLES["review"]),
                            license_id=license_id))
        with open(os.path.join(tables_dir, 'revision'), 'w') as f:
            cursor.copy_to(
                f,
                """({REVISION_COMBINED_SQL} AND review.license_id='{license_id}')"""
                .format(REVISION_COMBINED_SQL=revision_sql,
                        license_id=license_id))
        tar.add(tables_dir, arcname='cbdump')

        # Including additional information about this archive
        _add_public_dump_metadata(tar, temp_dir, safe_name + ".txt")

    print(" + %s/cbdump-reviews-%s.tar.bz2" % (dump_dir, safe_name))