Code example #1
def public(location, rotate=False):
    """Creates a set of archives with public data.

    1. Base archive with license-independent data (users, licenses).
    2. Archive with all reviews and revisions.
    3. Separate archives for each license (containing the reviews and revisions associated with that specific license).
    """
    print("Creating public database dump...")
    time_now = datetime.today()

    # Creating a directory where all dumps will go
    dump_dir = os.path.join(location, time_now.strftime('%Y%m%d-%H%M%S'))
    create_path(dump_dir)

    # Prepare meta files
    meta_files_dir = tempfile.mkdtemp()
    prepare_meta_files(meta_files_dir, time_now=time_now)

    with db.engine.begin() as connection:
        # BASE ARCHIVE
        # Contains all license independent data (licenses, users)
        base_archive_path = create_base_archive(
            connection,
            location=dump_dir,
            meta_files_dir=meta_files_dir,
        )
        print(base_archive_path)

        # 1. COMBINED
        # Archiving all reviews (any license)
        review_dump_path = create_reviews_archive(
            connection,
            location=dump_dir,
            meta_files_dir=meta_files_dir,
        )
        print(review_dump_path)

        # 2. SEPARATE
        # Creating separate archives for each license
        for license in db_license.get_licenses_list(connection):
            review_dump_path = create_reviews_archive(
                connection,
                location=dump_dir,
                meta_files_dir=meta_files_dir,
                license_id=license['id'],
            )
            print(review_dump_path)

    shutil.rmtree(meta_files_dir)  # Cleanup
    if rotate:
        print("Removing old dumps (except two latest)...")
        remove_old_archives(location, "[0-9]+-[0-9]+", is_dir=True)

    print("Done!")
Code example #2
def json(location, rotate=False):
    """Create JSON dumps with all reviews.

    This command creates an archive for each license available on CritiqueBrainz.
    Archives are put into the specified directory (default is *dump*).
    """
    create_path(location)

    current_app.json_encoder = DumpJSONEncoder

    print("Creating new archives...")
    with db.engine.begin() as connection:
        for license in db_license.get_licenses_list(connection):
            safe_name = slugify(license["id"])
            with tarfile.open(os.path.join(location, "critiquebrainz-%s-%s-json.tar.bz2" %
                                           (datetime.today().strftime('%Y%m%d'), safe_name)), "w:bz2") as tar:
                temp_dir = tempfile.mkdtemp()
                license_dir = os.path.join(temp_dir, safe_name)
                create_path(license_dir)

                # Get all reviewed entities; reviews are filtered by the current license below
                entities = db_review.get_distinct_entities(connection)
                for entity in entities:
                    entity = str(entity)
                    # Creating directory structure and dumping reviews
                    dir_part = os.path.join(entity[0:1], entity[0:2])
                    reviews = db_review.get_reviews_list(connection, entity_id=entity, license_id=license["id"], limit=None)[0]
                    if reviews:
                        rg_dir = os.path.join(license_dir, dir_part)
                        create_path(rg_dir)
                        with open(os.path.join(rg_dir, '%s.json' % entity), 'w+') as f:
                            f.write(jsonify(reviews=[db_review.to_dict(r, connection=connection) for r in reviews])
                                    .data.decode("utf-8"))

                tar.add(license_dir, arcname='reviews')

                # Copying legal text
                tar.add(os.path.join(os.path.dirname(os.path.realpath(__file__)), "licenses", safe_name + ".txt"),
                        arcname='COPYING')

                print(" + %s/critiquebrainz-%s-%s-json.tar.bz2" % (location, datetime.today().strftime('%Y%m%d'), safe_name))

                shutil.rmtree(temp_dir)  # Cleanup

        if rotate:
            print("Removing old sets of archives (except two latest)...")
            remove_old_archives(location, r"critiquebrainz-[0-9]+-[-\w]+-json.tar.bz2",
                                is_dir=False, sort_key=os.path.getmtime)

        print("Done!")