コード例 #1
0
def test_uncompress_tarpaths(tmp_path, datadir, prepare_shutil_state):
    """High level call uncompression on un/supported tarballs

    """
    archive_dir = os.path.join(datadir, "archives")
    tarfiles = os.listdir(archive_dir)
    tarpaths = [os.path.join(archive_dir, tarfile) for tarfile in tarfiles]

    # formats shutil does not handle out of the box
    unsupported_tarpaths = [
        t for t in tarpaths if t.endswith((".Z", ".x", ".lz"))
    ]

    # not supported yet
    for tarpath in unsupported_tarpaths:
        with pytest.raises(ValueError,
                           match=f"Problem during unpacking {tarpath}."):
            tarball.uncompress(tarpath, dest=tmp_path)

    # register those unsupported formats
    tarball.register_new_archive_formats()

    # unsupported formats are now supported; use an explicit counter so the
    # final assertion cannot raise NameError when tarpaths is empty (the
    # previous enumerate-based counter was unbound in that case)
    uncompressed = 0
    for tarpath in tarpaths:
        tarball.uncompress(tarpath, dest=tmp_path)
        uncompressed += 1

    assert uncompressed == len(tarpaths)
コード例 #2
0
def aggregate_tarballs(extraction_dir, archive_paths):
    """Aggregate multiple tarballs into a single tar archive.

    Generator meant to be driven like a context manager: it yields the
    path to the aggregated tarball, then removes every temporary file
    when resumed (e.g. on context exit).

    Args:
        extraction_dir (str): Path to use for the tarballs computation
        archive_paths ([str]): Deposit's archive paths

    Yields:
        str: Path to the aggregated tarball

    """
    # rebuild one archive from (possibly) multiple ones
    os.makedirs(extraction_dir, 0o755, exist_ok=True)
    dir_path = tempfile.mkdtemp(prefix="swh.deposit-", dir=extraction_dir)

    # guard the whole build so dir_path is removed even when uncompress
    # or make_archive fails before the yield (previously it leaked)
    try:
        # root folder to build an aggregated tarball
        aggregated_tarball_rootdir = os.path.join(dir_path, "aggregate")
        os.makedirs(aggregated_tarball_rootdir, 0o755, exist_ok=True)

        # uncompress in a temporary location all archives
        for archive_path in archive_paths:
            tarball.uncompress(archive_path, aggregated_tarball_rootdir)

        # Aggregate into one big tarball the multiple smaller ones
        temp_tarpath = shutil.make_archive(aggregated_tarball_rootdir, "tar",
                                           aggregated_tarball_rootdir)
        # can already clean up the uncompressed content
        shutil.rmtree(aggregated_tarball_rootdir)

        yield temp_tarpath
    finally:
        shutil.rmtree(dir_path)
コード例 #3
0
def test_cran_extract_intrinsic_metadata(tmp_path, datadir):
    """Uncompressing a known CRAN tarball and parsing its metadata must
    produce the expected dict."""
    extraction_dir = str(tmp_path)
    # sample url
    # https://cran.r-project.org/src_contrib_1.4.0_Recommended_KernSmooth_2.22-6.tar.gz  # noqa
    archive_path = path.join(
        datadir,
        "https_cran.r-project.org",
        "src_contrib_1.4.0_Recommended_KernSmooth_2.22-6.tar.gz",
    )
    uncompress(archive_path, dest=extraction_dir)

    expected_metadata = {
        "Package": "KernSmooth",
        "Priority": "recommended",
        "Version": "2.22-6",
        "Date": "2001-June-08",
        "Title": "Functions for kernel smoothing for Wand & Jones (1995)",
        "Author": "S original by Matt Wand.\n\tR port by  Brian Ripley <*****@*****.**>.",  # noqa
        "Maintainer": "Brian Ripley <*****@*****.**>",
        "Description": 'functions for kernel smoothing (and density estimation)\n  corresponding to the book: \n  Wand, M.P. and Jones, M.C. (1995) "Kernel Smoothing".',  # noqa
        "License": "Unlimited use and distribution (see LICENCE).",
        "URL": "http://www.biostat.harvard.edu/~mwand",
    }

    assert extract_intrinsic_metadata(extraction_dir) == expected_metadata
コード例 #4
0
def test_uncompress_tar_failure(tmp_path, datadir):
    """Unpacking a tarball that does not exist must raise ValueError."""
    missing_tarpath = os.path.join(
        datadir, "archives", "inexistent-archive.tar.Z")

    # sanity check: the fixture really is absent
    assert not os.path.exists(missing_tarpath)

    with pytest.raises(ValueError, match="Problem during unpacking"):
        tarball.uncompress(missing_tarpath, tmp_path)
コード例 #5
0
def test_uncompress_archives(tmp_path, datadir):
    """Every supported archive in the data dir must extract to a
    non-empty directory."""
    archive_dir = os.path.join(datadir, "archives")
    for name in os.listdir(archive_dir):
        destination = os.path.join(tmp_path, name)
        tarball.uncompress(os.path.join(archive_dir, name), dest=destination)
        assert os.listdir(destination)
コード例 #6
0
def test_uncompress_tar(tmp_path, datadir):
    """Unpack supported tarball into an existent folder should be ok"""
    name = "groff-1.02.tar.Z"
    source = os.path.join(datadir, "archives", name)
    # sanity check: the fixture archive exists
    assert os.path.exists(source)

    destination = os.path.join(tmp_path, name)
    tarball.uncompress(source, destination)

    assert os.listdir(destination)
コード例 #7
0
def test_uncompress_zip_imploded(tmp_path, datadir):
    """Unpack a zip archive with compression type 6 (implode),
    not supported by python zipfile module.

    """
    # NOTE: function renamed from the typo'd "test_unpcompress_zip_imploded";
    # pytest discovers tests by prefix, so the rename is safe.
    filename = "msk316src.zip"
    zippath = os.path.join(datadir, "archives", filename)

    assert os.path.exists(zippath)

    extract_dir = os.path.join(tmp_path, filename)

    tarball.uncompress(zippath, extract_dir)

    assert len(os.listdir(extract_dir)) > 0
コード例 #8
0
def test_compress_uncompress_tar(tmp_path):
    """Round-trip: compress a directory to tar, uncompress it, and
    verify the listing matches."""
    srcdir = tmp_path / "compressme"
    srcdir.mkdir()

    expected_names = []
    for idx in range(10):
        name = "file%s.txt" % idx
        (srcdir / name).write_text("content of file %s" % idx)
        expected_names.append(name)

    archive = tmp_path / "archive.tar"
    tarball.compress(str(archive), "tar", str(srcdir))

    outdir = tmp_path / "destdir"
    tarball.uncompress(str(archive), str(outdir))

    assert expected_names == sorted(entry.name for entry in outdir.iterdir())
コード例 #9
0
def test_uncompress_upper_archive_extension(tmp_path, datadir):
    """Archives copied under an uppercased filename must still be
    recognized and extracted successfully."""
    archives_path = os.path.join(datadir, "archives")
    for entry in os.listdir(archives_path):
        source = os.path.join(archives_path, entry)
        if not os.path.isfile(source):
            continue
        upper_copy = os.path.join(tmp_path, entry.upper())
        shutil.copy(source, upper_copy)
        destination = os.path.join(tmp_path, entry)
        tarball.uncompress(upper_copy, destination)
        assert os.listdir(destination)
コード例 #10
0
def jar_dirs(datadir, tmp_path):
    """Uncompress the two sample jars and return their on-disk
    Directory models."""
    maven_dir = os.path.join(datadir, "https_maven.org")
    jar_names = (
        "sprova4j-0.1.0-sources.jar",
        "sprova4j-0.1.1-sources.jar",
    )

    directories = []
    for idx, jar_name in enumerate(jar_names, start=1):
        extract_path = os.path.join(tmp_path, "jar_%d" % idx)
        uncompress(os.path.join(maven_dir, jar_name), extract_path)
        directories.append(Directory.from_disk(path=extract_path.encode()))

    return directories
コード例 #11
0
    def fetch_data(self):
        """Retrieve, uncompress archive and fetch objects from the tarball.
           The actual ingestion takes place in the :meth:`store_data`
           implementation below.

        """
        url = self.get_tarball_url_to_retrieve()
        # download yields the local archive path plus its hashes
        # (presumably checksums computed during download -- confirm in client)
        filepath, hashes = self.client.download(url)
        # 'nature' is whatever uncompress reports about the archive;
        # it is forwarded verbatim to build_revision below
        nature = tarball.uncompress(filepath, self.dir_path)

        dir_path = self.dir_path.encode('utf-8')
        directory = Directory.from_disk(path=dir_path, save_path=True)
        objects = directory.collect()
        # guarantee both keys exist even when the archive yields none
        if 'content' not in objects:
            objects['content'] = {}
        if 'directory' not in objects:
            objects['directory'] = {}

        # compute the full revision (with ids)
        revision = self.build_revision(filepath, nature, hashes)
        revision = revision_from(directory.hash, revision)
        objects['revision'] = {
            revision['id']: revision,
        }

        # one snapshot keyed by id, pointing at the revision built above
        snapshot = self.build_snapshot(revision)
        objects['snapshot'] = {
            snapshot['id']: snapshot
        }
        self.objects = objects
コード例 #12
0
def init_git_repo_from_archive(project_name,
                               archive_path,
                               root_temp_dir="/tmp"):
    """Given a path to an archive containing a git repository.

    Uncompress that archive to a temporary location and returns the path.

    If any problem whatsoever is raised, clean up the temporary location.

    Args:
        project_name (str): Project's name
        archive_path (str): Full path to the archive
        root_temp_dir (str): Optional temporary directory mount point
                             (default to /tmp)

    Returns
        A tuple:
        - temporary folder: containing the mounted repository
        - repo_path, path to the mounted repository inside the temporary folder

    Raises
        ValueError in case of failure to run the command to uncompress

    """
    temp_dir = tempfile.mkdtemp(suffix=".swh.loader.git",
                                prefix="tmp.",
                                dir=root_temp_dir)

    try:
        # create the repository that will be loaded with the dump
        tarball.uncompress(archive_path, temp_dir)
        repo_path = os.path.join(temp_dir, project_name)
        # tarball content may not be as expected (e.g. no top level directory
        # or a top level directory with a name different from project_name),
        # so try to make it loadable anyway
        if not os.path.exists(repo_path):
            os.mkdir(repo_path)
            for root, dirs, files in os.walk(temp_dir):
                if ".git" in dirs:
                    shutil.copytree(os.path.join(root, ".git"),
                                    os.path.join(repo_path, ".git"))
                    break
        return temp_dir, repo_path
    except Exception:
        shutil.rmtree(temp_dir)
        # bare raise preserves the original traceback exactly
        # (previously `raise e`, which re-raises through this frame)
        raise
コード例 #13
0
def test_uncompress_archive_no_extension(tmp_path, datadir):
    """Archives renamed to their md5 digest (hence no extension) must
    still be recognized and extracted successfully."""
    archives_path = os.path.join(datadir, "archives")
    for entry in os.listdir(archives_path):
        source = os.path.join(archives_path, entry)
        if not os.path.isfile(source):
            continue
        with open(source, "rb") as f:
            digest = hashlib.md5(f.read()).hexdigest()
        extensionless_copy = os.path.join(tmp_path, digest)
        shutil.copy(source, extensionless_copy)
        destination = os.path.join(tmp_path, entry)
        tarball.uncompress(extensionless_copy, destination)
        assert os.listdir(destination)
コード例 #14
0
def test_compress_uncompress_tar_modes(tmp_path):
    """File permission bits must survive a tar compress/uncompress
    round-trip."""
    srcdir = tmp_path / "compressme"
    srcdir.mkdir()

    regular = srcdir / "text.txt"
    regular.write_text("echo foo")
    regular.chmod(0o644)

    script = srcdir / "executable.sh"
    script.write_text("echo foo")
    script.chmod(0o755)

    archive = tmp_path / "archive.tar"
    tarball.compress(str(archive), "tar", str(srcdir))

    outdir = tmp_path / "destdir"
    tarball.uncompress(str(archive), str(outdir))

    script_out, regular_out = sorted(outdir.iterdir())
    assert regular_out.stat().st_mode == 0o100644
    assert script_out.stat().st_mode == 0o100755
コード例 #15
0
def test_compress_uncompress_zip_modes(tmp_path):
    """File permission bits should survive a zip compress/uncompress
    round-trip."""
    srcdir = tmp_path / "compressme"
    srcdir.mkdir()

    regular = srcdir / "text.txt"
    regular.write_text("echo foo")
    regular.chmod(0o644)

    script = srcdir / "executable.sh"
    script.write_text("echo foo")
    script.chmod(0o755)

    archive = tmp_path / "archive.zip"
    tarball.compress(str(archive), "zip", str(srcdir))

    outdir = tmp_path / "destdir"
    tarball.uncompress(str(archive), str(outdir))

    script_out, regular_out = sorted(outdir.iterdir())
    # 0o644 is the default mode, so this check passes regardless
    assert regular_out.stat().st_mode == 0o100644
    # known failure: zip extraction does not restore the executable bit
    assert script_out.stat().st_mode == 0o100755
コード例 #16
0
def test_pypi_extract_intrinsic_metadata(tmp_path, datadir):
    """Uncompressing a known PyPI sdist and parsing its PKG-INFO must
    produce the expected dict."""
    extraction_dir = str(tmp_path)
    archive_path = path.join(
        datadir, "https_files.pythonhosted.org", "0805nexter-1.1.0.zip"
    )
    uncompress(archive_path, dest=extraction_dir)

    expected_metadata = {
        "metadata_version": "1.0",
        "name": "0805nexter",
        "version": "1.1.0",
        "summary": "a simple printer of nested lest",
        "home_page": "http://www.hp.com",
        "author": "hgtkpython",
        "author_email": "*****@*****.**",
        "platforms": ["UNKNOWN"],
    }

    assert extract_intrinsic_metadata(extraction_dir) == expected_metadata