def test_fileobj_tar_gz_should_not_simplify_archives(self):
     source = os.path.join(self.temp_dir, 'source_dir')
     os.mkdir(source)
     self.write_string_to_file('testing', os.path.join(source, 'filename'))
     self.do_upload(('source.tar.gz', tar_gzip_directory(source)))
     self.assertEqual(['filename'], self.listdir())
     self.check_file_equals_string('filename', 'testing')
 def test_single_fileobj_tar_gz_no_simplify_archives(self):
     source = os.path.join(self.temp_dir, 'source_dir')
     os.mkdir(source)
     self.write_string_to_file('testing', os.path.join(source, 'filename'))
     self.do_upload([('source.tar.gz', tar_gzip_directory(source))], simplify_archives=False)
     self.assertEqual(['filename'], os.listdir(self.bundle_location))
     self.check_file_contains_string(os.path.join(self.bundle_location, 'filename'), 'testing')
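# All of the tests here exercise tar_gzip_directory. A minimal sketch of such
# a helper, assuming only the standard library (the project's real helper may
# shell out to tar and support extra options), might look like:
import os
import tarfile
import tempfile

def tar_gzip_directory_sketch(path):
    # Tar and gzip the *contents* of `path` into a spooled temporary file
    # and return it rewound, ready for a consumer to read().
    fileobj = tempfile.SpooledTemporaryFile()
    with tarfile.open(fileobj=fileobj, mode='w:gz') as tar:
        for entry in sorted(os.listdir(path)):
            tar.add(os.path.join(path, entry), arcname=entry)
    fileobj.seek(0)
    return fileobj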
    def stream_tarred_gzipped_directory(self, target):
        """
        Returns a file-like object containing a tarred and gzipped archive
        of the given directory.
        """
        # get_bundle_state raises NotFoundException if the UUID is invalid.
        bundle_state = self._bundle_model.get_bundle_state(target.bundle_uuid)

        if bundle_state == State.PREPARING:
            raise NotFoundError(
                "Bundle {} hasn't started running yet, files not available".
                format(target.bundle_uuid))
        elif bundle_state != State.RUNNING:
            directory_path = self._get_target_path(target)
            return file_util.tar_gzip_directory(directory_path)
        else:
            # stream_tarred_gzipped_directory calls are sent to the worker
            # even on a shared filesystem because:
            # 1) due to NFS caching, the worker has more up-to-date
            #    information on directory contents, and
            # 2) the logic for hiding dependency paths doesn't need to be
            #    re-implemented here.
            worker = self._bundle_model.get_bundle_worker(target.bundle_uuid)
            response_socket_id = self._worker_model.allocate_socket(
                worker['user_id'], worker['worker_id'])
            try:
                read_args = {'type': 'stream_directory'}
                self._send_read_message(worker, response_socket_id, target,
                                        read_args)
                fileobj = self._get_read_response_stream(response_socket_id)
                return Deallocating(fileobj, self._worker_model,
                                    response_socket_id)
            except Exception:
                self._worker_model.deallocate_socket(response_socket_id)
                raise
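# A caller might drain the file-like object returned above in fixed-size
# chunks; copy_stream is a hypothetical helper, not part of the API shown.
def copy_stream(fileobj, out, chunk_size=64 * 1024):
    # Stream the tarred+gzipped archive without holding it all in memory.
    while True:
        chunk = fileobj.read(chunk_size)
        if not chunk:
            break
        out.write(chunk)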
 def test_url_tar_gz_should_not_simplify_archives(self):
     source = os.path.join(self.temp_dir, 'source_dir')
     os.mkdir(source)
     self.write_string_to_file('testing', os.path.join(source, 'filename'))
     self.do_upload(
         self.mock_url_source(BytesIO(tar_gzip_directory(source).read()),
                              ext=".tar.gz"))
     self.check_file_equals_string('filename', 'testing')
 def write_git_repo(self, source: str, bundle_path: str):
     with tempfile.TemporaryDirectory() as tmpdir:
         file_util.git_clone(source, tmpdir)
         # Upload a fileobj with the repo's tarred and gzipped contents.
         self.write_fileobj(".tar.gz",
                            tar_gzip_directory(tmpdir),
                            bundle_path,
                            unpack_archive=True)
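# Hypothetical usage of the helper above (URL and path are made up):
# self.write_git_repo("https://github.com/example/repo.git", "/tmp/bundle")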
 def test_url_tar_gz(self):
     source = os.path.join(self.temp_dir, 'source_dir')
     os.mkdir(source)
     self.write_string_to_file('testing', os.path.join(source, 'file1'))
     self.write_string_to_file('testing', os.path.join(source, 'file2'))
     self.do_upload(
         self.mock_url_source(BytesIO(tar_gzip_directory(source).read()),
                              ext=".tar.gz"))
     self.assertIn('file2', self.listdir())
    def test_tar_empty(self):
        empty_dir = tempfile.mkdtemp()
        self.addCleanup(lambda: remove_path(empty_dir))
        temp_dir = tempfile.mkdtemp()
        self.addCleanup(lambda: remove_path(temp_dir))

        output_dir = os.path.join(temp_dir, 'output')
        un_tar_directory(tar_gzip_directory(empty_dir), output_dir, 'gz')
        self.assertEqual(os.listdir(output_dir), [])
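# For reference, a minimal sketch of what un_tar_directory might do, assuming
# the standard library (the real helper also applies ignore rules and safety
# checks before extracting):
import tarfile

def un_tar_directory_sketch(fileobj, output_dir, compression='gz'):
    # Extract a 'gz'- or 'bz2'-compressed tar stream into output_dir.
    with tarfile.open(fileobj=fileobj, mode='r:' + compression) as tar:
        tar.extractall(output_dir)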
 def test_fileobj_tar_gz_with_dsstore_should_not_simplify_archive(self):
     """If the user included two files, README and .DS_Store, in the archive,
     the archive should not be simplified because we have more than one file in the archive.
     """
     source = os.path.join(self.temp_dir, 'source_dir')
     os.mkdir(source)
     self.write_string_to_file('testing', os.path.join(source, 'README'))
     self.write_string_to_file('testing', os.path.join(source, '.DS_Store'))
     self.do_upload(('source.tar.gz', tar_gzip_directory(source)))
     self.assertEqual(['.DS_Store', 'README'], sorted(self.listdir()))
 def test_single_local_tar_gz_path_simplify_archives(self):
     source_dir = os.path.join(self.temp_dir, 'source_dir')
     os.mkdir(source_dir)
     self.write_string_to_file('testing', os.path.join(source_dir, 'filename'))
     source = os.path.join(self.temp_dir, 'source.tar.gz')
     with open(source, 'wb') as f:
         f.write(tar_gzip_directory(source_dir).read())
     self.do_upload([source], simplify_archives=True)
     self.assertTrue(os.path.exists(source))
     self.check_file_contains_string(self.bundle_location, 'testing')
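# 'Simplifying' an archive appears to mean: if the unpacked archive holds
# exactly one regular file, promote that file to be the bundle contents. A
# sketch under that assumption (simplify_archive_sketch is hypothetical):
import os
import shutil

def simplify_archive_sketch(bundle_path):
    entries = os.listdir(bundle_path)
    if len(entries) != 1:
        return  # more than one entry: keep the directory as-is
    only_entry = os.path.join(bundle_path, entries[0])
    if os.path.isfile(only_entry):
        tmp = bundle_path + '.tmp'
        shutil.move(only_entry, tmp)
        os.rmdir(bundle_path)
        shutil.move(tmp, bundle_path)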
    def test_tar_always_ignore(self):
        temp_dir = tempfile.mkdtemp()
        self.addCleanup(lambda: remove_path(temp_dir))
        output_dir = os.path.join(temp_dir, 'output')

        un_tar_directory(tar_gzip_directory(IGNORE_TEST_DIR), output_dir, 'gz')
        output_dir_entries = os.listdir(output_dir)
        self.assertNotIn('._ignored', output_dir_entries)
        self.assertIn('dir', output_dir_entries)
        self.assertNotIn('__MACOSX', output_dir_entries)
        self.assertFalse(os.path.exists(os.path.join(output_dir, 'dir', '__MACOSX')))
        self.assertFalse(os.path.exists(os.path.join(output_dir, 'dir', '._ignored2')))
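# The '._*' and '__MACOSX' entries are dropped at every depth. A plausible
# always-ignore check (this pattern list is assumed, not confirmed by the
# source) could be:
import fnmatch
import os

ALWAYS_IGNORE_PATTERNS = ['._*', '__MACOSX']  # assumed defaults

def is_always_ignored(member_name):
    base = os.path.basename(member_name)
    return any(fnmatch.fnmatch(base, p) for p in ALWAYS_IGNORE_PATTERNS)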
    def test_tar_exclude_ignore(self):
        temp_dir = tempfile.mkdtemp()
        self.addCleanup(lambda: remove_path(temp_dir))
        output_dir = os.path.join(temp_dir, 'output')

        un_tar_directory(
            tar_gzip_directory(IGNORE_TEST_DIR, ignore_file='.tarignore'), output_dir, 'gz'
        )
        output_dir_entries = os.listdir(output_dir)
        self.assertIn('not_ignored.txt', output_dir_entries)
        self.assertIn('dir', output_dir_entries)
        self.assertNotIn('ignored.txt', output_dir_entries)
        self.assertNotIn('ignored_dir', output_dir_entries)
        self.assertTrue(os.path.exists(os.path.join(output_dir, 'dir', 'not_ignored2.txt')))
        self.assertFalse(os.path.exists(os.path.join(output_dir, 'dir', 'ignored2.txt')))
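# A sketch of how an ignore_file such as '.tarignore' might be parsed into
# glob patterns; the exact file format is assumed, not confirmed by the source.
def read_ignore_patterns(ignore_path):
    # One glob pattern per line; blank lines and '#' comments are skipped.
    with open(ignore_path) as f:
        return [line.strip() for line in f
                if line.strip() and not line.startswith('#')]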
    def test_tar_has_files(self):
        temp_dir = tempfile.mkdtemp()
        self.addCleanup(lambda: remove_path(temp_dir))

        output_dir = os.path.join(temp_dir, 'output')
        un_tar_directory(
            # Positional args to tar_gzip_directory are presumably
            # follow_symlinks=False, exclude_patterns=['f2'] (matched at any
            # depth), and exclude_names=['f1', 'b.txt'] (top level only).
            tar_gzip_directory(FILES_DIR, False, ['f2'], ['f1', 'b.txt']), output_dir, 'gz'
        )
        output_dir_entries = os.listdir(output_dir)
        self.assertIn('dir1', output_dir_entries)
        self.assertIn('a.txt', output_dir_entries)
        self.assertNotIn('b.txt', output_dir_entries)
        self.assertTrue(os.path.exists(os.path.join(output_dir, 'dir1', 'f1')))
        self.assertFalse(os.path.exists(os.path.join(output_dir, 'dir1', 'f2')))
        self.assertTrue(os.path.islink(os.path.join(output_dir, 'a-symlink.txt')))
 def upload_folder(self, bundle, contents):
     with tempfile.TemporaryDirectory() as tmpdir:
         for path, file_contents in contents:
             file_path = os.path.join(tmpdir, path)
             os.makedirs(os.path.dirname(file_path), exist_ok=True)
             with open(file_path, "wb+") as f:
                 f.write(file_contents)
             os.chmod(file_path, self.DEFAULT_PERM_FILE)
             os.chmod(os.path.dirname(file_path), self.DEFAULT_PERM_DIR)
         self.upload_manager.upload_to_bundle_store(
             bundle,
             source=["contents.tar.gz",
                     tar_gzip_directory(tmpdir)],
             git=False,
             unpack=True,
             use_azure_blob_beta=self.use_azure_blob_beta,
         )
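# Hypothetical usage of upload_folder: each entry pairs a relative path with
# the raw bytes to write.
# self.upload_folder(bundle, [("README.md", b"hello"), ("src/run.py", b"print(1)")])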
 def stream_tarred_gzipped_directory(self, uuid, path):
     """
     Returns a file-like object containing a tarred and gzipped archive
     of the given directory.
     """
     if self._is_available_locally(uuid):
         directory_path = self._get_target_path(uuid, path)
         return file_util.tar_gzip_directory(directory_path)
     else:
         worker = self._worker_model.get_bundle_worker(uuid)
         response_socket_id = self._worker_model.allocate_socket(
             worker['user_id'], worker['worker_id'])
         try:
             read_args = {'type': 'stream_directory'}
             self._send_read_message(worker, response_socket_id, uuid, path,
                                     read_args)
             fileobj = self._get_read_response_stream(response_socket_id)
             return Deallocating(fileobj, self._worker_model,
                                 response_socket_id)
         except Exception:
             self._worker_model.deallocate_socket(response_socket_id)
             raise
 def stream_thread(final_path):
     with closing(
             tar_gzip_directory(
                 final_path, exclude_names=exclude_names)) as fileobj:
         reply_fn(None, {}, fileobj)
 def archive(self, *args, **kwargs):
     return tar_gzip_directory(*args, **kwargs)
def pack_files_for_upload(sources,
                          should_unpack,
                          follow_symlinks,
                          exclude_patterns=None,
                          force_compression=False):
    """
    Create a single flat tarfile containing all the sources.
    Caller is responsible for closing the returned fileobj.

    Note: It may be possible to achieve additional speed gains on certain
    cases if we disable compression when tar-ing directories. But for now,
    force_compression only affects the case of single, uncompressed files.

    :param sources: list of paths to files to pack
    :param should_unpack: will unpack archives iff True
    :param follow_symlinks: if True, resolve symlinked sources; if False, a
                            symlinked source raises UsageError
    :param exclude_patterns: list of glob patterns for files to ignore, or
                             None to include all files
    :param force_compression: True to always use compression
    :return: dict with {
        'fileobj': <file object of archive>,
        'filename': <name of archive file>,
        'filesize': <size of archive in bytes, or None if unknown>,
        'should_unpack': <True iff archive should be unpacked at server>,
        'should_simplify': <True iff directory should be 'simplified' at server>
        }
    """
    exclude_patterns = exclude_patterns or []

    def resolve_source(source):
        # Resolve symlink if desired
        resolved_source = source
        if follow_symlinks:
            resolved_source = os.path.realpath(source)
            if not os.path.exists(resolved_source):
                raise UsageError('Broken symlink')
        elif os.path.islink(source):
            raise UsageError('Not following symlinks.')
        return resolved_source

    sources = list(map(resolve_source, sources))

    # For efficiency, return single files and directories directly
    if len(sources) == 1:
        source = sources[0]
        filename = os.path.basename(source)
        if os.path.isdir(source):
            archived = tar_gzip_directory(source,
                                          follow_symlinks=follow_symlinks,
                                          exclude_patterns=exclude_patterns)
            return {
                'fileobj': archived,
                'filename': filename + '.tar.gz',
                'filesize': None,
                'should_unpack': True,
                'should_simplify': False,
            }
        elif path_is_archive(source):
            return {
                'fileobj': open(source, mode='rb'),
                'filename': filename,
                'filesize': os.path.getsize(source),
                'should_unpack': should_unpack,
                'should_simplify': True,
            }
        elif force_compression:
            return {
                'fileobj': gzip_file(source),
                'filename': filename + '.gz',
                'filesize': None,
                'should_unpack': True,
                'should_simplify': False,
            }
        else:
            return {
                'fileobj': open(source, mode='rb'),
                'filename': filename,
                'filesize': os.path.getsize(source),
                'should_unpack': False,
                'should_simplify': False,
            }

    # Build archive file incrementally from all sources
    # TODO: For further optimization, we could either use a temporary named
    # pipe or a wrapper around a TemporaryFile to write the tarfile
    # concurrently while the REST client reads it and sends it to the server.
    # At the moment, we wait for the tarfile to be fully written before we
    # rewind and hand the file to the client to send to the server.
    scratch_dir = tempfile.mkdtemp()
    archive_fileobj = tempfile.SpooledTemporaryFile()
    archive = tarfile.open(name='contents.tar.gz', mode='w:gz', fileobj=archive_fileobj)

    def should_exclude(fn):
        basefn = os.path.basename(fn)
        return any(fnmatch(basefn, p) for p in exclude_patterns)

    for source in sources:
        if should_unpack and path_is_archive(source):
            # Unpack archive into scratch space
            dest_basename = strip_archive_ext(os.path.basename(source))
            dest_path = os.path.join(scratch_dir, dest_basename)
            unpack(get_archive_ext(source), source, dest_path)

            # Add file or directory to archive
            archive.add(dest_path, arcname=dest_basename, recursive=True)
        else:
            # Add file to archive, or add files recursively if directory.
            # Note: tarfile's `exclude` argument was removed in Python 3.7;
            # an equivalent `filter` callback drops matching members instead.
            archive.add(source,
                        arcname=os.path.basename(source),
                        recursive=True,
                        filter=lambda t: None if should_exclude(t.name) else t)

    # Clean up, rewind archive file, and return it
    archive.close()
    shutil.rmtree(scratch_dir)
    filesize = archive_fileobj.tell()
    archive_fileobj.seek(0)
    return {
        'fileobj': archive_fileobj,
        'filename': 'contents.tar.gz',
        'filesize': filesize,
        'should_unpack': True,
        'should_simplify': False,
    }
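# Hypothetical usage of pack_files_for_upload (paths are made up):
packed = pack_files_for_upload(['data_dir', 'notes.txt'],
                               should_unpack=True,
                               follow_symlinks=False)
try:
    print(packed['filename'], packed['filesize'], packed['should_unpack'])
finally:
    packed['fileobj'].close()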
def tar_bz2_directory(*args, **kwargs):
    """Create a .tar.bz2 archive of a directory.
    Test-only helper: it recompresses the gzipped tar entirely in memory, so
    it is not performance optimized and should not be used in production."""
    output = tar_gzip_directory(*args, **kwargs)
    return BytesIO(bz2.compress(gzip.decompress(output.read())))
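# Hypothetical round-trip check for the test helper above (path is made up):
import tarfile
from io import BytesIO

archive = tar_bz2_directory('/tmp/some_dir')
with tarfile.open(fileobj=BytesIO(archive.read()), mode='r:bz2') as tar:
    print(tar.getnames())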