# Example 1
    def __init__(self, src, dst=None, simple=False, use_pigz=True, pool_size=None,
                 progress_cb=None, cb_interval=None):
        """
        Prepare a download transfer.

        :type src: string
        :param src: manifest file url (or plain file url(s) in simple mode)
        """
        super(Download, self).__init__(pool_size=pool_size, progress_cb=progress_cb,
                                       cb_interval=cb_interval)
        # Per-transfer state; the pipe fds are created later, on start.
        self._simple = simple
        self._read_fd = None
        self._write_fd = None
        self._manifest = Manifest()

        # A single non-iterable source in simple mode gets wrapped into a
        # list, and an explicit destination becomes mandatory.
        iterable_src = hasattr(src, '__iter__')
        if not iterable_src and self._simple:
            self.src = [src]
            assert dst, 'For simple download you must specify dst param'
        else:
            self.src = src
        self.dst = dst
        self.use_pigz = use_pigz
        self.output = None
# Example 2
    def __init__(self, src, dst, transfer_id=None, manifest='manifest.json', description='', tags='',
                 gzip=True, use_pigz=True, simple=False, pool_size=None,
                 chunk_size=None, progress_cb=None, cb_interval=None):
        """
        Prepare an upload transfer.

        :type src: string / list / generator / iterator / NamedStream
        :param src: Transfer source, file path or stream

        :type dst: string
        :param dst: Transfer destination

        :type simple: bool
        :param simple: if True handle src as file path and don't use split and gzip
        """
        super(Upload, self).__init__(pool_size=pool_size, progress_cb=progress_cb,
                                     cb_interval=cb_interval)

        # Streams (anything with .read) and single non-iterable values are
        # normalized into a one-element list.
        is_stream = hasattr(src, 'read')
        is_iterable = hasattr(src, '__iter__')
        self.src = src if (is_iterable and not is_stream) else [src]
        self.dst = dst
        self.gzip = gzip
        self.use_pigz = use_pigz

        self._simple = simple
        self._chunk_size = chunk_size or DEFAULT_CHUNK_SIZE
        self._manifest = None
        self._manifest_queue = None

        if self._simple:
            return

        # Non-simple uploads get a unique remote sub-directory plus a manifest.
        transfer_id = transfer_id or uuid.uuid4().hex
        self.dst = os.path.join(dst, transfer_id)
        self._manifest_name = manifest
        self._manifest = Manifest()
        self._manifest['description'] = description
        self._manifest.cloudfs_path = os.path.join(self.dst, manifest)
        if tags:
            self._manifest['tags'] = tags
        self._manifest_queue = multiprocessing.Queue()
# Example 3
class Download(Transfer):
    """Download transfer.

    Simple mode: every url in ``src`` is fetched as-is into ``dst``.
    Manifest mode: ``src`` is a manifest url; the chunks it lists are
    downloaded concurrently, md5-verified, optionally decompressed
    (gzip/pigz) and streamed in order into ``self.output`` — the read end
    of a pipe whose write end is fed by the worker process.
    """

    def __init__(self, src, dst=None, simple=False, use_pigz=True, pool_size=None,
                 progress_cb=None, cb_interval=None):
        """
        :type src: string
        :param src: manifest file url (or plain file url(s) in simple mode)

        :type dst: string
        :param dst: destination directory; required when simple=True

        :type simple: bool
        :param simple: if True download src directly, without manifest/chunks
        """
        super(Download, self).__init__(pool_size=pool_size, progress_cb=progress_cb,
                                       cb_interval=cb_interval)
        self._simple = simple
        # Pipe fds are (re)created on every apply_async()
        self._read_fd = None
        self._write_fd = None
        self._manifest = Manifest()

        if self._simple and not hasattr(src, '__iter__'):
            self.src = [src]
            assert dst, 'For simple download you must specify dst param'
        else:
            self.src = src
        self.dst = dst
        self.use_pigz = use_pigz
        # Read end of the output pipe (manifest mode only)
        self.output = None

    def apply_async(self):
        assert not self.running
        if not self._simple:
            # create new pipe and output before each start
            self._read_fd, self._write_fd = os.pipe()
            self.output = os.fdopen(self._read_fd, 'rb')
        super(Download, self).apply_async()
        if not self._simple:
            # close the parent's copy of the write fd after the fork; the
            # child keeps its own, so readers of self.output get EOF only
            # when the worker finishes
            os.close(self._write_fd)

    def _chunk_generator(self):
        """
        Download chunks from the manifest file and yield
        (local_chunk_path, streamer, compressor) tuples in chunk order.
        """
        downloader = _Transfer('get', pool_size=self._pool_size)
        try:
            # step 1
            # download manifest
            local_manifest_file = os.path.join(self._tmp_dir, os.path.basename(self.src))

            def on_manifest_complete(info):
                if info['status'] == 'error':
                    raise info['error'][0], info['error'][1], info['error'][2]

            downloader.apply_async(FileInfo(self.src), self._tmp_dir,
                                   complete_cb=on_manifest_complete)
            downloader.wait_completion()
            self._manifest.read(local_manifest_file)

            # step 2
            # download chunks and yield them in right order
            remote_dir = os.path.dirname(self.src)
            results = {}

            def on_chunk_complete(info):
                self._on_file_complete(info)
                if info['status'] == 'done':
                    if cryptotool.calculate_md5_sum(info['result']) != info['md5_sum']:
                        raise MD5SumError('md5 sum mismatch', info)
                    # chunk files carry a 3-digit numeric suffix encoding
                    # their order within the file
                    priority = int(os.path.basename(info['src'])[-3:])
                    results[priority] = os.path.join(info['dst'], os.path.basename(info['src']))

            for f in self._manifest['files']:
                # BUGFIX: chunk priorities restart at 0 for every file, so the
                # yield counter must be reset per file. Previously it was
                # initialized only once, so chunks of the second and later
                # files were downloaded but never yielded.
                yield_cntr = 0
                for chunk_data in sorted(f['chunks']):
                    chunk_rem_path = os.path.join(remote_dir, chunk_data[0])
                    if len(chunk_data) == 3:
                        md5_sum, size = chunk_data[1], chunk_data[2]
                    else:
                        md5_sum, size = chunk_data[1], None
                    chunk = FileInfo(chunk_rem_path, md5_sum=md5_sum, size=size)
                    downloader.apply_async(chunk, self._tmp_dir, complete_cb=on_chunk_complete,
                                           progress_cb=self._on_progress)
                    # NOTE(review): unlike Upload's `while not ... acquire`,
                    # this loops while acquire() SUCCEEDS, draining the
                    # semaphore — confirm the intended throttling semantics
                    while self._semaphore.acquire(False):
                        time.sleep(DEFAULT_SLEEP_TIME)
                    while yield_cntr in results:
                        yield results[yield_cntr], f['streamer'], f['compressor']
                        yield_cntr += 1

                # wait for and yield the remaining chunks of this file
                while yield_cntr < len(f['chunks']):
                    while self._semaphore.acquire(False):
                        time.sleep(DEFAULT_SLEEP_TIME)
                    while yield_cntr in results:
                        yield results[yield_cntr], f['streamer'], f['compressor']
                        yield_cntr += 1
                    time.sleep(DEFAULT_SLEEP_TIME)

                results.clear()

            downloader.wait_completion()
            downloader.stop()
            # BUGFIX: plain return instead of `raise StopIteration`; inside a
            # generator the latter becomes RuntimeError under PEP 479
            return
        except:
            # stop workers on any failure, then re-raise to the caller
            downloader.stop(wait=False)
            raise

    def _simple_download(self):
        """Fetch every url in self.src directly into self.dst."""
        downloader = _Transfer('get', pool_size=1)
        try:
            for src in self.src:
                downloader.apply_async(FileInfo(src), self.dst,
                                       complete_cb=self._on_file_complete,
                                       progress_cb=self._on_progress)
            downloader.wait_completion()
            downloader.stop()
        except:
            downloader.stop(wait=False)
            raise

    def _large_download(self):
        """Stream downloaded chunks into the pipe, decompressing as needed."""
        stdin = None
        stdout = os.fdopen(self._write_fd, 'wb')
        compressors = {}

        for chunk_path, streamer, compressor in self._chunk_generator():
            if compressor:
                # create decompressor process if it doesn't exist yet
                if compressor not in compressors:
                    if compressor == 'gzip':
                        if self.use_pigz:
                            cmd = ['/usr/bin/pigz', '-d']
                        else:
                            cmd = ['/bin/gzip', '-d']
                    # Add custom compressor where
                    # elif compressor == 'xxx':
                    #   cmd = []
                    else:
                        raise Exception('Unsupported compressor: %s' % compressor)
                    compressors[compressor] = subprocess.Popen(cmd,
                                                               stdin=subprocess.PIPE,
                                                               stdout=stdout,
                                                               stderr=subprocess.PIPE,
                                                               close_fds=True)
                stdin = compressors[compressor].stdin
            else:
                # uncompressed chunks go straight to the output pipe
                stdin = stdout
            # write the chunk and free the disk space immediately
            util.write_file_to_stream(chunk_path, stdin)
            os.remove(chunk_path)

        if stdin:
            stdin.close()

        for compressor in compressors.values():
            compressor.wait()

    def _run(self):
        # Worker entry point: dispatch on transfer mode
        if self._simple:
            self._simple_download()
        else:
            self._large_download()
# Example 4
class Upload(Transfer):
    """Upload transfer.

    Simple mode: each path in ``src`` is uploaded to ``dst`` unchanged.
    Otherwise every source is turned into a stream (tar for file paths),
    optionally gzip/pigz compressed, split into chunks, and uploaded
    together with a manifest that describes the chunks.
    """

    def __init__(self, src, dst, transfer_id=None, manifest='manifest.json', description='', tags='',
                 gzip=True, use_pigz=True, simple=False, pool_size=None,
                 chunk_size=None, progress_cb=None, cb_interval=None):
        """
        :type src: string / list / generator / iterator / NamedStream
        :param src: Transfer source, file path or stream

        :type dst: string
        :param dst: Transfer destination

        :type simple: bool
        :param simple: if True handle src as file path and don't use split and gzip
        """
        super(Upload, self).__init__(pool_size=pool_size, progress_cb=progress_cb,
                                     cb_interval=cb_interval)

        # A single non-iterable source, or a readable stream, is wrapped
        # into a list so _run can always iterate self.src
        if not hasattr(src, '__iter__') or hasattr(src, 'read'):
            self.src = [src]
        else:
            self.src = src
        self.dst = dst
        self.gzip = gzip
        self.use_pigz = use_pigz

        self._simple = simple
        self._chunk_size = chunk_size or DEFAULT_CHUNK_SIZE
        self._manifest = None
        self._manifest_queue = None

        if not self._simple:
            # each non-simple transfer gets its own remote sub-directory
            transfer_id = transfer_id or uuid.uuid4().hex
            self.dst = os.path.join(dst, transfer_id)
            self._manifest_name = manifest
            self._manifest = Manifest()
            self._manifest['description'] = description
            self._manifest.cloudfs_path = os.path.join(self.dst, manifest)
            if tags:
                self._manifest['tags'] = tags
            # the worker process publishes the final manifest through this queue
            self._manifest_queue = multiprocessing.Queue()

    @property
    def manifest(self):
        """Latest manifest, refreshed from the worker's queue when available."""
        if self._manifest_queue and not self._manifest_queue.empty():
            self._manifest = self._manifest_queue.get()
        return self._manifest

    def _cleanup_on_error(self):
        LOG.debug('Cleanup on error')
        # BUGFIX: in simple mode there is no manifest (self._manifest is
        # None); guard so the AttributeError doesn't mask the original error
        if self._manifest is not None:
            self._manifest.delete()

    def _simple_upload(self):
        """Upload each source file as-is with a single worker."""
        uploader = _Transfer('put', pool_size=1)
        try:
            file_generator = map(FileInfo, self.src)

            for file_info in file_generator:
                dst = os.path.join(self.dst, file_info.name)
                uploader.apply_async(file_info, dst,
                                     complete_cb=self._on_file_complete,
                                     progress_cb=self._on_progress)
                # throttle: wait for a free slot
                while not self._semaphore.acquire(False):
                    time.sleep(DEFAULT_SLEEP_TIME)

            uploader.wait_completion()
            uploader.stop()
        except:
            uploader.stop(wait=False)
            raise

    def _large_upload(self):
        """Compress, split and upload every source, then upload the manifest."""
        uploader = _Transfer('put', pool_size=self._pool_size)
        try:
            if self.gzip and self.use_pigz:
                self._check_pigz()

            for src in self.src:
                name = streamer = extension = None

                if hasattr(src, 'fileno'):
                    # Popen stdout / file object / NamedStream
                    if isinstance(src, NamedStream):
                        stream = src
                    else:
                        name = 'stream-%s' % hash(src)
                        stream = NamedStream(src, name)
                elif isinstance(src, basestring) and os.path.isfile(src):
                    # file path: stream it through tar
                    dirname, name = os.path.split(src)
                    cmd = ['/bin/tar', 'cp', '-C', dirname, name]
                    popen = subprocess.Popen(cmd, stdout=subprocess.PIPE)
                    stream = NamedStream(popen.stdout, name, streamer='tar', extension='tar')
                else:
                    raise TransferError('Unsupported source %s' % src)

                name = os.path.basename(stream.name).strip('<>')
                streamer = stream.streamer
                extension = stream.extension

                if self.gzip:
                    if extension:
                        extension += '.gz'
                    else:
                        extension = 'gz'
                    stream = NamedStream(gzip_compressor(stream, self.use_pigz),
                                         stream.name, extension=extension, streamer=streamer)
                file_generator = split(stream, self._tmp_dir,
                                       chunk_size=self._chunk_size, extension=extension)

                uploaded_chunks = []

                # register the file in the manifest up front; 'chunks' is the
                # same list object that on_chunk_complete fills in below, so
                # the manifest entry stays current automatically
                self._manifest['files'].append({
                    'name': name,
                    'streamer': streamer,
                    'compressor': 'gzip' if self.gzip and not self._simple else '',
                    'chunks': uploaded_chunks,
                })

                def on_chunk_complete(info):
                    self._on_file_complete(info)
                    if info['status'] == 'done':
                        data = (os.path.basename(info['src']), info['md5_sum'], info['size'])
                        # insort keeps the chunk list ordered by chunk name
                        bisect.insort(uploaded_chunks, data)
                    os.remove(info['src'])

                # BUGFIX: loop variable renamed from `file_info`, which
                # shadowed the manifest-entry name in the original
                for chunk_info in file_generator:
                    dst = os.path.join(self.dst, chunk_info.name)
                    uploader.apply_async(chunk_info, dst,
                                         complete_cb=on_chunk_complete,
                                         progress_cb=self._on_progress)
                    # throttle: wait for a free slot
                    while not self._semaphore.acquire(False):
                        time.sleep(DEFAULT_SLEEP_TIME)

                uploader.wait_completion()
                # BUGFIX: removed a dead re-assignment that rebuilt the
                # manifest-entry dict here but never stored it anywhere

            manifest_file = os.path.join(self._tmp_dir, self._manifest_name)
            self._manifest.write(manifest_file)

            manifest_url = os.path.join(self.dst, self._manifest_name)

            def on_manifest_complete(info):
                if info['status'] == 'error':
                    raise info['error'][0], info['error'][1], info['error'][2]
                os.remove(info['src'])

            uploader.apply_async(FileInfo(manifest_file), manifest_url,
                                 complete_cb=on_manifest_complete)

            uploader.wait_completion()
            uploader.stop()

            # hand the completed manifest back to the parent process
            self._manifest_queue.put(self._manifest)
        except:
            uploader.stop(wait=False)
            raise

    def _check_pigz(self):
        """Make sure pigz is installed, pulling it from EPEL if missing."""
        try:
            pkgmgr.check_software(['pigz'])
        except pkgmgr.SoftwareError:
            pkgmgr.epel_repository()
            pkgmgr.installed('pigz', updatedb=True)

    def _run(self):
        # Worker entry point: dispatch on transfer mode
        if self._simple:
            self._simple_upload()
        else:
            self._large_upload()