Beispiel #1
0
    def append_object(self, obj, source_file, upload_cb=None):
        """Append the contents of an open file to a remote object, sending
        one request (in its own SilentEvent) per container block.

        :param obj: (str) remote object path

        :param source_file: open file descriptor; its size is taken from
            fstat(source_file.fileno()) and its data is consumed with read()

        :param upload_cb: optional progress.bar for uploading; called with
            the number of blocks to create the progress generator

        :returns: the response headers collected per block (the values of a
            dict keyed by block index)
        """
        self._assert_container()
        container_meta = self.get_container_info()
        blocksize = int(container_meta["x-container-block-size"])
        filesize = fstat(source_file.fileno()).st_size
        # Ceiling division; evaluates to 0 for an empty file
        nblocks = 1 + (filesize - 1) // blocksize
        last_index = nblocks - 1

        if upload_cb:
            self.progress_bar_gen = upload_cb(nblocks)
            self._cb_next()

        bytes_read = 0
        block_headers = {}
        in_flight = {}
        self._init_thread_limit()
        try:
            for index in range(nblocks):
                chunk = source_file.read(min(blocksize, filesize - bytes_read))
                bytes_read += len(chunk)

                self._watch_thread_limit(in_flight.values())
                worker = SilentEvent(
                    method=self.object_post,
                    obj=obj,
                    update=True,
                    content_range="bytes */*",
                    content_type="application/octet-stream",
                    content_length=len(chunk),
                    data=chunk,
                )
                in_flight[index] = worker
                worker.start()

                # Harvest whatever has finished; on the last block wait for
                # everything that is still running instead of deferring it
                still_running = {}
                for key, thread in in_flight.items():
                    if thread.isAlive():
                        if index < last_index:
                            still_running[key] = thread
                            continue
                        thread.join()
                    if thread.exception:
                        raise thread.exception
                    block_headers[key] = thread.value.headers
                    self._cb_next()
                in_flight = still_running
        except KeyboardInterrupt:
            # The interrupt is not re-raised: after waiting for the threads,
            # control falls through to `finally` and the normal return
            sendlog.info("- - - wait for threads to finish")
            for thread in activethreads():
                thread.join()
        finally:
            from time import sleep

            # Sleeps two seconds for every thread activethreads() reports
            sleep(2 * len(activethreads()))
            self._cb_next()
        return block_headers.values()
Beispiel #2
0
    def download_to_string(
            self, obj,
            download_cb=None,
            version=None,
            range_str=None,
            if_match=None,
            if_none_match=None,
            if_modified_since=None,
            if_unmodified_since=None):
        """Download an object to a string (multiple connections). This method
        uses threads for http requests, but stores all content in memory.

        :param obj: (str) remote object path

        :param download_cb: optional progress.bar object for downloading

        :param version: (str) file version

        :param range_str: (str) from, to are file positions (int) in bytes

        :param if_match: (str)

        :param if_none_match: (str)

        :param if_modified_since: (str) formated date

        :param if_unmodified_since: (str) formated date

        :returns: (str) the whole object contents; None if the download was
            interrupted with KeyboardInterrupt (the interrupt is not
            re-raised)

        :raises: whatever exception a block download thread recorded
        """
        restargs = dict(
            version=version,
            data_range=None if range_str is None else 'bytes=%s' % range_str,
            if_match=if_match,
            if_none_match=if_none_match,
            if_modified_since=if_modified_since,
            if_unmodified_since=if_unmodified_since)

        (
            blocksize,
            blockhash,
            total_size,
            hash_list,
            remote_hashes) = self._get_remote_blocks_info(obj, **restargs)
        assert total_size >= 0

        if download_cb:
            self.progress_bar_gen = download_cb(len(hash_list))
            self._cb_next()

        num_of_blocks = len(remote_hashes)
        ret = [''] * num_of_blocks
        self._init_thread_limit()
        flying = dict()
        try:
            # Only the position of each entry is used here, so the entry
            # itself is ignored (and no longer shadows `blockhash` above)
            for blockid, _ in enumerate(remote_hashes):
                start = blocksize * blockid
                is_last = start + blocksize > total_size
                end = (total_size - 1) if is_last else (start + blocksize - 1)
                data_range_str = _range_up(start, end, end, range_str)
                if data_range_str:
                    self._watch_thread_limit(flying.values())
                    restargs['data_range'] = 'bytes=%s' % data_range_str
                    flying[blockid] = self._get_block_async(obj, **restargs)
                # Iterate over a snapshot: finished entries are popped from
                # `flying` inside the loop, and mutating a dict while walking
                # its live items() view raises RuntimeError on Python 3
                # NOTE(review): isAlive() is the legacy threading.Thread
                # spelling - if these workers are Thread subclasses, prefer
                # is_alive(); confirm against _get_block_async
                for runid, thread in list(flying.items()):
                    if (blockid + 1) == num_of_blocks:
                        # Last block requested: wait for everything in flight
                        thread.join()
                    elif thread.isAlive():
                        continue
                    if thread.exception:
                        raise thread.exception
                    ret[runid] = thread.value.content
                    self._cb_next()
                    flying.pop(runid)
            return ''.join(ret)
        except KeyboardInterrupt:
            sendlog.info('- - - wait for threads to finish')
            for thread in activethreads():
                thread.join()
Beispiel #3
0
    def upload_from_string(
            self, obj, input_str,
            hash_cb=None,
            upload_cb=None,
            etag=None,
            if_etag_match=None,
            if_not_exist=None,
            content_encoding=None,
            content_disposition=None,
            content_type=None,
            sharing=None,
            public=None,
            container_info_cache=None):
        """Upload in-memory content as an object, using multiple connections
        (threads) for the blocks the server reports as missing

        :param obj: (str) remote object path

        :param input_str: (str) upload content

        :param hash_cb: optional progress.bar object for calculating hashes
            (accepted but not used by this method)

        :param upload_cb: optional progress.bar object for uploading

        :param etag: (str)

        :param if_etag_match: (str) Push that value to if-match header at file
            creation

        :param if_not_exist: (bool) If true, the file will be uploaded ONLY if
            it does not exist remotely, otherwise the operation will fail.
            Involves the case of an object with the same path is created while
            the object is being uploaded.

        :param content_encoding: (str)

        :param content_disposition: (str)

        :param content_type: (str) defaults to application/octet-stream

        :param sharing: {'read':[user and/or grp names],
            'write':[usr and/or grp names]}

        :param public: (bool)

        :param container_info_cache: (dict) if given, avoid redundant calls to
            server for container info (block size and hash information)

        :returns: the headers handed back by the hashmap check when it reports
            no missing-block list (None), otherwise the headers of the final
            object_put response

        :raises ClientError: if some blocks still fail after the retries
        """
        self._assert_container()

        blocksize, blockhash, size, nblocks = self._get_file_block_info(
            fileobj=None, size=len(input_str), cache=container_info_cache)
        content_type = content_type or 'application/octet-stream'
        etag_not_match = '*' if if_not_exist else None

        # Hash every block and remember where each hash came from
        hashes, hmap = [], {}
        for blockid in range(nblocks):
            start = blockid * blocksize
            chunk = input_str[start:start + blocksize]
            digest = _pithos_hash(chunk, blockhash)
            hashes.append(digest)
            hmap[digest] = (start, chunk)

        hashmap = dict(bytes=size, hashes=hashes)
        missing, obj_headers = self._create_object_or_get_missing_hashes(
            obj, hashmap,
            content_type=content_type,
            size=size,
            if_etag_match=if_etag_match,
            if_etag_not_match=etag_not_match,
            content_encoding=content_encoding,
            content_disposition=content_disposition,
            permissions=sharing,
            public=public)
        if missing is None:
            return obj_headers

        if upload_cb:
            self.progress_bar_gen = upload_cb(nblocks)
            # One initial step plus one per block that needs no upload
            for _ in range(nblocks + 1 - len(missing)):
                self._cb_next()

        # A try is only spent when a round ends with the same number of
        # failures as the round before it
        tries_left = 7
        previous_failures = 0
        try:
            while tries_left and missing:
                in_flight = []
                failures = []
                for block_hash in missing:
                    chunk = hmap[block_hash][1]
                    in_flight.append(
                        self._put_block_async(chunk, block_hash))
                    pending = self._watch_thread_limit(in_flight)
                    for thread in set(in_flight).difference(pending):
                        if thread.exception:
                            failures.append(thread.kwargs['hash'])
                        if thread.isAlive():
                            in_flight.append(thread)
                        else:
                            self._cb_next()
                    in_flight = pending
                # Drain whatever is still running at the end of the round
                for thread in in_flight:
                    thread.join()
                    if thread.exception:
                        failures.append(thread.kwargs['hash'])
                    self._cb_next()
                missing = failures
                failed_count = len(failures)
                if failed_count and failed_count == previous_failures:
                    tries_left -= 1
                previous_failures = failed_count
            if missing:
                raise ClientError('%s blocks failed to upload' % len(missing))
        except KeyboardInterrupt:
            sendlog.info('- - - wait for threads to finish')
            for thread in activethreads():
                thread.join()
            raise
        self._cb_next()

        # All blocks are in place: commit the object by its hashmap
        response = self.object_put(
            obj,
            format='json',
            hashmap=True,
            content_type=content_type,
            content_encoding=content_encoding,
            if_etag_match=if_etag_match,
            if_etag_not_match=etag_not_match,
            etag=etag,
            json=hashmap,
            permissions=sharing,
            public=public,
            success=201)
        return response.headers
Beispiel #4
0
    def upload_object(
            self, obj, f,
            size=None,
            hash_cb=None,
            upload_cb=None,
            etag=None,
            if_etag_match=None,
            if_not_exist=None,
            content_encoding=None,
            content_disposition=None,
            content_type=None,
            sharing=None,
            public=None,
            container_info_cache=None):
        """Upload an object using multiple connections (threads)

        :param obj: (str) remote object path

        :param f: open file descriptor (rb)

        :param size: handed to _get_file_block_info together with f; the
            size that call returns is the one used from then on

        :param hash_cb: optional progress.bar object for calculating hashes

        :param upload_cb: optional progress.bar object for uploading

        :param etag: (str)

        :param if_etag_match: (str) Push that value to if-match header at file
            creation

        :param if_not_exist: (bool) If true, the file will be uploaded ONLY if
            it does not exist remotely, otherwise the operation will fail.
            Involves the case of an object with the same path is created while
            the object is being uploaded.

        :param content_encoding: (str)

        :param content_disposition: (str)

        :param content_type: (str) defaults to application/octet-stream

        :param sharing: {'read':[user and/or grp names],
            'write':[usr and/or grp names]}

        :param public: (bool)

        :param container_info_cache: (dict) if given, avoid redundant calls to
            server for container info (block size and hash information)

        :returns: the headers handed back by the hashmap check when it reports
            no missing-block list (None), otherwise the headers of the final
            object_put response

        :raises ClientError: if some blocks are still missing after the
            retries are used up
        """
        self._assert_container()

        block_info = (
            blocksize, blockhash, size, nblocks) = self._get_file_block_info(
                f, size, container_info_cache)
        hashes, hmap = [], {}
        content_type = content_type or 'application/octet-stream'

        self._calculate_blocks_for_upload(
            *block_info,
            hashes=hashes,
            hmap=hmap,
            fileobj=f,
            hash_cb=hash_cb)

        hashmap = dict(bytes=size, hashes=hashes)
        missing, obj_headers = self._create_object_or_get_missing_hashes(
            obj, hashmap,
            content_type=content_type,
            size=size,
            if_etag_match=if_etag_match,
            if_etag_not_match='*' if if_not_exist else None,
            content_encoding=content_encoding,
            content_disposition=content_disposition,
            permissions=sharing,
            public=public)

        if missing is None:
            return obj_headers

        upload_gen = None
        if upload_cb:
            upload_gen = upload_cb(len(hashmap['hashes']))
            # One initial step plus one per block that needs no upload
            for _ in range(len(hashmap['hashes']) + 1 - len(missing)):
                try:
                    # The next() builtin works on Python 2.6+ and Python 3,
                    # unlike the Python-2-only generator method .next()
                    next(upload_gen)
                except Exception:
                    # A broken progress bar must not abort the upload, but
                    # KeyboardInterrupt/SystemExit are allowed to propagate
                    sendlog.debug('Progress bar failure')
                    break

        # A retry is only spent when a round leaves the number of missing
        # blocks unchanged; any progress keeps the budget intact
        retries = 7
        while retries:
            sendlog.info('%s blocks missing' % len(missing))
            num_of_blocks = len(missing)
            missing = self._upload_missing_blocks(
                missing, hmap, f, upload_gen)
            if not missing:
                break
            if num_of_blocks == len(missing):
                retries -= 1
        if missing:
            try:
                details = ['%s' % thread.exception for thread in missing]
            except Exception:
                details = ['Also, failed to read thread exceptions']
            raise ClientError(
                '%s blocks failed to upload' % len(missing),
                details=details)

        # All blocks are in place: commit the object by its hashmap
        r = self.object_put(
            obj,
            format='json',
            hashmap=True,
            content_type=content_type,
            content_encoding=content_encoding,
            if_etag_match=if_etag_match,
            if_etag_not_match='*' if if_not_exist else None,
            etag=etag,
            json=hashmap,
            permissions=sharing,
            public=public,
            success=201)
        return r.headers
Beispiel #5
0
    def upload_object(self,
                      obj,
                      f,
                      size=None,
                      hash_cb=None,
                      upload_cb=None,
                      etag=None,
                      if_etag_match=None,
                      if_not_exist=None,
                      content_encoding=None,
                      content_disposition=None,
                      content_type=None,
                      sharing=None,
                      public=None,
                      container_info_cache=None):
        """Upload an object block by block, as a generator.

        Yields the size of each missing block right after that block has been
        sent with _put_block. If the hashmap check reports no missing-block
        list (None), yields the total size once and stops. The final
        object_put runs only when the generator is driven past its last
        yield.

        :param obj: remote object path

        :param f: open file object; must support seek() and read()

        :param size: handed to _get_file_block_info together with f

        :param hash_cb: forwarded to _calculate_blocks_for_upload

        :param upload_cb: accepted but not used by this method

        :param content_type: defaults to application/octet-stream

        :param sharing: forwarded as `permissions`

        The remaining keyword arguments are forwarded unchanged to the
        hashmap check and/or the final object_put.
        """
        self._assert_container()

        block_info = self._get_file_block_info(f, size, container_info_cache)
        blocksize, blockhash, size, nblocks = block_info
        hashes, hmap = [], {}
        content_type = content_type or 'application/octet-stream'
        etag_not_match = '*' if if_not_exist else None

        self._calculate_blocks_for_upload(*block_info,
                                          hashes=hashes,
                                          hmap=hmap,
                                          fileobj=f,
                                          hash_cb=hash_cb)

        hashmap = dict(bytes=size, hashes=hashes)
        missing, obj_headers = self._create_object_or_get_missing_hashes(
            obj,
            hashmap,
            content_type=content_type,
            size=size,
            if_etag_match=if_etag_match,
            if_etag_not_match=etag_not_match,
            content_encoding=content_encoding,
            content_disposition=content_disposition,
            permissions=sharing,
            public=public)

        if missing is None:
            yield size
            return

        sendlog.info('%s blocks missing' % len(missing))
        for block_hash in missing:
            # hmap maps each hash to (offset in f, number of bytes to read)
            offset, length = hmap[block_hash]
            f.seek(offset)
            self._put_block(f.read(length), block_hash)
            yield length

        # All missing blocks were sent: commit the object by its hashmap
        self.object_put(obj,
                        format='json',
                        hashmap=True,
                        content_type=content_type,
                        content_encoding=content_encoding,
                        if_etag_match=if_etag_match,
                        if_etag_not_match=etag_not_match,
                        etag=etag,
                        json=hashmap,
                        permissions=sharing,
                        public=public,
                        success=201)
Beispiel #6
0
    def upload_object(
            self, obj, f,
            size=None,
            hash_cb=None,
            upload_cb=None,
            etag=None,
            if_etag_match=None,
            if_not_exist=None,
            content_encoding=None,
            content_disposition=None,
            content_type=None,
            sharing=None,
            public=None,
            container_info_cache=None):
        """Generator that uploads an object one missing block at a time.

        After each block is sent with _put_block, the number of bytes read
        for it is yielded. If the hashmap check reports no missing-block list
        (None), the total size is yielded once and the generator ends. The
        closing object_put is only reached when the caller exhausts the
        generator.

        :param obj: remote object path

        :param f: open file object; must support seek() and read()

        :param size: handed to _get_file_block_info together with f

        :param hash_cb: forwarded to _calculate_blocks_for_upload

        :param upload_cb: accepted but not used by this method

        :param content_type: defaults to application/octet-stream

        :param sharing: forwarded as `permissions`

        The remaining keyword arguments are forwarded unchanged to the
        hashmap check and/or the final object_put.
        """
        self._assert_container()

        block_info = self._get_file_block_info(f, size, container_info_cache)
        (blocksize, blockhash, size, nblocks) = block_info
        hashes = []
        hmap = {}
        if not content_type:
            content_type = 'application/octet-stream'

        self._calculate_blocks_for_upload(
            *block_info,
            hashes=hashes,
            hmap=hmap,
            fileobj=f,
            hash_cb=hash_cb)

        # Arguments shared by the hashmap check and the final commit
        shared_args = dict(
            content_type=content_type,
            if_etag_match=if_etag_match,
            if_etag_not_match='*' if if_not_exist else None,
            content_encoding=content_encoding,
            permissions=sharing,
            public=public)

        hashmap = dict(bytes=size, hashes=hashes)
        missing, obj_headers = self._create_object_or_get_missing_hashes(
            obj, hashmap,
            size=size,
            content_disposition=content_disposition,
            **shared_args)

        if missing is None:
            yield size
            return

        sendlog.info('%s blocks missing' % len(missing))
        for missing_hash in missing:
            # hmap maps each hash to (offset in f, number of bytes to read)
            position, nbytes = hmap[missing_hash]
            f.seek(position)
            payload = f.read(nbytes)
            self._put_block(payload, missing_hash)
            yield nbytes

        # Every missing block has been sent: commit the object by hashmap
        self.object_put(
            obj,
            format='json',
            hashmap=True,
            etag=etag,
            json=hashmap,
            success=201,
            **shared_args)