Example #1
    def _handle_response(self, offset_stack, coalesced, response_handler,
                         data_map, next_offset, relpath):
        cur_offset_and_size = next_offset[0]
        # FIXME: this should know how many bytes are needed, for clarity.
        data = response_handler.read_body_bytes()
        data_offset = 0
        for c_offset in coalesced:
            if len(data) < c_offset.length:
                raise errors.ShortReadvError(relpath,
                                             c_offset.start,
                                             c_offset.length,
                                             actual=len(data))
            for suboffset, subsize in c_offset.ranges:
                key = (c_offset.start + suboffset, subsize)
                this_data = data[data_offset + suboffset:
                                 data_offset + suboffset + subsize]
                # Special-case in-order data rather than packing it into a
                # map and then back out again. Benchmarking shows this path
                # has a 100% hit rate, but keep the data_map work just in
                # case.
                # TODO: Could we get away with using buffer() to avoid the
                #       memory copy?  Callers would need to realize they may
                #       not have a real string.
                if key == cur_offset_and_size:
                    yield cur_offset_and_size[0], this_data
                    cur_offset_and_size = next_offset[0] = offset_stack.next()
                else:
                    data_map[key] = this_data
            data_offset += c_offset.length

            # Now that we've read some data, see if we can yield anything back
            while cur_offset_and_size in data_map:
                this_data = data_map.pop(cur_offset_and_size)
                yield cur_offset_and_size[0], this_data
                cur_offset_and_size = next_offset[0] = offset_stack.next()
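
All of these examples raise errors.ShortReadvError when a readv()-style read returns fewer bytes than requested. For reference, here is a minimal standalone sketch of such an exception; this is an assumption for illustration, not bzrlib's own definition (the real class lives in bzrlib/errors.py):

class ShortReadvError(Exception):
    """readv() read fewer bytes than requested (illustrative sketch)."""

    def __init__(self, path, offset, length, actual, extra=None):
        Exception.__init__(self)
        self.path = path
        self.offset = offset
        self.length = length
        self.actual = actual
        self.extra = extra

    def __str__(self):
        return ('readv() read %s bytes rather than %s bytes at %s for "%s"'
                % (self.actual, self.length, self.offset, self.path))
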
Example #2
    def _translate_error(self, resp, orig_path=None):
        """Raise an exception from a response"""
        if resp is None:
            what = None
        else:
            what = resp[0]
        if what == 'ok':
            return
        elif what == 'NoSuchFile':
            if orig_path is not None:
                error_path = orig_path
            else:
                error_path = resp[1]
            raise errors.NoSuchFile(error_path)
        elif what == 'error':
            raise errors.SmartProtocolError(unicode(resp[1]))
        elif what == 'FileExists':
            raise errors.FileExists(resp[1])
        elif what == 'DirectoryNotEmpty':
            raise errors.DirectoryNotEmpty(resp[1])
        elif what == 'ShortReadvError':
            raise errors.ShortReadvError(resp[1], int(resp[2]),
                                         int(resp[3]), int(resp[4]))
        elif what in ('UnicodeEncodeError', 'UnicodeDecodeError'):
            encoding = str(resp[1])  # encoding must always be a string
            val = resp[2]
            start = int(resp[3])
            end = int(resp[4])
            reason = str(resp[5])  # reason must always be a string
            if val.startswith('u:'):
                val = val[2:].decode('utf-8')
            elif val.startswith('s:'):
                val = val[2:].decode('base64')
            if what == 'UnicodeDecodeError':
                raise UnicodeDecodeError(encoding, val, start, end, reason)
            elif what == 'UnicodeEncodeError':
                raise UnicodeEncodeError(encoding, val, start, end, reason)
        elif what == "ReadOnlyError":
            raise errors.TransportNotPossible('readonly transport')
        elif what == "ReadError":
            if orig_path is not None:
                error_path = orig_path
            else:
                error_path = resp[1]
            raise errors.ReadError(error_path)
        elif what == "PermissionDenied":
            if orig_path is not None:
                error_path = orig_path
            else:
                error_path = resp[1]
            raise errors.PermissionDenied(error_path)
        else:
            raise errors.SmartProtocolError(
                'unexpected smart server error: %r' % (resp,))
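
For illustration, the ShortReadvError branch above implies a wire response of the shape ('ShortReadvError', path, offset, length, actual), with the numeric fields transmitted as strings. A hypothetical round trip (all values invented):

resp = ('ShortReadvError', 'repo/pack-0', '1024', '4096', '512')
transport._translate_error(resp)
# raises errors.ShortReadvError('repo/pack-0', 1024, 4096, 512)
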
Example #3
    def _checked_read(self, size):
        """Read ``size`` bytes from the file, checking for short reads.

        The data read is discarded along the way.
        """
        pos = self._pos
        remaining = size
        while remaining > 0:
            data = self._file.read(min(remaining, self._discarded_buf_size))
            remaining -= len(data)
            if not data:
                raise errors.ShortReadvError(self._path, pos, size,
                                             size - remaining)
        self._pos += size
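
The same read-and-discard loop can be exercised standalone. A sketch with hypothetical names (skip_exactly is not part of the original class; ShortReadvError is the sketch class from Example #1, or errors.ShortReadvError):

import io

def skip_exactly(f, path, pos, size, bufsize=8192):
    """Read and discard exactly size bytes, raising on a short read."""
    remaining = size
    while remaining > 0:
        data = f.read(min(remaining, bufsize))
        if not data:
            # Report how many bytes actually arrived before EOF.
            raise ShortReadvError(path, pos, size, size - remaining)
        remaining -= len(data)
    return pos + size

# skip_exactly(io.BytesIO(b'abcdef'), 'f', 0, 4) returns 4, while asking a
# fresh 6-byte stream for 10 bytes raises ShortReadvError with actual=6.
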
Example #4
    def _curl_perform(self, curl, header, more_headers=None):
        """Perform a curl operation and translate exceptions."""
        if more_headers is None:
            # Avoid a mutable default argument.
            more_headers = []
        try:
            # There's no way in http/1.0 to say "must revalidate"; we don't
            # want to force it to always retrieve, so just turn off the
            # default Pragma provided by curl.
            headers = [
                'Cache-control: max-age=0', 'Pragma: no-cache',
                'Connection: Keep-Alive'
            ]
            curl.setopt(pycurl.HTTPHEADER, headers + more_headers)
            curl.perform()
        except pycurl.error, e:
            url = curl.getinfo(pycurl.EFFECTIVE_URL)
            trace.mutter('got pycurl error: %s, %s, %s, url: %s', e[0], e[1],
                         e, url)
            if e[0] in (
                    CURLE_COULDNT_RESOLVE_HOST,
                    CURLE_COULDNT_RESOLVE_PROXY,
                    CURLE_COULDNT_CONNECT,
                    CURLE_FTP_WEIRD_SERVER_REPLY,
                    CURLE_GOT_NOTHING,
                    CURLE_SSL_CACERT,
                    CURLE_SSL_CACERT_BADFILE,
            ):
                raise errors.ConnectionError(
                    'curl connection error (%s)\non %s' % (e[1], url))
            elif e[0] == CURLE_RECV_ERROR:
                raise errors.ConnectionReset(
                    'curl connection error (%s)\non %s' % (e[1], url))
            elif e[0] == CURLE_PARTIAL_FILE:
                # Pycurl itself has detected a short read.  We do not have
                # all the information for a full ShortReadvError, but this
                # should be enough.
                raise errors.ShortReadvError(
                    url,
                    offset='unknown',
                    length='unknown',
                    actual='unknown',
                    extra='Server aborted the request')
            raise
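
The CURLE_* names used above are module-level constants. One plausible binding maps them to pycurl's E_* error codes (an assumption; the original module may hard-code libcurl's numeric values instead):

import pycurl

CURLE_COULDNT_RESOLVE_HOST = pycurl.E_COULDNT_RESOLVE_HOST
CURLE_COULDNT_RESOLVE_PROXY = pycurl.E_COULDNT_RESOLVE_PROXY
CURLE_COULDNT_CONNECT = pycurl.E_COULDNT_CONNECT
CURLE_FTP_WEIRD_SERVER_REPLY = pycurl.E_FTP_WEIRD_SERVER_REPLY
CURLE_GOT_NOTHING = pycurl.E_GOT_NOTHING
CURLE_PARTIAL_FILE = pycurl.E_PARTIAL_FILE
CURLE_RECV_ERROR = pycurl.E_RECV_ERROR
CURLE_SSL_CACERT = pycurl.E_SSL_CACERT
# Older pycurl builds may lack this constant; 77 is libcurl's numeric code.
CURLE_SSL_CACERT_BADFILE = getattr(pycurl, 'E_SSL_CACERT_BADFILE', 77)
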
Example #5
    def _readv(self, relpath, offsets):
        """Get parts of the file at the given relative path.

        :param offsets: A list of (offset, size) tuples.
        :return: A list or generator of (offset, data) tuples
        """
        # offsets may be a generator; we will iterate it several times, so
        # build a list.
        offsets = list(offsets)

        try_again = True
        retried_offset = None
        while try_again:
            try_again = False

            # Coalesce the offsets to minimize the GET requests issued
            sorted_offsets = sorted(offsets)
            coalesced = self._coalesce_offsets(
                sorted_offsets,
                limit=self._max_readv_combine,
                fudge_factor=self._bytes_to_read_before_seek,
                max_size=self._get_max_size)

            # Turn it into a list; we will iterate it several times.
            coalesced = list(coalesced)
            if 'http' in debug.debug_flags:
                mutter('http readv of %s  offsets => %s collapsed %s', relpath,
                       len(offsets), len(coalesced))

            # Cache the data read, but only until it's been used
            data_map = {}
            # We will iterate over the data received from the GET requests
            # and serve the corresponding offsets in their original order.
            # We need an offset iterator for that.
            iter_offsets = iter(offsets)
            cur_offset_and_size = iter_offsets.next()

            try:
                for cur_coal, rfile in self._coalesce_readv(
                        relpath, coalesced):
                    # Split the received chunk
                    for offset, size in cur_coal.ranges:
                        start = cur_coal.start + offset
                        rfile.seek(start, os.SEEK_SET)
                        data = rfile.read(size)
                        data_len = len(data)
                        if data_len != size:
                            raise errors.ShortReadvError(relpath,
                                                         start,
                                                         size,
                                                         actual=data_len)
                        if (start, size) == cur_offset_and_size:
                            # The offsets requested are sorted like the
                            # coalesced ones; no need to cache. Win!
                            yield cur_offset_and_size[0], data
                            cur_offset_and_size = iter_offsets.next()
                        else:
                            # Different sorting. We need to cache.
                            data_map[(start, size)] = data

                    # Yield everything we can
                    while cur_offset_and_size in data_map:
                        # Drop the cached data once it has been used.
                        # XXX: will break if offsets contains duplicates --
                        # vila20071129
                        this_data = data_map.pop(cur_offset_and_size)
                        yield cur_offset_and_size[0], this_data
                        cur_offset_and_size = iter_offsets.next()

            except (errors.ShortReadvError, errors.InvalidRange,
                    errors.InvalidHttpRange, errors.HttpBoundaryMissing), e:
                mutter('Exception %r: %s during http._readv', e, e)
                if (not isinstance(e, errors.ShortReadvError)
                        or retried_offset == cur_offset_and_size):
                    # We don't degrade the range hint for ShortReadvError,
                    # since it doesn't indicate a problem with the server's
                    # ability to handle ranges. The exception is when we fail
                    # to get back a required offset twice in a row: in that
                    # case, falling back to a single range or the whole file
                    # should either help or end in a fatal exception.
                    self._degrade_range_hint(relpath, coalesced,
                                             sys.exc_info())
                # Some offsets may already have been processed, so we retry
                # only the unsuccessful ones.
                offsets = [cur_offset_and_size] + [o for o in iter_offsets]
                retried_offset = cur_offset_and_size
                try_again = True
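
The _coalesce_offsets call above is what turns many small (offset, size) pairs into a few ranged GET requests. A simplified, self-contained illustration of the idea (a hypothetical helper, not the original implementation; the limit and max_size parameters are ignored):

def coalesce(sorted_offsets, fudge_factor=0):
    """Merge sorted (offset, size) pairs separated by at most fudge_factor."""
    merged = []
    for offset, size in sorted_offsets:
        if merged and offset <= merged[-1][0] + merged[-1][1] + fudge_factor:
            # Extend the previous range to cover this one.
            start, length = merged[-1]
            merged[-1] = (start, max(length, offset + size - start))
        else:
            merged.append((offset, size))
    return merged

# coalesce([(0, 10), (10, 5), (100, 4)]) -> [(0, 15), (100, 4)]
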
Example #6
    def request_and_yield_offsets(self, fp):
        """Request the data from the remote machine, yielding the results.

        :param fp: A Paramiko SFTPFile object that supports readv.
        :return: Yield the data requested by the original readv caller, one by
            one.
        """
        requests = self._get_requests()
        offset_iter = iter(self.original_offsets)
        cur_offset, cur_size = offset_iter.next()
        # paramiko's .readv() yields strings in the order of the requests,
        # so we track the current request to know which request the next
        # data belongs to.
        input_start = None
        last_end = None
        buffered_data = []
        buffered_len = 0

        # This is used to buffer chunks which we couldn't process yet.
        # It holds (start, data) tuples.
        data_chunks = []
        # Create an 'unlimited' data stream, so we stop based on requests,
        # rather than just because the data stream ended. This lets us detect
        # short readv.
        data_stream = itertools.chain(fp.readv(requests),
                                      itertools.repeat(None))
        for (start, length), data in itertools.izip(requests, data_stream):
            if data is None:
                # The data stream ran out before every request was served;
                # nothing at all came back for this one.
                raise errors.ShortReadvError(self.relpath, start, length, 0)
            if len(data) != length:
                raise errors.ShortReadvError(self.relpath, start, length,
                                             len(data))
            self._report_activity(length, 'read')
            if last_end is None:
                # This is the first request, just buffer it
                buffered_data = [data]
                buffered_len = length
                input_start = start
            elif start == last_end:
                # The data we are reading fits neatly on the previous
                # buffer, so this is all part of a larger coalesced range.
                buffered_data.append(data)
                buffered_len += length
            else:
                # We have an 'interrupt' in the data stream. So we know we are
                # at a request boundary.
                if buffered_len > 0:
                    # We haven't consumed the buffer so far, so put it into
                    # data_chunks, and continue.
                    buffered = ''.join(buffered_data)
                    data_chunks.append((input_start, buffered))
                input_start = start
                buffered_data = [data]
                buffered_len = length
            last_end = start + length
            if input_start == cur_offset and cur_size <= buffered_len:
                # Simplify the next steps a bit by transforming buffered_data
                # into a single string. We also have the nice property that
                # when there is only one string ''.join([x]) == x, so there is
                # no data copying.
                buffered = ''.join(buffered_data)
                # Clean out buffered data so that we keep memory
                # consumption low
                del buffered_data[:]
                buffered_offset = 0
                # TODO: We *could* also consider the case where cur_offset is
                #       in the buffered range, even though it doesn't *start*
                #       the buffered range. But for packs we pretty much always
                #       read in order, so you won't get any extra data in the
                #       middle.
                while (input_start == cur_offset
                       and (buffered_offset + cur_size) <= buffered_len):
                    # We've buffered enough data to process this request, spit it
                    # out
                    cur_data = buffered[buffered_offset:buffered_offset +
                                        cur_size]
                    # move the direct pointer into our buffered data
                    buffered_offset += cur_size
                    # Move the start-of-buffer pointer
                    input_start += cur_size
                    # Yield the requested data
                    yield cur_offset, cur_data
                    cur_offset, cur_size = offset_iter.next()
                # at this point, we've consumed as much of buffered as we can,
                # so break off the portion that we consumed
                if buffered_offset == len(buffered):
                    # No tail to leave behind
                    buffered_data = []
                    buffered_len = 0
                else:
                    buffered = buffered[buffered_offset:]
                    buffered_data = [buffered]
                    buffered_len = len(buffered)
        # now that the data stream is done, close the handle
        fp.close()
        if buffered_len:
            buffered = ''.join(buffered_data)
            del buffered_data[:]
            data_chunks.append((input_start, buffered))
        if data_chunks:
            if 'sftp' in debug.debug_flags:
                mutter('SFTP readv left with %d out-of-order bytes',
                       sum(len(x[1]) for x in data_chunks))
            # We've processed all the readv data; anything we couldn't
            # serve in order is now in data_chunks. This doesn't happen
            # often, so this code path isn't optimized.
            # Chunks are looked up with bisect_left:
            # bisect_left([(start, data), ...], (cur_offset,)) returns the
            # index of the chunk whose start == cur_offset if one exists;
            # otherwise it points just past the chunk that may contain
            # cur_offset, so we step back one entry.
            while True:
                idx = bisect.bisect_left(data_chunks, (cur_offset,))
                if (idx < len(data_chunks)
                        and data_chunks[idx][0] == cur_offset):
                    # The data starts here
                    data = data_chunks[idx][1][:cur_size]
                elif idx > 0:
                    # The data is in a portion of a previous page
                    idx -= 1
                    sub_offset = cur_offset - data_chunks[idx][0]
                    data = data_chunks[idx][1]
                    data = data[sub_offset:sub_offset + cur_size]
                else:
                    # We are missing the page where the data should be found,
                    # something is wrong
                    data = ''
                if len(data) != cur_size:
                    raise AssertionError(
                        'We must have miscalculated.'
                        ' We expected %d bytes, but only found %d' %
                        (cur_size, len(data)))
                yield cur_offset, data
                cur_offset, cur_size = offset_iter.next()
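
A hypothetical driver for this generator, assuming paramiko and a helper class named _SFTPReadvHelper whose constructor takes the original offsets, the remote path, and an activity callback (those names are assumptions; only request_and_yield_offsets is shown above):

import paramiko

client = paramiko.SSHClient()
client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
client.connect('sftp.example.com', username='user')
sftp = client.open_sftp()
fp = sftp.open('repo/pack-0', 'rb')  # paramiko's SFTPFile supports readv()

helper = _SFTPReadvHelper([(0, 64), (4096, 128)], 'repo/pack-0',
                          report_activity)  # hypothetical callback
for offset, data in helper.request_and_yield_offsets(fp):
    consume(offset, data)  # hypothetical consumer; fp is closed by the helper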