Exemple #1
0
    def chunk_delete_many(self,
                          chunks,
                          cid=None,
                          concurrency=PARALLEL_CHUNKS_DELETE,
                          **kwargs):
        """
        Send a DELETE request for each of the specified chunks, in parallel.

        :param chunks: iterable of chunk descriptions, each one being
            a mapping with at least a 'url' key.
        :param cid: optional container ID, only sent as a request header.
        :param concurrency: size given to the `GreenPile` running
            the requests.
        :keyword headers: optional mapping of headers to send with each
            request. It is copied, never modified.
        :keyword timeout: optional timeout for each request. When missing
            or falsy, `urllib3.Timeout(CHUNK_TIMEOUT)` is used.
        :rtype: `list` of either `urllib3.response.HTTPResponse`
            or `urllib3.exceptions.HTTPError`, with an extra "chunk"
            attribute.
        """
        # Tolerate a missing (or None) 'headers' keyword instead of failing
        # with KeyError/AttributeError, and never alter the caller's mapping.
        headers = kwargs.get('headers')
        headers = dict() if headers is None else headers.copy()
        if cid is not None:
            # This is only to get a nice access log
            headers['X-oio-chunk-meta-container-id'] = cid
        timeout = kwargs.get('timeout')
        if not timeout:
            timeout = urllib3.Timeout(CHUNK_TIMEOUT)

        def __delete_chunk(chunk_):
            # HTTP errors are returned (not raised) so that one failure
            # does not prevent the other results from being collected.
            try:
                resp = self.http_pool.request("DELETE",
                                              self.resolve_url(chunk_['url']),
                                              headers=headers,
                                              timeout=timeout)
                resp.chunk = chunk_
                return resp
            except urllib3.exceptions.HTTPError as ex:
                ex.chunk = chunk_
                return ex

        pile = GreenPile(concurrency)
        for chunk in chunks:
            pile.spawn(__delete_chunk, chunk)
        # Falsy results are discarded
        return [resp for resp in pile if resp]
Exemple #2
0
    def get_stream(self):
        """
        Spawn one fragment reader per data fragment and build a started
        `ECStream` on top of the readers which answered 200 or 206.

        :returns: the started `ECStream`.
        :raises exceptions.ServiceUnavailable: when fewer than `ec_nb_data`
            readers answered with a 200 or 206 status.
        """
        range_infos = self._get_range_infos()
        chunk_iter = iter(self.chunks)
        nb_data = self.storage_method.ec_nb_data

        # The readers run in an eventlet pool, and their results
        # are gathered through a GreenPile bound to that pool.
        with green.ContextPool(nb_data) as pool:
            pile = GreenPile(pool)
            for _ in range(nb_data):
                pile.spawn(self._get_fragment, chunk_iter, range_infos,
                           self.storage_method)

            # Keep only the readers with a successful status
            # TODO log failures?
            readers = [(reader, parts_iter)
                       for reader, parts_iter in pile
                       if reader.status in (200, 206)]

        # With EC, at least ec_nb_data valid readers are required
        if len(readers) < nb_data:
            raise exceptions.ServiceUnavailable(
                'Not enough valid sources to read (%d/%d)' % (
                    len(readers), nb_data))

        # All readers should return the same Content-Length,
        # so the headers of the first one are enough.
        first_reader = readers[0][0]
        resp_headers = HeadersDict(first_reader.headers)
        fragment_length = int(resp_headers.get('Content-Length'))
        read_iterators = [parts_iter for _, parts_iter in readers]
        stream = ECStream(self.storage_method, read_iterators, range_infos,
                          self.meta_length, fragment_length)
        stream.start()
        return stream
Exemple #3
0
    def chunk_delete_many(self, chunks, cid=None,
                          concurrency=PARALLEL_CHUNKS_DELETE,
                          **kwargs):
        """
        Send a DELETE request for each of the specified chunks, in parallel.

        The 'headers' keyword argument, if any, is copied before use;
        the other keyword arguments are forwarded to `self._request`.

        :rtype: `list` of either `urllib3.response.HTTPResponse`
            or `urllib3.exceptions.HTTPError`, with an extra "chunk"
            attribute.
        """
        # Actually the None case should not happen
        # since ensure_request_id always sets the headers
        req_headers = kwargs.pop('headers', None)
        req_headers = dict() if req_headers is None else req_headers.copy()
        if cid is not None:
            # This is only to get a nice access log
            req_headers['X-oio-chunk-meta-container-id'] = cid

        def _delete_one(target):
            # HTTP errors are returned instead of being raised
            try:
                outcome = self._request(
                    "DELETE", target['url'], headers=req_headers, **kwargs)
            except urllib3.exceptions.HTTPError as err:
                err.chunk = target
                return err
            outcome.chunk = target
            return outcome

        pile = GreenPile(concurrency)
        for chunk in chunks:
            pile.spawn(_delete_one, chunk)

        # Falsy results are discarded
        results = []
        for outcome in pile:
            if outcome:
                results.append(outcome)
        return results
Exemple #4
0
    def rebuild(self):
        """
        Query all the chunks of the metachunk and prepare an iterator
        rebuilding data from the responses sharing the most common
        chunk size.

        :returns: a tuple with the assumed chunk size (None when no response
            carried a chunk size) and the rebuild iterator.
        :raises exceptions.UnrecoverableContent: when fewer than
            `ec_nb_data` usable responses are available.
        """
        nb_data = self.storage_method.ec_nb_data

        pile = GreenPile(len(self.meta_chunk))
        headers = {}
        for chunk in self.meta_chunk:
            pile.spawn(self._get_response, chunk, headers)

        # Group the responses by advertised chunk size, and keep aside
        # the ones which do not advertise any.
        sized = dict()
        unsized = list()
        for resp in pile:
            if not resp:
                continue
            chunk_size = int_value(
                resp.getheader(CHUNK_HEADERS['chunk_size'], None), None)
            if chunk_size is None:
                self.logger.warning('Missing chunk size')
                unsized.append(resp)
            else:
                sized.setdefault(chunk_size, list()).append(resp)
        total_resps = sum(len(group) for group in sized.values())

        if sized:
            # Majority vote on the chunk size (first seen wins a tie)
            assumed_chunk_size = max(
                sized, key=lambda size: len(sized[size]))
            resps = sized[assumed_chunk_size]
            max_resps = len(resps)
            if max_resps != total_resps:
                self.logger.warning(
                    '%d/%d chunks are not the same size as others (%d), '
                    'they should be removed',
                    total_resps - max_resps, total_resps, assumed_chunk_size)
        else:
            assumed_chunk_size = None
            max_resps = 0
            self.logger.warning(
                'No chunk available with chunk size information')
            resps = list()

        if max_resps < nb_data:
            # Not enough chunks: also use the ones without size information,
            # assuming they are the correct size.
            resps = resps + unsized
            if len(resps) < nb_data:
                self.logger.error(
                    'Unable to read enough valid sources to rebuild')
                raise exceptions.UnrecoverableContent(
                    'Not enough valid sources to rebuild')
            self.logger.warning(
                'Use chunk(s) without size information to rebuild a chunk')

        return assumed_chunk_size, self._make_rebuild_iter(resps[:nb_data])
Exemple #5
0
    def _get_writers(self):
        """
        Create one writer per chunk of the metachunk, in parallel.

        :returns: the list of what `self._get_writer` returned
            for each chunk.
        """
        # Each writer is set up in its own green thread
        pile = GreenPile(len(self.meta_chunk))
        for chunk in self.meta_chunk:
            pile.spawn(self._get_writer, chunk)
        return list(pile)
Exemple #6
0
 def frag_iter():
     """
     Yield rebuilt fragments until a source is exhausted or fails.

     At each round, one fragment is fetched from every response (in
     parallel, bounded by `self.read_timeout`), then the fragments are
     passed to `self._reconstruct`. The iteration stops on timeout,
     on any other error, or as soon as one fragment is falsy.
     """
     pile = GreenPile(len(resps))
     while True:
         for resp in resps:
             pile.spawn(_get_frag, resp)
         try:
             with Timeout(self.read_timeout):
                 frags = list(pile)
         except Timeout as to:
             logger.error('ERROR while rebuilding: %s', to)
             # Stop here: the fragment list was not (fully) read, going on
             # would use an unbound or stale list of fragments.
             break
         except Exception:
             logger.exception('ERROR while rebuilding')
             break
         if not all(frags):
             break
         yield self._reconstruct(frags)
Exemple #7
0
    def take_action(self, parsed_args):
        """
        Spawn `parsed_args.concurrency` warmup workers and feed them
        with all the prefixes, through a bounded queue.

        Progression is logged each time it advances by another 1%.
        """
        self.log.debug('take_action(%s)', parsed_args)
        client_manager = self.app.client_manager
        digits = client_manager.meta1_digits
        concurrency = parsed_args.concurrency

        conf = {'namespace': client_manager.namespace}
        if parsed_args.proxy:
            conf['proxyd_url'] = parsed_args.proxy
        else:
            # Fall back on the proxy declared in the namespace configuration
            conf['proxyd_url'] = client_manager.sds_conf.get('proxy')

        workers = list()
        with green.ContextPool(concurrency) as pool:
            pile = GreenPile(pool)
            prefix_queue = Queue(16)

            # Start the workers, all of them consuming the same queue
            for _ in range(concurrency):
                worker = WarmupWorker(client_manager.client_conf, self.log)
                workers.append(worker)
                pile.spawn(worker.run, prefix_queue)

            # Feed the queue, and display the progression
            step = 0.01
            next_trace = step
            total = float(count_prefixes(digits))
            for sent, prefix in enumerate(generate_prefixes(digits), 1):
                prefix_queue.put(prefix)
                ratio = float(sent) / total
                if ratio >= next_trace:
                    self.log.info("... %d%%", int(ratio * 100.0))
                    next_trace += step

            self.log.debug("Send the termination marker")
            # Wait for all the queued prefixes to be marked as done
            prefix_queue.join()

        self.log.info("All the workers are done")
Exemple #8
0
    def _get_results(self, writers):
        """
        Collect the responses of the writers and sort the chunks
        between successes and failures.

        `self.quorum_or_fail` is called with both lists before returning.

        :returns: the successful chunks followed by the failed ones.
        """
        succeeded = []
        failed = []

        # The responses are read in parallel, thanks to a GreenPile
        pile = GreenPile(len(writers))
        for writer in writers:
            if not writer.failed:
                pile.spawn(self._get_response, writer)
            else:
                # No response to expect from a writer which already failed
                failed.append(writer.chunk)

        for writer, resp in pile:
            self._dispatch_response(writer, resp, succeeded, failed)

        self.quorum_or_fail(succeeded, failed)
        return succeeded + failed
Exemple #9
0
    def rebuild(self):
        """
        Query all the chunks of the metachunk and prepare an iterator
        rebuilding data from the first `ec_nb_data` truthy responses.

        :returns: the rebuild iterator.
        :raises exceptions.UnrecoverableContent: when the responses run out
            before `ec_nb_data` truthy ones have been collected.
        """
        nb_data = self.storage_method.ec_nb_data

        pile = GreenPile(len(self.meta_chunk))
        headers = {}
        for chunk in self.meta_chunk:
            pile.spawn(self._get_response, chunk, headers)

        sources = []
        for resp in pile:
            if resp:
                sources.append(resp)
                # Stop waiting as soon as there are enough sources
                if len(sources) >= self.storage_method.ec_nb_data:
                    break
        else:
            # All responses consumed without reaching the required count
            logger.error('Unable to read enough valid sources to rebuild')
            raise exceptions.UnrecoverableContent(
                'Not enough valid sources to rebuild')

        return self._make_rebuild_iter(sources[:nb_data])
Exemple #10
0
    def _get_results(self, writers):
        """
        Collect the responses of the writers which have not failed yet.

        `self.quorum_or_fail` is called with the successful chunks
        and `self.failed_chunks` before returning.

        :returns: a list of chunks that have been uploaded.
        """
        uploaded = []

        # The responses are read in parallel, thanks to a GreenPile.
        # Writers flagged as failed are skipped: they are expected
        # to be in the failures list already.
        pile = GreenPile(len(writers))
        for writer in writers:
            if not writer.failed:
                pile.spawn(self._get_response, writer)

        for writer, resp in pile:
            self._dispatch_response(writer, resp, uploaded)

        self.quorum_or_fail(uploaded, self.failed_chunks)
        return uploaded
Exemple #11
0
    def stream(self, source, size):
        """
        Read data from `source` and push it to every chunk of the metachunk.

        A connection is opened for each chunk (`self._connect_put`), then
        the data read from the source is put in the queue of each connection
        still alive, while `self._send_data` runs for each connection
        in a green pool. `self.quorum_or_fail` is called after the
        connection phase, after each buffer, and after the responses
        have been handled.

        :param source: object with a `read(size)` method.
        :param size: maximum number of bytes to read from the source,
            or None to read until the source returns no more data.
        :returns: a tuple with the number of bytes transferred, the 'hash'
            of the first successful chunk, and the list of successful chunks.
        :raises SourceReadTimeout: when reading the source takes longer
            than `self.read_timeout`.
        :raises SourceReadError: when reading the source raises
            ValueError or IOError.
        :raises OioTimeout: when a `Timeout` occurs while sending data.
        """
        bytes_transferred = 0
        meta_chunk = self.meta_chunk
        # The metachunk checksum is optional
        if self.chunk_checksum_algo:
            meta_checksum = hashlib.new(self.chunk_checksum_algo)
        else:
            meta_checksum = None
        pile = GreenPile(len(meta_chunk))
        failed_chunks = []
        current_conns = []

        # Connect to all the chunks in parallel
        for chunk in meta_chunk:
            pile.spawn(self._connect_put, chunk)

        # A falsy connection means the connection attempt failed
        for conn, chunk in pile:
            if not conn:
                failed_chunks.append(chunk)
            else:
                current_conns.append(conn)

        self.quorum_or_fail([co.chunk for co in current_conns], failed_chunks)

        bytes_transferred = 0
        try:
            with green.ContextPool(len(meta_chunk)) as pool:
                # One sender per connection, fed through a bounded queue
                for conn in current_conns:
                    conn.failed = False
                    conn.queue = LightQueue(io.PUT_QUEUE_DEPTH)
                    pool.spawn(self._send_data, conn)

                while True:
                    # Never read more than what remains to be transferred
                    buffer_size = self.buffer_size()
                    if size is not None:
                        remaining_bytes = size - bytes_transferred
                        if buffer_size < remaining_bytes:
                            read_size = buffer_size
                        else:
                            read_size = remaining_bytes
                    else:
                        read_size = buffer_size
                    with green.SourceReadTimeout(self.read_timeout):
                        try:
                            data = source.read(read_size)
                        except (ValueError, IOError) as err:
                            raise SourceReadError(str(err))
                        if len(data) == 0:
                            # End of input: queue an empty string for each
                            # live connection (presumably the end-of-stream
                            # marker expected by _send_data -- TODO confirm)
                            for conn in current_conns:
                                if not conn.failed:
                                    conn.queue.put('')
                            break
                    self.checksum.update(data)
                    if meta_checksum:
                        meta_checksum.update(data)
                    bytes_transferred += len(data)
                    # copy current_conns to be able to remove a failed conn
                    for conn in current_conns[:]:
                        if not conn.failed:
                            conn.queue.put(data)
                        else:
                            current_conns.remove(conn)
                            failed_chunks.append(conn.chunk)

                    self.quorum_or_fail([co.chunk for co in current_conns],
                                        failed_chunks)

                # Yield to the other green threads until all the queues
                # are empty
                for conn in current_conns:
                    while conn.queue.qsize():
                        sleep(0)

        except green.SourceReadTimeout as err:
            logger.warn('Source read timeout (reqid=%s): %s', self.reqid, err)
            raise SourceReadTimeout(err)
        except SourceReadError as err:
            logger.warn('Source read error (reqid=%s): %s', self.reqid, err)
            raise
        except Timeout as to:
            logger.error('Timeout writing data (reqid=%s): %s', self.reqid, to)
            raise OioTimeout(to)
        except Exception:
            logger.exception('Exception writing data (reqid=%s)', self.reqid)
            raise

        success_chunks = []

        # Fetch the responses in parallel, except for the connections
        # which failed while the last buffer was being sent
        for conn in current_conns:
            if conn.failed:
                failed_chunks.append(conn.chunk)
                continue
            pile.spawn(self._get_response, conn)

        # NOTE(review): a falsy response is ignored here, its chunk is added
        # to neither list by this method -- confirm _get_response handles it
        for (conn, resp) in pile:
            if resp:
                self._handle_resp(
                    conn, resp,
                    meta_checksum.hexdigest() if meta_checksum else None,
                    success_chunks, failed_chunks)
        self.quorum_or_fail(success_chunks, failed_chunks)

        for chunk in success_chunks:
            chunk["size"] = bytes_transferred

        return bytes_transferred, success_chunks[0]['hash'], success_chunks
Exemple #12
0
    def _stream(self, source, size, writers):
        """
        Read data from `source`, encode it and send it through the writers.

        Each buffer read from the source is passed to
        `self.encode_and_send`, which returns the writers to use for
        the next buffer. At the end, every writer is asked to finish with
        the metachunk size and hash, and the chunks reported as failed are
        appended to `self.failed_chunks` (unless a chunk with the same 'url'
        is already there).

        :param source: object with a `read(size)` method.
        :param size: maximum number of bytes to read from the source. When
            falsy (None or 0), the source is read until it returns no more
            data.
        :param writers: the writers to start and to send data through.
        :returns: the number of bytes read from the source.
        :raises exceptions.SourceReadTimeout: when reading the source takes
            longer than `self.read_timeout`.
        :raises SourceReadError: when reading the source raises
            ValueError or IOError.
        :raises exceptions.OioTimeout: when a `Timeout` occurs while
            writing data.
        """
        bytes_transferred = 0

        # create EC encoding generator
        ec_stream = ec_encode(self.storage_method, len(self.meta_chunk))
        # init generator
        ec_stream.send(None)

        try:
            # we use eventlet GreenPool to manage writers
            with ContextPool(len(writers) * 2) as pool:
                # init writers in pool
                for writer in writers:
                    writer.start(pool)

                def read(read_size):
                    # Read from the source within the read timeout, and
                    # convert low-level errors into SourceReadError
                    with SourceReadTimeout(self.read_timeout):
                        try:
                            data = source.read(read_size)
                        except (ValueError, IOError) as exc:
                            raise SourceReadError(str(exc))
                    return data

                # the main write loop
                # Maintain a list of writers which continue writing
                # TODO(FVE): use an instance variable
                # to maintain the list of writers
                curr_writers = writers
                if size:
                    # Known size: never read more than what remains
                    while True:
                        buffer_size = self.buffer_size()
                        remaining_bytes = size - bytes_transferred
                        if buffer_size < remaining_bytes:
                            read_size = buffer_size
                        else:
                            read_size = remaining_bytes
                        data = read(read_size)
                        bytes_transferred += len(data)
                        if len(data) == 0:
                            break
                        curr_writers = self.encode_and_send(
                            ec_stream, data, curr_writers)
                else:
                    # No size given: read until the source is exhausted
                    while True:
                        data = read(self.buffer_size())
                        bytes_transferred += len(data)
                        if len(data) == 0:
                            break
                        curr_writers = self.encode_and_send(
                            ec_stream, data, curr_writers)

                # flush out buffered data
                self.encode_and_send(ec_stream, '', curr_writers)

                # trailer headers
                # metachunk size
                # metachunk hash
                # NOTE(review): self.checksum is not updated in this method,
                # presumably encode_and_send does it -- TODO confirm
                metachunk_size = bytes_transferred
                metachunk_hash = self.checksum.hexdigest()

                # NOTE(review): finish is called on all the initial writers,
                # not only on curr_writers
                finish_pile = GreenPile(pool)
                for writer in writers:
                    finish_pile.spawn(writer.finish, metachunk_size,
                                      metachunk_hash)
                for just_failed in finish_pile:
                    # Avoid reporting problems twice
                    if just_failed and not any(x['url'] == just_failed['url']
                                               for x in self.failed_chunks):
                        self.failed_chunks.append(just_failed)

                return bytes_transferred

        except SourceReadTimeout as exc:
            self.logger.warn('%s (reqid=%s)', exc, self.reqid)
            raise exceptions.SourceReadTimeout(exc)
        except SourceReadError as exc:
            self.logger.warn('Source read error (reqid=%s): %s', self.reqid,
                             exc)
            raise
        except Timeout as to:
            self.logger.warn('Timeout writing data (reqid=%s): %s', self.reqid,
                             to)
            # Not the same class as the globally imported OioTimeout class
            raise exceptions.OioTimeout(to)
        except Exception:
            self.logger.exception('Exception writing data (reqid=%s)',
                                  self.reqid)
            raise