    def _apply_fun(self, blob_generator, fun):
        threadsafe_generator = ThreadsafeIter(blob_generator, len(self.pipelines))
        executor = concurrent.futures.ThreadPoolExecutor(max_workers=len(self.pipelines))
        # Run fun on every pipeline; each call returns a generator over that pipeline's output blobs
        futures = [executor.submit(fun, pipeline, threadsafe_generator) for pipeline in self.pipelines]
        generators = [future.result() for future in concurrent.futures.as_completed(futures)]
        while True:
            # Draw the next blob from every pipeline; None marks an exhausted generator
            out_blobs = [next(gen, None) for gen in generators]

            # If all pipelines are exhausted, we are done
            if all(blob is None for blob in out_blobs):
                break

            # All pipelines must yield the same element (same UUID) in lockstep
            blob_uuids = [blob.meta.uuid for blob in out_blobs if blob is not None]
            if len(blob_uuids) != len(self.pipelines) or blob_uuids.count(blob_uuids[0]) != len(blob_uuids):
                logging.error("Number of elements changed within ParallelAlgorithm pipelines. This is not allowed!")
                raise Exception("Number of elements changed within ParallelAlgorithm pipelines.")

            # Concatenate the per-pipeline feature vectors into a single output blob
            b = Blob()
            b.data = hstack([blob.data.ravel() for blob in out_blobs])
            b.meta = out_blobs[0].meta
            yield b
        logging.info("Finished training in ParallelAlgorithm")
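The merge step in _apply_fun simply concatenates the per-pipeline feature vectors horizontally; a self-contained toy illustration of that concatenation (variable names are illustrative, not part of the class):

from numpy import hstack

# Features produced by two pipelines for the same input blob
features_a = [1.0, 2.0]
features_b = [3.0, 4.0, 5.0]
merged = hstack([features_a, features_b])
print(merged)  # [1. 2. 3. 4. 5.]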
    def _train(self, blob_generator):
        # First, collect all elements of the input
        data = []
        labels = []
        metas = []
        for blob in blob_generator:
            if self.use_sparse is None:
                # Determine automatically by comparing size
                sparse_vec = scipy.sparse.csr_matrix(blob.data.ravel())
                sparse_memory_req = sparse_vec.data.nbytes + sparse_vec.indptr.nbytes + sparse_vec.indices.nbytes
                self.use_sparse = sparse_memory_req < blob.data.nbytes
                logging.debug(
                    'Using sparse format for collecting features: %s' %
                    self.use_sparse)
                logging.debug('Blob data needs %i bytes' % blob.data.nbytes)
                logging.debug('%i bytes with sparse vs %i bytes with dense' %
                              (sparse_memory_req, blob.data.nbytes))

            if self.use_sparse:
                data.append(scipy.sparse.csr_matrix(blob.data.ravel()))
            else:
                data.append(blob.data.ravel())
            labels.append(blob.meta.label)
            metas.append(blob.meta)

        # Stack the data into a matrix explicitly here, as both fit and predict
        # would do this stacking otherwise
        try:
            if self.use_sparse:
                data = scipy.sparse.vstack(data)
                data = data.astype(np.float64)
            else:
                data = np.array(data, dtype=np.float64)
        except ValueError:
            msg = "All feature vectors need to have the same length for classifier training."
            logging.error(msg)
            raise Exception(msg)

        logging.warning(
            'Training the model with feature dim %i; this might take a while' %
            data.shape[1])
        self.model.fit(data, labels)
        logging.warning('Finished training the model')

        for (d, m) in zip(self.model.decision_function(data), metas):
            b = Blob()
            b.data = d
            b.meta = m
            yield b
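The automatic sparse/dense switch above compares the memory footprint of a CSR copy of one feature vector with its dense buffer; a self-contained sketch of that comparison (the vector and variable names are illustrative, not part of the class):

import numpy as np
import scipy.sparse

# Mostly-zero feature vector: the CSR copy only stores data, indices and indptr
dense_vec = np.zeros(10000, dtype=np.float64)
dense_vec[::500] = 1.0                       # 20 non-zero entries
sparse_vec = scipy.sparse.csr_matrix(dense_vec)
sparse_memory_req = (sparse_vec.data.nbytes
                     + sparse_vec.indptr.nbytes
                     + sparse_vec.indices.nbytes)
print(sparse_memory_req, dense_vec.nbytes)   # e.g. 248 vs 80000 bytes
print(sparse_memory_req < dense_vec.nbytes)  # True, so the sparse format is used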
    def _train(self, blob_generator):
        # Example template: collect all data at once if the algorithm needs it,
        # but remember the metas so the output blobs stay associated with their inputs.
        data = []
        labels = []
        metas = []
        for blob in blob_generator:
            data.append(blob.data.ravel())
            labels.append(blob.meta.label)
            metas.append(blob.meta)
        numpy_data = vstack(data)

        # process numpy_data
        # ...

        # Create generator for next layer
        for d, m in zip(data, metas):
            b = Blob()
            b.data = d
            b.meta = m
            yield b
    def _train(self, blob_generator):
        # First, collect all elements of the input
        data = []
        labels = []
        metas = []
        for blob in blob_generator:
            data.append(self._add_bias(blob.data.ravel()))
            labels.append(blob.meta.label)
            metas.append(blob.meta)
        try:
            data = vstack(data)
        except ValueError:
            msg = "Size of all input data needs to be the same for SVM training."
            logging.error(msg)
            raise Exception(msg)

        self.svm_model.fit(data, labels)

        for (d, m) in zip(self.svm_model.predict(data), metas):
            b = Blob()
            b.data = d
            b.meta = m
            yield b
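The _add_bias helper used above is not shown in this snippet; a minimal sketch of its assumed behaviour (appending a constant bias feature), assuming numpy is imported as np as elsewhere in this code, and not the actual implementation:

    def _add_bias(self, vec):
        # Assumption: append a constant 1.0 so the SVM can fit a bias term even
        # without a separate intercept; the real helper may differ.
        return np.append(vec, 1.0)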