Ejemplo n.º 1
0
    def fit(self, data):
        """
        Fit the current model with the inputted data.

        Cluster centers are initialized according to ``self.init``
        (``'random'``, ``'kmeans++'`` or an explicit ndarray), then passed
        through the ``_minibatch`` routines. When ``self.compute_labels`` is
        set, the result of ``_minibatch.assign_centroids`` is stored in
        ``self.labels_``.

        :param data: Samples to fit model with
        :type data: ndarray(double), shape = (n_samples, n_features)
        :returns: Trained MiniBatch instance
        :rtype: MiniBatch
        :raises TypeError: if ``self.init`` is an ndarray that is not
                           C-contiguous or whose shape is not
                           (n_clusters, n_features)
        """
        data = np.asarray(data, dtype=np.double)
        centers_shape = (self.n_clusters, data.shape[1])
        if self.verbose:
            logger.info("Initializing clusters")
        # The ndarray case is tested first: ``ndarray == 'random'`` is an
        # element-wise comparison on recent NumPy, and using its result in an
        # ``if`` raises ValueError (ambiguous truth value).
        if isinstance(self.init, np.ndarray):
            if not self.init.flags['C_CONTIGUOUS']:
                raise TypeError("init ndarray must be C_CONTIGUOUS")
            elif self.init.shape != centers_shape:
                raise TypeError(
                    "init cluster not of correct shape "
                    "%r != (%d, %d)" %
                    (self.init.shape, self.n_clusters, data.shape[1]))
            self.cluster_centers_ = self.init
        elif self.init == 'random':
            # np.random.random accepts only a size argument (no dtype) and
            # already returns double-precision values.
            self.cluster_centers_ = np.random.random(centers_shape)
        elif self.init == 'kmeans++':
            self.cluster_centers_ = np.zeros(centers_shape, dtype=np.double)
            jobs = min(self.n_jobs, self.n_init)
            if jobs > 1:
                self.cluster_centers_ = _minibatch.kmeanspp_multi(
                    data, self.cluster_centers_, self.n_samples, self.n_init,
                    jobs)
            else:
                self.cluster_centers_ = _minibatch.kmeanspp(
                    data, self.cluster_centers_, self.n_samples)
        # NOTE(review): any other ``init`` value leaves cluster_centers_
        # untouched here, exactly as before -- confirm whether that is
        # intended or should be rejected.

        if self.verbose:
            logger.info("Running minibatch")
        jobs = min(self.n_jobs, self.n_runs)
        if jobs > 1:
            self.cluster_centers_ = _minibatch.minibatch_multi(
                data, self.cluster_centers_, self.n_samples, self.max_iter,
                self.n_runs, jobs, self.bic_termination,
                self.reassignment_ratio)
        else:
            self.cluster_centers_ = _minibatch.minibatch(
                data, self.cluster_centers_, self.n_samples, self.max_iter,
                self.bic_termination, self.reassignment_ratio)

        if self.compute_labels:
            if self.verbose:
                logger.info("Computing labels")
            self.labels_ = np.zeros((data.shape[0], ), dtype=np.intc)
            self.labels_ = _minibatch.assign_centroids(data,
                                                       self.cluster_centers_,
                                                       self.labels_,
                                                       self.n_jobs)

        return self
Ejemplo n.º 2
0
    def fit(self, data):
        """
        Fit the current model with the inputted data.

        Initial cluster centers come from ``self.init``: an explicit ndarray,
        ``'random'`` or ``'kmeans++'``. They are then handed to the
        ``_minibatch`` routines, and when ``self.compute_labels`` is set the
        output of ``_minibatch.assign_centroids`` is kept in ``self.labels_``.

        :param data: Samples to fit model with
        :type data: ndarray(double), shape = (n_samples, n_features)
        :returns: Trained MiniBatch instance
        :rtype: MiniBatch
        :raises TypeError: if ``self.init`` is an ndarray that is not
                           C-contiguous or whose shape is not
                           (n_clusters, n_features)
        """
        data = np.asarray(data, dtype=np.double)
        n_features = data.shape[1]
        if self.verbose:
            logger.info("Initializing clusters")
        # Check for an ndarray before any string comparison: on recent NumPy
        # ``ndarray == 'random'`` compares element-wise, and the resulting
        # array cannot be used as an ``if`` condition (ValueError).
        if isinstance(self.init, np.ndarray):
            if not self.init.flags['C_CONTIGUOUS']:
                raise TypeError("init ndarray must be C_CONTIGUOUS")
            elif self.init.shape != (self.n_clusters, n_features):
                raise TypeError("init cluster not of correct shape "
                                "%r != (%d, %d)" % (self.init.shape,
                                                    self.n_clusters,
                                                    n_features))
            self.cluster_centers_ = self.init
        elif self.init == 'random':
            # No dtype keyword: np.random.random only takes a size and its
            # samples are already doubles.
            self.cluster_centers_ = np.random.random(
                (self.n_clusters, n_features))
        elif self.init == 'kmeans++':
            self.cluster_centers_ = np.zeros(
                (self.n_clusters, n_features), dtype=np.double)
            jobs = min(self.n_jobs, self.n_init)
            if jobs > 1:
                self.cluster_centers_ = _minibatch.kmeanspp_multi(
                    data, self.cluster_centers_, self.n_samples,
                    self.n_init, jobs)
            else:
                self.cluster_centers_ = _minibatch.kmeanspp(
                    data, self.cluster_centers_, self.n_samples)
        # NOTE(review): an unrecognized ``init`` assigns nothing here, as in
        # the previous version -- confirm whether it should be an error.

        if self.verbose:
            logger.info("Running minibatch")
        jobs = min(self.n_jobs, self.n_runs)
        if jobs > 1:
            self.cluster_centers_ = _minibatch.minibatch_multi(
                data, self.cluster_centers_, self.n_samples, self.max_iter,
                self.n_runs, jobs, self.bic_termination,
                self.reassignment_ratio)
        else:
            self.cluster_centers_ = _minibatch.minibatch(
                data, self.cluster_centers_, self.n_samples, self.max_iter,
                self.bic_termination, self.reassignment_ratio)

        if self.compute_labels:
            if self.verbose:
                logger.info("Computing labels")
            self.labels_ = np.zeros((data.shape[0], ), dtype=np.intc)
            self.labels_ = _minibatch.assign_centroids(
                data, self.cluster_centers_, self.labels_, self.n_jobs)

        return self
Ejemplo n.º 3
0
    def predict(self, data):
        """
        Labels the data given the fitted model.

        :param data: Samples to classify
        :type data: ndarray(double), shape = (n_samples, n_features)
        :returns: Index into MiniBatch.cluster_centers_ for each datapoint in
                  data
        :rtype: ndarray(intc), shape = (n_samples,)
        :raises AssertionError: if ``cluster_centers_`` is None
        """
        # Raised explicitly instead of via ``assert`` so the guard is not
        # stripped when Python runs with -O; the exception type is unchanged.
        if self.cluster_centers_ is None:
            raise AssertionError("Model not yet fitted")
        # Same conversion as fit(), so array-likes and non-double arrays are
        # accepted here too.
        data = np.asarray(data, dtype=np.double)
        labels = np.zeros((data.shape[0], ), dtype=np.intc)
        labels = _minibatch.assign_centroids(
            data, self.cluster_centers_, labels, self.n_jobs)
        return labels
Ejemplo n.º 4
0
    def predict(self, data):
        """
        Labels the data given the fitted model.

        :param data: Samples to classify
        :type data: ndarray(double), shape = (n_samples, n_features)
        :returns: Index into MiniBatch.cluster_centers_ for each datapoint in
                  data
        :rtype: ndarray(intc), shape = (n_samples,)
        :raises AssertionError: if ``cluster_centers_`` is None
        """
        # An explicit raise survives ``python -O``, unlike an ``assert``
        # statement; callers still see an AssertionError.
        if self.cluster_centers_ is None:
            raise AssertionError("Model not yet fitted")
        # Mirror fit(): coerce the input to a double ndarray before reading
        # its shape or handing it to the extension.
        samples = np.asarray(data, dtype=np.double)
        labels = np.zeros((samples.shape[0], ), dtype=np.intc)
        return _minibatch.assign_centroids(samples, self.cluster_centers_,
                                           labels, self.n_jobs)