def fit(self, data):
    """
    Fit the current model with the inputted data.

    Cluster centers are first initialised according to ``self.init``
    (``'random'``, ``'kmeans++'`` or an explicit ndarray of centers) and
    then refined with ``_minibatch.minibatch`` (or
    ``_minibatch.minibatch_multi`` when more than one job is usable).
    When ``self.compute_labels`` is set, a label for every sample in
    ``data`` is stored in ``self.labels_``.

    :param data: Samples to fit model with
    :type data: ndarray(double), shape = (n_samples, n_features)
    :returns: Trained MiniBatch instance
    :rtype: MiniBatch
    :raises TypeError: if ``self.init`` is an ndarray that is not
        C-contiguous or not of shape (n_clusters, n_features)
    :raises ValueError: if ``self.init`` is not one of the supported values
    """
    data = np.asarray(data, dtype=np.double)
    centers_shape = (self.n_clusters, data.shape[1])

    if self.verbose:
        logger.info("Initializing clusters")

    # The ndarray case is tested first: comparing an ndarray with a string
    # via ``==`` may produce an array, whose truth value is ambiguous.
    if isinstance(self.init, np.ndarray):
        if not self.init.flags['C_CONTIGUOUS']:
            raise TypeError("init ndarray must be C_CONTIGUOUS")
        elif self.init.shape != centers_shape:
            raise TypeError(
                "init cluster not of correct shape "
                "%r != (%d, %d)" % (self.init.shape, self.n_clusters,
                                    data.shape[1]))
        self.cluster_centers_ = self.init
    elif self.init == 'random':
        # np.random.random only takes a size and already yields float64;
        # it does not accept a ``dtype`` keyword.
        self.cluster_centers_ = np.random.random(centers_shape)
    elif self.init == 'kmeans++':
        self.cluster_centers_ = np.zeros(centers_shape, dtype=np.double)
        jobs = min(self.n_jobs, self.n_init)
        if jobs > 1:
            self.cluster_centers_ = _minibatch.kmeanspp_multi(
                data, self.cluster_centers_, self.n_samples,
                self.n_init, jobs)
        else:
            self.cluster_centers_ = _minibatch.kmeanspp(
                data, self.cluster_centers_, self.n_samples)
    else:
        # Fail early instead of running with unset or stale centers.
        raise ValueError(
            "init must be 'random', 'kmeans++' or an ndarray, got %r"
            % (self.init,))

    if self.verbose:
        logger.info("Running minibatch")

    jobs = min(self.n_jobs, self.n_runs)
    if jobs > 1:
        self.cluster_centers_ = _minibatch.minibatch_multi(
            data, self.cluster_centers_, self.n_samples, self.max_iter,
            self.n_runs, jobs, self.bic_termination,
            self.reassignment_ratio)
    else:
        self.cluster_centers_ = _minibatch.minibatch(
            data, self.cluster_centers_, self.n_samples, self.max_iter,
            self.bic_termination, self.reassignment_ratio)

    if self.compute_labels:
        if self.verbose:
            logger.info("Computing labels")
        self.labels_ = np.zeros((data.shape[0], ), dtype=np.intc)
        self.labels_ = _minibatch.assign_centroids(
            data, self.cluster_centers_, self.labels_, self.n_jobs)

    return self
def fit(self, data):
    """
    Fit the current model with the inputted data.

    Cluster centers are first initialised according to ``self.init``
    (``'random'``, ``'kmeans++'`` or an explicit ndarray of centers) and
    then refined with ``_minibatch.minibatch`` (or
    ``_minibatch.minibatch_multi`` when more than one job is usable).
    When ``self.compute_labels`` is set, a label for every sample in
    ``data`` is stored in ``self.labels_``.

    :param data: Samples to fit model with
    :type data: ndarray(double), shape = (n_samples, n_features)
    :returns: Trained MiniBatch instance
    :rtype: MiniBatch
    :raises TypeError: if ``self.init`` is an ndarray that is not
        C-contiguous or not of shape (n_clusters, n_features)
    :raises ValueError: if ``self.init`` is not one of the supported values
    """
    data = np.asarray(data, dtype=np.double)
    centers_shape = (self.n_clusters, data.shape[1])

    if self.verbose:
        logger.info("Initializing clusters")

    # The ndarray case is tested first: comparing an ndarray with a string
    # via ``==`` may produce an array, whose truth value is ambiguous.
    if isinstance(self.init, np.ndarray):
        if not self.init.flags['C_CONTIGUOUS']:
            raise TypeError("init ndarray must be C_CONTIGUOUS")
        elif self.init.shape != centers_shape:
            raise TypeError(
                "init cluster not of correct shape "
                "%r != (%d, %d)" % (self.init.shape, self.n_clusters,
                                    data.shape[1]))
        self.cluster_centers_ = self.init
    elif self.init == 'random':
        # np.random.random only takes a size and already yields float64;
        # it does not accept a ``dtype`` keyword.
        self.cluster_centers_ = np.random.random(centers_shape)
    elif self.init == 'kmeans++':
        self.cluster_centers_ = np.zeros(centers_shape, dtype=np.double)
        jobs = min(self.n_jobs, self.n_init)
        if jobs > 1:
            self.cluster_centers_ = _minibatch.kmeanspp_multi(
                data, self.cluster_centers_, self.n_samples,
                self.n_init, jobs)
        else:
            self.cluster_centers_ = _minibatch.kmeanspp(
                data, self.cluster_centers_, self.n_samples)
    else:
        # Fail early instead of running with unset or stale centers.
        raise ValueError(
            "init must be 'random', 'kmeans++' or an ndarray, got %r"
            % (self.init,))

    if self.verbose:
        logger.info("Running minibatch")

    jobs = min(self.n_jobs, self.n_runs)
    if jobs > 1:
        self.cluster_centers_ = _minibatch.minibatch_multi(
            data, self.cluster_centers_, self.n_samples, self.max_iter,
            self.n_runs, jobs, self.bic_termination,
            self.reassignment_ratio)
    else:
        self.cluster_centers_ = _minibatch.minibatch(
            data, self.cluster_centers_, self.n_samples, self.max_iter,
            self.bic_termination, self.reassignment_ratio)

    if self.compute_labels:
        if self.verbose:
            logger.info("Computing labels")
        self.labels_ = np.zeros((data.shape[0], ), dtype=np.intc)
        self.labels_ = _minibatch.assign_centroids(
            data, self.cluster_centers_, self.labels_, self.n_jobs)

    return self
def predict(self, data):
    """
    Labels the data given the fitted model.

    :param data: Samples to classify
    :type data: ndarray(double), shape = (n_samples, n_features)
    :returns: Index into MiniBatch.cluster_centers_ for each datapoint in
        data
    :rtype: ndarray(intc), shape = (n_samples,)
    :raises AssertionError: if ``self.cluster_centers_`` is None, i.e. the
        model has not been fitted
    """
    # Raised explicitly (rather than via ``assert``) so the check is still
    # performed when Python runs with -O; the exception type is unchanged.
    if self.cluster_centers_ is None:
        raise AssertionError("Model not yet fitted")

    # Same conversion as fit(), so array-likes such as nested lists work.
    data = np.asarray(data, dtype=np.double)

    labels = np.zeros((data.shape[0], ), dtype=np.intc)
    labels = _minibatch.assign_centroids(
        data, self.cluster_centers_, labels, self.n_jobs)
    return labels