def main(args=None):
    """Benchmark clustering over a range of cluster counts.

    Parameters
    ----------
    args : optional
        Forwarded unchanged to ``parse_args``. The parsed namespace is
        expected to expose ``start``, ``stop``, ``step``, ``factor`` and
        ``scheduler_address``.

    Notes
    -----
    For every cluster count in ``range(start, stop, step)`` the model is
    built with ``do`` and one row ``(n_clusters, factor, time, inertia)``
    is collected, where ``time`` is the difference between the two
    ``tic()`` readings taken around the fit. All rows are written to the
    relative path ``timings.csv`` once the loop has finished.
    """
    args = parse_args(args)
    steps = range(args.start, args.stop, args.step)

    if args.scheduler_address:
        client = Client(args.scheduler_address)
        info = client.scheduler_info()
        logger.info("Distributed mode: %s", client.scheduler)
        logger.info("Dashboard: %s:%s", info["address"], info["services"]["bokeh"])
    else:
        logger.warning("Local mode")

    logger.info("Fitting for %s", list(steps))

    logger.info("Reading data")
    X = read().pipe(transform).pipe(as_array)
    (X,) = persist(X)

    timings = []

    # A range can be iterated repeatedly, so reuse the one logged above
    # rather than building an identical second range.
    for n_clusters in steps:
        logger.info("Starting %02d", n_clusters)
        t0 = tic()
        with _timer(n_clusters, _logger=logger):
            km = do(X, n_clusters, factor=args.factor)
        t1 = tic()
        logger.info("Cluster Centers [%s]:\n%s", n_clusters, km.cluster_centers_)
        inertia = km.inertia_.compute()
        # The bracketed label is the cluster count, matching the
        # "Cluster Centers" message just above.
        logger.info("Inertia [%s]: %s", n_clusters, inertia)
        timings.append((n_clusters, args.factor, t1 - t0, inertia))

    pd.DataFrame(
        timings, columns=["n_clusters", "factor", "time", "inertia"]
    ).to_csv("timings.csv")
def partial_fit(self, X, y=None, **kwargs):
    """Incrementally fit the underlying estimator.

    Parameters
    ----------
    X, y : array-like
    **kwargs
        Additional keyword arguments passed through to the underlying
        estimator's ``partial_fit``.

    Returns
    -------
    self : object
    """
    logger.info("Starting partial_fit")
    # Label the timer after the operation actually being timed so timing
    # output is not confused with that of ``fit``.
    with _timer("partial_fit", _logger=logger):
        result = self.estimator.partial_fit(X, y, **kwargs)

    # Copy over learned attributes
    copy_learned_attributes(result, self)
    copy_learned_attributes(result, self.estimator)
    return self
def fit(data, use_scikit_learn=False, n_clusters=8, oversampling_factor=2):
    """Fit a k-means model to ``data`` and time the fit.

    Parameters
    ----------
    data : array-like
        Passed directly to the estimator's ``fit`` method.
    use_scikit_learn : bool, default False
        When true, ``sk.KMeans`` is used; otherwise ``KMeans`` is used.
    n_clusters : int, default 8
        Number of clusters requested from either estimator.
    oversampling_factor : int, default 2
        Forwarded only to ``KMeans`` (the non-``sk`` estimator). It is
        logged in both cases.

    Returns
    -------
    km : estimator
        The estimator instance after ``km.fit(data)`` has been called.
    """
    logger.info("Starting to cluster")

    # Both estimators are seeded identically (random_state=0).
    if use_scikit_learn:
        km = sk.KMeans(n_clusters=n_clusters, random_state=0)
    else:
        km = KMeans(
            n_clusters=n_clusters,
            oversampling_factor=oversampling_factor,
            random_state=0,
        )

    logger.info(
        "Starting n_clusters=%2d, oversampling_factor=%2d",
        n_clusters,
        oversampling_factor,
    )
    with _timer("km.fit", _logger=logger):
        km.fit(data)

    # Hand the fitted model back so callers can inspect or reuse it.
    return km
def fit(self, X, y=None, **kwargs):
    """Train the wrapped estimator and mirror what it learned.

    Parameters
    ----------
    X, y : array-like
    **kwargs
        Extra keyword arguments handed straight to the wrapped
        estimator's ``fit``.

    Returns
    -------
    self : object
    """
    logger.info("Starting fit")

    with _timer("fit", _logger=logger):
        fitted = self.estimator.fit(X, y, **kwargs)

    # Propagate the learned attributes: first onto this wrapper, then
    # onto the wrapped estimator.
    copy_learned_attributes(fitted, self)
    copy_learned_attributes(fitted, self.estimator)

    return self