def view(self, img_only=False):
    """Run the model, roc, stats and history views, then show the result.

    Each renderer in ``ergo.views`` is called in that order with
    ``(self, img_only)``; ``views.show(img_only)`` is called last.

    Args:
        img_only: Forwarded unchanged to every view function.
    """
    import ergo.views as views

    # Look each renderer up right before calling it so that the order of
    # attribute access and calls is the same as four explicit statements.
    for renderer_name in ("model", "roc", "stats", "history"):
        getattr(views, renderer_name)(self, img_only)

    views.show(img_only)
def action_explore(argc, argv):
    """Entry point of the data exploration action.

    Parses ``argv``, loads and prepares the project at ``args.path``,
    sub-samples its dataset and runs whichever analyses were requested:
    feature correlations, PCA, per-feature statistics and clustering.
    ``views.show`` is called once at the end.

    The module-level globals ``prj``, ``nrows``, ``ncols`` and ``attributes``
    are reassigned here. ``n_jobs`` is reassigned unless ``args.workers`` is
    0, in which case its current value is kept.

    Args:
        argc: Not used by this function.
        argv: Argument list forwarded to ``parse_args``.

    Raises:
        SystemExit: With status 1 when no analysis was requested, the project
            fails to load, the dataset is not flat, the clustering algorithm
            is not recognised, or clustering yields a single label.
    """
    global prj, nrows, ncols, attributes, n_jobs

    # ``quit()`` is injected by the ``site`` module for interactive sessions
    # and exits with status 0; ``sys.exit(1)`` always exists and reports the
    # failure to the calling shell.
    import sys

    args = parse_args(argv)

    if args.all:
        # ``all`` switches on every analysis as well as the D3 plots.
        args.pca = True
        args.correlations = True
        args.stats = True
        args.cluster = True
        args.D3 = True

    if args.workers == -1:
        # -1 means "one worker per CPU reported by the OS".
        import multiprocessing
        n_jobs = multiprocessing.cpu_count()
    elif args.workers != 0:
        n_jobs = args.workers
    log.info("using %d workers", n_jobs)

    if args.nclusters and not args.cluster:
        log.warning(
            "number of clusters specified but clustering won't be performed")

    if not (args.pca or args.correlations or args.stats or args.cluster):
        log.error("No exploration action was specified")
        print("")
        # Print the usage text; the explicit exit below covers the case in
        # which ``parse_args`` returns instead of exiting on ``-h``.
        parse_args(["-h"])
        sys.exit(1)

    prj = Project(args.path)
    err = prj.load()
    if err is not None:
        log.error("error while loading project: %s", err)
        sys.exit(1)

    prj.prepare(args.dataset, 0.0, 0.0)
    if not prj.dataset.is_flat:
        log.error("data exploration can only be applied to flat inputs")
        sys.exit(1)

    X, y = prj.dataset.subsample(args.ratio)
    nrows, ncols = X.shape
    attributes = get_attributes(args.attributes, ncols)

    if args.correlations:
        log.info("computing correlations of each feature with target")
        corr = compute_correlations_with_target(X, y)
        print_target_correlation_table(corr)

        log.info("computing features crosscorrelation")
        corr = calculate_corr(X)
        print_correlation_table(corr, min_corr=0.7)
        views.correlation_matrix(prj, corr, args.img_only)

    if args.pca:
        log.info("computing pca")
        pca = calculate_pca(X)
        log.info("computing pca projection")
        # The projection is always drawn with the last argument False and,
        # when D3 is requested, drawn a second time with it set.
        views.pca_projection(prj, pca, X, y, False)
        if args.D3:
            views.pca_projection(prj, pca, X, y, args.D3)
        views.pca_explained_variance(prj, pca, args.img_only)

    if args.stats:
        log.info("computing features stats")
        print_stats_table(X)

    # Set when the kmeans inertia analysis runs; that analysis replaces the
    # single clustering run and its plots.
    inertia = False
    if args.cluster:
        if args.cluster_alg == 'kmeans':
            cluster_alg = kmeans_clustering
            if not args.nclusters:
                # Default k: number of distinct per-row argmax values of y
                # (presumably y is one-hot encoded -- TODO confirm).
                args.nclusters = len(set(np.argmax(y, axis=1)))
            args.nclusters = int(args.nclusters)
            if args.nmaxclusters:
                log.info(
                    "performing inertia analysis with clusters in the range (%d, %d)",
                    args.nclusters, args.nmaxclusters)
                inertia = True
                n_clusters_analysis(X, args.nmaxclusters, args.nclusters)
            else:
                log.info("computing kmeans clustering with k=%d",
                         args.nclusters)
        elif args.cluster_alg == 'dbscan':
            cluster_alg = dbscan_clustering
            if not args.nclusters:
                args.nclusters = 2
            # For dbscan the same option is reported as eps.
            log.info("computing dbscan clustering with eps=%f",
                     args.nclusters)
            if args.nmaxclusters:
                log.warning(
                    "nmax specified but not used. Inertia analysis only available for Kmeans."
                )
        else:
            # Without this branch ``cluster_alg`` would be unbound below and
            # the run would die with a NameError instead of a clear message.
            log.error("unknown clustering algorithm: %s", args.cluster_alg)
            sys.exit(1)

        if not inertia:
            if not args.pca:
                # The cluster plots need a PCA; compute it only if the PCA
                # step above did not already do so.
                log.info("computing pca to plot clusters")
                pca = calculate_pca(X)

            ca = cluster_alg(X, args.nclusters)
            if len(set(ca.labels_)) == 1:
                log.error("clustering failed. Check input parameter.")
                sys.exit(1)
            views.plot_clusters(prj, pca, X, y, ca, False)
            if args.D3:
                views.plot_clusters(prj, pca, X, y, ca, args.D3)

    views.show(args.img_only)