Exemple #1
0
    def view(self, img_only=False):
        """Invoke every ``ergo.views`` renderer for this object, then show.

        Calls ``views.model``, ``views.roc``, ``views.stats`` and
        ``views.history`` in that order, each with ``(self, img_only)``,
        and finishes with ``views.show(img_only)``.

        Args:
            img_only: Forwarded unchanged to every ``ergo.views`` call.
        """
        import ergo.views as views

        # Each renderer is resolved by name right before it is called, so
        # the lookup/call sequence stays strictly one renderer at a time.
        for renderer_name in ("model", "roc", "stats", "history"):
            getattr(views, renderer_name)(self, img_only)

        views.show(img_only)
Exemple #2
0
def action_explore(argc, argv):
    """Run the data exploration actions requested on the command line.

    Parses ``argv``, loads and prepares the project found at ``args.path``,
    takes a subsample of its dataset and then runs whichever of the
    correlation, PCA, stats and clustering steps were enabled, finishing
    with ``views.show``.

    Args:
        argc: Not used by this function.
        argv: Argument list handed to ``parse_args``.

    Side effects:
        Rebinds the module globals ``prj``, ``nrows``, ``ncols`` and
        ``attributes``; ``n_jobs`` is rebound only when ``args.workers``
        is non-zero.

    Raises:
        SystemExit: With status 1 when no action was selected, the project
            fails to load, the dataset is not flat, the clustering
            algorithm is not recognised, or clustering yields a single
            label.
    """
    global prj, nrows, ncols, attributes, n_jobs

    # sys.exit is used instead of the site-provided quit(), which is meant
    # for the interactive shell and reports success (status 0) on errors.
    import sys

    args = parse_args(argv)

    if args.all:
        args.pca = True
        args.correlations = True
        args.stats = True
        args.cluster = True
        args.D3 = True

    # workers == 0 keeps whatever value the module-level n_jobs already has.
    if args.workers == -1:
        import multiprocessing
        n_jobs = multiprocessing.cpu_count()
    elif args.workers != 0:
        n_jobs = args.workers
    log.info("using %d workers", n_jobs)

    if args.nclusters and not args.cluster:
        log.warning(
            "number of clusters specified but clustering won't be perfomed")

    if not (args.pca or args.correlations or args.stats or args.cluster):
        log.error("No exploration action was specified")
        print("")
        # Re-run the parser with -h (presumably prints the usage text and
        # may exit on its own); exit explicitly in case it returns.
        parse_args(["-h"])
        sys.exit(1)

    prj = Project(args.path)
    err = prj.load()
    if err is not None:
        log.error("error while loading project: %s", err)
        sys.exit(1)

    prj.prepare(args.dataset, 0.0, 0.0)
    if not prj.dataset.is_flat:
        log.error("data exploration can only be applied to flat inputs")
        sys.exit(1)

    X, y = prj.dataset.subsample(args.ratio)
    nrows, ncols = X.shape
    attributes = get_attributes(args.attributes, ncols)

    if args.correlations:
        log.info("computing correlations of each feature with target")
        corr = compute_correlations_with_target(X, y)
        print_target_correlation_table(corr)
        log.info("computing features crosscorrelation")
        corr = calculate_corr(X)
        print_correlation_table(corr, min_corr=0.7)
        views.correlation_matrix(prj, corr, args.img_only)

    if args.pca:
        log.info("computing pca")
        pca = calculate_pca(X)
        log.info("computing pca projection")
        # Always drawn once with the last flag False, then once more with
        # the D3 flag when it is set.
        views.pca_projection(prj, pca, X, y, False)
        if args.D3:
            views.pca_projection(prj, pca, X, y, args.D3)
        views.pca_explained_variance(prj, pca, args.img_only)

    if args.stats:
        log.info("computing features stats")
        print_stats_table(X)

    # Set when the kmeans inertia analysis runs; that analysis replaces the
    # clustering + plotting step below.
    inertia = False
    if args.cluster:
        if args.cluster_alg == 'kmeans':
            cluster_alg = kmeans_clustering
            if not args.nclusters:
                # Default k: number of distinct argmax values over the rows
                # of y (assumes y is 2-D, e.g. one-hot -- TODO confirm).
                args.nclusters = len(set(np.argmax(y, axis=1)))
            args.nclusters = int(args.nclusters)
            if args.nmaxclusters:
                log.info(
                    "performing inertia analysis with clusters in the range (%d, %d)",
                    args.nclusters, args.nmaxclusters)
                inertia = True
                n_clusters_analysis(X, args.nmaxclusters, args.nclusters)
            else:
                log.info("computing kmeans clustering with k=%d",
                         args.nclusters)
        elif args.cluster_alg == 'dbscan':
            cluster_alg = dbscan_clustering
            if not args.nclusters:
                args.nclusters = 2
            # For dbscan the nclusters value is reported (and passed on) as eps.
            log.info("computing dbscan clustering with eps=%f",
                     args.nclusters)
            if args.nmaxclusters:
                log.warning(
                    "nmax specified but not used. Inertia analysis only available for Kmeans."
                )
        else:
            # Without this branch cluster_alg would be unbound below and the
            # call would fail with UnboundLocalError.
            log.error("unsupported clustering algorithm: %s",
                      args.cluster_alg)
            sys.exit(1)

        if not inertia:
            if not args.pca:
                # The cluster plots need a pca object; compute it here when
                # the PCA step above was not run.
                log.info("computing pca to plot clusters")
                pca = calculate_pca(X)
            ca = cluster_alg(X, args.nclusters)
            if len(set(ca.labels_)) == 1:
                log.error("clustering failed. Check input parameter.")
                sys.exit(1)
            views.plot_clusters(prj, pca, X, y, ca, False)
            if args.D3:
                views.plot_clusters(prj, pca, X, y, ca, args.D3)

    views.show(args.img_only)