Example #1
0
def execute_round(ensemble, r, n, rule, n_clusts):
    """Run one benchmark round and record it in the module-level results.

    The round builds a co-association matrix of the kind selected by the
    module-level ``tm``, records build statistics, runs the final (SL)
    clustering, saves the labels, scores them with the Hungarian index and
    rewrites the results CSV.

    Parameters
    ----------
    ensemble : passed unchanged to ``EAC.buildMatrix``.
    r : round number, logged and stored in ``results.round``.
    n : number of samples; used as ``n_samples`` of the estimator and as the
        size given to the accuracy scorer.
    rule : object whose ``__doc__`` is logged, stored in ``results.rule`` and
        used in the labels file name.
    n_clusts : indexable; ``n_clusts[0]`` is recorded as kmin and
        ``n_clusts[1]`` as kmax.

    Returns
    -------
    None.  When the selected matrix type does not fit in ``mem_full_max`` the
    function logs "not enough memory" and returns early: nothing is written
    to ``results`` and ``res_idx`` is not advanced.

    Raises
    ------
    NotImplementedError
        If ``tm`` is not one of the handled matrix types (0-4).

    Notes
    -----
    Besides its parameters the function reads module-level state (``tm``,
    ``type_mats``, ``mems``, the memory estimates, ``results``, ``t``,
    ``gt_sampled``, ``folder``, ...) and increments the module-level
    ``res_idx``.
    """
    # res_idx is incremented at the end of the round.  Without this
    # declaration the augmented assignment would make it function-local and
    # the first read below would raise UnboundLocalError.
    global res_idx

    logger.info("- - - - - - - - - - - - - - - - - -")
    logger.info("Sampled of {} patterns.".format(n))
    logger.info("Rule: {}".format(rule.__doc__))
    logger.info("kmin: {}, kmax: {}".format(n_clusts[0], n_clusts[1]))
    logger.info("Type of mat: {}".format(type_mats[tm]))
    logger.info("Type of mat tm: {}".format(tm))
    logger.info("Round: {}".format(r))
    logger.info("Building matrix...")
    logger.info("Estimated req. mem. (MB):{}".format(mems[tm] / (1024.0**2)))

    useDiskMST = False

    # Each branch only touches the memory estimates that belong to its own
    # matrix type.  A matrix that does not fit ends the round early (this
    # was a `break`, which is a syntax error outside of a loop).
    if tm == 0:  # full
        if f_mat > mem_full_max:
            logger.info("not enough memory")
            return
        eacEst = myEAC.EAC(n_samples=n, sparse=False, condensed=False)
        _timed_build(eacEst, ensemble)

        eacEst.coassoc.getDegree()
        degree = eacEst.coassoc.degree
        n_max_degree = -1  # recorded as -1 for the full matrix types

    elif tm == 1:  # full condensed
        if fc_mat > mem_full_max:
            logger.info("not enough memory")
            return
        eacEst = myEAC.EAC(n_samples=n, sparse=False, condensed=True)
        _timed_build(eacEst, ensemble)

        eacEst.coassoc.getDegree()
        degree = eacEst.coassoc.degree
        n_max_degree = -1  # recorded as -1 for the full matrix types

    elif tm == 2:  # sparse complete
        if sp_const > mem_full_max:
            logger.info("not enough memory")
            return
        # Matrix fits but matrix + MST estimate does not: use the disk flag.
        useDiskMST = sp_const + sp_const_mst > mem_full_max

        eacEst = _new_sparse_eac(n, False, "constant", useDiskMST)
        _timed_build(eacEst, ensemble)

        degree = eacEst.coassoc.degree[:-1]
        n_max_degree = (degree == ma).sum()

    elif tm == 3:  # sparse condensed const
        if sp_const > mem_full_max:
            logger.info("not enough memory")
            return
        useDiskMST = sp_const + sp_const_mst > mem_full_max

        eacEst = _new_sparse_eac(n, True, "constant", useDiskMST)
        _timed_build(eacEst, ensemble)

        degree = eacEst.coassoc.degree[:-1]
        n_max_degree = (degree == ma).sum()

    elif tm == 4:  # sparse condensed linear
        if sp_lin > mem_full_max:
            logger.info("not enough memory")
            return
        useDiskMST = sp_lin + sp_lin_mst > mem_full_max

        eacEst = _new_sparse_eac(n, True, "linear", useDiskMST)
        _timed_build(eacEst, ensemble)

        degree = eacEst.coassoc.degree[:-1]

        # Per-row capacity is the difference of consecutive indptr entries;
        # count the rows whose degree equals that capacity.
        indptr = mySpEAC.indptr_linear(n, eacEst.sp_max_assocs,
                                       n_s, n_e, val_s, val_e)
        max_degree = indptr[1:] - indptr[:-1]
        n_max_degree = (degree == max_degree).sum()

    else:
        raise NotImplementedError(
            "mat type {} not implemented".format(type_mats[tm]))

    nnz = eacEst.coassoc.nnz

    logger.info("Build time: {}".format(t.elapsed))

    results.round[res_idx] = r  # round number
    results.n_samples[res_idx] = n  # n_samples
    results.rule[res_idx] = rule.__doc__  # rule
    results.kmin[res_idx] = n_clusts[0]  # kmin
    results.kmax[res_idx] = n_clusts[1]  # kmax
    results.t_build[res_idx] = t.elapsed  # matrix build time
    results.type_mat[res_idx] = type_mats[tm]  # type of matrix
    results.disk[res_idx] = useDiskMST  # SL disk

    # NOTE(review): at this point t.elapsed is still the matrix build time,
    # so t_ensemble receives the same value as t_build -- confirm whether the
    # ensemble generation time was meant to be stored here instead.
    results.t_ensemble[res_idx] = t.elapsed
    results.biggest_cluster[res_idx] = max_cluster_size  # biggest_cluster

    # number of associations
    results.n_assocs[res_idx] = nnz

    # stats number associations
    results.max_degree[res_idx] = degree.max()
    results.min_degree[res_idx] = degree.min()
    results.mean_degree[res_idx] = degree.mean()
    results.std_degree[res_idx] = degree.std()
    results.n_max_degree[res_idx] = n_max_degree

    logger.info("SL clustering...")

    t.reset()
    t.tic()
    labels = eacEst.finalClustering(n_clusters=0)
    t.tac()
    logger.info("Clustering time: {}".format(t.elapsed))

    # The original call was `np.save("")`, which lacks the array argument and
    # ignores the file name built here.
    # NOTE(review): the target directory is assumed to be `folder` (where the
    # results CSV is written) -- confirm.
    labels_filename = "labels_{}_{}_{}".format(n, rule.__doc__, r)
    np.save(os.path.join(folder, labels_filename), labels)

    results.t_sl[res_idx] = t.elapsed  # clustering time
    results.sl_clusts[res_idx] = eacEst.n_fclusts

    t.reset()
    t.tic()

    logger.info("Scoring accuracy (Hungarian)...")
    accEst = myAcc.HungarianIndex(n)
    accEst.score(gt_sampled, labels)

    t.tac()

    logger.info("Accuracy time: {}".format(t.elapsed))

    results.t_accuracy[res_idx] = t.elapsed  # accuracy time
    results.accuracy[res_idx] = accEst.accuracy

    # If the accuracy is zero or different from the previous row, flag it so
    # it can be inspected.  The first row has no predecessor, so the
    # comparison is skipped there instead of indexing position -1.
    differs_from_previous = (
        res_idx > 0 and accEst.accuracy != results.accuracy[res_idx - 1])
    if accEst.accuracy == 0 or differs_from_previous:
        print("breakpoint")

    results.to_csv(os.path.join(folder, "results_kmin.csv"))
    res_idx += 1

    # Drop the large per-round objects before the next round allocates.
    del eacEst, accEst, degree, labels
    gc.collect()


def _timed_build(eacEst, ensemble):
    """Build the co-association matrix while timing it with the global ``t``."""
    t.reset()
    t.tic()
    eacEst.buildMatrix(ensemble)
    t.tac()


def _new_sparse_eac(n, condensed, assocs_mode, use_disk):
    """Create a sparse, degree-keeping EAC estimator configured for a round."""
    eacEst = myEAC.EAC(n_samples=n, sparse=True, condensed=condensed,
                       sparse_keep_degree=True, sl_disk=use_disk)
    eacEst.sp_max_assocs_mode = assocs_mode
    eacEst.disk_dir = diskdir
    return eacEst
Example #2
0
                        useDiskMST = True
                    eacEst = myEAC.EAC(n_samples=n,
                                       sparse=True,
                                       condensed=True,
                                       sparse_keep_degree=True)
                    eacEst.sp_max_assocs_mode = "linear"
                    eacEst.disk_dir = diskdir
                    t.reset()
                    t.tic()
                    eacEst.buildMatrix(ensemble)
                    t.tac()

                    degree = eacEst.coassoc.degree[:-1]
                    nnz = eacEst.coassoc.nnz

                    indptr = mySpEAC.indptr_linear(n, eacEst.sp_max_assocs,
                                                   n_s, n_e, val_s, val_e)
                    max_degree = indptr[1:] - indptr[:-1]
                    n_max_degree = (degree == max_degree).sum()

                else:
                    raise NotImplementedError(
                        "mat type {} not implemented".format(type_mats[tm]))

                logger.info("Build time: {}".format(t.elapsed))

                results.round[res_idx] = r  # round number
                results.n_samples[res_idx] = n  # n_samples
                results.rule[res_idx] = rule.__doc__  # rule
                results.kmin[res_idx] = n_clusts[0]  # kmin
                results.kmax[res_idx] = n_clusts[1]  # kmax
                results.t_build[res_idx] = t.elapsed