def _build_full_coassoc(ensemble, n, condensed):
    """Build and time a non-sparse co-association matrix for `ensemble`.

    Resets the module-level timer `t`, times only `buildMatrix`, then asks the
    co-association object to compute its degree (outside the timed section).
    Returns the `myEAC.EAC` estimator.
    """
    eacEst = myEAC.EAC(n_samples=n, sparse=False, condensed=condensed)
    t.reset()
    t.tic()
    eacEst.buildMatrix(ensemble)
    t.tac()
    eacEst.coassoc.getDegree()
    return eacEst


def _build_sparse_coassoc(ensemble, n, condensed, assocs_mode, use_disk):
    """Build and time a sparse co-association matrix for `ensemble`.

    `assocs_mode` is assigned to `sp_max_assocs_mode` ("constant" or "linear"
    in the callers) and `use_disk` is forwarded as `sl_disk`. The module-level
    timer `t` is reset and times only `buildMatrix`. Returns the estimator.
    """
    eacEst = myEAC.EAC(n_samples=n, sparse=True, condensed=condensed,
                       sparse_keep_degree=True, sl_disk=use_disk)
    eacEst.sp_max_assocs_mode = assocs_mode
    eacEst.disk_dir = diskdir
    t.reset()
    t.tic()
    eacEst.buildMatrix(ensemble)
    t.tac()
    return eacEst


def execute_round(ensemble, r, n, rule, n_clusts):
    """Run one build / cluster / score round and record it in `results`.

    Parameters
    ----------
    ensemble : passed unchanged to `EAC.buildMatrix`.
    r : round number, logged and stored in `results.round`.
    n : number of samples; used as `n_samples` for the estimator and as the
        size given to the accuracy scorer.
    rule : object whose `__doc__` is logged, stored in `results.rule` and used
        in the labels file name.
    n_clusts : indexable; `n_clusts[0]` is recorded as kmin and `n_clusts[1]`
        as kmax.

    Returns
    -------
    bool
        True when the round ran to completion. False when it was skipped
        because the estimated memory for the matrix type exceeds
        `mem_full_max`; nothing is written to `results` in that case. The
        caller can use this to stop iterating (the flattened original used a
        bare `break` here, which is not valid inside a function).

    Raises
    ------
    NotImplementedError
        If the module-level `tm` is not one of the handled matrix types 0-4.

    Notes
    -----
    Relies on module-level state that is not passed in: `tm`, `type_mats`,
    `mems`, the memory estimates (`f_mat`, `fc_mat`, `sp_const`,
    `sp_const_mst`, `sp_lin`, `sp_lin_mst`, `mem_full_max`), `ma`, `n_s`,
    `n_e`, `val_s`, `val_e`, `diskdir`, `folder`, `gt_sampled`,
    `max_cluster_size`, the timer `t`, `results` and the row counter
    `res_idx`, which is incremented here.
    """
    # res_idx is rebound below (res_idx += 1); without this declaration every
    # earlier read of it in this function would raise UnboundLocalError.
    global res_idx

    logger.info("- - - - - - - - - - - - - - - - - -")
    logger.info("Sampled of {} patterns.".format(n))
    logger.info("Rule: {}".format(rule.__doc__))
    logger.info("kmin: {}, kmax: {}".format(n_clusts[0], n_clusts[1]))
    logger.info("Type of mat: {}".format(type_mats[tm]))
    logger.info("Type of mat tm: {}".format(tm))
    logger.info("Round: {}".format(r))

    logger.info("Building matrix...")
    logger.info("Estimated req. mem. (MB):{}".format(mems[tm] / (1024.0**2)))

    # Set to True when matrix + MST estimate does not fit in memory, in which
    # case the estimator is created with sl_disk=True.
    useDiskMST = False

    if tm == 0:  # full
        if f_mat > mem_full_max:
            logger.info("not enough memory")
            return False
        eacEst = _build_full_coassoc(ensemble, n, condensed=False)
        degree = eacEst.coassoc.degree
        nnz = eacEst.coassoc.nnz
        n_max_degree = -1  # not computed for non-sparse matrices

    elif tm == 1:  # full condensed
        if fc_mat > mem_full_max:
            logger.info("not enough memory")
            return False
        eacEst = _build_full_coassoc(ensemble, n, condensed=True)
        degree = eacEst.coassoc.degree
        nnz = eacEst.coassoc.nnz
        n_max_degree = -1  # not computed for non-sparse matrices

    elif tm == 2:  # sparse complete
        if sp_const > mem_full_max:
            logger.info("not enough memory")
            return False
        if sp_const + sp_const_mst > mem_full_max:
            useDiskMST = True
        eacEst = _build_sparse_coassoc(ensemble, n, False, "constant",
                                       useDiskMST)
        # the last entry of the degree array is dropped, as in the original
        degree = eacEst.coassoc.degree[:-1]
        nnz = eacEst.coassoc.nnz
        n_max_degree = (degree == ma).sum()

    elif tm == 3:  # sparse condensed const
        if sp_const > mem_full_max:
            logger.info("not enough memory")
            return False
        if sp_const + sp_const_mst > mem_full_max:
            useDiskMST = True
        eacEst = _build_sparse_coassoc(ensemble, n, True, "constant",
                                       useDiskMST)
        degree = eacEst.coassoc.degree[:-1]
        nnz = eacEst.coassoc.nnz
        n_max_degree = (degree == ma).sum()

    elif tm == 4:  # sparse condensed linear
        if sp_lin > mem_full_max:
            logger.info("not enough memory")
            return False
        if sp_lin + sp_lin_mst > mem_full_max:
            useDiskMST = True
        eacEst = _build_sparse_coassoc(ensemble, n, True, "linear",
                                       useDiskMST)
        degree = eacEst.coassoc.degree[:-1]
        nnz = eacEst.coassoc.nnz
        # per-row capacity is the difference of consecutive indptr entries
        indptr = mySpEAC.indptr_linear(n, eacEst.sp_max_assocs,
                                       n_s, n_e, val_s, val_e)
        max_degree = indptr[1:] - indptr[:-1]
        n_max_degree = (degree == max_degree).sum()

    else:
        raise NotImplementedError(
            "mat type {} not implemented".format(type_mats[tm]))

    logger.info("Build time: {}".format(t.elapsed))

    results.round[res_idx] = r  # round number
    results.n_samples[res_idx] = n  # n_samples
    results.rule[res_idx] = rule.__doc__  # rule
    results.kmin[res_idx] = n_clusts[0]  # kmin
    results.kmax[res_idx] = n_clusts[1]  # kmax
    results.t_build[res_idx] = t.elapsed  # build time
    results.type_mat[res_idx] = type_mats[tm]  # type of matrix
    results.disk[res_idx] = useDiskMST  # SL disk
    # NOTE(review): this stores the matrix build time again, not a separately
    # measured ensemble time -- confirm whether that is intended.
    results.t_ensemble[res_idx] = t.elapsed  # ensemble time
    results.biggest_cluster[res_idx] = max_cluster_size  # biggest_cluster

    # number of associations
    results.n_assocs[res_idx] = nnz

    # stats number associations
    results.max_degree[res_idx] = degree.max()
    results.min_degree[res_idx] = degree.min()
    results.mean_degree[res_idx] = degree.mean()
    results.std_degree[res_idx] = degree.std()
    results.n_max_degree[res_idx] = n_max_degree

    logger.info("SL clustering...")
    t.reset()
    t.tic()
    labels = eacEst.finalClustering(n_clusters=0)
    t.tac()
    logger.info("Clustering time: {}".format(t.elapsed))

    # Persist the labels of this round. The original call was `np.save("")`,
    # which lacks the array argument and never used labels_filename.
    labels_filename = "labels_{}_{}_{}".format(n, rule.__doc__, r)
    np.save(os.path.join(folder, labels_filename), labels)

    results.t_sl[res_idx] = t.elapsed  # SL clustering time
    results.sl_clusts[res_idx] = eacEst.n_fclusts

    t.reset()
    t.tic()
    # myAcc.ConsistencyIndex was the previously used scorer; the Hungarian
    # index is the one in use.
    logger.info("Scoring accuracy (Hungarian)...")
    accEst = myAcc.HungarianIndex(n)
    accEst.score(gt_sampled, labels)
    t.tac()
    logger.info("Accuracy time: {}".format(t.elapsed))

    results.t_accuracy[res_idx] = t.elapsed  # accuracy time
    results.accuracy[res_idx] = accEst.accuracy

    # If the accuracy is zero or different from the previous row, emit a
    # marker line so a debugger breakpoint can be attached here.
    # NOTE(review): for res_idx == 0 this compares against index -1 --
    # confirm that is acceptable for the type of `results`.
    if accEst.accuracy == 0 or accEst.accuracy != results.accuracy[res_idx-1]:
        print("breakpoint")

    # written after every round so partial results survive an interruption
    results.to_csv(os.path.join(folder, "results_kmin.csv"))
    res_idx += 1

    # release the (potentially large) matrices before the next round
    del eacEst, accEst, degree, labels
    gc.collect()
    return True
useDiskMST = True eacEst = myEAC.EAC(n_samples=n, sparse=True, condensed=True, sparse_keep_degree=True) eacEst.sp_max_assocs_mode = "linear" eacEst.disk_dir = diskdir t.reset() t.tic() eacEst.buildMatrix(ensemble) t.tac() degree = eacEst.coassoc.degree[:-1] nnz = eacEst.coassoc.nnz indptr = mySpEAC.indptr_linear(n, eacEst.sp_max_assocs, n_s, n_e, val_s, val_e) max_degree = indptr[1:] - indptr[:-1] n_max_degree = (degree == max_degree).sum() else: raise NotImplementedError( "mat type {} not implemented".format(type_mats[tm])) logger.info("Build time: {}".format(t.elapsed)) results.round[res_idx] = r # round number results.n_samples[res_idx] = n # n_samples results.rule[res_idx] = rule.__doc__ # rule results.kmin[res_idx] = n_clusts[0] # kmin results.kmax[res_idx] = n_clusts[1] # kmax results.t_build[res_idx] = t.elapsed