# Benchmark script chunk (Python 2): load a pre-built clustering ensemble from
# an HDF5 file and report its basic properties. The co-association paths below
# set up an SSD-vs-spinning-disk comparison used later in the experiment.
import numpy as np
import tables
import numba as nb
import MyML.utils.profiling as myProf
import MyML.helper.partition as part
import MyML.EAC.sparse as eacSp
import MyML.EAC.eac_new as myEAC
import shutil

t = myProf.Timer()  # wall-clock timer reused for the load step

# Alternative (larger, overlapping-Gaussians) ensemble kept for reference:
# ensemble_file = '/home/diogoaos/QCThesis/datasets/gauss10e6_overlap/ensemble_500k_test2.h5'
ensemble_file = '/media/Data/diogoaos_tmp/gaussseparated_ensembles/ensemble_500000_2sqrt.hdf'

# Output locations for the co-association matrix: SSD vs spinning disk.
coassc_path_ssd = '/home/diogoaos/QCThesis/coassoc.h5'
index_path_ssd = '/home/diogoaos/QCThesis/'
coassc_path_spin = '/media/Data/diogoaos_tmp/coassoc.h5'
index_path_spin = '/media/Data/diogoaos_tmp/'

print "loading ensemble"
t.reset()
t.tic()
# ensemble is a sequence of partitions; each partition is a collection of
# cluster label arrays (see MyML.helper.partition).
ensemble = part.loadEnsembleFromFileHDF(ensemble_file)
print 'load ensemble time: {}'.format(t.tac())

# Infer dataset size from the first partition; each partition is one
# clustering of the same samples, so len(ensemble) is the ensemble size.
n_samples = part.n_samples_from_partition(ensemble[0])
n_partitions = len(ensemble)
print "number of samples: {}".format(n_samples)
print "number of partitions: {}".format(n_partitions)
# Script preamble (Python 2): time the heavyweight imports, then define a
# helper that aligns the label values of two clusterings.
import MyML.utils.profiling as myProf

tImport = myProf.Timer()  # measures total import time; tac() presumably called later
tImport.tic()

import numpy as np
import MyML.helper.partition as part
import MyML.cluster.K_Means3 as myKM
import MyML.metrics.accuracy as myAcc
import MyML.EAC.eac_new as myEAC
import MyML.EAC.sparse as mySpEAC
import gc
import argparse
import os.path


def correspond(l0, l1):
    """Try to make the label values of *l0* correspond to those of *l1*.

    Both arguments are 1-D label arrays for the same samples. Returns -1
    when the two clusterings have different numbers of clusters; otherwise
    mutates *l0* in place.

    NOTE(review): this chunk appears truncated -- the visible code only
    shifts every l0 label out of the original value range; the actual
    matching/relabelling step is not shown here. Confirm against the full
    file before relying on this description.
    """
    l0_unique = np.unique(l0)
    if l0_unique.size != np.unique(l1).size:
        return -1
    # Offset strictly above the current maximum label, so shifted values
    # cannot collide with any not-yet-shifted original label.
    inc = l0_unique.max() + 100
    # increment all labels
    for l in l0_unique:
        l0[l0 == l] = l + inc
print "Path is not directory: ", folder sys.exit(1) # await confirmation if not args.yes: raw_input("Folder: {}\nIs this correct?".format(folder)) else: print "Folder being used is: {}".format(folder) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # IMPORTS # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # import MyML.utils.profiling as myProf t = myProf.Timer() t.tic() import numpy as np from numba import cuda import pandas as pd # for storing results from sklearn import datasets # generate gaussian mixture import MyML.cluster.K_Means3 as myKM myKM.CUDA_PPT = PPT # Setup logging import logging
# Mid-script chunk (Python 2): build the results table for the experiment
# sweep and set the fixed experiment parameters.
# NOTE(review): the `res_cols = [...` opening (and earlier entries) precede
# this chunk; the list below is only its tail.
            'min_degree', 'max_degree', 'mean_degree', 'std_degree',
            'accuracy', 'sl_clusts', 'round', 'disk']

# Co-association matrix representations being compared.
type_mats = ["full", "full condensed",
             "sparse complete",
             "sparse condensed const", "sparse condensed linear"]

rounds = 1  # repetitions per configuration
# One result row per (round, cardinality, rule, matrix type) combination;
# `cardinality` and `rules` are defined earlier in the file.
res_lines = rounds * len(cardinality) * len(rules) * len(type_mats)
results = pd.DataFrame(index=range(res_lines), columns=res_cols)

t = myProf.Timer()  # timer

# ensemble properties
n_partitions = 100
n_iters = 3

# EAC properties
sparse_max_assocs_factor = 3

# ## run

logger.info("Starting experiment...")

# In[198]:

res_idx = 0  # next free row index in `results`