예제 #1
0
import numpy as np
import tables
import numba as nb
import MyML.utils.profiling as myProf
import MyML.helper.partition as part
import MyML.EAC.sparse as eacSp
import MyML.EAC.eac_new as myEAC
import shutil

# Single timer instance reused for the measurements in this script.
t = myProf.Timer()

# HDF file with the clustering ensemble (alternative dataset kept for reference).
# ensemble_file = '/home/diogoaos/QCThesis/datasets/gauss10e6_overlap/ensemble_500k_test2.h5'
ensemble_file = '/media/Data/diogoaos_tmp/gaussseparated_ensembles/ensemble_500000_2sqrt.hdf'

# Co-association file and index directory; the _ssd / _spin suffixes
# presumably refer to solid-state vs. spinning-disk storage -- confirm.
coassc_path_ssd = '/home/diogoaos/QCThesis/coassoc.h5'
index_path_ssd = '/home/diogoaos/QCThesis/'

coassc_path_spin = '/media/Data/diogoaos_tmp/coassoc.h5'
index_path_spin = '/media/Data/diogoaos_tmp/'

# Load the ensemble and report how long the load took.
print("loading ensemble")
t.reset()
t.tic()
ensemble = part.loadEnsembleFromFileHDF(ensemble_file)
print('load ensemble time: {}'.format(t.tac()))

# Basic dimensions: partition count is the ensemble length, sample count
# is derived from the first partition.
n_partitions = len(ensemble)
n_samples = part.n_samples_from_partition(ensemble[0])
print("number of samples: {}".format(n_samples))
print("number of partitions: {}".format(n_partitions))
예제 #2
0
import MyML.utils.profiling as myProf

# The timer is created and started before the remaining imports below,
# presumably so that the time spent importing can be measured (the
# matching read-out is not visible in this excerpt).
tImport = myProf.Timer()
tImport.tic()

import numpy as np
import MyML.helper.partition as part
import MyML.cluster.K_Means3 as myKM
import MyML.metrics.accuracy as myAcc
import MyML.EAC.eac_new as myEAC
import MyML.EAC.sparse as mySpEAC

import gc
import argparse
import os.path


def correspond(l0, l1):
    """Try to make two label arrays use the same label assignment.

    The attempt is only made when both arrays contain the same number of
    distinct labels (clusters); otherwise -1 is returned immediately.

    l0 is modified in place: each of its distinct labels is shifted by a
    common offset through boolean-mask assignment.
    l1 is only inspected for its number of distinct labels in the code
    shown here.
    """
    l0_unique = np.unique(l0)
    # different number of clusters: no correspondence is attempted
    if l0_unique.size != np.unique(l1).size:
        return -1

    # offset above the largest label of l0; assuming non-negative labels,
    # every shifted value exceeds the original maximum, so it cannot be
    # mistaken for a label that has not been shifted yet
    inc = l0_unique.max() + 100

    # increment all labels
    for l in l0_unique:
        l0[l0 == l] = l + inc
예제 #3
0
    print "Path is not directory: ", folder
    sys.exit(1)

# await confirmation
if args.yes:
    # confirmation waived through args.yes: only report the folder
    print("Folder being used is: {}".format(folder))
else:
    # block until the user answers; the typed answer itself is discarded
    raw_input("Folder: {}\nIs this correct?".format(folder))

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#                   IMPORTS
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

import MyML.utils.profiling as myProf

# The timer is started before the remaining imports, presumably so the
# import time is included in whatever is measured with it later (the
# read-out is not visible in this excerpt).
t = myProf.Timer()
t.tic()

import numpy as np
from numba import cuda
import pandas as pd  # for storing results

from sklearn import datasets  # generate gaussian mixture

import MyML.cluster.K_Means3 as myKM

# Override the CUDA_PPT attribute of the K_Means3 module with PPT, which
# is defined outside this excerpt.
# NOTE(review): PPT presumably means "points per thread" -- confirm.
myKM.CUDA_PPT = PPT

# Setup logging
import logging
예제 #4
0
            'min_degree', 'max_degree', 'mean_degree', 'std_degree',
            'accuracy', 'sl_clusts',
            'round','disk']

# names of the matrix types covered by the experiment
type_mats = [
    "full",
    "full condensed",
    "sparse complete",
    "sparse condensed const",
    "sparse condensed linear",
]
rounds = 1

# pre-allocate one result row per (matrix type, rule, cardinality, round)
res_lines = len(type_mats) * len(rules) * len(cardinality) * rounds

results = pd.DataFrame(columns=res_cols, index=range(res_lines))


t = myProf.Timer()  # timer

# ensemble properties
n_iters = 3
n_partitions = 100

# EAC properties
sparse_max_assocs_factor = 3

# ## run

logger.info("Starting experiment...")

# In[198]:

# index of the next row of `results` to fill
# (presumably advanced by the loop that follows -- not visible here)
res_idx = 0