Example #1
def spnClassificationSPNFit(X, Y, alpha=0.001, min_slices=80):
    classes = numpy.unique(Y)
    spns = []

    trainll = numpy.zeros((X.shape[0], classes.shape[0]))
    ws = []
    for j in range(classes.shape[0]):
        idx = Y == classes[j]
        ws.append(float(numpy.sum(idx)) / X.shape[0])

        data_train_class = X[idx, :]
        spn = LearnSPN(cache=memory,
                       alpha=alpha,
                       min_instances_slice=min_slices,
                       cluster_prep_method=None,
                       families="gaussian").fit_structure(data_train_class)
        spns.append(spn)

        trainll[idx, j] = spn.eval(data_train_class, individual=True)

    x = Variable(len(classes))

    constraints = [sum_entries(x) == 1, x > 0]

    A = numpy.exp(trainll)

    objective = Maximize(sum_entries(log(A * x)))
    prob = Problem(objective, constraints)
    prob.solve()
    # print("Optimal value", prob.solve())

    #ws = sum(x.value.tolist(), [])
    print(ws)

    return {'classes': classes, 'spns': spns, 'weights': ws}
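The fit above only returns the per-class SPNs together with their prior weights; the matching prediction step is not shown. A minimal sketch of such a step (the name spnClassificationSPNPredict is hypothetical, and it assumes the spn.eval(data, individual=True) interface used above returns per-instance log-likelihoods):

def spnClassificationSPNPredict(model, X):
    # Hypothetical companion to the fit above: score each instance under every
    # per-class SPN, add the log prior weight, and take the argmax per row.
    classes, spns, ws = model['classes'], model['spns'], model['weights']
    ll = numpy.zeros((X.shape[0], classes.shape[0]))
    for j, spn in enumerate(spns):
        ll[:, j] = numpy.asarray(spn.eval(X, individual=True)) + numpy.log(ws[j])
    return classes[numpy.argmax(ll, axis=1)]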
Example #2
    def fit(self, X, y):
        # Check that X and y have correct shape
        X, y = check_X_y(X, y)
        # Store the classes seen during fit
        self.classes_ = unique_labels(y)
        print(y.shape, numpy.unique(y))
        print(self.classes_)
        
        #0/0

        self.X_ = X
        self.y_ = y
        # Return the classifier
        
        
        # classes = numpy.unique(Y)
        self.spns_ = []
        
        self.ws_ = []
        trainll = numpy.zeros((X.shape[0],self.classes_.shape[0]))
        for j in range(self.classes_.shape[0]):
            idx = y == self.classes_[j]
            #self.ws_.append(float(numpy.sum(idx)) / X.shape[0])
            
            data_train_class = X[idx, :]
            spn = LearnSPN(alpha=self.alpha,
                           min_instances_slice=self.min_instances_slice,
                           cluster_prep_method="sqrt",
                           families=self.families,
                           cache=memory).fit_structure(data_train_class)
            self.spns_.append(spn)
            trainll[idx, j] = spn.eval(data_train_class, individual=True)
        
        
        #self.ws_ = self.ws_/numpy.sum(self.ws_)
        
        
        x = Variable(self.classes_.shape[0])
    
        constraints = [sum_entries(x) == 1, x > 0]
        
        A = numpy.exp(trainll)
            
        objective = Maximize(sum_entries(log(A * x)))
        prob = Problem(objective, constraints)
        prob.solve()
        
        
        self.ws_ = sum(x.value.tolist(), [])
        #print("Optimal w",self.ws_)
        
        return self
Example #3
def pspnperplexity(train, test, min_slices, ind_test_method,
                   row_cluster_method):
    c1 = Chrono().start()
    spn = LearnSPN(alpha=0.001,
                   min_slices=min_slices,
                   cluster_prep_method="sqrt",
                   ind_test_method=ind_test_method,
                   row_cluster_method=row_cluster_method).fit_structure(train)
    c1.end()
    time = c1.elapsed()
    pwb, perplexity, words, logl = spn.perplexity(test)

    print(
        "SPN ll=%s %.3f per-word bound, %.1f perplexity estimate based on a held-out corpus of %i documents with %i words"
        % (logl, pwb, perplexity, test.shape[0], words))
    return perplexity, logl, time, spn.size()
Example #4
def spnClassificationGeneralFit(X, Y, maxClasses, alpha=0.001, min_slices=500):
    # need to convert Y into one-hot encoding as there is no multinomial till now
    #Y = getOneHotEncoding(Y, maxClasses)
    print('X shape : ',X.shape)
    print('Y shape : ',Y.shape)
    families = ['gaussian']*X.shape[1]+['binomial']*Y.shape[1]
    data_train_class = numpy.c_[X,Y]
    spn = LearnSPN(cache=memory,
                   row_cluster_method="RandomPartition",
                   ind_test_method="subsample",
                   alpha=alpha,
                   min_features_slice=30,
                   min_instances_slice=min_slices,
                   cluster_prep_method=None,
                   families=families).fit_structure(data_train_class)
    return spn
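The commented-out getOneHotEncoding call suggests Y is expected as a one-hot 0/1 matrix so that each label column can be modelled with a binomial family. A minimal numpy sketch of such a helper (hypothetical name, not part of the library) could be:

def getOneHotEncoding(Y, maxClasses):
    # Hypothetical helper: map integer labels 0..maxClasses-1 to a
    # (n_samples, maxClasses) 0/1 indicator matrix.
    onehot = numpy.zeros((Y.shape[0], maxClasses))
    onehot[numpy.arange(Y.shape[0]), Y.astype(int)] = 1
    return onehot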
Example #5
def spnlearn(data, alpha, min_slices=30, cluster_prep_method=None):

    numpy_rand_gen = numpy.random.RandomState(1337)

    print("learnspn")
    spn = LearnSPN(
        min_instances_slice=min_slices,
        row_cluster_method="KMeans",
        n_cluster_splits=2,
        # g_factor=5*10.0**-17,
        # g_factor=0.5,
        alpha=alpha,
        n_iters=2000,
        n_restarts=4,
        rand_gen=numpy_rand_gen,
        cluster_prep_method=cluster_prep_method).fit_structure(data=data)

    return spn
Example #6
def spnClassificationNBFit(X, Y, alpha=0.001, min_slices=80):
    classes = numpy.unique(Y)
    spns = []
    
    # trainll = numpy.zeros((X.shape[0],classes.shape[0]))
    ws = []
    for j in range(classes.shape[0]):
        idx = Y == classes[j]
        ws.append(float(numpy.sum(idx)) / X.shape[0])
        
        data_train_class = X[idx, :]
        spn = LearnSPN(cache=memory,
                       alpha=alpha,
                       min_instances_slice=min_slices,
                       cluster_prep_method=None,
                       families="gaussian").fit_structure(data_train_class)
        spns.append(spn)
        
        # trainll[idx, j] = spn.eval(data_train_class, individual=True)
        

    return {'classes':classes, 'spns':spns, 'weights':ws}
Example #7
    return pdn.getLogLikelihood(test)


for dsname, data, featureNames in [datasets.getCommunitiesAndCrimes()]:

    #for dsname, data, featureNames in [datasets.getNips(), datasets.getSynthetic(), datasets.getMSNBCclicks(), datasets.getCommunitiesAndCrimes()]:

    printlocal(dsname)
    printlocal(featureNames)
    printlocal(len(featureNames))
    printlocal(data.shape)

    stats = Stats(name=dsname)
    for train, test, i in kfolded(data, 5):
        spn = LearnSPN(alpha=0.001,
                       min_instances_slice=80,
                       cluster_prep_method="sqrt",
                       cache=memory).fit_structure(train)

        printlocal("done")
        stats.addConfig("PSPN", spn.config)
        # stats.add("SPN Pois", Stats.LOG_LIKELIHOOD, llspn(spn, test))
        printlocal("LL")
        stats.add("PSPN", Stats.MODEL_SIZE, spn.size())
        printlocal("model size")
        prediction = spnComputeLambdas(spn, test)
        printlocal("model spnComputeLambdas")
        #prediction2 = spnComputeLambdasCuda(spn, test)
        prediction2 = spnComputeLambdas2(spn, test)
        printlocal("model spnComputeLambdas2")
        stats.add("PSPN", Stats.ABS_ERROR, abs_error(test, prediction))
        stats.add("PSPN", Stats.SQUARED_ERROR, squared_error(test, prediction))
Example #8
    out_log.write(preamble)
    out_log.flush()
    #
    # looping over all parameters combinations
    for g_factor in g_factors:
        for cluster_penalty in cluster_penalties:
            for min_inst_slice in min_inst_slices:

                #
                # Creating the structure learner
                learner = LearnSPN(g_factor=g_factor,
                                   min_instances_slice=min_inst_slice,
                                   # alpha=alpha,
                                   row_cluster_method=args.cluster_method,
                                   cluster_penalty=cluster_penalty,
                                   n_cluster_splits=args.n_row_clusters,
                                   n_iters=args.n_iters,
                                   n_restarts=args.n_restarts,
                                   sklearn_args=sklearn_args,
                                   cltree_leaves=cltree_leaves,
                                   rand_gen=numpy_rand_gen)

                learn_start_t = perf_counter()

                #
                # build an spn on the training set
                spn = learner.fit_structure(data=train,
                                            feature_sizes=features)
                # spn = learner.fit_structure_bagging(data=train,
                #                                     feature_sizes=features,
                #                                     n_components=10)
Example #9
			out_log.write(preamble)
			out_log.flush()
			#
			# looping over all parameters combinations
			for g_factor in g_factors:
				for cluster_penalty in cluster_penalties:
					for min_inst_slice in min_inst_slices:

						#
						# Creating the structure learner
						learner = LearnSPN(g_factor=g_factor,
										   min_instances_slice=min_inst_slice,
										   # alpha=alpha,
										   row_cluster_method=args.cluster_method,
										   cluster_penalty=cluster_penalty,
										   n_cluster_splits=args.n_row_clusters,
										   n_iters=args.n_iters,
										   n_restarts=args.n_restarts,
										   sklearn_args=sklearn_args,
										   cltree_leaves=cltree_leaves,
										   rand_gen=numpy_rand_gen)

						learn_start_t = perf_counter()

						#
						# build an spn on the training set
						#spn = learner.fit_structure(data=train,
						#							feature_sizes=features)
						spn = learner.fit_structure_bagging(data=train,
						                                     feature_sizes=features,
						                                     n_components=10)
Example #10
                fold_models = []
                fold_params = defaultdict(dict)

                for i, (train, valid, test) in enumerate(fold_splits):
                    #
                    # fixing the seed
                    rand_gen = numpy.random.RandomState(seed)

                    #
                    # Creating the structure learner
                    learner = LearnSPN(g_factor=g_factor,
                                       min_instances_slice=min_inst_slice,
                                       alpha=alphas[0],
                                       row_cluster_method=args.cluster_method,
                                       cluster_penalty=cluster_penalty,
                                       n_cluster_splits=args.n_row_clusters,
                                       n_iters=args.n_iters,
                                       n_restarts=args.n_restarts,
                                       sklearn_args=sklearn_args,
                                       cltree_leaves=cltree_leaves,
                                       kde_leaves=kde_leaves,
                                       rand_gen=rand_gen)

                    learn_start_t = perf_counter()
                    #
                    # build an spn on the training set
                    spn = learner.fit_structure(data=train,
                                                feature_sizes=features)
                    learn_end_t = perf_counter()
                    l_time = learn_end_t - learn_start_t
                    logging.info('Structure learned in {} secs'.format(l_time))
                    fold_models.append(spn)
Example #11
import json
import time
from glob import glob

from natsort.natsort import natsorted
from natsort.ns_enum import ns
import numpy

from algo.learnspn import LearnSPN

result = json.load(open('gnspnoutfile4.json'))
oldres = json.dumps(result)
for fname in natsorted(glob("datasets/simdata*.csv"), alg=ns.IGNORECASE):
    print(fname)
    name = "%s_%s" % (fname.split("_")[1], fname.split("_")[2])
    idx = int(fname.split("_")[3]) - 1

    data = numpy.loadtxt(fname, dtype=float, delimiter=",", skiprows=1)
    for alpha in ["0.001", "0.0001", "0.00001"]:
        t0 = time.time()
        spn = LearnSPN(alpha=float(alpha),
                       min_instances_slice=data.shape[0] - 1,
                       cluster_first=False).fit_structure(data)

        ptime = (time.time() - t0)

        result[name]["glmptest_%s" % (alpha)][idx][0] = ptime
        # print(spn.to_text(list(map(lambda x: "V"+str(x),range(2,200000)))))
print(oldres)
print(json.dumps(result))

with open('gnspnoutfile4_withtime.txt', 'w') as outfile:
    json.dump(result, outfile)
Example #12
def evalspnComplete(labels, data, dsname, writer, alpha, min_instances_slice=50):
    
    cvfolds = StratifiedKFold(labels, n_folds=10, random_state=123)
    classes = list(set(labels))
    
    evalresults = OrderedDict()
    
    for train_index, test_index in cvfolds:
        train_data = data[train_index, ]
        train_labels = labels[train_index]
        
        test_data = data[test_index, ]
        test_labels = labels[test_index]
        
        # clfsvc = GridSearchCV(estimator=svm.SVC(kernel='linear', probability=True), param_grid=dict(C=numpy.logspace(-10, 0, 10)), n_jobs=50, cv=5)
        clfsvc = GridSearchCV(estimator=svm.SVC(kernel='linear', probability=True), param_grid={'C': [10 ** 3, 10 ** 2, 10 ** 1, 10 ** 0, 10 ** -1, 10 ** -2, 10 ** -3]}, n_jobs=50, cv=5)
        start = time.time()
        evalModel(clfsvc, test_data, test_labels, train_data, train_labels, "SVM raw", evalresults)
        evalresults.setdefault("SVM time in secs \t\t", []).append((time.time() - start))
        
        clspn = SPNClassifier(alpha=alpha, min_instances_slice=min_instances_slice)
        start = time.time()
        evalModel(clspn, test_data, test_labels, train_data, train_labels, "SPN NB raw", evalresults)
        evalresults.setdefault("SPN time in secs \t\t", []).append((time.time() - start))
        
        #print("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")
        #clflr = LogisticRegression(solver='lbfgs')
        #start = time.time()
        #evalModel(clflr, test_data, test_labels, train_data, train_labels, "LR NB raw", evalresults)
        #evalresults.setdefault("SPN time in secs \t\t", []).append((time.time() - start))
        continue
        
        evals_train = numpy.zeros((train_data.shape[0], 0))
        evals_test = numpy.zeros((test_data.shape[0], 0))

        grads_train = numpy.zeros((train_data.shape[0], 0))
        grads_test = numpy.zeros((test_data.shape[0], 0))
        
        activations_train = numpy.zeros((train_data.shape[0], 0))
        activations_test = numpy.zeros((test_data.shape[0], 0))
        
        
        #model = ClassificationNBFit(train_data, train_labels)
        
        timespn = 0
        for c in classes:
            #break
            idx = train_labels == c
            print(idx)
            data_train_class = train_data[idx, :]
            
            start = time.time()
            spn = LearnSPN(alpha=alpha, min_instances_slice=min_instances_slice, cluster_prep_method="sqrt", cache=memory).fit_structure(data_train_class)
            print(alpha, min_instances_slice)
            # spn = spnlearn(data_train_class, alpha, min_slices=min_slices, cluster_prep_method="sqrt", family="poisson")
            timespn += (time.time() - start)
            
            # continue
            evalperclass = numpy.asarray(spn.eval(train_data, individual=True)).reshape((train_data.shape[0], 1))
            print(evalperclass.shape)
            print(evalperclass)
            gradsperclass = spn.gradients(train_data)
            activationperclass = spn.activations(train_data)
            print(evals_train.shape)
            evals_train = numpy.append(evals_train, evalperclass, axis=1)
            print(evals_train)
            grads_train = numpy.hstack((grads_train, gradsperclass))
            activations_train = numpy.hstack((activations_train, activationperclass))
            
            evals_test = numpy.hstack((evals_test, numpy.asarray(spn.eval(test_data, individual=True)).reshape((test_data.shape[0], 1))))
            grads_test = numpy.hstack((grads_test, spn.gradients(test_data)))
            activations_test = numpy.hstack((activations_test, spn.activations(test_data)))
            print("loop done")
            
        evalresults.setdefault("SPN time in secs \t\t", []).append(timespn)
         
        
        
        evalModel(clflr, evals_test, test_labels, evals_train, train_labels, "SPN per class ll -> LR", evalresults)
    
        #evalModel(clfsvc, grads_test, test_labels, grads_train, train_labels, "SPN per class gradients -> SVM", evalresults)
        
        #evalModel(clfsvc, activations_test, test_labels, activations_train, train_labels, "SPN per class activations -> SVM", evalresults)
    
    
    writer.write(json.dumps(evalresults))
    writer.write("\n")
    
    
    for key, value in evalresults.items():
        writer.write("%s: %0.6f (+/- %0.6f) \n" % (key, mean(value), stdev(value) * 2))
        
    writer.write("\n")
Example #13
    def learn_model(self, cltree_leaves, args, comp, bgg):
        # set parameters for learning AC (cltree_leaves=True) and AL (cltree_leaves=False)
        print('-------MODELS CONSTRUCTION-----------')
        verbose = 1
        n_row_clusters = 2
        cluster_method = 'GMM'
        seed = 1337
        n_iters = 100
        n_restarts = 4
        cluster_penalties = [1.0]
        sklearn_Args = None
        if not args:
            g_factors = [5, 10, 15]
            min_inst_slices = [10, 50, 100]
            alphas = [0.1, 0.5, 1.0, 2.0]
        else:
            g_factors = [args[0]]
            min_inst_slices = [args[1]]
            alphas = [args[2]]
        # setting verbosity level
        if verbose == 1:
            logging.basicConfig(level=logging.INFO)
        elif verbose == 2:
            logging.basicConfig(level=logging.DEBUG)

        # logging.info("Starting with arguments:\n")

        if sklearn_Args is not None:
            sklearn_key_value_pairs = sklearn_Args.translate({
                ord('['): '',
                ord(']'): ''
            }).split(',')
            sklearn_args = {
                key.strip(): value.strip()
                for key, value in
                [pair.strip().split('=') for pair in sklearn_key_value_pairs]
            }
        else:
            sklearn_args = {}
        # logging.info(sklearn_args)

        # initing the random generators
        MAX_RAND_SEED = 99999999  # sys.maxsize
        rand_gen = random.Random(seed)
        numpy_rand_gen = numpy.random.RandomState(seed)

        #
        # elaborating the dataset
        #

        dataset_name = self.dataset
        # logging.info('Loading datasets: %s', dataset_name)
        train = self.train
        n_instances = train.shape[0]

        #
        # estimating the frequencies for the features
        # logging.info('')
        freqs, features = dataset.data_2_freqs(train)
        best_train_avg_ll = NEG_INF
        best_state = {}
        best_test_lls = None
        index = 0
        spns = []
        for g_factor in g_factors:
            for cluster_penalty in cluster_penalties:
                for min_inst_slice in min_inst_slices:
                    print('model')
                    # Creating the structure learner
                    learner = LearnSPN(
                        g_factor=g_factor,
                        min_instances_slice=min_inst_slice,
                        # alpha=alpha,
                        row_cluster_method=cluster_method,
                        cluster_penalty=cluster_penalty,
                        n_cluster_splits=n_row_clusters,
                        n_iters=n_iters,
                        n_restarts=n_restarts,
                        sklearn_args=sklearn_args,
                        cltree_leaves=cltree_leaves,
                        rand_gen=numpy_rand_gen)

                    learn_start_t = perf_counter()

                    # build an spn on the training set
                    if (bgg):
                        spn = learner.fit_structure_bagging(
                            data=train,
                            feature_sizes=features,
                            n_components=comp)
                    else:
                        spn = learner.fit_structure(data=train,
                                                    feature_sizes=features)

                    learn_end_t = perf_counter()
                    n_edges = spn.n_edges()
                    n_levels = spn.n_layers()
                    n_weights = spn.n_weights()
                    n_leaves = spn.n_leaves()

                    #
                    # smoothing can be done after the spn has been built
                    for alpha in alphas:
                        # logging.info('Smoothing leaves with alpha = %f', alpha)
                        spn.smooth_leaves(alpha)
                        spns.append(spn)

                        # Compute LL on training set
                        # logging.info('Evaluating on training set')
                        train_ll = 0.0

                        for instance in train:
                            (pred_ll, ) = spn.eval(instance)
                            train_ll += pred_ll
                        train_avg_ll = train_ll / train.shape[0]

                        # updating best stats according to train ll
                        if train_avg_ll > best_train_avg_ll:
                            best_train_avg_ll = train_avg_ll
                            best_state['alpha'] = alpha
                            best_state['min_inst_slice'] = min_inst_slice
                            best_state['g_factor'] = g_factor
                            best_state['cluster_penalty'] = cluster_penalty
                            best_state['train_ll'] = train_avg_ll
                            best_state['index'] = index
                            best_state['name'] = self.dataset

                        # writing to file a line for the grid
                        # stats = stats_format([g_factor,
                        #                       cluster_penalty,
                        #                       min_inst_slice,
                        #                       alpha,
                        #                       n_edges, n_levels,
                        #                       n_weights, n_leaves,
                        #                       train_avg_ll],
                        #                      '\t',
                        #                      digits=5)
                        # index = index + 1

        best_spn = spns[best_state['index']]
        # logging.info('Grid search ended.')
        # logging.info('Best params:\n\t%s', best_state)

        return best_spn, best_state['g_factor'], best_state[
            'min_inst_slice'], best_state['alpha']
Example #14
            # fixing the seed
            rand_gen = numpy.random.RandomState(seed)

            stats_dict = {}

            #
            # Creating the structure learner
            learner = LearnSPN(
                g_factor=g_factor,
                min_instances_slice=min_inst_slice,
                alpha=alpha,
                row_cluster_method=args.cluster_method,
                cluster_penalty=cluster_penalty,
                n_cluster_splits=args.n_row_clusters,
                n_iters=args.n_iters,
                n_restarts=args.n_restarts,
                sklearn_args=sklearn_args,
                cltree_leaves=cltree_leaves,
                kde_leaves=kde_leaves,
                rand_gen=rand_gen,
                features_split_method=args.features_split_method,
                entropy_threshold=entropy_threshold,
                adaptive_entropy=adaptive_entropy,
                percentage_rand_features=percentage_rand_features,
                percentage_instances=percentage_instances)

            learn_start_t = perf_counter()
            #
            # build an spn on the training set
            spn = learner.fit_structure(data=train,
                                        feature_sizes=features,
                                        learn_stats=stats_dict)
Example #15
best_state_mix = {}

#
# looping over all parameters combinations
for g_factor in g_factors:
    for cluster_penalty in cluster_penalties:
        for min_inst_slice in min_inst_slices:

            #
            # Creating the structure learner
            learner = LearnSPN(
                g_factor=g_factor,
                min_instances_slice=min_inst_slice,
                # alpha=alpha,
                row_cluster_method=args.cluster_method,
                cluster_penalty=cluster_penalty,
                n_cluster_splits=args.n_row_clusters,
                n_iters=args.n_iters,
                n_restarts=args.n_restarts,
                sklearn_args=sklearn_args,
                cltree_leaves=cltree_leaves,
                rand_gen=numpy_rand_gen)

            #
            # learning a mixture

            spns = \
                learner.fit_mixture_bootstrap(train,
                                              n_mix_components=n_mix,
                                              bootstrap_samples_ids=bootstrap_ids,
                                              feature_sizes=features,
                                              perc=perc,
Example #16
from joblib.memory import Memory
import numpy

from algo.learnspn import LearnSPN

if __name__ == '__main__':

    memory = Memory(cachedir="/tmp", verbose=0, compress=9)

    #data = numpy.loadtxt("data/breast_cancer/wdbc.data", delimiter=",")
    #data = data[:,1:]

    features_data = numpy.loadtxt("data/food/train/_preLogits.csv",
                                  delimiter=",")
    labels_data = numpy.loadtxt("data/food/train/_groundtruth.csv",
                                delimiter=",").astype(int)
    data = numpy.c_[features_data, labels_data]
    print(data.shape)
    print(data[1, :])

    fams = ["gaussian"] * features_data.shape[1] + ["binomial"] * labels_data.shape[1]
    spn = LearnSPN(cache=memory,
                   alpha=0.001,
                   min_instances_slice=200,
                   cluster_prep_method=None,
                   families=fams).fit_structure(data)

    print(spn.to_tensorflow(["V" + str(i) for i in range(data.shape[1])],
                            data))
Example #17
def learn(denseCorpus):
    return LearnSPN(alpha=0.001, min_instances_slice=100, cluster_prep_method="sqrt", ind_test_method="subsample", sub_sample_rows=2000).fit_structure(denseCorpus)
Example #18
def plotJointProb(filename, data, datarange):
    print(filename)
    print(data.shape)
    spn = LearnSPN(alpha=0.001, min_instances_slice=30, cache=memory).fit_structure(data)
    matplotlib.rcParams.update({'font.size': 16})
    pcm = cm.Blues


    f1 = 0
    f2 = 1

    x = data[:, f1]
    y = data[:, f2] 
    
    amin, amax = datarange[0], datarange[1]
    
    bins = numpy.asarray(list(range(amin, amax)))
    
    def getPxy(spn, f1, f2, xbins, ybins):
        import locale
        locale.setlocale(locale.LC_NUMERIC, 'C')
        Pxy = spn.getJointDist(f1, f2)

        jointDensity = numpy.zeros((max(xbins) + 1, max(ybins) + 1))
        for x  in xbins:
            for y in ybins:
                jointDensity[x, y] = Pxy(x, y)
        return jointDensity
    
   
    plt.clf()
    
    fig = plt.figure(figsize=(7, 7))
    
    
    # [left, bottom, width, height]
    xyHist = plt.axes([0.3, 0.3, 0.5, 0.5])
    cax = xyHist.imshow(getPxy(spn, f1, f2, bins, bins), extent=[amin, amax, amin, amax], interpolation='nearest', origin='lower', cmap=pcm)
    xyHist.set_xlim(amin, amax)
    xyHist.set_ylim(amin, amax)
    if amax > 20:
        xyHist.xaxis.set_major_locator(plticker.MultipleLocator(base=10))
        xyHist.yaxis.set_major_locator(plticker.MultipleLocator(base=10))
    
    xyHist.yaxis.grid(True, which='major', linestyle='-', color='darkgray')
    xyHist.xaxis.grid(True, which='major', linestyle='-', color='darkgray')
    
    xyHistcolor = plt.axes([0.82, 0.3, 0.03, 0.5])
    plt.colorbar(cax, cax=xyHistcolor)
    font = fm.FontProperties(size=32)
    # cax.yaxis.get_label().set_fontproperties(font)
    # cax.xaxis.get_label().set_fontproperties(font)
    
    xHist = plt.axes([0.05, 0.3, 0.15, 0.5])
    xHist.xaxis.set_major_formatter(NullFormatter())  # probs
    xHist.yaxis.set_major_formatter(NullFormatter())  # counts
    xHist.hist(x, bins=bins, orientation='horizontal', color='darkblue')
    xHist.invert_xaxis()
    xHist.set_ylim(amin, amax)
    
    yHist = plt.axes([0.3, 0.05, 0.5, 0.15])
    yHist.yaxis.set_major_formatter(NullFormatter())  # probs
    yHist.xaxis.set_major_formatter(NullFormatter())  # counts
    yHist.hist(y, bins=bins, color='darkblue')
    yHist.invert_yaxis()
    yHist.set_xlim(amin, amax)
    
    for elem in [xyHist, xHist, yHist]:
        elem.yaxis.grid(True, which='major', linestyle='-', color='darkgray')
        elem.xaxis.grid(True, which='major', linestyle='-', color='darkgray')

    plt.savefig(os.path.dirname(os.path.abspath(__file__)) + "/" + filename, bbox_inches='tight', dpi=600)
Example #19
    adj = numpy.zeros((nF, nF))

    for i in range(nF):
        for j in range(i + 1, nF):
            print(i, j)
            adj[i, j] = adj[j, i] = spn.computeDistance(words[i], words[j], words, True)

    return adj


dsname, data, words = getNips()

spn = LearnSPN(alpha=0.001,
               min_instances_slice=100,
               cluster_prep_method="sqrt",
               cache=memory).fit_structure(data)

adjc = getMIAdjc(spn, words)

#adjc = getDistAdjc(spn, words)

adjc = numpy.log(adjc)

print(adjc)
print(numpy.any(adjc > 0.8))


def show_graph_with_labels(fname, adjacency_matrix, mylabels):
    def make_label_dict(labels):
        l = {}