Ejemplo n.º 1
0
 def test_cobweb3(self):
     """Incrementally fit 40 random numeric instances and verify the tree.

     Every instance carries two attributes, ``x`` and ``y``, each drawn
     with ``random.normalvariate(0, 4)``. Once all instances have been
     added, the root of the tree is handed to ``verify_counts``.
     """
     tree = Cobweb3Tree()
     for _ in range(40):
         # 'x' is drawn before 'y', exactly as in a statement-by-statement
         # construction of the instance.
         tree.ifit({
             'x': random.normalvariate(0, 4),
             'y': random.normalvariate(0, 4),
         })
     verify_counts(tree.root)
Ejemplo n.º 2
0
 def test_cobweb(self):
     """Incrementally fit 40 random nominal instances and verify the tree.

     Every instance carries two attributes, ``a1`` and ``a2``, each chosen
     at random from the four values ``v1`` .. ``v4``. Once all instances
     have been added, the root of the tree is handed to ``verify_counts``.
     """
     values = ['v1', 'v2', 'v3', 'v4']
     tree = Cobweb3Tree()
     for _ in range(40):
         # 'a1' is chosen before 'a2', matching the original draw order.
         tree.ifit({
             'a1': random.choice(values),
             'a2': random.choice(values),
         })
     verify_counts(tree.root)
Ejemplo n.º 3
0
def cluster_cobweb3(data):
    """Cluster the rows of ``data`` with a Cobweb3 tree and save the labels.

    Each row is converted into a dictionary keyed by the module-level
    ``dim_list`` and the resulting instances are passed to ``cluster``
    together with a fresh ``Cobweb3Tree``. Every distinct cluster value is
    mapped to an integer label, the labels are appended to ``data`` as an
    extra column, and the combined array is written as comma-separated text
    to the module-level ``file_location_out_data``.

    Args:
        data: Two-dimensional array indexed as ``data[row, column]``.
            ``dim_list`` must provide a name for every column.

    Returns:
        A ``(clusters, lbs)`` tuple: the first element of the value returned
        by ``cluster`` and an array of shape ``(len(clusters), 1)`` holding
        the integer label assigned to each point.
    """
    n_channels = data.shape[1]

    # convert data from np array to list of dictionaries
    data_new = [
        {dim_list[j]: pt[j] for j in range(n_channels)}
        for pt in data
    ]

    # perform cobweb3 clustering and get labels
    print('starting cobweb3')
    print('note, this can take some time ...')
    start_time = time.time()

    tree = Cobweb3Tree()

    # A copy of the instance list is handed over, as before, so that
    # data_new itself is never touched by the clustering call.
    clusters = cluster(tree, data_new[:])[0]
    print('# points:', len(clusters))

    clust_names = list(set(clusters))
    print('  cluster names:', clust_names)

    # Map every distinct cluster value to a small integer label. This
    # mapping used to be computed (and printed) twice in a row.
    clust_dict = {c: idx for idx, c in enumerate(clust_names)}
    print(clust_dict)
    lbs = [clust_dict[c] for c in clusters]
    print('length of lbs:', len(lbs))

    elapsed_time = time.time() - start_time

    print('done, elapsed mins:', np.round(elapsed_time / 60, 2))

    # append labels to csv file of data
    lbs = np.asarray(lbs).reshape(len(lbs), 1)
    print(lbs.shape)
    new = np.concatenate((data, lbs), axis=1)
    print(new.shape)

    np.savetxt(file_location_out_data, new, delimiter=',')

    print('done with cluster_cobweb3')

    return clusters, lbs
Ejemplo n.º 4
0
def cobweb(data, attname_arr, integer_arr_list, norm_data):
    """Fit a Cobweb3 tree to ``data`` and pick the best-scoring ``k``.

    Rows are turned into attribute dictionaries, a ``Cobweb3Tree`` is fitted
    to them, and ``cluster.k_cluster`` is evaluated for ``k`` from 2 to 10.
    Each labelling is scored with ``silhouette_score`` against ``norm_data``
    and the ``k`` with the highest score is remembered.

    Args:
        data: Iterable of rows; each row is zipped with ``attname_arr``.
        attname_arr: Attribute names, one per column of a row.
        integer_arr_list: Names of the attributes whose values are converted
            with ``float`` before being stored.
        norm_data: Feature matrix passed to ``silhouette_score``.

    Returns:
        A ``(labels, kbest)`` tuple holding the set of distinct labels of
        the best-scoring clustering and the ``k`` that produced it. If no
        ``k`` could be scored the result is ``(set(), None)``.
    """
    maxsilh = float('-inf')
    centroid_best = set()
    # Initialised up front so the summary below cannot hit an unbound name
    # when every silhouette computation fails.
    kbest = None

    cobweb_data = [
        {
            att_name: float(att_val) if att_name in integer_arr_list
            else att_val
            for att_name, att_val in zip(attname_arr, row)
        }
        for row in data
    ]

    tree = Cobweb3Tree()
    tree.fit(cobweb_data)

    print("nodes in tree")
    pprint(tree.root.output_json())
    print("")
    for k in range(2, 11):
        # NOTE(review): other callers index the result of k_cluster with
        # [0]; confirm which form this version of the library returns.
        cluster_labels = np.array(
            list(cluster.k_cluster(tree, cobweb_data, k=k)))
        print(set(cluster_labels))
        try:
            silhouette_avg = silhouette_score(norm_data, cluster_labels)
        except Exception:
            # Still best-effort, but no longer a bare ``except`` that would
            # also swallow KeyboardInterrupt / SystemExit.
            print("error in silhouette_score")
            continue
        print(k, silhouette_avg)
        if silhouette_avg > maxsilh:
            maxsilh = silhouette_avg
            centroid_best = set(cluster_labels)
            kbest = k

    print("kbest")
    print(kbest, maxsilh)

    # Return the labels that belong to kbest rather than whatever the last
    # iteration (k == 10) happened to produce.
    return centroid_best, kbest
Ejemplo n.º 5
0
def cobweb(data, attname_arr):
    """Fit a Cobweb3 tree to ``data`` and print the resulting tree.

    Each row of ``data`` is paired with ``attname_arr`` to form an
    attribute dictionary; the dictionaries are fitted with a fresh
    ``Cobweb3Tree``. The number of instances and the JSON form of the root
    node are printed.

    Args:
        data: Iterable of rows of attribute values.
        attname_arr: Attribute names, one per column of a row.
    """
    # NOTE(review): ``irises`` is neither a parameter nor a local; this
    # relies on a module-level name. Confirm whether ``data`` was meant.
    shuffle(irises)

    cobweb_data = [dict(zip(attname_arr, row)) for row in data]

    tree = Cobweb3Tree()
    tree.fit(cobweb_data)

    # Parenthesised single-argument prints behave the same on Python 2 and
    # Python 3, unlike the print statements used previously.
    print(len(cobweb_data))
    print("nodes in tree")
    pprint(tree.root.output_json())
Ejemplo n.º 6
0
def cobweb():
    """Fit a Cobweb3 tree to the shuffled iris data and print its root.

    The ``class`` attribute is removed from every instance before fitting.
    After fitting, the number of instances, the root node and the pairs
    yielded by ``rootnode.items()`` are printed, and ``k_cluster`` is called
    on the fitted tree with a third argument of 3.
    """
    irises = load_iris()
    shuffle(irises)

    tree = Cobweb3Tree()
    irises_no_class = [{a: iris[a]
                        for a in iris if a != 'class'} for iris in irises]
    tree.fit(irises_no_class)

    # Parenthesised single-argument prints behave the same on Python 2 and
    # Python 3, unlike the print statements used previously.
    print(len(irises))
    print("nodes in tree")

    rootnode = tree.root
    print(rootnode)
    print("")
    for k, v in rootnode.items():
        print(k, v)
    # NOTE(review): the result is not used or returned in the visible body.
    clusters = k_cluster(tree, irises_no_class, 3)[0]
Ejemplo n.º 7
0
# Add noise to every fifth target value of both series.
y[::5] += 1 * (0.5 - np.random.rand(8))
y2[::5] += 1 * (0.5 - np.random.rand(8))

# Create dictionaries
# Note that the y value is stored as a hidden variable because
# in this case we only want to use the X value to make predictions.
training_data = [{'X': v[0], '_y': y[i]} for i, v in enumerate(X)]
shuffle(training_data)

# Build test data
test_data = [{'X': v[0]} for v in T]

# Fit cobweb models
cbt = CobwebTree()
cb3t = Cobweb3Tree()

cbt.fit(training_data, iterations=1)
cb3t.fit(training_data, iterations=1)
print(cb3t.root)

child = cb3t.categorize({'X': 4.16})
print(child.predict('X'))
# The target is stored under the key '_y' in training_data above, so that is
# the attribute name to query; 'y' is never present in any instance.
print(child.predict('_y'))

# Walk from the matched concept up to the root, printing each ancestor.
curr = child
print(curr)
while curr.parent is not None:
    curr = curr.parent
    print(curr)
Ejemplo n.º 8
0
 def __init__(self, **kwargs):
     """Set the state format and create the wrapped Cobweb3 tree.

     All keyword arguments are forwarded unchanged to ``Cobweb3Tree``.
     """
     self.state_format = "variablized_state"
     self.tree = Cobweb3Tree(**kwargs)
from concept_formation.examples.examples_utils import avg_lines
from concept_formation.evaluation import incremental_evaluation
from concept_formation.cobweb3 import Cobweb3Tree
from concept_formation.dummy import DummyTree
from concept_formation.datasets import load_iris

num_runs = 30
num_examples = 20
irises = load_iris()

# Evaluate both learners on the same data with identical settings,
# predicting the "class" attribute.
naive_data = incremental_evaluation(DummyTree(),
                                    irises,
                                    run_length=num_examples,
                                    runs=num_runs,
                                    attr="class")
cobweb_data = incremental_evaluation(Cobweb3Tree(),
                                     irises,
                                     run_length=num_examples,
                                     runs=num_runs,
                                     attr="class")
cobweb_x, cobweb_y = [], []
naive_x, naive_y = [], []

# Flatten the results, indexed as [run][opp], into parallel x/y lists.
for opp in range(len(cobweb_data[0])):
    for run in range(len(cobweb_data)):
        cobweb_x.append(opp)
        cobweb_y.append(cobweb_data[run][opp])

for opp in range(len(naive_data[0])):
    for run in range(len(naive_data)):
        naive_x.append(opp)
        # Mirrors the cobweb loop above; without this line naive_y stayed
        # empty while naive_x was filled.
        naive_y.append(naive_data[run][opp])
Ejemplo n.º 10
0
def _noisy_normal(mean, other_means, std, noise):
    """Draw one value around ``mean``, or around another mean when noisy."""
    if random() >= noise:
        return np.random.normal(mean, std, 1)[0]
    # One candidate is drawn per alternative mean before one is picked,
    # which keeps the sequence of random draws the same as before.
    return choice([np.random.normal(m, std, 1)[0] for m in other_means])


def _make_clust_exp_point(group, others, nominal_noise, numeric_noise):
    """Build one instance for ``group``; noisy values come from ``others``."""
    label, f3_mean, f4_mean = group
    x = {'_label': label}

    if random() >= nominal_noise:
        x['f1'] = label + "f1"
    else:
        x['f1'] = choice([other[0] + "f1" for other in others])

    if random() >= nominal_noise:
        x['f2'] = choice([label + "f2a", label + "f2b"])
    else:
        x['f2'] = choice([other[0] + suffix
                          for other in others
                          for suffix in ("f2a", "f2b")])

    x['f3'] = _noisy_normal(f3_mean, [other[1] for other in others], 1,
                            numeric_noise)
    x['f4'] = _noisy_normal(f4_mean, [other[2] for other in others], 2,
                            numeric_noise)
    return x


def run_clust_exp(nominal_noise=0, numeric_noise=0, scaling=False):
    """Generate three synthetic groups of instances and cluster them.

    Sixty instances are generated for each of the groups ``G1``, ``G2`` and
    ``G3``. Every instance has a hidden ``_label``, two nominal attributes
    (``f1``, ``f2``) and two numeric attributes (``f3``, ``f4``). For each
    attribute a uniform draw is compared with the matching noise level; when
    the draw falls below it, the value is taken from one of the other two
    groups instead of the instance's own group.

    Previously the noisy ``f1`` value of a ``G2`` instance was chosen from
    ``G2f1`` and ``G3f1``, i.e. it could be G2's own value; it is now chosen
    from ``G1f1`` and ``G3f1``, in line with the other groups.

    Args:
        nominal_noise: Threshold compared with ``random()`` for ``f1``/``f2``.
        numeric_noise: Threshold compared with ``random()`` for ``f3``/``f4``.
        scaling: Forwarded to ``Cobweb3Tree`` as its ``scaling`` argument.

    Returns:
        A ``(data, labels)`` tuple: the shuffled list of generated instances
        and the first element of the value returned by ``cluster``.
    """
    # (label, mean of f3, mean of f4) for each generating group.
    groups = [("G1", 4, 20), ("G2", 10, 32), ("G3", 16, 44)]

    data = []
    for group in groups:
        others = [g for g in groups if g is not group]
        for _ in range(60):
            data.append(_make_clust_exp_point(group, others,
                                              nominal_noise, numeric_noise))

    shuffle(data)
    t = Cobweb3Tree(scaling=scaling)
    clustering = cluster(t, data)
    return data, clustering[0]
Ejemplo n.º 11
0
 def __init__(self, q_init=0, learning_rate=0):
     """Create the Cobweb3 tree held by this object.

     NOTE(review): ``q_init`` and ``learning_rate`` are accepted but are not
     read anywhere in the visible body -- confirm whether they should be
     stored or forwarded.
     """
     self.tree = Cobweb3Tree()
Ejemplo n.º 12
0
from __future__ import division
from random import shuffle

import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.feature_extraction import DictVectorizer
from sklearn.metrics import adjusted_rand_score

from concept_formation.cobweb3 import Cobweb3Tree
from concept_formation.cluster import cluster
from concept_formation.datasets import load_iris

# Load the iris instances and randomise their order.
irises = load_iris()
shuffle(irises)

# Cluster on every attribute except the class label.
tree = Cobweb3Tree()
irises_no_class = [
    {attr: iris[attr] for attr in iris if attr != 'class'}
    for iris in irises
]
clusters = cluster(tree, irises_no_class)[0]

# Compare the cluster assignment with the class labels.
iris_class = [iris['class'] for iris in irises if 'class' in iris]
ari = adjusted_rand_score(clusters, iris_class)

# Vectorise the same class-free attributes and project them onto two
# principal components.
dv = DictVectorizer(sparse=False)
iris_X = dv.fit_transform([
    {attr: iris[attr] for attr in iris if attr != 'class'}
    for iris in irises
])
pca = PCA(n_components=2)
iris_2d_x = pca.fit_transform(iris_X)

# Lookup tables: each distinct cluster / class value gets an integer index.
colors = ['b', 'g', 'r', 'y', 'k', 'c', 'm']
shapes = ['o', '^', '+']
clust_set = {value: index for index, value in enumerate(set(clusters))}
class_set = {value: index for index, value in enumerate(set(iris_class))}
Ejemplo n.º 13
0
 def __init__(self, params=None):
     """Create the wrapped Cobweb3 tree.

     Args:
         params: Optional mapping of keyword arguments for ``Cobweb3Tree``.
             When it is ``None`` the tree is built with no arguments.
     """
     self.tree = Cobweb3Tree() if params is None else Cobweb3Tree(**params)