def test_load_rb_wb_03():
    """Check the first instance returned by ``load_rb_wb_03``.

    A single instance is requested and compared, key for key, against a
    hard-coded expected dictionary.
    """
    def component(kind, b, l, r, t):
        # Every component entry holds four numeric fields plus a type tag.
        return {'b': b, 'l': l, 'r': r, 't': t, 'type': kind}

    expected = {
        '_guid': 'aa5eff72-0572-4eff-a007-3def9a82ba5b',
        '_human_cluster_label': '0',
        'component0': component('cube0', b=2.0, l=2.0, r=3.0, t=3.0),
        'component1': component('cube0', b=2.0, l=3.0, r=4.0, t=3.0),
        'component11': component('ufoo0', b=3.0, l=1.0, r=4.0, t=4.0),
        'component2': component('plat0', b=1.0, l=2.0, r=5.0, t=2.0),
        'component3': component('rect0', b=0.0, l=0.0, r=5.0, t=1.0),
    }

    loaded = load_rb_wb_03(num_instances=1)
    assert expected == loaded[0]
# Example no. 2
# 0
def output_json(file="forest", size=100, prune=True, seed=50, burn=1):
    """Fit a TrestleTree on a sample of a named dataset and dump it as JS.

    ``random`` is seeded with ``seed``, the selected dataset is loaded and
    shuffled, and only its first ``size`` instances are kept. The tree is
    fitted with ``iterations=burn``; its JSON form is pretty-printed and
    written to ``output.js`` as ``var trestle_output = <json>;``.

    Args:
        file: Dataset key. One of "forest", "voting", "iris", "mushroom",
            "rb_com_11", "rb_s_07", "rb_s_13" or "rb_wb_03". Any other
            value falls back to the forest fires dataset.
        size: Number of shuffled instances to keep.
        prune: Not used by this function.
        seed: Value passed to ``random.seed``.
        burn: Value passed as ``iterations`` to ``TrestleTree.fit``.
    """
    # (dataset key, loader attribute on ``ds``, run ObjectVariablizer?)
    known_datasets = (
        ("forest", "load_forest_fires", False),
        ("voting", "load_congressional_voting", False),
        ("iris", "load_iris", False),
        ("mushroom", "load_mushroom", False),
        ("rb_com_11", "load_rb_com_11", True),
        ("rb_s_07", "load_rb_s_07", True),
        ("rb_s_13", "load_rb_s_13", True),
        ("rb_wb_03", "load_rb_wb_03", True),
    )

    random.seed(seed)

    # Unrecognised keys use the forest fires data without variablizing.
    loader_name, variables = "load_forest_fires", False
    for key, attr, needs_variables in known_datasets:
        if file == key:
            loader_name, variables = attr, needs_variables
            break
    instances = getattr(ds, loader_name)()

    random.shuffle(instances)
    pprint.pprint(instances[0])
    instances = instances[:size]
    print(len(instances))

    if variables:
        variablizer = ObjectVariablizer()
        instances = [variablizer.transform(inst) for inst in instances]

    tree = TrestleTree()
    tree.fit(instances, iterations=burn)

    pprint.pprint(tree.root.output_json())

    with open('output.js', 'w') as out:
        # Wrap the JSON in a JavaScript variable assignment.
        out.write("var trestle_output = ")
        out.write(json.dumps(tree.root.output_json()))
        out.write(";")
# Example no. 3
# 0
def output_json(file="forest", size=100, prune=True, seed=50, burn=1):
    """Build a TrestleTree from a dataset sample and save it to ``output.js``.

    Steps: seed ``random`` with ``seed``, load the dataset named by
    ``file``, shuffle it, keep the first ``size`` instances, optionally
    run them through ``ObjectVariablizer``, fit the tree with
    ``iterations=burn`` and write ``var trestle_output = <json>;`` to
    ``output.js``.

    Args:
        file: Dataset key. Unknown keys load the forest fires dataset.
        size: Number of shuffled instances to keep.
        prune: Not used by this function.
        seed: Value passed to ``random.seed``.
        burn: Value passed as ``iterations`` to ``TrestleTree.fit``.
    """
    def _load_selected():
        # Returns (instances, variablize?) for the requested dataset key.
        if file == "forest":
            return ds.load_forest_fires(), False
        if file == "voting":
            return ds.load_congressional_voting(), False
        if file == "iris":
            return ds.load_iris(), False
        if file == "mushroom":
            return ds.load_mushroom(), False
        if file == "rb_com_11":
            return ds.load_rb_com_11(), True
        if file == "rb_s_07":
            return ds.load_rb_s_07(), True
        if file == "rb_s_13":
            return ds.load_rb_s_13(), True
        if file == "rb_wb_03":
            return ds.load_rb_wb_03(), True
        # Anything else falls back to the forest fires data.
        return ds.load_forest_fires(), False

    random.seed(seed)
    instances, variables = _load_selected()

    random.shuffle(instances)
    pprint.pprint(instances[0])
    instances = instances[:size]
    print(len(instances))

    if variables:
        variablizer = ObjectVariablizer()
        instances = [variablizer.transform(item) for item in instances]

    tree = TrestleTree()
    tree.fit(instances, iterations=burn)

    with open('output.js', 'w') as out:
        # Emit the tree as a JavaScript variable assignment.
        out.write("var trestle_output = ")
        out.write(json.dumps(tree.root.output_json()))
        out.write(";")
def test_load_rb_wb_03():
    """Verify that ``load_rb_wb_03(num_instances=1)`` yields the known tower.

    The expected instance is assembled from a compact table of component
    rows and compared for equality with the first loaded instance.
    """
    field_names = ('b', 'l', 'r', 't', 'type')
    component_rows = {
        'component0': (2.0, 2.0, 3.0, 3.0, 'cube0'),
        'component1': (2.0, 3.0, 4.0, 3.0, 'cube0'),
        'component11': (3.0, 1.0, 4.0, 4.0, 'ufoo0'),
        'component2': (1.0, 2.0, 5.0, 2.0, 'plat0'),
        'component3': (0.0, 0.0, 5.0, 1.0, 'rect0'),
    }

    # Expand each row into the {'b': ..., 'l': ..., ...} mapping form.
    expected = {
        name: dict(zip(field_names, values))
        for name, values in component_rows.items()
    }
    expected['_guid'] = 'aa5eff72-0572-4eff-a007-3def9a82ba5b'
    expected['_human_cluster_label'] = '0'

    instances = load_rb_wb_03(num_instances=1)
    assert expected == instances[0]
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import absolute_import
from __future__ import division
from random import shuffle

from sklearn.metrics import adjusted_rand_score
import matplotlib.pyplot as plt

from concept_formation.trestle import TrestleTree
from concept_formation.cluster import cluster
from concept_formation.datasets import load_rb_wb_03
from concept_formation.structure_mapper import ObjectVariablizer

# Script: cluster a sample of the rb_wb_03 towers with TRESTLE and plot how
# well each returned clustering agrees with the human-assigned labels.

# Load the dataset, shuffle it in place and keep the first 30 instances.
# No seed is set in this snippet, so the sample can differ between runs.
towers = load_rb_wb_03()
shuffle(towers)
towers = towers[:30]

# Run every instance through ObjectVariablizer before clustering.
variablizer = ObjectVariablizer()
towers = [variablizer.transform(t) for t in towers]

tree = TrestleTree()
# NOTE(review): presumably `clusters` holds one label assignment per split
# level, up to `maxsplit` -- confirm against the `cluster` documentation.
clusters = cluster(tree, towers, maxsplit=10)
human_labels = [tower['_human_cluster_label'] for tower in towers]

# x: 1-based position of each entry in `clusters`;
# y: adjusted Rand score of that entry against the human labels.
x = [num_splits for num_splits in range(1,len(clusters)+1)]
y = [adjusted_rand_score(human_labels, split) for split in clusters]
plt.plot(x, y, label="TRESTLE")

plt.title("TRESTLE Clustering Accuracy (Given Human Ground Truth)")
plt.ylabel("Adjusted Rand Index (Agreement Correcting for Chance)")
    clusters = [cluster_split_search(tree, dataset, h, minsplit=1, maxsplit=40,
                                     mod=False) for h in hueristics]
    human_labels = [ds['_human_cluster_label'] for ds in dataset]

    return [max(adjusted_rand_score(human_labels, huer), 0.01) for huer in
            clusters]


# Script: draw a grouped bar chart with one group per heuristic and one bar
# per dataset. `hueristics` and `calculate_aris` are defined outside this
# span.

# One x position per heuristic; `width` is both the bar width and the
# offset used to place the three bars of a group side by side.
x = np.arange(len(hueristics))
width = 0.3

# Tick labels for the x axis, set via plt.xticks below.
hueristic_names = ['AIC', 'BIC', 'CU', 'AICc']
# for i in range(len(clusters)):
#     hueristic_names[i] +=  '\nClusters='+str(len(set(clusters[i])))

# Bars for the three datasets are drawn at x - width, x and x + width.
b1 = plt.bar(x-width, calculate_aris(load_rb_wb_03()),
             width, color='r', alpha=.8, align='center')
b2 = plt.bar(x, calculate_aris(load_rb_com_11()),
             width, color='b', alpha=.8, align='center')
b3 = plt.bar(x+width, calculate_aris(load_rb_s_13()),
             width, color='g', alpha=.8, align='center')
# The first bar of each series serves as that dataset's legend handle.
plt.legend((b1[0], b2[0], b3[0]), ('wb_03', 'com_11', 's_13'))
plt.title("TRESTLE Clustering Accuracy of Best Clustering by Different"
          " Hueristics")
plt.ylabel("Adjusted Rand Index (Agreement Correcting for Chance)")
# Fix the y axis to the range 0..1.
plt.ylim(0, 1)
plt.xlabel("Hueristic")
plt.xticks(x, hueristic_names)
plt.show()
from random import shuffle
from random import seed

from sklearn.metrics import adjusted_rand_score
import matplotlib.pyplot as plt
import numpy as np

from concept_formation.trestle import TrestleTree
from concept_formation.cluster import cluster_split_search
from concept_formation.cluster import AIC, BIC, AICc, CU
from concept_formation.datasets import load_rb_wb_03
from concept_formation.preprocessor import ObjectVariablizer

# Script: fit a TrestleTree on a sample of the rb_wb_03 towers, then run
# cluster_split_search once per heuristic.

# Seed the `random` module so the shuffle below is repeatable.
seed(5)

# Load the dataset, shuffle it in place and keep the first 60 instances.
towers = load_rb_wb_03()
shuffle(towers)
towers = towers[:60]

# Run every instance through ObjectVariablizer before fitting.
variablizer = ObjectVariablizer()
towers = [variablizer.transform(t) for t in towers]

tree = TrestleTree()
tree.fit(towers)

# Heuristics imported from concept_formation.cluster, in plotting order.
hueristics = [AIC, BIC, CU, AICc]

# One cluster_split_search result per heuristic, in the order listed above.
# NOTE(review): presumably each result is a label assignment for `towers`
# -- confirm against the cluster_split_search documentation.
clusters = [
    cluster_split_search(tree, towers, h, minsplit=1, maxsplit=40, mod=False)
    for h in hueristics
]
    human_labels = [dataset['_human_cluster_label'] for dataset in dataset]

    return [
        max(adjusted_rand_score(human_labels, huer), 0.01) for huer in clusters
    ]


# Script: set up a grouped bar chart with one group per heuristic.
# `calculate_aris` is defined outside this span.

# One x position per heuristic; `width` is both the bar width and the
# offset between neighbouring bars in a group.
x = np.arange(len(hueristics))
width = 0.3

# Display names for the heuristics.
hueristic_names = ['AIC', 'BIC', 'CU', 'AICc']
# for i in range(len(clusters)):
#     hueristic_names[i] +=  '\nClusters='+str(len(set(clusters[i])))

# rb_wb_03 scores, drawn one bar-width to the left of each x position.
b1 = plt.bar(x - width,
             calculate_aris(load_rb_wb_03()),
             width,
             color='r',
             alpha=.8,
             align='center')
# rb_com_11 scores, drawn at each x position.
b2 = plt.bar(x,
             calculate_aris(load_rb_com_11()),
             width,
             color='b',
             alpha=.8,
             align='center')
b3 = plt.bar(x + width,
             calculate_aris(load_rb_s_13()),
             width,
             color='g',
             alpha=.8,