def test_load_rb_wb_03():
    """Verify the first record returned by ``load_rb_wb_03``.

    Loads a single instance and compares it against a hard-coded copy of
    the expected record (guid, human cluster label and five components).
    """
    loaded = load_rb_wb_03(num_instances=1)

    expected = {
        '_guid': 'aa5eff72-0572-4eff-a007-3def9a82ba5b',
        '_human_cluster_label': '0',
        'component0': {'type': 'cube0', 'l': 2.0, 'r': 3.0, 'b': 2.0, 't': 3.0},
        'component1': {'type': 'cube0', 'l': 3.0, 'r': 4.0, 'b': 2.0, 't': 3.0},
        'component2': {'type': 'plat0', 'l': 2.0, 'r': 5.0, 'b': 1.0, 't': 2.0},
        'component3': {'type': 'rect0', 'l': 0.0, 'r': 5.0, 'b': 0.0, 't': 1.0},
        'component11': {'type': 'ufoo0', 'l': 1.0, 'r': 4.0, 'b': 3.0, 't': 4.0},
    }

    # Only the first returned instance is checked.
    assert expected == loaded[0]
def output_json(file="forest", size=100, prune=True, seed=50, burn=1):
    """Fit a ``TrestleTree`` on a bundled dataset and dump it to ``output.js``.

    The chosen dataset is shuffled (after seeding ``random``), truncated to
    ``size`` instances, optionally passed through ``ObjectVariablizer``, and
    used to fit a tree. The tree's JSON form is pretty-printed and written
    to ``output.js`` as ``var trestle_output = <json>;``.

    Args:
        file: Dataset name. One of "forest", "voting", "iris", "mushroom",
            "rb_com_11", "rb_s_07", "rb_s_13", "rb_wb_03". Any other value
            falls back to the forest-fires dataset.
        size: Maximum number of (shuffled) instances to keep.
        prune: Unused by this function; kept so existing callers that pass
            it keep working.
        seed: Seed handed to ``random.seed`` before loading and shuffling.
        burn: Passed to ``TrestleTree.fit`` as ``iterations``.
    """
    # Dataset name -> (loader attribute on ``ds``, needs variablizing).
    # Loader names are resolved lazily with ``getattr`` so only the selected
    # loader has to exist, exactly as with the original if/elif chain.
    datasets = {
        "forest": ("load_forest_fires", False),
        "voting": ("load_congressional_voting", False),
        "iris": ("load_iris", False),
        "mushroom": ("load_mushroom", False),
        "rb_com_11": ("load_rb_com_11", True),
        "rb_s_07": ("load_rb_s_07", True),
        "rb_s_13": ("load_rb_s_13", True),
        "rb_wb_03": ("load_rb_wb_03", True),
    }

    random.seed(seed)

    # Unknown names silently use the forest-fires dataset.
    loader_name, variables = datasets.get(file, datasets["forest"])
    instances = getattr(ds, loader_name)()

    random.shuffle(instances)
    pprint.pprint(instances[0])
    instances = instances[:size]
    print(len(instances))

    if variables:
        variablizer = ObjectVariablizer()
        instances = [variablizer.transform(t) for t in instances]

    tree = TrestleTree()
    tree.fit(instances, iterations=burn)

    # Build the JSON structure once and reuse it for both the console dump
    # and the file, instead of walking the tree twice.
    tree_json = tree.root.output_json()
    pprint.pprint(tree_json)

    with open('output.js', 'w') as out:
        out.write("var trestle_output = ")
        out.write(json.dumps(tree_json))
        out.write(";")
def output_json(file="forest", size=100, prune=True, seed=50, burn=1):
    """Train a ``TrestleTree`` on a sample dataset and export it as JS.

    Seeds ``random``, loads the dataset named by ``file`` (falling back to
    forest fires for unrecognised names), shuffles it, keeps the first
    ``size`` instances, variablizes them for the ``rb_*`` datasets, fits the
    tree with ``iterations=burn`` and writes
    ``var trestle_output = <json>;`` to ``output.js``.

    ``prune`` is accepted but not used in this function.
    """
    loader_names = {
        "forest": "load_forest_fires",
        "voting": "load_congressional_voting",
        "iris": "load_iris",
        "mushroom": "load_mushroom",
        "rb_com_11": "load_rb_com_11",
        "rb_s_07": "load_rb_s_07",
        "rb_s_13": "load_rb_s_13",
        "rb_wb_03": "load_rb_wb_03",
    }
    # Datasets whose instances go through ObjectVariablizer before fitting.
    variablized = ("rb_com_11", "rb_s_07", "rb_s_13", "rb_wb_03")

    random.seed(seed)

    chosen = file if file in loader_names else "forest"
    variables = chosen in variablized
    instances = getattr(ds, loader_names[chosen])()

    random.shuffle(instances)
    pprint.pprint(instances[0])
    instances = instances[:size]
    print(len(instances))

    if variables:
        variablizer = ObjectVariablizer()
        instances = [variablizer.transform(inst) for inst in instances]

    tree = TrestleTree()
    tree.fit(instances, iterations=burn)

    serialized = json.dumps(tree.root.output_json())
    with open('output.js', 'w') as out:
        for piece in ("var trestle_output = ", serialized, ";"):
            out.write(piece)
def test_load_rb_wb_03():
    """Check that ``load_rb_wb_03`` yields the expected first instance.

    The expected record is assembled from a compact table of component
    values and then compared with the first loaded instance.
    """
    data = load_rb_wb_03(num_instances=1)

    # Column order for every row in ``component_values`` below.
    attr_names = ('b', 'l', 'r', 't', 'type')
    component_values = {
        'component0': (2.0, 2.0, 3.0, 3.0, 'cube0'),
        'component1': (2.0, 3.0, 4.0, 3.0, 'cube0'),
        'component11': (3.0, 1.0, 4.0, 4.0, 'ufoo0'),
        'component2': (1.0, 2.0, 5.0, 2.0, 'plat0'),
        'component3': (0.0, 0.0, 5.0, 1.0, 'rect0'),
    }

    known = {
        name: dict(zip(attr_names, values))
        for name, values in component_values.items()
    }
    known['_guid'] = 'aa5eff72-0572-4eff-a007-3def9a82ba5b'
    known['_human_cluster_label'] = '0'

    assert known == data[0]
from __future__ import print_function from __future__ import unicode_literals from __future__ import absolute_import from __future__ import division from random import shuffle from sklearn.metrics import adjusted_rand_score import matplotlib.pyplot as plt from concept_formation.trestle import TrestleTree from concept_formation.cluster import cluster from concept_formation.datasets import load_rb_wb_03 from concept_formation.structure_mapper import ObjectVariablizer towers = load_rb_wb_03() shuffle(towers) towers = towers[:30] variablizer = ObjectVariablizer() towers = [variablizer.transform(t) for t in towers] tree = TrestleTree() clusters = cluster(tree, towers, maxsplit=10) human_labels = [tower['_human_cluster_label'] for tower in towers] x = [num_splits for num_splits in range(1,len(clusters)+1)] y = [adjusted_rand_score(human_labels, split) for split in clusters] plt.plot(x, y, label="TRESTLE") plt.title("TRESTLE Clustering Accuracy (Given Human Ground Truth)") plt.ylabel("Adjusted Rand Index (Agreement Correcting for Chance)")
clusters = [cluster_split_search(tree, dataset, h, minsplit=1, maxsplit=40, mod=False) for h in hueristics] human_labels = [ds['_human_cluster_label'] for ds in dataset] return [max(adjusted_rand_score(human_labels, huer), 0.01) for huer in clusters] x = np.arange(len(hueristics)) width = 0.3 hueristic_names = ['AIC', 'BIC', 'CU', 'AICc'] # for i in range(len(clusters)): # hueristic_names[i] += '\nClusters='+str(len(set(clusters[i]))) b1 = plt.bar(x-width, calculate_aris(load_rb_wb_03()), width, color='r', alpha=.8, align='center') b2 = plt.bar(x, calculate_aris(load_rb_com_11()), width, color='b', alpha=.8, align='center') b3 = plt.bar(x+width, calculate_aris(load_rb_s_13()), width, color='g', alpha=.8, align='center') plt.legend((b1[0], b2[0], b3[0]), ('wb_03', 'com_11', 's_13')) plt.title("TRESTLE Clustering Accuracy of Best Clustering by Different" " Hueristics") plt.ylabel("Adjusted Rand Index (Agreement Correcting for Chance)") plt.ylim(0, 1) plt.xlabel("Hueristic") plt.xticks(x, hueristic_names) plt.show()
# Fit a TrestleTree on a sample of rb_wb_03 towers and run
# cluster_split_search once per heuristic (AIC, BIC, CU, AICc).
from random import seed, shuffle

from sklearn.metrics import adjusted_rand_score
import matplotlib.pyplot as plt
import numpy as np

from concept_formation.trestle import TrestleTree
from concept_formation.cluster import AIC, BIC, AICc, CU, cluster_split_search
from concept_formation.datasets import load_rb_wb_03
from concept_formation.preprocessor import ObjectVariablizer

# Fixed seed so the shuffled sample is reproducible.
seed(5)

# Keep a random sample of 60 towers.
towers = load_rb_wb_03()
shuffle(towers)
towers = towers[:60]

variablizer = ObjectVariablizer()
towers = list(map(variablizer.transform, towers))

tree = TrestleTree()
tree.fit(towers)

hueristics = [AIC, BIC, CU, AICc]

# One search result per heuristic, in the same order as ``hueristics``.
clusters = [
    cluster_split_search(
        tree, towers, heuristic, minsplit=1, maxsplit=40, mod=False
    )
    for heuristic in hueristics
]
human_labels = [dataset['_human_cluster_label'] for dataset in dataset] return [ max(adjusted_rand_score(human_labels, huer), 0.01) for huer in clusters ] x = np.arange(len(hueristics)) width = 0.3 hueristic_names = ['AIC', 'BIC', 'CU', 'AICc'] # for i in range(len(clusters)): # hueristic_names[i] += '\nClusters='+str(len(set(clusters[i]))) b1 = plt.bar(x - width, calculate_aris(load_rb_wb_03()), width, color='r', alpha=.8, align='center') b2 = plt.bar(x, calculate_aris(load_rb_com_11()), width, color='b', alpha=.8, align='center') b3 = plt.bar(x + width, calculate_aris(load_rb_s_13()), width, color='g', alpha=.8,