def random_concept(num_instances=1, num_objects=10):
    tree = TrestleTree()
    for i in range(num_instances):
        # print("Training concept with instance", i+1)
        inst = random_instance(num_objects)
        # pprint(inst)
        tree.ifit(inst)
    return tree.root
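# random_concept above relies on a random_instance helper that is not shown in
# this excerpt. A minimal sketch, assuming each instance holds num_objects
# component objects with random numeric attributes; the attribute names
# ('o0', 'x', 'y') are invented for illustration and are not the library's
# actual generator.
from random import randint


def random_instance(num_objects=10):
    # One instance: num_objects component objects, each a nested dict of values.
    return {'o%i' % i: {'x': randint(0, 9), 'y': randint(0, 9)}
            for i in range(num_objects)}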
def output_json(file="forest", size=100, prune=True, seed=50, burn=1): random.seed(seed) if file == "forest": instances = ds.load_forest_fires() variables = False elif file == "voting": instances = ds.load_congressional_voting() variables = False elif file == "iris": instances = ds.load_iris() variables = False elif file == "mushroom": instances = ds.load_mushroom() variables = False elif file == "rb_com_11": instances = ds.load_rb_com_11() variables = True elif file == "rb_s_07": instances = ds.load_rb_s_07() variables = True elif file == "rb_s_13": instances = ds.load_rb_s_13() variables = True elif file == "rb_wb_03": instances = ds.load_rb_wb_03() variables = True else: instances = ds.load_forest_fires() variables = False random.shuffle(instances) pprint.pprint(instances[0]) instances = instances[:size] print(len(instances)) if variables: variablizer = ObjectVariablizer() instances = [variablizer.transform(t) for t in instances] tree = TrestleTree() tree.fit(instances, iterations=burn) pprint.pprint(tree.root.output_json()) with open('output.js', 'w') as out: out.write("var trestle_output = ") out.write(json.dumps(tree.root.output_json())) out.write(";")
def output_json(file="forest", size=100, prune=True, seed=50, burn=1): random.seed(seed) if file == "forest": instances = ds.load_forest_fires() variables = False elif file == "voting": instances = ds.load_congressional_voting() variables = False elif file == "iris": instances = ds.load_iris() variables = False elif file == "mushroom": instances = ds.load_mushroom() variables = False elif file == "rb_com_11": instances = ds.load_rb_com_11() variables = True elif file == "rb_s_07": instances = ds.load_rb_s_07() variables = True elif file == "rb_s_13": instances = ds.load_rb_s_13() variables = True elif file == "rb_wb_03": instances = ds.load_rb_wb_03() variables = True else: instances = ds.load_forest_fires() variables = False random.shuffle(instances) pprint.pprint(instances[0]) instances = instances[:size] print(len(instances)) if variables: variablizer = ObjectVariablizer() instances = [variablizer.transform(t) for t in instances] tree = TrestleTree() tree.fit(instances, iterations=burn) # pprint.pprint(tree.root.output_json()) with open('output.js', 'w') as out: out.write("var trestle_output = ") out.write(json.dumps(tree.root.output_json())) out.write(";")
def calculate_aris(dataset):
    shuffle(dataset)
    dataset = dataset[:60]
    variablizer = ObjectVariablizer()
    dataset = [variablizer.transform(t) for t in dataset]

    tree = TrestleTree()
    tree.fit(dataset)

    clusters = [cluster_split_search(tree, dataset, h, minsplit=1,
                                     maxsplit=40, mod=False)
                for h in hueristics]
    human_labels = [inst['_human_cluster_label'] for inst in dataset]

    return [max(adjusted_rand_score(human_labels, huer), 0.01)
            for huer in clusters]
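# calculate_aris above expects a module-level hueristics list. With the
# cluster-quality metrics used elsewhere in these examples it would be defined
# as follows:
from concept_formation.cluster import AIC, BIC, AICc, CU

hueristics = [AIC, BIC, CU, AICc]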
class ScikitTrestle(object):

    def __init__(self, params=None):
        if params is None:
            self.tree = TrestleTree()
        else:
            self.tree = TrestleTree(**params)

    def ifit(self, x, y):
        x = deepcopy(x)
        x['_y_label'] = "%i" % y
        self.tree.ifit(x)

    def fit(self, X, y):
        X = deepcopy(X)
        for i, x in enumerate(X):
            x['_y_label'] = "%i" % y[i]
        self.tree.fit(X, randomize_first=False)

    def predict(self, X):
        return [int(self.tree.categorize(x).predict('_y_label'))
                for x in X]
class ScikitTrestle(object):

    def __init__(self, **kwargs):
        self.tree = TrestleTree(**kwargs)
        self.state_format = "variablized_state"

    def ifit(self, x, y):
        x = deepcopy(x)
        x['_y_label'] = float(y)
        self.tree.ifit(x)

    def fit(self, X, y):
        X = deepcopy(X)
        for i, x in enumerate(X):
            x['_y_label'] = float(y[i])
        self.tree.fit(X, randomize_first=False)

    def skill_info(self, X):
        raise NotImplementedError("Not implemented; Erik H. says there is a "
                                  "way to serialize this -> TODO")

    def predict(self, X):
        return [self.tree.categorize(x).predict('_y_label') for x in X]
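# A hedged usage sketch for the wrapper above; the feature dictionaries and
# labels are invented for illustration and are not part of the library.
X_demo = [{'size': 1.0, 'weight': 2.0},
          {'size': 4.0, 'weight': 8.0},
          {'size': 1.2, 'weight': 2.1}]
y_demo = [0, 1, 0]

model = ScikitTrestle()
model.fit(X_demo, y_demo)
print(model.predict([{'size': 3.9, 'weight': 7.5}]))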
from random import seed

from concept_formation.trestle import TrestleTree
from concept_formation.dummy import DummyTree
from concept_formation.evaluation import incremental_evaluation
from concept_formation.datasets import load_rb_s_07
from concept_formation.datasets import load_rb_s_07_human_predictions
from concept_formation.preprocessor import ObjectVariablizer

seed(5)

num_runs = 30
num_examples = 29

towers = load_rb_s_07()
variablizer = ObjectVariablizer()
towers = [variablizer.transform(t) for t in towers]

naive_data = incremental_evaluation(DummyTree(), towers,
                                    run_length=num_examples,
                                    runs=num_runs, attr="success")
cobweb_data = incremental_evaluation(TrestleTree(), towers,
                                     run_length=num_examples,
                                     runs=num_runs, attr="success")

human_data = []
key = None

human_predictions = load_rb_s_07_human_predictions()
for line in human_predictions:
    line = line.rstrip().split(",")
    if key is None:
        key = {v: i for i, v in enumerate(line)}
        continue
    x = int(line[key['order']]) - 1
    y = (1 - abs(int(line[key['correctness']]) -
                 int(line[key['prediction']])))
    human_data.append((x, y))
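# A hedged sketch of one way to collapse the (opportunity, correctness) pairs
# gathered above into a per-opportunity human accuracy curve; the original
# script may aggregate or plot these values differently.
from collections import defaultdict

by_opp = defaultdict(list)
for opp, correct in human_data:
    by_opp[opp].append(correct)
human_curve = [(opp, sum(vals) / len(vals))
               for opp, vals in sorted(by_opp.items())]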
num_examples = 25

animals = load_quadruped(num_examples)
variablizer = ObjectVariablizer()
animals = [variablizer.transform(t) for t in animals]
for animal in animals:
    animal['type'] = animal['_type']
    del animal['_type']

naive_data = incremental_evaluation(DummyTree(), animals,
                                    run_length=num_examples,
                                    runs=num_runs, attr="type")
trestle_data = incremental_evaluation(TrestleTree(), animals,
                                      run_length=num_examples,
                                      runs=num_runs, attr="type")

trestle_x, trestle_y = [], []
naive_x, naive_y = [], []
human_x, human_y = [], []

for opp in range(len(trestle_data[0])):
    for run in range(len(trestle_data)):
        trestle_x.append(opp)
        trestle_y.append(trestle_data[run][opp])

for opp in range(len(naive_data[0])):
from concept_formation.trestle import TrestleTree
from concept_formation.visualize import visualize

# These lines load up and use one of the example datasets included in the
# library if you don't have a readily available dataset to test. The rb_s_07
# dataset is similar to, but not exactly the same as, the one used to
# generate the figures in the paper.
from concept_formation.datasets import load_rb_s_07
from concept_formation.preprocessor import ObjectVariablizer
data = load_rb_s_07()

# As long as your data conforms to the instance representation:
# https://concept-formation.readthedocs.io/en/latest/instance_representation.html
# it can be basically anything.
# data = []

# This step makes sure the component attributes of the instances are properly
# tagged as variable. See the instance representation link above for details.
# ov = ObjectVariablizer()
# data = ov.batch_transform(data)

# These three lines are the core of the process. They fit the data and
# generate a visualization that automatically opens a browser to the view.
# If you want to embed the output in some other process, like a LearnSphere
# workflow, it would take a little more work but is easy in principle.
tree = TrestleTree()
tree.fit(data)
visualize(tree, "vizfiles")
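# A hedged illustration of a hand-built instance that conforms to the instance
# representation linked above; the attribute names here are invented for this
# example and are not taken from the rb_s_07 dataset.
example_instance = {
    'success': 'True',
    'block1': {'shape': 'square', 'left': 1.0, 'bottom': 0.0},
    'block2': {'shape': 'triangle', 'left': 1.0, 'bottom': 1.0},
}
# After ObjectVariablizer, component names like 'block1' are tagged as
# variables (prefixed with '?'), so concepts match on structure rather than on
# the particular object names.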
from random import seed, shuffle

import numpy as np
from sklearn.metrics import adjusted_rand_score

from concept_formation.trestle import TrestleTree
from concept_formation.cluster import cluster_split_search
from concept_formation.cluster import AIC, BIC, AICc, CU
from concept_formation.datasets import load_rb_wb_03
from concept_formation.preprocessor import ObjectVariablizer

seed(5)

towers = load_rb_wb_03()
shuffle(towers)
towers = towers[:60]

variablizer = ObjectVariablizer()
towers = [variablizer.transform(t) for t in towers]

tree = TrestleTree()
tree.fit(towers)

hueristics = [AIC, BIC, CU, AICc]
clusters = [cluster_split_search(tree, towers, h, minsplit=1, maxsplit=40,
                                 mod=False)
            for h in hueristics]

human_labels = [tower['_human_cluster_label'] for tower in towers]

x = np.arange(len(hueristics))
y = [max(adjusted_rand_score(human_labels, huer), 0.01) for huer in clusters]
width = 0.45

hueristic_names = ['AIC', 'BIC', 'CU', 'AICc']
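# The excerpt above stops before drawing the figure; a hedged sketch of the
# remaining matplotlib calls it might use, built from the x, y, width, and
# hueristic_names values defined above (styling is illustrative).
import matplotlib.pyplot as plt

plt.bar(x, y, width, color='b')
plt.xticks(x, hueristic_names)
plt.ylabel("Adjusted Rand Index")
plt.xlabel("Split Heuristic")
plt.show()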
from random import seed, shuffle

from sklearn.metrics import adjusted_rand_score
import matplotlib.pyplot as plt

from concept_formation.trestle import TrestleTree
from concept_formation.cluster import cluster
from concept_formation.datasets import load_rb_wb_03
from concept_formation.preprocessor import ObjectVariablizer

seed(0)

towers = load_rb_wb_03()
shuffle(towers)
towers = towers[:60]

variablizer = ObjectVariablizer()
towers = [variablizer.transform(t) for t in towers]

tree = TrestleTree()
clusters = [c for c in cluster(tree, towers, maxsplit=10)]
human_labels = [tower['_human_cluster_label'] for tower in towers]

x = [num_splits for num_splits in range(1, len(clusters) + 1)]
y = [adjusted_rand_score(human_labels, split) for split in clusters]

plt.plot(x, y, label="TRESTLE")
plt.title("TRESTLE Clustering Accuracy (Given Human Ground Truth)")
plt.ylabel("Adjusted Rand Index (Agreement Correcting for Chance)")
plt.xlabel("# of Splits of Trestle Tree")
plt.legend(loc=4)
plt.show()
import numpy as np
import matplotlib.pyplot as plt
from random import seed

from sklearn.tree import DecisionTreeRegressor

from concept_formation.trestle import TrestleTree

# Create a random dataset
rng = np.random.RandomState(1)
seed(0)
X = np.sort(5 * rng.rand(80, 1), axis=0)
y = np.sin(X).ravel()
y[::5] += 3 * (0.5 - rng.rand(16))

# Fit regression models (Decision Tree and TRESTLE)
# For TRESTLE the y attribute is hidden, so only the X is used to make
# predictions.
dtree = DecisionTreeRegressor(max_depth=3)
dtree.fit(X, y)

ttree = TrestleTree()
training_data = [{'x': float(X[i][0]), '_y': float(y[i])}
                 for i in range(len(X))]
ttree.fit(training_data, iterations=1)

# Predict
X_test = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
y_dtree = dtree.predict(X_test)
y_trestle = [ttree.categorize({'x': float(v)}).predict('_y') for v in X_test]

# Plot the results
plt.figure()
plt.scatter(X, y, c="k", label="Data")
plt.plot(X_test, y_trestle, c="g", label="TRESTLE", linewidth=2)
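# The excerpt above computes y_dtree but stops before plotting it; a hedged
# sketch of how the remaining plotting calls might look (labels and styling
# here are illustrative, not taken from the original figure).
plt.plot(X_test, y_dtree, c="b", label="Decision Tree (max_depth=3)",
         linewidth=2)
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
plt.show()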