Example #1
from concept_formation.trestle import TrestleTree


def random_concept(num_instances=1, num_objects=10):
    tree = TrestleTree()
    for i in range(num_instances):
        #print("Training concept with instance", i+1)
        inst = random_instance(num_objects)
        #pprint(inst)
        tree.ifit(inst)
    return tree.root
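# The snippet above assumes a random_instance helper that is not shown in this
# listing. A minimal sketch of what such a generator could look like (the
# attribute names, values, and num_attributes parameter are made up for
# illustration): each instance is a dict, and nested dicts are the component
# objects that TRESTLE treats as structured sub-parts.
import random


def random_instance(num_objects=10, num_attributes=3):
    inst = {}
    for o in range(num_objects):
        component = {}
        for a in range(num_attributes):
            component['attr%i' % a] = random.choice(['red', 'green', 'blue'])
        inst['object%i' % o] = component
    return inst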
import json
import pprint
import random

from concept_formation import datasets as ds
from concept_formation.preprocessor import ObjectVariablizer
from concept_formation.trestle import TrestleTree


def output_json(file="forest", size=100, prune=True, seed=50, burn=1):
    random.seed(seed)
    if file == "forest":
        instances = ds.load_forest_fires()
        variables = False
    elif file == "voting":
        instances = ds.load_congressional_voting()
        variables = False
    elif file == "iris":
        instances = ds.load_iris()
        variables = False
    elif file == "mushroom":
        instances = ds.load_mushroom()
        variables = False
    elif file == "rb_com_11":
        instances = ds.load_rb_com_11()
        variables = True
    elif file == "rb_s_07":
        instances = ds.load_rb_s_07()
        variables = True
    elif file == "rb_s_13":
        instances = ds.load_rb_s_13()
        variables = True
    elif file == "rb_wb_03":
        instances = ds.load_rb_wb_03()
        variables = True
    else:
        instances = ds.load_forest_fires()
        variables = False

    random.shuffle(instances)
    pprint.pprint(instances[0])
    instances = instances[:size]
    print(len(instances))

    if variables:
        variablizer = ObjectVariablizer()
        instances = [variablizer.transform(t) for t in instances]

    tree = TrestleTree()
    tree.fit(instances, iterations=burn)

    pprint.pprint(tree.root.output_json())

    with open('output.js', 'w') as out:
        out.write("var trestle_output = ")
        out.write(json.dumps(tree.root.output_json()))
        out.write(";")
from random import shuffle

from sklearn.metrics import adjusted_rand_score

from concept_formation.cluster import cluster_split_search
from concept_formation.cluster import AIC, BIC, AICc, CU
from concept_formation.preprocessor import ObjectVariablizer
from concept_formation.trestle import TrestleTree

# Module-level heuristic list that calculate_aris refers to (same list as in
# the cluster_split_search example later in this listing).
hueristics = [AIC, BIC, CU, AICc]


def calculate_aris(dataset):
    shuffle(dataset)
    dataset = dataset[:60]

    variablizer = ObjectVariablizer()
    dataset = [variablizer.transform(t) for t in dataset]

    tree = TrestleTree()
    tree.fit(dataset)

    clusters = [cluster_split_search(tree, dataset, h, minsplit=1, maxsplit=40,
                                     mod=False) for h in hueristics]
    human_labels = [ds['_human_cluster_label'] for ds in dataset]

    return [max(adjusted_rand_score(human_labels, huer), 0.01) for huer in
            clusters]
from copy import deepcopy

from concept_formation.trestle import TrestleTree


class ScikitTrestle(object):
    def __init__(self, params=None):
        if params is None:
            self.tree = TrestleTree()
        else:
            self.tree = TrestleTree(**params)

    def ifit(self, x, y):
        x = deepcopy(x)
        x['_y_label'] = "%i" % y
        self.tree.ifit(x)

    def fit(self, X, y):
        X = deepcopy(X)
        for i, x in enumerate(X):
            x['_y_label'] = "%i" % y[i]
        self.tree.fit(X, randomize_first=False)

    def predict(self, X):
        return [int(self.tree.categorize(x).predict('_y_label')) for x in X]
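# Sketch of intended usage with toy, made-up instances: attributes whose names
# start with '_' are hidden from categorization, which is why the class label
# is stored under '_y_label' and only recovered at prediction time.
clf = ScikitTrestle()
clf.fit([{'color': 'red', 'size': 'big'},
         {'color': 'blue', 'size': 'small'}], [0, 1])
print(clf.predict([{'color': 'red', 'size': 'big'}]))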
Example #7
from copy import deepcopy

from concept_formation.trestle import TrestleTree


class ScikitTrestle(object):

    def __init__(self, **kwargs):
        self.tree = TrestleTree(**kwargs)
        self.state_format = "variablized_state"

    def ifit(self, x, y):
        x = deepcopy(x)
        x['_y_label'] = float(y)
        self.tree.ifit(x)

    def fit(self, X, y):
        X = deepcopy(X)
        for i, x in enumerate(X):
            # label each instance with its own target value
            x['_y_label'] = float(y[i])
        self.tree.fit(X, randomize_first=False)

    def skill_info(self, X):
        raise NotImplementedError("Not implemented. Erik H. says there is a "
                                  "way to serialize this -> TODO")

    def predict(self, X):
        return [self.tree.categorize(x).predict('_y_label') for x in X]
from random import seed

from concept_formation.trestle import TrestleTree
from concept_formation.dummy import DummyTree
from concept_formation.evaluation import incremental_evaluation
from concept_formation.datasets import load_rb_s_07
from concept_formation.datasets import load_rb_s_07_human_predictions
from concept_formation.preprocessor import ObjectVariablizer

seed(5)

num_runs = 30
num_examples = 29
towers = load_rb_s_07()

variablizer = ObjectVariablizer()
towers = [variablizer.transform(t) for t in towers]

naive_data = incremental_evaluation(DummyTree(), towers,
                                    run_length=num_examples,
                                    runs=num_runs, attr="success")
cobweb_data = incremental_evaluation(TrestleTree(), towers,
                                     run_length=num_examples,
                                     runs=num_runs, attr="success")

human_data = []
key = None
human_predictions = load_rb_s_07_human_predictions()
for line in human_predictions:
    line = line.rstrip().split(",")
    if key is None:
        key = {v: i for i, v in enumerate(line)}
        continue
    x = int(line[key['order']])-1
    y = (1 - abs(int(line[key['correctness']]) -
                 int(line[key['prediction']])))
    human_data.append((x, y))
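# Hedged continuation (not part of the original fragment): average the human
# (order, correctness) pairs by presentation order so they can be compared
# against the naive_data and cobweb_data learning curves computed above.
from collections import defaultdict

by_opportunity = defaultdict(list)
for x, y in human_data:
    by_opportunity[x].append(y)
human_curve = [sum(by_opportunity[o]) / len(by_opportunity[o])
               for o in sorted(by_opportunity)]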
Example #11
from concept_formation.datasets import load_quadruped
from concept_formation.dummy import DummyTree
from concept_formation.evaluation import incremental_evaluation
from concept_formation.preprocessor import ObjectVariablizer
from concept_formation.trestle import TrestleTree

num_runs = 10  # not set in the original fragment; any positive run count works
num_examples = 25
animals = load_quadruped(num_examples)

variablizer = ObjectVariablizer()
animals = [variablizer.transform(t) for t in animals]

for animal in animals:
    animal['type'] = animal['_type']
    del animal['_type']

naive_data = incremental_evaluation(DummyTree(),
                                    animals,
                                    run_length=num_examples,
                                    runs=num_runs,
                                    attr="type")
trestle_data = incremental_evaluation(TrestleTree(),
                                      animals,
                                      run_length=num_examples,
                                      runs=num_runs,
                                      attr="type")

trestle_x, trestle_y = [], []
naive_x, naive_y = [], []
human_x, human_y = [], []

for opp in range(len(trestle_data[0])):
    for run in range(len(trestle_data)):
        trestle_x.append(opp)
        trestle_y.append(trestle_data[run][opp])

for opp in range(len(naive_data[0])):
    for run in range(len(naive_data)):
        naive_x.append(opp)
        naive_y.append(naive_data[run][opp])
Example #12
from concept_formation.trestle import TrestleTree
from concept_formation.visualize import visualize

# These lines load up and use one of the example datasets included in the
# library if you don't have a readily available dataset to test. The rb_s_07
# dataset is similar to but not exactly the same as the one used to generate
# the figures in the paper.
from concept_formation.datasets import load_rb_s_07
from concept_formation.preprocessor import ObjectVariablizer

data = load_rb_s_07()

# As long as your data conforms to the instance representation:
# https://concept-formation.readthedocs.io/en/latest/instance_representation.html
# it can be basically anything.

# data = []
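# For illustration only (toy attribute names and values, not from rb_s_07):
# an instance is a dict of attribute-value pairs, and a nested dict marks a
# component object.
# data = [{'success': 'True',
#          'block-a': {'shape': 'cube', 'left': 0.0},
#          'block-b': {'shape': 'wedge', 'left': 1.0}}]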

# This step is to make sure the component attributes of the instances are
# properly tagged as variable. See the instance representation link above for
# this.

# ov = ObjectVariablizer()
# data = ov.batch_transform(data)

# These three lines are the core of the process. They will fit the data and
# generate a visualization that will automatically open a browser to the view.
# If you want to embed the output in some other process, like a LearnSphere
# workflow, it would take a little more work but is easy in principle.
tree = TrestleTree()
tree.fit(data)
visualize(tree, "vizfiles")
from random import seed, shuffle

import numpy as np
from sklearn.metrics import adjusted_rand_score

from concept_formation.trestle import TrestleTree
from concept_formation.cluster import cluster_split_search
from concept_formation.cluster import AIC, BIC, AICc, CU
from concept_formation.datasets import load_rb_wb_03
from concept_formation.preprocessor import ObjectVariablizer

seed(5)

towers = load_rb_wb_03()
shuffle(towers)
towers = towers[:60]

variablizer = ObjectVariablizer()
towers = [variablizer.transform(t) for t in towers]

tree = TrestleTree()
tree.fit(towers)

hueristics = [AIC, BIC, CU, AICc]

clusters = [
    cluster_split_search(tree, towers, h, minsplit=1, maxsplit=40, mod=False)
    for h in hueristics
]
human_labels = [tower['_human_cluster_label'] for tower in towers]

x = np.arange(len(hueristics))
y = [max(adjusted_rand_score(human_labels, huer), 0.01) for huer in clusters]
width = 0.45

hueristic_names = ['AIC', 'BIC', 'CU', 'AICc']
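# Hedged continuation (the original snippet is cut off here): one way to render
# the adjusted Rand scores as a bar chart, one bar per heuristic.
import matplotlib.pyplot as plt

plt.bar(x, y, width)
plt.xticks(x, hueristic_names)
plt.ylabel("Adjusted Rand Index")
plt.title("TRESTLE Clustering vs. Human Labels (rb_wb_03)")
plt.show()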
from random import seed, shuffle

from sklearn.metrics import adjusted_rand_score
import matplotlib.pyplot as plt

from concept_formation.trestle import TrestleTree
from concept_formation.cluster import cluster
from concept_formation.datasets import load_rb_wb_03
from concept_formation.preprocessor import ObjectVariablizer

seed(0)

towers = load_rb_wb_03()
shuffle(towers)
towers = towers[:60]

variablizer = ObjectVariablizer()
towers = [variablizer.transform(t) for t in towers]

tree = TrestleTree()
clusters = [c for c in cluster(tree, towers, maxsplit=10)]
human_labels = [tower['_human_cluster_label'] for tower in towers]

x = [num_splits for num_splits in range(1, len(clusters) + 1)]
y = [adjusted_rand_score(human_labels, split) for split in clusters]
plt.plot(x, y, label="TRESTLE")

plt.title("TRESTLE Clustering Accuracy (Given Human Ground Truth)")
plt.ylabel("Adjusted Rand Index (Agreement Correcting for Chance)")
plt.xlabel("# of Splits of Trestle Tree")
plt.legend(loc=4)
plt.show()
import numpy as np
import matplotlib.pyplot as plt
from random import seed

from sklearn.tree import DecisionTreeRegressor

from concept_formation.trestle import TrestleTree

# Create a random dataset
rng = np.random.RandomState(1)
seed(0)
X = np.sort(5 * rng.rand(80, 1), axis=0)
y = np.sin(X).ravel()
y[::5] += 3 * (0.5 - rng.rand(16))

# Fit regression models (Decision Tree and TRESTLE)
# For TRESTLE the y attribute is hidden, so only the X is used to make
# predictions.
dtree = DecisionTreeRegressor(max_depth=3)
dtree.fit(X, y)
ttree = TrestleTree()
training_data = [{
    'x': float(X[i][0]),
    '_y': float(y[i])
} for i, v in enumerate(X)]
ttree.fit(training_data, iterations=1)

# Predict
X_test = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
y_dtree = dtree.predict(X_test)
y_trestle = [ttree.categorize({'x': float(v)}).predict('_y') for v in X_test]

# Plot the results
plt.figure()
plt.scatter(X, y, c="k", label="Data")
plt.plot(X_test, y_trestle, c="g", label="TRESTLE", linewidth=2)
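# Hedged continuation (the snippet ends here in the original listing): plot the
# decision-tree baseline alongside TRESTLE and render the figure.
plt.plot(X_test, y_dtree, c="b", label="Decision Tree", linewidth=2)
plt.xlabel("x")
plt.ylabel("y")
plt.legend()
plt.show()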