def test_hoeffding_anytime_tree_coverage():
    """Smoke-test two HATT configurations; nothing is asserted.

    The test passes as long as no call raises. It first trains a learner
    configured with a very small ``max_byte_size`` and a short
    ``memory_estimate_period`` and then resets it, and afterwards trains a
    second learner that declares ``nominal_attributes``.
    """
    n_samples = 15000

    # Cover memory management
    sea_stream = SEAGenerator(random_state=1, noise_percentage=0.05)
    sea_stream.prepare_for_use()
    X, y = sea_stream.next_sample(n_samples)

    memory_bound_learner = HATT(
        max_byte_size=30,
        memory_estimate_period=100,
        grace_period=10,
        leaf_prediction='nba',
    )
    memory_bound_learner.partial_fit(X, y, classes=sea_stream.target_values)
    memory_bound_learner.reset()

    # Cover nominal attribute observer
    tree_stream = RandomTreeGenerator(
        tree_random_state=23,
        sample_random_state=12,
        n_classes=2,
        n_cat_features=2,
        n_categories_per_cat_feature=4,
        n_num_features=1,
        max_tree_depth=30,
        min_leaf_depth=10,
        fraction_leaves_per_level=0.45,
    )
    tree_stream.prepare_for_use()
    X, y = tree_stream.next_sample(n_samples)

    nominal_learner = HATT(
        leaf_prediction='nba',
        nominal_attributes=list(range(1, 9)),
    )
    nominal_learner.partial_fit(X, y, classes=tree_stream.target_values)
# Example no. 2
# 0
def test_hoeffding_anytime_tree_nba(test_path):
    """Check HATT with its default leaf prediction on 5000 stream samples.

    The learner is evaluated test-then-train: every ``wait_samples`` samples
    (skipping the very first one) the incoming sample is predicted *before*
    it is used for training. Class predictions, class probabilities, the
    ``get_info()`` string and the model description are compared against
    stored expectations.

    Parameters
    ----------
    test_path
        Directory that contains ``test_hoeffding_anytime_tree.npy`` with the
        expected probabilities (only the first 49 rows are used here).
    """
    stream = RandomTreeGenerator(tree_random_state=23,
                                 sample_random_state=12,
                                 n_classes=2,
                                 n_cat_features=2,
                                 n_categories_per_cat_feature=4,
                                 n_num_features=1,
                                 max_tree_depth=30,
                                 min_leaf_depth=10,
                                 fraction_leaves_per_level=0.45)
    stream.prepare_for_use()

    learner = HATT(nominal_attributes=list(range(1, 9)))

    max_samples = 5000
    wait_samples = 100
    predictions = array('i')
    proba_predictions = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (predict first, then train on the same sample)
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            proba_predictions.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1,
        1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
        0
    ])

    # np.all replaces np.alltrue, which is deprecated and gone in NumPy 2.0.
    assert np.all(predictions == expected_predictions)

    test_file = os.path.join(test_path, 'test_hoeffding_anytime_tree.npy')
    expected_proba = np.load(test_file)[:49, :]

    assert np.allclose(proba_predictions, expected_proba)

    expected_info = "HATT(binary_split=False, grace_period=200, leaf_prediction='nba',\n" \
                    "     max_byte_size=33554432, memory_estimate_period=1000000,\n" \
                    "     min_samples_reevaluate=20, nb_threshold=0,\n" \
                    "     nominal_attributes=[1, 2, 3, 4, 5, 6, 7, 8], split_confidence=1e-07,\n" \
                    "     split_criterion='info_gain', stop_mem_management=False,\n" \
                    "     tie_threshold=0.05)"
    assert learner.get_info() == expected_info

    expected_model = 'ifAttribute1=0.0:ifAttribute3=0.0:Leaf=Class1|{0:260.0,1:287.0}' \
                     'ifAttribute3=1.0:Leaf=Class0|{0:163.0,1:117.0}ifAttribute1=1.0:Leaf=Class0|{0:718.0,1:495.0}'

    # Compare with all whitespace removed so layout differences do not matter.
    actual_model = learner.get_model_description().replace("\n", " ").replace(" ", "")
    assert actual_model == expected_model.replace(" ", "")

    # X still holds the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)
# Example no. 3
# 0
def test_hoeffding_anytime_tree_nb_gini(test_path):
    """Check HATT with ``leaf_prediction='nb'`` and ``split_criterion='gini'``.

    The learner is evaluated test-then-train over 5000 samples: every
    ``wait_samples`` samples (skipping the very first one) the incoming
    sample is predicted before it is used for training. Class predictions
    and the ``get_info()`` string are compared against stored expectations.
    ``predict_proba`` is called as well, but its output is only collected,
    not asserted.

    Parameters
    ----------
    test_path
        Accepted for signature parity with the sibling tests; not used here.
    """
    stream = RandomTreeGenerator(tree_random_state=23,
                                 sample_random_state=12,
                                 n_classes=2,
                                 n_cat_features=2,
                                 n_categories_per_cat_feature=4,
                                 n_num_features=1,
                                 max_tree_depth=30,
                                 min_leaf_depth=10,
                                 fraction_leaves_per_level=0.45)
    stream.prepare_for_use()

    learner = HATT(nominal_attributes=list(range(1, 9)),
                   leaf_prediction='nb',
                   split_criterion='gini')

    max_samples = 5000
    wait_samples = 100
    predictions = array('i')
    proba_predictions = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (predict first, then train on the same sample)
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            # Kept so predict_proba is still exercised on this configuration.
            proba_predictions.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1,
        1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0,
        0
    ])

    # np.all replaces np.alltrue, which is deprecated and gone in NumPy 2.0.
    assert np.all(predictions == expected_predictions)

    expected_info = "HATT(binary_split=False, grace_period=200, leaf_prediction='nb',\n" \
                    "     max_byte_size=33554432, memory_estimate_period=1000000,\n" \
                    "     min_samples_reevaluate=20, nb_threshold=0,\n" \
                    "     nominal_attributes=[1, 2, 3, 4, 5, 6, 7, 8], split_confidence=1e-07,\n" \
                    "     split_criterion='gini', stop_mem_management=False, tie_threshold=0.05)"
    assert learner.get_info() == expected_info
# Example no. 4
# 0
def basic_simulation():
    """Fit a HATT model batch by batch on simulated clicks for one fixed choice.

    For every batch returned by ``de.get_batches`` the same choice (``0``) is
    assigned to each visit, clicks are generated with the click generator,
    the batch is printed, the model is trained with ``partial_fit`` and the
    elapsed fit time plus the model's predictions are printed.
    """
    model = HATT()

    print("EFDT Basic Sim")
    click_gen = de.default_sex_age_click_gen()
    de.test_default_sex_age_click_gen()

    batch_size = 1000
    batch_count = 10
    batches = de.get_batches(batch_size, batch_count)

    choice = 0
    for i, batch in enumerate(batches):
        choices = [choice] * len(batch)
        # The loop variable is named differently from the outer ``choice`` so
        # the comprehension no longer shadows it.
        clicks = [
            click_gen.get_click(visit, shown)
            for visit, shown in zip(batch, choices)
        ]
        print_clicks(de.types, i, clicks, batch, choices)

        # perf_counter is a monotonic, high-resolution clock, so the measured
        # duration is not affected by system clock adjustments.
        fit_started = time.perf_counter()
        model.partial_fit(batch, clicks)
        print("fit time: ", time.perf_counter() - fit_started)

        print("--Model: ", choice)
        print_predictions(model)
def test_hoeffding_anytime_tree(test_path):
    """Check HATT predictions and descriptions on 15000 stream samples.

    The learner is evaluated test-then-train: every ``wait_samples`` samples
    (skipping the very first one) the incoming sample is predicted before it
    is used for training. Class predictions, class probabilities, the
    ``get_info()`` string and the model description are compared against
    stored expectations.

    Parameters
    ----------
    test_path
        Directory that contains ``test_hoeffding_anytime_tree.npy`` with the
        expected class probabilities.
    """
    stream = RandomTreeGenerator(tree_random_state=23,
                                 sample_random_state=12,
                                 n_classes=2,
                                 n_cat_features=2,
                                 n_categories_per_cat_feature=4,
                                 n_num_features=1,
                                 max_tree_depth=30,
                                 min_leaf_depth=10,
                                 fraction_leaves_per_level=0.45)
    stream.prepare_for_use()

    learner = HATT(nominal_attributes=list(range(1, 9)))

    max_samples = 15000
    wait_samples = 100
    predictions = array('i')
    proba_predictions = []

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Test every n samples (predict first, then train on the same sample)
        if cnt != 0 and cnt % wait_samples == 0:
            predictions.append(learner.predict(X)[0])
            proba_predictions.append(learner.predict_proba(X)[0])
        learner.partial_fit(X, y)

    expected_predictions = array('i', [
        1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1,
        1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0,
        1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
        1, 1, 0, 1, 1
    ])

    test_file = os.path.join(test_path, 'test_hoeffding_anytime_tree.npy')

    data_prob = np.load(test_file)

    # np.all replaces np.alltrue, which is deprecated and gone in NumPy 2.0.
    assert np.all(predictions == expected_predictions)
    # Probabilities are floats: compare with a tolerance, as the sibling
    # test does, instead of requiring bit-exact equality.
    assert np.allclose(proba_predictions, data_prob)

    expected_info = 'HATT: max_byte_size: 33554432 - memory_estimate_period: 1000000 - grace_period: 200 - ' \
                    'min_samples_reevaluate: 20 - split_criterion: info_gain - split_confidence: 1e-07 - ' \
                    'tie_threshold: 0.05 - binary_split: False - stop_mem_management: False - leaf_prediction: ' \
                    'nba - nb_threshold: 0 - nominal_attributes: [1, 2, 3, 4, 5, 6, 7, 8] - '
    assert learner.get_info() == expected_info

    expected_model = 'if Attribute 1 = 0: if Attribute 3 = 0: Leaf = Class 1 | {0: 896.0, 1: 947.0} ' \
                     'if Attribute 3 = 1: Leaf = Class 0 | {0: 500.0, 1: 388.0} if Attribute 1 = 1: ' \
                     'if Attribute 5 = 0: Leaf = Class 0 | {0: 404.0, 1: 259.0} if Attribute 5 = 1: ' \
                     'Leaf = Class 0 | {0: 166.0, 1: 82.0}'

    # Compare with all whitespace removed so layout differences do not matter.
    actual_model = learner.get_model_description().replace("\n", " ").replace(" ", "")
    assert actual_model == expected_model.replace(" ", "")

    # X still holds the last sample drawn in the loop above.
    assert isinstance(learner.predict(X), np.ndarray)
    assert isinstance(learner.predict_proba(X), np.ndarray)
# Example no. 6
# 0
class Tree:
    """Tkinter view that trains a tree model and keeps one snapshot per step.

    On construction the instance subscribes itself to the provider's
    listening and algorithm lists. ``train`` feeds the provider's current
    object to the model and appends a ``Node`` built from the model's root to
    ``history``; ``notify`` swaps the displayed widget to the snapshot for
    the provider's current timestamp.
    """

    def check_input(self, kwargs):
        """Return whether ``kwargs`` holds a valid 'tk' and 'provider'.

        Parameters
        ----------
        kwargs : dict
            Keyword arguments passed to ``__init__``.

        Returns
        -------
        bool
            True only when both keys are present, ``kwargs['tk']`` is a
            ``tkinter.Tk`` and ``kwargs['provider']`` is a ``DataProvider``.
        """
        if 'tk' not in kwargs or 'provider' not in kwargs:
            # Explicit False instead of falling off the end and returning None.
            return False

        return (isinstance(kwargs['tk'], tkinter.Tk)
                and isinstance(kwargs['provider'], DataProvider))

    def __init__(self, **kwargs):
        """Validate arguments, subscribe to the provider and build the frame.

        Parameters
        ----------
        **kwargs
            Must contain ``tk`` (a ``tkinter.Tk``) and ``provider``
            (a ``DataProvider``).

        Raises
        ------
        ValueError
            If either argument is missing or has the wrong type.
        """
        if not self.check_input(kwargs):
            raise ValueError(
                "Tree expects two arguments: 'tk', a Tkinter object, and 'provider' a DataProvider object"
            )

        # save arguments
        self.tk = kwargs['tk']
        self.provider = kwargs['provider']

        # subscribe to algorithm and listeners lists
        self.provider.subscribe_to_listening_list(self)
        self.provider.subscribe_to_algorithm_list(self)

        # list to save all tree states (store root nodes)
        self.history = []
        # -1 means no snapshot has been displayed yet (see notify)
        self.current_timestamp = -1

        # get the actual ExtremelyFastDecisionTreeClassifier here
        self.tree = TreeClass()

        # master frame
        self.frame = tkinter.Frame(self.tk,
                                   highlightthickness=5,
                                   highlightbackground='black',
                                   bd=0)
        self.frame.pack(side=tkinter.TOP, fill=tkinter.BOTH)

    def _save_tree_state(self):
        """Append a ``Node`` built from the model's current root to ``history``."""
        self.history.append(Node(self.tree._tree_root))

    def notify(self):
        """Show the snapshot that matches the provider's current timestamp.

        The widget of the previously shown snapshot, if any, is destroyed
        first. The provider's timestamp is used directly as an index into
        ``history``.
        """
        # destroy previous widgets
        if self.current_timestamp != -1:
            self.history[self.current_timestamp].destroy()

        # update 'self.current_timestamp' and build widgets for the new timestamp
        self.current_timestamp = self.provider.get_timestamp()
        self.history[self.current_timestamp].build_widget(self.frame)

    def train(self):
        """Train the model on the provider's current object and snapshot it.

        Only the new tree state is stored in ``history``; building the
        widget is left to ``notify``.
        """
        # train tree using current object
        obj = self.provider.get_current_object()
        self.tree.partial_fit(obj['x'], obj['y'])

        # save new tree
        self._save_tree_state()