def auto(camera, rawCapture, cas_params, stop_event):
    """Run the PiCamera detect / measure / track loop.

    Every frame from ``camera.capture_continuous`` is passed to ``detect``,
    the boxed result is stored in ``g.img`` and ``measure`` is called on the
    raw frame.  When ``g.track_flag`` is set, ``track(g.avg_pos)`` is called
    and the flag is cleared.

    ``stop_event`` is only forwarded to ``check_candidates``; the loop itself
    never inspects it and has no exit condition of its own.
    """
    print("In Auto Detction System for PiCamera...")
    g.track_flag  # bare attribute read, no assignment
    check_candidates(stop_event)

    stream = camera.capture_continuous(
        rawCapture, format="bgr", use_video_port=True)
    for frame in stream:
        flag = False  # not read anywhere below
        bgr = frame.array

        # cascade detection, then draw the boxes for display via g.img
        found, annotated = detect(bgr, cas_params)
        g.img = box(found, annotated)
        measure(bgr, found)

        # Disabled ideas: look_around() when nothing is found, and
        # spiral_search() after 10 seconds without success.
        if g.track_flag:
            track(g.avg_pos)
            g.track_flag = False

        # reset the capture buffer before the next frame is written into it
        rawCapture.truncate(0)
def pi_detection_system(camera, rawCapture, cas_params):
    """Run the PiCamera detect / measure loop until 'q' is pressed.

    Every frame from ``camera.capture_continuous`` is passed to ``detect``,
    the boxed result is stored in ``g.img`` and ``measure`` is called on the
    raw frame.  The loop ends when ``cv2.waitKey`` reports the 'q' key.
    """
    print("In Auto Detction System for PiCamera...")

    stream = camera.capture_continuous(
        rawCapture, format="bgr", use_video_port=True)
    for frame in stream:
        flag = False  # not read anywhere below
        bgr = frame.array

        # cascade detection, then draw the boxes for display via g.img
        found, annotated = detect(bgr, cas_params)
        g.img = box(found, annotated)
        measure(bgr, found)

        # Disabled ideas: look_around() when nothing is found, and
        # spiral_search() after 10 seconds without success.

        # reset the capture buffer before the next frame is written into it
        rawCapture.truncate(0)

        pressed = cv2.waitKey(1) & 0xFF
        if pressed == ord('q'):
            break
Example #3
0
def nominal(dataset, attr, cls_attr, measure, impurity=None, normalize=True, _cmp=None):
    '''
    Distinct value without order
        - Multiway partitioning
        e.g. Color

    Groups the instances by `instance[attr]` and computes the impurity gain
    of that multiway split.

    Arguments:
        dataset   -- sequence of instances indexable by `attr`
        attr      -- attribute to partition on
        cls_attr  -- class attribute, forwarded to `measure`
        measure   -- impurity function called as measure(instances, cls_attr)
        impurity  -- impurity of the whole dataset; computed when None
        normalize -- divide the gain by the split info (gain ratio)
        _cmp      -- unused; kept so all strategies share one signature

    Returns (values, gain, cluster): the list of distinct attribute values,
    the (optionally normalized) gain and a dict mapping each value to its
    instances.
    '''
    if impurity is None:
        impurity = measure(dataset, cls_attr)

    # TODO: may try to pass index instead of a set to impurity measure
    cluster = {}
    for instance in dataset:
        # `in`/setdefault instead of dict.has_key, which Python 3 removed
        cluster.setdefault(instance[attr], []).append(instance)

    gain = impurity
    size = len(dataset)
    split = .0

    for members in cluster.values():
        weight = float(len(members)) / size
        gain -= weight * measure(members, cls_attr)
        if normalize:
            # compute split info for normalization
            split += -weight * math.log(weight, 2)

    # The split info is zero when there is at most one distinct value; the
    # division used to raise ZeroDivisionError there.  Such a split separates
    # nothing, so the raw gain is returned unnormalized.
    if normalize and split > 0:
        # normalize as gain ratio
        gain /= split

    # a real list on Python 2 and 3 (callers test isinstance(pivot, list))
    return list(cluster), gain, cluster
Example #4
0
def ratio(dataset,
          attr,
          cls_attr,
          measure,
          impurity=None,
          normalize=True,
          _cmp=None):
    '''
    Numeric value where both the differences and the ratio are meaningful
    The number zero has meaning
        - Binary partitioning
        e.g. length, mass

    Sorts a copy of `dataset` and evaluates every cut between two neighbours
    whose `attr` values differ; the candidate pivot is the midpoint of the
    two values.

    Arguments:
        dataset   -- sequence of instances indexable by `attr`
        attr      -- attribute to split on
        cls_attr  -- class attribute, forwarded to `measure`
        measure   -- impurity function called as measure(instances, cls_attr)
        impurity  -- impurity of the whole dataset; computed when None
        normalize -- divide each gain by its split info (gain ratio)
        _cmp      -- optional two-argument comparator over instances; the
                     default orders by `instance[attr]`

    Returns (best_pivot, best_gain, cluster) with
    cluster = {0: tail_partition, 1: head_partition}, or (None, 0.0, None)
    when no cut has a strictly positive gain.
    '''
    if impurity is None:
        impurity = measure(dataset, cls_attr)

    # sorted() returns a new list, so the caller's dataset is left untouched.
    # Key-based sorting is used because the `cmp` builtin and the `cmp=`
    # keyword only exist on Python 2.
    if _cmp is None:
        ordered = sorted(dataset, key=lambda inst: inst[attr])
    else:
        from functools import cmp_to_key  # available on Python 2.7 and 3
        ordered = sorted(dataset, key=cmp_to_key(_cmp))

    best_gain = .0
    best_pivot = None
    cluster = None
    size = len(ordered)
    for i in range(1, size):
        low, high = ordered[i - 1][attr], ordered[i][attr]
        if low == high:
            # same value, no cut possible between them
            continue

        pivot = (low + high) / 2.0
        # TODO: may try to pass index instead of a set to impurity measure
        head_partition = ordered[:i]
        head_ratio = float(len(head_partition)) / size
        head_impurity = measure(head_partition, cls_attr)

        tail_partition = ordered[i:]
        tail_ratio = float(len(tail_partition)) / size
        tail_impurity = measure(tail_partition, cls_attr)

        gain = impurity - (head_ratio * head_impurity) - (tail_ratio *
                                                          tail_impurity)

        if normalize:
            # split info; both ratios lie strictly between 0 and 1 here, so
            # it is never zero
            split = (-head_ratio * math.log(head_ratio, 2)
                     - tail_ratio * math.log(tail_ratio, 2))
            gain /= split

        if gain > best_gain:
            best_gain = gain
            best_pivot = pivot
            cluster = {0: tail_partition, 1: head_partition}

    return best_pivot, best_gain, cluster
Example #5
0
def interval(dataset,
             attr,
             cls_attr,
             measure,
             impurity=None,
             normalize=True,
             _cmp=None):
    '''
    Numeric value where the differences between value is meaningful
    Measured along a scale in which each position is equidistant from another
        - Binary partitioning
        e.g. calendar date

    Sorts a copy of `dataset` and evaluates every cut between two neighbours
    whose `attr` values differ; the candidate pivot is the value of the
    first instance after the cut.

    Arguments:
        dataset   -- sequence of instances indexable by `attr`
        attr      -- attribute to split on
        cls_attr  -- class attribute, forwarded to `measure`
        measure   -- impurity function called as measure(instances, cls_attr)
        impurity  -- impurity of the whole dataset; computed when None
        normalize -- divide each gain by its split info (gain ratio)
        _cmp      -- optional two-argument comparator over instances; the
                     default orders by `instance[attr]`

    Returns (best_pivot, best_gain, cluster) with
    cluster = {0: tail_partition, 1: head_partition}, or (None, 0.0, None)
    when no cut has a strictly positive gain.
    '''
    if impurity is None:
        impurity = measure(dataset, cls_attr)

    # sorted() returns a new list, so the caller's dataset is left untouched.
    # Key-based sorting is used because the `cmp` builtin and the `cmp=`
    # keyword only exist on Python 2.
    if _cmp is None:
        ordered = sorted(dataset, key=lambda inst: inst[attr])
    else:
        from functools import cmp_to_key  # available on Python 2.7 and 3
        ordered = sorted(dataset, key=cmp_to_key(_cmp))

    best_gain = .0
    best_pivot = None
    cluster = None
    size = len(ordered)
    for i in range(1, size):
        if ordered[i - 1][attr] == ordered[i][attr]:
            # same value, no cut possible between them
            continue

        pivot = ordered[i][attr]
        # TODO: may try to pass index instead of a set to impurity measure
        head_partition = ordered[:i]
        head_ratio = float(len(head_partition)) / size
        head_impurity = measure(head_partition, cls_attr)

        tail_partition = ordered[i:]
        tail_ratio = float(len(tail_partition)) / size
        tail_impurity = measure(tail_partition, cls_attr)

        gain = impurity - (head_ratio * head_impurity) - (tail_ratio *
                                                          tail_impurity)

        if normalize:
            # split info; both ratios lie strictly between 0 and 1 here, so
            # it is never zero
            split = (-head_ratio * math.log(head_ratio, 2)
                     - tail_ratio * math.log(tail_ratio, 2))
            gain /= split

        if gain > best_gain:
            best_gain = gain
            best_pivot = pivot
            cluster = {0: tail_partition, 1: head_partition}

    return best_pivot, best_gain, cluster
Example #6
0
def ordinal(dataset,
            attr,
            cls_attr,
            measure,
            impurity=None,
            normalize=True,
            _cmp=None):
    '''
    Distinct value with order
        - Binary partitioning
        e.g. Grade {A, B, C, ..., F}

    Sorts a copy of `dataset` and evaluates every cut between two neighbours
    whose `attr` values differ; the candidate pivot is the value of the
    first instance after the cut.

    Arguments:
        dataset   -- sequence of instances indexable by `attr`
        attr      -- attribute to split on
        cls_attr  -- class attribute, forwarded to `measure`
        measure   -- impurity function called as measure(instances, cls_attr)
        impurity  -- impurity of the whole dataset; computed when None
        normalize -- divide each gain by its split info (gain ratio)
        _cmp      -- optional two-argument comparator over instances; the
                     default orders by `instance[attr]`

    Returns (best_pivot, best_gain, cluster) with
    cluster = {0: tail_partition, 1: head_partition}, or (None, 0.0, None)
    when no cut has a strictly positive gain.
    '''
    if impurity is None:
        impurity = measure(dataset, cls_attr)

    # sorted() returns a new list, so the caller's dataset is left untouched.
    # Key-based sorting is used because the `cmp` builtin and the `cmp=`
    # keyword only exist on Python 2.
    if _cmp is None:
        ordered = sorted(dataset, key=lambda inst: inst[attr])
    else:
        from functools import cmp_to_key  # available on Python 2.7 and 3
        ordered = sorted(dataset, key=cmp_to_key(_cmp))

    best_gain = .0
    best_pivot = None
    cluster = None
    size = len(ordered)
    for i in range(1, size):
        if ordered[i - 1][attr] == ordered[i][attr]:
            # same value, no cut possible between them
            continue

        pivot = ordered[i][attr]
        # TODO: may try to pass index instead of a set to impurity measure
        head_partition = ordered[:i]
        head_ratio = float(len(head_partition)) / size
        head_impurity = measure(head_partition, cls_attr)

        tail_partition = ordered[i:]
        tail_ratio = float(len(tail_partition)) / size
        tail_impurity = measure(tail_partition, cls_attr)

        gain = impurity - (head_ratio * head_impurity) - (tail_ratio *
                                                          tail_impurity)

        if normalize:
            # split info; both ratios lie strictly between 0 and 1 here, so
            # it is never zero
            split = (-head_ratio * math.log(head_ratio, 2)
                     - tail_ratio * math.log(tail_ratio, 2))
            gain /= split

        if gain > best_gain:
            best_gain = gain
            best_pivot = pivot
            cluster = {0: tail_partition, 1: head_partition}

    return best_pivot, best_gain, cluster
def detection_system(cap, cas_params):
    """Run detection on frames read from ``cap`` until a read fails.

    For every frame the region below the horizontal mid-line is selected with
    ``region_of_interest`` and passed to ``detect``; the boxed result is
    stored in ``g.img`` and ``measure`` is called on the raw frame.

    Arguments:
        cap        -- capture object whose ``read()`` returns ``(ok, frame)``
        cas_params -- forwarded unchanged to ``detect``
    """
    print("In auto..")
    while True:
        ret, raw_img = cap.read()
        if not ret:
            # No frame was delivered.  The flag used to be ignored, which
            # crashed on `raw_img.shape` when the frame was None.
            break

        height, width = raw_img.shape[0], raw_img.shape[1]
        half_height = int(height / 2)
        # polygon covering the lower half of the frame, listed as
        # bottom-left, mid-left, mid-right, bottom-right
        vertices = np.array([[(0, height), (0, half_height),
                              (int(width), half_height),
                              (width, height)]],
                            dtype=np.int32)
        processed_img = region_of_interest(raw_img, vertices)
        rects, detected_img = detect(processed_img, cas_params)
        g.img = box(rects, detected_img)
        measure(raw_img, rects)
Example #8
0
def interval(dataset, attr, cls_attr, measure, impurity=None, normalize=True, _cmp=None):
    '''
    Numeric value where the differences between value is meaningful
    Measured along a scale in which each position is equidistant from another
        - Binary partitioning
        e.g. calendar date

    Sorts a copy of `dataset` and evaluates every cut between two neighbours
    whose `attr` values differ; the candidate pivot is the value of the
    first instance after the cut.

    Arguments:
        dataset   -- sequence of instances indexable by `attr`
        attr      -- attribute to split on
        cls_attr  -- class attribute, forwarded to `measure`
        measure   -- impurity function called as measure(instances, cls_attr)
        impurity  -- impurity of the whole dataset; computed when None
        normalize -- divide each gain by its split info (gain ratio)
        _cmp      -- optional two-argument comparator over instances; the
                     default orders by `instance[attr]`

    Returns (best_pivot, best_gain, cluster) with
    cluster = {0: tail_partition, 1: head_partition}, or (None, 0.0, None)
    when no cut has a strictly positive gain.
    '''
    if impurity is None:
        impurity = measure(dataset, cls_attr)

    # sorted() returns a new list, so the caller's dataset is left untouched.
    # Key-based sorting is used because the `cmp` builtin and the `cmp=`
    # keyword only exist on Python 2.
    if _cmp is None:
        ordered = sorted(dataset, key=lambda inst: inst[attr])
    else:
        from functools import cmp_to_key    # available on Python 2.7 and 3
        ordered = sorted(dataset, key=cmp_to_key(_cmp))

    best_gain  = .0
    best_pivot = None
    cluster    = None
    size       = len(ordered)
    for i in range(1, size):
        if ordered[i-1][attr] == ordered[i][attr]:
            # same value, no cut possible between them
            continue

        pivot = ordered[i][attr]
        # TODO: may try to pass index instead of a set to impurity measure
        head_partition = ordered[:i]
        head_ratio     = float(len(head_partition)) / size
        head_impurity  = measure(head_partition, cls_attr)

        tail_partition = ordered[i:]
        tail_ratio     = float(len(tail_partition)) / size
        tail_impurity  = measure(tail_partition, cls_attr)

        gain = impurity - (head_ratio * head_impurity) - (tail_ratio * tail_impurity)

        if normalize:
            # split info; both ratios lie strictly between 0 and 1 here, so
            # it is never zero
            split = -head_ratio * math.log(head_ratio, 2) - tail_ratio * math.log(tail_ratio, 2)
            gain /= split

        if gain > best_gain:
            best_gain  = gain
            best_pivot = pivot
            cluster    = {0: tail_partition, 1: head_partition}

    return best_pivot, best_gain, cluster
Example #9
0
def ratio(dataset, attr, cls_attr, measure, impurity=None, normalize=True, _cmp=None):
    '''
    Numeric value where both the differences and the ratio are meaningful
    The number zero has meaning
        - Binary partitioning
        e.g. length, mass

    Sorts a copy of `dataset` and evaluates every cut between two neighbours
    whose `attr` values differ; the candidate pivot is the midpoint of the
    two values.

    Arguments:
        dataset   -- sequence of instances indexable by `attr`
        attr      -- attribute to split on
        cls_attr  -- class attribute, forwarded to `measure`
        measure   -- impurity function called as measure(instances, cls_attr)
        impurity  -- impurity of the whole dataset; computed when None
        normalize -- divide each gain by its split info (gain ratio)
        _cmp      -- optional two-argument comparator over instances; the
                     default orders by `instance[attr]`

    Returns (best_pivot, best_gain, cluster) with
    cluster = {0: tail_partition, 1: head_partition}, or (None, 0.0, None)
    when no cut has a strictly positive gain.
    '''
    if impurity is None:
        impurity = measure(dataset, cls_attr)

    # sorted() returns a new list, so the caller's dataset is left untouched.
    # Key-based sorting is used because the `cmp` builtin and the `cmp=`
    # keyword only exist on Python 2.
    if _cmp is None:
        ordered = sorted(dataset, key=lambda inst: inst[attr])
    else:
        from functools import cmp_to_key    # available on Python 2.7 and 3
        ordered = sorted(dataset, key=cmp_to_key(_cmp))

    best_gain  = .0
    best_pivot = None
    cluster    = None
    size       = len(ordered)
    for i in range(1, size):
        low, high = ordered[i-1][attr], ordered[i][attr]
        if low == high:
            # same value, no cut possible between them
            continue

        pivot = (low + high) / 2.0
        # TODO: may try to pass index instead of a set to impurity measure
        head_partition = ordered[:i]
        head_ratio     = float(len(head_partition)) / size
        head_impurity  = measure(head_partition, cls_attr)

        tail_partition = ordered[i:]
        tail_ratio     = float(len(tail_partition)) / size
        tail_impurity  = measure(tail_partition, cls_attr)

        gain = impurity - (head_ratio * head_impurity) - (tail_ratio * tail_impurity)

        if normalize:
            # split info; both ratios lie strictly between 0 and 1 here, so
            # it is never zero
            split = -head_ratio * math.log(head_ratio, 2) - tail_ratio * math.log(tail_ratio, 2)
            gain /= split

        if gain > best_gain:
            best_gain  = gain
            best_pivot = pivot
            cluster    = {0: tail_partition, 1: head_partition}

    return best_pivot, best_gain, cluster
Example #10
0
def ordinal(dataset, attr, cls_attr, measure, impurity=None, normalize=True, _cmp=None):
    '''
    Distinct value with order
        - Binary partitioning
        e.g. Grade {A, B, C, ..., F}

    Sorts a copy of `dataset` and evaluates every cut between two neighbours
    whose `attr` values differ; the candidate pivot is the value of the
    first instance after the cut.

    Arguments:
        dataset   -- sequence of instances indexable by `attr`
        attr      -- attribute to split on
        cls_attr  -- class attribute, forwarded to `measure`
        measure   -- impurity function called as measure(instances, cls_attr)
        impurity  -- impurity of the whole dataset; computed when None
        normalize -- divide each gain by its split info (gain ratio)
        _cmp      -- optional two-argument comparator over instances; the
                     default orders by `instance[attr]`

    Returns (best_pivot, best_gain, cluster) with
    cluster = {0: tail_partition, 1: head_partition}, or (None, 0.0, None)
    when no cut has a strictly positive gain.
    '''
    if impurity is None:
        impurity = measure(dataset, cls_attr)

    # sorted() returns a new list, so the caller's dataset is left untouched.
    # Key-based sorting is used because the `cmp` builtin and the `cmp=`
    # keyword only exist on Python 2.
    if _cmp is None:
        ordered = sorted(dataset, key=lambda inst: inst[attr])
    else:
        from functools import cmp_to_key    # available on Python 2.7 and 3
        ordered = sorted(dataset, key=cmp_to_key(_cmp))

    best_gain  = .0
    best_pivot = None
    cluster    = None
    size       = len(ordered)
    for i in range(1, size):
        if ordered[i-1][attr] == ordered[i][attr]:
            # same value, no cut possible between them
            continue

        pivot = ordered[i][attr]
        # TODO: may try to pass index instead of a set to impurity measure
        head_partition = ordered[:i]
        head_ratio     = float(len(head_partition)) / size
        head_impurity  = measure(head_partition, cls_attr)

        tail_partition = ordered[i:]
        tail_ratio     = float(len(tail_partition)) / size
        tail_impurity  = measure(tail_partition, cls_attr)

        gain = impurity - (head_ratio * head_impurity) - (tail_ratio * tail_impurity)

        if normalize:
            # split info; both ratios lie strictly between 0 and 1 here, so
            # it is never zero
            split = -head_ratio * math.log(head_ratio, 2) - tail_ratio * math.log(tail_ratio, 2)
            gain /= split

        if gain > best_gain:
            best_gain  = gain
            best_pivot = pivot
            cluster    = {0: tail_partition, 1: head_partition}

    return best_pivot, best_gain, cluster
    def _calc_score(self, measure, params=None):
        """Score `measure` over all graphs in `self.graphs`.

        For every graph `(A, y_true)` and every parameter value, a kernel is
        obtained from `measure(A).get_K(param)`, clustered into two groups
        with `SpectralClustering` and scored with `FC(y_true, y_pred)`.
        Parameter values for which any of these steps raises are counted but
        otherwise skipped.

        Arguments:
            measure -- callable taking a graph; must expose `.name`, and its
                       result must expose `.scaler` and `.get_K`
            params  -- iterable of parameter values; when None,
                       `self.n_params` values are derived from the scaler

        Returns the mean, over the graphs with at least one successful
        parameter value, of the per-graph minimum score.
        """
        results = defaultdict(list)
        count, passes = 0, 0
        for graph_idx, (A, y_true) in tqdm(enumerate(self.graphs),
                                           total=len(self.graphs),
                                           desc=measure.name):
            mg = measure(A)
            if params is None:
                # NOTE(review): once set here, the values derived from the
                # first graph's scaler are reused for every later graph --
                # confirm this is intended.
                params = mg.scaler.scale(np.linspace(0, 1, self.n_params))
            for param in params:
                count += 1
                try:
                    K = mg.get_K(param)
                    y_pred = SpectralClustering(2).fit_predict(K)
                    score = FC(y_true, y_pred)
                except Exception:
                    # Best effort: a failing parameter value is skipped.
                    # `Exception` (not a bare except) lets KeyboardInterrupt
                    # and SystemExit through.
                    logging.debug('%s: param %r failed on graph %s',
                                  measure.name, param, graph_idx,
                                  exc_info=True)
                    continue
                # only touch `results` after success so a graph never ends up
                # with an empty list, which would make min() raise below
                results[graph_idx].append(score)
                passes += 1

        mins = [min(x) for x in results.values()]
        quality = np.mean(mins)

        # logging results for report
        logging.info('{}; Passes: {}/{}; Min error: {:0.4f}'.format(
            measure.name, passes, count, 1 - quality))

        return quality
Example #12
0
def nominal(dataset,
            attr,
            cls_attr,
            measure,
            impurity=None,
            normalize=True,
            _cmp=None):
    '''
    Distinct value without order
        - Multiway partitioning
        e.g. Color

    Groups the instances by `instance[attr]` and computes the impurity gain
    of that multiway split.

    Arguments:
        dataset   -- sequence of instances indexable by `attr`
        attr      -- attribute to partition on
        cls_attr  -- class attribute, forwarded to `measure`
        measure   -- impurity function called as measure(instances, cls_attr)
        impurity  -- impurity of the whole dataset; computed when None
        normalize -- divide the gain by the split info (gain ratio)
        _cmp      -- unused; kept so all strategies share one signature

    Returns (values, gain, cluster): the list of distinct attribute values,
    the (optionally normalized) gain and a dict mapping each value to its
    instances.
    '''
    if impurity is None:
        impurity = measure(dataset, cls_attr)

    # TODO: may try to pass index instead of a set to impurity measure
    cluster = {}
    for instance in dataset:
        # `in`/setdefault instead of dict.has_key, which Python 3 removed
        cluster.setdefault(instance[attr], []).append(instance)

    gain = impurity
    size = len(dataset)
    split = .0

    for members in cluster.values():
        weight = float(len(members)) / size
        gain -= weight * measure(members, cls_attr)
        if normalize:
            # compute split info for normalization
            split += -weight * math.log(weight, 2)

    # The split info is zero when there is at most one distinct value; the
    # division used to raise ZeroDivisionError there.  Such a split separates
    # nothing, so the raw gain is returned unnormalized.
    if normalize and split > 0:
        # normalize as gain ratio
        gain /= split

    # a real list on Python 2 and 3 (callers test isinstance(pivot, list))
    return list(cluster), gain, cluster
Example #13
0
 def __init__(self, numMeasures, beatsInMeasure, options):
     """Create the measures and initialise the playback state.

     numMeasures    -- number of measures to create (indexed from 0)
     beatsInMeasure -- passed to every ``measure(...)`` that is created
     options        -- object queried for loop, harmonic table and
                       amplitude settings
     """
     # one measure object per slot, all built up front
     self.measures = [measure(beatsInMeasure) for _ in range(numMeasures)]
     self.numMeasures = numMeasures
     self.beatsInMeasure = beatsInMeasure

     # notes that are sounding right now
     self.currentlyPlaying = []
     self.timeStep = 0
     # last valid time step: four steps per beat over all measures
     totalSteps = numMeasures * beatsInMeasure * 4
     self.maxTimeStep = totalSteps - 1
     self.loop = options.getLoop()

     tableLength = options.getHarmTableLength()
     harmonics = options.getNumHarmonics()
     waveType = options.getWaveType()
     self.harmonicTable = sndobj.HarmTable(tableLength, harmonics, waveType)
     self.oscillators = {}
     self.amplitude = options.getAmplitude()
Example #14
0
def main():
    """Train one binary problem per label, predict the test set and report.

    Reads the training problem, then for every label builds, trains and
    tests a binary problem and records which test instances were assigned
    that label.  The per-instance predictions are written to the file
    "tmp_predict" (one comma-separated line per test instance) and the three
    values returned by ``measure`` are printed.
    """
    global train, test
    global labels, features, test_labels

    process_options()

    # read problem and get all labels
    (labels, features) = read_problem(train)
    all_labels = count_labels(labels)

    build_test(test)

    # one (initially empty) list of predicted labels per test instance
    predict = [[] for _ in range(len(test_labels))]

    for i in range(len(all_labels)):
        # train binary problem for the label all_labels[i]
        lab = all_labels[i]

        build_problem(lab)
        train_problem(lab)
        test_problem(lab)
        index = get_output(lab)
        for idx in index:
            predict[idx].append("%s" % lab)

    # `with` closes the file even when a write fails
    with open("tmp_predict", "w") as out_predict:
        for predicted in predict:
            out_predict.write("%s\n" % ",".join(predicted))

    result = measure(test_labels, predict, all_labels)

    # single-argument print(...) behaves the same on Python 2 and 3
    print("Exact match ratio: %s" % result[0])
    print("Microaverage F-measure: %s" % result[1])
    print("Macroaverage F-measure: %s" % result[2])

    sys.stdout.flush()
Example #15
0
def build_tree(dataset,
               cls_attr,
               attr_strategy,
               measure=None,
               threshold=.0,
               quiet=True,
               _depth=0):
    '''
    Build a tree of decisions based on given the dataset to carry classification
    Each tree node is a function to partition the dataset
    Each leave node is a class
        - attr_strategy is a list of tuple: [(attr, strategy, sorting fn), ...]

    Arguments:
        dataset       -- sequence of instances indexable by attribute
        cls_attr      -- attribute holding the class of an instance
        attr_strategy -- see above; every strategy is called as
                         strategy(dataset, attr, cls_attr, measure, impurity,
                         _cmp=sorting fn) and returns (pivot, gain, clusters)
        measure       -- impurity function; defaults to `measure.entropy`
        threshold     -- impurity below which a majority leaf is produced;
                         None is treated as 0
        quiet         -- when False, print a trace indented by depth
        _depth        -- recursion depth (internal)

    Returns the root TreeNode of the (sub)tree.
    '''
    # prefix that indents the trace output by the recursion depth
    pad = '  ' * _depth

    if threshold is None:
        threshold = .0

    if measure is None:
        import measure as m
        measure = m.entropy

    def majority_leaf(reason):
        # leaf holding the whole current dataset, labelled by majority vote
        leaf = TreeNode()
        leaf.depth = _depth
        leaf.cluster = dataset
        leaf.cls_attr = cls_attr
        leaf.cls = leaf.majority()

        if not quiet:
            print('%sleaf - %s by majority [%s]' % (pad, leaf.cls, reason))
        return leaf

    # if no more element for decision
    # return a leaf node for unclassified
    if len(dataset) == 0:
        leaf = TreeNode()
        leaf.depth = _depth
        leaf.cluster = []
        leaf.cls = 'Un-classified'

        if not quiet:
            print('%sleaf - %s' % (pad, leaf.cls))
        return leaf

    # if no more attribute for decision
    # return a leaf node by majority
    if len(attr_strategy) == 0:
        return majority_leaf('no attr left')

    # compute impurity for further processing
    impurity = measure(dataset, cls_attr)

    # if impurity of dataset is 0 ==> all instances belong to same class
    # return a leaf node as of that class
    if impurity == 0:
        leaf = TreeNode()
        leaf.depth = _depth
        leaf.cluster = dataset
        leaf.cls_attr = cls_attr
        leaf.cls = dataset[0][cls_attr]

        if not quiet:
            print('%sleaf - %s' % (pad, leaf.cls))
        return leaf

    # if impurity of dataset is below threshold
    # return a leaf node by majority
    if impurity < threshold:
        return majority_leaf('threshold reach')

    # pick a partition strategy by the best purity gain
    # get all gains
    attr_gain_map = {}
    for attr, strategy, _cmp in attr_strategy:
        try:
            # The sorting fn must be passed by keyword: the sixth positional
            # parameter of the strategies is `normalize`, so passing it
            # positionally dropped the comparator and misused it as the
            # normalize flag.
            attr_gain_map[attr] = strategy(dataset, attr, cls_attr, measure,
                                           impurity, _cmp=_cmp)
        except ZeroDivisionError:
            # A gain ratio with zero split info (e.g. the attribute has a
            # single value in this subset) cannot separate the data; treat
            # the attribute as offering no gain.
            continue

    # retrieve best gain
    best_gain = .0
    best_attr = None
    pivot = None
    clusters = None
    for attr, result in attr_gain_map.items():
        p, g, c = result
        if g > best_gain:
            best_attr = attr
            pivot = p
            best_gain = g
            clusters = c

    if best_attr is None:
        # early return for gaining not much purity
        return majority_leaf('no further gain')

    if not quiet:
        if isinstance(pivot, list):
            print('%simpurity: %s, gain: %s, attr: %s, decision: by %s'
                  % (pad, impurity, best_gain, best_attr, pivot))
        else:
            print('%simpurity: %s, gain: %s, attr: %s, decision: val < %s'
                  % (pad, impurity, best_gain, best_attr, pivot))

    # remove best attribute from further levels of decision
    attr_strategy = [(a, s, c) for a, s, c in attr_strategy
                     if a != best_attr]

    tree = TreeNode()
    tree.cls_attr = cls_attr
    tree.depth = _depth
    tree.pivot = pivot
    tree.attr = best_attr
    tree.branches = {}

    for val, c in clusters.items():
        tree.branches[val] = build_tree(c, cls_attr, attr_strategy,
                                        measure, threshold, quiet,
                                        _depth + 1)

    return tree
Example #16
0
    else:
        pass
# Start-up: announce, open the CSV log and blink the `emb` output at 4 Hz
# for ten seconds before the measurement loop begins.
print("Engaging...")
file = open("data.csv", "w")
embblink = PWM(emb, freq=4)
time.sleep(10)

embblink.deinit()
cycle = 0  # one cycle lasts roughly one second (see the sleep below)
file.write("Time,ADC Value,Std Deviation,Calibrated OD\n")

# Run until the button reads something other than 1 or 64800 cycles
# (18 hours at one second per cycle) have been exceeded.
while b1.value() == 1 and cycle <= 64800:
    blue.value(1)
    if cycle % 30 == 0:
        # every 30th cycle: take a reading and log it
        data = measure(current, init)
        OD = -1 * (math.log10(data[0] / init))  # optical density
        ODcali = 5.5099 * OD + 0.0471  # linear calibration of the OD
        file.write("{},{},{},{}\n".format(cycle, data[0], data[1], ODcali))
        print("{},{},{},{}\n".format(cycle, data[0], data[1], ODcali))
    elif cycle % 2 == 0:
        # The green LED toggle is disabled (green.value(1) / green.value(0)
        # were commented out); only the state read remains.
        green.value()
    time.sleep(1)  # pace the loop at one cycle per second
    cycle += 1
Example #17
0
# Start the threaded camera stream and wait two seconds before reading.
vs = PiVideoStream((win_w, win_h), 64).start()
time.sleep(2.0)

start_time = time.time()
monitor_start_time = 0.

while True:
    # grab a frame, run the cascade and draw the detections on it
    img = vs.read()
    rects, img = detect(img, scale_factor, min_neighs, obj_w, obj_h)
    img = box(rects, img)
    measure(img, rects, candidates)

    # Disabled ideas: look_around() when nothing is found, spiral_search()
    # after 10 seconds, and a time gate on the block below (currently it
    # runs on every frame).
    if not candidates:
        continue

    # average the collected candidates, request tracking, then empty the
    # list in place and restart the timer
    avg_pos = mean(candidates)
    track_flag = True
    candidates[:] = []
    start_time = time.time()
def build_tree(dataset, cls_attr, attr_strategy, measure=None, threshold=0.0, quiet=True, _depth=0):
    """
    Build a tree of decisions based on given the dataset to carry classification
    Each tree node is a function to partition the dataset
    Each leave node is a class
        - attr_strategy is a list of tuple: [(attr, strategy, sorting fn), ...]

    Arguments:
        dataset       -- sequence of instances indexable by attribute
        cls_attr      -- attribute holding the class of an instance
        attr_strategy -- see above; every strategy is called as
                         strategy(dataset, attr, cls_attr, measure, impurity,
                         _cmp=sorting fn) and returns (pivot, gain, clusters)
        measure       -- impurity function; defaults to `measure.entropy`
        threshold     -- impurity below which a majority leaf is produced;
                         None is treated as 0
        quiet         -- when False, print a trace indented by depth
        _depth        -- recursion depth (internal)

    Returns the root TreeNode of the (sub)tree.
    """
    # prefix that indents the trace output by the recursion depth
    pad = "  " * _depth

    if threshold is None:
        threshold = 0.0

    if measure is None:
        import measure as m

        measure = m.entropy

    def majority_leaf(reason):
        # leaf holding the whole current dataset, labelled by majority vote
        leaf = TreeNode()
        leaf.depth = _depth
        leaf.cluster = dataset
        leaf.cls_attr = cls_attr
        leaf.cls = leaf.majority()

        if not quiet:
            print("%sleaf - %s by majority [%s]" % (pad, leaf.cls, reason))
        return leaf

    # if no more element for decision
    # return a leaf node for unclassified
    if len(dataset) == 0:
        leaf = TreeNode()
        leaf.depth = _depth
        leaf.cluster = []
        leaf.cls = "Un-classified"

        if not quiet:
            print("%sleaf - %s" % (pad, leaf.cls))
        return leaf

    # if no more attribute for decision
    # return a leaf node by majority
    if len(attr_strategy) == 0:
        return majority_leaf("no attr left")

    # compute impurity for further processing
    impurity = measure(dataset, cls_attr)

    # if impurity of dataset is 0 ==> all instances belong to same class
    # return a leaf node as of that class
    if impurity == 0:
        leaf = TreeNode()
        leaf.depth = _depth
        leaf.cluster = dataset
        leaf.cls_attr = cls_attr
        leaf.cls = dataset[0][cls_attr]

        if not quiet:
            print("%sleaf - %s" % (pad, leaf.cls))
        return leaf

    # if impurity of dataset is below threshold
    # return a leaf node by majority
    if impurity < threshold:
        return majority_leaf("threshold reach")

    # pick a partition strategy by the best purity gain
    # get all gains
    attr_gain_map = {}
    for attr, strategy, _cmp in attr_strategy:
        try:
            # The sorting fn must be passed by keyword: the sixth positional
            # parameter of the strategies is `normalize`, so passing it
            # positionally dropped the comparator and misused it as the
            # normalize flag.
            attr_gain_map[attr] = strategy(dataset, attr, cls_attr, measure, impurity, _cmp=_cmp)
        except ZeroDivisionError:
            # A gain ratio with zero split info (e.g. the attribute has a
            # single value in this subset) cannot separate the data; treat
            # the attribute as offering no gain.
            continue

    # retrieve best gain
    best_gain = 0.0
    best_attr = None
    pivot = None
    clusters = None
    for attr, result in attr_gain_map.items():
        p, g, c = result
        if g > best_gain:
            best_attr = attr
            pivot = p
            best_gain = g
            clusters = c

    if best_attr is None:
        # early return for gaining not much purity
        return majority_leaf("no further gain")

    if not quiet:
        if isinstance(pivot, list):
            print("%simpurity: %s, gain: %s, attr: %s, decision: by %s" % (
                pad,
                impurity,
                best_gain,
                best_attr,
                pivot,
            ))
        else:
            print("%simpurity: %s, gain: %s, attr: %s, decision: val < %s" % (
                pad,
                impurity,
                best_gain,
                best_attr,
                pivot,
            ))

    # remove best attribute from further levels of decision
    attr_strategy = [(a, s, c) for a, s, c in attr_strategy if a != best_attr]

    tree = TreeNode()
    tree.cls_attr = cls_attr
    tree.depth = _depth
    tree.pivot = pivot
    tree.attr = best_attr
    tree.branches = {}

    for val, c in clusters.items():
        tree.branches[val] = build_tree(c, cls_attr, attr_strategy, measure, threshold, quiet, _depth + 1)

    return tree