Example #1
def create_n_trees(data: np.array, number: int):
    trees = []

    for i in range(number):
        indexes, choosePoints = choose_n_points(data, 200)
        root = tr.treeNode(create_point_array(choosePoints), indexes=indexes)
        tr.build_tree(root, 3)
        trees.append(root)

    return trees
Example #2
def best_select(parses, dictionary, wordvec):
	scores = np.zeros(len(parses))
	for i in xrange(len(parses)):
		scores[i] = -i

	words = parse_tree.get_words(parse_tree.build_tree(parses[0]))
	vectors = np.zeros((wordvec.shape[0] ,len(words)))
	z = np.zeros(len(words))
	for i in xrange(len(words)):
		if (words[i].lower() in dictionary) == False:
			vectors[:,i] = 0
		else:
			ind = dictionary.index(words[i].lower())
			vectors[:,i] = wordvec[:,ind]

		z[i] = np.exp(np.dot(vectors[:,i:i+1].transpose(), wordvec)).sum()

	# print words
	# print vectors
	# print np.dot(vectors.transpose(), vectors)

	wscores = np.zeros(len(parses))
	for i in xrange(len(parses)):
		wscores[i] = get_score(parses[i], vectors, z)
	order = (-wscores).argsort() 
	for i in xrange(len(parses)):
		scores[order[i]] -= i * 20.0

	# print wscores
	# print scores

	return parses[scores.argmax()]
Example #3
def random_forest_training(data_train, trees_num):
    '''Build a random forest
    input:  data_train(list): training data
            trees_num(int): number of classification trees
    output: trees_result(list): the best split of each tree
            trees_feature(list): the features each tree selected from the original data
    '''
    trees_result = []  # best split found for each tree
    trees_feature = []
    n = np.shape(data_train)[1]  # dimensionality of the samples
    if n > 2:
        k = int(log(n - 1, 2)) + 1  # number of features to use per tree
    else:
        k = 1
    # build each tree
    for i in xrange(trees_num):
        # 1. randomly select m samples and k features
        data_samples, feature = choose_samples(data_train, k)
        # 2. build one classification tree
        tree = build_tree(data_samples)
        # 3. store the trained tree
        trees_result.append(tree)
        # 4. store the features used by this tree
        trees_feature.append(feature)

    return trees_result, trees_feature
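A minimal way to exercise this routine might look like the sketch below; `load_data` is a hypothetical loader used only for illustration, while `choose_samples` and `build_tree` are the helpers the function already depends on (they are not shown in this example).

# Hypothetical driver; load_data and the file name are assumptions, not part of the original example.
if __name__ == '__main__':
    data_train = load_data('train.txt')  # list of rows with the label in the last column
    trees_result, trees_feature = random_forest_training(data_train, trees_num=10)
    print('trained %d trees' % len(trees_result))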
Example #4
def main():
    f = open("result.csv", "w")

    N = []
    tempos = []

    iterations = range(1, 23)
    k = 0  # just for printing a progress percentage

    f.write("N, tempo(s)\n")

    for i in iterations:
        array = generate_array(2**i)
        start = t.time()
        root = tree.build_tree(array)
        total_time = t.time() - start

        tempos.append(total_time)
        N.append(2**i)
        f.write(f"{2**i},{total_time}\n")

        k += 1
        print(f"{((k/len(iterations))*100):.0f}%")  # Print do progresso

    g.draw_graph(N, tempos)
    f.close()
Example #5
def random_forest_training(data_train, trees_num):
    ''' Build a random forest
    :param data_train: (list) training data
    :param trees_num: (int) number of classification trees
    :return: trees_result(list) the best split of each tree
             trees_feature(list) the features each tree selected from the original data
    '''
    trees_result = []
    trees_feature = []
    n = np.shape(data_train)[1]  # sample dimensionality: the data is m x n, and the n columns hold the features plus the label

    if n > 2:
        k = int(log(n - 1, 2)) + 1  # number of features to use per tree
    else:
        k = 1
    # build each tree; trees_num gives how many classification trees to grow
    for i in range(trees_num):
        # 1. randomly select m samples and k features (shuffle the row indices, pick k feature columns, recombine, and return data_samples)
        data_samples, feature = choose_samples(data_train, k)
        # 2. build one classification tree (an instance of the tree class)
        tree = build_tree(data_samples)
        # 3. store the trained tree
        trees_result.append(tree)
        # 4. store the features used by this tree
        trees_feature.append(feature)

    return trees_result, trees_feature
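The random-forest examples above call a `choose_samples` helper that is never shown. Based on the step-1 comment (draw random row indices, pick k feature columns, recombine the data), a plausible sketch of such a helper, offered purely as an illustration and not as the original implementation, is:

import random
import numpy as np

def choose_samples(data, k):
    # data: list of rows where the last column is the label
    m, n = np.shape(data)
    # pick k of the n - 1 feature columns without replacement
    feature = random.sample(range(n - 1), k)
    # draw m row indices with replacement (a bootstrap sample)
    rows = [random.randint(0, m - 1) for _ in range(m)]
    data_samples = [[data[r][c] for c in feature] + [data[r][-1]] for r in rows]
    return data_samples, feature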
Example #6
 def test_eq(self, data1, tree1):
     data1['extra'] = 10
     tree2 = build_tree(data1)
     copy_tree = copy.deepcopy(tree1)
     assert copy_tree is not tree1
     assert copy_tree == tree1
     assert tree2 != tree1
Example #7
def get_score(parse, vectors, z):
	tree = parse_tree.build_tree(parse)
	parse_tree.fix_tree(tree)
	#parse_tree.print_tree(tree)
	D, depth = parse_tree.calc_dist(tree)
	W = np.exp(-D * 0.5)
	# W = 1.0 / (1.0 + D)
	score = 0
	for i in xrange(vectors.shape[1]):
		x = vectors[:,i:i+1]
		if np.abs(x).sum() == 0:
			continue
		for j in xrange(i+1, vectors.shape[1]):
			y = vectors[:,j:j+1]
			if np.abs(y).sum() == 0:
				continue
			# s = np.dot(x.transpose(), y).sum()
			# w = W[i,j] / W[i,:].sum() + W[i,j] / W[:,j].sum()
			s1 = W[i,j] * (np.exp(np.dot(x.transpose(), y).sum()) / z[i])
			s2 = W[i,j] * (np.exp(np.dot(x.transpose(), y).sum()) / z[j])
			# s1 = (W[i,j]/W[i,:].sum()) * (np.exp(np.dot(x.transpose(), y).sum()) / z[i])
			# s2 = (W[i,j]/W[:,j].sum()) * (np.exp(np.dot(x.transpose(), y).sum()) / z[j])
			# s1 = np.exp(-10.0*D[i,j]/D[i,:].sum()) * (np.exp(np.dot(x.transpose(), y).sum()) / z[i])
			# s2 = np.exp(-10.0*D[i,j]/D[:,j].sum()) * (np.exp(np.dot(x.transpose(), y).sum()) / z[j])
			score += s1 + s2

	# print score
	return score
Example #8
def random_forest_training(data_train, trees_num):
    '''Build a random forest
    input:  data_train(list): training data
            trees_num(int): number of classification trees
    output: trees_result(list): the best split of each tree
            trees_feature(list): the features each tree selected from the original data
    '''
    trees_result = []  # best split found for each tree
    trees_feature = []
    n = np.shape(data_train)[1]  # dimensionality of the samples
    if n > 2:
        k = int(log(n - 1, 2)) + 1  # number of features to use per tree
    else:
        k = 1
    # build each tree
    for i in xrange(trees_num):
        # 1. randomly select m samples and k features
        data_samples, feature = choose_samples(data_train, k)
        # 2. build one classification tree
        tree = build_tree(data_samples)
        # 3. store the trained tree
        trees_result.append(tree)
        # 4. store the features used by this tree
        trees_feature.append(feature)

    return trees_result, trees_feature
Example #9
def tree4():
    return build_tree({
        'a': {
            'b': {
                'c': 3
            }
        }
    })
Example #10
def grow_forest(x, y, n, max_depth):
    forest = np.empty(n, dtype=object)
    for i in range(n):
        indexes = np.random.random_integers((x.shape[0] - 1),
                                            size=((x.shape[0]) // n))
        xi = x[indexes][:]
        yi = y[indexes]
        forest[i] = tree.build_tree(xi, yi, max_depth=max_depth)
    return forest
Example #11
def AnalyzeMining(target, tables):

    if tables == "All":
        tables = AllTables()
    elif tables == "Batting":
        tables = BattingTables()
    elif tables == "Fielding":
        tables = FieldingTables()
    elif tables == "Pitching":
        tables = PitchingTables()
    # else:
    #     raise

    sql = tables.sql
    dfcols = tables.cols

    results = databaseconnection(sql)
    print("get result from db..")

    df = pd.DataFrame(list(results))

    df.columns = dfcols
    df.fillna(value=0, inplace=True)
    df = removezero(df)
    y = df[target].values.astype(int)
    # y_hof = df['hof'].values.astype(int)
    # y_man = df['man'].values.astype(int)
    df.drop(columns=['playerID', 'nom', 'hof', 'man'], inplace=True)

    cols = list(df.columns.values)
    df = df[cols].applymap(np.int64)
    df = df[cols].round(decimals=-1)
    #print(df)

    ########tree algorithm

    #nom_data = df
    df[target] = y.tolist()
    train, test = tree.train_test_split(df)

    ##
    attributes = cols
    print("Generating decision tree..")
    root = tree.build_tree(train, attributes, target)

    print("Accuracy of test data")
    # df_test = clean('horseTest.txt')
    acc = str(tree.test_predictions(root, test, target) * 100.0) + '%'
    print(acc)

    print("F1 score of test data")
    # df_test = clean('horseTest.txt')
    f1_score = str(tree.test_f1score(root, test, target) * 100.0) + '%'
    print(f1_score)

    return acc, f1_score
Example #12
def learn_forest_learn(x, y, quan, depth):  # quan and depth are the number of trees and their depth
    forest = []
    for i in range(quan):
        r = np.random.random_integers(x.shape[0]-1, size=(x.shape[0]//quan))
        xx = x[r, :]
        yy = y[r].T
        # xx, yy = bootstrap(x, y, quan)
        # print(xx)
        # print(yy)
        forest.append(tree.build_tree(xx, yy, max_depth=depth))
    return forest
Example #13
def search(B, num_query, num_items):
    data = []
    dims = ["x", "y", "z"]

    for i in range(num_items):
        item = [(dimension, random.randrange(-1000, 1000)) for dimension in ["x", "y", "z"]]
        # for dimension in ['x']]
        data.append(item)

    tree_file = "test-tree-2.hodor"
    if os.path.isfile(tree_file):
        os.remove(tree_file)
    serializer = test_serializer(tree_file)

    root = tree.build_tree(data, B, serializer)

    ranges = []
    start = time.time()
    for i in range(num_query):
        ranges = {d: (random.randrange(-1000, 0), random.randrange(0, 1000)) for d in ["x", "y", "z"]}
        result = root.range_query(ranges)
        real_result = []

        for d in data:
            incl = True
            for key in ranges:
                dk = next(i[1] for i in d if i[0] == key)
                if dk < ranges[key][0] or dk > ranges[key][1]:
                    incl = False
            if incl:
                real_result.append(d)

        if len(real_result) != len(result):
            print "ranges:", ranges
            print "result:", len(result), "should be:", len(real_result), "items"

            result = [i[:3] for i in result]
            missing = [i for i in real_result if i not in result]
            missing = map(lambda res: tuple(i[1] for i in res), missing)
            print "should have been in the result:", missing

            extra = [i for i in result if i not in real_result]
            extra = map(lambda res: tuple(i[1] for i in res), extra)
            print "should not have been in the result:", extra

        assert len(real_result) == len(result)

    total = time.time() - start
    print num_query, "queries on", num_items, "items took", total, "seconds"

    print serializer.normal_seeks, "forward seeks averaged", 10 ** 6 * serializer.normal_seek_time / serializer.normal_seeks, "microseconds each"
    print serializer.back_seeks  # , 'back seeks averaged',\
Example #14
def main():
    files = get_filenames()
    x, y = [], []

    # generate the learning curve data
    data = list(parse(file(files.dataset)))
    for train_prop in np.arange(0.1, 0.99, 0.05):
        training_set, testing_set = sample(data, train_prop)
        tree = build_tree(training_set).prune(MIN_GAIN)
        check = [record[RESULT_IDX] == plurality(tree.classify(record))
                 for record in testing_set]
        counter = Counter(check)
        precision = counter[True] / float(counter[True] + counter[False])
        print 'Training set sampling probability = %.2f:' % (train_prop)
        print 'training data size = %d,' % (len(training_set)),
        print 'test data size = %d,' % (len(testing_set)),
        print 'precision = %.4f' % (precision)
        x.append(len(training_set))
        y.append(precision)

    # statistics
    ymean, ystd, ymin, ymax = np.mean(y), np.std(y), np.min(y), np.max(y)
    print 'Mean of precision = %.4f' % (ymean)
    print 'Standard deviation of precision = %.4f' % (ystd)
    print 'Min = %.4f, max = %.4f' % (ymin, ymax)
    xy = sorted(zip(x, y), key=lambda a: a[0])
    x, y = zip(*xy)

    # setup decorations
    plt.rc('font', family='serif')
    plt.yticks(np.arange(0.0, 1.0, 0.1))
    plt.ylim(0.0, 1.0)
    plt.grid(True)
    plt.title('Learning Curve')
    plt.xlabel('Training set size')
    plt.ylabel('Precision on test set')

    # plot smoothed learning curve
    xnew = np.linspace(np.min(x), np.max(x), 100)
    ynew = interp1d(x, y)(xnew)
    plt.plot(x, y, '.', xnew, ynew, '--')

    # annotation
    box = dict(boxstyle='square', fc="w", ec="k")
    txt = '$\mu = %.4f$, $\sigma = %.4f$' % (ymean, ystd)
    txt += ', $min = %.4f$, $max = %.4f$' % (ymin, ymax)
    plt.text(170, 0.05, txt, bbox=box)

    plt.savefig(files.curve)
    print 'Save learning curve to', files.curve
Example #15
def build_forrest(data, max_depth, min_size, n_attributes, n_trees, n_tests,
                  bg_size):
    forrest = list()
    score = int()
    print('trees', n_trees)
    for i in range(n_trees):
        i_train = create_bootstrap_group(data, bg_size)
        i_tree = build_tree(i_train, max_depth, min_size, n_attributes)
        forrest.append(i_tree)
        print('tree number', i)
        i_test = create_test_group(data, n_tests)
        for j in range(len(i_test)):
            print('n test', j, i_test[j])
            prediction = predict_by_vote(forrest, i_test[j])
            [data, verification] = verify(data, prediction, i_test[j])
            score += verification
    print(score)
    accuracy = score / n_trees / n_tests
    return accuracy
Example #16
    def __init__(self, columns, target_column, rows, forest_size=None):
        if not forest_size:
            forest_size = ceil(sqrt(len(rows)))

        bag_size = ceil(len(rows) / forest_size)

        self._trees = [
            build_tree(
                columns=sample(columns,
                               randint(min(2, len(columns)), len(columns))),
                target_column=target_column,
                rows=choices(rows, k=bag_size),  # with replacement
                score_type=choice(('entropy', 'gini')),
                # TODO min_gain_for_split=
            ) for _ in range(forest_size)
        ]

        for tree in self._trees:
            print('-' * 80)
            print(tree)
Example #17
def build_chain(S: Iterable[Permutation], order=None):
    n = max(S, key=lambda x: x.n).n

    if order is None:
        order = list(range(1, n + 1))

    b = 0

    ans = []

    while S:
        tree = build_tree(order[b], S)
        ans.append(tree)
        S = make_gens(tree, S)
        S = normalize(S)
        b += 1

    S = []

    for t in ans:
        S.extend(t.S)

    return FullStabilizerChain(n, list(range(1, b + 1)), S, ans)
Example #18
    def test_tree(self):
        expected_output = """asciitree
 +-- sometimes
 |   +-- you
 +-- just
 |   +-- want
 |       +-- to
 |       +-- draw
 +-- trees
 +-- in
     +-- your
         +-- terminal"""
        from tree import build_tree, print_node
        import io
        from contextlib import redirect_stdout

        built_tree = build_tree('(asciitree (sometimes you) (just (want to draw)) trees (in (your terminal)))')
        f = io.StringIO()
        with redirect_stdout(f):
            print_node(built_tree)

        output = f.getvalue()

        assert output == (expected_output + '\n')
Example #19
def build_test():
    return  # NOTE: this early return leaves the rest of the test disabled
    data = []

    for j in range(1000):
        data_item = [("field" + str(i), random.random() * 10000) for i in range(3)]
        data.append(data_item)

    tree_file = "test-tree.hodor"
    if os.path.isfile(tree_file):
        os.remove(tree_file)
    serializer = test_serializer(tree_file)

    root1 = tree.build_tree(data, 28, serializer)
    root2 = root1.link()
    root3 = root2.link()

    dim1 = [data_item[1] for data_item in sorted((data_item[0] for data_item in data), key=lambda d: d[1])]
    dim2 = [data_item[1] for data_item in sorted((data_item[1] for data_item in data), key=lambda d: d[1])]
    dim3 = [data_item[1] for data_item in sorted((data_item[2] for data_item in data), key=lambda d: d[1])]

    # make sure all the roots have all values at bottom
    # assert len(root1.get_all_data()) == len(data)
    # assert len(root2.get_all_data()) == len(data)
    # assert len(root3.get_all_data()) == len(data)

    # for root, dim in zip([root1, root2, root3], [dim1, dim2, dim3]):
    #    root_children = [root.load_child(child) for child in root.children]
    #    root_grandchildren = [child.load_child(grandchild) for child in
    #            root_children for grandchild in child.children]

    #    # test the intervals of the root's children
    #    assert root_children[0].min == dim[0]
    #    assert root_children[0].max == dim[8]
    #    assert root_children[1].min == dim[9]
    #    assert root_children[1].max == dim[9]

    #    for i in range(3):
    #        # test the intervals of the root's grandchildren: leaves
    #        assert root_grandchildren[i].min == dim[i * 3]
    #        assert root_grandchildren[i].max == dim[i * 3 + 2]

    #        # test the linked list at the leaves
    #        if i != 0:
    #            assert root_grandchildren[i].prev == \
    #                root_grandchildren[i - 1].pos
    #
    #        # test that full data is only stored at the last dimension
    #        if root != root3:
    #            assert root_grandchildren[i].full_data == None
    #        else:
    #            assert len(root_grandchildren[i].full_data) > 0
    #            assert len(root_grandchildren[i].full_data[0]) == 4

    ranges = {"field0": (0, 3), "field1": (2, 6), "field2": (0, 10)}

    # result = [data_item for data_item in data if
    #        data_item[0] >= ranges['field0'][0] and data_item[0] <= ranges['field0'][1] and
    #        data_item[1] >= ranges['field1'][0] and data_item[1] <= ranges['field1'][1] and
    #        data_item[2] >= ranges['field2'][0] and data_item[2] <= ranges['field2'][1]]

    start, end = 42, 103

    result = [data_item[0] for data_item in data if data_item[0][1] >= start and data_item[0][1] <= end]

    # print root1.range_query(ranges)
    # print root1.get_range_data(start, end)

    # print result
    assert len(result) == len(root1.get_range_data(start, end))

    # check that getting leaves is working in final dimension
    result = [data_item[0] for data_item in data if data_item[2][1] >= start and data_item[2][1] <= end]
    assert len(result) == len(root3.get_range_data(start, end))
Example #20
 def get_structure():
     nodes = Section.objects.filter(is_enabled=True).values()
     return build_tree(nodes)
Example #21
from tree import build_tree, print_tree, car_data, car_labels
import random
random.seed(4)
tree = build_tree(car_data, car_labels)
#print_tree(tree)
indices = [random.randint(0, 999) for i in range(1000)]
data_subset = [car_data[index] for index in indices]
labels_subset = [car_labels[index] for index in indices]
subset_tree = build_tree(data_subset, labels_subset)
print_tree(subset_tree)

from tree import car_data, car_labels, split, information_gain
import random
import numpy as np
np.random.seed(1)
random.seed(4)


def find_best_split(dataset, labels):
    best_gain = 0
    best_feature = 0
    #Create features here
    features = np.random.choice(len(dataset[0]), 3, replace=False)
    for feature in features:
        data_subsets, label_subsets = split(dataset, labels, feature)
        gain = information_gain(labels, label_subsets)
        if gain > best_gain:
            best_gain, best_feature = gain, feature
    return best_gain, best_feature
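As a quick illustration (not part of the original lesson code), the modified splitter can be exercised directly on the car dataset imported above:

best_gain, best_feature = find_best_split(car_data, car_labels)
print(best_gain, best_feature)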

Example #22
    def run_build_system(self,
                         extra_args='',
                         subdir='',
                         stdout=None,
                         stderr='',
                         status=0,
                         match=None,
                         pass_toolset=None,
                         **kw):

        self.previous_tree = build_tree(self.workdir)

        if match is None:
            match = self.match

        if pass_toolset is None:
            pass_toolset = self.pass_toolset

        try:
            if pass_toolset:
                kw['program'] = self.program + ' ' + self.toolset + ' ' + extra_args
            else:
                kw['program'] = self.program + ' ' + extra_args
            kw['chdir'] = subdir
            apply(TestCmd.TestCmd.run, [self], kw)
        except:
            print "STDOUT ============"
            print self.stdout()
            print "STDERR ============"
            print self.stderr()
            raise

        if status != None and _failed(self, status):
            expect = ''
            if status != 0:
                expect = " (expected %d)" % status

            print '"%s %s" returned %d%s' % (self.program, extra_args,
                                             _status(self), expect)

            print "STDOUT ============"
            print self.stdout()
            print "STDERR ============"
            print self.stderr()
            self.fail_test(1)

        if not stdout is None and not match(self.stdout(), stdout):
            print "Expected STDOUT =========="
            print stdout
            print "Actual STDOUT ============"
            print self.stdout()
            stderr = self.stderr()
            if stderr:
                print "STDERR ==================="
                print stderr
            self.maybe_do_diff(self.stdout(), stdout)
            self.fail_test(1)

        if not stderr is None and not match(self.stderr(), stderr):
            print "STDOUT ==================="
            print self.stdout()
            print "Expected STDERR =========="
            print stderr
            print "Actual STDERR ============"
            print self.stderr()
            self.maybe_do_diff(self.stderr(), stderr)
            self.fail_test(1)

        self.tree = build_tree(self.workdir)
        self.difference = trees_difference(self.previous_tree, self.tree)
        self.difference.ignore_directories()
        self.unexpected_difference = copy.deepcopy(self.difference)

        self.last_build_time = time.time()
Example #23
def ValidationMining(target, fn, ln):

    print("target: %s, fn: %s, ln: %s" % (target, fn, ln))
    # get real value for input
    r_sql = "select * from validtree where nameFirst = \'" + fn + "\' and nameLast = \'" + ln + "\' limit 1;"

    realdata = databaseconnection(r_sql)
    if realdata == None or len(realdata) == 0:
        print("No record exists for %s %s" % (fn, ln))
        pred = "Unknown"
        real = "Unknown"
        #exit()
        return pred, real
    else:
        r_df = pd.DataFrame(list(realdata))
        r_df.columns = [
            'playerID', 'nameFirst', 'nameLast', 'nom', 'hof', 'man'
        ]
        r_df.fillna(value=0, inplace=True)
        real = r_df[target].iloc[0]
        real = "Y" if int(real) == 1 else "N"
        print("real value is ", real)

    # get corresponding row
    playerid = r_df['playerID'].iloc[0]

    tables = AllTables()
    dfcols = tables.cols

    row_sql = "select * from treesource where playerID = \'" + playerid + "\'"
    rowdata = databaseconnection(row_sql)
    rowdf = pd.DataFrame(list(rowdata))

    rowdf.columns = dfcols
    rowdf.fillna(value=0, inplace=True)

    #######decision tree data
    sql = tables.sql

    results = databaseconnection(sql)
    print("get result from db..")
    df = pd.DataFrame(list(results))

    df.columns = dfcols
    df.fillna(value=0, inplace=True)
    df = removezero(df)
    y = df[target].values.astype(int)
    df.drop(columns=['playerID', 'nom', 'hof', 'man'], inplace=True)

    cols = list(df.columns.values)
    df = df[cols].applymap(np.int64)
    df = df[cols].round(decimals=-1)

    rowdf = rowdf[cols].applymap(np.int64)
    rowdf = rowdf[cols].round(decimals=-1)
    row = rowdf.iloc[0]

    df[target] = y.tolist()
    train, test = tree.train_test_split(df)

    ##
    attributes = cols
    print("Generating decision tree..")
    root = tree.build_tree(train, attributes, target)

    print("Start to predict..")
    pred = str(tree.predict(root, row))
    pred = "Y" if int(pred) == 1 else "N"

    return pred, real
Example #24
def tree5():
    return build_tree({
        'a': {
            'b': 1
        }
    })
Example #25
def tree3():
    return build_tree({
        'b': 1
    })
Example #26
def tree1(data1):
    return build_tree(data1)
Example #27
    def run_build_system(
        self, extra_args='', subdir='', stdout = None, stderr = '',
        status = 0, match = None, pass_toolset = None, **kw):

        self.previous_tree = build_tree(self.workdir)

        if match is None:
            match = self.match

        if pass_toolset is None:
            pass_toolset = self.pass_toolset

        try:
            if pass_toolset:
                kw['program'] = self.program + ' ' + self.toolset + ' ' + extra_args                                
            else:
                kw['program'] = self.program + ' ' + extra_args                
            kw['chdir'] = subdir
            apply(TestCmd.TestCmd.run, [self], kw)
        except:
            print "STDOUT ============"
            print self.stdout()
            print "STDERR ============"
            print self.stderr()
            raise

        if status != None and _failed(self, status):
            expect = ''
            if status != 0:
                expect = " (expected %d)" % status

            print '"%s %s" returned %d%s' % (
                self.program, extra_args, _status(self), expect)

            print "STDOUT ============"
            print self.stdout()
            print "STDERR ============"
            print self.stderr()
            self.fail_test(1)

        if not stdout is None and not match(self.stdout(), stdout):
            print "Expected STDOUT =========="
            print stdout
            print "Actual STDOUT ============"
            print self.stdout()
            stderr = self.stderr()
            if stderr:
                print "STDERR ==================="
                print stderr
            self.maybe_do_diff(self.stdout(), stdout)
            self.fail_test(1)

        if not stderr is None and not match(self.stderr(), stderr):
            print "STDOUT ==================="
            print self.stdout()
            print "Expected STDERR =========="
            print stderr
            print "Actual STDERR ============"
            print self.stderr()
            self.maybe_do_diff(self.stderr(), stderr)
            self.fail_test(1)

        self.tree = build_tree(self.workdir)
        self.difference = trees_difference(self.previous_tree, self.tree)
        self.difference.ignore_directories()
        self.unexpected_difference = copy.deepcopy(self.difference)

        self.last_build_time = time.time()
Example #28
from tree import tree
from tree import build_tree

class walk_the(tree):
    def print_tree(self):
        print(self.data)
        if self.children:
            for child in self.children:
                child.print_tree()

build_tree(walk_the)
Example #29
        run = run_size.popleft(
        )  # to keep them aligned must pop when node pops

        left_run, right_run = run, run
        if node.left:
            if node.val == node.left.val - 1:
                left_run += 1
                max_run = max(max_run, left_run)
            else:
                left_run = 1  # run broken: restart the count at this child
            level.append(node.left)
            run_size.append(left_run)
        if node.right:
            if node.val == node.right.val - 1:
                right_run += 1
                max_run = max(max_run, right_run)
            else:
                right_run = 1  # run broken: restart the count at this child
            level.append(node.right)
            run_size.append(right_run)
    return max_run


root = build_tree([3, 2, 4, 4, 3, None, 5, None, None, None, 4, 6, 7, 1, 5])
print(longest_run(root))

root = build_tree([4, 2, 5, 1, None, 6])
print(longest_run(root))

# Or we can DFS -> where we build run from bottom-up
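A sketch of that bottom-up DFS alternative, assuming the same node objects with val, left and right attributes (an illustration, not the original author's code):

def longest_run_dfs(root):
    # longest run of values increasing by exactly 1 along a downward path
    best = [0]

    def dfs(node):
        # length of the longest consecutive run starting at this node
        if node is None:
            return 0
        left = dfs(node.left)
        right = dfs(node.right)
        run = 1
        if node.left and node.left.val == node.val + 1:
            run = max(run, left + 1)
        if node.right and node.right.val == node.val + 1:
            run = max(run, right + 1)
        best[0] = max(best[0], run)
        return run

    dfs(root)
    return best[0]

print(longest_run_dfs(build_tree([4, 2, 5, 1, None, 6])))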
Example #30
def parse(json_str):
    return build_tree(json.loads(json_str))
Example #31
        while len(tree_queue) > 0:
            # Get elem
            elem = tree_queue.popleft()

            # push into buffer
            if elem.left is not None:
                buffer_queue.append(elem.left)
            if elem.right is not None:
                buffer_queue.append(elem.right)

            # push into n level
            n_level_queue.append(elem)
            counter -= 1
            if counter == 0:
                result.append(n_level_queue)
                n_level_queue = collections.deque()
                # push into the tree queue
                while len(buffer_queue) > 0:
                    tree_queue.append(buffer_queue.popleft())
                    counter += 1
        return result


sorted_arr = [11, 20, 29, 32, 41, 50, 65, 72, 91, 99]
minimal_tree = build_tree(sorted_arr)
list_of_depth = ListOfDepth(minimal_tree)
for i, depth_list in enumerate(list_of_depth.list):
    print('DEPTH LEVEL: {}'.format(i))
    for elem in depth_list:
        print(elem)
Example #32
from tree import build_tree, print_tree, car_data, car_labels, classify
import random
random.seed(4)

# The features are the price of the car, the cost of maintenance, the number of doors, the number of people the car can hold, the size of the trunk, and the safety rating
unlabeled_point = ['high', 'vhigh', '3', 'more', 'med', 'med']

predictions = []
for i in range(0, 20):
    # draw a fresh bootstrap sample for each tree so the 20 bagged trees differ
    indices = [random.randint(0, 999) for _ in range(1000)]
    data_subset = [car_data[index] for index in indices]
    labels_subset = [car_labels[index] for index in indices]
    subset_tree = build_tree(data_subset, labels_subset)
    predictions.append(classify(unlabeled_point, subset_tree))
print(predictions)
final_prediction = max(predictions, key=predictions.count)
print(final_prediction)
Example #33
    radius_multiplicative_update_factor = args.radius_multiplicative_update_factor

    # row 0: walltime vs number of iterations
    # row 1: semilogy version of plot in row 1
    fig, ax = plt.subplots(2, 1, figsize=(12, 12), sharex=True)

    # list solution
    max_steps = 18

    t = []
    x = np.arange(1, max_steps + 1)
    for num_steps in range(1, max_steps + 1):
        start_time = time.time()
        tree.build_tree(num_steps,
                        theta_step_size,
                        radius_length,
                        radius_multiplicative_update_factor,
                        plot=False)
        end_time = time.time()
        walltime = end_time - start_time
        t.append(walltime)
    t = np.array(t)

    # plot x vs t
    ax[0].plot(x, t, color='g', label='empirical data (tree.py)')

    # plot x vs log(t)
    A = np.vstack([x[t > 0], np.ones(len(x[t > 0]))]).T
    k, m = np.linalg.lstsq(A, np.log2(t[t > 0]), rcond=None)[0]

    ax[1].semilogy(x,
Example #34
    def run_build_system(
        self, extra_args='', subdir='', stdout = None, stderr = '',
        status = 0, match = None, pass_toolset = None, **kw):

        if os.path.isabs(subdir):
            if stderr:
                print "You must pass a relative directory to subdir <"+subdir+">."
            status = 1
            return

        self.previous_tree = build_tree(self.workdir)

        if match is None:
            match = self.match

        if pass_toolset is None:
            pass_toolset = self.pass_toolset        

        try:
            kw['program'] = []
            kw['program'] += self.program
            if extra_args:
                kw['program'] += extra_args.split(" ")            
            if pass_toolset:
                kw['program'].append(self.toolset)
            kw['chdir'] = subdir
            apply(TestCmd.TestCmd.run, [self], kw)
        except:
            self.dump_stdio()
            raise

        if status != None and _failed(self, status):
            expect = ''
            if status != 0:
                expect = " (expected %d)" % status

            print '"%s" returned %d%s' % (
                kw['program'], _status(self), expect)

            self.fail_test(1)

        if not stdout is None and not match(self.stdout(), stdout):
            print "Expected STDOUT =========="
            print stdout
            print "Actual STDOUT ============"
            print self.stdout()
            stderr = self.stderr()
            if stderr:
                print "STDERR ==================="
                print stderr
            self.maybe_do_diff(self.stdout(), stdout)
            self.fail_test(1, dump_stdio = 0)

        # Intel tends to produce some message to stderr, which makes tests
        # fail
        intel_workaround = re.compile("^xi(link|lib): executing.*\n", re.M)
        actual_stderr = re.sub(intel_workaround, "", self.stderr())

        if not stderr is None and not match(actual_stderr, stderr):
            print "STDOUT ==================="
            print self.stdout()
            print "Expected STDERR =========="
            print stderr
            print "Actual STDERR ============"
            print actual_stderr
            self.maybe_do_diff(actual_stderr, stderr)
            self.fail_test(1, dump_stdio = 0)

        self.tree = build_tree(self.workdir)
        self.difference = trees_difference(self.previous_tree, self.tree)
        self.difference.ignore_directories()
        self.unexpected_difference = copy.deepcopy(self.difference)

        self.last_build_time = time.time()
Example #35
    if rstart:
        node.next = rstart
        rstart.prev = node

    return (lstart or node), (rend or node)


def bstToDoubleLinkedList(root):
    if not root:
        return None

    s, e = _bst(root)
    return s


tree = build_tree(range(0, 40, 2))
head = bstToDoubleLinkedList(tree)
dll = DoublyLinkedlist()
dll.head = head

# print "bst to DLL...", dll


# Number ways of decoding integer to alphabet 'A' -> 1,
# 'B' -> 2, ..., 'Z' -> 26
# Dynamic programing: x(n) = x(n-1) + x(n-2)
# https://leetcode.com/problems/decode-ways/description/
def numDecodings(s):
    """
    :type s: str
    :rtype: int
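The snippet above is cut off after the docstring. A minimal, self-contained sketch of the decode-ways dynamic program it describes (an illustration rather than the original author's implementation; note that a two-digit code is only valid in the range 10..26):

def num_decodings(s):
    # ways(i) = ways(i-1) if s[i-1] decodes on its own (non-zero digit)
    #         + ways(i-2) if s[i-2:i] forms a valid two-digit code (10..26)
    if not s or s[0] == '0':
        return 0
    prev2, prev1 = 1, 1
    for i in range(2, len(s) + 1):
        cur = 0
        if s[i - 1] != '0':
            cur += prev1
        if 10 <= int(s[i - 2:i]) <= 26:
            cur += prev2
        prev2, prev1 = prev1, cur
    return prev1

print(num_decodings('226'))  # 3 ways: "2 2 6", "22 6", "2 26"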
Example #36
def tree2():
    return build_tree({
        'a': {
            'c': 1
        }
    })
Example #37
def test_build():
    print(build_tree(7, [to_perm([1,2,3]), to_perm([3,4,5]), to_perm([5,6,7,8]), to_perm([4,5]), to_perm([9, 10])]))
Example #38
def test_gens():
    tree = build_tree(1, [to_perm([1,2]), to_perm([1,2,3,4,5])])
    gens = make_gens(tree, [to_perm([1,2]), to_perm([1,2,3,4,5])])

    print(normalize(gens))
Example #39
    def run_build_system(self,
                         extra_args='',
                         subdir='',
                         stdout=None,
                         stderr='',
                         status=0,
                         match=None,
                         pass_toolset=None,
                         **kw):

        self.previous_tree = build_tree(self.workdir)

        if match is None:
            match = self.match

        if pass_toolset is None:
            pass_toolset = self.pass_toolset

        try:
            kw['program'] = []
            kw['program'] += self.program
            if extra_args:
                kw['program'] += extra_args.split(" ")
            if pass_toolset:
                kw['program'].append(self.toolset)
            kw['chdir'] = subdir
            apply(TestCmd.TestCmd.run, [self], kw)
        except:
            self.dump_stdio()
            raise

        if status != None and _failed(self, status):
            expect = ''
            if status != 0:
                expect = " (expected %d)" % status

            print '"%s" returned %d%s' % (kw['program'], _status(self), expect)

            self.fail_test(1)

        if not stdout is None and not match(self.stdout(), stdout):
            print "Expected STDOUT =========="
            print stdout
            print "Actual STDOUT ============"
            print self.stdout()
            stderr = self.stderr()
            if stderr:
                print "STDERR ==================="
                print stderr
            self.maybe_do_diff(self.stdout(), stdout)
            self.fail_test(1, dump_stdio=0)

        # Intel tends to produce some message to stderr, which makes tests
        # fail
        intel_workaround = re.compile("^xi(link|lib): executing.*\n", re.M)
        actual_stderr = re.sub(intel_workaround, "", self.stderr())

        if not stderr is None and not match(actual_stderr, stderr):
            print "STDOUT ==================="
            print self.stdout()
            print "Expected STDERR =========="
            print stderr
            print "Actual STDERR ============"
            print actual_stderr
            self.maybe_do_diff(actual_stderr, stderr)
            self.fail_test(1, dump_stdio=0)

        self.tree = build_tree(self.workdir)
        self.difference = trees_difference(self.previous_tree, self.tree)
        self.difference.ignore_directories()
        self.unexpected_difference = copy.deepcopy(self.difference)

        self.last_build_time = time.time()
Example #40
import pdb
from collections import deque

from tree import TreeNode, build_tree, print_tree, serialize_tree

# build a sample tree
root = build_tree([3, 1, 7, -3, 2, 4, 9])
print_tree(root)

###################
# DFS using a stack
###################
print("DFS")
stack = []  # to go down the tree and backtrack up
current = root

while stack or current:
    # Finish when nothing to go down or backtrack
    while current:
        # go down until left most leafnode
        stack.append(current)
        current = current.left

    current = stack.pop()
    print(current.val)
    current = current.right

###################
# BFS using a queue
###################
print("BFS")
Example #41
def test(tree, isvalid):
    root = build_tree(tree)
    assert isvalid == Solution().isValidBST(root)
Example #42
    def run_build_system(self, extra_args="", subdir="", stdout=None, stderr="",
        status=0, match=None, pass_toolset=None, use_test_config=None,
        ignore_toolset_requirements=None, expected_duration=None, **kw):

        self.last_build_time_start = time.time()

        try:
            if os.path.isabs(subdir):
                if stderr:
                    print "You must pass a relative directory to subdir <"+subdir+">."
                status = 1
                return

            self.previous_tree = tree.build_tree(self.workdir)

            if match is None:
                match = self.match

            if pass_toolset is None:
                pass_toolset = self.pass_toolset

            if use_test_config is None:
                use_test_config = self.use_test_config

            if ignore_toolset_requirements is None:
                ignore_toolset_requirements = self.ignore_toolset_requirements

            try:
                kw['program'] = []
                kw['program'] += self.program
                if extra_args:
                    kw['program'] += extra_args.split(" ")
                if pass_toolset:
                    kw['program'].append("toolset=" + self.toolset)
                if use_test_config:
                    kw['program'].append('--test-config="%s"'
                        % os.path.join(self.original_workdir, "test-config.jam"))
                if ignore_toolset_requirements:
                    kw['program'].append("--ignore-toolset-requirements")
                kw['chdir'] = subdir
                apply(TestCmd.TestCmd.run, [self], kw)
            except:
                self.dump_stdio()
                raise
        finally:
            self.last_build_time_finish = time.time()

        if (status != None) and _failed(self, status):
            expect = ''
            if status != 0:
                expect = " (expected %d)" % status

            annotation("failure", '"%s" returned %d%s'
                % (kw['program'], _status(self), expect))
            
            annotation("reason", "unexpected status returned by bjam")
            self.fail_test(1)

        if not (stdout is None) and not match(self.stdout(), stdout):
            annotation("failure", "Unexpected stdout")
            annotation("Expected STDOUT", stdout)
            annotation("Actual STDOUT", self.stdout())
            stderr = self.stderr()
            if stderr:
                annotation("STDERR", stderr)
            self.maybe_do_diff(self.stdout(), stdout)
            self.fail_test(1, dump_stdio=False)

        # Intel tends to produce some messages to stderr which make tests fail.
        intel_workaround = re.compile("^xi(link|lib): executing.*\n", re.M)
        actual_stderr = re.sub(intel_workaround, "", self.stderr())

        if not (stderr is None) and not match(actual_stderr, stderr):
            annotation("failure", "Unexpected stderr")
            annotation("Expected STDERR", stderr)
            annotation("Actual STDERR", self.stderr())
            annotation("STDOUT", self.stdout())
            self.maybe_do_diff(actual_stderr, stderr)
            self.fail_test(1, dump_stdio=False)

        if not expected_duration is None:
            actual_duration = self.last_build_time_finish - self.last_build_time_start 
            if (actual_duration > expected_duration):
                print "Test run lasted %f seconds while it was expected to " \
                    "finish in under %f seconds." % (actual_duration,
                    expected_duration)
                self.fail_test(1, dump_stdio=False)

        self.tree = tree.build_tree(self.workdir)
        self.difference = tree.trees_difference(self.previous_tree, self.tree)
        self.difference.ignore_directories()
        self.unexpected_difference = copy.deepcopy(self.difference)
Example #43
import sys
import math

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches

from tree import build_tree, Leaf
from file import File

N = int(sys.argv[1])
k = 10
m = 10

# Build the Data File and store it in a new kd-tree
data = np.random.normal(0.5, 0.1, (N, k))  # Normal distribution
#data = np.random.rand(N, k)    # Uniform distribution

F = File(data)
tree = build_tree(F)

# Global variables

Xq = np.random.rand(k)  # Query record
PQD = [
    math.inf for _ in range(m)
]  # Priority queue of the m closest distances encountered at any phase of the search
PQR = [
    None for _ in range(m)
]  # Priority queue of the record numbers of the corresponding m best matches encountered at any phase of the search
Bu = [math.inf for _ in range(k)]  # Coordinate upper bounds
Bl = [-math.inf for _ in range(k)]  # Coordinate lower bounds

# Only works for the k == 2 case
if k == 2: