def create_n_trees(data: np.ndarray, number: int, sample_size: int = 200):
    """Build ``number`` trees, each from a fresh sample of ``data``.

    Args:
        data: Source array handed to ``choose_n_points`` for sampling.
        number: How many trees to build.
        sample_size: Number of points requested from ``choose_n_points`` for
            every tree. Defaults to 200, the value that used to be hard-coded.

    Returns:
        list: The root nodes, one per tree, in creation order.
    """
    trees = []
    for _ in range(number):
        indexes, chosen_points = choose_n_points(data, sample_size)
        root = tr.treeNode(create_point_array(chosen_points), indexes=indexes)
        # The second argument is passed through unchanged; presumably a
        # depth or leaf-size limit -- TODO confirm against tr.build_tree.
        tr.build_tree(root, 3)
        trees.append(root)
    return trees
def best_select(parses, dictionary, wordvec):
    """Return the parse with the best combined rank score.

    Every parse starts with a prior of ``-position`` (earlier parses are
    preferred). The parses are then ranked by ``get_score`` and the parse at
    rank ``r`` is penalised by ``20 * r``. The parse with the highest total
    is returned.

    Arguments:
        parses: Sequence of parses; ``parses[0]`` supplies the word list.
        dictionary: Sequence of lower-case words; a word's position selects
            its column in ``wordvec``.
        wordvec: 2-D array with one column per dictionary word.

    Returns:
        The selected element of ``parses``.
    """
    n_parses = len(parses)
    # Prior: 0, -1, -2, ... in input order.
    scores = -np.arange(n_parses, dtype=float)

    words = parse_tree.get_words(parse_tree.build_tree(parses[0]))
    vectors = np.zeros((wordvec.shape[0], len(words)))
    z = np.zeros(len(words))
    for i, word in enumerate(words):
        key = word.lower()
        # Unknown words keep the all-zero column created above.
        if key in dictionary:
            vectors[:, i] = wordvec[:, dictionary.index(key)]
        # Normaliser over the whole vocabulary. get_score skips all-zero
        # columns, so the value stored for an unknown word is never read.
        z[i] = np.exp(np.dot(vectors[:, i:i+1].transpose(), wordvec)).sum()

    wscores = np.array([get_score(parse, vectors, z) for parse in parses],
                       dtype=float)
    # order[r] is the index of the parse with the r-th highest word score.
    order = (-wscores).argsort()
    scores[order] -= 20.0 * np.arange(n_parses)
    return parses[scores.argmax()]
def random_forest_training(data_train, trees_num):
    '''Build a random forest.

    input:  data_train(list): training data
            trees_num(int): number of classification trees
    output: trees_result(list): the tree built for every round
            trees_feature(list): the features selected for every tree
    '''
    n = np.shape(data_train)[1]  # number of columns per sample
    # Number of features handed to choose_samples for each tree.
    k = int(log(n - 1, 2)) + 1 if n > 2 else 1

    trees_result = []
    trees_feature = []
    for _ in xrange(trees_num):
        # Draw the samples and features, then grow one tree from them.
        data_samples, feature = choose_samples(data_train, k)
        trees_result.append(build_tree(data_samples))
        trees_feature.append(feature)
    return trees_result, trees_feature
def main():
    """Time ``tree.build_tree`` on arrays of size 2**1 through 2**22.

    Each measurement is written to ``result.csv`` as ``N,seconds`` and a
    progress percentage is printed after every run. Once all runs are done,
    the collected sizes and timings are passed to ``g.draw_graph``.
    """
    sizes = []
    tempos = []
    iterations = range(1, 23)

    # The context manager closes the file even when a run raises.
    with open("result.csv", "w") as f:
        f.write("N, tempo(s)\n")
        for done, i in enumerate(iterations, start=1):
            size = 2**i
            array = generate_array(size)

            start = t.time()
            tree.build_tree(array)
            total_time = t.time() - start

            tempos.append(total_time)
            sizes.append(size)
            f.write(f"{size},{total_time}\n")
            print(f"{((done/len(iterations))*100):.0f}%")  # progress

    g.draw_graph(sizes, tempos)
def random_forest_training(data_train, trees_num):
    '''
    Build a random forest.

    :param data_train: (list) training data
    :param trees_num: (int) number of classification trees
    :return: trees_result (list) the tree built for every round
             trees_feature (list) the features selected for every tree
    '''
    # Column count of the training matrix.
    n_columns = np.shape(data_train)[1]
    if n_columns <= 2:
        k = 1
    else:
        k = int(log(n_columns - 1, 2)) + 1  # number of features per tree

    trees_result, trees_feature = [], []
    for _ in range(trees_num):
        # 1. draw samples and k features; 2. grow a tree; 3. keep both.
        data_samples, feature = choose_samples(data_train, k)
        tree = build_tree(data_samples)
        trees_result.append(tree)
        trees_feature.append(feature)
    return trees_result, trees_feature
def test_eq(self, data1, tree1):
    """A tree equals its deep copy and differs from a tree built from changed data."""
    data1['extra'] = 10
    changed_tree = build_tree(data1)
    clone = copy.deepcopy(tree1)
    # The copy must be a distinct object that still compares equal.
    assert clone is not tree1
    assert clone == tree1
    assert changed_tree != tree1
def get_score(parse, vectors, z):
    """Score a parse by distance-weighted affinity between its word vectors.

    For every pair of non-zero columns ``i < j`` of ``vectors`` the score
    gains ``W[i, j] * exp(x_i . x_j) * (1 / z[i] + 1 / z[j])``, where
    ``W = exp(-0.5 * D)`` and ``D`` is the matrix returned by
    ``parse_tree.calc_dist`` for the parsed tree.

    Arguments:
        parse: Parse accepted by ``parse_tree.build_tree``.
        vectors: 2-D array with one column per word; all-zero columns are
            ignored.
        z: Per-word normalisers, indexed like the columns of ``vectors``.

    Returns:
        The accumulated score (``0`` when no pair qualifies).
    """
    tree = parse_tree.build_tree(parse)
    parse_tree.fix_tree(tree)
    D, _ = parse_tree.calc_dist(tree)
    W = np.exp(-D * 0.5)

    # Decide once which columns take part instead of re-testing every pair.
    active = [i for i in range(vectors.shape[1])
              if np.abs(vectors[:, i]).sum() != 0]

    score = 0
    for pos, i in enumerate(active):
        x = vectors[:, i:i+1]
        for j in active[pos + 1:]:
            y = vectors[:, j:j+1]
            # Shared by both normalised terms; computed a single time.
            affinity = np.exp(np.dot(x.transpose(), y).sum())
            score += W[i, j] * (affinity / z[i]) + W[i, j] * (affinity / z[j])
    return score
def tree4():
    """Return the tree built from a three-level nested mapping a -> b -> c."""
    nested = {'a': {'b': {'c': 3}}}
    return build_tree(nested)
def grow_forest(x, y, n, max_depth):
    """Grow ``n`` trees, each on a random subsample of the rows of ``x``.

    Every tree is trained on ``x.shape[0] // n`` rows drawn with replacement.

    Arguments:
        x: Array indexed by row; ``x.shape[0]`` is the number of samples.
        y: Array of labels indexed like the rows of ``x``.
        n: Number of trees to grow.
        max_depth: Forwarded to ``tree.build_tree``.

    Returns:
        numpy.ndarray: Object array of length ``n`` holding the trees.
    """
    n_rows = x.shape[0]
    subset_size = n_rows // n
    forest = np.empty(n, dtype=object)
    for i in range(n):
        # randint draws from [0, n_rows). The previous one-argument
        # random_integers(n_rows - 1) call drew from [1, n_rows - 1], so the
        # first row could never be selected (and the function is deprecated).
        indexes = np.random.randint(0, n_rows, size=subset_size)
        forest[i] = tree.build_tree(x[indexes], y[indexes], max_depth=max_depth)
    return forest
def AnalyzeMining(target, tables):
    """Train a decision tree for ``target`` and report test accuracy and F1.

    Args:
        target: Name of the label column. Its values are read from the query
            result before the label columns are dropped from the features.
        tables: "All", "Batting", "Fielding" or "Pitching" to select a table
            set, or an object that already exposes ``sql`` and ``cols``.

    Returns:
        tuple: ``(acc, f1_score)`` as percentage strings ending in "%".

    Raises:
        ValueError: If ``tables`` is a string that names no known table set.
    """
    table_sets = {
        "All": AllTables,
        "Batting": BattingTables,
        "Fielding": FieldingTables,
        "Pitching": PitchingTables,
    }
    if isinstance(tables, str):
        # Fail with a clear message instead of an AttributeError on
        # ``tables.sql`` further down.
        if tables not in table_sets:
            raise ValueError("unknown table set: %r" % (tables,))
        tables = table_sets[tables]()

    sql = tables.sql
    dfcols = tables.cols
    results = databaseconnection(sql)
    print("get result from db..")

    df = pd.DataFrame(list(results))
    df.columns = dfcols
    df.fillna(value=0, inplace=True)
    df = removezero(df)

    # Save the labels before the label/identifier columns are removed.
    y = df[target].values.astype(int)
    df.drop(columns=['playerID', 'nom', 'hof', 'man'], inplace=True)
    cols = list(df.columns.values)
    # Features are cast to integers and rounded to the nearest ten.
    df = df[cols].applymap(np.int64)
    df = df[cols].round(decimals=-1)

    # Re-attach the labels so train and test rows carry their target.
    df[target] = y.tolist()
    train, test = tree.train_test_split(df)
    attributes = cols
    print("Generating decision tree..")
    root = tree.build_tree(train, attributes, target)

    print("Accuracy of test data")
    acc = str(tree.test_predictions(root, test, target) * 100.0) + '%'
    print(acc)
    print("F1 score of test data")
    f1_score = str(tree.test_f1score(root, test, target) * 100.0) + '%'
    print(f1_score)
    return acc, f1_score
def learn_forest_learn(x, y, quan, depth):
    """Train ``quan`` trees of depth ``depth`` on random subsamples of ``x``.

    Every tree is trained on ``x.shape[0] // quan`` rows drawn with
    replacement.

    Arguments:
        x: 2-D array of samples, one per row.
        y: Labels indexed like the rows of ``x``.
        quan: Number of trees.
        depth: Maximum tree depth, forwarded as ``max_depth``.

    Returns:
        list: The trained trees, in creation order.
    """
    n_rows = x.shape[0]
    subset_size = n_rows // quan
    forest = []
    for _ in range(quan):
        # randint draws from [0, n_rows). The previous one-argument
        # random_integers(n_rows - 1) call drew from [1, n_rows - 1], so the
        # first row could never be selected (and the function is deprecated).
        r = np.random.randint(0, n_rows, size=subset_size)
        xx = x[r, :]
        yy = y[r].T
        forest.append(tree.build_tree(xx, yy, max_depth=depth))
    return forest
def search(B, num_query, num_items): data = [] dims = ["x", "y", "z"] for i in range(num_items): item = [(dimension, random.randrange(-1000, 1000)) for dimension in ["x", "y", "z"]] # for dimension in ['x']] data.append(item) tree_file = "test-tree-2.hodor" if os.path.isfile(tree_file): os.remove(tree_file) serializer = test_serializer(tree_file) root = tree.build_tree(data, B, serializer) ranges = [] start = time.time() for i in range(num_query): ranges = {d: (random.randrange(-1000, 0), random.randrange(0, 1000)) for d in ["x", "y", "z"]} result = root.range_query(ranges) real_result = [] for d in data: incl = True for key in ranges: dk = next(i[1] for i in d if i[0] == key) if dk < ranges[key][0] or dk > ranges[key][1]: incl = False if incl: real_result.append(d) if len(real_result) != len(result): print "ranges:", ranges print "result:", len(result), "should be:", len(real_result), "items" result = [i[:3] for i in result] missing = [i for i in real_result if i not in result] missing = map(lambda res: tuple(i[1] for i in res), missing) print "should have been in the result:", missing extra = [i for i in result if i not in real_result] extra = map(lambda res: tuple(i[1] for i in res), extra) print "should not have been in the result:", extra assert len(real_result) == len(result) total = time.time() - start print num_query, "queries on", num_items, "items took", total, "seconds" print serializer.normal_seeks, "forward seeks averaged", 10 ** 6 * serializer.normal_seek_time / serializer.normal_seeks, "microseconds each" print serializer.back_seeks # , 'back seeks averaged',\
def main(): files = get_filenames() x, y = [], [] # generate the learning curve data data = list(parse(file(files.dataset))) for train_prop in np.arange(0.1, 0.99, 0.05): training_set, testing_set = sample(data, train_prop) tree = build_tree(training_set).prune(MIN_GAIN) check = [record[RESULT_IDX] == plurality(tree.classify(record)) for record in testing_set] counter = Counter(check) precision = counter[True] / float(counter[True] + counter[False]) print 'Training set sampling probability = %.2f:' % (train_prop) print 'training data size = %d,' % (len(training_set)), print 'test data size = %d,' % (len(testing_set)), print 'precision = %.4f' % (precision) x.append(len(training_set)) y.append(precision) # statistics ymean, ystd, ymin, ymax = np.mean(y), np.std(y), np.min(y), np.max(y) print 'Mean of precision = %.4f' % (ymean) print 'Standard deviation of precision = %.4f' % (ystd) print 'Min = %.4f, max = %.4f' % (ymin, ymax) xy = sorted(zip(x, y), key=lambda a: a[0]) x, y = zip(*xy) # setup decorations plt.rc('font', family='serif') plt.yticks(np.arange(0.0, 1.0, 0.1)) plt.ylim(0.0, 1.0) plt.grid(True) plt.title('Learning Curve') plt.xlabel('Training set size') plt.ylabel('Precision on test set') # plot smoothed learning curve xnew = np.linspace(np.min(x), np.max(x), 100) ynew = interp1d(x, y)(xnew) plt.plot(x, y, '.', xnew, ynew, '--') # annotation box = dict(boxstyle='square', fc="w", ec="k") txt = '$\mu = %.4f$, $\sigma = %.4f$' % (ymean, ystd) txt += ', $min = %.4f$, $max = %.4f$' % (ymin, ymax) plt.text(170, 0.05, txt, bbox=box) plt.savefig(files.curve) print 'Save learning curve to', files.curve
def build_forrest(data, max_depth, min_size, n_attributes, n_trees, n_tests, bg_size):
    """Grow a forest tree by tree and score it by voting on test samples.

    After each tree is added, a test group of ``n_tests`` items is drawn from
    ``data`` and every item is classified by ``predict_by_vote`` over the
    trees built so far. ``verify`` returns the (possibly updated) data and a
    value that is added to the running score.

    Returns:
        ``score / n_trees / n_tests``.
    """
    # NOTE(review): the nesting below is reconstructed. The denominator
    # n_trees * n_tests suggests one test round per tree -- confirm.
    forrest = list()
    score = int()
    print('trees', n_trees)
    for i in range(n_trees):
        # Train one tree on a bootstrap group of size bg_size.
        i_train = create_bootstrap_group(data, bg_size)
        i_tree = build_tree(i_train, max_depth, min_size, n_attributes)
        forrest.append(i_tree)
        print('nr drzewa', i)
        i_test = create_test_group(data, n_tests)
        for j in range(len(i_test)):
            print('n test', j, i_test[j])
            prediction = predict_by_vote(forrest, i_test[j])
            # verify hands back the data set, which replaces ``data`` for
            # all later rounds.
            [data, verification] = verify(data, prediction, i_test[j])
            score += verification
        print(score)
    accuracy = score / n_trees / n_tests
    return accuracy
def __init__(self, columns, target_column, rows, forest_size=None):
    """Build the forest and print every tree.

    When ``forest_size`` is falsy it defaults to ``ceil(sqrt(len(rows)))``.
    Each tree is built from ``ceil(len(rows) / forest_size)`` rows drawn with
    replacement, a random subset of ``columns`` and a randomly chosen score
    type ('entropy' or 'gini').
    """
    if not forest_size:
        forest_size = ceil(sqrt(len(rows)))
    bag_size = ceil(len(rows) / forest_size)

    forest = []
    for _ in range(forest_size):
        # Random draws happen in the same order as the keyword arguments.
        n_columns = randint(min(2, len(columns)), len(columns))
        tree_columns = sample(columns, n_columns)
        bag = choices(rows, k=bag_size)  # with replacement
        score_type = choice(('entropy', 'gini'))
        forest.append(build_tree(
            columns=tree_columns,
            target_column=target_column,
            rows=bag,
            score_type=score_type,
            # TODO min_gain_for_split=
        ))
    self._trees = forest

    for tree in self._trees:
        print('-' * 80)
        print(tree)
def build_chain(S: Iterable[Permutation], order=None):
    """Build a full stabilizer chain from the generators ``S``.

    Starting with ``S``, a tree is built for the next base point in
    ``order``; the generators derived from that tree (``make_gens`` followed
    by ``normalize``) drive the next round, until none remain.

    Args:
        S: Generators. It is iterated more than once, so it must be a
            re-iterable collection, and it must be non-empty.
        order: Base points in the order to process them; defaults to
            ``1..n`` where ``n`` is the largest ``.n`` among ``S``.

    Returns:
        FullStabilizerChain built from ``n``, the positions ``1..b``, the
        concatenated ``.S`` of every tree, and the trees themselves.
    """
    n = max(S, key=lambda x: x.n).n
    # Identity test: ``== None`` could dispatch to a custom ``__eq__``.
    if order is None:
        order = list(range(1, n + 1))

    b = 0
    ans = []
    while S:
        tree = build_tree(order[b], S)
        ans.append(tree)
        S = normalize(make_gens(tree, S))
        b += 1

    S = []
    for t in ans:
        S.extend(t.S)
    # NOTE(review): 1..b is reported even when a custom ``order`` was
    # supplied -- confirm that ``order[:b]`` is not what is wanted here.
    return FullStabilizerChain(n, list(range(1, b + 1)), S, ans)
def test_tree(self):
    """Check that ``print_node`` renders a parsed tree as the expected text."""
    # NOTE(review): the expected text is reproduced exactly as it appears in
    # this file. It must match print_node's output character for character,
    # so confirm its line breaks and indentation against the renderer.
    expected_output = """asciitree +-- sometimes | +-- you +-- just | +-- want | +-- to | +-- draw +-- trees +-- in +-- your +-- terminal"""
    from tree import build_tree, print_node
    import io
    from contextlib import redirect_stdout
    bulded_tree = build_tree('(asciitree (sometimes you) (just (want to draw)) trees (in (your terminal)))')
    f = io.StringIO()
    # Capture everything print_node writes to stdout.
    with redirect_stdout(f):
        print_node(bulded_tree)
    output = f.getvalue()
    # One trailing newline is expected after the rendered tree.
    assert output == (expected_output + '\n')
def build_test():
    """Disabled test of tree building, linking and range queries.

    The function returns immediately, so nothing below the first statement
    runs.
    """
    return
    # Everything from here on is unreachable because of the return above.
    data = []
    for j in range(1000):
        data_item = [("field" + str(i), random.random() * 10000) for i in range(3)]
        data.append(data_item)
    tree_file = "test-tree.hodor"
    if os.path.isfile(tree_file):
        os.remove(tree_file)
    serializer = test_serializer(tree_file)
    root1 = tree.build_tree(data, 28, serializer)
    root2 = root1.link()
    root3 = root2.link()
    # (name, value) pairs of each field sorted by value, reduced to the values.
    dim1 = [data_item[1] for data_item in sorted((data_item[0] for data_item in data), key=lambda d: d[1])]
    dim2 = [data_item[1] for data_item in sorted((data_item[1] for data_item in data), key=lambda d: d[1])]
    dim3 = [data_item[1] for data_item in sorted((data_item[2] for data_item in data), key=lambda d: d[1])]
    # make sure all the roots have all values at bottom
    # assert len(root1.get_all_data()) == len(data)
    # assert len(root2.get_all_data()) == len(data)
    # assert len(root3.get_all_data()) == len(data)
    # for root, dim in zip([root1, root2, root3], [dim1, dim2, dim3]):
    #     root_children = [root.load_child(child) for child in root.children]
    #     root_grandchildren = [child.load_child(grandchild) for child in
    #                           root_children for grandchild in child.children]
    #     # test the intervals of the root's children
    #     assert root_children[0].min == dim[0]
    #     assert root_children[0].max == dim[8]
    #     assert root_children[1].min == dim[9]
    #     assert root_children[1].max == dim[9]
    #     for i in range(3):
    #         # test the intervals of the root's grandchildren: leaves
    #         assert root_grandchildren[i].min == dim[i * 3]
    #         assert root_grandchildren[i].max == dim[i * 3 + 2]
    #         # test the linked list at the leaves
    #         if i != 0:
    #             assert root_grandchildren[i].prev == \
    #                 root_grandchildren[i - 1].pos
    #
    #         # test that full data is only stored at the last dimension
    #         if root != root3:
    #             assert root_grandchildren[i].full_data == None
    #         else:
    #             assert len(root_grandchildren[i].full_data) > 0
    #             assert len(root_grandchildren[i].full_data[0]) == 4
    ranges = {"field0": (0, 3), "field1": (2, 6), "field2": (0, 10)}
    # result = [data_item for data_item in data if
    #           data_item[0] >= ranges['field0'][0] and data_item[0] <= ranges['field0'][1] and
    #           data_item[1] >= ranges['field1'][0] and data_item[1] <= ranges['field1'][1] and
    #           data_item[2] >= ranges['field2'][0] and data_item[2] <= ranges['field2'][1]]
    start, end = 42, 103
    result = [data_item[0] for data_item in data if data_item[0][1] >= start and data_item[0][1] <= end]
    # print root1.range_query(ranges)
    # print root1.get_range_data(start, end)
    # print result
    assert len(result) == len(root1.get_range_data(start, end))
    # check that getting leaves is working in final dimension
    result = [data_item[0] for data_item in data if data_item[2][1] >= start and data_item[2][1] <= end]
    assert len(result) == len(root3.get_range_data(start, end))
def get_structure():
    """Return the tree built from the values of all enabled sections."""
    return build_tree(Section.objects.filter(is_enabled=True).values())
from tree import build_tree, print_tree, car_data, car_labels
import random
# Fixed seed so the bootstrap sample below is reproducible.
random.seed(4)

# Tree trained on the full data set.
tree = build_tree(car_data, car_labels)
#print_tree(tree)

# Bootstrap sample: 1000 indices drawn with replacement from 0..999.
indices = [random.randint(0, 999) for i in range(1000)]
data_subset = [car_data[index] for index in indices]
labels_subset = [car_labels[index] for index in indices]
subset_tree = build_tree(data_subset, labels_subset)
print_tree(subset_tree)
from tree import car_data, car_labels, split, information_gain
import random
import numpy as np
# Both generators are re-seeded before the split search is defined.
np.random.seed(1)
random.seed(4)

def find_best_split(dataset, labels):
    """Return the best ``(gain, feature)`` among three random features.

    Three distinct feature indices are drawn from ``range(len(dataset[0]))``.
    The data is split on each and the split with the highest information
    gain wins. If no split has a gain above 0, ``(0, 0)`` is returned.
    """
    best_gain = 0
    best_feature = 0
    #Create features here
    features = np.random.choice(len(dataset[0]), 3, replace=False)
    for feature in features:
        data_subsets, label_subsets = split(dataset, labels, feature)
        gain = information_gain(labels, label_subsets)
        # Strict comparison: the first feature reaching a gain is kept on ties.
        if gain > best_gain:
            best_gain, best_feature = gain, feature
    return best_gain, best_feature
def run_build_system(self, extra_args='', subdir='', stdout=None, stderr='', status=0, match=None, pass_toolset=None, **kw):
    """Run the build program and check its status, stdout and stderr.

    The tree under ``self.workdir`` is snapshotted before and after the run.
    The difference is stored in ``self.difference`` and a deep copy of it in
    ``self.unexpected_difference``.

    Arguments:
        extra_args: Text appended to the command line.
        subdir: Passed to ``TestCmd.TestCmd.run`` as ``chdir``.
        stdout: Expected standard output, or None to skip the check.
        stderr: Expected standard error, or None to skip the check.
        status: Expected exit status, or None to skip the check.
        match: Callable ``match(actual, expected)``; defaults to
            ``self.match``.
        pass_toolset: Whether ``self.toolset`` is put on the command line;
            defaults to ``self.pass_toolset``.
        **kw: Forwarded to ``TestCmd.TestCmd.run``.
    """
    # Snapshot taken before the build, used for the diff at the end.
    self.previous_tree = build_tree(self.workdir)
    if match is None:
        match = self.match
    if pass_toolset is None:
        pass_toolset = self.pass_toolset
    try:
        # The command is assembled as a single space-separated string.
        if pass_toolset:
            kw['program'] = self.program + ' ' + self.toolset + ' ' + extra_args
        else:
            kw['program'] = self.program + ' ' + extra_args
        kw['chdir'] = subdir
        apply(TestCmd.TestCmd.run, [self], kw)
    except:
        # Show what the program printed, then re-raise the exception.
        print "STDOUT ============"
        print self.stdout()
        print "STDERR ============"
        print self.stderr()
        raise
    if status != None and _failed(self, status):
        expect = ''
        if status != 0:
            expect = " (expected %d)" % status
        print '"%s %s" returned %d%s' % (self.program, extra_args, _status(self), expect)
        print "STDOUT ============"
        print self.stdout()
        print "STDERR ============"
        print self.stderr()
        self.fail_test(1)
    if not stdout is None and not match(self.stdout(), stdout):
        print "Expected STDOUT =========="
        print stdout
        print "Actual STDOUT ============"
        print self.stdout()
        # NOTE(review): this rebinds the ``stderr`` argument (the expected
        # text) to the actual output. It is harmless only if fail_test below
        # does not return -- confirm.
        stderr = self.stderr()
        if stderr:
            print "STDERR ==================="
            print stderr
        self.maybe_do_diff(self.stdout(), stdout)
        self.fail_test(1)
    if not stderr is None and not match(self.stderr(), stderr):
        print "STDOUT ==================="
        print self.stdout()
        print "Expected STDERR =========="
        print stderr
        print "Actual STDERR ============"
        print self.stderr()
        self.maybe_do_diff(self.stderr(), stderr)
        self.fail_test(1)
    # Snapshot after the build and record what changed.
    self.tree = build_tree(self.workdir)
    self.difference = trees_difference(self.previous_tree, self.tree)
    self.difference.ignore_directories()
    self.unexpected_difference = copy.deepcopy(self.difference)
    self.last_build_time = time.time()
def ValidationMining(target, fn, ln):
    """Predict ``target`` for one player and return it with the real value.

    The player's recorded value is read from ``validtree``. A decision tree
    is then trained on the full ``AllTables`` data and applied to the
    player's row from ``treesource``.

    Args:
        target: Name of the label column.
        fn: Player's first name.
        ln: Player's last name.

    Returns:
        tuple: ``(pred, real)``, each "Y" or "N", or ("Unknown", "Unknown")
        when no record exists for the given name.
    """
    print("target: %s, fn: %s, ln: %s" % (target, fn, ln))

    # get real value for input
    # SECURITY(review): the query is built by string concatenation, so a name
    # containing a quote breaks or alters the statement. Switch to
    # parameterized queries once ``databaseconnection`` accepts parameters.
    r_sql = "select * from validtree where nameFirst = \'" + fn + "\' and nameLast = \'" + ln + "\' limit 1;"
    realdata = databaseconnection(r_sql)
    if realdata is None or len(realdata) == 0:
        print("No record exists for %s %s" % (fn, ln))
        return "Unknown", "Unknown"

    r_df = pd.DataFrame(list(realdata))
    r_df.columns = ['playerID', 'nameFirst', 'nameLast', 'nom', 'hof', 'man']
    r_df.fillna(value=0, inplace=True)
    real = r_df[target].iloc[0]
    real = "Y" if int(real) == 1 else "N"
    print("real value is ", real)

    # get corresponding row
    playerid = r_df['playerID'].iloc[0]
    tables = AllTables()
    dfcols = tables.cols
    row_sql = "select * from treesource where playerID = \'" + playerid + "\'"
    rowdata = databaseconnection(row_sql)
    rowdf = pd.DataFrame(list(rowdata))
    rowdf.columns = dfcols
    rowdf.fillna(value=0, inplace=True)

    # decision tree data
    sql = tables.sql
    results = databaseconnection(sql)
    print("get result from db..")
    df = pd.DataFrame(list(results))
    df.columns = dfcols
    df.fillna(value=0, inplace=True)
    df = removezero(df)

    # Save the labels before the label/identifier columns are removed.
    y = df[target].values.astype(int)
    df.drop(columns=['playerID', 'nom', 'hof', 'man'], inplace=True)
    cols = list(df.columns.values)
    # Training data and the query row get the same integer cast and rounding.
    df = df[cols].applymap(np.int64)
    df = df[cols].round(decimals=-1)
    rowdf = rowdf[cols].applymap(np.int64)
    rowdf = rowdf[cols].round(decimals=-1)
    row = rowdf.iloc[0]

    df[target] = y.tolist()
    train, test = tree.train_test_split(df)
    attributes = cols
    print("Generating decision tree..")
    root = tree.build_tree(train, attributes, target)

    print("Start to predict..")
    pred = str(tree.predict(root, row))
    pred = "Y" if int(pred) == 1 else "N"
    return pred, real
def tree5():
    """Return the tree built from the nested mapping a -> b -> 1."""
    nested = {'a': {'b': 1}}
    return build_tree(nested)
def tree3():
    """Return the tree built from the single-entry mapping b -> 1."""
    flat = {'b': 1}
    return build_tree(flat)
def tree1(data1):
    """Return the tree built from ``data1``."""
    built = build_tree(data1)
    return built
def run_build_system(
    self, extra_args='', subdir='',
    stdout = None, stderr = '',
    status = 0, match = None,
    pass_toolset = None, **kw):
    """Run the build program and check its status, stdout and stderr.

    The tree under ``self.workdir`` is snapshotted before and after the run.
    The difference is stored in ``self.difference`` and a deep copy of it in
    ``self.unexpected_difference``.

    Arguments:
        extra_args: Text appended to the command line.
        subdir: Passed to ``TestCmd.TestCmd.run`` as ``chdir``.
        stdout: Expected standard output, or None to skip the check.
        stderr: Expected standard error, or None to skip the check.
        status: Expected exit status, or None to skip the check.
        match: Callable ``match(actual, expected)``; defaults to
            ``self.match``.
        pass_toolset: Whether ``self.toolset`` is put on the command line;
            defaults to ``self.pass_toolset``.
        **kw: Forwarded to ``TestCmd.TestCmd.run``.
    """
    # Snapshot taken before the build, used for the diff at the end.
    self.previous_tree = build_tree(self.workdir)

    if match is None:
        match = self.match

    if pass_toolset is None:
        pass_toolset = self.pass_toolset

    try:
        # The command is assembled as a single space-separated string.
        if pass_toolset:
            kw['program'] = self.program + ' ' + self.toolset + ' ' + extra_args
        else:
            kw['program'] = self.program + ' ' + extra_args
        kw['chdir'] = subdir
        apply(TestCmd.TestCmd.run, [self], kw)
    except:
        # Show what the program printed, then re-raise the exception.
        print "STDOUT ============"
        print self.stdout()
        print "STDERR ============"
        print self.stderr()
        raise

    if status != None and _failed(self, status):
        expect = ''
        if status != 0:
            expect = " (expected %d)" % status

        print '"%s %s" returned %d%s' % (
            self.program, extra_args, _status(self), expect)

        print "STDOUT ============"
        print self.stdout()
        print "STDERR ============"
        print self.stderr()

        self.fail_test(1)

    if not stdout is None and not match(self.stdout(), stdout):
        print "Expected STDOUT =========="
        print stdout
        print "Actual STDOUT ============"
        print self.stdout()
        # NOTE(review): this rebinds the ``stderr`` argument (the expected
        # text) to the actual output. It is harmless only if fail_test below
        # does not return -- confirm.
        stderr = self.stderr()
        if stderr:
            print "STDERR ==================="
            print stderr
        self.maybe_do_diff(self.stdout(), stdout)
        self.fail_test(1)

    if not stderr is None and not match(self.stderr(), stderr):
        print "STDOUT ==================="
        print self.stdout()
        print "Expected STDERR =========="
        print stderr
        print "Actual STDERR ============"
        print self.stderr()
        self.maybe_do_diff(self.stderr(), stderr)
        self.fail_test(1)

    # Snapshot after the build and record what changed.
    self.tree = build_tree(self.workdir)
    self.difference = trees_difference(self.previous_tree, self.tree)
    self.difference.ignore_directories()
    self.unexpected_difference = copy.deepcopy(self.difference)

    self.last_build_time = time.time()
from tree import tree
from tree import build_tree


class walk_the(tree):
    """Tree node that can print itself and all of its descendants."""

    def print_tree(self):
        """Print this node's data, then every child subtree in order."""
        print(self.data)
        # A falsy ``children`` value means there is nothing to descend into.
        for child in self.children or ():
            child.print_tree()


build_tree(walk_the)
run = run_size.popleft( ) # to keep them aligned must pop when node pops left_run, right_run = run, run if node.left: if node.val == node.left.val - 1: left_run += 1 max_run = max(max_run, left_run) else: run = 1 level.append(node.left) run_size.append(left_run) if node.right: if node.val == node.right.val - 1: right_run += 1 max_run = max(max_run, right_run) else: run = 1 level.append(node.right) run_size.append(right_run) return max_run root = build_tree([3, 2, 4, 4, 3, None, 5, None, None, None, 4, 6, 7, 1, 5]) print(longest_run(root)) root = build_tree([4, 2, 5, 1, None, 6]) print(longest_run(root)) # Or we can DFS -> where we build run from bottom-up
def parse(json_str):
    """Decode ``json_str`` as JSON and return the tree built from the result."""
    decoded = json.loads(json_str)
    return build_tree(decoded)
while len(tree_queue) > 0: # Get elem elem = tree_queue.popleft() # push into buffer if elem.left is not None: buffer_queue.append(elem.left) if elem.right is not None: buffer_queue.append(elem.right) # push into n level n_level_queue.append(elem) counter -= 1 if counter == 0: result.append(n_level_queue) n_level_queue = collections.deque() # push into the tree queue while len(buffer_queue) > 0: tree_queue.append(buffer_queue.popleft()) counter += 1 return result sorted_arr = [11, 20, 29, 32, 41, 50, 65, 72, 91, 99] minimal_tree = build_tree(sorted_arr) list_of_depth = ListOfDepth(minimal_tree) for i, depth_list in enumerate(list_of_depth.list): print('DEPTH LEVEL: {}'.format(i)) for elem in depth_list: print(elem)
from tree import build_tree, print_tree, car_data, car_labels, classify
import random
random.seed(4)

# The features are the price of the car, the cost of maintenance, the number
# of doors, the number of people the car can hold, the size of the trunk, and
# the safety rating
unlabeled_point = ['high', 'vhigh', '3', 'more', 'med', 'med']

predictions = []
for i in range(0, 20):
    # Draw a new bootstrap sample for every tree. With a single sample drawn
    # before the loop, all 20 trees saw the same data and cast the same vote.
    indices = [random.randint(0, 999) for _ in range(1000)]
    data_subset = [car_data[index] for index in indices]
    labels_subset = [car_labels[index] for index in indices]
    subset_tree = build_tree(data_subset, labels_subset)
    predictions.append(classify(unlabeled_point, subset_tree))
print(predictions)

# Majority vote over the individual tree predictions.
final_prediction = max(predictions, key=predictions.count)
print(final_prediction)
radius_multiplicative_update_factor = args.radius_multiplicative_update_factor # row 0: walltime vs number of iterations # row 1: semilogy version of plot in row 1 fig, ax = plt.subplots(2, 1, figsize=(12, 12), sharex=True) # list solution max_steps = 18 t = [] x = np.arange(1, max_steps + 1) for num_steps in range(1, max_steps + 1): start_time = time.time() tree.build_tree(num_steps, theta_step_size, radius_length, radius_multiplicative_update_factor, plot=False) end_time = time.time() walltime = end_time - start_time t.append(walltime) t = np.array(t) # plot x vs t ax[0].plot(x, t, color='g', label='empirical data (tree.py)') # plot x vs log(t) A = np.vstack([x[t > 0], np.ones(len(x[t > 0]))]).T k, m = np.linalg.lstsq(A, np.log2(t[t > 0]), rcond=None)[0] ax[1].semilogy(x,
def run_build_system(
    self, extra_args='', subdir='',
    stdout = None, stderr = '',
    status = 0, match = None,
    pass_toolset = None, **kw):
    """Run the build program and check its status, stdout and stderr.

    Returns early, without running anything, when ``subdir`` is an absolute
    path. Otherwise the tree under ``self.workdir`` is snapshotted before and
    after the run. The difference is stored in ``self.difference`` and a deep
    copy of it in ``self.unexpected_difference``.

    Arguments:
        extra_args: Space-separated arguments appended to the command list.
        subdir: Relative directory passed to ``TestCmd.TestCmd.run`` as
            ``chdir``.
        stdout: Expected standard output, or None to skip the check.
        stderr: Expected standard error, or None to skip the check.
        status: Expected exit status, or None to skip the check.
        match: Callable ``match(actual, expected)``; defaults to
            ``self.match``.
        pass_toolset: Whether ``self.toolset`` is appended to the command;
            defaults to ``self.pass_toolset``.
        **kw: Forwarded to ``TestCmd.TestCmd.run``.
    """
    if os.path.isabs(subdir):
        if stderr:
            print "You must pass a relative directory to subdir <"+subdir+">."
        # NOTE(review): ``status`` is a local, so this assignment has no
        # effect before the return.
        status = 1
        return

    # Snapshot taken before the build, used for the diff at the end.
    self.previous_tree = build_tree(self.workdir)

    if match is None:
        match = self.match

    if pass_toolset is None:
        pass_toolset = self.pass_toolset

    try:
        # The command is assembled as a list: program, extras, toolset.
        kw['program'] = []
        kw['program'] += self.program
        if extra_args:
            kw['program'] += extra_args.split(" ")
        if pass_toolset:
            kw['program'].append(self.toolset)
        kw['chdir'] = subdir
        apply(TestCmd.TestCmd.run, [self], kw)
    except:
        # Dump the captured output, then re-raise the exception.
        self.dump_stdio()
        raise

    if status != None and _failed(self, status):
        expect = ''
        if status != 0:
            expect = " (expected %d)" % status

        print '"%s" returned %d%s' % (
            kw['program'], _status(self), expect)

        self.fail_test(1)

    if not stdout is None and not match(self.stdout(), stdout):
        print "Expected STDOUT =========="
        print stdout
        print "Actual STDOUT ============"
        print self.stdout()
        # NOTE(review): this rebinds the ``stderr`` argument (the expected
        # text) to the actual output. It is harmless only if fail_test below
        # does not return -- confirm.
        stderr = self.stderr()
        if stderr:
            print "STDERR ==================="
            print stderr
        self.maybe_do_diff(self.stdout(), stdout)
        self.fail_test(1, dump_stdio = 0)

    # Intel tends to produce some message to stderr, which makes tests
    # fail
    intel_workaround = re.compile("^xi(link|lib): executing.*\n", re.M)
    actual_stderr = re.sub(intel_workaround, "", self.stderr())

    if not stderr is None and not match(actual_stderr, stderr):
        print "STDOUT ==================="
        print self.stdout()
        print "Expected STDERR =========="
        print stderr
        print "Actual STDERR ============"
        print actual_stderr
        self.maybe_do_diff(actual_stderr, stderr)
        self.fail_test(1, dump_stdio = 0)

    # Snapshot after the build and record what changed.
    self.tree = build_tree(self.workdir)
    self.difference = trees_difference(self.previous_tree, self.tree)
    self.difference.ignore_directories()
    self.unexpected_difference = copy.deepcopy(self.difference)

    self.last_build_time = time.time()
if rstart: node.next = rstart rstart.prev = node return (lstart or node), (rend or node) def bstToDoubleLinkedList(root): if not root: return None s, e = _bst(root) return s tree = build_tree(range(0, 40, 2)) head = bstToDoubleLinkedList(tree) dll = DoublyLinkedlist() dll.head = head # print "bst to DLL...", dll # Number ways of decoding integer to alphabet 'A' -> 1, # 'B' -> 2, ..., 'Z' -> 26 # Dynamic programing: x(n) = x(n-1) + x(n-2) # https://leetcode.com/problems/decode-ways/description/ def numDecodings(s): """ :type s: str :rtype: int
def tree2():
    """Return the tree built from the nested mapping a -> c -> 1."""
    nested = {'a': {'c': 1}}
    return build_tree(nested)
def test_build():
    """Print the tree built for base point 7 from five sample permutations."""
    cycles = [[1, 2, 3], [3, 4, 5], [5, 6, 7, 8], [4, 5], [9, 10]]
    generators = [to_perm(cycle) for cycle in cycles]
    print(build_tree(7, generators))
def test_gens():
    """Print the normalized generators derived from a two-permutation tree."""
    def generators():
        # A fresh list per call, matching the two separate lists used before.
        return [to_perm([1, 2]), to_perm([1, 2, 3, 4, 5])]

    tree = build_tree(1, generators())
    print(normalize(make_gens(tree, generators())))
def run_build_system(self, extra_args='', subdir='', stdout=None, stderr='', status=0, match=None, pass_toolset=None, **kw):
    """Run the build program and check its status, stdout and stderr.

    The tree under ``self.workdir`` is snapshotted before and after the run.
    The difference is stored in ``self.difference`` and a deep copy of it in
    ``self.unexpected_difference``.

    Arguments:
        extra_args: Space-separated arguments appended to the command list.
        subdir: Passed to ``TestCmd.TestCmd.run`` as ``chdir``.
        stdout: Expected standard output, or None to skip the check.
        stderr: Expected standard error, or None to skip the check.
        status: Expected exit status, or None to skip the check.
        match: Callable ``match(actual, expected)``; defaults to
            ``self.match``.
        pass_toolset: Whether ``self.toolset`` is appended to the command;
            defaults to ``self.pass_toolset``.
        **kw: Forwarded to ``TestCmd.TestCmd.run``.
    """
    # Snapshot taken before the build, used for the diff at the end.
    self.previous_tree = build_tree(self.workdir)
    if match is None:
        match = self.match
    if pass_toolset is None:
        pass_toolset = self.pass_toolset
    try:
        # The command is assembled as a list: program, extras, toolset.
        kw['program'] = []
        kw['program'] += self.program
        if extra_args:
            kw['program'] += extra_args.split(" ")
        if pass_toolset:
            kw['program'].append(self.toolset)
        kw['chdir'] = subdir
        apply(TestCmd.TestCmd.run, [self], kw)
    except:
        # Dump the captured output, then re-raise the exception.
        self.dump_stdio()
        raise
    if status != None and _failed(self, status):
        expect = ''
        if status != 0:
            expect = " (expected %d)" % status
        print '"%s" returned %d%s' % (kw['program'], _status(self), expect)
        self.fail_test(1)
    if not stdout is None and not match(self.stdout(), stdout):
        print "Expected STDOUT =========="
        print stdout
        print "Actual STDOUT ============"
        print self.stdout()
        # NOTE(review): this rebinds the ``stderr`` argument (the expected
        # text) to the actual output. It is harmless only if fail_test below
        # does not return -- confirm.
        stderr = self.stderr()
        if stderr:
            print "STDERR ==================="
            print stderr
        self.maybe_do_diff(self.stdout(), stdout)
        self.fail_test(1, dump_stdio=0)
    # Intel tends to produce some message to stderr, which makes tests
    # fail
    intel_workaround = re.compile("^xi(link|lib): executing.*\n", re.M)
    actual_stderr = re.sub(intel_workaround, "", self.stderr())
    if not stderr is None and not match(actual_stderr, stderr):
        print "STDOUT ==================="
        print self.stdout()
        print "Expected STDERR =========="
        print stderr
        print "Actual STDERR ============"
        print actual_stderr
        self.maybe_do_diff(actual_stderr, stderr)
        self.fail_test(1, dump_stdio=0)
    # Snapshot after the build and record what changed.
    self.tree = build_tree(self.workdir)
    self.difference = trees_difference(self.previous_tree, self.tree)
    self.difference.ignore_directories()
    self.unexpected_difference = copy.deepcopy(self.difference)
    self.last_build_time = time.time()
import pdb
from collections import deque
from tree import TreeNode, build_tree, print_tree, serialize_tree

# build a sample tree
root = build_tree([3, 1, 7, -3, 2, 4, 9])
print_tree(root)

###################
# DFS using a stack
###################
# Iterative traversal that prints each node after its whole left subtree and
# before its right subtree (in-order).
print("DFS")
stack = []  # to go down the tree and backtrack up
current = root
while stack or current:  # Finish when nothing to go down or backtrack
    while current:  # go down until left most leafnode
        stack.append(current)
        current = current.left
    # Deepest unvisited node: print it, then continue in its right subtree.
    current = stack.pop()
    print(current.val)
    current = current.right

###################
# BFS using a queue
###################
print("BFS")
def test(tree, isvalid):
    """Assert that ``Solution().isValidBST`` returns ``isvalid`` for ``tree``."""
    verdict = Solution().isValidBST(build_tree(tree))
    assert isvalid == verdict
def run_build_system(self, extra_args="", subdir="", stdout=None,
    stderr="", status=0, match=None, pass_toolset=None,
    use_test_config=None, ignore_toolset_requirements=None,
    expected_duration=None, **kw):
    """Run the build program and check status, output and duration.

    Returns early, without running anything, when ``subdir`` is an absolute
    path. Otherwise the tree under ``self.workdir`` is snapshotted before and
    after the run. The difference is stored in ``self.difference`` and a deep
    copy of it in ``self.unexpected_difference``. Start and finish times are
    stored in ``self.last_build_time_start`` and
    ``self.last_build_time_finish``.

    Arguments:
        extra_args: Space-separated arguments appended to the command list.
        subdir: Relative directory passed to ``TestCmd.TestCmd.run`` as
            ``chdir``.
        stdout: Expected standard output, or None to skip the check.
        stderr: Expected standard error, or None to skip the check.
        status: Expected exit status, or None to skip the check.
        match: Callable ``match(actual, expected)``; defaults to
            ``self.match``.
        pass_toolset: Whether ``toolset=<self.toolset>`` is appended;
            defaults to ``self.pass_toolset``.
        use_test_config: Whether ``--test-config`` pointing at
            "test-config.jam" in ``self.original_workdir`` is appended;
            defaults to ``self.use_test_config``.
        ignore_toolset_requirements: Whether
            ``--ignore-toolset-requirements`` is appended; defaults to
            ``self.ignore_toolset_requirements``.
        expected_duration: Upper bound in seconds for the run, or None to
            skip the check.
        **kw: Forwarded to ``TestCmd.TestCmd.run``.
    """
    self.last_build_time_start = time.time()

    # The outer try/finally records the finish time on every exit path,
    # including the early return and a re-raised exception.
    try:
        if os.path.isabs(subdir):
            if stderr:
                print "You must pass a relative directory to subdir <"+subdir+">."
            # NOTE(review): ``status`` is a local, so this assignment has no
            # effect before the return.
            status = 1
            return

        # Snapshot taken before the build, used for the diff at the end.
        self.previous_tree = tree.build_tree(self.workdir)

        if match is None:
            match = self.match

        if pass_toolset is None:
            pass_toolset = self.pass_toolset

        if use_test_config is None:
            use_test_config = self.use_test_config

        if ignore_toolset_requirements is None:
            ignore_toolset_requirements = self.ignore_toolset_requirements

        try:
            # The command is assembled as a list: program, extras, options.
            kw['program'] = []
            kw['program'] += self.program
            if extra_args:
                kw['program'] += extra_args.split(" ")
            if pass_toolset:
                kw['program'].append("toolset=" + self.toolset)
            if use_test_config:
                kw['program'].append('--test-config="%s"'
                    % os.path.join(self.original_workdir, "test-config.jam"))
            if ignore_toolset_requirements:
                kw['program'].append("--ignore-toolset-requirements")
            kw['chdir'] = subdir
            apply(TestCmd.TestCmd.run, [self], kw)
        except:
            # Dump the captured output, then re-raise the exception.
            self.dump_stdio()
            raise
    finally:
        self.last_build_time_finish = time.time()

    if (status != None) and _failed(self, status):
        expect = ''
        if status != 0:
            expect = " (expected %d)" % status

        annotation("failure", '"%s" returned %d%s' % (kw['program'],
            _status(self), expect))

        annotation("reason", "unexpected status returned by bjam")
        self.fail_test(1)

    if not (stdout is None) and not match(self.stdout(), stdout):
        annotation("failure", "Unexpected stdout")
        annotation("Expected STDOUT", stdout)
        annotation("Actual STDOUT", self.stdout())
        # NOTE(review): this rebinds the ``stderr`` argument (the expected
        # text) to the actual output. It is harmless only if fail_test below
        # does not return -- confirm.
        stderr = self.stderr()
        if stderr:
            annotation("STDERR", stderr)
        self.maybe_do_diff(self.stdout(), stdout)
        self.fail_test(1, dump_stdio=False)

    # Intel tends to produce some messages to stderr which make tests fail.
    intel_workaround = re.compile("^xi(link|lib): executing.*\n", re.M)
    actual_stderr = re.sub(intel_workaround, "", self.stderr())

    if not (stderr is None) and not match(actual_stderr, stderr):
        annotation("failure", "Unexpected stderr")
        annotation("Expected STDERR", stderr)
        annotation("Actual STDERR", self.stderr())
        annotation("STDOUT", self.stdout())
        self.maybe_do_diff(actual_stderr, stderr)
        self.fail_test(1, dump_stdio=False)

    if not expected_duration is None:
        actual_duration = self.last_build_time_finish - self.last_build_time_start
        if (actual_duration > expected_duration):
            print "Test run lasted %f seconds while it was expected to " \
                "finish in under %f seconds." % (actual_duration,
                expected_duration)
            self.fail_test(1, dump_stdio=False)

    # Snapshot after the build and record what changed.
    self.tree = tree.build_tree(self.workdir)
    self.difference = tree.trees_difference(self.previous_tree, self.tree)
    self.difference.ignore_directories()
    self.unexpected_difference = copy.deepcopy(self.difference)
import matplotlib.pyplot as plt import matplotlib.patches as patches from tree import build_tree, Leaf from file import File N = int(sys.argv[1]) k = 10 m = 10 # Build the Data File and store it in a new kd-tree data = np.random.normal(0.5, 0.1, (N, k)) # Normal distribution #data = np.random.rand(N, k) # Uniform distribution F = File(data) tree = build_tree(F) # Global variables Xq = np.random.rand(k) # Query record PQD = [ math.inf for _ in range(m) ] # Priority queue of the m closest distances encountered at any phase of the search PQR = [ None for _ in range(m) ] # Priority queue of the record numbers of the corresponding m best matches encountered at any phase of the search Bu = [math.inf for _ in range(k)] # Coordinate upper bounds Bl = [-math.inf for _ in range(k)] # Coordinate lower bounds # Only works for the k == 2 case if k == 2: