def make_tree(kmers):
    """Build a RedBlackTree containing each distinct item of *kmers*.

    The input is walked once behind a tqdm progress bar; an item is added
    only when the tree reports that it does not already contain it.

    Args:
        kmers: Iterable of items to insert.

    Returns:
        The populated RedBlackTree.
    """
    tree = RedBlackTree()
    for kmer in tqdm(kmers, desc="adding to tree: "):
        if tree.contains(kmer):
            # Already stored; do not insert a duplicate.
            continue
        tree.add(kmer)
    return tree
def true_address_no_sort(lcp: list, au: RedBlackTree, s_arr: deque, top=100, bot=20, distance=300):
    """Map shifted suffix addresses to ``(unique start, capped span)`` pairs.

    ``lcp`` and ``s_arr`` are consumed from the left in lockstep. A deque
    passed in is mutated (``s_arr`` ends up empty); any other sequence is
    first copied into a deque, so the caller's object is left untouched.

    Args:
        lcp: One LCP value per entry of ``s_arr``.
        au: Tree queried with ``floor([sa])`` and ``ceil([sa])``. A hit is
            indexed as ``hit[0]`` (used as a range boundary) and ``hit[1]``
            (added to the suffix address); the original comment labels the
            pair ``[unam ambs]``.
        s_arr: Suffix addresses.
        top: Cap applied to the span stored in each result tuple.
        bot: Unused; kept so existing callers keep working.
        distance: Unused; kept so existing callers keep working.

    Returns:
        dict mapping ``sa + hit[1]`` to ``(lcp_curr, span)`` where ``span`` is
        ``u_ceil - sa`` capped at ``top``. Addresses for which
        ``lcp_curr + sa`` exceeds the current ceiling, or for which the tree
        has no ceiling entry, are skipped.

    Raises:
        AssertionError: if ``au`` is not a RedBlackTree or the two inputs
            differ in length.
    """
    assert isinstance(au, RedBlackTree)
    if not isinstance(lcp, deque):
        lcp = deque(lcp)
    if not isinstance(s_arr, deque):
        s_arr = deque(s_arr)
    assert len(lcp) == len(s_arr)

    # dict to be returned
    d = {}

    # Rotate once through the deque, replacing each value by the larger of
    # itself and the ORIGINAL value that preceded it.
    past = 0
    for _ in trange(len(lcp), desc="calculating unique starts: "):
        current = lcp.popleft()
        lcp.append(past if past > current else current)
        past = current

    u_ceil = 0
    u_floor = 0
    myl = []
    # The context manager closes the progress bar even on an early exit.
    with tqdm(total=len(s_arr), desc="Finding true addresses: ") as pbar:
        while s_arr:
            sa = s_arr.popleft()
            pbar.update(1)
            # get next suffix address and corresponding lcp (unique start) value
            lcp_curr = lcp.popleft()
            # Normalise each value on its own. The original guard tested the
            # deque ``lcp`` rather than ``lcp_curr``, so a non-int ``lcp_curr``
            # paired with an int ``sa`` was never converted.
            if not isinstance(sa, int):
                sa = int(sa)
            if not isinstance(lcp_curr, int):
                lcp_curr = int(lcp_curr)

            if sa > u_ceil or sa < u_floor:
                # Address left the cached [u_floor, u_ceil] range: look up the
                # enclosing boundaries again.
                # myl = list from RedBlackTree
                # [unam ambs]
                myl = au.floor([sa])
                u_floor = 0 if not myl else myl[0]
                myl = au.ceil([sa])
                if not myl:
                    continue
                u_ceil = myl[0]

            if (lcp_curr + sa) > u_ceil:
                continue

            difference = u_ceil - sa
            # do we need to add 1 here?
            # todo: is the u_floor going too low?
            # NOTE(review): when the cached range is reused, ``myl`` is
            # whatever the last ceil lookup returned. If no lookup has
            # succeeded yet (e.g. a first address of 0 with lcp 0) ``myl`` is
            # empty and the next line raises IndexError -- confirm whether
            # such input can occur.
            d[sa + myl[1]] = (lcp_curr, (difference if difference < top else top))
    return d
def rb_tree_ambs(ambs:deque, unam:deque):
    """Build a RedBlackTree of cumulative ``[unam_offset, ambs_offset]`` pairs.

    Both deques are consumed from the left, one value each per step, until
    either of them is empty. The running ``unam`` total starts at -1 and the
    running ``ambs`` total at 0.

    Args:
        ambs: Deque of values accumulated into the second element of a pair.
        unam: Deque of values accumulated into the first element of a pair.

    Returns:
        RedBlackTree holding one ``[unam_total, ambs_total]`` list per step.
    """
    tree = RedBlackTree()
    # assuming that genome starts with ambs
    unam_total, ambs_total = -1, 0
    while ambs and unam:
        ambs_total += ambs.popleft()
        unam_total += unam.popleft()
        tree.add([unam_total, ambs_total])
    return tree
def build_rb_tree(alist):
    """Build a RedBlackTree of ``RBTNode(word, embedding)`` entries.

    Each item of *alist* is split on single spaces. The first field is the
    word; the remaining fields (kept as strings) form its embedding. Items
    whose first field is not purely alphabetic are skipped.

    Args:
        alist: Iterable of space-separated strings.

    Returns:
        The populated RedBlackTree. (The original built the tree but never
        returned it, and failed with TypeError on ``len(temp_a - 1)`` for the
        first alphabetic word.)
    """
    rb_tree = RedBlackTree()
    for line in alist:
        # str.split always yields at least one field, so ``word`` is defined
        # even for an empty string.
        word, *embedding = line.split(" ")
        if word.isalpha():
            rb_tree.insert(RBTNode(word, embedding))
    return rb_tree
def main():
    ''' test rb tree '''
    # Insert a few sample keys, then print the tree in its iteration order,
    # labelling the first yielded node as the root.
    rbt = RedBlackTree()
    for key in (5, 10, 30, 3, 13):
        rbt.add(key)

    print("PRE ORDER TRAVERSAL")
    print("===================\n")

    for position, node in enumerate(rbt):
        if position:
            print(node)
        else:
            print("ROOT: " + str(node))
def linkage(df):
    """Agglomerative clustering on centroids with city-block distances.

    Every row of *df* starts as its own cluster (ids ``0 .. n-1``). A
    RedBlackTree holds a ``triple(i, j, distance)`` for each ordered pair of
    clusters. On every step the first triple yielded by the tree whose two
    clusters are both still active is merged into a new cluster ``n + z``;
    triples that reference an already-merged cluster are removed lazily.

    Args:
        df: Table with ``shape`` and label-based ``loc`` access. Rows are
            read with ``df.loc[i, :]`` for ``i`` in ``range(n)``, so the row
            labels are assumed to be ``0 .. n-1`` -- TODO confirm with callers.

    Returns:
        ``np.ndarray`` with one row per merge:
        ``[left_cluster, right_cluster, distance, merged_cluster_size]``.
    """
    tree = RedBlackTree()
    link = []
    centroids = {}
    number = {}
    n = df.shape[0]

    for i in range(n):
        centroids[i] = list(df.loc[i, :])
        number[i] = 1

    current_clusters = set(range(n))
    for i in current_clusters:
        for j in current_clusters:
            if i != j:
                tree.add(triple(i, j, cityblock(centroids[i], centroids[j])))

    for z in range(n - 1):
        # Take the first triple in tree order (presumably the smallest
        # distance; this depends on how ``triple`` is ordered) and drop stale
        # ones until both endpoints are still active clusters.
        x = next(iter(tree))
        while not (x.left_c in current_clusters and x.right_c in current_clusters):
            tree.remove(x)
            x = next(iter(tree))

        left, right = x.left_c, x.right_c
        w_left, w_right = number[left], number[right]
        merged = n + z

        # New centroid is the size-weighted mean of the two merged centroids.
        centroids[merged] = [
            (a * w_left + b * w_right) / (w_left + w_right)
            for a, b in zip(centroids[left], centroids[right])
        ]
        number[merged] = w_left + w_right
        link.append([left, right, x.d, number[merged]])

        current_clusters.add(merged)
        current_clusters.remove(left)
        current_clusters.remove(right)

        for i in current_clusters:
            if i != merged:
                tree.add(triple(i, merged, cityblock(centroids[i], centroids[merged])))

    return np.array(link)
class IntervalTree:
    """Interval container backed by a RedBlackTree.

    Only insertion and the pairwise overlap predicate are implemented;
    ``find_overlap`` is still a stub.
    """

    def __init__(self):
        # One tree per instance. The original kept ``rbTree`` as a class
        # attribute, so every IntervalTree shared (and mutated) the same tree.
        self.rbTree = RedBlackTree()

    def insert(self, value):
        """Add *value* to the underlying tree and return the tree's result."""
        return self.rbTree.add(value)

    def overlapping(self, i1, i2):
        """Return True when the closed intervals *i1* and *i2* intersect.

        Both arguments must expose ``minimum`` and ``maximum`` attributes.
        """
        return i1.minimum <= i2.maximum and i1.maximum >= i2.minimum

    # Return all overlapping intervals
    def find_overlap(self, value: Interval):
        """Not implemented yet; currently always returns None."""
        # If the interval is less than root.min, traverse left, otherwise right
        return None
def func(li):
    """Return, for each number in *li*, one plus the larger neighbour depth.

    Numbers are processed in order. For each one the tree is asked for its
    ceiling and floor entries; the stored depth is one more than the larger
    ``value`` of those neighbours, or 0 when the tree has neither. The number
    is then added to the tree as ``My_pair(num, depth)``.

    Args:
        li: Iterable of keys accepted by ``My_pair``.

    Returns:
        List of depths, one per input number, in input order.
    """
    tree = RedBlackTree()
    depths = []
    for num in li:
        next_node = tree.ceil(My_pair(num, None))
        prev_node = tree.floor(My_pair(num, None))

        parent_depth = -1
        for node in (next_node, prev_node):
            # Identity test: ``!= None`` would dispatch to any custom
            # comparison defined on the node type.
            if node is not None:
                parent_depth = max(parent_depth, node.value)

        depth = parent_depth + 1
        tree.add(My_pair(num, depth))
        depths.append(depth)
    return depths
def func(li):
    """Overlay ``(a, h, b)`` intervals by ascending ``h`` and list breakpoints.

    *li* is sorted IN PLACE by its middle element. For each triple, the value
    in effect at ``b`` is looked up (floor of ``b``, 0 if none), every tree
    entry whose key lies in ``[a, b]`` is removed, and two entries are added:
    ``(a, h)`` and ``(b, previous value at b)``.

    Args:
        li: Mutable list of ``(a, h, b)`` triples; reordered by this call.

    Returns:
        List of ``(key, value)`` tuples in the tree's iteration order
        (empty when *li* is empty).
    """
    li.sort(key=lambda item: item[1])
    tree = RedBlackTree()
    for a, h, b in li:
        # The original rebuilt a full snapshot of the tree here on every
        # iteration and never read it; that dead O(n) pass has been removed.
        prev_b = tree.floor(My_pair(b, None))
        height_b = 0 if prev_b is None else prev_b.value

        # Drop every breakpoint covered by [a, b].
        next_a = tree.ceil(My_pair(a, None))
        while next_a is not None and next_a.key <= b:
            tree.remove(next_a)
            next_a = tree.ceil(My_pair(a, None))

        tree.add(My_pair(a, h))
        tree.add(My_pair(b, height_b))

    return [(element.key, element.value) for element in tree]
# Tail of a function whose beginning lies before this chunk.
    return dir_data


def get_train_data(array7=[], *args):
    """Concatenate the data of every directory named in *array7*.

    Each name is joined onto the module-level ``path_to_root_directory`` and
    passed to ``get_dir_data``; the results are appended in order.

    Args:
        array7: Directory names. The default list is never mutated here.
        *args: Accepted but unused.

    Returns:
        The concatenation of all ``get_dir_data`` results ("" when *array7*
        is empty).
    """
    train_data = ""
    for dir_name in array7:
        path_to_dir = os.path.join(path_to_root_directory, dir_name)
        dir_data = get_dir_data(path_to_dir)
        train_data += dir_data
    return train_data


#print(get_train_data(array7))
file_content = get_train_data(array7)

# Insert every 5-gram of the whitespace-split text into the tree.
# NOTE(review): ``ngrams`` is defined outside this chunk -- presumably
# nltk.ngrams; confirm.
tree = RedBlackTree()
for f in ngrams(file_content.split(), 5):
    tree.red_black_insert(f)
dictionary = tree.get_elements()

#for keys, values in dictionary.items():
#    print(keys, values)

# Keys ordered by their associated value, largest first.
Sorted_List = sorted(dictionary, key=dictionary.__getitem__, reverse=True)

# The range is evaluated once, from the initial length, so this drops the
# trailing 70% of the list (the lowest-valued keys).
for x in range(0, int(0.7 * len(Sorted_List)), 1):
    Sorted_List.pop()
print("\n")
for x in Sorted_List:
    print(x)

# Opened for writing; the code that uses and closes this handle is not
# visible in this chunk.
Writing_file_Reference = open("new_text.txt", "w")