def find_top_k_with_avltree(filename=TDATA, k=10):
    """
    Count the stripped lines of *filename* in an AVL tree and return the
    k most frequent ones.

    The return value is the internal heap list of ``(count, line)`` tuples,
    so it is heap-ordered rather than fully sorted.

    Profile result:
       5 million strings:
       memory consuming: 259 MB
       time consuming: 88.2190001011
       [(753, 'bf'), (753, 'qj'), (753, 'zb'), (753, 'vz'), (763, 'ma'),
        (755, 'lx'), (779, 'qp'), (768, 'bg'), (758, 'eq'), (767, 'tf')]
    """
    counts = FastAVLTree()
    with open(filename) as handle:
        for raw_line in handle:
            word = raw_line.strip()
            counts[word] = counts.setdefault(word, 0) + 1

    # Min-heap bounded to k entries: once it is full, every further entry
    # is pushed and the smallest one is popped straight away.
    top = []
    for word, freq in counts.iter_items():
        entry = (freq, word)
        if len(top) >= k:
            heapq.heappushpop(top, entry)
        else:
            heapq.heappush(top, entry)
    return top
class BlockIndex:
    """Blocks stored in an AVL tree keyed by ``block.offset + block.size``."""

    def __init__(self):
        self.avl = FastAVLTree()

    def add_block(self, block):
        """Store *block* under the key ``block.offset + block.size``."""
        end_key = block.offset + block.size
        self.avl[end_key] = block

    def get_blocks(self, start, length):
        """
        Yield candidate blocks for the range ``[start, start + length)``.

        First every block whose key lies in the tree range start..end is
        yielded. If there was at least one, the successor of the last key
        is yielded too when its offset is below ``end``; otherwise the
        ceiling item of ``start`` is yielded. A KeyError from either
        lookup simply ends the generator.
        """
        end = start + length
        last_key = None
        any_found = False
        for last_key, blk in self.avl.iter_items(start, end):
            any_found = True
            yield blk
        try:
            if any_found:
                _, follower = self.avl.succ_item(last_key)
                if follower.offset < end:
                    yield follower
            else:
                _, follower = self.avl.ceiling_item(start)
                yield follower
        except KeyError:
            pass

    def get_chunks(self, offset, length):
        """
        Yield ``(chunk_offset, chunk_size, block)`` tuples covering the
        requested range, walking the blocks returned by ``get_blocks``.

        Blocks that do not contain the current read position are skipped.
        """
        cursor = offset
        remaining = length
        for blk in self.get_blocks(offset, length):
            assert remaining >= 0
            if remaining == 0:
                break
            blk_end = blk.offset + blk.size
            if not (blk.offset <= cursor and cursor < blk_end):
                continue
            inner_offset = cursor - blk.offset
            take = min(remaining, blk.size - inner_offset)
            yield (inner_offset, take, blk)
            cursor += take
            remaining -= take
def reset(self):
    """
    Forget every client: replaces the client table with an empty dict and
    the ranking structure with a fresh FastAVLTree.

    :return: None
    """
    self.clients = dict()
    self.sorted_clients = FastAVLTree()
def competetive_fast_marching(vertices, graph, seeds):
    '''
    Label all vertices on the highres mesh to the closest seed vertex
    using a balanced binary search tree.

    :param vertices: array whose first dimension is the number of vertices
    :param graph: passed through unchanged to ``add_neighbours``
    :param seeds: array of seed vertex indices; seed ``i`` gets label ``i``
    :return: int64 array of shape (n_vertices, 2); column 0 holds the
             vertex index, column 1 the assigned label (-1 if unlabelled)

    Calls ``sys.exit`` if an edge is popped whose source has no label.
    '''
    import numpy as np
    import sys
    from bintrees import FastAVLTree

    # make a labelling container to be filled with the search tree
    # first column are the vertex indices of the complex mesh
    # second column are the labels from the simple mesh
    # (-1 for all but the corresponding points for now)
    labels = np.zeros((vertices.shape[0], 2), dtype='int64') - 1
    labels[:, 0] = range(vertices.shape[0])
    for i in range(seeds.shape[0]):
        labels[seeds[i]][1] = i

    # initiate AVLTree for binary search
    # organisation of the tree will be
    # key: edge length; value: tuple of vertices (source, target)
    tree = FastAVLTree()

    # add all neighbours of the voronoi seeds
    for v in seeds:
        add_neighbours(v, 0, graph, labels, tree)

    # Competetive fast marching starting from voronoi seeds
    printcount = 0
    while tree.count > 0:
        printcount += 1

        # pop the item with minimum edge length
        min_item = tree.pop_min()
        length = min_item[0]
        source = min_item[1][0]
        target = min_item[1][1]

        # if target has no label yet (but source does!), assign label of
        # source; an already labelled target is simply dropped
        if labels[target][1] == -1:
            if labels[source][1] == -1:
                sys.exit('Source has no label, something went wrong!')
            labels[target][1] = labels[source][1]

            # test if labelling is complete
            if np.any(labels[:, 1] == -1):
                # if not, add neighbours of target to tree
                add_neighbours(target, length, graph, labels, tree)
            else:
                break

        # progress monitoring; the single-argument print(...) form is
        # valid on both Python 2 and Python 3
        if printcount % 100 == 0:
            print('tree ' + str(tree.count))
            print('labels ' + str(np.where(labels[:, 1] == -1)[0].shape[0]))

    return labels
def __init__(self):
    """Start with no known clients and an empty ranking tree."""
    # Ranking accelerator: an AVL tree in which each node holds a list of
    # Client instances (all the clients sharing the same score), so sorted
    # access costs O(log(N)).
    ranking = FastAVLTree()

    # Clients that have reported a score, as <client_id> : <client>
    #
    #   <client_id> (int)  : Id that uniquely specifies the client.
    #   <client> (Client)  : Information of the client (including score)
    known_clients = {}

    self.clients = known_clients
    self.sorted_clients = ranking
def getNewAVL(seq):
    """
    Build an AVL tree from *seq*.

    Uses ``bintrees.FastAVLTree`` when ``has_fast_tree_support()`` is
    truthy and ``bintrees.AVLTree`` otherwise.
    """
    if not has_fast_tree_support():
        from bintrees import AVLTree
        return AVLTree(seq)

    from bintrees import FastAVLTree
    return FastAVLTree(seq)
def __init__(self, field, directory):
    '''
    Initializes the BinTreeIndex class.

    Parameters
    ----------
    field : str
        The metadata field name that the index represents
    directory : str
        The directory location where the index file will be saved;
        it is concatenated with the field name as-is, without adding
        a path separator

    Returns
    -------
    An initialized BinTreeIndex object
    '''
    # index properties
    self.field = field
    self.directory = directory
    self.file = self.directory + self.field + '.idx'

    if not os.path.exists(self.file):
        # nothing saved yet: start with an empty tree
        self.index = FastAVLTree()
    else:
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only safe when the index file comes from a trusted location.
        with open(self.file, "rb", buffering=0) as fd:
            self.index = pickle.load(fd)
def __init__(self):
    """Create an empty book: five AVL trees and zeroed counters."""
    # Counters
    self._last_order_id = 0          # Increases with each order processed
    self._total_ask_size = 0         # For monitoring purpose
    self._total_bid_size = 0         # For monitoring purpose
    self._cleared_orders_count = 0   # For monitoring purpose
    self._total_volume_trad = 0      # For monitoring purpose
    self._total_volume_pending = 0   # For monitoring purpose

    # AVL trees are used as the main structure due to their optimal
    # performance features for this purpose
    self._participants = FastAVLTree()
    self._order_owners = FastAVLTree()  # Assigning ID -> Owner
    self._price_ids = FastAVLTree()     # Assigning ID -> Price
    self._asks = FastAVLTree()          # MIN Heap
    self._bids = FastAVLTree()          # MAX Heap
def scan_1D(result, d, self_edges=False, weight=False, bound=None,
            handling=None, memory=False):
    """
    Build one proximity graph per time step from 1-D positions.

    :param result: nested sequence indexed as ``result[i][0][t]``, giving
                   the position of point ``i`` at time step ``t``
    :param d: points whose positions differ by at most this much are linked
    :param self_edges: also add an edge from a point to itself
    :param weight: currently unused (every edge gets ``weight=1``)
    :param bound: wrap-around length, used only when ``handling == "Torus"``
    :param handling: ``"Torus"`` enables wrap-around lookups at both ends
    :param memory: if true, each graph starts as a copy of the previous one
    :return: list of graphs, one per time step

    Positions of 999999999.0 or more are treated as out of bounds and
    ignored. A dot is written to stdout after every time step.
    """
    # recover sim parameters
    N = len(result[0][0])  # "N" = N + 1
    P = len(result)
    out_of_bounds = 999999999.0
    # compare by value: identity on a str literal relies on interning
    torus = handling == "Torus"

    G = []  # list of graphs
    for t in range(N):
        # copy previous graph or start fresh as appropriate
        if t == 0 or not memory:
            G.append(nx.Graph())
        else:
            G.append(G[t - 1].copy())
        G[t].add_nodes_from(range(P))

        positions = [result[i][0][t] for i in range(P)]

        # insert all points not out of bounds
        idx = FastAVLTree()
        for i, pos in enumerate(positions):
            if pos < out_of_bounds:
                idx.insert(pos, i)

        for i, pos in enumerate(positions):
            if pos < out_of_bounds:
                minimum = pos - d
                maximum = pos + d
                # get all results within range
                hits = [v for (k, v) in idx.item_slice(minimum, maximum)]
                if torus and maximum > bound:
                    hits += [
                        v for (k, v) in idx.item_slice(0, maximum % bound)
                    ]
                if torus and minimum < 0:
                    hits += [
                        v
                        for (k, v) in idx.item_slice(minimum % bound, bound)
                    ]
                for j in hits:
                    if self_edges or i != j:
                        G[t].add_edge(i, j, weight=1)
                # add something to handle weight case here
        stdout.write(".")
        stdout.flush()
    print("")
    return G
class AkashicRecord(object):
    """
    Database of Tumblr posts.

    Original design notes: get by id, sorted with unix_timestamp,
    viewed flag, new note check.
    """

    def __init__(self):
        self.impl = FastAVLTree()

    def get(self, post_id):
        """Return the post stored under *post_id* via ``impl.get``."""
        stored = self.impl.get(post_id)
        return stored

    def put(self, post):
        """Insert *post* keyed by ``post.id``; asserts it is a Post."""
        assert isinstance(post, Post)
        post_key = post.id
        return self.impl.insert(post_key, post)

    def get_after(self, start):
        """Not implemented yet; always returns None."""
        return None
def _submit_lmt(self, side, size, price, pi_d):
    """
    Submits LMT order to book.

    Registers a new order id for participant *pi_d*, records its price
    and side, and stores *size* in the price level of the matching book
    side ('ask' goes to the asks, anything else to the bids).
    Returns the new order id.
    """
    # Assign order ID
    order_id = self._get_order_id()

    # Pending volume monitoring
    self._total_volume_pending += size
    self._price_ids.insert(order_id, (price, side))

    # Keep track of participant orders, book will be asked for sure
    if pi_d in self._participants:
        self._participants.get(pi_d, []).append(order_id)
    else:
        self._participants.insert(pi_d, [order_id])
    self._order_owners.insert(order_id, pi_d)

    # Pick the right (correct) side of the book
    if side == 'ask':
        self._total_ask_size += size
        book_side = self._asks
    else:  # bid
        self._total_bid_size += size
        book_side = self._bids

    # Reuse the price level when it exists, otherwise register a new one
    level = book_side.get(price, FastAVLTree())
    level.insert(order_id, size)
    if price not in book_side:
        book_side.insert(price, level)

    return order_id
class GeoDB(object):
    """
    Container object for the geo tree that allows applications to easily
    lookup country information by IP address.
    """

    def __init__(self, csvfile=None, parser=None):
        """
        Parse *csvfile* and load the result into a FastAVLTree.

        :param csvfile: handed unchanged to ``parser.parse``
        :param parser: object exposing ``parse(csvfile)``; when omitted a
                       new ``MaxMindGeoLiteCSVParser`` is created for this
                       instance (instead of one shared instance built when
                       the class is defined)
        """
        if parser is None:
            parser = MaxMindGeoLiteCSVParser()
        treelist = parser.parse(csvfile)
        self.tree = FastAVLTree(treelist)
        # Done with the list, remove it since it can be rather large
        del treelist

    def lookup(self, ip, default=None):
        """
        Get the GeoValue object for a given IP address.

        *ip* is used directly as the tree key; *default* is returned when
        the key is not present.

        NOTE(review): the original documentation says IP addresses may be
        either a Long or a dotted-decimal formatted string; no conversion
        happens here, so confirm that the tree keys support both forms.
        """
        return self.tree.get(ip, default)
def __init__(self, items=(), key=None, maxitems=None, maxkey=None):
    """
    Create a new PriorityQueueSet.

    items:    An initial iterable of items - it can be unsorted and
              non-unique. Every item is inserted through ``add``.
    key:      Function mapping an item to its tree key; identity when None.
    maxitems: Stored as-is; consulted by ``add``.
    maxkey:   Stored as-is; consulted by ``add``.
    """
    # immutable default for `items` and an identity test for None
    if key is None:
        self.key = lambda x: x
    else:
        self.key = key
    self.tree = FastAVLTree()
    self.maxitems = maxitems
    self.maxkey = maxkey
    for x in items:
        self.add(x)
def cavl_build_delete():
    """Build a FastAVLTree from the global ``keys``, then delete each key."""
    fast_tree = FastAVLTree.from_keys(keys)
    for k in keys:
        del fast_tree[k]
from __main__ import avl_build_delete, avl_build, avl_search """ setup_FastAVLTree = """ from __main__ import cavl_build_delete, cavl_build, cavl_search """ try: fp = open('testkeys.txt') keys = eval(fp.read()) fp.close() except IOError: print("create 'testkeys.txt' with profile_bintree.py\n") sys.exit() py_searchtree = AVLTree.from_keys(keys) cy_searchtree = FastAVLTree.from_keys(keys) def avl_build_delete(): tree = AVLTree.from_keys(keys) for key in keys: del tree[key] def cavl_build_delete(): tree = FastAVLTree.from_keys(keys) for key in keys: del tree[key] def avl_build():
def group_lines(layout, pts_thres=4.0):
    """
    Find columns and row_bboxes from line segments.

    Traverses *layout*, collecting curve elements and line elements whose
    larger dimension exceeds *pts_thres*, groups the lines into rows and
    columns, aligns those groups in trees and bounds each aligned set into
    a candidate table box.

    Returns a tuple ``(segments, curves, tables)`` where ``tables`` is the
    row-major candidates followed by the column-major candidates.

    TODO: combine line-based detection with clustering of alignments
    """
    segments = []
    curves = []

    # Group segments shifted in parallel, allow for small mismatch
    # caused by formatting.
    # Not using sorting because it is similar to clustering without
    # well-ordering of segments.
    # This is the C version, use AVLTree for Python compatibility.
    h_segs_by_x = FastAVLTree()
    v_segs_by_y = FastAVLTree()

    # Analyzes the pdf for line regions that could potentially
    # contain a table
    def process_segment_func(e):
        # exact-type test: subclasses of LTCurve are not collected here
        if type(e) is LTCurve:
            curves.append(e)
        # Only keep lines here
        if isinstance(e, LTLine) and max(e.width, e.height) > pts_thres:
            segments.append(e)
            group_segs(e, h_segs_by_x, v_segs_by_y, pts_thres)

    # Recursively traverse the PDF document tree and apply func
    traverse_layout(layout, process_segment_func)

    # Segments grouped and sorted into rows/cols
    row_group = sorted_groups(v_segs_by_y, group_key=lambda l: l.x0)
    col_group = sorted_groups(h_segs_by_x, group_key=lambda l: l.y0)

    # Now group rows/cols into tables
    rows_by_x0 = FastAVLTree()
    cols_by_y0 = FastAVLTree()

    def seg_close(s1, s2):
        return segment_diff(s1, s2) < pts_thres

    # NOTE(review): x0/x1/y0/y1 are names defined outside this function,
    # presumably index constants into a bbox - confirm.
    for row_bbox, row_segs in row_group:
        bbox_key = (row_bbox[x0], row_bbox[x1])
        align_add_to_tree(rows_by_x0, bbox_key, row_bbox, seg_close)

    for col_bbox, col_segs in col_group:
        bbox_key = (col_bbox[y0], col_bbox[y1])
        align_add_to_tree(cols_by_y0, bbox_key, col_bbox, seg_close)

    # Extract bbox of potential tables
    row_major_tables = [bound_bboxes(rows) for rows in rows_by_x0.values()]
    col_major_tables = [bound_bboxes(cols) for cols in cols_by_y0.values()]

    # Filter non-tables and consolidate duplicates
    tables = row_major_tables + col_major_tables
    # table_proto is not read again in this function
    table_proto = (["x0", "x1", "xn"], ["y0", "y1", "y2", "yn"])
    # find non-overlapping columns and output those as tables
    # store line locations so that we can check
    # if a line exists betwen text lines

    # For debugging:
    # tables =
    # row_bboxes is computed but not returned or used below
    row_bboxes = [b for b,_ in row_group]
    return segments, curves, tables
# n * log n solution from bintrees import FastAVLTree # read in the first line of input k, n = [int(x) for x in raw_input().split(' ')] # read in the second line of input A = [int(x) for x in raw_input().split(' ')] assert len(A) == n # check all the subsequences and keep the length best_sequence_length = 0 min_tree = FastAVLTree() max_tree = FastAVLTree() def ok(): if max_tree.is_empty(): return True else: difference = max_tree.max_item()[0][0] \ - min_tree.min_item()[0][0] return difference <= k start, end = 0, 0 while end < len(A): # extend the sequence until violation reached. while end < len(A) and ok():
class Scoreboard():
    """
    Keeps Scoreboard info and allows highly efficient checks.

    On the one hand, keeps a hash table (dict) to the updated information
    of each client. On the other hand, keeps a lookup accelerator that
    allows to retrieve client score sorting with logarithmic complexity
    (O(log N)).
    """

    def __init__(self):
        # Clients that have reported score
        # <key> :<value> -> <client_id> : <client>
        #
        # where:
        #
        #   <client_id> (int)  : Id that uniquely specifies the client.
        #   <client> (Client)  : Information of the client (including score)
        #
        self.clients = {}

        # AVL tree in which each node is a list of Client instances (i.e.,
        # all the clients with the same score)
        # Allows to access sorted info in O(log(N))
        self.sorted_clients = FastAVLTree()

    def reset(self):
        """
        Resets all info.

        :return: None
        """
        self.clients = {}
        self.sorted_clients = FastAVLTree()

    def get(self, client_id):
        """
        Returns the stored entry of the specified client.

        :param client_id: (int) The id of the client.
        :return: The entry stored in ``self.clients`` for that id.
                 None if not found.
        """
        return self.clients.get(client_id)

    def update(self, client_info):
        """
        Modifies the client total score.

        :param client_info: (dict) A JSON submitted by the client, as
                            specified in the Code Challenge:

                            Examples:
                              {"user": 123, "total": 250}
                              {"user": 456, "score": "+10"}
                              {"user": 789, "score": "-20"}

        :return: The value returned by the client's ``total``/``relative``
                 call (documented as True if successfully updated);
                 False when *client_info* is invalid, i.e. a KeyError or
                 ValueError was raised while processing it.

        An invalid report no longer corrupts the ranking: a client that
        was ranked before the call is ranked again afterwards, and a
        client that is known but not ranked can still be updated later.
        """
        client = None
        was_ranked = False
        try:
            #
            # Handle first report/old sorting order
            #
            client_id = int(client_info["user"])
            client = self.clients.get(client_id)
            if client is None:
                # First client report
                client = Client(client_id)
                self.clients[client_id] = client
            else:
                # Remove client prior score, since it is going to be modified
                was_ranked = self._unrank(client)

            #
            # Compute/Update new score
            #
            try:
                result = client.total(client_info["total"])
            except KeyError:
                # Try with relative update
                result = client.relative(client_info["score"])

            #
            # Update/restore client sorting order
            #
            self._rank(client)
        except (KeyError, ValueError):
            # Invalid client_info: put a previously ranked client back
            if was_ranked:
                self._rank(client)
            result = False
        return result

    def _unrank(self, client):
        """Remove *client* from the ranking; True if it was ranked."""
        score = client.score
        tied = self.sorted_clients.get(score)
        if tied is None or client not in tied:
            return False
        if len(tied) > 1:
            # There are other clients with that score
            tied.remove(client)
        else:
            # The only one with that score
            del self.sorted_clients[score]
        return True

    def _rank(self, client):
        """Append *client* to the ranking entry of its current score."""
        score = client.score
        if score not in self.sorted_clients:
            # First client with that score. Initialize an empty list to
            # hold all users with that same score.
            self.sorted_clients.insert(score, [])
        self.sorted_clients[score].append(client)

    def top(self, top_size):
        """
        Returns the clients that occupy the specified number of top ranking
        positions (i.e., those with the higher score values), according to
        the absolute ranking.

        IMPLEMENTATION NOTE: If more than one client is tied in a given
        position the returned list considers them as a single ranking
        position.

        Example:
            [{"user": 123, "total": 100},
             {"user": 456, "total": 200},
             {"user": 789, "total": 100}]

        The top-2 (i.e., top_size = 2) is:
            [1st-{"user": 456, "total": 200},
             2nd-{"user": 123, "total": 100}
             2nd-{"user": 789, "total": 100}
            ]

        :param top_size: (int) Number of higher ranking positions to
                         retrieve.
        :return: (list of Client) The clients that occupy the specified
                 ranking positions. Empty when a TypeError is raised
                 (e.g. a non-integer *top_size*).
        """
        result = []
        try:
            for _score, tied in self.sorted_clients.nlargest(top_size):
                result.extend(tied)
        except TypeError:
            pass
        return result

    def relative_top(self, ranking_position, scope_size):
        """
        Returns the clients in the specified scope around the one that
        occupies the specified ranking position, according to the absolute
        ranking.

        The scope around a given ranking position is defined as the clients
        that occupy the scope_size ranking positions before the client that
        occupies the specified ranking position, followed by the scope_size
        ranking positions after it.

        Example:
            [{"user": 1, "total": 150},
             {"user": 2, "total": 200},
             {"user": 3, "total": 100},
             {"user": 4, "total": 300},
             {"user": 5, "total": 120},
             {"user": 6, "total": 90}]

        The relative_top(ranking_position=3, scope_size=2) is the following:
            [{"user": 4, "total": 300}
             {"user": 2, "total": 200},
             {"user": 1, "total": 150},
             {"user": 5, "total": 120},
             {"user": 3, "total": 100}
            ]

        Since the 3rd ranking position is occupied by
        {"user": 1, "total": 150}, the full requested positions are:
        1st, 2nd, 3rd, 4th, and 5th (i.e., from
        (ranking_position - scope_size) to (ranking_position + scope_size)).
        Positions that do not exist are simply left out.

        IMPLEMENTATION NOTE: If more than one client is tied in a given
        position the returned list considers them as a single ranking
        position. (same as with top but for relative ranking)

        :param ranking_position: (int) Ranking position to retrieve scope
                                 around. Must be a positive value, from
                                 1 to N.
        :param scope_size: (int) Scope size (see explanation above). Must
                           not be negative.
        :return: (list of Client) The clients that occupy the specified
                 ranking positions; empty for out-of-range arguments.
        """
        result = []
        if ranking_position >= 1 and scope_size >= 0:
            try:
                positions = self.sorted_clients.nlargest(
                    len(self.sorted_clients))
                window = self._scope_window(positions, ranking_position,
                                            scope_size)
                for _score, tied in window:
                    result.extend(tied)
            except TypeError:
                pass
        return result

    @staticmethod
    def _scope_window(positions, ranking_position, scope_size):
        """Slice *positions* (best first) to the requested 1-based scope."""
        # Slicing clamps at both ends, so every truncation case (left,
        # right, both, or a window past the end) is handled uniformly.
        first = max(ranking_position - scope_size - 1, 0)
        last = ranking_position + scope_size
        return positions[first:last]
def __init__(self):
    """Create the index with an empty FastAVLTree in ``self.avl``."""
    empty_tree = FastAVLTree()
    self.avl = empty_tree
def get_tree(dico, size):
    """
    Build a FastAVLTree whose keys are ``dico[0]`` .. ``dico[size - 1]``.

    Every key is inserted with the value None.

    :param dico: indexable container supplying the keys
    :param size: number of leading entries of *dico* to insert
    :return: the populated tree (the visible code built the tree but
             never handed it back to the caller)
    """
    tree = FastAVLTree()
    for i in range(size):
        tree.insert(dico[i], None)
    return tree
def construct(src=None):
    """Return a new FastAVLTree created by passing *src* to its constructor."""
    tree = FastAVLTree(src)
    return tree
class PriorityQueue(object):
    """
    Combined priority queue and set data structure. Acts like a priority
    queue, except that its items are guaranteed to be unique per key.

    Items are kept in a FastAVLTree under ``key(item)``; membership tests,
    insertion and removal of the smallest item all go through that tree.

    Important: the keys of this data structure must be comparable.
    This is true of Python's built-in objects, but you should implement
    the comparison methods if you want to use the data structure for
    custom objects.
    """

    def __init__(self, items=(), key=None, maxitems=None, maxkey=None):
        """
        Create a new PriorityQueueSet.

        items:    An initial iterable of items - it can be unsorted and
                  non-unique. Every item is inserted through ``add``.
        key:      Function mapping an item to its tree key; identity
                  when None.
        maxitems: When truthy, ``add`` drops the largest key whenever the
                  tree holds more than this many entries.
        maxkey:   When truthy, ``add`` ignores items whose key is larger.
        """
        if key is None:
            self.key = lambda x: x
        else:
            self.key = key
        self.tree = FastAVLTree()
        self.maxitems = maxitems
        self.maxkey = maxkey
        for x in items:
            self.add(x)

    def has_item(self, item):
        """
        Check if the key of *item* exists in the queue.

        Tests key membership directly, so stored values that are falsy
        (0, '', empty containers) are still reported as present.
        """
        return self.key(item) in self.tree

    def pop_smallest(self):
        """Remove and return the ``(key, item)`` pair with the smallest key."""
        return self.tree.pop_min()

    def peek(self, d=None):
        """Return the item with the smallest key, or *d* if there is none."""
        try:
            return self.tree.min_item()[1]
        except (ValueError, KeyError):
            # the tree signals "empty" with an exception
            return d

    def __setitem__(self, key, value):
        self.tree[self.key(key)] = value

    def __getitem__(self, item):
        return self.tree[self.key(item)]

    def update(self, item):
        """
        Update by removing and re-inserting the entries under item's key.

        NOTE(review): both slices run from key to the same key; if tree
        slicing is half-open this selects nothing and the call is a
        no-op - confirm against the tree's slicing semantics.
        """
        itemsbykey = self.tree[self.key(item):self.key(item)]
        del self.tree[self.key(item):self.key(item)]
        for x in itemsbykey:
            self.add(x)

    def add(self, item):
        """
        Add *item* to the queue.

        The item is stored only if nothing (or None) is stored under its
        key yet, and only if its key does not exceed ``maxkey``.
        """
        item_key = self.key(item)
        # NOTE: a maxkey/maxitems of 0 is falsy and therefore means
        # "no limit", exactly as before.
        if self.maxkey and item_key > self.maxkey:
            return
        if self.tree.get(item_key, None) is None:
            self.tree[item_key] = item
        # shouldn't it be pop biggest? [yes, hence a tree]
        if self.maxitems and len(self.tree) > self.maxitems:
            self.tree.pop_max()

    def prettyprint(self):
        """Concatenate ``prettyprint()`` of every stored item, in key order."""
        pp = operator.methodcaller('prettyprint')
        return "".join(map(pp, self.tree.values()))
class BinTreeIndex(Index):
    '''
    Binary tree to index high cardinality fields.
    Uses bintrees package: https://pypi.python.org/pypi/bintrees/2.0.2
    A set of values is kept for each key, to allow multiple (but unique)
    values.
    '''

    def __init__(self, field, directory):
        '''
        Initializes the BinTreeIndex class.

        Parameters
        ----------
        field : str
            The metadata field name that the index represents
        directory : str
            The directory location where the index file will be saved;
            it is concatenated with the field name as-is

        Returns
        -------
        An initialized BinTreeIndex object
        '''
        # index properties
        self.field = field
        self.directory = directory
        self.file = self.directory + self.field + '.idx'

        if not os.path.exists(self.file):
            # nothing saved yet: start with an empty tree
            self.index = FastAVLTree()
        else:
            # NOTE(review): pickle.load executes arbitrary code from the
            # file; only safe when the index file is trusted.
            with open(self.file, "rb", buffering=0) as fd:
                self.index = pickle.load(fd)

    def add_key(self, key):
        '''
        Adds a new index key (i.e. possible metadata field value) and
        initializes it as empty (i.e. no primary keys associated with it).

        Parameters
        ----------
        key : str
            The metadata field value

        Returns
        -------
        Nothing, modifies in-place.
        '''
        # an empty set that will hold all pks matching this value
        empty_pks = set()
        self.index[key] = empty_pks

    def add_pk(self, key, pk):
        '''
        Adds a primary key to an index key (i.e. metadata field value).

        Parameters
        ----------
        key : str
            The metadata field value
        pk : str
            Primary key identifier

        Returns
        -------
        Nothing, modifies in-place.
        '''
        pks = self.index[key]
        pks.add(pk)

    def remove_pk(self, key, pk):
        '''
        Removes a primary key from an index key (i.e. metadata field
        value) and drops the index key once no primary keys are left.

        Parameters
        ----------
        key : str
            The metadata field value
        pk : str
            Primary key identifier

        Returns
        -------
        Nothing, modifies in-place.
        '''
        self.index[key].discard(pk)

        # clear key if no further primary keys left
        if not len(self.index[key]):
            self.remove_key(key)

    def keys(self):
        '''
        Returns the index keys (i.e. possible metadata values).

        Parameters
        ----------
        None

        Returns
        -------
        List of index keys.
        '''
        all_keys = self.index.keys()
        return list(all_keys)

    def values(self):
        '''
        Returns the index values (i.e. primary keys associated with
        metadata).

        Parameters
        ----------
        None

        Returns
        -------
        List of index values.
        '''
        all_values = self.index.values()
        return list(all_values)

    def items(self):
        '''
        Returns the index items (i.e. possible metadata values, and the
        primary keys associated with each of them).

        Parameters
        ----------
        None

        Returns
        -------
        List of index items.
        '''
        all_items = self.index.items()
        return list(all_items)
def get_ROIs(path, delta_mz=0.005, required_points=15, dropped_points=3, progress_callback=None):
    '''
    Scan an mzML file and collect regions of interest (ROIs): runs of
    peaks in consecutive MS1 scans whose m/z stays within delta_mz of the
    running mean m/z of the region.

    :param path: path to mzml file
    :param delta_mz: maximal distance between a peak's m/z and the mean
        m/z of the closest ROI for the peak to extend that ROI
    :param required_points: minimal number of points an ROI must have to
        be kept
    :param dropped_points: number of consecutive scans an ROI may miss
        before it is closed (can be zero points); closed ROIs are also
        padded with this many zero-intensity points on both sides
    :param progress_callback: optional object whose ``emit(int)`` is
        called with a percentage whenever the scan counter is a multiple
        of 10
    :return: ROIs - a list of ROI objects found in current file

    Raises IndexError when the file holds no scan with ms_level == 1.
    '''
    # read all scans in mzML file
    run = pymzml.run.Reader(path)
    scans = []
    for scan in run:
        if scan.ms_level == 1:
            scans.append(scan)

    ROIs = []  # completed ROIs
    process_ROIs = FastAVLTree()  # processed ROIs

    # initialize a processed data
    number = 1  # number of processed scan
    init_scan = scans[0]
    start_time = init_scan.scan_time[0]

    # deliberately swapped start values: the loop below narrows them to
    # the real minimum/maximum over the non-zero peaks
    min_mz = max(init_scan.mz)
    max_mz = min(init_scan.mz)
    for mz, i in zip(init_scan.mz, init_scan.i):
        if i != 0:
            process_ROIs[mz] = ProcessROI([1, 1], [start_time, start_time], [i], [mz], mz)
            min_mz = min(min_mz, mz)
            max_mz = max(max_mz, mz)

    for scan in tqdm(scans):
        if number == 1:  # already processed scan
            number += 1
            continue
        # expand ROI
        for n, mz in enumerate(scan.mz):
            if scan.i[n] != 0:
                ceiling_mz, ceiling_item = None, None
                floor_mz, floor_item = None, None
                # only query the tree on the side where a key can exist
                if mz < max_mz:
                    _, ceiling_item = process_ROIs.ceiling_item(mz)
                    ceiling_mz = ceiling_item.mzmean
                if mz > min_mz:
                    _, floor_item = process_ROIs.floor_item(mz)
                    floor_mz = floor_item.mzmean
                # choose closest
                if ceiling_mz is None and floor_mz is None:
                    # no neighbour on either side: start a new ROI
                    time = scan.scan_time[0]
                    process_ROIs[mz] = ProcessROI([number, number], [time, time], [scan.i[n]], [mz], mz)
                    continue
                elif ceiling_mz is None:
                    closest_mz, closest_item = floor_mz, floor_item
                elif floor_mz is None:
                    closest_mz, closest_item = ceiling_mz, ceiling_item
                else:
                    if ceiling_mz - mz > mz - floor_mz:
                        closest_mz, closest_item = floor_mz, floor_item
                    else:
                        closest_mz, closest_item = ceiling_mz, ceiling_item

                if abs(closest_item.mzmean - mz) < delta_mz:
                    roi = closest_item
                    if roi.scan[1] == number:
                        # ROIs is already extended (two peaks in one mz window)
                        roi.mzmean = (roi.mzmean * roi.points + mz) / (roi.points + 1)
                        roi.points += 1
                        # merge into the last point: intensity-weighted m/z,
                        # summed intensity
                        roi.mz[-1] = (roi.i[-1]*roi.mz[-1] + scan.i[n]*mz) / (roi.i[-1] + scan.i[n])
                        roi.i[-1] = (roi.i[-1] + scan.i[n])
                    else:
                        roi.mzmean = (roi.mzmean * roi.points + mz) / (roi.points + 1)
                        roi.points += 1
                        roi.mz.append(mz)
                        roi.i.append(scan.i[n])
                        roi.scan[1] = number  # show that we extended the roi
                        roi.rt[1] = scan.scan_time[0]
                else:
                    # closest ROI is too far away: start a new ROI
                    time = scan.scan_time[0]
                    process_ROIs[mz] = ProcessROI([number, number], [time, time], [scan.i[n]], [mz], mz)
        # Check and cleanup
        to_delete = []
        for mz, roi in process_ROIs.items():
            if roi.scan[1] < number <= roi.scan[1] + dropped_points:
                # insert 'zero' in the end
                roi.mz.append(roi.mzmean)
                roi.i.append(0)
            elif roi.scan[1] != number:
                # not extended for more than dropped_points scans: close it
                to_delete.append(mz)
                if roi.points >= required_points:
                    ROIs.append(ROI(
                        roi.scan,
                        roi.rt,
                        roi.i,
                        roi.mz,
                        roi.mzmean
                    ))
        process_ROIs.remove_items(to_delete)
        try:
            min_mz, _ = process_ROIs.min_item()
            max_mz, _ = process_ROIs.max_item()
        except ValueError:
            # ValueError from min_item/max_item (presumably an empty
            # tree): choose bounds that disable both lookups above
            min_mz = float('inf')
            max_mz = 0
        number += 1
        if progress_callback is not None and not number % 10:
            progress_callback.emit(int(number * 100 / len(scans)))
    # add final rois
    for mz, roi in process_ROIs.items():
        if roi.points >= required_points:
            # pad with the zeros not yet appended by the cleanup step
            for n in range(dropped_points - (number - 1 - roi.scan[1])):
                # insert 'zero' in the end
                roi.mz.append(roi.mzmean)
                roi.i.append(0)
            ROIs.append(ROI(
                roi.scan,
                roi.rt,
                roi.i,
                roi.mz,
                roi.mzmean
            ))
    # expand constructed roi
    for roi in ROIs:
        for n in range(dropped_points):
            # insert in the begin
            roi.i.insert(0, 0)
            roi.mz.insert(0, roi.mzmean)
        # change scan numbers (necessary for future matching)
        roi.scan = (roi.scan[0] - dropped_points, roi.scan[1] + dropped_points)
        assert roi.scan[1] - roi.scan[0] == len(roi.i) - 1
    return ROIs
def find_voronoi_seeds(simple_vertices, simple_faces, complex_vertices,
                       complex_faces, log_file, cutoff_angle=(np.pi / 2)):
    '''
    Finds those points on the complex mesh that correspond best to the
    simple mesh (taking into account euclidian distance and direction of
    normals) while forcing a one-to-one mapping.

    :param simple_vertices: vertex array of the simple mesh
    :param simple_faces: faces of the simple mesh (for the normals)
    :param complex_vertices: vertex array of the complex mesh
    :param complex_faces: faces of the complex mesh (for the normals)
    :param log_file: passed to ``log`` for progress messages
    :param cutoff_angle: candidate pairs whose normals differ by more
                         than this value are discarded
    :return: tuple ``(voronoi_seed_idx, inaccuracy, log_file)``;
             ``voronoi_seed_idx[i]`` is the complex vertex matched to
             simple vertex ``i`` and ``inaccuracy`` holds the distances of
             the last nearest-neighbour query (None if no query was run)
    '''
    from bintrees import FastAVLTree
    import scipy.spatial as spatial
    from utils import log

    # calculate normals for simple and complex vertices
    simple_normals = calculate_normals(simple_vertices, simple_faces)
    complex_normals = calculate_normals(complex_vertices, complex_faces)

    # prepare array to store seeds
    voronoi_seed_idx = np.zeros(
        (simple_vertices.shape[0], ), dtype='int64') - 1
    missing = np.where(voronoi_seed_idx == -1)[0].shape[0]

    # initialize with all vertices and small number of neighbours
    remaining_idxs = range(simple_vertices.shape[0])
    neighbours = 100

    # the kd-tree depends only on the complex mesh, so build it once
    # instead of once per pass
    kdtree = spatial.KDTree(complex_vertices)
    # stays None when there is nothing to match and no query is run
    inaccuracy = None

    while missing > 0:
        log(log_file, 'producing nearest neighbours k=%i' % (neighbours))
        # find nearest neighbours of simple vertices on complex mesh
        inaccuracy, mapping = kdtree.query(
            simple_vertices[remaining_idxs], k=neighbours)

        # create tidy long-format lists
        simple_idxs = np.asarray([
            neighbours * [simple_idx] for simple_idx in remaining_idxs
        ]).flatten()
        candidate_idxs = mapping.flatten()
        diff_euclid = inaccuracy.flatten()

        # for each vertex pair calculate the angle between their normals
        diff_normals, _ = compare_normals(simple_normals[simple_idxs],
                                          complex_normals[candidate_idxs])
        log(log_file, 'candidates %i' % (diff_normals.shape[0]))

        # remove those pairs that have an angle above cutoff
        mask = np.unique(np.where(diff_normals > cutoff_angle)[0])
        diff_normals = np.delete(diff_normals, mask)
        diff_euclid = np.delete(diff_euclid, mask)
        simple_idxs = np.delete(simple_idxs, mask)
        candidate_idxs = np.delete(candidate_idxs, mask)
        log(log_file, 'remaining candidates %i' % (diff_normals.shape[0]))

        # calculate scores for each vertex pair
        scores = (diff_normals - np.mean(diff_normals)) + (
            diff_euclid - np.mean(diff_euclid))

        log(log_file, 'producing tree')
        # make a binary search tree from the scores and vertex pairs,
        # organisation is key: score,
        # values: tuple(simple_vertex, candidate_complex_vertex)
        tree = FastAVLTree(zip(scores, zip(simple_idxs, candidate_idxs)))

        while tree.count > 0:
            min_item = tree.pop_min()
            simple_idx = min_item[1][0]
            candidate_idx = min_item[1][1]

            # take the best-scoring pair only if the simple vertex is still
            # unmatched and the candidate is not already used
            if (voronoi_seed_idx[simple_idx] == -1
                    and candidate_idx not in voronoi_seed_idx):
                voronoi_seed_idx[simple_idx] = candidate_idx

            missing = np.where(voronoi_seed_idx == -1)[0].shape[0]
            if missing == 0:
                break

        # if the tree is empty, but there are still seeds missing, increase
        # the number of nearest neighbours and repeat procedure, but only
        # for those simple vertices that have not been matched yet
        log(log_file, 'missing %i' % (missing))
        remaining_idxs = np.where(voronoi_seed_idx == -1)[0]
        neighbours *= 5

    return voronoi_seed_idx, inaccuracy, log_file
def competetive_fast_marching(vertices, graph, seeds):
    '''
    Label all vertices on the highres mesh to the closest seed vertex
    using a balanced binary search tree.

    :param vertices: array whose first dimension is the number of vertices
    :param graph: passed through unchanged to ``add_neighbours``
    :param seeds: array of seed vertex indices; seed ``i`` gets label ``i``
    :return: int64 array of shape (n_vertices, 2); column 0 holds the
             vertex index, column 1 the assigned label (-1 if unlabelled)

    Calls ``sys.exit`` if an edge is popped whose source has no label.
    '''
    import numpy as np
    import sys
    from bintrees import FastAVLTree

    # make a labelling container to be filled with the search tree
    # first column are the vertex indices of the complex mesh
    # second column are the labels from the simple mesh
    # (-1 for all but the corresponding points for now)
    labels = np.zeros((vertices.shape[0], 2), dtype='int64') - 1
    labels[:, 0] = range(vertices.shape[0])
    for i in range(seeds.shape[0]):
        labels[seeds[i]][1] = i

    # initiate AVLTree for binary search
    # organisation of the tree will be
    # key: edge length; value: tuple of vertices (source, target)
    tree = FastAVLTree()

    # add all neighbours of the voronoi seeds
    for v in seeds:
        add_neighbours(v, 0, graph, labels, tree)

    # Competetive fast marching starting from voronoi seeds
    printcount = 0
    while tree.count > 0:
        printcount += 1

        # pop the item with minimum edge length
        min_item = tree.pop_min()
        length = min_item[0]
        source = min_item[1][0]
        target = min_item[1][1]

        # if target has no label yet (but source does!), assign label of
        # source; if the target already has a label the item is just
        # popped out of the tree and nothing else happens
        if labels[target][1] == -1:
            if labels[source][1] == -1:
                sys.exit('Source has no label, something went wrong!')
            labels[target][1] = labels[source][1]

            # test if labelling is complete
            if np.any(labels[:, 1] == -1):
                # if not, add neighbours of target to tree
                add_neighbours(target, length, graph, labels, tree)
            else:
                break

        # for monitoring the progress; the single-argument print(...)
        # form is valid on both Python 2 and Python 3
        if printcount % 100 == 0:
            print('tree ' + str(tree.count))
            print('labels ' + str(np.where(labels[:, 1] == -1)[0].shape[0]))

    return labels
def cavl_build():
    """Build a FastAVLTree from the global ``keys``; the tree is discarded."""
    fast_tree = FastAVLTree.from_keys(keys)
def __init__(self, csvfile=None, parser=None):
    """
    Parse *csvfile* and load the result into a FastAVLTree.

    :param csvfile: handed unchanged to ``parser.parse``
    :param parser: object exposing ``parse(csvfile)``; when omitted a new
                   ``MaxMindGeoLiteCSVParser`` is created for this
                   instance (instead of one shared instance built when
                   the function is defined)
    """
    if parser is None:
        parser = MaxMindGeoLiteCSVParser()
    treelist = parser.parse(csvfile)
    self.tree = FastAVLTree(treelist)
    # Done with the list, remove it since it can be rather large
    del treelist
class OrderBook:
    """Limit order book able to process LMT and MKT orders.

    MKT orders are disassembled into LMT orders according to the liquidity
    currently resting on the opposite side of the book.

    Trade reports appended to a trades stack are tuples of two shapes:
    ``(0, size, price)`` for the trade itself and
    ``(1, order_id, size, price, owner, side)`` for each order involved.
    """

    def __init__(self):
        # AVL trees are used as the main structure because they keep the
        # price levels ordered.
        self._participants = FastAVLTree()  # participant id -> list of open order ids
        self._order_owners = FastAVLTree()  # order id -> participant id
        self._asks = FastAVLTree()  # price -> tree(order id -> remaining size); best is the MIN key
        self._bids = FastAVLTree()  # price -> tree(order id -> remaining size); best is the MAX key
        self._price_ids = FastAVLTree()  # order id -> (price, side)
        self._total_ask_size = 0  # For monitoring purpose
        self._total_bid_size = 0  # For monitoring purpose
        self._last_order_id = 0  # Increases with each order processed
        self._cleared_orders_count = 0  # For monitoring purpose
        # The attribute name is kept as-is so that previously pickled books
        # (see __getstate__/__setstate__) still restore correctly.
        self._total_volume_trad = 0  # For monitoring purpose
        self._total_volume_pending = 0  # For monitoring purpose

    def __getstate__(self):
        """Return the attribute dict; the whole book can be rebuilt from it."""
        return self.__dict__

    def __setstate__(self, state):
        """Repopulate (recover) the book from a dict of attributes."""
        for at_name, at_val in state.items():
            setattr(self, at_name, at_val)

    def _get_order_id(self):
        """Return the next order id (ids increase by one per order)."""
        self._last_order_id += 1
        return self._last_order_id

    def _forget_order(self, order_id, level):
        """Remove ``order_id`` from ``level`` and from every id index."""
        owner = self._order_owners[order_id]
        del level[order_id]
        del self._price_ids[order_id]
        del self._order_owners[order_id]
        self._participants[owner].remove(order_id)

    def _balance(self, trades_stack):
        """Execute trades for as long as the best bid crosses the best ask.

        :param trades_stack: list that trade reports are appended to.
        :return: the same ``trades_stack`` list.
        """
        asks, bids = self._asks, self._bids

        # Loop instead of recursing once per cleared price level, so that a
        # deep book cannot exhaust the interpreter's recursion limit.
        while not (asks.is_empty() or bids.is_empty()):
            min_ask = asks.min_key()
            max_bid = bids.max_key()

            # Check liquidity situation
            if max_bid < min_ask:
                break

            ask_orders = asks.get(min_ask)
            bid_orders = bids.get(max_bid)

            # Iterate over snapshots of the ids: filled orders are deleted
            # from the level trees inside the loops, and a tree must not be
            # mutated while it is being iterated.
            for ask_order in list(ask_orders.keys()):
                for bid_order in list(bid_orders.keys()):
                    # Skip orders that were fully filled earlier in this pass
                    if ask_order not in ask_orders or bid_order not in bid_orders:
                        continue

                    traded = min(ask_orders[ask_order], bid_orders[bid_order])
                    ask_orders[ask_order] -= traded
                    bid_orders[bid_order] -= traded

                    self._total_ask_size -= traded
                    self._total_bid_size -= traded
                    self._total_volume_trad += traded
                    # Both resting orders shrink by the traded size
                    self._total_volume_pending -= 2 * traded

                    # Owners must be read before the orders are forgotten
                    ask_owner = self._order_owners[ask_order]
                    bid_owner = self._order_owners[bid_order]

                    # Buy side order fully liquidated
                    if bid_orders[bid_order] == 0:
                        self._cleared_orders_count += 1
                        self._forget_order(bid_order, bid_orders)

                    # Sell side order fully liquidated
                    if ask_orders[ask_order] == 0:
                        self._cleared_orders_count += 1
                        self._forget_order(ask_order, ask_orders)

                    # Inform sides about state of their orders
                    trades_stack.append((0, traded, max_bid))
                    trades_stack.append((1, ask_order, traded, max_bid, ask_owner, 'ask'))
                    trades_stack.append((1, bid_order, traded, max_bid, bid_owner, 'bid'))

            # Whole ASK price level was liquidated, remove it from the tree
            if ask_orders.is_empty():
                del asks[min_ask]

            # Whole BID price level was liquidated, remove it from the tree
            if bid_orders.is_empty():
                del bids[max_bid]

        return trades_stack

    def _submit_mkt(self, side, size, pi_d):
        """Execute a MKT order as a series of LMT orders at the best price.

        :return: ``(0, trades_stack)``.
        """
        trades_stack = []

        while size > 0:
            if side == 'ask':
                second_side_size = self.bid_size
                second_side_price = self.bid
            else:
                second_side_size = self.ask_size
                second_side_price = self.ask

            # We can only take liquidity which exists
            trade_size = min(second_side_size, size)
            if trade_size <= 0:
                # Nothing left on the opposite side; stop instead of
                # submitting zero-size orders forever.
                break

            self._submit_lmt(side, trade_size, second_side_price, pi_d)
            trades_stack = self._balance(trades_stack)
            size -= trade_size

        return 0, trades_stack

    def _submit_lmt(self, side, size, price, pi_d):
        """Place a LMT order into the book (no matching is done here).

        Any ``side`` other than ``'ask'`` is treated as a bid.

        :return: id assigned to the new order.
        """
        # Assign order ID
        order_id = self._get_order_id()

        # Pending volume monitoring
        self._total_volume_pending += size

        self._price_ids.insert(order_id, (price, side))

        # Keep track of participant orders, book will be asked for sure
        if pi_d in self._participants:
            self._participants[pi_d].append(order_id)
        else:
            self._participants.insert(pi_d, [order_id])

        self._order_owners.insert(order_id, pi_d)

        # Assign to right (correct) side
        if side == 'ask':
            self._total_ask_size += size
            levels = self._asks
        else:  # bid
            self._total_bid_size += size
            levels = self._bids

        if price in levels:
            level = levels[price]
        else:
            level = FastAVLTree()
            levels.insert(price, level)
        level.insert(order_id, size)

        return order_id

    def cancel(self, order_id):
        """Cancel a resting order and release everything tracked for it.

        :raises KeyError: if ``order_id`` is not an open order.
        """
        price, side = self._price_ids[order_id]
        levels = self._asks if side == 'ask' else self._bids
        level = levels[price]
        remaining = level[order_id]

        self._forget_order(order_id, level)
        if level.is_empty():
            del levels[price]

        # Keep the monitoring totals in step with the book; stale totals
        # would let submit_order accept MKT orders that cannot be filled.
        if side == 'ask':
            self._total_ask_size -= remaining
        else:
            self._total_bid_size -= remaining
        self._total_volume_pending -= remaining

    @property
    def ask_size(self):
        """Volume waiting on the best (lowest) ask level, 0 if there is none."""
        best_ask = self.gmd(1)[0]
        if not best_ask:
            return 0
        return best_ask[0][1]

    @property
    def total_ask_size(self):
        """Total size resting on the ask side."""
        return self._total_ask_size

    @property
    def bid_size(self):
        """Volume waiting on the best (highest) bid level, 0 if there is none."""
        best_bid = self.gmd(1)[1]
        if not best_bid:
            return 0
        return best_bid[0][1]

    @property
    def total_volume_traded(self):
        """Total traded volume."""
        return self._total_volume_trad

    @property
    def total_volume_pending(self):
        """Total size of orders in whole book."""
        return self._total_volume_pending

    @property
    def total_bid_size(self):
        """Total size resting on the bid side."""
        return self._total_bid_size

    @property
    def ask(self):
        """Best ask price, or -1 when the ask side is empty."""
        try:
            return self.gmd(1)[0][0][0]
        except IndexError:
            return -1

    @property
    def bid(self):
        """Best bid price, or -1 when the bid side is empty."""
        try:
            return self.gmd(1)[1][0][0]
        except IndexError:
            return -1

    @property
    def spread(self):
        """Difference between ask and bid."""
        return self.ask - self.bid

    def get_participant_orders(self, pi_d):
        """Return the open orders of a participant.

        :return: ``(order_ids, details)`` where ``details`` maps each order
            id to ``(price, side, size)``.
        :raises KeyError: if the participant is unknown.
        """
        olst = self._participants.get_value(pi_d)

        order_prices = {}
        for order_id in olst:
            price, side = self._price_ids.get_value(order_id)
            levels = self._asks if side == 'ask' else self._bids
            order_size = levels.get_value(price).get_value(order_id)
            order_prices[order_id] = (price, side, order_size)

        return olst, order_prices

    def submit_order(self, order_type, side, size, price, pi_d):
        """Place an order, both LMT and MKT.

        :return: ``(order_id, trades)`` for ``'lmt'``; ``(0, trades)`` for a
            ``'mkt'`` order, or ``(-1, [])`` when the opposite side holds
            less than ``size``.  Any other ``order_type`` returns ``None``.
        """
        if order_type == 'lmt':
            order_id = self._submit_lmt(side, size, price, pi_d)
            trades = self._balance([])
            return order_id, trades

        if order_type == 'mkt':
            # A MKT order consumes the side opposite to its own
            if side != 'ask':
                opposite_total = self._total_ask_size
            else:
                opposite_total = self._total_bid_size

            if opposite_total >= size:
                return self._submit_mkt(side, size, pi_d)
            # Insufficient liquidity
            return -1, []

    @staticmethod
    def _side_depth(levels, depth, reverse):
        """Return up to ``depth`` ``[price, size]`` pairs, best price first."""
        side = []
        if levels.is_empty():
            return side
        for price in levels.keys(reverse=reverse):
            level = levels.get(price)
            level_size = sum(level.get(order_id) for order_id in level.keys())
            side.append([price, level_size])
            # Checked after appending, so at least one level is always
            # reported for a non-empty side.
            if len(side) >= depth:
                break
        return side

    def gmd(self, depth):
        """Liquidity level sizes for both sides.

        :return: ``[ask_side, bid_side]``, each a list of ``[price, size]``
            pairs; asks ascending by price, bids descending.
        """
        return [self._side_depth(self._asks, depth, False),
                self._side_depth(self._bids, depth, True)]
def __init__(self):
    """Start with an empty FastAVLTree stored in ``self.impl``."""
    self.impl = FastAVLTree()
class RangeTree(Generic[V]):
    """A specialized tree dealing with ranges.

    Every interval is stored under a single integer anchor that maps to
    ``(value, end)``:

    * ``[s:e]`` -> anchor ``s``, end ``e`` (exclusive);
    * ``[s:]``  -> anchor ``s``, end ``InfinityMarker.INF_PLUS``;
    * ``[:e]``  -> anchor ``e - 1``, end ``InfinityMarker.INF_MINUS``.
    """

    # Private "not covered" marker; ``None`` may be a legitimately stored value.
    _MISSING = object()

    def __init__(self) -> None:
        # Map ints to tuples (val, Union[end, InfinityMarker])
        self._tree = FastAVLTree()

    def __setitem__(self, key: Union[slice, range], value: V) -> None:
        """Set a value to the given interval.

        Only slices and ranges with the default step (1) are supported.
        If the slice or range is inverted (end < start), the interval
        will be flipped. Half-open slices ([:1], [1:]) are supported.

        Raises ValueError if the key is not a slice or range, uses a custom
        step, or is open on both sides ([:]).
        Raises KeyError if the interval overlaps an already stored one.
        """
        if not isinstance(key, (slice, range)):
            raise ValueError('Only slices and ranges supported.')
        if key.step is not None and key.step != 1:
            m = 'Intervals with custom steps ({}) not' \
                ' supported.'.format(key)
            raise ValueError(m)

        s, e = key.start, key.stop
        if s is None and e is None:
            # No bound is left to derive the integer anchor from.
            raise ValueError('Intervals open on both sides not supported.')
        if s is not None and e is not None and s > e:
            s, e = e, s

        # The check for an empty space is a little complex.
        # First check the lower bound.
        anchor = s if s is not None else e - 1
        try:
            lower_item = self._tree.floor_item(anchor)
        except KeyError:
            lower_item = None
        if lower_item is not None:
            lower_end = lower_item[1][1]
            if (s is None or
                    lower_end is InfinityMarker.INF_PLUS or
                    (lower_end is not InfinityMarker.INF_MINUS and
                     lower_end > s)):
                raise KeyError('Overlapping intervals.')

        # Now the higher bound.
        try:
            higher_item = self._tree.ceiling_item(anchor)
        except KeyError:
            higher_item = None
        if higher_item is not None:
            if (e is None or
                    higher_item[1][1] is InfinityMarker.INF_MINUS or
                    higher_item[0] < e):
                raise KeyError('Overlapping intervals')

        if e is None:
            e = InfinityMarker.INF_PLUS
        elif s is None:
            e = InfinityMarker.INF_MINUS
        self._tree[anchor] = (value, e)

    def _lookup(self, key):
        """Return the value covering ``key``, or ``_MISSING`` if none does."""
        try:
            start, (val, end) = self._tree.floor_item(key)
        except KeyError:
            # No anchor at or below key: only an interval open towards
            # minus infinity, anchored above key, can still cover it.
            try:
                _, (val, end) = self._tree.ceiling_item(key)
            except KeyError:
                return self._MISSING
            return val if end is InfinityMarker.INF_MINUS else self._MISSING

        if end is InfinityMarker.INF_PLUS:
            covered = True
        elif end is InfinityMarker.INF_MINUS:
            # Anchored at its last covered key, so it only matches exactly.
            covered = start == key
        else:
            covered = key < end
        return val if covered else self._MISSING

    def __getitem__(self, key: int) -> V:
        """Return the value of the interval containing ``key``.

        Raises KeyError(key) if no stored interval contains it.
        """
        found = self._lookup(key)
        if found is self._MISSING:
            raise KeyError(key)
        return found

    def get(self, key, default: D = None) -> Union[V, D]:
        """Return the value of the interval containing ``key``, else ``default``."""
        found = self._lookup(key)
        return default if found is self._MISSING else found

    def __contains__(self, key: int) -> bool:
        """Return True if some stored interval contains ``key``."""
        return self._lookup(key) is not self._MISSING
def __init__(self) -> None:
    """Create the backing tree.

    It maps ints to tuples (val, Union[end, InfinityMarker]).
    """
    self._tree = FastAVLTree()
def __init__(self, price_level_type, **kwargs):
    """Initialise the base class, then create a pair of empty AVL trees.

    ``price_level_type`` and any keyword arguments are forwarded unchanged
    to the parent initialiser; ``self.price_levels`` becomes a 2-tuple of
    fresh FastAVLTree instances.
    """
    super(AVLTreePriceLevels, self).__init__(price_level_type, **kwargs)
    first_tree = FastAVLTree()
    second_tree = FastAVLTree()
    self.price_levels = (first_tree, second_tree)