def calc_overlap_between_segments(ordered_segments1, ordered_segments2):
    '''
    Return the total overlap size between two groups of segments.

    Each group is expected to be ordered and disjoint, given as
    [(start1, end1), (start2, end2), ...]. Every overlapping stretch
    start..end contributes ``end - start + 1`` to the total.
    Returns 0 when either group is empty.
    '''
    from interval_tree import IntervalTree

    if not len(ordered_segments1) or not len(ordered_segments2):
        return 0

    # The smaller group is indexed in the tree; the larger one is streamed
    # through it.
    if len(ordered_segments1) > len(ordered_segments2):
        indexed_group, probing_group = ordered_segments2, ordered_segments1
    else:
        indexed_group, probing_group = ordered_segments1, ordered_segments2

    lowest = min(indexed_group[0][0], probing_group[0][0])
    highest = max(indexed_group[-1][1], probing_group[-1][1])

    # Each tree item carries the segment itself as its payload.
    tree_items = [seg + (seg, ) for seg in indexed_group]
    tree = IntervalTree(tree_items, lowest, highest)

    overlap_sum = 0
    for probe in probing_group:
        for hit in tree.find_range(probe):
            left = max(probe[0], hit[0])
            right = min(probe[1], hit[1])
            assert left <= right, \
                'Reported overlap between %d..%d to %d..%d.' % (probe + hit)
            overlap_sum += right - left + 1

    return overlap_sum
def __init__(self, peak_calling_file, inference_file, output):
    """
    Remember the input/output locations and create an empty interval tree.

    :param peak_calling_file: the peak-calling file
    :param inference_file: the CNN prediction output
    :param output: stored unchanged as ``self.output``
    """
    self.peak_file, self.infer_file = peak_calling_file, inference_file
    self.output = output
    # Nothing is loaded yet; both holders start out as None.
    self.peak = self.infer = None
    self.interval_tree = IntervalTree()
def db_hit(database, query_mass):
    '''
    Return the database hits for ``query_mass``.

    An interval tree is built from ``database`` and searched for
    ``query_mass``. Per the original documentation, every DatabaseEntry
    carries its own begin/end mass range (the ppm tolerance is fixed when
    the entry is created); see test_discretisation.py and
    identification.py for example usage.

    Args:
        - database: a list of DatabaseEntry objects
        - query_mass: the mass to query

    Returns:
        whatever ``IntervalTree.search`` yields for ``query_mass`` --
        documented as the entries e with
        e.get_begin() < query_mass < e.get_end()
    '''
    return IntervalTree(database).search(query_mass)
def build(self):
    """Index the edges of every ring of ``self.polygon`` in ``self.tree``."""
    edges = []
    for poly in self.polygon:
        # Outer boundary first, then each hole of the same polygon.
        edges.extend(self.build_polygon(poly.exterior.coords))
        for hole in poly.interiors:
            edges.extend(self.build_polygon(hole.coords))
    self.tree = IntervalTree(edges)
def identify_bins(self, moldb, bins):
    """
    Report how many database entries fall into one or several bins.

    ``bins`` are stored in an interval tree, then the tree is searched with
    the ``mass`` of every entry of ``moldb``. Three counters are printed:
    the total number of matching bins, the number of entries matching
    exactly one bin and the number of entries matching more than one bin.

    The single-argument ``print(...)`` form is used so the output is the
    same on Python 2 and Python 3.
    """
    print("Checking discrete identification")
    T = IntervalTree(bins)  # store bins in an interval tree
    found = 0
    unambiguous = 0  # entries with exactly one matching bin
    ambiguous = 0    # entries with more than one matching bin
    for db_entry in moldb:
        n_matches = len(T.search(db_entry.mass))
        found += n_matches
        if n_matches == 1:
            unambiguous += 1
        elif n_matches > 1:
            ambiguous += 1
    print('\tfound=' + str(found))
    print('\tmatching 1 bin=' + str(unambiguous))
    print('\tmatching >1 bins=' + str(ambiguous))
def main():
    """Build the interval tree from the intervals file, then answer stdin queries."""
    # The intervals file is read and converted before the tree is built.
    tree = IntervalTree(str_to_interval(readIntervalsFile()))

    # Every stdin line is handed to processLine together with the tree.
    for query in sys.stdin:
        processLine(tree, query)
def __init__(self, filename):
    """
    Parse the file at ``filename`` and initialise the derived state.

    ``self.parse()`` runs right after the (empty) facet list is created;
    the remaining attributes get placeholder values before
    ``self.update()`` is called.
    """
    self.filename = filename
    self.facets = []
    self.parse()

    # Placeholders set before update() runs.
    self.sorted_z = []
    self.tree = IntervalTree(0)
    self.ex = {}
    self.direction = '+Z'

    self.update()
def make_tree(self):
    """
    Rebuild ``self.sorted_z`` and ``self.tree`` from ``self.facets``.

    ``sorted_z`` receives a ``(minz, facet_index)`` and a
    ``(maxz, facet_index)`` pair per facet and is then sorted. Each facet
    is pushed into a fresh tree over the index range obtained from
    ``find_z`` for its lowest and highest z.
    """
    z_bounds = self.sorted_z = []
    for facet_index, facet in enumerate(self.facets):
        z_bounds.append((facet.minz(), facet_index))
        z_bounds.append((facet.maxz(), facet_index))
    z_bounds.sort()

    self.tree = IntervalTree(len(self.sorted_z))
    for facet in self.facets:
        first = self.find_z(facet.minz(), True)
        past_last = self.find_z(facet.maxz(), False)
        self.tree.push(first, past_last - 1, facet)
def _build_gene_interval_trees(genes): if len(genes) == 0: return None segments = [] max_coordinate = 1 for gene in genes: start, end = _get_gene_locus(gene) segments += [(start, end, gene)] max_coordinate = max(max_coordinate, end) return IntervalTree(segments, 1, max_coordinate)
def fully_scan_old(self):
    """
    Scan the slice row by row (every STEP) and return the row segments.

    Returns [] when the slice has at most one line. The result is memoised
    in ``self.calculated_fully_scan_old``.

    Rows are taken from ``miny + CORRECTION`` up to ``maxy``. A row that
    coincides with a stored y (``self.exist``) is nudged by CORRECTION, and
    a row for which ``get_lines_in_row_old`` raises AssertionError is
    retried slightly higher. After more than three consecutive failures
    FormatSTLError is raised when ``self.asrt`` is set.
    """
    if len(self.lines) <= 1:
        return []
    if self.calculated_fully_scan_old is not None:
        return self.calculated_fully_scan_old

    # Sorted list of all end-point ordinates; only p1 keeps its line index.
    self.sorted_y = []
    for i, line in enumerate(self.lines):
        self.sorted_y.append((line.p1.y, i))
        self.sorted_y.append((line.p2.y, -1))
    self.sorted_y.sort()

    # Register every line over the index range spanned by its end points.
    self.tree_x = IntervalTree(len(self.sorted_y))
    for line in self.lines:
        l = self.find_y_old(line.p1.y, False)
        r = self.find_y_old(line.p2.y, False)
        if l > r:
            (l, r) = (r, l)
        self.tree_x.push(l, r - 1, line)

    miny = self.ex['miny']
    maxy = self.ex['maxy']
    y = miny + CORRECTION
    ans = []
    number_tries = 0
    while y < maxy:
        while self.exist(y):
            logging.info('Correction in fully_scan_old')
            y += CORRECTION
        if number_tries > 3:
            logging.error("I tired to tries so much! ;(")
            if self.asrt:
                raise stl_utils.FormatSTLError('Cant slice')
        try:
            ans.extend(self.get_lines_in_row_old(y))
            y += STEP
            number_tries = 0
        except AssertionError:
            y += CORRECTION
            number_tries += 1

    # Cache under this method's own attribute. The original stored the
    # result in ``calculated_fully_scan``, so this memo never hit and the
    # cache of ``fully_scan`` was overwritten.
    self.calculated_fully_scan_old = ans
    return ans
def build_gene_trees(infile, gene_db):
    """
    Build one interval tree of genes per chromosome and pickle the result.

    ``infile`` is a gzip-compressed, tab-separated file. Lines starting
    with '#' and lines with fewer than six columns are skipped. Columns
    0, 1, 2 and 5 are read as chromosome, start, stop and HGNC symbol;
    rows with an empty symbol are ignored and only the first occurrence of
    a symbol on a chromosome is kept.

    The resulting ``{chromosome: IntervalTree}`` dict is written to
    ``gene_db`` with pickle.
    """
    genes_by_chrom = {}
    chromosome_stops = {}

    # Text mode: on Python 3 the default binary mode yields bytes, which
    # makes ``line.startswith('#')`` raise TypeError.
    with gzip.open(infile, 'rt') as f:
        for line in f:
            if line.startswith('#'):
                continue
            fields = line.rstrip().split('\t')
            if len(fields) < 6:
                continue
            chrom = fields[0]
            start = int(fields[1])
            stop = int(fields[2])
            hgnc_symbol = fields[5]
            if not hgnc_symbol:
                continue
            # NOTE(review): nesting reconstructed from flattened source;
            # the stop bookkeeping is assumed to apply to named genes only.
            # setdefault keeps the first coordinates seen for a symbol.
            genes_by_chrom.setdefault(chrom, {}).setdefault(
                hgnc_symbol, [start, stop])
            if stop > chromosome_stops.get(chrom, 0):
                chromosome_stops[chrom] = stop + 1

    # Prepare for interval tree
    interval_trees = {}
    for chrom, genes in genes_by_chrom.items():
        intervals = [[gene_start, gene_stop, gene_symbol]
                     for gene_symbol, (gene_start, gene_stop) in genes.items()]
        interval_trees[chrom] = IntervalTree(
            intervals, 1, chromosome_stops[chrom])

    with open(gene_db, 'wb') as f:
        logger.info("Dumping gene database to {0}.".format(gene_db))
        pickle.dump(interval_trees, f)
    logger.debug("Dumping successful.")
def test_random(range_num):
    """
    Cross-check ``IntervalTree.find`` against ``find_user_ranges_lazy``.

    1000 users and ``range_num`` ranges are generated between ``min_ip``
    and ``max_ip``. On the first user whose two (sorted) results differ,
    the mismatch is printed, the ranges and the failing user are dumped to
    two .tsv files and the process exits.
    """
    print('test_random(%d)' % range_num)
    users = list(gen_users(1000, min_ip, max_ip))
    ranges = [Segment(*bounds) for bounds in gen_ranges(range_num, min_ip, max_ip)]
    tree = IntervalTree.build_tree(ranges)
    if range_num == 0 and not tree:
        return

    for user, ip in users:
        from_tree = [segment.name for segment in tree.find(ip)]
        from_scan = find_user_ranges_lazy(ip, ranges)
        if sorted(from_tree) == sorted(from_scan):
            continue

        print('Error: [%s] != [%s]' % (','.join(from_tree), ','.join(from_scan)))
        print('User: %d\t%s\n' % (user, ip_address(ip)))
        # Dump the inputs of the failing case.
        with open('test\\failed_ranges.tsv', 'w+') as ranges_dump:
            for segment in ranges:
                ranges_dump.write('%s-%s\t%s\n' % (ip_address(segment.left),
                                                   ip_address(segment.right),
                                                   segment.name))
        with open('test\\failed_transactions.tsv', 'w+') as user_dump:
            user_dump.write('%d\t%s\n' % (user, ip_address(ip)))
        exit()
def fully_scan(self):
    """
    Scan the slice row by row using the loops found by ``get_loops``.

    The result is memoised in ``self.calculated_fully_scan``. Every loop
    edge is stored as ``(start, end, loop_index, is_hole)`` and registered
    in ``self.tree_x`` over the index range of its two end-point ordinates.
    Rows run from ``miny`` to ``maxy`` in STEP increments; a row that hits
    a stored ordinate is nudged by CORRECTION first.
    """
    if self.calculated_fully_scan is not None:
        return self.calculated_fully_scan

    # One edge per consecutive pair of loop points, closing each loop.
    edges = []
    for loop_index, loop in enumerate(self.get_loops()):
        previous = loop.points[-1]
        for point in loop:
            edges.append((previous, point, loop_index, loop.is_hole()))
            previous = point

    self.sorted_y = sorted((edge[0].y, edge[2]) for edge in edges)

    self.tree_x = IntervalTree(len(self.sorted_y))
    for edge in edges:
        first = self.find_y(edge[0].y)
        second = self.find_y(edge[1].y)
        low, high = (second, first) if first > second else (first, second)
        self.tree_x.push(low, high, edge)

    y = self.ex['miny']
    top = self.ex['maxy']
    rows = []
    while y < top:
        while self.exist(y):
            logging.info('Correction in fully_scan')
            y += CORRECTION
        rows.extend(self.get_lines_in_row(y))
        y += STEP

    self.calculated_fully_scan = rows
    return rows
def __init__(self, model, z):
    """
    Cut ``model`` at height ``z`` and index the resulting 2D lines.

    For a loaded model, SizeSliceError is raised when the model exceeds
    MAXSIZE or has more than MAXFACETS facets. Every facet intersecting
    ``z`` contributes its intersection line when that line is longer than
    EPS. The bounding box (``self.ex``), the sorted end-point ordinates
    (``self.sorted_y``) and the interval tree (``self.tree_x``) are then
    derived from the collected lines.
    """
    self.stl_model = model
    self.z = z
    self.lines = []
    self.sorted_y = []
    self.ex = {'minx': MAXSIZE, 'maxx': -MAXSIZE, 'miny': MAXSIZE, 'maxy': -MAXSIZE}

    # NOTE(review): nesting reconstructed from flattened source; only the
    # checks and the facet scan are assumed to depend on ``loaded``.
    if self.stl_model.loaded:
        if model.max_size() > MAXSIZE:
            logging.error("Cant slice %.2f model. The max size is %.2f" % (model.max_size(), MAXSIZE))
            raise SizeSliceError("Cant slice so big model")
        if len(model.facets) > MAXFACETS:
            logging.error("Cant slice %d facets. The max supposed numbers of facets is %.2f" % (len(model.facets), MAXFACETS))
            raise SizeSliceError("Cant slice so big model")
        for facet in model.facets:
            if not facet.isIntersect(z):
                continue
            segment = facet.intersect(z)
            if segment.length > EPS:
                self.lines.append(segment)

    # Bounding box of all segment end points.
    bounds = self.ex
    for segment in self.lines:
        for point in segment:
            bounds['minx'] = min(bounds['minx'], point.x)
            bounds['maxx'] = max(bounds['maxx'], point.x)
            bounds['miny'] = min(bounds['miny'], point.y)
            bounds['maxy'] = max(bounds['maxy'], point.y)

    ordinates = self.sorted_y
    for segment in self.lines:
        ordinates.append(segment.p1.y)
        ordinates.append(segment.p2.y)
    ordinates.sort()

    # making interval tree for fast search intersected lines
    self.tree_x = IntervalTree(len(ordinates))
    for segment in self.lines:
        low = self.find_y(segment.p1.y, False)
        high = self.find_y(segment.p2.y, False)
        if low > high:
            low, high = high, low
        self.tree_x.push(low, high - 1, segment)
def __init__(self, model, z, asrt=True):
    """
    Cut ``model`` at height ``z`` and collect the resulting 2D lines.

    Raises SizeSliceError when the model has more than MAXFACETS facets or
    when the slice extends beyond MAXSIZE. The facets come from
    ``model.intersect_facets(z)``; each one that intersects ``z`` adds its
    intersection line when that line is longer than EPS. All memo
    attributes (``calculated_*``) start as None.
    """
    print('new slice %.2f' % z)
    self.asrt = asrt
    self.stl_model = model
    self.z = z

    # Memo slots of the scanning / loop-finding methods.
    self.calculated_fully_scan_old = None
    self.calculated_fully_scan = None
    self.calculated_get_loops = None
    self.calculated_get_shape = None

    self.lines = []
    self.ex = {'minx': MAXSIZE, 'maxx': -MAXSIZE, 'miny': MAXSIZE, 'maxy': -MAXSIZE}

    if len(model.facets) > MAXFACETS:
        logging.error("Cant slice %d facets. The max supposed numbers of facets is %.2f" % (len(model.facets), MAXFACETS))
        raise SizeSliceError("Cant slice so big model")

    for facet in model.intersect_facets(z):
        if not facet.isIntersect(z):
            continue
        segment = facet.intersect(z)
        if segment.length > EPS:
            self.lines.append(segment)

    # Bounding box of all segment end points.
    bounds = self.ex
    for segment in self.lines:
        for point in segment:
            bounds['minx'] = min(bounds['minx'], point.x)
            bounds['maxx'] = max(bounds['maxx'], point.x)
            bounds['miny'] = min(bounds['miny'], point.y)
            bounds['maxy'] = max(bounds['maxy'], point.y)

    if self.max_size() > MAXSIZE:
        logging.error("Cant slice %.2f model. The max size is %.2f" % (self.max_size(), MAXSIZE))
        raise SizeSliceError("Cant slice so big model")

    self.sorted_y = []
    self.tree_x = IntervalTree(0)
    print("lines in slice: %d" % len(self.lines))
class Slice:
    """
    Cross-section of an STL model at a fixed height ``z``.

    The slice is a list of 2D lines (``self.lines``) with its bounding box
    in ``self.ex``. The scanning and loop-finding methods memoise their
    results in the ``calculated_*`` attributes.

    Several methods signal "this row cannot be processed" by raising
    AssertionError, which callers in this class catch. The exception is
    raised explicitly rather than through ``assert`` so the control flow
    also works when Python runs with ``-O``.
    """

    def __init__(self, model, z, asrt=True):
        """
        Cut ``model`` at height ``z``.

        Raises SizeSliceError when the model has more than MAXFACETS facets
        or when the slice extends beyond MAXSIZE. ``asrt`` controls whether
        ``fully_scan_old`` gives up with FormatSTLError after repeated
        failures.
        """
        print('new slice %.2f' % z)
        self.asrt = asrt
        self.calculated_fully_scan_old = None
        self.calculated_fully_scan = None
        self.calculated_get_loops = None
        self.calculated_get_shape = None
        self.stl_model = model
        self.z = z
        self.lines = []
        self.ex = {'minx': MAXSIZE, 'maxx': -MAXSIZE, 'miny': MAXSIZE, 'maxy': -MAXSIZE}
        if len(model.facets) > MAXFACETS:
            logging.error("Cant slice %d facets. The max supposed numbers of facets is %.2f" % (len(model.facets), MAXFACETS))
            raise SizeSliceError("Cant slice so big model")
        for facet in model.intersect_facets(z):
            if facet.isIntersect(z):
                line = facet.intersect(z)
                if line.length > EPS:
                    self.lines.append(line)
        for line in self.lines:
            for p in line:
                self.ex['minx'] = min(self.ex['minx'], p.x)
                self.ex['maxx'] = max(self.ex['maxx'], p.x)
                self.ex['miny'] = min(self.ex['miny'], p.y)
                self.ex['maxy'] = max(self.ex['maxy'], p.y)
        if self.max_size() > MAXSIZE:
            logging.error("Cant slice %.2f model. The max size is %.2f" % (self.max_size(), MAXSIZE))
            raise SizeSliceError("Cant slice so big model")
        self.sorted_y = []
        self.tree_x = IntervalTree(0)
        print("lines in slice: %d" % len(self.lines))

    def max_size(self):
        """Return the largest distance of the bounding box from the origin along x or y."""
        x = max(-self.ex['minx'], self.ex['maxx'])
        y = max(-self.ex['miny'], self.ex['maxy'])
        return max(x, y)

    def __len__(self):
        """Return the number of lines in the slice."""
        return len(self.lines)

    def __nonzero__(self):
        """A slice is always truthy, even when it has no lines."""
        return True

    # Python 3 looks up __bool__; without it an empty slice would be falsy
    # because of __len__.
    __bool__ = __nonzero__

    # Used it for find first index, self.sorted_y[idx] > y.
    # If there are numbers, equal with y, answer may be any index of them.
    def find_y_old(self, y, asrt=True, left=True):
        """
        Binary-search ``self.sorted_y`` for the first entry greater than ``y``.

        With ``asrt`` set, AssertionError is raised as soon as a probed
        entry is ``equal`` to ``y``. ``left`` is accepted but not used.
        """
        l = 0
        r = len(self.sorted_y)
        while r > l:
            m = (r + l) // 2
            if asrt and equal(self.sorted_y[m][0], y):
                logging.info('You want find_y(%.3f) with Assert mode, but there are such y' % y)
                raise AssertionError('find_y_old: y coincides with a stored ordinate')
            if self.sorted_y[m][0] > y:
                r = m
            else:
                l = m + 1
        # l == r
        return l

    # simple fully scan each STEP row
    # returns list[Line2]
    def fully_scan_old(self):
        """
        Scan the slice row by row (every STEP) and return the row segments.

        Returns [] when the slice has at most one line. The result is
        memoised in ``self.calculated_fully_scan_old``. Rows that cannot be
        processed are retried CORRECTION higher; after more than three
        consecutive failures FormatSTLError is raised when ``self.asrt``
        is set.
        """
        if len(self.lines) <= 1:
            return []
        if self.calculated_fully_scan_old is not None:
            return self.calculated_fully_scan_old

        # Sorted end-point ordinates; only p1 keeps its line index.
        self.sorted_y = []
        for i, line in enumerate(self.lines):
            self.sorted_y.append((line.p1.y, i))
            self.sorted_y.append((line.p2.y, -1))
        self.sorted_y.sort()

        self.tree_x = IntervalTree(len(self.sorted_y))
        for line in self.lines:
            l = self.find_y_old(line.p1.y, False)
            r = self.find_y_old(line.p2.y, False)
            if l > r:
                (l, r) = (r, l)
            self.tree_x.push(l, r - 1, line)

        miny = self.ex['miny']
        maxy = self.ex['maxy']
        y = miny + CORRECTION
        ans = []
        number_tries = 0
        while y < maxy:
            while self.exist(y):
                logging.info('Correction in fully_scan_old')
                y += CORRECTION
            if number_tries > 3:
                logging.error("I tired to tries so much! ;(")
                if self.asrt:
                    raise stl_utils.FormatSTLError('Cant slice')
            try:
                ans.extend(self.get_lines_in_row_old(y))
                y += STEP
                number_tries = 0
            except AssertionError:
                y += CORRECTION
                number_tries += 1

        # Cache under this method's own attribute. The original stored the
        # result in ``calculated_fully_scan``, so this memo never hit and
        # the cache of ``fully_scan`` was overwritten.
        self.calculated_fully_scan_old = ans
        return ans

    # Remeber, it doesnt work if there is edge in the row
    def get_lines_in_row_old(self, y):
        """
        Return the horizontal segments of row ``y`` as a list of Line2.

        Raises AssertionError when a candidate line does not intersect the
        row or when the number of intersections is odd.
        """
        ans = []
        intersects = []
        index = self.find_y_old(y)
        for line in self.tree_x.get(index):
            if line.isIntersect(y):
                intersects.append(line.calcIntersect(y))
            else:
                logging.info('get_lines_in_row: It can not be! ;(')
                raise AssertionError('get_lines_in_row_old: candidate line misses the row')
        if len(intersects) % 2 == 1:
            logging.error('get_lines_in_row: I have odd number of intersects %f slice %f row. Trying to increment less.'
                          % (self.z, y))
            raise AssertionError('get_lines_in_row_old: odd number of intersections')
        intersects.sort()
        # Consecutive pairs of intersections bound the filled segments.
        for i in range(len(intersects) // 2):
            p1 = Point2(intersects[2 * i], y)
            p2 = Point2(intersects[2 * i + 1], y)
            ans.append(Line2(p1, p2))
        return ans

    # this function is not used now
    def get_points_in_row(self, y):
        """
        Return the sorted x positions where row ``y`` meets the slice lines.

        Values closer than EPS to the previously kept value are dropped.
        A line whose intersection test raises AssertionError contributes
        both of its end-point x values. Returns [] when nothing is hit.
        """
        intersects = []
        for line in self.lines:
            try:
                if line.isIntersect(y):
                    intersects.append(line.calcIntersect(y))
            except AssertionError:
                intersects.append(line.p1.x)
                intersects.append(line.p2.x)
        if not intersects:
            # The original indexed intersects[0] and raised IndexError here.
            return []
        intersects.sort()
        ans = [intersects[0]]
        last = ans[0]
        # NOTE(review): nesting reconstructed from flattened source; `last`
        # is assumed to track the last value that was kept.
        for i in intersects[1:]:
            if abs(i - last) > EPS:
                ans.append(i)
                last = i
        return ans

    # find first element, >= y
    def find_y_left(self, y):
        """Return the first index whose ordinate is >= ``y`` (within EPS)."""
        l = 0
        r = len(self.sorted_y)
        while r > l:
            m = (r + l) // 2
            if self.sorted_y[m][0] + EPS > y:
                r = m
            else:
                l = m + 1
        # l == r
        return l

    # find first element, > y
    def find_y_right(self, y):
        """Return the first index whose ordinate is > ``y`` (beyond EPS)."""
        l = 0
        r = len(self.sorted_y)
        while r > l:
            m = (r + l) // 2
            if self.sorted_y[m][0] - EPS > y:
                r = m
            else:
                l = m + 1
        # l == r
        return l

    def find_y(self, y):
        """
        Return an index of ``self.sorted_y`` whose ordinate is ``equal`` to ``y``.

        Raises AssertionError when there is no such entry; ``exist`` relies
        on that exception.
        """
        l = 0
        r = len(self.sorted_y)
        while r > l:
            m = (r + l) // 2
            if equal(y, self.sorted_y[m][0]):
                return m
            if self.sorted_y[m][0] > y:
                r = m
            else:
                l = m + 1
        if l == len(self.sorted_y):
            raise AssertionError('find_y: ordinate not found')
        if equal(y, self.sorted_y[l][0]):
            return l
        # not found
        raise AssertionError('find_y: ordinate not found')

    def get_loops(self):
        """
        Chain the slice lines into closed loops and return them as Loop objects.

        Starting from each unvisited line, the walk repeatedly moves to the
        unvisited line whose ``p1`` is nearest to the current point, looking
        only at lines whose ``p1.y`` lies within CORRECTION of it. A walk
        is abandoned when the nearest candidate is farther than CORRECTION.
        Points closer than 0.5 to the previously kept point are skipped,
        and loops with fewer than three points are discarded. The result
        is memoised in ``self.calculated_get_loops``.
        """
        if self.calculated_get_loops is not None:
            return self.calculated_get_loops

        self.sorted_y = []
        for i, line in enumerate(self.lines):
            self.sorted_y.append((line.p1.y, i))
        self.sorted_y.sort()

        ans = []
        checked = [False] * len(self.lines)
        for j in range(len(self.lines)):
            if checked[j]:
                continue
            checked[j] = True
            line = self.lines[j]
            loop = [line.p1]
            p = line.p2
            missed = 0
            while p.dist(line.p1) > EPS:
                if p.dist(loop[-1]) > 0.5:
                    loop.append(p)
                else:
                    missed += 1
                nearest = False
                dist = 100
                nearest_idx = -1
                # Only lines starting in the band around p.y are candidates.
                i = self.find_y_left(p.y - CORRECTION)
                while (i < len(self.sorted_y)) and ((self.sorted_y[i][0] - CORRECTION) < p.y):
                    candidate_idx = self.sorted_y[i][1]
                    if not checked[candidate_idx]:
                        candidate = self.lines[candidate_idx]
                        if p.dist(candidate.p1) < dist:
                            dist = p.dist(candidate.p1)
                            nearest = candidate.p2
                            nearest_idx = candidate_idx
                    i += 1
                if dist > CORRECTION:
                    logging.info("Can't find nearest point. Loop is missed.")
                    loop = []
                    break
                p = nearest
                checked[nearest_idx] = True
            print("point in loop %d" % len(loop))
            print("missed point in loop %d" % missed)
            print("")
            if len(loop) > 2:
                ans.append(Loop(loop))
        self.calculated_get_loops = ans
        return ans

    # fing loops first
    def fully_scan(self):
        """
        Scan the slice row by row using the loops found by ``get_loops``.

        Every loop edge is stored as ``(start, end, loop_index, is_hole)``
        and registered in ``self.tree_x``. Rows run from ``miny`` to
        ``maxy`` in STEP increments; a row that hits a stored ordinate is
        nudged by CORRECTION first. The result is memoised in
        ``self.calculated_fully_scan``.
        """
        if self.calculated_fully_scan is not None:
            return self.calculated_fully_scan
        loops = self.get_loops()
        lines = []
        for indx, loop in enumerate(loops):
            prev = loop.points[-1]
            for p in loop:
                lines.append((prev, p, indx, loop.is_hole()))
                prev = p
        self.sorted_y = []
        for (start, end, i, hole) in lines:
            self.sorted_y.append((start.y, i))
        self.sorted_y.sort()
        self.tree_x = IntervalTree(len(self.sorted_y))
        for (start, end, i, hole) in lines:
            l = self.find_y(start.y)
            r = self.find_y(end.y)
            if l > r:
                (l, r) = (r, l)
            self.tree_x.push(l, r, (start, end, i, hole))
        miny = self.ex['miny']
        maxy = self.ex['maxy']
        y = miny
        ans = []
        while y < maxy:
            while self.exist(y):
                logging.info('Correction in fully_scan')
                y += CORRECTION
            ans.extend(self.get_lines_in_row(y))
            y += STEP
        self.calculated_fully_scan = ans
        return ans

    def exist(self, y):
        """Return True when ``find_y`` locates ``y`` in ``self.sorted_y``."""
        try:
            self.find_y(y)
            return True
        except AssertionError:
            return False

    def get_lines_in_row(self, y):
        """
        Return the filled horizontal segments of row ``y`` as Line2 objects.

        Intersections are sorted by x. Entering a loop pushes its hole flag
        on a stack and leaving it pops the flag; a segment is emitted after
        every intersection while the innermost open loop is not a hole.
        Raises AssertionError when the number of intersections is odd or
        when loops are not properly nested.
        """
        ans = []
        intersects = []
        index = self.find_y_right(y)
        max_i = 0
        for (start, end, i, hole) in self.tree_x.get(index):
            if i > max_i:
                max_i = i
            line = Line2(start, end)
            if line.isIntersect(y):
                intersects.append((line.calcIntersect(y), i, hole))
        if len(intersects) % 2 != 0:
            raise AssertionError('get_lines_in_row: odd number of intersections')
        active_loop = dict()
        for i in range(max_i + 1):
            active_loop[i] = False
        intersects.sort()
        last = []
        for i in range(len(intersects)):
            x, loop_index, hole = intersects[i]
            if not active_loop[loop_index]:
                active_loop[loop_index] = True
                last.append(hole)
            else:
                active_loop[loop_index] = False
                # The pop must happen unconditionally; inside an ``assert``
                # it would be skipped under -O.
                closed = last.pop()
                if closed != hole:
                    raise AssertionError('get_lines_in_row: loops are not properly nested')
            if last and not last[-1]:
                p1 = Point2(x, y)
                p2 = Point2(intersects[i + 1][0], y)
                ans.append(Line2(p1, p2))
        return ans

    def get_shape(self):
        """
        Return the outline of the slice as Line2 objects, one per loop edge.

        The result is memoised in ``self.calculated_get_shape``.
        """
        if self.calculated_get_shape is not None:
            return self.calculated_get_shape
        loops = self.get_loops()
        ans = []
        for loop in loops:
            prev = loop.points[-1]
            for p in loop:
                ans.append(Line2(prev, p))
                prev = p
        self.calculated_get_shape = ans
        return ans
def parse_range_file(filename):
    """Return one ``parse_range(...)`` result per line of the tab-separated file."""
    with open(filename) as range_file:
        return [parse_range(*row.rstrip().split('\t')) for row in range_file]


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Find user networks.')
    parser.add_argument('range_file')
    parser.add_argument('user_file')
    parser.add_argument('output_file')
    args = parser.parse_args()

    tree = IntervalTree.build_tree(parse_range_file(args.range_file))
    if not tree:
        print('Range file is empty!')
        exit()

    # One output line per user: the matching range names or "not found".
    with open(args.output_file, 'w+') as output:
        for user, ip in parse_user_file(args.user_file):
            res = tree.find(ip)
            if not res:
                output.write('%s\tnot found\n' % user)
            else:
                names = ','.join(map(lambda x: x.name, res))
                output.write('%s\t%s\n' % (user, names))
class Slice:
    """
    Cross-section of an STL model at a fixed height ``z``.

    The slice is a list of 2D lines (``self.lines``) with its bounding box
    in ``self.ex``, the sorted end-point ordinates in ``self.sorted_y`` and
    an interval tree over them in ``self.tree_x``.

    Several methods signal "this row cannot be processed" by raising
    AssertionError, which the scanning methods catch. The exception is
    raised explicitly rather than through ``assert`` so the control flow
    also works when Python runs with ``-O``.
    """

    def __init__(self, model, z):
        """
        Cut ``model`` at height ``z`` and index the resulting lines.

        For a loaded model, SizeSliceError is raised when the model exceeds
        MAXSIZE or has more than MAXFACETS facets.
        """
        self.stl_model = model
        self.z = z
        self.lines = []
        self.sorted_y = []
        self.ex = {'minx': MAXSIZE, 'maxx': -MAXSIZE, 'miny': MAXSIZE, 'maxy': -MAXSIZE}
        # NOTE(review): nesting reconstructed from flattened source; only
        # the checks and the facet scan are assumed to depend on ``loaded``.
        if self.stl_model.loaded:
            if model.max_size() > MAXSIZE:
                logging.error("Cant slice %.2f model. The max size is %.2f" % (model.max_size(), MAXSIZE))
                raise SizeSliceError("Cant slice so big model")
            if len(model.facets) > MAXFACETS:
                logging.error("Cant slice %d facets. The max supposed numbers of facets is %.2f" % (len(model.facets), MAXFACETS))
                raise SizeSliceError("Cant slice so big model")
            for facet in model.facets:
                if facet.isIntersect(z):
                    line = facet.intersect(z)
                    if line.length > EPS:
                        self.lines.append(line)
        for line in self.lines:
            for p in line:
                self.ex['minx'] = min(self.ex['minx'], p.x)
                self.ex['maxx'] = max(self.ex['maxx'], p.x)
                self.ex['miny'] = min(self.ex['miny'], p.y)
                self.ex['maxy'] = max(self.ex['maxy'], p.y)
        for line in self.lines:
            self.sorted_y.append(line.p1.y)
            self.sorted_y.append(line.p2.y)
        self.sorted_y.sort()
        # making interval tree for fast search intersected lines
        self.tree_x = IntervalTree(len(self.sorted_y))
        for line in self.lines:
            l = self.find_y(line.p1.y, False)
            r = self.find_y(line.p2.y, False)
            if l > r:
                (l, r) = (r, l)
            self.tree_x.push(l, r - 1, line)

    def setHeight(self, height):
        """
        Set a new height and recompute ``self.lines``.

        Raises SizeSliceError when the model exceeds MAXSIZE.
        NOTE(review): ``self.ex``, ``self.sorted_y`` and ``self.tree_x``
        are not refreshed here -- confirm whether callers rely on that.
        """
        # Set new height and recalculate list of facets
        self.z = height
        self.lines = []
        if self.stl_model and self.stl_model.max_size() > MAXSIZE:
            # The original formatted ``model.max_size()``, but ``model`` is
            # not defined in this method (NameError).
            logging.error("Cant slice %.2f model. The max size is %.2f" % (self.stl_model.max_size(), MAXSIZE))
            raise SizeSliceError("Cant slice so big model")
        for facet in self.stl_model.facets:
            if facet.isIntersect(self.z):
                self.lines.append(facet.intersect(self.z))

    def __len__(self):
        """Return the number of lines in the slice."""
        return len(self.lines)

    # Used it for find first index, self.sorted_y[idx] > y.
    # If there are numbers, equal with y, answer may be any index of them.
    def find_y(self, y, asrt=True):
        """
        Binary-search ``self.sorted_y`` for the first entry greater than ``y``.

        With ``asrt`` set, AssertionError is raised as soon as a probed
        entry is ``equal`` to ``y``.
        """
        l = 0
        r = len(self.sorted_y)
        while r > l:
            m = (r + l) // 2
            if asrt:
                if equal(self.sorted_y[m], y):
                    logging.info('You want find_y(%.3f) with Assert mode, but there are such y' % y)
                    raise AssertionError('find_y: y coincides with a stored ordinate')
            if self.sorted_y[m] > y:
                r = m
            else:
                l = m + 1
        # l == r
        return l

    # simple fully scan each STEP row
    # returns list[Line2]
    def fully_scan(self):
        """
        Scan the slice row by row (every STEP) and return the row segments.

        Returns [] when the slice has at most one line. A row that cannot
        be processed is retried EPS higher; after more than three
        consecutive failures FormatSTLError is raised.
        """
        if len(self.lines) <= 1:
            return []
        miny = self.ex['miny']
        maxy = self.ex['maxy']
        y = miny + EPS
        ans = []
        number_tries = 0
        while y < maxy:
            if number_tries > 3:
                logging.error("I tired to tries so much! ;(")
                raise stl_utils.FormatSTLError('Cant slice')
            try:
                ans.extend(self.get_lines_in_row(y))
                y += STEP
                number_tries = 0
            except AssertionError:
                y += EPS
                number_tries += 1
        return ans

    # scans only significant rows
    # returns list[tuple[Point2]]
    # it wasn't a good idea. no profit
    def intellectual_scan(self):
        """
        Scan only between consecutive distinct end-point ordinates.

        For every pair of ordinates at least STEP / 5 apart, the row
        segments just above the lower and just below the upper ordinate are
        paired into 4-point tuples. Raises FormatSTLError when a row cannot
        be processed or when the two rows have different segment counts.
        """
        if len(self.lines) <= 1:
            return []
        all_y = []
        for line in self.lines:
            all_y.append(line.p1.y)
            all_y.append(line.p2.y)
        all_y.sort()
        ans = []
        y_prev = all_y[0]
        for y_next in all_y[1:]:
            if y_next - STEP / 5 < y_prev:
                y_prev = y_next
                continue
            try:
                lines_prev = self.get_lines_in_row(y_prev + EPS)
                lines_next = self.get_lines_in_row(y_next - EPS)
            except Exception:
                # Narrowed from a bare ``except:`` so KeyboardInterrupt and
                # SystemExit are no longer converted into FormatSTLError.
                logging.error("Can't get_lines_in_row. %f slice, %f row" % (self.z, y_next))
                raise stl_utils.FormatSTLError("Can't get_lines_in_row. %f slice, %f row" % (self.z, y_next))
            if len(lines_prev) != len(lines_next):
                logging.error("Ooops, the lengths is not equal!")
                raise stl_utils.FormatSTLError("Ooops, the lengths is not equal! Row %f" % y_next)
            for i in range(len(lines_prev)):
                ans.append((lines_prev[i].p1, lines_prev[i].p2, lines_next[i].p2, lines_next[i].p1))
            y_prev = y_next
        return ans

    # Remeber, it doesnt work if there is edge in the row
    def get_lines_in_row(self, y):
        """
        Return the horizontal segments of row ``y`` as a list of Line2.

        Raises AssertionError when a candidate line does not intersect the
        row or when the number of intersections is odd.
        """
        ans = []
        intersects = []
        index = self.find_y(y)
        for line in self.tree_x.get(index):
            if line.isIntersect(y):
                intersects.append(line.calcIntersect(y))
            else:
                logging.info('get_lines_in_row: It can not be! ;(')
                raise AssertionError('get_lines_in_row: candidate line misses the row')
        if len(intersects) % 2 == 1:
            logging.error('get_lines_in_row: I have odd number of intersects %f slice %f row. Trying to increment less.' % (self.z, y))
            raise AssertionError('get_lines_in_row: odd number of intersections')
        intersects.sort()
        # Consecutive pairs of intersections bound the filled segments.
        for i in range(len(intersects) // 2):
            p1 = Point2(intersects[2 * i], y)
            p2 = Point2(intersects[2 * i + 1], y)
            ans.append(Line2(p1, p2))
        return ans

    # this function is not used now
    def get_points_in_row(self, y):
        """
        Return the sorted x positions where row ``y`` meets the slice lines.

        Values closer than EPS to the previously kept value are dropped.
        Returns [] when nothing is hit.
        """
        intersects = []
        for line in self.lines:
            try:
                if line.isIntersect(y):
                    intersects.append(line.calcIntersect(y))
            except AssertionError:
                intersects.append(line.p1.x)
                intersects.append(line.p2.x)
        if not intersects:
            # The original indexed intersects[0] and raised IndexError here.
            return []
        intersects.sort()
        ans = [intersects[0]]
        last = ans[0]
        # NOTE(review): nesting reconstructed from flattened source; `last`
        # is assumed to track the last value that was kept.
        for i in intersects[1:]:
            if abs(i - last) > EPS:
                ans.append(i)
                last = i
        return ans

    def make_correct_loops(self):
        """Print the number of lines and return an empty list (placeholder)."""
        print(len(self.lines))
        return []
class StlModel: def __init__(self, filename): self.filename = filename self.facets = [] self.parse() self.direction = '+Z' self.ex = dict() self.tree = IntervalTree(0) self.sorted_z = [] self.update() def __nonzero__(self): return True def update(self): logging.info("Current scales:") self.ex = self.get_extremal() self.log_scales() logging.info('Making tree for STL-model...') self.make_tree() logging.info('Finished tree.') def make_tree(self): i = 0 self.sorted_z = [] for facet in self.facets: self.sorted_z.append((facet.minz(), i)) self.sorted_z.append((facet.maxz(), i)) i += 1 self.sorted_z.sort() self.tree = IntervalTree(len(self.sorted_z)) for facet in self.facets: l = self.find_z(facet.minz(), True) r = self.find_z(facet.maxz(), False) self.tree.push(l, r - 1, facet) def intersect_facets(self, z): return self.tree.get(self.find_z(z)) #if bot: returns first element >= z # else: returns first element > z def find_z(self, z, bot=False): l = 0 r = len(self.sorted_z) while r > l: m = (r + l) // 2 if bot: if self.sorted_z[m][0] + EPS > z: r = m else: l = m + 1 else: if self.sorted_z[m][0] - EPS > z: r = m else: l = m + 1 # l == r return l def read_facet(self, f): line = f.readline().strip() if line != 'outer loop': raise ValueError('Expected "outer loop", got "%s"' % line) facet = [] line = f.readline().strip() while line != 'endloop': parts = line.split() if parts[0] != 'vertex': raise ValueError('Expected "vertex x y z", got "%s"' % line) facet.append(tuple([float(num) for num in parts[1:]])) line = f.readline().strip() line = f.readline().strip() if line != 'endfacet': raise ValueError('Expected "endfacet", got "%s"' % line) return Facet(Point3(facet[0]), Point3(facet[1]), Point3(facet[2])) def log_scales(self): e = self.ex logging.info('minx = %.3f \t maxx = %.3f' % (e['minx'], e['maxx'])) logging.info('miny = %.3f \t maxy = %.3f' % (e['miny'], e['maxy'])) logging.info('minz = %.3f \t maxz = %.3f' % (e['minz'], e['maxz'])) def parse_text(self): f = 
open(self.filename, 'r') logging.info('Parsing STL text model') line = f.readline().strip() parts = line.split() if parts[0] != 'solid': raise FormatSTLError('Expected "solid ...", got "%s"' % line) name = ' '.join(parts[1:]) line = f.readline().strip() while line.startswith('facet'): try: facet = self.read_facet(f) self.facets.append(facet) except AssertionError: pass line = f.readline().strip() if line != ('endsolid %s' % name) and line != "endsolid": raise FormatSTLError('Expected "endsolid %s", got "%s"' % (name, line)) def parse_bin(self): file = open(self.filename, 'rb') import struct try: header = file.read(80) logging.info('Parsing STL binary model') logging.info('HEADER: %s' % header) (count,) = struct.unpack('<I', file.read(4)) logging.info('COUNT: %d' % count) for i in range(count): normal = struct.unpack('<fff', file.read(12)) points = [] for i in range(3): points.append(struct.unpack('<fff', file.read(12))) try: f = Facet(Point3(points[0]), Point3(points[1]), Point3(points[2]) ) f.normal = Vector3(Point3(normal)) f.normal.normalize() self.facets.append(f) except AssertionError: pass attribute_byte_count = file.read(2) except: self.facets = [] raise FormatSTLError def parse(self): f = open(self.filename, 'r') data = f.read() if "facet normal" in data[0:300] and "outer loop" in data[0:300]: self.parse_text() else: self.parse_bin() def get_extremal(self): rand_point = self.facets[0].points[0] extremals = {'minx': rand_point.x, 'maxx': rand_point.x, 'miny': rand_point.y, 'maxy': rand_point.y, 'minz': rand_point.z, 'maxz': rand_point.z} for facet in self.facets: for p in facet: extremals['minx'] = min(extremals['minx'], p.x) extremals['maxx'] = max(extremals['maxx'], p.x) extremals['miny'] = min(extremals['miny'], p.y) extremals['maxy'] = max(extremals['maxy'], p.y) extremals['minz'] = min(extremals['minz'], p.z) extremals['maxz'] = max(extremals['maxz'], p.z) extremals['xsize'] = extremals['maxx'] - extremals['minx'] extremals['ysize'] = extremals['maxy'] 
- extremals['miny'] extremals['zsize'] = extremals['maxz'] - extremals['minz'] extremals['diameter'] = math.sqrt(extremals['xsize']**2 + extremals['ysize']**2 + extremals['zsize']**2) extremals['xcenter'] = (extremals['maxx'] + extremals['minx']) / 2 extremals['ycenter'] = (extremals['maxy'] + extremals['miny']) / 2 extremals['zcenter'] = (extremals['maxz'] + extremals['minz']) / 2 return extremals def changeDirection(self, direction): #This strange 3 lines make reverse transformation for i in range(3): for f in self.facets: f.changeDirection(self.direction) self.direction = direction for f in self.facets: f.changeDirection(direction) self.update() def zoom_x(self, scale): for f in self.facets: f.zoom_x(scale) self.update() def zoom_y(self, scale): for f in self.facets: f.zoom_y(scale) self.update() def zoom_z(self, scale): for f in self.facets: f.zoom_z(scale) self.update() def add_x(self, v): for f in self.facets: f.add_x(v) def add_y(self, v): for f in self.facets: f.add_y(v) def add_z(self, v): for f in self.facets: f.add_z(v) def zoom(self, scale): for f in self.facets: f.zoom(scale) self.update() def max_size(self): max_v = 0 for v in (self.ex['minx'], self.ex['maxx'], self.ex['miny'], self.ex['maxy'], self.ex['minz'], self.ex['maxz']): max_v = max(max_v, abs(v)) return max_v def centering(self): self.add_x(-self.ex['xcenter']) self.add_y(-self.ex['ycenter']) self.add_z(-self.ex['zcenter']) self.update()
class Eval:
    """
    Compare CNN predictions with peak-calling results.

    Every inferred position is labelled by whether a peak of the same
    chromosome covers it; the labels are then cross-tabulated against the
    thresholded prediction and tested with Fisher's exact test.
    """

    def __init__(self, peak_calling_file, inference_file, output):
        """
        :param peak_calling_file: the peak-calling file
        :param inference_file: the CNN prediction output
        :param output: file the labelled records are appended to
        """
        self.peak_file = peak_calling_file
        self.infer_file = inference_file
        self.peak = None
        self.infer = None
        self.interval_tree = IntervalTree()
        self.output = output

    def load_inference_file(self):
        """Read the tab-separated inference file into ``self.infer``."""
        self.infer = pd.read_csv(self.infer_file, sep="\t", encoding="utf-8")
        return None

    def load_peak_file(self):
        """Read the tab-separated peak file into ``self.peak``, indexed by "chr"."""
        self.peak = pd.read_csv(self.peak_file, sep="\t", encoding="utf-8")
        self.peak = self.peak.set_index("chr")
        return None

    def build_interval_tree(self, chr_num):
        """
        Build the interval tree of the peaks on one chromosome.

        :param chr_num: chromosome identifier
        :return: the root node of the interval tree
        :raises KeyError: when the peak table has no row for ``chr_num``
        """
        # ``.ix`` was removed in pandas 1.0. The list selector always
        # returns a DataFrame, so a chromosome with a single peak works too.
        peaks = self.peak.loc[[chr_num]]
        peak_starts = list(peaks["start"])
        peak_ends = list(peaks["end"])
        root = self.interval_tree.interval_tree(
            list(zip(peak_starts, peak_ends)))
        return root

    def cover_by_peak(self, record, root):
        """
        Check whether the position of an inferred record lies inside a peak.

        :param record: one record of the inference data (passed in by ``apply``)
        :param root: the root node of the interval tree
        :return: 1 when ``intervals_containing`` returns something other
            than 0, else 0
        """
        position = record["position"]
        res = self.interval_tree.intervals_containing(root, position)
        return 1 if res != 0 else 0

    def in_peak(self):
        """
        Label every inferred record with its peak coverage.

        Records are processed per chromosome and appended (without header)
        to ``self.output`` with an extra "meth" column. Chromosomes without
        any peak are skipped.
        """
        for chr_num, group in self.infer.groupby("chr"):
            try:
                root = self.build_interval_tree(chr_num)
            except KeyError:
                # No peaks on this chromosome.
                continue
            # Work on a copy so the new column is not written into a view
            # of ``self.infer``.
            group = group.copy()
            group["meth"] = group.apply(self.cover_by_peak, axis=1, args=(root, ))
            group.to_csv(self.output, sep="\t", index=False, header=False, mode="a")

    def crosstab(self, cover, y_pred):
        """
        Compute the four cells of the 2x2 contingency table.

        The two variables are the prediction (negative / positive) and
        whether the sample is covered by a macs2 peak.

        :param cover: per sample, whether it is actually covered by a peak
        :param y_pred: per sample, the predicted label
        :return: (pos_cover, pos_uncover, neg_cover, neg_uncover)
        """
        pos_cover = 0
        pos_uncover = 0
        neg_cover = 0
        neg_uncover = 0
        for covered, predicted in zip(cover, y_pred):
            if covered == 1 and predicted == 1:
                pos_cover += 1
            elif covered == 0 and predicted == 1:
                pos_uncover += 1
            elif covered == 1 and predicted == 0:
                neg_cover += 1
            else:
                neg_uncover += 1
        return pos_cover, pos_uncover, neg_cover, neg_uncover

    def fisher_test(self, pos_cover, pos_uncover, neg_cover, neg_uncover):
        """
        Run a one-sided ("greater") Fisher exact test on the 2x2 table.

        :param pos_cover: classified positive and covered by a peak
        :param pos_uncover: classified positive but not covered by a peak
        :param neg_cover: classified negative and covered by a peak
        :param neg_uncover: classified negative and not covered by a peak
        :return: the p-value
        """
        oddsratio, pvalue = fisher_exact(
            [[pos_cover, pos_uncover], [neg_cover, neg_uncover]],
            alternative="greater")
        return pvalue

    def eval(self):
        """
        Run the whole evaluation and print the table cells and the p-value.

        Columns 0, 1 and 3 of the output file are read back as "chr",
        "logits" and "label". A sample is predicted positive when its
        logit is above 0.62.
        """
        self.load_inference_file()
        self.load_peak_file()
        self.in_peak()
        threshold = 0.62
        data = pd.read_csv(self.output, sep="\t", usecols=[0, 1, 3], header=None, encoding="utf-8")
        data.columns = ["chr", "logits", "label"]
        y_score = (data["logits"].values > threshold).astype(int)
        # Four cells of the contingency table: prediction negative/positive
        # versus covered / not covered by a peak.
        pos_cover, pos_uncover, neg_cover, neg_uncover = self.crosstab(
            data["label"].values, y_score)
        print(pos_cover, pos_uncover, neg_cover, neg_uncover)
        p_val = self.fisher_test(pos_cover, pos_uncover, neg_cover, neg_uncover)
        print(p_val)
class PolygonErrorFilter(ErrorFilter):
    """
    Error filter that keeps errors located inside a polygon.

    The polygon is downloaded as WKT from polygons.openstreetmap.fr. Its
    ring edges are stored in an interval tree keyed on their y range, so a
    point-in-polygon test only has to look at the edges crossing the
    point's y.
    """

    def __init__(self, polygon_id, cache_delay=60):
        """Download polygon ``polygon_id`` and build the edge index."""
        polygon_url = "http://polygons.openstreetmap.fr/"
        # The index page is requested first; its response is not used.
        # NOTE(review): presumably this makes the server prepare the
        # polygon -- confirm before removing.
        downloader.urlread(polygon_url + "index.py?id="+str(polygon_id), cache_delay)
        wkt = downloader.urlread(
            polygon_url + "get_wkt.py?params=0&id="+str(polygon_id), cache_delay)
        if wkt.startswith("SRID="):
            # Drop everything up to and including the first ';'.
            wkt = wkt.split(";", 1)[1]
        self.polygon = loads(wkt)
        self.build()

    def sameVDir(self, x1, y1, x2, y2, x3, y3):
        """Return whether segment 2->3 keeps the vertical direction of segment 1->2."""
        # Check if next segment have same direction again vertical.
        return y2 < y3 if y1 < y2 else y2 > y3

    class Interval:
        """One polygon edge, exposing ``start``/``stop`` for the interval tree."""

        def __init__(self, x1, y1, x2, y2, sameDir):
            # Segment
            self.x1, self.y1 = x1, y1
            self.x2, self.y2 = x2, y2
            self.sameDir = sameDir
            # Need for IntervalTree
            self.start = min(y1, y2)
            self.stop = max(y1, y2)

        def __repr__(self):
            return "(%s,%s)-(%s, %s)" % (self.x1, self.y1, self.x2, self.y2)

    def build_polygon(self, coords):
        """
        Return one Interval per edge of the ring ``coords``.

        Point i is joined to point i+1 (wrapping around); the direction
        flag compares that edge with the following one.
        """
        (x, y) = coords.xy
        n = len(x)
        edges = []
        for cur in range(n):
            nxt = (cur + 1) % n
            after = (cur + 2) % n
            keeps_direction = self.sameVDir(x[cur], y[cur], x[nxt], y[nxt],
                                            x[after], y[after])
            edges.append(self.Interval(x[cur], y[cur], x[nxt], y[nxt],
                                       keeps_direction))
        return edges

    def build(self):
        """Index the edges of every ring of ``self.polygon`` in ``self.tree``."""
        edges = []
        for poly in self.polygon:
            edges.extend(self.build_polygon(poly.exterior.coords))
            for hole in poly.interiors:
                edges.extend(self.build_polygon(hole.coords))
        self.tree = IntervalTree(edges)

    def point_inside_polygon(self, x, y):
        """
        Return whether point (x, y) is inside the polygon.

        Only the edges returned by ``self.tree.find(y, y)`` are examined;
        ``inside`` is flipped for every edge counted as a crossing.
        """
        inside = False
        # NOTE(review): nesting reconstructed from flattened source; the
        # horizontal-edge branch is assumed to pair with the outer test.
        for edge in self.tree.find(y, y):
            if edge.y1 == edge.y2:
                # Horizontal edge
                if x <= max(edge.x1, edge.x2):
                    inside = not inside
                continue
            if edge.y2 != y or edge.sameDir:
                # This is a true cross and not a tangent
                xinters = (y-edge.y1)*(edge.x2-edge.x1)/(edge.y2-edge.y1)+edge.x1
                if x < xinters:
                    inside = not inside
        return inside

    def apply(self, classs, subclass, geom):
        """
        Return whether any position of ``geom`` is inside the polygon.

        Returns False when ``geom`` has no "position" entry. Every position
        is evaluated, even after a hit.
        """
        if "position" not in geom:
            return False
        hits = []
        for position in geom["position"]:
            lat = float(position["lat"])
            lon = float(position["lon"])
            hits.append(self.point_inside_polygon(lon, lat))
        return any(hits)
def sort_contours_by_level(contours):
    """Group contours by nesting level.

    Contours that exist at a common height are compared pairwise with
    ``point_in_contour`` (using each contour's start point) to build
    parent/child trees; the trees are then flattened level by level.

    Args:
        contours: iterable of contour objects exposing ``name()``,
            ``x_min``, ``x_max``, ``y_min``, ``y_max``, ``start_x`` and
            ``start_y``.

    Returns:
        A tuple ``(levels, corner)``. ``levels`` is a list of lists:
        ``levels[0]`` holds the contours at the root of every nesting tree,
        ``levels[1]`` their direct children, and so on. ``corner`` is
        ``(layout_x_max, layout_y_max)`` over all contours.
    """
    # TODO: handle pre-closed contours. (Circles, ellipses, etc.)
    # Values are contour lists, since multiple contours can share the same
    # height interval. This dict is the input used to build the tree.
    height_interval_to_contours = {}
    contour_tree = IntervalTree()
    heights = set()
    contours_by_name = {}
    nested_contour_tree_items = {}  # contour name -> tree node

    # Extremes of the whole layout.
    layout_y_min = math.inf
    layout_y_max = -math.inf
    layout_x_min = math.inf
    layout_x_max = -math.inf

    for contour in contours:
        contours_by_name[contour.name()] = contour
        height_interval_to_contours.setdefault(
            (contour.y_min, contour.y_max), []).append(contour)

        layout_y_min = min(layout_y_min, contour.y_min)
        layout_y_max = max(layout_y_max, contour.y_max)
        layout_x_min = min(layout_x_min, contour.x_min)
        layout_x_max = max(layout_x_max, contour.x_max)

        # Query the tree later at the vertical midpoint of every contour.
        heights.add((contour.y_max - contour.y_min) / 2 + contour.y_min)

    print("Packing Contours into Interval Tree for sorting speedup.")
    contour_tree.build(layout_y_min, layout_y_max,
                       height_interval_to_contours)

    def _node_for(name):
        # Reuse the node already registered for this name; only create a
        # new one when there is none. The caller decides when to store it.
        node = nested_contour_tree_items.get(name)
        if node is None:
            node = Node(name)
        return node

    print("Constructing in-out contour relationships.")
    for height in heights:
        # All contours that exist at this height: drop the keys of the
        # query results and flatten the remaining contour lists.
        contours_at_height = [
            contour
            for item in contour_tree.query(height)
            for contour in item[1]
        ]

        # NOTE(review): a contour enclosed by several others gets its parent
        # overwritten by each match, so the final parent depends on
        # iteration order -- confirm this is intended.
        for a_index, contour_a in enumerate(contours_at_height):
            name_a = contour_a.name()
            node_a = _node_for(name_a)
            point_a = (contour_a.start_x, contour_a.start_y)

            for contour_b in contours_at_height[a_index + 1:]:
                point_b = (contour_b.start_x, contour_b.start_y)
                if point_in_contour(point_a, contour_b):
                    # a is inside b: contour_b is contour_a's parent.
                    name_b = contour_b.name()
                    node_b = _node_for(name_b)
                    node_a.parent = node_b
                    nested_contour_tree_items[name_b] = node_b
                elif point_in_contour(point_b, contour_a):
                    # b is inside a: contour_a is contour_b's parent.
                    name_b = contour_b.name()
                    node_b = _node_for(name_b)
                    node_b.parent = node_a
                    nested_contour_tree_items[name_b] = node_b

            nested_contour_tree_items[name_a] = node_a

    print("Organizing contours by depth")
    # Keyed by level (int); values are the contours living at that level.
    depth_lists = OrderedDict()

    # Contours may be spread over several separate trees. Take any remaining
    # node, climb to its root, and consume that whole tree level by level
    # until no node is left.
    while nested_contour_tree_items:
        node = next(iter(nested_contour_tree_items.values()))
        while node.parent is not None:
            node = node.parent

        # https://anytree.readthedocs.io/en/latest/api/anytree.iterators.html#anytree.iterators.levelordergroupiter.LevelOrderGroupIter
        names_per_level = [[member.name for member in group]
                           for group in LevelOrderGroupIter(node)]
        for index, names in enumerate(names_per_level):
            level = depth_lists.setdefault(index, [])
            for contour_name in names:
                level.append(contours_by_name[contour_name])
                del nested_contour_tree_items[contour_name]

    # Return the serialized tree and a starting point.
    return list(depth_lists.values()), (layout_x_max, layout_y_max)
#lens.append(len(res)) else: for i in range(tries): res = tree.find(start, end) res.sort(key=operator.attrgetter('start')) lens.append("%i:%s" % (len(res), [x.start for x in res[-1:]])) #lens.append(len(res)) t1 = time.time() return res, t1 - t0, lens start_max = STOP * 3 while True: intervals = rands(N, start_max=start_max) t0 = time.time() tree = IntervalTree(intervals) t1 = time.time() print "time to build IntervalTree with %i intervals: %.3f" % (N, t1 - t0) t0 = time.time() ints = Intersecter(intervals) t1 = time.time() print "time to build Intersector with %i intervals: %.3f" % (N, t1 - t0) found, t, tree_lens = search(tree, START, STOP, TRIES) print "time to search tree %i times: %.3f. found %i intervals" % ( TRIES, t, len(found)) found, t, brute_lens = search(intervals, START, STOP, TRIES) print "time to search brute %i times: %.3f. found %i intervals" % ( TRIES, t, len(found))
import time

from ranges_gen import gen_ranges
from users_gen import gen_users
from find_ip import parse_ip
from interval_tree import IntervalTree, Segment

if __name__ == '__main__':
    # Benchmark: for 14 range counts, doubling from 256, time how long it
    # takes to build the tree and to look up every generated user's IP.
    lowest_ip = parse_ip('5.0.0.0')
    highest_ip = parse_ip('10.0.0.0')
    users = list(gen_users(1000, lowest_ip, highest_ip))

    print(' %s | %s | %s' % ('ranges', 'build tree', 'find 1000 users'))

    range_count = 256
    for _ in range(14):
        segments = []
        for bounds in gen_ranges(range_count, lowest_ip, highest_ip):
            segments.append(Segment(*bounds))

        # Time the tree construction alone.
        build_started = time.time()
        tree = IntervalTree.build_tree(segments)
        build_elapsed = time.time() - build_started

        # Time one lookup per user; the lookup results are discarded.
        lookup_started = time.time()
        for _user, ip in users:
            tree.find(ip)
        lookup_elapsed = time.time() - lookup_started

        print('%10d | %10fs | %10fs'
              % (range_count, build_elapsed, lookup_elapsed))
        range_count *= 2