def interval_intersect_interval(**kwargs):
    """
    Flag which ground-truth and predicted intervals overlap one another.

    Interval endpoints are handed straight to the interval tree, so unix
    timestamps and datetime objects are both accepted.

    Keyword arguments:
    ------------------
    groundtruth: sequence of segments, read as segment[0] and segment[1]
    prediction: sequence of segments, read as segment[0] and segment[1]

    Return:
    -------
    dict with the keys
    prediction_gt: list with one entry per prediction, 1 if the prediction
        overlaps at least one ground-truth segment, 0 if not
    recall_gt: list with one entry per ground-truth segment, 1 if it
        overlaps at least one prediction, 0 if not
    recall: mean of recall_gt
    precision: mean of prediction_gt
    """
    def _hit_flags(indexed, probes):
        # Index one set of segments, then mark every probe that touches it.
        index = IntervalTree()
        for span in indexed:
            index.add(Interval(span[0], span[1]))
        return [0 if len(index.search(span[0], span[1])) == 0 else 1
                for span in probes]

    labels = kwargs['groundtruth']
    guesses = kwargs['prediction']

    # recall: which labels are hit by some prediction
    recall_gt = _hit_flags(guesses, labels)
    recall = np.mean(recall_gt)

    # precision: which predictions are hit by some label
    prediction_gt = _hit_flags(labels, guesses)

    return {
        'prediction_gt': prediction_gt,
        'recall_gt': recall_gt,
        'recall': recall,
        'precision': np.mean(prediction_gt),
    }
def mergeSegments(self,segs1,segs2,ignoreInsideEnvelope=True):
    """ Given two segmentations of the same file, return the merged set of them.

    Every segment of segs1 is put into an interval tree; each segment of
    segs2 is then either added, widened to swallow partially overlapping
    segments, or used to widen the segments already stored.

    :param segs1: first segmentation; each segment is read as s[0] (begin)
        and s[1] (end)
    :param segs2: second segmentation, same layout. Segments may be modified
        in place (s[0] / s[1] are reassigned while widening), so they must
        be mutable.
    :param ignoreInsideEnvelope: if True, stored segments that lie completely
        inside a new segment are removed and the new segment is kept
    :return: list of [begin, end] lists, in the tree's iteration order

    NOTE(review): this layout was reconstructed from a flattened source; the
    final ``else`` is assumed to pair with ``if len(envelops) > 0`` - confirm.
    Under that reading nothing is done for a segment that envelops stored
    segments when ignoreInsideEnvelope is False.
    """

    from intervaltree import Interval, IntervalTree
    t = IntervalTree()

    # Put the first set into the tree
    for s in segs1:
        t[s[0]:s[1]] = s

    # Decide whether or not to put each segment in the second set in
    for s in segs2:
        overlaps = t.search(s[0],s[1])
        # If there are no overlaps, add it
        if len(overlaps)==0:
            t[s[0]:s[1]] = s
        else:
            # Search for any enveloped, if there are remove and add the new one
            # (strict=True restricts the search to intervals fully inside the range)
            envelops = t.search(s[0],s[1],strict=True)
            if len(envelops) > 0:
                if ignoreInsideEnvelope:
                    # Remove any inside the envelope of the test point
                    t.remove_envelop(s[0],s[1])
                    # Re-query: only partially overlapping intervals should remain
                    overlaps = t.search(s[0], s[1])
                    # Open out the region, delete the other
                    # NOTE(review): an interval that both starts before s[0]
                    # and ends after s[1] reaches t.remove(o) twice - confirm
                    # that the second call cannot raise.
                    for o in overlaps:
                        if o.begin < s[0]:
                            s[0] = o.begin
                            t.remove(o)
                        if o.end > s[1]:
                            s[1] = o.end
                            t.remove(o)
                    t[s[0]:s[1]] = s
            else:
                # Check for those that intersect the ends, widen them out a bit
                # (the widened interval replaces the stored one; its data is a tuple)
                for o in overlaps:
                    if o.begin > s[0]:
                        t[s[0]:o[1]] = (s[0],o[1])
                        t.remove(o)
                    if o.end < s[1]:
                        t[o[0]:s[1]] = (o[0],s[1])
                        t.remove(o)

    # Flatten the tree back into plain [begin, end] pairs
    segs = []
    for a in t:
        segs.append([a[0],a[1]])
    return segs
def get_multilines(spans):
    """
    Yield one Multiline per distinct chunk produced by splitting overlaps.

    Each span is a (start, stop, type) triple. Every span becomes a Line
    whose level is chosen by get_free_level from the lines overlapping it;
    the intervals are then split at overlap boundaries and the lines sharing
    a chunk are emitted together, ordered by level, chunks in sorted order.
    """
    tree = Intervals()
    created = []
    for begin, end, kind in spans:
        item = Line(begin, end, kind, level=None)
        tree.addi(begin, end, item)
        created.append(item)

    # level: assigned only once every line is in the tree
    for item in created:
        item.level = get_free_level(tree.search(item.start, item.stop))

    # chunk
    tree.split_overlaps()

    # group
    by_chunk = defaultdict(list)
    for chunk in tree:
        begin, end, item = chunk
        by_chunk[begin, end].append(item)

    for bounds in sorted(by_chunk):
        ordered = sorted(by_chunk[bounds], key=lambda member: member.level)
        yield Multiline(bounds[0], bounds[1], ordered)
def point_intersect_interval(points, df_interval):
    """
    Mark which points fall inside an interval and which intervals are hit.

    Parameters
    ----------
    points: indexable sequence of points (indexed as points[i])
    df_interval: dataframe with columns 'start' and 'end', one interval
        per row

    Return:
    -------
    dict with the keys
    points_gt: boolean array shaped like points, True where the point lies
        in at least one interval
    interval_gt: list with one boolean per row of df_interval, True where
        the interval contains at least one point
    """
    n_intervals = df_interval.shape[0]

    # Store the row position as the interval's data. Rows are read with
    # .iloc so that the stored position stays valid for interval_gt even
    # when the dataframe index is not 0..n-1 (e.g. after filtering).
    tree = IntervalTree()
    for i in range(n_intervals):
        tree[df_interval['start'].iloc[i]:df_interval['end'].iloc[i]] = i

    points_gt = np.zeros_like(points).astype(bool)
    interval_gt = [False] * n_intervals
    for i in range(len(points)):
        intersection = tree.search(points[i])
        if intersection:
            points_gt[i] = True
            for segment in intersection:
                interval_gt[segment.data] = True

    return {'points_gt': points_gt, 'interval_gt': interval_gt}
def interval_intersect_interval(**kwargs):
    """Determine label of each segmentation based on intersection with labels

    Parameters
    ----------
    groundtruth: dataframe containing columns 'Start', 'End', 'Label'
    segmentation: dataframe containing columns 'Start', 'End'

    Return:
    -------
    label_segmentation: list of labels, one per segment that overlaps at
        least one groundtruth interval (segments without overlap are
        skipped, so this can be shorter than segmentation)
    index_covered: list of row positions in segmentation, parallel to
        label_segmentation, telling which segment each label belongs to
    """
    label_segmentation = []
    gt = kwargs['groundtruth']
    segmentation = kwargs['segmentation']

    # The interval data is the row position, later used with .iloc; read
    # the bounds positionally as well so a non-default index cannot
    # desynchronise the two.
    tree = IntervalTree()
    for i in range(gt.shape[0]):
        tree.add(Interval(gt['Start'].iloc[i], gt['End'].iloc[i], i))

    index_covered = []
    n_segments = segmentation.shape[0]
    for i in range(n_segments):
        interval = (segmentation.Start.iloc[i], segmentation.End.iloc[i])
        overlapping_labels = sorted(tree.search(interval[0], interval[1]))

        if not overlapping_labels:
            logger.debug("A segment does not have overlapping groundtruth")
            continue

        if len(overlapping_labels) == 1:
            # only one overlapping label
            best = overlapping_labels[0]
        else:
            # majority voting:
            # if there are multiple overlapping labels,
            # select the one with largest overlap
            overlap_time = [_get_overlap(label, interval)
                            for label in overlapping_labels]
            best = overlapping_labels[int(np.argmax(np.array(overlap_time)))]

        label_segmentation.append(gt['Label'].iloc[best.data])
        index_covered.append(i)

    # The share of unlabeled segments is relative to ALL segments; the two
    # result lists always have equal length and cannot be compared for this.
    if n_segments:
        unlabeled = 100.0 * (n_segments - len(index_covered)) / n_segments
    else:
        unlabeled = 0.0
    logger.info("Percentage of segments that do not have label: {}%".format(
        unlabeled))

    return label_segmentation, index_covered
def get_merged_variants(self, variants, key=None):
    # type: (List[vcfio.Variant], str) -> Iterable[vcfio.Variant]
    """Yield merged variants first, then the split non-variant records.

    Each input record is aligned with the window; non-variants are collected
    in an interval tree while variants are grouped by their first merge key.
    Both groups are merged, the calls of one overlapping merged non-variant
    are appended to every merged variant, and finally the non-variants are
    split around the variants that were emitted.
    """
    reference_blocks = IntervalTree()
    variants_by_key = collections.defaultdict(list)
    for record in variants:
        self._align_with_window(record, key)
        if not self._is_non_variant(record):
            merge_key = next(self._move_to_calls.get_merge_keys(record))
            variants_by_key[merge_key].append(record)
        else:
            reference_blocks.addi(record.start, record.end, record)

    merged_blocks = self._merge_non_variants(reference_blocks)
    merged_variants = self._merge_variants(variants_by_key)

    # Rebuild the tree so that it only holds the merged non-variants.
    reference_blocks.clear()
    for block in merged_blocks:
        reference_blocks.addi(block.start, block.end, block)

    splits = IntervalTree()
    for record in merged_variants:
        hits = reference_blocks.search(record.start, record.end)
        if hits:
            # Only one (arbitrary) overlapping non-variant contributes calls.
            covering = next(iter(hits)).data
            record.calls.extend(covering.calls)
            record.calls = sorted(record.calls)
        self._update_splits(splits, record)
        yield record

    for block in self._split_non_variants(reference_blocks, splits):
        yield block
def test_brackets_vs_search():
    """Slice lookup ``tree[begin:end]`` must equal ``tree.search(begin, end)``."""
    tree = IntervalTree()
    for begin, end, label in ((1, 3, "dude"), (2, 4, "sweet"), (6, 9, "rad")):
        tree.addi(begin, end, label)

    for member in tree:
        by_slice = tree[member.begin:member.end]
        by_search = tree.search(member.begin, member.end)
        assert by_slice == by_search
def point_intersect_interval(points, df_interval):
    """
    Mark which points lie inside an interval and which intervals are hit.

    Expect both points and df_interval to be datetime object.

    Returns a dict with 'points_gt' (boolean array shaped like points) and
    'interval_gt' (list with one boolean per row of df_interval).
    """
    n_rows = df_interval.shape[0]

    # the data stored with each interval is its row position
    tree = IntervalTree()
    for row in range(n_rows):
        tree[df_interval['start'].iloc[row]:df_interval['end'].iloc[row]] = row

    points_gt = np.zeros_like(points).astype(bool)
    interval_gt = [False] * n_rows
    for position in range(len(points)):
        hits = tree.search(points[position])
        points_gt[position] = len(hits) != 0
        for hit in hits:
            interval_gt[hit.data] = True

    return {'points_gt': points_gt, 'interval_gt': interval_gt}
def countIdealOverlaps(self, nodes):
    """Store on each node the nodes whose ideal extent overlaps its own.

    All nodes are indexed by their (idealLeft(), idealRight()) range; each
    node then gets ``overlaps`` (list of overlapping nodes, which includes
    every node found by the search) and ``overlapCount`` (size of that
    search result).
    """
    index = IntervalTree()
    for item in nodes:
        index.addi(item.idealLeft(), item.idealRight(), data=item)

    for item in nodes:
        hits = index.search(item.idealLeft(), item.idealRight())
        neighbours = []
        for hit in hits:
            neighbours.append(hit.data)
        item.overlaps = neighbours
        item.overlapCount = len(hits)
def original_print():
    """Print bracket-with-comma lookups, then search() lookups, per interval."""
    tree = IntervalTree()
    for begin, end, label in ((1, 3, "dude"), (2, 4, "sweet"), (6, 9, "rad")):
        tree.addi(begin, end, label)

    for member in tree:
        # comma instead of a slice: gives set(), should be using :
        print(tree[member.begin, member.end])

    for member in tree:
        print(tree.search(member.begin, member.end))
def find_overlapping_intervals(
        intervals: t.List[Interval]) -> t.List[Interval]:
    """
    Return any (but possibly not all) overlapping intervals.

    The first interval whose search yields more than one hit (i.e. more than
    itself) decides the result, which is returned exactly as produced by
    ``tree.search``. An empty list is returned when nothing overlaps.
    """
    tree = IntervalTree(intervals)
    for candidate in tree:
        hits = tree.search(candidate)
        if len(hits) <= 1:
            continue
        return hits
    return []
def get_overlapping_intervals(ranges_a, ranges_b):
    """
    Return a sorted list of the intersections between two sets of ranges.

    The longer input is indexed in an interval tree and the shorter one is
    used to query it; every hit is reduced with ``_intersect``.
    """
    if len(ranges_a) < len(ranges_b):
        indexed, probes = ranges_b, ranges_a
    else:
        indexed, probes = ranges_a, ranges_b

    tree = IntervalTree()
    for span in indexed:
        tree.add(Interval(span[0], span[1]))

    found = []
    for probe in probes:
        for hit in tree.search(probe[0], probe[1]):
            found.append(_intersect(hit, probe))
    found.sort()
    return found
def get_multilines(spans):
    """
    Yield one Multiline per distinct chunk produced by splitting overlaps.

    Spans are (start, stop, type) triples and are processed in sorted order;
    each new Line receives the level get_free_level picks from the lines
    already in the tree that overlap it. After splitting overlaps, the lines
    sharing a chunk are sorted and emitted together, chunks in sorted order.
    """
    tree = Intervals()

    # level
    for begin, end, kind in sorted(spans):
        taken = tree.search(begin, end)
        free = get_free_level(taken)
        tree.addi(begin, end, Line(begin, end, kind, free))

    # chunk
    tree.split_overlaps()

    # group
    by_chunk = defaultdict(list)
    for chunk in tree:
        begin, end, item = chunk
        by_chunk[begin, end].append(item)

    for bounds in sorted(by_chunk):
        yield Multiline(bounds[0], bounds[1], sorted(by_chunk[bounds]))
def read_rttm(input_path):
    """Read a RTTM file indicating gold diarization

    Every line contributes the interval [onset, onset + duration]. Before an
    interval is stored it is truncated with ``remove_overlap`` against the
    intervals already in the tree, so the returned tree holds no overlaps.

    :param input_path: path to the RTTM file
    :return: tuple (sad, fname) with the IntervalTree of speech intervals and
        the file name found on the last parsed line ("" if there was none)
    :raises ValueError: if a non-blank line has fewer than 9 fields or a
        non-numeric onset/duration
    """
    with open(input_path, 'r') as fin:
        # RTTM format is
        # SPEAKER fname 1 onset duration <NA> <NA> spkr <NA>
        # (some writers append a tenth field; extra fields are ignored)
        rttm = fin.readlines()

    sad = IntervalTree()
    fname = ""
    for line in rttm:
        fields = line.split()
        if not fields:
            # Blank lines (e.g. a trailing newline) carry no interval.
            continue
        if len(fields) < 9:
            raise ValueError(
                "Malformed RTTM line, expected at least 9 fields: "
                "{!r}".format(line))

        fname = fields[1]
        dur = float(fields[4])
        if dur == 0:
            # Remove empty intervals
            continue
        elif dur < 0:
            print(
                "{} shows an interval with negative duration."
                " Please inspect file, this shouldn't happen".format(line))
            continue
        onset = float(fields[3])
        interval = Interval(onset, onset + dur)

        # Search for intervals already added that overlap with current
        # interval. If we find some, then we truncate the current
        # interval to remove all overlaps
        ov = sad.search(interval)
        interval, other_intervals = remove_overlap(ov, interval)
        if interval[0] == interval[1]:
            # continue if interval was removed
            continue
        sad.add(interval)

        # if other_intervals is not empty, add these intervals to tree
        for new_interv in other_intervals:
            if new_interv[0] == new_interv[1]:
                # skip pieces that were truncated away completely
                continue
            sad.add(new_interv)
    return sad, fname
class GenomeAnnotation(object):
    """
    represents a genbank file and allows to efficiently annotate positions of
    interest

    Parsed features are kept in an interval tree (``genome_tree``) for
    position lookups and in three dictionaries keyed by gene name
    (``gene_dic``), locus tag (``locus_dic``) and feature type
    (``type_dic``, which maps to a list of entries).
    """
    COLUMNS = [
        "type", "name", "locus", "product", 'protein_id', "strand", "start",
        "end"
    ]

    def __init__(self, genbank_file):
        """
        initializes the GenomeAnnotation object

        :param genbank_file: a path to a genbank file
        """
        self.genome_tree = IntervalTree()
        self.gene_dic = {}
        self.locus_dic = {}
        self.type_dic = {}
        self.genome_id = None
        self.length = None
        self.__read_genbank(genbank_file)

        # internal data structure for quick internal nearest gene search if
        # position is not annotated: both ends of every CDS/gene, sorted by
        # position. A file without CDS or gene entries yields empty lists
        # instead of a KeyError.
        tmp = []
        for v in (self.type_dic.get("CDS", []) + self.type_dic.get("gene", [])):
            tmp.extend([(v.start, v), (v.end, v)])
        tmp.sort(key=lambda x: x[0])
        self.__index_list = [pos for pos, _ in tmp]
        self.__cds_list = [cds for _, cds in tmp]

    def _store_entry(self, entry):
        """
        inserts a non-gene entry into the interval tree and lookup tables

        :param entry: the GenomeEntry to store
        """
        if entry.start > entry.end:
            # start lies behind end: stored as two intervals,
            # [start, genome length) and [0, end)
            self.genome_tree.addi(entry.start, self.length, entry)
            self.genome_tree.addi(0, entry.end, entry)
        else:
            self.genome_tree.addi(entry.start, entry.end, entry)

        self.locus_dic[entry.locus] = entry
        self.type_dic.setdefault(entry.type, []).append(entry)
        if entry.name is not None:
            self.gene_dic[entry.name] = entry

    def _store_gene_only_entries(self, genes):
        """
        stores the "gene" entries whose locus has no other annotated entry

        :param genes: list of GenomeEntry objects of type "gene"
        """
        for p in genes:
            # if this is true gene did not have another entry
            if p.locus not in self.locus_dic:
                self.locus_dic[p.locus] = p
                self.type_dic.setdefault(p.type, []).append(p)
                self.genome_tree.addi(p.start, p.end, p)
                if p.name is not None:
                    self.gene_dic[p.name] = p

    def __read_genbank(self, genbank_file):
        """
        reads the genbank file and stores its content in a interval tree
        and other searchable containers for efficient querying

        The feature types to collect are taken from the COMMENT block of the
        file; the FEATURES block is then scanned line by line up to ORIGIN.

        :param genbank_file: a path to a genbank file
        """
        pseudogenes = []
        with open(genbank_file, "r") as f:
            my_type = name = locus = product = product_id = None
            strand = start = end = None
            annotated_features = set()

            # states
            gathering = False
            comment_block = False
            annotation_block = False

            for line in f:
                # skip empty lines
                if line.strip() == "":
                    continue

                splits = line.split()
                if splits[0].startswith("LOCUS"):
                    self.genome_id = splits[1].strip()
                    self.length = int(splits[2].strip())

                # are we at the end of the annotation block?
                if splits[0].startswith("ORIGIN"):
                    break

                # check for parsing stage
                if splits[0].startswith("COMMENT"):
                    comment_block = True

                if splits[0].startswith("FEATURES"):
                    annotation_block = True
                    comment_block = False

                # COMMENT block feature annotation
                if comment_block and splits[0].startswith("Fe"):
                    gathering = True
                    for an in splits[3:]:
                        if not an.startswith("Gene"):
                            annotated_features.add(an.split(";")[0])
                        else:
                            annotated_features.add("gene")

                # FEATURES Block here we found an entry that we want to gather
                if (annotation_block and splits[0] in annotated_features
                        and ".." in splits[1]):
                    # first add already gathered entry into data structures
                    if locus is not None:
                        entry = GenomeEntry(my_type, name, locus, product,
                                            product_id, strand, start, end)
                        # if its a gene annotation than first store it in
                        # temp for later processing
                        if my_type == "gene":
                            pseudogenes.append(entry)
                        else:
                            self._store_entry(entry)

                    # start the new feature from a clean slate so that
                    # qualifiers of the previous one cannot leak into it
                    name = locus = product = product_id = None
                    gathering = True
                    my_type = splits[0]

                    # determine strand, start and end
                    if splits[1].startswith('comp'):
                        interval = splits[1].strip('complement()')
                        strand = '-'
                    else:
                        interval = splits[1]
                        strand = '+'
                    start, end = (int(x) - 1 for x in interval.split('..'))

                    # TODO: this has to be fixed in the genbank file
                    if start == end:
                        end += 1

                # gather annotated elements
                if gathering:
                    # if we are in the comment block than we are gathering
                    # annotated features
                    if comment_block:
                        if "::" in splits:
                            gathering = False
                        else:
                            for s in splits:
                                annotated_features.add(s.split(";")[0])

                    # if we are in the annotation block than we gather infos
                    # distributed over multiple lines
                    if annotation_block:
                        value = line.split("=")[-1].replace('"', '')
                        if splits[0].startswith("/locus"):
                            locus = value.replace("_", "").strip()
                        elif splits[0].startswith("/product"):
                            product = value.strip()
                        elif splits[0].startswith("/gene"):
                            name = value.strip()
                        elif splits[0].startswith("/protein_id"):
                            product_id = value.strip()

            # end of file: the last gathered feature is still pending
            if locus is not None:
                entry = GenomeEntry(my_type, name, locus, product, product_id,
                                    strand, start, end)
                if my_type == "gene":
                    pseudogenes.append(entry)
                else:
                    # stored under its own feature type, like every other
                    # entry (not under the builtin ``type``)
                    self._store_entry(entry)

        self._store_gene_only_entries(pseudogenes)

    def _read_genbank2(self, genbank_file):
        """
        alternative reader that parses the genbank file with SeqIO and fills
        the same containers as the line based reader

        :param genbank_file: a path to a genbank file
        """
        gene_tmp = []
        nop = [None]
        with open(genbank_file, "r") as gbk:
            anno = SeqIO.read(gbk, "genbank")
            self.genome_id = anno.id
            self.length = len(anno)
            for rec in anno.features:
                if rec.type == "source":
                    continue

                entry = GenomeEntry(
                    rec.type,
                    rec.qualifiers.get("gene", nop)[0],
                    rec.qualifiers.get("locus_tag", nop)[0],
                    rec.qualifiers.get("product", nop)[0],
                    rec.qualifiers.get("protein_id", nop)[0],
                    # -1 is truthy, so the strand has to be compared
                    # explicitly to tell reverse from forward
                    "-" if rec.strand == -1 else "+",
                    int(rec.location.start) - 1,
                    int(rec.location.end) - 1)

                if entry.type == "gene":
                    gene_tmp.append(entry)
                else:
                    self._store_entry(entry)

        self._store_gene_only_entries(gene_tmp)

    def __str__(self):
        return pd.DataFrame.from_records(list(self.locus_dic.values()),
                                         columns=self.COLUMNS).to_string()

    def annotate_positions(self, idx, aggregate=False):
        """
        annotates a list of positions with their associated genomic entries
        and returns a pandas dataframe with columns:
        pos, type, locus, name, product, protein_id, strand, closest,
        distance, start, end, protein_pos, codon_pos

        :param idx: list of indices (a single int is accepted as well);
                    duplicates are removed
        :param aggregate: if True, rows of the same position are joined into
                          one row of semicolon separated strings
        :return: pandas dataframe
        :raises ValueError: if an overlapping entry has a strand other than
                            '+' or '-'
        """
        # test if parameter is an iterable or int
        if isinstance(idx, int):
            idx = [idx]
        else:
            idx = list(set(idx))

        unknown = GenomeEntry("?", None, None, None, None, None, None, None)
        entries = []
        closest = []
        distance = []
        index = []
        protein_position = []
        codon_position = []

        for i in idx:
            data = self.genome_tree.search(i, strict=True)
            if data:
                # possible overlap of gene entries?
                for p in data:
                    index.append(i)
                    entries.append(p.data)
                    closest.append(None)
                    distance.append(None)

                    # calculate position within protein and codon position
                    # (1-indexed), counted from the strand specific start
                    if p.data.strand == '+':
                        offset = i - p.data.start
                    elif p.data.strand == '-':
                        offset = p.data.end - i
                    else:
                        raise ValueError(
                            "strand annotation is invalid for gene, {}".format(
                                p.data.locus))

                    # int() rounds towards zero
                    protein_position.append(int(offset / 3) + 1)
                    codon_position.append((offset % 3) + 1)
            else:
                # position is not annotated in GenomeAnnotation
                # find closest annotated CDS
                index.append(i)
                entries.append(unknown)
                i_clos = self.find_closest_gene(i)
                closest.append(i_clos.locus)
                distance.append(min(abs(i - i_clos.start),
                                    abs(i - i_clos.end)))
                protein_position.append(None)
                codon_position.append(None)

        anno_df = pd.DataFrame.from_records(entries, columns=self.COLUMNS)
        anno_df["pos"] = index
        anno_df["closest"] = closest
        anno_df["distance"] = distance
        anno_df["protein_pos"] = protein_position
        anno_df["codon_pos"] = codon_position

        if aggregate:
            anno_df = anno_df.groupby("pos").agg(
                lambda col: ';'.join(map(str, col)))
            anno_df.reset_index(inplace=True)
            print(anno_df.head())

        return anno_df[[
            "pos", "type", "locus", "name", "product", "protein_id", "strand",
            "closest", "distance", "start", "end", "protein_pos", "codon_pos"
        ]]

    def find_closest_gene(self, pos):
        """
        Returns the entry whose start or end is closest to pos. If two
        boundaries are equally close, the entry of the smaller boundary is
        returned.

        :param pos: the genome position
        :return: GenomeEntry
        """
        idx = bisect_left(self.__index_list, pos)

        if idx == 0:
            return self.__cds_list[0]

        if idx == len(self.__index_list):
            return self.__cds_list[-1]

        before = self.__index_list[idx - 1]
        after = self.__index_list[idx]

        if after - pos < pos - before:
            return self.__cds_list[idx]
        return self.__cds_list[idx - 1]

    def annotate_genes(self, genes):
        """
        annotates a list of gene and returns a pandas dataframe with the
        following columns:
        type name locus product protein_id strand start end

        Unknown gene names are silently skipped.

        :param genes: list of genes names (or a single name)
        :return: pandas dataframe
        """
        if isinstance(genes, str):
            genes = [genes]

        entries = [self.gene_dic[g] for g in genes if g in self.gene_dic]
        return pd.DataFrame.from_records(entries, columns=self.COLUMNS)

    def annotate_loci(self, loci):
        """
        annotates a list of loci tags and returns a pandas dataframe with
        the following columns:
        type name locus product protein_id strand start end

        Unknown loci are silently skipped.

        :param loci: list of locus names (or a single name)
        :return: pandas dataframe
        """
        if isinstance(loci, str):
            loci = [loci]

        entries = [self.locus_dic[g] for g in loci if g in self.locus_dic]
        return pd.DataFrame.from_records(entries, columns=self.COLUMNS)

    def annotate_type(self, types):
        """
        annotates a list of types and returns a pandas dataframe with the
        following columns:
        type name locus product protein_id strand start end

        Unknown types are silently skipped.

        :param types: list of types (or a single type)
        :return: pandas dataframe
        """
        if isinstance(types, str):
            types = [types]

        entries = []
        for g in types:
            entries.extend(self.type_dic.get(g, []))
        return pd.DataFrame.from_records(entries, columns=self.COLUMNS)

    def annotate_dataframe(self,
                           df,
                           column,
                           suffix=("_x", "_y"),
                           aggregate=False):
        """
        annotate an existing dataframe

        :param df: data frame to which annotation is added
        :param column: specifies the genome position column
        :param suffix: tuple of suffix that is added overlapping column names
                       (default: (_x, _y))
        :param aggregate: determines whether duplicated entry are aggregated
                          as a semicolon separated string
        :return: pandas dataframe (inner join of df and the annotation,
                 without the helper column "pos")
        """
        idx = set(df[column])
        pos_df = self.annotate_positions(idx, aggregate=aggregate)
        df = df.merge(pos_df,
                      left_on=column,
                      right_on="pos",
                      how="inner",
                      suffixes=suffix)
        df.drop("pos", axis=1, inplace=True)
        return df
if bbint_last == bbint_curr: return False print("\nPCs: "), for pc in pcs: print ("%x " % pc), print "\nBB:" for instr in md.disasm(bbs[bbint_last], bbint_last.begin): if instr.address in pcs: print("T "), else: print(" "), print "0x%x:\t%s\t%s" %(instr.address, instr.mnemonic, instr.op_str) return True while i<ls: pc = seq[l][i] bbint_curr = ct.search(pc) bbint_best = None for bbint in bbint_curr: if bbint_best is None: bbint_best = bbint if bbint.contains_interval(bbint_best): bbint_best = bbint if (spit(bbint_last, bbint_best, pcs)): pcs = [] pcs.append(pc) i=i+1 if i == ls: spit(bbint_best, None, pcs) bbint_last = bbint_best else:
class virtual: def __init__(self): #mapea id drawables con su respectivo drawable self.idToDrawable = {} self.idToInterval= {} self.tags = {} #contine pares (intervaloX,idDrawable) que representan helperBoxs de elementos en espacio virtual self.intervalTreeX = IntervalTree() self.vista = None self.currentLocalId = 0 self.stringTofunction = {} self.drawableInMemory=None self.logger = logging.getLogger(__name__) self.logger.setLevel(logging.DEBUG) fh = logging.FileHandler('virtualScreen.log') formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') fh.setFormatter(formatter) self.logger.addHandler(fh) def setCommandString(self,command,function): self.logger.info('Adding new command %s for file recovery ',command) self.stringTofunction[command] = function def setView(self,vista): self.logger.info('Setting new view ') self.vista = vista self.setCommandString('setTag',lambda args : self.setTagLast(*args) ) self.setCommandString('SETID',lambda args : self.placeDrawable(self.drawableInMemory,args[0]) ) self.setCommandString('setViewWidthHeight',lambda args : self.vista.vistaSetWidthHeight(*args) ) self.setCommandString('placeView',lambda args : self.vista.placeView(*args) ) self.setCommandString('setViewScaleXY',lambda args : self.vista.setFactorXY(*args) ) self.setCommandString('createRectangle',lambda args : self.setLastDrawableInMemory(self.createRectangle(*args,createId=False)) ) self.setCommandString('createLine',lambda args : self.setLastDrawableInMemory(self.createLine(*args,createId=False)) ) self.setCommandString('createGroup',lambda args : self.setLastDrawableInMemory(self.createGroup(*args,createId=False)) ) self.setCommandString('createText', lambda args :self.setLastDrawableInMemory(self.createText(*args,createId=False)) ) self.setCommandString('createPointDraw', lambda args : self.setLastDrawableInMemory(self.createPointDraw(*args,createId=False)) ) def isVisible(self,drawable,intervalosView): viewIntervalX = intervalosView[0] 
viewIntervalY = intervalosView[1] intervaloQueryX= tuple([point[0] for point in drawable.calcHelperBox()]) intervaloQueryY= tuple([point[1] for point in drawable.calcHelperBox()]) return self.envision(intervaloQueryX,viewIntervalX) and self.envision(intervaloQueryY,viewIntervalY) def envision(self,queryInter,visInterval): #tres casos dentro de vision 0---1---1----0 o el caso 1-----0-------0-----1 o el caso 1------0------1 #sean los 1 el cuadro de vision objetoContieneVista = lambda queryInter,visInterval : min(queryInter) <= min(visInterval) and max(visInterval) <= max(queryInter) vistaContieneObjeto = lambda queryInter,visInterval : (min(visInterval) <= queryInter[0] <= max(visInterval)) or (min(visInterval) <= queryInter[1] <= max(visInterval)) return objetoContieneVista(queryInter,visInterval) or vistaContieneObjeto(queryInter,visInterval) def winfo_height(self): return self.vista.heigth def winfo_width(self): return self.vista.width def setLastDrawableInMemory(self,drawable): self.drawableInMemory=drawable #consigue todos los elementos en cuadrado def getSquare(self,p0,pf,tags=None): temp = [] #consigue lista con intervalos en X dentro del cuadrado (o que pasen por este) #Debe ser siempre begin < end listaIntervalos = self.intervalTreeX.search(min(p0[0],pf[0]),max(p0[0],pf[0])) #esto te entrega lista tuplas ((x2,x2),idDrawable) for tupla in listaIntervalos: drawable= self.idToDrawable[tupla[2]] #Ahora descarta los que no sean consistentes respecto al intervalo Y intervaloY = tuple([point[1] for point in drawable.calcHelperBox()]) if self.envision(intervaloY,(p0[1],pf[1])): temp.append(drawable) # print 'Elem without Filter ',str(temp) if not tags is None: return [elem for elem in temp if not self.getTagdrawable(elem) in tags] return temp """ ---------------Funciones de creacion ------------------------------ """ def createLine(self,p0,pf,createId=True): self.logger.info('Creating line in %s %s',p0,pf) line = Line(self,self.vista,p0,pf) if createId: 
self.placeDrawable(line) return line def createRectangle(self,p0,pf,createId=True): self.logger.info('Creating rectangle in %s %s',p0,pf) rect = Rectangle(self,self.vista,p0,pf) if createId: self.placeDrawable(rect) return rect def createGroup(self,listaId=None,createId=True): self.logger.info('Creating Group from list %s',listaId) group = Group(self,self.vista) if not listaId is None: for id in listaId: group.add(self.idToDrawable[id]) if createId: self.placeDrawable(group) return group def createText(self,p0,texto,createId=True): self.logger.info('Creating Text %s in %s',texto,p0) texto = TextDrawable(self,self.vista,p0,texto) if createId: self.placeDrawable(texto) return texto def createPointDraw(self,idGroup=None,createId=True): self.logger.info('Creating poinDraw from group %s',idGroup) pd = pointDraw(self,self.vista) if not idGroup is None: grupo = self.idToDrawable[idGroup] pd.addFromGroup(grupo) if createId: self.placeDrawable(pd) return pd def placeDrawable(self,drawable,id=None): self.logger.info('Placing drawable %s',str(drawable)) if id is None: drawable.uniqueId = self.__getNewId() else: drawable.uniqueId = id drawable.draw() #ASEGURATE QUE LAS HELPERBOX ESTE BIEN HECHA helperBoxCords = drawable.calcHelperBox() # print 'helperbox ',helperBoxCords # print "helper yo interval ",helperBoxCords self.intervalTreeX.addi(helperBoxCords[0][0],helperBoxCords[1][0],drawable.uniqueId) self.idToInterval[drawable.uniqueId] = Interval(helperBoxCords[0][0],helperBoxCords[1][0],drawable.uniqueId) assert(self.idToInterval[drawable.uniqueId] == drawable.calcInterval()) self.idToDrawable[drawable.uniqueId] = drawable def updatePosition(self,drawable): if self.idToDrawable.has_key(drawable.uniqueId): self.logger.info('Updating %s drawable %s ',drawable.uniqueId,str(drawable)) try: self.intervalTreeX.remove(self.idToInterval[drawable.uniqueId]) except Exception,e: print 'Error en borrar intervalo' self.logger.error('Cant remove interval %s exception 
%s',self.idToInterval[drawable.uniqueId],str(e)) self.idToInterval.pop(drawable.uniqueId) helperBoxCords = drawable.calcHelperBox() self.intervalTreeX.addi(helperBoxCords[0][0],helperBoxCords[1][0],drawable.uniqueId) self.idToInterval[drawable.uniqueId] = Interval(helperBoxCords[0][0],helperBoxCords[1][0],drawable.uniqueId) assert(self.idToInterval[drawable.uniqueId] == drawable.calcInterval()) self.logger.debug('New drawable interval %s %s %s ',helperBoxCords[0][0],helperBoxCords[1][0],drawable.uniqueId) else:
def filterOverlaps(self, overlapPercCutoff=70):
    """Filtering out amplicons that substantially overlap.
    The amplicon with the highest PPC with be kept.
    The MFEprimerRes attribute must be set.
    in-place edit of MFEprimerRes object (table object filtered of overlaps);
    the surviving rows keep their original relative order.

    Parameters
    ----------
    overlapPercCutoff : float
        percent of overlap to consider 'substantially' overlapping

    Raises
    ------
    AttributeError
        if MFEprimerRes is None
    TypeError
        if a row has BindingStart > BindingStop
    """
    if self.MFEprimerRes is None:
        msg = 'genome object does not have MFEprimerRes attribute.' + \
              ' Run MFEprimer() first'
        # call form of raise: valid on Python 2 and Python 3
        raise AttributeError(msg)

    # making interval tree
    tree = IntervalTree()

    # loading intervals; the row POSITION is stored (not the index label)
    # because the final selection is positional (.iloc)
    for pos, (_, row) in enumerate(self.MFEprimerRes.iterrows()):
        # sanity check for + strand
        if row['BindingStart'] > row['BindingStop']:
            raise TypeError('MFEprimer binding start-stop is not + strand')
        tree.addi(row['BindingStart'], row['BindingStop'],
                  [pos, row['PPC'], row['Size']])

    # finding all that substantially overlap; keeping one with > PPC
    # (tree is only read, removals go to the copy)
    tree2 = tree.copy()
    for iv1 in tree:
        # skipping if already removed from tree2
        if iv1 not in tree2:
            continue

        overlaps = tree.search(iv1.begin, iv1.end)

        # skipping those that poorly overlap
        lowOverlap = set()
        for iv2 in overlaps:
            if iv1.overlaps(iv2):
                percOverlaps = self._calcPercOverlap(iv1, iv2)
                if percOverlaps[0] < overlapPercCutoff:
                    lowOverlap.add(iv2)
        overlaps = overlaps - lowOverlap  # just substantially overlapping

        # skipping those that have been already removed
        overlaps = [x for x in overlaps if x in tree2]

        # removing all substantially overlapping intervals with lower PPC
        if len(overlaps) > 1:
            overlaps.sort(key=lambda x: x.data[1], reverse=True)
            for o in overlaps[1:]:
                tree2.remove(o)

    # selecting rows; sorted so the result does not depend on the tree's
    # iteration order
    iv_idx = sorted(x.data[0] for x in tree2)
    self.MFEprimerRes = self.MFEprimerRes.iloc[iv_idx]
class BloodSugarSimulator:
    """Simulate blood sugar and glycation over one day from an activity log.

    Food entries raise blood sugar for two hours and exercise entries lower
    it for one hour; each is stored as an interval whose data is the
    per-minute change. ``compute_values`` walks the day minute by minute and
    records points for plotting every ``duration`` seconds.
    """

    def __init__(self, input_file, fooddb_filename, exerdb_filename,
                 duration=1800):
        self.input_file = input_file
        self.fooddb_filename = fooddb_filename
        self.exerdb_filename = exerdb_filename
        # Initialize blood sugar count to 80.
        self.blood_sugar_count = 80
        self.glycation_threshold = 150
        self.int_tree = None
        self.ts1 = 0.0
        self.ts2 = 0.0
        # glycation
        self.glycation = 0
        # change interval for plotting graph.
        # default set to 1800s or 30 minutes.
        self.duration = duration
        # Internal data structures used.
        self.x = []
        self.y = []
        self.x1 = []
        self.y1 = []
        self.food_dict = dict()
        self.exer_dict = dict()
        self.food_db = None
        self.exer_db = None

    @staticmethod
    def get_date_time_ts(dt, tm):
        """Convert a 'MM-DD-YYYY' date and 'HH:MM' time to a local epoch float."""
        d_fields = dt.split('-')
        tm_fields = tm.split(':')
        t = datetime.datetime(int(d_fields[2]), int(d_fields[0]),
                              int(d_fields[1]), int(tm_fields[0]),
                              int(tm_fields[1]))
        return float(time.mktime(t.timetuple()))

    @staticmethod
    def get_date_time_hhmm(ts):
        """Convert an epoch timestamp to a local ``datetime`` object."""
        return datetime.datetime.fromtimestamp(ts)

    @staticmethod
    def load_db_dict(filename):
        """Load a 3-column CSV into ``{col0: [col1, col2]}``.

        Prints a message and exits the process with status 1 if the file
        cannot be read, is empty, or has a row without exactly 3 fields.
        """
        dbdict = dict()
        try:
            with open(filename, 'r') as f:
                reader = csv.reader(f)
                db = tuple(reader)
                if not db:
                    raise Exception("No data found in file %s" % filename)
                # build hash table
                for tup in db:
                    if len(tup) != 3:
                        raise Exception("CSV file %s have missing fields."
                                        % filename)
                    dbdict[tup[0]] = [tup[1], tup[2]]
        except Exception as e:
            if isinstance(e, IOError):
                print("Failed to open the input file %s" % filename)
            else:
                print("Exception occured %s" % str(e))
            sys.exit(1)
        return dbdict

    def load_food_exer_files(self):
        """Load the food and exercise databases into the lookup dicts."""
        self.food_dict = BloodSugarSimulator.load_db_dict(self.fooddb_filename)
        # get exercise data
        self.exer_dict = BloodSugarSimulator.load_db_dict(self.exerdb_filename)

    def create_int_tree(self):
        """Build the interval tree from the activity file and seed the graphs.

        Each non-blank line is ``<date> <time> <F|E> <id>``. All lines must
        share one date; ts1/ts2 are set to 9:00 and 19:00 of that date.
        Prints a message and exits with status 1 on any invalid input.
        """
        self.int_tree = IntervalTree()
        currentdate = None
        try:
            # `with` guarantees the input file is closed on every path.
            with open(self.input_file, 'r') as fd:
                for line in fd:
                    fields = line.split()
                    if not fields:
                        # tolerate blank lines (e.g. a trailing newline)
                        continue
                    # set current date for input
                    if currentdate:
                        # input can be only for same day
                        if currentdate != fields[0]:
                            print("Usage: Enter data for same day only")
                            sys.exit(1)
                    else:
                        # initialise current date
                        currentdate = fields[0]
                        # initialise ts1 to beginning of day 9 am
                        self.ts1 = BloodSugarSimulator.get_date_time_ts(
                            fields[0], '9:00')
                        # initialise ts2 to end of day 7 pm
                        self.ts2 = BloodSugarSimulator.get_date_time_ts(
                            fields[0], '19:00')
                    # check input is Food or Exercise
                    if fields[2] == 'F':
                        begin = BloodSugarSimulator.get_date_time_ts(
                            fields[0], fields[1])
                        # end time for food is 2 hours.
                        end = begin + 7200
                        glycemicIndex = self.food_dict[fields[3]][1]
                        # per-minute rise spread over 120 minutes
                        data = round((float(glycemicIndex) / 120.0), 2)
                        self.int_tree[begin:end] = data
                    elif fields[2] == 'E':
                        begin = BloodSugarSimulator.get_date_time_ts(
                            fields[0], fields[1])
                        # end time for exercise is 1 hour.
                        end = begin + 3600
                        exerIndex = self.exer_dict[fields[3]][1]
                        # per-minute drop spread over 60 minutes
                        data = round((float(exerIndex) / 60.0), 2)
                        self.int_tree[begin:end] = -data
                    else:
                        print("Usage: Input can only be of type F or E")
                        sys.exit(1)
            # first point for blood sugar graph
            self.y.append(self.blood_sugar_count)
            customdate = BloodSugarSimulator.get_date_time_hhmm(self.ts1)
            self.x.append(customdate)
            print(customdate)
            # first point for glycation graph
            self.y1.append(self.glycation)
            self.x1.append(customdate)
        except Exception as e:
            # Exactly one message per failure; the file name comes from self.
            if isinstance(e, IOError):
                print("Failed to open the input file %s" % self.input_file)
            elif isinstance(e, KeyError):
                print("Invalid ID %s" % str(e))
            else:
                print("Exception occured %s" % str(e))
            sys.exit(1)

    def compute_values(self):
        """Simulate from ts1 to ts2 in one-minute steps and record graph points.

        With no active interval the value drifts by 1 per minute back toward
        the baseline; otherwise every active interval's data is added.
        Glycation increases by 1 for each minute above the threshold.
        """
        # no of seconds
        secs = 0
        curr_ts = self.ts1
        blood_sugar_count = self.blood_sugar_count
        glycation = self.glycation
        try:
            # while not end of day
            while curr_ts < self.ts2:
                # look up intervals for current timestamp
                ivs = self.int_tree.search(curr_ts)
                if not ivs:
                    if (blood_sugar_count - 1) > self.blood_sugar_count:
                        blood_sugar_count -= 1
                    elif (blood_sugar_count + 1) < self.blood_sugar_count:
                        blood_sugar_count += 1
                    else:
                        blood_sugar_count = self.blood_sugar_count
                else:
                    for iv in ivs:
                        blood_sugar_count += iv.data
                # compute glycation
                if blood_sugar_count > self.glycation_threshold:
                    glycation = glycation + 1
                # enter data every `duration` seconds for graphs
                if secs % self.duration == 0:
                    customdate = BloodSugarSimulator.get_date_time_hhmm(curr_ts)
                    # blood sugar
                    self.y.append(blood_sugar_count)
                    self.x.append(customdate)
                    # glycation
                    self.y1.append(glycation)
                    self.x1.append(customdate)
                # compute values every minute
                curr_ts += 60
                secs += 60
        except Exception as e:
            print("Exception occured %s" % str(e))
            sys.exit(1)

    def _plot_series(self, figure_number, xs, ys, ylabel, title):
        """Plot one time series on its own figure with HH:MM ticks every 30 min."""
        fig = plt.figure(figure_number)
        formatter = DateFormatter('%H:%M')
        plt.plot(xs, ys)
        plt.gcf().axes[0].xaxis.set_major_locator(MinuteLocator(interval=30))
        plt.gcf().axes[0].xaxis.set_major_formatter(formatter)
        plt.xlabel('Time')
        plt.ylabel(ylabel)
        plt.title(title)
        plt.xticks(rotation='vertical')
        fig.show()

    def plot_graph_blood_sugar(self):
        """Show the blood sugar graph (figure 1)."""
        self._plot_series(1, self.x, self.y, 'Blood Sugar',
                          'Blood Sugar graph for 1 day')

    def plot_graph_glycation(self):
        """Show the glycation index graph (figure 2)."""
        self._plot_series(2, self.x1, self.y1, 'Glycation Index',
                          'Glycation Index graph for 1 day')
# Index every MAF alignment block by the coordinates of its first record,
# labelled with the id of its second record.
tree = IntervalTree()
seq_dict = {}
for multiple_alignment in AlignIO.parse(maf_file, "maf"):
    multiple_alignment = list(multiple_alignment)
    id = multiple_alignment[1].id
    start = multiple_alignment[0].annotations["start"]
    end = start + multiple_alignment[0].annotations["size"]
    tree[start:end] = id
    seq_dict[id] = (start, end)

# For each selected sequence, report every *other* sequence whose interval
# shares at least `upper_overlap` positions with it.
fasta_output = []
with open(new_overlap_txt, "w") as fout:
    overlap_dict = {}
    for seq_id in seq_list:
        fasta_output.append(record_dict[seq_id])
        query_start, query_end = seq_dict[seq_id]
        for overlap_rec in tree.search(query_start, query_end):
            if overlap_rec.data == seq_id:
                continue
            target_id = overlap_rec.data
            # Length of the intersection of two half-open integer ranges.
            # Same value as len(set(range(...)) & set(range(...))) without
            # materialising every coordinate.
            overlap_len = max(
                0,
                min(query_end, overlap_rec.end)
                - max(query_start, overlap_rec.begin))
            if overlap_len >= upper_overlap:
                overlap_dict[(seq_id, target_id)] = True
                print("{}\t{}\t{}".format(seq_id, target_id, overlap_len),
                      file=fout)

SeqIO.write(fasta_output, save_fasta, "fasta")
begin = get_date_time_ts(fields[0], fields[1]) # end time for food is 2 hours. end = begin + 7200 # check formula data = round((float(fields[3]) / 120.0), 2) int_tree[begin:end] = data elif fields[2] == 'E': begin = get_date_time_ts(fields[0], fields[1]) # end time for exercise is 1 hour. end = begin + 3600 data = round((float(fields[3]) / 60.0), 2) int_tree[begin:end] = -data #print int_tree print "Blood Sugar Graph:" while ts1 < ts2: ivs = int_tree.search(ts1) if not ivs: if (blood_sugar_count - 1) > 80: blood_sugar_count -= 1 elif (blood_sugar_count + 1) < 80: blood_sugar_count += 1 else: blood_sugar_count = 80 else: for iv in ivs: blood_sugar_count += iv.data if blood_sugar_count > 150: glycation += 1 if (count % 1800 == 0):
deletiontree.removei(curr_deletion['start'], curr_deletion['end'], curr_deletion) curr_deletion['end'] = end deletiontree[curr_deletion['start']: curr_deletion['end']] = curr_deletion else: curr_deletion['start'] = start curr_deletion['end'] = end curr_deletion['part'] = block deletiontree[curr_deletion['start']: curr_deletion['end']] = curr_deletion with open(flank5k, 'w') as flanking_outfile: with open(exact, 'w') as exact_outfile: for Iobj in sorted(insertiontree): Dset = sorted(deletiontree.search(Iobj.begin, Iobj.end)) closeset = sorted( deletiontree.search(Iobj.begin - flanking, Iobj.end + flanking)) for closeobj in closeset: if closeobj in Dset: maf_iterate.print_block(Iobj.data['part1'], flanking_outfile) maf_iterate.print_block(Iobj.data['part2'], flanking_outfile) maf_iterate.print_block(closeobj.data['part'], flanking_outfile) else: maf_iterate.print_block(Iobj.data['part1'], exact_outfile) maf_iterate.print_block(Iobj.data['part2'], exact_outfile) maf_iterate.print_block(closeobj.data['part'],
def _remove_overlaps(self, position_idy: IntervalTree, percents: dict):
    """Consume ``position_idy`` and accumulate its non-overlapping parts.

    Intervals are popped one at a time. Each popped ``item`` is compared with
    every interval still in the tree that overlaps it, using the interval
    data (``cat`` / ``o_cat``) as the priority: the interval with the larger
    data value keeps the shared region. Depending on the case, the other
    interval is discarded or trimmed, two equal-data intervals are merged,
    or the current item is split and its remaining pieces are re-inserted
    into the tree to be processed by a later iteration.

    An item is passed to ``self._add_percents`` only when none of its
    overlaps forced it to be re-inserted (``has_overlap`` stays False).

    NOTE(review): the data values are presumably ordered category indexes;
    confirm against the caller.

    :param position_idy: interval tree to process; it is emptied in place.
    :param percents: accumulator handed to and returned by ``_add_percents``.
    :return: the accumulator after every interval has been processed.
    """
    while len(position_idy) > 0:
        item = position_idy.pop()
        start = item.begin
        end = item.end
        cat = item.data
        overlaps = position_idy.search(start, end)
        if len(overlaps) > 0:
            # True once the current item has been replaced in the tree by
            # other pieces; it must then not be counted in this iteration.
            has_overlap = False
            for overlap in overlaps:
                if has_overlap:
                    break
                o_start = overlap.begin
                o_end = overlap.end
                o_cat = overlap.data
                # `overlaps` is a snapshot: skip entries that an earlier
                # iteration of this loop already removed from the tree.
                if not position_idy.containsi(o_start, o_end, o_cat):
                    continue
                if start < o_start:
                    if end <= o_end:
                        # cccccccccccccc*******
                        # *****ooooooooo[ooooooo]
                        if o_cat < cat:
                            if end < o_end:
                                # No overlap with the current item, we stay has_overlap as False
                                position_idy.discard(overlap)
                                position_idy[end:o_end] = o_cat
                            else:
                                position_idy.discard(overlap)  # No kept overlap
                        elif o_cat == cat:
                            if end < o_end:
                                # Same data: merge both into one interval
                                has_overlap = True
                                position_idy.discard(overlap)
                                position_idy[start:o_end] = cat
                            else:
                                position_idy.discard(overlap)  # No kept overlap
                        else:
                            # Overlap wins: keep only the item's leading part
                            has_overlap = True
                            position_idy.discard(overlap)
                            position_idy[start:o_start] = cat
                            position_idy[o_start:o_end] = o_cat
                    else:  # end > o_end
                        # ccccccccccccccccccc
                        # *****oooooooooo****
                        if o_cat <= cat:
                            position_idy.discard(overlap)  # No kept overlap
                        else:  # o_cat > cat
                            # Overlap wins: split the item around it
                            has_overlap = True
                            position_idy.discard(overlap)
                            position_idy[start:o_start] = cat
                            position_idy[o_start:o_end] = o_cat
                            position_idy[o_end:end] = cat
                elif start == o_start:
                    if end < o_end:
                        # cccccccccccc*******
                        # ooooooooooooooooooo
                        if o_cat < cat:
                            # No overlap with the current item, we stay has_overlap as False
                            position_idy.discard(overlap)
                            position_idy[end:o_end] = o_cat
                        elif o_cat == cat:
                            has_overlap = True
                            position_idy.discard(overlap)
                            position_idy[start:o_end] = cat
                        else:  # o_cat > cat
                            # The overlap just contains current item
                            has_overlap = True
                    elif end == o_end:
                        # ***cccccccccccccccc***
                        # ***oooooooooooooooo***
                        if o_cat <= cat:
                            position_idy.discard(overlap)  # No kept overlap
                        else:
                            # The overlap just contains current item
                            has_overlap = True
                    else:  # end > o_end
                        # ccccccccccccccccccccccccccccc
                        # oooooooooooooooooooo*********
                        if o_cat <= cat:
                            # current item just contains the overlap
                            position_idy.discard(overlap)
                        else:
                            has_overlap = True
                            position_idy.discard(overlap)
                            position_idy[o_start:o_end] = o_cat
                            position_idy[o_end:end] = cat
                else:  # start > o_start
                    if end <= o_end:
                        # ******ccccccccc*******
                        # ooooooooooooooo[ooooooo]
                        if o_cat < cat:
                            # Item wins: split the overlap around the item
                            has_overlap = True
                            position_idy.discard(overlap)
                            position_idy[o_start:start] = o_cat
                            position_idy[start:end] = cat
                            if end < o_end:
                                position_idy[end:o_end] = o_cat
                        else:  # o_cat >= cat
                            # Overlap just contains the item
                            has_overlap = True
                    else:  # end > o_end
                        # ******ccccccccccccccccccccc
                        # ooooooooooooooooo**********
                        if o_cat < cat:
                            has_overlap = True
                            position_idy.discard(overlap)
                            position_idy[o_start:start] = o_cat
                            position_idy[start:end] = cat
                        elif o_cat == cat:
                            has_overlap = True
                            position_idy.discard(overlap)
                            position_idy[o_start:end] = cat
                        else:  # o_cat > cat
                            # Overlap is left in the tree; only the item's
                            # tail beyond it is re-inserted.
                            has_overlap = True
                            position_idy[o_end:end] = cat
            if not has_overlap:
                percents = self._add_percents(percents, item)
        else:
            percents = self._add_percents(percents, item)
    return percents
class MemoryCache(object):
    """Read cache for target memory, layered over a debug context.

    Cached data is kept in an IntervalTree whose intervals carry a bytearray
    of the bytes for that address range. The cache is dropped whenever the
    core is running or its run token has changed. Writes go to the target
    first and then update the cache.
    """

    def __init__(self, context):
        self._context = context
        self._run_token = -1
        self._log = logging.getLogger('memcache')
        self._reset_cache()

    def _reset_cache(self):
        self._cache = IntervalTree()
        self._metrics = CacheMetrics()

    ##
    # @brief Invalidates the cache if appropriate.
    def _check_cache(self):
        if self._context.core.is_running():
            self._log.debug("core is running; invalidating cache")
            self._reset_cache()
        elif self._run_token != self._context.core.run_token:
            self._dump_metrics()
            self._log.debug("out of date run token; invalidating cache")
            self._reset_cache()
            self._run_token = self._context.core.run_token

    ##
    # @brief Splits a memory address range into cached and uncached subranges.
    # @return Returns a 2-tuple with the first element being a set of Interval objects for each
    #   of the cached subranges. The second element is a set of Interval objects for each of the
    #   non-cached subranges.
    def _get_ranges(self, addr, count):
        cached = self._cache.search(addr, addr + count)
        uncached = {Interval(addr, addr + count)}
        for cachedIv in cached:
            newUncachedSet = set()
            for uncachedIv in uncached:
                # No overlap.
                if cachedIv.end < uncachedIv.begin or cachedIv.begin > uncachedIv.end:
                    newUncachedSet.add(uncachedIv)
                    continue
                # Begin segment.
                if cachedIv.begin - uncachedIv.begin > 0:
                    newUncachedSet.add(
                        Interval(uncachedIv.begin, cachedIv.begin))
                # End segment.
                if uncachedIv.end - cachedIv.end > 0:
                    newUncachedSet.add(Interval(cachedIv.end, uncachedIv.end))
            uncached = newUncachedSet
        return cached, uncached

    ##
    # @brief Reads uncached memory ranges and updates the cache.
    # @return A list of Interval objects is returned. Each Interval has its @a data attribute set
    #   to a bytearray of the data read from target memory.
    def _read_uncached(self, uncached):
        uncachedData = []
        for uncachedIv in uncached:
            data = self._context.read_memory_block8(
                uncachedIv.begin, uncachedIv.end - uncachedIv.begin)
            iv = Interval(uncachedIv.begin, uncachedIv.end, bytearray(data))
            self._cache.add(iv)  # TODO merge contiguous cached intervals
            uncachedData.append(iv)
        return uncachedData

    def _update_metrics(self, cached, uncached, addr, size):
        cachedSize = 0
        for iv in cached:
            # Count only the part of each cached interval inside the request.
            begin = max(iv.begin, addr)
            end = min(iv.end, addr + size)
            cachedSize += end - begin
        uncachedSize = sum((iv.end - iv.begin) for iv in uncached)

        self._metrics.reads += 1
        self._metrics.hits += cachedSize
        self._metrics.misses += uncachedSize

    def _dump_metrics(self):
        if self._metrics.total > 0:
            self._log.debug(
                "%d reads, %d bytes [%d%% hits, %d bytes]; %d bytes written",
                self._metrics.reads, self._metrics.total,
                self._metrics.percent_hit, self._metrics.hits,
                self._metrics.writes)
        else:
            self._log.debug("no reads")

    ##
    # @brief Performs a cached read operation of an address range.
    # @return A list of Interval objects sorted by address.
    def _read(self, addr, size):
        # Get the cached and uncached subranges of the requested read.
        cached, uncached = self._get_ranges(addr, size)
        self._update_metrics(cached, uncached, addr, size)

        # Read any uncached ranges.
        uncachedData = self._read_uncached(uncached)

        # Merged cached with data we just read
        combined = list(cached) + uncachedData
        combined.sort(key=lambda x: x.begin)
        return combined

    ##
    # @brief Extracts data from the intersection of an address range across a list of interval objects.
    #
    # The range represented by @a addr and @a size are assumed to overlap the intervals. Any
    # interval may have ragged edges not fully contained in the address range, in which case the
    # correct slice of that interval is extracted. This includes a single interval that strictly
    # contains the whole range on both sides.
    #
    # @param self
    # @param combined List of Interval objects forming a contiguous range. The @a data attribute of
    #   each interval must be a bytearray.
    # @param addr Start address. Must be within the range of the first interval.
    # @param size Number of bytes. (@a addr + @a size) must be within the range of the last interval.
    # @return A single bytearray object with all data from the intervals that intersects the address
    #   range.
    def _merge_data(self, combined, addr, size):
        end = addr + size
        result = bytearray()
        for iv in combined:
            # Clamp the requested range to this interval, then convert to
            # offsets into the interval's data.
            lo = max(addr, iv.begin) - iv.begin
            hi = min(end, iv.end) - iv.begin
            if hi > lo:
                result += iv.data[lo:hi]
        return result

    ##
    # @brief Replaces all cached intervals overlapping a write with one merged interval.
    #
    # Bytes of the first and last cached intervals that lie outside the written range are
    # preserved and joined with the new data.
    def _update_contiguous(self, cached, addr, value):
        size = len(value)
        end = addr + size
        leadBegin = addr
        leadData = bytearray()
        trailData = bytearray()
        trailEnd = end

        if cached[0].begin < addr and cached[0].end > addr:
            offset = addr - cached[0].begin
            leadData = cached[0].data[:offset]
            leadBegin = cached[0].begin

        if cached[-1].begin < end and cached[-1].end > end:
            offset = end - cached[-1].begin
            trailData = cached[-1].data[offset:]
            trailEnd = cached[-1].end

        self._cache.remove_overlap(addr, end)

        data = leadData + value + trailData
        self._cache.addi(leadBegin, trailEnd, data)

    ##
    # @return A bool indicating whether the given address range is fully contained within
    #   one known memory region, and that region is cacheable.
    # @exception MemoryAccessError Raised if the access is not entirely contained within a single region.
    def _check_regions(self, addr, count):
        regions = self._context.core.memory_map.get_intersecting_regions(
            addr, length=count)

        # If no regions matched, then allow an uncached operation.
        if len(regions) == 0:
            return False

        # Raise if not fully contained within one region.
        if len(regions) > 1 or not regions[0].contains_range(addr, length=count):
            raise MemoryAccessError(
                "individual memory accesses must not cross memory region boundaries"
            )

        # Otherwise return whether the region is cacheable.
        return regions[0].is_cacheable

    ##
    # @brief Reads one 8, 16 or 32 bit value.
    # @return The value, or if @a now is False a callable returning the value.
    # @exception ValueError Raised if @a transfer_size is not 8, 16 or 32.
    def read_memory(self, addr, transfer_size=32, now=True):
        # TODO use more optimal underlying read_memory call
        if transfer_size == 8:
            data = self.read_memory_block8(addr, 1)[0]
        elif transfer_size == 16:
            data = conversion.byte_list_to_u16le_list(
                self.read_memory_block8(addr, 2))[0]
        elif transfer_size == 32:
            data = conversion.byte_list_to_u32le_list(
                self.read_memory_block8(addr, 4))[0]
        else:
            # Without this, `data` would be unbound below.
            raise ValueError("invalid transfer_size (%d)" % transfer_size)

        if now:
            return data
        else:
            def read_cb():
                return data
            return read_cb

    def read_memory_block8(self, addr, size):
        if size <= 0:
            return []

        self._check_cache()

        # Validate memory regions.
        if not self._check_regions(addr, size):
            self._log.debug("range [%x:%x] is not cacheable", addr, addr + size)
            return self._context.read_memory_block8(addr, size)

        # Get the cached and uncached subranges of the requested read.
        combined = self._read(addr, size)

        # Extract data out of combined intervals.
        result = list(self._merge_data(combined, addr, size))
        return result

    def read_memory_block32(self, addr, size):
        # `size` is a count of 32-bit words.
        return conversion.byte_list_to_u32le_list(
            self.read_memory_block8(addr, size * 4))

    ##
    # @brief Writes one 8, 16 or 32 bit value.
    # @exception ValueError Raised if @a transfer_size is not 8, 16 or 32.
    def write_memory(self, addr, value, transfer_size=32):
        if transfer_size == 8:
            return self.write_memory_block8(addr, [value])
        elif transfer_size == 16:
            return self.write_memory_block8(
                addr, conversion.u16le_list_to_byte_list([value]))
        elif transfer_size == 32:
            return self.write_memory_block8(
                addr, conversion.u32le_list_to_byte_list([value]))
        else:
            # Do not silently drop the write.
            raise ValueError("invalid transfer_size (%d)" % transfer_size)

    def write_memory_block8(self, addr, value):
        if len(value) <= 0:
            return

        self._check_cache()

        # Validate memory regions.
        cacheable = self._check_regions(addr, len(value))

        # Write to the target first, so if it fails we don't update the cache.
        result = self._context.write_memory_block8(addr, value)

        if cacheable:
            size = len(value)
            end = addr + size
            cached = sorted(self._cache.search(addr, end),
                            key=lambda x: x.begin)
            self._metrics.writes += size

            if len(cached):
                # Write data is entirely within cached data.
                if addr >= cached[0].begin and end <= cached[0].end:
                    beginOffset = addr - cached[0].begin
                    # Offset from the start of the data. Measuring from the
                    # interval end gives 0 when the write ends exactly at
                    # the interval end, which turns the slice assignment
                    # into an insertion.
                    endOffset = beginOffset + size
                    cached[0].data[beginOffset:endOffset] = value
                else:
                    self._update_contiguous(cached, addr, bytearray(value))
            else:
                # No cached data in this range, so just add the entire interval.
                self._cache.addi(addr, end, bytearray(value))

        return result

    def write_memory_block32(self, addr, data):
        return self.write_memory_block8(
            addr, conversion.u32le_list_to_byte_list(data))

    def invalidate(self):
        self._reset_cache()
#print(ichr) start = int(L[1]) + gstart[ichr] end = int(L[2]) + gstart[ichr] tr.addi(start, end, 100) f.close() t2 = time.time() print("Tree build time: ", t2 - t1) #print(tr.items()) Total = 0 with open(qfile) as f: for line in f: L = line.strip().split() if len(L[0]) < 6 and L[0][3] != 'M': if L[0][3] == 'X': ichr = 22 elif L[0][3] == 'Y': ichr = 23 else: ichr = int(L[0][3:]) - 1 start = int(L[1]) + gstart[ichr] end = int(L[2]) + gstart[ichr] ols = tr.search(start, end) if len(ols) > 0: Total += len(ols) #print(start, ", ", end-1, ":") #print(ols, "\n") f.close() print("Total: ", Total) t3 = time.time() print("Tree search time: ", t3 - t2)
class FlashReaderContext(DebugContext):
    """Debug context that serves flash reads from an ELF file's sections.

    Reads that fall entirely inside exactly one flash section of the ELF are
    answered from the section data; every other read, and all writes, are
    forwarded to the parent context.
    """

    def __init__(self, parentContext, elf):
        super(FlashReaderContext, self).__init__(parentContext.core)
        self._parent = parentContext
        self._elf = elf
        self._log = logging.getLogger('flashreadercontext')

        self._build_regions()

    def _build_regions(self):
        self._tree = IntervalTree()
        for sect in [
                s for s in self._elf.sections
                if (s.region and s.region.is_flash)
        ]:
            start = sect.start
            length = sect.length
            sect.data  # Go ahead and read the data from the file.
            self._tree.addi(start, start + length, sect)
            self._log.debug("created flash section [%x:%x] for section %s",
                            start, start + length, sect.name)

    def _find_section(self, addr, length):
        """Return the single section fully containing the range, else None."""
        matches = self._tree.search(addr, addr + length)
        # Must match only one interval (ELF section).
        if len(matches) != 1:
            return None
        iv = matches.pop()
        # A range that only partly overlaps the section would produce a
        # negative offset or a short slice; let the parent handle it.
        if addr < iv.begin or addr + length > iv.end:
            return None
        return iv.data

    def read_memory(self, addr, transfer_size=32, now=True):
        length = transfer_size // 8
        section = self._find_section(addr, length)
        if section is None:
            return self._parent.read_memory(addr, transfer_size, now)
        addr -= section.start

        def read_memory_cb():
            self._log.debug("read flash data [%x:%x] from section %s",
                            section.start + addr,
                            section.start + addr + length, section.name)
            data = section.data[addr:addr + length]
            if transfer_size == 8:
                return data[0]
            elif transfer_size == 16:
                return conversion.byte_list_to_u16le_list(data)[0]
            elif transfer_size == 32:
                return conversion.byte_list_to_u32le_list(data)[0]
            else:
                raise ValueError("invalid transfer_size (%d)" % transfer_size)

        if now:
            return read_memory_cb()
        else:
            return read_memory_cb

    def read_memory_block8(self, addr, size):
        section = self._find_section(addr, size)
        if section is None:
            return self._parent.read_memory_block8(addr, size)
        addr -= section.start
        data = section.data[addr:addr + size]
        self._log.debug("read flash data [%x:%x]", section.start + addr,
                        section.start + addr + size)
        return list(data)

    def read_memory_block32(self, addr, size):
        # `size` counts 32-bit words, so four bytes per word must be read
        # (same convention as MemoryCache.read_memory_block32).
        return conversion.byte_list_to_u32le_list(
            self.read_memory_block8(addr, size * 4))

    def write_memory(self, addr, value, transfer_size=32):
        return self._parent.write_memory(addr, value, transfer_size)

    def write_memory_block8(self, addr, value):
        return self._parent.write_memory_block8(addr, value)

    def write_memory_block32(self, addr, data):
        return self._parent.write_memory_block32(addr, data)
# create packet ranges for db_packet in db_packets: movie_body_sizes.add(db_packet[0]) # aggregate for body_size in movie_body_sizes: start = (1 - epsilon) * body_size end = (1 + epsilon) * body_size if start == end: if start not in body_sizes_dict: body_sizes_dict[start] = 0 body_sizes_dict[start] += 1 else: res = body_sizes_tree.search(3, 5, strict=True) elem = None if len(res) == 0: elem = Interval(start, end, IntContainer()) body_sizes_tree.add(elem) else: for entry in res: elem = entry.data elem.data.private += 1 # prepare collision dict collisions = {} # sum up collisions in tree for interval in body_sizes_tree:
class FeatureSet(object):
    """
    An ordered collection of :class:`SeqFeature` objects.

    Features are stored in an interval tree keyed by their start and end, and
    iteration yields them in sorted interval order.

    :param type feature_class: type of the features stored in the collection; defaults to :class:`SeqFeature` and must
        inherit from it.
    :raises RuntimeError: if ``feature_class`` does not inherit from :class:`SeqFeature`.
    """

    def __init__(self, feature_class=None):
        if feature_class is None:
            feature_class = SeqFeature
        elif not issubclass(feature_class, SeqFeature):
            raise RuntimeError(
                "FeatureSet expects a feature class that inherits from SeqFeature"
            )

        self._features = IntervalTree()
        self._feature_class = feature_class

    def __or__(self, other):
        # `|` is set union, matching the operator used inside union().
        return self.union(other)

    def __sub__(self, other):
        # `-` is set difference, matching the operator used inside difference().
        return self.difference(other)

    def __len__(self):
        return len(self._features)

    def __iter__(self):
        for f in sorted(self._features):
            yield f.data

    def __repr__(self):
        return '{}({})'.format(self.__class__.__name__, list(self))

    def _wrap_feature(self, feature):
        """Wrap ``feature`` in an Interval spanning its start and end."""
        if isinstance(feature, SeqFeature):
            return Interval(feature.location.start, feature.location.end,
                            feature)
        elif isinstance(feature, (self._feature_class, Feature)):
            return Interval(feature.start, feature.end, feature)
        else:
            raise ValueError(
                "feature must be one of Bio.SeqFeature, co.Feature, %s" %
                self._feature_class)

    def copy(self):
        """
        :returns: a copy of this collection
        :rtype: :class:`FeatureSet`
        """
        fs = FeatureSet(feature_class=self._feature_class)
        fs._features = self._features.copy()
        return fs

    def add(self, *args, **kwargs):
        """
        Creates a feature object from the given ``args`` and ``kwargs`` and adds it to the collection.

        :rtype: :class:`SeqFeature`
        """
        feature = self._feature_class(*args, **kwargs)
        self._features.add(self._wrap_feature(feature))
        return feature

    def remove(self, feature):
        """
        Removes the given feature from the collection
        """
        self._features.remove(self._wrap_feature(feature))

    def find(self,
             between_start=None,
             between_end=None,
             type=None,
             id=None,
             strand=None,
             **qualifiers):
        """
        Iterate over all features matching the search parameters.

        - ``between_start`` and ``between_end`` can be used to restrict the search range.
        - ``type``, ``id``, and ``strand`` each restrict the search to features that match on these attributes
        - ``qualifiers`` is an arbitrary group of keyword arguments that will be matched to the qualifier keys of each
          feature. Each key must be present and have the same value as in the search parameters.
        """
        if between_start or between_end:
            it = self.overlap(between_start or 0, between_end or sys.maxsize)
        else:
            it = iter(self)

        # Only the attribute filters that were actually supplied.
        attrs = [(k, v)
                 for k, v in (('type', type), ('id', id), ('strand', strand))
                 if v is not None]

        for feature in it:
            if any(getattr(feature, key) != value for key, value in attrs):
                continue
            if any(
                    feature.qualifiers.get(key) != value
                    for key, value in qualifiers.items()):
                continue
            yield feature

    def overlap(self, start, end):
        """
        Returns an iterator over all features in the collection that overlap the given range.

        :param int start: overlap region start
        :param int end: overlap region end (inclusive)
        :raises RuntimeError: if ``start`` is larger than ``end``.
        """
        if start > end:
            raise RuntimeError("start cannot be larger than end.")

        # The tree query is end-exclusive, hence end + 1.
        for f in sorted(self._features.search(start, end + 1)):
            yield f.data

    def difference(self, other):
        """
        :returns: a new collection with the features of this one that are not in ``other``
        :rtype: :class:`FeatureSet`
        """
        fs = self.copy()
        fs._features = self._features - other._features
        return fs

    def union(self, other):
        """
        :returns: a new collection with the features of both collections
        :rtype: :class:`FeatureSet`
        """
        fs = self.copy()
        fs._features = self._features | other._features
        return fs
def test_all():
    """Exercise querying, membership, insertion, copying and deletion of IntervalTree."""
    from intervaltree import Interval, IntervalTree
    from pprint import pprint

    def make_iv(pair):
        # Label each interval with its own "begin-end" string.
        return Interval(pair[0], pair[1], "{}-{}".format(*pair))

    def labels(found):
        # Data payloads of a collection of intervals, as a set.
        return {iv.data for iv in found}

    bounds = [
        [1, 2],
        [4, 7],
        [5, 9],
        [6, 10],
        [8, 10],
        [8, 15],
        [10, 12],
        [12, 14],
        [14, 15],
    ]
    ivs = [make_iv(pair) for pair in bounds]
    t = IntervalTree(ivs)
    t.verify()

    # Query tests
    print('Query tests...')
    assert labels(t[4]) == {'4-7'}
    assert labels(t[4:5]) == {'4-7'}
    assert labels(t[4:6]) == {'4-7', '5-9'}
    assert labels(t[9]) == {'6-10', '8-10', '8-15'}
    assert labels(t[15]) == set()
    assert labels(t.search(5)) == {'4-7', '5-9'}
    assert labels(t.search(6, 11, strict=True)) == {'6-10', '8-10'}
    print(' passed')

    # Membership tests
    print('Membership tests...')
    assert ivs[1] in t
    assert Interval(1, 3, '1-3') not in t
    assert t.overlaps(4)
    assert t.overlaps(9)
    assert not t.overlaps(15)
    assert t.overlaps(0, 4)
    assert t.overlaps(1, 2)
    assert t.overlaps(1, 3)
    assert t.overlaps(8, 15)
    assert not t.overlaps(15, 16)
    assert not t.overlaps(-1, 0)
    assert not t.overlaps(2, 4)
    print(' passed')

    # Insertion tests
    print('Insertion tests...')
    t.add(make_iv([1, 2]))  # a duplicate must leave the tree unchanged
    assert labels(t[1]) == {'1-2'}
    t[1:2] = '1-2'  # same duplicate through slice assignment
    assert labels(t[1]) == {'1-2'}

    t.add(make_iv([2, 4]))
    assert labels(t[2]) == {'2-4'}
    t.verify()

    t[13:15] = '13-15'
    assert labels(t[14]) == {'8-15', '13-15', '14-15'}
    t.verify()
    print(' passed')

    # Duplication tests
    print('Interval duplication tests...')
    t.add(Interval(14, 15, '14-15####'))
    assert labels(t[14]) == {'8-15', '13-15', '14-15', '14-15####'}
    t.verify()
    print(' passed')

    # Copying and casting
    print('Tree copying and casting...')
    tcopy = IntervalTree(t)
    tcopy.verify()
    assert t == tcopy

    tlist = list(t)
    for iv in tlist:
        assert iv in t
    for iv in t:
        assert iv in tlist

    tset = set(t)
    assert tset == t.items()
    print(' passed')

    # Deletion tests
    print('Deletion tests...')
    # remove() of an absent interval must raise ValueError
    absent = (
        Interval(1, 3, "Doesn't exist"),
        Interval(500, 1000, "Doesn't exist"),
    )
    for missing in absent:
        try:
            t.remove(missing)
        except ValueError:
            continue
        raise AssertionError("Expected ValueError")

    orig = t.print_structure(True)
    # discard() of an absent interval must be silent
    t.discard(Interval(1, 3, "Doesn't exist"))
    t.discard(Interval(500, 1000, "Doesn't exist"))

    assert labels(t[14]) == {'8-15', '13-15', '14-15', '14-15####'}
    t.remove(Interval(14, 15, '14-15####'))
    assert labels(t[14]) == {'8-15', '13-15', '14-15'}
    t.verify()

    assert labels(t[2]) == {'2-4'}
    t.discard(make_iv([2, 4]))
    assert labels(t[2]) == set()
    t.verify()

    assert t[14]
    t.remove_overlap(14)
    t.verify()
    assert not t[14]

    # Emptying the tree, verifying the structure after every removal
    for iv in sorted(iter(t)):
        t.remove(iv)
        t.verify()
    assert len(t) == 0
    assert t.is_empty()
    assert not t

    t = IntervalTree(ivs)
    t.remove_overlap(1)
    t.verify()
    t.remove_overlap(8)
    print(' passed')

    t = IntervalTree(ivs)
    pprint(t)
    t.split_overlaps()
    pprint(t)
from datetime import datetime, date
from intervaltree import Interval, IntervalTree


class ScheduleItem:
    """A course meeting with 12-hour clock start and end times (e.g. "9:00AM")."""

    def __init__(self, course_number, start_time, end_time):
        self.course_number = course_number
        self.start_time = start_time
        self.end_time = end_time

    def get_begin(self):
        """Start time as minutes since midnight."""
        return minutes_from_midnight(self.start_time)

    def get_end(self):
        """End time as minutes since midnight."""
        return minutes_from_midnight(self.end_time)

    def __repr__(self):
        return ''.join(["{ScheduleItem: ",
                        str((self.course_number, self.start_time,
                             self.end_time)),
                        "}"])


def minutes_from_midnight(time):
    """Convert a '%I:%M%p' time string (e.g. "1:30PM") to minutes since midnight."""
    # Computed from the parsed clock time alone, so the result does not
    # depend on the current date.
    parsed = datetime.strptime(time, '%I:%M%p').time()
    return parsed.hour * 60 + parsed.minute


# IntervalTree stores Interval objects, so each ScheduleItem is wrapped in
# an Interval spanning its begin/end minutes with the item as the payload.
T = IntervalTree(
    Interval(item.get_begin(), item.get_end(), item)
    for item in [ScheduleItem(28374, "9:00AM", "10:00AM"),
                 ScheduleItem(43564, "8:00AM", "12:00PM"),
                 ScheduleItem(53453, "1:00AM", "2:00AM")])

# Single-argument call form prints the same on Python 2 and Python 3.
print(T.search(minutes_from_midnight("9:00PM"),
               minutes_from_midnight("10:00PM")))
def interval_intersect_interval(**kwargs):
    '''
    Efficient algorithm to find which intervals intersect

    Handles both unix timestamp or datetime object

    Parameters:
    -----------
    groundtruth: sequence of (start, end) label segments
    prediction: sequence of (start, end) predicted segments

    Return:
    -------
    A dict with the keys:

    prediction_gt: list with same size as prediction,
                   will be 1 if there's an overlapping label
                   0 if not
    recall: fraction of labels overlapped by at least one prediction
            (nan when there are no labels)
    precision: mean of prediction_gt
    overlap: how much overlap between label and prediction
             (None when nothing overlaps)
    missed: total ground truth minus the overlap
    '''
    gt = kwargs['groundtruth']
    pred = kwargs['prediction']

    # calculate recall
    tree = IntervalTree()
    for segment in pred:
        tree.add(Interval(segment[0], segment[1]))

    TP = 0
    for segment in gt:
        if tree.search(segment[0], segment[1]):
            TP += 1
    # float() keeps this a true division on Python 2 as well; recall is
    # undefined without labels, matching np.mean of an empty list.
    recall = TP / float(len(gt)) if len(gt) else float('nan')

    # calculate precision
    tree = IntervalTree()
    for segment in gt:
        tree.add(Interval(segment[0], segment[1]))

    # Starts as None because the overlap may be a number or a timedelta,
    # so there is no universal zero to start from.
    total_overlap = None
    prediction_gt = []
    for segment in pred:
        overlap = tree.search(segment[0], segment[1])
        for label in overlap:
            if total_overlap is None:
                total_overlap = get_overlap(label, segment)
            else:
                total_overlap += get_overlap(label, segment)
        prediction_gt.append(1 if overlap else 0)

    total_groundtruth = _get_sum(gt)
    if total_overlap is None:
        # Nothing overlapped: all of the ground truth was missed.
        missed = total_groundtruth
    else:
        missed = total_groundtruth - total_overlap

    result = {}
    result['prediction_gt'] = prediction_gt
    result['recall'] = recall
    result['precision'] = np.mean(prediction_gt)
    result['overlap'] = total_overlap
    result['missed'] = missed
    return result
def _vj_handshakes(self):
    """Merge entries of ``self.just_v`` and ``self.just_j`` that overlap.

    The distinct ``just_v`` sequences (most frequent first) are joined with
    ``"|"`` into one string that is indexed by a suffix tree; an interval
    tree maps every position of that string back to the sequence occupying
    it.  Each distinct ``just_j`` sequence is looked up in the suffix tree
    and, when the match is accepted and both sequences share at least one
    chain type in ``self.pSeq_read_map``, the two are merged into one
    sequence.

    Side effects: extends ``self.cdr3_dict``, removes the merged entries
    from ``self.just_v_dict`` / ``self.just_j_dict`` and stores a record for
    the merged sequence in ``self.pSeq_read_map``.

    Returns:
        list containing each merged sequence repeated once per occurrence
        of its two source sequences.
    """
    handshakes = []
    just_v = Counter(self.just_v)
    just_j = Counter(self.just_j)

    # Must be a real list: it is walked once to build the interval tree and
    # again by join().  A lazy map() would already be exhausted on Python 3.
    ranked_v = sorted(just_v.items(), key=lambda z: z[1], reverse=True)
    just_v_keys = [seq for seq, _ in ranked_v]

    itree = IntervalTree()
    start = 0
    for v in just_v_keys:
        # +1 so the interval also covers the "|" separator after the sequence.
        end = start + len(v) + 1
        itree.addi(start, end, v)
        start = end

    all_v_suf = "|".join(just_v_keys)
    stree = IgorSuffixTree(all_v_suf)

    for j, count_j in just_j.items():
        # NOTE(review): presumably (matched length, position in all_v_suf,
        # whether the match reaches the end of a sequence) -- confirm
        # against IgorSuffixTree.search_stree.
        overlap, index, terminal = stree.search_stree(j)
        if not (terminal and len(j[:overlap]) >= self._settings.overlapLen):
            continue

        overlapping_v = itree.search(index)
        # Resolve the sequence located at `index` once instead of
        # rebuilding the list for every use.
        v_seq = list(overlapping_v)[0].data
        v_chains = self.pSeq_read_map[v_seq]["chain_type"]
        j_chains = self.pSeq_read_map[j]["chain_type"]
        common_chains = set(v_chains.keys()) & set(j_chains.keys())
        if not common_chains:
            continue

        v_t = []
        j_t = []
        chtype = {}
        for key, ch in v_chains.items():
            if key in common_chains:
                v_t.extend(map(getGeneType, ch))
                chtype.setdefault(key, []).extend(ch)
        for key, ch in j_chains.items():
            if key in common_chains:
                j_t.extend(map(getGeneType, ch))
                chtype.setdefault(key, []).extend(ch)

        if len(j[overlap:]) > 0:
            # J extends past the overlap: append its unmatched tail.
            newly_born_cdr3 = v_seq + j[overlap:]
        else:
            # J lies entirely inside V: cut V right after J's last occurrence.
            position_of_j_in_v = v_seq.rfind(j)
            newly_born_cdr3 = v_seq[:position_of_j_in_v + len(j)]

        # Move the entries recorded for both halves over to the merged sequence.
        merged_reads = self.cdr3_dict.setdefault(newly_born_cdr3, [])
        if v_seq in self.just_v_dict:
            merged_reads.extend(self.just_v_dict.pop(v_seq))
        if j in self.just_j_dict:
            merged_reads.extend(self.just_j_dict.pop(j))

        count_vj = just_v[v_seq] + count_j
        handshakes.extend([newly_born_cdr3] * count_vj)

        self.pSeq_read_map[newly_born_cdr3] = {
            "v": v_t,
            "j": j_t,
            "chain_type": chtype,
            "overlap": overlap,
        }

    return handshakes
class IntervalTreeSet(AbstractDataset):
    """Dataset that stores IPv4 ranges in an interval tree.

    Records are staged through ``reload_start`` / ``reload_line`` /
    ``reload_end``; the tree is rebuilt from the staged intervals and
    swapped in at ``reload_end``, so ``get`` keeps using the previous tree
    until then.
    """

    def __init__(self, filename):
        AbstractDataset.__init__(self, filename)
        self.backend = IntervalTree([])
        self.nodecount = 0
        # reload
        self.intervals = None
        # Staging list; populated between reload_start() and reload_end().
        self.tmpintervals = None
        self.tmpcount = 0

    def reload_start(self, defaults):
        """Begin a reload by resetting the staging list and counter."""
        self.tmpintervals = []
        self.tmpcount = 0

    def reload_line(self, line, defaults):
        """Parse one input line and stage its range for the next tree.

        Ranges spanning more than ``defaults.maxrange4`` (when that is set)
        are logged and skipped.
        """
        value, data = self.create_default_datarecord(line, defaults)
        # TODO: how do we initialize default TTL from command line
        lower, upper = ip4range(value)
        lowerlong = ip2long(lower)
        upperlong = ip2long(upper)
        too_wide = (defaults.maxrange4 is not None
                    and upperlong - lowerlong > defaults.maxrange4)
        if too_wide:
            # logging.warn is a deprecated alias of logging.warning.
            logging.warning("MAXRANGE4 prohobits adding %s in %s",
                            value, self.filename)
            return
        interval = Interval(lowerlong, upperlong)
        interval.data = data
        self.tmpintervals.append(interval)
        self.tmpcount += 1

    def reload_end(self, defaults):
        """Build the tree from the staged intervals and make it live."""
        self.backend = IntervalTree(self.tmpintervals)
        self.nodecount = self.tmpcount
        self.tmpintervals = None
        self.tmpcount = 0

    def get_record_count(self):
        """Return the number of intervals in the live tree."""
        return self.nodecount

    def get(self, query):
        """Look up the record whose range contains ``query``.

        Returns:
            ``None`` if nothing matches or any matching record has a truthy
            ``'excluded'`` entry; otherwise the data of the first match.
            When that data has a ``'TXT'`` entry, a copy with the rendered
            template is returned.
        """
        query = ipreverse(query)
        q = ip2long(query)
        res = self.backend.search(q)
        for r in res:
            try:
                if r.data['excluded']:
                    return None
            except KeyError:
                continue

        # no exclusions, return first match
        if len(res) > 0:
            data = res[0].data
            if 'TXT' in data:
                # Render into a copy: writing the result back into the record
                # held by the tree would overwrite the template, so every
                # later lookup in this range would get this query's text.
                data = dict(data)
                data['TXT'] = self.apply_txt_template(
                    data['TXT'], query, data['A'], self.defaults)
            return data
        return None
m4_records[m4_record.id] = m4_record m4_lists = m4_records.keys() for m4_id in m4_lists: m4_record = m4_records[m4_id] tree[m4_record.target_start:m4_record.target_end] = m4_id overlap_dict = {} for m4_id in m4_lists: m4_record = m4_records[m4_id] length = m4_record.query_len large = [] medium = [] small = [] overlap_list = list( tree.search(m4_record.target_start, m4_record.target_end)) # print overlap_list for overlap_rec in overlap_list: if overlap_rec.data != m4_id: x = range(m4_record.target_start, m4_record.target_end) y = range(overlap_rec.begin, overlap_rec.end) # print x # print y # print set(x) & set(y) ovelap_len = len(set(x) & set(y)) overlap_frac = float(ovelap_len) / length if overlap_frac >= 0.5: large.append(overlap_rec.data) elif overlap_frac >= 0.25: medium.append(overlap_rec.data) elif overlap_frac > 0: