def setUp(self):
    """Build the fixture tree and store its root on ``self.intervals``.

    The root is created from ``Feature(50, 59)``; a ``Feature(s, s + 9)`` is
    then inserted for every other multiple of 10 from 0 through 100.
    """
    root = IntervalNode(Feature(50, 59))
    # 50 is skipped because it is already present as the root.
    starts = [s for s in range(0, 110, 10) if s != 50]
    for begin in starts:
        # insert() hands back the (possibly new) root, so keep rebinding it.
        root = root.insert(Feature(begin, begin + 9))
    self.intervals = root
def processTarget(oId, submatches, primaries, tfotargets, annotations, parameters):
    """Process one target region for all of its submatches and off-targets.

    Args:
        oId: identifier of the target region.
        submatches: mapping from id to the list of submatches with that id.
        primaries: mapping of known primary targets; ``oId`` must be a key.
        tfotargets: mapping from id to a mapping keyed by ``(start, end)``
            off-target regions.
        annotations: unused here; kept for interface compatibility.
        parameters: unused here; kept for interface compatibility.

    Returns:
        The concatenation of ``processSubTarget`` output for every submatch.

    Exits the process with status 1 (after writing an error line to stderr)
    when ``oId`` has no corresponding primary target.
    """
    query = submatches[oId]  # list of submatches of the same id

    # quit if this submatch cannot be related to a primary target region
    if oId not in primaries:
        sys.stderr.write(
            "[ERROR] submatch does not have a corresponding primary target : %s\n" % (oId))
        sys.exit(1)

    # create an empty root
    intersect_tree = IntervalNode(-1, -1, -1)

    # accumulate off-target data; stays an empty list when none is recorded
    offtarget_data = []
    if oId in tfotargets:
        offtarget_data = tfotargets[oId]
        # Only here is offtarget_data known to be a mapping, so the keys are
        # walked inside this branch; insert() returns the current root.
        for start, end in offtarget_data:
            intersect_tree = intersect_tree.insert(start, end)

    # query all submatches against the off-target tree
    return "".join(
        processSubTarget(q, oId, intersect_tree, offtarget_data) for q in query)
def processTarget(oId, submatches, primaries, tfotargets, annotations, parameters):
    """Process one target region for all of its submatches and off-targets.

    Args:
        oId: identifier of the target region.
        submatches: mapping from id to the list of submatches with that id.
        primaries: mapping of known primary targets; ``oId`` must be a key.
        tfotargets: mapping from id to a mapping keyed by ``(start, end)``
            off-target regions.
        annotations: unused here; kept for interface compatibility.
        parameters: unused here; kept for interface compatibility.

    Returns:
        The concatenation of ``processSubTarget`` output for every submatch.

    Exits the process with status 1 (after writing an error line to stderr)
    when ``oId`` has no corresponding primary target.
    """
    query = submatches[oId]  # list of submatches of the same id

    # quit if this submatch cannot be related to a primary target region
    if oId not in primaries:
        sys.stderr.write(
            "[ERROR] submatch does not have a corresponding primary target : %s\n" % (oId))
        sys.exit(1)

    # create an empty root
    intersect_tree = IntervalNode(-1, -1, -1)

    # accumulate off-target data; stays an empty list when none is recorded
    offtarget_data = []
    if oId in tfotargets:
        offtarget_data = tfotargets[oId]
        # Only here is offtarget_data known to be a mapping, so the keys are
        # walked inside this branch; insert() returns the current root.
        for start, end in offtarget_data:
            intersect_tree = intersect_tree.insert(start, end)

    # query all submatches against the off-target tree
    return "".join(
        processSubTarget(q, oId, intersect_tree, offtarget_data) for q in query)
def setUp(self):
    """Build the fixture tree and store its root on ``self.intervals``.

    Starts from ``Interval(50, 59)`` and inserts ``Interval(s, s + 9)`` for
    each remaining multiple of 10 from 0 through 100.
    """
    tree = IntervalNode(Interval(50, 59))
    for offset in range(0, 110, 10):
        if offset != 50:  # the root already covers the 50 slot
            tree = tree.insert(Interval(offset, offset + 9))
    self.intervals = tree
def setUp(self):
    """Populate a large tree and store its root on ``self.intervals``.

    Sets ``self.max`` to 1000000, inserts a ``Feature(p, p)`` for every
    tenth position below ``self.max``, then inserts ``Feature(0, 1)`` 600
    more times.
    """
    self.max = 1000000
    root = IntervalNode(Feature(1, 2))

    # one single-position feature per step of 10
    for position in range(0, self.max, 10):
        root = root.insert(Feature(position, position))

    # followed by many copies of the same feature
    for _ in range(600):
        root = root.insert(Feature(0, 1))

    self.intervals = root
def setUp(self):
    """Populate a large tree and store its root on ``self.intervals``.

    Sets ``self.max`` to 1000000, inserts an ``Interval(p, p)`` for every
    tenth position below ``self.max``, then inserts ``Interval(0, 1)`` 6000
    more times.
    """
    self.max = 1000000
    root = IntervalNode(Interval(1, 2))

    # one single-position interval per step of 10
    for position in range(0, self.max, 10):
        root = root.insert(Interval(position, position))

    # followed by many copies of the same interval
    for _ in range(6000):
        root = root.insert(Interval(0, 1))

    self.intervals = root
def testSimpleIntervals():
    """Build a tree of four back-to-back time spans and print one query result.

    The spans are now-1d..now, now-7d..now-1d, now-30d..now-7d and
    now-365d..now-30d, each tagged through ``other``. The tree is then
    queried with the one-second window ending at now-7d and the ``other``
    values of the reported nodes are printed.
    """
    time_now = datetime.utcnow()
    time_yesterday = time_now - timedelta(days=1)
    time_week = time_now - timedelta(days=7)
    time_month = time_now - timedelta(days=30)
    time_year = time_now - timedelta(days=365)

    # Root spans one second more than the whole year on either side.
    tree = IntervalNode(get_seconds(time_year) - 1, get_seconds(time_now) + 1)

    spans = [
        (time_yesterday, time_now, "Today"),
        (time_week, time_yesterday, "week-yesterday"),
        (time_month, time_week, "month-week"),
        (time_year, time_month, "year-month"),
    ]
    for span_start, span_end, label in spans:
        # insert() returns the root after insertion; rebinding keeps the
        # query below running against the current root.
        tree = tree.insert(get_seconds(span_start), get_seconds(span_end),
                           other=label)

    res = []
    time_check = time_week
    tree.intersect(get_seconds(time_check) - 1, get_seconds(time_check),
                   lambda node: res.append(node.other))
    print(res)
def testSimpleIntervals(self):
    """Build a day-resolution tree covering the past year and query today.

    Three spans are inserted (yesterday..now, the week before yesterday, and
    the rest of the year), then the tree is queried for today's ordinal with
    ``report_schedule`` as the callback.
    """
    # Sample the clock once so every boundary derives from the same instant;
    # separate utcnow() calls could land on different days around midnight.
    time_now = datetime.utcnow()
    time_start = time_now - timedelta(days=365)
    time_yesterday = time_now - timedelta(days=1)
    week_before_yesterday = time_yesterday - timedelta(days=7)

    tree = IntervalNode(time_start.toordinal(), time_now.toordinal())
    # insert() returns the root after insertion, so keep rebinding it.
    tree = tree.insert(time_yesterday.toordinal(), time_now.toordinal(),
                       other="From Yesterday")
    tree = tree.insert(week_before_yesterday.toordinal(),
                       time_yesterday.toordinal(), other="Yesterday, week")
    # NOTE(review): this label repeats the previous one; confirm it is intended.
    tree = tree.insert(time_start.toordinal(),
                       week_before_yesterday.toordinal(),
                       other="Yesterday, week")

    today = time_now.toordinal()
    tree.intersect(today, today, report_schedule)

    # Cases noted by the original author but not yet written:
    #   yesterday, day before, day before that, till forever
    #   yesterday, day before, nothing
def setUp(self):
    """Create random overlapping features and a tree built from them.

    For every 15th position from 11 up to 20000, between 2 and 5 features
    are generated, each extending 1-10 to the left and 1-10 to the right of
    that position. The list goes to ``self.intervals`` and the tree root to
    ``self.tree``.
    """
    features = []
    for center in range(11, 20000, 15):
        copies = random.randint(2, 5)
        for _ in range(copies):
            # draw the left extent first, then the right one
            left = random.randint(1, 10)
            right = random.randint(1, 10)
            features.append(Feature(center - left, center + right))

    root = IntervalNode(features[0])
    for feature in features[1:]:
        root = root.insert(feature)

    self.intervals = features
    self.tree = root
def setUp(self):
    """Create random overlapping intervals and a tree built from them.

    For every 15th position from 11 up to 20000, between 2 and 5 intervals
    are generated, each extending 1-10 to the left and 1-10 to the right of
    that position. The list goes to ``self.intervals`` and the tree root to
    ``self.tree``.
    """
    generated = []
    for center in range(11, 20000, 15):
        copies = random.randint(2, 5)
        for _ in range(copies):
            # draw the left extent first, then the right one
            left = random.randint(1, 10)
            right = random.randint(1, 10)
            generated.append(Interval(center - left, center + right))

    root = IntervalNode(generated[0])
    for interval in generated[1:]:
        root = root.insert(interval)

    self.intervals = generated
    self.tree = root
def getWindowTree(selectRes):
    """Build an interval tree from rows of ``(start, end[, score])``.

    The length of the first row decides the layout for all rows: with three
    columns the third is stored as ``other``; otherwise rows are unpacked as
    ``(start, end)`` and the size ``end - start + 1`` is stored instead.

    Returns the root node of the resulting tree.
    """
    if len(selectRes[0]) == 3:
        # rows already carry their own score
        records = ((lo, hi, weight) for lo, hi, weight in selectRes)
    else:
        # use the window size as the third column
        records = ((lo, hi, hi - lo + 1) for lo, hi in selectRes)

    # the first record seeds the root; the rest are inserted one by one
    lo, hi, weight = next(records)
    root = IntervalNode(lo, hi, other=weight)
    for lo, hi, weight in records:
        root = root.insert(lo, hi, other=weight)
    return root
def __init__(self, fileName):
    """Load a tab-separated interval list into one interval tree per chromosome.

    Lines starting with ``#`` or ``@`` are collected in ``self.headline``.
    Every other non-blank line is split on tabs; column 0 is the chromosome
    and columns 1 and 2 are integer start/end positions. Each interval is
    widened by one on both sides before it is inserted into
    ``self.tree[chrom]``. ``self.N`` counts the data lines read.

    If ``fileName`` does not exist the object is left empty and nothing is
    printed.
    """
    self.N = 0
    self.headline = []
    self.tree = {}
    if not os.path.exists(fileName):
        return

    previous_chrom = None
    # `with` guarantees the handle is closed even if a line fails to parse.
    with open(fileName, 'rt') as fh:
        for raw_line in fh:
            line = raw_line.strip()
            if not line:
                # blank lines carry no interval; indexing them would fail
                continue
            if line.startswith(('#', '@')):
                self.headline.append(line)
                continue

            fields = line.split('\t')
            self.N += 1
            chrom = fields[0]
            start = int(fields[1]) - 1
            end = int(fields[2]) + 1

            if chrom != previous_chrom:
                # progress message when moving on from a finished chromosome
                if previous_chrom is not None and verbose:
                    print('scan %s line %d chrom %s'
                          % (fileName, self.N, previous_chrom))
                previous_chrom = chrom

            # Keying on the dict (rather than on "same as previous line")
            # keeps earlier intervals when a chromosome reappears later.
            if chrom in self.tree:
                self.tree[chrom] = self.tree[chrom].insert(start, end)
            else:
                self.tree[chrom] = IntervalNode(start, end)

    print("Loaded IntervalList:\t" + fileName)
    print("events:\t" + str(self.N))
def get_schedule(chw_username, override_date=None):
    """Build the per-weekday schedule trees for one CHW user.

    Args:
        chw_username: key passed to the
            ``pactcarehq/chw_dot_schedule_condensed`` view.
        override_date: datetime to use as "now"; defaults to
            ``datetime.now()`` when None.

    Returns:
        A ``CHWPatientSchedule`` built from the username, a dict mapping
        day-of-week (int) to an interval tree whose nodes carry the
        ``pact_id`` in ``other``, and the raw view rows.
    """
    # NOTE: caching of results (cached_schedules) is currently disabled, so
    # every call queries the view again.
    nowdate = datetime.now() if override_date is None else override_date

    db = PactPatient.get_db()
    chw_schedules = db.view('pactcarehq/chw_dot_schedule_condensed',
                            key=chw_username).all()
    date_format = "%Y-%m-%dT%H:%M:%SZ"
    day_intervaltree = {}

    for item in chw_schedules:
        single_sched = item['value']
        day_of_week = int(single_sched['day_of_week'])

        if day_of_week in day_intervaltree:
            daytree = day_intervaltree[day_of_week]
        else:
            # First entry for this weekday: start from a placeholder root
            # spanning datetime.min up to ten days past "now".
            daytree = IntervalNode(get_seconds(datetime.min),
                                   get_seconds(nowdate + timedelta(days=10)))

        if single_sched['ended_date'] is None:
            # open-ended schedules are treated as running nine days ahead
            enddate = nowdate + timedelta(days=9)
        else:
            enddate = datetime.strptime(single_sched['ended_date'], date_format)
        startdate = datetime.strptime(single_sched['active_date'], date_format)
        pact_id = single_sched['pact_id']

        # insert() returns the root after insertion; store that root so the
        # dict never holds a node that is no longer the top of the tree.
        day_intervaltree[day_of_week] = daytree.insert(
            get_seconds(startdate), get_seconds(enddate), other=pact_id)

    return CHWPatientSchedule(chw_username, day_intervaltree, chw_schedules)
f_file = dirOutPerCategory + prefix + "." + chr + ".genomicFeature" outfile = open(f_file, 'w') outFile[chr] = open(f_file, 'w') outFile[chr].write( 'readName,chr,category, geneID, geneName, flag_multiMapped\n') #DATA STRUCTURE - per chr tree_utr3 = {} tree_utr5 = {} tree_cds = {} tree_geneCoordinates = {} tree_rRNA = {} tree_intergenic = {} # +10,000 for chr in chr_list: tree_utr3[chr] = IntervalNode(0, 0) tree_utr5[chr] = IntervalNode(0, 0) tree_cds[chr] = IntervalNode(0, 0) tree_geneCoordinates[chr] = IntervalNode(0, 0) tree_rRNA[chr] = IntervalNode(0, 0) tree_intergenic[chr] = IntervalNode(0, 0) print("Load gene annotations ...") geneUTR3 = {} #UTR3 print("Load", utr3_file) with open(utr3_file, 'r') as f: reader = csv.reader(f)
"Generates random interval over a size and span" lo = randint(10000, SIZE) hi = lo + randint(1, SPAN) return (lo, hi) def find(start, end, tree): "Returns a list with the overlapping intervals" out = [] tree.intersect( start, end, lambda x: out.append(x) ) return [ (x.start, x.end) for x in out ] # use this to force both examples to generate the same data seed(10) # generate 10 thousand random intervals data = map(generate, xrange(N)) # generate the intervals to query over query = map(generate, xrange(10)) # start the root at the first element start, end = data[0] tree = IntervalNode( start, end ) # build an interval tree from the rest of the data for start, end in data[1:]: tree = tree.insert( start, end ) for start, end in query: overlap = find(start, end , tree) print '(%s, %s) -> %s' % (start, end, overlap)
'''Returns a list with the overlapping intervals''' out = [] tree.contains( start, end, lambda x: out.append(x) ) return [ (x.start, x.end, x.other) for x in out ] #Salon 1 sch = {1:[(930,1030),(1130,1230),(1345,1600)],2:[800,900]} s1 = Classroom("S1", 50,sch) #Salon 2 sch = {1:[(730,1000),(1350,1500),(1600,1800)],2:[1000,1500]} s2 = Classroom("S2", 30,sch) weekTrees = {} mondayTree = IntervalNode( s1.schedule[1][0][0],s1.schedule[1][0][1],other=s1) for interval in s1.schedule[1][1:]: #print interval mondayTree = mondayTree.insert( interval[0],interval[1],other=s1 ) for interval in s2.schedule[1]: #print interval mondayTree = mondayTree.insert( interval[0],interval[1],other=s2 ) tuesdayTree = deepcopy(mondayTree) ilist = IntervalList(5) #interval list for interval in s1.schedule[1]: ilist.insert(interval[0],interval[1],s1,1)
def build_interval_tree(intervals):
    """Return the root of a tree holding every item of ``intervals``.

    The first item seeds the root node; each later item is inserted in
    order, always continuing from the node returned by the previous insert.
    """
    tree = IntervalNode(intervals[0])
    for interval in intervals[1:]:
        tree = tree.insert(interval)
    return tree
hi = lo + randint(1, SPAN) return (lo, hi) def find(start, end, tree): "Returns a list with the overlapping intervals" out = [] tree.intersect(start, end, lambda x: out.append(x)) return [(x.start, x.end) for x in out] # use this to force both examples to generate the same data seed(10) # generate 10 thousand random intervals data = map(generate, xrange(N)) # generate the intervals to query over query = map(generate, xrange(10)) # start the root at the first element start, end = data[0] tree = IntervalNode(start, end) # build an interval tree from the rest of the data for start, end in data[1:]: tree = tree.insert(start, end) for start, end in query: overlap = find(start, end, tree) print '(%s, %s) -> %s' % (start, end, overlap)