Ejemplo n.º 1
0
 def __init__(self, capacity,
              start_key=lambda o: o[0],
              length_key=lambda o: o[1]):
     """
     Create an empty cache.

     @param capacity: Forwarded to BoundedLRUQueue as its capacity.
     @param start_key: A function that fetches the range start from an item.
     @param length_key: A function that fetches the range length from an item.
     """
     super(RangeCache, self).__init__()
     # Keep both accessors on the instance.
     self._start_key, self._length_key = start_key, length_key
     # Items ordered by range start, plus a bounded LRU queue keyed the same way.
     self._ranges = SortedCollection(key=start_key)
     self._lru = BoundedLRUQueue(capacity, key=start_key)
Ejemplo n.º 2
0
def ordered_traversal(dependency_list):
    """
    Return the values of `dependency_list` in dependency order.

    A value is emitted once `find_no_dependencies` reports it as ready; it is
    then discarded from every remaining entry so that further values can
    become ready.  Ready values wait in a SortedCollection built with
    ``reverse=True`` and are taken with ``pop()``.
    """
    forward = build_forward_dependencies(dependency_list)
    pending = build_reverse_dependencies(forward)
    ready_queue = SortedCollection(find_no_dependencies(pending), reverse=True)

    ordered = []
    while ready_queue:
        current = ready_queue.pop()
        # The value is done: forget its own entry ...
        del pending[current]
        ordered.append(current)
        # ... and stop every other entry from waiting on it.
        for waiting_on in pending.values():
            waiting_on.discard(current)
        # Queue whatever has become ready (maybe_insert presumably skips
        # values that are already queued -- confirm in SortedCollection).
        for candidate in find_no_dependencies(pending):
            ready_queue.maybe_insert(candidate)
    return ordered
Ejemplo n.º 3
0
def getGenesInLongRangeWindows(annotations,regions_collection,shift):
	"""
	Collect, for every gene, the regions lying in a window of +/- `shift` around it.

	annotations: mapping geneID -> entry, where entry[0] is read as the
		chromosome and entry[1][0] as the position the window is centred on.
	regions_collection: mapping chromosome -> sorted collection of regions;
		the collection must support find_lt / find_ge / find_le / index and
		item access by position.
	shift: half-width of the window.

	Returns a dict geneID -> SortedCollection (keyed on item[1]) with the
	selected regions.  Genes whose chromosome is missing, or whose window
	holds no region, map to an empty collection.
	"""
	gene_regions = {}
	for geneID in annotations.keys():
		gene_regions[geneID]=SortedCollection(key=itemgetter(1))
		left_Border=annotations[geneID][1][0]-shift
		right_Border=annotations[geneID][1][0]+shift
		chromosome=annotations[geneID][0]
		if (chromosome in regions_collection):
			selectedLoops = regions_collection[chromosome]
			# Left end of the target interval: prefer the last region whose key
			# is below the window start, because it may still reach into it.
			try:
				left_item = selectedLoops.find_lt(left_Border)
			except ValueError:
				# Nothing starts before the window: take the first region at
				# or after the window start, if any.
				try:
					left_item = selectedLoops.find_ge(left_Border)
				except ValueError:
					left_item = None
			else:
				# item[2] is presumably the region end (confirm against the
				# reader): if it stops before the window, skip ahead.
				if left_item[2] < left_Border:
					try:
						left_item = selectedLoops.find_ge(left_Border)
					except ValueError:
						left_item = None
			# Right end: last region whose key is at or below the window end.
			try:
				right_item = selectedLoops.find_le(right_Border)
			except ValueError:
				right_item = None
			# Check if target interval is valid
			if left_item is not None and right_item is not None:
				left_index = selectedLoops.index(left_item)
				right_index = selectedLoops.index(right_item)
				if left_index <= right_index:
					# Copy regions in target interval
					for i in xrange(left_index, right_index + 1):
						gene_regions[geneID].insert_right(selectedLoops[i])
	return gene_regions
Ejemplo n.º 4
0
    def __init__(self, ranges=tuple()):
        """
        Build the set from an iterable of inclusive ``(begin, end)`` pairs.

        The ranges are sorted by their begin and consolidated (see
        ``_consolidate``).  The following attributes are then derived:

        - ``begin`` / ``start``: begin of the first range (None when empty)
        - ``end`` / ``stop``: end of the last range (None when empty)
        - ``span``: ``end - begin + 1`` (0 when empty)
        - ``coverage``: sum of ``end - begin + 1`` over all ranges (0 when empty)
        """
        # Sort by the start of every range:
        self._ranges = SortedCollection(ranges, itemgetter(0))

        # Look at what was actually collected, not at the `ranges` argument:
        # a generator is always truthy (even when it yields nothing), which
        # used to send an empty collection into _consolidate().
        if len(self._ranges) > 0:
            self._consolidate()

            self.begin = self.start = self._ranges[0][0]
            self.end = self.stop = self._ranges[-1][1]

            self.span = self.end - self.begin + 1
            self.coverage = sum(end - begin + 1
                                for (begin, end) in self._ranges)
        else:
            self.begin = self.start = self.end = self.stop = None
            self.span = self.coverage = 0
Ejemplo n.º 5
0
def read_Intra_Loop_Regions_Collection(filename):
	"""
	Read intra-chromosomal loops from a whitespace-separated Hi-C file.

	Each usable line needs at least six fields:
	``chrA startA endA chrB startB endB``.  Only lines whose two chromosome
	fields are equal are kept; a "chr" substring is removed from the
	chromosome name.  Lines with fewer than six fields (for example blank
	lines) are skipped.

	Returns a tuple ``(left_region_collection, right_region_collection)``.
	Both map chromosome -> SortedCollection keyed on item[1]:

	- left items:  (loopID, startA, endA, startB, endB)
	- right items: (loopID, startB, endB, startA, endA)

	loopID counts the kept loops, starting at 1.
	"""
	left_region_collection = {}
	right_region_collection = {}
	with open(filename) as hi_c_file:
		loopID=0
		for line in hi_c_file:
			line = line.split()
			# A blank or truncated line cannot describe a loop; without this
			# guard it raised IndexError (e.g. on a trailing empty line).
			if len(line) < 6:
				continue
			if (line[0] == line[3]):
				loopID += 1
				chromosome = line[0].replace("chr","")
				if chromosome not in left_region_collection:
					left_region_collection[chromosome] = SortedCollection(key=itemgetter(1))
				if chromosome not in right_region_collection:
					right_region_collection[chromosome] = SortedCollection(key=itemgetter(1))
				left_region_collection[chromosome].insert_right((loopID, int(line[1]), int(line[2]), int(line[4]), int(line[5])))
				right_region_collection[chromosome].insert_right((loopID, int(line[4]), int(line[5]), int(line[1]), int(line[2])))
	return left_region_collection, right_region_collection
Ejemplo n.º 6
0
def merge_gene_regions(gene_regions, add_gene_regions,annotation):
	"""
	Merge the regions of `add_gene_regions` into `gene_regions`, in place.

	For every geneID in `annotation`:

	- a missing entry in `gene_regions` is created as an empty
	  SortedCollection keyed on item[1];
	- every region listed for that gene in `add_gene_regions` is appended
	  with insert_right(), unless find(region[1]) already succeeds on the
	  target collection.

	Genes that appear only in `add_gene_regions` (not in `annotation`) are
	ignored.  Returns `gene_regions` (the same object that was passed in).
	"""
	# Membership is tested on the mappings themselves: `x in d.keys()` builds
	# and scans a list on Python 2, which made this loop quadratic.
	for geneID in annotation:
		if geneID not in gene_regions:
			gene_regions[geneID] = SortedCollection(key=itemgetter(1))
		if geneID in add_gene_regions:
			target = gene_regions[geneID]
			for region in add_gene_regions[geneID]:
				try:
					target.find(region[1])
				except ValueError:
					# No region with this key yet, so add it.
					target.insert_right(region)
	return gene_regions
Ejemplo n.º 7
0
    def get_stats(self):
        """
        Print the rendered "report.txt" template with reading statistics.

        Items come from ``self.get_items(False)`` and are split by their
        ``is_read`` / ``is_unread`` attributes.  Read items are ordered by
        ``time_read`` and unread items by ``time_added``, so that
        ``self._get_items_since`` can be asked about the last 7 and 30 days.
        """
        items = self.get_items(False)
        # Real lists: filter() returns an iterator on Python 3, which has no
        # len() and would be exhausted after its first use.
        read = [item for item in items if item.is_read]
        unread = [item for item in items if item.is_unread]

        read_sorted = SortedCollection(read, operator.attrgetter('time_read'))
        unread_sorted = SortedCollection(unread,
                                         operator.attrgetter('time_added'))

        # Cut-off dates for "within the last week / month".
        now = datetime.datetime.now()
        _7_days_ago = now + relativedelta(days=-7)
        _30_days_ago = now + relativedelta(days=-30)

        # Parenthesised single-argument print behaves the same on Python 2
        # and Python 3.
        print(self.render(
            "report.txt",
            total=len(items),
            total_read=len(read),
            total_unread=len(unread),
            now=now,
            newly_added_7d=self._get_items_since(unread_sorted, _7_days_ago),
            newly_read_7d=self._get_items_since(read_sorted, _7_days_ago),
            newly_added_30d=self._get_items_since(unread_sorted, _30_days_ago),
            newly_read_30d=self._get_items_since(read_sorted, _30_days_ago)))
Ejemplo n.º 8
0
def readOC_Region(filename):
	"""
	Read regions written as ``chrom:start-end`` from the first column of a file.

	The first line of the file is skipped.  For every following line the
	first whitespace-separated field is split on ":"; fields without a ":"
	and blank lines are ignored.  A "chr" substring is removed from the
	chromosome name.

	Returns a dict chromosome -> SortedCollection keyed on item[1], holding
	tuples ``(counter, start, end)``; counter numbers the accepted regions
	starting at 1.
	"""
	oC={}
	counter=1
	# The with-block closes the file even if a malformed coordinate makes
	# int() raise half-way through.
	with open(filename,"r") as tfpa:
		tfpa.readline()
		for l in tfpa:
			fields=l.split()
			if not fields:
				# Blank line: previously an IndexError.
				continue
			ds=fields[0].split(":")
			if (len(ds)>=2):
				chrom=ds[0].replace("chr","")
				se=ds[1].split("-")
				if chrom not in oC:
					oC[chrom]=SortedCollection(key=itemgetter(1))
				oC[chrom].insert_right((counter,int(se[0]),int(se[1])))
				counter+=1
	return oC
Ejemplo n.º 9
0
    def _consolidate(self):
        """
        Replace ``self._ranges`` by an equivalent collection in which ranges
        that overlap or touch (previous end >= next begin - 1) are merged.

        Expects ``self._ranges`` to hold at least one range.
        """
        merged = SortedCollection(key=itemgetter(0))
        cur_begin, cur_end = self._ranges[0]
        for nxt_begin, nxt_end in self._ranges[1:]:
            if cur_end >= nxt_begin - 1:
                # Overlapping or adjacent: grow the range being built.
                cur_end = max(cur_end, nxt_end)
                continue
            # A gap: the range being built is complete, start a new one.
            merged.insert((cur_begin, cur_end))
            cur_begin, cur_end = nxt_begin, nxt_end

        # The last range being built has not been stored yet.
        merged.insert((cur_begin, cur_end))

        self._ranges = merged
Ejemplo n.º 10
0
def _first_candidate_index(collection, position, end_field):
	"""Index of the first item of `collection` that can still reach `position`."""
	try:
		index = collection.find_lt_index(position)
	except ValueError:
		# Nothing is keyed below `position`: start at the first item at or
		# after it, or past the end when there is none.
		try:
			return collection.find_ge_index(position)
		except ValueError:
			return len(collection)
	# The item keyed below `position` only counts if it extends up to it.
	if collection[index][end_field] < position:
		index += 1
	return index


def _has_intersection(a_region, collection, start_field, end_field):
	"""True if the first candidate item of `collection` starts at or before a_region[2]."""
	index = _first_candidate_index(collection, a_region[1], end_field)
	return index < len(collection) and collection[index][start_field] <= a_region[2]


def get_intersecting_regions(a_regions, b_collection):
	"""
	Return the regions of `a_regions` that intersect an item of `b_collection`.

	a_region[1] and a_region[2] are used as the interval of a region.  Every
	item of `b_collection` is tested twice: once with fields 1/2 and once
	with fields 3/4 as its interval (presumably the two anchors of a loop --
	confirm against the reader that builds the collection).

	For the second pass `b_collection.key` is temporarily set to
	itemgetter(3); it is set back to itemgetter(1) afterwards, also when the
	pass fails with an exception.

	Returns a SortedCollection keyed on item[1].  A region that matches in
	both passes is passed to insert_right() twice, as before.
	"""
	intersection_a = SortedCollection(key=itemgetter(1))
	for a_region in a_regions:
		if _has_intersection(a_region, b_collection, 1, 2):
			intersection_a.insert_right(a_region)

	b_collection.key=itemgetter(3)
	try:
		for a_region in a_regions:
			if _has_intersection(a_region, b_collection, 3, 4):
				intersection_a.insert_right(a_region)
	finally:
		# Never leave the caller's collection re-keyed.
		b_collection.key=itemgetter(1)
	return intersection_a
Ejemplo n.º 11
0
def main():
    ''' Command-line entry point.

        Builds the argument parser (global options plus the "region", "gene"
        and "config_file" sub-commands), loads the hit tracks for the chosen
        organism, indexes them per chromosome by hit position, and passes
        the parsed arguments and the hits to handle_args().

        With the "config_file" sub-command every non-empty line of the file
        that does not start with "#" is parsed as a complete command line
        and handled in turn.

        Raises Exception when no hit tracks were loaded.
    '''
    parser = argparse.ArgumentParser(usage=MAINUSEAGE)

    parser.add_argument("--hits-folder", default=".")
    parser.add_argument("--output-folder", default=".")
    parser.add_argument("--domains", default="highlighted", choices=[GENES_ALL, GENES_HIGHLIGHTED, GENES_NONE])
    parser.add_argument("--direction", default="highlighted", choices=[GENES_ALL, GENES_HIGHLIGHTED, GENES_NONE])
    parser.add_argument("--organism", default="Calb", choices=["Calb", "Scer", "Spom"])
    parser.add_argument("--absolute-pixel-size", type=int, default=0)
    parser.add_argument("--rna-bam")

    # Turns a comma-separated command-line value into a list of names.
    gene_list_parser = lambda gs: [g for g in gs.split(',')]

    subparsers = parser.add_subparsers(dest="source_type")

    # "region": an explicit chromosome interval.
    region_parser = subparsers.add_parser("region", usage=REGIONUSAGE)
    region_parser.add_argument("--chromosome", required=True)
    region_parser.add_argument("--start", type=int, required=True)
    region_parser.add_argument("--stop", type=int, required=True)
    region_parser.add_argument("--genes", type=gene_list_parser, default="")

    # "gene": a list of genes, with either --percent-of-length or --bps.
    gene_parser = subparsers.add_parser("gene", usage=GENEUSAGE)
    gene_name = gene_parser.add_mutually_exclusive_group(required=True)
    # TODO: can we make --genes not be named, and instead come at the end of the parser?
    gene_name.add_argument("--genes", type=gene_list_parser)
    gene_region = gene_parser.add_mutually_exclusive_group()
    gene_region.add_argument("--percent-of-length", type=float, default=0.2)
    gene_region.add_argument("--bps", type=int)
    gene_parser.add_argument("--exclude-genes", type=gene_list_parser, default="")

    # "config_file": a file with one command line per line (handled below).
    config_file_parser = subparsers.add_parser("config_file", usage=CONFIGUSAGE)
    config_file_parser.add_argument("config_file")

    args = parser.parse_args()

    if args.organism == "Calb":
        hits = SummaryTable.read_hit_files(glob.glob(os.path.join(args.hits_folder, "*_Hits.txt")))
    elif args.organism == "Scer":
        import cPickle
        all_track_files = glob.glob(os.path.join(args.hits_folder, "*.wig"))

        # We cache the hits because the hit reading process involves finding the
        # feature which got hit, for every hit, and that makes reading an O(n log n)
        # operation, which is a little slow. O(n) is better here.
        # NOTE(review): the cache is unpickled without any validation, so the
        # hits folder must be trusted.  It is also never invalidated when the
        # .wig files change.
        hit_cache = os.path.join(args.hits_folder, "cached_sc_track_hits.dat")
        if not os.path.exists(hit_cache):
            all_tracks = [SummaryTable.get_hits_from_wig(fname) for fname in all_track_files]
            with open(hit_cache, 'wb') as pickle_file:
                cPickle.dump(all_tracks, pickle_file)
        else:
            with open(hit_cache, 'rb') as pickle_file:
                all_tracks = cPickle.load(pickle_file)

        hits = all_tracks
    elif args.organism == "Spom":
        # NOTE(review): hard-coded absolute path; --hits-folder is ignored
        # for this organism.
        hits = [SummaryTable.read_pombe_hit_file("/Users/bermanlab/ngs-bench/Hermes/SRR327340.trimmed.trail_q_20.sorted_Hits.csv")]

    # Process hits for quicker access by chromosome name and position:
    new_hits = []
    db = _get_organism(args.organism).feature_db

    # TODO: we manipulate the chromosome names to reflect the standard names,
    # but this should be done in the hit-reading functions.
    chrom_names = db._chrom_names # TODO: don't do this, _chrom_names are protected!
    for hit_track in hits:
        # One position-sorted collection per chromosome of the organism.
        new_hit_track = {chrom.name: SortedCollection(key=lambda h: h["hit_pos"]) for chrom in db}
        for hit in hit_track:
            chrom = chrom_names[hit["chrom"]]
            hit["chrom"] = chrom
            new_hit_track[chrom].insert(hit)
        new_hits.append(new_hit_track)
    hits = new_hits

    # An empty list here means that no track was read at all.
    if not hits:
        raise Exception("No hit files were found in the hits folder: %s" % args.hits_folder)

    if args.source_type == "config_file":
        with open(args.config_file, "r") as in_file:
            # Read the file once, in case it changes in the middle of the run:
            for line in in_file.readlines():
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                # Each line is a full command line for this same parser; the
                # hits loaded above are reused for all of them.
                handle_args(parser.parse_args(shlex.split(line)), hits)
    else:
        handle_args(args, hits)
Ejemplo n.º 12
0
def _findConflict(games, row):
  """Return the game in `games` whose time slot clashes with `row`, or None."""
  # The last game starting at or before the new one must be over before
  # the new one starts.
  try:
    previous = games.find_le(row['start'])
  except ValueError:
    previous = None
  if previous is not None and not previous['end'] < row['start']:
    return previous
  # The first game starting after the new one must not start before the
  # new one is over.
  try:
    following = games.find_gt(row['start'])
  except ValueError:
    return None
  if following['start'] <= row['end']:
    return following
  return None


def addFile(filename, games):
  """
  Merge the games listed in the CSV file `filename` into `games`.

  `games` is a sorted collection keyed on each game's 'start'.  The 'start'
  and 'end' columns of every row are parsed with parser.parse().  A row is
  inserted only if its time slot clashes with no game already in `games`;
  otherwise it is skipped and a message is printed.

  Both neighbours are checked.  Previously only the game starting at or
  before the new one was, so a game running into the *following* game was
  inserted anyway.
  """
  with open(filename, 'r') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
      row['start'] = parser.parse(row['start'])
      row['end'] = parser.parse(row['end'])
      item = _findConflict(games, row)
      if item is None:
        games.insert(row)
      else:
        print("Skipping game: %s,%s,%s it conflicts with %s,%s,%s" % (row['name'], row['start'], row['end'], item['name'], item['start'], item['end']))


if __name__ == "__main__":
  # Command line as described by the module docstring.
  arguments = docopt(__doc__, version='Merge 0.1')
  # Accepted games, ordered by their start.
  mergedGames = SortedCollection([], key=lambda game: game['start'])

  # Files are merged in the order given, so earlier files win conflicts.
  inputFiles = arguments["<CSVfiles>"]
  for filename in inputFiles:
    addFile(filename, mergedGames)

  # Write the merged schedule with a fixed column order.
  outputColumns = ["team", "name", "start", "end", "band", "freq"]
  with open(arguments["<outCSV>"], "w") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=outputColumns)
    writer.writeheader()
    for game in mergedGames:
      writer.writerow(game)
Ejemplo n.º 13
0
def convertTrainingData():
    """
    Split the data into train / test / cross-validation sets and write them
    to CSV files.

    The articles are shuffled; the first 5% become the CV set, the next 10%
    the test set and the rest the training set.  Subarticles follow their
    parent article, comments follow the article, subarticle or comment they
    were made on.  Every view for which prepTrainingSet() returns a
    non-empty x is appended (as ``y`` followed by ``x``) to the table of its
    viewable's type and set.

    Nine files are written to the current directory:
    ``{article,subarticle,comment}{Train,Test,CV}.csv``, each starting with
    a header row.
    """
    # Column names shared by all tables; article tables carry five extra
    # subarticle-related columns.
    commonHeader = [
        'yUp', 'yDown', 'yGetCom', 'yCreateCom', 'yGetSub', 'yCreateSub',
        'id', 'rating', 'viewCount', 'upVoteCount', 'downVoteCount',
        'getCommentsCount', 'createComment', 'notCommentRating',
        'PgetComment', 'PcreateComment', 'Pup', 'Pdown'
    ]
    articleHeader = commonHeader + [
        'getSubarticlesCount', 'createSubarticle', 'notSubarticleRating',
        'PgetSub', 'PcreateSub'
    ]
    headers = {
        'article': articleHeader,
        'subarticle': commonHeader,
        'comment': commonHeader,
    }

    # filename -> list of rows, the first row being the header.
    data = {}
    for kind in ('article', 'subarticle', 'comment'):
        for suffix in ('Train', 'Test', 'CV'):
            data[kind + suffix + '.csv'] = [list(headers[kind])]

    arts = articles[:]
    random.shuffle(arts)

    cvLength = int(0.05 * len(arts))
    testLength = int(0.10 * len(arts))

    cvArts = SortedCollection(arts[0:cvLength], key=itemgetter('id'))
    testArts = SortedCollection(arts[cvLength:(cvLength + testLength)],
                                key=itemgetter('id'))
    trainArts = SortedCollection(arts[(cvLength + testLength):],
                                 key=itemgetter('id'))

    cvSubs = SortedCollection([], key=getId)
    testSubs = SortedCollection([], key=getId)
    trainSubs = SortedCollection([], key=getId)

    cvComs = SortedCollection([], key=getId)
    testComs = SortedCollection([], key=getId)
    trainComs = SortedCollection([], key=getId)

    def contains(lst, Id):
        """True if `lst` holds a (truthy) item whose key equals str(Id)."""
        try:
            return bool(lst.find(str(Id)))
        except ValueError:
            return False

    def setSuffix(cvParents, testParents, Id):
        """Name of the set ('CV', 'Test' or 'Train') the parent `Id` is in."""
        if contains(cvParents, Id):
            return 'CV'
        if contains(testParents, Id):
            return 'Test'
        return 'Train'

    for sub in Subarticles.find():
        if contains(testArts, sub['parentId']):
            testSubs.insert(sub)
        elif contains(cvArts, sub['parentId']):
            cvSubs.insert(sub)
        else:
            trainSubs.insert(sub)

    comsBySet = {'CV': cvComs, 'Test': testComs, 'Train': trainComs}
    # viewable / commentable type -> (cv collection, test collection)
    parentsByType = {
        'article': (cvArts, testArts),
        'subarticle': (cvSubs, testSubs),
        'comment': (cvComs, testComs),
    }

    # First pass: comments made directly on an article or subarticle.
    # Everything else is kept for a second pass, because its parent comment
    # has to be placed first.
    replies = []
    for comment in Comments.find():
        kind = comment['commentableType']
        if kind in ('article', 'subarticle'):
            cvParents, testParents = parentsByType[kind]
            suffix = setSuffix(cvParents, testParents,
                               comment['commentableId'])
            comsBySet[suffix].insert(comment)
        else:
            replies.append(comment)

    # Second pass: `replies` never holds article / subarticle comments, so
    # only the 'comment' case (and the fallback message) remains.
    for comment in replies:
        if comment['commentableType'] == 'comment':
            suffix = setSuffix(cvComs, testComs, comment['commentableId'])
            comsBySet[suffix].insert(comment)
        else:
            print("Comment on a comment!")

    # Format first, then print: `print(...).format(...)` only works as a
    # Python 2 print statement.
    print("cvArts: {}\tcvSubs: {}\tcvComs: {}".format(len(cvArts),
                                                       len(cvSubs),
                                                       len(cvComs)))
    print("testArts: {}\ttestSubs: {}\ttestComs: {}".format(
        len(testArts), len(testSubs), len(testComs)))
    print("trainArts: {}\ttrainSubs: {}\ttrainComs: {}".format(
        len(trainArts), len(trainSubs), len(trainComs)))
    sys.stdout.flush()

    views = Views.find().sort("_id", 1)
    viewLength = views.count()
    pbar = progressbar.ProgressBar(widgets=[
        progressbar.Timer(),
        progressbar.ETA(),
        progressbar.Bar(),
        progressbar.Percentage()
    ],
                                   maxval=viewLength).start()

    processed = 0
    for view in views:
        pbar.update(processed)
        processed = processed + 1
        x, y = prepTrainingSet(view)
        if len(x):
            dat = np.append(y, x).tolist()
            kind = view['viewableType']
            if kind in parentsByType:
                cvParents, testParents = parentsByType[kind]
                suffix = setSuffix(cvParents, testParents,
                                   view['viewableId'])
                data[kind + suffix + '.csv'].append(dat)
            else:
                # The placeholder used to be printed verbatim.
                print("Unknown viewableType: {}".format(kind))

    pbar.finish()
    print("Writing results")
    for filename, lst in data.items():
        # NOTE(review): 'wb' is the csv convention on Python 2; Python 3
        # needs text mode with newline='' instead.
        with open(filename, 'wb') as csvfile:
            writer = csv.writer(csvfile)
            for line in lst:
                writer.writerow(line)
Ejemplo n.º 14
0
# Short aliases for the collections used below.  InteractionsDB and
# ArticlesDB are defined outside this excerpt (presumably database
# handles -- confirm).
DownVotes = InteractionsDB.downVote
Articles = ArticlesDB.article
Subarticles = ArticlesDB.subarticle
Comments = ArticlesDB.comment


def getId(item):
    """Return the ``'_id'`` entry of `item` converted to a string."""
    identifier = item['_id']
    return str(identifier)


def getViewId(item):
    """Return the ``'viewId'`` entry of `item` converted to a string."""
    view_identifier = item['viewId']
    return str(view_identifier)


# Load the three interaction collections into memory once, each as a
# SortedCollection keyed on the stringified viewId (see getViewId).  The
# query results are requested sorted by "viewId" as well.
# Parenthesised single-argument print works on Python 2 and Python 3.
print("caching downvotes")
downVotes = SortedCollection(list(DownVotes.find().sort("viewId", 1)),
                             key=getViewId)
print("caching upvotes")
upVotes = SortedCollection(list(UpVotes.find().sort("viewId", 1)),
                           key=getViewId)
print("caching clicks")
clicks = SortedCollection(list(Clicks.find().sort("viewId", 1)), key=getViewId)


def findGeneric(lst, Id):
    Id = str(Id)
    try:
Ejemplo n.º 15
0
class RangeCache(object):
    """
    A bounded collection of ranges with lookup by contained value.

    Every item describes a range through a numeric start and a numeric
      length, read from the item with the `start_key` and `length_key`
      functions.  Items are added with `push`; when that makes the
      underlying BoundedLRUQueue hand back an item, that item is dropped
      from the cache as well.  `get` finds the item whose range contains
      a given value.
    """
    def __init__(self, capacity,
                 start_key=lambda o: o[0],
                 length_key=lambda o: o[1]):
        """
        @param capacity: Forwarded to BoundedLRUQueue as its capacity.
        @param start_key: A function that fetches the range start from an item.
        @param length_key: A function that fetches the range length from an item.
        """
        super(RangeCache, self).__init__()
        self._start_key, self._length_key = start_key, length_key
        # Items ordered by range start (for `get`), and the LRU queue that
        # decides which item has to go (for `push`).
        self._ranges = SortedCollection(key=start_key)
        self._lru = BoundedLRUQueue(capacity, key=start_key)

    def push(self, o):
        """
        Add a range to the cache.

        With the default key functions `o` is indexed as a sequence:
            - o[0]: range start (numeric)
            - o[1]: range length (numeric)
          Further elements (e.g. a payload object) are carried along.
        """
        self._ranges.insert(o)
        evicted = self._lru.push(o)
        if evicted is None:
            return
        # The queue gave an item back: it must leave the lookup table too.
        self._ranges.remove(evicted)

    def touch(self, o):
        """Forward `o` to the LRU queue's `touch`."""
        self._lru.touch(o)

    def get(self, value):
        """
        Return the item whose range contains the numeric `value`, i.e.
          start <= value < start + length.
        @raise ValueError: if the value is not found in the range cache.
        """
        # Only the last range starting at or before `value` can contain it.
        candidate = self._ranges.find_le(value)
        range_end = self._start_key(candidate) + self._length_key(candidate)
        if not value < range_end:
            raise ValueError("%s not found in range cache" % value)
        return candidate

    @staticmethod
    def test():
        """Run a small self-check; returns True when every assertion holds."""
        def lookup(cache, value):
            # None stands for "not in the cache".
            try:
                return cache.get(value)
            except ValueError:
                return None

        q = RangeCache(2)

        # Nothing can be found in an empty cache.
        assert lookup(q, 0) is None
        assert lookup(q, 1) is None

        q.push((1, 1, [0]))

        assert lookup(q, 0) is None
        assert q.get(1) == (1, 1, [0])
        assert q.get(1.99) == (1, 1, [0])
        assert lookup(q, 2.01) is None

        q.push((3, 1, [1]))
        assert q.get(1) == (1, 1, [0])
        assert q.get(3) == (3, 1, [1])

        # Third push into a cache of capacity 2: the first range is gone.
        q.push((5, 1, [2]))
        assert lookup(q, 1) is None

        assert q.get(3) == (3, 1, [1])
        assert q.get(5) == (5, 1, [2])

        # After touching the range at 3, the next push removes the one at 5.
        q.touch((3, 1, [1]))
        q.push((7, 1, [3]))

        assert q.get(3) == (3, 1, [1])
        assert q.get(7) == (7, 1, [3])
        assert lookup(q, 5) is None

        return True
Ejemplo n.º 16
0
class RangeSet(object):
    """
    A set of inclusive ``(begin, end)`` ranges, kept sorted by begin.

    Ranges that overlap or touch (previous end >= next begin - 1) are merged
    on construction.  Attributes set by the constructor:

    - ``begin`` / ``start``: begin of the first range (None when empty)
    - ``end`` / ``stop``: end of the last range (None when empty)
    - ``span``: ``end - begin + 1`` (0 when empty)
    - ``coverage``: sum of ``end - begin + 1`` over all ranges (0 when empty)

    ``+`` and ``|`` build the union, ``&`` the intersection and ``-`` the
    difference; each returns a new RangeSet.
    """
    # TODO: currently doesn't handle the null range set very well. Should
    # introduce a NULL static singleton somehow.

    def __init__(self, ranges=tuple()):
        """Build the set from an iterable of ``(begin, end)`` pairs."""
        # Sort by the start of every range:
        self._ranges = SortedCollection(ranges, itemgetter(0))

        # Look at what was actually collected, not at the `ranges` argument:
        # a generator is always truthy (even when it yields nothing), which
        # used to send an empty collection into _consolidate().
        if len(self._ranges) > 0:
            self._consolidate()

            self.begin = self.start = self._ranges[0][0]
            self.end = self.stop = self._ranges[-1][1]

            self.span = self.end - self.begin + 1
            self.coverage = sum(end - begin + 1
                                for (begin, end) in self._ranges)
        else:
            self.begin = self.start = self.end = self.stop = None
            self.span = self.coverage = 0

    def __len__(self):
        """Number of (consolidated) ranges."""
        return len(self._ranges)

    def __iter__(self):
        return iter(self._ranges)

    def __getitem__(self, key):
        return self._ranges[key]

    def __contains__(self, pos):
        """True if `pos` lies inside one of the ranges (bounds included)."""
        try:
            # Only the last range beginning at or before `pos` can hold it.
            begin, end = self._ranges.find_le(pos)
        except ValueError:
            return False
        return pos >= begin and pos <= end

    def __add__(self, other):
        """Union of both sets."""
        return RangeSet(list(self) + list(other))

    def __or__(self, other):
        return self + other

    def __and__(self, other):
        """Intersection of both sets."""
        # An empty operand has start/stop of None, which can be neither
        # compared nor subtracted below; the intersection is empty anyway.
        if not self or not other:
            return RangeSet()

        leftmost = min(self.start, other.start)
        rightmost = max(self.stop, other.stop)

        # De Morgan within [leftmost, rightmost]:
        # A & B == complement(complement(A) | complement(B)).
        return (self.complement(leftmost, rightmost)
                | other.complement(leftmost, rightmost)).complement(
                    leftmost, rightmost)

    def __sub__(self, other):
        """The parts of this set that are not covered by `other`."""
        if not self:
            # Nothing to subtract from (and no bounds to complement within).
            return RangeSet()
        return self & other.complement(self.start, self.stop)

    def __eq__(self, other):
        """Equal when both hold the same ranges in the same order."""
        try:
            same_length = len(self) == len(other)
        except TypeError:
            # `other` is not a sized collection of ranges (e.g. None).
            return NotImplemented
        return same_length and all(
            b1 == b2 and e1 == e2 for (b1, e1), (b2, e2) in zip(self, other))

    def __ne__(self, other):
        # Python 2 does not derive != from ==.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __str__(self):
        return str(list(self._ranges))

    def _consolidate(self):
        """Merge overlapping or touching ranges; needs at least one range."""
        new_ranges = SortedCollection(key=itemgetter(0))
        prev_begin, prev_end = self._ranges[0]
        for begin, end in self._ranges[1:]:
            if prev_end >= begin - 1:
                # Consolidate the previous and current ranges:
                prev_end = max(prev_end, end)
            else:
                # Add the previous range, and continue with the current range
                # as the seed for the next iteration:
                new_ranges.insert((prev_begin, prev_end))
                prev_begin = begin
                prev_end = end

        new_ranges.insert((prev_begin, prev_end))

        self._ranges = new_ranges

    def complement(self, begin, end):
        """
        Return the gaps of this set, extended out to `begin` and `end`.

        The result holds every gap between consecutive ranges, plus
        ``(begin, first_begin - 1)`` if `begin` lies before the set and
        ``(last_end + 1, end)`` if `end` lies after it.  For an empty set
        the result is the single range ``(begin, end)``.

        NOTE(review): gaps between ranges are not clipped to [begin, end];
        the result is only a true complement when [begin, end] covers the
        whole set, which is how the operators in this class call it.
        """
        if not self:
            return RangeSet([(begin, end)])

        inter_complement = [
            (e1 + 1, b2 - 1)
            for (b1, e1), (b2, e2) in zip(self._ranges, self._ranges[1:])
        ]

        if begin < self.start:
            inter_complement.append((begin, self.start - 1))
        if end > self.end:
            inter_complement.append((self.end + 1, end))

        return RangeSet(inter_complement)

    def intersects(self, begin, end):
        """True if any range overlaps the inclusive interval [begin, end]."""
        # TODO: can be optimized
        return bool(self & RangeSet([(begin, end)]))

    def cut_to(self, begin, end):
        """Return the part of this set that lies inside [begin, end]."""
        # TODO: can be optimized
        return self & RangeSet([(begin, end)])