コード例 #1
0
def intersect(a, b):
    """Cross-query the features of two single-record GFF3 inputs.

    Both inputs are parsed with ``GFF.parse``. An interval tree is built over
    the features of each record, and every feature of one record is looked up
    by its ``(start, end)`` span in the tree of the other record. The values
    returned by ``find_range`` are used as keys into an ``id -> feature`` map
    of the queried record.

    Args:
        a: First GFF3 input, handed unchanged to ``GFF.parse``.
        b: Second GFF3 input, handed unchanged to ``GFF.parse``.

    Returns:
        The tuple ``(rec_a, rec_b)``. Each record's ``features`` attribute is
        replaced by a ``set``: ``rec_a`` receives the ``rec_b`` features hit
        by ``rec_a``'s spans and ``rec_b`` receives the ``rec_a`` features hit
        by ``rec_b``'s spans.

    Raises:
        Exception: If either input contains more than one record.
        IndexError: If either input contains no record.
    """
    records_a = list(GFF.parse(a))
    records_b = list(GFF.parse(b))
    if max(len(records_a), len(records_b)) > 1:
        raise Exception("Cannot handle multiple GFF3 records in a file, yet")

    rec_a, rec_b = records_a[0], records_b[0]

    tree_a = IntervalTree(list(treeFeatures(rec_a.features)), 1, len(rec_a))
    tree_b = IntervalTree(list(treeFeatures(rec_b.features)), 1, len(rec_b))

    features_of_a = {feat.id: feat for feat in rec_a.features}
    features_of_b = {feat.id: feat for feat in rec_b.features}

    def _lookup(queries, other_tree, other_by_id):
        # Collect, with repeats, every feature of the other record that a
        # query span hits.
        found = []
        for query in queries:
            span = (int(query.location.start), int(query.location.end))
            for hit_id in other_tree.find_range(span):
                found.append(other_by_id[hit_id])
        return found

    b_features_hit_by_a = _lookup(rec_a.features, tree_b, features_of_b)
    a_features_hit_by_b = _lookup(rec_b.features, tree_a, features_of_a)

    # NOTE(review): rec_a ends up carrying features that came from rec_b (and
    # vice versa) -- confirm this cross-assignment is intended.
    rec_a.features = set(b_features_hit_by_a)
    rec_b.features = set(a_features_hit_by_b)
    return rec_a, rec_b
コード例 #2
0
def test_simple_float():
    """A float range inside the single stored interval [0.0, 1.0] yields its value."""

    tree = IntervalTree([[0.0, 1.0, 1]], 0, 1)

    hits = tree.find_range([0.5, 0.6])
    assert hits == [1]
コード例 #3
0
def test_empty():
    """Querying [1, 1] on a tree holding only the interval [0, 0] yields nothing."""

    tree = IntervalTree([[0, 0, None]], 0, 0)

    hits = tree.find_range([1, 1])
    assert hits == []
コード例 #4
0
def test_simple():
    """Querying exactly the stored point interval [1, 1] yields its value 3."""

    tree = IntervalTree([[1, 1, 3]], 0, 1)

    hits = tree.find_range([1, 1])
    assert hits == [3]
コード例 #5
0
ファイル: util.py プロジェクト: nadavbra/shared_utils
def calc_overlap_between_segments(ordered_segments1, ordered_segments2):
    '''
    Sums the overlap size between two groups of segments.

    Each group is a sequence of (start, end) tuples: [(start1, end1), (start2, end2), ...].
    The groups are expected to be ordered and disjoint; the first and last segments of each
    group are used as the coordinate bounds of the interval tree.
    Every overlapping pair contributes (overlap_end - overlap_start + 1), i.e. both end points
    are counted. Returns 0 when either group is empty.
    '''

    from interval_tree import IntervalTree

    if not (len(ordered_segments1) and len(ordered_segments2)):
        return 0

    # Index the shorter group in the tree and probe it with the longer one.
    if len(ordered_segments1) > len(ordered_segments2):
        indexed, probes = ordered_segments2, ordered_segments1
    else:
        indexed, probes = ordered_segments1, ordered_segments2

    lowest = min(indexed[0][0], probes[0][0])
    highest = max(indexed[-1][1], probes[-1][1])

    # Each tree entry is (start, end, segment): the segment itself is stored
    # as the value that find_range hands back.
    tree_entries = [seg + (seg, ) for seg in indexed]
    tree = IntervalTree(tree_entries, lowest, highest)

    total_overlap = 0
    for probe in probes:
        for found in tree.find_range(probe):
            lo = max(probe[0], found[0])
            hi = min(probe[1], found[1])
            # Sanity check on the tree's answer: a reported hit must really overlap.
            assert lo <= hi, 'Reported overlap between %d..%d to %d..%d.' % (probe + found)
            total_overlap += hi - lo + 1

    return total_overlap
コード例 #6
0
ファイル: test.py プロジェクト: dalianzhu/interval_tree
def test_case_1():
    """Insert the line (2, 6) into a tree built with (1, 10) and print two searches."""
    tree = IntervalTree(1, 10)
    tree.insert_line(2, 6)

    # Results are only printed, not asserted.
    for bounds in ((2, 7), (3, 5)):
        print(tree.search_line(*bounds))

    print("over")
コード例 #7
0
def main():
    """Build an interval tree from the intervals file, then process stdin line by line."""
    # Load the raw intervals and convert them into the form the tree accepts.
    raw_intervals = readIntervalsFile()
    tree = IntervalTree(str_to_interval(raw_intervals))

    # Every input line is handed to processLine together with the tree.
    for query_line in sys.stdin:
        processLine(tree, query_line)
コード例 #8
0
ファイル: evalV2.py プロジェクト: Jakob666/NanoporeNetwork
 def __init__(self, peak_calling_file, inference_file, output):
     """
     Store the constructor arguments and create an empty interval tree.

     :param peak_calling_file: the peak-calling file
     :param inference_file: the CNN prediction output
     :param output: kept as ``self.output`` (presumably the result destination -- confirm)
     """
     self.peak_file, self.infer_file = peak_calling_file, inference_file
     # Placeholders; nothing is populated at construction time.
     self.peak = self.infer = None
     self.interval_tree = IntervalTree()
     self.output = output
コード例 #9
0
def test_float_tree():
    """Point query at 0.001 against three adjacent float intervals in a 0..2 tree."""

    intervals = [
        [0.0, 0.01, 1],
        [0.01, 0.02, 2],
        [0.02, 1, 3],
    ]
    tree = IntervalTree(intervals, 0, 2)

    # NOTE(review): only the first interval visibly contains 0.001, yet all
    # three values are expected -- confirm this matches the library's
    # intended float behaviour.
    hits = tree.find_range([0.001, 0.001])
    assert hits == [1, 2, 3]
コード例 #10
0
def _build_gene_interval_trees(genes):

    if len(genes) == 0:
        return None

    segments = []
    max_coordinate = 1

    for gene in genes:
        start, end = _get_gene_locus(gene)
        segments += [(start, end, gene)]
        max_coordinate = max(max_coordinate, end)

    return IntervalTree(segments, 1, max_coordinate)
コード例 #11
0
def db_hit(database, query_mass):
    ''' Look up the database entries matching query_mass.

        An IntervalTree is built from the whole database on every call and
        then searched for query_mass.

        According to the original notes, each DatabaseEntry carries its own
        begin/end mass range, with the ppm tolerance fixed when the entry is
        created; see test_discretisation.py and identification.py for usage.

        Args:
         - database: a list of DatabaseEntry objects
         - query_mass: the mass to query

        Returns:
         whatever IntervalTree.search returns for query_mass -- presumably the
         DatabaseEntry objects {e} with e.get_begin() < query_mass < e.get_end()
         (boundary handling to be confirmed against the tree implementation)
    '''
    return IntervalTree(database).search(query_mass)
コード例 #12
0
ファイル: build_gene_trees.py プロジェクト: dowing/phizz
def build_gene_trees(infile, gene_db):
    """Build one interval tree of genes per chromosome and pickle the result.

    The gzipped, tab-separated ``infile`` is read line by line. Lines starting
    with ``#`` and lines with fewer than six columns are skipped. Columns used
    (0-based): 0 = chromosome, 1 = start, 2 = stop, 5 = HGNC symbol. Lines
    with an empty symbol are ignored, and only the first occurrence of a
    symbol on a chromosome is kept.

    For every chromosome an ``IntervalTree`` is built from
    ``[start, stop, symbol]`` entries, spanning 1 to the largest stop seen on
    that chromosome plus one. The resulting ``{chromosome: IntervalTree}``
    dict is written to ``gene_db`` with ``pickle.dump``.

    Args:
        infile: Path of the gzipped gene file.
        gene_db: Path of the pickle file to write.

    Raises:
        ValueError: If a start or stop column is not an integer.
    """
    gene_trees = {}
    chromosome_stops = {}
    with gzip.open(infile, 'rb') as f:
        for line in f:
            # gzip yields bytes on Python 3; decode so the str operations
            # below work. On Python 2 the line is already a str.
            if not isinstance(line, str):
                line = line.decode('utf-8')
            if line.startswith('#'):
                continue

            fields = line.rstrip().split('\t')
            if len(fields) < 6:
                continue

            chrom = fields[0]
            start = int(fields[1])
            stop = int(fields[2])
            hgnc_symbol = fields[5]
            if not hgnc_symbol:
                continue

            # First occurrence of a symbol on a chromosome wins.
            gene_trees.setdefault(chrom, {}).setdefault(hgnc_symbol,
                                                        [start, stop])

            # Keep the tree's upper bound one past the largest stop. Using
            # max() on stop + 1 ensures a stop equal to the stored bound
            # still pushes it up, and every chromosome always has an entry.
            chromosome_stops[chrom] = max(chromosome_stops.get(chrom, 0),
                                          stop + 1)

    # Prepare for interval tree
    interval_trees = {}
    for chrom, genes in gene_trees.items():
        intervals = [[start, stop, symbol]
                     for symbol, (start, stop) in genes.items()]
        interval_trees[chrom] = IntervalTree(intervals, 1,
                                             chromosome_stops[chrom])

    with open(gene_db, 'wb') as f:
        logger.info("Dumping gene database to {0}.".format(gene_db))
        pickle.dump(interval_trees, f)
        logger.debug("Dumping successful.")
コード例 #13
0
ファイル: FlotDataset.py プロジェクト: younghj/flot
 def __init__(self, conf, pathList, transform):
     """
     Wrap several data folders as one dataset.

     A DataFolder is created for every entry of pathList. For each one the
     global index range it covers is recorded as [first, last, folder index],
     and an IntervalTree over these ranges is stored as self.binSelector.
     """
     self.conf = conf
     self.dataList = []
     self.offsets = []
     self.len = 0
     spans = []
     for idx, dataFolder in enumerate(pathList):
         #
         # This can be done better by using pytorch to concat datasets.
         folder = DataFolder(conf, dataFolder, transform)
         self.dataList.append(folder)
         # The running total before adding this folder is its first global index.
         first = self.len
         self.offsets.append(first)
         self.len += len(folder)
         spans.append([first, self.len - 1, idx])
     self.binSelector = IntervalTree(spans, 0, self.len + 1)
コード例 #14
0
def neighbours_in_record(rec_a,
                         rec_b,
                         within=1000,
                         mode='unordered',
                         **kwargs):
    """Find features of ``rec_a`` lying near features of ``rec_b``.

    One interval tree is built per strand of ``rec_a`` (via
    ``treeFeatures(..., strand=1)`` and ``strand=-1``). Each ``rec_b`` feature
    is then looked up with a widened ``(start, end)`` window in the tree that
    matches its strand:

    * ``strand > 0``: ``start`` is always lowered by ``within``; ``end`` is
      raised by ``within`` only when ``mode != 'ordered'``. The strand=1 tree
      is queried.
    * otherwise: ``end`` is always raised by ``within``; ``start`` is lowered
      by ``within`` only when ``mode != 'ordered'``. The strand=-1 tree is
      queried.

    Args:
        rec_a: Record whose features are indexed.
        rec_b: Record whose features are used as queries.
        within: Amount by which the query window is widened.
        mode: ``'ordered'`` widens one side only (see above); any other value
            widens both sides.
        **kwargs: Accepted and ignored.

    Returns:
        The tuple ``(rec_a, rec_b)``. ``rec_a.features`` is replaced by the
        list of its features that were hit (no repeats, in first-hit order)
        and ``rec_b.features`` by the list of its features that produced at
        least one hit.
    """
    # One tree per strand of rec_a; None marks a strand with no entries.
    strand_trees = {}
    for strand in (1, -1):
        entries = list(treeFeatures(rec_a.features, strand=strand))
        if len(entries) > 0:
            strand_trees[strand] = IntervalTree(entries, 1, len(rec_a))
        else:
            strand_trees[strand] = None

    rec_a_map = {f.id: f for f in rec_a.features}

    rec_a_hits_in_b = []
    rec_b_hits_in_a = []

    widen_both_sides = mode != 'ordered'

    for feature in rec_b.features:
        location = feature.location
        start = location.start
        end = location.end
        forward = location.strand > 0

        # The two strands mirror each other: forward features always widen
        # the start, the others always widen the end.
        if forward or widen_both_sides:
            start -= within
        if widen_both_sides or not forward:
            end += within

        tree = strand_trees[1] if forward else strand_trees[-1]
        if tree is None:
            continue

        hits = tree.find_range((start, end))
        if len(hits) == 0:
            continue

        # Debug trace kept from the original. A single formatted argument
        # prints the same text on Python 2 and Python 3.
        print("%s %s %s %s %s" % (start, end, location.strand, feature.id,
                                  hits))

        rec_b_hits_in_a.append(feature)
        for hit in hits:
            feat_hit = rec_a_map[hit]
            if feat_hit not in rec_a_hits_in_b:
                rec_a_hits_in_b.append(feat_hit)

    rec_a.features = rec_a_hits_in_b
    rec_b.features = rec_b_hits_in_a
    return rec_a, rec_b
コード例 #15
0
            #lens.append(len(res))
    else:
        for i in range(tries):
            res = tree.find(start, end)
            res.sort(key=operator.attrgetter('start'))
            lens.append("%i:%s" % (len(res), [x.start for x in res[-1:]]))
            #lens.append(len(res))
    t1 = time.time()
    return res, t1 - t0, lens


# Benchmark driver: build an IntervalTree and an Intersecter from the same
# intervals, then time the same query against the tree and against the plain
# interval list.
start_max = STOP * 3
while True:
    # New set of N intervals for this round (rands presumably draws them at
    # random -- its definition is not visible here).
    intervals = rands(N, start_max=start_max)
    t0 = time.time()
    tree = IntervalTree(intervals)
    t1 = time.time()
    print "time to build IntervalTree with %i intervals: %.3f" % (N, t1 - t0)
    t0 = time.time()
    # NOTE(review): `ints` is built and timed but not used again in the
    # visible part of this loop.
    ints = Intersecter(intervals)
    t1 = time.time()
    print "time to build Intersector with %i intervals: %.3f" % (N, t1 - t0)

    # Same START..STOP query, repeated TRIES times, against the tree ...
    found, t, tree_lens = search(tree, START, STOP, TRIES)
    print "time to search tree %i times: %.3f. found %i intervals" % (
        TRIES, t, len(found))

    # ... and against the raw interval list.
    found, t, brute_lens = search(intervals, START, STOP, TRIES)
    print "time to search brute %i times: %.3f. found %i intervals" % (
        TRIES, t, len(found))
コード例 #16
0
def sort_contours_by_level(contours):
    """Group contours by nesting level.

    Contours are bucketed by their (y_min, y_max) interval and packed into an
    interval tree. For the vertical midpoint of every contour, the contours
    present at that height are compared pairwise with point_in_contour on
    their start points; a contour found inside another becomes its child in a
    tree of Node objects. The resulting trees are then flattened level by
    level.

    Returns a tuple (levels, corner):
        levels: a list of lists of contours. Each inner list holds the
            contours at one nesting level; the outer list starts with the
            root level (contours with no parent) and goes deeper from there.
        corner: (layout_x_max, layout_y_max), the largest x_max and y_max
            over all contours.
    """
    # TODO: handle pre-closed contours. (Circles, ellipses, etc.)
    parts = []  # NOTE(review): never used in this function.
    height_interval_to_contours = {
    }  # values are contour lists, since multiple contours can share a height interval.
    contour_tree = IntervalTree()
    heights = set()
    contours_by_name = {}
    nested_contour_tree_items = {}  # contour name -> Node

    # Find min/max heights of all contours.
    layout_y_min = math.inf
    layout_y_max = -math.inf
    # Also find the left/right extremes to find global corners.
    # NOTE(review): layout_x_min is tracked but not used in the return value.
    layout_x_min = math.inf
    layout_x_max = -math.inf
    for contour in contours:
        # Store contours by name.
        contours_by_name[contour.name()] = contour
        # Store contour in a dict by height interval. Some contours can have the same height, so use lists.
        # This data structure is the input to build the interval tree.
        if (contour.y_min, contour.y_max) in height_interval_to_contours:
            height_interval_to_contours[(contour.y_min,
                                         contour.y_max)].append(contour)
        else:
            height_interval_to_contours[(contour.y_min,
                                         contour.y_max)] = [contour]
        # Update the extremes of the layout.
        if contour.y_min < layout_y_min:
            layout_y_min = contour.y_min
        if contour.y_max > layout_y_max:
            layout_y_max = contour.y_max
        if contour.x_min < layout_x_min:
            layout_x_min = contour.x_min
        if contour.x_max > layout_x_max:
            layout_x_max = contour.x_max
        # Record the contour's vertical midpoint as a height to query later.
        heights.add((contour.y_max - contour.y_min) / 2 + contour.y_min)

    # Create interval tree.
    print("Packing Contours into Interval Tree for sorting speedup.")
    contour_tree.build(layout_y_min, layout_y_max, height_interval_to_contours)

    # Construct all contour in-out relationships.
    print("Constructing in-out contour relationships.")
    for height in heights:
        # Extract all the contours that exist at this height.
        contour_subset_lists = contour_tree.query(height)
        contour_subset_lists = [item[1] for item in contour_subset_lists
                                ]  # keep element 1 of each result (the contour list), dropping the key.
        contour_subset_lists = [
            item for sublist in contour_subset_lists for item in sublist
        ]  # flatten remaining lists.

        # Build the In-Out relationship tree.
        for a_index, contour_a in enumerate(contour_subset_lists):
            # Reuse the node registered for this contour, or start a new one.
            # The default Node(...) is constructed even when an entry exists.
            contour_a_node = nested_contour_tree_items.get(
                contour_a.name(), Node(contour_a.name()))
            # Compare only against later contours so each pair is visited once.
            for b_index, contour_b in enumerate(contour_subset_lists[a_index +
                                                                     1:]):
                point_a = (contour_a.start_x, contour_a.start_y)
                point_b = (contour_b.start_x, contour_b.start_y)
                # Check if a is in b. If so, insert pair relationship into tree.
                if point_in_contour(point_a, contour_b):
                    # contour_b is contour_a's parent. Add back to the dict
                    contour_b_node = nested_contour_tree_items.get(
                        contour_b.name(), Node(contour_b.name()))
                    contour_a_node.parent = contour_b_node
                    nested_contour_tree_items[
                        contour_b.name()] = contour_b_node
                # Check if b is in a. If so, insert pair relationship into tree.
                elif point_in_contour(point_b, contour_a):
                    # contour_a is contour_b's parent. Add back to the dict
                    contour_b_node = nested_contour_tree_items.get(
                        contour_b.name(), Node(contour_b.name()))
                    contour_b_node.parent = contour_a_node
                    nested_contour_tree_items[
                        contour_b.name()] = contour_b_node
            # Register contour_a even when it had no peers at this height.
            nested_contour_tree_items[contour_a.name()] = contour_a_node

    print("Organizing contours by depth")
    # A dict, keyed by level (int) of contours that live at that level.
    depth_lists = OrderedDict()

    # Contours may be sorted in multiple separate trees.
    # Pull contours out of the dict representation and put into lists sorted by depths
    # Each pass removes one whole tree from the dict, so the loop ends once
    # every registered contour has been placed.
    while len(nested_contour_tree_items):
        # Find the root(s) and print out the tree from there.
        node = None
        # Pull an arbitrary item out from the nesting.
        node_key = list(nested_contour_tree_items.keys())[0]
        # Get the root of this tree.
        node = nested_contour_tree_items[node_key]
        while node.parent is not None:
            node = node.parent
        # https://anytree.readthedocs.io/en/latest/api/anytree.iterators.html#anytree.iterators.levelordergroupiter.LevelOrderGroupIter
        # One list of node names per level, starting with the root's level.
        list_o_lists = [[node.name for node in children]
                        for children in LevelOrderGroupIter(node)]
        for index, depth_list in enumerate(list_o_lists):
            # Merge this tree's level into any earlier tree's list for the same level.
            old_depth_list = depth_lists.get(index, [])
            for contour_name in depth_list:
                old_depth_list.append(contours_by_name[contour_name])
                del nested_contour_tree_items[contour_name]
            depth_lists[index] = old_depth_list

    # Return the per-level contour lists and the (x_max, y_max) corner.
    return [v for k, v in depth_lists.items()], (layout_x_max, layout_y_max)