def getOverlappingRegionDict(bd1, bd2, debug=True):
    '''
    Return a dictionary of overlapping regions within two dictionaries of
    Bed regions.

    bd1 and bd2 map a chromosome name to a list of objects exposing chrom,
    chromStart and chromEnd. Only chromosomes that are keys of bd1 and have
    a non-empty list in bd2 are examined. For each such chromosome the two
    lists are walked in lockstep; whenever the current pair of regions
    intersects (max of the starts strictly less than min of the ends), a new
    Bed covering the intersection is recorded.

    When debug is true, both lists are first checked with isNonContiguous()
    and errAbort() is called if either check fails.
    NOTE(review): the lockstep walk presumably also relies on each list being
    sorted by position -- confirm what isNonContiguous() guarantees.

    Returns a dict mapping chromosome name to the list of intersection Beds,
    in the order they were found. Chromosomes without any intersection do
    not appear in the result.
    '''
    retval = {}
    for chrom, bedList1 in bd1.items():
        bedList2 = bd2.get(chrom, [])
        if not bedList2:
            # Nothing on this chromosome in bd2, so nothing can overlap.
            continue
        if debug:
            if not isNonContiguous(bedList1):
                errAbort(
                    "Calculating overlapping regions must have non-contiguous input elements."
                )
            if not isNonContiguous(bedList2):
                errAbort(
                    "Calculating overlapping regions must have non-contiguous input elements."
                )
        b1Len = len(bedList1)
        b2Len = len(bedList2)
        b1Idx = 0
        b2Idx = 0
        while b1Idx < b1Len and b2Idx < b2Len:
            b1Curr = bedList1[b1Idx]
            b2Curr = bedList2[b2Idx]
            assert b1Curr.chrom == b2Curr.chrom
            maxStart = max(b1Curr.chromStart, b2Curr.chromStart)
            minEnd = min(b1Curr.chromEnd, b2Curr.chromEnd)
            if maxStart < minEnd:
                overlap = Bed.Bed(
                    "%s\t%d\t%d" % (b1Curr.chrom, maxStart, minEnd)
                )
                # Append in place; rebuilding the list with "+" on every hit
                # made accumulation quadratic in the number of overlaps.
                retval.setdefault(b1Curr.chrom, []).append(overlap)
            # Advance whichever region ends first (both on a tie): a region
            # that has ended cannot intersect anything later in the other list.
            if b1Curr.chromEnd < b2Curr.chromEnd:
                b1Idx += 1
            elif b1Curr.chromEnd > b2Curr.chromEnd:
                b2Idx += 1
            else:
                b1Idx += 1
                b2Idx += 1
    return retval
def bedReadFromFile(fn, chromSizes=None):
    '''
    Read a file of Bed regions into a list of Beds and return it.

    fn is the path of the file to read. Every line of the file is passed to
    Bed.Bed() together with chromSizes, which is forwarded unchanged as a
    keyword argument. The returned list holds one Bed per line, in file
    order; an empty file yields an empty list.
    '''
    # The with-block closes the file even if Bed.Bed() raises on a bad line,
    # which the previous explicit open()/close() pair did not.
    with open(fn) as f:
        return [Bed.Bed(line, chromSizes=chromSizes) for line in f]