コード例 #1
0
ファイル: regions2gff.py プロジェクト: siping/cgat
    def processChunk(contig, regions):
        """Record the gaps between the regions of one contig.

        The regions are merged with ``Intervals.combineIntervals`` and every
        interval returned by ``Intervals.complementIntervals`` for the span
        ``0 .. contigs[contig]`` is appended to ``locations`` (taken from the
        enclosing scope) as an "intergenic" entry on the "+" strand.

        Arguments
        ---------
        contig :
            Key into ``contigs``; nothing is done when it is None.
        regions :
            Intervals collected for this contig.
        """
        # None is a singleton: test identity rather than equality, which a
        # custom __eq__ could override.
        if contig is None:
            return

        start = 0
        end = contigs[contig]  # presumably the contig length - TODO confirm

        regions = Intervals.combineIntervals(regions)
        for xstart, xend in Intervals.complementIntervals(regions, start, end):
            locations.append(
                ("intergenic", "intergenic", contig, "+", xstart, xend, "."))
コード例 #2
0
def FilterEliminateOverlappingTranscripts(
        exons, filter_exons,
        eliminated_predictions, contig_sizes, options):
    """Eliminate predictions that overlap or span a positive set of transcripts.

    Both exon sets are converted into per-key range lists with
    ``getRangesFromExons``. The filter ranges are merged with
    ``Intervals.combineIntervals`` and every exon range that touches one of
    the merged filter ranges marks its prediction as eliminated.

    Arguments
    ---------
    exons :
        Exons of the predictions to test.
    filter_exons :
        Exons of the positive set used as filter.
    eliminated_predictions : dict
        Updated in place: ``eliminated_predictions[id] = 0`` for every
        eliminated prediction.
    contig_sizes :
        Passed through to ``getRangesFromExons`` for the filter set.
    options :
        Supplies ``filter_remove_spanning_both_strands`` and the ``stdout``
        handle that one report line per hit is written to.

    Returns
    -------
    list
        One ``(id, "f")`` tuple per overlapping exon range. A prediction
        with several overlapping exons is listed several times.
    """

    eliminated = []

    # convert list of filter exons into a list of ranges.
    filter_ranges = getRangesFromExons(
        filter_exons,
        both_strands=options.filter_remove_spanning_both_strands,
        contig_sizes=contig_sizes)

    # Hand combineIntervals a real list: on Python 3 ``map`` yields a lazy
    # iterator that can be neither sorted in place nor tested for emptiness.
    for k, r in filter_ranges.items():
        filter_ranges[k] = Intervals.combineIntervals([x[:2] for x in r])

    exon_ranges = getRangesFromExons(exons,
                                     both_strands=False)

    # and now go through exons and delete transcripts whose
    # exons overlap one of the forbidden ranges
    for k, ee in exon_ranges.items():

        if k not in filter_ranges:
            continue

        ff = filter_ranges[k]
        ee.sort()

        # f indexes the merged filter ranges; it only ever moves forward
        # because the exon ranges are visited in sorted order.
        f = 0

        for efrom, eto, prediction_id in ee:

            # advance the filter until it ends at or after the start of
            # the current exon range.
            while f < len(ff) and ff[f][1] < efrom:
                f += 1
            if f == len(ff):
                # no filter ranges left: the remaining exons cannot overlap
                break

            if eto < ff[f][0]:
                # no overlap
                continue

            options.stdout.write(
                "%s\t%s\n" % (prediction_id,
                              "eliminated: filtered by %s:%i:%i" %
                              (k, ff[f][0], ff[f][1])))
            eliminated_predictions[prediction_id] = 0
            eliminated.append((prediction_id, "f"))

    return eliminated
コード例 #3
0
def FilterEliminateOverlappingTranscripts(exons, filter_exons,
                                          eliminated_predictions, contig_sizes,
                                          options):
    """Eliminate predictions that overlap or span a positive set of transcripts.

    The filter exons and the prediction exons are each turned into range
    lists per key by ``getRangesFromExons``. Filter ranges are merged with
    ``Intervals.combineIntervals``; a prediction is eliminated as soon as one
    of its exon ranges touches a merged filter range.

    Arguments
    ---------
    exons :
        Exons of the predictions to test.
    filter_exons :
        Exons of the positive set used as filter.
    eliminated_predictions : dict
        Modified in place; eliminated prediction ids are set to 0.
    contig_sizes :
        Forwarded to ``getRangesFromExons`` when building the filter ranges.
    options :
        Provides ``filter_remove_spanning_both_strands`` and ``stdout``;
        a tab-separated report line is written to ``stdout`` for each hit.

    Returns
    -------
    list
        ``(id, "f")`` tuples, one for each overlapping exon range (so an id
        may occur more than once).
    """

    eliminated = []

    # convert list of filter exons into a list of ranges.
    filter_ranges = getRangesFromExons(
        filter_exons,
        both_strands=options.filter_remove_spanning_both_strands,
        contig_sizes=contig_sizes)

    # A list comprehension instead of map(): under Python 3 map() returns a
    # one-shot iterator, not the list that combineIntervals is given
    # elsewhere.
    for k, r in filter_ranges.items():
        filter_ranges[k] = Intervals.combineIntervals([x[:2] for x in r])

    exon_ranges = getRangesFromExons(exons, both_strands=False)

    # and now go through exons and delete transcripts whose
    # exons overlap one of the forbidden ranges
    for k, ee in exon_ranges.items():

        if k not in filter_ranges:
            continue

        ff = filter_ranges[k]
        ee.sort()

        # index into the merged filter ranges; monotonically increasing
        # because exon ranges are processed in sorted order
        f = 0

        for efrom, eto, prediction_id in ee:

            # skip filter ranges that end before the current exon starts
            while f < len(ff) and ff[f][1] < efrom:
                f += 1
            if f == len(ff):
                # filter ranges exhausted, nothing further can overlap
                break

            if eto < ff[f][0]:
                # no overlap
                continue

            options.stdout.write("%s\t%s\n" %
                                 (prediction_id,
                                  "eliminated: filtered by %s:%i:%i" %
                                  (k, ff[f][0], ff[f][1])))
            eliminated_predictions[prediction_id] = 0
            eliminated.append((prediction_id, "f"))

    return eliminated
コード例 #4
0
    def processChunk(contig, regions):
        """Append the intervals lying between the regions of one contig.

        ``regions`` is merged with ``Intervals.combineIntervals``; each
        interval that ``Intervals.complementIntervals`` returns for the span
        from 0 to ``contigs[contig]`` is appended to ``locations`` (from the
        enclosing scope) as an "intergenic" tuple.

        Arguments
        ---------
        contig :
            Key into ``contigs``. The call is a no-op when it is None.
        regions :
            Intervals collected for this contig.
        """
        # Compare against the None singleton by identity, not with ``==``.
        if contig is None:
            return

        start = 0
        end = contigs[contig]  # presumably the contig length - TODO confirm

        regions = Intervals.combineIntervals(regions)
        for xstart, xend in Intervals.complementIntervals(regions, start, end):
            locations.append(
                ("intergenic", "intergenic", contig, "+", xstart, xend, "."))
コード例 #5
0
ファイル: gff2table.py プロジェクト: kathrinjansen/cgat
def transform_third_codon(start, end, intervals_with_gff):
    """transform: keep only third codon positions inside the window.

    Each entry of ``intervals_with_gff`` is an ``(istart, iend, gff)``
    triple; ``gff.frame`` and ``gff.strand`` are read from the record.
    Every selected nucleotide is collected as a one-base interval
    ``(c, c + 1)`` and the collection is passed through
    ``Intervals.combineIntervals`` before it is returned.

    Raises ValueError if a record has the frame ".".
    """
    third_positions = []
    for seg_start, seg_end, gff in intervals_with_gff:

        if gff.frame == ".":
            raise ValueError("need a frame for third codon positions.")

        # phase = nucleotides from the segment start to the next codon
        phase = int(gff.frame)

        # Switch to 0-based coordinates relative to the window. On the
        # negative strand the coordinates are counted backwards from the
        # window end, so that the segment is always walked 5' to 3'.
        on_reverse = bool(Genomics.IsNegativeStrand(gff.strand))
        if on_reverse:
            rel_start, rel_end = end - seg_end, end - seg_start
        else:
            rel_start, rel_end = seg_start - start, seg_end - start

        # Segment begins before the window: clip it and carry the phase
        # over to the window edge.
        if rel_start < 0:
            phase = (phase + rel_start) % 3
            rel_start = 0

        # Offset of the first third codon position at or after rel_start.
        first = rel_start
        if phase != 0:
            first -= 3 - phase
        first += 2

        # never run past the end of the window
        stop = min(rel_end, end - start)

        for offset in range(first, stop, 3):
            # translate the window-relative offset back to the original
            # coordinate system
            if on_reverse:
                position = end - offset - 1
            else:
                position = start + offset
            third_positions.append((position, position + 1))

    return Intervals.combineIntervals(third_positions)
コード例 #6
0
ファイル: gff_decorate.py プロジェクト: Q-KIM/cgat
def transform_third_codon(start, end, intervals_with_gff):
    """transform: restrict intervals to third codon positions.

    ``intervals_with_gff`` holds ``(istart, iend, gff)`` triples. For each
    of them the nucleotides within the window (start, end) that sit on a
    third codon position - as derived from ``gff.frame`` and
    ``gff.strand`` - are collected as single-base intervals. The result of
    ``Intervals.combineIntervals`` on that collection is returned.

    A ValueError is raised for records whose frame is ".".
    """
    collected = []
    for raw_start, raw_end, gff in intervals_with_gff:

        if gff.frame == ".":
            raise ValueError("need a frame for third codon positions.")

        # frame = nucleotides from start to next codon
        frame = int(gff.frame)

        # Express the segment in 0-based window coordinates; negative
        # strand segments are mirrored at the window end.
        is_minus = bool(Genomics.IsNegativeStrand(gff.strand))
        if is_minus:
            lo, hi = end - raw_end, end - raw_start
        else:
            lo, hi = raw_start - start, raw_end - start

        # clip at the window start, shifting the frame accordingly
        if lo < 0:
            frame, lo = (frame + lo) % 3, 0

        # Move to the first third codon position: two bases on when the
        # segment starts a codon, otherwise frame - 1 bases on.
        lo += 2 if frame == 0 else frame - 1

        # clip at the window end
        hi = min(hi, end - start)

        # map each window offset back to the original coordinates
        if is_minus:
            collected.extend(
                (end - x - 1, end - x) for x in range(lo, hi, 3))
        else:
            collected.extend(
                (start + x, start + x + 1) for x in range(lo, hi, 3))

    return Intervals.combineIntervals(collected)
コード例 #7
0
ファイル: gff2table.py プロジェクト: kathrinjansen/cgat
def transform_complement(start, end, intervals_with_gff):
    """transform: complement of the intervals within (start, end).

    Only the first two fields of each entry of ``intervals_with_gff`` are
    used. They are merged with ``Intervals.combineIntervals`` and the
    result of ``Intervals.complementIntervals`` for the window is returned.
    """
    coordinates = []
    for entry in intervals_with_gff:
        coordinates.append((entry[0], entry[1]))
    merged = Intervals.combineIntervals(coordinates)
    return Intervals.complementIntervals(merged, start, end)
コード例 #8
0
ファイル: gff2table.py プロジェクト: kathrinjansen/cgat
def transform_overlap(start, end, intervals_with_gff):
    """transform: intervals restricted to the window (start, end).

    Only the first two fields of each entry of ``intervals_with_gff`` are
    used. They are merged with ``Intervals.combineIntervals`` and then
    handed to ``Intervals.pruneIntervals`` together with the window.
    """
    coordinates = []
    for entry in intervals_with_gff:
        coordinates.append((entry[0], entry[1]))
    merged = Intervals.combineIntervals(coordinates)
    return Intervals.pruneIntervals(merged, start, end)
コード例 #9
0
ファイル: gff2table.py プロジェクト: zpeng1989/cgat
def transform_complement(start, end, intervals_with_gff):
    """transform: complement of the intervals within (start, end).

    The first two fields of every entry in ``intervals_with_gff`` are
    merged with ``Intervals.combineIntervals``; the value of
    ``Intervals.complementIntervals`` for the window is returned.
    """
    # Build a list explicitly: on Python 3 map() returns a lazy iterator,
    # which is always truthy and cannot be sorted in place.
    y = Intervals.combineIntervals(
        [(x[0], x[1]) for x in intervals_with_gff])
    return Intervals.complementIntervals(y, start, end)
コード例 #10
0
ファイル: gff2table.py プロジェクト: zpeng1989/cgat
def transform_overlap(start, end, intervals_with_gff):
    """transform: intervals restricted to the window (start, end).

    The first two fields of every entry in ``intervals_with_gff`` are
    merged with ``Intervals.combineIntervals``; the value of
    ``Intervals.pruneIntervals`` for the window is returned.
    """
    # Build a list explicitly: on Python 3 map() returns a lazy iterator,
    # which is always truthy and cannot be sorted in place.
    y = Intervals.combineIntervals(
        [(x[0], x[1]) for x in intervals_with_gff])
    return Intervals.pruneIntervals(y, start, end)
コード例 #11
0
ファイル: gff_decorate.py プロジェクト: Q-KIM/cgat
def transform_complement(start, end, intervals_with_gff):
    """transform: complement of the intervals within (start, end).

    Takes the first two fields of each entry of ``intervals_with_gff``,
    merges them via ``Intervals.combineIntervals`` and returns what
    ``Intervals.complementIntervals`` yields for the window.
    """
    # A list comprehension rather than map(): map() is a one-shot iterator
    # on Python 3 and not the list that combineIntervals receives elsewhere.
    y = Intervals.combineIntervals(
        [(x[0], x[1]) for x in intervals_with_gff])
    return Intervals.complementIntervals(y, start, end)
コード例 #12
0
ファイル: gff_decorate.py プロジェクト: Q-KIM/cgat
def transform_overlap(start, end, intervals_with_gff):
    """transform: intervals restricted to the window (start, end).

    Takes the first two fields of each entry of ``intervals_with_gff``,
    merges them via ``Intervals.combineIntervals`` and returns what
    ``Intervals.pruneIntervals`` yields for the window.
    """
    # A list comprehension rather than map(): map() is a one-shot iterator
    # on Python 3 and not the list that combineIntervals receives elsewhere.
    y = Intervals.combineIntervals(
        [(x[0], x[1]) for x in intervals_with_gff])
    return Intervals.pruneIntervals(y, start, end)
コード例 #13
0
ファイル: gff_decorate.py プロジェクト: CGATOxford/cgat
def transform_complement(start, end, intervals_with_gff):
    """transform: complement of the intervals within (start, end).

    The gff payload of each entry is dropped, the remaining coordinate
    pairs are merged with ``Intervals.combineIntervals`` and the outcome of
    ``Intervals.complementIntervals`` for the window is returned.
    """
    pairs = []
    for record in intervals_with_gff:
        pairs.append((record[0], record[1]))
    combined = Intervals.combineIntervals(pairs)
    return Intervals.complementIntervals(combined, start, end)
コード例 #14
0
ファイル: gff_decorate.py プロジェクト: CGATOxford/cgat
def transform_overlap(start, end, intervals_with_gff):
    """transform: intervals restricted to the window (start, end).

    The gff payload of each entry is dropped, the remaining coordinate
    pairs are merged with ``Intervals.combineIntervals`` and the outcome of
    ``Intervals.pruneIntervals`` for the window is returned.
    """
    pairs = []
    for record in intervals_with_gff:
        pairs.append((record[0], record[1]))
    combined = Intervals.combineIntervals(pairs)
    return Intervals.pruneIntervals(combined, start, end)
コード例 #15
0
ファイル: regions2gff.py プロジェクト: CGATOxford/Optic
    def processChunk(gene_id, contig, strand, frame, regions):
        """Append the merged regions of one gene to ``locations``.

        ``regions`` is merged with ``Intervals.combineIntervals`` and one
        ``(gene_id, gene_id, contig, strand, start, end)`` tuple is appended
        to ``locations`` (from the enclosing scope) per merged interval.
        ``frame`` is accepted but not used. Nothing happens when ``gene_id``
        is None.
        """
        if gene_id is not None:
            merged = Intervals.combineIntervals(regions)
            for segment in merged:
                seg_start, seg_end = segment
                entry = (gene_id, gene_id, contig, strand, seg_start, seg_end)
                locations.append(entry)
コード例 #16
0
    def processChunk(gene_id, contig, strand, frame, regions):
        """Append the merged regions of one gene to ``locations``.

        ``regions`` is merged with ``Intervals.combineIntervals``; for each
        merged interval a ``(gene_id, gene_id, contig, strand, start, end)``
        tuple is appended to ``locations`` (from the enclosing scope).

        Arguments
        ---------
        gene_id :
            Identifier stored twice in every tuple; the call is a no-op
            when it is None.
        contig, strand :
            Copied into every tuple unchanged.
        frame :
            Accepted but not used here.
        regions :
            Intervals collected for this gene.
        """
        # None is a singleton: compare by identity, not with ``==``.
        if gene_id is None:
            return

        for start, end in Intervals.combineIntervals(regions):
            locations.append((gene_id, gene_id, contig, strand, start, end))