コード例 #1
0
def cropGFF(gffs, options):
    """Crop the intervals of a gff stream against a second set of intervals.

    The cropping regions are read from the file named by ``options.crop``.
    For every entry in ``gffs`` that overlaps at least one cropping region
    on the same contig, the overlapping positions are removed and each
    remaining segment is written to ``options.stdout`` as its own line.
    Entries without overlap are written unchanged; entries that are covered
    completely produce no output.

    Arguments
    ---------
    gffs : iterable
        Entries exposing ``contig``, ``start`` and ``end``. ``start`` and
        ``end`` are overwritten in place when an entry is cropped.
    options : object
        Provides ``crop`` (filename of the cropping intervals), ``stdout``,
        ``stdlog`` and ``loglevel``.
    """

    # read regions to crop with and convert intervals to intersectors
    E.info("reading gff for cropping: started.")

    # Release the handle as soon as the intervals are loaded. The result of
    # readAsIntervals is used as a complete mapping below and the iterator
    # is never touched again, so nothing is read after this point.
    crop_file = IOTools.openFile(options.crop, "r")
    try:
        cropper = GTF.readAsIntervals(GTF.iterator(crop_file))
    finally:
        crop_file.close()

    ntotal = 0
    # Iterate over a snapshot of the keys because the values are replaced
    # inside the loop.
    for contig in list(cropper.keys()):
        intersector = bx.intervals.intersection.Intersecter()
        for start, end in cropper[contig]:
            intersector.add_interval(bx.intervals.Interval(start, end))
            ntotal += 1
        cropper[contig] = intersector

    E.info("reading gff for cropping: finished.")
    E.info("reading gff for cropping: %i contigs with %i intervals." %
           (len(cropper), ntotal))

    ninput, noutput, ncropped, ndeleted = 0, 0, 0, 0

    # do the actual cropping
    for gff in gffs:

        ninput += 1

        overlaps = None
        if gff.contig in cropper:
            overlaps = cropper[gff.contig].find(gff.start, gff.end)

        if not overlaps:
            # nothing to remove: pass the entry through untouched
            noutput += 1
            options.stdout.write("%s\n" % gff)
            continue

        start, end = gff.start, gff.end
        length = end - start

        # One flag per position of the entry: 1 = keep, 0 = cropped away.
        keep = numpy.ones(length)
        for overlap in overlaps:
            # clip the overlap to the coordinates of the entry
            first = max(0, overlap.start - start)
            last = min(length, overlap.end - start)
            keep[first:last] = 0

        segments = Intervals.fromArray(keep)

        if len(segments) == 0:
            ndeleted += 1
        else:
            ncropped += 1

        # segments are relative to the entry, shift back to contig space
        for seg_start, seg_end in segments:
            gff.start, gff.end = seg_start + start, seg_end + start
            noutput += 1
            options.stdout.write("%s\n" % gff)

    if options.loglevel >= 1:
        options.stdlog.write(
            "# ninput=%i, noutput=%i, ncropped=%i, ndeleted=%i\n" %
            (ninput, noutput, ncropped, ndeleted))
コード例 #2
0
ファイル: gff2gff.py プロジェクト: Charlie-George/cgat
def cropGFF(gffs, options):
    """Remove the parts of gff entries that overlap a set of crop regions.

    Crop regions come from the file named by ``options.crop``. An entry of
    ``gffs`` overlapping one or more crop regions on its contig is reduced
    to the stretches that are not covered; each stretch is written as a
    separate line to ``options.stdout``. Entries that do not overlap any
    region are written as they are, and fully covered entries are dropped.

    Arguments
    ---------
    gffs : iterable
        Entries exposing ``contig``, ``start`` and ``end``. Cropped entries
        have ``start`` and ``end`` overwritten in place.
    options : object
        Provides ``crop`` (filename of the crop regions), ``stdout``,
        ``stdlog`` and ``loglevel``.
    """

    # read regions to crop with and convert intervals to intersectors
    E.info("reading gff for cropping: started.")

    # Close the input once the intervals are in memory. The mapping returned
    # by readAsIntervals is used as a finished container below and the
    # iterator is not read again.
    crop_file = IOTools.openFile(options.crop, "r")
    try:
        cropper = GTF.readAsIntervals(GTF.iterator(crop_file))
    finally:
        crop_file.close()

    ntotal = 0
    # Values are swapped out during the loop, so walk a copy of the keys.
    for contig in list(cropper.keys()):
        intersector = bx.intervals.intersection.Intersecter()
        for start, end in cropper[contig]:
            intersector.add_interval(bx.intervals.Interval(start, end))
            ntotal += 1
        cropper[contig] = intersector

    E.info("reading gff for cropping: finished.")
    E.info("reading gff for cropping: %i contigs with %i intervals." %
           (len(cropper), ntotal))

    ninput, noutput, ncropped, ndeleted = 0, 0, 0, 0

    # do the actual cropping
    for gff in gffs:

        ninput += 1

        overlaps = None
        if gff.contig in cropper:
            overlaps = cropper[gff.contig].find(gff.start, gff.end)

        if not overlaps:
            # no crop region touches this entry: emit it unchanged
            noutput += 1
            options.stdout.write("%s\n" % gff)
            continue

        start, end = gff.start, gff.end
        length = end - start

        # Per-position mask over the entry: 1 = retained, 0 = cropped.
        mask = numpy.ones(length)
        for overlap in overlaps:
            # restrict the overlap to the extent of the entry
            lower = max(0, overlap.start - start)
            upper = min(length, overlap.end - start)
            mask[lower:upper] = 0

        segments = Intervals.fromArray(mask)

        if len(segments) == 0:
            ndeleted += 1
        else:
            ncropped += 1

        # translate mask coordinates back to contig coordinates
        for seg_start, seg_end in segments:
            gff.start, gff.end = seg_start + start, seg_end + start
            noutput += 1
            options.stdout.write("%s\n" % gff)

    if options.loglevel >= 1:
        options.stdlog.write("# ninput=%i, noutput=%i, ncropped=%i, ndeleted=%i\n" % (
            ninput, noutput, ncropped, ndeleted))
コード例 #3
0
 def testArray2(self):
     """Intervals from an array with three runs of ones and two of zeros."""
     flags = [1] * 3 + [0] * 3 + [1] * 3 + [0] * 3 + [1] * 3
     inverted = [not flag for flag in flags]

     # runs of set positions
     expected_set = [(0, 3), (6, 9), (12, 15)]
     self.assertEqual(Intervals.fromArray(flags), expected_set)

     # runs of set positions after negating every flag
     expected_unset = [(3, 6), (9, 12)]
     self.assertEqual(Intervals.fromArray(inverted), expected_unset)
コード例 #4
0
 def testArray1(self):
     """Intervals from a short array: two runs of ones around one of zeros."""
     flags = [1] * 3 + [0] * 3 + [1] * 3
     inverted = [not flag for flag in flags]

     found = Intervals.fromArray(flags)
     self.assertEqual(found, [(0, 3), (6, 9)])

     found_inverted = Intervals.fromArray(inverted)
     self.assertEqual(found_inverted, [(3, 6)])
コード例 #5
0
 def testEmpty(self):
     """An empty array must give an empty list of intervals."""
     no_flags = []
     found = Intervals.fromArray(no_flags)
     self.assertEqual(found, [])
コード例 #6
0
ファイル: Intervals_test.py プロジェクト: Charlie-George/cgat
 def testArray2(self):
     """Check a longer array holding alternating runs of ones and zeros."""
     ones, zeros = [1, 1, 1], [0, 0, 0]
     values = ones + zeros + ones + zeros + ones
     negated = [not value for value in values]

     direct = Intervals.fromArray(values)
     self.assertEqual(direct, [(0, 3), (6, 9), (12, 15)])

     complement = Intervals.fromArray(negated)
     self.assertEqual(complement, [(3, 6), (9, 12)])
コード例 #7
0
ファイル: Intervals_test.py プロジェクト: Charlie-George/cgat
 def testArray1(self):
     """Check a simple array: ones, then zeros, then ones again."""
     ones, zeros = [1, 1, 1], [0, 0, 0]
     values = ones + zeros + ones
     negated = [not value for value in values]

     expected = [(0, 3), (6, 9)]
     self.assertEqual(Intervals.fromArray(values), expected)

     expected_negated = [(3, 6)]
     self.assertEqual(Intervals.fromArray(negated), expected_negated)
コード例 #8
0
ファイル: Intervals_test.py プロジェクト: Charlie-George/cgat
 def testEmpty(self):
     """Check that input without any elements produces no intervals."""
     expected = []
     self.assertEqual(Intervals.fromArray([]), expected)
コード例 #9
0
ファイル: gff2gff.py プロジェクト: logust79/cgat-apps
def cropGFF(gffs, filename_gff):
    """Yield gff entries with the regions listed in another file removed.

    Crop regions are read from ``filename_gff``. An entry of ``gffs`` that
    overlaps one or more crop regions on its contig is reduced to the
    stretches that are not covered, and one entry is yielded per stretch.
    Entries without overlap are yielded unchanged; entries that are covered
    completely yield nothing.

    Note that a cropped entry is modified in place and the *same* object is
    yielded once per remaining stretch, with ``start`` and ``end`` updated
    each time. Consumers need to use (or copy) each yielded entry before
    requesting the next one.

    The summary counts are logged only once the generator is exhausted.

    Arguments
    ---------
    gffs : iterable
        Entries exposing ``contig``, ``start`` and ``end``.
    filename_gff : string
        Filename of the crop regions.

    Yields
    ------
    gff
        The (possibly cropped) entries taken from ``gffs``.
    """

    # read regions to crop with and convert intervals to intersectors
    E.info("reading gff for cropping: started.")

    # Close the input once the intervals are in memory. The mapping returned
    # by readAsIntervals is used as a finished container below and the
    # iterator is not read again.
    crop_file = IOTools.open_file(filename_gff, "r")
    try:
        cropper = GTF.readAsIntervals(GTF.iterator(crop_file))
    finally:
        crop_file.close()

    ntotal = 0
    # Values are swapped out during the loop, so walk a copy of the keys.
    for contig in list(cropper.keys()):
        intersector = quicksect.IntervalTree()
        for start, end in cropper[contig]:
            intersector.add(start, end)
            ntotal += 1
        cropper[contig] = intersector

    E.info("reading gff for cropping: finished.")
    E.info("reading gff for cropping: %i contigs with %i intervals." %
           (len(cropper), ntotal))

    ninput, noutput, ncropped, ndeleted = 0, 0, 0, 0

    # do the actual cropping
    for gff in gffs:

        ninput += 1

        overlaps = None
        if gff.contig in cropper:
            overlaps = cropper[gff.contig].find(
                quicksect.Interval(gff.start, gff.end))

        if not overlaps:
            # no crop region touches this entry: hand it on unchanged
            noutput += 1
            yield gff
            continue

        start, end = gff.start, gff.end
        length = end - start

        # Per-position mask over the entry: 1 = retained, 0 = cropped.
        mask = numpy.ones(length)
        for overlap in overlaps:
            # restrict the overlap to the extent of the entry
            lower = max(0, overlap.start - start)
            upper = min(length, overlap.end - start)
            mask[lower:upper] = 0

        segments = Intervals.fromArray(mask)
        if len(segments) == 0:
            ndeleted += 1
        else:
            ncropped += 1

        # translate mask coordinates back to contig coordinates
        for seg_start, seg_end in segments:
            gff.start, gff.end = seg_start + start, seg_end + start
            noutput += 1
            yield gff

    E.info("ninput=%i, noutput=%i, ncropped=%i, ndeleted=%i" %
           (ninput, noutput, ncropped, ndeleted))