Ejemplo n.º 1
0
 def testSetItem(self):
     # Assigning .chrom must be visible through both the 'chrom' key and the
     # attribute itself.
     ivf = IntervalFile(self.file)
     # next() builtin works on Python 2.6+ and 3; the .next() method is 2-only.
     iv = next(ivf)
     iv.chrom = 'chrfake'
     # Single-argument print() produces the same output on Python 2 and 3.
     print(iv.fields)
     self.assertEqual(iv['chrom'], 'chrfake')
     self.assertEqual(iv.chrom, 'chrfake')
Ejemplo n.º 2
0
 def testAppend(self):
     # A value appended to an interval must become its last field.
     ivf = IntervalFile(self.file)
     # next() builtin works on Python 2.6+ and 3; the .next() method is 2-only.
     iv = next(ivf)
     # Single-argument print() produces the same output on Python 2 and 3.
     print(iv.fields)
     iv.append('asdf')
     print(iv)
     self.assertEqual(iv[-1], 'asdf')
Ejemplo n.º 3
0
 def testGetItemNegative(self):
     "test negative indexes to feature."
     ivf = IntervalFile(self.file)
     # next() builtin works on Python 2.6+ and 3; the .next() method is 2-only.
     iv = next(ivf)
     # Subtracting the field count turns each positive column position into
     # the negative index that addresses the same column.
     chrom_idx = -self.fieldcount + self.chrpos
     start_idx = -self.fieldcount + self.startpos
     stop_idx = -self.fieldcount + self.stoppos
     # assertTrue replaces the deprecated assert_ alias; the second argument
     # is the failure message (the offending field value).
     self.assertTrue(iv[chrom_idx].startswith("chr"), iv[chrom_idx])
     self.assertTrue(iv[start_idx].isdigit(), iv[start_idx])
     self.assertTrue(iv[stop_idx].isdigit())
Ejemplo n.º 4
0
    def testStart(self):
        # Re-assigning .start to the value just read must leave both the
        # start coordinate and the string form of the interval unchanged.
        ivf = IntervalFile(self.file)
        # next() builtin works on Python 2.6+ and 3; .next() is 2-only.
        iv = next(ivf)
        orig_string = str(iv)

        # 0-based.
        orig_start = iv.start

        # Setting .start always sets 0-based coord.
        iv.start = orig_start

        # But for GFF setting .start should also make the .fields[3] the GFF
        # 1-based coord.  assertEqual (not a bare assert) so the check still
        # runs under ``python -O`` and reports both values on failure.
        self.assertEqual(iv.start, int(iv.fields[3]) - 1)

        second_string = str(iv)
        second_start = iv.start
        iv.start = second_start

        # Check .start and .fields[3] internal consistency again
        self.assertEqual(iv.start, int(iv.fields[3]) - 1)

        # One pre-formatted string per print() gives identical output on
        # Python 2 and 3.
        print('   orig: (start=%s) %s' % (orig_start, orig_string))
        print(' second: (start=%s) %s' % (second_start, second_string))
        print('current: (start=%s) %s' % (iv.start, str(iv)))
        self.assertTrue(orig_start == second_start == iv.start)
        self.assertTrue(orig_string == second_string == str(iv))
Ejemplo n.º 5
0
 def testGetItem(self):
     "getitem now supports direct access to the line."
     ivf = IntervalFile(self.file)
     # next() builtin works on Python 2.6+ and 3; the .next() method is 2-only.
     iv = next(ivf)
     self.assertTrue(iv[self.chrpos].startswith("chr"))
     self.assertTrue(iv[self.startpos].isdigit())
     # The stop column must be numeric as well (previously the start column
     # was checked twice and the stop column not at all).
     self.assertTrue(iv[self.stoppos].isdigit())
Ejemplo n.º 6
0
 def testAppend(self):
     # A value appended to an interval must become its last field.
     ivf = IntervalFile(self.file)
     # next() builtin works on Python 2.6+ and 3; the .next() method is 2-only.
     iv = next(ivf)
     # Single-argument print() produces the same output on Python 2 and 3.
     print(iv.fields)
     iv.append('asdf')
     print(iv)
     self.assertEqual(iv[-1], 'asdf')
Ejemplo n.º 7
0
 def testName(self):
     # Setting .name must round-trip through the attribute.
     ivf = IntervalFile(self.file)
     # next() builtin works on Python 2.6+ and 3; the .next() method is 2-only.
     iv = next(ivf)
     iv.name = "bart simpson"
     self.assertEqual(iv.name, "bart simpson")
     if iv.file_type == "gff":
         # For GFF input the new name must also appear in fields[8].
         self.assertTrue("bart" in iv.fields[8])
Ejemplo n.º 8
0
 def testSetItem(self):
     # Assigning .chrom must be visible through both the 'chrom' key and the
     # attribute itself.
     ivf = IntervalFile(self.file)
     # next() builtin works on Python 2.6+ and 3; the .next() method is 2-only.
     iv = next(ivf)
     iv.chrom = 'chrfake'
     # Single-argument print() produces the same output on Python 2 and 3.
     print(iv.fields)
     self.assertEqual(iv['chrom'], 'chrfake')
     self.assertEqual(iv.chrom, 'chrfake')
Ejemplo n.º 9
0
    def testStart(self):
        # Re-assigning .start to the value just read must leave both the
        # start coordinate and the string form of the interval unchanged.
        ivf = IntervalFile(self.file)
        # next() builtin works on Python 2.6+ and 3; .next() is 2-only.
        iv = next(ivf)
        orig_string = str(iv)

        # 0-based.
        orig_start = iv.start

        # Setting .start always sets 0-based coord.
        iv.start = orig_start

        # But for GFF setting .start should also make the .fields[3] the GFF
        # 1-based coord.  assertEqual (not a bare assert) so the check still
        # runs under ``python -O`` and reports both values on failure.
        self.assertEqual(iv.start, int(iv.fields[3]) - 1)

        second_string = str(iv)
        second_start = iv.start
        iv.start = second_start

        # Check .start and .fields[3] internal consistency again
        self.assertEqual(iv.start, int(iv.fields[3]) - 1)

        # One pre-formatted string per print() gives identical output on
        # Python 2 and 3.
        print('   orig: (start=%s) %s' % (orig_start, orig_string))
        print(' second: (start=%s) %s' % (second_start, second_string))
        print('current: (start=%s) %s' % (iv.start, str(iv)))
        self.assertTrue(orig_start == second_start == iv.start)
        self.assertTrue(orig_string == second_string == str(iv))
Ejemplo n.º 10
0
 def testName(self):
     # Setting .name must round-trip through the attribute.
     ivf = IntervalFile(self.file)
     # next() builtin works on Python 2.6+ and 3; the .next() method is 2-only.
     iv = next(ivf)
     iv.name = "bart simpson"
     self.assertEqual(iv.name, "bart simpson")
     if iv.file_type == "gff":
         # For GFF input the new name must also appear in fields[8].
         self.assertTrue("bart" in iv.fields[8])
Ejemplo n.º 11
0
 def testGetItem(self):
     "getitem now supports direct access to the line."
     ivf = IntervalFile(self.file)
     # next() builtin works on Python 2.6+ and 3; the .next() method is 2-only.
     iv = next(ivf)
     self.assertTrue(iv[self.chrpos].startswith("chr"))
     self.assertTrue(iv[self.startpos].isdigit())
     # The stop column must be numeric as well (previously the start column
     # was checked twice and the stop column not at all).
     self.assertTrue(iv[self.stoppos].isdigit())
Ejemplo n.º 12
0
 def testGetItemNegative(self):
     "test negative indexes to feature."
     ivf = IntervalFile(self.file)
     # next() builtin works on Python 2.6+ and 3; the .next() method is 2-only.
     iv = next(ivf)
     # Subtracting the field count turns each positive column position into
     # the negative index that addresses the same column.
     chrom_idx = -self.fieldcount + self.chrpos
     start_idx = -self.fieldcount + self.startpos
     stop_idx = -self.fieldcount + self.stoppos
     # assertTrue replaces the deprecated assert_ alias; the second argument
     # is the failure message (the offending field value).
     self.assertTrue(iv[chrom_idx].startswith("chr"), iv[chrom_idx])
     self.assertTrue(iv[start_idx].isdigit(), iv[start_idx])
     self.assertTrue(iv[stop_idx].isdigit())
Ejemplo n.º 13
0
 def testGetItemSliceNone(self):
     " test support for funky slices."
     ivf = IntervalFile(self.file)
     # next() builtin works on Python 2.6+ and 3; the .next() method is 2-only.
     iv = next(ivf)
     self.assertEqual(len(iv[:3]), 3)
     self.assertEqual(len(iv[3:3]), 0)
     self.assertEqual(len(iv[2:]), self.fieldcount - 2, iv[2:])

     # One pre-formatted string gives identical output on Python 2 and 3.
     print("%s %s" % (len(iv.fields), iv.fields))
     # An integer index past the last field must raise IndexError.
     self.assertRaises(IndexError, lambda x: iv[x], self.fieldcount + 1)
Ejemplo n.º 14
0
    def testGetItemSlice(self):
        "getitem now supports direct access to the line."
        ivf = IntervalFile(self.file)
        # next() builtin works on Python 2.6+ and 3; .next() is 2-only.
        iv = next(ivf)
        # Tuple unpacking doubles as a length check: each slice must return
        # exactly one and exactly two fields respectively.
        seqid, = iv[self.chrpos:self.chrpos + 1]
        start, end = iv[self.startpos:self.stoppos + 1]
        self.assertTrue(start.isdigit())

        self.assertEqual(int(end), iv.end)
        self.assertEqual(seqid, iv.chrom)
Ejemplo n.º 15
0
    def testGetItemSlice(self):
        "getitem now supports direct access to the line."
        ivf = IntervalFile(self.file)
        # next() builtin works on Python 2.6+ and 3; .next() is 2-only.
        iv = next(ivf)
        # Tuple unpacking doubles as a length check: each slice must return
        # exactly one and exactly two fields respectively.
        seqid, = iv[self.chrpos:self.chrpos + 1]
        start, end = iv[self.startpos:self.stoppos + 1]
        self.assertTrue(start.isdigit())

        self.assertEqual(int(end), iv.end)
        self.assertEqual(seqid, iv.chrom)
Ejemplo n.º 16
0
    def testGetItemSliceNone(self):
        " test support for funky slices."
        ivf = IntervalFile(self.file)
        # next() builtin works on Python 2.6+ and 3; .next() is 2-only.
        iv = next(ivf)
        self.assertEqual(len(iv[:3]), 3)
        self.assertEqual(len(iv[3:3]), 0)
        self.assertEqual(len(iv[2:]), self.fieldcount - 2, iv[2:])

        # One pre-formatted string gives identical output on Python 2 and 3.
        print("%s %s" % (len(iv.fields), iv.fields))
        # An integer index past the last field must raise IndexError.
        self.assertRaises(IndexError, lambda x: iv[x], self.fieldcount + 1)
Ejemplo n.º 17
0
 def testSetAttrs(self):
     ivf = IntervalFile(self.file)
     # next() builtin works on Python 2.6+ and 3; the .next() method is 2-only.
     iv = next(ivf)
     if iv.file_type != 'gff':
         # Non-GFF input: assigning an attribute is expected to raise.
         self.assertRaises(ValueError, iv.attrs.__setitem__, 'a', 'b')
         return
     iv.attrs['ID'] = 'fake'
     iv.attrs['field0'] = 'asdf'
     # The attribute changes must be reflected in field 8 of the interval.
     self.assertEqual(str(iv.attrs), iv[8])
     self.assertTrue('field0=asdf' in iv[8])
     self.assertTrue('ID=fake' in iv[8])
Ejemplo n.º 18
0
 def testSetAttrs(self):
     ivf = IntervalFile(self.file)
     # next() builtin works on Python 2.6+ and 3; the .next() method is 2-only.
     iv = next(ivf)
     if iv.file_type != 'gff':
         # Non-GFF input: assigning an attribute is expected to raise.
         self.assertRaises(ValueError, iv.attrs.__setitem__, 'a', 'b')
         return
     iv.attrs['ID'] = 'fake'
     iv.attrs['field0'] = 'asdf'
     # The attribute changes must be reflected in field 8 of the interval.
     self.assertEqual(str(iv.attrs), iv[8])
     self.assertTrue('field0=asdf' in iv[8])
     self.assertTrue('ID=fake' in iv[8])
Ejemplo n.º 19
0
 def testStart(self):
     # Re-assigning .start to the value just read must leave both the start
     # coordinate and the string form of the interval unchanged.
     ivf = IntervalFile(self.file)
     # next() builtin works on Python 2.6+ and 3; the .next() method is 2-only.
     iv = next(ivf)
     orig_string = str(iv)
     orig_start = iv.start
     iv.start = orig_start
     second_string = str(iv)
     second_start = iv.start
     iv.start = second_start
     # One pre-formatted string per print() gives identical output on
     # Python 2 and 3.
     print('   orig: (start=%s) %s' % (orig_start, orig_string))
     print(' second: (start=%s) %s' % (second_start, second_string))
     print('current: (start=%s) %s' % (iv.start, str(iv)))
     self.assertTrue(orig_start == second_start == iv.start)
     self.assertTrue(orig_string == second_string == str(iv))
Ejemplo n.º 20
0
 def testStart(self):
     # Re-assigning .start to the value just read must leave both the start
     # coordinate and the string form of the interval unchanged.
     ivf = IntervalFile(self.file)
     # next() builtin works on Python 2.6+ and 3; the .next() method is 2-only.
     iv = next(ivf)
     orig_string = str(iv)
     orig_start = iv.start
     iv.start = orig_start
     second_string = str(iv)
     second_start = iv.start
     iv.start = second_start
     # One pre-formatted string per print() gives identical output on
     # Python 2 and 3.
     print('   orig: (start=%s) %s' % (orig_start, orig_string))
     print(' second: (start=%s) %s' % (second_start, second_string))
     print('current: (start=%s) %s' % (iv.start, str(iv)))
     self.assertTrue(orig_start == second_start == iv.start)
     self.assertTrue(orig_string == second_string == str(iv))
Ejemplo n.º 21
0
    def testStart(self):
        # Re-assigning .start to the value just read must leave both the
        # start coordinate and the string form of the interval unchanged.
        ivf = IntervalFile(self.file)
        iv = next(ivf)
        orig_string = str(iv)

        # 0-based.
        orig_start = iv.start

        # Setting .start always sets 0-based coord.
        iv.start = orig_start

        # But for GFF setting .start should also make the .fields[3] the GFF
        # 1-based coord.  assertEqual (not a bare assert) so the check still
        # runs under ``python -O`` and reports both values on failure.
        self.assertEqual(iv.start, int(iv.fields[3]) - 1)

        second_string = str(iv)
        second_start = iv.start
        iv.start = second_start

        # Check .start and .fields[3] internal consistency again
        self.assertEqual(iv.start, int(iv.fields[3]) - 1)

        print("   orig:", "(start=%s)" % orig_start, orig_string)
        print(" second:", "(start=%s)" % second_start, second_string)
        print("current:", "(start=%s)" % iv.start, str(iv))
        self.assertTrue(orig_start == second_start == iv.start)
        self.assertTrue(orig_string == second_string == str(iv))
Ejemplo n.º 22
0
 def testAppend(self):
     # After append(), the new value must be readable as the last field.
     reader = IntervalFile(self.file)
     feature = next(reader)
     print(feature.fields)
     extra = 'asdf'
     feature.append(extra)
     print(feature)
     last_field = feature[-1]
     self.assertEqual(last_field, 'asdf')
Ejemplo n.º 23
0
 def testSetItem(self):
     # A chrom assigned through the attribute must read back identically
     # through the "chrom" key and through the attribute.
     reader = IntervalFile(self.file)
     feature = next(reader)
     feature.chrom = "chrfake"
     print(feature.fields)
     by_key = feature["chrom"]
     self.assertEqual(by_key, "chrfake")
     by_attr = feature.chrom
     self.assertEqual(by_attr, "chrfake")
Ejemplo n.º 24
0
    def testFileType(self):
        # The fixture opened in setUp must be detected as BED.  assertEqual
        # replaces the deprecated assert_ alias; the tuple is the failure
        # message.
        self.assertEqual(self.bed.file_type, "bed",
                         (self.bed.file_type, self.file))

        # A GFF fixture must be detected as GFF.
        gff = os.path.join(PATH, "data/c.gff")
        i = IntervalFile(gff)
        self.assertEqual(i.file_type, "gff", (i.file_type, gff))
Ejemplo n.º 25
0
 def testSetAttrs(self):
     # GFF features: attribute edits must show up in field 8.
     # Other file types: after an attribute is set, str() of the feature is
     # expected to raise ValueError.
     reader = IntervalFile(self.file)
     feature = next(reader)
     if feature.file_type == 'gff':
         feature.attrs['ID'] = 'fake'
         feature.attrs['field0'] = 'asdf'
         self.assertEqual(str(feature.attrs), feature[8])
         for fragment in ('field0=asdf', 'ID=fake'):
             self.assertTrue(fragment in feature[8])
     else:
         feature.attrs['a'] = 'b'
         self.assertRaises(ValueError, str, feature)
Ejemplo n.º 26
0
 def testSetAttrs(self):
     # GFF features: attribute edits must show up in field 8.
     # Other file types: after an attribute is set, str() of the feature is
     # expected to raise ValueError.
     reader = IntervalFile(self.file)
     feature = next(reader)
     if feature.file_type == "gff":
         feature.attrs["ID"] = "fake"
         feature.attrs["field0"] = "asdf"
         self.assertEqual(str(feature.attrs), feature[8])
         for fragment in ("field0=asdf", "ID=fake"):
             self.assertTrue(fragment in feature[8])
     else:
         feature.attrs["a"] = "b"
         self.assertRaises(ValueError, str, feature)
Ejemplo n.º 27
0
class IntervalFileTest(unittest.TestCase):
    """Tests for IntervalFile lookups and Interval comparisons on a BED fixture."""

    # Fixture path relative to PATH; setUp replaces it with the joined path.
    file = "data/rmsk.hg18.chr21.small.bed"

    def setUp(self):
        self.file = os.path.join(PATH, self.file)
        self.bed = IntervalFile(self.file)

    def testFileType(self):
        # assertEqual replaces the deprecated assert_ alias; the tuple is the
        # failure message.
        self.assertEqual(self.bed.file_type, "bed",
                         (self.bed.file_type, self.file))

        gff = os.path.join(PATH, "data/c.gff")
        i = IntervalFile(gff)
        self.assertEqual(i.file_type, "gff", (i.file_type, gff))

    def testOverlaps(self):
        i = Interval("chr21", 9719768, 9739768)
        hits = self.bed.all_hits(i)
        self.assertEqual(len(hits), 8)
        # Every reported hit must actually intersect the query range.
        for hit in hits:
            self.assertTrue(hit.start <= 9739768 and hit.end >= 9719768)

    def testStrands(self):
        # With same_strand=True only hits on the query's strand may come back.
        for strand in ("+", "-"):
            i = Interval("chr21", 9719768, 9739768, strand)
            hits = self.bed.all_hits(i, same_strand=True)
            for hit in hits:
                self.assertEqual(hit.strand, strand)

    def testRichCmp(self):
        a = Interval("chr21", 9719768, 9739768)
        b = Interval("chr21", 9719767, 9739768)
        # NOTE(review): both a < b and b < a are asserted for the same pair,
        # which looks contradictory -- confirm against Interval's comparison
        # semantics before relying on this test.
        self.assertTrue(a < b)
        self.assertTrue(b < a)
        c = Interval("chr21", 9719767, 9739768)
        self.assertTrue(c == b)
        d = Interval("chr22", 9719767, 9739768)
        self.assertTrue(c != d)
Ejemplo n.º 28
0
 def testStart(self):
     # Writing .start back with its own value, twice, must change neither the
     # start coordinate nor the string form of the feature.
     reader = IntervalFile(self.file)
     feature = next(reader)

     first_text = str(feature)
     first_start = feature.start
     feature.start = first_start

     second_text = str(feature)
     second_start = feature.start
     feature.start = second_start

     report = (
         ("   orig:", first_start, first_text),
         (" second:", second_start, second_text),
         ("current:", feature.start, str(feature)),
     )
     for label, start, text in report:
         print(label, "(start=%s)" % start, text)

     self.assertTrue(first_start == second_start == feature.start)
     self.assertTrue(first_text == second_text == str(feature))
Ejemplo n.º 29
0
class IntervalFileTest(unittest.TestCase):
    """Tests for IntervalFile lookups and Interval rich comparisons."""

    # Fixture path relative to PATH; setUp replaces it with the joined path.
    file = "data/rmsk.hg18.chr21.small.bed"

    # Comparison operators in the order used by the expectation tuples in
    # testRichCmp: ==, !=, <=, >=, <, >.
    _CMP_OPS = (
        ("==", lambda a, b: a == b),
        ("!=", lambda a, b: a != b),
        ("<=", lambda a, b: a <= b),
        (">=", lambda a, b: a >= b),
        ("<", lambda a, b: a < b),
        (">", lambda a, b: a > b),
    )

    def setUp(self):
        self.file = os.path.join(PATH, self.file)
        self.bed = IntervalFile(self.file)

    def testFileType(self):
        # assertEqual replaces the deprecated assert_ alias; the tuple is the
        # failure message.
        self.assertEqual(self.bed.file_type, "bed",
                         (self.bed.file_type, self.file))

        gff = os.path.join(PATH, "data/c.gff")
        i = IntervalFile(gff)
        self.assertEqual(i.file_type, "gff", (i.file_type, gff))

    def testOverlaps(self):
        i = Interval("chr21", 9719768, 9739768)
        hits = self.bed.all_hits(i)
        self.assertEqual(len(hits), 8)
        # Every reported hit must actually intersect the query range.
        for hit in hits:
            self.assertTrue(hit.start <= 9739768 and hit.end >= 9719768)

    def testStrands(self):
        # With same_strand=True only hits on the query's strand may come back.
        for strand in ("+", "-"):
            i = Interval("chr21", 9719768, 9739768, strand)
            hits = self.bed.all_hits(i, same_strand=True)
            for hit in hits:
                self.assertEqual(hit.strand, strand)

    def _assert_comparisons(self, label, a, b, expected):
        """Check ==, !=, <=, >=, <, > (in that order) for ``a`` against ``b``.

        ``expected`` holds one boolean per operator; ``label`` names the case
        in the failure message.
        """
        for (symbol, compare), want in zip(self._CMP_OPS, expected):
            check = self.assertTrue if want else self.assertFalse
            check(compare(a, b),
                  "%s: a %s b expected to be %s" % (label, symbol, want))

    def testRichCmp(self):
        # be obsessive . . .
        T, F = True, False
        # Each row: label, a, b, expected results for (==, !=, <=, >=, <, >).
        cases = (
            ("identical",
             Interval("chr21", 100, 200), Interval("chr21", 100, 200),
             (T, F, T, T, F, F)),
            ("identical, zero length",
             Interval("chr21", 100, 100), Interval("chr21", 100, 100),
             (T, F, T, T, F, F)),
            ("!= because of strand",
             Interval("chr21", 100, 200, strand='+'),
             Interval("chr21", 100, 200, strand='-'),
             (F, T, F, F, F, F)),
            ("a >= b",
             Interval("chr21", 100, 300), Interval("chr21", 100, 200),
             (F, T, F, T, F, F)),
            ("a <= b (b starts at a's end)",
             Interval("chr21", 100, 300), Interval("chr21", 300, 300),
             (F, T, T, F, F, F)),
            ("a <= b (b starts inside a)",
             Interval("chr21", 100, 300), Interval("chr21", 250, 300),
             (F, T, T, F, F, F)),
            ("a < b",
             Interval("chr21", 100, 200), Interval("chr21", 201, 300),
             (F, T, T, F, T, F)),
            ("a > b",
             Interval("chr21", 201, 300), Interval("chr21", 100, 200),
             (F, T, F, T, F, T)),
            ("a != b (different chrom)",
             Interval("none", 1, 100), Interval("chr21", 1, 100),
             (F, T, F, F, F, F)),
        )
        for label, a, b, expected in cases:
            self._assert_comparisons(label, a, b, expected)

        # nested should raise NotImplementedError
        a = Interval("chr21", 100, 200)
        b = Interval("chr21", 50, 300)
        for dunder in ("__eq__", "__ne__", "__le__", "__ge__",
                       "__lt__", "__gt__"):
            self.assertRaises(NotImplementedError, getattr(a, dunder), b)
Ejemplo n.º 30
0
        # 3. Reached an interval that is AFTER  the query (start > query's end)
        # We add each feature to the cache, and track those that overlap
        while (curr_db is not None and curr_qy.chrom == curr_db.chrom
               and not after(curr_db, curr_qy)):
            if (overlaps(curr_qy, curr_db) > 0):
                hits.append(curr_db)
            db_cache.append(curr_db)
            curr_db = get_next(database)

        # Report the query's overlaps and move on to the next query
        report_hits(curr_qy, hits)
        hits = []
        curr_qy = get_next(query)


if __name__ == "__main__":

    # Both a query file and a database file are required.
    if len(sys.argv) < 3:
        print("Usage:")
        print("chrom_sweep.py [query] [database]")
        # Missing arguments is an error, so report a non-zero exit status
        # (a bare sys.exit() would signal success to the calling shell).
        sys.exit(1)

    query_file = sys.argv[1]
    database_file = sys.argv[2]

    # open up the BED files.
    query = IntervalFile(query_file)  # The Query File
    database = IntervalFile(database_file)  # The Database File

    sweep(query, database)
Ejemplo n.º 31
0
def main():
    '''
    Command-line entry point.

    Parses the arguments from the command line, builds the seed, variant and
    include regions from them, and hands everything to ``analyze_intervals``,
    which writes to standard output.

    Exits through ``parser.error`` (status 2) when more than one non-VCF
    variants file is given.
    '''
    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--upstream',
        type=int,
        help='distance upstream of seed'
        ' to look for flanking regions to compare with `seed`, default = 20000',
        default=20000)
    parser.add_argument('--downstream',
                        type=int,
                        help='distance downstream to '
                        'look for flanking regions, default = 20000',
                        default=20000)
    parser.add_argument('--seed',
                        help='regions of interest, e.g. promoters',
                        required=True,
                        metavar="BED")

    parser.add_argument('--exclude',
                        help='regions to be excluded when looking for flanks')
    parser.add_argument('--include',
                        help='regions to be included when looking for flanks')
    parser.add_argument('--test',
                        default='fisher',
                        choices=['fisher', 'permutation', 'both'])
    parser.add_argument(
        '--shuffles',
        type=int,
        help='number of shuffles to do for permutation analysis',
        default=1000)
    parser.add_argument('--genome',
                        help='the name of the genome file for BEDTools')
    parser.add_argument('--full',
                        help='output full, dataset with per-sample p-values',
                        default=False,
                        action='store_true')
    parser.add_argument('--score',
                        metavar="BIGWIG/INT",
                        help='score functionality in progress')
    # The trailing space after "e.g." keeps the two implicitly concatenated
    # literals from running together in the rendered help text.
    parser.add_argument('variants',
                        help='regions to assign significance e.g. '
                        'a list of variants',
                        metavar="BED/VCF",
                        nargs='+')

    args = parser.parse_args()

    seed_region = BedTool(args.seed)

    # The first file name decides the input format: VCF input may span
    # several files, anything else must be exactly one file.
    if args.variants[0].endswith('.vcf'):
        region_file = vcf_to_long_bed(*args.variants)
    else:
        # Report bad usage through argparse instead of an assert, which is
        # stripped under ``python -O`` and yields a bare traceback.
        if len(args.variants) != 1:
            parser.error(
                'exactly one variants file may be given unless the input '
                'is VCF')
        region_file = IntervalFile(args.variants[0])

    out_file = sys.stdout
    genome = args.genome

    # --exclude takes precedence over --include; with neither, the whole
    # genome is searched.
    if args.exclude:
        include_file = BedTool(args.exclude).complement(g=genome)
    elif args.include:
        include_file = BedTool(args.include)
    else:
        include_file = BedTool(create_genome_bed(genome))

    score_handle = None
    try:
        bw = None
        if args.score:
            # BigWig is a binary format, so open the file in binary mode,
            # and keep the handle so it can be closed once the analysis is
            # done.  NOTE(review): assumes BigWigFile reads from the handle
            # during analyze_intervals and not afterwards -- confirm.
            score_handle = open(args.score, 'rb')
            bw = BigWigFile(score_handle)

        analyze_intervals(include_file,
                          seed_region,
                          region_file,
                          args.upstream,
                          args.downstream,
                          args.test,
                          args.shuffles,
                          out_file,
                          args.full,
                          score=bw)
    finally:
        if score_handle is not None:
            score_handle.close()
Ejemplo n.º 32
0
1 = start_byte,
2 = end_byte,
3 = num_records,
4 = max_interval size (future, for binary search)
"""

    # what are the BED files
    A_file = sys.argv[1]
    B_file = sys.argv[2]

    # expected index file name
    A_idx_file = A_file + ".idx"
    B_idx_file = B_file + ".idx"

    # open up the BED files.
    A = IntervalFile(A_file)  # The Query File
    B = IntervalFile(B_file)  # The Database File

    # create index files if they don't yet exist
    if not os.path.exists(A_idx_file):
        index_bed.index(A_file)
    if not os.path.exists(B_idx_file):
        index_bed.index(B_file)

    # load the indices for A and B
    A_map = []  # list of chrom/offset tuples
    for line in open(A_idx_file):
        fields = line.strip().split("\t")
        A_map.append(
            (fields[0], int(fields[1]), int(fields[2]), int(fields[3]),
             int(fields[4])))
Ejemplo n.º 33
0
 def testSetItemString(self):
     # A chrom written through the 'chrom' key must read back through both
     # the key and the attribute.
     ivf = IntervalFile(self.file)
     # next() builtin works on Python 2.6+ and 3; the .next() method is 2-only.
     iv = next(ivf)
     iv['chrom'] = 'fake'
     self.assertEqual(iv['chrom'], 'fake')
     self.assertEqual(iv.chrom, 'fake')
Ejemplo n.º 34
0
 def testGetItemString(self):
     # String keys must return the same values as the matching attributes.
     ivf = IntervalFile(self.file)
     # next() builtin works on Python 2.6+ and 3; the .next() method is 2-only.
     iv = next(ivf)
     self.assertEqual(iv['chrom'], iv.chrom)
     self.assertEqual(iv['start'], iv.start)
     self.assertEqual(iv['end'], iv.end)
Ejemplo n.º 35
0
1 = start_byte,
2 = end_byte,
3 = num_records,
4 = max_interval size (future, for binary search)
"""

    # what are the BED files
    A_file = sys.argv[1]
    B_file = sys.argv[2]

    # expected index file name
    A_idx_file = A_file + ".idx"
    B_idx_file = B_file + ".idx"

    # open up the BED files.
    A = IntervalFile(A_file) # The Query File
    B = IntervalFile(B_file) # The Database File
    
    # create index files if they don't yet exist
    if not os.path.exists(A_idx_file):
        index_bed.index(A_file)
    if not os.path.exists(B_idx_file):
        index_bed.index(B_file)
    
    # load the indices for A and B
    A_map = [] # list of chrom/offset tuples
    for line in open(A_idx_file):
        fields = line.strip().split("\t")
        A_map.append((fields[0], int(fields[1]), int(fields[2]), int(fields[3]), int(fields[4])))
    B_map = [] # list of chrom/offset tuples
    for line in open(B_idx_file):
Ejemplo n.º 36
0
 def testGetItemString(self):
     # String keys must return the same values as the matching attributes.
     ivf = IntervalFile(self.file)
     # next() builtin works on Python 2.6+ and 3; the .next() method is 2-only.
     iv = next(ivf)
     self.assertEqual(iv['chrom'], iv.chrom)
     self.assertEqual(iv['start'], iv.start)
     self.assertEqual(iv['end'], iv.end)
Ejemplo n.º 37
0
 def setUp(self):
     # Resolve the fixture name against PATH, remember the full path on the
     # instance, and open it as an IntervalFile for the tests to use.
     fixture_path = os.path.join(PATH, self.file)
     self.file = fixture_path
     self.bed = IntervalFile(fixture_path)
Ejemplo n.º 38
0
 def testSetItemString(self):
     # A chrom written through the 'chrom' key must read back through both
     # the key and the attribute.
     ivf = IntervalFile(self.file)
     # next() builtin works on Python 2.6+ and 3; the .next() method is 2-only.
     iv = next(ivf)
     iv['chrom'] = 'fake'
     self.assertEqual(iv['chrom'], 'fake')
     self.assertEqual(iv.chrom, 'fake')
Ejemplo n.º 39
0
class IntervalFileTest(unittest.TestCase):
    """Tests for IntervalFile lookups and Interval rich comparisons."""

    # Fixture path relative to PATH; setUp replaces it with the joined path.
    file = "data/rmsk.hg18.chr21.small.bed"

    # Comparison operators in the order used by the expectation tuples in
    # testRichCmp: ==, !=, <=, >=, <, >.
    _CMP_OPS = (
        ("==", lambda a, b: a == b),
        ("!=", lambda a, b: a != b),
        ("<=", lambda a, b: a <= b),
        (">=", lambda a, b: a >= b),
        ("<", lambda a, b: a < b),
        (">", lambda a, b: a > b),
    )

    def setUp(self):
        self.file = os.path.join(PATH, self.file)
        self.bed = IntervalFile(self.file)

    def testFileType(self):
        # assertEqual replaces the deprecated assert_ alias; the tuple is the
        # failure message.
        self.assertEqual(self.bed.file_type, "bed",
                         (self.bed.file_type, self.file))

        gff = os.path.join(PATH, "data/c.gff")
        i = IntervalFile(gff)
        self.assertEqual(i.file_type, "gff", (i.file_type, gff))

    def testOverlaps(self):
        i = Interval("chr21", 9719768, 9739768)
        hits = self.bed.all_hits(i)
        self.assertEqual(len(hits), 8)
        # Every reported hit must actually intersect the query range.
        for hit in hits:
            self.assertTrue(hit.start <= 9739768 and hit.end >= 9719768)

    def testStrands(self):
        # With same_strand=True only hits on the query's strand may come back.
        for strand in ("+", "-"):
            i = Interval("chr21", 9719768, 9739768, strand)
            hits = self.bed.all_hits(i, same_strand=True)
            for hit in hits:
                self.assertEqual(hit.strand, strand)

    def _assert_comparisons(self, label, a, b, expected):
        """Check ==, !=, <=, >=, <, > (in that order) for ``a`` against ``b``.

        ``expected`` holds one boolean per operator; ``label`` names the case
        in the failure message.
        """
        for (symbol, compare), want in zip(self._CMP_OPS, expected):
            check = self.assertTrue if want else self.assertFalse
            check(compare(a, b),
                  "%s: a %s b expected to be %s" % (label, symbol, want))

    def testRichCmp(self):
        # be obsessive . . .
        T, F = True, False
        # Each row: label, a, b, expected results for (==, !=, <=, >=, <, >).
        cases = (
            ("identical",
             Interval("chr21", 100, 200), Interval("chr21", 100, 200),
             (T, F, T, T, F, F)),
            ("identical, zero length",
             Interval("chr21", 100, 100), Interval("chr21", 100, 100),
             (T, F, T, T, F, F)),
            ("!= because of strand",
             Interval("chr21", 100, 200, strand='+'),
             Interval("chr21", 100, 200, strand='-'),
             (F, T, F, F, F, F)),
            ("a >= b",
             Interval("chr21", 100, 300), Interval("chr21", 100, 200),
             (F, T, F, T, F, F)),
            ("a <= b (b starts at a's end)",
             Interval("chr21", 100, 300), Interval("chr21", 300, 300),
             (F, T, T, F, F, F)),
            ("a <= b (b starts inside a)",
             Interval("chr21", 100, 300), Interval("chr21", 250, 300),
             (F, T, T, F, F, F)),
            ("a < b",
             Interval("chr21", 100, 200), Interval("chr21", 201, 300),
             (F, T, T, F, T, F)),
            ("a > b",
             Interval("chr21", 201, 300), Interval("chr21", 100, 200),
             (F, T, F, T, F, T)),
            ("a != b (different chrom)",
             Interval("none", 1, 100), Interval("chr21", 1, 100),
             (F, T, F, F, F, F)),
        )
        for label, a, b, expected in cases:
            self._assert_comparisons(label, a, b, expected)

        # nested should raise NotImplementedError
        a = Interval("chr21", 100, 200)
        b = Interval("chr21", 50, 300)
        for dunder in ("__eq__", "__ne__", "__le__", "__ge__",
                       "__lt__", "__gt__"):
            self.assertRaises(NotImplementedError, getattr(a, dunder), b)
Ejemplo n.º 40
0
 def testSetItemString(self):
     # A chrom written through the "chrom" key must read back through both
     # the key and the attribute.
     reader = IntervalFile(self.file)
     feature = next(reader)
     feature["chrom"] = "fake"
     by_key = feature["chrom"]
     self.assertEqual(by_key, "fake")
     by_attr = feature.chrom
     self.assertEqual(by_attr, "fake")
Ejemplo n.º 41
0
 def setUp(self):
     # Resolve the fixture name against PATH, remember the full path on the
     # instance, and open it as an IntervalFile for the tests to use.
     fixture_path = os.path.join(PATH, self.file)
     self.file = fixture_path
     self.bed = IntervalFile(fixture_path)
Ejemplo n.º 42
0
 def testGetItemString(self):
     # Each string key must return the same value as the attribute of the
     # same name.
     reader = IntervalFile(self.file)
     feature = next(reader)
     for key in ("chrom", "start", "end"):
         self.assertEqual(feature[key], getattr(feature, key))