Example #1
0
def test_bedrecord__setters__3_fields():
    """Assigning contig and end on a 3-column record shows in str/repr."""
    bed = BEDRecord("my_contig\t12\t345")

    # Replace the contig name and read it back.
    bed.contig = "chrZ"
    assert_equal(bed.contig, "chrZ")

    # Move the end coordinate forward by 20 (345 -> 365).
    bed.end += 20
    assert_equal(bed.end, 365)

    # Only the three original columns should be rendered.
    expected_str = "chrZ\t12\t365"
    expected_repr = "BEDRecord(contig='chrZ', start=12, end=365)"
    assert_equal(str(bed), expected_str)
    assert_equal(repr(bed), expected_repr)
Example #2
0
def test_bedrecord__setters__unset_fields__at_end():
    """Assigning name, score and strand in order extends a 3-column record."""
    bed = BEDRecord("my_contig\t12\t345")

    # Each field is set and immediately read back, in column order.
    assignments = (
        ("name", "my_region"),
        ("score", -13),
        ("strand", '-'),
    )
    for field, value in assignments:
        setattr(bed, field, value)
        assert_equal(getattr(bed, field), value)

    expected_str = "my_contig\t12\t345\tmy_region\t-13\t-"
    expected_repr = ("BEDRecord(contig='my_contig', start=12, end=345, "
                     "name='my_region', score=-13, strand='-')")
    assert_equal(str(bed), expected_str)
    assert_equal(repr(bed), expected_repr)
Example #3
0
def _collect_and_validate_regions(regions):
    """Validate every record in the BED file named by regions["BED"].

    Blank lines and lines starting with "#" are skipped. Every other line is
    parsed as a BEDRecord and must
      - contain at least 6 fields,
      - name a contig present in _collect_fasta_contigs(regions), and
      - satisfy 0 <= start < end <= length of that contig.

    The names of the accepted records are gathered in a local set.

    Raises MakefileError if a line cannot be parsed or fails any of the
    checks above; line numbers in the messages are 1-based.
    """
    contigs = _collect_fasta_contigs(regions)
    sequences = set()
    with open(regions["BED"]) as bedhandle:
        # Count from 1 so that all error messages agree on the line number.
        for (line_num, line) in enumerate(bedhandle, 1):
            line = line.strip()
            if not line or line.startswith("#"):
                continue

            try:
                bed = BEDRecord(line)
            except ValueError as error:
                raise MakefileError(
                    ("Error parsing line %i in regions file:\n"
                     "  Path = %r\n  Line = %r\n\n%s") %
                    (line_num, regions["BED"], line, error))

            if len(bed) < 6:
                url = "http://genome.ucsc.edu/FAQ/FAQformat.html#format1"
                # The name column only exists if there are 4 or more fields.
                name = repr(bed.name) if len(bed) > 3 else "unnamed record"
                raise MakefileError(("Region at line #%i (%s) does not "
                                     "contain the expected number of fields; "
                                     "the first 6 fields are required. C.f. "
                                     "definition at\n   %s\n\nPath = %r") %
                                    (line_num, name, url, regions["BED"]))

            contig_len = contigs.get(bed.contig)
            if contig_len is None:
                raise MakefileError(("Regions file contains contig not found "
                                     "in reference:\n  Path = %r\n  Contig = "
                                     "%r\n\nPlease ensure that all contig "
                                     "names match the reference names!") %
                                    (regions["BED"], bed.contig))
            elif not (0 <= bed.start < bed.end <= contig_len):
                raise MakefileError(("Regions file contains invalid region:\n"
                                     "  Path   = %r\n  Contig = %r\n"
                                     "  Start  = %s\n  End    = %s\n\n"
                                     "Expected 0 <= Start < End <= %i!") %
                                    (regions["BED"], bed.contig, bed.start,
                                     bed.end, contig_len))

            sequences.add(bed.name)
    # NOTE(review): the collected names are not returned by the code visible
    # here -- confirm whether callers expect `sequences` back.
Example #4
0
 def _stat_areas_of_interest(cls, prefixes):
     """Summarise the regions-of-interest BED files of each prefix.

     Returns a dict keyed by (prefix name, ROI name). Each value is a dict
     with the summed interval length ("Size"), the number of distinct
     feature names ("NFeatures"), the number of records ("NIntervals"),
     the prefix's "Name" entry ("Genome"), the ROI name ("Name"), a
     "<label>:<roi name>" string ("Label") and the BED filename ("Path").
     """
     summaries = {}
     for (prefix_name, prefix) in prefixes.iteritems():
         # Fall back to the prefix's own key when no explicit label is set.
         label = prefix.get("Label", prefix_name)
         rois = prefix.get("RegionsOfInterest", {})
         for (roi_name, roi_filename) in rois.iteritems():
             feature_names = set()
             total_size = 0
             num_intervals = 0
             with open(roi_filename) as handle:
                 for line in handle:
                     record = BEDRecord(line)
                     if len(record) >= 4:
                         feature_names.add(record.name)
                     else:
                         # No name column: count one feature per contig.
                         feature_names.add(record.contig + "*")
                     total_size += record.end - record.start
                     num_intervals += 1

             summaries[(prefix_name, roi_name)] = {
                 "Size": total_size,
                 "NFeatures": len(feature_names),
                 "NIntervals": num_intervals,
                 "Genome": prefix["Name"],
                 "Name": roi_name,
                 "Label": "%s:%s" % (label, roi_name),
                 "Path": roi_filename,
             }
     return summaries
Example #5
0
def test_bedrecord__setters__unset_fields__after_end():
    """Setting strand on records of 0 to 6 columns yields a 6-column line.

    Each case pairs the input line with the expected str() output after
    strand has been set to "-".
    """
    cases = (
        ("", "\t0\t0\t\t0\t-"),
        ("my_name", "my_name\t0\t0\t\t0\t-"),
        ("my_name\t17", "my_name\t17\t0\t\t0\t-"),
        ("my_name\t17\t258", "my_name\t17\t258\t\t0\t-"),
        ("my_name\t17\t258\tregion", "my_name\t17\t258\tregion\t0\t-"),
        ("my_name\t17\t258\tregion\t33", "my_name\t17\t258\tregion\t33\t-"),
        # An existing strand column is simply overwritten.
        ("my_name\t17\t258\tregion\t33\t+", "my_name\t17\t258\tregion\t33\t-"),
    )

    for text, expected in cases:
        bed = BEDRecord(text)
        bed.strand = "-"
        assert_equal(str(bed), expected)