def test_bedrecord__setters__3_fields():
    """Assigning to fields of a 3-column record is reflected in the
    attributes as well as in the str() and repr() of the record."""
    bed = BEDRecord("my_contig\t12\t345")

    # Replace the contig name outright
    bed.contig = "chrZ"
    assert_equal(bed.contig, "chrZ")

    # In-place arithmetic on a numeric field
    bed.end += 20
    assert_equal(bed.end, 365)

    expected_str = "chrZ\t12\t365"
    expected_repr = "BEDRecord(contig='chrZ', start=12, end=365)"
    assert_equal(str(bed), expected_str)
    assert_equal(repr(bed), expected_repr)
def test_bedrecord__setters__unset_fields__at_end():
    """Fields missing from a 3-column record can be assigned one after
    another (name, score, strand) and show up in str() and repr()."""
    bed = BEDRecord("my_contig\t12\t345")

    # Assign each field in column order and read it back immediately
    assignments = (
        ("name", "my_region"),
        ("score", -13),
        ("strand", '-'),
    )
    for (field, value) in assignments:
        setattr(bed, field, value)
        assert_equal(getattr(bed, field), value)

    expected_str = "my_contig\t12\t345\tmy_region\t-13\t-"
    expected_repr = ("BEDRecord(contig='my_contig', start=12, end=345, "
                     "name='my_region', score=-13, strand='-')")
    assert_equal(str(bed), expected_str)
    assert_equal(repr(bed), expected_repr)
def _collect_and_validate_regions(regions):
    """Validates the BED file at regions["BED"] against the contigs
    obtained from _collect_fasta_contigs(regions).

    Blank lines and lines starting with '#' are skipped. Every remaining
    line must parse as a BEDRecord, contain at least 6 fields, name a known
    contig, and satisfy 0 <= start < end <= contig length. The name of each
    valid record is collected in a set.

    Raises MakefileError on the first line that fails any of these checks;
    line numbers in error messages are 1-based.
    """
    # Presumably a mapping of contig name -> contig length; it is only used
    # via .get() below, where a result of None means "unknown contig".
    contigs = _collect_fasta_contigs(regions)
    sequences = set()
    with open(regions["BED"]) as bedhandle:
        # Count lines from 1, so that every error message below reports the
        # same (human-readable) line number for a given line.
        for (line_num, line) in enumerate(bedhandle, 1):
            line = line.strip()
            if not line or line.startswith("#"):
                continue

            try:
                bed = BEDRecord(line)
            except ValueError as error:
                raise MakefileError(
                    ("Error parsing line %i in regions file:\n"
                     "  Path = %r\n  Line = %r\n\n%s")
                    % (line_num, regions["BED"], line, error))

            if len(bed) < 6:
                url = "http://genome.ucsc.edu/FAQ/FAQformat.html#format1"
                # The name is the 4th column; it is only available if the
                # record contains more than the 3 mandatory columns.
                name = repr(bed.name) if len(bed) > 3 else "unnamed record"
                raise MakefileError(("Region at line #%i (%s) does not "
                                     "contain the expected number of fields; "
                                     "the first 6 fields are required. C.f. "
                                     "definition at\n   %s\n\nPath = %r")
                                    % (line_num, name, url, regions["BED"]))

            contig_len = contigs.get(bed.contig)
            if contig_len is None:
                raise MakefileError(("Regions file contains contig not found "
                                     "in reference:\n  Path = %r\n  Contig = "
                                     "%r\n\nPlease ensure that all contig "
                                     "names match the reference names!")
                                    % (regions["BED"], bed.contig))
            elif not (0 <= bed.start < bed.end <= contig_len):
                raise MakefileError(("Regions file contains invalid region:\n"
                                     "  Path   = %r\n  Contig = %r\n"
                                     "  Start  = %s\n  End    = %s\n\n"
                                     "Expected 0 <= Start < End <= %i!")
                                    % (regions["BED"], bed.contig, bed.start,
                                       bed.end, contig_len))

            # NOTE(review): the collected names are not returned by the code
            # visible here -- confirm whether callers expect a return value.
            sequences.add(bed.name)
def _stat_areas_of_interest(cls, prefixes):
    """Collects summary statistics for every region-of-interest BED file
    listed under "RegionsOfInterest" for each prefix.

    Returns a dict keyed by (prefix name, ROI name); each value is a dict
    with the keys "Size" (sum of end - start over all records),
    "NFeatures" (number of distinct record names), "NIntervals" (number of
    records), "Genome", "Name", "Label", and "Path".
    """
    stats = {}
    for (prefix_name, prefix) in prefixes.iteritems():
        # Fall back to the prefix name if no explicit label was given
        label = prefix.get("Label", prefix_name)
        rois = prefix.get("RegionsOfInterest", {})

        for (roi_name, roi_filename) in rois.iteritems():
            n_intervals = 0
            total_size = 0
            feature_names = set()

            with open(roi_filename) as handle:
                for line in handle:
                    record = BEDRecord(line)

                    # Records without a name column are grouped per contig,
                    # using the contig name followed by '*' as a stand-in
                    if len(record) >= 4:
                        feature_names.add(record.name)
                    else:
                        feature_names.add(record.contig + "*")

                    total_size += (record.end - record.start)
                    n_intervals += 1

            entry = {}
            entry["Size"] = total_size
            entry["NFeatures"] = len(feature_names)
            entry["NIntervals"] = n_intervals
            entry["Genome"] = prefix["Name"]
            entry["Name"] = roi_name
            entry["Label"] = "%s:%s" % (label, roi_name)
            entry["Path"] = roi_filename

            stats[(prefix_name, roi_name)] = entry

    return stats
def test_bedrecord__setters__unset_fields__after_end():
    """Setting the strand (6th column) on records with 0 to 6 columns
    fills any intermediate columns and yields a full 6-column string."""
    # Pairs of (input line, expected str() after setting strand to '-')
    cases = (
        ("",
         "\t0\t0\t\t0\t-"),
        ("my_name",
         "my_name\t0\t0\t\t0\t-"),
        ("my_name\t17",
         "my_name\t17\t0\t\t0\t-"),
        ("my_name\t17\t258",
         "my_name\t17\t258\t\t0\t-"),
        ("my_name\t17\t258\tregion",
         "my_name\t17\t258\tregion\t0\t-"),
        ("my_name\t17\t258\tregion\t33",
         "my_name\t17\t258\tregion\t33\t-"),
        ("my_name\t17\t258\tregion\t33\t+",
         "my_name\t17\t258\tregion\t33\t-"),
    )

    for (text, expected) in cases:
        bed = BEDRecord(text)
        bed.strand = "-"
        assert_equal(str(bed), expected)