def check_get_transcript_from_args_add_three_tabix(fmt):
    # Check endpoints of transcripts and CDS for transcripts loaded from a
    # tabix-compressed annotation file (GTF2 and BED format).
    #
    # Expected coordinates are read from the plain-text BED reference file
    # REF_FILES["100transcripts_bed"]; found coordinates come from the
    # transcripts returned by get_transcripts_from_args() for the
    # tabix-compressed file of the requested format.
    #
    # Parameters
    # ----------
    # fmt : str
    #     Key selecting the annotation file to test: "BED" or "GTF2"
    #
    # NOTE(review): despite "add_three" in the function name, `--add_three`
    # is not passed in the argument string below -- confirm this is intended.
    bed_ref = REF_FILES["100transcripts_bed"]
    cds = {}
    tx = {}

    # Read expectations inside a context manager so the reference file is
    # closed deterministically instead of being left to garbage collection.
    with open(bed_ref) as bed_fh:
        for line in bed_fh:
            items = line.strip("\n").split("\t")
            name = items[3]
            tx[name] = (int(items[1]), int(items[2]))

            cds_start = int(items[6])
            cds_end = int(items[7])
            # Equal (or inverted) CDS coordinates are recorded as "no CDS"
            if cds_start < cds_end:
                cds[name] = (cds_start, cds_end)
            else:
                cds[name] = (None, None)

    ref_files = {
        "BED": REF_FILES["100transcripts_bed_tabix"],
        "GTF2": REF_FILES["100transcripts_gtf_tabix"],
    }

    parser = get_annotation_file_parser()
    argstr = "--annotation_format %s --annotation_files %s --tabix" % (fmt, ref_files[fmt])
    args = parser.parse_args(shlex.split(argstr))

    # Materialize the transcripts so they can be counted after the loop
    transcripts = list(get_transcripts_from_args(args))

    for my_tx in transcripts:
        name = my_tx.get_name()
        expected_cds_start, expected_cds_end = cds[name]
        expected_tx_start, expected_tx_end = tx[name]

        found_cds_start = my_tx.cds_genome_start
        found_cds_end = my_tx.cds_genome_end
        found_tx_start = my_tx.spanning_segment.start
        found_tx_end = my_tx.spanning_segment.end

        if found_cds_start is None:
            # Transcript has no CDS: both found endpoints and both expected
            # endpoints must be None
            assert_true(found_cds_end is None)
            assert_true(expected_cds_start is None, "Unequal CDS start (expected %s, got %s) on transcript %s" % (expected_cds_start, found_cds_start, name))
            assert_true(expected_cds_end is None, "Unequal CDS stop (expected %s, got %s) on transcript %s" % (expected_cds_end, found_cds_end, name))
        else:
            assert_equal(found_cds_start, expected_cds_start, "Unequal CDS start (expected %s, got %s) on transcript %s" % (expected_cds_start, found_cds_start, name))
            assert_equal(found_cds_end, expected_cds_end, "Unequal CDS stop (expected %s, got %s) on transcript %s" % (expected_cds_end, found_cds_end, name))

        assert_equal(found_tx_start, expected_tx_start, "Unequal transcript start (expected %s, got %s) on transcript %s" % (expected_tx_start, found_tx_start, name))
        assert_equal(found_tx_end, expected_tx_end, "Unequal transcript end (expected %s, got %s) on transcript %s" % (expected_tx_end, found_tx_end, name))

    # Every transcript listed in the reference file must have been loaded
    assert_equal(len(transcripts), len(cds), "Not all transcripts found in input. Expected %s. Got %s." % (len(cds), len(transcripts)))
def test_get_transcript_from_args_multiple_bigbed_raises_error():
    # Passing the same BigBed file twice to `--annotation_files` must cause
    # get_transcripts_from_args() to raise SystemExit.
    bigbed = MINI["bigbed_file"]
    repeated_files = " ".join((bigbed, bigbed))

    # Trailing space after the file list is part of the original command line
    command_line = "--annotation_format BigBed --annotation_files %s " % repeated_files

    parsed = get_annotation_file_parser().parse_args(shlex.split(command_line))
    assert_raises(SystemExit, get_transcripts_from_args, parsed)
"add_three", "annotation_files", "tabix" } annotation_file_gff_opts = { "gff_exon_types", "gff_transcript_types"} annotation_file_parser_disableable = annotation_file_parser_opts - set(["annotation_files"]) ivcollection_file_parser_opts = annotation_file_parser_opts - set(["add_three"]) ivcollection_file_parser_disableable = annotation_file_parser_disableable - set(["add_three"]) mask_file_parser_opts = ivcollection_file_parser_opts mask_file_parser_disableable = ivcollection_file_parser_disableable alignment_file_parser = get_alignment_file_parser() annotation_file_parser = get_annotation_file_parser() ivcollection_file_parser = get_segmentchain_file_parser() sequence_file_parser = get_sequence_file_parser() mask_file_parser = get_mask_file_parser() def check_prefix(parser_fn,opts): """Helper function to test prefix appending to various parsers Parameters ---------- parser_fn : function Function that returns a :py:class:`~argparse.ArgumentParser` opts : list list of strings of options, without prefixes, included in the parser