예제 #1
0
def check_get_transcript_from_args_add_three_tabix(fmt):
    """Check transcript and CDS endpoints parsed from tabix-indexed annotations

    Expected coordinates are read from the plain BED reference file
    (fields 2-3 for the transcript span, fields 7-8 for the CDS) and
    compared against the transcripts yielded by `get_transcripts_from_args`
    for the tabix-indexed reference file of format `fmt`.

    Parameters
    ----------
    fmt : str
        Annotation format to test; must be ``"BED"`` or ``"GTF2"``
    """
    # NOTE(review): despite the function name, `--add_three` is not passed
    # below, and expected coordinates are taken unmodified from the BED
    # file -- confirm this is intended.
    bed_ref = REF_FILES["100transcripts_bed"]
    cds = {}
    tx = {}
    # context manager ensures the reference file is closed after parsing
    with open(bed_ref) as bed_fh:
        for line in bed_fh:
            items = line.strip("\n").split("\t")
            name = items[3]

            tx_start = int(items[1])
            tx_end = int(items[2])
            tx[name] = (tx_start, tx_end)

            cds_start = int(items[6])
            cds_end = int(items[7])
            if cds_start < cds_end:
                cds[name] = (cds_start, cds_end)
            else:
                # no usable CDS interval in the BED line: expect no CDS
                cds[name] = (None, None)

    ref_files = {
        "BED": REF_FILES["100transcripts_bed_tabix"],
        "GTF2": REF_FILES["100transcripts_gtf_tabix"],
    }
    parser = get_annotation_file_parser()
    argstr = "--annotation_format %s --annotation_files %s --tabix" % (fmt, ref_files[fmt])
    args = parser.parse_args(shlex.split(argstr))
    transcripts = list(get_transcripts_from_args(args))

    for my_tx in transcripts:
        name = my_tx.get_name()
        expected_cds_start, expected_cds_end = cds[name]
        expected_tx_start, expected_tx_end = tx[name]

        found_cds_start = my_tx.cds_genome_start
        found_cds_end = my_tx.cds_genome_end

        found_tx_start = my_tx.spanning_segment.start
        found_tx_end = my_tx.spanning_segment.end

        if found_cds_start is None:
            # a transcript without a CDS start must have no CDS end either,
            # and the reference must agree that no CDS exists
            assert_true(found_cds_end is None)
            assert_true(expected_cds_start is None, "Unequal CDS start (expected %s, got %s) on transcript %s" % (expected_cds_start, found_cds_start, name))
            assert_true(expected_cds_end is None, "Unequal CDS stop (expected %s, got %s) on transcript %s" % (expected_cds_end, found_cds_end, name))
        else:
            assert_equal(found_cds_start, expected_cds_start, "Unequal CDS start (expected %s, got %s) on transcript %s" % (expected_cds_start, found_cds_start, name))
            assert_equal(found_cds_end, expected_cds_end, "Unequal CDS stop (expected %s, got %s) on transcript %s" % (expected_cds_end, found_cds_end, name))

        assert_equal(found_tx_start, expected_tx_start, "Unequal transcript start (expected %s, got %s) on transcript %s" % (expected_tx_start, found_tx_start, name))
        assert_equal(found_tx_end, expected_tx_end, "Unequal transcript end (expected %s, got %s) on transcript %s" % (expected_tx_end, found_tx_end, name))

    # Checked once, outside the loop, so that an empty result set fails
    # instead of silently skipping every assertion.
    assert_equal(len(transcripts), len(cds), "Not all transcripts found in input. Expected %s. Got %s." % (len(cds), len(transcripts)))
예제 #2
0
def test_get_transcript_from_args_multiple_bigbed_raises_error():
    """Passing two BigBed annotation files must exit with ``SystemExit``"""
    bigbed = MINI["bigbed_file"]
    # the same BigBed file is listed twice on the command line
    duplicated = " ".join((bigbed, bigbed))
    argstr = "--annotation_format BigBed --annotation_files %s " % duplicated
    tokens = shlex.split(argstr)
    args = get_annotation_file_parser().parse_args(tokens)
    assert_raises(SystemExit, get_transcripts_from_args, args)
예제 #3
0
    "add_three",
    "annotation_files",
    "tabix"
}
# GFF-specific option names
annotation_file_gff_opts = {"gff_exon_types", "gff_transcript_types"}

# Annotation parser options with "annotation_files" removed
# (presumably the options that tests may disable -- confirm against usage)
annotation_file_parser_disableable = annotation_file_parser_opts - {"annotation_files"}

# Interval-collection option sets: same as the annotation sets, minus "add_three"
ivcollection_file_parser_opts = annotation_file_parser_opts - {"add_three"}
ivcollection_file_parser_disableable = annotation_file_parser_disableable - {"add_three"}

# Mask-file option sets are aliases of the interval-collection sets
mask_file_parser_opts = ivcollection_file_parser_opts
mask_file_parser_disableable = ivcollection_file_parser_disableable

# Parser objects, created once when the module is loaded
alignment_file_parser = get_alignment_file_parser()
annotation_file_parser = get_annotation_file_parser()
ivcollection_file_parser = get_segmentchain_file_parser()
sequence_file_parser = get_sequence_file_parser()
mask_file_parser = get_mask_file_parser()

def check_prefix(parser_fn,opts):
    """Helper function to test prefix appending to various parsers

    Parameters
    ----------
    parser_fn : function
        Function that returns a :py:class:`~argparse.ArgumentParser`

    opts : list
        list of strings of options, without prefixes, included
        in the parser