Exemple #1
0
def test_find_location_in_bed():
    """Check find_location_in_bed on one position that hits and three that miss.

    The 'NimbleGen Tiled Regions' track is loaded once and reused for every
    lookup.
    """

    bed_path = "../data/nimblegen/2.1M_Human_Exome_Annotation/2.1M_Human_Exome.bed"
    chr2st2end, chr2posLs = bed_tools.load_bed(bed_path, "NimbleGen Tiled Regions")

    # A position that is in the file.
    hit = bed_tools.find_location_in_bed("chr5", 180603265, chr2posLs, chr2st2end)
    nose.tools.assert_equal(hit, 180603263, "did not find location, found " + str(hit))

    # Positions that are not in the file. Each entry is
    # (label used in the failure message, chromosome, position).
    missing_cases = (
        ("too big", "chrY", 26180101),      # too large
        ("too small", "chr12", 100),        # too small
        ("between", "chr6", 170731150),     # between capture regions
    )
    for label, chrom, position in missing_cases:
        found = bed_tools.find_location_in_bed(chrom, position, chr2posLs, chr2st2end)
        failure_msg = "found location in " + label + " test, but should not have, found " + str(found)
        nose.tools.assert_true(not found, failure_msg)
Exemple #2
0
def test_load_bed():
    """Check that the 'Target Regions' track loads with the expected shape.

    Asserts that ``chr2st2end`` has 24 chromosome keys and that, for every
    chromosome in ``chr2posLs``, the position list is strictly increasing
    and holds more than 400 entries.
    """

    bed_file = "../data/nimblegen/2.1M_Human_Exome_Annotation/2.1M_Human_Exome.bed"
    chr2st2end, chr2posLs = bed_tools.load_bed(bed_file, "Target Regions")
    chr_count = len(chr2st2end)
    nose.tools.assert_equal(chr_count, 24, "dont have 24 chrs. have " + str(chr_count))
    for chrom in chr2posLs:
        positions = chr2posLs[chrom]
        # Pair every position with its successor; unlike xrange-based
        # indexing this runs unchanged on both Python 2 and Python 3.
        for current, following in zip(positions, positions[1:]):
            nose.tools.assert_true(current < following, "chr2posLs is not sorted")
        nose.tools.assert_true(len(positions) > 400, chrom + " length is " + str(len(positions)))
def get_my_mutations(quality_cutoff, coverage_cutoff):
    """Collect 'yusan' mutations that lie inside the NimbleGen tiled regions.

    For every exome type in ``global_settings.exome_types`` the inherited and
    somatic 'yusan' calls are loaded from the hg18 data directory. A call is
    kept only when ``bed_tools.find_location_in_bed`` reports a location for
    it in the 'NimbleGen Tiled Regions' track. Both resulting sets are then
    intersected with ``get_murim_covered(quality_cutoff)``.

    Args:
        quality_cutoff: passed to ``mutations.get_mutations`` and to
            ``get_murim_covered``.
        coverage_cutoff: passed to ``mutations.get_mutations``.

    Returns:
        A ``(somatic, inherited)`` tuple of sets of 'chr:pos' mutation keys.
    """

    bed_file = 'data/nimblegen/2.1M_Human_Exome_Annotation/2.1M_Human_Exome.bed'
    # Track names mentioned for this file: 'NimbleGen Tiled Regions' and
    # 'Target Regions'; the tiled regions are the ones used here.
    bed_chr2st2end, bed_chr2posLs = bed_tools.load_bed(bed_file,
                                                       'NimbleGen Tiled Regions')

    def in_tiled_region(mutation):
        """Return the BED lookup result for a 'chr:pos' mutation key."""
        chrom, pos = mutation.split(':')
        return bed_tools.find_location_in_bed(chrom, int(pos),
                                              bed_chr2posLs,
                                              bed_chr2st2end)

    use_data_dir = '/home/perry/Projects/loh/data/all_non_ref_hg18/'
    cancer_qualities = mutations.get_consensus_qualities(use_data_dir + 'yusanT.ann')
    normal_qualities = mutations.get_consensus_qualities(use_data_dir + 'yusanN.ann')

    all_somatic = set()
    all_inherited = set()
    for exome in global_settings.exome_types:
        data_file = use_data_dir + exome
        inherited, somatic, _murim = mutations.get_mutations(data_file, normal_qualities,
                                                             cancer_qualities, quality_cutoff,
                                                             False, coverage_cutoff)
        # Only use the bed_tools NimbleGen restriction for hg18 data.
        all_somatic.update(s for s in somatic['yusan'] if in_tiled_region(s))
        # Each inherited call is checked at its own position. The previous
        # code split the leftover somatic key here, so the wrong position
        # was looked up for every inherited call.
        all_inherited.update(i for i in inherited['yusan'] if in_tiled_region(i))

    # Evaluate the covered set once and reuse it for both intersections.
    murim_covered = set(get_murim_covered(quality_cutoff))
    return (all_somatic & murim_covered, all_inherited & murim_covered)
Exemple #4
0
def get_mutations_yusan(quality_cutoff, coverage_cutoff):
    """Load per-exome mutations from data/all_non_ref_hg18.

    Both cutoffs are forwarded to ``mutations.get_mutations``.

    Returns a dict keyed by each entry of ``global_settings.exome_types``;
    every value is the ``(inherited, somatic)`` pair returned for that exome.
    """

    # NOTE(review): the BED track is loaded here but its result is never
    # read in this function; the call is kept so behaviour is unchanged.
    # Track names mentioned for this file: 'NimbleGen Tiled Regions',
    # 'Target Regions'.
    bed_path = 'data/nimblegen/2.1M_Human_Exome_Annotation/2.1M_Human_Exome.bed'
    _bed_chr2st2end, _bed_chr2posLs = bed_tools.load_bed(bed_path,
                                                         'NimbleGen Tiled Regions')

    data_dir = '/home/perry/Projects/loh/data/all_non_ref_hg18/'
    qualities_cancer = call_class.get_consensus_qualities(data_dir + 'yusanT.ann')
    qualities_normal = call_class.get_consensus_qualities(data_dir + 'yusanN.ann')

    per_exome = {}
    for exome_type in global_settings.exome_types:
        # The third element of the result is not needed here.
        inherited_calls, somatic_calls, _unused = mutations.get_mutations(
            data_dir + exome_type,
            qualities_normal,
            qualities_cancer,
            quality_cutoff,
            False,
            coverage_cutoff)
        per_exome[exome_type] = (inherited_calls, somatic_calls)
    return per_exome