Exemple #1
0
def test_find_location_in_bed():
    """Exercise bed_tools.find_location_in_bed on one hit and three misses.

    The BED file is loaded once with the "NimbleGen Tiled Regions" track and
    every query reuses the two lookup structures returned by load_bed.
    (The original author's note describes the lookup as a binary search.)
    """
    bed_path = "../data/nimblegen/2.1M_Human_Exome_Annotation/2.1M_Human_Exome.bed"
    chr2st2end, chr2posLs = bed_tools.load_bed(bed_path, "NimbleGen Tiled Regions")

    def lookup(chrom, position):
        # All queries share the same loaded structures.
        return bed_tools.find_location_in_bed(chrom, position, chr2posLs, chr2st2end)

    # A position that is in the file; the expected value is presumably the
    # start of the region containing it -- TODO confirm against the BED file.
    hit = lookup("chr5", 180603265)
    nose.tools.assert_equal(hit, 180603263, "did not find location, found " + str(hit))

    # Positions that are not in the file: (chromosome, position, message prefix).
    expected_misses = (
        # too large
        ("chrY", 26180101, "found location in too big test, but should not have, found "),
        # too small
        ("chr12", 100, "found location in too small test, but should not have, found "),
        # between capture regions
        ("chr6", 170731150, "found location in between test, but should not have, found "),
    )
    for chrom, position, message_prefix in expected_misses:
        miss = lookup(chrom, position)
        nose.tools.assert_true(not miss, message_prefix + str(miss))
def get_my_mutations(quality_cutoff, coverage_cutoff):
    """Collect 'yusan' somatic and inherited mutations that fall in the BED regions.

    For every exome type in global_settings.exome_types, mutations are read
    with mutations.get_mutations and kept only when
    bed_tools.find_location_in_bed reports a location for their coordinates
    in the "NimbleGen Tiled Regions" track. The surviving mutations are then
    intersected with the result of get_murim_covered(quality_cutoff).

    Args:
        quality_cutoff: forwarded to mutations.get_mutations and to
            get_murim_covered.
        coverage_cutoff: forwarded to mutations.get_mutations.

    Returns:
        A tuple ``(somatic, inherited)`` of sets of mutation keys. Keys are
        split on ':' into chromosome and position, so they are presumably
        "chrom:pos" strings -- confirm against mutations.get_mutations.
    """
    # The author's notes list 'Target Regions' as the alternative to
    # 'NimbleGen Tiled Regions' for the second argument.
    bed_file = 'data/nimblegen/2.1M_Human_Exome_Annotation/2.1M_Human_Exome.bed'
    bed_chr2st2end, bed_chr2posLs = bed_tools.load_bed(bed_file,
                                                       'NimbleGen Tiled Regions')

    def _in_capture(mutation):
        # Each mutation must be tested with its OWN coordinates; splitting a
        # variable left over from another loop would filter on the wrong site.
        chrom, pos = mutation.split(':')
        return bed_tools.find_location_in_bed(chrom, int(pos),
                                              bed_chr2posLs,
                                              bed_chr2st2end)

    use_data_dir = '/home/perry/Projects/loh/data/all_non_ref_hg18/'
    cancer_qualities = mutations.get_consensus_qualities(use_data_dir + 'yusanT.ann')
    normal_qualities = mutations.get_consensus_qualities(use_data_dir + 'yusanN.ann')

    all_somatic = set()
    all_inherited = set()
    for exome in global_settings.exome_types:
        data_file = use_data_dir + exome
        # The third returned value is not needed here.
        inherited, somatic, _ = mutations.get_mutations(data_file, normal_qualities,
                                                        cancer_qualities, quality_cutoff,
                                                        False, coverage_cutoff)
        # only use the bed_tools NimbleGen
        # restriction for hg18 data
        all_somatic.update(s for s in somatic['yusan'] if _in_capture(s))
        all_inherited.update(i for i in inherited['yusan'] if _in_capture(i))

    # Evaluate the covered set once and reuse it for both intersections.
    murim_covered = set(get_murim_covered(quality_cutoff))
    return (all_somatic & murim_covered, all_inherited & murim_covered)
    
    # NOTE(review): these statements follow a `return`, so as positioned they
    # never execute. They use fileToAnnotate, outFile and title, none of which
    # is defined in the visible code -- this looks like the body of a separate
    # function whose `def` line is missing. Confirm before relying on it.
    #
    # Purpose: copy a tab-delimited file to outFile, appending two columns that
    # say whether each row's start coordinate is found in the capture data.

    # Load the capture data from 'tmp.bed' twice; the second argument (0 vs 100)
    # is presumably a padding/slop around regions -- TODO confirm in bed_tools.
    captureDataStrict = bed_tools.load_nb222_exome_capture_driver('tmp.bed', 0)
    captureDataRelaxed = bed_tools.load_nb222_exome_capture_driver('tmp.bed', 100)
    with open(fileToAnnotate) as f, open(outFile, 'w') as fout:
        reader = csv.DictReader(f, delimiter='\t')
        # Header: the input's column names plus the two new capture columns.
        print('\t'.join(reader.fieldnames) + '\t'
              + '\t'.join(['CAPTURE_STRICT_' + title,
                           'CAPTURE_RELAXED_' + title,
                           ]),
              file=fout)
        for row in reader:
            chrom = row['chrom']
            # Only the start coordinate is used below; `end` is never read.
            st,end = mkSnvBedFile.fixCoords(row)
            st = int(st)

            # Maps 'STRICT' / 'RELAXED' to the status string written for this row.
            status = {}
            for captureData, label in ( (captureDataStrict, 'STRICT'),
                                        (captureDataRelaxed, 'RELAXED') ):
                # The input 'chrom' values get a "chr" prefix for the lookup.
                # captureData is indexed as [1], [0]; presumably it is a
                # (chr2st2end, chr2posLs) pair like load_bed returns -- confirm.
                captureStatusSt = bed_tools.find_location_in_bed("chr" + chrom, st, captureData[1], captureData[0])
                if captureStatusSt:
                    status[label] = 'CAP_%s_TRUE' % (label,)
                else:
                    status[label] = 'CAP_%s_FALSE' % (label,)

            # Output row: original columns in header order, then both statuses.
            print('\t'.join([row[x] for x in reader.fieldnames]
                            + [status['STRICT'],
                               status['RELAXED'],
                               ]),
                  file=fout)
    # Remove the temporary BED file by shelling out to `rm`.
    os.system('rm tmp.bed')