def test_find_location_in_bed():
    """Exercise bed_tools.find_location_in_bed with one hit and three misses.

    The author's original note describes the lookup as a binary search; this
    test only checks the values it returns for the NimbleGen tiled regions.
    """
    track_file = "../data/nimblegen/2.1M_Human_Exome_Annotation/2.1M_Human_Exome.bed"
    st2end_by_chrom, positions_by_chrom = bed_tools.load_bed(
        track_file, "NimbleGen Tiled Regions"
    )

    # A position that is in the file.
    found = bed_tools.find_location_in_bed(
        "chr5", 180603265, positions_by_chrom, st2end_by_chrom
    )
    nose.tools.assert_equal(
        found, 180603263, "did not find location, found " + str(found)
    )

    # Positions that are not in the file: too large, too small, and
    # between capture regions (in that order).
    expected_misses = (
        ("chrY", 26180101,
         "found location in too big test, but should not have, found "),
        ("chr12", 100,
         "found location in too small test, but should not have, found "),
        ("chr6", 170731150,
         "found location in between test, but should not have, found "),
    )
    for chrom, position, complaint in expected_misses:
        found = bed_tools.find_location_in_bed(
            chrom, position, positions_by_chrom, st2end_by_chrom
        )
        nose.tools.assert_true(not found, complaint + str(found))
def get_my_mutations(quality_cutoff, coverage_cutoff):
    """Collect 'yusan' somatic and inherited mutations inside the NimbleGen bed.

    For every entry of ``global_settings.exome_types`` the mutations are loaded
    with ``mutations.get_mutations``. Each 'yusan' key (a ``chrom:pos`` string)
    is kept only when ``bed_tools.find_location_in_bed`` returns a truthy value
    for it. The kept keys are finally intersected with
    ``get_murim_covered(quality_cutoff)``.

    Args:
        quality_cutoff: forwarded to ``mutations.get_mutations`` and to
            ``get_murim_covered``.
        coverage_cutoff: forwarded to ``mutations.get_mutations``.

    Returns:
        A tuple ``(somatic, inherited)`` of sets of ``chrom:pos`` keys.
    """
    bed_file = 'data/nimblegen/2.1M_Human_Exome_Annotation/2.1M_Human_Exome.bed'
    # Track used here is 'NimbleGen Tiled Regions'; the original note also
    # lists 'Target Regions' (presumably the other track in this bed file).
    bed_chr2st2end, bed_chr2posLs = bed_tools.load_bed(bed_file, 'NimbleGen Tiled Regions')

    def in_capture(key):
        # Keys look like 'chrom:pos'; the bed lookup wants an integer position.
        chrom, pos = key.split(':')
        return bed_tools.find_location_in_bed(chrom, int(pos), bed_chr2posLs, bed_chr2st2end)

    use_data_dir = '/home/perry/Projects/loh/data/all_non_ref_hg18/'
    cancer_qualities = mutations.get_consensus_qualities(use_data_dir + 'yusanT.ann')
    normal_qualities = mutations.get_consensus_qualities(use_data_dir + 'yusanN.ann')

    all_somatic = set()
    all_inherited = set()
    for exome in global_settings.exome_types:
        data_file = use_data_dir + exome
        inherited, somatic, _murim = mutations.get_mutations(
            data_file, normal_qualities, cancer_qualities,
            quality_cutoff, False, coverage_cutoff)
        # only use the bed_tools NimbleGen restriction for hg18 data
        for key in somatic['yusan']:
            if in_capture(key):
                all_somatic.add(key)
        for key in inherited['yusan']:
            # Test the inherited key itself. Splitting the leftover somatic
            # loop variable here would check the wrong coordinate (or raise
            # NameError when no somatic call had been iterated).
            if in_capture(key):
                all_inherited.add(key)

    # Computed once and reused for both intersections.
    murim_covered = set(get_murim_covered(quality_cutoff))
    return (all_somatic & murim_covered, all_inherited & murim_covered)
# Annotate every row of fileToAnnotate with two capture-status columns
# (strict: second loader argument 0, relaxed: second loader argument 100)
# and write the result, tab separated, to outFile.
captureDataStrict = bed_tools.load_nb222_exome_capture_driver('tmp.bed', 0)
captureDataRelaxed = bed_tools.load_nb222_exome_capture_driver('tmp.bed', 100)
capture_sets = (
    (captureDataStrict, 'STRICT'),
    (captureDataRelaxed, 'RELAXED'),
)

with open(fileToAnnotate) as src, open(outFile, 'w') as dst:
    table = csv.DictReader(src, delimiter='\t')
    columns = table.fieldnames

    # Header: the input columns followed by the two new capture columns.
    extra_columns = ['CAPTURE_STRICT_' + title, 'CAPTURE_RELAXED_' + title]
    print('\t'.join(columns) + '\t' + '\t'.join(extra_columns), file=dst)

    for record in table:
        # Only the start coordinate is looked up; the end is not used.
        start, _end = mkSnvBedFile.fixCoords(record)
        start = int(start)
        contig = "chr" + record['chrom']

        flags = []
        for capture, label in capture_sets:
            # The loader result is indexed as [0] and [1]; the lookup takes
            # element [1] before element [0].
            hit = bed_tools.find_location_in_bed(contig, start, capture[1], capture[0])
            template = 'CAP_%s_TRUE' if hit else 'CAP_%s_FALSE'
            flags.append(template % (label,))

        print('\t'.join([record[name] for name in columns] + flags), file=dst)

# Remove the temporary bed file used by the capture loaders.
os.system('rm tmp.bed')