def test_junction_extraction_from_splitmap():
    """Extract junctions from the test reads, merged with GTF junctions.

    Opens the ``reads_1.fastq`` test file, loads the junctions found in
    ``refseq.gtf`` into a set and passes that set as ``merge_with`` to
    ``gem.extract_junctions`` together with the ``genome.gem`` index.
    The test passes when the call returns a non-None result of length 260.
    """
    # Named `reads` rather than `input` so the builtin is not shadowed.
    reads = files.open(testfiles["reads_1.fastq"])
    index = testfiles["genome.gem"]
    gtf_junctions = set(junctions.from_gtf(testfiles["refseq.gtf"]))
    jj = gem.extract_junctions(reads, index, merge_with=gtf_junctions)
    # Check the extraction result itself. The previous assertion tested
    # `junctions`, the object `from_gtf` was just called on, which can never
    # be None at this point, so it verified nothing about the result.
    assert jj is not None
    assert len(jj) == 260
# us a mapping that preserves short indels detected during the # extraction run. print "Loading GTF junctions from %s" % annotation junctions = gem.junctions.from_gtf(annotation) # now the denovo run. This returns a tuple : (mapping, junctions) # and here we use the merge_with parameter to merge the denovo junctions with # the previously loaded gtf junctions. # # Also note that we pass only unmapped reads from the initial mapping to the junction # extraction. This is done using the "unmapped filter" print "Getting de-novo junctions" (denovo_mapping, junctions) = gem.extract_junctions( gem.filter.unmapped(initial_mapping), # only unmapped reads from the initial mapping index, denovo_out, mismatches=0.04, threads=THREADS, merge_with=set(junctions) ) ## we filter the junctions now by their distance and # write all junctions with a distance <= 500000 to a file print "Writing junctions file" gem.junctions.write_junctions(gem.junctions.filter_by_distance(junctions, 500000), junctions_out, index) ## Initial split map run with junction sites # Here we take all unmapped reads after the denovo junction detection and # pass them to the split mapper along with the junctions print "Running initial split-map" initial_split_mapping = gem.splitmapper(