def umi_visualization(bams, chrom, start, end, output, coor):
    """Render the given BAM/VCF files over a region, colored by UMI, as an SVG.

    A single 1000px-wide document is built containing a region label, an
    axis, and one track per entry of ``bams``. The result is written to
    ``'{output}.svg'``.

    Args:
        bams: iterable of file paths. A path ending in ``.vcf.gz`` becomes a
            ``VCFTrack``; every other path becomes a paired-end BAM track.
        chrom: chromosome (contig) name of the region.
        start: region start; the view itself is clamped to ``max(0, start)``
            while the label shows the value as given.
        end: region end.
        output: output path without the ``.svg`` extension.
        coor: colon-separated coordinate string. When it has exactly three
            ``:``-separated fields, only UMIs counted more than once get a
            color and reads are filtered with ``filter_by_umi``; otherwise
            every UMI in ``any_umi`` gets a color and reads are filtered
            with ``filter_by_coor``.

    Side effects:
        Rebinds the module globals ``umi_colors`` (reset once per call and
        shared across all BAMs), ``count`` and ``colors`` (overwritten for
        each BAM). Presumably these are read by ``color_by_umi`` and the
        filter callbacks -- confirm against their definitions.
    """
    global umi_colors, count, colors

    doc = genomeview.Document(1000)
    umi_colors = {}

    # Reference genome and the view that hosts every track.
    reference = genomeview.FastaGenomeSource(REF_FA)
    view = genomeview.GenomeView(chrom, max(0, start), end, "+", reference)
    view.add_track(genomeview.Axis())
    region_label = '{}:{}-{}'.format(chrom, start, end)
    view.tracks.insert(0, genomeview.track.TrackLabel(region_label))

    for path in bams:
        track_name = path.split('/')[-1]

        if path.endswith('.vcf.gz'):
            # Variant file: nothing UMI-related to configure.
            view.add_track(VCFTrack(path, track_name, chrom, start, end))
            continue

        # Alignment file.
        track = genomeview.PairedEndBAMTrack(path, name=track_name)
        view.add_track(track)
        track.nuc_colors = {
            "A": "blue",
            "C": "orange",
            "G": "green",
            "T": "black",
            "N": "gray"
        }
        track.quick_consensus = False

        count, _frag_draw, umi_draw, _umi_all, any_umi = stats_umi(
            path, chrom, start, end)

        if len(coor.split(':')) == 3:
            colors = ColorIter(umi_draw)
            # Only UMIs seen more than once get a color; a UMI already
            # colored by an earlier BAM keeps that color.
            for umi, n_reads in count.items():
                if n_reads > 1 and umi not in umi_colors:
                    umi_colors[umi] = colors.next_color()
            track.color_fn = color_by_umi
            track.include_read_fn = filter_by_umi
        else:
            colors = ColorIter(len(any_umi))
            # Every listed UMI is (re)assigned a fresh color.
            for umi in any_umi:
                umi_colors[umi] = colors.next_color()
            track.include_read_fn = filter_by_coor
            track.color_fn = color_by_umi

    doc.elements.append(view)
    genomeview.save(doc, '{}.svg'.format(output))
def bam_doc():
    """Build and return a document showing the HG002 test BAM on chr4.

    The document is 900px wide and holds one forward-strand view of the
    250 bp window starting at chr4:96549060, backed by the FASTA returned
    from ``reference_path()``. The view contains a single-end BAM track
    (``min_indel_size`` set to 3) followed by an axis track.
    """
    import genomeview

    region_start = 96549060

    reference = genomeview.FastaGenomeSource(reference_path())
    document = genomeview.Document(900)
    genome_view = genomeview.GenomeView(
        "chr4", region_start, region_start + 250, "+", reference)
    document.add_view(genome_view)

    reads_track = genomeview.SingleEndBAMTrack(
        "data/quick_consensus_test.bam", name="HG002")
    reads_track.min_indel_size = 3
    genome_view.add_track(reads_track)

    genome_view.add_track(genomeview.Axis())

    return document
def test_view_without_source():
    """Check BAM rendering when the view has no reference genome source.

    With ``draw_mismatches`` disabled the document must save without error.
    Once ``draw_mismatches`` is enabled, saving is expected to raise
    ``AssertionError``.
    """
    import genomeview

    doc = genomeview.Document(900)

    # No source argument: the view has no reference sequence available.
    view = genomeview.GenomeView("chr4", 96549060, 96549060+1000, "+")
    doc.add_view(view)

    bam_track_hg002 = genomeview.SingleEndBAMTrack(
        "data/quick_consensus_test.bam", name="HG002")
    bam_track_hg002.draw_mismatches = False
    view.add_track(bam_track_hg002)

    axis_track = genomeview.Axis()
    view.add_track(axis_track)

    genomeview.save(doc, "results/temp_without_source.svg")

    # Flip the flag outside the ``raises`` block so that only the save call
    # itself can satisfy the expected AssertionError.
    bam_track_hg002.draw_mismatches = True
    with pytest.raises(AssertionError):
        genomeview.save(doc, "results/temp_without_source_error.svg")
def visualize_data(file_paths, chrom, start, end, reference_path=None,
                   width=900, axis_on_top=False):
    """
    Creates a GenomeView document to display the data in the specified
    files (eg bam, bed, etc).

    Args:
        file_paths: this specifies the file paths to be rendered. It must be
            either a list/tuple of the paths, or a dictionary mapping
            {track_name:path}. (If you are using a python version prior to
            3.6, use collections.OrderedDict to ensure the order remains the
            same.) Currently supports files ending in .bam, .cram, .bed,
            .bed.gz, .bigbed, or .bigwig (or .bw). Most of these file types
            require a separate index file to be present (eg a .bam.bai or a
            .bed.gz.tbi file must exist).
        chrom: chromosome (or contig) to be rendered
        start: start coordinate of region to be rendered
        end: end coordinate of region to be rendered
        reference_path: path to fasta file specifying reference genomic
            sequence. This is required in order to display mismatches in bam
            tracks.
        width: the pixel width of the document
        axis_on_top: specifies whether the axis should be added at the
            bottom (default) or at the top

    Returns:
        The ``genomeview.Document`` containing a single view with one track
        per input file plus an axis track.

    Raises:
        ValueError: if a path does not end in one of the supported suffixes
            (the comparison is case-insensitive).
    """
    # ``collections.Mapping`` was removed in Python 3.10; the ABC lives in
    # ``collections.abc``. Imported locally so the block is self-contained.
    from collections.abc import Mapping

    if reference_path is not None:
        source = genomeview.FastaGenomeSource(reference_path)
    else:
        source = None

    doc = genomeview.Document(width)
    view = genomeview.GenomeView(chrom, start, end, "+", source)
    doc.add_view(view)

    def add_axis():
        axis_track = genomeview.Axis("axis")
        view.add_track(axis_track)

    if axis_on_top:
        add_axis()

    if isinstance(file_paths, Mapping):
        # Mapping keys become the track names, in the mapping's own order.
        names = list(file_paths.keys())
        file_paths = [file_paths[name] for name in names]
    else:
        # Plain sequence of paths: tracks are left unnamed.
        names = [None] * len(file_paths)

    for name, path in zip(names, file_paths):
        lowered = path.lower()

        if lowered.endswith((".bam", ".cram")):
            if utilities.is_paired_end(path):
                cur_track = genomeview.PairedEndBAMTrack(path, name=name)
            else:
                cur_track = genomeview.SingleEndBAMTrack(path, name=name)
                # NOTE(review): nesting reconstructed from a flattened
                # source -- the long-fragment check is taken to apply to
                # single-end data only; confirm.
                if utilities.is_long_frag_dataset(path):
                    cur_track.min_indel_size = 5
        elif lowered.endswith((".bed", ".bed.gz", ".bigbed")):
            cur_track = genomeview.BEDTrack(path, name=name)
        elif lowered.endswith((".bigwig", ".bw")):
            cur_track = genomeview.BigWigTrack(path, name=name)
        else:
            suffix = os.path.basename(path)
            raise ValueError("Unknown file suffix: {}".format(suffix))

        view.add_track(cur_track)

    if not axis_on_top:
        add_axis()

    return doc