Exemple #1
0
def umi_visualization(bams, chrom, start, end, output, coor):
    """Draw the given BAM/VCF files over one region and save it as an SVG.

    A 1000-pixel-wide document is built with a region label, an axis, and one
    track per entry of ``bams``; the result is written to ``<output>.svg``.

    Args:
        bams: iterable of file paths. A path ending in ``.vcf.gz`` is added as
            a ``VCFTrack``; every other path is added as a paired-end BAM
            track. The track name is the last ``/``-separated path component.
        chrom: chromosome (or contig) name of the region.
        start: region start; negative values are clamped to 0 for the view
            (the label still shows the value as given).
        end: region end.
        output: output path without the ``.svg`` extension.
        coor: ``:``-separated string. When it has exactly three fields the
            BAM tracks use ``filter_by_umi``; otherwise ``filter_by_coor``.

    Side effects:
        Rebinds the module-level names ``umi_colors``, ``count`` and
        ``colors``.
        NOTE(review): presumably these are read by the ``color_by_umi`` /
        ``filter_by_*`` callbacks -- confirm against their definitions.
    """
    global umi_colors, count, colors

    document = genomeview.Document(1000)
    umi_colors = {}

    # Reference-backed view with an axis and a region label placed first.
    reference = genomeview.FastaGenomeSource(REF_FA)
    view = genomeview.GenomeView(chrom, max(0, start), end, "+", reference)
    view.add_track(genomeview.Axis())
    region_label = genomeview.track.TrackLabel(
        '{}:{}-{}'.format(chrom, start, end))
    view.tracks.insert(0, region_label)

    for path in bams:
        track_name = path.split('/')[-1]

        if path.endswith('.vcf.gz'):
            # Variant file: nothing to configure beyond adding the track.
            view.add_track(VCFTrack(path, track_name, chrom, start, end))
            continue

        read_track = genomeview.PairedEndBAMTrack(path, name=track_name)
        view.add_track(read_track)
        read_track.nuc_colors = dict(
            A="blue", C="orange", G="green", T="black", N="gray")
        read_track.quick_consensus = False

        count, frag_draw, umi_draw, umi_all, any_umi = stats_umi(
            path, chrom, start, end)

        # Two ':' separators <=> exactly three fields in ``coor``.
        if coor.count(':') == 2:
            colors = ColorIter(umi_draw)
            # Only UMIs counted more than once get a color, and a UMI that
            # already has one (from an earlier file) keeps it.
            for umi, n_reads in count.items():
                if n_reads > 1 and umi not in umi_colors:
                    umi_colors[umi] = colors.next_color()
            read_track.include_read_fn = filter_by_umi
        else:
            colors = ColorIter(len(any_umi))
            # Every UMI in ``any_umi`` is (re)assigned a color.
            for umi in any_umi:
                umi_colors[umi] = colors.next_color()
            read_track.include_read_fn = filter_by_coor

        read_track.color_fn = color_by_umi

    document.elements.append(view)
    genomeview.save(document, '{}.svg'.format(output))
Exemple #2
0
def bam_doc():
    """Build and return a 900-pixel document for a 250 bp window on chr4.

    The view uses the FASTA at ``reference_path()`` as its genome source and
    holds a single-end BAM track named "HG002" (``min_indel_size`` set to 3)
    followed by an axis track.

    Returns:
        The populated ``genomeview.Document``.
    """
    import genomeview

    window_start = 96549060

    reference = genomeview.FastaGenomeSource(reference_path())
    document = genomeview.Document(900)

    region = genomeview.GenomeView(
        "chr4", window_start, window_start + 250, "+", reference)
    document.add_view(region)

    reads = genomeview.SingleEndBAMTrack(
        "data/quick_consensus_test.bam", name="HG002")
    reads.min_indel_size = 3
    region.add_track(reads)

    region.add_track(genomeview.Axis())

    return document
Exemple #3
0
def test_view_without_source():
    """Check rendering of a BAM track in a view that has no genome source.

    With ``draw_mismatches`` turned off the document is saved normally; with
    it turned back on, saving is expected to raise ``AssertionError``.
    """
    import genomeview

    window_start = 96549060

    document = genomeview.Document(900)
    # No ``source`` argument: the view has no reference sequence.
    region = genomeview.GenomeView(
        "chr4", window_start, window_start + 1000, "+")
    document.add_view(region)

    reads = genomeview.SingleEndBAMTrack(
        "data/quick_consensus_test.bam", name="HG002")
    reads.draw_mismatches = False
    region.add_track(reads)

    region.add_track(genomeview.Axis())

    # First pass: mismatches disabled, saving is expected to succeed.
    genomeview.save(document, "results/temp_without_source.svg")

    # Second pass: mismatches re-enabled on the same track.
    with pytest.raises(AssertionError):
        reads.draw_mismatches = True
        genomeview.save(document, "results/temp_without_source_error.svg")
Exemple #4
0
def visualize_data(file_paths,
                   chrom,
                   start,
                   end,
                   reference_path=None,
                   width=900,
                   axis_on_top=False):
    """
    Creates a GenomeView document to display the data in the specified
    files (eg bam, bed, etc).

    Args:
        file_paths: this specifies the file paths to be rendered. It must be
            either a list/tuple of the paths, or a dictionary mapping
            {track_name:path}. (If you are using a python version prior to 3.6,
            use collections.OrderedDict to ensure the order remains the same.)
            Currently supports files ending in .bam, .cram, .bed, .bed.gz,
            .bigbed, or .bigwig (or .bw). Most of these file types require a
            separate index file to be present (eg a .bam.bai or a .bed.gz.tbi
            file must exist).
        chrom: chromosome (or contig) to be rendered
        start: start coordinate of region to be rendered
        end: end coordinate of region to be rendered
        reference_path: path to fasta file specifying reference genomic
            sequence. This is required in order to display mismatches
            in bam tracks.
        width: the pixel width of the document
        axis_on_top: specifies whether the axis should be added at the bottom
            (default) or at the top

    Returns:
        The genomeview Document containing a single view with one track per
        input file plus an axis track.

    Raises:
        ValueError: if a path does not end in one of the supported suffixes
            (matched case-insensitively).
    """
    # ``collections.Mapping`` was removed in Python 3.10; the ABC lives in
    # ``collections.abc``. Imported locally so the block does not depend on
    # the file's top-level imports.
    from collections.abc import Mapping

    source = None
    if reference_path is not None:
        source = genomeview.FastaGenomeSource(reference_path)

    doc = genomeview.Document(width)

    view = genomeview.GenomeView(chrom, start, end, "+", source)
    doc.add_view(view)

    def add_axis():
        view.add_track(genomeview.Axis("axis"))

    if axis_on_top:
        add_axis()

    # Normalize both accepted input shapes to (track_name, path) pairs;
    # plain sequences yield unnamed tracks.
    if isinstance(file_paths, Mapping):
        named_paths = list(file_paths.items())
    else:
        named_paths = [(None, path) for path in file_paths]

    for name, path in named_paths:
        lowered = path.lower()

        if lowered.endswith((".bam", ".cram")):
            if utilities.is_paired_end(path):
                cur_track = genomeview.PairedEndBAMTrack(path, name=name)
            else:
                cur_track = genomeview.SingleEndBAMTrack(path, name=name)
                # Single-end datasets flagged by is_long_frag_dataset get a
                # larger minimum indel size.
                if utilities.is_long_frag_dataset(path):
                    cur_track.min_indel_size = 5

        elif lowered.endswith((".bed", ".bed.gz", ".bigbed")):
            cur_track = genomeview.BEDTrack(path, name=name)

        elif lowered.endswith((".bigwig", ".bw")):
            cur_track = genomeview.BigWigTrack(path, name=name)

        else:
            # The message reports the file's base name, not just its
            # extension.
            suffix = os.path.basename(path)
            raise ValueError("Unknown file suffix: {}".format(suffix))

        view.add_track(cur_track)

    if not axis_on_top:
        add_axis()

    return doc