Ejemplo n.º 1
0
import genomeview

# Tracks to draw, top to bottom: two .bam files and one .bed.gz file.
dataset_paths = [
    "data/pacbio.chr1.bam",
    "data/illumina.chr1.bam",
    "/Users/nspies/Downloads/hg19.refseq.sorted.bed.gz",
]

# Reference sequence handed to genomeview alongside the tracks.
reference = "ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz"

# Region to display: a 30,000-position window on chr1.
chrom, start, end = "chr1", 224368899, 224398899

# Build the document for the region and write it out as an SVG file.
doc = genomeview.visualize_data(
    dataset_paths,
    chrom,
    start,
    end,
    reference,
)
genomeview.save(doc, "example.svg")

# import genomeview.track

# chrom = "14"
# start = 66901400

# genome_path = "data/chr14.fa"

# doc = genomeview.Document(950)
# source = genomeview.FastaGenomeSource(genome_path)

# gv = genomeview.genomeview.GenomeView("bam", chrom, start, start+10000, "+", source)

# # Add the coordinate axis at the top
# axis = genomeview.axis.Axis("axis")
# gv.add_track(axis)
Ejemplo n.º 2
0
def build_context_image(hit_row,
                        alignment,
                        upstream_range=4000,
                        downstream_range=4000):
    """Print a summary of one search hit and display its genomic context.

    The function prints the hit's statistics, displays a link to the NCBI
    sequence viewer, converts the assembly's GFF file to BED and renders the
    BED features around the hit with the hit region shaded in light blue.

    Args:
        hit_row: Record describing the hit. The attributes read here are
            ``target_name``, ``seq_from``, ``seq_to``, ``strand``,
            ``assembly_accession``, ``target_coords``, ``e_value``, ``gc``,
            ``score``, ``lineage`` and ``name``.
            NOTE(review): presumably a pandas row whose ``name`` is a
            zero-based index -- confirm against the caller.
        alignment: Alignment handed to ``find_seq_in_alignment`` to look up
            the matched target sequence.
        upstream_range: Number of positions displayed beyond ``seq_to``.
        downstream_range: Number of positions displayed before ``seq_from``.

    Returns:
        Tuple ``(base_filename, lineage)``.
    """
    # Accession of the target sequence the hit was found on.
    hit_accession = hit_row.target_name

    # Hit coordinates on the target; start > stop for a reverse-oriented hit.
    start = hit_row.seq_from
    stop = hit_row.seq_to

    # The sequence viewer URL takes flip=true for a minus-strand hit and
    # flip=false otherwise.
    flip_val = 'true' if hit_row.strand == "-" else 'false'

    # 'nan' is the placeholder for a missing assembly accession; in that case
    # the files are downloaded and the helper reports the base filename.
    if hit_row.assembly_accession == 'nan':
        base_filename = download_gff_fna(hit_accession)
    else:
        base_filename = hit_row.assembly_accession

    # Matched part of the target, shown as RNA (T -> U).
    target_sequence = find_seq_in_alignment(hit_row.target_coords, alignment)
    target_sequence = target_sequence.replace('T', 'U')

    e_value = hit_row.e_value
    percent_gc = hit_row.gc
    score = hit_row.score
    target_name = hit_row.target_coords

    # 'nan' is also the placeholder for a missing lineage.
    if hit_row.lineage == 'nan':
        lineage = get_taxonomy(hit_accession)
    else:
        lineage = hit_row.lineage

    # Displayed window around the hit.
    down_limit = start - downstream_range
    up_limit = stop + upstream_range

    # Text summary shown above the image.
    print("Match #{}".format(int(hit_row.name) + 1))
    print("E-value: " + str(e_value))
    print("%GC: " + str(percent_gc))
    print("Score: " + str(score))
    print("Genome Assembly: " + str(base_filename))
    print("Target: " + target_name)
    print("Lineage: " + lineage)
    print("Matched Sequence: " + target_sequence)

    # Clickable link to the same window in the NCBI sequence viewer.
    genome_browser_url = 'https://www.ncbi.nlm.nih.gov/projects/sviewer/?id={}&v={}:{}&c=FF6600&theme=Details&flip={}&select=null&content=3&color=0&label=1&geneModel=0&decor=0&layout=0&spacing=0&alncolor=on&m={},{}&mn=5,3'.format(
        hit_accession, down_limit, up_limit, flip_val, start, stop)
    # The opening tag previously carried a stray ")" after the href value.
    link_html = '<a href="{}">genome browser</a>'.format(genome_browser_url)
    try:
        display(HTML(link_html))
    except HTTPError as e:
        if e.code != 429:
            raise
        # Rate limited: wait briefly and retry once, then carry on so the
        # context image is still drawn and the caller still gets a tuple.
        time.sleep(0.5)
        display(HTML(link_html))

    gff_file_zip = "/home/jovyan/work/data/raw/download/" + base_filename + "_genomic.gff.gz"
    bed_file = "/home/jovyan/work/data/raw/features/" + base_filename + "_genomic.bed"

    convert(gff_file_zip, bed_file, desc_only=True)

    def prerender(renderer, element):
        """Shade the hit region; prerenderers run before the track is drawn."""
        if start == stop:
            # Neither orientation applies, so nothing is drawn.
            return
        # Convert genomic coordinates to screen coordinates.
        x1 = element.scale.topixels(start)
        x2 = element.scale.topixels(stop)
        # Anchor the rectangle at its left edge whichever way the hit runs;
        # the reverse case used to start at the right edge and extend past it.
        yield from renderer.rect(min(x1, x2),
                                 0,
                                 abs(x2 - x1),
                                 element.height,
                                 fill="lightblue",
                                 stroke="none")

    doc = genomeview.visualize_data({"": bed_file}, hit_accession, down_limit,
                                    up_limit)
    cur_track = genomeview.get_one_track(doc, "")
    cur_track.prerenderers = [prerender]

    display(doc)

    return base_filename, lineage
Ejemplo n.º 3
0
def test_bed():
    """Render data/genes.sorted.bed.gz over a fixed chr3 window and save it as SVG."""
    region = ("chr3", 179500230, 179800230)
    view = genomeview.visualize_data(["data/genes.sorted.bed.gz"], *region)
    genomeview.save(view, "results/bed_view.svg")
Ejemplo n.º 4
0
def test_bed2(which_bed):
    """Render data/<which_bed>.bed over a fixed chr3 window and save it as SVG.

    Args:
        which_bed: Base name of the BED file; also used to name the output
            file ``results/<which_bed>_view.svg``.
    """
    bed_path = "data/{}.bed".format(which_bed)
    svg_path = "results/{}_view.svg".format(which_bed)

    region = ("chr3", 179500230, 179800230)
    view = genomeview.visualize_data([bed_path], *region)
    genomeview.save(view, svg_path)
Ejemplo n.º 5
0
def build_context_image(hit_row,
                        alignment,
                        upstream_range=4000,
                        downstream_range=4000):
    """Print a summary of one search hit and display its genomic context.

    The function prints the hit's statistics and, when a feature file is
    available, displays a link to the NCBI sequence viewer, converts the
    assembly's GFF file to BED and renders the BED features around the hit.
    The hit is marked with a light-blue bar, a "HIT" label and an arrow
    showing its orientation.

    Args:
        hit_row: Record describing the hit. The attributes read here are
            ``target_name``, ``seq_from``, ``seq_to``, ``strand``,
            ``assembly_accession``, ``target_coords``, ``e_value``, ``gc``,
            ``score``, ``lineage``, ``tax_id`` and ``name``.
            NOTE(review): presumably a pandas row whose ``name`` is a
            zero-based index -- confirm against the caller.
        alignment: Alignment handed to ``find_seq_in_alignment`` to look up
            the matched target sequence.
        upstream_range: Number of positions displayed beyond ``seq_to``.
        downstream_range: Number of positions displayed before ``seq_from``.

    Returns:
        Tuple ``(base_filename, lineage)``, or ``('nan', 'nan')`` when
        ``base_filename`` reports that no feature file is available.
    """
    # Accession of the target sequence the hit was found on.
    hit_accession = hit_row.target_name

    # Hit coordinates on the target; start > stop for a reverse-oriented hit.
    start = hit_row.seq_from
    stop = hit_row.seq_to

    # The sequence viewer URL takes flip=true for a minus-strand hit and
    # flip=false otherwise.
    on_minus_strand = hit_row.strand == "-"
    flip_val = 'true' if on_minus_strand else 'false'

    # 'nan' is the placeholder for a missing assembly accession; in that case
    # the GFF is downloaded and the helper reports the base filename.
    if hit_row.assembly_accession == 'nan':
        base_filename = download_gff(hit_row)
    else:
        base_filename = hit_row.assembly_accession

    # Matched part of the target, shown as RNA (T -> U).
    target_sequence = find_seq_in_alignment(hit_row.target_coords, alignment)
    target_sequence = target_sequence.replace('T', 'U')

    e_value = hit_row.e_value
    percent_gc = hit_row.gc
    score = hit_row.score
    target_name = hit_row.target_coords

    # 'nan' is also the placeholder for a missing lineage.
    if hit_row.lineage == 'nan':
        lineage = get_taxonomy(hit_row.tax_id)
    else:
        lineage = hit_row.lineage

    # Displayed window around the hit.
    down_limit = start - downstream_range
    up_limit = stop + upstream_range

    # Don't let the sequence ruler go negative...
    if down_limit < 0:
        down_limit = 0
        # ...except slightly, to make room for the left-pointing "HIT" arrow
        # of a minus-strand hit close to the start of the sequence. The old
        # check tested the string flip_val, which is truthy even for 'false'.
        if on_minus_strand and stop < 80:
            down_limit = -70

    # Text summary shown above the image.
    print("Match #{}".format(int(hit_row.name) + 1))
    print("E-value:          " + str(e_value))
    print("%GC:              " + str(percent_gc))
    print("Score:            " + str(score))
    print("Target:           " + target_name)
    print("Genome Assembly:  " + str(base_filename))
    print("Lineage:          " + lineage)
    print("Matched Sequence: " + target_sequence)

    # Skip the browser link and context graph when there is no BED file; the
    # base filename then carries one of these status markers.
    no_feature_markers = ('NO FEATURES', 'NOT FOUND', 'ERROR')
    if any(marker in base_filename for marker in no_feature_markers):
        return ('nan', 'nan')

    # Clickable link to the same window in the NCBI sequence viewer.
    genome_browser_url = 'https://www.ncbi.nlm.nih.gov/projects/sviewer/?id={}&v={}:{}&c=FF6600&theme=Details&flip={}&select=null&content=3&color=0&label=1&geneModel=0&decor=0&layout=0&spacing=0&alncolor=on&m={},{}&mn=5,3'.format(
        hit_accession, down_limit, up_limit, flip_val, start, stop)
    # The opening tag previously carried a stray ")" after the href value.
    link_html = '<a href="{}">genome browser</a>'.format(genome_browser_url)
    try:
        display(HTML(link_html))
    except HTTPError as e:
        if e.code != 429:
            raise
        # Rate limited: wait briefly and retry once, then carry on so the
        # context image is still drawn and the caller still gets a tuple.
        time.sleep(0.5)
        display(HTML(link_html))

    gff_file_zip = "/home/jovyan/work/data/raw/download/" + base_filename + "_genomic.gff.gz"
    bed_file = "/home/jovyan/work/data/raw/features/" + base_filename + "_genomic.bed"

    convert(gff_file_zip, bed_file, desc_only=True)

    def prerender(renderer, element):
        """Draw the non-feature hit graphics; runs before the track is drawn."""
        if start == stop:
            # Neither orientation applies, so nothing is drawn.
            return

        # Convert genomic coordinates to screen coordinates.
        x1 = element.scale.topixels(start)
        x2 = element.scale.topixels(stop)

        forward = start < stop
        # Left edge of the bar: the start of a forward hit, the stop of a
        # reverse one.
        left, right = (x1, x2) if forward else (x2, x1)
        width = right - left
        middle = left + width / 2

        # Vertical hit bar, leaving 14 px at the bottom for label and arrow.
        yield from renderer.rect(left,
                                 0,
                                 width,
                                 element.height - 14,
                                 fill="lightblue",
                                 stroke="none")
        # "HIT" label centred under the bar.
        yield from renderer.text(middle,
                                 element.height - 2,
                                 "HIT",
                                 size=12,
                                 anchor="middle")
        # Direction arrow: to the right of the label for a forward hit, to
        # the left of it for a reverse hit.
        if forward:
            arrow_x, arrow_direction = middle + 16, "right"
        else:
            arrow_x, arrow_direction = middle - 25, "left"
        yield from renderer.block_arrow(arrow_x,
                                        element.height - 11,
                                        9,
                                        9,
                                        arrow_width=9,
                                        direction=arrow_direction,
                                        fill="black",
                                        stroke="none")

    doc = genomeview.visualize_data({"": bed_file}, hit_accession, down_limit,
                                    up_limit)
    cur_track = genomeview.get_one_track(doc, "")
    cur_track.prerenderers = [prerender]

    display(doc)

    return base_filename, lineage
Ejemplo n.º 6
0
def test_arrows(reference_path):
    """Render data/illumina.bam on chr4 at several window widths, saving one PNG each.

    Args:
        reference_path: Reference passed through to ``genomeview.visualize_data``.
    """
    window_start = 96549060
    for width in (100, 1000, 2000, 5000):
        print(width)
        view = genomeview.visualize_data(
            ["data/illumina.bam"],
            "chr4",
            window_start,
            window_start + width,
            reference_path,
        )
        genomeview.save(view, "results/temp_{}.png".format(width))