import genomeview

# Tracks to render, in display order: two .bam files and one .bed.gz file.
dataset_paths = [
    "data/pacbio.chr1.bam",
    "data/illumina.chr1.bam",
    "/Users/nspies/Downloads/hg19.refseq.sorted.bed.gz",
]

# Reference sequence handed to genomeview alongside the tracks.
reference = "ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz"

# Region to draw: a 30,000-coordinate window on chr1.
chrom = "chr1"
start = 224368899
end = start + 30000

# Build the document in one call and write it out as SVG.
doc = genomeview.visualize_data(dataset_paths, chrom, start, end, reference)
genomeview.save(doc, "example.svg")

# Sketch of assembling the same kind of view by hand (left disabled):
# import genomeview.track

# chrom = "14"
# start = 66901400

# genome_path = "data/chr14.fa"
# doc = genomeview.Document(950)
# source = genomeview.FastaGenomeSource(genome_path)
# gv = genomeview.genomeview.GenomeView("bam", chrom, start, start+10000, "+", source)

# # Add the coordinate axis at the top
# axis = genomeview.axis.Axis("axis")
# gv.add_track(axis)
def build_context_image(hit_row, alignment, upstream_range=4000, downstream_range=4000):
    """Print a summary of one hit and display its genomic context.

    The function prints the hit's statistics, displays a link to the NCBI
    sequence viewer, converts the assembly's GFF file to BED, and displays a
    genomeview document of the features around the hit with the hit region
    highlighted in light blue.

    Args:
        hit_row: Object exposing the attributes read below: ``target_name``,
            ``seq_from``, ``seq_to``, ``strand``, ``assembly_accession``,
            ``target_coords``, ``e_value``, ``gc``, ``score``, ``lineage``
            and ``name`` (presumably a table row; verify against the caller).
        alignment: Passed through to ``find_seq_in_alignment`` together with
            ``hit_row.target_coords``.
        upstream_range: Number of coordinates added after ``seq_to`` to form
            the right edge of the displayed window.
        downstream_range: Number of coordinates subtracted from ``seq_from``
            to form the left edge of the displayed window.

    Returns:
        Tuple ``(base_filename, lineage)``.

    Raises:
        HTTPError: Re-raised when displaying the link fails with any code
            other than 429.
    """
    hit_accession = hit_row.target_name
    start = hit_row.seq_from
    stop = hit_row.seq_to

    # The viewer URL takes flip=true for minus-strand hits, flip=false otherwise.
    flip_val = 'true' if hit_row.strand == "-" else 'false'

    # Use the recorded assembly accession, or look one up when it is missing.
    if hit_row.assembly_accession == 'nan':
        base_filename = download_gff_fna(hit_accession)
    else:
        base_filename = hit_row.assembly_accession

    # Matched part of the target, shown as RNA (T -> U).
    target_sequence = find_seq_in_alignment(hit_row.target_coords, alignment)
    target_sequence = target_sequence.replace('T', 'U')

    e_value = hit_row.e_value
    percent_gc = hit_row.gc
    score = hit_row.score
    target_name = hit_row.target_coords

    # Use the recorded lineage, or look it up when it is missing.
    if hit_row.lineage == 'nan':
        lineage = get_taxonomy(hit_accession)
    else:
        lineage = hit_row.lineage

    # Displayed window around the hit; never let the left edge go negative.
    down_limit = max(start - downstream_range, 0)
    up_limit = stop + upstream_range

    # Summary shown above the image.
    print("Match #{}".format(int(hit_row.name) + 1))
    print("E-value: " + str(e_value))
    print("%GC: " + str(percent_gc))
    print("Score: " + str(score))
    print("Genome Assembly: " + str(base_filename))
    print("Target: " + target_name)
    print("Lineage: " + lineage)
    print("Matched Sequence: " + target_sequence)

    # Clickable link to the same window in the NCBI sequence viewer.
    genome_browser_url = 'https://www.ncbi.nlm.nih.gov/projects/sviewer/?id={}&v={}:{}&c=FF6600&theme=Details&flip={}&select=null&content=3&color=0&label=1&geneModel=0&decor=0&layout=0&spacing=0&alncolor=on&m={},{}&mn=5,3'.format(
        hit_accession, down_limit, up_limit, flip_val, start, stop)
    link_markup = '<a href="{}">genome browser</a>'.format(genome_browser_url)
    try:
        display(HTML(link_markup))
    except HTTPError as e:
        if e.code == 429:
            # NOTE(review): after one retry this returns display()'s result
            # and skips the context graph and the normal tuple return --
            # confirm that this is intended.
            time.sleep(0.5)
            return display(HTML(link_markup))
        raise

    gff_file_zip = "/home/jovyan/work/data/raw/download/" + base_filename + "_genomic.gff.gz"
    bed_file = "/home/jovyan/work/data/raw/features/" + base_filename + "_genomic.bed"
    convert(gff_file_zip, bed_file, desc_only=True)

    def prerender(renderer, element):
        # Prerenderers get run before the track is rendered; this one paints
        # the hit region behind the features.
        # Convert genomic coordinates to screen coordinates.
        x1 = element.scale.topixels(start)
        x2 = element.scale.topixels(stop)
        if start < stop:
            yield from renderer.rect(x1, 0, x2 - x1, element.height,
                                     fill="lightblue", stroke="none")
        if start > stop:
            # Reverse hit: the rectangle must begin at the stop coordinate
            # (x2) so that it covers the hit instead of the area next to it.
            yield from renderer.rect(x2, 0, x1 - x2, element.height,
                                     fill="lightblue", stroke="none")

    doc = genomeview.visualize_data({"": bed_file}, hit_accession, down_limit, up_limit)
    cur_track = genomeview.get_one_track(doc, "")
    cur_track.prerenderers = [prerender]

    display(doc)
    return base_filename, lineage
def test_bed():
    """Render `data/genes.sorted.bed.gz` over a chr3 window and save it as SVG."""
    bed_tracks = ["data/genes.sorted.bed.gz"]
    region = ("chr3", 179500230, 179800230)

    view = genomeview.visualize_data(bed_tracks, *region)
    genomeview.save(view, "results/bed_view.svg")
def test_bed2(which_bed):
    """Render `data/<which_bed>.bed` over a chr3 window and save it as SVG.

    Args:
        which_bed: Base name (no directory, no extension) of the BED file
            under ``data/``; also used to name the output under ``results/``.
    """
    bed_path = "data/{}.bed".format(which_bed)
    svg_path = "results/{}_view.svg".format(which_bed)

    view = genomeview.visualize_data([bed_path], "chr3", 179500230, 179800230)
    genomeview.save(view, svg_path)
def build_context_image(hit_row, alignment, upstream_range=4000, downstream_range=4000):
    """Print a summary of one hit and display its genomic context.

    The function prints the hit's statistics, displays a link to the NCBI
    sequence viewer, converts the assembly's GFF file to BED, and displays a
    genomeview document of the features around the hit. The hit is marked
    with a light-blue bar, a "HIT" label and an arrow giving its direction.

    Args:
        hit_row: Object exposing the attributes read below: ``target_name``,
            ``seq_from``, ``seq_to``, ``strand``, ``assembly_accession``,
            ``target_coords``, ``e_value``, ``gc``, ``score``, ``lineage``,
            ``tax_id`` and ``name`` (presumably a table row; verify against
            the caller).
        alignment: Passed through to ``find_seq_in_alignment`` together with
            ``hit_row.target_coords``.
        upstream_range: Number of coordinates added after ``seq_to`` to form
            the right edge of the displayed window.
        downstream_range: Number of coordinates subtracted from ``seq_from``
            to form the left edge of the displayed window.

    Returns:
        Tuple ``(base_filename, lineage)``, or ``('nan', 'nan')`` when
        ``base_filename`` contains 'NO FEATURES', 'NOT FOUND' or 'ERROR'.

    Raises:
        HTTPError: Re-raised when displaying the link fails with any code
            other than 429.
    """
    hit_accession = hit_row.target_name
    start = hit_row.seq_from
    stop = hit_row.seq_to

    # The viewer URL takes flip=true for minus-strand hits, flip=false otherwise.
    flip_val = 'true' if hit_row.strand == "-" else 'false'

    # Use the recorded assembly accession, or look one up when it is missing.
    if hit_row.assembly_accession == 'nan':
        base_filename = download_gff(hit_row)
    else:
        base_filename = hit_row.assembly_accession

    # Matched part of the target, shown as RNA (T -> U).
    target_sequence = find_seq_in_alignment(hit_row.target_coords, alignment)
    target_sequence = target_sequence.replace('T', 'U')

    e_value = hit_row.e_value
    percent_gc = hit_row.gc
    score = hit_row.score
    target_name = hit_row.target_coords

    # Use the recorded lineage, or look it up when it is missing.
    if hit_row.lineage == 'nan':
        lineage = get_taxonomy(hit_row.tax_id)
    else:
        lineage = hit_row.lineage

    # Displayed window around the hit; don't let the sequence ruler go negative.
    down_limit = max(start - downstream_range, 0)
    up_limit = stop + upstream_range

    # Force the ruler slightly negative to make room for the left-pointing
    # "HIT" arrow. flip_val is a string, so it must be compared explicitly:
    # a bare truth test would also match 'false'.
    if flip_val == 'true' and stop < 80:
        down_limit = -70

    # Summary shown above the image.
    print("Match #{}".format(int(hit_row.name) + 1))
    print("E-value: " + str(e_value))
    print("%GC: " + str(percent_gc))
    print("Score: " + str(score))
    print("Target: " + target_name)
    print("Genome Assembly: " + str(base_filename))
    print("Lineage: " + lineage)
    print("Matched Sequence: " + target_sequence)

    # Skip the browser link and context graph when there is no bed file.
    if any(marker in base_filename
           for marker in ('NO FEATURES', 'NOT FOUND', 'ERROR')):
        return ('nan', 'nan')

    # Clickable link to the same window in the NCBI sequence viewer.
    genome_browser_url = 'https://www.ncbi.nlm.nih.gov/projects/sviewer/?id={}&v={}:{}&c=FF6600&theme=Details&flip={}&select=null&content=3&color=0&label=1&geneModel=0&decor=0&layout=0&spacing=0&alncolor=on&m={},{}&mn=5,3'.format(
        hit_accession, down_limit, up_limit, flip_val, start, stop)
    link_markup = '<a href="{}">genome browser</a>'.format(genome_browser_url)
    try:
        display(HTML(link_markup))
    except HTTPError as e:
        if e.code == 429:
            # NOTE(review): after one retry this returns display()'s result
            # and skips the context graph and the normal tuple return --
            # confirm that this is intended.
            time.sleep(0.5)
            return display(HTML(link_markup))
        raise

    gff_file_zip = "/home/jovyan/work/data/raw/download/" + base_filename + "_genomic.gff.gz"
    bed_file = "/home/jovyan/work/data/raw/features/" + base_filename + "_genomic.bed"
    convert(gff_file_zip, bed_file, desc_only=True)

    def prerender(renderer, element):
        # Prerenderers get run before the track is rendered.
        # Draw non-feature graphic elements.
        # Convert genomic coordinates to screen coordinates.
        x1 = element.scale.topixels(start)
        x2 = element.scale.topixels(stop)

        # Hit is in forward orientation.
        if start < stop:
            # Vertical hit bar; 14 units are left free below it for the label.
            yield from renderer.rect(x1, 0, x2 - x1, element.height - 14,
                                     fill="lightblue", stroke="none")
            # "HIT" text centred under the bar.
            yield from renderer.text(x1 + (x2 - x1) / 2, element.height - 2,
                                     "HIT", size=12, anchor="middle")
            # Direction arrow (forward/positive strand), right of the label.
            yield from renderer.block_arrow(x1 + (x2 - x1) / 2 + 16,
                                            element.height - 11, 9, 9,
                                            arrow_width=9, direction="right",
                                            fill="black", stroke="none")

        # Hit is in reverse orientation.
        if start > stop:
            # Vertical hit bar, starting from the stop coordinate.
            yield from renderer.rect(x2, 0, x1 - x2, element.height - 14,
                                     fill="lightblue", stroke="none")
            # "HIT" text centred under the bar.
            yield from renderer.text(x1 + (x2 - x1) / 2, element.height - 2,
                                     "HIT", size=12, anchor="middle")
            # Direction arrow (reverse/negative strand), left of the label.
            yield from renderer.block_arrow(x2 + (x1 - x2) / 2 - 25,
                                            element.height - 11, 9, 9,
                                            arrow_width=9, direction="left",
                                            fill="black", stroke="none")

    doc = genomeview.visualize_data({"": bed_file}, hit_accession, down_limit, up_limit)
    cur_track = genomeview.get_one_track(doc, "")
    cur_track.prerenderers = [prerender]

    display(doc)
    return base_filename, lineage
def test_arrows(reference_path):
    """Render `data/illumina.bam` on chr4 at several window widths, saving PNGs.

    Args:
        reference_path: Reference passed straight through to
            ``genomeview.visualize_data``.
    """
    window_start = 96549060
    for width in (100, 1000, 2000, 5000):
        print(width)
        view = genomeview.visualize_data(
            ["data/illumina.bam"], "chr4",
            window_start, window_start + width, reference_path)
        genomeview.save(view, "results/temp_{}.png".format(width))