import copy from SkittleCore.models import chunkSize from models import HighlighterState, SequenceEntry from SkittleGraphTransforms import reverseComplement, calculatePerCharacterMatch import PixelLogic from SkittleCore.GraphRequestHandler import registerGraph registerGraph( 'h', "Sequence Highlighter", __name__, True, helpText= '''Use the Select Tool and click on a line you would like to search for. You can either click on Nucleotide Display or Sequence Highlighter to pick a sequence. Given a search sequence, the Highlighter checks every start position on the screen. The grayscale pixels are start positions that didn't make the cut. Light pixels are near misses. Example: When the Highlighter finds another sequence that is at least 70\% the same, it highlights each of the matching nucleotides in bright green.''') def measureSequenceMatches(state, highlighterState, sequenceEntry): assert isinstance(sequenceEntry, SequenceEntry) scores = [] findSize = len(sequenceEntry.seq) searchSeq = sequenceEntry.seq maxMismatches = int(findSize - float(findSize) * highlighterState.minimumPercentage +
'''
import math
from SkittleGraphTransforms import pearsonCorrelation
from SkittleCore.models import chunkSize
from models import SimilarityHeatMapState
import OligomerUsage
from SkittleCore.GraphRequestHandler import registerGraph
from PixelLogic import twoSidedSpectrumColoring

registerGraph('s', "Similarity Heatmap", __name__, False, False, 0.4, helpText='''This graph is a heatmap that shows how similar each row is to every other row on the screen. Red represents positive correlation, blue is negative, with black being neutral. The red blue spectrum is normalized to account for baseline correlation from genome wide patterns. The structure of a heatmap is diagonally symmetrical. The diagonal red line is self compared with self. Each pixel represents a comparison between two other lines. To see which lines are involved in a comparison trace one line straight down to the diagonal and another line to the left. The Similarity Heatmap is useful to visualize the blocks of similar code found in the genome, such as large tandem repeats, and isochores at all scales. The patterns in Similarity Heatmap correlate strongly with those generated from Hi-C experiments to map chromosome territories.''')


def prettyPrint(heatMap):
    """Debug helper: print a heat map to stdout, one text row per entry of heatMap.

    heatMap is iterated as rows, and each row is iterated as cells. Float cells
    are rounded to two decimals before printing; every other cell is printed
    as-is. Each cell is followed by a ', ' separator. Nothing is returned.
    """
    for line in heatMap:
        # Bare print (Python 2): ends the previous row's text line, so the
        # output starts with one empty line before the first row.
        print
        for e in line:
            if isinstance(e, float):
                # Trailing comma keeps the following cells on the same text line.
                print round(e, 2), ', ',
            else:
                print e, ', ',
    # NOTE(review): placed after the outer loop, so it terminates the last row;
    # the original indentation was lost in this copy -- confirm.
    print #newline
''' Created on Nov 29, 2012 @author: Josiah Seaman ''' from PixelLogic import drawBar from SkittleCore.GraphRequestHandler import registerGraph from SkittleCore.models import RequestPacket from SkittleGraphTransforms import sensitiveTestForSpecificFrequency, normalize, oldRepeatMap from models import ThreeMerDetectorState from MathLogic import lowPassFilter registerGraph('t', "Threemer Detector", __name__, False, helpText='''Threemer detector was designed to detect the weak 3 periodicity signature associated with codons inside protein coding regions. It is much more sensitive than Repeat Map, but only detects a single periodicity. Exon annotations are generally marked by a 3-mer spike. Strong 3-mer signals outside of exon annotation that are not simple repeats merit further research.''' ) def testInformation(state, threemer_scores): threemer_scores.sort() # avg = average(threemer_scores) # median = threemer_scores[len(threemer_scores)/2] # percentile95 = threemer_scores[len(threemer_scores)*95/100] # max_ = threemer_scores[-1] percentiles = [] for p in range(50, 100, 10): percentiles.append(threemer_scores[len(threemer_scores) * p / 100])
from collections import namedtuple
import math
from SkittleCore.GraphRequestHandler import registerGraph
from SkittleCore.Graphs.RepeatMap import skixelsPerSample, decodeWidth, encodeWidth, getBaseRepeatMapData
from SkittleCore.Graphs.SkittleGraphTransforms import sequenceToColors
from SkittleCore.models import RequestPacket, chunkSize

__author__ = 'Josiah'
registerGraph('p', "Photo Gallery", __name__, False, True, stretchy=False, helpText='''A graph to show sequence snippets that match a detected tandem repeat.''')


class Snippet():
    """A start/stop pair together with the width at which it is laid out.

    Attributes:
        start: value passed as ``start``, stored unchanged.
        stop: value passed as ``stop``, stored unchanged.
        width: the requested width, doubled until it is at least 25. A
            requested width of 0 is first replaced by 13 and therefore
            ends up as 26.
    """

    def __init__(self, start, stop, width):
        """Store the span and enlarge ``width`` to at least 25.

        Raises:
            ValueError: if ``width`` is negative. Doubling a negative number
                never reaches 25, so the enlargement loop below would
                otherwise never terminate.
        """
        if width < 0:
            raise ValueError("Snippet width must not be negative: %r" % (width,))
        self.start, self.stop, self.width = start, stop, width
        while self.width < 25:
            if self.width == 0:
                # Doubling 0 makes no progress; seed with 13 (becomes 26 below).
                self.width = 13
            self.width *= 2

    def __repr__(self):
        """Return the string form of the tuple (start, stop, width)."""
        return str((self.start, self.stop, self.width))
''' Created on Nov 29, 2012 @author: Josiah ''' from SkittleGraphTransforms import chunkUpList, normalizeDictionary, countListToColorSpace, sequenceToColors, countNucleotides from SkittleCore.models import RequestPacket from SkittleCore.GraphRequestHandler import registerGraph registerGraph( 'n', "Nucleotide Display", __name__, True, True, helpText='''The four nucleotides of DNA are represented by four colors. A=Black, T=Blue, C=Red, G=Green. The pixels are arranged across the screen like text, reading from left to right, then jumping to the beginning of the next line when it reaches the width. If width is set at a multiple of a tandem repeat, the repeat will appear as vertical bars.''') def calculateOutputPixels(state): state.readFastaChunks() assert isinstance(state, RequestPacket) # chunks = chunkUpList(state.seq, state.nucleotidesPerLine() ) if state.scale > 1: chunks = chunkUpList(state.seq, state.scale) counts = countNucleotides(chunks) counts = normalizeDictionary(counts) pixels = countListToColorSpace(counts, state.colorPalette, state.scale) else:
Created on Nov 29, 2012 @author: Josiah Seaman ''' import copy import RepeatMap from SkittleGraphTransforms import normalize from PixelLogic import interpolate, spectrum from models import RepeatMapState from SkittleCore.models import RequestPacket, chunkSize from SkittleCore.GraphRequestHandler import registerGraph from SkittleCore.Graphs.MathLogic import ceil registerGraph('r', "Repeat Overview", __name__, True, False, helpText='''Repeat Overview is a color based overview of the Repeat Map. Each pixel in the overview is built from one full line of Repeat Map at width 24. It looks for the maximum score anywhere on a Repeat Map line. The brightness of the pixel represents the highest score. Dark areas of the overview mean there are no repeats. The color in bright areas represents the length (frequency) of a tandem repeat that is detected.''') def findMaxScore(line): valueToBeat = 0.0 column = 0 score = 0.0 for index, value in enumerate(line): if value > valueToBeat: column, score = index, value valueToBeat = score * 1.05 #this is made to bias the "winner" towards lower number repeats, rather than a multiple of the base frequency return column, score def alignmentColor(score, column):
from SkittleGraphTransforms import chunkUpList, countNucleotides, normalizeDictionary, countListToColorSpace, pearsonCorrelation, average, composedOfNs from models import RepeatMapState from SkittleCore.models import RequestPacket, chunkSize from SkittleCore.GraphRequestHandler import registerGraph, handleRequest from DNAStorage.StorageRequestHandler import GetPngFilePath, GetFastaFilePath from SkittleCore.png import Reader registerGraph( 'm', "Repeat Map", __name__, False, False, 0.4, isGrayScale=True, helpText='''Repeat Map is used for identifying tandem repeats without the need for continually adjusting the width in Nucleotide Display. It identifies periodicity of repeated sequences by checking all possible offsets scored by Pearson Correlation displayed in grayscale. The x-axis of the graph represents periodicity, starting at offset 1 on the left and increasing geometrically to offset 6,144 on the right. This growth curve means that Repeat Map can accurately detect 2bp periodicities simultaneously with segmental duplications. Vertical white lines show regions that contain tandem repeats. Most of the graph will be 25-30% gray from random chance. Black spots are created when two regions with opposite biases are compared as in the case of a CG repeat being compared with an AT repeat region.''' ) '''These are the functions that are specific to the use of RepeatMap and not generally applicable. These functions use RepeatMapState to emulate an object with state.''' skixelsPerSample = 24 def encodeWidth(nucleotideWidth): cumulativeWidth = 0 megaColumn = 0
from SkittleGraphTransforms import pearsonCorrelation from SkittleCore.models import chunkSize from models import SimilarityHeatMapState import OligomerUsage from SkittleCore.GraphRequestHandler import registerGraph from PixelLogic import twoSidedSpectrumColoring registerGraph('s', "Similarity Heatmap", __name__, False, False, 0.4, helpText='''This graph is a heatmap that shows how similar each row is to every other row on the screen. Red represents positive correlation, blue is negative, with black being neutral. The red blue spectrum is normalized to account for baseline correlation from genome wide patterns. The structure of a heatmap is diagonally symmetrical. The diagonal red line is self compared with self. Each pixel represents a comparison between two other lines. To see which lines are involved in a comparison trace one line straight down to the diagonal and another line to the left. The Similarity Heatmap is useful to visualize the blocks of similar code found in the genome, such as large tandem repeats, and isochores at all scales. The patterns in Similarity Heatmap correlate strongly with those generated from Hi-C experiments to map chromosome territories.''' ) def prettyPrint(heatMap): for line in heatMap: print for e in line: if isinstance(e, float):
from collections import namedtuple
import math
from SkittleCore.GraphRequestHandler import registerGraph
from SkittleCore.Graphs.RepeatMap import skixelsPerSample, decodeWidth, encodeWidth, getBaseRepeatMapData
from SkittleCore.Graphs.SkittleGraphTransforms import sequenceToColors
from SkittleCore.models import RequestPacket, chunkSize

__author__ = 'Josiah'
registerGraph('p', "Photo Gallery", __name__, False, True, stretchy=False, helpText='''A graph to show sequence snippets that match a detected tandem repeat.''')


class Snippet():
    """A start/stop pair together with the width at which it is laid out.

    Attributes:
        start: value passed as ``start``, stored unchanged.
        stop: value passed as ``stop``, stored unchanged.
        width: the requested width, doubled until it is at least 25. A
            requested width of 0 is first replaced by 13 and therefore
            ends up as 26.
    """

    def __init__(self, start, stop, width):
        """Store the span and enlarge ``width`` to at least 25.

        Raises:
            ValueError: if ``width`` is negative. Doubling a negative number
                never reaches 25, so the enlargement loop below would
                otherwise never terminate.
        """
        if width < 0:
            raise ValueError("Snippet width must not be negative: %r" % (width,))
        self.start, self.stop, self.width = start, stop, width
        while self.width < 25:
            if self.width == 0:
                # Doubling 0 makes no progress; seed with 13 (becomes 26 below).
                self.width = 13
            self.width *= 2

    def __repr__(self):
        """Return the string form of the tuple (start, stop, width)."""
        return str((self.start, self.stop, self.width))


def arrangePixels(state, snippet, maxWidth):
    assert isinstance(snippet, Snippet)
    pixels = []
    seqStart = snippet.start
    for lineStartIndex in range(seqStart, snippet.stop, snippet.width):
        line = []
import math from random import choice import copy from SkittleGraphTransforms import chunkUpList, countNucleotides, normalizeDictionary, countListToColorSpace, pearsonCorrelation, average, composedOfNs from models import RepeatMapState from SkittleCore.models import RequestPacket, chunkSize from SkittleCore.GraphRequestHandler import registerGraph, handleRequest from DNAStorage.StorageRequestHandler import GetPngFilePath, GetFastaFilePath from SkittleCore.png import Reader registerGraph('m', "Repeat Map", __name__, False, False, 0.4, isGrayScale=True, helpText='''Repeat Map is used for identifying tandem repeats without the need for continually adjusting the width in Nucleotide Display. It identifies periodicity of repeated sequences by checking all possible offsets scored by Pearson Correlation displayed in grayscale. The x-axis of the graph represents periodicity, starting at offset 1 on the left and increasing geometrically to offset 6,144 on the right. This growth curve means that Repeat Map can accurately detect 2bp periodicities simultaneously with segmental duplications. Vertical white lines show regions that contain tandem repeats. Most of the graph will be 25-30% gray from random chance. Black spots are created when two regions with opposite biases are compared as in the case of a CG repeat being compared with an AT repeat region.''') '''These are the functions that are specific to the use of RepeatMap and not generally applicable. These functions use RepeatMapState to emulate an object with state.''' skixelsPerSample = 24 def encodeWidth(nucleotideWidth): cumulativeWidth = 0 megaColumn=0 subColumn=0 while cumulativeWidth < (nucleotideWidth-12):
''' Created on Dec 5, 2012 @author: Josiah ''' from SkittleGraphTransforms import * from PixelLogic import * from SkittleCore.GraphRequestHandler import registerGraph registerGraph( 'b', "Nucleotide Bias", __name__, False, True, 0.05, helpText='''This bar graph shows how often each nucleotide occurs per line. It uses the same color palette as Nucleotide Display.''') max_bar_width = 20 def calculateBiasBarSizes(state): order = ['C', 'G', 'A', 'T', 'N'] lines = chunkUpList(state.seq, state.nucleotidesPerLine()) countsPerLine = countNucleotides(lines, 1) barLengthsPerLine = [] for h in range(len(countsPerLine)): #once per line bar_sizes = [] remainder = 0.0 floating_sum = 0.0 for key in order: barSize = float(
''' Created on Dec 19, 2012 @author: Josiah ''' from SkittleCore.models import RequestPacket from PixelLogic import randomColor, blankColor from SkittleCore.GraphRequestHandler import registerGraph registerGraph('a', "Annotation Display", __name__, False, helpText='''Annotation Display is linked to an annotation file with start and stop positions for tracks. It aligns these start and stop positions along with the rest of the graphs, expanding to accommodate overlapping annotations as necessary. The user can select individual annotations and see the full text associated with that annotation. Currently, the positions of 23&Me SNPs are also displayed.''' ) class Annotation(): def getStartingLine(self, state): assert isinstance(state, RequestPacket) return self.start / state.nucleotidesPerLine() def lengthIndices(self, state): indices = range(self.start / state.nucleotidesPerLine(), self.stop / state.nucleotidesPerLine() + 1) return indices def __init__(self, Start, End):
''' Created on March 2, 2013 @author: Josiah Seaman '''
from SkittleCore.GraphRequestHandler import registerGraph
from SkittleCore.models import RequestPacket
from SkittleGraphTransforms import oldRepeatMap
from models import ThreeMerDetectorState

registerGraph('f', "Raw Frequency Map", __name__, False, isGrayScale=True, helpText='''This graph is the raw data used to calculate the Threemer Detector graph. The x-axis of Frequency Map represents offsets +1 to +60. The grey value of each pixel is the number of matching characters in the line compared with the sequence shifted to the offset. Threemer patterns show up as faint dark-dark-light dark-dark-light pixel patterns. These patterns are often associated with exons. ''')


def calculateOutputPixels(state, threeMerState=ThreeMerDetectorState()):
    """Return the output of ``oldRepeatMap`` for this request, unmodified.

    Parameters:
        state: the RequestPacket being served (asserted).
        threeMerState: settings object handed straight to ``oldRepeatMap``.
            NOTE(review): the default instance is created once, when this
            function is defined, and is shared by every call that omits the
            argument -- confirm ``oldRepeatMap`` does not mutate it.
    """
    assert isinstance(state, RequestPacket)
    # The sequence data has to be read in before it can be scored.
    state.readFastaChunks()
    return oldRepeatMap(state, threeMerState)
import copy from SkittleCore.models import chunkSize from models import HighlighterState, SequenceEntry from SkittleGraphTransforms import reverseComplement, calculatePerCharacterMatch import PixelLogic from SkittleCore.GraphRequestHandler import registerGraph registerGraph( "h", "Sequence Highlighter", __name__, True, helpText="""Use the Select Tool and click on a line you would like to search for. You can either click on Nucleotide Display or Sequence Highlighter to pick a sequence. Given a search sequence, the Highlighter checks every start position on the screen. The grayscale pixels are start positions that didn't make the cut. Light pixels are near misses. Example: When the Highlighter finds another sequence that is at least 70\% the same, it highlights each of the matching nucleotides in bright green.""", ) def measureSequenceMatches(state, highlighterState, sequenceEntry): assert isinstance(sequenceEntry, SequenceEntry) scores = [] findSize = len(sequenceEntry.seq) searchSeq = sequenceEntry.seq maxMismatches = int(findSize - float(findSize) * highlighterState.minimumPercentage + 0.999) # at 50% 1 = 0, 2 = 1, 3 = 1
''' Created on Nov 29, 2012 @author: Josiah Seaman '''
from PixelLogic import drawBar
from SkittleCore.GraphRequestHandler import registerGraph
from SkittleCore.models import RequestPacket
from SkittleGraphTransforms import sensitiveTestForSpecificFrequency, normalize, oldRepeatMap
from models import ThreeMerDetectorState
from MathLogic import lowPassFilter

registerGraph('t', "Threemer Detector", __name__, False, helpText='''Threemer detector was designed to detect the weak 3 periodicity signature associated with codons inside protein coding regions. It is much more sensitive than Repeat Map, but only detects a single periodicity. Exon annotations are generally marked by a 3-mer spike. Strong 3-mer signals outside of exon annotation that are not simple repeats merit further research.''')


def testInformation(state, threemer_scores):
    """Summarise a list of scores as five percentile samples.

    ``threemer_scores`` is sorted in place (ascending), so the caller's list
    is reordered. The entries at the 50%, 60%, 70%, 80% and 90% positions of
    the sorted list are then read out.

    Returns:
        A tuple ``(state.width, p50, p60, p70, p80, p90)``.

    Raises:
        IndexError: if ``threemer_scores`` is empty.
    """
    threemer_scores.sort()
    count = len(threemer_scores)
    # Floor division keeps the index an integer regardless of how `/` is
    # defined for ints in the running interpreter.
    percentiles = tuple(threemer_scores[count * p // 100] for p in range(50, 100, 10))
    # Both operands must be tuples: tuple + list raises TypeError.
    return (state.width,) + percentiles


def calculateOutputPixels(state, threeMerState=ThreeMerDetectorState()):
    assert isinstance(state, RequestPacket)
''' Created on March 2, 2013 @author: Josiah Seaman '''
from SkittleCore.GraphRequestHandler import registerGraph
from SkittleCore.models import RequestPacket
from SkittleGraphTransforms import oldRepeatMap
from models import ThreeMerDetectorState

registerGraph('f', "Raw Frequency Map", __name__, False, isGrayScale=True, helpText='''This graph is the raw data used to calculate the Threemer Detector graph. The x-axis of Frequency Map represents offsets +1 to +60. The grey value of each pixel is the number of matching characters in the line compared with the sequence shifted to the offset. Threemer patterns show up as faint dark-dark-light dark-dark-light pixel patterns. These patterns are often associated with exons. ''' )


def calculateOutputPixels(state, threeMerState=ThreeMerDetectorState()):
    """Return the output of ``oldRepeatMap`` for this request, unmodified.

    Parameters:
        state: the RequestPacket being served (asserted).
        threeMerState: settings object handed straight to ``oldRepeatMap``.
            NOTE(review): the default instance is created once, when this
            function is defined, and is shared by every call that omits the
            argument -- confirm ``oldRepeatMap`` does not mutate it.
    """
    assert isinstance(state, RequestPacket)
    # The sequence data has to be read in before it can be scored.
    state.readFastaChunks()
    return oldRepeatMap(state, threeMerState)
''' Created on Nov 29, 2012 @author: Josiah '''
from SkittleGraphTransforms import chunkUpList, normalizeDictionary, countListToColorSpace, sequenceToColors, countNucleotides
from SkittleCore.models import RequestPacket
from SkittleCore.GraphRequestHandler import registerGraph

registerGraph('n', "Nucleotide Display", __name__, True, True, helpText='''The four nucleotides of DNA are represented by four colors. A=Black, T=Blue, C=Red, G=Green. The pixels are arranged across the screen like text, reading from left to right, then jumping to the beginning of the next line when it reaches the width. If width is set at a multiple of a tandem repeat, the repeat will appear as vertical bars.''')


def calculateOutputPixels(state):
    """Build the pixel data for the Nucleotide Display graph.

    ``state.readFastaChunks()`` is called first, then ``state`` is asserted to
    be a RequestPacket.

    * When ``state.scale`` is not greater than 1, the result is
      ``sequenceToColors(state.seq, state.colorPalette)``.
    * Otherwise ``state.seq`` is split with ``chunkUpList`` using
      ``state.scale``, the chunks are counted with ``countNucleotides``,
      normalised with ``normalizeDictionary`` and converted by
      ``countListToColorSpace``.
    """
    state.readFastaChunks()
    assert isinstance(state, RequestPacket)

    if not state.scale > 1:
        return sequenceToColors(state.seq, state.colorPalette)

    groups = chunkUpList(state.seq, state.scale)
    tallies = normalizeDictionary(countNucleotides(groups))
    return countListToColorSpace(tallies, state.colorPalette, state.scale)
''' Created on Dec 12, 2012 @author: Josiah ''' from SkittleCore.models import RequestPacket from models import OligomerUsageState from SkittleGraphTransforms import chunkUpList, countNucleotides, \ normalizeDictionary, generateExhaustiveOligomerList, oligCountToColorSpace from SkittleCore.GraphRequestHandler import registerGraph registerGraph('o', "Oligomer Usage", __name__, False, isGrayScale=True, helpText='''Each row is one display line equal to width. Each column matches one oligomer of fixed size, arranged in alphabetical order (i.e. AA, AC, AG...). The brightness of the pixel indicates how often that oligomer occurred compared to all the others.''') def calculateOutputPixels(state, oligState=OligomerUsageState()): assert isinstance(state, RequestPacket) assert isinstance(oligState, OligomerUsageState) state.readFastaChunks() overlap = oligState.oligomerSize - 1 lines = chunkUpList(state.seq, state.nucleotidesPerLine(), overlap) #chunk sequence by display line #we can't do this simply by line because of the overhang of oligState.oligState counts = countNucleotides(lines, oligState.oligomerSize) #NORMALIZATION values = [] for line in counts: for key, value in line.iteritems():
''' Created on Dec 5, 2012 @author: Josiah ''' from SkittleGraphTransforms import * from PixelLogic import * from SkittleCore.GraphRequestHandler import registerGraph registerGraph('b', "Nucleotide Bias", __name__, False, True, 0.05, helpText='''This bar graph shows how often each nucleotide occurs per line. It uses the same color palette as Nucleotide Display.''') max_bar_width = 20 def calculateBiasBarSizes(state): order = ['C', 'G', 'A', 'T', 'N'] lines = chunkUpList(state.seq, state.nucleotidesPerLine()) countsPerLine = countNucleotides(lines, 1) barLengthsPerLine = [] for h in range(len(countsPerLine)):#once per line bar_sizes = [] remainder = 0.0 floating_sum = 0.0 for key in order: barSize = float(countsPerLine[h].get(key, 0)) / state.nucleotidesPerLine() * max_bar_width + .00001 #normalize the size of the bar to display_width floating_sum += barSize barSize += remainder remainder = floating_sum - int(floating_sum + .5) tupleT = (key, int(barSize + .5)) bar_sizes.append(tupleT) barLengthsPerLine.append(bar_sizes)
''' Created on Dec 19, 2012 @author: Josiah ''' from SkittleCore.models import RequestPacket from PixelLogic import randomColor, blankColor from SkittleCore.GraphRequestHandler import registerGraph registerGraph('a', "Annotation Display", __name__, False, helpText='''Annotation Display is linked to an annotation file with start and stop positions for tracks. It aligns these start and stop positions along with the rest of the graphs, expanding to accommodate overlapping annotations as necessary. The user can select individual annotations and see the full text associated with that annotation. Currently, the positions of 23&Me SNPs are also displayed.''') class Annotation(): def getStartingLine(self, state): assert isinstance(state, RequestPacket) return self.start / state.nucleotidesPerLine() def lengthIndices(self, state): indices = range(self.start / state.nucleotidesPerLine(), self.stop / state.nucleotidesPerLine() + 1) return indices def __init__(self, Start, End): self.start = Start self.stop = End self.color = randomColor() class AnnotationTrackState():