import copy

from SkittleCore.models import chunkSize
from models import HighlighterState, SequenceEntry
from SkittleGraphTransforms import reverseComplement, calculatePerCharacterMatch
import PixelLogic
from SkittleCore.GraphRequestHandler import registerGraph

registerGraph(
    'h',
    "Sequence Highlighter",
    __name__,
    True,
    helpText=
    '''Use the Select Tool and click on a line you would like to search for. 
 You can either click on Nucleotide Display or Sequence Highlighter to pick a sequence.  
 Given a search sequence, the Highlighter checks every start position on the screen.  
 The grayscale pixels are start positions that didn't make the cut.  
 Light pixels are near misses.  
 Example: When the Highlighter finds another sequence that is at least 70\% the same,
 it highlights each of the matching nucleotides in bright green.''')


def measureSequenceMatches(state, highlighterState, sequenceEntry):
    assert isinstance(sequenceEntry, SequenceEntry)
    scores = []
    findSize = len(sequenceEntry.seq)
    searchSeq = sequenceEntry.seq
    maxMismatches = int(findSize -
                        float(findSize) * highlighterState.minimumPercentage +
Beispiel #2
0
'''
import math

from SkittleGraphTransforms import pearsonCorrelation
from SkittleCore.models import chunkSize
from models import SimilarityHeatMapState
import OligomerUsage
from SkittleCore.GraphRequestHandler import registerGraph
from PixelLogic import twoSidedSpectrumColoring


registerGraph('s', "Similarity Heatmap", __name__, False, False, 0.4, helpText='''This graph is a heatmap that shows how similar 
each row is to every other row on the screen.  Red represents positive correlation, blue is negative, with black being neutral.  
The red blue spectrum is normalized to account for baseline correlation from genome wide patterns. 
The structure of a heatmap is diagonally symmetrical.
The diagonal red line is self compared with self. 
Each pixel represents a comparison between two other lines.
To see which lines are involved in a comparison trace one line straight down to the diagonal and another line to the left.
The Similarity Heatmap is useful to visualize the
blocks of similar code found in the genome, such as large tandem repeats, and isochores at all scales. 
The patterns in Similarity Heatmap correlate strongly with those generated from Hi-C experiments to map chromosome territories.''')


def prettyPrint(heatMap):
    for line in heatMap:
        print
        for e in line:
            if isinstance(e, float):
                print round(e, 2), ', ',
            else:
                print e, ', ',
    print #newline
'''
Created on Nov 29, 2012
@author: Josiah Seaman
'''
from PixelLogic import drawBar
from SkittleCore.GraphRequestHandler import registerGraph
from SkittleCore.models import RequestPacket
from SkittleGraphTransforms import sensitiveTestForSpecificFrequency, normalize, oldRepeatMap
from models import ThreeMerDetectorState
from MathLogic import lowPassFilter

registerGraph('t',
              "Threemer Detector",
              __name__,
              False,
              helpText='''Threemer detector was designed to detect the 
weak 3 periodicity signature associated with codons inside protein coding regions.  It is much more sensitive than 
Repeat Map, but only detects a single periodicity. Exon annotations are generally marked by a 3-mer spike. 
Strong 3-mer signals outside of exon annotation that are not simple repeats merit further research.'''
              )


def testInformation(state, threemer_scores):
    threemer_scores.sort()
    #    avg = average(threemer_scores)
    #    median = threemer_scores[len(threemer_scores)/2]
    #    percentile95 = threemer_scores[len(threemer_scores)*95/100]
    #    max_ = threemer_scores[-1]
    percentiles = []
    for p in range(50, 100, 10):
        percentiles.append(threemer_scores[len(threemer_scores) * p / 100])
Beispiel #4
0
from collections import namedtuple
import math
from SkittleCore.GraphRequestHandler import registerGraph
from SkittleCore.Graphs.RepeatMap import skixelsPerSample, decodeWidth, encodeWidth, getBaseRepeatMapData
from SkittleCore.Graphs.SkittleGraphTransforms import sequenceToColors
from SkittleCore.models import RequestPacket, chunkSize

__author__ = 'Josiah'

registerGraph('p',
              "Photo Gallery",
              __name__,
              False,
              True,
              stretchy=False,
              helpText='''A graph to show sequence snippets that match
a detected tandem repeat.''')


class Snippet():
    '''A start/stop range of sequence together with a display width.

    The width is forced up to at least 25 by repeated doubling, so the
    stored ``width`` may be larger than the one passed in.
    '''

    def __init__(self, start, stop, width):
        '''Store the range and normalise the width.

        start, stop -- stored unchanged on the instance.
        width       -- doubled until it is at least 25.  A width of zero
                       or less can never grow by doubling, so it is first
                       replaced by 13 (which then doubles to 26).
        '''
        self.start, self.stop = start, stop
        # Previously only an exact 0 was replaced; a negative width kept
        # doubling away from 25 and the constructor never returned.
        if width <= 0:
            width = 13
        while width < 25:
            width *= 2
        self.width = width

    def __repr__(self):
        '''Return the text of the ``(start, stop, width)`` tuple.'''
        return str((self.start, self.stop, self.width))

Beispiel #5
0
'''
Created on Nov 29, 2012
@author: Josiah
'''
from SkittleGraphTransforms import chunkUpList, normalizeDictionary, countListToColorSpace, sequenceToColors, countNucleotides
from SkittleCore.models import RequestPacket
from SkittleCore.GraphRequestHandler import registerGraph

registerGraph(
    'n',
    "Nucleotide Display",
    __name__,
    True,
    True,
    helpText='''The four nucleotides of DNA are represented by four colors.
A=Black, T=Blue, C=Red, G=Green. The pixels are arranged across the screen like text, reading from left to right, 
then jumping to the beginning of the next line when it reaches the width. If width is set at a multiple of a tandem repeat, 
the repeat will appear as vertical bars.''')


def calculateOutputPixels(state):
    state.readFastaChunks()
    assert isinstance(state, RequestPacket)
    #    chunks = chunkUpList(state.seq, state.nucleotidesPerLine() )

    if state.scale > 1:
        chunks = chunkUpList(state.seq, state.scale)
        counts = countNucleotides(chunks)
        counts = normalizeDictionary(counts)
        pixels = countListToColorSpace(counts, state.colorPalette, state.scale)
    else:
Created on Nov 29, 2012
@author: Josiah Seaman
'''
import copy

import RepeatMap
from SkittleGraphTransforms import normalize
from PixelLogic import interpolate, spectrum
from models import RepeatMapState
from SkittleCore.models import RequestPacket, chunkSize
from SkittleCore.GraphRequestHandler import registerGraph
from SkittleCore.Graphs.MathLogic import ceil


registerGraph('r', "Repeat Overview", __name__, True, False, helpText='''Repeat Overview is a color based overview of the Repeat Map.
Each pixel in the overview is built from one full line of Repeat Map at width 24.  It looks for the maximum score anywhere on a Repeat Map line.
The brightness of the pixel represents the highest score.  Dark areas of the overview mean there are no repeats.  The color in bright areas
represents the length (frequency) of a tandem repeat that is detected.''')


def findMaxScore(line):
    '''Pick the winning (column, score) pair from one line of scores.

    Scans left to right.  A value only replaces the current winner when it
    exceeds the winner's score by more than 5%, which biases the result
    towards earlier columns (lower number repeats) rather than a multiple
    of the base frequency.  Returns (0, 0.0) when nothing is above zero.
    '''
    winner = (0, 0.0)
    bar = 0.0  # what the next candidate has to exceed
    for candidate in enumerate(line):
        if candidate[1] > bar:
            winner = candidate
            bar = candidate[1] * 1.05
    return winner


def alignmentColor(score, column):
Beispiel #7
0
from SkittleGraphTransforms import chunkUpList, countNucleotides, normalizeDictionary, countListToColorSpace, pearsonCorrelation, average, composedOfNs
from models import RepeatMapState
from SkittleCore.models import RequestPacket, chunkSize
from SkittleCore.GraphRequestHandler import registerGraph, handleRequest
from DNAStorage.StorageRequestHandler import GetPngFilePath, GetFastaFilePath
from SkittleCore.png import Reader

registerGraph(
    'm',
    "Repeat Map",
    __name__,
    False,
    False,
    0.4,
    isGrayScale=True,
    helpText='''Repeat Map is used for identifying tandem repeats without
 the need for continually adjusting the width in Nucleotide Display.  
 It identifies periodicity of repeated sequences by checking all possible offsets scored by Pearson Correlation displayed in grayscale.  
 The x-axis of the graph represents periodicity, starting at offset 1 on the left and increasing geometrically to offset 6,144 on the right.  
 This growth curve means that Repeat Map can accurately detect 2bp periodicities simultaneously with segmental duplications.  
 Vertical white lines show regions that contain tandem repeats.  Most of the graph will be 25-30% gray from random chance.  
 Black spots are created when two regions with opposite biases are compared as in the case of a CG repeat being compared with an AT repeat region.'''
)
'''These are the functions that are specific to the use of RepeatMap and not generally applicable.  
These functions use RepeatMapState to emulate an object with state.'''
skixelsPerSample = 24


def encodeWidth(nucleotideWidth):
    cumulativeWidth = 0
    megaColumn = 0
Beispiel #8
0
from SkittleGraphTransforms import pearsonCorrelation
from SkittleCore.models import chunkSize
from models import SimilarityHeatMapState
import OligomerUsage
from SkittleCore.GraphRequestHandler import registerGraph
from PixelLogic import twoSidedSpectrumColoring

registerGraph('s',
              "Similarity Heatmap",
              __name__,
              False,
              False,
              0.4,
              helpText='''This graph is a heatmap that shows how similar 
each row is to every other row on the screen.  Red represents positive correlation, blue is negative, with black being neutral.  
The red blue spectrum is normalized to account for baseline correlation from genome wide patterns. 
The structure of a heatmap is diagonally symmetrical.
The diagonal red line is self compared with self. 
Each pixel represents a comparison between two other lines.
To see which lines are involved in a comparison trace one line straight down to the diagonal and another line to the left.
The Similarity Heatmap is useful to visualize the
blocks of similar code found in the genome, such as large tandem repeats, and isochores at all scales. 
The patterns in Similarity Heatmap correlate strongly with those generated from Hi-C experiments to map chromosome territories.'''
              )


def prettyPrint(heatMap):
    for line in heatMap:
        print
        for e in line:
            if isinstance(e, float):
Beispiel #9
0
from collections import namedtuple
import math
from SkittleCore.GraphRequestHandler import registerGraph
from SkittleCore.Graphs.RepeatMap import skixelsPerSample, decodeWidth, encodeWidth, getBaseRepeatMapData
from SkittleCore.Graphs.SkittleGraphTransforms import sequenceToColors
from SkittleCore.models import RequestPacket, chunkSize

__author__ = 'Josiah'

registerGraph('p', "Photo Gallery", __name__, False, True, stretchy=False, helpText='''A graph to show sequence snippets that match
a detected tandem repeat.''')


class Snippet():
    '''A start/stop range of sequence together with a display width.

    The width is forced up to at least 25 by repeated doubling, so the
    stored ``width`` may be larger than the one passed in.
    '''

    def __init__(self, start, stop, width):
        '''Store the range and normalise the width.

        start, stop -- stored unchanged on the instance.
        width       -- doubled until it is at least 25.  A width of zero
                       or less can never grow by doubling, so it is first
                       replaced by 13 (which then doubles to 26).
        '''
        self.start, self.stop = start, stop
        # Previously only an exact 0 was replaced; a negative width kept
        # doubling away from 25 and the constructor never returned.
        if width <= 0:
            width = 13
        while width < 25:
            width *= 2
        self.width = width

    def __repr__(self):
        '''Return the text of the ``(start, stop, width)`` tuple.'''
        return str((self.start, self.stop, self.width))


def arrangePixels(state, snippet, maxWidth):
    assert isinstance(snippet, Snippet)
    pixels = []
    seqStart = snippet.start
    for lineStartIndex in range(seqStart, snippet.stop, snippet.width):
        line = []
Beispiel #10
0
import math
from random import choice
import copy

from SkittleGraphTransforms import chunkUpList, countNucleotides, normalizeDictionary, countListToColorSpace, pearsonCorrelation, average, composedOfNs
from models import RepeatMapState
from SkittleCore.models import RequestPacket, chunkSize
from SkittleCore.GraphRequestHandler import registerGraph, handleRequest
from DNAStorage.StorageRequestHandler import GetPngFilePath, GetFastaFilePath
from SkittleCore.png import Reader


registerGraph('m', "Repeat Map", __name__, False, False, 0.4, isGrayScale=True, helpText='''Repeat Map is used for identifying tandem repeats without
 the need for continually adjusting the width in Nucleotide Display.  
 It identifies periodicity of repeated sequences by checking all possible offsets scored by Pearson Correlation displayed in grayscale.  
 The x-axis of the graph represents periodicity, starting at offset 1 on the left and increasing geometrically to offset 6,144 on the right.  
 This growth curve means that Repeat Map can accurately detect 2bp periodicities simultaneously with segmental duplications.  
 Vertical white lines show regions that contain tandem repeats.  Most of the graph will be 25-30% gray from random chance.  
 Black spots are created when two regions with opposite biases are compared as in the case of a CG repeat being compared with an AT repeat region.''')

'''These are the functions that are specific to the use of RepeatMap and not generally applicable.  
These functions use RepeatMapState to emulate an object with state.'''
skixelsPerSample = 24


def encodeWidth(nucleotideWidth):
    cumulativeWidth = 0
    megaColumn=0
    subColumn=0

    while cumulativeWidth < (nucleotideWidth-12):
Beispiel #11
0
'''
Created on Dec 5, 2012
@author: Josiah
'''
from SkittleGraphTransforms import *
from PixelLogic import *
from SkittleCore.GraphRequestHandler import registerGraph

registerGraph(
    'b',
    "Nucleotide Bias",
    __name__,
    False,
    True,
    0.05,
    helpText='''This bar graph shows how often each nucleotide occurs per line.  
It uses the same color palette as Nucleotide Display.''')
max_bar_width = 20


def calculateBiasBarSizes(state):
    order = ['C', 'G', 'A', 'T', 'N']
    lines = chunkUpList(state.seq, state.nucleotidesPerLine())
    countsPerLine = countNucleotides(lines, 1)
    barLengthsPerLine = []
    for h in range(len(countsPerLine)):  #once per line
        bar_sizes = []
        remainder = 0.0
        floating_sum = 0.0
        for key in order:
            barSize = float(
'''
Created on Dec 19, 2012

@author: Josiah
'''
from SkittleCore.models import RequestPacket
from PixelLogic import randomColor, blankColor
from SkittleCore.GraphRequestHandler import registerGraph

registerGraph('a',
              "Annotation Display",
              __name__,
              False,
              helpText='''Annotation Display is linked to an annotation file
with start and stop positions for tracks.  It aligns these start and stop positions along with the rest of the graphs, 
expanding to accommodate overlapping annotations as necessary.  The user can select individual annotations and see the 
full text associated with that annotation. Currently, the positions of 23&Me SNPs are also displayed.'''
              )


class Annotation():
    def getStartingLine(self, state):
        assert isinstance(state, RequestPacket)
        return self.start / state.nucleotidesPerLine()

    def lengthIndices(self, state):
        indices = range(self.start / state.nucleotidesPerLine(),
                        self.stop / state.nucleotidesPerLine() + 1)
        return indices

    def __init__(self, Start, End):
Beispiel #13
0
'''
Created on March 2, 2013
@author: Josiah Seaman
'''
from SkittleCore.GraphRequestHandler import registerGraph
from SkittleCore.models import RequestPacket
from SkittleGraphTransforms import oldRepeatMap
from models import ThreeMerDetectorState


registerGraph('f', "Raw Frequency Map", __name__, False, isGrayScale=True, helpText='''This graph is the raw data used to calculate
the Threemer Detector graph. The x-axis of Frequency Map represents offsets +1 to +60. The grey value of each pixel is the 
number of matching characters in the line compared with the sequence shifted to the offset.  Threemer patterns show up as faint
dark-dark-light dark-dark-light pixel patterns. These patterns are often associated with exons. ''')


def calculateOutputPixels(state, threeMerState=ThreeMerDetectorState()):
    '''Return the raw frequency-map scores for the current request.

    state         -- must be a RequestPacket; its readFastaChunks() is
                     called first to read in the next chunk.
    threeMerState -- passed straight through to oldRepeatMap.  The default
                     instance is created once, when this function is
                     defined, and is reused by every call that omits it.

    The result is whatever oldRepeatMap(state, threeMerState) returns.
    '''
    assert isinstance(state, RequestPacket)
    state.readFastaChunks()
    return oldRepeatMap(state, threeMerState)
        
    
import copy

from SkittleCore.models import chunkSize
from models import HighlighterState, SequenceEntry
from SkittleGraphTransforms import reverseComplement, calculatePerCharacterMatch
import PixelLogic
from SkittleCore.GraphRequestHandler import registerGraph


registerGraph(
    "h",
    "Sequence Highlighter",
    __name__,
    True,
    helpText="""Use the Select Tool and click on a line you would like to search for. 
 You can either click on Nucleotide Display or Sequence Highlighter to pick a sequence.  
 Given a search sequence, the Highlighter checks every start position on the screen.  
 The grayscale pixels are start positions that didn't make the cut.  
 Light pixels are near misses.  
 Example: When the Highlighter finds another sequence that is at least 70\% the same,
 it highlights each of the matching nucleotides in bright green.""",
)


def measureSequenceMatches(state, highlighterState, sequenceEntry):
    assert isinstance(sequenceEntry, SequenceEntry)
    scores = []
    findSize = len(sequenceEntry.seq)
    searchSeq = sequenceEntry.seq
    maxMismatches = int(findSize - float(findSize) * highlighterState.minimumPercentage + 0.999)
    # at 50%   1 = 0,  2 = 1, 3 = 1
Beispiel #15
0
'''
Created on Nov 29, 2012
@author: Josiah Seaman
'''
from PixelLogic import drawBar
from SkittleCore.GraphRequestHandler import registerGraph
from SkittleCore.models import RequestPacket
from SkittleGraphTransforms import sensitiveTestForSpecificFrequency, normalize, oldRepeatMap
from models import ThreeMerDetectorState
from MathLogic import lowPassFilter

registerGraph('t', "Threemer Detector", __name__, False, helpText='''Threemer detector was designed to detect the 
weak 3 periodicity signature associated with codons inside protein coding regions.  It is much more sensitive than 
Repeat Map, but only detects a single periodicity. Exon annotations are generally marked by a 3-mer spike. 
Strong 3-mer signals outside of exon annotation that are not simple repeats merit further research.''')


def testInformation(state, threemer_scores):
    threemer_scores.sort()
    #    avg = average(threemer_scores)
    #    median = threemer_scores[len(threemer_scores)/2]
    #    percentile95 = threemer_scores[len(threemer_scores)*95/100]
    #    max_ = threemer_scores[-1]
    percentiles = []
    for p in range(50, 100, 10):
        percentiles.append(threemer_scores[len(threemer_scores) * p / 100])
    return (state.width, ) + percentiles


def calculateOutputPixels(state, threeMerState=ThreeMerDetectorState()):
    assert isinstance(state, RequestPacket)
Beispiel #16
0
'''
Created on March 2, 2013
@author: Josiah Seaman
'''
from SkittleCore.GraphRequestHandler import registerGraph
from SkittleCore.models import RequestPacket
from SkittleGraphTransforms import oldRepeatMap
from models import ThreeMerDetectorState

registerGraph('f',
              "Raw Frequency Map",
              __name__,
              False,
              isGrayScale=True,
              helpText='''This graph is the raw data used to calculate
the Threemer Detector graph. The x-axis of Frequency Map represents offsets +1 to +60. The grey value of each pixel is the 
number of matching characters in the line compared with the sequence shifted to the offset.  Threemer patterns show up as faint
dark-dark-light dark-dark-light pixel patterns. These patterns are often associated with exons. '''
              )


def calculateOutputPixels(state, threeMerState=ThreeMerDetectorState()):
    '''Return the raw frequency-map scores for the current request.

    state         -- must be a RequestPacket; its readFastaChunks() is
                     called first to read in the next chunk.
    threeMerState -- passed straight through to oldRepeatMap.  The default
                     instance is created once, when this function is
                     defined, and is reused by every call that omits it.

    The result is whatever oldRepeatMap(state, threeMerState) returns.
    '''
    assert isinstance(state, RequestPacket)
    state.readFastaChunks()
    return oldRepeatMap(state, threeMerState)
Beispiel #17
0
'''
Created on Nov 29, 2012
@author: Josiah
'''
from SkittleGraphTransforms import chunkUpList, normalizeDictionary, countListToColorSpace, sequenceToColors, countNucleotides
from SkittleCore.models import RequestPacket
from SkittleCore.GraphRequestHandler import registerGraph

registerGraph('n', "Nucleotide Display", __name__, True, True, helpText='''The four nucleotides of DNA are represented by four colors.
A=Black, T=Blue, C=Red, G=Green. The pixels are arranged across the screen like text, reading from left to right, 
then jumping to the beginning of the next line when it reaches the width. If width is set at a multiple of a tandem repeat, 
the repeat will appear as vertical bars.''')


def calculateOutputPixels(state):
    '''Build the Nucleotide Display pixels for the current request.

    state -- a RequestPacket.  readFastaChunks() is invoked on it before
             the type assertion, matching the established call order.

    When state.scale is greater than 1 the sequence is split into pieces
    of ``scale`` characters, the nucleotides in each piece are counted,
    the counts are normalised, and the result is handed to
    countListToColorSpace.  Otherwise the sequence goes straight through
    sequenceToColors using the request's colour palette.
    '''
    state.readFastaChunks()
    assert isinstance(state, RequestPacket)

    if state.scale > 1:
        perChunkCounts = countNucleotides(chunkUpList(state.seq, state.scale))
        normalised = normalizeDictionary(perChunkCounts)
        return countListToColorSpace(normalised, state.colorPalette, state.scale)

    return sequenceToColors(state.seq, state.colorPalette)
    

    
    
Beispiel #18
0
'''
Created on Dec 12, 2012

@author: Josiah
'''
from SkittleCore.models import RequestPacket
from models import OligomerUsageState
from SkittleGraphTransforms import chunkUpList, countNucleotides, \
    normalizeDictionary, generateExhaustiveOligomerList, oligCountToColorSpace
from SkittleCore.GraphRequestHandler import registerGraph

registerGraph('o', "Oligomer Usage", __name__, False, isGrayScale=True, helpText='''Each row is one display line equal to width. 
Each column matches one oligomer of fixed size, arranged in alphabetical order (i.e. AA, AC, AG...). 
The brightness of the pixel indicates how often that oligomer occurred compared to all the others.''')


def calculateOutputPixels(state, oligState=OligomerUsageState()):
    assert isinstance(state, RequestPacket)
    assert isinstance(oligState, OligomerUsageState)
    state.readFastaChunks()

    overlap = oligState.oligomerSize - 1
    lines = chunkUpList(state.seq, state.nucleotidesPerLine(),
                        overlap) #chunk sequence by display line #we can't do this simply by line because of the overhang of oligState.oligState

    counts = countNucleotides(lines, oligState.oligomerSize)

    #NORMALIZATION
    values = []
    for line in counts:
        for key, value in line.iteritems():
Beispiel #19
0
'''
Created on Dec 5, 2012
@author: Josiah
'''
from SkittleGraphTransforms import *
from PixelLogic import *
from SkittleCore.GraphRequestHandler import registerGraph

registerGraph('b', "Nucleotide Bias", __name__, False, True, 0.05, helpText='''This bar graph shows how often each nucleotide occurs per line.  
It uses the same color palette as Nucleotide Display.''')
max_bar_width = 20


def calculateBiasBarSizes(state):
    order = ['C', 'G', 'A', 'T', 'N']
    lines = chunkUpList(state.seq, state.nucleotidesPerLine())
    countsPerLine = countNucleotides(lines, 1)
    barLengthsPerLine = []
    for h in range(len(countsPerLine)):#once per line
        bar_sizes = []
        remainder = 0.0
        floating_sum = 0.0
        for key in order:
            barSize = float(countsPerLine[h].get(key,
                                                 0)) / state.nucleotidesPerLine() * max_bar_width + .00001  #normalize the size of the bar to display_width
            floating_sum += barSize
            barSize += remainder
            remainder = floating_sum - int(floating_sum + .5)
            tupleT = (key, int(barSize + .5))
            bar_sizes.append(tupleT)
        barLengthsPerLine.append(bar_sizes)
Beispiel #20
0
'''
Created on Dec 19, 2012

@author: Josiah
'''
from SkittleCore.models import RequestPacket
from PixelLogic import randomColor, blankColor
from SkittleCore.GraphRequestHandler import registerGraph


registerGraph('a', "Annotation Display", __name__, False, helpText='''Annotation Display is linked to an annotation file
with start and stop positions for tracks.  It aligns these start and stop positions along with the rest of the graphs, 
expanding to accommodate overlapping annotations as necessary.  The user can select individual annotations and see the 
full text associated with that annotation. Currently, the positions of 23&Me SNPs are also displayed.''')

class Annotation():
    '''One annotated range (start to stop) with a display colour.'''

    def __init__(self, Start, End):
        '''Record the range and pick a colour via randomColor().'''
        self.start = Start
        self.stop = End
        self.color = randomColor()

    def getStartingLine(self, state):
        '''Return the index of the display line containing ``start``.

        state -- a RequestPacket; its nucleotidesPerLine() gives the
                 number of positions on one line.
        '''
        assert isinstance(state, RequestPacket)
        # Explicit floor division: a line index must stay an integer even
        # where "/" performs true division.
        return self.start // state.nucleotidesPerLine()

    def lengthIndices(self, state):
        '''Return the line indices touched by this annotation.

        Covers every line from the one holding ``start`` through the one
        holding ``stop``, inclusive.  ``state`` only needs to provide
        nucleotidesPerLine().
        '''
        perLine = state.nucleotidesPerLine()
        # range() rejects floats, so both bounds use floor division.
        return range(self.start // perLine, self.stop // perLine + 1)


class AnnotationTrackState():