def createAssemblyGapsFile(genome, assemblyChars='ACGTacgt'):
    """genome assemblyChars='ACGTacgt'

    Write 'assemblyGaps.bed' under the original-data path of the genome's
    'gaps' property track. For every chromosome returned by
    GenomeInfo.getExtendedChrList, each maximal run of sequence positions
    whose value is not one of `assemblyChars` is written as one
    tab-separated line: chromosome, run start, run end (end exclusive).

    If no such run exists in any chromosome, a single placeholder line
    (first chromosome, '1', '1') is written instead.

    The output file is managed by a `with` block so it is closed even when
    reading a track or writing fails.
    """
    # NOTE(review): the path is created via `gcf` but ensured via `qcf`;
    # both names come from outside this function - confirm this is intended.
    basePath = gcf.createOrigPath(genome, GenomeInfo.getPropertyTrackName(genome, 'gaps'), '')
    outFn = basePath + 'assemblyGaps.bed'
    qcf.ensurePathExists(outFn)

    seqTrack = PlainTrack(GenomeInfo.getSequenceTrackName(genome))
    chrList = GenomeInfo.getExtendedChrList(genome)
    anyGaps = False

    with open(outFn, 'w') as outFile:
        for chrom in chrList:
            chrRegion = GenomeRegion(genome, chrom, 0, GenomeInfo.getChrLen(genome, chrom))
            seq = seqTrack.getTrackView(chrRegion).valsAsNumpyArray()

            # Positions whose character is not any of the assembly characters.
            #gapIndexes = numpy.arange(len(seq))[(seq == 'n') | (seq == 'N')]
            isAssembly = numpy.logical_or.reduce([seq == x for x in assemblyChars])
            gapIndexes = numpy.arange(len(seq))[numpy.logical_not(isAssembly)]

            # A difference of 1 between neighbouring gap positions means the
            # two positions belong to the same run. Dropping every position
            # that continues a run leaves the run starts; dropping every
            # position that is followed by a continuation leaves the run ends.
            gapIndexDiff = gapIndexes[1:] - gapIndexes[:-1]
            continues = gapIndexDiff == 1
            gapBeginIndexes = numpy.delete(gapIndexes, (numpy.arange(len(gapIndexDiff)) + 1)[continues])
            gapEndIndexes = numpy.delete(gapIndexes + 1, numpy.arange(len(gapIndexDiff))[continues])
            assert len(gapBeginIndexes) == len(gapEndIndexes)

            for gapBegin, gapEnd in zip(gapBeginIndexes, gapEndIndexes):
                anyGaps = True
                outFile.write('\t'.join([chrom, str(gapBegin), str(gapEnd)]) + os.linesep)

        if not anyGaps:
            outFile.write('\t'.join([chrList[0], '1', '1']))
def getAnchor(genome, trackName):
    """Return one string per chromosome: str() of the `genomeAnchor` of the
    track view covering the whole chromosome.

    Chromosomes are taken from GenomeInfo.getChrList(genome), in that order.
    """
    plainTrack = PlainTrack(trackName)

    def _anchorFor(chromName):
        # Track view over the full chromosome, [0, chromosome length).
        wholeChrom = GenomeRegion(genome, chromName, 0,
                                  GenomeInfo.getChrLen(genome, chromName))
        return str(plainTrack.getTrackView(wholeChrom).genomeAnchor)

    return [_anchorFor(chromName) for chromName in GenomeInfo.getChrList(genome)]
def getNumberElements(genome, trackName):
    """Return a list with, for each chromosome in
    GenomeInfo.getChrList(genome), the length of the track view's start
    array over the whole chromosome.
    """
    plainTrack = PlainTrack(trackName)
    counts = []
    for chromName in GenomeInfo.getChrList(genome):
        wholeChrom = GenomeRegion(genome, chromName, 0,
                                  GenomeInfo.getChrLen(genome, chromName))
        view = plainTrack.getTrackView(wholeChrom)
        counts.append(len(view.startsAsNumpyArray()))
    return counts
def createNmerChains(self, n): for chr in GenomeInfo.getChrList(self._genome): print 'Creating chains of nmers of length ', n, ' for chromosome ', chr chrLen = GenomeInfo.getChrLen(self._genome,chr) chrReg = GenomeRegion( self._genome, chr, 0, chrLen ) seqTV = PlainTrack( GenomeInfo.getSequenceTrackName(self._genome) ).getTrackView(chrReg) #nmersAsInts = NmerAsIntSlidingWindow(n, FuncValTvWrapper(seqTV)) nmersAsInts = NmerAsIntSlidingWindow(n, seqTV.valsAsNumpyArray()) SameValueIndexChainsFactory.generate( nmersAsInts, chrLen, 4**n, self._createPath(n), chr )
def createNmerChains(self, n): for chr in GenomeInfo.getChrList(self._genome): print 'Creating chains of nmers of length ', n, ' for chromosome ', chr chrLen = GenomeInfo.getChrLen(self._genome, chr) chrReg = GenomeRegion(self._genome, chr, 0, chrLen) seqTV = PlainTrack(GenomeInfo.getSequenceTrackName( self._genome)).getTrackView(chrReg) #nmersAsInts = NmerAsIntSlidingWindow(n, FuncValTvWrapper(seqTV)) nmersAsInts = NmerAsIntSlidingWindow(n, seqTV.valsAsNumpyArray()) SameValueIndexChainsFactory.generate(nmersAsInts, chrLen, 4**n, self._createPath(n), chr)
def execute(cls, choices, galaxyFn=None, username=''):
    """Write the elements of the selected track to `galaxyFn` through
    cls.WriteExpandedElementsToFile.

    choices[0] is the genome, choices[1] selects the source. With
    'history', choices[2] is a colon-separated Galaxy track name whose
    second field is the track type; the history file is copied to a
    temporary path and read as BED when the type is 'valued.bed',
    'category.bed' or 'bed'. Otherwise choices[3] is a colon-separated
    track name that is read chromosome by chromosome, writing the header
    only for the first chromosome.

    All files are opened in `with` blocks and the temporary copy is removed
    in a `finally` clause, so nothing is leaked when a step fails.
    The incoming `username` value is not used; it is overwritten with six
    random lowercase letters in the history branch.
    """
    from gold.util.RandomUtil import random

    genome = choices[0]
    trackItem = choices[3]
    # NOTE(review): the result is never used; the call is kept in case it
    # has side effects - confirm and remove if it does not.
    chromRegsPath = GenomeInfo.getChrRegsFn(genome)
    chrSizeDict = dict([(chrom, GenomeInfo.getChrLen(genome, chrom))
                        for chrom in GenomeInfo.getChrList(genome)])
    geSource = headLinesStr = None

    with open(galaxyFn, 'w') as outputFile:
        if choices[1] == 'history':
            trackType = choices[2].split(':')[1]
            # Random prefix + timestamp gives a temp file name unlikely to clash.
            username = ''.join([chr(random.randint(97, 122)) for i in range(6)])
            tempFn = createCollectedPath(
                genome, [],
                username + '_'.join([str(v) for v in time.localtime()[:6]]) + '.' + trackType)
            fnSource = ExternalTrackManager.extractFnFromGalaxyTN(choices[2].split(':'))
            try:
                with open(fnSource, 'r') as srcFile:
                    with open(tempFn, 'w') as tmpFile:
                        tmpFile.write(srcFile.read())

                # For any other track type geSource stays None, as before.
                if trackType in ['valued.bed', 'category.bed', 'bed']:
                    geSource = GenomeElementSorter(
                        BedGenomeElementSource(tempFn, genome=genome)).__iter__()
                #elif trackType == 'gtrack':
                #    geSource = GenomeElementSorter(GtrackGenomeElementSource(tempFn, genome=genome)).__iter__()
                #    headLinesStr = geSource.getHeaderLines().replace('##','\n##')

                cls.WriteExpandedElementsToFile(geSource, chrSizeDict, outputFile,
                                                headLinesStr, writeHeaderFlag=True)
            finally:
                if os.path.exists(tempFn):
                    os.remove(tempFn)
        else:
            writeHeaderFlag = True
            for chrom in GenomeInfo.getChrList(genome):
                gRegion = GenomeRegion(genome, chrom, 0, chrSizeDict[chrom])
                plTrack = PlainTrack(trackItem.split(':'))
                geSource = GenomeElementTvWrapper(plTrack.getTrackView(gRegion)).__iter__()
                cls.WriteExpandedElementsToFile(geSource, chrSizeDict, outputFile,
                                                headLinesStr, writeHeaderFlag)
                writeHeaderFlag = False
def getSegmentSizes(genome, trackName):
    """Return, for each chromosome in GenomeInfo.getChrList(genome), the sum
    of (end - start) over the track view of the whole chromosome.

    The result is a list with one Python scalar per chromosome. The
    per-segment size lists that were previously accumulated but never
    returned are no longer built.
    """
    track = PlainTrack(trackName)
    sumSegmentSize = []
    for chrom in GenomeInfo.getChrList(genome):
        chromLen = GenomeInfo.getChrLen(genome, chrom)
        region = GenomeRegion(genome, chrom, 0, chromLen)
        tv = track.getTrackView(region)
        sizeSegments = tv.endsAsNumpyArray() - tv.startsAsNumpyArray()
        # .tolist() on the summed value converts it to a plain Python number.
        sumSegmentSize.append(sizeSegments.sum().tolist())
    return sumSegmentSize
def _createRandomizedNumpyArrays(self, binLen, starts, ends, vals, strands, ids, edges, weights, extras, region):
    """Create randomized arrays for `region`, sampling from the intensity
    track named by self._trackNameIntensity.

    Delegates to self._createRandomizedNumpyArraysFromIntensityFunction
    with the intensity track view of `region`.

    Raises:
        InvalidRunSpecException: the intensity track view has no values in
            `region`.
        NotImplementedError: the intensity track is not dense.
    """
    intensityTV = PlainTrack(self._trackNameIntensity).getTrackView(region)
    if len(intensityTV.valsAsNumpyArray()) == 0:
        # A space now separates the message text from the region.
        raise InvalidRunSpecException(
            'Error: No intensity data available for sampling randomized locations in region ' +
            str(region) +
            '. Please check that the intensity track was created with the same main track that is being randomized in this analysis.')

    #intensityTV = PlainTrack(self._trackNameIntensity).getTrackView(self._origRegion) #Dependence on origRegion is not nice, but not a big problem..

    if not intensityTV.trackFormat.isDense():
        raise NotImplementedError

    assert intensityTV.trackFormat.isValued('number')
    return self._createRandomizedNumpyArraysFromIntensityFunction(
        binLen, starts, ends, vals, strands, ids, edges, weights, extras, intensityTV)
def _addPeaks(self):
    """Append a Peak to self.peaks for every (start, end) pair of
    self.trackName in the first three regions of GlobalBinSource(genome).

    `genome` is not a parameter or attribute here; it is resolved from the
    enclosing (module) scope.
    """
    #trackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, self.trackName)
    peakTrack = PlainTrack(self.trackName)
    for regionNo, region in enumerate(GlobalBinSource(genome)):
        # NOTE(review): only regions 0..2 are processed - looks like a
        # debugging limit; confirm before relying on it.
        if regionNo > 2:
            break
        view = peakTrack.getTrackView(region)
        peakStarts = view.startsAsNumpyArray()
        peakEnds = view.endsAsNumpyArray()
        for peakStart, peakEnd in zip(peakStarts, peakEnds):
            self.peaks.append(Peak(self, region.chr, peakStart, peakEnd))
def __iter__(self):
    """Yield w1 * value1 + w2 * value2 for every index of the first track's
    value array over the whole chromosome self.chr.

    Both tracks (self.trackName1, self.trackName2) are read over the same
    region; the second array is indexed with the first array's indices.
    """
    genome, chrom = self.genome, self.chr
    firstWeight, secondWeight = self.w1, self.w2
    wholeChrom = GenomeRegion(genome, chrom, 0, GenomeInfo.getChrLen(genome, chrom))

    firstVals = PlainTrack(self.trackName1).getTrackView(wholeChrom).valsAsNumpyArray()
    secondVals = PlainTrack(self.trackName2).getTrackView(wholeChrom).valsAsNumpyArray()

    for idx in xrange(len(firstVals)):
        yield firstWeight * firstVals[idx] + secondWeight * secondVals[idx]
def _createRandomizedNumpyArrays(self, binLen, starts, ends, vals, strands, ids, edges, weights, extras, origTrackFormat, region):
    """Create randomized arrays for `region`, sampling from the intensity
    track named by self._trackNameIntensity.

    When self._minimal is set, no track is read and an 8-tuple of an empty
    numpy array, six None values and an empty OrderedDict is returned.
    Otherwise the call is delegated to
    self._createRandomizedNumpyArraysFromIntensityFunction with the
    intensity track view of `region`. `origTrackFormat` is accepted but not
    used here.

    Raises:
        InvalidRunSpecException: the intensity track view has no values in
            `region`.
        NotImplementedError: the intensity track is not dense.
    """
    if self._minimal:
        return numpy.array([]), None, None, None, None, None, None, OrderedDict()

    intensityTV = PlainTrack(self._trackNameIntensity).getTrackView(region)
    if len(intensityTV.valsAsNumpyArray()) == 0:
        # A space now separates the message text from the region.
        raise InvalidRunSpecException(
            'Error: No intensity data available for sampling randomized locations in region ' +
            str(region) +
            '. Please check that the intensity track was created with the same main track that is being randomized in this analysis.')

    #intensityTV = PlainTrack(self._trackNameIntensity).getTrackView(self._origRegion) #Dependence on origRegion is not nice, but not a big problem..

    if not intensityTV.trackFormat.isDense():
        raise NotImplementedError

    assert intensityTV.trackFormat.isValued('number')
    return self._createRandomizedNumpyArraysFromIntensityFunction(
        binLen, starts, ends, vals, strands, ids, edges, weights, extras, intensityTV)
# Test tool: prints an HTML heading, then fetches the sequence track view of
# the hard-coded region hg19 chr1:1000000-1001000 from the
# ['Sequence', 'DNA'] track 500 times and prints the values read.
# The `choices`, `galaxyFn` and `username` arguments are not used.
# NOTE(review): the heading is emitted as "<h2>Test tool<h2>" - the closing
# tag looks like it should be "</h2>".
# NOTE(review): this definition has lost its line structure; whether the final
# print belongs inside the loop cannot be told from here, so the code is left
# untouched.
def execute(choices, galaxyFn=None, username=''): ''' Is called when execute-button is pushed by web-user. Should print output as HTML to standard out, which will be directed to a results page in Galaxy history. If getOutputFormat is anything else than HTML, the output should be written to the file with path galaxyFn. If needed, StaticFile can be used to get a path where additional files can be put (e.g. generated image files). choices is a list of selections made by web-user in each options box. ''' print "<h2>Test tool<h2>" fastaTrack = PlainTrack(['Sequence', 'DNA']) for i in range(0, 500): seqTv = fastaTrack.getTrackView( GenomeRegion("hg19", "chr1", 1000000, 1001000)) sequence = seqTv.valsAsNumpyArray() print sequence
def getGenomicElements(genome, trackName): track = PlainTrack(trackName) genElements = [] for chrom in GenomeInfo.getChrList(genome): chromLen = GenomeInfo.getChrLen(genome, chrom) region = GenomeRegion(genome, chrom, 0, chromLen) tv = track.getTrackView(region) for el in tv: #print chrom, el.start(), el.end() #, el.name() genElements = genElements + [[chrom, el.start(), el.end()]] return genElements #print numpy.version.version # 1.7.1 !! #unique, counts = numpy.unique(segmentSize, return_counts=True) # This is for numpy 1.9 #print numpy.asarray((unique, counts)).T '''track.setFormatConverter('SegmentToMidPointFormatConverter')
def __iter__(self):
    """Yield [pos, pos + 1] for randomly selected positions of chromosome
    self.chr.

    The values of self.trackName1 over the whole chromosome are rescaled
    linearly to [0, 1]; a position is yielded when a fresh R `runif(1)`
    draw is below self.factor times its rescaled value.

    Nothing is yielded when the track has no values or when all values are
    equal (the rescaling is undefined for a zero value range).
    """
    from gold.application.RSetup import r

    genome, chrom = self.genome, self.chr
    factor = self.factor
    region = GenomeRegion(genome, chrom, 0, GenomeInfo.getChrLen(genome, chrom))
    vals1 = PlainTrack(self.trackName1).getTrackView(region).valsAsNumpyArray()

    if len(vals1) == 0:
        # min()/max() of an empty array would raise.
        return

    #scale between 0 and 1..:
    minVal, maxVal = vals1.min(), vals1.max()
    if maxVal == minVal:
        # Zero range: 1/(max-min) would divide by zero.
        return
    # 1.0 forces true division; under Python 2 an integer-valued track would
    # otherwise get 1/(max-min) == 0 and every rescaled value would be 0.
    vals1 = (vals1 - minVal) * (1.0 / (maxVal - minVal))

    for pos in xrange(len(vals1)):
        #print r.runif(1), vals1[pos]
        if r.runif(1) < factor * vals1[pos]:
            yield [pos, pos + 1]
def _createRandomizedNumpyArrays(self, binLen, starts, ends, vals, strands, ids, edges, weights, extras, region):
    """Create randomized arrays for `region` from the universe track named
    by self._trackNameUniverse.

    Delegates to self._createRandomizedNumpyArraysFromBinaryUniverse with
    the universe track view; raises InvalidRunSpecException when the
    universe track is dense.
    """
    universeTV = PlainTrack(self._trackNameUniverse).getTrackView(region)
    if not universeTV.trackFormat.isDense():
        return self._createRandomizedNumpyArraysFromBinaryUniverse(
            binLen, starts, ends, vals, strands, ids, edges, weights, extras,
            universeTV)
    raise InvalidRunSpecException('Error: Universe needs to be a binary (non-dense) track')
def _createRandomizedNumpyArrays(self, binLen, starts, ends, vals, strands, ids, edges, weights, extras, region):
    """Create randomized arrays for `region`, sampling by distance to a
    reference track.

    Delegates to self._createRandomizedNumpyArraysFromDistanceToReference
    with the reference track view of `region`.

    Raises:
        InvalidRunSpecException: the reference track view has no values in
            `region`, or the reference track is dense.
    """
    #self._trackNameIntensity based on naming convenience wrt. inheritance
    referenceTV = PlainTrack(self._trackNameIntensity).getTrackView(region)

    if len(referenceTV.valsAsNumpyArray()) == 0:
        # A space now separates the message text from the region.
        raise InvalidRunSpecException(
            'Error: No reference data available for sampling randomized locations in region ' +
            str(region) +
            '. Please check that the reference track was created with the same main track that is being randomized in this analysis.')

    if referenceTV.trackFormat.isDense():
        raise InvalidRunSpecException(
            'Error: Cannot sample by distance to reference if reference is a dense track')

    return self._createRandomizedNumpyArraysFromDistanceToReference(
        binLen, starts, ends, vals, strands, ids, edges, weights, extras, referenceTV)
def __iter__(self):
    """Yield every position of self._lowerOrderChain at which the lowercased
    genome sequence equals self._fullNmer.

    For each position the sequence of length len(self._fullNmer) starting
    there is read from the genome's sequence track. The first
    len(self._nmerPrefix) characters must match the start of
    self._fullNmer, otherwise an AssertionError is raised.

    The imports, the sequence track and the prefix to compare against do
    not depend on the position, so they are set up once before the loop.
    """
    from gold.track.Track import PlainTrack
    from quick.util.GenomeInfo import GenomeInfo
    from gold.track.GenomeRegion import GenomeRegion

    track = PlainTrack(GenomeInfo.getSequenceTrackName(self._genome))
    fullNmer = self._fullNmer
    nmerLen = len(fullNmer)
    pl = len(self._nmerPrefix)
    expectedPrefix = fullNmer[0:pl]

    for pos in self._lowerOrderChain:
        region = GenomeRegion(self._genome, self._chr, pos, pos + nmerLen)
        fullSubstring = (''.join(track.getTrackView(region).valsAsNumpyArray())).lower()

        assert expectedPrefix == fullSubstring[0:pl], \
            'The prefix of lower order does not match at the positions given by the chain. %s vs %s. Region: %s' % (
                expectedPrefix, fullSubstring[0:pl], region)

        #print 'Comparing nmers: %s VS %s (at pos:%i).' % (self._fullNmer, fullSubstring, pos)
        if fullNmer == fullSubstring:
            yield pos
def __iter__(self):
    """Yield [pos, pos + 1] for randomly selected positions of chromosome
    self.chr.

    The values of self.trackName1 over the whole chromosome are rescaled
    linearly to [0, 1]; a position is yielded when a fresh R `runif(1)`
    draw is below self.factor times its rescaled value.

    Nothing is yielded when the track has no values or when all values are
    equal (the rescaling is undefined for a zero value range).
    """
    from proto.RSetup import r

    genome, chrom = self.genome, self.chr
    factor = self.factor
    region = GenomeRegion(genome, chrom, 0, GenomeInfo.getChrLen(genome, chrom))
    vals1 = PlainTrack(self.trackName1).getTrackView(region).valsAsNumpyArray()

    if len(vals1) == 0:
        # min()/max() of an empty array would raise.
        return

    #scale between 0 and 1..:
    minVal, maxVal = vals1.min(), vals1.max()
    if maxVal == minVal:
        # Zero range: 1/(max-min) would divide by zero.
        return
    # 1.0 forces true division; under Python 2 an integer-valued track would
    # otherwise get 1/(max-min) == 0 and every rescaled value would be 0.
    vals1 = (vals1 - minVal) * (1.0 / (maxVal - minVal))

    for pos in xrange(len(vals1)):
        #print r.runif(1), vals1[pos]
        if r.runif(1) < factor * vals1[pos]:
            yield [pos, pos + 1]
def getFlatTracksTS(genome, guiSelectedGSuite):
    """Build a FlatTracksTS with one SingleTrackTS per track of the GSuite
    selected in the GUI, keyed by the GSuite track's title.

    Each entry's metadata is an OrderedDict starting with 'title' and
    'genome' (as str), followed by the GSuite track's attributes. The two
    leading keys are given as an ordered list of pairs because keyword
    arguments to OrderedDict do not keep their order on Python 2.

    Raises AssertionError when a GSuite track has no track name.
    """
    ts = FlatTracksTS()
    gsuite = getGSuiteFromGalaxyTN(guiSelectedGSuite)
    for gsTrack in gsuite.allTracks():
        assert gsTrack.trackName is not None, "Gstrack name is None %s" % gsTrack
        track = PlainTrack(gsTrack.trackName)
        metadata = OrderedDict([('title', gsTrack.title), ('genome', str(genome))])
        metadata.update(gsTrack.attributes)
        ts[gsTrack.title] = SingleTrackTS(track, metadata)
    return ts
def execute(cls, choices, galaxyFn=None, username=''):
    """Write the elements of the selected track to `galaxyFn` through
    cls.WriteExpandedElementsToFile.

    choices[0] is the genome, choices[1] selects the source. With
    'history', choices[2] is a colon-separated Galaxy track name whose
    second field is the track type; the history file is copied to a
    run-specific temporary file and read as BED ('valued.bed',
    'category.bed', 'bed') or GTrack ('gtrack', whose header lines are
    forwarded). Otherwise choices[3] is a colon-separated track name that
    is read chromosome by chromosome, writing the header only for the
    first chromosome.

    All files are opened in `with` blocks and the temporary copy is removed
    in a `finally` clause, so nothing is leaked when a step fails.
    """
    genome = choices[0]
    trackItem = choices[3]
    # NOTE(review): the result is never used; the call is kept in case it
    # has side effects - confirm and remove if it does not.
    chromRegsPath = GenomeInfo.getChrRegsFn(genome)
    chrSizeDict = dict([(chrom, GenomeInfo.getChrLen(genome, chrom))
                        for chrom in GenomeInfo.getChrList(genome)])
    geSource = headLinesStr = None

    with open(galaxyFn, 'w') as outputFile:
        if choices[1] == 'history':
            trackType = choices[2].split(':')[1]
            from proto.hyperbrowser.StaticFile import GalaxyRunSpecificFile
            tempFn = GalaxyRunSpecificFile(['fromHistory.' + trackType], galaxyFn).getDiskPath(True)
            fnSource = ExternalTrackManager.extractFnFromGalaxyTN(choices[2].split(':'))
            try:
                with open(fnSource, 'r') as srcFile:
                    with open(tempFn, 'w') as tmpFile:
                        tmpFile.write(srcFile.read())

                # For any other track type geSource stays None, as before.
                if trackType in ['valued.bed', 'category.bed', 'bed']:
                    geSource = GenomeElementSorter(BedGenomeElementSource(tempFn, genome=genome)).__iter__()
                elif trackType == 'gtrack':
                    geSource = GenomeElementSorter(GtrackGenomeElementSource(tempFn, genome=genome)).__iter__()
                    headLinesStr = geSource.getHeaderLines().replace('##', '\n##')

                cls.WriteExpandedElementsToFile(geSource, chrSizeDict, outputFile,
                                                headLinesStr, writeHeaderFlag=True)
            finally:
                if os.path.exists(tempFn):
                    os.remove(tempFn)
        else:
            writeHeaderFlag = True
            for chrom in GenomeInfo.getChrList(genome):
                gRegion = GenomeRegion(genome, chrom, 0, chrSizeDict[chrom])
                plTrack = PlainTrack(trackItem.split(':'))
                geSource = GenomeElementTvWrapper(plTrack.getTrackView(gRegion)).__iter__()
                cls.WriteExpandedElementsToFile(geSource, chrSizeDict, outputFile,
                                                headLinesStr, writeHeaderFlag)
                writeHeaderFlag = False
def _createRandomizedNumpyArrays(self, binLen, starts, ends, vals, strands, ids, edges, weights, extras, region):
    """Create randomized arrays for `region` from a binary intensity track.

    Delegates to self._createRandomizedNumpyArraysFromBinaryIntensity with
    the track view of self._trackNameIntensity; raises
    InvalidRunSpecException when that track is dense.
    """
    #self._trackNameIntensity based on naming convenience wrt. inheritance
    referenceTV = PlainTrack(self._trackNameIntensity).getTrackView(region)
    if not referenceTV.trackFormat.isDense():
        return self._createRandomizedNumpyArraysFromBinaryIntensity(
            binLen, starts, ends, vals, strands, ids, edges, weights, extras,
            referenceTV)
    raise InvalidRunSpecException(
        'Error: Intensity needs to be a binary (non-dense) track')
def getMutatedSequence(cls, genome, regionDict, pointDict=None):
    """Return FASTA-style entries of the ['Sequence', 'DNA'] track for the
    given regions, with optional point substitutions applied.

    Args:
        genome: genome passed to GenomeRegion.
        regionDict: maps chromosome -> iterable of (start, end) pairs.
        pointDict: optional; maps chromosome -> iterable of
            (regionStart, index, value) entries. Entries whose first item
            equals a region's start replace the value at `index` within
            that region. When `value` contains '>', only its last
            character is used.

    Returns:
        defaultdict(list) mapping chromosome -> list of strings of the
        form '>chrom start+1-end\\nSEQUENCE'.

    A chromosome that is absent from `pointDict` is treated as having no
    substitutions instead of raising KeyError.
    """
    resultDict = defaultdict(list)
    fastaTrack = PlainTrack(['Sequence', 'DNA'])

    for chrom, regions in regionDict.items():
        chromPoints = pointDict.get(chrom, []) if pointDict else []
        for start, end in regions:
            seqTv = fastaTrack.getTrackView(GenomeRegion(genome, chrom, start, end))
            valList = list(seqTv.valsAsNumpyArray())

            for point in chromPoints:
                # Only substitutions registered for this region's start apply.
                if point[0] != start:
                    continue
                index, val = point[1:]
                valList[index] = val[-1] if val.find('>') >= 0 else val

            resultDict[chrom].append(
                '>%s %i-%i\n%s' % (chrom, start + 1, end, ''.join(valList)))

    return resultDict
def smoothPoints(genome, inTrackName, windowSize, chr): from gold.extra.SlidingWindow import SlidingWindow from quick.util.GenomeInfo import GenomeInfo from gold.track.Track import PlainTrack from gold.track.GenomeRegion import GenomeRegion #func = lambda x: ( sum( [r.dnorm(i-len(x)/2.0,0,2000)*x[i].end for i in range(len(x)) if x[i]!=None] ) / sum( [r.dnorm(i-len(x)/2.0,0,2000)*1 for i in range(len(x)) if x[i]!=None] ) ) if len([y for y in x if y!=None])>0 else 0 chrReg = GenomeRegion(genome, chr, 0, GenomeInfo.getChrLen(genome,chr) ) #chrReg = GenomeElement(genome, chr, 0, 3000) inTrackView = PlainTrack(inTrackName).getTrackView(chrReg) print [x.end() for x in inTrackView] slidingWindows = SlidingWindow(GenomeElementTvWrapper(inTrackView), windowSize) print [x for x in weightedValForWindowsYielder(slidingWindows, windowSize)]
def execute(cls, choices, galaxyFn=None, username=''):
    """Write the elements of the selected track to `galaxyFn` through
    cls.WriteExpandedElementsToFile.

    choices[0] is the genome, choices[1] selects the source. With
    'History', choices[2] is a colon-separated Galaxy track name whose
    second field is the track type; the history file is copied to a
    temporary path and read as BED ('marked.bed', 'category.bed', 'bed')
    or GTrack ('gtrack', whose header lines are forwarded). Otherwise
    choices[3] is a colon-separated track name that is read chromosome by
    chromosome, writing the header only for the first chromosome.

    All files are opened in `with` blocks and the temporary copy is removed
    in a `finally` clause, so nothing is leaked when a step fails.
    The incoming `username` value is not used; it is overwritten with six
    random lowercase letters in the history branch.
    """
    genome = choices[0]
    trackItem = choices[3]
    # NOTE(review): the result is never used; the call is kept in case it
    # has side effects - confirm and remove if it does not.
    chromRegsPath = GenomeInfo.getChrRegsFn(genome)
    chrSizeDict = dict([(chrom, GenomeInfo.getChrLen(genome, chrom))
                        for chrom in GenomeInfo.getChrList(genome)])
    geSource = headLinesStr = None

    with open(galaxyFn, 'w') as outputFile:
        if choices[1] == 'History':
            trackType = choices[2].split(':')[1]
            # Random prefix + timestamp gives a temp file name unlikely to clash.
            username = ''.join([chr(random.randint(97, 122)) for i in range(6)])
            tempFn = createCollectedPath(
                genome, [],
                username + '_'.join([str(v) for v in time.localtime()[:6]]) + '.' + trackType)
            fnSource = ExternalTrackManager.extractFnFromGalaxyTN(choices[2].split(':'))
            try:
                with open(fnSource, 'r') as srcFile:
                    with open(tempFn, 'w') as tmpFile:
                        tmpFile.write(srcFile.read())

                # For any other track type geSource stays None, as before.
                if trackType in ['marked.bed', 'category.bed', 'bed']:
                    geSource = GenomeElementSorter(BedGenomeElementSource(tempFn, genome=genome)).__iter__()
                elif trackType == 'gtrack':
                    geSource = GenomeElementSorter(GtrackGenomeElementSource(tempFn, genome=genome)).__iter__()
                    headLinesStr = geSource.getHeaderLines().replace('##', '\n##')

                cls.WriteExpandedElementsToFile(geSource, chrSizeDict, outputFile,
                                                headLinesStr, writeHeaderFlag=True)
            finally:
                if os.path.exists(tempFn):
                    os.remove(tempFn)
        else:
            writeHeaderFlag = True
            for chrom in GenomeInfo.getChrList(genome):
                gRegion = GenomeRegion(genome, chrom, 0, chrSizeDict[chrom])
                plTrack = PlainTrack(trackItem.split(':'))
                geSource = GenomeElementTvWrapper(plTrack.getTrackView(gRegion)).__iter__()
                cls.WriteExpandedElementsToFile(geSource, chrSizeDict, outputFile,
                                                headLinesStr, writeHeaderFlag)
                writeHeaderFlag = False
def extractToFile(self, fn, outTrackName): append = False for region in GlobalBinSource(self._genome): print 'Creating segmentation for chr: ', region.chr trackView = PlainTrack(self._inTrackName).getTrackView(region) teSource = FunctionCategorizerWrapper(trackView, self._categorizerMethod, minSegLen=self._minSegLen) teSource.trackFormat = TrackFormat.createInstanceFromPrefixList( ['start', 'end', 'val']) TrackExtractor._extract(teSource, outTrackName, region, fn, append=append, globalCoords=True, addSuffix=True) append = True
def get_reference_allele(genome, chr, pos, len=1):
    """Return the upper-cased reference sequence of `len` bases starting at
    the 1-based position `pos` on chromosome `chr`.

    Args:
        genome: genome passed to GenomeRegion / GenomeInfo.
        chr: chromosome name.
        pos: 1-based position as a string; surrounding whitespace is ignored.
        len: number of bases to read (default 1).

    Returns:
        The sequence string; None when `pos` is not a positive integer
        string; '-' when reading the sequence raises (the exception is
        printed).

    Position "0" is rejected as invalid: it would otherwise be converted
    to the 0-based offset -1. (The former `< 0` test could never trigger,
    because isdigit() already rejects a leading minus sign.)
    """
    pos = pos.strip()
    if not pos.isdigit() or int(pos) < 1:
        return None

    bpos = int(pos) - 1  # convert to 0-based offset
    try:
        genReg = GenomeRegion(genome, chr, bpos, bpos + len)
        seqTV = PlainTrack(GenomeInfo.getSequenceTrackName(genome)).getTrackView(genReg)
        #ge = seqTV.next()
        #return ge.val().upper()
        return ''.join([ge.val().upper() for ge in seqTV])
    except Exception as e:
        # Best effort: report the problem and return a placeholder.
        print(e)
        return '-'
def __iter__(self):
    """Yield the weighted sum w1 * value1 + w2 * value2 for each index of
    the first track's value array on chromosome self.chr.

    self.trackName1 and self.trackName2 are both read over the whole
    chromosome; the second array is indexed with the first array's indices.
    """
    genome, chrom = self.genome, self.chr
    firstWeight, secondWeight = self.w1, self.w2
    wholeChrom = GenomeRegion(genome, chrom, 0, GenomeInfo.getChrLen(genome, chrom))

    firstVals = PlainTrack(self.trackName1).getTrackView(wholeChrom).valsAsNumpyArray()
    secondVals = PlainTrack(self.trackName2).getTrackView(wholeChrom).valsAsNumpyArray()

    for idx in xrange(len(firstVals)):
        yield firstWeight * firstVals[idx] + secondWeight * secondVals[idx]
# Example script: compute CountStat for one track over one region and print
# the result.
from gold.track.Track import PlainTrack
from gold.track.GenomeRegion import GenomeRegion
from gold.statistic.CountStat import CountStat

#create a track
track = PlainTrack(['Genes and gene subsets', 'Genes', 'Refseq'])

#create a region of interest
region = GenomeRegion('hg18', 'chr1', 1000, 900000)

#create a statistic
stat = CountStat(region, track)
print stat.getResult()

#What happens now:
#CountStat inherits MagicStatFactory
#MagicStatFactory determines that the region may be split into smaller bins and looks for a CountStatSplittable.
#CountStatSplittable exists, and is instantiated.
#getResult first calls createChildren. CountStatSplittable now creates a new CountStat for a smaller first region.
#This time, when MagicStatFactory handles CountStat creation, it sees that the region in question should not be split.
#MagicStatFactory thus instantiates a CountStatUnsplittable, which loads track data, and does the count for its small bin.
#This is repeated for each small bin, and results are collected by CountStatSplittable.
#Finally, the method combineResults (of CountStatSplittable) computes the total results for the queried region and returns this.
from gold.track.Track import Track, PlainTrack
from proto.hyperbrowser.HtmlCore import HtmlCore
from quick.application.ExternalTrackManager import ExternalTrackManager
from quick.application.UserBinSource import GlobalBinSource
from quick.multitrack.MultiTrackCommon import getGSuiteFromGalaxyTN

# Module-level state.
# One (initially empty) list per slot, 25 slots each.
# NOTE(review): presumably one slot per chromosome - confirm against the code
# that fills them.
snps = [[] for chromosome in range(0, 25)]
snpsTrack = None
peaks = [[] for chromosome in range(0, 25)]
transcription_factors = []
motif = None
motifs = {} # Dict holding all the motifs
regions = []
genome = None
# Track handle for the ['Sequence', 'DNA'] track.
fastaTrack = PlainTrack(['Sequence', 'DNA'])

# NOTE(review): "TRESHOLD" is a misspelling of "THRESHOLD"; the name is kept
# because other code may reference it.
BINDING_PROB_TRESHOLD = 0.5
BINDING_A_PRIORI_PROB = 0.01


class SNP():
    """Plain record of one SNP: chromosome, position and the two alleles
    (mutationFrom, mutationTo)."""

    def __init__(self, chromosome, position, mutationFrom, mutationTo):
        self.chromosome = chromosome
        self.position = position
        self.mutationFrom = mutationFrom
        self.mutationTo = mutationTo

    def __repr__(self):
        # The chromosome is not part of the textual representation.
        return "Pos: %s %s - %s" % (self.position, self.mutationFrom, self.mutationTo)
def _getGeSourceForRegion(cls, genome, outTrackName, region, inTrackName, windowSize, func):
    """Return a CustomTrackGenomeElementSource that applies `func` to a
    TrackViewBasedSlidingWindow (of `windowSize`) over the track view of
    `inTrackName` in `region`.
    """
    slidingWindow = TrackViewBasedSlidingWindow(
        PlainTrack(inTrackName).getTrackView(region), windowSize)
    return CustomTrackGenomeElementSource(
        slidingWindow, genome, outTrackName, region.chr, func)
# execute(cls, choices, galaxyFn=None, username=''):
# Builds per-chromosome heatmap images for up to three tracks and writes a
# tabbed HTML overview page to galaxyFn.
#
# Inputs read from `choices`:
#   choices[0]  - genome
#   choices[10] - bin size, parsed with parseShortenedSizeSpec
#   choices[1], choices[4], choices[7] - source selector of track 1, 2 and 3:
#       '-- No track --', '' or None -> the track is skipped;
#       'history' -> the file name is extracted from choices[index+1];
#       anything else -> a PlainTrack is built from choices[index+2] and paired
#       with whole-chromosome regions for every chromosome of the genome.
#
# Processing, as visible in the code:
#   * each selected track is passed to cls.getValuesFromBedFile with the colour
#     red, green or blue (white when only one track is selected);
#   * the per-track results are merged with cls.syncResultDict;
#   * for every chromosome that has data (sorted with `alphanum`) an image is
#     filled pixel by pixel following the layout from cls.getResult, saved in
#     a small and a big size, and a '<chrom>Zooming.html' page is written from
#     htmlTemplate together with colour-legend images and a value-range table;
#   * finally the tabbed page (htmlPageTemplate) is written to galaxyFn.
#
# NOTE(review): `trackNames` is the repr() string of a list, so
#   `trackNames[counter]` yields a single character, not a track name.
# NOTE(review): `microDictList` is only consumed by commented-out code.
# NOTE(review): output files are written with `open(...).write(...)` and never
#   closed explicitly.
# NOTE(review): this definition has lost its line structure; the code below is
#   left untouched.
def execute(cls, choices, galaxyFn=None, username=''): #val = strVal.split(':')[1].split('k')[0]; htmlTemplate = '''<html><head>\n\n<link href="http://ajax.googleapis.com/ajax/libs/jqueryui/1.8/themes/base/jquery-ui.css" rel="stylesheet" type="text/css"/>\n <script src="http://ajax.googleapis.com/ajax/libs/jquery/1.5/jquery.min.js"></script>\n <script src="http://ajax.googleapis.com/ajax/libs/jqueryui/1.8/jquery-ui.min.js"></script>\n <script type='text/javascript' src='https://www.google.com/jsapi'></script> <script type='text/javascript'> google.load("visualization", "1", {packages:["corechart"]});\n google.setOnLoadCallback(drawLine); function drawLine(divId) {\n} </script> <style type="text/css">\n #slider { margin: 10px; }\n </style>\n <script type="text/javascript">\n jQuery(document).ready(function() {\n jQuery("#slider").slider({min: 0, value: 370, max: %i });\n });\n </script>\n\n\n <link rel="stylesheet" type="text/css" href="http://hyperbrowser.uio.no/gsuite/static/hyperbrowser/files/kaitre//image_zoom/styles/stylesheet.css" /> \n<script language="javascript" type="text/javascript" src="http://hyperbrowser.uio.no/gsuite/static/hyperbrowser/files/kaitre//image_zoom/scripts/mootools-1.2.1-core.js">\n</script><script language="javascript" type="text/javascript" src="http://hyperbrowser.uio.no/gsuite/static/hyperbrowser/files/kaitre//image_zoom/scripts/mootools-1.2-more.js">\n</script><script language="javascript" type="text/javascript" src="http://hyperbrowser.uio.no/gsuite/static/hyperbrowser/files/kaitre//image_zoom/scripts/ImageZoom.js"></script>\n \n\n\n\n<script type="text/javascript" >\nliste =%s;\ncounter = 0;\n\n\nfunction point_it2(event){\n document.myform.posAnchor.value = ""; chrom = %s;\n trackNames = %s; pos_x = event.offsetX?(event.offsetX):event.pageX-document.getElementById("zoomer_image").offsetLeft;\n pos_y = event.offsetY?(event.offsetY):event.pageY-document.getElementById("zoomer_image").offsetTop;\n factor = %i;\n pos_x = 
Math.floor(pos_x/factor);\n pos_y = Math.floor(pos_y/factor);\n counter++;\n var strVal = liste[pos_y][pos_x]; var strTab = strVal.split(","); val = strTab[0]; streng = chrom+":"+strTab[0]+"k | "; for(i=0; i<trackNames.length; i++) { streng = streng + trackNames[i]+': '+strTab[i+1]+'%% | '; } document.myform.posAnchor.value = streng;\n jQuery( "#slider" ).slider( "option", "value", val );\n }\n</script>\n\n\n\n\n</head> <body> <h2 align="center" style="color:#FF7400;">Heatmap for chromosome %s</h2> <div id="slider" ></div><br> \n<form name="myform" action="http://www.mydomain.com/myformhandler.cgi" method="POST">\n<div align="center">\n\n<input type="text" name="posAnchor" size="250" value=".">\n<br>\n</div>\n</form>\n<br> <div id="container"><!-- Image zoom start --><div id="zoomer_big_container"></div><div id="zoomer_thumb">\n<a href="%s" target="_blank" >\n<img src="%s" /></a></div><!-- Image zoom end --></div>\n\n\n%s <br/>%s</body></html>''' # onchange="jQuery('zoomer_region').css({ 'left': '31px', 'top': '15px'});" tableRowEntryTemplate = """<div class="tabbertab"><h2>%s</h2><a href="%s"><img src="%s" /></a></div>""" htmlPageTemplate = """<html><head>\n<script type="text/javascript" src="/gsuite/static/scripts/tabber.js"></script>\n<link href="/gsuite/static/style/tabber.css" rel="stylesheet" type="text/css" />\n </head><body>%s</body></html>""" #fileDict = dict() binsize = parseShortenedSizeSpec(choices[10]) tnList = [] trackNameList = [] genome = choices[0] chrLength = GenomeInfo.getStdChrLengthDict(genome) for index in [1,4,7]: startTime = time.time() if choices[index] in ['-- No track --','',None]: tnList.append(None) trackNameList.append('.') continue elif choices[index] == 'history': #trackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(choices[0], choices[index+1].split(':')) trackName = choices[index+1].split(':') tnList.append(ExternalTrackManager.extractFnFromGalaxyTN(trackName)) trackNameList.append(prettyPrintTrackName(trackName)) 
else: trackName = choices[index+2].split(':') track = PlainTrack(trackName) regionList = [GenomeRegion(genome, chrom, 0, chrLength[chrom]) for chrom in GenomeInfo.getChrList(genome)] tnList.append((track, regionList)) trackNameList.append(prettyPrintTrackName(trackName)) trackNames = repr([v for v in trackNameList if v!='.']) tr1, tr2, tr3 = tnList ResultDicts = [] maxVals = []#list of the maximum coverage in a bin for each track Used for normalization purposes chrsWithData = set()# set of chromosomes with any data. No point in generating images with no data... microDictList = [] counter = 0 for tr,color in [(tr1, (1,0,0)),(tr2, (0,1,0)),(tr3, (0,0,1))]: maxVal = None if tr: if len([1 for v in tnList if v]) == 1: color = (1,1,1) res, microDict, maxVal, trackChrs = cls.getValuesFromBedFile(genome, tr,color, binsize) microDictList.append((trackNames[counter],microDict)) counter+=1 chrsWithData = chrsWithData|trackChrs ResultDicts += [res] maxVals.append(maxVal) htmlTableContent = [] resultDict = cls.syncResultDict(ResultDicts) binfactor = binsize/1000 for chrom in sorted(list(chrsWithData), cmp=alphanum): valList = resultDict[chrom] areaList = [] #For doing recursive pattern picture bigFactor = int(10*(binsize/10000.0)**(0.5)) smallFactor = bigFactor/3 posMatrix = cls.getResult(len(valList), 2,2) javaScriptList = [[0 for v in xrange(len(posMatrix[0])*bigFactor) ] for t in xrange(len(posMatrix)*bigFactor)] rowLen = len(posMatrix[0]) im = Image.new("RGB", (rowLen, len(posMatrix)), "white") for yIndex, row in enumerate(posMatrix): for xIndex, elem in enumerate(row): im.putpixel((xIndex, yIndex), valList[elem]) region = yIndex*rowLen + xIndex #for yVals in range(yIndex*bigFactor, (yIndex+1)*bigFactor): # for xVals in range(yIndex*bigFactor, (yIndex+1)*bigFactor): # javaScriptList[yVals][xVals] = chrom+':'+str(elem)+'-'+str(elem+1)+': '+repr([ v/255.0 for v in valList[elem]]) #javaScriptList[yIndex][xIndex] = chrom+':'+str(elem*binfactor)+'k - '+str((elem+1)*binfactor)+'k 
: '+repr([ trackNameList[indx]+'='+str(round(v*100/255.0, 2))+'%' for indx, v in enumerate(valList[elem])]) javaScriptList[yIndex][xIndex] = ','.join([str(elem*binfactor)]+[ str(round(v*100/255.0, 2)) for indx, v in enumerate(valList[elem]) if trackNameList[indx] !='.'] ) for i in range(len(javaScriptList)): javaScriptList[i] = [v for v in javaScriptList[i] if v !=0] imSmall = im.resize((len(posMatrix[0])*smallFactor, len(posMatrix)*smallFactor)) im2 = im.resize((len(posMatrix[0])*bigFactor, len(posMatrix)*bigFactor)) fileElements = [GalaxyRunSpecificFile(['Recursive', chrom+'.png' ], galaxyFn ), GalaxyRunSpecificFile(['Recursive', chrom+'Big.png' ], galaxyFn), GalaxyRunSpecificFile(['Recursive', chrom+'Zooming.html' ], galaxyFn)] #fileDict['Recursive/'+chrom] = fileElements imSmall.save(fileElements[0].getDiskPath(ensurePath=True)) im2.save(fileElements[1].getDiskPath(ensurePath=True)) trackAndValRangeTab = zip(trackNameList, maxVals) colorTab = [] onlyOneTrack = True if len([v for v in maxVals if v]) ==1 else False for color, vals in [('Red_combination',[1,0,0]), ('Green_combination',[0,1,0]), ('Blue_combination',[0,0,1]),('Red-Green_combination',[1,1,0]), ('Red-Blue_combination',[1,0,1]), ('Green-Blue_combination',[0,1,1]), ('Red-Green-Blue_combination',[1,1,1])]: if not None in [maxVals[i] for i in range(len(vals)) if vals[i]>0]: im = Image.new("RGB", (256 , 1), "white") tracksInvolved = ' & '.join([str(index+1) for index, v in enumerate(vals) if v>0]) if onlyOneTrack: vals = [1,1,1] for val in range(256): colVal = [val*v for v in vals] im.putpixel((val,0), tuple(colVal)) imColFile = GalaxyRunSpecificFile(['Recursive', color+'.png' ], galaxyFn) imCol = im.resize((256, 10)) imCol.save(imColFile.getDiskPath(ensurePath=True)) colorTab.append('<tr><td>Track %s</td><td> <img src="%s" /></td></tr>'% (tracksInvolved, imColFile.getURL())) htmlTnRangeVals= '<br/><br/><table align="center" cellspacing="10"><tr><th>Track number</th><th>Track name</th><th>Value 
range</th></tr>\n' htmlTnRangeVals += '\n'.join(['<tr/><td>Track %i </td><td>%s</td><td> 0 - %i</td></tr>' % (index+1, v[0], v[1]) for index, v in enumerate(trackAndValRangeTab) if v[1]] ) htmlTnRangeVals+='</table> <br/><table align="center" cellspacing="10"><tr><th>Track combination</th><th>Colour range</th></tr>' + '\n'.join(colorTab) + '</table>\n' lineTabStr= '' #if chrom == 'chr1': # tempList = [range(100)]+[v[1]['chr1'][26] for v in microDictList] # chartTemplate = "['%i', %i, %i, %i]" # lineTab = [ chartTemplate % v for v in zip(*tempList)] # lineTemplate = """<div id="%s" onclick="{\nvar data = google.visualization.arrayToDataTable([\n %s\n ]);\nvar options = { title: 'Detailed Graph' };var chart = new google.visualization.LineChart(document.getElementById('%s'));chart.draw(data, options);}" style="width: 1000px; height: 700px;"></div>""" # lineTabStr = lineTemplate % ('line_div', ', '.join(lineTab),'line_div') open(fileElements[2].getDiskPath(ensurePath=True),'w').write(htmlTemplate % (int(GenomeInfo.getChrLen(genome, chrom)/1000.0)+1, repr(javaScriptList), repr(chrom), trackNames,bigFactor, chrom, fileElements[1].getURL(), fileElements[0].getURL(), htmlTnRangeVals, lineTabStr) )# htmlTableContent.append(tableRowEntryTemplate % (chrom, fileElements[2].getURL(), fileElements[0].getURL())) # FOr doing normal picture #columns = int(round((len(valList)/1000)+0.5)) #im = Image.new("RGB", (1000, columns), "white") #y=-1 #for index, valuTuple in enumerate(valList): # x = index%1000 # # if x == 0: # y+=1 # try: # im.putpixel((x, y), valuTuple) # except: # pass #im.save(chrom+'.png') #htmlTableContent.append(tableRowEntryTemplate % (chrom, chrom+'.png')) tabberMal = '<div class="tabber">%s</div>' #tempRes, res = [],[] res = [tabberMal % v for v in htmlTableContent] #for i in htmlTableContent: # if len(tempRes) == 10: # res.append(tabberMal % '\n'.join(tempRes)) # tempRes = [] # tempRes.append(i) #if len(tempRes)>0: # res.append(tabberMal % '\n'.join(tempRes)) 
open(galaxyFn,'w').write(htmlPageTemplate % ('<br/>'.join(res)))
def getSingleTrackTS(genome, guiSelectedTrack, title='Dummy'):
    """Return a SingleTrackTS for the track selected in the GUI.

    The track name is resolved through
    ExternalTrackManager.getPreProcessedTrackFromGalaxyTN; the metadata
    holds only the given `title`.
    """
    preProcessedName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(
        genome, guiSelectedTrack)
    metadata = {'title': title}
    return SingleTrackTS(PlainTrack(preProcessedName), metadata)
def __init__(self, userBinSource, genome, **kwArgs):
    """Set up a StatJob that runs AssemblyGapCoverageStat on the 'gaps' track.

    The track name is looked up with GenomeInfo.getPropertyTrackName for the
    given genome; no second track is used (None is passed in its place).
    Extra keyword arguments are forwarded unchanged to StatJob.__init__.
    """
    gapsTrackName = GenomeInfo.getPropertyTrackName(genome, 'gaps')
    gapsTrack = PlainTrack(gapsTrackName)
    StatJob.__init__(
        self,
        userBinSource,
        gapsTrack,
        None,
        AssemblyGapCoverageStat,
        **kwArgs
    )
def create_track(file_name, trackName, genome="hg18"):
    """Import a track file and return it wrapped in a SingleTrackTS.

    file_name -- path of the file handed to gtrackcore's importFile.
    trackName -- name the track is imported under; it is also used as the
                 single element of the PlainTrack name list and as the
                 'title' entry of the returned SingleTrackTS metadata.
    genome    -- genome the file is imported for. Defaults to "hg18", the
                 value that used to be hard-coded, so existing two-argument
                 callers behave exactly as before.
    """
    # Kept as a function-local import, as in the original code.
    from gtrackcore.core.Api import importFile

    importFile(file_name, genome=genome, trackName=trackName)
    track = PlainTrack([trackName])
    return SingleTrackTS(track, {"title": trackName})
def execute(cls, choices, galaxyFn=None, username=''):
    """Run the statistic selected in the GUI with debug logging and show it.

    The GUI choices are first stored in the shelve at cls.SHELVE_FN, keyed by
    the statistic class name. The statistic is then run twice: once directly
    through its 'Unsplittable' class on the first user bin (__init__,
    createChildren, getResult -- presumably so a failure shows which step
    broke), and once as a StatJob over all user bins, whose result is handed
    to GalaxyInterface._viewResults together with galaxyFn.

    Layout of choices as read by this method:
      choices[0]  statistic class name (also the key into STAT_CLASS_DICT)
      choices[1]  genome
      choices[2]  'yes' to prepend cls.STD_PREFIX_TN to the name of track 1
      choices[3]  colon-separated name of track 1, or cls.NO_TRACK_SHORTNAME
      choices[4]  'yes' to prepend cls.STD_PREFIX_TN to the name of track 2
      choices[5]  colon-separated name of track 2, or cls.NO_TRACK_SHORTNAME
      from cls.FIRST_EXTRA_PARAM_BOX_NUMBER on: alternating label/value
                  boxes, one pair per entry of cls._extraParams; the first
                  two values are the region and bin specification given to
                  UserBinSource.

    username is accepted but not used. Any exception raised while running the
    statistic is printed and re-raised.

    NOTE(review): takes cls, so it is presumably bound as a classmethod; the
    decorator is not visible in this part of the file.
    """
    noTrack = cls.NO_TRACK_SHORTNAME
    shelveDict = {
        'track1': choices[3] if choices[3] != noTrack else None,
        'track2': choices[5] if choices[5] != noTrack else None,
    }

    print(len(choices))
    print(cls._extraParams)

    firstBox = cls.FIRST_EXTRA_PARAM_BOX_NUMBER
    numExtraParams = len(cls._extraParams)

    # Record the stripped content of every extra-parameter value box.
    for i in range(numExtraParams):
        valueBox = firstBox + 2 * i + 1
        shelveDict[valueBox] = choices[valueBox].strip()

    # Close the shelve even if storing the entry fails, so the handle is
    # never left open.
    debugInfoShelve = safeshelve.open(cls.SHELVE_FN)
    try:
        debugInfoShelve[choices[0]] = shelveDict
    finally:
        debugInfoShelve.close()

    try:
        from gold.application.LogSetup import setupDebugModeAndLogging
        setupDebugModeAndLogging()

        print('Getting Unsplittable statClass')
        statClassName = choices[0]

        print('Preparing arguments to init')
        unsplittableStatClass = MagicStatFactory._getClass(
            statClassName, 'Unsplittable')
        genome = choices[1]

        from gold.track.Track import PlainTrack
        prefixTN1 = cls.STD_PREFIX_TN if choices[2] == 'yes' else []
        tn1 = prefixTN1 + choices[3].split(':')
        track1 = PlainTrack(tn1) if choices[3] != noTrack else None

        prefixTN2 = cls.STD_PREFIX_TN if choices[4] == 'yes' else []
        tn2 = prefixTN2 + choices[5].split(':')
        track2 = PlainTrack(tn2) if choices[5] != noTrack else None

        regVal = choices[firstBox + 1]
        binSpecVal = choices[firstBox + 3]
        ubSource = UserBinSource(regVal, binSpecVal, genome=genome)
        # Only the first bin is used for the direct, step-by-step run below.
        region = list(ubSource)[0]

        kwArgs = {}
        # NOTE(review): the loop starts at 0, so the region and bin
        # specification pairs are forwarded as keyword arguments too. An
        # older, commented-out variant started at 2 (with a "> 2" guard);
        # confirm which is intended.
        if numExtraParams > 3:
            for i in range(numExtraParams):
                label = choices[firstBox + 2 * i]
                # Parameter name is the label text before any '('.
                # partition() keeps the whole label when there is no '(',
                # whereas slicing with find() (-1) dropped its last character.
                param = label.partition('(')[0].strip()
                val = choices[firstBox + 2 * i + 1].strip()
                if val != '':
                    kwArgs[param] = val

        print('Calling __init__')
        statObj = unsplittableStatClass(region, track1, track2, **kwArgs)

        print('Calling createChildren')
        statObj.createChildren()

        print('Calling getResult')
        statObj.getResult()

        print('Running StatJob')
        magicStatClass = STAT_CLASS_DICT[statClassName]
        res = StatJob(ubSource, track1, track2, magicStatClass, **kwArgs).run()

        from quick.application.GalaxyInterface import GalaxyInterface
        GalaxyInterface._viewResults([res], galaxyFn)
    except Exception as e:
        # Two spaces reproduce the output of the former "print 'Error: ', e".
        print('Error:  %s' % (e,))
        raise
# the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # The Genomic HyperBrowser is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with The Genomic HyperBrowser. If not, see <http://www.gnu.org/licenses/>. from gold.track.Track import PlainTrack from gold.track.GenomeRegion import GenomeRegion #create a track track = PlainTrack(['Genes and gene subsets','Genes','Refseq']) #track = PlainTrack(['DNA structure','Bendability']) #create a region of interest region = GenomeRegion('hg18','chr1',1000,900000) #Could instead have been iterator of regions, e.g. genome-wide: #from quick.application.UserBinSource import UserBinSource #regionIter = UserBinSource('*','*','hg18') #for region in regionIter: # pass #print 'Last region of iter: ', region #iterate through elements of the track in this region for element in track.getTrackView(region): #just print the intervals for now..