def MakeCircosConfFile(dataset, galaxyFn, outputFn):
        """Write a Circos configuration file and return the command that runs it.

        dataset  -- mapping from a data file name to a dict of settings. Each
                    settings dict needs 'type', 'r0' and 'r1'; entries of type
                    'line' or 'histogram' additionally need 'min' and 'max'.
                    Entries of type 'highlight' become <highlight> blocks;
                    any other type is ignored.
        galaxyFn -- Galaxy output file name; 'circos.conf' is created as a
                    run-specific file relative to it.
        outputFn -- the directory part of this path is used as the Circos
                    image output directory.

        Returns the shell command string that invokes circos with the
        generated configuration file.
        """
        conf_template = """<<include etc/colors_fonts_patterns.conf>>
        <ideogram>
        <spacing>
        default = 0.005r
        break   = 0.5r
        axis_break_at_edge = yes
        axis_break         = yes
        axis_break_style   = 2
        <break_style 1>
        stroke_color = black
        fill_color   = blue
        thickness    = 0.25r
        stroke_thickness = 2
        </break>
        <break_style 2>
        stroke_color     = black
        stroke_thickness = 2
        thickness        = 1.5r
        </break>
        </spacing>
        
        #<<include ideogram.position.conf>>
        radius           = 0.85r
        thickness        = 30p
        fill             = yes
        fill_color       = black
        stroke_thickness = 2
        stroke_color     = black
        
        #<<include ideogram.label.conf>>
        show_label       = yes
        label_font       = default
        label_radius     = dims(ideogram,radius) + 0.075r
        label_size       = 36
        label_parallel   = yes
        label_case       = upper
        
        #<<include bands.conf>>
        show_bands            = yes
        fill_bands            = yes
        band_stroke_thickness = 2
        band_stroke_color     = white
        band_transparency     = 3
        
        
        </ideogram>
        
        
        #<<include ticks.conf>>
        show_ticks          = yes
        show_tick_labels    = yes
        
        <ticks>
        tick_separation      = 3p
        label_separation     = 5p
        radius               = dims(ideogram,radius_outer)
        multiplier           = 1e-6
        color          = black
        size           = 20p
        thickness      = 4p
        label_offset   = 5p
        format         = %%d
        
        <tick>
        spacing        = 1u
        show_label     = yes
        label_size     = 16p
        </tick>
        
        <tick>
        spacing        = 5u
        show_label     = yes
        label_size     = 18p
        </tick>
        
        <tick>
        spacing        = 10u
        show_label     = yes
        label_size     = 20p
        </tick>
        
        <tick>
        spacing        = 20u
        show_label     = yes
        label_size     = 24p
        </tick>
        </ticks>
        
        karyotype   = %s
        #data/karyotype/karyotype.human.hg19_mod.txt
        
        <image>
        
        dir   = %s
        file  = circos.png
        png   = yes
        svg   = no
        # radius of inscribed circle in image
        radius         = 1500p
        # by default angle=0 is at 3 o'clock position
        angle_offset      = -90
        #angle_orientation = counterclockwise
        auto_alpha_colors = yes
        auto_alpha_steps  = 5
        background = white
        
        </image>
        
        chromosomes_units = 1000000
        chromosomes_display_default = yes
        
        #chromosomes = hs1;hs2;hs3;hs4;hs5;hs6
        %s
        <<include etc/housekeeping.conf>>
        """

        # Wrapper and per-entry templates for the two kinds of data tracks.
        # Plot entry fields: (file, plotType, r0, r1, minVal, maxVal)
        plots_wrapper = "<plots>\n%s\n</plots>\n"
        plot_entry = "<plot>\nfile=%s\ntype=%s\nline\nr0=%s\nr1=%s\nmin=%s\nmax=%s\ncolor=black\nthickness=2\nextend_bin=no\naxis=yes\naxis_color=lgrey\naxis_thickness=2\naxis_spacing=0.1\n</plot>\n"
        # Highlight entry fields: (file, r0, r1)
        highlights_wrapper = "<highlights>\n%s\n</highlights>\n"
        highlight_entry = "<highlight>\nfile=%s\nr0=%s\nr1= %s\n</highlight>\n"

        karyotype_path = 'data/karyotype/karyotype.human.hg19_mod.txt'

        conf_file = GalaxyRunSpecificFile(['circos.conf'], galaxyFn)
        image_dir = dirname(outputFn)

        # Collect one rendered entry per data file, grouped by section.
        plot_entries = []
        highlight_entries = []
        for data_fn, settings in dataset.items():
            track_type = settings['type']
            if track_type in ('line', 'histogram'):
                plot_entries.append(plot_entry % (
                    data_fn, track_type, settings['r0'], settings['r1'],
                    settings['min'], settings['max']))
            elif track_type == 'highlight':
                highlight_entries.append(highlight_entry % (
                    data_fn, settings['r0'], settings['r1']))

        # A section wrapper is only emitted when it has at least one entry.
        plots_section = ''
        if plot_entries:
            plots_section = plots_wrapper % ''.join(plot_entries)
        highlights_section = ''
        if highlight_entries:
            highlights_section = highlights_wrapper % ''.join(highlight_entries)

        conf_text = conf_template % (karyotype_path, image_dir,
                                     plots_section + highlights_section)
        conf_file.writeTextToFile(conf_text)

        return 'circos -conf %s -noparanoid' % conf_file.getDiskPath()
예제 #2
0
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Is called when execute-button is pushed by web-user. Should print
        output as HTML to standard out, which will be directed to a results page
        in Galaxy history. If getOutputFormat is anything else than HTML, the
        output should be written to the file with path galaxyFn. If needed,
        StaticFile can be used to get a path where additional files can be put
        (e.g. generated image files). choices is a list of selections made by
        web-user in each options box.
        '''
        DebugMixin._setDebugModeIfSelected(choices)
        genome = choices.genome
        gSuite = getGSuiteFromGalaxyTN(choices.gsuite)
        #         fullCategory = AnalysisManager.combineMainAndSubCategories(choices.analysisCategory, 'Basic')
        fullCategory = AnalysisManager.combineMainAndSubCategories(
            'Descriptive statistics', 'Basic')
        tracks = list(gSuite.allTracks())
        analysisName = choices.analysis
        # selectedAnalysis = GSuiteSingleValueAnalysisPerTrackTool \
        #     ._resolveAnalysisFromName(gSuite.genome, fullCategory, tracks[0].trackName, analysisName)

        selectedAnalysis = cls.ANALYSIS_PRETTY_NAME_TO_ANALYSIS_SPEC_MAPPING[
            choices.analysis]

        regSpec, binSpec = UserBinMixin.getRegsAndBinsSpec(choices)
        analysisBins = GalaxyInterface._getUserBinSource(regSpec,
                                                         binSpec,
                                                         genome=genome)
        # paramName, paramValues = selectedAnalysis.getFirstOptionKeyAndValues()
        # if paramName and paramValues:
        #     if len(paramValues) == 1:
        #         selectedAnalysis.addParameter(paramName, paramValues[0])
        #     else:
        #         selectedAnalysis.addParameter(paramName, choices.paramOne)

        tableDict = OrderedDict()

        for track in tracks:
            tableDict[track.title] = OrderedDict()
            result = doAnalysis(selectedAnalysis, analysisBins, [track])
            resultDict = result.getGlobalResult()
            if 'Result' in resultDict:
                track.setAttribute(analysisName.lower(),
                                   str(resultDict['Result']))
                tableDict[
                    track.title][analysisName] = strWithNatLangFormatting(
                        resultDict['Result'])
            else:
                for attrName, attrVal in resultDict.iteritems():
                    attrNameExtended = analysisName + ':' + attrName
                    track.setAttribute(attrNameExtended.lower(), str(attrVal))
                    tableDict[track.title][
                        attrNameExtended] = strWithNatLangFormatting(attrVal)
                    # assert isinstance(resultDict['Result'], (int, basestring, float)), type(resultDict['Result'])

        core = HtmlCore()
        core.begin()
        core.header('Results: ' + analysisName)

        def _produceTable(core, tableDict=None, tableId=None):
            return core.tableFromDictOfDicts(tableDict,
                                             firstColName='Track title',
                                             tableId=tableId,
                                             expandable=True,
                                             visibleRows=20,
                                             presorted=0)

        tableId = 'results_table'
        tableFile = GalaxyRunSpecificFile([tableId, 'table.tsv'], galaxyFn)
        tabularHistElementName = 'Raw results: ' + analysisName

        gsuiteFile = GalaxyRunSpecificFile(
            [tableId, 'input_with_results.gsuite'], galaxyFn)
        GSuiteComposer.composeToFile(gSuite, gsuiteFile.getDiskPath())
        gsuiteHistElementName = \
            getGSuiteHistoryOutputName('result', ', ' + analysisName, choices.gsuite)

        core.tableWithImportButtons(
            tabularFile=True,
            tabularFn=tableFile.getDiskPath(),
            tabularHistElementName=tabularHistElementName,
            gsuiteFile=True,
            gsuiteFn=gsuiteFile.getDiskPath(),
            gsuiteHistElementName=gsuiteHistElementName,
            produceTableCallbackFunc=_produceTable,
            tableDict=tableDict,
            tableId=tableId)
        core.end()
        print core
예제 #3
0
    def executeSelfFeature(cls, genome, tracks, track_names, clusterMethod,
                           extra_option, feature, distanceType, kmeans_alg,
                           galaxyFn, regSpec, binSpec):
        '''
        Cluster tracks by a per-bin feature matrix and write HTML links to the
        results into the file at galaxyFn.

        Steps visible in this method:
          * write a batch-script line describing this run to a run-specific
            file and link to it;
          * build the feature matrix with cls.construct_feature_matrix and
            assign it, together with row/column names and distanceType, into
            the R session;
          * for 'Hierarchical clustering': compute an R dist object, write it
            to a text file, dump the feature matrix with dput, plot a
            dendrogram via cls._clusterAndPlotDendrogram and add a heatmap
            reference;
          * for 'K-means clustering': run R kmeans and write cluster
            membership and within-cluster sum of squares to a text file.

        Nothing is clustered if extra_option (or, for k-means, kmeans_alg) is
        still the placeholder "--select--".

        NOTE(review): jobFile is opened here and is not closed anywhere in the
        visible part of this method -- confirm it is closed further down.
        '''

        from proto.RSetup import r
        #regSpec, binSpec = 'bed', '/usit/invitro/data/galaxy/galaxy-dist-hg-dev/./database/files/017/dataset_17084.dat'
        silenceRWarnings()

        jobFile = open(galaxyFn, 'w')
        #         print>>jobFile, 'PARAMS: ', dict(zip('genome, tracks, track_names, clusterMethod, extra_option, feature, distanceType, kmeans_alg, regSpec, binSpec'.split(','), [repr(v)+'<br>'for v in [genome, tracks, track_names, clusterMethod, extra_option, feature, distanceType, kmeans_alg,regSpec, binSpec]])), '<br><br>'
        batchRun = GalaxyRunSpecificFile(['batch_run_job.txt'], galaxyFn)
        # NOTE(review): the heading below is terminated with '<h3/>' rather
        # than '</h3>' -- looks like a typo in the emitted HTML; confirm.
        print >> jobFile, '<h3>Results for the "similarity of positional distribution along the genome" way of clustering<h3/><br/><br/>'
        # Record the parameters of this run as a '$clusterBySelfFeature' line:
        # each track is ':'-joined, and the tracks are then '$'-joined.
        with open(batchRun.getDiskPath(ensurePath=True), 'w') as batchFile:
            print >> batchFile, '$clusterBySelfFeature', (genome, '$'.join([
                ':'.join(t) for t in tracks
            ]), ':'.join(track_names), clusterMethod, extra_option, feature,
                                                          distanceType,
                                                          kmeans_alg, regSpec,
                                                          binSpec)
        print >> jobFile, batchRun.getLink(
            'View batch script line for this analysis<br/>')
        #print>>jobFile, 'Batch script syntax for this analysis:<br>$clusterBySelfFeature', (genome, '$'.join([':'.join(t) for t in tracks]), ':'.join(track_names)  , clusterMethod, extra_option, feature, distanceType, kmeans_alg, regSpec, binSpec), '<br><br>'
        #print>>jobFile, 'signature of method clusterBySelfFeature:<br>', 'clusterBySelfFeature(genome, tracksStr, track_namesStr, clusterMethod, extra_option, feature, distanceType, kmeans_alg, regSpec, binSpec):<br><br><br>'
        # Display names: last element of each track name, with two hard-coded
        # substrings stripped.
        prettyTrackNames = [
            v[-1].replace('RoadMap_', '').replace('.H3K4me1', '')
            for v in tracks
        ]
        #prettyTrackNames = [prettyPrintTrackName(v, shortVersion=True) for v in tracks]
        f_matrix = cls.construct_feature_matrix(genome, tracks, feature,
                                                regSpec, binSpec)
        #print>>jobFile, 'dir f_matrix: ', dir(f_matrix), regSpec, binSpec
        userBinSource = GalaxyInterface._getUserBinSource(
            regSpec, binSpec, genome)
        # Column labels: string form of each user bin, in sorted order.
        binNames = [
            str(bin)
            for binIndex, bin in enumerate(sorted(list(userBinSource)))
        ]
        # Fall back to generic labels when the number of bins does not match
        # the number of matrix columns.
        if len(binNames) != f_matrix.shape[1]:
            binNames = ['Microbin' + str(i) for i in range(f_matrix.shape[1])]
        # Push everything the R code below refers to by name into the R session.
        r.assign('bin_names', binNames)
        r.assign('track_names', prettyTrackNames
                 )  #use as track names, will be shown in clustering figure
        r.assign('f_matrix', f_matrix)
        r.assign('distanceType', distanceType)
        r('row.names(f_matrix) <- track_names')
        r('colnames(f_matrix) <- bin_names')

        if clusterMethod == 'Hierarchical clustering' and extra_option != "--select--":
            #print 'galaxyFn: ', galaxyFn
            figure = GalaxyRunSpecificFile(
                ['cluster_tracks_result_figure.pdf'], galaxyFn)
            figurepath = figure.getDiskPath(ensurePath=True)
            # Distance object 'd' lives in the R session; it is referenced by
            # name when clustering below.
            r('d <- dist(f_matrix, method=distanceType)')
            distTable = r('d')
            distMatrix = GalaxyRunSpecificFile(['distance_matrix_result.txt'],
                                               galaxyFn)
            distMatrixPath = distMatrix.getDiskPath(True)
            # NOTE(review): file handle is not explicitly closed here.
            open(distMatrixPath, 'w').write(str(distTable))
            print >> jobFile, distMatrix.getLink(
                'View the distance matrix for this analysis <br>')
            #with open(distMatrixPath,'w') as distObj:
            #    #distTable = d_matrix.tolist()
            #    core = HtmlCore()
            #    core.tableHeader(['']+track_names,firstRow=True)
            #    rowSize = len(track_names)
            #    index=0
            #    while index<len(distTable):
            #        core.tableLine([track_names[index % rowSize]]+[str(v) for v in distTable[index:index+rowSize]])
            #    #for index, row in enumerate(distTable):
            #    #    core.tableLine([track_names[index]]+[str(v) for v in row])
            #    core.tableFooter()
            #    print>>distObj, str(core)
            #print>>jobFile, distMatrix.getLink('View the distance matrix for this analysis <br>')

            # Size guard is disabled: the matrix is always dumped.
            if True:  #f_matrix.shape[1] <= 100:
                r_f_matrixFile = GalaxyRunSpecificFile(['f-matrix.robj'],
                                                       galaxyFn)
                #', '.join([str(v) for v in row])
                # Write the R text representation of f_matrix to the file.
                r.assign('f_matrix_fn', r_f_matrixFile.getDiskPath(True))
                r('dput(f_matrix, f_matrix_fn)')
                #r_f_matrixFile.writeTextToFile(', '.join(cls.getFlattenedMatrix(f_matrix)) + '\n\nTrack names: '+', '.join(prettyTrackNames)+'\n\nNumber of tracks: '+str(len(prettyTrackNames))+'\n\nbins: +)
                #r_f_matrixFile.writeTextToFile()
                #r_f_matrixFile.writeTextToFile(str(f_matrix)+'\n\n'+str(r.d))
                print >> jobFile, r_f_matrixFile.getLink(
                    'Access the R-representation of the Feature_matrix (text-file)'
                ), '<br/>'

            # 'd' and 'f_matrix' are passed as names of R variables.
            cls._clusterAndPlotDendrogram(figurepath, extra_option, 'd',
                                          'f_matrix', prettyTrackNames)
            print >> jobFile, figure.getLink(
                'View the clustering tree (dendrogram) for this analysis<br>')

            # Size guard is disabled: the else branch below is unreachable.
            if True:  #f_matrix.shape[1] <= 100:
                #heatmap = GalaxyRunSpecificFile(['heatmap_figure.pdf'], galaxyFn)
                #baseDir = os.path.dirname(heatmap.getDiskPath(True))

                # NOTE(review): R variable 'hr' is not assigned in this branch;
                # presumably _clusterAndPlotDendrogram creates it -- confirm.
                # NOTE(review): 'Rows' uses track_names while the R row names
                # use prettyTrackNames -- confirm this is intended.
                resDict = Results([], [], '')
                resDict.setGlobalResult({
                    'result': {
                        'Matrix': f_matrix,
                        'Rows': np.array(track_names),
                        'Cols': np.array(binNames),
                        'Significance': None,
                        'RowClust': r('hr'),
                        'ColClust': None
                    }
                })
                header = 'View the resulting heatmap plot <br>'

                baseDir = GalaxyRunSpecificFile([], galaxyFn).getDiskPath()
                heatPresenter = HeatmapFromNumpyPresenter(
                    resDict, baseDir, header, printDimensions=False)
                print >> jobFile, heatPresenter.getReference('result')

                #heatmap = GalaxyRunSpecificFile(['heatmap_figure.pdf'], galaxyFn)
                #heatmap_path = heatmap.getDiskPath(True)
                #r.pdf(heatmap_path)
                ##cm.colors(256)
                #r.library("gplots")
                #r('heatmap(f_matrix, col=redgreen(75), distfun=function(c) dist(c, method=distanceType), hclustfun=function(c) hclust(c, method=extra_option, members=NULL),Colv=NA, scale="none", xlab="", ylab="", cexRow=0.5, cexCol=0.5, margin=c(8,10))')#Features cluster tracks
                #r('dev.off()')
                ##print>>jobFile, r('dimnames(f_matrix)')
                #print>>jobFile, heatmap.getLink('View the resulting heatmap plot <br>')
            else:
                print >> jobFile, 'Heatmap not generated due to large size ', f_matrix.shape
        elif clusterMethod == 'K-means clustering' and extra_option != "--select--" and kmeans_alg != "--select--":
            textFile = GalaxyRunSpecificFile(
                ['result_of_kmeans_clustering.txt'], galaxyFn)
            textFilePath = textFile.getDiskPath(True)
            # For k-means, extra_option carries the number of clusters.
            extra_option = int(extra_option)
            r.assign('kmeans_alg', kmeans_alg)
            r.assign('extra_option', extra_option)

            r(
                'hr <- kmeans(f_matrix,extra_option,algorithm=kmeans_alg)'
            )  #the number of cluster is gotten from clusterMethod+ tag, instead of 3 used here
            # NOTE(review): 'height' is a component of hclust results; a kmeans
            # result presumably has no such component -- verify that this line
            # is meaningful here.
            r('hr$height <- hr$height/max(hr$height)*10')
            kmeans_output = open(textFilePath, 'w')
            clusterSizes = r('hr$size')  #size of every cluster
            withinSS = r('hr$withinss')
            clusters = r('hr$cluster')
            for index1 in range(
                    extra_option
            ):  #extra_option actually the number of clusters
                #trackInCluster = [k for k,val in clusters.items() if val == index1]
                # 1-based positions of the tracks assigned to cluster
                # index1 + 1 (cluster labels are compared 1-based).
                trackInCluster = [
                    k + 1 for k, val in enumerate(clusters)
                    if val == index1 + 1
                ]  #IS THIS CORRECT, I.E. SAME AS ABOVE??

                print >> kmeans_output, 'Cluster %i(%s objects) : ' % (
                    index1 + 1, str(clusterSizes[index1]))
                # Note: what is written per member is its 1-based position,
                # not the track name.
                for name in trackInCluster:
                    print >> kmeans_output, name, '(This result may be a bit shaky afters some changes in rpy access)'

                print >> kmeans_output, 'Sum of square error for this cluster is : ' + str(
                    withinSS[index1]) + '\n'

            kmeans_output.close()
            print >> jobFile, textFile.getLink(
                'Detailed result of kmeans clustering <br>')

        #cls.print_data(f_matrix, jobFile)
        '''
예제 #4
0
 def execute(cls, choices, galaxyFn=None, username=''):
     """
     Render per-chromosome RGB heatmap images for up to three tracks and
     write a tabbed HTML overview page to galaxyFn.

     choices is indexed positionally in this method:
       choices[0]          -- genome
       choices[1], [4], [7]-- source selector for track 1, 2 and 3
                              ('-- No track --', '' or None means unused;
                              'history' means a history element)
       choices[index+1]    -- history element (':'-separated) for that track
       choices[index+2]    -- track name (':'-separated) for that track
       choices[10]         -- bin size spec, parsed by parseShortenedSizeSpec

     For every chromosome with data, three run-specific files are written
     under 'Recursive/': a small image, a big image and a zooming HTML page.
     Colour legend images are written to the same directory.
     """
     #val = strVal.split(':')[1].split('k')[0];
     # Per-chromosome zoom page. Placeholders, in order: slider max, value
     # matrix ('liste'), chromosome, track names, pixel factor, chromosome
     # (heading), big image URL, small image URL, range tables, line chart.
     htmlTemplate = '''<html><head>\n\n<link href="http://ajax.googleapis.com/ajax/libs/jqueryui/1.8/themes/base/jquery-ui.css" rel="stylesheet" type="text/css"/>\n  <script src="http://ajax.googleapis.com/ajax/libs/jquery/1.5/jquery.min.js"></script>\n  <script src="http://ajax.googleapis.com/ajax/libs/jqueryui/1.8/jquery-ui.min.js"></script>\n
     <script type='text/javascript' src='https://www.google.com/jsapi'></script>
     <script type='text/javascript'> 
       google.load("visualization", "1", {packages:["corechart"]});\n google.setOnLoadCallback(drawLine);
       function drawLine(divId) {\n}
   </script>
     <style type="text/css">\n    #slider { margin: 10px; }\n  </style>\n  <script type="text/javascript">\n  jQuery(document).ready(function() {\n    jQuery("#slider").slider({min: 0, value: 370, max: %i });\n  });\n  </script>\n\n\n  <link rel="stylesheet" type="text/css" href="http://hyperbrowser.uio.no/gsuite/static/hyperbrowser/files/kaitre//image_zoom/styles/stylesheet.css" />
                 \n<script language="javascript" type="text/javascript" src="http://hyperbrowser.uio.no/gsuite/static/hyperbrowser/files/kaitre//image_zoom/scripts/mootools-1.2.1-core.js">\n</script><script language="javascript" type="text/javascript" src="http://hyperbrowser.uio.no/gsuite/static/hyperbrowser/files/kaitre//image_zoom/scripts/mootools-1.2-more.js">\n</script><script language="javascript" type="text/javascript" src="http://hyperbrowser.uio.no/gsuite/static/hyperbrowser/files/kaitre//image_zoom/scripts/ImageZoom.js"></script>\n
     \n\n\n\n<script type="text/javascript" >\nliste =%s;\ncounter = 0;\n\n\nfunction point_it2(event){\n
     document.myform.posAnchor.value = "";
     chrom = %s;\n
     trackNames = %s;
     pos_x = event.offsetX?(event.offsetX):event.pageX-document.getElementById("zoomer_image").offsetLeft;\n	pos_y = event.offsetY?(event.offsetY):event.pageY-document.getElementById("zoomer_image").offsetTop;\n        factor = %i;\n        pos_x = Math.floor(pos_x/factor);\n	pos_y = Math.floor(pos_y/factor);\n	counter++;\n
     var strVal = liste[pos_y][pos_x];
     var strTab = strVal.split(",");
     
     
     val = strTab[0];
     streng = chrom+":"+strTab[0]+"k | ";
     for(i=0; i<trackNames.length; i++) { 
         streng = streng + trackNames[i]+': '+strTab[i+1]+'%% | ';
        }
         
     document.myform.posAnchor.value = streng;\n
     jQuery( "#slider" ).slider( "option", "value", val );\n
     
             }\n</script>\n\n\n\n\n</head>
     <body>
     <h2 align="center" style="color:#FF7400;">Heatmap for chromosome %s</h2> 
     <div id="slider" ></div><br>
     \n<form name="myform" action="http://www.mydomain.com/myformhandler.cgi" method="POST">\n<div align="center">\n\n<input type="text" name="posAnchor" size="250" value=".">\n<br>\n</div>\n</form>\n<br>
     <div id="container"><!-- Image zoom start --><div id="zoomer_big_container"></div><div id="zoomer_thumb">\n<a href="%s" target="_blank" >\n<img src="%s" /></a></div><!-- Image zoom end --></div>\n\n\n%s
      
      <br/>%s</body></html>''' # onchange="jQuery('zoomer_region').css({ 'left': '31px', 'top': '15px'});"
     
     # One tab per chromosome: (title, link to zoom page, thumbnail URL).
     tableRowEntryTemplate = """<div class="tabbertab"><h2>%s</h2><a href="%s"><img src="%s" /></a></div>"""
     # Outer overview page written to galaxyFn.
     htmlPageTemplate = """<html><head>\n<script type="text/javascript" src="/gsuite/static/scripts/tabber.js"></script>\n<link href="/gsuite/static/style/tabber.css" rel="stylesheet" type="text/css" />\n
                 </head><body>%s</body></html>"""
     
     #fileDict = dict()
     binsize = parseShortenedSizeSpec(choices[10])
         
     # tnList gets exactly one entry per track slot (None when unused), so it
     # can be unpacked into three values below. trackNameList uses '.' as the
     # placeholder name for an unused slot.
     tnList = []
     trackNameList = []
     genome = choices[0]
     chrLength = GenomeInfo.getStdChrLengthDict(genome)
     
     for index in [1,4,7]:
         # NOTE(review): startTime is assigned but never read in this method.
         startTime = time.time()
         if choices[index] in ['-- No track --','',None]:
             tnList.append(None)
             trackNameList.append('.')
             continue
         elif choices[index] == 'history':
             # History element: stored as the file name extracted from it.
             #trackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(choices[0], choices[index+1].split(':'))
             trackName = choices[index+1].split(':')
             tnList.append(ExternalTrackManager.extractFnFromGalaxyTN(trackName))
             trackNameList.append(prettyPrintTrackName(trackName))
         else:
             # Other track: stored as (PlainTrack, one whole-chromosome region
             # per chromosome of the genome).
             trackName = choices[index+2].split(':')
             track = PlainTrack(trackName)
             regionList = [GenomeRegion(genome, chrom, 0, chrLength[chrom]) for chrom in GenomeInfo.getChrList(genome)]
             tnList.append((track, regionList))    
             trackNameList.append(prettyPrintTrackName(trackName))
             
             
     # repr of the used track names; inserted verbatim into the JavaScript of
     # the zoom page as the 'trackNames' array.
     trackNames = repr([v for v in trackNameList if v!='.'])
     tr1, tr2, tr3 = tnList
     ResultDicts = []
     maxVals = []#list of the maximum coverage in a bin for each track Used for normalization purposes
     chrsWithData = set()# set of chromosomes with any data. No point in generating images with no data...
     microDictList = []
     counter = 0
     # Each track slot is tied to one colour channel: red, green, blue.
     for tr,color in [(tr1, (1,0,0)),(tr2, (0,1,0)),(tr3, (0,0,1))]:
         
         maxVal = None
         if tr:
             # With a single track in total, all three channels are used.
             if len([1 for v in tnList if v]) == 1:
                 color = (1,1,1)
             
             res, microDict, maxVal, trackChrs = cls.getValuesFromBedFile(genome, tr,color, binsize)
             # NOTE(review): trackNames is a repr string at this point, so
             # trackNames[counter] is a single character, not a track name.
             # microDictList is only read by commented-out code further down.
             microDictList.append((trackNames[counter],microDict))
             counter+=1
             chrsWithData = chrsWithData|trackChrs
             ResultDicts += [res]   
         # maxVals keeps one entry per slot; None marks an unused slot.
         maxVals.append(maxVal)
         
     
     htmlTableContent = []
     resultDict = cls.syncResultDict(ResultDicts)
     # NOTE(review): under Python 2 this is integer division when binsize is
     # an int -- confirm the type returned by parseShortenedSizeSpec.
     binfactor = binsize/1000
     for chrom in sorted(list(chrsWithData), cmp=alphanum):
         # valList is indexed by the elements of posMatrix below, and each
         # entry is used as a pixel value for an RGB image.
         valList = resultDict[chrom]
         areaList = []
         #For doing recursive pattern picture
         # Scale factors for the big and small image; smallFactor is
         # presumably an integer division under Python 2 -- TODO confirm.
         bigFactor = int(10*(binsize/10000.0)**(0.5))
         smallFactor = bigFactor/3
         # 2D layout of indices into valList (rows of posMatrix are image rows).
         posMatrix = cls.getResult(len(valList), 2,2)
         # Allocated at the scaled (big image) size, but only the unscaled
         # top-left region is filled in below; the leftover zeros are removed
         # after the fill loop.
         javaScriptList = [[0 for v in xrange(len(posMatrix[0])*bigFactor) ] for t in xrange(len(posMatrix)*bigFactor)]
         rowLen = len(posMatrix[0])
         
         im = Image.new("RGB", (rowLen, len(posMatrix)), "white")
         for yIndex, row in enumerate(posMatrix):
             for xIndex, elem in enumerate(row):
                 im.putpixel((xIndex, yIndex), valList[elem])
                 # NOTE(review): region is computed but never used.
                 region = yIndex*rowLen + xIndex
                 #for yVals in range(yIndex*bigFactor, (yIndex+1)*bigFactor):
                 #    for xVals in range(yIndex*bigFactor, (yIndex+1)*bigFactor):
                 #        javaScriptList[yVals][xVals] = chrom+':'+str(elem)+'-'+str(elem+1)+': '+repr([ v/255.0 for v in valList[elem]])
                 
                 #javaScriptList[yIndex][xIndex] = chrom+':'+str(elem*binfactor)+'k - '+str((elem+1)*binfactor)+'k : '+repr([ trackNameList[indx]+'='+str(round(v*100/255.0, 2))+'%' for indx, v in enumerate(valList[elem])])
                 # Cell text: "<elem*binfactor>,<pct track 1>,<pct track 2>,..."
                 # where each channel value is rescaled from 0-255 to percent
                 # and channels of unused track slots are left out.
                 javaScriptList[yIndex][xIndex] = ','.join([str(elem*binfactor)]+[ str(round(v*100/255.0, 2)) for indx, v in enumerate(valList[elem]) if trackNameList[indx] !='.'] )
         # Drop the never-filled zero cells; rows beyond the unscaled height
         # end up as empty lists.
         for i in range(len(javaScriptList)):
             javaScriptList[i] = [v for v in javaScriptList[i] if v !=0]
             
     
         imSmall = im.resize((len(posMatrix[0])*smallFactor, len(posMatrix)*smallFactor))
         im2 = im.resize((len(posMatrix[0])*bigFactor, len(posMatrix)*bigFactor))
         
         # [0] small image, [1] big image, [2] zooming HTML page.
         fileElements = [GalaxyRunSpecificFile(['Recursive', chrom+'.png' ], galaxyFn ), GalaxyRunSpecificFile(['Recursive', chrom+'Big.png' ], galaxyFn), GalaxyRunSpecificFile(['Recursive', chrom+'Zooming.html' ], galaxyFn)]
         #fileDict['Recursive/'+chrom] = fileElements
         imSmall.save(fileElements[0].getDiskPath(ensurePath=True))
         im2.save(fileElements[1].getDiskPath(ensurePath=True))
         
         trackAndValRangeTab = zip(trackNameList, maxVals)
         colorTab = []
         onlyOneTrack = True if len([v for v in maxVals if v]) ==1 else False
         # Build one 256-step colour legend per channel combination for which
         # every involved track slot has a max value. The legend files do not
         # depend on chrom, so they are rewritten for every chromosome.
         for color, vals in [('Red_combination',[1,0,0]), ('Green_combination',[0,1,0]), ('Blue_combination',[0,0,1]),('Red-Green_combination',[1,1,0]), ('Red-Blue_combination',[1,0,1]), ('Green-Blue_combination',[0,1,1]), ('Red-Green-Blue_combination',[1,1,1])]:    
             
             if not None in [maxVals[i] for i in range(len(vals)) if vals[i]>0]:
                 # Rebinds 'im'; the chromosome image has already been resized
                 # and saved above.
                 im = Image.new("RGB", (256 , 1), "white")
                 tracksInvolved = ' & '.join([str(index+1) for index, v in enumerate(vals) if v>0])
                 if onlyOneTrack:
                     vals = [1,1,1]
                 for val in range(256):
                     colVal = [val*v for v in vals]
                     
                     im.putpixel((val,0), tuple(colVal))
                 imColFile = GalaxyRunSpecificFile(['Recursive', color+'.png' ], galaxyFn)
                 imCol = im.resize((256, 10))
                 imCol.save(imColFile.getDiskPath(ensurePath=True))
                 colorTab.append('<tr><td>Track %s</td><td>  <img src="%s" /></td></tr>'% (tracksInvolved, imColFile.getURL()))
                 
         
         # HTML tables: value range per used track, then the colour legends.
         # NOTE(review): each row below starts with '<tr/>' rather than '<tr>'
         # -- looks like a typo in the emitted HTML; confirm.
         htmlTnRangeVals= '<br/><br/><table align="center"  cellspacing="10"><tr><th>Track number</th><th>Track name</th><th>Value range</th></tr>\n'
         htmlTnRangeVals += '\n'.join(['<tr/><td>Track %i </td><td>%s</td><td> 0 - %i</td></tr>' % (index+1, v[0], v[1]) for index, v in  enumerate(trackAndValRangeTab) if v[1]] )
         htmlTnRangeVals+='</table> <br/><table align="center"  cellspacing="10"><tr><th>Track combination</th><th>Colour range</th></tr>' + '\n'.join(colorTab) + '</table>\n'
         # Line chart section is disabled, so an empty string is inserted.
         lineTabStr= ''
         #if chrom == 'chr1':
         #    tempList = [range(100)]+[v[1]['chr1'][26] for v in microDictList]
         #    chartTemplate =  "['%i',  %i, %i, %i]"
         #    lineTab = [ chartTemplate % v for v in zip(*tempList)]    
         #    lineTemplate = """<div id="%s" onclick="{\nvar data = google.visualization.arrayToDataTable([\n    %s\n  ]);\nvar options = {  title: 'Detailed Graph'    };var chart = new google.visualization.LineChart(document.getElementById('%s'));chart.draw(data, options);}" style="width: 1000px; height: 700px;"></div>"""
         #    lineTabStr = lineTemplate % ('line_div', ', '.join(lineTab),'line_div')    
         # Slider max is the chromosome length divided by 1000, plus one.
         # NOTE(review): file handle is not explicitly closed here.
         open(fileElements[2].getDiskPath(ensurePath=True),'w').write(htmlTemplate % (int(GenomeInfo.getChrLen(genome, chrom)/1000.0)+1, repr(javaScriptList), repr(chrom), trackNames,bigFactor, chrom, fileElements[1].getURL(), fileElements[0].getURL(), htmlTnRangeVals, lineTabStr) )# 
         htmlTableContent.append(tableRowEntryTemplate % (chrom, fileElements[2].getURL(), fileElements[0].getURL()))
         
         # FOr doing normal picture
         #columns = int(round((len(valList)/1000)+0.5))
         #im = Image.new("RGB", (1000, columns), "white")        
         #y=-1    
         #for index, valuTuple in enumerate(valList):
         #    x = index%1000
         #
         #    if x == 0:
         #        y+=1
         #    try:
         #        im.putpixel((x, y), valuTuple)
         #    except:
         #        pass
         #im.save(chrom+'.png')
         #htmlTableContent.append(tableRowEntryTemplate % (chrom, chrom+'.png'))
     
     # Each chromosome entry gets its own 'tabber' container; the containers
     # are joined with '<br/>' in the overview page.
     tabberMal = '<div class="tabber">%s</div>'
     #tempRes, res = [],[]
     res = [tabberMal % v for v in htmlTableContent]
     #for i in htmlTableContent:
     #    if len(tempRes) == 10:
     #        res.append(tabberMal % '\n'.join(tempRes))
     #        tempRes = []
     #    tempRes.append(i)
     #if len(tempRes)>0:
     #    res.append(tabberMal % '\n'.join(tempRes))
     # NOTE(review): file handle is not explicitly closed here.
     open(galaxyFn,'w').write(htmlPageTemplate % ('<br/>'.join(res)))
예제 #5
0
    def executePairDistance(cls, genome, tracks, track_names, clusterMethod,
                            extra_option, feature, extra_feature, galaxyFn,
                            regSpec, binSpec):
        """Build a pairwise distance matrix for the tracks and report it.

        ``ClusteringExecution.computeDistance`` is called once for every
        unordered pair of tracks; the value (optionally transformed according
        to ``extra_feature``) is stored symmetrically in the matrix. The
        matrix is written as an HTML table, handed to R as a ``dist`` object
        and, for hierarchical clustering, passed to
        ``cls._clusterAndPlotDendrogram``. Links to the batch line, the figure
        and the matrix are written to ``galaxyFn``.

        Nothing is computed or written when ``feature`` is None.

        Parameters:
            genome: genome identifier forwarded to ``computeDistance``.
            tracks: list of track names, each a sequence of strings.
            track_names: list of display names, one per track; used as row
                and column labels.
            clusterMethod: only 'Hierarchical clustering' triggers plotting.
            extra_option: forwarded to ``_clusterAndPlotDendrogram``;
                '--select--' means nothing was chosen.
            feature: forwarded to ``computeDistance``.
            extra_feature: "1 minus the ratio" or "1 over the ratio" select a
                transformation; any other value uses the raw value.
            galaxyFn: path of the main output file (overwritten).
            regSpec, binSpec: forwarded to ``computeDistance``.
        """
        from proto.RSetup import r
        silenceRWarnings()

        if feature is None:
            return

        numTracks = len(tracks)
        d_matrix = np.zeros((numTracks, numTracks))
        for i in range(numTracks):
            # Only the upper triangle is computed; the matrix is symmetric
            # and the diagonal stays at zero.
            for j in range(i + 1, numTracks):
                ratio = ClusteringExecution.computeDistance(
                    genome, tracks[i], tracks[j], feature, regSpec, binSpec,
                    galaxyFn)
                if extra_feature == "1 minus the ratio":
                    distance = 1 - ratio
                elif extra_feature == "1 over the ratio":
                    distance = 1 / ratio
                else:
                    distance = ratio
                d_matrix[i, j] = distance
                d_matrix[j, i] = distance

        # The context manager guarantees the report is flushed and closed
        # even if one of the later steps raises.
        with open(galaxyFn, 'w') as jobFile:
            print >> jobFile, '<h3>Results for the "direct sequence-level similarity" way of clustering</h3><br/><br/>'

            # Both files are run-specific and live next to galaxyFn.
            figure = GalaxyRunSpecificFile(
                ['cluster_tracks_result_figure.pdf'], galaxyFn)
            distMatrix = GalaxyRunSpecificFile(
                ['distance_matrix_result.html'], galaxyFn)

            with open(distMatrix.getDiskPath(True), 'w') as distObj:
                core = HtmlCore()
                core.tableHeader([''] + track_names, firstRow=True)
                for index, row in enumerate(d_matrix.tolist()):
                    core.tableLine([track_names[index]] +
                                   [str(v) for v in row])
                core.tableFooter()
                print >> distObj, str(core)

            figurepath = figure.getDiskPath(True)
            r.assign('track_names', track_names)
            r.assign('d_matrix', d_matrix)
            r('row.names(d_matrix) <- track_names')
            r('d <- as.dist(d_matrix)')
            if clusterMethod == 'Hierarchical clustering' and extra_option != "--select--":
                cls._clusterAndPlotDendrogram(figurepath, extra_option, 'd',
                                              'd_matrix', track_names)

            batchRun = GalaxyRunSpecificFile(['batch_run_job.txt'], galaxyFn)
            with open(batchRun.getDiskPath(ensurePath=True), 'w') as batchFile:
                print >> batchFile, '$clusterByPairDistance', (
                    genome, '$'.join([':'.join(t) for t in tracks]),
                    ':'.join(track_names), clusterMethod, extra_option,
                    feature, extra_feature, regSpec, binSpec)

            print >> jobFile, batchRun.getLink(
                'View batch script line for this analysis <br/>')
            # The figure link is emitted unconditionally, also when no
            # dendrogram was drawn above.
            print >> jobFile, figure.getLink(
                'View the clustering tree (dendrogram) for this analysis <br>')
            print >> jobFile, distMatrix.getLink(
                'View the distance matrix for this analysis <br>')
예제 #6
0
    def executeReferenceTrack(cls,
                              genome,
                              tracks,
                              track_names,
                              clusterMethod,
                              extra_option,
                              distanceType,
                              kmeans_alg,
                              galaxyFn,
                              regSpec,
                              binSpec,
                              numreferencetracks=None,
                              refTracks=None,
                              refFeatures=None,
                              yesNo=None,
                              howMany=None,
                              upFlank=None,
                              downFlank=None):
        from proto.RSetup import r
        silenceRWarnings()
        jobFile = open(galaxyFn, 'w')
        print >> jobFile, '<h3>Results for the "similarity of relations to other sets of genomic features" way of clustering<h3/><br/><br/>'
        #         print>>jobFile, 'PARAMS: ', dict(zip('genome, tracks, track_names, clusterMethod, extra_option, distanceType, kmeans_alg, regSpec, binSpec'.split(','), [repr(v)+'<br>'for v in [genome, tracks, track_names, clusterMethod, extra_option, distanceType, kmeans_alg, regSpec, binSpec]])), '<br><br>'
        batchRun = GalaxyRunSpecificFile(['batch_run_job.txt'], galaxyFn)
        with open(batchRun.getDiskPath(ensurePath=True), 'w') as batchFile:
            print >> batchFile, '$clusterByReference', (genome, '$'.join([
                ':'.join(t) for t in tracks
            ]), ':'.join(track_names), clusterMethod, extra_option,
                                                        distanceType,
                                                        kmeans_alg, regSpec,
                                                        binSpec,
                                                        numreferencetracks,
                                                        refTracks, refFeatures,
                                                        yesNo, howMany,
                                                        upFlank, downFlank)
        print >> jobFile, batchRun.getLink(
            'View batch script line for this analysis<br/>')

        #print>>jobFile, 'Batch script syntax for this analysis:<br>', '$clusterByReference', (genome, '$'.join([':'.join(t) for t in tracks]), ':'.join(track_names)  , clusterMethod, extra_option, distanceType, kmeans_alg, regSpec, binSpec,numreferencetracks, refTracks, refFeatures, yesNo, howMany, upFlank, downFlank), '<br><br>'
        #print>>jobFile, 'signature of method clusterByReference:<br>', 'clusterByReference(genome, tracksStr, track_namesStr, clusterMethod, extra_option, distanceType, kmeans_alg, regSpec, binSpec, numreferencetracks=None, refTracks=None, refFeatures=None, yesNo=None, howMany=None, upFlank=None, downFlank=None)<br><br><br>'
        prettyTrackNames = [
            v[-1].replace("RoadMap_", "").replace('.H3K4me1', '')
            for v in tracks
        ]

        #prettyTrackNames = [prettyPrintTrackName(v) for v in tracks]
        #paramNames = ['numreferencetracks', 'refTracks', 'refFeatures', 'yesNo', 'howMany', 'upFlank', 'downFlank']
        #for index, value in enumerate([numreferencetracks, refTracks, refFeatures, yesNo, howMany, upFlank, downFlank]):
        #    if value != None:
        #        print paramNames[index]+'='+ str(value),
        #print ''

        reftrack_names = [
        ]  #for use in creating the heatmap (as the column names)

        options = [
        ]  #for the case using refTracks, options contains feature for every refTrack, chosen by user.

        if numreferencetracks:
            for i in range(int(numreferencetracks)):
                ref_i = refTracks[i].split(
                    ":"
                )  #name of refTrack is being used to construct the name of expanded refTrack
                #refTracks.append(ref_i) #put the refTrack into refTracks list
                reftrack_names.append(ref_i[-1])
                temp_opt1 = 'ref' + str(i) + 'feature'
                options += [] if refFeatures[i] == None else [refFeatures[i]]
                if yesNo[i] == "Yes" and howMany[i] != '--select--':
                    for expan in range(int(howMany[i])):
                        reftrack_names.append(ref_i[-1] + '_' +
                                              upFlank[i][expan])
                        upFlank = int(upFlank[i][expan])
                        downFlank = int(downFlank[i][expan])
                        withinRunId = str(i + 1) + ' expansion ' + str(expan +
                                                                       1)
                        outTrackName = GalaxyInterface.expandBedSegmentsFromTrackNameUsingGalaxyFn(
                            ref_i, genome, upFlank, downFlank, galaxyFn,
                            withinRunId)  #outTrackName is unique for run
                        refTracks.append(
                            outTrackName
                        )  #put the expanded track into refTracks list
                        options.append(
                            options[-1]
                        )  # use chosen feature for refTack as valid feature for the expanded

            for index, track in enumerate(refTracks):
                #print track, '<br>'
                if isinstance(track, basestring):
                    track = track.split(":")
                refTracks[index] = track[:-1] if track[
                    -1] == "-- All subtypes --" else track

        if len(refTracks) > 0:

            trackFormats = [
                TrackInfo(genome, track).trackFormatName for track in tracks
            ]

            trackLen = len(tracks)
            refLen = len(refTracks)
            f_matrix = np.zeros((trackLen, refLen))
            for i in range(trackLen):
                for j in range(refLen):
                    #print 'len(options), refLen, len(tracks), trackLen, len(trackFormats):', len(options), refLen, len(tracks), trackLen, len(trackFormats)
                    f_matrix[i,
                             j] = cls.extract_feature(genome, tracks[i],
                                                      refTracks[j], options[j],
                                                      regSpec, binSpec,
                                                      trackFormats[i])
            r.assign('track_names', prettyTrackNames
                     )  #use as track names, will be shown in clustering figure
            r.assign('reftrack_names', reftrack_names)
            r.assign('f_matrix', f_matrix)
            r.assign('distanceType', distanceType)
            r('row.names(f_matrix) <- track_names')
            r('colnames(f_matrix) <- reftrack_names')

            if clusterMethod == 'Hierarchical clustering' and extra_option != "--select--":
                figure = GalaxyRunSpecificFile(
                    ['cluster_tracks_result_figure.pdf'], galaxyFn)
                figurepath = figure.getDiskPath(True)
                #r.pdf(figurepath, 8,8)
                r('d <- dist(f_matrix, method=distanceType)')
                distTable = r('d')
                distMatrix = GalaxyRunSpecificFile(
                    ['distance_matrix_result.txt'], galaxyFn)
                distMatrixPath = distMatrix.getDiskPath(True)
                open(distMatrixPath, 'w').write(str(distTable))
                print >> jobFile, distMatrix.getLink(
                    'View the distance matrix for this analysis <br>')

                #with open(distMatrixPath,'w') as distObj:
                #    #distTable = d_matrix.tolist()
                #    core = HtmlCore()
                #    core.tableHeader(['']+track_names,firstRow=True)
                #    rowSize = len(track_names)
                #    index=0
                #    while index<len(distTable):
                #        core.tableLine([track_names[index % rowSize]]+[str(v) for v in distTable[index:index+rowSize]])
                #    core.tableFooter()
                #    print>>distObj, str(core)
                #print>>jobFile, distMatrix.getLink('View the distance matrix for this analysis <br>')
                #print r.f_matrix
                #print r.d

                r_f_matrixFile = GalaxyRunSpecificFile(['f-matrix.robj'],
                                                       galaxyFn)
                r.assign('f_matrix_fn', r_f_matrixFile.getDiskPath(True))
                r('dput(f_matrix, f_matrix_fn)')
                print >> jobFile, r_f_matrixFile.getLink(
                    'Access the R-representation of the Feature_matrix (text-file) <br>'
                ),

                #r_f_matrixFile = GalaxyRunSpecificFile(['f-matrix.txt'], galaxyFn)
                #r_f_matrixFile.writeTextToFile(str(f_matrix)+'\n\n'+str(r.d))
                #print>>jobFile, r_f_matrixFile.getLink('r.f_matrix & r.d <br>')

                cls._clusterAndPlotDendrogram(figurepath, extra_option, 'd',
                                              'f_matrix', prettyTrackNames)
                #r.assign('extra_option',extra_option)
                #r('hr <- hclust(d, method=extra_option, members=NULL)')
                #r('hr$height <- hr$height/max(hr$height)*10')
                #r('plot(hr, ylab="Distance", hang=-1)')
                #
                #r('dev.off()')
                print >> jobFile, figure.getLink(
                    'View the clustering tree (dendrogram) for this analysis<br>'
                )
            elif clusterMethod == 'K-means clustering' and extra_option != "--select--" and kmeans_alg != "--select--":
                textFile = GalaxyRunSpecificFile(
                    ['result_of_kmeans_clustering.txt'], galaxyFn)
                textFilePath = textFile.getDiskPath(True)
                extra_option = int(extra_option)
                r.assign('extra_option', extra_option)
                r.assign('kmeans_alg', kmeans_alg)
                r(
                    'hr <- kmeans(f_matrix,extra_option,algorithm=kmeans_alg)'
                )  #the number of cluster is gotten from clusterMethod+ tag, instead of 3 used here
                r('hr$height <- hr$height/max(hr$height)*10')
                kmeans_output = open(textFilePath, 'w')
                clusterSizes = r('hr$size')  #size of every cluster

                withinSS = r('hr$withinss')
                clusters = np.array(
                    r('hr$cluster')
                )  #convert to array in order to handle the index more easily
                track_names = np.array(track_names)
                for index1 in range(
                        extra_option
                ):  #extra_option actually the number of clusters
                    trackInCluster = [
                        k for k, val in clusters.items() if val == index1
                    ]

                    print >> kmeans_output, 'Cluster %i(%s objects) : ' % (
                        index1 + 1, str(clusterSizes[index1]))
                    for name in trackInCluster:
                        print >> kmeans_output, name

                    print >> kmeans_output, 'Sum of square error for this cluster is : ' + str(
                        withinSS[index1]) + '\n'
                kmeans_output.close()
                print >> jobFile, textFile.getLink(
                    'Detailed result of kmeans clustering <br>')

            #heatmap = GalaxyRunSpecificFile(['heatmap_figure.pdf'], galaxyFn)
            #baseDir = os.path.dirname(heatmap.getDiskPath(True))
            ##r.png(heatmap_path, width=800, height=700)

            resDict = Results([], [], 'ClusTrack')
            resDict.setGlobalResult({
                'result': {
                    'Matrix': f_matrix,
                    'Rows': np.array(track_names),
                    'Cols': np.array(reftrack_names),
                    'Significance': None,
                    'RowClust': r('hr'),
                    'ColClust': None
                }
            })
            header = 'Heatmap of Feature matrix for "similarity of positional distribution along the genome" '

            baseDir = GalaxyRunSpecificFile([], galaxyFn).getDiskPath()
            heatPresenter = HeatmapFromNumpyPresenter(resDict,
                                                      baseDir,
                                                      header,
                                                      printDimensions=False)

            print >> jobFile, heatPresenter.getReference('result')
            #r.pdf(heatmap_path)
            #r.library("gplots")
            #r('heatmap(f_matrix, col=redgreen(75), Colv=NA, scale="none", xlab="", ylab="", margins=c(10,10))')#Features cluster tracks
            #r('dev.off()')

            #print>>jobFile, heatmap.getLink('View the resulting heatmap plot <br>')
            #cls.print_data(f_matrix, jobFile)

        else:
            print 'Have to specify a set of refTracks'
예제 #7
0
    def findTFsTargetingGenes(cls, genome, tfSource, ensembleGeneIdList,
                              upFlankSize, downFlankSize, geneSource,
                              galaxyFn):
        #galaxyFn = '/usit/insilico/web/lookalike/galaxy_dist-20090924-dev/database/files/003/dataset_3347.dat'
        #print 'overriding galaxyFN!: ', galaxyFn
        uniqueWebPath = GalaxyRunSpecificFile([], galaxyFn).getDiskPath()

        assert genome in [
            'mm9', 'hg18', 'hg19'
        ]  #other genomes not supported. TF id links do not specify genome for pre-selection of analysis

        #if tfSource == 'UCSC tfbs conserved':
        #    tfTrackName = ['Gene regulation','TFBS','UCSC prediction track']
        #else:
        #    raise
        tfTrackNameMappings = TfInfo.getTfTrackNameMappings(genome)
        tfTrackName = tfTrackNameMappings[tfSource]

        #Get gene track
        #targetGeneRegsTempFn = uniqueWebPath + os.sep + 'geneRegs.bed'
        #geneRegsTrackName = GenomeInfo.getStdGeneRegsTn(genome)
        #geneRegsFn = getOrigFn(genome, geneRegsTrackName, '.category.bed')
        #GalaxyInterface.getGeneTrackFromGeneList(genome, geneRegsTrackName, ensembleGeneIdList, targetGeneRegsTempFn )

        if not (upFlankSize == downFlankSize == 0):
            unflankedGeneRegsTempFn = uniqueWebPath + os.sep + '_geneRegs.bed'
            #flankedGeneRegsTempFn  = uniqueWebPath + os.sep + 'flankedGeneRegs.bed'
            flankedGeneRegsTempStaticFile = GalaxyRunSpecificFile(
                ['flankedGeneRegs.bed'], galaxyFn)
            flankedGeneRegsTempFn = flankedGeneRegsTempStaticFile.getDiskPath()
            geneRegsTrackName = GenomeInfo.getStdGeneRegsTn(genome)
            #geneRegsFn = getOrigFn(genome, geneRegsTrackName, '.category.bed')
            GalaxyInterface.getGeneTrackFromGeneList(genome, geneRegsTrackName,
                                                     ensembleGeneIdList,
                                                     unflankedGeneRegsTempFn)
            GalaxyInterface.expandBedSegments(unflankedGeneRegsTempFn,
                                              flankedGeneRegsTempFn,
                                              genome,
                                              upFlankSize,
                                              downFlankSize,
                                              suffix='category.bed')
            #flankedGeneRegsExternalTN = ['external'] +galaxyId +  [flankedGeneRegsTempFn]
            regSpec, binSpec = 'category.bed', flankedGeneRegsTempFn
        else:
            regSpec, binSpec = '__genes__', ','.join(ensembleGeneIdList)

        res = cls._runCategoryPointCount(genome, regSpec, binSpec, tfTrackName)

        #trackName1 = tfTrackName
        #
        #analysisDef = 'Category point count: Number of elements each category of track1 (with overlaps)'+\
        #          '[tf1:=SegmentToStartPointFormatConverter:]'+\
        #          '-> FreqByCatStat'
        ##assert len(ensembleGeneIdList)==1
        ##geneId = ensembleGeneIdList[0]
        #
        #print '<div class="debug">'
        #userBinSource, fullRunArgs = GalaxyInterface._prepareRun(trackName1, None, analysisDef, regSpec, binSpec, genome)
        #res = AnalysisDefJob(analysisDef, trackName1, None, userBinSource, **fullRunArgs).run()
        #
        #print res
        ##GalaxyInterface._viewResults([res], galaxyFn)
        #print '</div>'
        tfs = res.getResDictKeys()

        genesPlural = 's' if len(ensembleGeneIdList) > 1 else ''
        tfsPlural = 's' if len(tfs) != 1 else ''
        print '<p>There are %i TF%s targeting your gene%s of interest (%s), using "%s" as source of TF occurrences.</p>' % (
            len(tfs), tfsPlural, genesPlural, ','.join(ensembleGeneIdList),
            tfSource)
        if not (upFlankSize == downFlankSize == 0):
            print '(using ', flankedGeneRegsTempStaticFile.getLink(
                'these genomic regions'), ' for genes)'
        expansionStr = ' flanked' if not (
            upFlankSize == downFlankSize == 0) else ''

        idHtmlFileNamer = GalaxyRunSpecificFile(['allTfIds.html'], galaxyFn)
        idHtmlFileNamer.writeTextToFile('<br>'.join([
            '<a href=%s/hyper?dbkey=%s&track1=%s&track2=>%s</a>' %
            (URL_PREFIX, genome, quote(':'.join(tfTrackName + [tf])), tf)
            for tf in tfs
        ]))
        #idHtmlFileNamer.writeTextToFile('<br>'.join(['<a href=/hbdev/hyper?track1=%s&track2=>%s</a>'%( ':'.join(tfTrackName+[tf]), tf) for tf in tfs]))
        print '<p>', idHtmlFileNamer.getLink(
            'Inspect html file'
        ), ' of all TF IDs occurring 1 or more times within your%s gene region%s of interest, with each TF ID linking to analysis with this TF pre-selected.</p>' % (
            expansionStr, genesPlural)

        idFileNamer = GalaxyRunSpecificFile(['allTfIds.txt'], galaxyFn)
        idFileNamer.writeTextToFile(os.linesep.join(tfs) + os.linesep)
        print '<p>', idFileNamer.getLink(
            'Inspect text file'
        ), ' listing all TF IDs occurring 1 or more times within your%s gene region%s of interest.</p>' % (
            expansionStr, genesPlural)

        extractedTfbsFileNamer = GalaxyRunSpecificFile(
            ['tfbsInGeneRegions.bed'], galaxyFn)
        GalaxyInterface.extractTrackManyBins(
            genome, tfTrackName, regSpec, binSpec, True, 'bed', False, False,
            extractedTfbsFileNamer.getDiskPath())
        print '<p>', extractedTfbsFileNamer.getLink(
            'Inspect bed-file'
        ), 'of all TF binding sites occurring within your%s gene region%s of interest.</p>' % (
            expansionStr, genesPlural)
예제 #8
0
    def findTFsOccurringInRegions(cls, genome, tfSource, regionsBedFn,
                                  upFlankSize, downFlankSize, galaxyFn):
        uniqueWebPath = GalaxyRunSpecificFile([], galaxyFn).getDiskPath()
        #assert genome == 'hg18' #other genomes not supported. TF id links do not specify genome for pre-selection of analysis

        tfTrackNameMappings = TfInfo.getTfTrackNameMappings(genome)
        assert tfTrackNameMappings != {}, 'No TF info for genome: %s' % genome

        tfTrackName = tfTrackNameMappings[tfSource]

        if (upFlankSize == downFlankSize == 0):
            flankedRegionsFn = regionsBedFn
        else:
            flankedRegionsFn = uniqueWebPath + os.sep + 'flankedRegs.bed'
            GalaxyInterface.expandBedSegments(regionsBedFn, flankedRegionsFn,
                                              genome, upFlankSize,
                                              downFlankSize)

        regSpec, binSpec = 'bed', flankedRegionsFn
        res = cls._runCategoryPointCount(genome, regSpec, binSpec, tfTrackName)

        tfNames = res.getResDictKeys()
        #print 'RES: ', res.getGlobalResult()[tfNames[0]], type(res.getGlobalResult()[tfNames[0]])
        pwm2tfids = safeshelve.open(
            os.sep.join([HB_SOURCE_CODE_BASE_DIR, 'data', 'pwm2TFids.shelf']),
            'r')
        tf2class = safeshelve.open(
            os.sep.join([HB_SOURCE_CODE_BASE_DIR, 'data', 'TfId2Class.shelf']),
            'r')
        pwmName2id = safeshelve.open(
            os.sep.join([HB_SOURCE_CODE_BASE_DIR, 'data', 'pwmName2id.shelf']),
            'r')
        #print tfNames[0],tfNames[1], ' VS ', pwm2tfids.keys()[0], len(pwm2tfids)
        #tfs = list(reversed(sorted([(res.getGlobalResult()[tf], tf, '%s (%i hits (class %s))'%(tf, res.getGlobalResult()[tf]), '/'.join([tf2class[x] for x in pwm2tfids[tf]]) ) for tf in tfNames]))) #num hits, tfName, tfTextInclHits
        tfs = list(reversed(sorted([(res.getGlobalResult()[tf], tf, '%s (%i hits )'%(tf, res.getGlobalResult()[tf]) + \
                                     (' (class: %s)'%'/'.join(set([str(tf2class.get(x)) for x in pwm2tfids[pwmName2id[tf]] if x in tf2class]))\
                                      if (tf in pwmName2id and pwmName2id[tf] in pwm2tfids and any([x in tf2class for x in pwm2tfids[pwmName2id[tf]]]))\
                                    else '') ) \
                                    for tf in tfNames])) ) #num hits, tfName, tfTextInclHits

        tfsPlural = 's' if len(tfs) != 1 else ''
        print '<p>There are %i TF%s targeting your regions of interest, using "%s" as source of TF occurrences.</p>' % (
            len(tfs), tfsPlural, tfSource)

        expansionStr = ' flanked' if not (
            upFlankSize == downFlankSize == 0) else ''

        idHtmlFileNamer = GalaxyRunSpecificFile(['allTfIds.html'], galaxyFn)
        idHtmlFileNamer.writeTextToFile('<br>'.join([
            '<a href=/hbdev/hyper?track1=%s&track2=>%s</a>' %
            (quote(':'.join(tfTrackName + [tf[1]])), tf[2]) for tf in tfs
        ]))
        print '<p>', idHtmlFileNamer.getLink(
            'Inspect html file'
        ), ' of all TF IDs occurring 1 or more times within your%s regions of interest, with each TF ID linking to analysis with this TF pre-selected.</p>' % (
            expansionStr)

        idFileNamer = GalaxyRunSpecificFile(['allTfIds.txt'], galaxyFn)
        idFileNamer.writeTextToFile(
            os.linesep.join([tf[2] for tf in tfs]) + os.linesep)
        print '<p>', idFileNamer.getLink(
            'Inspect text file'
        ), ' listing all TF IDs occurring 1 or more times within your%s regions of interest.</p>' % (
            expansionStr)

        extractedTfbsFileNamer = GalaxyRunSpecificFile(
            ['tfbsInGeneRegions.bed'], galaxyFn)
        GalaxyInterface.extractTrackManyBins(
            genome, tfTrackName, regSpec, binSpec, True, 'bed', False, False,
            extractedTfbsFileNamer.getDiskPath(), True)
        print '<p>', extractedTfbsFileNamer.getLoadToHistoryLink(
            'Inspect bed-file'
        ), 'of all TF binding sites occurring within your%s regions of interest.</p>' % (
            expansionStr)

        for dummy, tf, dummy2 in tfs:
            extractedTfbsFileNamer = GalaxyRunSpecificFile(
                [tf + '_tfbsInGeneRegions.bed'], galaxyFn)
            GalaxyInterface.extractTrackManyBins(
                genome, tfTrackName + [tf], regSpec, binSpec, True, 'bed',
                False, False, extractedTfbsFileNamer.getDiskPath())
            print '<p>', extractedTfbsFileNamer.getLoadToHistoryLink(
                'Binding sites of the TF %s' % tf, 'bed'
            ), 'occurring within your%s regions of interest (bed-file).</p>' % (
                expansionStr)
    def execute(cls, choices, galaxyFn=None, username=''):
        import os
        gsuite = getGSuiteFromGalaxyTN(choices.gsuite)
        # match = int(choices.match)
        # mismatch = int(choices.mismatch)
        # delta = int(choices.delta)
        # pm = int(choices.pm)
        # pi = int(choices.pi)
        # minscore = int(choices.minscore)
        # maxperiod = int(choices.maxperiod)
        minConsensusLength = int(choices.minconsensus) if choices.minconsensus.isdigit() else None
        maxConsensusLength = int(choices.maxconsensus) if choices.maxconsensus.isdigit() else None
        minCopyNumber = int(choices.mincopynumber) if choices.mincopynumber.isdigit() else None
        parameters = [choices.match, choices.mismatch, choices.delta, choices.pm,
                      choices.pi, choices.minscore, choices.maxperiod]
        resultsDict = OrderedDict()
        for gsTrack in gsuite.allTracks():
            resFile = GalaxyRunSpecificFile(['trf', gsTrack.title, gsTrack.title + '.tmp'], galaxyFn)
            ensurePathExists(resFile.getDiskPath())
            trackDirName = os.path.dirname(os.path.realpath(resFile.getDiskPath()))
            # parameters = ["2", "5", "7", "80", "10", "50", "300"] #Madeleine suggestion
            instruction = [cls.TRF_PATH, gsTrack.path] + parameters + ["-d", "-h"]
            pipe = subprocess.Popen(instruction, cwd=trackDirName, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            results, errors = pipe.communicate()

            outFileName = ".".join([os.path.basename(gsTrack.path)] + parameters + ["dat"])
            outFilePath = os.path.join(trackDirName, outFileName)

            # print outFilePath

            resultList = cls.parseTRFResultFile(outFilePath, minConsensusLength, maxConsensusLength, minCopyNumber)
            if resultList:
                resultsDict[gsTrack.title] = resultList

        if choices.regionsGSuite:
            repeatRegionsBedTracksGSuite = GSuite()
            for trackName, trfResultList in resultsDict.iteritems():
                trackUri = GalaxyGSuiteTrack.generateURI(galaxyFn=galaxyFn,
                                                         extraFileName=("Repeat_regions_"+trackName),
                                                         suffix='bed')
                gsTrack = GSuiteTrack(trackUri, title=("Repeat regions " + trackName), genome=gsuite.genome)
                ensurePathExists(gsTrack.path)
                with open(gsTrack.path, 'w') as bedFile:
                    header = 'track name="' + trackName + '" description="' + trackName + '" priority=1'
                    bedFile.write(header + os.linesep)
                    for trfResult in trfResultList:
                        for repeatRegion in trfResult._repeatRegionList:
                            # if not repeatRegion.strand:
                            #     DebugUtil.insertBreakPoint()
                            endPosition = repeatRegion.endPositionFullCopies if choices.rrCutoff == cls.CUTOFF_REPEAT_REGION else repeatRegion.endPosition
                            bedFile.write('\t'.join([repeatRegion.chromosome,
                                                     str(repeatRegion.startPosition),
                                                     str(endPosition),
                                                     repeatRegion.bedName,
                                                     '0',
                                                     str(repeatRegion.strand)]) + os.linesep)
                repeatRegionsBedTracksGSuite.addTrack(gsTrack)

            GSuiteComposer.composeToFile(repeatRegionsBedTracksGSuite, cls.extraGalaxyFn['Repeat regions (bed) GSuite'])

        if choices.monomersGSuite:
            monomersBedTracksGSuite = GSuite()
            for trackName, trfResultList in resultsDict.iteritems():
                trackUri = GalaxyGSuiteTrack.generateURI(galaxyFn=galaxyFn,
                                                         extraFileName=("Repeat_monomers_"+trackName),
                                                         suffix='bed')
                gsTrack = GSuiteTrack(trackUri, title=("Repeat monomers " + trackName), genome=gsuite.genome)
                ensurePathExists(gsTrack.path)
                with open(gsTrack.path, 'w') as bedFile:
                    header = 'track name="' + trackName + '" description="' + trackName + '" priority=1'
                    bedFile.write(header + os.linesep)
                    for trfResult in trfResultList:
                        for repeatRegion in trfResult._repeatRegionList:
                            # if not repeatRegion.strand:
                            #     DebugUtil.insertBreakPoint()
                            for repeatMonomer in repeatRegion._monomers:
                            # endPosition = repeatRegion.endPositionFullCopies if choices.rrCutoff == cls.CUTOFF_REPEAT_REGION else repeatRegion.endPosition
                                bedFile.write('\t'.join([repeatRegion.chromosome,
                                                         str(repeatMonomer.startPosition),
                                                         str(repeatMonomer.endPosition),
                                                         repeatMonomer.bedName,
                                                         '0',
                                                         str(repeatRegion.strand)]) + os.linesep)

                monomersBedTracksGSuite.addTrack(gsTrack)

            GSuiteComposer.composeToFile(monomersBedTracksGSuite, cls.extraGalaxyFn['Repeat monomers (bed) GSuite'])


        ###################
        analysisParamsTableColumnTitles = ['Parameter', 'Selected value']
        analysisParamsDict = OrderedDict()
        analysisParamsDict['Tandem Repeat Finder tool version'] = cls.TRF_VERSION
        analysisParamsDict.update(
            OrderedDict([
            ('Match', choices.match),
            ('Mismatch', choices.mismatch),
            ('Delta', choices.delta),
            ('Matching probability (Pm)', choices.pm),
            ('Indel probability (Pi)', choices.pi),
            ('Min score', choices.minscore),
            ('Max period', choices.maxperiod),
            ('Min consensus length', choices.minconsensus),
            ('Max consensus length', choices.maxconsensus),
            ('Min copy number', choices.mincopynumber)]
            )
        )
        ###################

        ###################
        countTableColumnTitles = ['Name', 'Nr of repeat regions', 'Avg copy number', 'Min copy number',
                                  'Max copy number', 'Avg consensus length', 'Min consensus length',
                                  'Max consensus length']
        countTableDict = OrderedDict()
        from numpy import mean
        for trackName, trfResultList in resultsDict.iteritems():
            countTableDict[trackName] = []
            repeatRegionsNr = sum([x.repeatRegionsCount for x in trfResultList])
            countTableDict[trackName].append(repeatRegionsNr)
            copyNumberList =[]
            for trfRes in trfResultList:
                copyNumberList += trfRes.copyNumberList if \
                    choices.rrCutoff == cls.CUTOFF_REPEAT_REGION else trfRes.realCopyNumberList
            countTableDict[trackName].append(mean(copyNumberList))
            countTableDict[trackName].append(min(copyNumberList))
            countTableDict[trackName].append(max(copyNumberList))

            consensusLengthList = []
            for trfRes in trfResultList:
                consensusLengthList += trfRes.consensusLengths
            countTableDict[trackName].append(mean(consensusLengthList))
            countTableDict[trackName].append(min(consensusLengthList))
            countTableDict[trackName].append(max(consensusLengthList))

        ###################

        core = HtmlCore()
        core.begin()
        core.divBegin()
        # core.paragraph('''This tool reports repeat regions discovered by the TRF tool
        #                 ''')
        core.tableFromDictionary(analysisParamsDict, columnNames=analysisParamsTableColumnTitles, sortable=False)
        core.divEnd()
        core.divBegin()
        core.tableFromDictionary(countTableDict, columnNames=countTableColumnTitles,
                                 tableId='repeatCounts', sortable=True, presorted=0)
        core.divEnd()
        # core.divBegin()
        # for k, v in resultsDict.iteritems():
        #     core.line('track: ' + k)
        #
        #     for val in v:
        #         core.line(str(val))
        # core.divEnd()
        core.end()

        print core
예제 #10
0
class ExactlySpecifiedTF(object):
    '''
    Bundles one transcription factor (TF), its PWM and a pair of tracks
    (regions + mutations) together with the result files produced when
    comparing PWM scores on the reference and the mutated sequences.

    Typical call order, as implied by the attributes each method reads:
    getFastaFiles() -> getPwmScores() -> makeHtmlStr().
    '''

    # Header of the GTrack result file, reproduced verbatim.
    # NOTE(review): the '###' column line names 11 columns while every record
    # written by getPwmScores has 12 tab-separated fields (the
    # 'chr:start-end(best_reference_sequence_PWM_hit_motif)' column is listed
    # in the '#' description but not in the column line) - confirm against
    # the GTrack specification before changing it.
    _GTRACK_HEADER = (
        '# GTrack file\n'
        '#The columns in this dataset are:\n'
        '#\t(ChIP-seq_peak)chr\n'
        '#\tstart\n'
        '#\tend\n'
        '#\tmax(difference in column 5, difference in column 6)\n'
        '#\t[best_reference_sequence_PWM_hit_score -> corresponding_mutated_sequence_score]\n'
        '#\t[best_mutated_sequence_PWM_hit_score -> corresponding_reference_sequence_score]\n'
        '#\tchr:start-end(best_reference_sequence_PWM_hit_motif)\n'
        '#\tbest_reference_sequence_PWM_hit_motif\n'
        '#\tcorresponding_mutated_sequence_motif\n'
        '#\tchr:start-stop(best_mutated_sequence_PWM_hit_motif)\n'
        '#\tbest_mutated_sequence_PWM_hit_motif\n'
        '#\tcorresponding_reference_sequence_motif)\n'
        '##track type: valued segments\n'
        '##value column: val\n'
        '###seqid\tstart\tend\tval\treference_sequence_PWM\tmutated_sequence_PWM_hit_score\tbest_reference_sequence_PWM_hit_motif\tcorresponding_mutated_sequence_motif\tchr:start-stop(best_mutated_sequence_PWM_hit_motif)\tbest_mutated_sequence_PWM_hit_motif\tcorresponding_reference_sequence_motif\n'
    )

    def __init__(self, tf, chipSeqPeaks, pwm, tracks, galaxyFn):
        '''
        tf, chipSeqPeaks, pwm: strings; they are concatenated into the HTML
            summary by makeHtmlStr (pwm is also used in result paths).
        tracks: exactly two track names (each a sequence of strings):
            [region track, mutation track].
        galaxyFn: Galaxy output file name that all result files are tied to.
        '''
        self.tf = tf
        self.chipSeqPeaks = chipSeqPeaks
        self.pwm = pwm

        assert len(tracks) == 2
        self.track = tracks[0]
        self.mutationTrack = tracks[1]
        self.galaxyFn = galaxyFn

        trackId = '_'.join(self.track)
        # The PWM name and the track id are concatenated without separator;
        # kept as-is so that existing result paths do not change.
        pwmDirName = self.pwm + trackId

        self.bedPwmDiffScore = GalaxyRunSpecificFile(
            ['pwmDiffScore', pwmDirName, 'pwmDiff.bed'], self.galaxyFn)
        self.pwmDiffScore = GalaxyRunSpecificFile(
            ['pwmDiffScore', pwmDirName, 'pwmDiff.html'], self.galaxyFn)
        self.gtrackDiffScore = GalaxyRunSpecificFile(
            ['pwmDiffScore', pwmDirName, 'pwmDiff.gtrack'], self.galaxyFn)
        self.mutatedFasta = GalaxyRunSpecificFile(
            ['fastaFiles', trackId, 'mutatedFastseq.fasta'], self.galaxyFn)
        self.regularFasta = GalaxyRunSpecificFile(
            ['fastaFiles', trackId, 'regularFastseq.fasta'], self.galaxyFn)

        # Filled in by getPwmScores(); stay None/0 when nothing was scored.
        self.maxPwmDiff = None
        self.avgPwmDiff = None
        self.numPwmDiff = 0

    def getFastaFiles(self, genome):
        '''
        Intersect the two tracks and write one FASTA file with the reference
        sequence and one with the mutated sequence of every intersected
        region. Also sets self.intersectingPoints (a string).
        '''
        assert self.track
        assert self.mutationTrack

        regionDict, pointDict = self.IntersectData(
            genome, [self.track, self.mutationTrack])
        # Counts one entry per (region, overlapping element) pair, because
        # IntersectData appends the region once for every overlap.
        self.intersectingPoints = str(
            sum(len(regions) for regions in regionDict.values()))

        mutatedFastaDict = self.getMutatedSequence(genome, regionDict,
                                                   pointDict)
        regularFastaDict = self.getMutatedSequence(genome, regionDict)

        self.mutatedFasta.writeTextToFile(self._fastaText(mutatedFastaDict))
        self.regularFasta.writeTextToFile(self._fastaText(regularFastaDict))

    @staticmethod
    def _fastaText(fastaDict):
        '''Join the per-chromosome FASTA records, chromosomes in sorted order.'''
        return '\n'.join(
            '\n'.join(fastaDict[chrom]) for chrom in sorted(fastaDict))

    @classmethod
    def getMutatedSequence(cls, genome, regionDict, pointDict=None):
        '''
        Return {chrom: [FASTA record, ...]} for all regions in regionDict.

        regionDict: {chrom: [(start, end), ...]}.
        pointDict: optional {chrom: [[regionStart, offset, val], ...]} as
            produced by IntersectData. When given, the base at `offset` of
            the region starting at `regionStart` is replaced by `val` (or by
            the last character of `val` when it contains '>', e.g. 'A>G').
        '''
        resultDict = defaultdict(list)
        fastaTrack = PlainTrack(['Sequence', 'DNA'])
        for chrom in regionDict:
            for start, end in regionDict[chrom]:
                seqTv = fastaTrack.getTrackView(
                    GenomeRegion(genome, chrom, start, end))
                bases = list(seqTv.valsAsNumpyArray())
                if pointDict:
                    for regionStart, offset, val in pointDict[chrom]:
                        if regionStart != start:
                            continue
                        # An element that starts before the region gives a
                        # negative offset, which would silently overwrite a
                        # base counted from the END of the sequence; an
                        # offset equal to the region length would raise
                        # IndexError. Neither position lies inside the
                        # region, so such elements are skipped.
                        if not 0 <= offset < len(bases):
                            continue
                        bases[offset] = val[-1] if '>' in val else val
                # The FASTA header uses a 1-based start position.
                resultDict[chrom].append(
                    '>%s %i-%i\n%s' %
                    (chrom, start + 1, end, ''.join(bases)))

        return resultDict

    @classmethod
    def IntersectData(cls, genome, tracks):
        '''
        Intersect the elements of two tracks.

        tracks: exactly two track names; elements of the second one must
            carry a value (ge.val).

        Returns (resultDict, pointDict):
            resultDict: {chrom: [[start1, end1], ...]} - the region of the
                first track, appended once per overlapping element of the
                second track (so a region may occur several times).
            pointDict: {chrom: [[start1, start2 - start1, str(val)], ...]} -
                for every overlap, the region start, the offset of the
                second-track element inside the region, and its value.
        '''
        from quick.util.CommonFunctions import getGeSource

        regionSource, elementSource = [
            getGeSource(track, genome) for track in tracks
        ]

        track1Dict, track2Dict = defaultdict(list), defaultdict(list)
        for ge in regionSource:
            track1Dict[ge.chr].append((ge.start, ge.end))
        for ge in elementSource:
            track2Dict[ge.chr].append((ge.start, ge.end, ge.val))

        resultDict, pointDict = defaultdict(list), defaultdict(list)
        for chrom in track1Dict:
            # Single forward sweep over both sorted lists: `counter` is never
            # rewound, so every second-track element is matched against at
            # most one region.
            counter = 0
            track2List = sorted(track2Dict[chrom])
            for start1, end1 in sorted(track1Dict[chrom]):
                while counter < len(track2List):
                    start2, end2, val = track2List[counter]
                    overlaps = (start1 < end2 <= end1 or
                                start1 <= start2 < end1 or
                                (start2 < start1 and end2 > end1))
                    if overlaps:
                        resultDict[chrom].append([start1, end1])
                        pointDict[chrom].append(
                            [start1, start2 - start1, str(val)])
                    elif start2 >= end1:
                        # Element lies after this region; try the next one.
                        break
                    counter += 1
        return resultDict, pointDict

    def getPwmScores(self, motifId, moticScanObj):
        '''
        Scan the reference and the mutated FASTA file with the given motif
        and write the per-region score differences as HTML, BED and GTrack.

        moticScanObj.scanMotifInTwoSequences must return
        {'<chrom> <start>-<end>': (regular, mutated)} where both values are
        5-tuples (best score, corresponding score in the other sequence,
        best hit motif, corresponding motif, (hitStart, hitEnd)).

        Sets self.maxPwmDiff, self.avgPwmDiff (strings) and self.numPwmDiff;
        they are left untouched when the scan returns no regions.
        '''
        pwmRegDict = moticScanObj.scanMotifInTwoSequences(
            motifId, self.regularFasta.getDiskPath(),
            self.mutatedFasta.getDiskPath())

        diffResDict = defaultdict(list)  # difference -> GTrack records
        lineTab = []  # one row per region, in sorted region order
        for region in sorted(pwmRegDict):
            chrom, span = region.split()
            end = region.replace('-', ' ').split()[-1]
            start = int(span.split('-')[0])

            regular, mutated = pwmRegDict[region]
            reg, regMut, regSeq, regMutSeq, regPos = regular
            mut, mutReg, mutSeq, mutRegSeq, mutPos = mutated
            difference = abs(reg - mut)

            refScores = '[%f -> %f]' % (reg, regMut)
            mutScores = '[%f -> %f]' % (mut, mutReg)
            refHit = '%s:%i-%i' % (chrom, start + regPos[0],
                                   start + regPos[1])
            mutHit = '%s:%i-%i' % (chrom, start + mutPos[0],
                                   start + mutPos[1])

            regionColumns = region.replace('-', ' ').replace(' ', '\t')
            diffResDict[difference].append(
                '%s\t%f\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s' %
                (regionColumns, difference, refScores, mutScores, refHit,
                 regSeq, regMutSeq, mutHit, mutSeq, mutRegSeq))
            lineTab.append([
                chrom, str(start), str(end), str(difference),
                refScores, mutScores,
                refHit, regSeq, regMutSeq,
                mutHit, mutSeq, mutRegSeq
            ])

        if not diffResDict:
            return

        # NOTE(review): max/avg/num are computed over the DISTINCT difference
        # values (the dict keys), not over all regions, so regions sharing a
        # difference are counted once - confirm this is intended.
        diffList = list(diffResDict)
        self.maxPwmDiff = str(max(diffList))
        self.avgPwmDiff = str(sum(diffList) / len(diffList))
        self.numPwmDiff = len(diffList)

        self.pwmDiffScore.writeTextToFile(self.getHtmlPwmTable(lineTab))
        self.bedPwmDiffScore.writeTextToFile('\n'.join(
            '\t'.join(row[:4]) for row in lineTab))

        # Records ordered by decreasing difference. Header and records are
        # written in one call, joined by newlines: appending each difference
        # group separately without a trailing newline glued the last record
        # of one group to the first record of the next (assuming
        # writeTextToFile writes the text verbatim).
        gtrackRecords = []
        for difference in sorted(diffResDict, reverse=True):
            gtrackRecords.extend(diffResDict[difference])
        self.gtrackDiffScore.writeTextToFile(
            self._GTRACK_HEADER + '\n'.join(gtrackRecords))

    def getHtmlPwmTable(self, lineTab):
        '''Return an HTML page (string) with one sortable table row per entry of lineTab.'''
        headerTab = [
            'chrom', 'start', 'end', 'max PWM difference',
            'best reference seq_PWM score -> corresponding mut seq score',
            'best mut seq PWM score -> corresponding_ref seq score',
            'ref region', 'ref seq', 'corresponding mut seq', 'mut region',
            'mut seq', 'corresponding ref seq'
        ]
        core = HtmlCore()
        core.begin()
        core.tableHeader(headerTab, sortable=True)
        for row in lineTab:
            core.tableLine(row)
        core.tableFooter()
        core.end()
        return str(core)

    def makeHtmlStr(self):
        '''
        Write the HTML summary page for this TF and return a link to it.

        Requires getFastaFiles() (sets self.intersectingPoints) and a
        getPwmScores() call that found at least one region: maxPwmDiff and
        avgPwmDiff are concatenated as strings and must not be None.
        '''
        htmlPage = GalaxyRunSpecificFile(
            ['html', '_'.join(self.track), 'page.html'], self.galaxyFn)
        htmlStr = '<br/>\n'.join([
            'TF: ' + self.tf,
            'Chip-seq peaks: ' + self.chipSeqPeaks,
            'PWM: ' + self.pwm,
            'Number of SNV-intersected binding regions: ' + self.intersectingPoints,
            'Highest binding difference: ' + self.maxPwmDiff,
            'Avg binding difference: ' + self.avgPwmDiff,
            self.regularFasta.getLink('Original Fasta'),
            self.mutatedFasta.getLink('Mutated Fasta'),
            self.pwmDiffScore.getLink('PWM score for each region'),
            self.gtrackDiffScore.getLink('Gtrack of PWM score for each region'),
        ])
        htmlPage.writeTextToFile(htmlStr)
        return htmlPage.getLink(self.tf + ':   ' + self.track[-1])
예제 #11
0
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Is called when execute-button is pushed by web-user. Should print
        output as HTML to standard out, which will be directed to a results page
        in Galaxy history. If getOutputFormat is anything else than HTML, the
        output should be written to the file with path galaxyFn. If needed,
        StaticFile can be used to get a path where additional files can be put
        (e.g. generated image files). choices is a list of selections made by
        web-user in each options box.
        '''

        genome = choices[0]
        regSpec = '__chrs__'
        binSpec = '*'
        if choices[6] == 'Chromosome arms':
            regSpec = '__chrArms__'
        elif choices[6] == 'Track from history...':
            #put in history bins support here
            #print choices[4:]
            regSpec = ExternalTrackManager.extractFileSuffixFromGalaxyTN(choices[7].split(':'))
            binSpec = ExternalTrackManager.extractFnFromGalaxyTN(choices[7].split(':'))
            #print 'regSpec, binSpec,', regSpec, binSpec
            lineList, counter, tooManyBins = [], 0, False
            for line in open(binSpec):
                if line.strip() !='':
                    if counter == cls.MAX_NUM_ROWS:
                        tooManyBins = True
                        break
                    lineList.append(line)
                    counter+= 1 if line.strip()[0] !='#' else 0

            if tooManyBins:
                newHist = GalaxyRunSpecificFile(['newHistFile.%s' % regSpec], galaxyFn)
                binSpec = newHist.getDiskPath(ensurePath=True)
                open(binSpec, 'w').write(''.join(lineList))

        print GalaxyInterface.getHtmlBeginForRuns(galaxyFn)
        print GalaxyInterface.getHtmlForToggles(withRunDescription=False)

        core = HtmlCore()
        core.styleInfoBegin(styleClass='debug')

        figImage = GalaxyRunSpecificFile(['VizTrackOnGenome.png'], galaxyFn)
        #StaticImage(['VizTrackOnGenome.png'])
        analysisDef = ' [normalizeRows=%s] [centerRows=%s]  -> RawVisualizationDataStat' % \
            (choices[4] == 'Scale to same size', choices[5] == 'Center')

        if choices[1] == 'HyperBrowser repository':
            trackName = choices[2].split(':')
        else:
            trackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(genome, choices[3].split(':'))

        res = GalaxyInterface.runManual([trackName], analysisDef, regSpec, binSpec, genome, username=username, printResults=False, printHtmlWarningMsgs=False)

        core.styleInfoEnd()
        core.line('')

        core.tableHeader(None)
        #visPresenter = RawVisualizationPresenter(res, galaxyFn,'')#os.path.split()[0]
        #htmlStreng = visPresenter.getReference('Result', fullImage=True)
        rScript = cls.customRExecution(res, figImage.getDiskPath(ensurePath=True), '')

        figUrl = figImage.getURL()
        figLinkText ='<img src="%s" alt="Figure" height="%i" width="800"/>' % (figUrl, 20 *min(cls.MAX_NUM_ROWS, len(res)))
        core.tableLine([figImage.getLink(figLinkText)])

        rScriptGalaxyFile = GalaxyRunSpecificFile(['RScript.R'], galaxyFn)
        with open(rScriptGalaxyFile.getDiskPath(ensurePath=True), 'w') as rScriptFile:
            rScriptFile.write(rScript)

        core.tableLine([rScriptGalaxyFile.getLink('R script')])

        core.tableFooter()

        print core
        print GalaxyInterface.getHtmlEndForRuns()