def _compute(self):
        """Evaluate the raw statistic for every subtype pair of the two tracks.

        Each subtype of the first track is combined with each subtype of the
        second track, and the raw statistic is computed for that pair of
        sub-tracks. If 'minimal' is among the keyword arguments, only the
        first subtype of each track is used.

        Returns:
            A dict with the single key 'Result', holding an OrderedDict with
            'Matrix' (nested lists, one row per subtype of track 1 and one
            column per subtype of track 2), 'Rows' (subtypes of track 1) and
            'Cols' (subtypes of track 2).

        Raises:
            IncompatibleTracksError: if the directory of either sub-track of
                a pair does not exist.
        """
        # 'rawStatistic' is not forwarded to the raw statistic itself.
        statKwArgs = copy(self._kwArgs)
        statKwArgs.pop('rawStatistic', None)

        rowSubtypes = ProcTrackOptions.getSubtypes(
            self.getGenome(), self._track.trackName, True)
        colSubtypes = ProcTrackOptions.getSubtypes(
            self.getGenome(), self._track2.trackName, True)
        assert len(rowSubtypes) > 0, str(self._track.trackName)
        assert len(colSubtypes) > 0, str(self._track2.trackName)

        if 'minimal' in self._kwArgs:
            rowSubtypes, colSubtypes = rowSubtypes[:1], colSubtypes[:1]

        cellResults = []
        for rowSubtype in rowSubtypes:
            for colSubtype in colSubtypes:
                rowTn = self._track.trackName + [rowSubtype]
                colTn = self._track2.trackName + [colSubtype]
                bothDirsExist = (
                    os.path.exists(createDirPath(rowTn, self.getGenome()))
                    and os.path.exists(createDirPath(colTn, self.getGenome())))
                if not bothDirsExist:
                    raise IncompatibleTracksError

                # The sub-tracks inherit the format converters of their
                # parent tracks.
                rowTrack = Track(rowTn)
                rowTrack.formatConverters = self._track.formatConverters
                colTrack = Track(colTn)
                colTrack.formatConverters = self._track2.formatConverters

                cellStat = self._rawStatistic(self._region, rowTrack,
                                              colTrack, **statKwArgs)
                cellResults.append(cellStat.getResult())
            # Stored results are flushed once per completed matrix row.
            ResultsMemoizer.flushStoredResults()

        matrix = array(cellResults).reshape(
            (len(rowSubtypes), len(colSubtypes)))
        resultItems = [('Matrix', matrix.tolist()),
                       ('Rows', rowSubtypes),
                       ('Cols', colSubtypes)]
        return {'Result': OrderedDict(resultItems)}
 def _compute(self):
     """Evaluate the raw statistic for each subtype of track 1 against the
     '0' and '1' subtypes of track 2.

     Returns:
         An OrderedDict with 'Matrix' (nested lists with two columns, one
         row per subtype of track 1), 'Rows' (the subtypes of track 1) and
         'Cols' (the fixed labels 'Case' and 'Control').

     Raises:
         IncompatibleTracksError: if the directory of either sub-track of a
             pair does not exist.
     """
     # 'rawStatistic' is not forwarded to the raw statistic itself.
     statKwArgs = copy(self._kwArgs)
     statKwArgs.pop('rawStatistic', None)

     rowSubtypes = ProcTrackOptions.getSubtypes(
         self.getGenome(), self._track.trackName, True)
     assert len(rowSubtypes) > 0

     cellResults = []
     for rowSubtype in rowSubtypes:
         for colSubtype in ('0', '1'):
             rowTn = self._track.trackName + [rowSubtype]
             colTn = self._track2.trackName + [colSubtype]
             bothDirsExist = (
                 os.path.exists(createDirPath(rowTn, self.getGenome()))
                 and os.path.exists(createDirPath(colTn, self.getGenome())))
             if not bothDirsExist:
                 raise IncompatibleTracksError

             # The sub-tracks inherit the format converters of their
             # parent tracks.
             rowTrack = Track(rowTn)
             rowTrack.formatConverters = self._track.formatConverters
             colTrack = Track(colTn)
             colTrack.formatConverters = self._track2.formatConverters

             cellStat = self._rawStatistic(self._region, rowTrack, colTrack,
                                           **statKwArgs)
             cellResults.append(cellStat.getResult())
             # Stored results are flushed after every single matrix cell.
             ResultsMemoizer.flushStoredResults()

     matrix = array(cellResults).reshape((-1, 2))
     resultItems = [('Matrix', matrix.tolist()),
                    ('Rows', rowSubtypes),
                    ('Cols', ['Case', 'Control'])]
     return OrderedDict(resultItems)
Exemple #3
0
 def getOptionsBox15(prevChoices):
     """Return the content of option box 15, chosen by the second-to-last
     previous choice.

     - 'Select a range among all reference tracks': an empty string.
     - 'Select single reference track': the subtypes of the
       'Private:GK:Psych:DHSs' track for the genome in prevChoices[0]
       (prevChoices[4] is asserted to be 'DHS').
     - Anything else: None (implicitly).
     """
     referenceMode = prevChoices[-2]
     if referenceMode == 'Select a range among all reference tracks':
         return ''
     if referenceMode == 'Select single reference track':
         assert prevChoices[4] == 'DHS'
         dhsTrackName = 'Private:GK:Psych:DHSs'.split(':')
         return ProcTrackOptions.getSubtypes(prevChoices[0], dhsTrackName,
                                             True)
Exemple #4
0
 def getOptionsBoxTfTracks(cls, prevChoices):
     """Return the selectable TF tracks for the chosen TF source.

     Args:
         prevChoices: previous GUI choices; the attributes `genome`,
             `sourceTfs` and `sourceTfsDetails` are read.

     Returns:
         - None when either selection still equals `cls.SELECT`, when the
           source is unknown, when the history selection is not a string,
           or when a selected GSuite fails any of the validity checks.
         - For 'Hyperbrowser repository': an OrderedDict mapping each
           subtype of the mapped TF track to the string 'False'.
         - For a GSuite history element: an OrderedDict mapping each track
           name of the GSuite (joined with ':') to the string 'False'.
         - For any other history element: a one-element list holding the
           preprocessed track name joined with ':'.
     """
     sourceTfs = prevChoices.sourceTfs
     sourceTfsDetails = prevChoices.sourceTfsDetails
     if sourceTfsDetails == cls.SELECT or sourceTfs == cls.SELECT:
         return None

     genome = prevChoices.genome

     if sourceTfs == 'Hyperbrowser repository':
         tfSourceTN = TfTrackNameMappings.getTfTrackNameMappings(
             genome)[sourceTfsDetails]
         subtypes = ProcTrackOptions.getSubtypes(genome, tfSourceTN, True)
         return OrderedDict((subtype, 'False') for subtype in subtypes)

     if sourceTfs != cls.REGIONS_FROM_HISTORY:
         return None
     if not isinstance(sourceTfsDetails, basestring):
         return None

     galaxyTN = sourceTfsDetails.split(':')
     if galaxyTN[1] != "gsuite":
         # Plain history element: a single preprocessed track.
         tfTrackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(
             genome, galaxyTN)
         return [':'.join(tfTrackName)]

     # GSuite history element: every check returns an error string on
     # failure, in which case no options are offered.
     if GeneralGuiTool._checkGSuiteFile(sourceTfsDetails):
         return None
     # The GSuite is loaded once and reused for both the checks and the
     # listing of its tracks.
     gSuite = getGSuiteFromGalaxyTN(sourceTfsDetails)
     if GeneralGuiTool._checkGSuiteTrackListSize(gSuite, 1, 1000):
         return None
     if GeneralGuiTool._checkGSuiteRequirements(
             gSuite,
             AllTfsOfRegions.GSUITE_ALLOWED_FILE_FORMATS,
             AllTfsOfRegions.GSUITE_ALLOWED_LOCATIONS,
             AllTfsOfRegions.GSUITE_ALLOWED_TRACK_TYPES,
             AllTfsOfRegions.GSUITE_DISALLOWED_GENOMES):
         return None

     selectedTrackNames = [':'.join(track.trackName)
                           for track in gSuite.allTracks()]
     return OrderedDict((name, 'False') for name in selectedTrackNames)
 def getOptionsBoxDataset(prevChoices):
     """Return the dataset subtypes for the chosen interactions and cell line.

     Only the genomes 'hg19' and 'mm9' are handled; for any other genome
     None is returned. The parent track is
     ColocalizationIn3DTool.PARENT_TRACKNAME extended with the chosen
     interactions and cell line, and its subtypes are fetched with
     fullAccess=False.
     """
     if prevChoices.genome not in ['hg19', 'mm9']:
         return None

     datasetParentTn = ColocalizationIn3DTool.PARENT_TRACKNAME + [
         prevChoices.interactions, prevChoices.cellLine
     ]
     return ProcTrackOptions.getSubtypes(prevChoices.genome,
                                         datasetParentTn,
                                         fullAccess=False)
Exemple #6
0
 def _getAllDiseases(prevChoices):
     if isinstance(prevChoices[2], dict):
         return prevChoices[2].keys()
     else:
         tn = SelectDiseaseTool._getDiseaseTn(prevChoices)
         return [
             x for x in ProcTrackOptions.getSubtypes(
                 SelectDiseaseTool.GENOME, tn)
         ]
 def _getAllPwms(prevChoices):
     if isinstance(prevChoices[2], dict):
         return prevChoices[2].keys()
     else:
         tfTrackName = SelectTfTool.TRACK_DICT[prevChoices[0]]
         return [
             pwm for pwm in ProcTrackOptions.getSubtypes(
                 SelectTfTool.GENOME, tfTrackName)
         ]
 def getOptionsBox3(prevChoices):
     """Return the content of option box 3 for the chosen seed source.

     For 'TFBS from history' a history selector restricted to 'bed' is
     returned. Otherwise prevChoices[1] is looked up in the TF track-name
     mappings of the genome in prevChoices[0], and the subtypes of the
     mapped track are returned.
     """
     tfSource = prevChoices[1]
     if tfSource != 'TFBS from history':
         genome = prevChoices[0]
         tfSourceTN = TfInfo.getTfTrackNameMappings(genome)[tfSource]
         return ProcTrackOptions.getSubtypes(genome, tfSourceTN, True)
     return ('__history__', 'bed')
 def getOptionsBox3(prevChoices):
     """Build option box 3 from the seed source chosen in prevChoices[1].

     Returns the tuple ('__history__', 'bed') when the seed source is
     'TFBS from history'; otherwise the subtypes of the TF track mapped to
     the seed source for the genome in prevChoices[0].
     """
     chosenSource = prevChoices[1]
     useHistory = (chosenSource == 'TFBS from history')
     if useHistory:
         return ('__history__', 'bed')

     genome = prevChoices[0]
     sourceMappings = TfInfo.getTfTrackNameMappings(genome)
     return ProcTrackOptions.getSubtypes(genome, sourceMappings[chosenSource],
                                         True)
Exemple #10
0
    def superLine2batch(cls, line, genome):
        """Expand one batch "super line" into a list of named batch lines.

        Blank lines and lines whose first character is '#' yield an empty
        list. A line with exactly four columns gets 'dummy' inserted as a
        new fourth column. Then:

        - the first of columns 2-4 containing '/' is split at its first '/'
          into two alternatives, and each resulting line is expanded
          recursively;
        - otherwise, the first of columns 2-3 containing '*' (which must be
          the last ':'-separated part, and the only '*') is replaced by
          every subtype of the parent track, and each resulting line is
          expanded recursively;
        - otherwise the line itself is returned, prefixed by its generated
          batch name and the column separator.

        Raises:
            Exception: if a '*' parent track has no subtypes.
        """
        if not line.strip() or line[0] == '#':
            return []

        cols = line.split(BATCH_COL_SEPARATOR)
        if len(cols) == 4:
            cols = cols[:3] + ['dummy', cols[3]]
            line = BATCH_COL_SEPARATOR.join(cols)

        # NOTE(review): BatchRunner is not used below; the import is kept
        # because removing it would change what gets loaded at this point.
        from quick.batch.BatchRunner import BatchRunner

        def expandColumn(position, alternatives):
            # One copy of the columns per alternative, each expanded
            # recursively, with the resulting lists concatenated.
            expandedCols = [
                cols[0:position] + [alternative] + cols[position + 1:]
                for alternative in alternatives
            ]
            return reduce(lambda x, y: x + y, [
                cls.superLine2batch(BATCH_COL_SEPARATOR.join(variantCols),
                                    genome)
                for variantCols in expandedCols
            ])

        # '/' alternatives: not the bin specification, but tn1, tn2 and
        # statistic.
        for position, col in enumerate(cols[2:5], 2):
            if '/' not in col:
                continue
            firstAlt, _, secondAlt = col.partition('/')
            return expandColumn(position, (firstAlt, secondAlt))

        # '*' wildcards: only in the two track-name columns.
        for position, col in enumerate(cols[2:4], 2):
            if '*' not in col:
                continue
            typeParts = col.split(':')
            parentParts = typeParts[:-1]
            assert typeParts[-1] == '*' and not any(
                '*' in part for part in parentParts)

            unquotedTrackName = [unquote(part) for part in parentParts]
            starOptions = ProcTrackOptions.getSubtypes(
                genome, unquotedTrackName, True)
            if len(starOptions) == 0:
                raise Exception('No subtracks for parent track: %s' %
                                str(unquotedTrackName))

            # Subtype names are quoted again before being joined with ':'.
            typeOptions = [
                ':'.join(parentParts + [quote(starOpt)])
                for starOpt in starOptions
            ]
            return expandColumn(position, typeOptions)

        return [cls.generateBatchName(line) + BATCH_COL_SEPARATOR + line]
 def getOptionsBox6(cls, prevChoices):
     """Return the content of option box 6 for the TF source in prevChoices[4].

     When the source equals cls.REGIONS_FROM_HISTORY, a history selector
     restricted to 'bed' and 'bedgraph' is returned. Otherwise the source is
     looked up in the TF track-name mappings of the genome in prevChoices[1]
     and the subtypes of the mapped track are returned.
     """
     tfSource = prevChoices[4]
     if tfSource == cls.REGIONS_FROM_HISTORY:
         return ('__history__', 'bed', 'bedgraph')

     genome = prevChoices[1]
     tfSourceTN = TfInfo.getTfTrackNameMappings(genome)[tfSource]
     return ProcTrackOptions.getSubtypes(genome, tfSourceTN, True)
 def getOptionsBox6(cls, prevChoices):
     """Build option box 6 from the TF source chosen in prevChoices[4].

     Returns ('__history__', 'bed', 'bedgraph') for the
     cls.REGIONS_FROM_HISTORY source; for any other source, the subtypes of
     the TF track that the source maps to for the genome in prevChoices[1].
     """
     chosenSource = prevChoices[4]
     if chosenSource != cls.REGIONS_FROM_HISTORY:
         genome = prevChoices[1]
         sourceMappings = TfInfo.getTfTrackNameMappings(genome)
         return ProcTrackOptions.getSubtypes(genome,
                                             sourceMappings[chosenSource],
                                             True)
     return ('__history__', 'bed', 'bedgraph')
 def _calcAndStoreSubTrackCount(self, trackName):
     """Compute and store the sub-track count of `trackName`.

     The count is the sum of the (truthy) subTrackCount values of all direct
     subtypes, plus one if the TrackInfo of `trackName` itself is valid. The
     result is written to that TrackInfo's subTrackCount and stored.
     """
     ti = TrackInfo(self._genome, trackName)
     childCounts = [
         TrackInfo(self._genome, trackName + [childName]).subTrackCount
         for childName in ProcTrackOptions.getSubtypes(
             self._genome, trackName, True)
     ]
     # Children with no stored count (falsy) do not contribute.
     total = sum(count for count in childCounts if count)
     if ti.isValid():
         total += 1
     ti.subTrackCount = total
     ti.store()
Exemple #14
0
    def getOptionsBoxTrack2(cls, prevChoices):
        """Return the options for the second track.

        Unless prevChoices[4] is 'yes', a track selector restricted to
        history elements is returned. Otherwise the subtypes of
        cls.STD_PREFIX_TN for the genome in prevChoices[1] are listed,
        with the cached 'track2' entry (if present in the list) moved to the
        front, and cls.NO_TRACK_SHORTNAME prepended.
        """
        if prevChoices[4] != 'yes':
            return '__track__', 'history'

        trackList = ProcTrackOptions.getSubtypes(prevChoices[1],
                                                 cls.STD_PREFIX_TN, True)
        cachedTrack = cls._cacheDict.get('track2')
        if cachedTrack in trackList:
            # Move the previously used track to the top of the list.
            trackList.remove(cachedTrack)
            trackList.insert(0, cachedTrack)
        return [cls.NO_TRACK_SHORTNAME] + trackList
 def getOptionsBoxTrack2(cls, prevChoices):
     """Build the option list for the second track.

     When prevChoices[4] is 'yes', returns cls.NO_TRACK_SHORTNAME followed
     by the subtypes of cls.STD_PREFIX_TN for the genome in prevChoices[1],
     with the cached 'track2' entry moved first if it is among them. In all
     other cases returns the tuple ('__track__', 'history').
     """
     useStandardTracks = (prevChoices[4] == 'yes')
     if not useStandardTracks:
         return '__track__', 'history'

     genome = prevChoices[1]
     trackList = ProcTrackOptions.getSubtypes(genome, cls.STD_PREFIX_TN, True)
     lastUsed = cls._cacheDict.get('track2')
     if lastUsed in trackList:
         # The previously used track is moved to the front.
         trackList.remove(lastUsed)
         trackList.insert(0, lastUsed)
     return [cls.NO_TRACK_SHORTNAME] + trackList
 def _calcAndStoreSubTrackCount(self, trackName):
     """Recompute the sub-track count of `trackName` and store it.

     Adds up the subTrackCount of every direct subtype that has a truthy
     count, adds one when the TrackInfo of `trackName` is itself valid, and
     stores the total in that TrackInfo.
     """
     ownInfo = TrackInfo(self._genome, trackName)

     total = 0
     childNames = ProcTrackOptions.getSubtypes(self._genome, trackName, True)
     for childName in childNames:
         childCount = TrackInfo(self._genome,
                                trackName + [childName]).subTrackCount
         if not childCount:
             # No count stored for this child; it contributes nothing.
             continue
         total += childCount

     if ownInfo.isValid():
         total += 1

     ownInfo.subTrackCount = total
     ownInfo.store()
 def getOptionsBoxTfTracks(cls, prevChoices):
     """Return the TF track options for the chosen TF source.

     - No source chosen (falsy) or the cls.SELECT placeholder: None.
     - cls.REGIONS_FROM_HISTORY: a history selector restricted to 'bed',
       'category.bed' and 'gtrack'.
     - Any other source: the subtypes of the TF track the source maps to
       for prevChoices.genome.
     """
     sourceTfs = prevChoices.sourceTfs
     if not sourceTfs:
         return None
     if sourceTfs == cls.REGIONS_FROM_HISTORY:
         return ('__history__', 'bed', 'category.bed', 'gtrack')
     if sourceTfs == cls.SELECT:
         return None

     genome = prevChoices.genome
     tfSourceTN = TfTrackNameMappings.getTfTrackNameMappings(genome)[sourceTfs]
     return ProcTrackOptions.getSubtypes(genome, tfSourceTN, True)
 def getOptionsBox6(cls, prevChoices):
     """Return the track options of option box 6.

     Unless prevChoices[4] is 'yes', a track selector restricted to history
     elements is returned. Otherwise the subtypes of cls.STD_PREFIX_TN for
     the genome in prevChoices[3] are returned.
     """
     if prevChoices[4] != 'yes':
         return '__track__', 'history'

     return ProcTrackOptions.getSubtypes(prevChoices[3], cls.STD_PREFIX_TN,
                                         True)
    def getOptionsBox6(cls, prevChoices):
        """Build option box 6.

        Returns the subtypes of cls.STD_PREFIX_TN for the genome in
        prevChoices[3] when prevChoices[4] is 'yes'; in all other cases the
        tuple ('__track__', 'history').
        """
        useStandardTracks = (prevChoices[4] == 'yes')
        if not useStandardTracks:
            return '__track__', 'history'

        genome = prevChoices[3]
        standardPrefix = cls.STD_PREFIX_TN
        return ProcTrackOptions.getSubtypes(genome, standardPrefix, True)
    def getOptionsBoxInteractions(prevChoices):
        '''
        Return the options for the "interactions" input box.

        Alternatively this box could be named getOptionsBoxKey2(); see
        getOptionsBox1() for the general contract.

        prevChoices is a namedtuple of the selections made by the user in
        the previous input boxes. Its elements can be accessed either by
        index, e.g. prevChoices[0] for the result of input box 1, or by key,
        e.g. prevChoices.genome.

        For the genomes 'hg19' and 'mm9' the subtypes of
        ColocalizationIn3DTool.PARENT_TRACKNAME are returned (fetched with
        fullAccess=False); for any other genome None is returned.
        '''
        if prevChoices.genome not in ['hg19', 'mm9']:
            return None

        return ProcTrackOptions.getSubtypes(
            prevChoices.genome,
            ColocalizationIn3DTool.PARENT_TRACKNAME,
            fullAccess=False)
    def superLine2batch(cls, line, genome):
        """Expand one batch "super line" into a list of named batch lines.

        Blank lines and lines whose first character is '#' yield an empty
        list. A line with exactly four columns gets 'dummy' inserted as a
        new fourth column. Then:

        - the first of columns 2-4 containing '/' is split at its first '/'
          into two alternatives, and each resulting line is expanded
          recursively;
        - otherwise, the first of columns 2-3 containing '*' (which must be
          the last ':'-separated part, and the only '*') is replaced by
          every subtype of the parent track, and each resulting line is
          expanded recursively;
        - otherwise the line itself is returned, prefixed by its generated
          batch name and the column separator.

        Raises:
            Exception: if a '*' parent track has no subtypes.
        """
        if line.strip() == '' or line[0] == '#':
            return []

        cols = line.split(BATCH_COL_SEPARATOR)

        if len(cols) == 4:
            cols = cols[:3] + ['dummy', cols[3]]
            line = BATCH_COL_SEPARATOR.join(cols)

        # NOTE(review): BatchRunner is not used below; the import is kept
        # because removing it would change what gets loaded at this point.
        from quick.batch.BatchRunner import BatchRunner

        # '/' alternatives: not the bin specification, but tn1, tn2 and
        # statistic.
        for colIndex, col in zip(range(2, 5), cols[2:5]):
            if '/' in col:
                splitPoint = col.find('/')
                splittedCols = col[0:splitPoint], col[splitPoint + 1:]
                splittedListLines = [
                    cols[0:colIndex] + [splitCol] + cols[colIndex + 1:]
                    for splitCol in splittedCols
                ]
                return reduce(lambda x, y: x + y, [
                    cls.superLine2batch(
                        BATCH_COL_SEPARATOR.join(splittedLine), genome)
                    for splittedLine in splittedListLines
                ])

        # '*' wildcards: only in the two track-name columns.
        for colIndex, col in zip(range(2, 4), cols[2:4]):
            if '*' in col:
                typeParts = col.split(':')
                assert typeParts[-1] == '*' and not any(
                    '*' in part for part in typeParts[:-1])

                unquotedTrackName = [unquote(x) for x in typeParts[:-1]]
                starOptions = ProcTrackOptions.getSubtypes(
                    genome, unquotedTrackName, True)
                if len(starOptions) == 0:
                    raise Exception('No subtracks for parent track: %s' %
                                    str(unquotedTrackName))

                # NOTE(review): the parent parts are unquoted above, but the
                # subtype names are inserted here without being quoted
                # again - confirm whether they need quoting before the
                # recursive re-parse.
                typeOptions = [
                    ':'.join(typeParts[:-1] + [starOpt])
                    for starOpt in starOptions
                ]

                splittedListLines = [
                    cols[0:colIndex] + [splitCol] + cols[colIndex + 1:]
                    for splitCol in typeOptions
                ]
                return reduce(lambda x, y: x + y, [
                    cls.superLine2batch(
                        BATCH_COL_SEPARATOR.join(splittedLine), genome)
                    for splittedLine in splittedListLines
                ])

        return [cls.generateBatchName(line) + BATCH_COL_SEPARATOR + line]
    def yielder(self, curTn):
        """Recursively yield the valid track names at and below `curTn`.

        Subtypes whose name starts with '.' or '_' are skipped. All
        descendants are yielded before `curTn` itself, which is yielded only
        if it is a valid track. When literature is to be avoided and `curTn`
        is the genome's 'literature' property track, nothing is yielded.
        """
        if self._avoidLiterature:
            literatureTn = GenomeInfo.getPropertyTrackName(self._genome,
                                                           'literature')
            if curTn == literatureTn:
                return

        subtypes = ProcTrackOptions.getSubtypes(self._genome, curTn,
                                                self._fullAccess)
        for subtype in subtypes:
            if subtype[0] not in ['.', '_']:
                for descendantTn in self.yielder(curTn + [subtype]):
                    yield descendantTn

        if ProcTrackOptions.isValidTrack(self._genome, curTn,
                                         self._fullAccess):
            yield curTn
    def yielder(self, curTn, level=0):
        """Recursively yield the valid track names at and below `curTn`.

        Subtypes whose name starts with '.' or '_' are skipped. All
        descendants are yielded before `curTn` itself. `curTn` is yielded
        only if it is a valid track and, at the top level (level 0), only if
        the parent track is to be included. When literature is to be avoided
        and `curTn` is the genome's 'literature' property track, nothing is
        yielded.

        `level` is the recursion depth and is increased by one for each
        nested call.
        """
        if self._avoidLiterature:
            literatureTn = GenomeInfo.getPropertyTrackName(self._genome,
                                                           'literature')
            if curTn == literatureTn:
                return

        subtypes = ProcTrackOptions.getSubtypes(self._genome, curTn,
                                                self._fullAccess)
        for subtype in subtypes:
            if subtype[0] not in ['.', '_']:
                childTn = curTn + [subtype]
                for descendantTn in self.yielder(childTn, level=level + 1):
                    yield descendantTn

        if not (self._includeParentTrack or level > 0):
            return
        if ProcTrackOptions.isValidTrack(self._genome, curTn,
                                         self._fullAccess):
            yield curTn
Exemple #24
0
     print 'FAILED: Error when preprocessing TestGenome. Error:'
     print '        ' + str(e).strip()
     sys.exit(1)
 
 # For both allowOverlaps settings, copy the 'GESourceTracks' directory of
 # 'TestGenome' to the directory that createDirPath returns for the empty
 # track name of 'ModelsForExternalTracks'. An existing target directory is
 # left untouched; any error during the copy aborts the script with exit
 # code 1.
 for allowOverlaps in [False, True]:
     fromDir = createDirPath(['GESourceTracks'], 'TestGenome', allowOverlaps=allowOverlaps)
     toDir = createDirPath([], 'ModelsForExternalTracks', allowOverlaps=allowOverlaps)
     try:
         if not os.path.exists(toDir):
             shutil.copytree(fromDir, toDir)
             print 'OK: Copied from %s to %s.' % (fromDir, toDir)
     except Exception, e:
         print 'FAILED: Error occurred copying from %s to %s: ' % (fromDir, toDir) + str(e).strip()
         sys.exit(1)
         
 # Re-store the TrackInfo of every 'GESourceTracks' subtype under the genome
 # 'ModelsForExternalTracks', with the subtype as a one-element track name.
 for track in ProcTrackOptions.getSubtypes('TestGenome', ['GESourceTracks']):
     ti = TrackInfo('TestGenome', ['GESourceTracks', track])
     ti.trackName = [track]
     ti.genome = 'ModelsForExternalTracks'
     ti.store()
         
 # Fill in the GenomeInfo attributes of 'TestGenome'.
 # NOTE(review): no gi.store() call is visible in this excerpt - confirm it
 # follows.
 from quick.util.GenomeInfo import GenomeInfo
 from datetime import datetime
 gi = GenomeInfo('TestGenome')
 gi.fullName = 'TestGenome'
 gi.sourceUrls = ['http://hgdownload.cse.ucsc.edu/goldenPath/hg18/chromosomes/chr21.fa.gz', \
                  'http://hgdownload.cse.ucsc.edu/goldenPath/hg18/chromosomes/chrM.fa.gz']
 gi.sourceChrNames = ['chr21', 'chrM']
 gi.installedBy = 'Setup.py'
 gi.genomeBuildSource = 'NCBI'
 gi.genomeBuildName = 'hg18'
        fromDir = createDirPath(['GESourceTracks'],
                                'TestGenome',
                                allowOverlaps=allowOverlaps)
        toDir = createDirPath([],
                              'ModelsForExternalTracks',
                              allowOverlaps=allowOverlaps)
        try:
            if not os.path.exists(toDir):
                shutil.copytree(fromDir, toDir)
                print 'OK: Copied from %s to %s.' % (fromDir, toDir)
        except Exception, e:
            print 'FAILED: Error occurred copying from %s to %s: ' % (
                fromDir, toDir) + str(e).strip()
            sys.exit(1)

    for track in ProcTrackOptions.getSubtypes('TestGenome',
                                              ['GESourceTracks']):
        ti = TrackInfo('TestGenome', ['GESourceTracks', track])
        ti.trackName = [track]
        ti.genome = 'ModelsForExternalTracks'
        ti.store()

    from quick.util.GenomeInfo import GenomeInfo
    from datetime import datetime
    gi = GenomeInfo('TestGenome')
    gi.fullName = 'TestGenome'
    gi.sourceUrls = ['http://hgdownload.cse.ucsc.edu/goldenPath/hg18/chromosomes/chr21.fa.gz', \
                     'http://hgdownload.cse.ucsc.edu/goldenPath/hg18/chromosomes/chrM.fa.gz']
    #gi.sourceChrNames = ['chr21', 'chrM']
    gi.installedBy = 'Setup.py'
    gi.genomeBuildSource = 'NCBI'
    gi.genomeBuildName = 'hg18'
 def _getAllDiseases(prevChoices):
     if isinstance(prevChoices[2], dict):
         return prevChoices[2].keys()
     else:
         tn = SelectDiseaseTool._getDiseaseTn(prevChoices)
         return [x for x in ProcTrackOptions.getSubtypes(SelectDiseaseTool.GENOME, tn)]
 def _getAllPwms(prevChoices):
     if isinstance(prevChoices[2], dict):
         return prevChoices[2].keys()
     else:
         tfTrackName = SelectTfTool.TRACK_DICT[prevChoices[0]]
         return [pwm for pwm in ProcTrackOptions.getSubtypes(SelectTfTool.GENOME, tfTrackName)]