Ejemplo n.º 1
0
 def _storeUcscInfoBox(self, genome, parameterForm, trackName):
     """Fetch the UCSC schema page of a track and store its info sections.

     Clicks 'hgta_doSchema' on parameterForm, reads the resulting HTML and
     cuts out the span from the first <H2> up to the last </P>. Each <H2>
     section whose header is listed in the conversion table below is copied
     into the corresponding attribute of a TrackInfo object for
     (genome, trackName), which is then stored. If the cut-out span is empty,
     nothing is created or stored.
     """
     # Section header on the UCSC page -> TrackInfo attribute name.
     sectionToAttr = {'Description':'description', 'Methods':'methods', 'Credits':'credits','References':'reference', 'Data Release Policy':'restrictions', 'Display Conventions and Configuration':'displayConvConf'}

     page = mechanize.urlopen(parameterForm.click('hgta_doSchema')).read()
     # Upper-case the tags that are searched for below.
     for lowerCase, upperCase in (('h2>', 'H2>'), ('p>', 'P>'), ('<a href', '<A HREF')):
         page = page.replace(lowerCase, upperCase)

     infoHtml = page[page.find('<H2>'):page.rfind('</P>')]
     infoHtml = infoHtml.replace('<A HREF="http:..', ' <A HREF="http://genome.ucsc.edu')
     if not infoHtml:
         return

     # The slice stops right before the last </P>; put the closing tag back.
     infoHtml += '</P>'
     print(' : '.join(trackName) + ',   ' + genome)
     trackInfoObject = TrackInfo(genome, trackName)

     for section in infoHtml.split('<H2>')[1:]:
         header = section[:section.find('</H2>')].replace(':', '').strip()
         attrName = sectionToAttr.get(header)
         if attrName is not None:
             setattr(trackInfoObject, attrName, section.split('</H2>')[1])

     trackInfoObject.store()
Ejemplo n.º 2
0
 def _trackFormatsAreEqual(genome, trackName1, trackName2):
     """Tell whether two tracks of a genome share the same stored format hash.

     Returns False when the first track has no (or an empty) format hash; the
     second track is only looked up when the first hash is present.
     """
     firstHash = TrackInfo(genome, trackName1).trackFormatHash
     if not firstHash:
         return False

     secondHash = TrackInfo(genome, trackName2).trackFormatHash
     return True if firstHash == secondHash else False
    def removeOutdatedPreProcessedFiles(cls, genome, trackName, allowOverlaps,
                                        mode):
        collector = PreProcMetaDataCollector(genome, trackName)
        if cls.preProcFilesExist(genome, trackName, allowOverlaps) and not \
            collector.hasRemovedPreProcFiles(allowOverlaps):
            dirPath = createDirPath(trackName,
                                    genome,
                                    allowOverlaps=allowOverlaps)

            assert (dirPath.startswith(PROCESSED_DATA_PATH))
            if mode == 'Real':
                print 'Removing outdated preprocessed data: ', dirPath
                for fn in os.listdir(dirPath):
                    fullFn = os.path.join(dirPath, fn)
                    if os.path.isfile(fullFn):
                        os.unlink(fullFn)
                    if os.path.isdir(fullFn):
                        if cls._isOldTypeChromDirectory(fullFn, genome):
                            shutil.rmtree(fullFn)
            else:
                print 'Would now have removed outdated preprocessed data if real run: ', dirPath

            collector.updateRemovedPreProcFilesFlag(allowOverlaps, True)

        if mode == 'Real':
            ti = TrackInfo(genome, trackName)
            ti.resetTimeOfPreProcessing()
Ejemplo n.º 4
0
    def renameExistingStdTrackIfNeeded(cls, genome, stdTrackName):
        """Make an existing track directory reachable under the standard name.

        For both allowOverlaps variants, the parent directory of stdTrackName
        is inspected. A symlink at the requested name that does not lead to a
        directory is removed. If the parent contains real (non-symlink)
        directories and no entry with the requested name was listed, a symlink
        with the requested name is created pointing at the first real
        directory. When a symlink was created, the TrackInfo record of the old
        track name is re-stored under stdTrackName.
        """
        oldTrackName = None
        for allowOverlaps in [False, True]:
            parentDir = createDirPath(stdTrackName[:-1],
                                      genome,
                                      allowOverlaps=allowOverlaps)
            if os.path.exists(parentDir):
                dirContents = os.listdir(parentDir)
                realDirs = [
                    x for x in dirContents
                    if os.path.isdir(os.path.join(parentDir, x))
                    and not os.path.islink(os.path.join(parentDir, x))
                ]

                reqDirName = stdTrackName[-1]
                reqDirPath = os.path.join(parentDir, reqDirName)

                from gold.application.LogSetup import logMessage
                logMessage('Checking ' + reqDirPath)

                # os.path.isdir() follows the link itself, so relative targets
                # are resolved against the link's directory. Checking
                # os.readlink()'s raw result instead would resolve a relative
                # target (such as the ones created below) against the current
                # working directory and wrongly remove valid links.
                if os.path.islink(reqDirPath) and not os.path.isdir(reqDirPath):
                    # This is to fix a bug that ended in the symlink pointing to a file
                    os.remove(reqDirPath)
                    logMessage('Removed ' + reqDirPath)

                if realDirs and reqDirName not in dirContents:
                    oldTrackName = stdTrackName[:-1] + [realDirs[0]]
                    # Relative link: the target name is a sibling in parentDir.
                    os.symlink(realDirs[0], reqDirPath)

        if oldTrackName is not None:
            ti = TrackInfo(genome, oldTrackName)
            ti.trackName = stdTrackName
            ti.store()
Ejemplo n.º 5
0
 def handleTask(self, task):
     """Create one chromosome of the output track, then finalize the track.

     The task object supplies the class doing the work plus all arguments;
     the output track is finalized with a single 'val' column of type
     'float64' and dimension 1.
     """
     genome = task.genome
     outTrackName = task.outTrackName

     task.cls.createTrackChr(genome, task.inTrackName, outTrackName,
                             task.windowSize, task.func, task.chr)
     #todo: rewrite to new preprocessor solution...
     TrackInfo.finalizeTrack(genome, outTrackName, ['val'], 'float64', 1)
Ejemplo n.º 6
0
def fixTrackInfo(trackName, genome='hg18'):
    """trackName [genome]"""
    # Fix timeOfPreProcessing of the TrackInfo for a given trackName, in order
    # for it to become a valid selection.
    # NOTE(review): the docstring above looks like a command-line usage string
    # and is kept verbatim - confirm before rewording it.
    info = TrackInfo(genome, trackName.split(':'))
    info.timeOfPreProcessing = 'manualOverride'
    info.store()
Ejemplo n.º 7
0
 def removeOutdatedPreProcessedFiles(trackName, geSource, allowOverlaps, mode):
     genome = geSource.getGenome()
 
     if PreProcessUtils.preProcFilesExist(trackName, geSource, allowOverlaps) and not \
         TrackInfoDataCollector(genome, trackName).hasRemovedPreProcFiles(allowOverlaps):
             dirPath = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)
             
             assert( dirPath.startswith(PROCESSED_DATA_PATH) )
             if mode == 'Real':
                 print 'Removing outdated preprocessed data: ', dirPath
                 for fn in os.listdir(dirPath):
                     fullFn = os.path.join(dirPath, fn)
                     if os.path.isfile(fullFn):
                         os.unlink(fullFn)
                     if os.path.isdir(fullFn):
                         if not PreProcessUtils._isSubTrackDirectory(fullFn):
                             shutil.rmtree(fullFn)
             else:
                 print 'Would now have removed outdated preprocessed data if real run: ', dirPath
             
             TrackInfoDataCollector(genome, trackName).updateRemovedPreProcFilesFlag(allowOverlaps, True)
     
     if mode == 'Real':
         ti = TrackInfo(genome, trackName)
         ti.resetTimeOfPreProcessing()
Ejemplo n.º 8
0
def writemetainfo(bigdict, genome, startname, ucscname):
    print "inne i createdirs"
    for key in bigdict:
        thisdict = bigdict[key]
        thistrack = list(startname)
        thistrack.append(thisdict['antibody'])
        cellname = thisdict['cell']
        if 'replicate' in thisdict:
            cellname = cellname + '_rep' + thisdict['replicate']
        thistrack.append(cellname)
        print thistrack

        ti = TrackInfo(genome, thistrack)
        ti.description = 'TF bindingsites as reported in Peak-files Fetched from UCSC Genome browser, se  http://genome.ucsc.edu/cgi-bin/hgTrackUi?db=hg19&g=' + ucscname
        ti.description = ti.description + '<BR/>' + 'Use of data might be restricted, see dateUnrestricted below.<BR/> '
        for key in thisdict:
            ti.description = ti.description + '<BR/>' + key + '=' + thisdict[
                key]
        ti.description = ti.description + '<BR/>Annotation from the "files.txt"'
        ti.reference = 'Downloaded from UCSC hgdownload.cse.ucsc.edu, goldenPath/hg19/encodeDCC/wgEncodeHaibTfbs, August 2012. <BR/>The full data policy is available at http://genome.ucsc.edu/ENCODE/terms.html.'
        ti.hbContact = '*****@*****.**'
        #ti.quality = x[3]

        print "ti=", ti
        ti.store()
Ejemplo n.º 9
0
 def _storeUcscInfoBox(self, genome, parameterForm, trackName):
     """Fetch the UCSC schema page of a track and store its info sections.

     Clicks 'hgta_doSchema' on parameterForm, reads the resulting HTML and
     cuts out the span from the first <H2> up to the last </P>. Each <H2>
     section whose header is listed in the conversion table below is copied
     into the corresponding attribute of a TrackInfo object for
     (genome, trackName), which is then stored. If the cut-out span is empty,
     nothing is created or stored.
     """
     # Section header on the UCSC page -> TrackInfo attribute name.
     sectionToAttr = {'Description':'description', 'Methods':'methods', 'Credits':'credits','References':'reference', 'Data Release Policy':'restrictions', 'Display Conventions and Configuration':'displayConvConf'}

     page = mechanize.urlopen(parameterForm.click('hgta_doSchema')).read()
     # Upper-case the tags that are searched for below.
     for lowerCase, upperCase in (('h2>', 'H2>'), ('p>', 'P>'), ('<a href', '<A HREF')):
         page = page.replace(lowerCase, upperCase)

     infoHtml = page[page.find('<H2>'):page.rfind('</P>')]
     infoHtml = infoHtml.replace('<A HREF="http:..', ' <A HREF="http://genome.ucsc.edu')
     if not infoHtml:
         return

     # The slice stops right before the last </P>; put the closing tag back.
     infoHtml += '</P>'
     print(' : '.join(trackName) + ',   ' + genome)
     trackInfoObject = TrackInfo(genome, trackName)

     for section in infoHtml.split('<H2>')[1:]:
         header = section[:section.find('</H2>')].replace(':', '').strip()
         attrName = sectionToAttr.get(header)
         if attrName is not None:
             setattr(trackInfoObject, attrName, section.split('</H2>')[1])

     trackInfoObject.store()
    def _removePreprocessedTrackData(self, trackName):
        """Delete a track's preprocessed directories and its shelve entries.

        Both the non-overlapping and the overlapping directory variants are
        removed (in that order) before the TrackInfo entries are filtered out.
        """
        for allowOverlaps in (False, True):
            dirPath = createDirPath(trackName, self.GENOME,
                                    allowOverlaps=allowOverlaps)
            self._removeDir(dirPath, trackName)

        TrackInfo.removeFilteredEntriesFromShelve(self.GENOME, trackName)
 def _calcAndStoreSubTrackCount(self, trackName):
     """Compute and store the sub-track count of a track.

     The count is the sum of the stored subTrackCount of every subtype
     (missing or zero counts are ignored), plus one if the track itself is
     valid.
     """
     ti = TrackInfo(self._genome, trackName)

     subTrackNames = ProcTrackOptions.getSubtypes(self._genome, trackName, True)
     storedCounts = [TrackInfo(self._genome, trackName + [name]).subTrackCount
                     for name in subTrackNames]
     total = sum(count for count in storedCounts if count)

     if ti.isValid():
         total += 1

     ti.subTrackCount = total
     ti.store()
Ejemplo n.º 12
0
def modifyTnRecord(genome, oldTn, newTn, verbose):
    """Re-store the TrackInfo record of oldTn under the track name newTn.

    The record must carry oldTn as its track name and have a time of
    preprocessing set. When ONLY_SIMULATION is true nothing is stored; with
    verbose set, a message describing what was (or would be) done is printed.
    """
    trackInfo = TrackInfo(genome, oldTn)
    assert trackInfo.trackName == oldTn
    assert trackInfo.timeOfPreProcessing is not None, \
        'ERROR: trackInfo-object not complete for TN (is this track preprocessed?): ' + str(oldTn)

    trackInfo.trackName = newTn
    if ONLY_SIMULATION:
        message = 'Would now store track-info with new tn: %s'
    else:
        trackInfo.store()
        message = '(Storing track-info with new tn: %s)'

    if verbose:
        print(message % str(newTn))
def modifyTnRecord(genome, oldTn, newTn, verbose):
    """Re-store the TrackInfo record of oldTn under the track name newTn.

    The record must carry oldTn as its track name and have a time of
    preprocessing set. When ONLY_SIMULATION is true nothing is stored; with
    verbose set, a message describing what was (or would be) done is printed.
    """
    trackInfo = TrackInfo(genome, oldTn)
    assert trackInfo.trackName == oldTn
    assert trackInfo.timeOfPreProcessing is not None, \
        'ERROR: trackInfo-object not complete for TN (is this track preprocessed?): ' + str(oldTn)

    trackInfo.trackName = newTn
    if ONLY_SIMULATION:
        message = 'Would now store track-info with new tn: %s'
    else:
        trackInfo.store()
        message = '(Storing track-info with new tn: %s)'

    if verbose:
        print(message % str(newTn))
    def _createPreprocessedGsuiteTrack(self,
                                       gSuiteTrack,
                                       baseFileName,
                                       doEncodeId,
                                       urlPrefix=None):
        """Return a HyperBrowser GSuiteTrack for the preprocessed version of a track.

        The Galaxy track name is built from the track's path, suffix and
        baseFileName, resolved to a preprocessed track name, and wrapped in a
        new GSuiteTrack that carries over title, genome, attributes and
        comment. The track type is the lower-cased stored track format name.
        """
        from quick.application.ExternalTrackManager import ExternalTrackManager as ETM
        from gold.description.TrackInfo import TrackInfo

        self.genericVisit(gSuiteTrack)

        genome = gSuiteTrack.genome
        galaxyTN = ETM.constructGalaxyTnFromSuitedFn(gSuiteTrack.path,
                                                     fileEnding=gSuiteTrack.suffix,
                                                     name=baseFileName)
        preProcOptions = dict(printErrors=False,
                              printProgress=False,
                              renameExistingTracksIfNeeded=False,
                              doEncodeId=doEncodeId,
                              urlPrefix=urlPrefix)
        trackName = ETM.getPreProcessedTrackFromGalaxyTN(genome, galaxyTN,
                                                         **preProcOptions)

        formatName = TrackInfo(genome, trackName).trackFormatName
        hbUri = HbGSuiteTrack.generateURI(trackName=trackName)

        return GSuiteTrack(hbUri,
                           title=gSuiteTrack.title,
                           trackType=formatName.lower(),
                           genome=gSuiteTrack.genome,
                           attributes=gSuiteTrack.attributes,
                           comment=gSuiteTrack.comment)
Ejemplo n.º 15
0
    def _createMemoItems(cls, stat, useTrackFormatAsKey=True):
        """Build the list of items that identify a statistic for memoization.

        The result is [region, statistic id, config hash] followed by one key
        per track (the second track only if present and not None). A track's
        key is its stored track format hash when useTrackFormatAsKey is set
        and the track is not randomized; otherwise its unique key.

        Raises:
            MissingInfoError: if a track format hash is required but missing.
        """
        from gold.description.TrackInfo import TrackInfo

        genome = stat.getGenome()

        statId = getClassName(stat) + '_' + stat.VERSION
        configHash = stat.getConfigKey(ignoreUsername=False)

        tracks = [stat._track]
        if getattr(stat, '_track2', None) is not None:
            tracks.append(stat._track2)

        trackKeys = []
        for track in tracks:
            if not useTrackFormatAsKey or cls._isRandomizedTrack(track):
                trackKeys.append(track.getUniqueKey(genome))
                continue

            trackFormatHash = TrackInfo(genome, track.trackName).trackFormatHash
            if not trackFormatHash:
                raise MissingInfoError(
                    'Precalculated TrackFormat hash is missing from TrackInfo, '
                    'for track "{}" of genome "{}". '.format(
                        track.trackName, genome) +
                    'Please consider to repeat the preprocessing step.')
            trackKeys.append(trackFormatHash)

        return [stat._region, statId, configHash] + trackKeys
Ejemplo n.º 16
0
    def visitGalaxyGSuiteTrack(self, gSuiteTrack):
        """Return a HyperBrowser GSuiteTrack for a preprocessed Galaxy track.

        The base file name is the basename of the URI without suffix when the
        track has an extra file name, else the track title. The resulting
        track carries over title, genome, attributes and comment; its track
        type is the lower-cased stored track format name.
        """
        self.genericVisit(gSuiteTrack)

        from quick.application.ExternalTrackManager import ExternalTrackManager
        from gold.description.TrackInfo import TrackInfo

        baseFileName = (os.path.basename(gSuiteTrack.uriWithoutSuffix)
                        if gSuiteTrack.hasExtraFileName() else gSuiteTrack.title)

        genome = gSuiteTrack.genome
        galaxyTN = ExternalTrackManager.constructGalaxyTnFromSuitedFn(
            gSuiteTrack.path, fileEnding=gSuiteTrack.suffix, name=baseFileName)
        preProcOptions = dict(printErrors=False,
                              printProgress=False,
                              renameExistingTracksIfNeeded=False)
        trackName = ExternalTrackManager.getPreProcessedTrackFromGalaxyTN(
            genome, galaxyTN, **preProcOptions)

        formatName = TrackInfo(genome, trackName).trackFormatName
        hbUri = HbGSuiteTrack.generateURI(trackName=trackName)

        return GSuiteTrack(hbUri,
                           title=gSuiteTrack.title,
                           trackType=formatName.lower(),
                           genome=gSuiteTrack.genome,
                           attributes=gSuiteTrack.attributes,
                           comment=gSuiteTrack.comment)
 def renameExistingStdTrackIfNeeded(cls, genome, stdTrackName):
     """Rename a lone, differently named track directory to the standard name.

     For both allowOverlaps variants: if the parent directory of stdTrackName
     exists and holds exactly one entry whose name differs from the last part
     of stdTrackName, that entry is renamed to the standard name. If a rename
     happened, the TrackInfo record of the old name is re-stored under
     stdTrackName.
     """
     renamedFrom = None
     for allowOverlaps in (False, True):
         parentDir = createDirPath(stdTrackName[:-1], genome, allowOverlaps=allowOverlaps)
         if not os.path.exists(parentDir):
             continue

         entries = os.listdir(parentDir)
         if len(entries) != 1 or entries[0] == stdTrackName[-1]:
             continue

         onlyEntry = entries[0]
         renamedFrom = stdTrackName[:-1] + [onlyEntry]
         os.rename(parentDir + os.sep + onlyEntry,
                   parentDir + os.sep + stdTrackName[-1])

     if renamedFrom is None:
         return

     ti = TrackInfo(genome, renamedFrom)
     ti.trackName = stdTrackName
     ti.store()
Ejemplo n.º 18
0
    def _getBasicTrackFormat(choices, tnChoiceIndex=1, genomeChoiceIndex=0):
        """Return (genome, track name, basic track format) for the GUI choices.

        For Galaxy tracks the format is derived from the genome element
        source; a Warning raised while doing so yields an empty format string.
        For other tracks the format is 'Points' for nmer tracks, else the
        stored track format name, converted to its basic form.
        """
        genome = GeneralGuiTool._getGenomeChoice(choices, genomeChoiceIndex)[0]
        tn = GeneralGuiTool._getTrackChoice(choices, tnChoiceIndex)[0]

        from quick.application.GalaxyInterface import GalaxyInterface
        from gold.description.TrackInfo import TrackInfo
        from quick.application.ExternalTrackManager import ExternalTrackManager
        from gold.track.TrackFormat import TrackFormat

        if not ExternalTrackManager.isGalaxyTrack(tn):
            if GalaxyInterface.isNmerTrackName(genome, tn):
                tfName = 'Points'
            else:
                tfName = TrackInfo(genome, tn).trackFormatName
            return genome, tn, GeneralGuiTool._convertToBasicTrackFormat(tfName)

        geSource = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(
            tn, genome)
        try:
            formatName = TrackFormat.createInstanceFromGeSource(
                geSource).getFormatName()
            tf = GeneralGuiTool._convertToBasicTrackFormat(formatName)
        except Warning:
            return genome, tn, ''
        return genome, tn, tf
Ejemplo n.º 19
0
    def validateRegAndBinSpec(self, regSpec, binSpec):
        """Check that binSpec names a valid track containing regions.

        Returns an error message string if the track is not valid or its
        format is missing or not an interval format; returns None otherwise.
        regSpec is not used here.
        """
        from quick.util.CommonFunctions import convertTNstrToTNListFormat
        from gold.description.TrackInfo import TrackInfo
        from gold.track.TrackFormat import TrackFormatReq

        trackName = convertTNstrToTNListFormat(binSpec)
        ti = TrackInfo(self._genome, trackName)

        if not ti.isValid():
            return 'The specified track is not valid: "%s"' % ':'.join(
                trackName)

        hasRegions = ti.trackFormatName \
            and TrackFormatReq(name=ti.trackFormatName).isInterval()
        if not hasRegions:
            return 'The specified track does not contain regions: "%s"' % ':'.join(
                trackName)

        return None
Ejemplo n.º 20
0
    def isValidTrack(genome, trackName, fullAccess=False):
        """Return True if the track has valid TrackInfo and preprocessed files.

        The preprocessed files are only checked when the TrackInfo is valid
        for the given access level.
        """
        if TrackInfo(genome, trackName).isValid(fullAccess) \
                and ProcTrackOptions._hasPreprocessedFiles(genome, trackName):
            return True
        return False
Ejemplo n.º 21
0
 def constructId(geSource):
     """Return the id for a genome element source.

     Sources with an original file get an id constructed from the genome, the
     original path (the file itself for external sources, else its directory),
     the source version and the preprocessor version. Other sources supply
     their own id.
     """
     from gold.origdata.PreProcessTracksJob import PreProcessTracksJob

     if not geSource.hasOrigFile():
         return geSource.getId()

     if geSource.isExternal():
         origPath = geSource.getFileName()
     else:
         origPath = os.path.dirname(geSource.getFileName())

     return TrackInfo.constructIdFromPath(geSource.getGenome(),
                                          origPath,
                                          geSource.getVersion(),
                                          PreProcessTracksJob.VERSION)
Ejemplo n.º 22
0
 def constructId(geSource):
     """Return the id for a genome element source, or None without original file.

     The id is constructed from the genome, the original path (the file itself
     for external sources, else its directory), the source version and the
     preprocessor version.
     """
     from gold.origdata.PreProcessTracksJob import PreProcessTracksJob

     if not geSource.hasOrigFile():
         return None

     if geSource.isExternal():
         origPath = geSource.getFileName()
     else:
         origPath = os.path.dirname(geSource.getFileName())

     return TrackInfo.constructIdFromPath(geSource.getGenome(),
                                          origPath,
                                          geSource.getVersion(),
                                          PreProcessTracksJob.VERSION)
Ejemplo n.º 23
0
    def getUniqueKey(self, genome):
        """Return a hash identifying this track and its format requirements.

        The track id is looked up from TrackInfo and cached on the instance
        the first time it is needed. Both allowOverlaps and borderHandling of
        the track format requirement must be set (not None).
        """
        formatReq = self._trackFormatReq
        assert None not in [formatReq.allowOverlaps(),
                            formatReq.borderHandling()]

        if not self._trackId:
            self._trackId = TrackInfo(genome, self.trackName).id

        keyParts = (tuple(self.trackName),
                    self._trackId,
                    formatReq.allowOverlaps(),
                    formatReq.borderHandling())
        return hash(keyParts)
Ejemplo n.º 24
0
 def getTrackFormat(cls,genome,trackName) :
     """Return the stored track format name for a track.

     trackName is a list of directories making up the path of the track; a
     trailing "-- All subtypes --" entry is dropped before the lookup.
     """
     if trackName[-1] == "-- All subtypes --":
         trackName = trackName[:-1]
     return TrackInfo(genome, trackName).trackFormatName
Ejemplo n.º 25
0
    def _getValueTypeName(choices, tnChoiceIndex=1, genomeChoiceIndex=0):
        """Return the lower-cased value type name of the chosen track.

        Galaxy tracks take the name from their genome element source, nmer
        tracks yield an empty string, and other tracks use the markType
        stored in TrackInfo.
        """
        genome = GeneralGuiTool._getGenomeChoice(choices, genomeChoiceIndex)[0]
        tn = GeneralGuiTool._getTrackChoice(choices, tnChoiceIndex)[0]

        from quick.application.GalaxyInterface import GalaxyInterface
        from gold.description.TrackInfo import TrackInfo
        from quick.application.ExternalTrackManager import ExternalTrackManager
        from gold.track.TrackFormat import TrackFormat

        if ExternalTrackManager.isGalaxyTrack(tn):
            geSource = ExternalTrackManager.getGESourceFromGalaxyOrVirtualTN(
                tn, genome)
            trackFormat = TrackFormat.createInstanceFromGeSource(geSource)
            return trackFormat.getValTypeName().lower()

        if GalaxyInterface.isNmerTrackName(genome, tn):
            return ''

        return TrackInfo(genome, tn).markType.lower()
 def _compute(self):
     """Create a LazyProtoGraphView from the track view of the first child.

     Directedness is taken from self._isDirected when forced; otherwise the
     graph is directed unless TrackInfo marks the track's edges as
     undirected. For a RandomizedTrack the last two parts of the track name
     are dropped before the TrackInfo lookup.
     """
     tv = self._children[0].getResult()

     if self._forceIsDirected:
         isDirected = self._isDirected
     else:
         from gold.track.RandomizedTrack import RandomizedTrack
         if isinstance(self._track, RandomizedTrack):
             origTrackName = self._track.trackName[:-2]
         else:
             origTrackName = self._track.trackName
         undirectedEdges = TrackInfo(self._region.genome, origTrackName).undirectedEdges
         isDirected = not (undirectedEdges == True)

     return LazyProtoGraphView.createInstanceFromTrackView(tv, isDirected=isDirected)
Ejemplo n.º 27
0
 def _filterValueListAndMakeTrackInfoObjects(self, category, valueList, genome, trackName):
     """Return the entries of valueList whose tracks are of file type 'bed'.

     For every value a TrackInfo for trackName + [value[1]] is loaded and
     stored again. Tracks without a file type are looked up on the web: their
     info box is stored, and they are classified as 'bed' when the output type
     control offers 'bed' or 'wigBed', otherwise as 'ucscdb'. Such a track is
     only added to the result when, in addition, the filter page has no
     control whose name contains '_.maxOutput' past its first character.
     category is not used here.
     """
     bedLikeTypes = set(['bed', 'wigBed'])
     prunedValueList = []
     trackNames = [trackName+[v[1]]for v in valueList]

     for value, trackTuple in zip(valueList, trackNames):
         trackInfoObj = TrackInfo(genome, trackTuple)

         if trackInfoObj.fileType == '':
             tableUrl = self._makeUrlstreng(self._sessionId, 'hgta_table', trackTuple[-1])
             unusedPage, paramForm = self._getWebPageAndForm(tableUrl)
             self._storeUcscInfoBox(genome, paramForm, trackTuple)

             outputTypes = set(['.']+[item.name for item in paramForm.find_control("hgta_outputType").items])
             if bedLikeTypes & outputTypes:
                 lastForm = self._getForm(mechanize.urlopen(paramForm.click('hgta_doFilterPage')))
                 maxOutputControls = [control.name for control in lastForm.controls
                                      if control.name.find('_.maxOutput') > 0]
                 if not maxOutputControls:
                     prunedValueList.append(value)
                     trackInfoObj.fileType = 'bed'

             if trackInfoObj.fileType == '':
                 trackInfoObj.fileType = 'ucscdb'
             else:
                 trackInfoObj.fileType = 'bed'
         elif trackInfoObj.fileType == 'bed':
             prunedValueList.append(value)

         trackInfoObj.store()

     return prunedValueList
Ejemplo n.º 28
0
 def _getBasicTrackFormat(choices, tnChoiceIndex=1):
     """Return (genome, track name list, simplified track format name).

     The genome is choices[0]; the track name is the colon-separated choice at
     tnChoiceIndex. The format name ('Points' for nmer tracks, else the stored
     one) is lower-cased, stripped of a leading 'linked ' and of 'unmarked ',
     and has 'marked' replaced by 'valued'.
     """
     from quick.application.GalaxyInterface import GalaxyInterface
     from gold.description.TrackInfo import TrackInfo

     genome = choices[0]
     tn = choices[tnChoiceIndex].split(':')

     isNmer = GalaxyInterface.isNmerTrackName(genome, tn)
     tfName = 'Points' if isNmer else TrackInfo(genome, tn).trackFormatName
     tfName = tfName.lower()

     linkedPrefix = 'linked '
     if tfName.startswith(linkedPrefix):
         tfName = tfName[len(linkedPrefix):]

     tfName = tfName.replace('unmarked ','').replace('marked','valued')
     return genome, tn, tfName
    def shouldPreProcessGESource(cls, trackName, geSource, allowOverlaps):
        """Decide whether a genome element source needs to be preprocessed.

        Without an original file, preprocessing is needed only when no valid
        preprocessed files exist and the source is not external. Otherwise it
        is needed unless valid preprocessed files exist and the stored id,
        source version and preprocessor version all match the current ones.
        """
        genome = geSource.getGenome()
        storedInfo = TrackInfo(genome, trackName)

        validFilesExist = cls.preProcFilesExist(genome, trackName, allowOverlaps) and \
            storedInfo.isValid()

        if not geSource.hasOrigFile():
            return not (validFilesExist or geSource.isExternal())

        from gold.origdata.PreProcessTracksJob import PreProcessTracksJob
        if DebugConfig.VERBOSE:
            idLine = "GenomeElementSource id: {}, ".format(
                cls.constructId(geSource)) + \
                "TrackInfo.id: {}".format(storedInfo.id)
            geSourceVersionLine = "GenomeElementSource version: {}, ".format(
                geSource.getVersion()) + \
                "TrackInfo.geSourceVersion: {}".format(
                    storedInfo.geSourceVersion)
            preProcVersionLine = "PreProcessTracksJob version: {}, ".format(
                PreProcessTracksJob.VERSION) + \
                "TrackInfo.preProcVersion: {}".format(
                    storedInfo.preProcVersion)
            print(os.linesep.join(
                [idLine, geSourceVersionLine, preProcVersionLine]))

        storedAsAccordingToGeSource = \
            (cls.constructId(geSource) == storedInfo.id and
             geSource.getVersion() == storedInfo.geSourceVersion and
             PreProcessTracksJob.VERSION == storedInfo.preProcVersion)

        if validFilesExist and storedAsAccordingToGeSource:
            return False
        return True
Ejemplo n.º 30
0
    def _getFileFormatInfo(cls, choices, gSuite, genome, track):
        """Return the FileFormatInfo describing the requested output format.

        When the original format is to be kept, the info is built from the
        track's stored file type (as original, overlaps allowed). Otherwise it
        is looked up by the chosen output format in the output format dict.
        """
        if choices.changeFormat == cls.OUTPUT_FORMAT_ORIGINAL:
            suffix = TrackInfo(genome, track.trackName).fileType
            composerCls = getComposerClsFromFileSuffix(suffix)
            # Positional arguments: file format name, asOriginal,
            # allowOverlaps, suffix.
            return FileFormatInfo(composerCls.FILE_FORMAT_NAME, True, True,
                                  suffix)

        return cls._getOutputFormatDict(gSuite, genome)[choices.outputFormat]
Ejemplo n.º 31
0
    def getUniqueKey(self, genome):
        """Return a hash identifying this track, its converter and format req.

        The track id is looked up from TrackInfo and cached on the instance
        the first time it is needed. Every falsy component (missing id, no
        format converters, falsy allowOverlaps or borderHandling) is replaced
        by an empty string before hashing.
        """
        if not self._trackId:
            self._trackId = TrackInfo(genome, self.trackName).id

        trackNameKey = tuple(self.trackName)
        trackIdKey = self._trackId or ''

        if self.formatConverters:
            converterName = getClassName(self.formatConverters[0])
            converterVersion = self.formatConverters[0].VERSION
        else:
            converterName = ''
            converterVersion = ''

        allowOverlapsKey = self._trackFormatReq.allowOverlaps() or ''
        borderHandlingKey = self._trackFormatReq.borderHandling() or ''

        return hash((trackNameKey, trackIdKey, converterName,
                     converterVersion, allowOverlapsKey, borderHandlingKey))
    def getSubtypes(genome, trackName, fullAccess=False):
        """Return the sorted subtype names found in a track's directory.

        Entries starting with '.' or '_', plain files and valid chromosome
        names are skipped. Without full access, and unless the track is a
        literature track, 'external' and subtypes whose TrackInfo is private
        are filtered out as well.
        """
        dirPath = createDirPath(trackName, genome)

        subtypes = []
        for fn in ProcTrackOptions._getDirContents(genome, trackName):
            if fn[0] in ['.', '_']:
                continue
            if os.path.isfile(dirPath + os.sep + fn):
                continue
            if GenomeInfo.isValidChr(genome, fn):
                continue
            subtypes.append(fn)

        if not fullAccess and not ProcTrackOptions._isLiteratureTrack(
                genome, trackName):
            visibleSubtypes = []
            for subtype in subtypes:
                if subtype in ['external']:
                    continue
                if TrackInfo(genome, trackName + [subtype]).private:
                    continue
                visibleSubtypes.append(subtype)
            subtypes = visibleSubtypes

        return sorted(subtypes, key=smartStrLower)
Ejemplo n.º 33
0
 def shouldPreProcessGESource(trackName, geSource, allowOverlaps):
     """Decide whether a genome element source needs to be preprocessed.

     Without an original file, preprocessing is needed only when no valid
     preprocessed files exist and the source is not external. Otherwise it is
     needed unless valid preprocessed files exist and both the stored id and
     the stored preProcVersion match the source's id and version.
     """
     storedInfo = TrackInfo(geSource.getGenome(), trackName)

     validFilesExist = PreProcessUtils.preProcFilesExist(trackName, geSource, allowOverlaps) and \
         storedInfo.isValid()

     if not geSource.hasOrigFile():
         return not (validFilesExist or geSource.isExternal())

     storedAsAccordingToGeSource = \
         (PreProcessUtils.constructId(geSource) == storedInfo.id and
          geSource.getVersion() == storedInfo.preProcVersion)

     if validFilesExist and storedAsAccordingToGeSource:
         return False
     return True
Ejemplo n.º 34
0
 def _findTrackInfoBasedMetaData(self):
     """Load file suffix, source version, id and edge direction for the track.

     Does nothing when self._foundTrackInfoBasedMetaData is already true. The
     values come from the PreProcMetaDataCollector when it has a key for the
     track, otherwise from TrackInfo.
     """
     if self._foundTrackInfoBasedMetaData:
         return

     if PreProcMetaDataCollector.hasKey(self._genome, self._trackName):
         collector = PreProcMetaDataCollector(self._genome, self._trackName)
         self._fileSuffix = collector.getFileSuffix()
         self._geSourceVersion = collector.getGeSourceVersion()
         self._id = collector.getId()
         self._undirectedEdges = bool(collector.hasUndirectedEdges())
     else:
         ti = TrackInfo(self._genome, self._trackName)
         self._fileSuffix = ti.fileType
         self._geSourceVersion = ti.geSourceVersion
         self._id = ti.id
         self._undirectedEdges = bool(ti.undirectedEdges)
 def _calcAndStoreSubTrackCount(self, trackName):
     """Compute and store the sub-track count of a track.

     The count is the sum of the stored subTrackCount of every subtype
     (missing or zero counts are ignored), plus one if the track itself is
     valid.
     """
     ti = TrackInfo(self._genome, trackName)

     subTrackNames = ProcTrackOptions.getSubtypes(self._genome, trackName, True)
     storedCounts = [TrackInfo(self._genome, trackName + [name]).subTrackCount
                     for name in subTrackNames]
     total = sum(count for count in storedCounts if count)

     if ti.isValid():
         total += 1

     ti.subTrackCount = total
     ti.store()
Ejemplo n.º 36
0
 def shouldPreProcessGESource(trackName, geSource, allowOverlaps):
     """Decide whether a genome element source needs to be preprocessed.

     Without an original file, preprocessing is needed only when no valid
     preprocessed files exist and the source is not external. Otherwise it is
     needed unless valid preprocessed files exist and both the stored id and
     the stored preProcVersion match the source's id and version.
     """
     genome = geSource.getGenome()
     storedInfo = TrackInfo(genome, trackName)

     validFilesExist = PreProcessUtils.preProcFilesExist(genome, trackName, allowOverlaps) and \
         storedInfo.isValid()

     if not geSource.hasOrigFile():
         return not (validFilesExist or geSource.isExternal())

     storedAsAccordingToGeSource = \
         (PreProcessUtils.constructId(geSource) == storedInfo.id and
          geSource.getVersion() == storedInfo.preProcVersion)

     if validFilesExist and storedAsAccordingToGeSource:
         return False
     return True
Ejemplo n.º 37
0
def removeUnusedRecords():
    trackInfoShelve = safeshelve.open(SHELVE_FN, 'w')
    iremoved = 0
    ifound = 0
    for key in trackInfoShelve.keys():
        try:
            ti = TrackInfo.createInstanceFromKey(key)
            fn = ti.getOrigFn()
            if not os.path.exists(fn):
                raise Exception('Should exclude nmer tracks and other tracks without standardized track (e.g. intensity tracks). How? Not sure..')
                ti.removeEntryFromShelve()
                iremoved = iremoved + 1
            else:
                ifound= ifound + 1
        except Exception, e:
            print "Something wrong with ", fn , ", ", e
Ejemplo n.º 38
0
def updateOrCreateStandardFolders(genome):
    print "update or create ", genome
    for x in standardFolderInfo:
        ti = TrackInfo(genome, x[0])
        ti.description = x[1]
        ti.reference = x[2]
        ti.quality = x[3]

        ### sjekke om folderen eksisterer, hvis ikke lage den. Dette er ikke helt implementer da jeg er usikker om jeg fikk det til med eksisterende metoder.
        if not getOrigFns(
                genome, x[0], ''
        ):  # returns empty list if no folder? But what does it return when no tracks is in the folder?
            print x[0], "is missing"  # lage folder og preprocess?

        #print "ti=", ti
        ti.store()
Ejemplo n.º 39
0
def removeUnusedRecords():
    trackInfoShelve = safeshelve.open(SHELVE_FN, 'w')
    iremoved = 0
    ifound = 0
    for key in trackInfoShelve.keys():
        try:
            ti = TrackInfo.createInstanceFromKey(key)
            fn = createOrigPath(ti.genome, ti.trackName)
            if not os.path.exists(fn):
                raise Exception(
                    'Should exclude nmer tracks and other tracks without standardized track (e.g. intensity tracks). How? Not sure..'
                )
                ti.removeEntryFromShelve()
                iremoved = iremoved + 1
            else:
                ifound = ifound + 1
        except Exception, e:
            print "Something wrong with ", fn, ", ", e
Ejemplo n.º 40
0
    def _appendConverterOptions(self, track, labelKey):
        """
        Append an analysis option for choosing how track should be treated.

        Nothing is added when there is no track, when the track has no format
        converters, or when a choice for labelKey already exists (which must
        then equal the class name of the first converter). Otherwise one
        option is appended with one alternative per format converter.
        """
        # formatConverters may be None in the second track object if one
        # analyses a track versus itself.
        if track is None or track.formatConverters is None:
            return

        if self.getChoice(labelKey) is not None:
            assert self.getChoice(labelKey) == getClassName(track.formatConverters[0])
            return

        labelText = ':'.join((labelKey, '_Treat ' + prettyPrintTrackName(track.trackName) + ' as'))

        choiceTexts = []
        for converter in track.formatConverters:
            converterName = getClassName(converter)
            formatName = TrackInfo(self._genome, track.trackName).trackFormatName
            choiceTexts.append((converterName, converter.getOutputDescription(formatName)))
        joinedChoices = '/'.join([':'.join(choicePair) for choicePair in choiceTexts])

        optionText = '[' + labelText + '=' + joinedChoices + ']'
        self._analysisParts.append(AnalysisOption(optionText))
 def _calcAndStoreSubTrackCount(self, trackName):
     """Store a sub-track count of 1 for trackName if its TrackInfo record is valid."""
     info = TrackInfo(self._genome, trackName)
     if not info.isValid():
         return

     info.subTrackCount = 1
     info.store()
Ejemplo n.º 42
0
 def storeTrackInfo(genome, trackName, metaData):
     """Set every key/value pair of metaData as an attribute on the track's TrackInfo and store it."""
     info = TrackInfo(genome, trackName)
     for attrName, attrValue in metaData.items():
         setattr(info, attrName, attrValue)
     info.store()
Ejemplo n.º 43
0
illegalchars = ''.join(c for c in ''.join(testname) if c not in valid_chars)
if len(illegalchars)>0:
    print 'Illegal characters found in chromosome names "%s". Please rename using legal characters "%s".' % (illegalchars,valid_chars)
'''    
    
'''
import string
valid_chars = "-_.%s%s" % (string.ascii_letters, string.digits)
name = "vegard_hei"
illegalchars = ''.join(c for c in name if c not in valid_chars)
if len(illegalchars)>0:
    print "Illegal characters found in chromosome names '%s'. Please rename using legal characters " % (illegalchars,valid_chars)
'''
'''
import re

a = re.search("[ /]", "abc/def")
if a:
    print "treff"
'''
from gold.description.TrackInfo import TrackInfo
# Ad-hoc check: load the TrackInfo record of one private test track in hg18
# and print its UI representation.
genome="hg18"
tn = ['Private', 'Vegard', 'test', 'test1']

ti = TrackInfo(genome, tn)
print(ti.getUIRepr())




# "ferdig" is Norwegian for "done".
print "ferdig"
Ejemplo n.º 44
0
import sys
import third_party.safeshelve as safeshelve
from gold.description.TrackInfo import TrackInfo
from gold.description.TrackInfo import SHELVE_FN
from gold.origdata.PreProcessTracksJob import PreProcessAllTracksJob
import re
#print "SHELVE_FN=", SHELVE_FN
# Take a snapshot of all record keys and close the shelve straight away;
# the records are loaded one at a time in the loop below.
trackInfoShelve = safeshelve.open(SHELVE_FN, 'w')
allkeys=trackInfoShelve.keys()
trackInfoShelve.close()

# Number of tracks reported as repair candidates.
count = 0
for key in allkeys:
    #print key, count
    try: 
        ti = TrackInfo.createInstanceFromKey(key)
        # Only tracks preprocessed strictly between 2011-11-08 23:00 and
        # 2011-11-25 23:00 are considered.
        # NOTE(review): `datetime` is used here but no `import datetime` is
        # visible among the imports of this script -- confirm it is in scope.
        if ti.timeOfPreProcessing > datetime.datetime(2011, 11, 8, 23,0,0) and ti.timeOfPreProcessing < datetime.datetime(2011, 11, 25, 23,0,0):
            # Skip tracks whose original path contains Nmers, Trashcan,
            # external or Restriction.
            if re.search('Nmers|Trashcan|external|Restriction', ti.getOrigFn())==None:
            # Nmers, external
                print 'trying to repair track ', ti.genome, ti.trackName, ti.timeOfPreProcessing
                count = count +1
                
            ### Set the ID to None and preprocess.
                #ti.id = None
                #ti.store()
                #PreProcessAllTracksJob(ti.genome, ti.trackName).process()
                
                
                #### Print out commands for a batch job; this did not work.
                #print 'key =\''+key+'\''
                #print 'ti = TrackInfo.createInstanceFromKey(key)'
# NOTE(review): no `except`/`finally` clause for the `try` above is visible
# here -- the script appears to be cut off; confirm against the full file.
Ejemplo n.º 45
0
    def getRunDescription(trackName1, trackName2, trackNameIntensity, analysisDef, ubSource, revEngBatchLine, \
                          urlForTrackAutoSelection, manualSeed, **kwArgs):
        """
        Build the HTML run description of an analysis and return it as a string.

        The text is assembled section by section in an HtmlCore: genome, one
        section per given track, the analysis and its options, the null and
        alternative hypotheses (when defined), the analysis regions, the
        solution (description of the statistic), the time of analysis,
        optionally the track auto-selection URL and the corresponding
        batch-run line, and finally the literature references.

        Parameters:
          trackName1, trackName2, trackNameIntensity -- track names; a name
              that is None or [] gets no section.
          analysisDef -- analysis definition handed to Analysis.
          ubSource -- source of analysis regions. Its genome attribute is
              used, its first element is used to instantiate the statistic,
              and its description attribute is printed if present.
          revEngBatchLine -- batch-run line to show; skipped if None or ''.
          urlForTrackAutoSelection -- URL to show; skipped if None or ''.
          manualSeed -- if not None, shown in place of the choice 'Random'
              for the option 'Random seed'.
          **kwArgs -- forwarded to Analysis.

        Returns:
          str(core), the string form of the assembled HtmlCore.
        """
        genome = ubSource.genome
        core = HtmlCore()

        analysis = Analysis(analysisDef, genome, trackName1, trackName2, **kwArgs)
        
        core.header('GENOME')
        core.append(GenomeInfo(genome).mainInfo(printEmpty=False))
        core.divider()
                
        # Format choices are only attributed to tracks when there is exactly one per track.
        formatChoices = analysis.getFormatConverterChoicesAsText().items()
        tr1FormatChoice, tr2FormatChoice = formatChoices if len(formatChoices) == 2 else (None, None) 
        
        first = True
        for tn,label,formatChoice in zip([trackName1,trackName2,trackNameIntensity], \
                                         ['TRACK 1','TRACK 2','INTENSITY TRACK'], \
                                         [tr1FormatChoice,tr2FormatChoice,None]):
            if tn in [None, []]:
                continue
            
            # Dividers go between track sections, not before the first one.
            if not first:
                core.divider()

            core.header(label)
            trackInfo = TrackInfo(genome, tn)
            if ExternalTrackManager.isHistoryTrack(tn):
                assert len(tn)>=4, 'Length of external track name < 4: %s' % str(tn)
                core.descriptionLine('Name', ExternalTrackManager.extractNameFromHistoryTN(tn) + ' (from history)' + os.linesep)
            else:
                core.descriptionLine('Name', ':'.join(tn) + os.linesep)
            core.append(trackInfo.mainInfo(printEmpty=False))

            if formatChoice is not None:
                core.descriptionLine('Treated as', formatChoice[1])
            
            first = False
        
        core.divider()
        core.header('ANALYSIS')
        # Everything after the first ':' of the analysis string, with the remaining ':' removed.
        core.paragraph( ''.join(str(analysis).split(':')[1:]) )

        # The OPTIONS header is only written if there is at least one option.
        first = True
        for label,choice in analysis.getInterfaceChoicesAsText().items():
            if first:
                core.divider()
                core.header('OPTIONS')
            
            if manualSeed is not None and label == 'Random seed' and choice == 'Random':
                choice = str(manualSeed)
                
            core.descriptionLine(label, choice)
            first = False
            
        h0 = analysis.getH0()
        if h0 is not None:
            core.divider()
            core.header('NULL HYPOTHESIS')
            core.paragraph(h0)
            
        h1 = analysis.getH1()
        if h1 is not None:
            core.divider()
            core.header('ALTERNATIVE HYPOTHESIS')
            core.paragraph(h1)
            
        core.divider()
        core.header('ANALYSIS REGIONS')
        if hasattr(ubSource, 'description'):
            core.paragraph(ubSource.description)
            
        core.divider()
        core.header('SOLUTION')

        statClass = analysis.getStat()
        #One alternative is to put getDescription in MagicStatFactory-hierarchy as class-method, and get real class behind partial-object.
        #if isinstance(statClass, functools.partial):
            #statClass = statClass.func
        #core.paragraph( statClass.getDescription() )

        #Chosen alternative is to Instantiate an object, which will automatically give object of real class..
        #and then use the following two lines, which will get class in Statistic-hierarchy instead of MagicStatFactory-hierarchy ..
        try:
            reg = ubSource.__iter__().next()
        except Exception:
            # Any failure to obtain a first region (an empty source included) is
            # reported as "no regions". Catching Exception rather than using a
            # bare except lets KeyboardInterrupt and SystemExit propagate.
            core.paragraph('Solution not relevant, as there are no specified analysis regions..')
        else:
            track1, track2 = analysis.getTracks()
            if statClass is None:
                core.paragraph('Solution not available, due to currently invalid analysis')
                logMessage('Solution not available, with params: ' + str([trackName1, trackName2, analysisDef]), level=logging.WARN )
            else:
                statObj = statClass(reg,track1, track2)
                statDescr = statObj.getDescription()
                replPat = '<a href=' + os.sep.join([STATIC_REL_PATH,'notes','stats','']) + r'\1>note</a>'
                # Non-greedy, so that each <note>...</note> pair becomes its own
                # link instead of one match spanning from the first to the last tag.
                statDescr = re.sub(r'<note>(.*?)</note>', replPat, statDescr)
        
                core.paragraph( statDescr )

        core.divider()
        core.header('TIME OF ANALYSIS')
        core.paragraph('Analysis initiated at time: ' + str( datetime.datetime.now() ) )
        
        if urlForTrackAutoSelection not in [None, '']:
            core.divider()
            core.header('URL FOR TRACK AUTOSELECTION')
            #urlOptions = '&'.join(['track1=' + quote(':'.join(trackName1)), 'track2=' + quote(':'.join(trackName2))])
            #core.paragraph(URL_PREFIX + '/hyper?' + urlOptions)
            core.styleInfoBegin(styleClass='break-word')
            core.paragraph(urlForTrackAutoSelection)
            core.styleInfoEnd()
            
        if revEngBatchLine not in [None, '']:
            core.divider()
            core.header('CORRESPONDING BATCH-RUN LINE')
            #if any(ExternalTrackManager.isRedirectOrExternalTrack(tn) for tn in [trackName1, trackName2]):
                #core.paragraph('Batch-run line not available with tracks from history')
            #else:
            core.styleInfoBegin(styleClass='break-word')
            core.paragraph(revEngBatchLine)
            core.styleInfoEnd()

        core.divider()
        core.header('REFERENCES')
        core.paragraph('The HyperBrowser system is described in:<br>"Sandve et al., <a href="http://genomebiology.com/2010/11/12/R121/">The Genomic HyperBrowser: inferential genomics at the sequence level</a>, Genome Biol. 2010;11(12):R121')
        # Imported here rather than at module level, as in the original code.
        from gold.statistic.RandomizationManagerStat import RandomizationManagerStat
        if statClass is not None and RandomizationManagerStat.getMcSamplingScheme(statClass.keywords) == 'MCFDR':
            core.paragraph('The p-values of this analysis were computed using the MCFDR scheme for Monte Carlo based p-value computation'+\
                           ', described in:<br>Sandve et al., <a href="http://bioinformatics.oxfordjournals.org/content/early/2011/10/13/bioinformatics.btr568.long">Sequential Monte Carlo multiple testing</a>, Bioinformatics 2011')
        
        return str(core)
Ejemplo n.º 46
0
 def handleTask(self, task):
     """
     Create the output track for the chromosome described by task and finalize it.

     The creation is delegated to task.cls.createTrackChr; the resulting track
     is then finalized through TrackInfo.finalizeTrack.
     """
     createTrackChr = task.cls.createTrackChr
     createTrackChr(task.genome, task.inTrackName, task.outTrackName,
                    task.windowSize, task.func, task.chr)

     #todo: rewrite to new preprocessor solution...
     TrackInfo.finalizeTrack(task.genome, task.outTrackName, ['val'], 'float64', 1)
Ejemplo n.º 47
0
     print '        ' + str(e).strip()
     sys.exit(1)
 
 for allowOverlaps in [False, True]:
     fromDir = createDirPath(['GESourceTracks'], 'TestGenome', allowOverlaps=allowOverlaps)
     toDir = createDirPath([], 'ModelsForExternalTracks', allowOverlaps=allowOverlaps)
     try:
         if not os.path.exists(toDir):
             shutil.copytree(fromDir, toDir)
             print 'OK: Copied from %s to %s.' % (fromDir, toDir)
     except Exception, e:
         print 'FAILED: Error occurred copying from %s to %s: ' % (fromDir, toDir) + str(e).strip()
         sys.exit(1)
         
 # Re-key the TrackInfo record of every GESourceTracks sub-type of 'TestGenome'
 # so that it is stored as a top-level track of 'ModelsForExternalTracks'.
 sourceGenome = 'TestGenome'
 for track in ProcTrackOptions.getSubtypes(sourceGenome, ['GESourceTracks']):
     sourceTrackName = ['GESourceTracks', track]
     ti = TrackInfo(sourceGenome, sourceTrackName)
     ti.trackName = [track]
     ti.genome = 'ModelsForExternalTracks'
     ti.store()
         
 from quick.util.GenomeInfo import GenomeInfo
 from datetime import datetime
 # Describe the bundled test genome: chromosomes chr21 and chrM of the hg18 build.
 gi = GenomeInfo('TestGenome')
 gi.fullName = 'TestGenome'
 gi.sourceUrls = ['http://hgdownload.cse.ucsc.edu/goldenPath/hg18/chromosomes/chr21.fa.gz', \
                  'http://hgdownload.cse.ucsc.edu/goldenPath/hg18/chromosomes/chrM.fa.gz']
 gi.sourceChrNames = ['chr21', 'chrM']
 gi.installedBy = 'Setup.py'
 gi.genomeBuildSource = 'NCBI'
 gi.genomeBuildName = 'hg18'
 # The species name had been mangled to 'H**o Sapiens'; restored here.
 gi.species = 'Homo Sapiens'