예제 #1
0
    def writeIndexes(self):
        """Create and fill the 'leftIndex' and 'rightIndex' files.

        Both files get one entry per index bin, where the bin width is
        ``CompBinManager.getIndexBinSize()`` and the number of bins is
        ``ceil(chrSize / binSize)``.

        * ``leftIndex[b]`` is the enumeration position of the first element
          whose right border is greater than the start of bin ``b``.
        * ``rightIndex[b]`` is the enumeration position of the first element
          whose left border is at or beyond the end of bin ``b``.

        Element borders are derived from the files given to the constructor:

        * start file present: left borders are its contents; right borders
          are the end file's contents, or ``starts + 1`` if there is no end
          file.
        * only an end file: left borders are ``[0] + ends[:-1]`` and right
          borders are ``ends[1:]``.

        Bins that no element reaches are filled with the last enumerated
        position plus one.
        """
        # The bin size is needed for every comparison below; fetch it once.
        binSize = CompBinManager.getIndexBinSize()
        numIndexElements = int(math.ceil(1.0 * self._chrSize / binSize))
        self._leftIndexFile = OutputFile(self._path, 'leftIndex', numIndexElements, allowAppend=False)
        self._rightIndexFile = OutputFile(self._path, 'rightIndex', numIndexElements, allowAppend=False)

        # Test for presence with "is not None" rather than truthiness: the
        # file objects support len(), so a present but zero-length file would
        # otherwise be mistaken for a missing one.
        hasStartFile = self._startFile is not None
        hasEndFile = self._endFile is not None

        if hasStartFile:
            lefts = self._startFile.getContents()
        else:
            lefts = np.r_[0, self._endFile.getContents()[:-1]]

        if hasEndFile:
            rights = self._endFile.getContents()
            if not hasStartFile:
                rights = rights[1:]
        else:
            rights = self._startFile.getContents() + 1

        # i and j are pre-set so the fill values below are defined even when
        # there is nothing to enumerate.
        bin_i = 0
        i = 0
        for i, right in enumerate(rights):
            while right > bin_i * binSize:
                self._leftIndexFile.write(i)
                bin_i += 1

        bin_j = 0
        j = 0
        for j, left in enumerate(lefts):
            while left >= (bin_j + 1) * binSize:
                self._rightIndexFile.write(j)
                bin_j += 1

        self._fillRestOfIndexFile(bin_i, i + 1, self._leftIndexFile)
        self._fillRestOfIndexFile(bin_j, j + 1, self._rightIndexFile)
예제 #2
0
    def _assertWrite(self, filePrefix, dataType, contents):
        """Write every item of ``contents`` with OutputFile.write and verify the file.

        The file is read back as a memmap of ``dataType`` and compared with
        ``contents`` through ``assertListsOrDicts``.
        """
        numElements = len(contents)
        setup = Setup(filePrefix, numElements, dataType, 1, None)

        outFile = OutputFile(setup.path, setup.filePrefix, numElements)
        for value in contents:
            outFile.write(value)
        outFile.close()

        # The file must exist on disk before it can be mapped for reading.
        self.assertTrue(os.path.exists(setup.fn))
        stored = list(memmap(setup.fn, dataType, mode="r"))
        self.assertListsOrDicts(contents, stored)
예제 #3
0
    def _assertWriteElement(
        self,
        filePrefix,
        contents,
        dataType,
        dataTypeDim,
        maxNumEdges,
        maxStrLens,
        assertContents,
        assertDataType,
        assertDataTypeDim,
        assertElementDim,
    ):
        """Write ``contents`` via OutputFile.writeElement and verify the file on disk.

        ``contents`` is placed on a GEList: in the existing ``_<filePrefix>List``
        attribute when the GEList has one, otherwise as a list of
        ``{filePrefix: item}`` dicts in ``_extraList``. Every element of the
        GEList is then written, the file is read back as a memmap and compared
        with ``assertContents``.

        Returns the Setup object describing the written file.
        """
        numExpected = len(assertContents)
        setup = Setup(filePrefix, numExpected, assertDataType, assertDataTypeDim, assertElementDim)

        # Only the column under test receives the requested dtype/dimension;
        # the other one stays at 'float64' with dimension 1.
        if filePrefix == "val":
            valDataType, valDim = dataType, dataTypeDim
        else:
            valDataType, valDim = "float64", 1

        if filePrefix == "weights":
            weightDataType, weightDim = dataType, dataTypeDim
        else:
            weightDataType, weightDim = "float64", 1

        elements = GEList()
        attrName = "_" + filePrefix + "List"
        if not hasattr(elements, attrName):
            elements._extraList = [{filePrefix: item} for item in contents]
        else:
            elements.__dict__[attrName] = contents

        outFile = OutputFile(
            setup.path,
            setup.filePrefix,
            numExpected,
            valDataType,
            valDim,
            weightDataType,
            weightDim,
            maxNumEdges,
            maxStrLens,
        )
        for element in elements:
            outFile.writeElement(element)
        outFile.close()

        self.assertTrue(os.path.exists(setup.fn))

        stored = list(memmap(setup.fn, dtype=setup.dataType, shape=setup.shape, mode="r"))
        self.assertListsOrDicts(assertContents, stored)
        return setup
예제 #4
0
class OutputIndexFilePair(object):
    """Writes the 'leftIndex' and 'rightIndex' bin lookup files.

    The index files are created by writeIndexes() from the contents of a
    start file and/or an end file, and released by close().
    """

    def __init__(self, path, chrSize, startFile, endFile):
        """Store the inputs; nothing is opened until writeIndexes().

        path      -- location handed to OutputFile as its first argument
        chrSize   -- size divided by the index bin size to get the number
                     of index entries
        startFile -- object providing getContents() for start positions,
                     or None
        endFile   -- object providing getContents() for end positions,
                     or None
        """
        self._path = path
        self._chrSize = chrSize
        self._startFile = startFile
        self._endFile = endFile

    def writeIndexes(self):
        """Create and fill the 'leftIndex' and 'rightIndex' files.

        Both files get one entry per index bin, where the bin width is
        ``CompBinManager.getIndexBinSize()`` and the number of bins is
        ``ceil(chrSize / binSize)``.

        * ``leftIndex[b]`` is the enumeration position of the first element
          whose right border is greater than the start of bin ``b``.
        * ``rightIndex[b]`` is the enumeration position of the first element
          whose left border is at or beyond the end of bin ``b``.

        Element borders are derived as follows:

        * start file present: left borders are its contents; right borders
          are the end file's contents, or ``starts + 1`` if there is no end
          file.
        * only an end file: left borders are ``[0] + ends[:-1]`` and right
          borders are ``ends[1:]``.

        Bins that no element reaches are filled with the last enumerated
        position plus one.
        """
        # The bin size is needed for every comparison below; fetch it once.
        binSize = CompBinManager.getIndexBinSize()
        numIndexElements = int(math.ceil(1.0 * self._chrSize / binSize))
        self._leftIndexFile = OutputFile(self._path,
                                         'leftIndex',
                                         numIndexElements,
                                         allowAppend=False)
        self._rightIndexFile = OutputFile(self._path,
                                          'rightIndex',
                                          numIndexElements,
                                          allowAppend=False)

        # Test for presence with "is not None" rather than truthiness: the
        # file objects support len(), so a present but zero-length file would
        # otherwise be mistaken for a missing one.
        hasStartFile = self._startFile is not None
        hasEndFile = self._endFile is not None

        if hasStartFile:
            lefts = self._startFile.getContents()
        else:
            lefts = np.r_[0, self._endFile.getContents()[:-1]]

        if hasEndFile:
            rights = self._endFile.getContents()
            if not hasStartFile:
                rights = rights[1:]
        else:
            rights = self._startFile.getContents() + 1

        # i and j are pre-set so the fill values below are defined even when
        # there is nothing to enumerate.
        bin_i = 0
        i = 0
        for i, right in enumerate(rights):
            while right > bin_i * binSize:
                self._leftIndexFile.write(i)
                bin_i += 1

        bin_j = 0
        j = 0
        for j, left in enumerate(lefts):
            while left >= (bin_j + 1) * binSize:
                self._rightIndexFile.write(j)
                bin_j += 1

        self._fillRestOfIndexFile(bin_i, i + 1, self._leftIndexFile)
        self._fillRestOfIndexFile(bin_j, j + 1, self._rightIndexFile)

    def _fillRestOfIndexFile(self, numFilled, fillValue, indexFile):
        """Write ``fillValue`` until ``indexFile`` holds ``len(indexFile)`` entries.

        ``numFilled`` is the number of entries already written. Nothing is
        written when it is equal to or larger than ``len(indexFile)``.
        """
        # range() instead of xrange(): identical iteration, but the name also
        # exists on Python 3.
        for _ in range(len(indexFile) - numFilled):
            indexFile.write(fillValue)

    def close(self):
        """Close both index files (they only exist after writeIndexes())."""
        self._leftIndexFile.close()
        self._rightIndexFile.close()
예제 #5
0
    def _assertWriteElement(self, filePrefix, contents, dataType, dataTypeDim, maxNumEdges, maxStrLens,
                            assertContents, assertDataType, assertDataTypeDim, assertElementDim):
        """Write ``contents`` via OutputFile.writeElement and verify the file on disk.

        ``contents`` is placed on a GEList: in the existing ``_<filePrefix>List``
        attribute when the GEList has one, otherwise as a list of
        ``{filePrefix: item}`` dicts in ``_extraList``. Every element of the
        GEList is then written, the file is read back as a memmap and compared
        with ``assertContents``.

        Returns the Setup object describing the written file.
        """
        numExpected = len(assertContents)
        setup = Setup(filePrefix, numExpected, assertDataType,
                      assertDataTypeDim, assertElementDim)

        # Only the column under test receives the requested dtype/dimension;
        # the other one stays at 'float64' with dimension 1.
        if filePrefix == 'val':
            valDataType, valDim = dataType, dataTypeDim
        else:
            valDataType, valDim = 'float64', 1

        if filePrefix == 'weights':
            weightDataType, weightDim = dataType, dataTypeDim
        else:
            weightDataType, weightDim = 'float64', 1

        elements = GEList()
        attrName = '_' + filePrefix + 'List'
        if not hasattr(elements, attrName):
            elements._extraList = [{filePrefix: item} for item in contents]
        else:
            elements.__dict__[attrName] = contents

        outFile = OutputFile(setup.path, setup.filePrefix, numExpected,
                             valDataType, valDim, weightDataType, weightDim,
                             maxNumEdges, maxStrLens)
        for element in elements:
            outFile.writeElement(element)
        outFile.close()

        self.assertTrue(os.path.exists(setup.fn))

        stored = list(
            memmap(setup.fn, dtype=setup.dataType, shape=setup.shape, mode='r'))
        self.assertListsOrDicts(assertContents, stored)
        return setup
예제 #6
0
    def writeIndexes(self):
        """Create and fill the 'leftIndex' and 'rightIndex' files.

        Both files get one entry per index bin, where the bin width is
        ``CompBinManager.getIndexBinSize()`` and the number of bins is
        ``ceil(chrSize / binSize)``.

        * ``leftIndex[b]`` is the enumeration position of the first element
          whose right border is greater than the start of bin ``b``.
        * ``rightIndex[b]`` is the enumeration position of the first element
          whose left border is at or beyond the end of bin ``b``.

        Element borders are derived from the files given to the constructor:

        * start file present: left borders are its contents; right borders
          are the end file's contents, or ``starts + 1`` if there is no end
          file.
        * only an end file: left borders are ``[0] + ends[:-1]`` and right
          borders are ``ends[1:]``.

        Bins that no element reaches are filled with the last enumerated
        position plus one.
        """
        # The bin size is needed for every comparison below; fetch it once.
        binSize = CompBinManager.getIndexBinSize()
        numIndexElements = int(math.ceil(1.0 * self._chrSize / binSize))
        self._leftIndexFile = OutputFile(self._path,
                                         'leftIndex',
                                         numIndexElements,
                                         allowAppend=False)
        self._rightIndexFile = OutputFile(self._path,
                                          'rightIndex',
                                          numIndexElements,
                                          allowAppend=False)

        # Test for presence with "is not None" rather than truthiness: the
        # file objects support len(), so a present but zero-length file would
        # otherwise be mistaken for a missing one.
        hasStartFile = self._startFile is not None
        hasEndFile = self._endFile is not None

        if hasStartFile:
            lefts = self._startFile.getContents()
        else:
            lefts = np.r_[0, self._endFile.getContents()[:-1]]

        if hasEndFile:
            rights = self._endFile.getContents()
            if not hasStartFile:
                rights = rights[1:]
        else:
            rights = self._startFile.getContents() + 1

        # i and j are pre-set so the fill values below are defined even when
        # there is nothing to enumerate.
        bin_i = 0
        i = 0
        for i, right in enumerate(rights):
            while right > bin_i * binSize:
                self._leftIndexFile.write(i)
                bin_i += 1

        bin_j = 0
        j = 0
        for j, left in enumerate(lefts):
            while left >= (bin_j + 1) * binSize:
                self._rightIndexFile.write(j)
                bin_j += 1

        self._fillRestOfIndexFile(bin_i, i + 1, self._leftIndexFile)
        self._fillRestOfIndexFile(bin_j, j + 1, self._rightIndexFile)
예제 #7
0
class OutputIndexFilePair(object):
    """Writes the 'leftIndex' and 'rightIndex' bin lookup files.

    The index files are created by writeIndexes() from the contents of a
    start file and/or an end file, and released by close().
    """

    def __init__(self, path, chrSize, startFile, endFile):
        """Store the inputs; nothing is opened until writeIndexes().

        path      -- location handed to OutputFile as its first argument
        chrSize   -- size divided by the index bin size to get the number
                     of index entries
        startFile -- object providing getContents() for start positions,
                     or None
        endFile   -- object providing getContents() for end positions,
                     or None
        """
        self._path = path
        self._chrSize = chrSize
        self._startFile = startFile
        self._endFile = endFile

    def writeIndexes(self):
        """Create and fill the 'leftIndex' and 'rightIndex' files.

        Both files get one entry per index bin, where the bin width is
        ``CompBinManager.getIndexBinSize()`` and the number of bins is
        ``ceil(chrSize / binSize)``.

        * ``leftIndex[b]`` is the enumeration position of the first element
          whose right border is greater than the start of bin ``b``.
        * ``rightIndex[b]`` is the enumeration position of the first element
          whose left border is at or beyond the end of bin ``b``.

        Element borders are derived as follows:

        * start file present: left borders are its contents; right borders
          are the end file's contents, or ``starts + 1`` if there is no end
          file.
        * only an end file: left borders are ``[0] + ends[:-1]`` and right
          borders are ``ends[1:]``.

        Bins that no element reaches are filled with the last enumerated
        position plus one.
        """
        # The bin size is needed for every comparison below; fetch it once.
        binSize = CompBinManager.getIndexBinSize()
        numIndexElements = int(math.ceil(1.0 * self._chrSize / binSize))
        self._leftIndexFile = OutputFile(self._path, 'leftIndex', numIndexElements, allowAppend=False)
        self._rightIndexFile = OutputFile(self._path, 'rightIndex', numIndexElements, allowAppend=False)

        # Test for presence with "is not None" rather than truthiness: the
        # file objects support len(), so a present but zero-length file would
        # otherwise be mistaken for a missing one.
        hasStartFile = self._startFile is not None
        hasEndFile = self._endFile is not None

        if hasStartFile:
            lefts = self._startFile.getContents()
        else:
            lefts = np.r_[0, self._endFile.getContents()[:-1]]

        if hasEndFile:
            rights = self._endFile.getContents()
            if not hasStartFile:
                rights = rights[1:]
        else:
            rights = self._startFile.getContents() + 1

        # i and j are pre-set so the fill values below are defined even when
        # there is nothing to enumerate.
        bin_i = 0
        i = 0
        for i, right in enumerate(rights):
            while right > bin_i * binSize:
                self._leftIndexFile.write(i)
                bin_i += 1

        bin_j = 0
        j = 0
        for j, left in enumerate(lefts):
            while left >= (bin_j + 1) * binSize:
                self._rightIndexFile.write(j)
                bin_j += 1

        self._fillRestOfIndexFile(bin_i, i + 1, self._leftIndexFile)
        self._fillRestOfIndexFile(bin_j, j + 1, self._rightIndexFile)

    def _fillRestOfIndexFile(self, numFilled, fillValue, indexFile):
        """Write ``fillValue`` until ``indexFile`` holds ``len(indexFile)`` entries.

        ``numFilled`` is the number of entries already written. Nothing is
        written when it is equal to or larger than ``len(indexFile)``.
        """
        # range() instead of xrange(): identical iteration, but the name also
        # exists on Python 3.
        for _ in range(len(indexFile) - numFilled):
            indexFile.write(fillValue)

    def close(self):
        """Close both index files (they only exist after writeIndexes())."""
        self._leftIndexFile.close()
        self._rightIndexFile.close()
        
예제 #8
0
    def _assertWrite(self, filePrefix, dataType, contents):
        """Write every item of ``contents`` with OutputFile.write and verify the file.

        The file is read back as a memmap of ``dataType`` and compared with
        ``contents`` through ``assertListsOrDicts``.
        """
        numElements = len(contents)
        setup = Setup(filePrefix, numElements, dataType, 1, None)

        outFile = OutputFile(setup.path, setup.filePrefix, numElements)
        for value in contents:
            outFile.write(value)
        outFile.close()

        # The file must exist on disk before it can be mapped for reading.
        self.assertTrue(os.path.exists(setup.fn))
        stored = list(memmap(setup.fn, dataType, mode='r'))
        self.assertListsOrDicts(contents, stored)
예제 #9
0
    def __init__(self, path, prefixList, fileArraySize, chrSize, valDataType='float64', valDim=1,
                 weightDataType='float64', weightDim=1, maxNumEdges=0, maxStrLens=None, elementsAreSorted=False):
        """Open one OutputFile per prefix under ``path`` and set up the index pair.

        path              -- directory for the files; created if it does not exist
        prefixList        -- prefixes to open; their order is kept in ``self._files``
        fileArraySize     -- size passed to every OutputFile
        chrSize           -- passed to OutputIndexFilePair
        valDataType, valDim, weightDataType, weightDim, maxNumEdges, maxStrLens
                          -- forwarded unchanged to every OutputFile
        maxStrLens        -- defaults to a fresh empty dict when omitted or None
        elementsAreSorted -- stored as ``self._elementsAreSorted``

        An OutputIndexFilePair is created only when 'start' or 'end' is among
        the prefixes; otherwise ``self._indexFiles`` is None.
        """
        # A literal {} default would be one dict object shared by every call
        # (and handed to every OutputFile); build a new one per call instead.
        if maxStrLens is None:
            maxStrLens = {}

        self._files = OrderedDict()
        if not os.path.exists(path):
            os.makedirs(path)

        for prefix in prefixList:
            self._files[prefix] = OutputFile(path, prefix, fileArraySize,
                                             valDataType, valDim,
                                             weightDataType, weightDim,
                                             maxNumEdges, maxStrLens)

        if 'start' in self._files or 'end' in self._files:
            # .get() yields None for whichever of the two files is absent.
            self._indexFiles = OutputIndexFilePair(path, chrSize,
                                                   self._files.get('start'),
                                                   self._files.get('end'))
        else:
            self._indexFiles = None

        self._elementsAreSorted = elementsAreSorted
예제 #10
0
 def testLen(self):
     """len() of a new OutputFile equals the size it was created with."""
     expectedSize = 123
     setup = Setup('start', expectedSize, 'int32', 1, None)
     outFile = OutputFile(setup.path, setup.filePrefix, expectedSize)
     self.assertEqual(expectedSize, len(outFile))