Beispiel #1
0
 def __init__(self, dataDir, bigendian=False):
     """Load the character-category tables stored under ``dataDir``.

     Category definitions are obtained from ``CharCategory.readCategorys``;
     the ``char2id`` and ``eqlMasks`` int arrays are then read back to back
     from ``<dataDir>/code2category``.

     @param dataDir directory containing the ``code2category`` file
     @param bigendian forwarded unchanged to the category reader and to
            the input stream
     """
     self.categorys = CharCategory.readCategorys(dataDir, bigendian)

     stream = FileMappedInputStream(dataDir + "/code2category", bigendian)
     try:
         # Each table gets size // 4 // 2 entries -- presumably 4-byte ints
         # with the file split evenly between the two tables (TODO confirm).
         idCount = stream.size() // 4 // 2
         self.char2id = stream.getIntArray(idCount)
         # size() is queried again before the second read, as the original did.
         maskCount = stream.size() // 4 // 2
         self.eqlMasks = stream.getIntArray(maskCount)
     finally:
         # Release the stream whether or not the reads succeeded.
         stream.close()
Beispiel #2
0
 def __init__(self, dataDir, bigendian=False):
     """Load the matrix stored in ``<dataDir>/matrix.bin``.

     The file is read in order: one int into ``leftSize``, one int into
     ``rightSize``, then ``leftSize * rightSize`` shorts into ``matrix``.

     @param dataDir directory containing ``matrix.bin``
     @param bigendian forwarded unchanged to the input stream
     """
     matrixPath = dataDir + "/matrix.bin"
     stream = FileMappedInputStream(matrixPath, bigendian)
     try:
         # The two dimensions come first; their product sizes the payload.
         self.leftSize = stream.getInt()
         self.rightSize = stream.getInt()
         cellCount = self.leftSize * self.rightSize
         self.matrix = stream.getShortArray(cellCount)
     finally:
         # Release the stream whether or not the reads succeeded.
         stream.close()
Beispiel #3
0
    def __init__(self, filepath, bigendian=False):
        """
        Load a saved DoubleArray and build an instance of this class from it.

        The file is consumed in order: three ints (node count, tail-index
        count, tail length) followed by the ``begs``, ``base``, ``lens``,
        ``chck`` and ``tail`` sections.

        @param filepath path of the file in which the DoubleArray is saved
        @param bigendian forwarded unchanged to the input stream
        @throws IOException raised when reading the file given by filepath
                fails (as stated by the original note; the concrete Python
                exception type is not visible here)
        """
        stream = FileMappedInputStream(filepath, bigendian)
        try:
            # Header: the three section sizes, read left to right.
            nodeCount, tailIndexCount, tailLength = (
                stream.getInt(),
                stream.getInt(),
                stream.getInt(),
            )
            self.keySetSize = tailIndexCount

            # Body sections follow in exactly this order in the file.
            self.begs = stream.getIntArray(tailIndexCount)
            self.base = stream.getIntArray(nodeCount)
            self.lens = stream.getShortArray(tailIndexCount)
            self.chck = stream.getCharArray(nodeCount)
            self.tail = stream.getString(tailLength)
        finally:
            # Release the stream whether or not the reads succeeded.
            stream.close()
Beispiel #4
0
    def __init__(self, dataDir, bigendian=False, splitted=False):
        """Load the word dictionary files found under ``dataDir``.

        Builds the ``word2id`` searcher, loads the word data (either the
        single ``word.dat`` file or the sorted ``word.dat.*`` pieces), the
        ``word.ary.idx`` index array, and the per-word tables stored in
        ``word.inf``.

        @param dataDir directory containing the dictionary files
        @param bigendian forwarded unchanged to every reader
        @param splitted when true, word data is read from all files matching
               ``word.dat.*`` (in sorted path order) instead of ``word.dat``
        """
        self.trie = Searcher(dataDir + "/word2id", bigendian)

        if not splitted:
            self.data = util.getCharArray(dataDir + "/word.dat", bigendian)
        else:
            # Sort so the pieces are always passed in the same path order.
            chunkPaths = sorted(glob.glob(dataDir + "/word.dat.*"))
            self.data = util.getCharArrayMulti(chunkPaths, bigendian)
        self.indices = util.getIntArray(dataDir + "/word.ary.idx", bigendian)

        stream = FileMappedInputStream(dataDir + "/word.inf", bigendian)
        try:
            # One int plus three shorts are read per word below; presumably
            # 4 and 2 bytes each, hence this record size (TODO confirm).
            recordBytes = 4 + 2 + 2 + 2
            entryCount = stream.size() // recordBytes

            # dataOffsets[word ID] = start position of the word's feature data
            self.dataOffsets = stream.getIntArray(entryCount)
            # leftIds[word ID] = left context ID of the word
            self.leftIds = stream.getShortArray(entryCount)
            # rightIds[word ID] = right context ID of the word
            self.rightIds = stream.getShortArray(entryCount)
            # costs[word ID] = cost of the word
            self.costs = stream.getShortArray(entryCount)
        finally:
            # Release the stream whether or not the reads succeeded.
            stream.close()