def __init__(self, dataDir, bigendian=False):
    """Load the character-category tables stored under *dataDir*.

    Reads the category definitions via CharCategory.readCategorys and then
    the code-to-category mapping from ``dataDir + "/code2category"``.

    @param dataDir   directory containing the dictionary data files
    @param bigendian True if the data files were written big-endian
    """
    self.categorys = CharCategory.readCategorys(dataDir, bigendian)
    fmis = FileMappedInputStream(dataDir + "/code2category", bigendian)
    try:
        # The file holds two equal-length int arrays back to back:
        # size()/4 is the total int count, halved for each array.
        self.char2id = fmis.getIntArray(fmis.size() // 4 // 2)
        self.eqlMasks = fmis.getIntArray(fmis.size() // 4 // 2)
    finally:
        # Always unmap the file, even if a read fails mid-way.
        fmis.close()
def __init__(self, dataDir, bigendian=False):
    """Load the character-category tables stored under *dataDir*.

    Reads the category definitions via CharCategory.readCategorys and then
    the code-to-category mapping from ``dataDir + "/code2category"``.

    @param dataDir   directory containing the dictionary data files
    @param bigendian True if the data files were written big-endian
    """
    self.categorys = CharCategory.readCategorys(dataDir, bigendian)
    fmis = FileMappedInputStream(dataDir + "/code2category", bigendian)
    try:
        # BUG FIX: use floor division (//) instead of true division (/).
        # Under Python 3, "/" produces a float, which is not a valid
        # element count for getIntArray. This also matches the sibling
        # constructor at the top of the file, which already uses "//".
        # The file holds two equal-length int arrays back to back:
        # size()/4 is the total int count, halved for each array.
        self.char2id = fmis.getIntArray(fmis.size() // 4 // 2)
        self.eqlMasks = fmis.getIntArray(fmis.size() // 4 // 2)
    finally:
        # Always unmap the file, even if a read fails mid-way.
        fmis.close()
def __init__(self, dataDir, bigendian=False):
    """Load the connection-cost matrix from ``dataDir + "/matrix.bin"``.

    The file layout is two header ints (left size, right size) followed by
    leftSize * rightSize cost shorts.

    @param dataDir   directory containing the dictionary data files
    @param bigendian True if the data files were written big-endian
    """
    fmis = FileMappedInputStream(dataDir + "/matrix.bin", bigendian)
    try:
        # Header: the two dimensions of the cost matrix.
        self.leftSize = fmis.getInt()
        self.rightSize = fmis.getInt()
        # Body: the flattened leftSize x rightSize matrix of shorts.
        self.matrix = fmis.getShortArray(self.leftSize * self.rightSize)
    finally:
        fmis.close()
def __init__(self, filepath, bigendian=False):
    """Create an instance of this class by reading a saved DoubleArray.

    @param filepath  path of the file where the DoubleArray is stored
    @param bigendian True if the file was written big-endian
    @throws IOException raised when reading the file indicated by
            filepath fails
    """
    fmis = FileMappedInputStream(filepath, bigendian)
    try:
        # Header: node count, tail-index count, and tail string length.
        nodeCount = fmis.getInt()
        tindCount = fmis.getInt()
        tailLength = fmis.getInt()

        self.keySetSize = tindCount
        # Per-key arrays (tail begin offsets and lengths) ...
        self.begs = fmis.getIntArray(tindCount)
        self.lens = fmis.getShortArray(tindCount)
        # ... and per-node arrays (base values and check chars).
        self.base = fmis.getIntArray(nodeCount)
        self.chck = fmis.getCharArray(nodeCount)
        # Shared tail string holding the key suffixes.
        self.tail = fmis.getString(tailLength)
    finally:
        fmis.close()
def __init__(self, dataDir, bigendian=False, splitted=False):
    """Load the word dictionary files stored under *dataDir*.

    @param dataDir   directory containing the dictionary data files
    @param bigendian True if the data files were written big-endian
    @param splitted  True if word.dat was split into word.dat.* pieces
    """
    self.trie = Searcher(dataDir + "/word2id", bigendian)
    if splitted:
        # word.dat was written as multiple chunks; stitch them together
        # in lexicographic order.
        chunks = sorted(glob.glob(dataDir + "/word.dat.*"))
        self.data = util.getCharArrayMulti(chunks, bigendian)
    else:
        self.data = util.getCharArray(dataDir + "/word.dat", bigendian)
    self.indices = util.getIntArray(dataDir + "/word.ary.idx", bigendian)

    fmis = FileMappedInputStream(dataDir + "/word.inf", bigendian)
    try:
        # Each word record is one int (4 bytes) followed by three shorts
        # (2 bytes each), so the record count is size / 10.
        wordCount = fmis.size() // (4 + 2 + 2 + 2)
        # dataOffsets[word id] = start position of the word's feature data
        self.dataOffsets = fmis.getIntArray(wordCount)
        # leftIds[word id] = left context id of the word
        self.leftIds = fmis.getShortArray(wordCount)
        # rightIds[word id] = right context id of the word
        self.rightIds = fmis.getShortArray(wordCount)
        # costs[word id] = cost of the word
        self.costs = fmis.getShortArray(wordCount)
    finally:
        fmis.close()
def __init__(self, dataDir, bigendian=False, splitted=False):
    """Load the word dictionary files stored under *dataDir*.

    @param dataDir   directory containing the dictionary data files
    @param bigendian True if the data files were written big-endian
    @param splitted  True if word.dat was split into word.dat.* pieces
    """
    self.trie = Searcher(dataDir + "/word2id", bigendian)
    if splitted:
        # word.dat was written as multiple chunks; stitch them together
        # in lexicographic order.
        paths = sorted(glob.glob(dataDir + "/word.dat.*"))
        self.data = util.getCharArrayMulti(paths, bigendian)
    else:
        self.data = util.getCharArray(dataDir + "/word.dat", bigendian)
    self.indices = util.getIntArray(dataDir + "/word.ary.idx", bigendian)

    fmis = FileMappedInputStream(dataDir + "/word.inf", bigendian)
    try:
        # BUG FIX: use floor division (//) instead of true division (/).
        # Under Python 3, "/" produces a float, which is not a valid
        # element count for getIntArray/getShortArray. This also matches
        # the sibling constructor earlier in the file that uses "//".
        # Each word record is one int (4 bytes) followed by three shorts
        # (2 bytes each), so the record count is size / 10.
        wordCount = fmis.size() // (4 + 2 + 2 + 2)
        # dataOffsets[word id] = start position of the word's feature data
        self.dataOffsets = fmis.getIntArray(wordCount)
        # leftIds[word id] = left context id of the word
        self.leftIds = fmis.getShortArray(wordCount)
        # rightIds[word id] = right context id of the word
        self.rightIds = fmis.getShortArray(wordCount)
        # costs[word id] = cost of the word
        self.costs = fmis.getShortArray(wordCount)
    finally:
        fmis.close()