def __init__(self, dataDir, bigendian=False):
    """Load the character-category tables found under ``dataDir``.

    Populates ``self.categorys`` via ``CharCategory.readCategorys`` and
    reads two int arrays, ``self.char2id`` and ``self.eqlMasks``, from the
    ``code2category`` file.

    Args:
        dataDir: Directory path (string) containing the dictionary files.
        bigendian: Forwarded unchanged to the readers; presumably selects
            the byte order of the binary files -- confirm against
            FileMappedInputStream.
    """
    self.categorys = CharCategory.readCategorys(dataDir, bigendian)

    fmis = FileMappedInputStream(dataDir + "/code2category", bigendian)
    try:
        # Floor division keeps the element count an int on Python 3
        # (plain "/" would hand a float to getIntArray).
        # NOTE(review): 4 is presumably the byte width of one int and 2 the
        # number of equal-length arrays stored in the file -- confirm.
        self.char2id = fmis.getIntArray(fmis.size() // 4 // 2)
        self.eqlMasks = fmis.getIntArray(fmis.size() // 4 // 2)
    finally:
        # Always release the stream, even if a read fails.
        fmis.close()
def __init__(self, dataDir, bigendian=False):
    """Initialise the character-category lookup from files in ``dataDir``.

    Sets ``self.categorys`` (from ``CharCategory.readCategorys``) and the
    two int arrays ``self.char2id`` and ``self.eqlMasks`` read from
    ``code2category``.

    Args:
        dataDir: Directory path (string) containing the dictionary files.
        bigendian: Passed through to the readers; presumably the byte order
            of the binary data -- confirm against FileMappedInputStream.
    """
    self.categorys = CharCategory.readCategorys(dataDir, bigendian)

    mappingPath = dataDir + "/code2category"
    stream = FileMappedInputStream(mappingPath, bigendian)
    try:
        # size() is queried separately for each array, exactly as before.
        # NOTE(review): looks like "bytes / 4-byte int / 2 arrays" -- confirm.
        char2id = stream.getIntArray(stream.size() // 4 // 2)
        self.char2id = char2id
        eqlMasks = stream.getIntArray(stream.size() // 4 // 2)
        self.eqlMasks = eqlMasks
    finally:
        # The stream is closed whether or not the reads succeeded.
        stream.close()
def __init__(self, dataDir, bigendian=False, splitted=False):
    """Load the word dictionary stored under ``dataDir``.

    Builds ``self.trie`` from ``word2id``, loads ``self.data`` from
    ``word.dat`` (or from every ``word.dat.*`` part when ``splitted`` is
    true), loads ``self.indices`` from ``word.ary.idx``, and reads the
    per-word arrays from ``word.inf``.

    Args:
        dataDir: Directory path (string) containing the dictionary files.
        bigendian: Forwarded unchanged to every reader; presumably selects
            the byte order of the binary files -- confirm.
        splitted: When true, ``word.dat`` is read from the sorted list of
            files matching ``word.dat.*`` instead of a single file.
    """
    self.trie = Searcher(dataDir + "/word2id", bigendian)

    if splitted:
        # Sort so the parts are always passed in a deterministic order.
        paths = sorted(glob.glob(dataDir + "/word.dat.*"))
        self.data = util.getCharArrayMulti(paths, bigendian)
    else:
        self.data = util.getCharArray(dataDir + "/word.dat", bigendian)

    self.indices = util.getIntArray(dataDir + "/word.ary.idx", bigendian)

    fmis = FileMappedInputStream(dataDir + "/word.inf", bigendian)
    try:
        # Floor division keeps the count an int on Python 3 (plain "/"
        # would pass a float element count to the array readers).
        # One int array plus three short arrays are read per word below;
        # NOTE(review): 4 + 2 + 2 + 2 presumably are their byte widths.
        wordCount = fmis.size() // (4 + 2 + 2 + 2)

        # dataOffsets[word ID] = start position of the word's feature data
        self.dataOffsets = fmis.getIntArray(wordCount)
        # leftIds[word ID] = left-context ID of the word
        self.leftIds = fmis.getShortArray(wordCount)
        # rightIds[word ID] = right-context ID of the word
        self.rightIds = fmis.getShortArray(wordCount)
        # costs[word ID] = cost of the word
        self.costs = fmis.getShortArray(wordCount)
    finally:
        # Always release the stream, even if a read fails.
        fmis.close()
def __init__(self, dataDir, bigendian=False, splitted=False):
    """Initialise the word dictionary from the files in ``dataDir``.

    Sets ``self.trie`` (from ``word2id``), ``self.data`` (from ``word.dat``
    or, when ``splitted`` is true, from all ``word.dat.*`` parts),
    ``self.indices`` (from ``word.ary.idx``) and the per-word arrays read
    from ``word.inf``.

    Args:
        dataDir: Directory path (string) containing the dictionary files.
        bigendian: Passed through to every reader; presumably the byte
            order of the binary data -- confirm.
        splitted: Selects the multi-part ``word.dat.*`` layout when true.
    """
    self.trie = Searcher(dataDir + "/word2id", bigendian)

    if not splitted:
        self.data = util.getCharArray(dataDir + "/word.dat", bigendian)
    else:
        # Parts are handed over in sorted filename order.
        partPaths = sorted(glob.glob(dataDir + "/word.dat.*"))
        self.data = util.getCharArrayMulti(partPaths, bigendian)

    self.indices = util.getIntArray(dataDir + "/word.ary.idx", bigendian)

    infoStream = FileMappedInputStream(dataDir + "/word.inf", bigendian)
    try:
        # One int array and three short arrays are read per word below;
        # NOTE(review): the summands presumably are their byte widths.
        recordBytes = 4 + 2 + 2 + 2
        entryCount = infoStream.size() // recordBytes

        # dataOffsets[word ID] = start position of the word's feature data
        self.dataOffsets = infoStream.getIntArray(entryCount)
        # leftIds[word ID] = left-context ID of the word
        self.leftIds = infoStream.getShortArray(entryCount)
        # rightIds[word ID] = right-context ID of the word
        self.rightIds = infoStream.getShortArray(entryCount)
        # costs[word ID] = cost of the word
        self.costs = infoStream.getShortArray(entryCount)
    finally:
        # The stream is closed whether or not the reads succeeded.
        infoStream.close()