def readCategorys(dataDir, bigendian):
    """Load the character-category table stored under *dataDir*.

    Reads ``dataDir + "/char.category"`` as a flat array of ints and groups
    it into consecutive records of four ints, building one ``Category`` per
    record. The first two ints of a record are passed through unchanged;
    the third and fourth are converted to booleans by comparing them with 1.

    Args:
        dataDir: Directory containing the ``char.category`` file.
        bigendian: Byte-order flag forwarded to ``util.getIntArray``.

    Returns:
        A list of ``Category`` objects, one per complete four-int record.
        Trailing ints that do not fill a whole record are ignored.
    """
    data = util.getIntArray(dataDir + "/char.category", bigendian)
    # Floor division is required: on Python 3 ``/`` yields a float, and
    # range() rejects a float with a TypeError.
    size = len(data) // 4
    ary = []
    for i in range(size):
        base = i * 4
        ary.append(
            Category(
                data[base],
                data[base + 1],
                data[base + 2] == 1,
                data[base + 3] == 1,
            )
        )
    return ary
def readCategorys(dataDir, bigendian):
    """Build the list of ``Category`` entries from ``char.category``.

    The file at ``dataDir + "/char.category"`` is read as an int array
    (byte order selected by *bigendian*) and consumed four ints at a time.
    For each group, the first two ints are handed to ``Category`` as-is and
    the last two are passed as ``value == 1`` booleans.

    Returns:
        A list with one ``Category`` per complete group of four ints.
    """
    raw = util.getIntArray(dataDir + "/char.category", bigendian)
    record_count = len(raw) // 4
    # Step through the flat array by record offset; any incomplete trailing
    # group is skipped because the bound is derived from the floored count.
    return [
        Category(raw[off], raw[off + 1], raw[off + 2] == 1, raw[off + 3] == 1)
        for off in range(0, record_count * 4, 4)
    ]
def __init__(self, dataDir, bigendian=False, splitted=False):
    """Load the word dictionary files found under *dataDir*.

    Args:
        dataDir: Directory containing ``word2id``, ``word.dat`` (or the
            split ``word.dat.*`` files), ``word.ary.idx`` and ``word.inf``.
        bigendian: Byte-order flag forwarded to every reader.
        splitted: When true, the character data is read from all files
            matching ``word.dat.*`` (in sorted path order) instead of the
            single ``word.dat`` file.
    """
    self.trie = Searcher(dataDir + "/word2id", bigendian)
    if splitted:
        paths = sorted(glob.glob(dataDir + "/word.dat.*"))
        self.data = util.getCharArrayMulti(paths, bigendian)
    else:
        self.data = util.getCharArray(dataDir + "/word.dat", bigendian)
    self.indices = util.getIntArray(dataDir + "/word.ary.idx", bigendian)

    fmis = FileMappedInputStream(dataDir + "/word.inf", bigendian)
    try:
        # word.inf is read as one int array followed by three short arrays,
        # each with one entry per word, hence 4 + 2 + 2 + 2 bytes per word.
        # Floor division keeps the count an int on Python 3 (``/`` would
        # produce a float, which is not a valid element count).
        wordCount = fmis.size() // (4 + 2 + 2 + 2)

        # dataOffsets[word ID] = start position of the word's feature data
        self.dataOffsets = fmis.getIntArray(wordCount)

        # leftIds[word ID] = left context ID of the word
        self.leftIds = fmis.getShortArray(wordCount)

        # rightIds[word ID] = right context ID of the word
        self.rightIds = fmis.getShortArray(wordCount)

        # costs[word ID] = cost of the word
        self.costs = fmis.getShortArray(wordCount)
    finally:
        # Always release the mapped file, even if a read fails.
        fmis.close()
def __init__(self, dataDir, bigendian=False, splitted=False):
    """Initialise the dictionary from the files in *dataDir*.

    Builds the ``word2id`` searcher, loads the word character data (from
    the sorted ``word.dat.*`` pieces when *splitted* is true, otherwise
    from ``word.dat``), the ``word.ary.idx`` index array, and the per-word
    columns stored in ``word.inf``. *bigendian* is forwarded to every reader.
    """
    self.trie = Searcher(dataDir + "/word2id", bigendian)

    if not splitted:
        self.data = util.getCharArray(dataDir + "/word.dat", bigendian)
    else:
        piece_paths = sorted(glob.glob(dataDir + "/word.dat.*"))
        self.data = util.getCharArrayMulti(piece_paths, bigendian)

    self.indices = util.getIntArray(dataDir + "/word.ary.idx", bigendian)

    info_stream = FileMappedInputStream(dataDir + "/word.inf", bigendian)
    try:
        # One int column plus three short columns per word.
        bytes_per_word = 4 + 2 + 2 + 2
        word_total = info_stream.size() // bytes_per_word

        # dataOffsets[word ID] = start position of the word's feature data
        self.dataOffsets = info_stream.getIntArray(word_total)
        # leftIds[word ID] = left context ID of the word
        self.leftIds = info_stream.getShortArray(word_total)
        # rightIds[word ID] = right context ID of the word
        self.rightIds = info_stream.getShortArray(word_total)
        # costs[word ID] = cost of the word
        self.costs = info_stream.getShortArray(word_total)
    finally:
        info_stream.close()