Ejemplo n.º 1
0
    def load(self, path):
        """Load the core dictionary from ``path`` and write a binary cache.

        ``self.loadDat(path)`` is tried first; when it succeeds nothing else
        is done.  Otherwise the text file is parsed line by line.  Each line is
        tab-separated: the word, followed by ``nature``/``frequency`` pairs.
        The parsed entries are built into ``self.trie`` and then written to
        ``self.path + Predefine.BIN_EXT``.

        :param path: path of the text dictionary to read.
        :return: ``True`` when the dictionary was loaded (from the cache or
            from the text file and the cache was written); ``False`` when the
            text file could not be read (``IOError``) or writing the cache
            failed.
        :raises: exceptions other than ``IOError`` raised while parsing or
            building the trie (for example ``ValueError`` from ``int()``) are
            not caught here.
        """
        self.logger.info("核心词典开始加载:%s" % path)
        print("核心词典开始加载:%s" % path)
        if self.loadDat(path):
            return True

        initdict = OrderedDict()
        try:
            MAX_FREQUENCY = 0
            # ``with`` releases the file handle even when parsing raises.
            with open(path, 'r') as f:
                start = time()
                for raw_line in f:
                    line = raw_line.strip(' \n\t\r')
                    if not line:
                        # NOTE(review): a blank/whitespace-only line stops the
                        # load (same as the previous readline loop) - confirm
                        # this is intended rather than skipping the line.
                        break
                    param = line.split('\t')
                    # param[0] is the word; the rest are (nature, frequency) pairs.
                    natureCount = (len(param) - 1) // 2
                    attribute = CoreDictionary.Attribute().init1(natureCount)
                    for i in range(natureCount):
                        attribute.nature[i] = Nature.valueOf(param[1 + 2 * i])
                        attribute.frequency[i] = int(param[2 + 2 * i])
                        attribute.totalFrequency += attribute.frequency[i]
                    initdict[param[0]] = attribute
                    MAX_FREQUENCY += attribute.totalFrequency

            entries = TreeMap(initdict)
            self.logger.info("核心词典读入词条%i,全部频次%i,耗时%fms" % (entries.size(), MAX_FREQUENCY, (time() - start) * 1000))
            print("核心词典读入词条%i,全部频次%i,耗时%fms" % (entries.size(), MAX_FREQUENCY, (time() - start) * 1000))
            self.trie.build(entries)
            self.logger.info("核心词典加载成功:%i个词条,下面将写入缓存" % self.trie.size1())
            print("核心词典加载成功:%i个词条,下面将写入缓存" % self.trie.size1())

            try:
                # NOTE(review): the cache path uses self.path, not the ``path``
                # argument - confirm the two are always the same file.
                # ``with`` closes the cache file even when a write fails.
                with open(self.path + Predefine.BIN_EXT, 'w+') as out:
                    attributeList = entries.values()
                    # Layout: attribute count, then for each attribute its total
                    # frequency, nature count and (nature ordinal, frequency)
                    # pairs, followed by the serialised trie.
                    out.writelines(Convert.convert(len(attributeList)))
                    for attribute in attributeList:
                        out.writelines(Convert.convert(attribute.totalFrequency))
                        out.writelines(Convert.convert(len(attribute.nature)))
                        for i in range(len(attribute.nature)):
                            out.writelines(Convert.convert(Nature.ordinal(attribute.nature[i])))
                            out.writelines(Convert.convert(attribute.frequency[i]))

                    self.trie.save(out)
            except Exception as e:
                self.logger.warning("保存失败%s" % str(e))
                return False
        except IOError as e:
            self.logger.warning("核心词典%s不存在或读取错误!" % str(e))
            return False
        return True
Ejemplo n.º 2
0
    def loadMainDictionary(mainPath):
        """Load the custom dictionary and cache it next to ``mainPath``.

        ``CustomDictionary.loadDat(mainPath)`` is tried first; when it succeeds
        nothing else is done.  Otherwise every path in ``CustomDictionary.path``
        is loaded through ``CustomDictionary.loadtxt`` with the default nature
        ``Nature.n``, the result is built into a fresh ``DoubleArrayTrie``
        stored on ``CustomDictionary.dat``, and the entries plus the trie are
        written to ``mainPath + Predefine.BIN_EXT``.

        :param mainPath: path of the main dictionary; also the base name of
            the cache file.
        :return: ``True`` (both when the cache was loaded and after a rebuild).
        :raises: errors raised while writing the cache file are not caught.
        """
        Predefine.logger.info("自定义词典开始加载:%s" % mainPath)
        print("自定义词典开始加载:%s" % mainPath)
        if CustomDictionary.loadDat(mainPath):
            return True
        CustomDictionary.dat = DoubleArrayTrie()

        entries = TreeMap({})
        customNatureCollector = set()
        try:
            for p in CustomDictionary.path:
                defaultNature = Nature.n
                Predefine.logger.info("以默认词性[%s]加载自定义词典%s中……" %
                                      (str(defaultNature), p))
                print("以默认词性[%s]加载自定义词典%s中……" % (str(defaultNature), p))
                success, entries = CustomDictionary.loadtxt(
                    p, defaultNature, entries, customNatureCollector)
                if not success:
                    Predefine.logger.warning("失败:%s" % p)
        except IOError as e:
            # Loading errors are logged only; whatever was loaded so far is
            # still built and cached below.
            Predefine.logger.error("自定义词典%s不存在或读取错误!%s" % (mainPath, e))
        except Exception as e:
            Predefine.logger.error("自定义词典%s缓存失败!%s\n" % (mainPath, e))
        if entries.size() == 0:
            Predefine.logger.warning("没有加载到任何词条")
            # Use a blank placeholder entry so the trie is not built empty.
            # NOTE(review): the placeholder value is None, so attribute.save()
            # below would presumably fail on it - confirm how the empty case
            # is meant to be cached.
            entries.put(Predefine.TAG_OTHER, None)
        Predefine.logger.info("正在构建DoubleArrayTrie……")
        CustomDictionary.dat.build(entries)
        # Cache as a dat file so the next load is much faster.
        Predefine.logger.info("正在缓存词典为dat文件……")
        # Values to cache, in the map's iteration order.
        attributeList = [value for _key, value in entries.items()]
        # ``with`` closes the cache file even when a write fails part-way.
        with open(mainPath + Predefine.BIN_EXT, 'w+') as out:
            # Custom natures are collected above but not written to the cache
            # (the IOUtil.writeCustomNature call is disabled).
            # Cache body: attribute count, each attribute, then the trie.
            out.writelines(Convert.convert(len(attributeList)))
            for attribute in attributeList:
                attribute.save(out)
            CustomDictionary.dat.save1(out)

        return True