Ejemplo n.º 1
0

# def LoopTest2(n):
#     for _ in range(n):
#         old_Tokenize_cn('響著錄中文规则很长 very long , 为啥是不?')

if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s [%(levelname)s] %(message)s')

    logging.info("Start")
    # import ProcessSentence
    # ProcessSentence.LoadCommon()  # too heavy to load for debugging

    FeatureOntology.LoadFeatureOntology('../../fsa/Y/feature.txt')
    Lexicon.LoadSegmentLexicon()
    XLocation = '../../fsa/X/'
    Lexicon.LoadExtraReference(XLocation + 'CuobieziX.txt',
                               Lexicon._LexiconCuobieziDict)
    Lexicon.LoadExtraReference(XLocation + 'Fanti.txt',
                               Lexicon._LexiconFantiDict)

    main_x = Tokenize('科普:。,?带你看懂蜀绣冰壶比赛')
    #old_Tokenize_cn('很少有科普:3 minutes 三分钟带你看懂蜀绣冰壶比赛')

    import cProfile, pstats

    cProfile.run("LoopTest1(100)", 'restatslex')
    pstat = pstats.Stats('restatslex')
    pstat.sort_stats('time').print_stats(10)
Ejemplo n.º 2
0
    def Reload(self, ReloadTask):
        utils.InitDB()
        PipeLineLocation = ParserConfig.get("main", "Pipelinefile")
        XLocation = os.path.dirname(PipeLineLocation) + "/"
        Reply = "Lexicon/Rule/Pipeline:"
        systemfileolderthanDB = ProcessSentence.SystemFileOlderThanDB(
            XLocation)

        if ReloadTask.lower() == "/lexicon":
            logging.info("Start loading lexicon...")
            Lexicon.ResetAllLexicons()
            # ProcessSentence.LoadCommonLexicon(XLocation)
            for action in ProcessSentence.PipeLine:
                if action.startswith("Lookup Spelling:"):
                    Spellfile = action[action.index(":") +
                                       1:].strip().split(",")
                    for spell in Spellfile:
                        spell = spell.strip()
                        if spell:
                            Lexicon.LoadExtraReference(
                                XLocation + spell,
                                Lexicon._LexiconCuobieziDict)

                if action.startswith("Lookup Encoding:"):
                    Encodefile = action[action.index(":") +
                                        1:].strip().split(",")
                    for encode in Encodefile:
                        encode = encode.strip()
                        if encode:
                            Lexicon.LoadExtraReference(
                                XLocation + encode, Lexicon._LexiconFantiDict)

                if action.startswith("Lookup Main:"):
                    Mainfile = action[action.index(":") +
                                      1:].strip().split(",")
                    for main in Mainfile:
                        main = main.strip()
                        if main:
                            Lexicon.LoadMainLexicon(XLocation + main)

                if action.startswith("Lookup SegmentSlash:"):
                    Slashfile = action[action.index(":") +
                                       1:].strip().split(",")
                    for slash in Slashfile:
                        slash = slash.strip()
                        if slash:
                            Lexicon.LoadSegmentSlash(XLocation + slash)

                if action.startswith("Lookup Lex:"):
                    Lexfile = action[action.index(":") + 1:].strip().split(",")
                    for lex in Lexfile:
                        lex = lex.strip()
                        if lex:
                            Lexicon.LoadLexicon(XLocation + lex)

                if action.startswith("Lookup defLex:"):
                    Compoundfile = action[action.index(":") +
                                          1:].strip().split(",")
                    for compound in Compoundfile:
                        compound = compound.strip()
                        if compound:
                            Lexicon.LoadLexicon(
                                XLocation + compound,
                                lookupSource=LexiconLookupSource.defLex)

                if action.startswith("Lookup External:"):
                    Externalfile = action[action.index(":") +
                                          1:].strip().split(",")
                    for external in Externalfile:
                        external = external.strip()
                        if external:
                            Lexicon.LoadLexicon(
                                XLocation + external,
                                lookupSource=LexiconLookupSource.External)

                if action.startswith("Lookup oQcQ:"):
                    oQoCfile = action[action.index(":") +
                                      1:].strip().split(",")
                    for oQoC in oQoCfile:
                        oQoC = oQoC.strip()
                        if oQoC:
                            Lexicon.LoadLexicon(
                                XLocation + oQoC,
                                lookupSource=LexiconLookupSource.oQcQ)
            Lexicon.LoadSegmentLexicon()
            Reply += "Reloaded lexicon at " + str(datetime.now())

        if ReloadTask.lower() == "/rule":
            logging.info("Start loading rules...")
            #Rules.ResetAllRules()
            #ProcessSentence.WinningRuleDict.clear()
            GlobalmacroLocation = os.path.join(XLocation,
                                               "../Y/GlobalMacro.txt")
            Rules.LoadGlobalMacro(GlobalmacroLocation)

            for action in ProcessSentence.PipeLine:
                if action.startswith("FSA "):
                    Rulefile = action[3:].strip()
                    RuleLocation = os.path.join(XLocation, Rulefile)
                    if RuleLocation.startswith("."):
                        RuleLocation = os.path.join(
                            os.path.dirname(os.path.realpath(__file__)),
                            RuleLocation)
                    if not systemfileolderthanDB or not Rules.RuleFileOlderThanDB(
                            RuleLocation):
                        Rules.LoadRules(XLocation, Rulefile,
                                        systemfileolderthanDB)

                elif action.startswith("DAGFSA_APP "):  # FUZZY
                    Rulefile = action[10:].strip()
                    RuleLocation = os.path.join(XLocation, Rulefile)
                    if RuleLocation.startswith("."):
                        RuleLocation = os.path.join(
                            os.path.dirname(os.path.realpath(__file__)),
                            RuleLocation)
                    if not systemfileolderthanDB or not Rules.RuleFileOlderThanDB(
                            RuleLocation):
                        Rules.LoadRules(XLocation,
                                        Rulefile,
                                        systemfileolderthanDB,
                                        fuzzy=True)
                    # Rules.LoadRules(XLocation, Rulefile, systemfileolderthanDB, fuzzy=True)

                elif action.startswith("DAGFSA "):
                    Rulefile = action[6:].strip()
                    RuleLocation = os.path.join(XLocation, Rulefile)
                    if RuleLocation.startswith("."):
                        RuleLocation = os.path.join(
                            os.path.dirname(os.path.realpath(__file__)),
                            RuleLocation)
                    if not systemfileolderthanDB or not Rules.RuleFileOlderThanDB(
                            RuleLocation):
                        Rules.LoadRules(XLocation, Rulefile,
                                        systemfileolderthanDB)

            Reply += "Reloaded rules at " + str(datetime.now())

        if ReloadTask.lower() == "/pipeline":
            logging.info("Start loading pipeline...")
            Rules.ResetAllRules()
            ProcessSentence.PipeLine = []
            ProcessSentence.LoadCommon()
            Reply += "Reloaded pipeline at " + str(datetime.now())

        ProcessSentence.UpdateSystemFileFromDB(XLocation)

        self.send_response(200)
        self.send_header('Content-type', "text/html; charset=utf-8")
        self.end_headers()
        self.wfile.write(Reply.encode("utf-8"))
        utils.CloseDB(utils.DBCon)
Ejemplo n.º 3
0
def LoadCommon():
    if not utils.DisableDB:
        InitDB()

        import Cache
        Cache.LoadSentenceDB()

    PipeLineLocation = ParserConfig.get("main", "Pipelinefile")
    FILE_ABS_PATH = os.path.dirname(os.path.abspath(__file__))
    XLocation = FILE_ABS_PATH  + '/' + os.path.dirname(PipeLineLocation) + "/"
    #XLocation = os.path.dirname(PipeLineLocation) + "/"

    FeaturefileLocation = os.path.join(XLocation, "../Y/feature.txt")
    GlobalmacroLocation = os.path.join(XLocation, "../Y/GlobalMacro.txt")
    # PunctuatefileLocation = os.path.join(XLocation, "../Y/LexY-EnglishPunctuate.txt")


    FeatureOntology.LoadFeatureOntology(FeaturefileLocation)
    systemfileolderthanDB = SystemFileOlderThanDB(XLocation)

    LoadPipeline(PipeLineLocation)

    if logging.root.isEnabledFor(logging.DEBUG):
        logging.debug("Runtype:" + ParserConfig.get("main", "runtype"))
    if logging.root.isEnabledFor(logging.DEBUG):
        logging.debug("utils.Runtype:" + utils.ParserConfig.get("main", "runtype"))

    Rules.LoadGlobalMacro(GlobalmacroLocation)


    # Lexicon.LoadLexicon(PunctuatefileLocation)

    for action in PipeLine:
        if action.startswith("FSA "):
            Rulefile = action[3:].strip()
            Rules.LoadRules(XLocation, Rulefile,systemfileolderthanDB)

        if action.startswith("DAGFSA "):
            Rulefile = action[6:].strip()
            Rules.LoadRules(XLocation, Rulefile,systemfileolderthanDB)

        if action.startswith("DAGFSA_APP "): #FUZZY
            Rulefile = action[10:].strip()
            Rules.LoadRules(XLocation, Rulefile,systemfileolderthanDB, fuzzy=True)

        if action.startswith("Lookup Spelling:"):
            Spellfile = action[action.index(":")+1:].strip().split(",")
            for spell in Spellfile:
                spell = spell.strip()
                if spell:
                    Lexicon.LoadExtraReference(XLocation + spell, Lexicon._LexiconCuobieziDict)

        if action.startswith("Lookup Encoding:"):
            Encodefile = action[action.index(":")+1:].strip().split(",")
            for encode in Encodefile:
                encode = encode.strip()
                if encode:
                    Lexicon.LoadExtraReference(XLocation + encode, Lexicon._LexiconFantiDict)

        if action.startswith("Lookup Main:"):
            Mainfile = action[action.index(":")+1:].strip().split(",")
            for main in Mainfile:
                main = main.strip()
                if main:
                    Lexicon.LoadMainLexicon(XLocation + main)

        if action.startswith("Lookup SegmentSlash:"):
            Slashfile = action[action.index(":")+1:].strip().split(",")
            for slash in Slashfile:
                slash = slash.strip()
                if slash:
                    Lexicon.LoadSegmentSlash(XLocation + slash)

        if action.startswith("Lookup Lex:"):
            Lexfile = action[action.index(":")+1:].strip().split(",")
            for lex in Lexfile:
                lex = lex.strip()
                if lex:
                    Lexicon.LoadLexicon(XLocation + lex)

        # (O.O)
        if action.startswith("Stemming:"):
            Stemfile = action[action.index(":") + 1:].strip().split(",")
            inf = Stemfile[0].strip()
            Rules.LoadRules(XLocation, inf, systemfileolderthanDB)
            Lexicon.LoadSuffix(XLocation + inf, inf)
            for stem in Stemfile[1:]:
                stem = stem.strip()
                if stem:
                    Lexicon.LoadLexicon(XLocation + stem, lookupSource=LexiconLookupSource.stemming)

        if action.startswith("Lookup Compound:"):
            Compoundfile = action[action.index(":")+1:].strip().split(",")
            for compound in Compoundfile:
                compound = compound.strip()
                if compound:
                    Lexicon.LoadLexicon(XLocation + compound, lookupSource=LexiconLookupSource.Compound)

        if action.startswith("Lookup defLex:"):
            Compoundfile = action[action.index(":")+1:].strip().split(",")
            for compound in Compoundfile:
                compound = compound.strip()
                if compound:
                    Lexicon.LoadLexicon(XLocation + compound, lookupSource=LexiconLookupSource.defLex)

        if action.startswith("Lookup External:"):
            Externalfile = action[action.index(":")+1:].strip().split(",")
            for external in Externalfile:
                external = external.strip()
                if external:
                    Lexicon.LoadLexicon(XLocation + external,lookupSource=LexiconLookupSource.External)

        if action.startswith("Lookup oQcQ:"):
            oQoCfile = action[action.index(":")+1:].strip().split(",")
            for oQoC in oQoCfile:
                oQoC = oQoC.strip()
                if oQoC:
                    Lexicon.LoadLexicon(XLocation + oQoC,lookupSource=LexiconLookupSource.oQcQ)

        if action.startswith("Lookup IE:"):
            compositefile = action[action.index(":")+1:].strip().split(",")
            for composite in compositefile:
                comp = composite.strip()
                if comp:
                    Lexicon.LoadCompositeKG(XLocation + comp)

    Lexicon.LoadSegmentLexicon()
    UpdateSystemFileFromDB(XLocation)

    if not utils.DisableDB:
        CloseDB(utils.DBCon)
    if ParserConfig.get("main", "runtype") == "Debug":
        logging.debug("Start writing temporary rule files")
        Rules.OutputRuleFiles(ParserConfig.get("main", "compiledfolder"))
        FeatureOntology.OutputFeatureOntologyFile(ParserConfig.get("main", "compiledfolder"))
        logging.debug("Start writing temporary lex file.")
        #Lexicon.OutputLexiconFile(ParserConfig.get("main", "compiledfolder"))


    #Rules._PreProcess_RuleIDNormalize()
    logging.debug("Done of LoadCommon!")