Beispiel #1
0
class ServerEntity:
    """Gazetteer-based extractor that finds server mentions in text.

    Every member is a classmethod; the class is used purely as a namespace.
    """

    @classmethod
    def entity_type(cls):
        """Return the type identifier written into each produced entity."""
        return ClassTool.class2fullpath(cls)

    @classmethod
    def text2norm(cls, text):
        """Normalize ``text`` with ``str2lower``; used as the matcher normalizer."""
        return str2lower(text)

    @classmethod
    @FunctionTool.wrapper2wraps_applied(
        lru_cache(maxsize=HenriqueLocale.lang_count()))
    def lang2matcher(cls, lang):
        """Build the server ``GazetteerMatcher`` for ``lang``.

        The result is memoized in an LRU cache sized to the number of
        supported languages.

        :param lang: language whose recognizable languages supply the aliases.
        :return: a ``GazetteerMatcher`` mapping server codenames to aliases.
        """
        recognizable_langs = HenriqueLocale.lang2langs_recognizable(lang)
        servers = Server.list_all()

        def iter_codename_dicts():
            # Yield one single-entry {codename: aliases} dict per server so
            # merge_dicts can combine them.
            for server in servers:
                alias_list = Server.server_langs2aliases(
                    server, recognizable_langs)
                yield {Server.server2codename(server): alias_list}

        # NOTE(review): vwrite_no_duplicate_key presumably rejects two servers
        # sharing a codename -- confirm against its definition.
        codename2aliases = merge_dicts(iter_codename_dicts(),
                                       vwrite=vwrite_no_duplicate_key)
        assert_is_not_none(codename2aliases)

        matcher_config = {
            GazetteerMatcher.Config.Key.NORMALIZER: cls.text2norm,
        }
        return GazetteerMatcher(codename2aliases, matcher_config)

    @classmethod
    @CacheTool.cache2hashable(
        cache=lru_cache(maxsize=HenriqueEntity.Cache.DEFAULT_SIZE),
        f_pair=CacheTool.JSON.func_pair(),
    )
    def text2entity_list(cls, text_in, config=None):
        """Extract every server entity found in ``text_in``.

        :param text_in: text to scan.
        :param config: optional entity config; its locale selects the language
            and falls back to ``HenriqueLocale.DEFAULT`` when absent.
        :return: list of dicts keyed by the ``FoxylibEntity.Field`` members
            SPAN, TEXT, VALUE and TYPE.
        """
        locale = HenriqueEntity.Config.config2locale(config)
        if not locale:
            locale = HenriqueLocale.DEFAULT

        lang = LocaleTool.locale2lang(locale)
        if not lang:
            lang = LocaleTool.locale2lang(HenriqueLocale.DEFAULT)

        # Materialize all matches first, then convert each into an entity.
        matches = list(cls.lang2matcher(lang).text2span_value_iter(text_in))

        entities = []
        for span, value in matches:
            entities.append({
                FoxylibEntity.Field.SPAN: span,
                FoxylibEntity.Field.TEXT:
                    StringTool.str_span2substr(text_in, span),
                FoxylibEntity.Field.VALUE: value,
                FoxylibEntity.Field.TYPE: cls.entity_type(),
            })
        return entities
Beispiel #2
0
class PortEntity:
    """Gazetteer-based extractor that finds port mentions in text.

    Every member is a classmethod; the class is used purely as a namespace.
    """

    @classmethod
    def entity_type(cls):
        """Return the type identifier written into each produced entity."""
        return ClassTool.class2fullpath(cls)

    @classmethod
    def text2norm(cls, text):
        """Normalize ``text`` with ``str2lower``; used as the matcher normalizer."""
        return str2lower(text)

    @classmethod
    # Formerly memoized with lru_cache:
    # @FunctionTool.wrapper2wraps_applied(lru_cache(maxsize=HenriqueLocale.lang_count()))
    @cached(cache=TTLCache(maxsize=HenriqueLocale.lang_count(),
                           ttl=HenriqueEntity.Cache.DEFAULT_TTL))
    def lang2matcher(cls, lang):
        """Build the port ``GazetteerMatcher`` for ``lang``.

        The result is kept in a TTL cache sized to the number of supported
        languages.

        :param lang: language whose recognizable languages supply the aliases.
        :return: a ``GazetteerMatcher`` mapping port codenames to aliases.
        """
        recognizable_langs = HenriqueLocale.lang2langs_recognizable(lang)

        # One single-entry {codename: aliases} dict per port, merged below.
        per_port_dicts = []
        for port in Port.list_all():
            per_port_dicts.append({
                Port.port2codename(port):
                    Port.port_langs2aliases(port, recognizable_langs),
            })

        # NOTE(review): vwrite_no_duplicate_key presumably rejects two ports
        # sharing a codename -- confirm against its definition.
        codename2aliases = merge_dicts(per_port_dicts,
                                       vwrite=vwrite_no_duplicate_key)

        matcher_config = {
            GazetteerMatcher.Config.Key.NORMALIZER: cls.text2norm,
            GazetteerMatcher.Config.Key.TEXTS2PATTERN:
                HenriqueEntity.texts2pattern_port_tradegood,
        }
        return GazetteerMatcher(codename2aliases, matcher_config)

    @classmethod
    # Formerly cached with lru_cache(maxsize=HenriqueEntity.Cache.DEFAULT_SIZE)
    # and the same JSON f_pair.
    @CacheTool.cache2hashable(
        cache=cached(cache=TTLCache(maxsize=HenriqueEntity.Cache.DEFAULT_SIZE,
                                    ttl=HenriqueEntity.Cache.DEFAULT_TTL)),
        f_pair=CacheTool.JSON.func_pair(),
    )
    def text2entity_list(cls, text_in, config=None):
        """Extract every port entity found in ``text_in``.

        :param text_in: text to scan.
        :param config: optional entity config; its locale selects the language
            and falls back to ``HenriqueLocale.DEFAULT`` when absent.
        :return: list of dicts keyed by the ``FoxylibEntity.Field`` members
            SPAN, TEXT, VALUE and TYPE.
        """
        locale = HenriqueEntity.Config.config2locale(config)
        if not locale:
            locale = HenriqueLocale.DEFAULT

        lang = LocaleTool.locale2lang(locale)
        if not lang:
            lang = LocaleTool.locale2lang(HenriqueLocale.DEFAULT)

        # Materialize all matches first, then convert each into an entity.
        matches = list(cls.lang2matcher(lang).text2span_value_iter(text_in))

        entities = []
        for span, value in matches:
            entities.append({
                FoxylibEntity.Field.SPAN: span,
                FoxylibEntity.Field.TEXT:
                    StringTool.str_span2substr(text_in, span),
                FoxylibEntity.Field.VALUE: value,
                FoxylibEntity.Field.TYPE: cls.entity_type(),
            })
        return entities
Beispiel #3
0
class Me:
    """Loader for the ``me.yaml`` word list and its per-language regex."""

    @classmethod
    @FunctionTool.wrapper2wraps_applied(lru_cache(maxsize=2))
    def j_yaml(cls):
        """Load ``me.yaml`` from ``FILE_DIR`` with the safe YAML loader (memoized)."""
        yaml_path = os.path.join(FILE_DIR, "me.yaml")
        return YAMLTool.filepath2j(yaml_path, Loader=yaml.SafeLoader)

    @classmethod
    @FunctionTool.wrapper2wraps_applied(
        lru_cache(maxsize=HenriqueLocale.lang_count()))
    def lang2pattern(cls, lang):
        """Compile a case-insensitive pattern matching any word listed for ``lang``.

        Words are gathered from every language recognizable for ``lang``; a
        language absent from the YAML data contributes nothing.

        :param lang: language to build the pattern for.
        :return: compiled ``re`` pattern (``re.I``), memoized per language.
        """
        words_by_lang = cls.j_yaml()

        words = []
        for recognizable in HenriqueLocale.lang2langs_recognizable(lang):
            words.extend(words_by_lang.get(recognizable, []))

        # Escape each word so it is matched literally inside the alternation.
        alternation = RegexTool.rstr_iter2or(re.escape(word) for word in words)
        return re.compile(alternation, re.I)
Beispiel #4
0
class TradegoodtypeEntity:
    """Gazetteer-based extractor that finds tradegood-type mentions in text.

    Every member is a classmethod; the class is used purely as a namespace.
    """

    @classmethod
    def entity_type(cls):
        """Return the type identifier written into each produced entity."""
        return ClassTool.class2fullpath(cls)

    @classmethod
    def text2norm(cls, text):
        """Normalize ``text`` with ``str2lower``; used as the matcher normalizer."""
        return str2lower(text)

    @classmethod
    @WARMER.add(cond=not HenriqueEnv.is_skip_warmup())
    def _dict_lang2matcher(cls, ):
        """Build the matcher of every supported language, keyed by language.

        Registered with ``WARMER`` unless ``HenriqueEnv.is_skip_warmup()`` is
        true.
        """
        lang2matcher = {}
        for lang in HenriqueLocale.langs():
            lang2matcher[lang] = cls.lang2matcher(lang)
        return lang2matcher

    @classmethod
    @cached(cache=TTLCache(maxsize=HenriqueLocale.lang_count(),
                           ttl=HenriqueEntity.Cache.DEFAULT_TTL))
    # Formerly memoized with lru_cache:
    # @FunctionTool.wrapper2wraps_applied(lru_cache(maxsize=HenriqueLocale.lang_count()))
    def lang2matcher(cls, lang):
        """Build the tradegood-type ``GazetteerMatcher`` for ``lang``.

        The result is kept in a TTL cache sized to the number of supported
        languages.

        :param lang: language whose recognizable languages supply the aliases.
        :return: a ``GazetteerMatcher`` mapping codenames to aliases.
        """
        tradegoodtypes = Tradegoodtype.list_all()
        recognizable_langs = HenriqueLocale.lang2langs_recognizable(lang)

        # One single-entry {codename: aliases} dict per tradegood type; the
        # aliases of all recognizable languages are concatenated in order.
        per_type_dicts = []
        for tgt in tradegoodtypes:
            per_type_dicts.append({
                Tradegoodtype.tradegoodtype2codename(tgt): [
                    alias
                    for _lang in recognizable_langs
                    for alias in Tradegoodtype.tradegoodtype_lang2aliases(
                        tgt, _lang)
                ],
            })

        # NOTE(review): vwrite_no_duplicate_key presumably rejects duplicate
        # codenames -- confirm against its definition.
        value2aliases = merge_dicts(per_type_dicts,
                                    vwrite=vwrite_no_duplicate_key)

        matcher_config = {
            GazetteerMatcher.Config.Key.NORMALIZER: cls.text2norm,
            # Disabled option:
            # GazetteerMatcher.Config.Key.TEXTS2PATTERN: HenriqueEntity.texts2rstr_word_with_cardinal_suffix,
        }
        return GazetteerMatcher(value2aliases, matcher_config)

    @classmethod
    @CacheTool.cache2hashable(
        cache=cached(
            TTLCache(maxsize=HenriqueEntity.Cache.DEFAULT_SIZE,
                     ttl=HenriqueEntity.Cache.DEFAULT_TTL), ),
        f_pair=CacheTool.JSON.func_pair(),
    )
    def text2entity_list(cls, text_in, config=None):
        """Extract every tradegood-type entity found in ``text_in``.

        :param text_in: text to scan.
        :param config: optional entity config; its locale selects the language
            and falls back to ``HenriqueLocale.DEFAULT`` when absent.
        :return: list of dicts keyed by the ``FoxylibEntity.Field`` members
            SPAN, TEXT, VALUE and TYPE.
        """
        locale = HenriqueEntity.Config.config2locale(config)
        if not locale:
            locale = HenriqueLocale.DEFAULT

        lang = LocaleTool.locale2lang(locale)
        if not lang:
            lang = LocaleTool.locale2lang(HenriqueLocale.DEFAULT)

        # Materialize all matches first, then convert each into an entity.
        matches = list(cls.lang2matcher(lang).text2span_value_iter(text_in))

        entities = []
        for span, value in matches:
            entities.append({
                FoxylibEntity.Field.SPAN: span,
                FoxylibEntity.Field.TEXT:
                    StringTool.str_span2substr(text_in, span),
                FoxylibEntity.Field.VALUE: value,
                FoxylibEntity.Field.TYPE: cls.entity_type(),
            })
        return entities
Beispiel #5
0
class SkillEntity:
    """Gazetteer-based extractor that finds skill mentions in text.

    Every member is a classmethod; the class is used purely as a namespace.
    """

    @classmethod
    def entity_type(cls):
        """Return the type identifier written into each produced entity."""
        return ClassTool.class2fullpath(cls)

    @classmethod
    def entity2skill_codename(cls, entity):
        """Return the skill codename stored as the value of ``entity``."""
        return FoxylibEntity.entity2value(entity)

    @classmethod
    def text2norm(cls, text):
        """Normalize ``text`` with ``str2lower``; used as the matcher normalizer."""
        return str2lower(text)

    @classmethod
    def dict_lang2codename2aliases(cls):
        """Return the ``{lang: {codename: aliases}}`` table from ``SkillGooglesheets``."""
        # Imported inside the method rather than at module level
        # (presumably to avoid a circular import -- confirm).
        from henrique.main.document.skill.googlesheets.skill_googlesheets import SkillGooglesheets
        return SkillGooglesheets.dict_lang2codename2aliases()

    @classmethod
    @FunctionTool.wrapper2wraps_applied(lru_cache(maxsize=2))
    def codenames(cls):
        """Return the set of skill codenames shared by every language.

        The key sets of all languages are reduced with ``iter2singleton``.
        NOTE(review): that helper presumably requires every language to list
        the same codenames -- confirm against its definition.
        """
        codename_sets = [set(h_codename2aliases.keys())
                         for h_codename2aliases
                         in cls.dict_lang2codename2aliases().values()]
        return iter2singleton(codename_sets)

    @classmethod
    @FunctionTool.wrapper2wraps_applied(lru_cache(maxsize=HenriqueLocale.lang_count()))
    def lang2matcher(cls, lang):
        """Build the skill ``GazetteerMatcher`` for ``lang`` (memoized per language).

        :param lang: language whose recognizable languages supply the aliases.
        :return: a ``GazetteerMatcher`` mapping skill codenames to aliases.
        """
        langs = HenriqueLocale.lang2langs_recognizable(lang)

        h_lang2codename2aliases = cls.dict_lang2codename2aliases()

        def codename2texts(codename):
            # Distinct loop name so the outer ``lang`` parameter is not shadowed.
            for _lang in langs:
                aliases = JsonTool.down(h_lang2codename2aliases, [_lang, codename])
                if not aliases:
                    # No aliases for this codename in this language.
                    continue

                yield from aliases

        h_codename2texts = {codename: list(codename2texts(codename))
                            for codename in cls.codenames()}

        config = {GazetteerMatcher.Config.Key.NORMALIZER: cls.text2norm}
        return GazetteerMatcher(h_codename2texts, config)

    @classmethod
    @CacheTool.cache2hashable(cache=lru_cache(maxsize=HenriqueEntity.Cache.DEFAULT_SIZE),
                              f_pair=CacheTool.JSON.func_pair(), )
    def text2entity_list(cls, text_in, config=None):
        """Extract every skill entity found in ``text_in``.

        :param text_in: text to scan.
        :param config: optional entity config; its locale selects the language.
            When the config yields no locale or no language, the default
            locale ``HenriqueLocale.DEFAULT`` is used.
        :return: list of dicts keyed by the ``FoxylibEntity.Field`` members
            SPAN, TEXT, VALUE and TYPE.
        """
        # Fall back to the default locale/language instead of handing a falsy
        # value to the matcher factory when ``config`` carries no locale.
        locale = HenriqueEntity.Config.config2locale(config) or HenriqueLocale.DEFAULT
        lang = LocaleTool.locale2lang(locale) or LocaleTool.locale2lang(HenriqueLocale.DEFAULT)

        span_value_list = list(cls.lang2matcher(lang).text2span_value_iter(text_in))

        entity_list = [{FoxylibEntity.Field.SPAN: span,
                        FoxylibEntity.Field.TEXT: StringTool.str_span2substr(text_in, span),
                        FoxylibEntity.Field.VALUE: value,
                        FoxylibEntity.Field.TYPE: cls.entity_type(),
                        }
                       for span, value in span_value_list]
        return entity_list
Beispiel #6
0
class RateEntity:
    """Regex-based extractor that finds numeric rate mentions in text.

    A rate is a cardinal number optionally followed by a metric prefix.
    Every member is a classmethod; the class is used purely as a namespace.
    """

    @classmethod
    def entity_type(cls):
        """Return the type identifier written into each produced entity."""
        return ClassTool.class2fullpath(cls)

    @classmethod
    def text2norm(cls, text):
        """Normalize ``text`` with ``str2lower``."""
        return str2lower(text)

    @classmethod
    def rstr(cls):
        """Return the regex string for a rate.

        The string is a "cardinal" named group of digits, optional whitespace,
        then an optional "Metricprefix" named group.
        """
        rstr = format_str(
            r"{}\s*{}?",
            RegexTool.name_rstr2named(
                "cardinal",
                # Raw string: a plain "\d+" is an invalid escape sequence.
                r"\d+",
            ),
            RegexTool.name_rstr2named(
                "Metricprefix",
                Metricprefix.rstr(),
            ),
        )
        return rstr

    @classmethod
    @FunctionTool.wrapper2wraps_applied(lru_cache(maxsize=2))
    def rstr_last_char(cls):
        """Return a regex string matching what a rate can end with.

        That is either a digit or a metric prefix. Memoized.
        """
        rstr_suffix_list = [r"\d", Metricprefix.rstr()]
        return RegexTool.rstr_iter2or(rstr_suffix_list)

    @classmethod
    def match2value(cls, m):
        """Convert a match of the rate pattern into its integer value.

        :param m: match object exposing the "cardinal" and "Metricprefix" groups.
        :return: the cardinal multiplied by the metric-prefix multiple, or by 1
            when ``Metricprefix.text2multiple`` returns a falsy value.
        """
        v = int(m.group("cardinal"))
        multiple = Metricprefix.text2multiple(m.group("Metricprefix")) or 1
        return v * multiple

    @classmethod
    @FunctionTool.wrapper2wraps_applied(
        lru_cache(maxsize=HenriqueLocale.lang_count()))
    def lang2pattern(cls, lang):
        """Compile the case-insensitive rate pattern for ``lang`` (memoized).

        :param lang: language used to obtain the trend regex string.
        :return: compiled ``re`` pattern (``re.I``) for a right-bounded rate.
        """
        # Imported inside the method rather than at module level
        # (presumably to avoid a circular import with TrendEntity -- confirm).
        from henrique.main.document.price.trend.trend_entity import TrendEntity
        logger = HenriqueLogger.func_level2logger(cls.lang2pattern,
                                                  logging.DEBUG)

        rstr_trend = TrendEntity.lang2rstr(lang)

        # A rate may be concatenated with a port/tradegood name on its left,
        # so only the right side is bounded. The right bound is either a word
        # bound prefixed by a trend expression, or a plain word bound.
        # NOTE(review): exact semantics of bounds2prefixed are not visible here.
        right_bounds = lchain(
            RegexTool.bounds2prefixed(RegexTool.right_wordbounds(),
                                      rstr_trend),
            RegexTool.right_wordbounds(),
        )
        rstr_rightbound = RegexTool.rstr2right_bounded(cls.rstr(),
                                                       right_bounds)

        logger.debug({
            "rstr_rightbound": rstr_rightbound,
        })
        return re.compile(rstr_rightbound, re.I)

    @classmethod
    @CacheTool.cache2hashable(
        cache=lru_cache(maxsize=HenriqueEntity.Cache.DEFAULT_SIZE),
        f_pair=CacheTool.JSON.func_pair(),
    )
    def text2entity_list(cls, text_in, config=None):
        """Extract every rate entity found in ``text_in``.

        :param text_in: text to scan.
        :param config: optional entity config; its locale selects the language
            and falls back to ``HenriqueLocale.DEFAULT`` when absent.
        :return: list of dicts keyed by the ``FoxylibEntity.Field`` members
            SPAN, TEXT, VALUE and TYPE.
        """
        locale = HenriqueEntity.Config.config2locale(
            config) or HenriqueLocale.DEFAULT
        lang = LocaleTool.locale2lang(locale) or LocaleTool.locale2lang(
            HenriqueLocale.DEFAULT)

        pattern = cls.lang2pattern(lang)
        m_list = list(pattern.finditer(text_in))

        def match2entity(m):
            # Build one entity dict from a single regex match.
            span = m.span()
            entity = {
                FoxylibEntity.Field.SPAN: span,
                FoxylibEntity.Field.TEXT:
                StringTool.str_span2substr(text_in, span),
                FoxylibEntity.Field.VALUE: cls.match2value(m),
                FoxylibEntity.Field.TYPE: cls.entity_type(),
            }
            return entity

        entity_list = lmap(match2entity, m_list)
        return entity_list
Beispiel #7
0
class CultureEntity:
    """Gazetteer-based extractor that finds culture mentions in text.

    Every member is a classmethod; the class is used purely as a namespace.
    """

    @classmethod
    def entity_type(cls):
        """Return the type identifier written into each produced entity."""
        return ClassTool.class2fullpath(cls)

    @classmethod
    def text2norm(cls, text):
        """Normalize ``text`` with ``str2lower``; used as the matcher normalizer."""
        return str2lower(text)

    # Warm-up hook, currently disabled:
    # @classmethod
    # @WARMER.add(cond=not HenriqueEnv.is_skip_warmup())
    # def _dict_lang2matcher(cls,):
    #     return {lang: cls.lang2matcher(lang) for lang in HenriqueLocale.langs()}

    @classmethod
    @cached(cache=TTLCache(maxsize=HenriqueLocale.lang_count(),
                           ttl=HenriqueEntity.Cache.DEFAULT_TTL))
    # Formerly memoized with lru_cache:
    # @FunctionTool.wrapper2wraps_applied(lru_cache(maxsize=HenriqueLocale.lang_count()))
    def lang2matcher(cls, lang):
        """Build the culture ``GazetteerMatcher`` for ``lang``.

        The result is kept in a TTL cache sized to the number of supported
        languages.

        :param lang: language whose recognizable languages supply the aliases.
        :return: a ``GazetteerMatcher`` mapping culture codenames to aliases.
        """
        recognizable_langs = HenriqueLocale.lang2langs_recognizable(lang)
        cultures = Culture.list_all()

        def iter_codename_dicts():
            # Yield one single-entry {codename: aliases} dict per culture so
            # merge_dicts can combine them.
            for culture in cultures:
                alias_list = Culture.culture_langs2aliases(
                    culture, recognizable_langs)
                yield {Culture.culture2codename(culture): alias_list}

        # NOTE(review): vwrite_no_duplicate_key presumably rejects two cultures
        # sharing a codename -- confirm against its definition.
        codename2aliases = merge_dicts(iter_codename_dicts(),
                                       vwrite=vwrite_no_duplicate_key)
        assert_is_not_none(codename2aliases)

        matcher_config = {
            GazetteerMatcher.Config.Key.NORMALIZER: cls.text2norm,
        }
        return GazetteerMatcher(codename2aliases, matcher_config)

    @classmethod
    @CacheTool.cache2hashable(
        cache=cached(cache=TTLCache(maxsize=HenriqueEntity.Cache.DEFAULT_SIZE,
                                    ttl=HenriqueEntity.Cache.DEFAULT_TTL)),
        f_pair=CacheTool.JSON.func_pair(),
    )
    def text2entity_list(cls, text_in, config=None):
        """Extract every culture entity found in ``text_in``.

        :param text_in: text to scan.
        :param config: optional entity config; its locale selects the language
            and falls back to ``HenriqueLocale.DEFAULT`` when absent.
        :return: list of dicts keyed by the ``FoxylibEntity.Field`` members
            SPAN, TEXT, VALUE and TYPE.
        """
        locale = HenriqueEntity.Config.config2locale(config)
        if not locale:
            locale = HenriqueLocale.DEFAULT

        lang = LocaleTool.locale2lang(locale)
        if not lang:
            lang = LocaleTool.locale2lang(HenriqueLocale.DEFAULT)

        # Materialize all matches first, then convert each into an entity.
        matches = list(cls.lang2matcher(lang).text2span_value_iter(text_in))

        entities = []
        for span, value in matches:
            entities.append({
                FoxylibEntity.Field.SPAN: span,
                FoxylibEntity.Field.TEXT:
                    StringTool.str_span2substr(text_in, span),
                FoxylibEntity.Field.VALUE: value,
                FoxylibEntity.Field.TYPE: cls.entity_type(),
            })
        return entities
Beispiel #8
0
class TrendEntity:
    """Regex-based extractor that finds price-trend mentions in text.

    Every member is a classmethod; the class is used purely as a namespace.
    """

    @classmethod
    def entity_type(cls):
        """Return the type identifier written into each produced entity."""
        return ClassTool.class2fullpath(cls)

    @classmethod
    def text2norm(cls, text):
        """Normalize ``text`` with ``str2lower``; alias table keys use this form."""
        return str2lower(text)

    @classmethod
    @FunctionTool.wrapper2wraps_applied(lru_cache(maxsize=2))
    def dict_lang2codename2texts(cls):
        """Return the ``{lang: {codename: texts}}`` table from ``TrendGooglesheets`` (memoized)."""
        # Imported inside the method rather than at module level
        # (presumably to avoid a circular import -- confirm).
        from henrique.main.document.price.trend.googlesheets.trend_googlesheets import TrendGooglesheets
        h = TrendGooglesheets.dict_lang2codename2texts()
        return h

    @classmethod
    @FunctionTool.wrapper2wraps_applied(
        lru_cache(maxsize=HenriqueLocale.lang_count()))
    def _lang2dict_alias2codename(cls, lang):
        """Build the ``{normalized alias: codename}`` table for ``lang`` (memoized).

        Aliases come from every language recognizable for ``lang``. A
        recognizable language with no entry in the source table contributes
        nothing.

        :param lang: language to build the table for.
        :return: dict whose keys are aliases normalized with ``text2norm``.
        """
        langs = HenriqueLocale.lang2langs_recognizable(lang)
        h = cls.dict_lang2codename2texts()

        # ``or {}`` guards against a recognizable language missing from ``h``,
        # which would otherwise raise AttributeError on ``None.items()``.
        # The loop variable is ``_lang`` so the parameter is not shadowed.
        h_alias2codename = merge_dicts(
            [{cls.text2norm(alias): codename}
             for _lang in langs
             for codename, aliases in (h.get(_lang) or {}).items()
             for alias in aliases],
            vwrite=vwrite_no_duplicate_key)
        return h_alias2codename

    @classmethod
    def lang_alias2codename(cls, lang, alias):
        """Look up the trend codename for ``alias`` in ``lang``.

        ``alias`` is normalized with ``text2norm`` before the lookup because
        the table keys are stored normalized, while the matching pattern is
        case-insensitive and may therefore yield mixed-case text.

        :param lang: language whose alias table is consulted.
        :param alias: alias text, in any letter case.
        :return: the codename, or ``None`` when the alias is unknown.
        """
        h_alias2codename = cls._lang2dict_alias2codename(lang)
        return h_alias2codename.get(cls.text2norm(alias))

    @classmethod
    def lang2rstr(cls, lang):
        """Return a regex string alternating over every trend alias of ``lang``."""
        aliases = cls._lang2dict_alias2codename(lang).keys()
        return RegexTool.rstr_iter2or(
            map(lambda x: re.escape(cls.text2norm(x)), aliases))

    @classmethod
    def lang2pattern(cls, lang):
        """Compile the case-insensitive trend pattern for ``lang``.

        The alias alternation is bounded on the left by either the last
        character of a rate or whitespace, and on the right by a word bound.
        """
        # Imported inside the method rather than at module level
        # (presumably to avoid a circular import with RateEntity -- confirm).
        from henrique.main.document.price.rate.rate_entity import RateEntity
        logger = HenriqueLogger.func_level2logger(cls.lang2pattern,
                                                  logging.DEBUG)

        left_bounds = [
            RateEntity.rstr_last_char(),
            r"\s",
        ]
        right_bounds = RegexTool.right_wordbounds()
        rstr = RegexTool.rstr2bounded(cls.lang2rstr(lang), left_bounds,
                                      right_bounds)

        logger.debug({"left_bounds": left_bounds, "rstr": rstr})
        return re.compile(rstr, re.I)

    @classmethod
    @CacheTool.cache2hashable(
        cache=lru_cache(maxsize=HenriqueEntity.Cache.DEFAULT_SIZE),
        f_pair=CacheTool.JSON.func_pair(),
    )
    def text2entity_list(cls, text_in, config=None):
        """Extract every trend entity found in ``text_in``.

        :param text_in: text to scan.
        :param config: optional entity config; its locale selects the language.
            When the config yields no locale or no language, the default
            locale ``HenriqueLocale.DEFAULT`` is used.
        :return: list of dicts keyed by the ``FoxylibEntity.Field`` members
            VALUE, TEXT, SPAN and TYPE.
        """
        locale = HenriqueEntity.Config.config2locale(
            config) or HenriqueLocale.DEFAULT
        # Fall back to the default language when the locale yields none.
        lang = LocaleTool.locale2lang(locale) or LocaleTool.locale2lang(
            HenriqueLocale.DEFAULT)

        pattern = cls.lang2pattern(lang)

        m_list = list(pattern.finditer(text_in))

        def match2entity(match):
            # TEXT keeps the original casing; the codename lookup normalizes it.
            span = match.span()
            text = StringTool.str_span2substr(text_in, span)
            codename = cls.lang_alias2codename(lang, text)

            entity = {
                FoxylibEntity.Field.VALUE: codename,
                FoxylibEntity.Field.TEXT: text,
                FoxylibEntity.Field.SPAN: span,
                FoxylibEntity.Field.TYPE: cls.entity_type(),
            }
            return entity

        entity_list = lmap(match2entity, m_list)
        return entity_list