Exemple #1
0
 def _lang2dict_alias2server(cls, lang):
     """Build a mapping from server alias to server object.

     Covers every alias of every server, across all langs recognizable
     from `lang`.  Duplicate aliases raise via vwrite_no_duplicate_key.
     """
     langs = HenriqueLocale.lang2langs_recognizable(lang)
     dict_list = [
         {alias: server}
         for server in cls.list_all()
         for alias in cls.server_langs2aliases(server, langs)
     ]
     return merge_dicts(dict_list, vwrite=vwrite_no_duplicate_key)
Exemple #2
0
        def datetime_nanban2str_out(dt_nanban):
            """Render the nanban-time response text for `dt_nanban`.

            `dt_nanban` may be None (no nanban time known); in that case
            the template is rendered without any time fields.
            NOTE(review): relies on closure variables — lang, server,
            server_codename, utc_now, logger, FILE_DIR — defined in the
            enclosing scope.
            """
            filepath = os.path.join(FILE_DIR, "tmplt.{}.part.txt".format(lang))
            has_dt_nanban = (dt_nanban is not None)
            if not has_dt_nanban:
                # No known nanban datetime: render the short template variant.
                data = {
                    "server": Server.server_lang2name(server, lang),
                    "has_dt_nanban": has_dt_nanban,
                }
                str_out = HenriqueJinja2.textfile2text(filepath, data)
                return str_out

            # Time remaining until the nanban datetime.
            td_nanban = dt_nanban - utc_now

            tzdb = HenriqueLocale.lang2tzdb(lang)

            str_timedelta_nanban = NanbanTimedelta.timedelta_lang2text(
                td_nanban, lang)

            logger.debug({
                "server_codename": server_codename,
                "server": server,
                "dt_nanban": dt_nanban,
            })

            # Full template variant: nanban time, current time, and countdown.
            data = {
                "server": Server.server_lang2name(server, lang),
                "dt_nanban": NanbanTimedelta.datetime2text(dt_nanban, tzdb),
                "dt_now": NanbanTimedelta.datetime2text(utc_now, tzdb),
                "timedelta_nanban": str_timedelta_nanban,
                "has_dt_nanban": has_dt_nanban,
            }
            str_out = HenriqueJinja2.textfile2text(filepath, data)
            return str_out
Exemple #3
0
    def lang2pattern(cls, lang):
        """Compile a case-insensitive pattern matching any 'me' alias.

        Aliases are collected from the yaml table for every lang
        recognizable from `lang`, regex-escaped, and OR-joined.
        """
        j_me = cls.j_yaml()

        me_list = []
        for lang_each in HenriqueLocale.lang2langs_recognizable(lang):
            me_list.extend(j_me.get(lang_each, []))

        rstr = RegexTool.rstr_iter2or([re.escape(me) for me in me_list])
        return re.compile(rstr, re.I)
Exemple #4
0
class ServerEntity:
    @classmethod
    def entity_type(cls):
        """Fully-qualified class path used as the entity type tag."""
        return ClassTool.class2fullpath(cls)

    @classmethod
    def text2norm(cls, text):
        """Normalize text for matching (lowercase)."""
        return str2lower(text)

    @classmethod
    @FunctionTool.wrapper2wraps_applied(
        lru_cache(maxsize=HenriqueLocale.lang_count()))
    def lang2matcher(cls, lang):
        """Cached gazetteer matcher: server codename -> aliases in `lang`."""
        langs = HenriqueLocale.lang2langs_recognizable(lang)

        dict_list = [
            {Server.server2codename(server):
             Server.server_langs2aliases(server, langs)}
            for server in Server.list_all()
        ]
        # Duplicate codenames raise via vwrite_no_duplicate_key.
        h_codename2aliases = merge_dicts(dict_list,
                                         vwrite=vwrite_no_duplicate_key)
        assert_is_not_none(h_codename2aliases)

        config = {GazetteerMatcher.Config.Key.NORMALIZER: cls.text2norm}
        return GazetteerMatcher(h_codename2aliases, config)

    @classmethod
    @CacheTool.cache2hashable(
        cache=lru_cache(maxsize=HenriqueEntity.Cache.DEFAULT_SIZE),
        f_pair=CacheTool.JSON.func_pair(),
    )
    def text2entity_list(cls, text_in, config=None):
        """Extract server entities (span/text/value/type dicts) from `text_in`."""
        locale = (HenriqueEntity.Config.config2locale(config)
                  or HenriqueLocale.DEFAULT)
        lang = (LocaleTool.locale2lang(locale)
                or LocaleTool.locale2lang(HenriqueLocale.DEFAULT))

        matcher = cls.lang2matcher(lang)

        def span_value2entity(span, value):
            return {
                FoxylibEntity.Field.SPAN: span,
                FoxylibEntity.Field.TEXT:
                    StringTool.str_span2substr(text_in, span),
                FoxylibEntity.Field.VALUE: value,
                FoxylibEntity.Field.TYPE: cls.entity_type(),
            }

        return [span_value2entity(span, value)
                for span, value in matcher.text2span_value_iter(text_in)]
Exemple #5
0
    def _lang2dict_alias2codename(cls, lang):
        """Map normalized alias -> codename across langs recognizable from `lang`.

        Duplicate (normalized) aliases raise via vwrite_no_duplicate_key.
        """
        langs = HenriqueLocale.lang2langs_recognizable(lang)
        h = cls.dict_lang2codename2texts()

        # .get(lang_each, {}): a lang absent from the table previously
        # raised AttributeError on None.items(); skip it instead.
        # lang_each also no longer shadows the `lang` parameter.
        h_alias2codename = merge_dicts(
            [{cls.text2norm(alias): codename}
             for lang_each in langs
             for codename, aliases in h.get(lang_each, {}).items()
             for alias in aliases],
            vwrite=vwrite_no_duplicate_key)
        return h_alias2codename
Exemple #6
0
    def lang2matcher(cls, lang):
        """Gazetteer matcher: port codename -> aliases recognizable in `lang`."""
        langs = HenriqueLocale.lang2langs_recognizable(lang)

        def port2dict(port):
            return {Port.port2codename(port):
                    Port.port_langs2aliases(port, langs)}

        h_codename2aliases = merge_dicts(
            [port2dict(port) for port in Port.list_all()],
            vwrite=vwrite_no_duplicate_key)

        config = {
            GazetteerMatcher.Config.Key.NORMALIZER: cls.text2norm,
            GazetteerMatcher.Config.Key.TEXTS2PATTERN:
                HenriqueEntity.texts2pattern_port_tradegood,
        }
        return GazetteerMatcher(h_codename2aliases, config)
Exemple #7
0
    def lang2matcher(cls, lang):
        """Gazetteer matcher: server codename -> aliases recognizable in `lang`."""
        langs = HenriqueLocale.lang2langs_recognizable(lang)

        h_codename2aliases = merge_dicts(
            [{Server.server2codename(server):
              Server.server_langs2aliases(server, langs)}
             for server in Server.list_all()],
            vwrite=vwrite_no_duplicate_key)
        assert_is_not_none(h_codename2aliases)

        config = {GazetteerMatcher.Config.Key.NORMALIZER: cls.text2norm}
        return GazetteerMatcher(h_codename2aliases, config)
Exemple #8
0
class PortEntity:
    @classmethod
    def entity_type(cls):
        """Fully-qualified class path used as the entity type tag."""
        return ClassTool.class2fullpath(cls)

    @classmethod
    def text2norm(cls, text):
        """Normalize text for matching (lowercase)."""
        return str2lower(text)

    @classmethod
    @cached(cache=TTLCache(maxsize=HenriqueLocale.lang_count(),
                           ttl=HenriqueEntity.Cache.DEFAULT_TTL))
    def lang2matcher(cls, lang):
        """TTL-cached gazetteer matcher: port codename -> aliases in `lang`."""
        langs = HenriqueLocale.lang2langs_recognizable(lang)

        h_codename2aliases = merge_dicts(
            [{Port.port2codename(port): Port.port_langs2aliases(port, langs)}
             for port in Port.list_all()],
            vwrite=vwrite_no_duplicate_key)

        config = {
            GazetteerMatcher.Config.Key.NORMALIZER: cls.text2norm,
            GazetteerMatcher.Config.Key.TEXTS2PATTERN:
                HenriqueEntity.texts2pattern_port_tradegood,
        }
        return GazetteerMatcher(h_codename2aliases, config)

    @classmethod
    @CacheTool.cache2hashable(cache=cached(cache=TTLCache(maxsize=HenriqueEntity.Cache.DEFAULT_SIZE,
                                             ttl=HenriqueEntity.Cache.DEFAULT_TTL),
                                           ),
                              f_pair=CacheTool.JSON.func_pair(),)
    def text2entity_list(cls, text_in, config=None):
        """Extract port entities (span/text/value/type dicts) from `text_in`."""
        locale = (HenriqueEntity.Config.config2locale(config)
                  or HenriqueLocale.DEFAULT)
        lang = (LocaleTool.locale2lang(locale)
                or LocaleTool.locale2lang(HenriqueLocale.DEFAULT))

        matcher = cls.lang2matcher(lang)

        entity_list = []
        for span, value in matcher.text2span_value_iter(text_in):
            entity_list.append({
                FoxylibEntity.Field.SPAN: span,
                FoxylibEntity.Field.TEXT:
                    StringTool.str_span2substr(text_in, span),
                FoxylibEntity.Field.VALUE: value,
                FoxylibEntity.Field.TYPE: cls.entity_type(),
            })
        return entity_list
Exemple #9
0
    def text2element_list(cls, text_in, lang):
        """Parse '<number><timedelta-unit>' pairs out of `text_in`.

        Returns a list of {QUANTITY, UNIT, SPAN} dicts, one per
        number/unit pair whose gap is blank or empty.
        """
        logger = HenriqueLogger.func_level2logger(cls.text2element_list,
                                                  logging.DEBUG)

        langs = HenriqueLocale.lang2langs_recognizable(lang)
        logger.debug({"langs": langs})

        # All cardinal-number matches and their spans.
        match_list_number = list(cls.pattern_number().finditer(text_in))
        span_list_number = lmap(lambda m: m.span(), match_list_number)

        # All timedelta-unit matches as (span, unit-value) pairs.
        matcher = TimedeltaEntityUnit.langs2matcher(langs)
        span_value_list_timedelta_unit = list(
            matcher.text2span_value_iter(text_in))

        # Two parallel span lists: index 0 = numbers, index 1 = units.
        spans_list = [
            span_list_number,
            lmap(ig(0), span_value_list_timedelta_unit),
        ]

        # A number/unit pair combines only when the gap between them is
        # blank or the empty string.
        gap2is_valid = partial(StringTool.str_span2match_blank_or_nullstr,
                               text_in)
        indextuple_list = ContextfreeTool.spans_list2reducible_indextuple_list(
            spans_list, gap2is_valid)

        def indextuple2element(indextuple):
            # i indexes the number match, j the unit match.
            i, j = indextuple

            quantity = int(match_list_number[i].group())
            unit = span_value_list_timedelta_unit[j][1]

            # Element span runs from the number's start to the unit's end.
            span = (
                span_list_number[i][0],
                span_value_list_timedelta_unit[j][0][1],
            )

            element = {
                cls.Field.QUANTITY: quantity,
                cls.Field.UNIT: unit,
                cls.Field.SPAN: span,
            }

            return element

        element_list = lmap(indextuple2element, indextuple_list)
        return element_list
Exemple #10
0
    def test_03(self):
        """End-to-end nanban request with an explicit clock time.

        Sends '?남만 <H:MM AM/PM>' and checks the reported next-nanban
        time equals the requested time plus one nanban period.
        """
        # Bug fix: logger was named after self.test_02 (copy-paste).
        logger = HenriqueLogger.func_level2logger(self.test_03, logging.DEBUG)

        Chatroom.chatrooms2upsert([ChatroomKakaotalk.chatroom()])

        sender_name = "iris"
        channel_user_codename = ChannelUserKakaotalk.sender_name2codename(
            sender_name)
        ChannelUser.channel_users2upsert(
            [ChannelUserKakaotalk.sender_name2channel_user(sender_name)])

        now_seoul = datetime.now(
            tz=pytz.timezone(HenriqueLocale.lang2tzdb("ko")))
        dt_target = now_seoul - timedelta(seconds=3 * 60)
        # strftime zero-pads the hour; lstrip("0") mimics user-typed input.
        text = "?남만 {}".format(dt_target.strftime("%I:%M %p").lstrip("0"))
        logger.debug({
            "text": text,
            "now_seoul": now_seoul,
        })

        packet = {
            KhalaPacket.Field.TEXT: text,
            KhalaPacket.Field.CHATROOM: KakaotalkUWOChatroom.codename(),
            KhalaPacket.Field.CHANNEL_USER: channel_user_codename,
            KhalaPacket.Field.SENDER_NAME: sender_name,
        }
        response = NanbanSkill.packet2response(packet)

        response_lines = response.splitlines()

        # Fixed-width slice of the "next nanban time" line.
        span = (
            len("다음 남만 시각: "),
            len("다음 남만 시각: 3:58:00 PM (KST) "),
        )

        hyp = SpanTool.list_span2sublist(response_lines[2], span).strip()
        dt_nanban = dt_target + NanbanTimedelta.period()
        ref = dt_nanban.strftime("%I:%M:00 %p (KST)").lstrip("0")
        self.assertEqual(
            hyp,
            ref,
        )
Exemple #11
0
    def lang2matcher(cls, lang):
        """Gazetteer matcher built from the per-lang codename->aliases table."""
        langs = HenriqueLocale.lang2langs_recognizable(lang)
        h_lang2codename2aliases = cls.dict_lang2codename2aliases()

        def codename2texts(codename):
            # Collect aliases across every recognizable lang.
            texts = []
            for lang_each in langs:
                aliases = JsonTool.down(h_lang2codename2aliases,
                                        [lang_each, codename])
                if aliases:
                    texts.extend(aliases)
            return texts

        h_codename2texts = {codename: codename2texts(codename)
                            for codename in cls.codenames()}

        config = {GazetteerMatcher.Config.Key.NORMALIZER: cls.text2norm}
        return GazetteerMatcher(h_codename2texts, config)
Exemple #12
0
    def lang2matcher(cls, lang):
        """Gazetteer matcher: tradegood-type codename -> aliases in `lang`."""
        langs = HenriqueLocale.lang2langs_recognizable(lang)

        def tgt2aliases(tgt):
            return [alias
                    for lang_each in langs
                    for alias in Tradegoodtype.tradegoodtype_lang2aliases(
                        tgt, lang_each)]

        h_value2aliases = merge_dicts(
            [{Tradegoodtype.tradegoodtype2codename(tgt): tgt2aliases(tgt)}
             for tgt in Tradegoodtype.list_all()],
            vwrite=vwrite_no_duplicate_key)

        config = {GazetteerMatcher.Config.Key.NORMALIZER: cls.text2norm}
        return GazetteerMatcher(h_value2aliases, config)
Exemple #13
0
class Me:
    @classmethod
    @FunctionTool.wrapper2wraps_applied(lru_cache(maxsize=2))
    def j_yaml(cls):
        """Load and cache the 'me' alias table from me.yaml."""
        filepath = os.path.join(FILE_DIR, "me.yaml")
        return YAMLTool.filepath2j(filepath, Loader=yaml.SafeLoader)

    @classmethod
    @FunctionTool.wrapper2wraps_applied(
        lru_cache(maxsize=HenriqueLocale.lang_count()))
    def lang2pattern(cls, lang):
        """Case-insensitive regex matching any 'me' alias for `lang`."""
        j_me = cls.j_yaml()

        me_list = []
        for lang_each in HenriqueLocale.lang2langs_recognizable(lang):
            me_list.extend(j_me.get(lang_each, []))

        rstr = RegexTool.rstr_iter2or([re.escape(me) for me in me_list])
        return re.compile(rstr, re.I)
Exemple #14
0
    def lang2matcher(cls, lang):
        """Gazetteer matcher: tradegood codename -> aliases in `lang`."""
        langs = HenriqueLocale.lang2langs_recognizable(lang)

        def tg2aliases(tg):
            return [alias
                    for lang_each in langs
                    for alias in Tradegood.tradegood_lang2aliases(tg, lang_each)]

        h_value2aliases = merge_dicts(
            [{Tradegood.tradegood2codename(tg): tg2aliases(tg)}
             for tg in Tradegood.list_all()],
            vwrite=vwrite_no_duplicate_key)

        config = {
            GazetteerMatcher.Config.Key.NORMALIZER: cls.text2norm,
            GazetteerMatcher.Config.Key.TEXTS2PATTERN:
                HenriqueEntity.texts2pattern_port_tradegood,
        }
        return GazetteerMatcher(h_value2aliases, config)
Exemple #15
0
 def _dict_lang2matcher(cls, ):
     """Eagerly build one matcher per supported lang (warmup helper)."""
     h = {}
     for lang in HenriqueLocale.langs():
         h[lang] = cls.lang2matcher(lang)
     return h
Exemple #16
0
class TradegoodtypeEntity:
    """Entity extractor for tradegood types, backed by a gazetteer matcher."""

    @classmethod
    def entity_type(cls):
        """Fully-qualified class path used as the entity type tag."""
        return ClassTool.class2fullpath(cls)

    @classmethod
    def text2norm(cls, text):
        """Normalize text for matching (lowercase)."""
        return str2lower(text)

    @classmethod
    @WARMER.add(cond=not HenriqueEnv.is_skip_warmup())
    def _dict_lang2matcher(cls, ):
        """Eagerly build one matcher per supported lang (registered for warmup)."""
        return {
            lang: cls.lang2matcher(lang)
            for lang in HenriqueLocale.langs()
        }

    @classmethod
    @cached(cache=TTLCache(maxsize=HenriqueLocale.lang_count(),
                           ttl=HenriqueEntity.Cache.DEFAULT_TTL))
    # @FunctionTool.wrapper2wraps_applied(lru_cache(maxsize=HenriqueLocale.lang_count()))
    def lang2matcher(cls, lang):
        """TTL-cached gazetteer matcher: tradegood-type codename -> aliases."""
        tgt_list = Tradegoodtype.list_all()
        langs_recognizable = HenriqueLocale.lang2langs_recognizable(lang)

        def tgt2aliases(tgt):
            # Aliases across every recognizable lang.
            for _lang in langs_recognizable:
                yield from Tradegoodtype.tradegoodtype_lang2aliases(tgt, _lang)

        # Duplicate codenames raise via vwrite_no_duplicate_key.
        h_value2aliases = merge_dicts([{
            Tradegoodtype.tradegoodtype2codename(tgt):
            list(tgt2aliases(tgt))
        } for tgt in tgt_list],
                                      vwrite=vwrite_no_duplicate_key)

        config = {
            GazetteerMatcher.Config.Key.NORMALIZER: cls.text2norm,
            # GazetteerMatcher.Config.Key.TEXTS2PATTERN: HenriqueEntity.texts2rstr_word_with_cardinal_suffix,
        }
        matcher = GazetteerMatcher(h_value2aliases, config)
        return matcher

    @classmethod
    @CacheTool.cache2hashable(
        cache=cached(
            TTLCache(maxsize=HenriqueEntity.Cache.DEFAULT_SIZE,
                     ttl=HenriqueEntity.Cache.DEFAULT_TTL), ),
        f_pair=CacheTool.JSON.func_pair(),
    )
    def text2entity_list(cls, text_in, config=None):
        """Extract tradegood-type entities from `text_in`.

        Falls back to the default locale, then the default lang, when the
        config does not determine one.
        """
        locale = HenriqueEntity.Config.config2locale(
            config) or HenriqueLocale.DEFAULT
        lang = LocaleTool.locale2lang(locale) or LocaleTool.locale2lang(
            HenriqueLocale.DEFAULT)

        matcher = cls.lang2matcher(lang)
        span_value_list = list(matcher.text2span_value_iter(text_in))

        entity_list = [{
            FoxylibEntity.Field.SPAN:
            span,
            FoxylibEntity.Field.TEXT:
            StringTool.str_span2substr(text_in, span),
            FoxylibEntity.Field.VALUE:
            value,
            FoxylibEntity.Field.TYPE:
            cls.entity_type(),
        } for span, value in span_value_list]

        return entity_list
Exemple #17
0
class SkillEntity:
    """Entity extractor for skills, backed by a gazetteer matcher."""

    @classmethod
    def entity_type(cls):
        """Fully-qualified class path used as the entity type tag."""
        return ClassTool.class2fullpath(cls)

    @classmethod
    def entity2skill_codename(cls, entity):
        """Extract the skill codename carried in the entity's VALUE field."""
        return FoxylibEntity.entity2value(entity)

    @classmethod
    def text2norm(cls, text):
        """Normalize text for matching (lowercase)."""
        return str2lower(text)

    @classmethod
    def dict_lang2codename2aliases(cls):
        """lang -> codename -> aliases table, loaded from Google Sheets."""
        from henrique.main.document.skill.googlesheets.skill_googlesheets import SkillGooglesheets
        return SkillGooglesheets.dict_lang2codename2aliases()

    @classmethod
    @FunctionTool.wrapper2wraps_applied(lru_cache(maxsize=2))
    def codenames(cls):
        """Codename set, asserted identical across all langs (iter2singleton)."""
        codename_sets = [set(h_codename2aliases.keys())
                         for lang, h_codename2aliases in cls.dict_lang2codename2aliases().items()]
        return iter2singleton(codename_sets)

    @classmethod
    @FunctionTool.wrapper2wraps_applied(lru_cache(maxsize=HenriqueLocale.lang_count()))
    def lang2matcher(cls, lang):
        """Cached gazetteer matcher: skill codename -> alias texts in `lang`."""
        langs = HenriqueLocale.lang2langs_recognizable(lang)

        h_lang2codename2aliases = cls.dict_lang2codename2aliases()

        def codename2texts(codename):
            # Aliases across every recognizable lang; missing entries skipped.
            for lang_each in langs:
                aliases = JsonTool.down(h_lang2codename2aliases,
                                        [lang_each, codename])
                if not aliases:
                    continue

                yield from aliases

        h_codename2texts = {codename: list(codename2texts(codename))
                            for codename in cls.codenames()}

        config = {GazetteerMatcher.Config.Key.NORMALIZER: cls.text2norm}
        return GazetteerMatcher(h_codename2texts, config)

    @classmethod
    @CacheTool.cache2hashable(cache=lru_cache(maxsize=HenriqueEntity.Cache.DEFAULT_SIZE),
                              f_pair=CacheTool.JSON.func_pair(), )
    def text2entity_list(cls, text_in, config=None):
        """Extract skill entities from `text_in`.

        Fix: falls back to the default locale/lang like the sibling entity
        classes; previously a config without a locale left `lang` as None.
        """
        locale = (HenriqueEntity.Config.config2locale(config)
                  or HenriqueLocale.DEFAULT)
        lang = (LocaleTool.locale2lang(locale)
                or LocaleTool.locale2lang(HenriqueLocale.DEFAULT))

        span_value_list = list(cls.lang2matcher(lang).text2span_value_iter(text_in))

        entity_list = [{FoxylibEntity.Field.SPAN: span,
                        FoxylibEntity.Field.TEXT: StringTool.str_span2substr(text_in, span),
                        FoxylibEntity.Field.VALUE: value,
                        FoxylibEntity.Field.TYPE: cls.entity_type(),
                        }
                       for span, value in span_value_list]
        return entity_list
Exemple #18
0
    def packet2response(cls, packet):
        """Handle a nanban packet and return the lookup response text.

        No time entity in the text: report the current lookup as-is.
        Exactly one entity (relative timedelta or clock time): compute the
        nearest coming nanban datetime, persist it via
        cls.nanban_datetime2upsert_mongo, then report.  Any other entity
        count returns None (invalid request).
        """
        logger = HenriqueLogger.func_level2logger(cls.packet2response,
                                                  logging.DEBUG)
        logger.debug({"packet": packet})

        server_codename = HenriquePacket.packet2server(packet)
        chatroom = Chatroom.codename2chatroom(
            KhalaPacket.packet2chatroom(packet))
        # Chatroom locale decides lang and timezone for parsing/formatting.
        locale = Chatroom.chatroom2locale(chatroom) or HenriqueLocale.DEFAULT
        lang = LocaleTool.locale2lang(locale)
        tz = pytz.timezone(HenriqueLocale.lang2tzdb(lang))
        dt_now = datetime.now(tz)

        text_in = KhalaPacket.packet2text(packet)
        config = {HenriqueEntity.Config.Field.LOCALE: locale}
        # entity_list = RelativeTimedeltaEntity.text2entity_list(text_in, config=config)

        entity_list = HenriqueEntity.text_extractors2entity_list(
            text_in,
            cls.config2extractors(config),
        )
        logger.debug({
            "len(entity_list)": len(entity_list),
            "entity_list": entity_list,
        })

        # No time given: just report the current nanban lookup.
        if not entity_list:
            return cls.server_lang2lookup(server_codename, lang)

        if len(entity_list) != 1:
            return  # Invalid request

        entity = l_singleton2obj(entity_list)
        if FoxylibEntity.entity2type(
                entity) == RelativeTimedeltaEntity.entity_type():
            # Relative time: offset from the previously recorded nanban time.
            reldelta = RelativeTimedeltaEntity.entity2relativedelta(entity)
            dt_in = cls.relativedelta2nanban_datetime(
                server_codename,
                reldelta,
            )

            # raise Exception({"dt_in":dt_in, "reldelta":reldelta})

            # A relative offset requires a previously recorded nanban time.
            if dt_in is None:
                msg_error = NanbanSkillError.codename_lang2text(
                    NanbanSkillError.Codename.NO_PREV_NANBAN_ERROR, lang)
                raise HenriqueCommandError(msg_error)

            logger.debug({
                "reldelta": reldelta,
            })

        elif FoxylibEntity.entity2type(entity) == TimeEntity.entity_type():
            # Absolute clock time: combine with today's date in chatroom tz.
            time_in = TimeEntity.value2datetime_time(
                FoxylibEntity.entity2value(entity))
            dt_in = PytzTool.localize(datetime.combine(dt_now.date(), time_in),
                                      tz)
            logger.debug({
                "time_in": time_in,
                "dt_in": dt_in,
            })
        else:
            raise RuntimeError({
                "Invalid entity type: {}".format(
                    FoxylibEntity.entity2type(entity))
            })

        # Snap dt_in to the nearest upcoming nanban occurrence.
        dt_nearest = DatetimeTool.datetime2nearest(dt_in, dt_now,
                                                   NanbanTimedelta.period(),
                                                   Nearest.COMING)

        logger.debug({
            "text_in": text_in,
            "dt_now": dt_now,
            "dt_in": dt_in,
            "dt_nearest": dt_nearest,
        })

        cls.nanban_datetime2upsert_mongo(packet, dt_nearest)
        return cls.server_lang2lookup(server_codename, lang)
Exemple #19
0
class TrendEntity:
    """Entity extractor for price trends, driven by a regex over aliases."""

    @classmethod
    def entity_type(cls):
        """Fully-qualified class path used as the entity type tag."""
        return ClassTool.class2fullpath(cls)

    @classmethod
    def text2norm(cls, text):
        """Normalize text for matching (lowercase)."""
        return str2lower(text)

    @classmethod
    @FunctionTool.wrapper2wraps_applied(lru_cache(maxsize=2))
    def dict_lang2codename2texts(cls):
        """lang -> codename -> alias texts table, loaded from Google Sheets."""
        from henrique.main.document.price.trend.googlesheets.trend_googlesheets import TrendGooglesheets
        h = TrendGooglesheets.dict_lang2codename2texts()
        return h

    @classmethod
    @FunctionTool.wrapper2wraps_applied(
        lru_cache(maxsize=HenriqueLocale.lang_count()))
    def _lang2dict_alias2codename(cls, lang):
        """Map normalized alias -> trend codename across recognizable langs."""
        langs = HenriqueLocale.lang2langs_recognizable(lang)
        h = cls.dict_lang2codename2texts()

        # Fix: .get(lang_each, {}) — a lang missing from the table previously
        # raised AttributeError on None.items(); now it is skipped.
        # lang_each also no longer shadows the `lang` parameter.
        h_alias2codename = merge_dicts(
            [{cls.text2norm(alias): codename}
             for lang_each in langs
             for codename, aliases in h.get(lang_each, {}).items()
             for alias in aliases],
            vwrite=vwrite_no_duplicate_key)
        return h_alias2codename

    @classmethod
    def lang_alias2codename(cls, lang, alias):
        """Resolve a normalized alias to its codename; None if unknown."""
        h_alias2codename = cls._lang2dict_alias2codename(lang)
        return h_alias2codename.get(alias)

    @classmethod
    def lang2rstr(cls, lang):
        """Regex alternation over all normalized, escaped trend aliases."""
        aliases = cls._lang2dict_alias2codename(lang).keys()
        return RegexTool.rstr_iter2or(
            map(lambda x: re.escape(cls.text2norm(x)), aliases))

    @classmethod
    def lang2pattern(cls, lang):
        """Compile the bounded, case-insensitive trend pattern for `lang`.

        A trend token may directly follow a rate's last char (digit or
        metric prefix) or whitespace.
        """
        from henrique.main.document.price.rate.rate_entity import RateEntity
        logger = HenriqueLogger.func_level2logger(cls.lang2pattern,
                                                  logging.DEBUG)

        left_bounds = [
            RateEntity.rstr_last_char(),
            r"\s",
        ]
        right_bounds = RegexTool.right_wordbounds()
        rstr = RegexTool.rstr2bounded(cls.lang2rstr(lang), left_bounds,
                                      right_bounds)

        logger.debug({"left_bounds": left_bounds, "rstr": rstr})
        return re.compile(rstr, re.I)

    @classmethod
    @CacheTool.cache2hashable(
        cache=lru_cache(maxsize=HenriqueEntity.Cache.DEFAULT_SIZE),
        f_pair=CacheTool.JSON.func_pair(),
    )
    def text2entity_list(cls, text_in, config=None):
        """Extract trend entities from `text_in`.

        Fix: falls back to the default lang when the locale yields none,
        consistent with the sibling entity classes.
        """
        locale = HenriqueEntity.Config.config2locale(
            config) or HenriqueLocale.DEFAULT
        lang = (LocaleTool.locale2lang(locale)
                or LocaleTool.locale2lang(HenriqueLocale.DEFAULT))

        pattern = cls.lang2pattern(lang)

        m_list = list(pattern.finditer(text_in))

        def match2entity(match):
            span = match.span()
            text = StringTool.str_span2substr(text_in, span)
            codename = cls.lang_alias2codename(lang, text)

            entity = {
                FoxylibEntity.Field.VALUE: codename,
                FoxylibEntity.Field.TEXT: text,
                FoxylibEntity.Field.SPAN: span,
                FoxylibEntity.Field.TYPE: cls.entity_type(),
            }
            return entity

        entity_list = lmap(match2entity, m_list)
        return entity_list
Exemple #20
0
class RateEntity:
    """Entity extractor for price rates ('100', '3k', ...)."""

    @classmethod
    def entity_type(cls):
        """Fully-qualified class path used as the entity type tag."""
        return ClassTool.class2fullpath(cls)

    @classmethod
    def text2norm(cls, text):
        """Normalize text for matching (lowercase)."""
        return str2lower(text)

    @classmethod
    def rstr(cls):
        """Regex for a rate: named cardinal digits plus optional metric prefix."""
        rstr = format_str(
            r"{}\s*{}?",
            RegexTool.name_rstr2named(
                "cardinal",
                r"\d+",  # fix: raw string — "\d" is an invalid escape in a plain str
            ),
            RegexTool.name_rstr2named(
                "Metricprefix",
                Metricprefix.rstr(),
            ),
        )
        return rstr

    @classmethod
    @FunctionTool.wrapper2wraps_applied(lru_cache(maxsize=2))
    def rstr_last_char(cls):
        """Regex matching any char a rate may end with (digit or metric prefix)."""
        rstr_suffix_list = [r"\d", Metricprefix.rstr()]
        return RegexTool.rstr_iter2or(rstr_suffix_list)

    @classmethod
    def match2value(cls, m):
        """Convert a rate match to its integer value (cardinal * multiple)."""
        v = int(m.group("cardinal"))
        # Missing/unknown metric prefix counts as a multiple of 1.
        multiple = Metricprefix.text2multiple(m.group("Metricprefix")) or 1
        return v * multiple

    @classmethod
    @FunctionTool.wrapper2wraps_applied(
        lru_cache(maxsize=HenriqueLocale.lang_count()))
    def lang2pattern(cls, lang):
        """Compile the right-bounded rate pattern for `lang`.

        A rate may be immediately suffixed by a trend alias, so the right
        bound is either a trend alias followed by a word bound, or a plain
        word bound.
        """
        from henrique.main.document.price.trend.trend_entity import TrendEntity
        logger = HenriqueLogger.func_level2logger(cls.lang2pattern,
                                                  logging.DEBUG)

        rstr_trend = TrendEntity.lang2rstr(lang)

        right_bounds = lchain(
            RegexTool.bounds2prefixed(RegexTool.right_wordbounds(),
                                      rstr_trend),
            RegexTool.right_wordbounds(),
        )
        rstr_rightbound = RegexTool.rstr2right_bounded(cls.rstr(),
                                                       right_bounds)

        logger.debug({"rstr_rightbound": rstr_rightbound,
                      })
        return re.compile(rstr_rightbound, re.I)

    @classmethod
    @CacheTool.cache2hashable(
        cache=lru_cache(maxsize=HenriqueEntity.Cache.DEFAULT_SIZE),
        f_pair=CacheTool.JSON.func_pair(),
    )
    def text2entity_list(cls, text_in, config=None):
        """Extract rate entities (span/text/value/type dicts) from `text_in`."""
        locale = HenriqueEntity.Config.config2locale(
            config) or HenriqueLocale.DEFAULT
        lang = LocaleTool.locale2lang(locale) or LocaleTool.locale2lang(
            HenriqueLocale.DEFAULT)

        pattern = cls.lang2pattern(lang)
        m_list = list(pattern.finditer(text_in))

        def match2entity(m):
            span = m.span()
            entity = {
                FoxylibEntity.Field.SPAN: span,
                FoxylibEntity.Field.TEXT:
                StringTool.str_span2substr(text_in, span),
                FoxylibEntity.Field.VALUE: cls.match2value(m),
                FoxylibEntity.Field.TYPE: cls.entity_type(),
            }
            return entity

        entity_list = lmap(match2entity, m_list)
        return entity_list
Exemple #21
0
class CultureEntity:
    @classmethod
    def entity_type(cls):
        """Fully-qualified class path used as the entity type tag."""
        return ClassTool.class2fullpath(cls)

    @classmethod
    def text2norm(cls, text):
        """Normalize text for matching (lowercase)."""
        return str2lower(text)

    @classmethod
    @cached(cache=TTLCache(maxsize=HenriqueLocale.lang_count(),
                           ttl=HenriqueEntity.Cache.DEFAULT_TTL))
    def lang2matcher(cls, lang):
        """TTL-cached gazetteer matcher: culture codename -> aliases in `lang`."""
        langs = HenriqueLocale.lang2langs_recognizable(lang)

        dict_list = [
            {Culture.culture2codename(culture):
             Culture.culture_langs2aliases(culture, langs)}
            for culture in Culture.list_all()
        ]
        # Duplicate codenames raise via vwrite_no_duplicate_key.
        h_codename2aliases = merge_dicts(dict_list,
                                         vwrite=vwrite_no_duplicate_key)
        assert_is_not_none(h_codename2aliases)

        config = {GazetteerMatcher.Config.Key.NORMALIZER: cls.text2norm}
        return GazetteerMatcher(h_codename2aliases, config)

    @classmethod
    @CacheTool.cache2hashable(
        cache=cached(cache=TTLCache(maxsize=HenriqueEntity.Cache.DEFAULT_SIZE,
                                    ttl=HenriqueEntity.Cache.DEFAULT_TTL)),
        f_pair=CacheTool.JSON.func_pair(),
    )
    def text2entity_list(cls, text_in, config=None):
        """Extract culture entities (span/text/value/type dicts) from `text_in`."""
        locale = (HenriqueEntity.Config.config2locale(config)
                  or HenriqueLocale.DEFAULT)
        lang = (LocaleTool.locale2lang(locale)
                or LocaleTool.locale2lang(HenriqueLocale.DEFAULT))

        matcher = cls.lang2matcher(lang)

        entity_list = []
        for span, value in matcher.text2span_value_iter(text_in):
            entity_list.append({
                FoxylibEntity.Field.SPAN: span,
                FoxylibEntity.Field.TEXT:
                    StringTool.str_span2substr(text_in, span),
                FoxylibEntity.Field.VALUE: value,
                FoxylibEntity.Field.TYPE: cls.entity_type(),
            })
        return entity_list
Exemple #22
0
    def text2entity_list(cls, text_in, config=None):
        """Extract spice tradegood entities from Korean shorthand runs.

        Each match is a run of 2-3 chars standing for Nutmeg, Mace and
        (when 3 chars) Cloves; one entity is emitted per char.
        """
        locale = HenriqueEntity.Config.config2locale(
            config) or HenriqueLocale.DEFAULT
        lang = LocaleTool.locale2lang(locale)
        langs_recognizable = HenriqueLocale.lang2langs_recognizable(lang)

        # The shorthand pattern is Korean-only.
        if "ko" not in langs_recognizable:
            return []

        match_list = list(cls.pattern_ko().finditer(text_in))

        def span2entity(span_each, value):
            # One tradegood entity for a single-char span of the match.
            return {
                FoxylibEntity.Field.SPAN: span_each,
                FoxylibEntity.Field.TEXT:
                    StringTool.str_span2substr(text_in, span_each),
                FoxylibEntity.Field.VALUE: value,
                FoxylibEntity.Field.TYPE: TradegoodEntity.entity_type(),
            }

        def match2entity_list(match):
            span = match.span()
            assert_in(SpanTool.span2len(span), (2, 3))

            s, e = span
            # Char i of the run maps to values[i]; the previous version
            # repeated the entity-dict literal three times.
            values = ("Nutmeg", "Mace", "Cloves")
            return [span2entity((s + i, s + i + 1), values[i])
                    for i in range(SpanTool.span2len(span))]

        entity_list = [
            entity for m in match_list for entity in match2entity_list(m)
        ]
        return entity_list