def test_01(self):
    """
    Exercise ContextfreeTool.spans_list2reducible_indextuple_list on "a b c d e".

    Each case pairs a list of span lists with the index tuples the helper is
    expected to yield when gaps are validated by
    StringTool.str_span2match_blank bound to the fixed text.
    """
    is_blank_gap = partial(StringTool.str_span2match_blank, "a b c d e")

    cases = [
        # (spans_list, expected index tuples)
        ([[(0, 1), (4, 5)], [(2, 3)]], [(0, 0)]),
        ([[(0, 1), (6, 7), (8, 9)], [(2, 3), (4, 5)]], [(0, 0)]),
        ([[(2, 3), (4, 5)], [(0, 1), (6, 7), (8, 9)]], [(1, 1)]),
        ([[(2, 3), (4, 5)], [(8, 9), (0, 1), (6, 7)]], [(1, 2)]),
        ([[(2, 3), (6, 7)], [(8, 9), (0, 1), (4, 5)], [(6, 7)]],
         [(0, 2, 0)]),
    ]

    for spans_list, expected in cases:
        actual = list(
            ContextfreeTool.spans_list2reducible_indextuple_list(
                spans_list, is_blank_gap))
        self.assertEqual(actual, expected)
def data2entity_list(cls, data):
    """
    Build time entities by pairing hour matches with AM/PM matches.

    The hour and AM/PM matches are read from ``data``; index pairs are chosen
    by ContextfreeTool.spans_list2reducible_indextuple_list, with the gap
    between spans validated by StringTool.str_span2match_blank_or_nullstr.

    :param data: object understood by the TimeEntity.Data accessors
    :return: list of entity dicts; pairs that fail normalization are dropped
    """
    text_in = TimeEntity.Data.data2text_in(data)
    hour_matches = TimeEntity.Data.data2match_list_hour(data)
    hour_spans = lmap(lambda m: m.span(), hour_matches)
    ampm_matches = TimeEntity.Data.data2match_list_ampm(data)
    ampm_spans = lmap(lambda m: m.span(), ampm_matches)

    index_pairs = ContextfreeTool.spans_list2reducible_indextuple_list(
        [hour_spans, ampm_spans],
        partial(StringTool.str_span2match_blank_or_nullstr, text_in),
    )

    def pair2entity(index_pair):
        # Returns None whenever the hour or the AM/PM value cannot be
        # normalized; such results are filtered out below.
        i, j = index_pair
        hour_match = hour_matches[i]
        ampm_match = ampm_matches[j]

        hour_raw = TimeTool.hour2norm(int(hour_match.group()))
        if hour_raw is None:
            return None

        hour, ampm = AMPM.hour_ampm2normalized(
            hour_raw, AMPM.match2value(ampm_match))
        if hour is None or ampm is None:
            return None

        # The entity covers from the start of the hour to the end of AM/PM.
        start = hour_match.span()[0]
        end = ampm_match.span()[1]
        return {
            FoxylibEntity.Field.FULLTEXT: text_in,
            FoxylibEntity.Field.TYPE: TimeEntity.entity_type(),
            FoxylibEntity.Field.SPAN: (start, end),
            FoxylibEntity.Field.VALUE: {
                TimeEntity.Value.Field.HOUR: hour,
                TimeEntity.Value.Field.MINUTE: 0,
                TimeEntity.Value.Field.AMPM: ampm,
            },
        }

    return lfilter(is_not_none, map(pair2entity, index_pairs))
def _text2entity_list_multiday(cls, str_in):
    """
    Yield day-of-week span entities of the form <day> <delimiter> <day>.

    Single-day entities and delimiter matches are located in ``str_in``;
    ContextfreeTool.spans_list2reducible_indextuple_list selects the
    (day, delimiter, day) index triples, using ``cls.str_span2is_gap`` to
    validate the gaps between spans.

    :param str_in: text to scan
    :return: generator of entity dicts whose value is the pair of day values
    """
    logger = FoxylibLogger.func_level2logger(
        cls._text2entity_list_multiday, logging.DEBUG)

    entity_list_1day = DayofweekEntityKoSingle.text2entity_list(str_in)
    m_list_delim = list(cls.pattern_delim().finditer(str_in))

    def span2is_gap(span):
        return cls.str_span2is_gap(str_in, span)

    # The single-day spans appear twice: once for each end of the range.
    triples = list(
        ContextfreeTool.spans_list2reducible_indextuple_list(
            [
                lmap(FoxylibEntity.entity2span, entity_list_1day),
                lmap(MatchTool.match2span, m_list_delim),
                lmap(FoxylibEntity.entity2span, entity_list_1day),
            ],
            span2is_gap,
        ))

    logger.debug({
        "j_tuple_list": triples,
        "entity_list_1day": entity_list_1day,
        "m_list_delim": m_list_delim,
    })

    for j1, _j2, j3 in triples:
        # The delimiter index is not needed to build the entity.
        entity_pair = entity_list_1day[j1], entity_list_1day[j3]
        logger.debug({
            "j1": j1,
            "j3": j3,
            "entity_pair": entity_pair,
        })

        first, last = entity_pair
        start = FoxylibEntity.entity2span(first)[0]
        end = FoxylibEntity.entity2span(last)[1]

        yield {
            FoxylibEntity.Field.TYPE: DayofweekSpanEntity.entity_type(),
            FoxylibEntity.Field.SPAN: (start, end),
            FoxylibEntity.Field.FULLTEXT: str_in,
            FoxylibEntity.Field.VALUE: tmap(FoxylibEntity.entity2value,
                                            entity_pair),
        }
def _text2entity_list(cls, text_in, lang):
    """
    Build entities that combine a sign match with a timedelta entity.

    Sign matches come from ``cls.Sign.pattern()`` and timedelta entities from
    ``TimedeltaEntity._text2entity_list``. Index pairs are chosen by
    ContextfreeTool.spans_list2reducible_indextuple_list, with gaps validated
    by StringTool.str_span2match_blank_or_nullstr.

    :param text_in: text to scan
    :param lang: language passed through to TimedeltaEntity
    :return: list of entity dicts, one per (sign, timedelta) index pair
    """
    sign_matches = list(cls.Sign.pattern().finditer(text_in))
    sign_spans = lmap(lambda m: m.span(), sign_matches)

    timedelta_entities = TimedeltaEntity._text2entity_list(text_in, lang)
    timedelta_spans = lmap(FoxylibEntity.entity2span, timedelta_entities)

    index_pairs = ContextfreeTool.spans_list2reducible_indextuple_list(
        [sign_spans, timedelta_spans],
        partial(StringTool.str_span2match_blank_or_nullstr, text_in),
    )

    def pair2entity(index_pair):
        i, j = index_pair
        value = {
            cls.Value.Field.SIGN: sign_matches[i].group(),
            cls.Value.Field.TIMEDELTA: timedelta_entities[j],
        }
        # Covers from the start of the sign to the end of the timedelta.
        span = (sign_spans[i][0], timedelta_spans[j][1])
        return {
            FoxylibEntity.Field.SPAN: span,
            FoxylibEntity.Field.TEXT: StringTool.str_span2substr(text_in,
                                                                 span),
            FoxylibEntity.Field.VALUE: value,
            FoxylibEntity.Field.TYPE: cls.entity_type(),
        }

    return lmap(pair2entity, index_pairs)
def text2element_list(cls, text_in, lang):
    """
    Extract (quantity, unit) elements by pairing numbers with timedelta units.

    Numbers are found with ``cls.pattern_number()``; units come from the
    matcher built by ``TimedeltaEntityUnit.langs2matcher`` for the languages
    recognizable from ``lang``. Index pairs are chosen by
    ContextfreeTool.spans_list2reducible_indextuple_list, with gaps validated
    by StringTool.str_span2match_blank_or_nullstr.

    :param text_in: text to scan
    :param lang: language used to look up the recognizable languages
    :return: list of dicts with QUANTITY, UNIT and SPAN fields
    """
    logger = HenriqueLogger.func_level2logger(cls.text2element_list,
                                              logging.DEBUG)

    langs = HenriqueLocale.lang2langs_recognizable(lang)
    logger.debug({"langs": langs})

    number_matches = list(cls.pattern_number().finditer(text_in))
    number_spans = lmap(lambda m: m.span(), number_matches)

    unit_matcher = TimedeltaEntityUnit.langs2matcher(langs)
    # Items are indexed as [0] -> span and [1] -> unit value below.
    unit_span_values = list(unit_matcher.text2span_value_iter(text_in))

    index_pairs = ContextfreeTool.spans_list2reducible_indextuple_list(
        [number_spans, lmap(ig(0), unit_span_values)],
        partial(StringTool.str_span2match_blank_or_nullstr, text_in),
    )

    def pair2element(index_pair):
        i, j = index_pair
        quantity = int(number_matches[i].group())
        unit_item = unit_span_values[j]
        unit = unit_item[1]
        # Covers from the start of the number to the end of the unit.
        span = (number_spans[i][0], unit_item[0][1])
        return {
            cls.Field.QUANTITY: quantity,
            cls.Field.UNIT: unit,
            cls.Field.SPAN: span,
        }

    return lmap(pair2element, index_pairs)
def text2entity_list_hm():
    """
    Build hour:minute entities from paired hour and minute matches.

    Relies on names from the enclosing scope: ``span_list_hour``,
    ``span_list_minute``, ``m_list_hour``, ``m_list_minute``, ``gap2valid``,
    ``entity_type`` and ``text_in``.

    :return: list of entity dicts; pairs with an invalid hour or minute are
             dropped
    """
    def pair2entity(index_pair):
        i, j = index_pair
        # Covers from the start of the hour to the end of the minute.
        span = (span_list_hour[i][0], span_list_minute[j][1])

        hour = int(m_list_hour[i].group())
        minute = int(m_list_minute[j].group())
        if not (TimeTool.hour2is_valid(hour)
                and TimeTool.minute2is_valid(minute)):
            return None

        return {
            FoxylibEntity.Field.TYPE: entity_type,
            FoxylibEntity.Field.FULLTEXT: text_in,
            FoxylibEntity.Field.SPAN: span,
            FoxylibEntity.Field.VALUE: {
                TimeEntity.Value.Field.HOUR: hour,
                TimeEntity.Value.Field.MINUTE: minute,
            },
        }

    index_pairs = ContextfreeTool.spans_list2reducible_indextuple_list(
        [span_list_hour, span_list_minute], gap2valid)

    # Falsy results (the None returned for invalid pairs) are removed.
    return lfilter(bool, map(pair2entity, index_pairs))
def text2entity_list_hms():
    """
    Extend hour:minute entities with a following seconds match where one pairs.

    Relies on names from the enclosing scope: ``text2entity_list_hm``,
    ``span_list_minute``, ``m_list_minute``, ``gap2valid``, ``entity_type``
    and ``text_in``.

    :return: list with one item per hour:minute entity, in the same order:
             the original entity when no seconds span pairs with it, a new
             hour:minute:second entity when one does, or None when the paired
             seconds value is invalid
    """
    entity_list_hm = text2entity_list_hm()
    span_list_hm = lmap(FoxylibEntity.entity2span, entity_list_hm)

    # The minute spans are reused as the candidate spans for seconds.
    span_list_second = span_list_minute
    # Fix: the seconds digits must be read from the regex match, not from the
    # span tuple (a tuple has no .group()).
    # NOTE(review): assumes m_list_minute is index-aligned with
    # span_list_minute, as text2entity_list_hm already relies on - confirm.
    m_list_second = m_list_minute

    spans_list = [span_list_hm, span_list_second]
    indextuple_list = ContextfreeTool.spans_list2reducible_indextuple_list(
        spans_list, gap2valid)
    # Maps an hour:minute entity index to its paired seconds index.
    h_i2j = dict(indextuple_list)

    def i2entity(i):
        if i not in h_i2j:
            return entity_list_hm[i]

        j = h_i2j[i]
        span = (span_list_hm[i][0], span_list_second[j][1])

        entity_hm, m2 = entity_list_hm[i], m_list_second[j]
        value_hm = FoxylibEntity.entity2value(entity_hm)
        hour, minute = TimeEntity.Value.value2hm(value_hm)

        second = int(m2.group())
        if not TimeTool.second2is_valid(second):
            # NOTE(review): this leaves a None in the returned list; callers
            # are presumably expected to filter it - confirm.
            return None

        value = {
            TimeEntity.Value.Field.HOUR: hour,
            TimeEntity.Value.Field.MINUTE: minute,
            TimeEntity.Value.Field.SECOND: second,
        }
        entity = {
            FoxylibEntity.Field.TYPE: entity_type,
            FoxylibEntity.Field.FULLTEXT: text_in,
            FoxylibEntity.Field.SPAN: span,
            FoxylibEntity.Field.VALUE: value,
        }
        return entity

    return lmap(i2entity, range(len(entity_list_hm)))
def text2entity_list(cls, str_in, config=None):
    """
    Yield hour entities formed by a cardinal number and a suffix match.

    Cardinal entities are taken from ``CardinalEntity.text2entity_list`` and
    kept only if StringTool.str_span2is_wordbound_prefixed accepts their span.
    Suffix matches come from ``cls.pattern_suffix()``. Index pairs are chosen
    by ContextfreeTool.spans_list2reducible_indextuple_list, with gaps
    validated by StringTool.str_span2match_blank_or_nullstr.

    This is a generator function, so nothing runs until it is iterated.

    :param str_in: text to scan
    :param config: accepted for interface compatibility; not used here
    :return: generator of entity dicts whose value is the cardinal's value
    """
    def entity2is_wordbound_prefixed(entity):
        return StringTool.str_span2is_wordbound_prefixed(
            str_in, FoxylibEntity.entity2span(entity))

    cardinal_entity_list = lfilter(entity2is_wordbound_prefixed,
                                   CardinalEntity.text2entity_list(str_in))

    # Fix: finditer() returns a one-shot iterator. It is both mapped to spans
    # and indexed by position below, so it must be materialized as a list.
    m_list_suffix = list(cls.pattern_suffix().finditer(str_in))

    span_ll = [
        lmap(FoxylibEntity.entity2span, cardinal_entity_list),
        lmap(MatchTool.match2span, m_list_suffix),
    ]

    def f_span2is_gap(span):
        return StringTool.str_span2match_blank_or_nullstr(str_in, span)

    j_tuple_list = ContextfreeTool.spans_list2reducible_indextuple_list(
        span_ll, f_span2is_gap)

    for j1, j2 in j_tuple_list:
        cardinal_entity = cardinal_entity_list[j1]
        m_suffix = m_list_suffix[j2]

        # Covers from the start of the cardinal to the end of the suffix.
        span = (FoxylibEntity.entity2span(cardinal_entity)[0],
                MatchTool.match2span(m_suffix)[1])
        j_entity = {
            FoxylibEntity.Field.TYPE: HourEntity.entity_type(),
            FoxylibEntity.Field.SPAN: span,
            FoxylibEntity.Field.FULLTEXT: str_in,
            FoxylibEntity.Field.VALUE:
                FoxylibEntity.entity2value(cardinal_entity),
        }
        yield j_entity
def _text_config2skill_code(cls, text_in, config):
    """
    Return the skill codename of the skill entity that follows a prefix match.

    Prefix matches come from ``cls.pattern_prefix()`` and skill entities from
    ``SkillEntity.text2entity_list``. Index pairs are chosen by
    ContextfreeTool.spans_list2reducible_indextuple_list, with gaps validated
    by StringTool.str_span2match_blank_or_nullstr.

    :param text_in: text to scan
    :param config: passed through to SkillEntity.text2entity_list
    :return: the skill codename, or None when there is no prefix match, no
             skill entity, or no valid (prefix, entity) pair
    :raises AssertionError: via assert_in when more than one pair is found
    """
    pattern_prefix = cls.pattern_prefix()
    match_list_prefix = list(pattern_prefix.finditer(text_in))
    if not match_list_prefix:
        return None

    entity_list = SkillEntity.text2entity_list(text_in, config=config)
    if not entity_list:
        return None

    spans_list = [
        lmap(lambda m: m.span(), match_list_prefix),
        lmap(FoxylibEntity.entity2span, entity_list),
    ]
    gap2is_valid = partial(StringTool.str_span2match_blank_or_nullstr,
                           text_in)

    # Fix: materialize the result before using len() and truthiness.
    # NOTE(review): the helper may return a lazy iterable (e.g. a generator),
    # for which len() raises TypeError and `not x` is always False. list() is
    # a no-op copy if it already returns a list.
    indextuple_list = list(
        ContextfreeTool.spans_list2reducible_indextuple_list(
            spans_list, gap2is_valid))

    assert_in(len(indextuple_list), [0, 1])
    if not indextuple_list:
        return None

    # Each index tuple is (prefix_index, entity_index); only the entity
    # index is needed.
    index_entity = l_singleton2obj(indextuple_list)[1]
    entity = entity_list[index_entity]
    return SkillEntity.entity2skill_codename(entity)
def entity_list2ampm_suffixed(
    cls,
    data,
    entity_list_in,
):
    """
    Merge a trailing AM/PM match into each time entity that pairs with one.

    AM/PM matches are read from ``data``. Index pairs are chosen by
    ContextfreeTool.spans_list2reducible_indextuple_list, with gaps validated
    by StringTool.str_span2match_blank_or_nullstr.

    :param data: object understood by the ``cls.Data`` accessors
    :param entity_list_in: time entities; each must have the TimeEntity type
    :return: list of the same length and order as ``entity_list_in``; an
             entity with no paired AM/PM match is returned unchanged
    """
    # Kept although unused below: the original obtains the logger as well.
    logger = FoxylibLogger.func_level2logger(cls.entity_list2ampm_suffixed,
                                             logging.DEBUG)

    text_in = cls.Data.data2text_in(data)
    ampm_matches = cls.Data.data2match_list_ampm(data)

    spans_in = lmap(FoxylibEntity.entity2span, entity_list_in)
    ampm_spans = lmap(lambda m: m.span(), ampm_matches)

    index_pairs = ContextfreeTool.spans_list2reducible_indextuple_list(
        [spans_in, ampm_spans],
        partial(StringTool.str_span2match_blank_or_nullstr, text_in),
    )
    # Maps an input entity index to the index of its paired AM/PM match.
    ampm_index_by_entity = dict(index_pairs)

    def index2entity(i):
        entity_in = entity_list_in[i]
        assert_equal(FoxylibEntity.entity2type(entity_in),
                     TimeEntity.entity_type())

        if i not in ampm_index_by_entity:
            return entity_list_in[i]

        j = ampm_index_by_entity[i]
        ampm_match = ampm_matches[j]
        # Covers from the start of the entity to the end of the AM/PM match.
        span = (spans_in[i][0], ampm_spans[j][1])

        hour, minute, second = TimeEntity.Value.value2hms(
            FoxylibEntity.entity2value(entity_in))
        hour_adjusted, ampm_adjusted = AMPM.hour_ampm2normalized(
            hour, AMPM.match2value(ampm_match))

        # Fields whose value is None are left out of the entity value.
        value = DictTool.filter(
            lambda k, v: v is not None,
            {
                TimeEntity.Value.Field.HOUR: hour_adjusted,
                TimeEntity.Value.Field.MINUTE: minute,
                TimeEntity.Value.Field.SECOND: second,
                TimeEntity.Value.Field.AMPM: ampm_adjusted,
            })

        return {
            FoxylibEntity.Field.TYPE: FoxylibEntity.entity2type(entity_in),
            FoxylibEntity.Field.FULLTEXT: text_in,
            FoxylibEntity.Field.SPAN: span,
            FoxylibEntity.Field.VALUE: value,
        }

    return lmap(index2entity, range(len(entity_list_in)))