Example #1
0
    def parser_duration_with_ago_and_later(
            self, source: str,
            reference: datetime) -> DateTimeResolutionResult:
        """Resolve "<duration> before/after" text to a date relative to ``reference``.

        The last duration found by ``self.duration_extractor`` is used. Its
        unit is looked up through ``self.config.unit_map`` and the text that
        follows the duration must start with a match of
        ``ChineseDateExtractor.before_regex`` (shift into the past) or
        ``ChineseDateExtractor.after_regex`` (shift into the future).

        Supported units are day, week, month and year. Month and year shifts
        move by the parsed amount, roll over year boundaries and clamp the day
        to the last day of the target month (e.g. 31 March minus one month
        gives the last day of February).

        :param source: text to resolve.
        :param reference: date the offset is applied to.
        :return: a result with ``success`` set to True and ``timex``,
            ``future_value`` and ``past_value`` filled in, or an untouched
            ``DateTimeResolutionResult`` when the text cannot be resolved.
        """
        from calendar import monthrange

        result = DateTimeResolutionResult()

        # The extractor may find nothing; calling .pop() on an empty list
        # would raise IndexError instead of reporting "not resolved".
        durations = self.duration_extractor.extract(source, reference)
        if not durations:
            return result
        duration_res = durations[-1]
        if not duration_res:
            return result

        match = self.config._unit_regex.search(source)
        if not match:
            return result

        suffix = source[duration_res.start + duration_res.length:]
        src_unit = RegExpUtility.get_group(match, 'unit')

        # The amount is the text between the start of the duration and the
        # start of the unit. match.start() is a character offset, whereas
        # match.lastindex is only the index of the last matched group.
        number_str = source[duration_res.start:match.start()]
        number = self.parse_chinese_written_number_to_value(number_str)

        if src_unit not in self.config.unit_map:
            return result
        unit_str = self.config.unit_map.get(src_unit)

        # Direction of the shift: -1 for "before", +1 for "after".
        before_match = RegExpUtility.get_matches(
            ChineseDateExtractor.before_regex, suffix)
        if before_match and suffix.startswith(before_match[0]):
            sign = -1
        else:
            after_match = RegExpUtility.get_matches(
                ChineseDateExtractor.after_regex, suffix)
            if after_match and suffix.startswith(after_match[0]):
                sign = 1
            else:
                return result

        offset = sign * number

        if unit_str == Constants.TIMEX_DAY:
            date = reference + timedelta(days=offset)
        elif unit_str == Constants.TIMEX_WEEK:
            date = reference + timedelta(days=7 * offset)
        elif unit_str in (Constants.TIMEX_MONTH_FULL, Constants.TIMEX_YEAR):
            months = int(offset)
            if unit_str == Constants.TIMEX_YEAR:
                months *= 12
            # Work on a zero-based month count so divmod handles the
            # rollover across year boundaries in both directions.
            year, month_index = divmod(
                reference.year * 12 + reference.month - 1 + months, 12)
            month = month_index + 1
            # Clamp the day so that e.g. the 31st maps onto a 30-day month.
            day = min(reference.day, monthrange(year, month)[1])
            date = reference.replace(year=year, month=month, day=day)
        else:
            return result

        result.timex = DateTimeFormatUtil.luis_date_from_datetime(date)
        result.future_value = result.past_value = date
        result.success = True
        return result
    def _parser_duration_with_ago_and_later(
            self, source: str,
            reference: datetime) -> DateTimeResolutionResult:
        """Resolve "<duration> before/after" text with a time-of-day unit.

        The last duration found by ``self.duration_extractor`` is used. Its
        unit is taken from the ``unit`` group of
        ``ChineseDateTimeExtractor.date_time_period_unit_regex`` and mapped
        through ``self.config.unit_map``. The text that follows the duration
        must start with a match of ``ChineseDateExtractor.before_regex``
        (shift into the past) or ``ChineseDateExtractor.after_regex`` (shift
        into the future).

        Supported units are hour, minute and second.

        :param source: text to resolve.
        :param reference: date-time the offset is applied to.
        :return: a result with ``success`` set to True and ``timex``,
            ``future_value`` and ``past_value`` filled in, or an untouched
            ``DateTimeResolutionResult`` when the text cannot be resolved.
        """
        result = DateTimeResolutionResult()

        # The extractor may find nothing; calling .pop() on an empty list
        # would raise IndexError instead of reporting "not resolved".
        durations = self.duration_extractor.extract(source, reference)
        if not durations:
            return result
        duration_res = durations[-1]
        if not duration_res:
            return result

        match = ChineseDateTimeExtractor.date_time_period_unit_regex.search(
            source)
        if not match:
            return result

        suffix = source[duration_res.start + duration_res.length:]
        src_unit = RegExpUtility.get_group(match, 'unit')

        # The amount is the text between the start of the duration and the
        # start of the unit. match.start() is a character offset, whereas
        # match.lastindex is only the index of the last matched group.
        number_str = source[duration_res.start:match.start()]
        number = ChineseDateParser.parse_chinese_written_number_to_value(
            ChineseDateParser(), number_str)

        if src_unit not in self.config.unit_map:
            return result
        unit_str = self.config.unit_map.get(src_unit)

        # Direction of the shift: -1 for "before", +1 for "after".
        before_match = RegExpUtility.get_matches(
            ChineseDateExtractor.before_regex, suffix)
        if before_match and suffix.startswith(before_match[0]):
            sign = -1
        else:
            after_match = RegExpUtility.get_matches(
                ChineseDateExtractor.after_regex, suffix)
            if after_match and suffix.startswith(after_match[0]):
                sign = 1
            else:
                return result

        offset = sign * number

        if unit_str == Constants.TIMEX_HOUR:
            date = reference + timedelta(hours=offset)
        elif unit_str == Constants.TIMEX_MINUTE:
            date = reference + timedelta(minutes=offset)
        elif unit_str == Constants.TIMEX_SECOND:
            date = reference + timedelta(seconds=offset)
        else:
            return result

        result.timex = DateTimeFormatUtil.luis_date_from_datetime(date)
        result.future_value = result.past_value = date
        result.success = True
        return result
Example #3
0
    def relative_duration_date(self, source: str, reference: datetime) -> List[Token]:
        """Build tokens for durations directly followed by a before/after marker.

        Every duration reported by ``self.duration_extractor`` whose text does
        not match ``self.date_time_period_unit_regex`` is inspected. The single
        character right after the duration is tested against
        ``self.before_regex`` and ``self.after_regex``; when it starts with a
        match of either one, a token spanning the duration plus that character
        is emitted, flagged with ``is_duration_with_ago_and_later``.

        :param source: text being scanned.
        :param reference: reference date-time forwarded to the extractor.
        :return: the tokens found, in extraction order.
        """
        tokens: List[Token] = []

        for duration in self.duration_extractor.extract(source, reference):
            # Durations matching the date-time period unit regex are not
            # handled by this method.
            if regex.search(self.date_time_period_unit_regex, duration.text):
                continue

            end = duration.start + duration.length
            # Nothing follows the duration, so there is no marker to test.
            if end >= len(source):
                continue

            following = source[end]
            before_hits = RegExpUtility.get_matches(self.before_regex, following)
            after_hits = RegExpUtility.get_matches(self.after_regex, following)

            has_marker = before_hits and following.startswith(before_hits[0])
            if not has_marker:
                has_marker = after_hits and following.startswith(after_hits[0])
            if not has_marker:
                continue

            flags = MetaData()
            flags.is_duration_with_ago_and_later = True
            # The token covers the duration and the one marker character.
            tokens.append(Token(duration.start, end + 1, flags))

        return tokens
Example #4
0
    def extract(self, source: str):
        """Extract choice matches from ``source``.

        Each regex in ``self.config.regexes_map`` is run against the
        lower-cased text. Every match is tokenised and scored against the
        tokenised source with ``self.match_value``; matches whose best score
        is greater than zero become ``ExtractResult`` objects carrying a
        ``ChoiceExtractDataResult`` in ``data``.

        :param source: text to scan; ``None``, empty and whitespace-only
            input yield an empty list.
        :return: the results sorted by ``start``. When
            ``self.config.only_top_match`` is set, only the highest-scoring
            result is returned (the earliest one on ties).
        """
        results: List[ExtractResult] = list()

        # Validate before touching the string: lower-casing first raised
        # AttributeError on None and made the None check unreachable.
        if source is None or source.strip() == '':
            return results

        partial_results: List[ExtractResult] = list()
        trimmed_source = source.lower()
        source_tokens = self.__tokenize(trimmed_source)

        for (regexp, type_extracted) in self.config.regexes_map.items():
            for match in RegExpUtility.get_matches(regexp, trimmed_source):
                match_tokens = self.__tokenize(match)

                # Best score of the match over every start position in the
                # source tokens; it never drops below 0.0.
                top_score = 0.0
                for position in range(len(source_tokens)):
                    score = self.match_value(
                        source_tokens, match_tokens, position)
                    top_score = max(top_score, score)

                if top_score <= 0.0:
                    continue

                # NOTE(review): index() locates the first occurrence, so
                # repeated identical matches all get the same start - confirm
                # whether that is acceptable.
                start = trimmed_source.index(match)
                length = len(match)

                value = ExtractResult()
                value.start = start
                value.length = length
                # The text is taken from the original, non-lower-cased input.
                value.text = source[start:start + length].strip()
                value.type = type_extracted
                value.data = ChoiceExtractDataResult(source, top_score)

                partial_results.append(value)

        if not partial_results:
            return results

        partial_results = sorted(partial_results, key=lambda res: res.start)

        if not self.config.only_top_match:
            return partial_results

        top_score = 0.0
        top_result_index = 0
        for index, candidate in enumerate(partial_results):
            data = ChoiceExtractDataResult(source, candidate.data.score)
            # Strict comparison keeps the earliest result on equal scores.
            if data.score > top_score:
                top_score = data.score
                top_result_index = index

        # NOTE(review): top_result is built and given other_matches but is
        # never attached to the returned ExtractResult - presumably the
        # other matches were meant to be exposed on the result's data;
        # confirm before changing what downstream code receives.
        top_result = ChoiceExtractDataResult(
            partial_results[top_result_index].data.source,
            partial_results[top_result_index].data.score)
        top_result.other_matches = partial_results
        results.append(partial_results[top_result_index])

        return results