Esempio n. 1
0
class DIGPE:
    """Price-extraction pipeline: preprocess, extract, normalize, then group.

    The three stages are delegated to ``Preprocessor``, ``Extractor`` and
    ``Normalizer`` instances created in ``__init__``.
    """

    # Compiled once at class level and shared by every instance.
    re_digits = re.compile(r'\d+')
    re_alphabet = re.compile(r'[a-z ]+')

    def __init__(self):
        self.preprocessor = Preprocessor()
        self.extractor = Extractor()
        self.normalizer = Normalizer()

    @staticmethod
    def _is_hourly(time_unit):
        """Return True when ``time_unit`` qualifies a price for the PPH list.

        A time unit qualifies when it is empty/falsy, or when its first run
        of lowercase letters and spaces (stripped) is in ``UNIT_TIME_HOUR``
        and it either carries no number or its first number is exactly 1.
        """
        if not time_unit:
            # Entries without any time unit are counted as well.
            return True

        words = DIGPE.re_alphabet.findall(time_unit)
        if not words or words[0].strip() not in UNIT_TIME_HOUR:
            return False

        digits = DIGPE.re_digits.findall(time_unit)
        # A count other than 1 (e.g. "2 ..." of the unit) does not qualify.
        return not digits or int(digits[0]) == 1

    def extract(self, text):
        """Run the full pipeline on ``text`` and group the normalized results.

        Returns a dict with two lists:

        * ``PE_DICT_NAME_PRICE`` -- every normalized entry, unfiltered.
        * ``PE_DICT_NAME_PPH`` -- the ``PE_JSON_NAME_PRICE`` value of each
          entry whose ``PE_JSON_NAME_TIME_UNIT`` passes ``_is_hourly``
          (presumably "price per hour" -- confirm against the constants).
        """
        cleaned_text_list = self.preprocessor.preprocess(text)
        extracted_text_list = self.extractor.extract_from_list(
            cleaned_text_list)
        normalized_text_list = self.normalizer.normalize_from_list(
            extracted_text_list)

        ans = {PE_DICT_NAME_PRICE: [], PE_DICT_NAME_PPH: []}
        for normalized in normalized_text_list:
            if DIGPE._is_hourly(normalized[PE_JSON_NAME_TIME_UNIT]):
                ans[PE_DICT_NAME_PPH].append(normalized[PE_JSON_NAME_PRICE])
            ans[PE_DICT_NAME_PRICE].append(normalized)
        return ans

    def extract_from_list(self, text_list):
        """Apply ``extract`` to each item of ``text_list``; return the list of results."""
        return [self.extract(text) for text in text_list]