예제 #1
0
    def __init__(self, entity_name, language=ENGLISH_LANG):
        """Set up a NumberDetector and bind the number detector implementation for ``language``.

        The language-specific ``number_detection`` module is tried first. If that raises
        ImportError, the standard number detector is used instead and is given the data
        directory resolved by ``get_lang_data_path`` for this language.

        Args:
            entity_name: A string by which the detected numbers would be replaced with on calling detect_entity();
                         also used to build the ``__<entity_name>__`` tag
            language (str, optional): language code of number text, defaults to ENGLISH_LANG
        """
        # the supported-language list is assigned before the superclass initializer runs
        self._supported_languages = self.get_supported_languages()
        super(NumberDetector, self).__init__(language)

        # identity / configuration of this detector
        self.entity_name = entity_name
        self.language = language
        self.tag = '__' + self.entity_name + '__'
        self.min_digit = 1
        self.max_digit = 6

        # text buffers and result lists all start out empty
        self.text = ''
        self.tagged_text = ''
        self.processed_text = ''
        self.number = []
        self.original_number_text = []

        language_module_path = 'ner_v2.detectors.numeral.number.{0}.number_detection'.format(self.language)
        try:
            language_module = importlib.import_module(language_module_path)
            self.language_number_detector = language_module.NumberDetector(entity_name=self.entity_name)
        except ImportError:
            # no importable language-specific module: use the data-driven standard detector
            fallback_module = importlib.import_module('ner_v2.detectors.numeral.number.standard_number_detector')
            fallback_detector_cls = fallback_module.NumberDetector
            language_data_path = get_lang_data_path(detector_path=os.path.abspath(__file__),
                                                    lang_code=self.language)
            self.language_number_detector = fallback_detector_cls(entity_name=self.entity_name,
                                                                  data_directory_path=language_data_path)
예제 #2
0
    def __init__(self,
                 entity_name,
                 timezone='UTC',
                 range_enabled=False,
                 form_check=False,
                 language=ENGLISH_LANG):
        """Set up a TimeDetector and bind the time detector implementation for ``language``.

        The language-specific ``time_detection`` module is tried first. If that raises
        ImportError, the standard regex based time detector is used instead and is given the
        data directory resolved by ``get_lang_data_path`` for this language.

        Args:
            entity_name (str): A string by which the detected time stamp substrings would be replaced with on
                               calling detect_entity(); also used to build the ``__<entity_name>__`` tag
            timezone (str): timezone identifier string forwarded to the underlying detector;
                            a falsy value is replaced by 'UTC'
            range_enabled (bool): whether time range needs to be detected
            form_check (bool): Optional, boolean set to False, used when passed text is a form type message
            language (str): ISO 639 code for language of entities to be detected by the instance of this
                            class
        """
        # the supported-language list is assigned before the superclass initializer runs
        self._supported_languages = self.get_supported_languages()
        super(TimeDetector, self).__init__(language=language)

        # configuration supplied by the caller
        self.entity_name = entity_name
        self.language = language
        self.timezone = timezone or 'UTC'
        self.range_enabled = range_enabled
        self.form_check = form_check
        self.tag = '__' + entity_name + '__'

        # text buffers, result lists and flags start from their empty/False/None defaults
        self.text = ''
        self.tagged_text = ''
        self.processed_text = ''
        self.time = []
        self.original_time_text = []
        self.departure_flag = False
        self.return_flag = False
        self.bot_message = None

        # keyword arguments common to both the language-specific and the standard detector
        shared_kwargs = dict(entity_name=self.entity_name,
                             timezone=self.timezone,
                             range_enabled=range_enabled,
                             form_check=form_check)

        language_module_path = 'ner_v2.detectors.temporal.time.{0}.time_detection'.format(self.language)
        try:
            language_module = importlib.import_module(language_module_path)
            self.language_time_detector = language_module.TimeDetector(**shared_kwargs)
        except ImportError:
            # no importable language-specific module: use the data-driven standard detector
            fallback_module = importlib.import_module('ner_v2.detectors.temporal.time.standard_time_regex')
            fallback_detector_cls = fallback_module.TimeDetector
            language_data_path = get_lang_data_path(detector_path=os.path.abspath(__file__),
                                                    lang_code=self.language)
            self.language_time_detector = fallback_detector_cls(data_directory_path=language_data_path,
                                                                **shared_kwargs)
예제 #3
0
    def __init__(self,
                 entity_name,
                 language=ENGLISH_LANG,
                 timezone='UTC',
                 past_date_referenced=False):
        """Set up a DateDetector and bind the date detector implementation for ``language``.

        ``timezone`` is converted to a pytz timezone object; if that conversion raises, the
        error is logged at debug level and UTC is used instead. The language-specific
        ``date_detection`` module is tried first. If that raises ImportError, the standard
        regex based date detector is used instead and is given the data directory resolved by
        ``get_lang_data_path`` for this language.

        Args:
            entity_name: A string by which the detected date entity substrings would be replaced with on calling
                         detect_entity(); also used to build the ``__<entity_name>__`` tag
            language (str, optional): language code used to pick the detector module, defaults to ENGLISH_LANG
            timezone (Optional, str): timezone identifier string that is used to create a pytz timezone object
                                      default is UTC
            past_date_referenced (bool): to know if past or future date is referenced for date text like 'kal', 'parso'
        """
        # text buffers and result lists all start out empty
        self.text = ''
        self.tagged_text = ''
        self.processed_text = ''
        self.date = []
        self.original_date_text = []
        self.bot_message = None

        self.entity_name = entity_name
        self.language = language
        self.tag = '__' + entity_name + '__'

        # resolve the timezone name, falling back to UTC on any failure
        try:
            resolved_timezone = pytz.timezone(timezone)
        except Exception as error:
            ner_logger.debug('Timezone error: %s ' % error)
            resolved_timezone = pytz.timezone('UTC')
            ner_logger.debug('Default timezone passed as "UTC"')
        self.timezone = resolved_timezone
        self.now_date = datetime.datetime.now(tz=self.timezone)

        language_module_path = 'ner_v2.detectors.temporal.date.{0}.date_detection'.format(self.language)
        try:
            language_module = importlib.import_module(language_module_path)
            self.language_date_detector = language_module.DateDetector(entity_name=self.entity_name,
                                                                       past_date_referenced=past_date_referenced,
                                                                       timezone=self.timezone)
        except ImportError:
            # no importable language-specific module: use the data-driven standard detector
            fallback_module = importlib.import_module('ner_v2.detectors.temporal.date.standard_date_regex')
            fallback_detector_cls = fallback_module.DateDetector
            language_data_path = get_lang_data_path(detector_path=os.path.abspath(__file__),
                                                    lang_code=self.language)
            self.language_date_detector = fallback_detector_cls(entity_name=self.entity_name,
                                                                data_directory_path=language_data_path,
                                                                timezone=self.timezone,
                                                                past_date_referenced=past_date_referenced)
    def __init__(self,
                 entity_name='number_range',
                 language=ENGLISH_LANG,
                 unit_type=None):
        """Set up a NumberRangeDetector and bind the number range detector implementation for ``language``.

        The language-specific ``number_range_detection`` module is tried first. If that raises
        ImportError, the standard number range detector is used instead and is given the data
        directory resolved by ``get_lang_data_path`` for this language.

        Args:
            entity_name(str): A string by which the detected numbers would be replaced with on calling detect_entity();
                              also used to build the ``__<entity_name>__`` tag
            language (str, optional): language code of number text, defaults to ENGLISH_LANG
            unit_type (str, optional): number unit types like weight, currency, temperature, used to detect number with
                                       specific unit type only. If None, it will detect all number ranges irrespective
                                       of units. You can see all unit types supported inside number detection
                                       language data with filename unit.csv.
        """
        # the supported-language list is assigned before the superclass initializer runs
        self._supported_languages = self.get_supported_languages()
        super(NumberRangeDetector, self).__init__(language)

        self.entity_name = entity_name
        self.language = language
        self.unit_type = unit_type
        self.tag = '__' + self.entity_name + '__'
        self.text = ''

        language_module_path = 'ner_v2.detectors.numeral.number_range.{0}.number_range_detection' \
            .format(self.language)
        try:
            language_module = importlib.import_module(language_module_path)
            self.language_number_range_detector = language_module.NumberRangeDetector(
                entity_name=self.entity_name,
                language=self.language,
                unit_type=self.unit_type)
        except ImportError:
            # no importable language-specific module: use the data-driven standard detector
            fallback_module = importlib.import_module(
                'ner_v2.detectors.numeral.number_range.standard_number_range_detector')
            fallback_detector_cls = fallback_module.NumberRangeDetector
            language_data_path = get_lang_data_path(detector_path=os.path.abspath(__file__),
                                                    lang_code=self.language)
            self.language_number_range_detector = fallback_detector_cls(entity_name=self.entity_name,
                                                                        language=language,
                                                                        unit_type=self.unit_type,
                                                                        data_directory_path=language_data_path)
    def __init__(self, entity_name='time', timezone=None, language=ENGLISH_LANG):
        """Set up a TimeDetector and bind the time detector implementation for ``language``.

        A truthy ``timezone`` is converted with ``get_timezone``; otherwise the instance
        timezone is left as None. The language-specific ``time_detection`` module is tried
        first. If that raises ImportError, the standard regex based time detector is used
        instead and is given the data directory resolved by ``get_lang_data_path`` for this
        language.

        Args:
            entity_name(str): A string by which the detected time stamp substrings would be replaced with on calling
                               detect_entity(); also used to build the ``__<entity_name>__`` tag
            timezone(str): timezone identifier string passed to ``get_timezone``; when falsy, no timezone
                            is set on the instance (None)
            language(str): ISO 639 code for language of entities to be detected by the instance of this class
        """
        # the supported-language list is assigned before the superclass initializer runs
        self._supported_languages = self.get_supported_languages()
        super(TimeDetector, self).__init__(language=language)

        self.entity_name = entity_name
        self.tag = '__' + entity_name + '__'

        # text buffers and result lists all start out empty
        self.text = ''
        self.tagged_text = ''
        self.processed_text = ''
        self.time = []
        self.original_time_text = []

        # only a truthy timezone value is converted; anything else leaves it unset
        self.timezone = get_timezone(timezone) if timezone else None
        self.language = language

        language_module_path = 'ner_v2.detectors.temporal.time.{0}.time_detection'.format(self.language)
        try:
            language_module = importlib.import_module(language_module_path)
            self.language_time_detector = language_module.TimeDetector(entity_name=self.entity_name,
                                                                       timezone=self.timezone)
        except ImportError:
            # no importable language-specific module: use the data-driven standard detector
            fallback_module = importlib.import_module('ner_v2.detectors.temporal.time.standard_time_regex')
            fallback_detector_cls = fallback_module.TimeDetector
            language_data_path = get_lang_data_path(detector_path=os.path.abspath(__file__),
                                                    lang_code=self.language)
            self.language_time_detector = fallback_detector_cls(entity_name=self.entity_name,
                                                                data_directory_path=language_data_path,
                                                                timezone=self.timezone)
예제 #6
0
    def __init__(self, entity_name, language=ENGLISH_LANG, unit_type=None, detect_without_unit=False):
        """Set up a unit-aware NumberDetector and bind the number detector implementation for ``language``.

        The language-specific ``number_detection`` module is tried first. If that raises
        ImportError, the standard number detector is used instead and is given the data
        directory resolved by ``get_lang_data_path`` for this language.

        Args:
            entity_name: A string by which the detected numbers would be replaced with on calling detect_entity();
                         also used to build the ``__<entity_name>__`` tag
            language (str, optional): language code of number text, defaults to ENGLISH_LANG
            unit_type (str): number unit types like weight, currency, temperature, used to detect number with
                               specific unit type.
            detect_without_unit (bool): stored on the instance as ``detect_without_unit``; it is not forwarded
                                        to the underlying detector by this constructor
        """
        # the supported-language list is assigned before the superclass initializer runs
        self._supported_languages = self.get_supported_languages()
        super(NumberDetector, self).__init__(language)

        # identity / configuration of this detector
        self.entity_name = entity_name
        self.language = language
        self.unit_type = unit_type
        self.detect_without_unit = detect_without_unit
        self.tag = '__' + self.entity_name + '__'
        self.min_digit = 1
        self.max_digit = 6

        # text buffers and result lists all start out empty
        self.text = ''
        self.tagged_text = ''
        self.processed_text = ''
        self.number = []
        self.original_number_text = []

        # character-class pattern built from the common non-numeric punctuation constant
        self.punctuations_to_filter = re.compile(f'[{COMMON_NON_NUMERIC_PUNCTUATIONS}]')

        language_module_path = 'ner_v2.detectors.numeral.number.{0}.number_detection'.format(self.language)
        try:
            language_module = importlib.import_module(language_module_path)
            self.language_number_detector = language_module.NumberDetector(entity_name=self.entity_name,
                                                                           unit_type=self.unit_type)
        except ImportError:
            # no importable language-specific module: use the data-driven standard detector
            fallback_module = importlib.import_module('ner_v2.detectors.numeral.number.standard_number_detector')
            fallback_detector_cls = fallback_module.NumberDetector
            language_data_path = get_lang_data_path(detector_path=os.path.abspath(__file__),
                                                    lang_code=self.language)
            self.language_number_detector = fallback_detector_cls(entity_name=self.entity_name,
                                                                  unit_type=self.unit_type,
                                                                  data_directory_path=language_data_path)