def __init__(self, entity_name, language=ENGLISH_LANG):
    """Set up a NumberDetector and bind the detector implementation it delegates to.

    A language-specific ``number_detection`` module is looked up first; when
    that raises ImportError the generic ``standard_number_detector`` module is
    used instead, pointed at the data directory resolved for ``language``.

    Args:
        entity_name: A string by which the detected numbers would be replaced
            with on calling detect_entity(). It is also wrapped in double
            underscores to build ``self.tag``.
        language (str, optional): language code of the number text, defaults
            to ENGLISH_LANG.
    """
    # Populate the superclass attribute, then run the superclass initializer.
    self._supported_languages = self.get_supported_languages()
    super(NumberDetector, self).__init__(language)

    # Per-instance detection state; the two lists are distinct objects.
    self.entity_name = entity_name
    self.text = self.tagged_text = self.processed_text = ''
    self.number = []
    self.original_number_text = []
    self.tag = '__' + self.entity_name + '__'
    self.min_digit, self.max_digit = 1, 6
    self.language = language

    language_module_path = 'ner_v2.detectors.numeral.number.{0}.number_detection'.format(
        self.language)
    try:
        # NOTE: an ImportError raised while constructing the language-specific
        # detector is caught below as well and also triggers the fallback.
        language_module = importlib.import_module(language_module_path)
        self.language_number_detector = language_module.NumberDetector(
            entity_name=self.entity_name)
    except ImportError:
        fallback_module = importlib.import_module(
            'ner_v2.detectors.numeral.number.standard_number_detector')
        fallback_cls = fallback_module.NumberDetector
        data_path = get_lang_data_path(detector_path=os.path.abspath(__file__),
                                       lang_code=self.language)
        self.language_number_detector = fallback_cls(entity_name=self.entity_name,
                                                     data_directory_path=data_path)
def __init__(self, entity_name, timezone='UTC', range_enabled=False, form_check=False,
             language=ENGLISH_LANG):
    """Set up a TimeDetector and bind the detector implementation it delegates to.

    A language-specific ``time_detection`` module is looked up first; when
    that raises ImportError the generic ``standard_time_regex`` module is used
    instead, pointed at the data directory resolved for ``language``.

    Args:
        entity_name (str): A string by which the detected time stamp
            substrings would be replaced with on calling detect_entity().
        timezone (str): timezone identifier string; a falsy value is replaced
            by 'UTC'.
        range_enabled (bool): whether time range needs to be detected.
        form_check (bool): Optional, defaults to False; used when the passed
            text is a form type message.
        language (str): ISO 639 code for language of entities to be detected
            by the instance of this class.
    """
    # Populate the superclass attribute, then run the superclass initializer.
    self._supported_languages = self.get_supported_languages()
    super(TimeDetector, self).__init__(language=language)

    # Per-instance detection state; the two lists are distinct objects.
    self.entity_name = entity_name
    self.text = ''
    self.departure_flag = self.return_flag = False
    self.tagged_text = self.processed_text = ''
    self.time = []
    self.original_time_text = []
    self.form_check = form_check
    self.tag = '__' + entity_name + '__'
    self.bot_message = None
    self.timezone = timezone or 'UTC'
    self.range_enabled = range_enabled
    self.language = language

    # Keyword arguments common to both the language-specific and the
    # fallback detector constructors.
    shared_kwargs = {
        'entity_name': self.entity_name,
        'timezone': self.timezone,
        'range_enabled': range_enabled,
        'form_check': form_check,
    }
    language_module_path = 'ner_v2.detectors.temporal.time.{0}.time_detection'.format(
        self.language)
    try:
        # NOTE: an ImportError raised while constructing the language-specific
        # detector is caught below as well and also triggers the fallback.
        language_module = importlib.import_module(language_module_path)
        self.language_time_detector = language_module.TimeDetector(**shared_kwargs)
    except ImportError:
        fallback_module = importlib.import_module(
            'ner_v2.detectors.temporal.time.standard_time_regex')
        fallback_cls = fallback_module.TimeDetector
        data_path = get_lang_data_path(detector_path=os.path.abspath(__file__),
                                       lang_code=self.language)
        self.language_time_detector = fallback_cls(data_directory_path=data_path,
                                                   **shared_kwargs)
def __init__(self, entity_name, language=ENGLISH_LANG, timezone='UTC', past_date_referenced=False):
    """Set up a DateDetector and bind the detector implementation it delegates to.

    A language-specific ``date_detection`` module is looked up first; when
    that raises ImportError the generic ``standard_date_regex`` module is used
    instead, pointed at the data directory resolved for ``language``.

    Args:
        entity_name: A string by which the detected date entity substrings
            would be replaced with on calling detect_entity().
        language (str, optional): language code used to pick the
            language-specific date detection module and data directory,
            defaults to ENGLISH_LANG.
        timezone (Optional, str): timezone identifier string that is used to
            create a pytz timezone object, default is UTC. If building the
            timezone fails for any reason, UTC is used instead.
        past_date_referenced (bool): to know if past or future date is
            referenced for date text like 'kal', 'parso'. It is only forwarded
            to the underlying detector here.
    """
    # Per-instance detection state; the two lists are distinct objects.
    self.text = self.tagged_text = self.processed_text = ''
    self.date = []
    self.original_date_text = []
    self.entity_name = entity_name
    self.tag = '__' + entity_name + '__'

    # Best-effort timezone resolution: any failure falls back to UTC and is
    # only reported at debug level.
    try:
        self.timezone = pytz.timezone(timezone)
    except Exception as error:
        ner_logger.debug('Timezone error: %s ' % error)
        self.timezone = pytz.timezone('UTC')
        ner_logger.debug('Default timezone passed as "UTC"')

    # Timezone-aware "now" captured once at construction time.
    self.now_date = datetime.datetime.now(tz=self.timezone)
    self.bot_message = None
    self.language = language

    language_module_path = 'ner_v2.detectors.temporal.date.{0}.date_detection'.format(
        self.language)
    try:
        # NOTE: an ImportError raised while constructing the language-specific
        # detector is caught below as well and also triggers the fallback.
        language_module = importlib.import_module(language_module_path)
        self.language_date_detector = language_module.DateDetector(
            entity_name=self.entity_name,
            past_date_referenced=past_date_referenced,
            timezone=self.timezone)
    except ImportError:
        fallback_module = importlib.import_module(
            'ner_v2.detectors.temporal.date.standard_date_regex')
        fallback_cls = fallback_module.DateDetector
        data_path = get_lang_data_path(detector_path=os.path.abspath(__file__),
                                       lang_code=self.language)
        self.language_date_detector = fallback_cls(
            entity_name=self.entity_name,
            data_directory_path=data_path,
            timezone=self.timezone,
            past_date_referenced=past_date_referenced)
def __init__(self, entity_name='number_range', language=ENGLISH_LANG, unit_type=None):
    """Set up a NumberRangeDetector and bind the detector implementation it delegates to.

    A language-specific ``number_range_detection`` module is looked up first;
    when that raises ImportError the generic ``standard_number_range_detector``
    module is used instead, pointed at the data directory resolved for
    ``language``.

    Args:
        entity_name (str): A string by which the detected numbers would be
            replaced with on calling detect_entity(). It is also wrapped in
            double underscores to build ``self.tag``.
        language (str, optional): language code of number text, defaults to
            ENGLISH_LANG.
        unit_type (str, optional): number unit types like weight, currency,
            temperature, used to detect number with specific unit type only.
            If None, it will detect all number ranges irrespective of units.
            You can see all unit types supported inside number detection
            language data with filename unit.csv.
    """
    # Populate the superclass attribute, then run the superclass initializer.
    self._supported_languages = self.get_supported_languages()
    super(NumberRangeDetector, self).__init__(language)

    self.entity_name = entity_name
    self.text = ''
    self.tag = '__' + self.entity_name + '__'
    self.language = language
    self.unit_type = unit_type

    language_module_path = 'ner_v2.detectors.numeral.number_range.{0}.number_range_detection' \
        .format(self.language)
    try:
        # NOTE: an ImportError raised while constructing the language-specific
        # detector is caught below as well and also triggers the fallback.
        language_module = importlib.import_module(language_module_path)
        self.language_number_range_detector = language_module.NumberRangeDetector(
            entity_name=self.entity_name,
            language=self.language,
            unit_type=self.unit_type)
    except ImportError:
        fallback_module = importlib.import_module(
            'ner_v2.detectors.numeral.number_range.standard_number_range_detector')
        fallback_cls = fallback_module.NumberRangeDetector
        data_path = get_lang_data_path(detector_path=os.path.abspath(__file__),
                                       lang_code=self.language)
        self.language_number_range_detector = fallback_cls(
            entity_name=self.entity_name,
            language=language,
            unit_type=self.unit_type,
            data_directory_path=data_path)
def __init__(self, entity_name='time', timezone=None, language=ENGLISH_LANG):
    """Set up a TimeDetector and bind the detector implementation it delegates to.

    A language-specific ``time_detection`` module is looked up first; when
    that raises ImportError the generic ``standard_time_regex`` module is used
    instead, pointed at the data directory resolved for ``language``.

    Args:
        entity_name (str): A string by which the detected time stamp
            substrings would be replaced with on calling detect_entity().
        timezone (str): timezone identifier string, converted with
            get_timezone() when truthy. When it is None or otherwise falsy,
            ``self.timezone`` is left as None.
        language (str): ISO 639 code for language of entities to be detected
            by the instance of this class.
    """
    # Populate the superclass attribute, then run the superclass initializer.
    self._supported_languages = self.get_supported_languages()
    super(TimeDetector, self).__init__(language=language)

    # Per-instance detection state; the two lists are distinct objects.
    self.entity_name = entity_name
    self.text = self.tagged_text = self.processed_text = ''
    self.time = []
    self.original_time_text = []
    self.tag = '__' + entity_name + '__'
    # Only resolve a timezone when one was actually supplied.
    self.timezone = get_timezone(timezone) if timezone else None
    self.language = language

    language_module_path = 'ner_v2.detectors.temporal.time.{0}.time_detection'.format(
        self.language)
    try:
        # NOTE: an ImportError raised while constructing the language-specific
        # detector is caught below as well and also triggers the fallback.
        language_module = importlib.import_module(language_module_path)
        self.language_time_detector = language_module.TimeDetector(
            entity_name=self.entity_name,
            timezone=self.timezone)
    except ImportError:
        fallback_module = importlib.import_module(
            'ner_v2.detectors.temporal.time.standard_time_regex')
        fallback_cls = fallback_module.TimeDetector
        data_path = get_lang_data_path(detector_path=os.path.abspath(__file__),
                                       lang_code=self.language)
        self.language_time_detector = fallback_cls(
            entity_name=self.entity_name,
            data_directory_path=data_path,
            timezone=self.timezone)
def __init__(self, entity_name, language=ENGLISH_LANG, unit_type=None, detect_without_unit=False):
    """Set up a NumberDetector and bind the detector implementation it delegates to.

    A language-specific ``number_detection`` module is looked up first; when
    that raises ImportError the generic ``standard_number_detector`` module is
    used instead, pointed at the data directory resolved for ``language``.

    Args:
        entity_name: A string by which the detected numbers would be replaced
            with on calling detect_entity(). It is also wrapped in double
            underscores to build ``self.tag``.
        language (str, optional): language code of number text, defaults to
            ENGLISH_LANG.
        unit_type (str): number unit types like weight, currency, temperature,
            used to detect number with specific unit type.
        detect_without_unit (bool): stored on the instance as
            ``self.detect_without_unit``; it is not forwarded to the
            underlying detector by this constructor.
    """
    # Populate the superclass attribute, then run the superclass initializer.
    self._supported_languages = self.get_supported_languages()
    super(NumberDetector, self).__init__(language)

    # Per-instance detection state; the two lists are distinct objects.
    self.entity_name = entity_name
    self.text = self.tagged_text = self.processed_text = ''
    self.number = []
    self.original_number_text = []
    self.tag = '__' + self.entity_name + '__'
    self.min_digit, self.max_digit = 1, 6
    self.language = language
    self.unit_type = unit_type
    self.detect_without_unit = detect_without_unit
    # Character-class pattern built from the shared punctuation constant,
    # compiled once per instance.
    self.punctuations_to_filter = re.compile(f'[{COMMON_NON_NUMERIC_PUNCTUATIONS}]')

    language_module_path = 'ner_v2.detectors.numeral.number.{0}.number_detection'.format(
        self.language)
    try:
        # NOTE: an ImportError raised while constructing the language-specific
        # detector is caught below as well and also triggers the fallback.
        language_module = importlib.import_module(language_module_path)
        self.language_number_detector = language_module.NumberDetector(
            entity_name=self.entity_name,
            unit_type=self.unit_type)
    except ImportError:
        fallback_module = importlib.import_module(
            'ner_v2.detectors.numeral.number.standard_number_detector')
        fallback_cls = fallback_module.NumberDetector
        data_path = get_lang_data_path(detector_path=os.path.abspath(__file__),
                                       lang_code=self.language)
        self.language_number_detector = fallback_cls(
            entity_name=self.entity_name,
            unit_type=self.unit_type,
            data_directory_path=data_path)