def __init__(self, entity_name, data_directory_path, locale=None, timezone='UTC', past_date_referenced=False):
    """
    Base Regex class which will be imported by language date class by giving their data folder path
    This will create standard regex and their parser to detect date for given language.

    Args:
        entity_name (str): name of the entity; wrapped in double underscores to build ``self.tag``
        data_directory_path (str): path of data folder for given language,
            handed to ``init_regex_and_parser``
        locale (Optional, str): user locale default None; kept on ``self.locale``
        timezone (Optional, str): user timezone default UTC, resolved through ``get_timezone``
        past_date_referenced (boolean): if the date reference is in past, this is helpful for text like 'kal',
            'parso' to know if the reference is past or future.
    """
    self.text = ''
    self.tagged_text = ''
    self.processed_text = ''
    self.date = []
    self.original_date_text = []
    self.entity_name = entity_name
    self.tag = '__' + entity_name + '__'
    # Previously the locale argument was accepted but dropped; keep it on the
    # instance so it is not silently lost when callers forward it here.
    self.locale = locale
    self.timezone = get_timezone(timezone)
    # "now" is taken once at construction time, in the resolved timezone
    self.now_date = datetime.datetime.now(tz=self.timezone)
    self.bot_message = None
    self.past_date_referenced = past_date_referenced

    # dict to store words for date, numerals and words which comes in reference to some date
    self.date_constant_dict = {}
    self.datetime_constant_dict = {}
    self.numerals_constant_dict = {}

    # define dynamic created standard regex from language data files
    self.regex_relative_date = None
    self.regex_day_diff = None
    self.regex_date_month = None
    self.regex_date_ref_month_1 = None
    self.regex_date_ref_month_2 = None
    self.regex_date_ref_month_3 = None
    self.regex_after_days_ref = None
    self.regex_weekday_month_1 = None
    self.regex_weekday_month_2 = None
    self.regex_weekday_diff = None
    self.regex_weekday = None

    # Method to initialise value in regex; called only after the placeholders above exist
    self.init_regex_and_parser(data_directory_path)

    # Variable to define default order in which these regex will work
    self.detector_preferences = [
        self._gregorian_day_month_year_format,
        self._detect_relative_date,
        self._detect_date_month,
        self._detect_date_ref_month_1,
        self._detect_date_ref_month_2,
        self._detect_date_ref_month_3,
        self._detect_date_diff,
        self._detect_after_days,
        self._detect_weekday_ref_month_1,
        self._detect_weekday_ref_month_2,
        self._detect_weekday_diff,
        self._detect_weekday,
    ]
def __init__(self, entity_name, data_directory_path, timezone=None):
    """
    Prepare the generic, data-file driven regex time detector for one language.

    Args:
        entity_name (str): name of the entity; wrapped in double underscores to build ``self.tag``
        data_directory_path (str): path of data folder for given language,
            handed to ``init_regex_and_parser``
        timezone (Optional, str): user timezone, default None. A falsy value leaves
            ``self.timezone`` as None, so ``now_date`` is taken with ``tz=None``;
            otherwise the value is resolved through ``get_timezone``.
    """
    self.entity_name = entity_name
    self.tag = '__' + entity_name + '__'

    # working buffers, filled on each detection run
    self.text = self.tagged_text = self.processed_text = ''
    self.bot_message = None

    self.timezone = get_timezone(timezone) if timezone else None
    self.now_date = datetime.datetime.now(tz=self.timezone)

    # lookup tables for time words, datetime reference words and numerals
    self.time_constant_dict = {}
    self.datetime_constant_dict = {}
    self.numerals_constant_dict = {}

    # placeholder for the time regex generated from the language data files
    self.regex_time = None

    # populate the regex and lookup tables declared above
    self.init_regex_and_parser(data_directory_path)

    # detectors are listed in the default order in which they are applied
    self.detector_preferences = [
        self._detect_time_with_coln_format,
        self._detect_hour_minute,
    ]
def __init__(self, entity_name, locale=None, language=ENGLISH_LANG, timezone='UTC', past_date_referenced=False):
    """Set up a DateDetector that delegates to a language specific date detector.

    The module ``ner_v2.detectors.temporal.date.<language>.date_detection`` is tried first.
    If an ImportError is raised while loading or instantiating it, the generic
    ``standard_date_regex`` detector is built from the language data folder instead.

    Args:
        entity_name: A string by which the detected date entity substrings would be replaced
            with on calling detect_entity()
        locale (Optional, str): user locale, default is None
        language (Optional, str): language code used to pick the detector module and the
            data folder, default is ENGLISH_LANG
        timezone (Optional, str): timezone identifier, resolved through ``get_timezone``;
            default is UTC
        past_date_referenced (bool): to know if past or future date is referenced for
            date text like 'kal', 'parso'
    """
    self.entity_name = entity_name
    self.tag = '__' + entity_name + '__'
    self.language = language
    self.locale = locale

    # working buffers and results
    self.text = self.tagged_text = self.processed_text = ''
    self.date = []
    self.original_date_text = []
    self.bot_message = None

    self.timezone = get_timezone(timezone)
    self.now_date = datetime.datetime.now(tz=self.timezone)

    # keyword arguments common to the language specific detector and the fallback
    shared_kwargs = {
        'entity_name': self.entity_name,
        'timezone': self.timezone,
        'past_date_referenced': past_date_referenced,
        'locale': self.locale,
    }
    language_module_path = 'ner_v2.detectors.temporal.date.{0}.date_detection'.format(self.language)

    try:
        language_module = importlib.import_module(language_module_path)
        self.language_date_detector = language_module.DateDetector(**shared_kwargs)
    except ImportError:
        # no usable language specific module: build the data-file driven detector
        fallback_module = importlib.import_module('ner_v2.detectors.temporal.date.standard_date_regex')
        language_data_path = get_lang_data_path(detector_path=os.path.abspath(__file__),
                                                lang_code=self.language)
        self.language_date_detector = fallback_module.DateDetector(
            data_directory_path=language_data_path,
            **shared_kwargs
        )
def __init__(self, entity_name='time', timezone=None, language=ENGLISH_LANG):
    """Set up a TimeDetector that delegates to a language specific time detector.

    The module ``ner_v2.detectors.temporal.time.<language>.time_detection`` is tried first.
    If an ImportError is raised while loading or instantiating it, the generic
    ``standard_time_regex`` detector is built from the language data folder instead.

    Args:
        entity_name(str): A string by which the detected time stamp substrings would be
            replaced with on calling detect_entity()
        timezone(str): timezone identifier, default is None. A falsy value leaves
            ``self.timezone`` as None; otherwise it is resolved through ``get_timezone``
        language(str): ISO 639 code for language of entities to be detected by the
            instance of this class
    """
    # set before calling the base-class initializer, as in the original ordering
    self._supported_languages = self.get_supported_languages()
    super(TimeDetector, self).__init__(language=language)

    self.entity_name = entity_name
    self.tag = '__' + entity_name + '__'
    self.language = language

    # working buffers and results
    self.text = self.tagged_text = self.processed_text = ''
    self.time = []
    self.original_time_text = []

    self.timezone = get_timezone(timezone) if timezone else None

    # keyword arguments common to the language specific detector and the fallback
    shared_kwargs = {
        'entity_name': self.entity_name,
        'timezone': self.timezone,
    }
    language_module_path = 'ner_v2.detectors.temporal.time.{0}.time_detection'.format(self.language)

    try:
        language_module = importlib.import_module(language_module_path)
        self.language_time_detector = language_module.TimeDetector(**shared_kwargs)
    except ImportError:
        # no usable language specific module: build the data-file driven detector
        fallback_module = importlib.import_module('ner_v2.detectors.temporal.time.standard_time_regex')
        language_data_path = get_lang_data_path(detector_path=os.path.abspath(__file__),
                                                lang_code=self.language)
        self.language_time_detector = fallback_module.TimeDetector(
            data_directory_path=language_data_path,
            **shared_kwargs
        )