def extract_datetime(text, anchorDate="DEFAULT", lang=None,
                     default_time=None):
    """Pull a date/time out of a natural-language sentence.

    Understands many of the common ways people express dates and times,
    including relative expressions such as "5 days from today", "tomorrow"
    and "Tuesday".

    Vague terms are mapped to arbitrary fixed values, for example:
        * morning = 8 AM
        * afternoon = 3 PM
        * evening = 7 PM

    When no time is given or implied, 12 AM is used.

    Args:
        text (str): the text to be interpreted
        anchorDate (:obj:`datetime`, optional): reference date for relative
            expressions (i.e. what "tomorrow" is measured from). When
            omitted, the current local date/time is used. Passing None
            explicitly is deprecated: it still resolves to the current
            local date/time but emits a DeprecationWarning.
        lang (str): the BCP-47 code for the language to use, None uses
            the default
        default_time (datetime.time): time to use if none was found in
            the input string.

    Returns:
        [:obj:`datetime`, :obj:`str`]: 'datetime' is the extracted date
            as a datetime object in the user's local timezone.
            'leftover_string' is the original phrase with all date and time
            related keywords stripped out. See the examples below.

            Returns None if no date or time related text is found.

    Examples:

        >>> extract_datetime(
        ...     "What is the weather like the day after tomorrow?",
        ...     datetime(2017, 6, 30, 0, 0)
        ... )
        [datetime.datetime(2017, 7, 2, 0, 0), 'what is weather like']

        >>> extract_datetime(
        ...     "Set up an appointment 2 weeks from Sunday at 5 pm",
        ...     datetime(2016, 2, 19, 0, 0)
        ... )
        [datetime.datetime(2016, 3, 6, 17, 0), 'set up appointment']

        >>> extract_datetime(
        ...     "Set up an appointment",
        ...     datetime(2016, 2, 19, 0, 0)
        ... )
        None
    """
    if anchorDate is None:
        # Explicit None is still honoured, but callers should simply omit
        # the argument instead.
        warn(DeprecationWarning("extract_datetime(anchorDate=None) is "
                                "deprecated. This parameter can be omitted."))
        anchorDate = now_local()
    elif anchorDate == "DEFAULT":
        # Sentinel default: anchor on the current local date/time.
        anchorDate = now_local()

    locale_code = lang or get_default_loc()
    return _extract_datetime(text, anchorDate, locale_code, default_time)
def test_load_language(self):
    """Loading 'en' makes it the default and leaves 'es' unavailable.

    The loaded languages are always unloaded again, even when one of the
    assertions fails, so a failure here cannot leak state into other tests.
    """
    lingua_franca.load_language('en')
    try:
        # Verify that English is loaded and, since it's the only language
        # we've loaded, also the default.
        self.assertEqual(lingua_franca.get_default_lang(), 'en')
        # Verify that English's default full code is 'en-us'
        self.assertEqual(lingua_franca.get_full_lang_code('en'), 'en-us')
        # Verify that this is also our current full code
        self.assertEqual(lingua_franca.get_default_loc(), 'en-us')
        self.assertNotIn('es', lingua_franca.get_active_langs())
        # Verify that unloaded languages can't be invoked explicitly
        self.assertRaises(ModuleNotFoundError,
                          lingua_franca.parse.extract_number,
                          'uno',
                          lang='es')
    finally:
        unload_all_languages()
def _duration_handler(time1, lang=None, speech=True, *, time2=None,
                      use_years=True, clock=False,
                      resolution=TimeResolution.SECONDS):
    """Convert a duration to a nice spoken or displayed timespan.

    Used as a handler by nice_duration and nice_duration_dt.

    Accepts:
        datetime.timedelta, or
        seconds (int/float), or
        2 x datetime.datetime

    Examples:
        time1 = 60  ->  "1:00" or "one minute"
        time1 = 163  ->  "2:43" or "two minutes forty three seconds"
        time1 = timedelta(seconds=120)  ->  "2:00" or "two minutes"

        time1 = datetime(2019, 3, 12),
        time2 = datetime(2019, 1, 1)  ->  "seventy days"

    Args:
        time1: int/float seconds, OR datetime.timedelta, OR datetime.datetime
        time2 (datetime, optional): subtracted from time1 if time1 is
            a datetime. For any other matching type it is ignored with a
            warning. A falsy time2 is treated as "not given".
        lang (str, optional): a BCP-47 language code, None for default
        speech (bool, opt): format output for speech (True) or display (False)
        use_years (bool, opt): rtn years and days if True, total days if False
        clock (bool, opt): always format output like digital clock (see below)
        resolution (TimeResolution, optional): lower bound of the output

            TimeResolution values:
                TimeResolution.YEARS
                TimeResolution.DAYS
                TimeResolution.HOURS
                TimeResolution.MINUTES
                TimeResolution.SECONDS
                TimeResolution.MILLISECONDS

            NOTE: milliseconds are only produced when that resolution is
                  passed, and only for float input (the fractional part of
                  the float is used).

            NOTE: clock will produce digital clock-like output appropriate to
                  resolution. Has no effect on resolutions DAYS or YEARS.
                  Only applies to displayed output.

    Returns:
        str: timespan as a string

    Raises:
        Exception: if time2 is given and its type differs from time1's.
    """
    # Local import: only used to expand scientific-notation floats below.
    from decimal import Decimal

    lang = lang or get_default_loc()
    _leapdays = 0
    # Remember what the caller asked for; `resolution` may be coarsened
    # below, but the "zero ..." fallback is phrased in the requested unit.
    _input_resolution = resolution
    milliseconds = 0

    type1 = type(time1)

    if time2:
        type2 = type(time2)
        if type1 is not type2:
            raise Exception("nice_duration() can't combine data types: "
                            "{} and {}".format(type1, type2))
        elif type1 is datetime.datetime:
            duration = time1 - time2
            _leapdays = abs(leapdays(time1.year, time2.year))

            # when operating on datetimes, refuse resolutions that
            # would result in bunches of trailing zeroes
            if (time1.second == 0 and time2.second == 0 and
                    resolution.value >= TimeResolution.SECONDS.value):
                resolution = TimeResolution.MINUTES
            if (time1.minute == 0 and time2.minute == 0 and
                    resolution.value == TimeResolution.MINUTES.value):
                resolution = TimeResolution.HOURS
            if (time1.hour == 0 and time2.hour == 0 and
                    resolution.value == TimeResolution.HOURS.value):
                resolution = TimeResolution.DAYS
        else:
            warning = ("WARN: mycroft.util.format.nice_duration_dt() can't "
                       "subtract " + str(type1) + ". Ignoring 2nd "
                       "argument '" + str(time2) + "'.")
            # Temporarily emit the bare message only. Restore the original
            # formatter even if warn() raises (e.g. warnings-as-errors),
            # otherwise the process-wide formatter stays patched.
            _tmp = warnings.formatwarning
            warnings.formatwarning = \
                lambda msg, *args, **kwargs: "{}\n".format(msg)
            try:
                warnings.warn(warning)
            finally:
                warnings.formatwarning = _tmp
            duration = time1
    else:
        duration = time1

    # Pull decimal portion of seconds, if present, to use for milliseconds
    if isinstance(duration, float):
        # Expand via Decimal so floats printed in scientific notation
        # ("1e-05", "1.5e-05", "1e+16") yield plain digits; ordinary floats
        # produce exactly the same digits as str(duration).
        _fraction = format(Decimal(str(duration)), "f").partition(".")[2]
        # Truncate (not round): two digits when spoken, three when displayed
        _fraction = _fraction[:2] if speech else _fraction[:3]
        milliseconds = float("0." + (_fraction or "0"))

    # Cast duration to datetime.timedelta for human-friendliness
    if not isinstance(duration, datetime.timedelta):
        duration = datetime.timedelta(seconds=duration)

    days = duration.days
    if use_years:
        # Leap days are discounted so that years are counted as 365 days
        days -= _leapdays if days > 365 else 0
        years = days // 365
    else:
        years = 0
    days = days % 365 if years > 0 else days

    # We already stored milliseconds. Now we want the integer part.
    seconds = duration.seconds
    minutes = seconds // 60
    seconds %= 60
    hours = minutes // 60
    minutes %= 60

    if speech:
        out = ""
        if years > 0:
            out += pronounce_number(years, lang) + " "
            out += _translate_word("year" if years == 1 else "years", lang)

        if days > 0 and resolution.value > TimeResolution.YEARS.value:
            if out:
                out += " "
            out += pronounce_number(days, lang) + " "
            out += _translate_word("day" if days == 1 else "days", lang)

        if hours > 0 and resolution.value > TimeResolution.DAYS.value:
            if out:
                out += " "
            out += pronounce_number(hours, lang) + " "
            out += _translate_word("hour" if hours == 1 else "hours", lang)

        if minutes > 0 and resolution.value > TimeResolution.HOURS.value:
            if out:
                out += " "
            out += pronounce_number(minutes, lang) + " "
            out += _translate_word("minute" if minutes == 1 else "minutes",
                                   lang)

        if ((seconds > 0 and
             resolution.value >= TimeResolution.SECONDS.value) or
            (milliseconds > 0 and
             resolution.value == TimeResolution.MILLISECONDS.value)):

            if resolution.value == TimeResolution.MILLISECONDS.value:
                seconds += milliseconds
            if out:
                out += " "
                # Throw "and" between minutes and seconds if duration < 1 hour
                if len(out.split()) > 3 or seconds < 1:
                    out += _translate_word("and", lang) + " "
            # speaking "zero point five seconds" is better than "point five"
            out += pronounce_number(seconds, lang) + " "
            out += _translate_word("second" if seconds == 1 else "seconds",
                                   lang)

    else:
        # M:SS, MM:SS, H:MM:SS, Dd H:MM:SS format
        _seconds_str = ("0" + str(seconds)) if seconds < 10 else str(seconds)

        out = ""
        if years > 0:
            out = str(years) + "y "
        if days > 0 and resolution.value > TimeResolution.YEARS.value:
            out += str(days) + "d "
        if (hours > 0 and resolution.value > TimeResolution.DAYS.value) or \
                (clock and resolution is TimeResolution.HOURS):
            out += str(hours)

        if resolution.value == TimeResolution.MINUTES.value and not clock:
            out += (("h " + str(minutes) + "m") if hours > 0
                    else str(minutes) + "m")
        elif (minutes > 0 and
              resolution.value > TimeResolution.HOURS.value) or \
                (clock and resolution.value >= TimeResolution.HOURS.value):
            if hours != 0 or (clock and resolution is TimeResolution.HOURS):
                out += ":"
            if minutes < 10:
                out += "0"
            out += str(minutes) + ":"
            if (seconds > 0 and
                    resolution.value > TimeResolution.MINUTES.value) or clock:
                out += _seconds_str
            else:
                out += "00"
        # if we have seconds but no minutes...
        elif (seconds > 0 or clock) and \
                resolution.value > TimeResolution.MINUTES.value:
            # check if output ends in hours
            try:
                if str(hours) == out.split()[-1]:
                    out += ":"
            except IndexError:
                # out is still empty: nothing precedes the seconds
                pass
            out += ("00:" if hours > 0 else "0:") + _seconds_str

        if (milliseconds > 0 or clock) and \
                resolution.value == TimeResolution.MILLISECONDS.value:
            # float() guards the case where no fractional part was captured
            # (milliseconds is still the int 0, e.g. int/timedelta input
            # with clock=True); str(0) has no "." to split on.
            _mill = str(float(milliseconds)).split(".")[1]
            # right-pad milliseconds to three decimal places
            _mill = _mill.ljust(3, "0")
            # make sure output < 1s still formats correctly
            if out == "":
                out = "0:00"
            else:
                if (str(hours) == out.split()[-1]) and ":" not in out:
                    out += ":00:00"
            # only append milliseconds to output that contains
            # minutes and/or seconds
            if ":" in out:
                out += "." + _mill

        # If this evaluates True, out currently ends in hours: "1d 12"
        if (out and
                resolution.value >= TimeResolution.HOURS.value and
                ":" not in out and
                out[-1] != "m" and
                hours > 0):
            # to "1d 12h"
            out += "h"
        out = out.strip()

    if not out:
        # Nothing was emitted at the effective resolution: report zero in
        # the unit the caller originally requested.
        out = "zero " if speech else "0"
        if _input_resolution == TimeResolution.YEARS:
            out += "years" if speech else "y"
        elif _input_resolution == TimeResolution.DAYS:
            out += "days" if speech else "d"
        elif _input_resolution == TimeResolution.HOURS:
            out += "hours" if speech else "h"
        elif _input_resolution == TimeResolution.MINUTES:
            if speech:
                out = "under a minute" if seconds > 0 else "zero minutes"
            else:
                out = "0m"
        else:
            out = "zero seconds" if speech else "0:00"

    return out