Exemples Python de find_word (utils.find_word)

Exemple #1

0

Afficher le fichier

    def parse(self, doc) -> tuple:
        """
        Extract the requested command and the word it applies to.

        Recognised phrasings, checked in this order:
        - "spell <word>"
        - "define <word>"
        - "definition [of] <word>" / "spelling [of] <word>"

        Args:
            doc: the parsed request; it must support ``len()`` and its tokens
                must expose ``i``, ``text`` and ``nbor()``.

        Returns:
            A tuple of 2 strings: (command, target word), where command is
            "spell" or "define". ``(None, None)`` when nothing matched.
        """
        def has_following(token, offset):
            # nbor() raises IndexError when it would step outside the doc,
            # so it cannot be used as an existence test; compare indices.
            return token.i + offset < len(doc)

        word = utils.find_word(doc, "spell")
        if word and has_following(word, 1):
            return "spell", word.nbor(1).text

        word = utils.find_word(doc, "define")
        if word and has_following(word, 1):
            return "define", word.nbor(1).text

        word = utils.find_word(doc, ["definition", "spelling"])
        if word and has_following(word, 1):
            command = "spell" if word.text == "spelling" else "define"

            if word.nbor(1).text != "of":
                # "definition <word>": the target directly follows
                return command, word.nbor(1).text
            if has_following(word, 2):
                # "definition of <word>": skip over the "of"
                return command, word.nbor(2).text

        return None, None

Exemple #2

0

Afficher le fichier

 def get_query_type(self, doc: Doc):
     """
     Classify the request in `doc` as "interact", "scan" or "query".

     The keyword groups are tried in a fixed priority order: toggle/turn
     words first, then scan-like words (which become "query" when "how" is
     also present), then question words. Anything else is "interact".
     """
     if utils.find_word(doc, ["toggle", "turn"]):
         return "interact"

     scan_word = utils.find_word(doc, ["scan", "look", "search", "find"])
     if scan_word:
         # "how ... find ..." is a question about scanning, not a scan itself
         return "query" if utils.find_word(doc, ["how"]) else "scan"

     question_words = ["should", "how", "are", "did", "is", "what", "list"]
     return "query" if utils.find_word(doc, question_words) else "interact"

Exemple #3

0

Afficher le fichier

    def parse_interact(self, doc: Doc):
        """
        Work out which device an "interact" request targets and which state
        it should end up in.

        Args:
            doc: the parsed request. Tokens are read through ``i``, ``text``,
                ``pos_``, ``dep_``, ``children`` and ``nbor()``.

        Returns:
            A ``(device_name, objective_state)`` tuple. ``objective_state`` is
            ``"toggle"``, ``1`` (on) or ``0`` (off).

        Raises:
            Exception: when no target token can be located.
        """
        verb_token = utils.find_word(doc, ["toggle", "turn"])
        target_token = None
        if verb_token:
            after_verb = doc[verb_token.i + 1:]
            # the target is the first noun following the verb
            target_token = next(
                (token for token in after_verb if token.pos_ == "NOUN"), None)

            if not target_token:
                raise Exception("Unable to find target")

            if verb_token.text == "toggle":
                objective_state = "toggle"
            else:
                # Use the first on/off after the verb so that both
                # "turn on the lamp" and "turn the lamp on" are understood.
                # With neither word present the state stays 0.
                switch_word = next(
                    (token.text for token in after_verb
                     if token.text in ("on", "off")), None)
                objective_state = 1 if switch_word == "on" else 0
        else:
            log.warning("no verb found, guessing where the target is")
            action_token = utils.find_word(doc, ["on", "off", "toggle"])
            if action_token:
                if len(doc) < 2:
                    # the action word is the only token: there is no neighbour
                    raise Exception("Unable to find target")
                # take the neighbour after the action word, or the one before
                # it when the action word ends the request
                target_token = action_token.nbor(-1 if action_token.i ==
                                                 len(doc) - 1 else 1)
                if action_token.text == "toggle":
                    objective_state = "toggle"
                else:
                    objective_state = 1 if action_token.text == "on" else 0
            else:
                # HACK: assume toggle objective because that's probably what was
                # supposed to happen, but the speech recognition f****d up
                log.debug("No action token, assuming toggle objective")
                objective_state = "toggle"
                # TODO: also assume target token
                nouns = [
                    token for token in doc if token.pos_ in ("NOUN", "PROPN")
                ]
                log.debug(f"Found nouns: {nouns}")
                if not nouns:
                    raise Exception("Unable to find target")
                target_token = nouns[-1]

        device_name = target_token.text
        target_prev_token = target_token.nbor(
            -1) if target_token.i > 0 else None
        # walk forward through a compound noun ("desk lamp"), stopping at the
        # end of the doc so nbor() cannot raise
        while target_token.dep_ == "compound" and target_token.i + 1 < len(doc):
            target_token = target_token.nbor(1)
            device_name += f" {target_token.text}"

        # a target without children must not be indexed blindly
        children = list(target_token.children)
        if target_prev_token and target_prev_token.pos_ == "PROPN":
            device_name = f"{target_prev_token.text} {device_name}"
        elif children and children[0].dep_ == "poss":
            device_name = f"{children[0].text} {device_name}"

        return device_name, objective_state

Exemple #4

0

Afficher le fichier

Fichier : __init__.py Projet : dyc3/crystal

	def parse(self, doc):
		"""
		Pull the volume and chapter numbers out of the request.

		Returns a lazy ``map`` yielding the volume number and then the chapter
		number. Each is taken from the token following the words "volume" and
		"chapter" respectively.
		"""
		# the words "to" and "for" are read as the numbers 2 and 4
		homophones = {"to": 2, "for": 4}

		def _to_num(text):
			if text in homophones:
				return homophones[text]
			try:
				return int(text)
			except ValueError:
				return utils.text2int(text)

		volume_token = utils.find_word(doc, "volume")
		chapter_token = utils.find_word(doc, "chapter")
		number_texts = [
			utils.select_number_bleedy(keyword_token.nbor(1)).text
			for keyword_token in (volume_token, chapter_token)
		]
		return map(_to_num, number_texts)

Exemple #5

0

Afficher le fichier

    def parse(self, doc):
        """
        Decide which action (check or set) and which target (time or alarm)
        the request refers to.

        Returns an ``(action, target)`` tuple. "Check" words take priority
        over "set" words, and "time" takes priority over alarm-like words;
        with no keyword at all the result is ``(ACTION_CHECK, TARGET_TIME)``.
        """
        wants_check = utils.find_word(doc, ["what", "give", "check", "list", "show"])
        if not wants_check and utils.find_word(doc, ["set", "create", "make", "start"]):
            action = ACTION_SET
        else:
            action = ACTION_CHECK

        about_time = utils.find_word(doc, ["time"])
        if not about_time and utils.find_word(doc, ["alarm", "timer", "alert"]):
            target = TARGET_ALARM
        else:
            target = TARGET_TIME

        return action, target

Exemple #6

0

Afficher le fichier

Fichier : tracker.py Projet : RacleRay/Bank_FAQ_ChatBot

 def check(self, query):
     """
     Report whether the current input matches one of the defined patterns.

     On the first rule that matches, the matched pieces are joined and
     stored in ``self.keyword`` and True is returned. Returns False when no
     rule matches.
     """
     tokens = find_word(query)
     for rule in rules:
         first_result = pattern_pos_match(rule, tokens)[0]
         if not first_result[0]:
             continue
         self.keyword = ''.join(first_result[1])
         return True
     return False

Exemple #7

0

Afficher le fichier

	def parse(self, doc) -> str:
		"""
		Classify which kind of program the request in `doc` asks for. The
		result is a program type, NOT the exact command to run.

		Every token is compared by lemma and by lowercased text against the
		keyword rules below, in order; the first rule that matches any token
		wins. Some rules also require a specific following token
		(e.g. "volume" + "control").

		Possible outputs include:
		- terminal, file browser, web browser, launcher, mail
		- volume-control, text editor
		- calculator, calendar
		- a known site name (youtube, reddit, ...)
		- togethertube, togethertube-dab, dc-universe
		- screeps, screeps-docs, minecraft
		- the token's lemma when it looks like a domain name (.com/.org/.net/.io)
		- textbook

		Falls through (returning None) when nothing matches.
		"""
		def followed_by(token, options):
			# True when a next token exists and its lemma or lowercased text is in `options`
			if token.i >= len(doc) - 1:
				return False
			follower = token.nbor(1)
			return follower.lemma_ in options or str(follower).lower() in options

		for token in doc:
			forms = (token.lemma_, token.lower_)
			lemma = forms[0]

			def is_one_of(*options):
				return any(form in options for form in forms)

			if is_one_of("terminal", "shell", "console", "prompt", "bash"):
				return "terminal"
			if is_one_of("nautilus", "file"):
				return "file browser"
			if is_one_of("web", "internet"):
				return "web browser"
			if is_one_of("launcher", "rofi", "dmenu"):
				return "launcher"
			if is_one_of("mail", "email", "inbox"):
				return "mail"
			if is_one_of("volume", "audio") and followed_by(token, ("control", "panel")):
				return "volume-control"
			if is_one_of("notepad"):
				return "text editor"
			if is_one_of("text", "note") and followed_by(token, ("editor", "edit", "pad")):
				return "text editor"
			# "g edit" is how "gedit" may arrive as two tokens
			if is_one_of("g") and followed_by(token, ("edit",)):
				return "text editor"
			if is_one_of("calculator", "calendar"):
				return lemma.lower()
			if is_one_of("youtube", "reddit", "twitch", "amazon", "google", "netflix", "github", "canvas"):
				return lemma.lower()
			if is_one_of("together") and followed_by(token, ("tube",)):
				return "togethertube"
			if is_one_of("dab") and followed_by(token, ("room",)):
				return "togethertube-dab"
			if is_one_of("dc") and followed_by(token, ("universe",)):
				return "dc-universe"
			if is_one_of("screeps"):
				if followed_by(token, ("documentation", "docs", "api")):
					return "screeps-docs"
				return "screeps"
			if is_one_of("minecraft"):
				return "minecraft"
			if any(form.endswith((".com", ".org", ".net", ".io")) for form in forms):
				return lemma

		# only consulted when no per-token rule matched
		if utils.find_word(doc, "textbook"):
			return "textbook"

Exemple #8

0

Afficher le fichier

    def test_find_word(self):
        """find_word accepts a word or a list of words and returns the earliest match."""
        # should find word, whether it is given as a list or as a bare string
        greeting = nlp("Hello, world!")
        for wanted in (["hello"], "hello"):
            found = utils.find_word(greeting, wanted)
            self.assertIsNotNone(found)
            self.assertEqual(found.lemma_, "hello")

        # should find first word if there are duplicates
        pangram = nlp("The quick brown fox jumps over the lazy dog.")
        found = utils.find_word(pangram, ["the"])
        self.assertIsNotNone(found)
        self.assertEqual(found.lemma_, "the")
        self.assertEqual(found.i, 0)

        # should find the first word given more than one word:
        # "fox" comes before "dog" in the sentence, so it wins
        found = utils.find_word(pangram, ["dog", "fox"])
        self.assertIsNotNone(found)
        self.assertEqual(found.lemma_, "fox")

Exemple #9

0

Afficher le fichier

 def parse_target_time(
     self, doc, now=None) -> datetime.datetime:
     """
     Turn the time expression in `doc` into an absolute datetime.

     Two kinds of request are handled:
     - durations ("in 2 hours 30 minutes"), recognised by the words
       hour/minute/second; the result is ``now`` plus that duration.
     - clock times ("at 5", "7 pm"); the result is the next occurrence of
       that time, rolled over to tomorrow when it already passed today.

     Args:
         doc: the parsed request.
         now: reference time. Defaults to the current time at call time.
             (A ``datetime.now()`` default in the signature would be
             evaluated only once, when the function is defined.)

     Returns:
         The target ``datetime.datetime``.
     """
     if now is None:
         now = datetime.datetime.now()

     duration_units = ["hour", "minute", "second"]
     if utils.find_word(doc, duration_units):
         all_time_ents = [
             ent for ent in doc.ents if ent.label_ in ["TIME", "CARDINAL"]
         ]
         if len(all_time_ents) > 0:
             start_i = all_time_ents[0][0].i
             end_i = all_time_ents[-1][-1].i + 1
             if end_i < len(doc):
                 additional = utils.find_word(doc,
                                              duration_units,
                                              min_idx=end_i)
                 while additional:
                     # the entity parser didn't quite get all the tokens
                     end_i = additional.i + 1
                     additional = utils.find_word(
                         doc, duration_units, min_idx=end_i)
         else:
             # no entities: start at the number in front of the first unit
             # word and extend through the last unit word
             time_word = utils.find_word(doc, duration_units)
             # NOTE(review): nbor(-1) assumes the unit word is not the first
             # token of the doc -- confirm against callers
             start_i = utils.select_number_bleedy(time_word.nbor(-1))[0].i
             while time_word:
                 end_i = time_word.i + 1
                 time_word = utils.find_word(doc,
                                             duration_units,
                                             min_idx=end_i)
         seconds = utils.parse_duration_to_seconds(doc[start_i:end_i])
         return now + datetime.timedelta(seconds=seconds)
     else:
         all_time_ents = [
             ent for ent in doc.ents
             if ent.label_ == "CARDINAL" or ent.label_ == "TIME"
         ]
         if len(all_time_ents) > 0:
             start_i = all_time_ents[0][0].i
             end_i = all_time_ents[-1][-1].i + 1
         else:
             # fall back to number-like tokens and am/pm markers
             # NOTE(review): raises IndexError when the doc contains none
             num_tokens = [
                 token for token in doc
                 if token.like_num or token.text in ["am", "pm"]
             ]
             start_i = num_tokens[0].i
             end_i = num_tokens[-1].i + 1
         if utils.find_word(doc, ["am", "pm"]):
             target_time, _ = cal.parseDT(doc[start_i:end_i].text,
                                          sourceTime=now)
         else:
             # bare hour without am/pm: 12 is added to the hour when `now`
             # is before noon, then the result is wrapped into 0-23
             target_time = now.replace(
                 hour=(utils.text2int(doc[start_i:end_i].text) + 12 *
                       (now.hour < 12)) % 24,
                 minute=0,
                 second=0,
                 microsecond=0)
         if target_time < now:
             # that time already passed today, so it means tomorrow
             target_time += datetime.timedelta(days=1)
         return target_time

Exemple #10

0

Afficher le fichier

Fichier : spell_check.py Projet : DasyDong/spell-check-go

def check_words_each_line(ent, words_line):
    """
    Check the words found on one line and record the outcome in WORD_CHECK.

    Each word that needs checking is split on Camel-Case boundaries
    (AbcDefGo -> abc_def_go; GetClientOutOfClusterOrDie ->
    [get client out of cluster or die]) and every part that needs checking
    is passed to ``check_word_typo``. WORD_CHECK[word] is set to 1 when that
    call is truthy and to 0 otherwise.

    :param ent: passed through unchanged to ``check_word_typo``
    :param words_line: the line of text to extract words from
    :return: None; results are written to WORD_CHECK
    """
    for candidate in find_word(words_line):
        if not word_is_need_check(candidate):
            continue
        for part in camel2underscore(candidate).split('_'):
            if not part or not word_is_need_check(part):
                continue
            # NOTE(review): each part overwrites the previous result, so only
            # the last checked part decides the stored value -- confirm this
            # is intended
            WORD_CHECK[candidate] = 1 if check_word_typo(ent, part) else 0

Exemple #11

0

Afficher le fichier

Fichier : __init__.py Projet : dyc3/crystal

	def parse(self, doc):
		"""
		Returns a i3-msg command with arguments to complete the action.

		The request is classified by its verb:
		- switch/focus/show/pull/go: focus a named window or switch workspace
		- move/put: move a window to a workspace, or the workspace to an output
		- kill/close/quit: close a named window or the focused one
		- toggle/enable/disable/make: change fullscreen/floating state

		Raises:
			Exception: when the verb, target window, workspace number or
				direction needed for the action cannot be parsed.
		"""
		deictic_words = ["this", "that"]

		workspace_token = utils.find_word(doc, ["workspace", "space", "desktop"])
		workspace_number = None
		# nbor() raises IndexError past the end of the doc, so only look for a
		# number when something actually follows the workspace word
		if workspace_token and workspace_token.i + 1 < len(doc):
			num_token = workspace_token.nbor(1)

			# "workspace number 3": skip over the word "number"
			if num_token.lemma_ == "number" and num_token.i + 1 < len(doc):
				num_token = num_token.nbor(1)

			num_text = num_token.text.lower()
			# just in case the input filtering doesn't catch these cases
			homophones = { "to": 2, "for": 4 }
			if num_text in homophones:
				# keep the homophone value instead of re-parsing the word
				workspace_number = homophones[num_text]
			else:
				try:
					workspace_number = int(num_token.text)
				except ValueError:
					try:
						workspace_number = utils.text2int(num_text)
					except Exception as e:
						log.debug(f"Failed to parse workspace number: {e}")

		verb_word = utils.find_word(doc, ["switch", "focus", "show", "pull", "go", "move", "put", "kill", "close", "quit", "toggle", "enable", "disable", "make"])
		if not verb_word:
			# TODO: create Exception specifically for parsing failures
			raise Exception("Failed to parse a verb from the input")

		# target_token indicates the target entity the request is referencing
		# used for requests like "show me steam" or "switch to the web browser"
		# FIXME: do something more robust
		target_token = utils.find_word(doc, ["this", "that", "steam", "browser", "firefox", "discord", "telegram", "calculator", "gedit", "editor", "studio", "blender", "spotify", "vlc"])
		# a named program vs. a reference to the focused window
		names_program = bool(target_token) and target_token.text not in deictic_words
		names_focused = bool(target_token) and target_token.text in deictic_words

		# always bound, so no branch below can hit an unassigned local
		matching_windows = []
		if names_program:
			matching_windows = self.find_matching_windows_in_tree(self.get_tree(), target_token.text.lower())
			log.info(f"Found {len(matching_windows)} matching windows")

		command = None
		# switching workspaces
		if verb_word.lower_ in ["switch", "focus", "show", "pull", "go"]:
			if names_program:
				if len(matching_windows) > 0:
					command = f'i3-msg \'[con_id="{matching_windows[0]["id"]}"] focus\''
				else:
					raise Exception("Could not find any windows matching query")
			elif workspace_token and workspace_number:
				command = f'i3-msg "workspace {workspace_number}"'
			else:
				# TODO: create Exception specifically for parsing failures
				raise Exception("Failed to parse input for workspace number")
		# moving windows to other workspaces
		elif verb_word.lower_ in ["move", "put"]:
			if not workspace_token:
				# TODO: create Exception specifically for parsing failures
				raise Exception("Unable to parse for target workspace")
			# "to"/"on" one or two tokens before the workspace word means the
			# workspace is the destination; bounds are checked before nbor()
			position = workspace_token.i
			is_destination = (
				(position >= 1 and workspace_token.nbor(-1).text in ["to", "on"])
				or (position >= 2 and workspace_token.nbor(-2).text in ["to", "on"])
			)
			if is_destination:
				# This means that we are moving a window to the target workspace
				if not workspace_number:
					# TODO: create Exception specifically for parsing failures
					raise Exception("Unable to parse for target workspace")
				if names_program:
					if len(matching_windows) > 0:
						command = f'i3-msg \'[con_id="{matching_windows[0]["id"]}"] focus; move container to workspace number {workspace_number}\''
					else:
						raise Exception("Could not find any windows matching query")
				elif names_focused:
					command = f'i3-msg "move container to workspace number {workspace_number}"'
				else:
					raise Exception("Failed to parse which program to move")
			else:
				# This means that we are moving the target workspace to a different output
				direction = utils.find_word(doc, ["up", "down", "left", "right", "primary"])
				if not direction:
					raise Exception("Failed to parse which direction to move the current workspace")
				# if workspace_number:
					# NOTE: this is not yet supported by i3
					# command = 'i3-msg "move workspace {} to output {}"'.format(workspace_number, direction.text)
				command = f'i3-msg "move workspace to output {direction.text}"'
		elif verb_word.lower_ in ["kill", "close", "quit"]:
			if names_program:
				if len(matching_windows) > 0:
					command = f'i3-msg \'[con_id="{matching_windows[0]["id"]}"] focus; kill\''
				else:
					raise Exception("Could not find any windows matching query")
			elif names_focused:
				command = 'i3-msg "kill"'
			else:
				raise Exception("Failed to parse which program to kill")
		elif verb_word.lower_ in ["toggle", "enable", "disable", "make"]:
			verb_word = utils.find_word(doc, ["toggle", "enable", "disable", "make"])
			attribute_word = utils.find_word(doc, ["fullscreen", "floating", "full", "float"])
			if verb_word and attribute_word:
				verb = verb_word.text
				if verb == "make":
					verb = "enable"
				attribute = attribute_word.text
				if attribute == "full":
					attribute = "fullscreen"
				elif attribute == "float":
					attribute = "floating"
				if names_program:
					if len(matching_windows) > 0:
						command = f'i3-msg \'[con_id="{matching_windows[0]["id"]}"] focus; {attribute} {verb}\''
					else:
						raise Exception("Could not find any windows matching query")
				else:
					command = f'i3-msg "{attribute} {verb}"'
			else:
				raise Exception(f"verb_word ({verb_word}) or attribute_word ({attribute_word}) not found")
		else:
			raise Exception(f"Unknown verb {verb_word.text}")

		return command

Exemple #12

0

Afficher le fichier

	def determine_program(self, program_type: str, doc=None) -> str:
		"""
		Map a program type to the exact command line (program plus arguments)
		to run.

		Fixed program types and known sites are resolved from lookup tables;
		anything ending in .com/.org/.net/.io is opened in the browser. The
		"textbook" type needs `doc` to find the word in front of "textbook"
		and searches the textbooks folder for a PDF whose name contains it.

		Returns None when `doc` is missing/empty and no earlier rule applied,
		or when the program type is unknown. Raises Exception when no
		matching textbook is found.
		"""
		fixed_commands = {
			"terminal": "x-terminal-emulator",
			"file browser": "nautilus --no-desktop",
			"web browser": "x-www-browser",
			"launcher": "rofi -show run",
			"mail": "x-www-browser mail.google.com",
			"volume-control": "pavucontrol",
			"text editor": "gedit",
			"togethertube": "x-www-browser opentogethertube.com",
			"togethertube-dab": "x-www-browser opentogethertube.com/room/dab",
			"dc-universe": "x-www-browser dcuniverse.com",
			"screeps": "x-www-browser https://screeps.com/a/#!/map",
			"screeps-docs": "x-www-browser https://docs.screeps.com/api/",
			"minecraft": "minecraft-launcher",
		}
		if program_type in fixed_commands:
			return fixed_commands[program_type]

		if program_type in ["calculator", "calendar"]:
			return f"gnome-{program_type}"

		# sites that open a specific page rather than the bare domain
		site_landing_pages = {
			"github": "x-www-browser https://github.com/notifications",
			"canvas": "x-www-browser https://sit.instructure.com",
		}
		if program_type in ["youtube", "reddit", "twitch", "amazon", "google", "netflix", "github", "canvas"]:
			if program_type in site_landing_pages:
				return site_landing_pages[program_type]
			suffix = ".tv" if program_type == "twitch" else ".com"
			return f"x-www-browser {program_type}{suffix}"

		if program_type.endswith((".com", ".org", ".net", ".io")):
			return f"x-www-browser {program_type}"

		# everything below needs the parsed request
		if not doc:
			return

		if program_type == "textbook":
			textbook = utils.find_word(doc, "textbook")
			# the word right before "textbook" says which one is wanted
			specifier = textbook.nbor(-1)
			# any hardcoded commands to open specific textbooks should go here.

			# otherwise, look for textbooks that match the specifier in this folder
			# TODO: use crystal.core.get_config("textbooks_path", optional=True)
			search_path: Path = Path.home().joinpath("Documents/school/textbooks")
			select = None
			# no early exit: the last matching PDF yielded by glob is used
			for candidate in search_path.glob("**/*.pdf"):
				if specifier.text.lower() in candidate.name.lower():
					select = candidate

			if not select:
				raise Exception(f"Unable to find a {specifier} textbook")
			return f"evince {shlex.quote(str(select.absolute()))}"

Exemple #13

0

Afficher le fichier

	def parse(self, doc):
		"""
		Return "forecast" when the request mentions "tomorrow", otherwise
		"current".
		"""
		return "forecast" if utils.find_word(doc, "tomorrow") else "current"

Exemple #14

0

Afficher le fichier

	def extract_parameters(self, doc):
		"""
		Extracts action and parameters for human_input

		Only the first sentence of `doc` is examined. The action is the first
		word whose lemma is click/move/press/scroll, or type/dictate (both
		reported as "type").

		Returns a tuple, a string of the action and a tuple of the parameters:
		- ("click", (button,))              button defaults to "left"
		- ("scroll", (direction, amount))   defaults to ("down", 0)
		- ("move", (direction, amount))     amount is scaled by the unit size
		- ("press", (keys,))                following tokens joined with "+"
		- ("type", (text,))

		Returns None when the doc has no sentence or no action word is found.
		"""
		sentence = next(doc.sents, None)
		if sentence is None:
			# empty doc: there is nothing to extract
			return None

		inputaction = None

		# extract the action
		for word in sentence:
			if word.lemma_ in ["click", "move", "press", "scroll"]:
				inputaction = word.lemma_
				break
			if word.lemma_ in ["type", "dictate"]:
				inputaction = "type"
				break

		# default parameters
		click_param = "left"
		scroll_direction = "down"
		scroll_amount = 0
		move_direction = ""
		move_amount = 0
		press_param = ""
		type_param = ""

		# extract the parameters
		if inputaction == "click":
			word = utils.find_word(sentence.doc, ["left","middle","right","double","triple"])
			if word:
				click_param = word.lemma_

		elif inputaction == "scroll":
			for word in sentence:
				if str(word) in ["up", "down"]:
					scroll_direction = word.lemma_
					scroll_amount = 8
				elif str(word) in ["top", "bottom"]:
					# jump all the way: a large amount in the matching direction
					scroll_direction = {"top":"up", "bottom":"down"}[str(word)]
					scroll_amount = 1000

		elif inputaction == "move":
			# one "unit" is 10 pixels unless the request says "pixel"
			unit_size = 10
			for word in sentence:
				numToken = None
				if str(word) in ["up", "down", "left", "right", "center"]:
					move_direction = str(word)
				elif word.dep_ == "prep":
					# "by 20 pixels" / "by 20": the number hangs off the preposition
					if str(word) == "by":
						for prepchild in word.children:
							if prepchild.dep_ == "pobj":
								if prepchild.lemma_ in ["pixel", "unit"]:
									if prepchild.lemma_ == "pixel":
										unit_size = 1
									for c in prepchild.children:
										if c.like_num:
											numToken = c
								elif prepchild.like_num:
									numToken = prepchild
				elif word.lemma_ in ["pixel", "unit"]:
					if word.lemma_ == "pixel":
						unit_size = 1
					for c in word.children:
						if c.like_num:
							numToken = c
				elif word.like_num:
					numToken = word
				if numToken:
					try:
						move_amount = int(str(numToken))
					except ValueError:
						# not digits; try to read it as a spelled-out number
						try:
							move_amount = utils.text2int(str(numToken))
						except Exception:
							log.error("could not parse {}".format(numToken))
							break
			move_amount *= unit_size

		elif inputaction == "press":
			word = utils.find_word(sentence.doc, ["press"])
			# nbor() raises IndexError at the end of the doc, so compare
			# indices to find out whether anything follows "press"
			if word and word.i + 1 < len(sentence.doc):
				press_param = '+'.join(map(str, sentence.doc[word.i + 1:]))

		elif inputaction == "type":
			word = utils.find_word(sentence.doc, ["type", "dictate"])
			if word and word.i + 1 < len(sentence.doc):
				objective_span = sentence.doc[word.i + 1:]
				if len(objective_span) >= 3 and str(objective_span[0:2]).startswith("the word"):
					# "type the word (": type the symbol's name, not the symbol
					symbol_to_word = {
						"(": "parenthesis",
						")": "closed parenthesis",
						"[": "square bracket",
						"]": "closed square bracket",
						"&": "ampersand",
					}
					symbol = str(objective_span[2])
					if symbol in symbol_to_word:
						type_param = symbol_to_word[symbol]
					else:
						# an ordinary word: type what follows "the word" as-is
						type_param = ' '.join(map(str, objective_span[2:]))
				else:
					type_param = ' '.join(map(str, objective_span))
					# remove spaces in front of dollar signs if preceding a number
					dollar_with_num_regex = re.compile(r"(\$) (\d)")
					type_param = dollar_with_num_regex.sub(lambda m: m.group(1) + m.group(2), type_param)

					# HACK: quick fix to make dictating longer numbers easier
					broken_num_regex = re.compile(r"((\d+\.?(\d+)?|\d*\.(\d+)).([A-Za-z-:]+| |\d)|for|to)\s?\d+")
					if broken_num_regex.match(type_param):
						def _to_string_num_filtered(s):
							# normalise one token of a number that was split up
							s = str(s).replace(":", "").replace("-", "")
							if not s:
								return s
							if s == "to":
								return "2"
							if s == "for":
								return "4"
							if s in ["point", "dot"]:
								return "."
							try:
								float(s)
								return s
							except ValueError:
								return str(utils.text2int(str(s)))
						type_param = ''.join(map(_to_string_num_filtered, objective_span))

		if inputaction == "click":
			return inputaction, (click_param,)
		elif inputaction == "scroll":
			return inputaction, (scroll_direction, scroll_amount,)
		elif inputaction == "move":
			return inputaction, (move_direction, move_amount)
		elif inputaction == "press":
			return inputaction, (press_param,)
		elif inputaction == "type":
			return inputaction, (type_param,)