def valid_web_jargon(self, text):
    """
    Check whether the given text is acceptable web jargon.

    Text is accepted when it is a text type (as decided by ``h.is_text_type``), is non-empty,
    and ``self.web_jargon_matcher`` matches a non-empty prefix of it.

    :param text: the web jargon request
    :return: True if the input text is valid web jargon, False otherwise
    """
    if not h.is_text_type(text) or len(text) == 0:
        return False
    match = self.web_jargon_matcher.match(text)
    # NOTE(review): assumes web_jargon_matcher behaves like a compiled ``re`` pattern, whose
    # match() returns None when nothing matches at the start of the text - treat that as
    # "invalid" instead of failing on ``None.group()``.
    return match is not None and len(match.group()) > 0
def match_arg(self, orig_arg_type, command_words, arg_sections):
    """
    Search the argument sections of a command for an argument of the requested type, using the
    command words as additional evidence.

    ``orig_arg_type`` may list several alternative types separated by ``|``; every alternative
    is looked up in ``self.PATTERN_DICT`` and handled independently, in order. A dictionary
    entry is either a regex given as text or a callable matcher:

    * a callable is applied to each argument section; the first result that is a positive int,
      or any non-int value other than None, is taken and the search stops completely;
    * a regex is matched against the command words and then against the argument sections;
      a section match replaces a word match, and the remaining alternatives are still tried
      afterwards (so a later alternative may replace the result).

    :param orig_arg_type: the type of argument to search for as addressed by the global pattern
                          dictionary in this class
    :param command_words: the words of the command to match to
    :param arg_sections: the already known argument sections in the command
    :return: the parsed argument from the given command and data, or an empty string when
             nothing matched
    """
    sections = [section.strip() for section in arg_sections]
    result = ''

    # several alternatives may be accepted; a single type becomes a one element list
    candidate_types = orig_arg_type.split("|") if "|" in orig_arg_type else [orig_arg_type]

    for candidate in candidate_types:
        searchable = len(command_words) > 0 and len(sections) > 0 \
            and candidate in self.PATTERN_DICT.keys()
        if not searchable:
            continue

        matcher = self.PATTERN_DICT[candidate]

        if h.is_text_type(matcher):
            # text entries are regex sources, compiled on the fly
            regex = re.compile(matcher)

            # first pass: individual command words
            for token in command_words:
                found = regex.match(token)
                if found is not None and len(found.group()) > 0:
                    result = found.group()
                    break

            # second pass: whole argument sections (a hit here replaces a word hit)
            for section in sections:
                found = regex.match(section)
                if found is not None and len(found.group()) > 0:
                    result = found.group()
                    break
            continue

        # anything that is not text is treated as a callable matching function
        accepted = False
        for section in sections:
            found = matcher(section)
            if type(found) == int:
                # int results only count when positive
                accepted = found > 0
            else:
                accepted = found is not None
            if accepted:
                result = found
                break
        if accepted:
            # a callable match ends the search over the remaining alternatives
            break

    return result
def process_web_action_request(self, text, curr_url):
    """
    Turn the provided command text into a web action request for the web text to action
    mapper.

    The text is split on single spaces (empty pieces are dropped) and handed, together with
    the original text and the current url, to ``extract_action_request``.

    :param text: the input command text
    :param curr_url: the url of the current web page
    :return: the action request response, or None when the inputs are rejected or no request
             could be extracted
    """
    inputs_ok = self.valid_web_jargon(text) and h.is_text_type(curr_url) and len(curr_url) > 0
    if not inputs_ok:
        return None

    # tokenise on single spaces, ignoring the empty strings produced by repeated spaces
    tokens = [piece for piece in text.split(" ") if len(piece) > 0]

    request = self.extract_action_request(text, tokens, curr_url)
    if request is None:
        # NOTE(review): the error log is assumed to belong to the failed extraction (not to the
        # input validation above) - confirm against the original indentation.
        h.log_to_console(["request error: ", text])
    return request
def template_action_interpreter(self, command_text, command_words, command_url):
    """
    Interpret a command by matching it against the known action text templates.

    Each template (a list of phrase parts) of every action that is possible in the context of
    ``command_url`` is checked against the normalised command text. A template matches when
    all of its parts occur in the command; the text around the parts is then passed to
    ``match_arg`` to fill in the arguments of the action. Of all matching templates the one
    whose parts start earliest in the command is chosen; ties are broken by the longest
    template phrase, and remaining ties by the template found last.

    This method will not always work: multiple instances of the same string may be detected
    in matching and may throw off the interpreter.

    :param command_text: the command text for the current action request
    :param command_words: the command words for the current action request
    :param command_url: the url of the command given used for context determination
    :return: the current action request response as a dict, empty when no template matched
    """
    # store lowercase of all strings and filter out quote tokens
    command_words = [x.lower() for x in command_words if x != '``' and x != '\'\'']

    # store lowercase, stripped version of command text with quote characters peeled off
    # both ends
    command_text = command_text.lower().strip().lstrip("\"").lstrip('``').lstrip('\'\'')\
        .rstrip('\'\'').rstrip("\"").rstrip('``').strip()

    # clean up command url and get command context
    command_url = command_url.strip()
    command_context, context_type = h.determine_url_context(command_url)

    # get possible action mappings
    possible_action_text_mapping_keys = h.get_possible_action_text_mapping_keys(
        command_context, self.action_text_mappings.keys())

    # every entry: (action key, template phrase, parsed args, earliest start position)
    matches = []

    # try to find match for command in templates
    for action_key in possible_action_text_mapping_keys:
        for u_map in self.action_text_mappings[action_key]:
            parts = u_map[h.PARTS]
            indices = []
            curr_command_text = command_text
            curr_command_words = list(command_words)

            for part in parts:
                # a missing part means the template cannot match, no need to look further
                if part not in curr_command_text:
                    break
                # positions refer to the untouched command text because the argument
                # sections are cut from it
                part_start = command_text.find(part)
                if part_start < 0:
                    # the part only shows up because removing earlier parts glued two
                    # fragments together; it is not really in the command
                    break
                indices.append((part_start, part_start + len(part)))
                # remove that part from the working text so it is not matched again
                curr_command_text = curr_command_text.replace(part, '')
                # remove this part from the word list (if not in list, problem but neglect)
                for p in part.split(" "):
                    if p in curr_command_words:
                        curr_command_words.remove(p)

            # store match only if all parts are in the command; a template without parts
            # has no position and cannot anchor a match
            if len(indices) == 0 or len(indices) != len(parts):
                continue

            # sections of the command from which the args will be extracted
            arg_sections = h.extract_arg_sections(command_text, indices)

            # start from a copy of the template's args so the template stays untouched
            args = u_map[h.CMD_ARGS_DICT].copy()
            if len(arg_sections) > 0:
                for arg_type in u_map[h.CMD_ARGS_DICT]:
                    # extract argument using argument type
                    parsed_arg = self.match_arg(arg_type, curr_command_words, arg_sections)
                    # keep positive ints (exact int type, so bools are not counted), any
                    # list, and non-empty text
                    if (type(parsed_arg) == int and parsed_arg > 0) \
                            or type(parsed_arg) == list \
                            or (h.is_text_type(parsed_arg) and len(parsed_arg) > 0):
                        args[arg_type] = parsed_arg

            # earliest position over ALL matched parts, not just the first one listed
            earliest_start = min(start for start, _ in indices)
            matches.append((action_key, " ".join(parts), args, earliest_start))

    curr_action_request = dict()

    # select the earliest and, among equally early ones, longest command match
    if len(matches) > 0:
        best_index = 0
        best_start = matches[0][3]
        best_length = len(matches[0][1])
        for index, match in enumerate(matches):
            phrase_length = len(match[1])
            start_pos = match[3]
            earlier = start_pos < best_start
            # ">=" lets the template found last win a complete tie
            same_start_not_shorter = start_pos == best_start and phrase_length >= best_length
            if earlier or same_start_not_shorter:
                best_index = index
                best_start = start_pos
                best_length = phrase_length

        # set command and args from action text mappings
        curr_action_request[h.CMD] = matches[best_index][0]
        curr_action_request[h.CMD_ARGS_DICT] = matches[best_index][2]
        curr_action_request[h.CONTEXT_TYPE] = context_type

        # handle music context boolean setting for music actions
        if command_context == h.MUSIC_CONTEXT:
            # NOTE(review): IS_SPOTIFY is used without the ``h.`` prefix the other keys
            # carry - confirm the name is imported into this module.
            if "spotify" in command_url:
                curr_action_request[h.CMD_ARGS_DICT][IS_SPOTIFY] = 'true'
            else:
                curr_action_request[h.CMD_ARGS_DICT][IS_SPOTIFY] = 'false'

    return curr_action_request