Esempi in Python per is_text_type, esempi in Python per helpers.is_text_type

Esempio n. 1

0

Mostra file

File: text_processor.py Progetto: ShaunHoward/web_jargon

 def valid_web_jargon(self, text):
     """
     Check whether the given text is a valid web jargon request.

     Text is valid web jargon if it is a non-empty text type (as decided by
     ``h.is_text_type``) and ``self.web_jargon_matcher.match`` produces a match
     whose ``group()`` is non-empty.
     :param text: the web jargon request
     :return: whether the input text is valid web jargon aka good English, no weird characters
     """
     if not h.is_text_type(text) or len(text) == 0:
         return False
     # match() yields None when the pattern does not match at all; treat that as
     # invalid input rather than failing on None.group()
     match = self.web_jargon_matcher.match(text)
     return match is not None and len(match.group()) > 0

Esempio n. 2

0

Mostra file

File: text_processor.py Progetto: ShaunHoward/web_jargon

    def match_arg(self, orig_arg_type, command_words, arg_sections):
        """
        Tries to find the given arg type in the list of argument sections,
        using the provided command words as backup evidence in decision making.

        ``orig_arg_type`` may name several alternative types separated by "|"; they are
        tried in order and the first one that yields a match wins. Types that are not
        keys of ``self.PATTERN_DICT`` are skipped.
        :param orig_arg_type: the type of argument to search for as addressed by the global pattern dictionary
        in this class
        :param command_words: the words of the command to match to
        :param arg_sections: the already known argument sections in the command
        :return: the parsed argument from the given command and data, or '' when nothing matched
        """
        arg_sections = [x.strip() for x in arg_sections]

        # without words or sections there is nothing to search, whatever the arg type
        if len(command_words) == 0 or len(arg_sections) == 0:
            return ''

        # splitting a string without "|" yields a single-element list, so one code path
        # covers both the single-type and the multi-type case
        for arg_type in orig_arg_type.split("|"):
            if arg_type not in self.PATTERN_DICT:
                continue

            # extract the proper pattern
            pattern = self.PATTERN_DICT[arg_type]

            # The pattern may be a function call, strings mean regex patterns are given
            if not h.is_text_type(pattern):
                # match using a matching function that is callable
                for arg_section in arg_sections:
                    match = pattern(arg_section)
                    # int results only count when positive; any other result counts when not None
                    if (type(match) is int and match > 0) or (type(match) is not int and match is not None):
                        return match
            else:
                # compile a regex pattern on the fly (not really used in practice but always an option)
                pat = re.compile(pattern)

                # try the command words first; the argument phrase sections are only a
                # fallback, so a word match must not be overwritten by a section match
                for candidates in (command_words, arg_sections):
                    for candidate in candidates:
                        match = pat.match(candidate)
                        if match is not None and len(match.group()) > 0:
                            return match.group()

        return ''

Esempio n. 3

0

Mostra file

File: text_processor.py Progetto: ShaunHoward/web_jargon

    def process_web_action_request(self, text, curr_url):
        """
        Turn a command text into a web action request for the current page.

        The request is only extracted when the text passes ``valid_web_jargon`` and the
        url is a non-empty text type; otherwise a request error is logged to the console.
        :param text: the input command text
        :param curr_url: the url of the current web page
        :return: the action request response, which will be empty or None if in error
        """
        request_ok = self.valid_web_jargon(text) and h.is_text_type(curr_url) and len(curr_url) > 0
        if not request_ok:
            h.log_to_console(["request error: ", text])
            return None

        # split on single spaces and drop the empty tokens left by repeated spaces
        tokens = [token for token in text.split(" ") if len(token) > 0]

        # the extracted request is handed back as-is (None when extraction failed)
        return self.extract_action_request(text, tokens, curr_url)

Esempio n. 4

0

Mostra file

File: text_processor.py Progetto: ShaunHoward/web_jargon

    def template_action_interpreter(self, command_text, command_words, command_url):
        """
        Interpret a command by matching it against the action text templates.

        Every template (an entry of ``self.action_text_mappings``) whose parts all occur in
        the command text is a candidate; its arguments are parsed with ``match_arg``. Among
        the candidates the one starting earliest in the command text is chosen, ties being
        broken by the longest template phrase.

        This method will not always work. multiple instances of the same string may be detected
        in matching and may throw off the interpreter.
        :param command_text: the command text for the current action request
        :param command_words: the command words for the current action request
        :param command_url: the url of the command given used for context determination
        :return: the current action request response (an empty dict when no template matched)
        """

        # store lowercase of all strings and filter out quote tokens
        command_words = [x.lower() for x in command_words if x != '``' and x != '\'\'']

        # store lowercase, stripped version of command text input with surrounding quote
        # characters removed
        command_text = command_text.lower().strip().lstrip("\"").lstrip('``').lstrip('\'\'')\
            .rstrip('\'\'').rstrip("\"").rstrip('``').strip()

        # clean up command url and get command context
        command_url = command_url.strip()
        command_context, context_type = h.determine_url_context(command_url)

        # get possible action mappings
        possible_action_text_mapping_keys = h.get_possible_action_text_mapping_keys(command_context,
                                                                                    self.action_text_mappings.keys())

        # candidate matches as (action key, template phrase, args, earliest part start)
        matches = []
        for action_key in possible_action_text_mapping_keys:
            for u_map in self.action_text_mappings[action_key]:
                parts = u_map[h.PARTS]
                indices = []
                curr_command_text = command_text
                # work on a copy so matched words can be removed per template
                curr_command_words = list(command_words)

                for part in parts:
                    # the part must still be present in the working text, and its offsets
                    # must refer to the untouched command text because the argument
                    # sections are cut from that text below
                    part_start = command_text.find(part) if part in curr_command_text else -1
                    if part_start < 0:
                        # one missing part means this template cannot match
                        break
                    indices.append((part_start, part_start + len(part)))
                    # drop the matched part from the working text
                    curr_command_text = curr_command_text.replace(part, '')
                    # remove this part from the word list (if not in list, problem but neglect)
                    for p in part.split(" "):
                        if p in curr_command_words:
                            curr_command_words.remove(p)

                # only templates with at least one part, all of them found, are matches
                if not indices or len(indices) != len(parts):
                    continue

                # store indices where args will be extracted from in string
                arg_sections = h.extract_arg_sections(command_text, indices)

                # start from a copy of the template's args so the template itself is not mutated
                args = u_map[h.CMD_ARGS_DICT].copy()
                if len(arg_sections) > 0:
                    for arg_type in u_map[h.CMD_ARGS_DICT]:
                        # extract argument using argument type
                        parsed_arg = self.match_arg(arg_type, curr_command_words, arg_sections)
                        # accept positive ints, any list, or non-empty text
                        if (type(parsed_arg) is int and parsed_arg > 0) \
                                or type(parsed_arg) is list \
                                or (h.is_text_type(parsed_arg) and len(parsed_arg) > 0):
                            args[arg_type] = parsed_arg

                # the match position is the earliest start among all matched parts
                earliest_start = min(start for start, _ in indices)
                matches.append((action_key, " ".join(parts), args, earliest_start))

        curr_action_request = dict()
        if matches:
            # select the earliest match; among equal starts the longest phrase; among
            # fully equal candidates the last one found
            best_index = max(range(len(matches)),
                             key=lambda i: (-matches[i][3], len(matches[i][1]), i))
            best_match = matches[best_index]

            # set command and args from action text mappings
            curr_action_request[h.CMD] = best_match[0]
            curr_action_request[h.CMD_ARGS_DICT] = best_match[2]
            curr_action_request[h.CONTEXT_TYPE] = context_type

            # handle music context boolean setting for music actions
            if command_context == h.MUSIC_CONTEXT:
                curr_action_request[h.CMD_ARGS_DICT][IS_SPOTIFY] = \
                    'true' if "spotify" in command_url else 'false'

        return curr_action_request