class ColorDetector(CrowModule):
    """
    Detects one of a fixed set of known colors in text.
    """
    def __init__(self, language='en'):
        # known color keys; translated via the detection templates file
        self.colors = ["red", "blue", "black", "green"]
        self.lang = language
        self.ui = UserInputManager(language=self.lang)
        self.templ_det = self.ui.load_file('templates_detection.json')

    def detect_color(self, text: TaggedText):
        """
        A simple method for detecting a color in text.

        Parameters
        ----------
        text  an input text

        Returns
        -------
        the (English) color key of the first color found, or None
        """
        for color in self.colors:
            try:
                # translate the color key into the detection language
                color_lang = self.templ_det[self.lang][color]
            except KeyError:
                # no translation available for this color/language pair;
                # previously a bare except hid *all* errors here
                continue
            if color_lang in text.get_text():
                return color

        return None
class ApplyGlueTask(Template):
    """
    Template for the "apply glue" instruction: glue is applied to an
    object of a given size at a given position.
    """
    namespace = db.onto
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # template parameters, filled in by match()
        self.register_parameter(name="size", value=db.onto.GeometricObject)
        self.register_parameter(name="position", value=db.onto.Location)

        self.ui = UserInputManager()

        self.templ_det = self.ui.load_file('templates_detection.json')

    def match(self, tagged_text : TaggedText, language = 'en') -> None:
        """
        Fill the template parameters (size, position) from the tagged text.
        """
        # local imports to prevent cyclic module dependencies
        import nlp_crow.modules.GeometryDetector as GeometryDetector
        import nlp_crow.modules.LocationDetector as LocationDetector

        gd = GeometryDetector.GeometryDetector(language = language)
        ld = LocationDetector.LocationDetector(language = language)

        self.size = gd.detect_geometry(tagged_text)
        self.position = ld.detect_location(tagged_text)

    def evaluate(self, language = 'en') -> None:
        """
        Ground a relative position (if any) to an absolute one.
        """
        if db.onto.RelativeLocation in self.position.is_instance_of:
            # BUGFIX: previously passed self.lang, which is never set on
            # this class and raised AttributeError; use the argument.
            lg = LocationGrounder.LocationGrounder(language = language)

            self.position = lg.ground_location(loc=self.position)
# Example #3
# 0
class TidyTask(Template):
    """
    Template for the "tidy" instruction: put away a detected object.
    """
    namespace = db.onto

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # template parameter, filled in by match()
        self.register_parameter(name="object_to_put", value=db.onto.Object)
        self.logger = logging.getLogger(__name__)
        self.ui = UserInputManager()

        self.templ_det = self.ui.load_file('templates_detection.json')

    def match(self, tagged_text: TaggedText, language='en') -> None:
        """
        Detect the object to be tidied in the tagged text.
        """
        od = ObjectDetector(language=language)

        # TODO: temporary solution: detect the "put subject" in 3 words after the "put" verb. Should be improved with better grammar parsing.
        try:
            # KeyError: no "tidy" keyword for this language;
            # IndexError: the keyword does not occur in the text.
            # (Previously a bare except also hid errors from the object
            # detection below.)
            put_index = tagged_text.indices_of(
                self.templ_det[language]['tidy'])[0]
        except (KeyError, IndexError):
            self.logger.debug("Put index not detected.")
        else:
            tagged_text_cut = tagged_text.cut(put_index + 1, put_index + 4)

            self.object_to_put = od.detect_object(tagged_text_cut)

    def evaluate(self, language='en') -> None:
        """
        Ground the detected object placeholder against the workspace.
        """
        # check if the object to be put down is in the workspace
        self.lang = language
        og = ObjectGrounder(language=self.lang)
        self.object_to_put = og.ground_object(
            obj_placeholder=self.object_to_put)
class GeometryDetector(CrowModule):
    """
    Detects geometric primitives in text - e.g. point, line, etc.
    (currently only "point" is recognized).
    """
    namespace = db.onto

    def __init__(self, language='en'):
        self.lang = language
        self.logger = logging.getLogger(__name__)
        self.ui = UserInputManager()

        self.templ_det = self.ui.load_file('templates_detection.json')

    def detect_geometry(self, tagged_text: TaggedText):
        """
        Return a GeometricObject mentioned in the tagged text, or None.
        """
        detected = None

        # keyword for "point" in the configured language
        point_word = self.templ_det[self.lang]['point']
        if tagged_text.contains_text(point_word):
            detected = db.onto.GeometricObject("point")

        if detected:
            self.logger.debug(
                f"Geometry detected for \"{tagged_text.get_text()}\": {detected}")

        return detected
class TemplateDetector(CrowModule):
    """
    First part of the NL pipeline. Preliminary template detection in the text.

    Each detect_* method returns a (one-element) list of template types on a
    match and implicitly returns None otherwise.
    """
    namespace = db.onto

    def __init__(self, language='en'):
        self.lang = language
        self.ui = UserInputManager(language=language)

    def detect_templates(self, tagged_text: TaggedText) -> List[tt]:
        """
        Tries to guess which template should be used to represent robot instructions for the chunk of
        text. Can detect more templates, but only the first one which matches will be used later.

        Parameters
        ----------
        tagged_text  a tagged text in which the template should be detected

        Returns
        -------
        a list of guessed templates for the tagged text sorted by their probability
        """

        # NOTE(review): templ_det/guidance_file are loaded here, not in
        # __init__, and the detect_* methods below rely on them — this method
        # must run before any individual detector is called.
        self.templ_det = self.ui.load_file('templates_detection.json')
        self.guidance_file = self.ui.load_file('guidance_dialogue.json')

        templates = []
        # detectors for the basic (non-compound) templates
        detect_fns = [
            self.detect_pick, self.detect_apply_glue, self.detect_put,
            self.detect_tidy, self.detect_learn, self.detect_tower,
            self.detect_demonstration_list, self.detect_define_area
        ]

        # try to find custom templates (compound actions) first
        custom_templates = self.detect_custom_templates(tagged_text)

        if custom_templates:
            templates += custom_templates

        # add detected basic templates (actions)
        for detect_fn in detect_fns:
            res = detect_fn(tagged_text)

            if res:
                templates += res

        return templates

    def detect_custom_templates(self, tagged_text: TaggedText):
        """
        Retrieves learned custom templates (compound actions) from the database
        and tries to detect them in the text.

        Parameters
        ----------
        tagged_text  a tagged text in which a custom template should be detected

        Returns
        -------
        a list of custom templates detected in the text, an empty list if no template is detected
        """
        all_custom_templates = db_api.get_custom_templates()

        custom_templates = []

        for custom_template in all_custom_templates:
            # strip the leading character of the stored name
            # (presumably a namespace/marker prefix — TODO confirm)
            custom_template_name = custom_template.name[1:]

            # case-insensitive substring match against the whole text
            if custom_template_name.lower() in tagged_text.get_text().lower():
                custom_templates.append(custom_template)
        return custom_templates

    def detect_pick(self, tagged_text: TaggedText) -> List[tt]:
        """
        Detector for PickTask.

        Returns [tt.PICK_TASK] when a "take"/"pick" verb is present, else None.
        """
        if tagged_text.contains_pos_token(
                self.templ_det[self.lang]['take'],
                "VB") or tagged_text.contains_pos_token(
                    self.templ_det[self.lang]['pick'], "VB"):
            self.ui.say(self.guidance_file[self.lang]["template_match"] +
                        self.templ_det[self.lang]['pick'])

            #if tagged_text.contains_pos_token("take", "VB") or \
            #        tagged_text.contains_pos_token("pick", "VB"):
            return [tt.PICK_TASK]

    def detect_apply_glue(self, tagged_text: TaggedText) -> List[tt]:
        """
        Detector for ApplyGlueTask.

        Returns [tt.APPLY_GLUE] when "glue" is found as VB/NNS/NNP, else None.
        """
        if tagged_text.contains_pos_token(
                self.templ_det[self.lang]['glue'],
                "VB") or tagged_text.contains_pos_token(
                    self.templ_det[self.lang]['glue'],
                    "NNS") or tagged_text.contains_pos_token(
                        self.templ_det[self.lang]['glue'], "NNP"):
            self.ui.say(self.guidance_file[self.lang]["template_match"] +
                        self.templ_det[self.lang]['glue'])
            return [tt.APPLY_GLUE]

    def detect_learn(self, tagged_text: TaggedText) -> List[tt]:
        """
        Detector for LearnNewTask.

        Returns [tt.LEARN_NEW_TASK] or None.
        """
        # NOTE(review): matches the hardcoded English phrases, unlike the
        # detectors above which go through templ_det — confirm if intended.
        if tagged_text.contains_text("learn") and tagged_text.contains_text(
                "new task"):
            self.ui.say(self.guidance_file[self.lang]["template_match"] +
                        self.templ_det[self.lang]['learn_new_task'])
            return [tt.LEARN_NEW_TASK]

    def detect_put(self, tagged_text: TaggedText) -> List[tt]:
        """
        Detector for PutTask.

        Returns [tt.PUT_TASK] or None.
        """
        #if tagged_text.contains_text("put"):
        if tagged_text.contains_text(self.templ_det[self.lang]['put']):
            self.ui.say(self.guidance_file[self.lang]["template_match"] +
                        self.templ_det[self.lang]['put'])
            return [tt.PUT_TASK]

    def detect_tidy(self, tagged_text: TaggedText) -> List[tt]:
        """
        Detector for TidyTask.

        Returns [tt.TIDY_TASK] or None.
        """
        #if tagged_text.contains_text("put"):
        if tagged_text.contains_text(self.templ_det[self.lang]['tidy']):
            self.ui.say(self.guidance_file[self.lang]["template_match"] +
                        self.templ_det[self.lang]['tidy'])
            return [tt.TIDY_TASK]

    def detect_tower(self, tagged_text: TaggedText) -> List[tt]:
        """
        Detector for LearnTowerFromDemonstration.

        Returns [tt.LEARN_TOWER] or None.
        """
        # NOTE(review): hardcoded English keywords (see detect_learn).
        if tagged_text.contains_text("learn") and tagged_text.contains_text(
                "tower"):
            self.ui.say(self.guidance_file[self.lang]["template_match"] +
                        self.templ_det[self.lang]['learn_new_tower'])
            return [tt.LEARN_TOWER]

    def detect_demonstration_list(self, tagged_text: TaggedText) -> List[tt]:
        """
        Detector for DemonstrationList.

        Returns [tt.DEMONSTRATION_LIST] or None.
        """
        # NOTE(review): hardcoded English keywords (see detect_learn).
        if tagged_text.contains_text("show") and tagged_text.contains_text(
                "demonstration"):
            self.ui.say(self.guidance_file[self.lang]["template_match"] +
                        self.templ_det[self.lang]['demonstration'])
            return [tt.DEMONSTRATION_LIST]

    def detect_define_area(self, tagged_text: TaggedText) -> List[tt]:
        """
        Detector for DefineArea.

        Returns [tt.DEFINE_AREA] or None.
        """
        # NOTE(review): hardcoded English keywords (see detect_learn).
        if tagged_text.contains_text("define") and tagged_text.contains_text(
                "area"):
            self.ui.say(self.guidance_file[self.lang]["template_match"] +
                        self.templ_det[self.lang]['define_area'])
            return [tt.DEFINE_AREA]
class ObjectGrounder:
    """
    Grounds an object placeholder (produced by ObjectDetector) to a concrete
    object in the workspace database, asking the user when ambiguous.
    """
    namespace = db.onto
    #class Flags(Enum):
    #     CAN_BE_PICKED = 1

    def __init__(self, language = 'en'):
        self.lang = language
        self.db_api = DatabaseAPI()
        self.cd = ColorDetector()
        self.ar = UserInputManager(language = self.lang)

        self.logger = logging.getLogger(__name__)
        self.templ_file = self.ar.load_file('templates_detection.json')
        self.guidance_file = self.ar.load_file('guidance_dialogue.json')

    def ground_object(self, obj_placeholder, flags=()) -> Any:
        """
        Find the real workspace object matching the placeholder.

        Parameters
        ----------
        obj_placeholder  the placeholder with required class/properties/flags
        flags            currently unused (see the commented-out Flags enum)

        Returns
        -------
        the grounded object, or None if nothing in the workspace matches
        (the user is notified in that case)
        """
        # try to find out the class of the object
        # the 0th item of an is_a list should be always ObjectPlaceholder
        # if the item is bound to a real class, the class will be the 1st item
        cls = obj_placeholder.is_a[-1]

        if "last_mentioned" in obj_placeholder.flags:
            # coreference ("it") -> reuse the last object talked about
            objs = [self.db_api.get_last_mentioned_object()]

        else:
            props = obj_placeholder.get_properties()
            props.discard(db.onto.hasFlags) # this is internal property of the object placeholder

            # put together a dictionary of properties required from the object
            props_vals = {self.get_prop_name(prop): getattr(obj_placeholder, self.get_prop_name(prop)) for prop in props}

            # find if there is an object with such properties in the workspace
            objs = self.db_api.get_by_properties(cls=cls, properties=props_vals)

        # if self.Flags.CAN_BE_PICKED in flags:
        #     objs = list(filter(lambda x: x._can_be_picked, objs))

        # only one object found / any object can be selected
        if len(objs) == 1 or ("any" in obj_placeholder.flags and len(objs) > 0):
            obj = objs[0]
            self.db_api.set_last_mentioned_object(obj)
        # more objects -> ask the user to select one
        elif len(objs) > 1:
            obj = self.ar.ask_to_select_from_class_objects(objs)
            self.db_api.set_last_mentioned_object(obj)
        else:
            self.logger.warning(f"No object of type {cls} in the workspace.")
            self.ar.say(self.guidance_file[self.lang]["no_object_workspace"], f"{cls}")
            obj = None

        if obj:
            self.logger.debug(f"Object found for {obj_placeholder}: {obj}")
            #self.ar.say(self.guidance_file[self.lang]["object_found"],f"{obj_placeholder}")
        return obj


    def get_prop_name(self, prop : ow.DataPropertyClass):
        """
        Return the name under which the property is accessed from Python.
        """
        # we need to use the python name of properties whenever it is defined
        if hasattr(prop, "python_name"):
            return prop.python_name

        return prop.name
class GrammarParser(CrowModule):
    """
    Tags and parses the text.
    """
    def __init__(self, language = 'en'):
        self.lang = language
        self.logger = logging.getLogger(__name__)
        self.nltk_tagger = NLTK(self.lang)

        self.ui = UserInputManager()

        self.templ_det = self.ui.load_file('templates_detection.json')

    def parse(self, sentence : str) -> ParsedText:
        """
        Currently used for dummy text parsing. After the text is tagged, it is split on "and" and "." tokens
        into sentences. Each sentence has its tokens hanged under an "S" node.
        TODO: swap with the parse() method which relies on a grammar

        Parameters
        ----------
        sentence    an input sentence as a string

        Returns
        -------
        parsed text
        """

        # use NLTK for tagging
        tagged_text = self.nltk_tagger.tag(sentence)

        # create a new object for parsed text
        parsed_text = ParsedText()
        # parsed_text.lang = self.lang
        # save the original text
        parsed_text.orig_text = sentence

        # create the root of the tree
        root = ParseTreeNode(label="T")
        # root.lang = self.lang
        parsed_text.parse_tree = root

        # create a parent node for the first sentence
        sentence_node = ParseTreeNode(label="S")
        # sentence_node.lang = self.lang
        # sequentially process the tagged tokens
        for tagged_token in tagged_text.get_tokens_with_tags():
            if tagged_token.token in [self.templ_det[self.lang]["and"], "."]:
                # in case there is a previous sentence
                if sentence_node.subnodes:
                    # append the previous sentence node under the root node
                    root.subnodes.append(sentence_node)
                    # and start a new sentence
                    sentence_node = ParseTreeNode(label="S")

                # append the separating token under the root node
                root.subnodes.append(tagged_token)
            else:
                # append the token to the current sentence
                sentence_node.subnodes.append(tagged_token)

        if sentence_node.subnodes:
            # finalize the last sentence
            root.subnodes.append(sentence_node)

        self.logger.debug(f"Parsed text: {parsed_text}")

        return parsed_text

    # TODO this method should be used in the future, relies on a grammar
    # def parse(self, sentence : str) -> ParsedText:
    #     tree = get_parse_tree(sentence)
    #
    #     tokens = nltk.word_tokenize(sentence)
    #
    #     root, _ = self.transform_recursive(tree, tokens)
    #
    #     parsed_text = ParsedText()
    #     parsed_text.orig_text = sentence
    #     parsed_text.parse_tree = root
    #
    #     self.logger.debug(f"Parsed text: {parsed_text}")
    #
    #     return parsed_text


    def transform_recursive(self, node : Any, tokens : List):
        """
        Recursively transforms the tree from the format of the grammar parser to the format used in the NL processing.

        Parameters
        ----------
        node    a node to be processed - can be either a ParentedTree object or a string
                (for the first call this should be the tree root)
        tokens  a list of tokens (not provided in the tree from the grammar parser)

        Returns
        -------
        the recursively transformed node, the list of remaining tokens
        (implicitly None for any other node type)
        """
        # idiom fix: isinstance() instead of type() == comparisons
        if isinstance(node, ParentedTree):
            return self.transform_node(node, tokens)

        elif isinstance(node, str):
            # a leaf: the string is the POS tag, consume one token
            return self.transform_tag(node, tokens[0]), tokens[1:]


    def transform_node(self, node, tokens):
        """
        Transforms a node by recursively calling transform_recursive() on its subnodes.
        """
        label = node._label
        parse_tree_node = ParseTreeNode(label=label)

        for subnode in node:
            # each transformed subnode consumes tokens from the front
            parse_tree_subnode, tokens = self.transform_recursive(subnode, tokens)
            parse_tree_node.subnodes.append(parse_tree_subnode)

        return parse_tree_node, tokens


    def transform_tag(self, node, token):
        """
        Transforms a single token and its tag (in the string form) into a tagged token.
        """
        tagged_token = TaggedToken()

        tagged_token.token = token
        tagged_token.tag = Tag(pos=POS(node))

        return tagged_token
class ObjectDetector(CrowModule):
    """
    Detects an object in text.
    """
    namespace = db.onto
    def __init__(self,language = 'en'):
        self.logger = logging.getLogger(__name__)
        self.lang = language
        self.db_api = DatabaseAPI()
        self.ui = UserInputManager(language = language)

        # mapping from template keys to ontology classes
        self.class_map = {
            "screwdriver": db.onto.Screwdriver,
            "hammer": db.onto.Hammer,
            "pliers": db.onto.Pliers,
            "glue" : db.onto.Glue,
            "panel" : db.onto.Panel,
            "cube" : db.onto.Cube
        }
        self.templ_det = self.ui.load_file('templates_detection.json')
        self.guidance_file = self.ui.load_file('guidance_dialogue.json')

    def detect_object(self, tagged_text : TaggedText) -> db.onto.Object:
        """
        Detects an object mentioned in the input text, extracts its properties and saves it
        in the object placeholder.

        Parameters
        ----------
        tagged_text  an input text

        Returns
        -------
        an object placeholder to be grounded later
        """
        obj = None
        text = tagged_text.get_text()

        # try to detect one of the known objects in text
        for obj_str in self.class_map.keys():
            try:
                obj_str_lang = self.templ_det[self.lang][obj_str]
            except KeyError:
                # no translation for this object in the current language;
                # previously a bare except also hid errors from the
                # detection below
                continue
            #TODO should be only NN, but we have a problem that kostka is detected as VB/VBD
            if any(tagged_text.contains_pos_token(obj_str_lang, pos)
                   for pos in ("NN", "VBD", "VB", "NNS")):
                obj = self.detect_explicit_object(tagged_text, obj_str)
                break
        # try to detect a coreference to an object
        if obj is None and tagged_text.contains_pos_token("it", "PRP"):
            obj = self.detect_coreferenced_object()

        self.logger.debug(f"Object detected for \"{text}\": {obj}")
        self.ui.say(self.guidance_file[self.lang]["object_matched"]+" "+ text)
        return obj

    def detect_explicit_object(self, tagged_text, obj_str):
        """
        Detect an object which is mentioned explicitly.
        """
        cls = self.class_map[obj_str]
        obj = db.onto.ObjectPlaceholder()
        obj.is_a.append(cls)
        obj_str_lang = self.templ_det[self.lang][obj_str]

        if tagged_text.contains_text(self.templ_det[self.lang]['any']+" " + obj_str_lang):
            # the "any" flag will be used to select any object without asking the user
            obj.flags.append("any")

        # enrich the placeholder with detected properties
        self.detect_object_color(obj, tagged_text)
        self.detect_object_id(obj, tagged_text)
        self.detect_object_location(obj, obj_str, tagged_text)

        return obj

    # def detect_known_object(self, tagged_text, obj_str):
    #     cls = self.class_map[obj_str]
    #     obj = db.onto.ObjectPlaceholder()
    #     obj.is_a.append(cls)
    #
    #     if tagged_text.contains_text("any " + obj_str):
    #         obj.flags.append("any")
    #
    #     self.detect_object_color(obj, tagged_text)
    #     self.detect_object_location(obj, obj_str, tagged_text)
    #
    #     return obj

    def detect_coreferenced_object(self):
        """
        Detect that the text is referencing an object mentioned earlier.
        """

        obj = db.onto.ObjectPlaceholder()
        obj.flags.append("last_mentioned")

        return obj

    def detect_object_location(self, obj, obj_str, tagged_text):
        """
        Detect a location for the object in the text after its mention.
        """
        # cut the part of the text that is sent into the location detector to avoid infinite loop
        # TODO: all of this is only a temporary solution, not intended to be used in the final product
        end_index = tagged_text.get_text().find(obj_str) + len(obj_str)
        new_tagged_text = tagged_text.cut(end_index, None)

        ld = LocationDetector.LocationDetector(language = self.lang)
        location = ld.detect_location(new_tagged_text)
        if location:
            obj.location = location

    def detect_object_color(self, obj, tagged_text):
        """
        Attach a detected named color to the placeholder, if any.
        """
        cd = ColorDetector.ColorDetector(language = self.lang)
        color = cd.detect_color(tagged_text)

        if color:
            obj.color.append(db.onto.NamedColor(color))

    def detect_object_id(self, obj, tagged_text):
        """
        Attach a detected ArUco marker id to the placeholder, if any.
        """
        idet = IdDetector.IdDetector()

        id = idet.detect_id(tagged_text)

        if id is not None:
            obj.aruco_id = id
# Example #9
# 0
class LocationDetector(CrowModule):
    """
    Detects a location in text.
    """
    namespace = db.onto

    def __init__(self, language='en'):
        self.logger = logging.getLogger(__name__)
        self.lang = language
        self.ui = UserInputManager(language=self.lang)
        self.templ_det = self.ui.load_file('templates_detection.json')

    def detect_location(self, tagged_text: TaggedText):
        """
        Tries to extract the information about the location in the text.

        Parameters
        ----------
        tagged_text  a text in which the location should be detected

        Returns
        -------
        the location object or None if no location is detected
        """
        tagged_text.lang = self.lang
        # absolute locations ("here", "down", explicit coordinates) have
        # priority over relative ones
        loc = self.detect_absolute_location(tagged_text)

        if not loc:
            self.logger.debug(
                f"Absolute Location not detected for \"{tagged_text.get_text()}\": {loc}. Trying relative location"
            )
            loc = self.detect_relative_location(tagged_text)

        if loc:
            self.logger.debug(
                f"Location detected for \"{tagged_text.get_text()}\": {loc}")
        else:
            self.logger.debug(
                f"Location not detected for \"{tagged_text.get_text()}\": {loc}"
            )

        return loc

    def detect_absolute_location(self,
                                 tagged_text: TaggedText) -> db.onto.Location:
        """
        Detect "here"/"down" style locations or explicit coordinates.
        """
        if tagged_text.contains_text(self.templ_det[self.lang]["here"]):
            return self.detect_current_finger_location()

        elif tagged_text.contains_text(self.templ_det[self.lang]["down"]):
            return self.detect_current_robot_handle_location()

        return self.detect_location_from_text(tagged_text)

    def detect_relative_location(
            self, tagged_text: TaggedText) -> db.onto.RelativeLocation:
        """
        Detect a location expressed relative to another object or area
        (e.g. "left of the cube"). Returns None if nothing is found.
        """
        # for preventing cyclic imports in previous Python versions
        import nlp_crow.modules.ObjectDetector as ObjectDetector

        loc = None
        obj_rel_locs = ["center", "left", "right", "top", "bottom"]

        for rel_loc in obj_rel_locs:
            if tagged_text.contains_text(self.templ_det[self.lang][rel_loc]):
                # TODO temporary solution: detect the object which the location refers to in the part of the sentence *after* the location
                # BUGFIX: the index must be looked up with the translated
                # keyword (same as contains_text above), not the English
                # key — otherwise non-English input fails here.
                index = tagged_text.indices_of(
                    self.templ_det[self.lang][rel_loc])[0]
                tagged_text_cut = tagged_text.cut(index + 1, None)

                # detect to which object the location refers
                od = ObjectDetector.ObjectDetector()
                relative_to = od.detect_object(tagged_text_cut)

                if not relative_to:
                    ad = AreaDetector()
                    relative_to = ad.detect_area(tagged_text_cut)

                if relative_to:
                    loc = db.onto.RelativeLocation()
                    loc.loc_type = rel_loc
                    loc.relative_to = relative_to

        return loc

    def detect_location_from_text(self, tagged_text):
        """
        Look for explicit coordinates: "... position ... X ... Y ...".
        """
        # NOTE(review): pos_lang is computed but the regex below still uses
        # the hardcoded English "position" (see the commented line) —
        # confirm before switching; kept for behavioral compatibility.
        pos_lang = self.templ_det[self.lang]["position"]
        regex_pos_list = [
            #(rf"{pos_lang}", "NN"),
            (r"position", "NN"),
            (r".*", "CD"),
            (r".*", "CD")
        ]
        # looking for " (...) position/NN (...) X/CD (...) Y/CD (...)"
        res = tagged_text.match_regex_pos_list(regex_pos_list)

        if res:
            loc = db.onto.Location()
            loc.x = self.get_coordinate(res[1].string)
            loc.y = self.get_coordinate(res[2].string)

            return loc

    def detect_current_robot_handle_location(self):
        # TODO this should be changed to RelativeLocation and then grounded according to the current robot handle location
        return self.generate_random_location()

    def detect_current_finger_location(self):
        # TODO connect with the vision module
        return self.generate_random_location()

    def get_coordinate(self, text):
        """
        Parse a coordinate given as digits ("3") or as words ("three").
        """
        try:
            return int(text)
        except ValueError:
            # fall back to number-word conversion
            return nlp.text2int(text)

    def generate_random_location(self):
        """
        Placeholder location until real sensing is connected.
        """
        import random
        loc = db.onto.Location()
        loc.x = random.randint(0, 5) / 10
        loc.y = random.randint(0, 5) / 10
        return loc
# Example #10
# 0
class NLProcessor():
    """
    Top-level driver of the NL pipeline: parses a sentence, detects templates
    and builds a robot program from them.
    """
    def __init__(self, language="en"):
        self.gp = GrammarParser(language=language)
        self.td = TemplateDetector(language=language)
        self.tf = TemplateFactory()
        self.lang = language
        self.ui = UserInputManager(language=language)

        self.logger = logging.getLogger(__name__)
        self.guidance_file = self.ui.load_file('guidance_dialogue.json')

    def process_text(self, sentence: str) -> RobotProgram:
        """
        Turns an input text into a program template which can be used for creating instructions for the robot
        (after grounding).

        The program template is dependent only on the content of the sentence and not
        on the current state of the workspace.

        Parameters
        ----------
        sentence  an input sentence as string

        Returns
        -------
        a program template - formalized instructions for the robot with placeholders for real objects and locations
        (right now, the behavior is undefined in case the sentence does not allow creating a valid program)
        """
        parsed_text = self.gp.parse(sentence)
        root = parsed_text.parse_tree

        db_api = DatabaseAPI()
        state = db_api.get_state()

        # a custom (compound) program is built while learning from instructions
        if state == State.LEARN_FROM_INSTRUCTIONS:
            program = RobotCustomProgram()
        else:
            program = RobotProgram()

        # hardcoded program structure: all subprograms are located directly under the root node "AND"
        program.root = RobotProgramOperator(operator_type="AND")

        for subnode in root.subnodes:
            # only sentence nodes are processed; separator tokens are skipped
            if type(subnode) is ParseTreeNode:
                # create a single robot instructon
                program_node = self.process_node(subnode)
                program.root.add_child(program_node)

        return program

    def process_node(self, subnode: ParseTreeNode) -> RobotProgramOperand:
        """
        Turn one sentence subtree into a program operand by matching the
        first template that fills successfully.
        """
        node = RobotProgramOperand()
        node.parsed_text = subnode

        # working with flat tagged text (without any tree structure)
        tagged_text = subnode.flatten()

        # using TemplateDetector to get a list of templates sorted by probability
        template_types = self.td.detect_templates(tagged_text)

        self.logger.debug(
            f"Templates detected for \"{tagged_text.get_text()}\": {[t.name for t in template_types]}"
        )

        # try to sequentially match each template; the for/else runs the
        # else-branch only when no template matched (no break occurred)
        for template_type in template_types:
            # custom template cannot be parametrized yet -> no matching required
            # TODO the condition looks kind of stupid
            if type(template_type) is not TemplateType:
                # custom template is already a valid program
                node = template_type.root
                template = None
                break

            # get an object representing the template
            template = self.tf.get_template(template_type)

            # try to match all the template parameters
            template.match(tagged_text, language=self.lang)

            # check if the template is matched successfully
            if template.is_filled():
                break
        else:
            self.logger.error("No template match for \"{}\"".format(
                tagged_text.get_text()))
            self.ui.say(self.guidance_file[self.lang]["no_template_match"] +
                        tagged_text.get_text())
            template = None

        # save the filled template in the program node
        node.template = template

        return node
# Example #11
# 0
class TaggedText(ow.Thing):
    """
    A sequence of tokens and their respective tags.

    Tokens and tags are stored in two parallel lists (``tokens`` and
    ``tags``): the i-th tag belongs to the i-th token.
    """
    # ontology namespace this Thing subclass is registered in
    namespace = db.onto
    def __init__(self, language='en', **kargs):
        """
        Parameters
        ----------
        language  language code used for localized template lookups
        kargs     forwarded to ow.Thing (e.g. tokens=..., tags=... as used by cut())
        """
        super().__init__(**kargs)
        self.lang = language
        # UI helper used here only to load localized resource files
        self.ui = UserInputManager(language = self.lang)
        # localized detection templates, keyed first by language, then by template key
        self.templ_det = self.ui.load_file('templates_detection.json')

    def add_tagged_token(self, token : str, tag : Tag):
        """
        Append a single (token, tag) pair to the parallel lists.

        Parameters
        ----------
        token  the token string to append
        tag    the Tag object belonging to the token
        """
        self.tags.append(tag)
        self.tokens.append(token)

    def get_tokens_with_tags(self):
        """
        Return an iterator of TaggedToken objects, one per (token, tag) pair,
        in text order.
        """
        return (TaggedToken(token=tok, tag=tg)
                for tok, tg in zip(self.tokens, self.tags))

    def get_text(self) -> str:
        """
        Join all tokens with single spaces into one plain-text string.
        """
        separator = " "
        return separator.join(self.tokens)

    def contains_pos_token(self, token : str, pos_str : str, case_sensitive=False, include_subcategories : bool = True):
        """
        Syntax sugar for contains_tagged_token(): the POS tag is given as a
        plain string (e.g. "NN") and wrapped into a Tag object here.
        """
        tag = Tag(pos=POS(pos_str))
        return self.contains_tagged_token(token=token, tag=tag,
                                          case_sensitive=case_sensitive,
                                          include_subcategories=include_subcategories)

    def contains_tagged_token(self, token : str, tag : Tag, case_sensitive : bool = False, include_subcategories : bool = True):
        """
        Returns True if the tagged text contains a token with the corresponding tag, False otherwise.

        Parameters
        ----------
        token                   the token as a string
        tag                     the tag as a Tag object
        case_sensitive          if the token matching should be case-sensitive
        include_subcategories   if POS subcategories should be considered as matching (e.g. "NN" tag
                                will be matched for "N" query)
        """
        return any(
            self.token_equals(tok, token, case_sensitive)
            and self.tag_equals(tg, tag, include_subcategories)
            for tok, tg in zip(self.tokens, self.tags)
        )

    def contains_text(self, text : str, case_sensitive : bool = False):
        """
        Returns True if the text contains the given plain-text pattern, False otherwise.

        Parameters
        ----------
        text            pattern to be found in the text
        case_sensitive  if the matching should be case-sensitive
        """
        haystack = self.get_text()
        if not case_sensitive:
            haystack = haystack.lower()
            text = text.lower()
        return text in haystack

    def indices_of(self, token : str, tag_str : str = None, case_sensitive : bool = False, include_subcategories : bool = True):
        """
        Returns all indices of the token in the tagged text, or an empty list
        if the token is not present at all.

        If tag_str is specified, only indices where both the token and the tag
        match are returned.

        Parameters
        ----------
        token                   the token to be found in the text
        tag_str                 the optional tag which has to match each token occurrence
        case_sensitive          if the token matching should be case-sensitive
        include_subcategories   if POS subcategories should be considered as matching (e.g. "NN" tag
                                will be matched for "N" query)
        """
        # build the tag filter once (only needed when tag_str is given)
        wanted_tag = Tag(pos=POS(tag_str)) if tag_str else None

        hits = []
        for idx, tok in enumerate(self.tokens):
            if not self.token_equals(tok, token, case_sensitive):
                continue
            if wanted_tag is not None and not self.tag_equals(self.tags[idx], wanted_tag, include_subcategories):
                continue
            hits.append(idx)

        return hits

    def match_regex(self, regex : str):
        """
        Search the plain text for the given regex pattern.

        Parameters
        ----------
        regex  the pattern to search for

        Returns the re.Match object of the first occurrence, or None when the
        pattern does not occur (i.e. the result is truthy iff there is a match).
        """
        return re.search(regex, self.get_text())

    def match_regex_pos_list(self, regex_pos_list : List[Tuple[str,str]]):
        """
        Return the list of re.Match objects if the text contains the given
        sequence of tagged tokens (tokens can be specified as regexes),
        None otherwise. The sequence is matched sequentially, i.e. the tagged
        tokens have to appear in the specified order.

        Parameters
        ----------
        regex_pos_list  a list of tuples (<regex_as_string>, <pos_as_string>),
                        e.g. [(r"\w*", "NN"), (r".*", "CD")]
        """
        i = 0
        matches = []

        for tagged_token in self.get_tokens_with_tags():
            # all patterns already matched -> stop before indexing past the
            # end of regex_pos_list (the original code raised IndexError here
            # when tokens remained after the full sequence was matched)
            if i == len(regex_pos_list):
                break

            regex_to_match, pos_to_match = regex_pos_list[i]
            tag_to_match = Tag(pos=POS(pos_to_match))

            # use the localized variant of the pattern when one exists,
            # otherwise fall back to the pattern verbatim
            try:
                regex_to_match_lang = self.templ_det[self.lang][regex_to_match]
            except (KeyError, TypeError):
                regex_to_match_lang = regex_to_match

            match = re.match(regex_to_match_lang, tagged_token.token)

            # TODO make this more general, variants for pos
            if pos_to_match == 'NN' and match is not None:
                matches.append(match)
                i += 1
            elif match and tagged_token.tag.equals(tag_to_match):
                matches.append(match)
                i += 1

        # all items were matched
        if i == len(regex_pos_list):
            return matches

        return None


    def cut(self, start_idx, end_idx):
        """
        Returns a new TaggedText object which is a slice of the original tagged
        text. Uses the pythonic way of slicing lists.

        Parameters
        ----------
        start_idx   the start index (inclusive)
        end_idx     the end index (exclusive)
        """
        # propagate the language: previously the slice silently fell back to
        # the default 'en' regardless of the original text's language
        return TaggedText(tokens=self.tokens[start_idx:end_idx],
                          tags=self.tags[start_idx:end_idx],
                          language=self.lang)

    def token_equals(self, tok1 : str, tok2 : str, case_sensitive : bool):
        """
        Compare two tokens for equality, optionally ignoring case.
        """
        if case_sensitive:
            return tok1 == tok2
        return tok1.lower() == tok2.lower()

    def tag_equals(self, tag1 : Tag, tag2 : Tag, include_pos_subcategories: bool):
        """
        Compare two tags by delegating to Tag.equals().
        """
        result = tag1.equals(tag2, include_pos_subcategories)
        return result

    def __str__(self):
        """
        Debug representation listing every "token"/tag pair.
        """
        pairs = [f"\"{tok}\"/{tag}" for tok, tag in zip(self.tokens, self.tags)]
        return "TaggedText({})".format(", ".join(pairs))

    def __iadd__(self, other):
        """
        In-place concatenation: append all of the other text's tagged tokens
        to this one and return self.
        """
        for tok, tg in zip(other.tokens, other.tags):
            self.add_tagged_token(tok, tg)

        return self