class TestRealization(TestCase):
    """Exercise ``FinnishUralicNLPMorphologicalRealizer.realize`` on a slot and a literal."""

    def setUp(self):
        """Build a template with one slot and one literal, fill it, and create the realizer."""
        fact = Fact("1", "_", "_", "_", "_", "_", "_", "kissa", "_")
        message = Message(fact)

        expr = FactField("corpus")
        matcher = Matcher(expr, "=", "1")
        rules = [([matcher], [0])]

        slot = Slot(FactFieldSource("result_value"))
        literal = LiteralSlot("sana")
        components = [slot, literal]

        template = Template(components, rules)
        template.fill(message, [message])

        # Expose every fixture object on the test case, as the tests read them from ``self``.
        self.fact = fact
        self.message = message
        self.expr = expr
        self.matcher = matcher
        self.rules = rules
        self.slot = slot
        self.literal = literal
        self.components = components
        self.template = template
        self.realizer = FinnishUralicNLPMorphologicalRealizer()

    def test_no_attrs_slot_left_as_is(self):
        """A slot without attributes is realized as "kissa"."""
        realized = self.realizer.realize(self.slot)
        self.assertEqual("kissa", realized)

    def test_no_attrs_literal_left_as_is(self):
        """A literal without attributes is realized as "sana"."""
        realized = self.realizer.realize(self.literal)
        self.assertEqual("sana", realized)

    def test_gen_slot_realized_correctly(self):
        """A slot with ``case`` set to "genitive" is realized as "kissan"."""
        self.slot.attributes["case"] = "genitive"
        realized = self.realizer.realize(self.slot)
        self.assertEqual("kissan", realized)

    def test_gen_literal_realized_correctly(self):
        """A literal with ``case`` set to "genitive" is realized as "sanan"."""
        self.literal.attributes["case"] = "genitive"
        realized = self.realizer.realize(self.literal)
        self.assertEqual("sanan", realized)
def _combine(self, registry: Registry, language: str, first: Message, second: Message) -> Message:
    """
    Merge two messages into a single message joined by a conjunction.

    The combined template consists of all components of ``first``, a conjunction literal, and the
    components of ``second`` that follow the prefix the two messages share (as determined by
    ``self._get_combinable_prefix``).

    :param registry: registry from which the "CONJUNCTIONS" resource is read
    :param language: language key used to select the conjunction dictionary
    :param first: the message whose components (and importance coefficient) come first
    :param second: the message whose non-shared components are appended
    :return: a new message holding the facts of both inputs, with aggregation prevented
    """
    log.debug("Combining {} and {}".format(
        [c.value for c in first.template.components],
        [c.value for c in second.template.components]))

    shared_prefix = self._get_combinable_prefix(first, second)
    log.debug(f"Shared prefix is {[e.value for e in shared_prefix]}")

    combined = list(first.template.components)

    conjunctions = registry.get("CONJUNCTIONS").get(language, None)
    if conjunctions:
        conjunction = conjunctions.get("default_combiner", "MISSING-DEFAULT-CONJUCTION")
    else:
        # No conjunction dictionary for this language. The previous fallback wrapped a defaultdict
        # in a tuple, so the subsequent ``.get`` call raised AttributeError; emit the placeholder
        # text directly instead.
        conjunction = "NO-CONJUNCTION-DICT"
    combined.append(Literal(conjunction))

    # Skip the part of the second template that duplicates the shared prefix.
    combined.extend(second.template.components[len(shared_prefix):])
    log.debug("Combined thing is {}".format([c.value for c in combined]))

    new_message = Message(
        facts=first.facts + [fact for fact in second.facts if fact not in first.facts],
        importance_coefficient=first.importance_coefficient,
    )
    new_message.template = Template(combined)
    new_message.prevent_aggregation = True
    return new_message
def setUp(self):
    """Build a template with one slot and one literal, fill it, and create the realizer."""
    fact = Fact("1", "_", "_", "_", "_", "_", "_", "kissa", "_")
    message = Message(fact)

    expr = FactField("corpus")
    matcher = Matcher(expr, "=", "1")
    rules = [([matcher], [0])]

    slot = Slot(FactFieldSource("result_value"))
    literal = LiteralSlot("sana")
    components = [slot, literal]

    template = Template(components, rules)
    template.fill(message, [message])

    # Expose every fixture object on the test case instance.
    self.fact = fact
    self.message = message
    self.expr = expr
    self.matcher = matcher
    self.rules = rules
    self.slot = slot
    self.literal = literal
    self.components = components
    self.template = template
    self.realizer = FinnishUralicNLPMorphologicalRealizer()
def setUp(self):
    """Create two facts with their messages and an unfilled slot-plus-literal template."""
    fact1 = Fact("1", "_", "_", "_", "_", "_", "_", "_", "_")
    fact2 = Fact("2", "_", "_", "_", "_", "_", "_", "_", "_")
    message1 = Message(fact1)
    message2 = Message(fact2)

    expr = FactField("corpus")
    matcher = Matcher(expr, "=", "1")
    rules = [([matcher], [0])]

    slot = Slot(FactFieldSource("corpus"))
    literal = LiteralSlot("literal")
    components = [slot, literal]
    template = Template(components, rules)

    # Expose every fixture object on the test case instance.
    self.fact1, self.fact2 = fact1, fact2
    self.message1, self.message2 = message1, message2
    self.expr = expr
    self.matcher = matcher
    self.rules = rules
    self.slot = slot
    self.literal = literal
    self.components = components
    self.template = template
def _add_template_to_message(message: Message, template_original: Template, all_messages: List[Message]) -> None:
    """
    Attach a filled copy of a template to a message and record the facts the template used.

    The original template is never modified; a copy is filled instead. If filling yields no
    facts, an error is logged and an empty ``DefaultTemplate`` is attached in its place.

    :param message: The message that receives the template
    :param template_original: The template to copy and fill for the message
    :param all_messages: Other available messages, passed on to the template's ``fill``
    :return: Nothing
    """
    candidate = template_original.copy()
    matched_facts = candidate.fill(message, all_messages)

    if not matched_facts:
        log.error(
            "Chosen template '{}' for fact '{}' could not be used! "
            "Falling back to default templates".format(candidate.display_template(), message.main_fact)
        )
        candidate = DefaultTemplate("")
    else:
        log.debug("Successfully linked template to message")

    message.template = candidate
    message.facts = matched_facts
def read_template_group(
    template_spec: List[str], current_language: Optional[str] = None, warn_on_old_format: bool = True
):
    """
    Parse a template group: one block that shares fact constraints and may specify multiple templates
    (for different languages, or the same).

    :param template_spec: text of block, split into lines (a single string is also accepted and is split
        into lines)
    :param current_language: default language to start with
    :param warn_on_old_format: output warnings when the old template format is used. This is the default,
        since the old format is deprecated when using this function, but if you know you're reading an old
        file, you can suppress the warnings
    :return: a 3-tuple of (dict of language -> template list, new default language after group, set of the
        ``what_type`` values seen in the group's fact constraints). Groups that yield no templates
        (language-only groups and old-format groups) return an empty dict and an empty set.
    :raises TemplateReadingError: if a substitution is empty, is missing its closing brace or closing
        quote, refers to an unknown or disallowed field, has an invalid rule reference, or has an
        attribute without a value
    """
    # Allow either a string (block) or a list of lines as input
    if isinstance(template_spec, str):
        template_spec = template_spec.splitlines()

    # For readability, lines may be spread over multiple lines of the file, indenting after the first
    # This applies to the template text and fact constraints
    lines = list(group_indented_lines(template_spec))

    # Allow changing the current language without any templates
    # This is mostly used to specify a monolingual set of templates, defining the language and letting
    # it carry through
    lang_name, colon, rest = "".join(lines).strip().partition(":")
    if colon and len(rest) == 0:
        # Return no templates and update the current language. The empty set keeps the return shape
        # identical to the main path so that callers can always unpack three values.
        return {}, lang_name, set()

    # Detect whether this template is specified in the new or old format. Templates using the old format
    # are ignored.
    if not any(line.startswith(RULE_PREFIX) for line in lines):
        # If there are no fact constraint lines (now explicitly marked), assume this is the old format
        if warn_on_old_format:
            warnings.warn("no fact constraint lines found in template: assuming old format")
        return {}, current_language, set()

    # Something has to be specified
    # NOTE(review): an empty group already returned in the old-format check above, so this branch
    # cannot currently be reached; it is kept as a safeguard.
    if len(lines) == 0:
        raise TemplateReadingError("empty template definition", raw_text="\n".join(template_spec))

    # Split up the lines into template lines (potentially with a language specifier, but not necessarily)
    # and constraint lines
    template_lines = [line for line in lines if not line.startswith(RULE_PREFIX)]
    # The rest of the lines each define a fact associated with the templates, along with constraints
    constraint_lines = [line[len(RULE_PREFIX) :].lstrip() for line in lines if line.startswith(RULE_PREFIX)]

    # FACT CONSTRAINTS
    # Read in the fact constraints first to get a list of rules that will be associated with the template
    rules = []  # type: List[List[Matcher]]
    seen_what_types = []
    for constraint_line in constraint_lines:
        # Every part of this line represents a constraint on the facts that may match
        matchers = []
        for lhs, op, value in parse_matcher_expr(constraint_line):
            if "what_type" in lhs.field_name:
                # Keep track of all what_types we've seen for reference
                # ToDo: What to do with the regexes?
                if isinstance(value, set):
                    seen_what_types.extend(value)
                else:
                    seen_what_types.append(value)
            matchers.append(Matcher(lhs, op, value))
        rules.append(matchers)

    # Every template is associated with at least one rule
    # If no constraints were given (very usual), create one rule with no constraints
    if len(rules) == 0:
        rules.append([])

    # TEMPLATES
    # Now we parse the template lines themselves
    templates = {}
    for template_line in template_lines:
        # Work out what language this template is for
        lang_id_match = lang_spec_re.match(template_line)
        if lang_id_match is not None:
            language, template_line = lang_id_match.groups()
            # Make language specifiers case insensitive
            language = language.lower()
            # If empty language spec, use default language (and strip away the colon prefix)
            if len(language) > 0:
                # Otherwise, switch the current language, so it gets used for this template and becomes
                # the default
                current_language = language
        # else: no language spec for this template, so the default language is used

        # Allow alternative versions of a template to be specified using the [] notation for optional parts
        for expanded_template_line in expand_alternatives(template_line):
            components = []  # type: List['TemplateComponent']
            # Generate list for mapping rules into template Slots: one list of component indices per rule
            rule_to_slot = [[] for _ in rules]  # type: List[List[int]]

            rest = expanded_template_line.strip()
            while len(rest.strip()):
                # Look for the next opening brace marking a substitution
                literal_part, __, rest = rest.partition("{")

                # Everything up to the brace is a literal
                if len(literal_part) > 0:
                    # To make life easier for the aggregator, literals are split on whitespace here
                    for literal in literal_part.split():
                        components.append(Literal(literal))

                # If no brace was found, we're done
                if len(rest) > 0:
                    # Look for the closing brace
                    subst, closer, rest = rest.partition("}")
                    if not closer:
                        raise TemplateReadingError("closing brace missing in {}".format(expanded_template_line))

                    # Split up the substitution spec on commas, to allow various attributes and filters to
                    # be included
                    subst_parts = [p.strip() for p in subst.split(",")]

                    # An empty first part (e.g. "{}") would otherwise fail with a bare IndexError below
                    if not subst_parts[0]:
                        raise TemplateReadingError("empty substitution in {}".format(expanded_template_line))

                    # First check if the first part is actually a literal.
                    if subst_parts[0][0] in ['"', "'"]:
                        if subst_parts[0][-1] != subst_parts[0][0]:
                            raise TemplateReadingError(
                                "closing quote missing in {}".format(expanded_template_line)
                            )
                        field_name = subst_parts[0]
                        # Quoted literals are not tied to any rule
                        rule_ref = None
                    else:
                        # The first thing is the base value to substitute, which should be one of the fact
                        # fields or the new {time} slot, which refers to both when-fields
                        field_name = subst_parts[0]

                        # It may specify which of the facts it's referring to, though this is not required
                        # (default to first)
                        if "." in field_name:
                            rule_ref, __, field_name = field_name.partition(".")
                            # Use 1-indexed fact numbering in templates: makes more sense for anyone but
                            # computer scientists
                            try:
                                rule_ref = int(rule_ref) - 1
                            except ValueError:
                                raise TemplateReadingError(
                                    "invalid rule reference '{}' in substitution ({})".format(rule_ref, subst)
                                ) from None
                            if rule_ref < 0:
                                raise TemplateReadingError(
                                    "Rule references use 1-index numbering. Found reference to rule "
                                    "0: did you mean 1?"
                                )
                        else:
                            # Default to referring to the first rule, since there's usually only one
                            rule_ref = 0

                        # Map alternative field names to their canonical form used internally
                        try:
                            field_name = FACT_FIELD_MAP[field_name]
                        except KeyError:
                            raise TemplateReadingError(
                                "unknown fact field '{}' used in substitution ({})".format(field_name, subst)
                            )

                        # Only some of the field names are allowed to be used in templates
                        # TODO: Remove or reinstate with allowed things received as params from "somewhere"
                        if field_name not in FACT_FIELDS:
                            raise TemplateReadingError(
                                "invalid field name '{}' for use in a template: {}".format(
                                    field_name, expanded_template_line
                                )
                            )

                        if rule_ref >= len(rules):
                            raise TemplateReadingError(
                                "Substitution '{}' refers to rule {}, but template only has {} "
                                "rules".format(subst, rule_ref + 1, len(rules))
                            )

                    attributes = {}
                    # Read each of the attribute specifications
                    for subst_part in subst_parts[1:]:
                        if "=" in subst_part:
                            # Attributes specify things like case, to be used in realisation
                            att, __, val = subst_part.partition("=")
                            attributes[att.strip()] = val.strip()
                        else:
                            raise TemplateReadingError(
                                "Found an attribute with no value specified. "
                                "Possibly a leftover old style filter? {}".format(subst_part)
                            )

                    if field_name[0] in ["'", '"']:
                        # Strip the surrounding quotes from a literal
                        to_value = LiteralSource(field_name[1:-1])
                    elif field_name == "time":
                        to_value = TimeSource()
                    else:
                        to_value = FactFieldSource(field_name)

                    # Postprocess attributes
                    attributes = process_attributes(attributes)

                    # len(components) is the index for the next component to be added
                    if rule_ref is not None:
                        rule_to_slot[rule_ref].append(len(components))
                    new_slot = Slot(to_value, attributes=attributes)
                    components.append(new_slot)

            template = Template(components, list(zip(rules, rule_to_slot)))
            # Add this template to the list for the relevant language
            templates.setdefault(current_language, []).append(template)

    return templates, current_language, set(seen_what_types)
class TestTemplate(TestCase):
    """Tests for ``Template``: construction, slot management, ``check`` and ``fill``."""

    def setUp(self):
        """Create two facts with their messages and an unfilled slot-plus-literal template."""
        fact1 = Fact("1", "_", "_", "_", "_", "_", "_", "_", "_")
        fact2 = Fact("2", "_", "_", "_", "_", "_", "_", "_", "_")
        message1 = Message(fact1)
        message2 = Message(fact2)

        expr = FactField("corpus")
        matcher = Matcher(expr, "=", "1")
        rules = [([matcher], [0])]

        slot = Slot(FactFieldSource("corpus"))
        literal = LiteralSlot("literal")
        components = [slot, literal]
        template = Template(components, rules)

        # Expose every fixture object on the test case, as the tests read them from ``self``.
        self.fact1, self.fact2 = fact1, fact2
        self.message1, self.message2 = message1, message2
        self.expr = expr
        self.matcher = matcher
        self.rules = rules
        self.slot = slot
        self.literal = literal
        self.components = components
        self.template = template

    def test_template_constructs(self):
        """A new template keeps its components and starts with an empty fact list."""
        template = self.template
        self.assertListEqual(template.components, self.components)
        self.assertIsInstance(template.facts, list)
        self.assertEqual(len(template.facts), 0)

    def test_template_sets_parent_to_components(self):
        """Both components have the template as their parent."""
        self.assertEqual(self.slot.parent, self.template)
        self.assertEqual(self.literal.parent, self.template)

    def test_template_get_slot(self):
        """``get_slot`` finds components by name and raises ``KeyError`` for unknown names."""
        template = self.template
        self.assertEqual(template.get_slot("corpus"), self.slot)
        self.assertEqual(template.get_slot("literal"), self.literal)
        with self.assertRaises(KeyError):
            template.get_slot("no such")

    def test_template_add_slot(self):
        """A slot added with ``add_slot`` is among the components and retrievable by name."""
        extra = Slot(FactFieldSource("timestamp_from"))
        self.template.add_slot(2, extra)
        self.assertIn(extra, self.template.components)
        self.assertEqual(self.template.get_slot("timestamp_from"), extra)

    def test_template_added_slot(self):
        """Adding a slot at index 1 places it between the existing components."""
        extra = Slot(FactFieldSource("timestamp_from"))
        self.template.add_slot(1, extra)
        expected = [self.slot, extra, self.literal]
        self.assertListEqual(self.template.components, expected)

    def test_template_added_slot_is_last_component(self):
        """Adding a slot at index 2 places it after the existing components."""
        extra = Slot(FactFieldSource("timestamp_from"))
        self.template.add_slot(2, extra)
        expected = [self.slot, self.literal, extra]
        self.assertListEqual(self.template.components, expected)

    def test_template_move_slot_forwards(self):
        """Moving the component at index 0 to index 1 swaps the first two components."""
        # TODO: This fails, when it shouldn't
        extra = Slot(FactFieldSource("timestamp_from"))
        self.template.add_slot(2, extra)
        self.template.move_slot(0, 1)
        expected = [self.literal, self.slot, extra]
        self.assertListEqual(self.template.components, expected)

    def test_template_move_slot_backwards(self):
        """Moving the component at index 2 to index 1 places it before the literal."""
        extra = Slot(FactFieldSource("timestamp_from"))
        self.template.add_slot(2, extra)
        self.template.move_slot(2, 1)
        expected = [self.slot, extra, self.literal]
        self.assertListEqual(self.template.components, expected)

    def test_template_check_success(self):
        """``check`` against the first message returns exactly its fact."""
        matched = self.template.check(self.message1, [self.message1])
        self.assertEqual(len(matched), 1)
        self.assertIn(self.fact1, matched)

    def test_template_check_success_does_not_fill(self):
        """A successful ``check`` leaves the message, the template and the slot unfilled."""
        self.template.check(self.message1, [self.message1])
        self.assertIsNone(self.message1.template)
        self.assertEqual(len(self.template.facts), 0)
        self.assertIsNone(self.slot.fact)

    def test_template_check_failure(self):
        """``check`` against the second message returns no facts."""
        matched = self.template.check(self.message2, [self.message2])
        self.assertEqual(len(matched), 0)
        self.assertNotIn(self.fact2, matched)

    def test_template_check_failure_does_not_fill(self):
        """A failed ``check`` leaves the template and the slot unfilled."""
        self.template.check(self.message2, [self.message2])
        self.assertEqual(len(self.template.facts), 0)
        self.assertIsNone(self.slot.fact)

    def test_template_fill_success(self):
        """``fill`` against the first message returns exactly its fact."""
        matched = self.template.fill(self.message1, [self.message1])
        self.assertEqual(len(matched), 1)
        self.assertIn(self.fact1, matched)

    def test_template_fill_success_fills(self):
        """A successful ``fill`` stores the fact on both the template and the slot."""
        self.template.fill(self.message1, [self.message1])
        self.assertEqual(len(self.template.facts), 1)
        self.assertIn(self.fact1, self.template.facts)
        self.assertEqual(self.slot.fact, self.fact1)

    def test_template_fill_failure(self):
        """``fill`` against the second message returns no facts."""
        matched = self.template.fill(self.message2, [self.message2])
        self.assertEqual(len(matched), 0)
        self.assertNotIn(self.fact2, matched)

    def test_template_fill_failure_does_not_fill(self):
        """A failed ``fill`` leaves the template and the slot unfilled."""
        self.template.fill(self.message2, [self.message2])
        self.assertEqual(len(self.template.facts), 0)
        self.assertIsNone(self.slot.fact)