def generate_all(self):
    """
    Returns every example that this alias reference can generate.

    The examples come from the alias definition looked up through the
    parser (using this reference's variation name and argument value).
    An empty example is put first when a random generation modifier is
    set, then the leading space and case generation modifiers are applied.
    """
    self.check_casegen()

    # A reference with a random generation modifier may generate nothing
    results = [Example()] if self.randgen is not None else []

    definition = self.parser.get_definition(self.name, UnitType.alias)
    results.extend(
        definition.generate_all(self.variation_name, self.arg_value)
    )

    if self.leading_space:
        for example in results:
            if may_get_leading_space(example.text):
                example.text = ' ' + example.text

    if not self.casegen:
        return results

    # Case generation: one lowercase-first and one uppercase-first example
    # for each text whose leading case can be changed
    cased_results = []
    for example in results:
        if not may_change_leading_case(example.text):
            cased_results.append(example)
            continue
        cased_results.append(
            Example(with_leading_lower(example.text), example.entities)
        )
        cased_results.append(
            Example(with_leading_upper(example.text), example.entities)
        )
    return cased_results
def generate_all(self):
    """
    Returns the list of all examples this word content can generate.

    The texts are built as plain strings first (an empty string when a
    random generation modifier is set, then the words themselves in one or
    two casings) and are wrapped into `Example` objects at the very end.
    """
    # The empty string stands for "nothing was generated"
    texts = [""] if self.randgen is not None else []

    if self.casegen:
        texts += [
            with_leading_lower(self.words),
            with_leading_upper(self.words),
        ]
    else:
        texts.append(self.words)

    if self.leading_space:
        texts = [
            (' ' + text) if may_get_leading_space(text) else text
            for text in texts
        ]

    return [Example(text) for text in texts]
def generate_all(self):
    """
    Returns the list of all examples this choice can generate.

    For each alternative in `self.choices`, the generations of its tokens
    are combined (texts concatenated, entity lists concatenated) to form
    every possible example of that alternative. An empty example is put
    first when the random generation flag is set, then the leading space
    and case generation modifiers are applied to the whole list.
    """
    self.check_casegen()

    generated_examples = []
    if self.randgen:
        # The choice may generate nothing at all
        generated_examples.append(Example())

    for choice in self.choices:
        current_examples = []
        for token in choice:
            current_token_all_generations = token.generate_all()
            if not current_examples:
                # First token (or nothing generated so far): start from
                # this token's generations
                current_examples = list(current_token_all_generations)
            else:
                # Combine every partial example with every generation
                # of the current token
                current_examples = [
                    Example(partial_example.text + gen.text,
                            partial_example.entities + gen.entities)
                    for partial_example in current_examples
                    for gen in current_token_all_generations
                ]
        generated_examples.extend(current_examples)

    if self.leading_space:
        for ex in generated_examples:
            if may_get_leading_space(ex.text):
                ex.text = ' ' + ex.text

    if self.casegen:
        tmp_buffer = []
        for ex in generated_examples:
            lower_text = with_leading_lower(ex.text)
            upper_text = with_leading_upper(ex.text)
            if lower_text == upper_text:
                # Changing the leading case has no effect on this text:
                # keep a single copy instead of two identical examples
                tmp_buffer.append(ex)
            else:
                tmp_buffer.append(Example(lower_text, ex.entities))
                tmp_buffer.append(Example(upper_text, ex.entities))
        # Without this assignment the case variants would be built and
        # then thrown away
        generated_examples = tmp_buffer

    return generated_examples
def test_change(self):
    """
    Checks the output of `with_leading_lower` against the expected string
    for each sample input.
    """
    # (input, expected output) pairs
    cases = [
        ("test", "test"),
        ("\tindentation", "\tindentation"),
        ("Several words", "several words"),
        ("IN CAPITAL?", "iN CAPITAL?"),
    ]
    for (original, expected) in cases:
        assert with_leading_lower(original) == expected
def test_no_change(self):
    """
    Checks that `with_leading_lower` returns each of these sample strings
    unchanged.
    """
    for unchanged in ("12", " ", "???", "(test)"):
        assert with_leading_lower(unchanged) == unchanged
def test_empty(self):
    """Checks that `with_leading_lower` maps the empty string to itself."""
    empty = ""
    result = with_leading_lower(empty)
    assert result == ""
def generate_all(self, variation_name=None, arg_value=None):
    """
    Returns the list of all examples this slot definition can generate.

    :param variation_name: name of the variation whose rules should be
                           used; `None` means the rules in `self.rules`.
    :param arg_value: value to substitute for the argument inside the
                      generated texts and entities; `None` means no value
                      was provided.
    :returns: a list of examples. Each one has its text prefixed with
              `ENTITY_MARKER` and carries an extra entity dict with the
              keys "slot-name", "text" and "value".
    :raises SyntaxError: if `variation_name` is not a known variation, or
                         if no rule exists for the requested variation.
    """
    if (arg_value is not None
            and arg_value not in self.arg_values_encountered):
        # Memorize arg value
        self.arg_values_encountered.append(arg_value)

    generated_examples = []

    relevant_rules = self.rules
    if variation_name is not None:
        if variation_name in self.variations:
            relevant_rules = self.variations[variation_name]
        else:
            raise SyntaxError("Couldn't find variation '" +
                              str(variation_name) + "' for slot '" +
                              str(self.name) + "'")

    if not relevant_rules:  # No rules
        if variation_name is None:
            raise SyntaxError("No rules could be found for "+self.type+" '"+
                              self.name+"'")
        else:
            raise SyntaxError("No rules could be found for "+self.type+" '"+
                              self.name+"' (variation: '"+variation_name+"')")

    for rule in relevant_rules:
        # Combine the generations of every sub-unit of the rule
        examples_from_current_rule = []
        for sub_unit_rule in rule:
            sub_unit_possibilities = sub_unit_rule.generate_all()
            if not examples_from_current_rule:
                examples_from_current_rule = sub_unit_possibilities
            else:
                examples_from_current_rule = [
                    Example(
                        ex.text + possibility.text,
                        ex.entities + possibility.entities,  # this is a list
                    )
                    for ex in examples_from_current_rule
                    for possibility in sub_unit_possibilities
                ]

        # Replace `arg` inside generated sentences
        if (arg_value is not None
                and self.modifiers.argument_name is not None):
            for ex in examples_from_current_rule:
                ex.text = self._replace_arg(ex.text, arg_value)
                for entity in ex.entities:
                    entity["text"] = \
                        self._replace_arg(entity["text"], arg_value)
                    entity["value"] = \
                        self._replace_arg(entity["value"], arg_value)

        # Apply casegen
        if self.modifiers.casegen and self.can_have_casegen():
            tmp_examples = []
            for ex in examples_from_current_rule:
                # Work on copies so that both variants are independent
                (lower_ex, upper_ex) = (deepcopy(ex), deepcopy(ex))
                lower_ex.text = with_leading_lower(lower_ex.text)
                upper_ex.text = with_leading_upper(upper_ex.text)
                if lower_ex != upper_ex:
                    tmp_examples.append(lower_ex)
                    tmp_examples.append(upper_ex)
                else:
                    tmp_examples.append(ex)
            examples_from_current_rule = tmp_examples

        # Add the entity in the list
        first_content = rule[0]
        if isinstance(first_content, DummySlotValRuleContent):
            # The rule starts with an explicit slot value:
            # replace the argument by its value if needed
            # NOTE(review): `arg_value` may be `None` here -- presumably
            # `_replace_arg` copes with that; confirm.
            slot_value = self._replace_arg(first_content.name, arg_value)
        else:
            slot_value = None

        for ex in examples_from_current_rule:
            # Without an explicit slot value, the generated text is used
            entity_value = slot_value if slot_value is not None else ex.text[:]
            ex.entities.append({
                "slot-name": self.name,
                "text": ex.text[:],
                "value": entity_value,
            })

        generated_examples.extend(examples_from_current_rule)

    for ex in generated_examples:
        ex.text = ENTITY_MARKER + ex.text

    return generated_examples