コード例 #1
0
def make_all_possibilities(examples, empty_example, randgen_name=None):
    """
    Builds the list of every example obtainable once a random generation
    modifier is taken into account: the examples in `examples` plus
    `empty_example` (a new example with an empty text and no entities).

    When `randgen_name` is not `None`, the random generation mapping of
    each example is updated first: `randgen_name` is mapped to `True` on
    every example of `examples` and to `False` on `empty_example`.
    The mapping is read from (and written back to) the attribute named
    `RANDGEN_MAPPING_KEY`; a fresh dict is used when it is missing.

    @returns: the result of `add_example_no_dup(examples, empty_example)`.
    @raises: - `KeyError` if `randgen_name` is already present in a random
               generation mapping. Examples visited before the offending
               one have already been updated at that point.
    """
    def _register(example, was_generated):
        # Record whether `example` was produced with the modifier "on".
        mapping = getattr(example, RANDGEN_MAPPING_KEY, dict())
        if randgen_name in mapping:
            raise KeyError(
                "Didn't expect the random generation name '" + randgen_name + \
                "' to already be set."
            )
        mapping[randgen_name] = was_generated
        setattr(example, RANDGEN_MAPPING_KEY, mapping)

    if randgen_name is not None:
        for example in examples:
            _register(example, True)
        # The empty example stands for the case where nothing was generated.
        _register(empty_example, False)

    return add_example_no_dup(examples, empty_example)
コード例 #2
0
 def _generate_n_strategy(self, n, **kwargs):
     """
     Strategy to generate `n` examples without using the cache.
     Returns the list of generated examples.
     `kwargs` can contain `variation_name`.
     @pre: `n` <= `self.get_max_nb_possibilities()`
     """
     # TODO wouldn't it be better with a set rather than a list?
     generated_examples = []
     loop_count = 0
     while len(generated_examples) < n:
         current_ex = self.generate_random(**kwargs)
         add_example_no_dup(generated_examples, current_ex)
         loop_count += 1
         if loop_count > 10*n:  # QUESTION is that a good idea?
             break
     return generated_examples
コード例 #3
0
ファイル: rule.py プロジェクト: alvarorivasg/Chatette
 def _generate_all_strategy(self):
     if len(self._contents) == 0:
         return []
     generated_examples = None
     for content in self._contents:
         tmp_buffer = []
         content_examples = content.generate_all()
         if generated_examples is None:
             generated_examples = content_examples
         else:
             for ex in generated_examples:
                 for content_ex in content_examples:
                     if can_concat_examples(ex, content_ex):
                         new_example = \
                             concat_examples_with_randgen(ex, content_ex)
                         add_example_no_dup(tmp_buffer, new_example)
             generated_examples = tmp_buffer
     if generated_examples is None:
         return []
     return sort_by_texts(generated_examples)
コード例 #4
0
    def generate_test(self, training_examples):
        """
        Returns a list of examples that can be put in the test set
        (not present in the training set).

        The list holds as many examples as were asked in the templates
        (`self._nb_testing_ex_asked`) whenever that many can be found; it
        can be shorter otherwise. An empty list is returned when no test
        example was asked.

        Two strategies are used:
        - when fewer than a fifth of the possible examples are asked,
          examples are drawn at random, with at most
          `10 * self._nb_testing_ex_asked + 1` draws;
        - otherwise all examples are generated, shuffled and scanned in
          order.

        @param training_examples: collection of examples already used for
                                  training (only membership is tested).
        @returns: the list of test examples.
        """
        nb_asked = self._nb_testing_ex_asked
        if nb_asked is None or nb_asked == 0:
            return []

        test_examples = []
        if nb_asked < float(self.get_max_nb_possibilities()) / 5.0:
            # Random sampling. The cap on the number of draws is checked on
            # every iteration, including the ones that drew an example from
            # the training set; otherwise the loop would never end when
            # every reachable example is already used for training.
            max_attempts = 10 * nb_asked
            attempts = 0
            while len(test_examples) < nb_asked and attempts <= max_attempts:
                attempts += 1
                candidate = self.generate_random()
                if candidate not in training_examples:
                    # Return value unused: the list is expected to be
                    # updated in place.
                    add_example_no_dup(test_examples, candidate)
            return test_examples

        # Exhaustive strategy: walk all the examples in a random order.
        all_examples = self.generate_all()
        shuffle(all_examples)
        for ex in all_examples:
            if ex in training_examples:
                continue
            add_example_no_dup(test_examples, ex)
            if len(test_examples) == nb_asked:
                break
        return test_examples