def make_all_possibilities(examples, empty_example, randgen_name=None):
    """
    Builds and returns the list of all the examples that can exist once
    the random generation modifier has been applied to the examples of
    `examples`.

    When `randgen_name` is provided, the random generation mapping of
    every example is updated in place: `randgen_name` is associated to
    `True` for each example of `examples` (they were generated) and to
    `False` for `empty_example` (a new example that has an empty text
    and no entities).
    The returned value is the result of adding `empty_example` to
    `examples` with `add_example_no_dup`.
    @raises: - `KeyError` if `randgen_name` is already present in the
               random generation mapping of one of the examples.
    """
    def register(example, was_generated):
        # Fetch the mapping of the example (or start a new one), refuse to
        # overwrite an existing entry, then store the mapping back.
        mapping = getattr(example, RANDGEN_MAPPING_KEY, dict())
        if randgen_name in mapping:
            raise KeyError(
                "Didn't expect the random generation name '" + randgen_name +
                "' to already be set."
            )
        mapping[randgen_name] = was_generated
        setattr(example, RANDGEN_MAPPING_KEY, mapping)

    if randgen_name is not None:
        for example in examples:
            register(example, True)
        register(empty_example, False)

    return add_example_no_dup(examples, empty_example)
def _generate_n_strategy(self, n, **kwargs):
    """
    Generates `n` examples one at a time, without using the cache,
    and returns them as a list.
    `kwargs` is forwarded to `self.generate_random` (it can contain
    `variation_name`).
    The returned list can hold fewer than `n` examples: generation is
    abandoned once more than `10*n` attempts have been made.
    @pre: `n` <= `self.get_max_nb_possibilities()`
    """
    # TODO wouldn't it be better with a set rather than a list?
    examples = []
    max_attempts = 10 * n  # QUESTION is that a good idea?
    nb_attempts = 0
    while len(examples) < n and nb_attempts <= max_attempts:
        # Duplicates are dropped by `add_example_no_dup`,
        # but they still count as an attempt.
        add_example_no_dup(examples, self.generate_random(**kwargs))
        nb_attempts += 1
    return examples
def _generate_all_strategy(self):
    """
    Strategy to generate all the possible examples.
    The examples produced by `generate_all()` on each item of
    `self._contents` are combined item after item: every example built
    so far is paired with every example of the next item, and the pairs
    accepted by `can_concat_examples` are merged with
    `concat_examples_with_randgen` (duplicates are not kept).
    Returns the resulting list passed through `sort_by_texts`,
    or an empty list if there is nothing to generate.
    """
    if len(self._contents) == 0:
        return []

    combined = None
    for content in self._contents:
        candidates = content.generate_all()
        if combined is None:
            # Nothing to combine with yet: start from these examples.
            combined = candidates
            continue

        merged = []
        for left in combined:
            for right in candidates:
                if not can_concat_examples(left, right):
                    continue
                add_example_no_dup(
                    merged, concat_examples_with_randgen(left, right)
                )
        combined = merged

    return [] if combined is None else sort_by_texts(combined)
def generate_test(self, training_examples):
    """
    Returns a list of examples that can be put in the test set
    (i.e. examples that are not present in `training_examples`).
    The list holds at most as many examples as were asked in the
    templates (`self._nb_testing_ex_asked`); it can hold fewer if not
    enough examples outside of the training set could be found.

    Two strategies are used:
    - if fewer than a fifth of all the possible examples are asked,
      examples are randomly generated one by one, giving up once more
      than `10 * self._nb_testing_ex_asked` attempts have been made;
    - otherwise, all the possible examples are generated, shuffled and
      picked in order.
    """
    nb_asked = self._nb_testing_ex_asked
    if nb_asked is None or nb_asked == 0:
        return []

    test_examples = []
    if nb_asked < float(self.get_max_nb_possibilities()) / 5.0:
        # Every attempt counts towards the limit, including the ones that
        # produced a training example: otherwise this loop would never end
        # when the random generations keep hitting the training set.
        max_attempts = 10 * nb_asked
        nb_attempts = 0
        while len(test_examples) < nb_asked and nb_attempts <= max_attempts:
            nb_attempts += 1
            current_ex = self.generate_random()
            if current_ex not in training_examples:
                add_example_no_dup(test_examples, current_ex)
        return test_examples

    # A large share of the possibilities is asked: enumerate them all and
    # pick the first ones (in random order) that are not training examples.
    all_examples = self.generate_all()
    shuffle(all_examples)
    for ex in all_examples:
        if ex in training_examples:
            continue
        add_example_no_dup(test_examples, ex)
        if len(test_examples) == nb_asked:
            break
    return test_examples