def setUp(self):
    """Create the ruler under test and two multi-author example fixtures.

    Sets ``self.ruler`` to a fresh ``SeparatorsRegexRuler``, runs the parent
    class ``setUp`` and then stores ``self.example06`` and
    ``self.example07``. Each ``Example`` is built from a list of regex
    patterns (one per author, accepting either "surname ... initials" or
    "initials ... surname" order) followed by the author text itself.
    """
    self.ruler = SeparatorsRegexRuler()
    super(TestSeparatorsRegexRuler, self).setUp()

    # The patterns are raw strings so the regex backslashes stay literal
    # without relying on unrecognised escape sequences such as '\.'.
    # The resulting string values are identical to the non-raw spelling.
    #
    # NOTE(review): 'Solona' in the second pattern looks like a typo for
    # 'Solsona' (the spelling used in its other alternative and in the
    # text). Left unchanged so the fixture value stays the same -- confirm.
    self.example06 = Example(
        [
            r'(Botella.*P\.|P\..*Botella)',
            r'(Solona.*B\.|B\..*Solsona)',
            r'(A\..*Martinez-Arias|Martinez-Arias.*A\.)',
            r'(J\.M\..*Nieto|Nieto.*J\.M\.)',
        ],
        [
            u'P. Botella1, B. Solsona1, '
            'A. Martinez-Arias2 and J.M. '
            'Lopez Nieto1'
        ])

    # NOTE(review): unlike example06, the text here is passed as a bare
    # string rather than a one-element list, and the concatenated pieces
    # produce two consecutive spaces after 'Solsona3,'. Both are kept
    # as-is; verify against Example and the tests that use this fixture.
    self.example07 = Example(
        [
            r'(Cabre.*L\.|L\..*Cabre)',
            r'(Mancebo.*J\.|J\..*Mancebo)',
            r'(J\..*Solsona|Solsona.*J\.)',
        ],
        u'L. Cabre1, J. Mancebo2, J. F. Solsona3, '
        ' and the Bioethics Working '
        'Group of the SEMICYUC')
def generate_wrappers(self, url):
    """Train, prune and persist wrappers for every example set of ``url``.

    Example sets are fetched through an ``ExampleGateway`` configured from
    ``self.max_examples``, ``self.max_examples_from_db`` and
    ``self.secs_between_reqs``. For each set a ``WrapperTrainer`` is built
    with rulers chosen by the set name, the trained wrappers are passed
    through ``self._prune_wrappers`` and then stored with
    ``WrapperGateway.persist_wrappers``.

    A failure while training, pruning or persisting one set is logged and
    does not prevent the remaining sets from being processed.

    :param url: value forwarded to ``ExampleGateway.get_examples`` and
        ``WrapperGateway.persist_wrappers``.
    :returns: ``None``.
    """
    wrapper_manager = WrapperGateway()
    example_manager = ExampleGateway(
        max_examples=self.max_examples,
        max_examples_from_db=self.max_examples_from_db,
        seconds_between_requests=self.secs_between_reqs)
    example_sets = example_manager.get_examples(self.wrapper_gen_examples,
                                                url,
                                                self.min_validity)

    # 'set_name' rather than 'set' so the builtin is not shadowed.
    for set_name in example_sets:
        log.info('Starting wrapper training for set "%s"' % set_name) #@UndefinedVariable

        if set_name in ('author', 'editor'):
            # Person-like fields use their own group of rulers.
            rulers = [
                MultiValuePathRuler(),
                SeparatorsRegexRuler(),
                ElementsRegexRuler(),
                PersonRuler(),
            ]
        else:
            # Fall back to a match-anything guide when none is configured.
            try:
                value_guide = self.value_guides[set_name]
            except KeyError:
                value_guide = '.*'
            rulers = [PathRuler(value_guide), RegexRuler()]

        trainer = WrapperTrainer(rulers, self.wrapper_gen_examples)
        try:
            wrappers = trainer.train(example_sets[set_name])
            wrappers = self._prune_wrappers(wrappers)
            wrapper_manager.persist_wrappers(url, set_name, wrappers)
            log.info('Trainer generated %d wrappers' % len(wrappers)) #@UndefinedVariable
        except Exception as e:
            # Deliberately broad: one failing set must not abort the rest.
            log.error('Error training wrapper for set "%s": %s' % (set_name, e)) #@UndefinedVariable