コード例 #1
0
class TestMultiValuePathRuler(TestRuler):
    """Tests for ``MultiValuePathRuler`` using examples with two values each.

    The ``soup01``..``soup03`` attributes are read after calling the parent
    ``setUp``; presumably ``TestRuler.setUp`` provides them (not visible
    here).
    """

    def setUp(self):
        """Create the ruler under test and the multi-value examples."""
        self.ruler = MultiValuePathRuler()
        super(TestMultiValuePathRuler, self).setUp()

        # Patterns are raw strings so that ``\.`` reaches the regex engine
        # unchanged instead of being an invalid string escape.
        # NOTE(review): 'Solona' vs 'Solsona' in the second pattern looks like
        # a typo; left untouched because the expected rule count in
        # test_rule_example may depend on it -- confirm.
        self.example06 = Example([r'.*(Botella.*P\.|P\..*Botella).*',
                                  r'.*(Solona.*B\.|B\..*Solsona).*'],
                                 self.soup03)

        self.example07 = Example([r'.*(Alberto.*Angel|Angel.*Alberto).*',
                                  r'.*(Geurts.*Pierre|Pierre.*Geurts).*'],
                                 self.soup01)
        self.example08 = Example([r'.*(Michael.*Sweredoski|Sweredoski.*Michael).*',
                                  r'.*(Pierre.*Baldi|Baldi.*Pierre).*'],
                                 self.soup02)

    def test_rule_example(self):
        # A single example is turned into a list of rules; the expected
        # counts differ between the two fixtures.
        rules = self.ruler._rule_example(self.example06)
        self.assertEqual(len(rules), 1)

        rules = self.ruler._rule_example(self.example07)
        self.assertEqual(len(rules), 2)

    def test_rule(self):
        # Ruling over two examples together is expected to yield two rules.
        rules = self.ruler.rule([self.example07, self.example08])
        self.assertEqual(len(rules), 2)

        # Applying the first rule to soup01 is expected to yield five items.
        result = rules[0].apply(self.soup01)
        self.assertEqual(len(result), 5)
コード例 #2
0
class TestMultiValuePathRuler(TestRuler):
    """Tests for ``MultiValuePathRuler`` using examples with two values each.

    The ``soup01``..``soup03`` attributes are read after calling the parent
    ``setUp``; presumably ``TestRuler.setUp`` provides them (not visible
    here).
    """

    def setUp(self):
        """Create the ruler under test and the multi-value examples."""
        self.ruler = MultiValuePathRuler()
        super(TestMultiValuePathRuler, self).setUp()

        # Patterns are raw strings so that ``\.`` reaches the regex engine
        # unchanged instead of being an invalid string escape.
        # NOTE(review): 'Solona' vs 'Solsona' in the second pattern looks like
        # a typo; left untouched because the expected rule count in
        # test_rule_example may depend on it -- confirm.
        self.example06 = Example([
            r'.*(Botella.*P\.|P\..*Botella).*',
            r'.*(Solona.*B\.|B\..*Solsona).*',
        ], self.soup03)

        self.example07 = Example([
            r'.*(Alberto.*Angel|Angel.*Alberto).*',
            r'.*(Geurts.*Pierre|Pierre.*Geurts).*',
        ], self.soup01)
        self.example08 = Example([
            r'.*(Michael.*Sweredoski|Sweredoski.*Michael).*',
            r'.*(Pierre.*Baldi|Baldi.*Pierre).*',
        ], self.soup02)

    def test_rule_example(self):
        # A single example is turned into a list of rules; the expected
        # counts differ between the two fixtures.
        rules = self.ruler._rule_example(self.example06)
        self.assertEqual(len(rules), 1)

        rules = self.ruler._rule_example(self.example07)
        self.assertEqual(len(rules), 2)

    def test_rule(self):
        # Ruling over two examples together is expected to yield two rules.
        rules = self.ruler.rule([self.example07, self.example08])
        self.assertEqual(len(rules), 2)

        # Applying the first rule to soup01 is expected to yield five items.
        result = rules[0].apply(self.soup01)
        self.assertEqual(len(result), 5)
コード例 #3
0
    def setUp(self):
        """Create the ruler under test and the multi-value examples.

        The ruler is assigned before the parent ``setUp`` runs; the
        ``soup01``..``soup03`` attributes are read afterwards and are
        presumably provided by the parent class (not visible here).
        """
        self.ruler = MultiValuePathRuler()
        super(TestMultiValuePathRuler, self).setUp()

        # Patterns are raw strings so that ``\.`` reaches the regex engine
        # unchanged instead of being an invalid string escape.
        # NOTE(review): 'Solona' vs 'Solsona' in the second pattern looks like
        # a typo; left untouched because tests may depend on it -- confirm.
        self.example06 = Example([
            r'.*(Botella.*P\.|P\..*Botella).*',
            r'.*(Solona.*B\.|B\..*Solsona).*',
        ], self.soup03)

        self.example07 = Example([
            r'.*(Alberto.*Angel|Angel.*Alberto).*',
            r'.*(Geurts.*Pierre|Pierre.*Geurts).*',
        ], self.soup01)
        self.example08 = Example([
            r'.*(Michael.*Sweredoski|Sweredoski.*Michael).*',
            r'.*(Pierre.*Baldi|Baldi.*Pierre).*',
        ], self.soup02)
コード例 #4
0
 def setUp(self):
     """Create the ruler under test and the multi-value examples.

     The ruler is assigned before the parent ``setUp`` runs; the
     ``soup01``..``soup03`` attributes are read afterwards and are
     presumably provided by the parent class (not visible here).
     """
     self.ruler = MultiValuePathRuler()
     super(TestMultiValuePathRuler, self).setUp()

     # Patterns are raw strings so that ``\.`` reaches the regex engine
     # unchanged instead of being an invalid string escape.
     # NOTE(review): 'Solona' vs 'Solsona' in the second pattern looks like
     # a typo; left untouched because tests may depend on it -- confirm.
     self.example06 = Example([r'.*(Botella.*P\.|P\..*Botella).*',
                               r'.*(Solona.*B\.|B\..*Solsona).*'],
                              self.soup03)

     self.example07 = Example([r'.*(Alberto.*Angel|Angel.*Alberto).*',
                               r'.*(Geurts.*Pierre|Pierre.*Geurts).*'],
                              self.soup01)
     self.example08 = Example([r'.*(Michael.*Sweredoski|Sweredoski.*Michael).*',
                               r'.*(Pierre.*Baldi|Baldi.*Pierre).*'],
                              self.soup02)
コード例 #5
0
    def generate_wrappers(self, url):
        """Train wrappers for each example set of ``url`` and persist them.

        Example sets are fetched through an ``ExampleGateway`` configured
        from this object's limits. For every set a ``WrapperTrainer`` is run
        with rulers chosen by set name, the resulting wrappers are pruned
        with ``_prune_wrappers`` and stored through a ``WrapperGateway``.

        A failure while training, pruning or persisting one set is logged
        and does not stop processing of the remaining sets.

        :param url: passed to the example lookup and used when persisting
            the generated wrappers.
        """
        wrapper_manager = WrapperGateway()
        example_manager = ExampleGateway(
            max_examples=self.max_examples,
            max_examples_from_db=self.max_examples_from_db,
            seconds_between_requests=self.secs_between_reqs)
        example_sets = example_manager.get_examples(self.wrapper_gen_examples,
                                                    url, self.min_validity)

        # ``set_name`` rather than ``set`` so the builtin is not shadowed.
        for set_name in example_sets:
            log.info('Starting wrapper training for set "%s"' %  #@UndefinedVariable
                     set_name)

            if set_name in ('author', 'editor'):
                # These sets get the multi-value and person-oriented rulers.
                rulers = [
                    MultiValuePathRuler(),
                    SeparatorsRegexRuler(),
                    ElementsRegexRuler(),
                    PersonRuler()
                ]
            else:
                # Fall back to a match-anything guide when none is
                # configured for this set.
                try:
                    value_guide = self.value_guides[set_name]
                except KeyError:
                    value_guide = '.*'
                rulers = [PathRuler(value_guide), RegexRuler()]

            trainer = WrapperTrainer(rulers, self.wrapper_gen_examples)
            try:
                wrappers = trainer.train(example_sets[set_name])
                wrappers = self._prune_wrappers(wrappers)
                wrapper_manager.persist_wrappers(url, set_name, wrappers)
                log.info('Trainer generated %d wrappers' %  #@UndefinedVariable
                         len(wrappers))
            except Exception as e:
                # Deliberate best-effort: one failing set must not abort
                # the others, so the error is only logged.
                log.error('Error training wrapper for set "%s": %s' %  #@UndefinedVariable
                          (set_name, e))