Esempio n. 1
0
    def __init__(self, files=None, morphs=None, lmin=1):
        """Build the pattern list and the search text for this benchmark.

        Args:
            files: Indices into ``reuters.get_data_files()`` selecting the
                data files whose contents are concatenated into
                ``self.text``. ``None`` or an empty sequence selects index 0.
            morphs: Callables applied to every keyword; each result is
                added as an additional pattern. ``None`` means no variants.
            lmin: Minimum length a keyword (or a variant of it) must have
                to be kept as a pattern.
        """
        self.name = "Reuters Performance"

        patterns = set()
        for keyword in reuters.get_keywords():
            if len(keyword) >= lmin:
                patterns.add(keyword)
            # Variants are generated even when the keyword itself is too
            # short; each variant is length-checked on its own.
            for morph in morphs or []:
                variant = morph(keyword)
                if len(variant) >= lmin:
                    patterns.add(variant)
        # Parenthesised form prints the same under Python 2 and is valid
        # syntax under Python 3.
        print(len(patterns))
        self.patterns = list(patterns)

        data_files = reuters.get_data_files()
        # Join once instead of growing a string with += per file.
        self.text = "".join(data_files[index].read() for index in files or [0])
Esempio n. 2
0
    def __init__(self, files=None, morphs=None, lmin=1):
        """Build the pattern list and the search text for this benchmark.

        Args:
            files: Indices into ``reuters.get_data_files()`` selecting the
                data files whose contents are concatenated into
                ``self.text``. ``None`` or an empty sequence selects index 0.
            morphs: Callables applied to every keyword; each result is
                added as an additional pattern. ``None`` means no variants.
            lmin: Minimum length a keyword (or a variant of it) must have
                to be kept as a pattern.
        """
        self.name = "Reuters Performance"

        patterns = set()
        for keyword in reuters.get_keywords():
            if len(keyword) >= lmin:
                patterns.add(keyword)
            # Variants are generated even when the keyword itself is too
            # short; each variant is length-checked on its own.
            for morph in morphs or []:
                variant = morph(keyword)
                if len(variant) >= lmin:
                    patterns.add(variant)
        # Parenthesised form prints the same under Python 2 and is valid
        # syntax under Python 3.
        print(len(patterns))
        self.patterns = list(patterns)

        data_files = reuters.get_data_files()
        # Join once instead of growing a string with += per file.
        self.text = "".join(data_files[index].read() for index in files or [0])
Esempio n. 3
0
 def testReuters(self):
     """Run an SBOM matcher over the first Reuters data file.

     The matcher is built from the Reuters keywords and invoked with
     ``self.reuters_cb`` as its callback; the test then expects
     ``self.hits`` to equal 16193.
     """
     keywords = reuters.get_keywords()
     matcher = SBOM(keywords)
     # Reset the counter before the run; presumably reuters_cb updates it
     # (the callback is not visible here -- verify).
     self.hits = 0
     first_file = reuters.get_data_files()[0]
     text = first_file.read()
     matcher(text, self.reuters_cb)
     self.assertEqual(16193, self.hits)
Esempio n. 4
0
 def testReuters(self):
     """Run an SBOM matcher over the first Reuters data file.

     The matcher is built from the Reuters keywords and invoked with
     ``self.reuters_cb`` as its callback; the test then expects
     ``self.hits`` to equal 16193.
     """
     keywords = reuters.get_keywords()
     matcher = SBOM(keywords)
     # Reset the counter before the run; presumably reuters_cb updates it
     # (the callback is not visible here -- verify).
     self.hits = 0
     first_file = reuters.get_data_files()[0]
     text = first_file.read()
     matcher(text, self.reuters_cb)
     self.assertEqual(16193, self.hits)