def __init__(self, files=None, morphs=None, lmin=1):
    """Build the pattern list and the search text from the Reuters data.

    Args:
        files: Indices into ``reuters.get_data_files()`` selecting which
            data files are read and concatenated into ``self.text``.
            Falls back to ``[0]`` when omitted or empty.
        morphs: Optional callables; each is applied to every keyword and
            the result is added as an extra pattern.
        lmin: Minimum length a keyword (or a morphed keyword) must have to
            be kept as a pattern.

    Sets ``self.name``, ``self.patterns`` (de-duplicated, in no particular
    order) and ``self.text``. Prints the number of distinct patterns.
    """
    self.name = "Reuters Performance"

    # NOTE(review): the original statement nesting was lost when the file
    # was flattened; this assumes morphs are applied to every keyword,
    # including those shorter than lmin -- confirm against history.
    unique_patterns = set()
    for keyword in reuters.get_keywords():
        if len(keyword) >= lmin:
            unique_patterns.add(keyword)
        for morph in morphs or []:
            variant = morph(keyword)
            if len(variant) >= lmin:
                unique_patterns.add(variant)

    # Parenthesised single-argument form prints identically on Python 2
    # and is valid syntax on Python 3.
    print(len(unique_patterns))
    self.patterns = list(unique_patterns)

    data_files = reuters.get_data_files()
    # A single join avoids repeatedly re-copying the growing text.
    self.text = "".join(data_files[index].read() for index in files or [0])
def testReuters(self):
    """Run an SBOM matcher over the first Reuters data file.

    The matcher is built from ``reuters.get_keywords()`` and invoked with
    ``self.reuters_cb`` as its callback; the test then expects
    ``self.hits`` to equal 16193.
    """
    keywords = reuters.get_keywords()
    matcher = SBOM(keywords)

    # Reset the counter before matching.
    # NOTE(review): presumably reuters_cb increments self.hits -- it is
    # defined outside this block, confirm there.
    self.hits = 0

    first_file = reuters.get_data_files()[0]
    corpus = first_file.read()
    matcher(corpus, self.reuters_cb)

    self.assertEqual(16193, self.hits)