Ejemplo n.º 1
0
 def ngram(self, products, n):
     """Count the n-grams found in every line of every review.

     Each review is split on newlines; each line is tokenized with
     ``wakachi.parse`` and padded with ``n - 1`` ``wakachi.DELIM`` markers.
     For every position ``i`` of the padded token list, the window of the
     ``n`` tokens ending at ``i`` is used as a key whose count in
     ``self.cnt`` is incremented.

     Args:
         products: iterable of objects exposing ``getReviews()``, which
             yields newline-separated review strings.
         n: number of tokens per n-gram.

     Note:
         ``self.cnt`` must accept ``+= 1`` on unseen keys (presumably a
         ``defaultdict(int)`` or ``Counter`` -- confirm in ``__init__``).
     """
     for prod in products:
         for review in prod.getReviews():
             for line in review.split('\n'):
                 # The padding is appended at the END on purpose: the first
                 # n - 1 windows use negative indices, which wrap around to
                 # these trailing delimiters and so act as leading padding.
                 parsed = wakachi.parse(line) + [wakachi.DELIM] * (n - 1)
                 for i in range(len(parsed)):
                     # The key must be a tuple.  A bare generator expression
                     # hashes by object identity, so every window would get
                     # its own entry and equal n-grams would never be merged.
                     key = tuple(parsed[k] for k in range(i - n + 1, i + 1))
                     self.cnt[key] += 1
Ejemplo n.º 2
0
 def regist(self, text):
     """Register the morphemes of ``text``, processing it line by line.

     Every line is tokenized with ``wakachi.parse``.  A morpheme accepted by
     ``self.PosNo`` is appended to a buffer and counted in ``self.words``
     under the key ``(morph.posid, morph.original)``; a rejected morpheme is
     replaced by a ``None`` placeholder.  Five ``None`` entries are appended
     after each line.  An empty line flushes the buffer to ``self.consume``
     and starts a new one; whatever remains at the end is flushed as well.

     Args:
         text: newline-separated string to analyse.
     """
     lst = []
     for line in text.split('\n'):
         # Parse only the current line.  Parsing the whole ``text`` here
         # would re-register every morpheme once per line.
         for morph in wakachi.parse(line):
             if self.PosNo(morph):
                 lst.append(morph)
                 self.words[(morph.posid, morph.original)] += 1
             else:
                 lst.append(None)
         # NOTE(review): the 5 None entries presumably keep consume() from
         # relating morphemes across a line break -- confirm against consume().
         lst += [None] * 5
         if line == '':
             # A blank line ends the current chunk.
             self.consume(lst)
             lst = []
     self.consume(lst)
Ejemplo n.º 3
0
 def regist(self, products):
     """Count the surface form of every morpheme in every product review.

     Each review returned by ``getReviews()`` of each product is tokenized
     with ``wakachi.parse``, and ``self.cnt`` is incremented once per
     morpheme, keyed by the morpheme's ``surface`` attribute.

     Args:
         products: iterable of objects exposing ``getReviews()``.
     """
     # Lazily flatten products -> reviews -> morphemes into surface strings.
     surfaces = (
         morph.surface
         for product in products
         for text in product.getReviews()
         for morph in wakachi.parse(text)
     )
     for surface in surfaces:
         self.cnt[surface] += 1