def ngram(self, products, n):
    """Count token n-grams over every review line of ``products``.

    Each review returned by ``prod.getReviews()`` is split on newlines, each
    line is tokenized with ``wakachi.parse`` and every window of ``n``
    consecutive tokens is tallied in ``self.cnt`` under a tuple key.

    Args:
        products: iterable of objects exposing ``getReviews()``, which
            yields review strings.
        n: size of the n-gram window.

    Side effects:
        Increments ``self.cnt[key]`` for every window. ``self.cnt`` must
        accept ``+= 1`` on unseen keys (presumably a Counter/defaultdict --
        confirm against the constructor).
    """
    # n-1 delimiters are appended to each tokenized line.  Windows near the
    # start of the line use negative indices, which wrap around onto this
    # trailing padding, so the same delimiters serve as both the leading and
    # the trailing boundary markers.
    padding = [wakachi.DELIM] * (n - 1)
    for prod in products:
        for review in prod.getReviews():
            for line in review.split('\n'):
                parsed = wakachi.parse(line) + padding
                for i in range(len(parsed)):
                    # The key has to be a tuple: a bare generator expression
                    # is hashed by identity, so every window would get its
                    # own unique key and nothing would ever be aggregated.
                    # NOTE(review): this requires the parsed tokens to be
                    # hashable by value for counts to merge -- confirm.
                    key = tuple(parsed[i + j + 1] for j in range(-n, 0))
                    self.cnt[key] += 1
def regist(self, text):
    """Register the morphemes of ``text`` and feed them to ``self.consume``.

    The text is processed line by line.  Every morpheme accepted by
    ``self.PosNo`` is buffered and counted in ``self.words`` under the key
    ``(morph.posid, morph.original)``; rejected morphemes are buffered as
    ``None`` placeholders.  The buffer is handed to ``self.consume`` at each
    empty line and once more at the end of the text.

    Args:
        text: the raw string to register; lines are separated by ``'\\n'``.

    Side effects:
        Increments entries of ``self.words`` and calls ``self.consume`` one
        or more times.
    """
    pending = []
    for line in text.split('\n'):
        # Parse only the current line.  Parsing the whole ``text`` here would
        # re-append and re-count every morpheme once per line of input.
        for morph in wakachi.parse(line):
            if self.PosNo(morph):
                pending.append(morph)
                self.words[(morph.posid, morph.original)] += 1
            else:
                pending.append(None)
        # Five ``None`` placeholders separate consecutive lines.
        # NOTE(review): presumably this matches the widest window used by
        # ``consume`` so that it never spans two lines -- confirm.  The
        # original formatting was lost; once-per-line padding is assumed.
        pending += [None] * 5
        if line == '':
            # An empty line ends the current chunk: flush and start afresh.
            self.consume(pending)
            pending = []
    # Flush whatever remains after the last line.
    self.consume(pending)
def regist(self, products):
    """Tally the surface form of every morpheme found in the reviews.

    Walks each product's ``getReviews()`` result, tokenizes every review
    with ``wakachi.parse`` and increments ``self.cnt`` under the
    ``surface`` attribute of each resulting item.

    Args:
        products: iterable of objects exposing ``getReviews()``.
    """
    # Lazily flatten products -> reviews; evaluation order matches a nested
    # loop because the generator only advances when the loop asks for more.
    all_reviews = (text for item in products for text in item.getReviews())
    for text in all_reviews:
        morphemes = wakachi.parse(text)
        for morpheme in morphemes:
            surface = morpheme.surface
            self.cnt[surface] += 1