Beispiel #1
0
class NodeMaker:
    """Build a word graph from text split into morphemes.

    ``regist`` tokenizes text line by line with ``wakachi.parse``, keeps
    only morphemes that ``PosNo`` recognizes, counts them in ``self.words``
    and hands each blank-line-delimited run to ``consume``, which registers
    nodes and edges on ``self.graph``.

    Each kept morpheme gets ``PosNo(morph)`` nodes, encoded from the tuple
    ``(posid, original(), kind)`` with ``kind`` in 0..2.  The original
    source labels these kinds ``0:N, 1:V, 2:Y``.
    # NOTE(review): what N / V / Y stand for is not visible in this file.
    """

    # Part-of-speech markers looked up in ``morph.pos()``:
    # adjective (-> 2) and noun (-> 3), checked in this order.
    _POS_TAGS = (u'形容詞', u'名詞')

    def __init__(self):
        self.ser = Serializer()
        # (posid, original form) -> number of occurrences seen by regist().
        self.words = defaultdict(int)
        self.graph = Graph()

    def PosNo(self, morph):
        """Return the node count for ``morph``, or 0 if it is not used.

        Returns 2 when the adjective marker occurs in ``morph.pos()``,
        3 when the noun marker occurs, and 0 otherwise.  ``consume`` uses
        the result as the number of node kinds to create.
        """
        for i, p in enumerate(self._POS_TAGS):
            if p in morph.pos():
                return i + 2
        return 0

    def regist(self, text):
        """Tokenize ``text`` and feed it to the graph.

        Lines are separated by ``'\\n'``.  Recognized morphemes are counted
        in ``self.words`` and buffered; every other morpheme is buffered as
        ``None``.  Five ``None`` entries are appended after each line, and
        the buffer is passed to ``consume`` at every empty line and once
        more at the end of the text.
        """
        lst = []
        for line in text.split('\n'):
            # Parse only the current line.  Parsing the whole text here
            # would re-add every morpheme once per line.
            for morph in wakachi.parse(line):
                if self.PosNo(morph):
                    lst.append(morph)
                    # original() is a method everywhere else in this class;
                    # call it so the key is the word, not a bound method.
                    self.words[(morph.posid, morph.original())] += 1
                else:
                    lst.append(None)
            # Padding between lines widens the index gap across a line break.
            lst += [None] * 5
            if line == '':
                # An empty line ends the current run: flush and start over.
                self.consume(lst)
                lst = []
        self.consume(lst)

    def consume(self, lst, back=3, fore=10):  # 0:N, 1:V, 2:Y
        """Register nodes and edges for one buffered run of morphemes.

        ``lst`` holds morpheme objects and ``None`` placeholders.  For each
        morpheme at index ``i``:

        * ``PosNo(morph)`` nodes (kinds 0, 1, ...) are encoded and
          registered;
        * its kind-1 node gets an edge to the kind-2 node of every
          non-``None`` entry in ``[i - fore, i + back)`` whose ``PosNo``
          is not 2;
        * if ``PosNo`` is 3, its kind-2 node gets an edge to the kind-1
          node of every non-``None`` entry in ``[i - back, i + fore)``.

        Both windows are clipped to the list bounds and include ``i``
        itself.
        """
        size = len(lst)
        for i, morph in enumerate(lst):
            if morph is None:
                continue
            posno = self.PosNo(morph)
            node = []
            for x in range(posno):
                node.append(self.ser.encode((morph.posid, morph.original(), x)))
                self.graph.registerNode(node[x])
            # Edges from this morpheme's kind-1 (V) node.
            for j in range(max(0, i - fore), min(size, i + back)):
                # Entries with PosNo == 2 have no kind-2 node to point at.
                if lst[j] is None or self.PosNo(lst[j]) == 2:
                    continue
                ny = self.ser.encode((lst[j].posid, lst[j].original(), 2))
                self.graph.addEdge(node[1], ny)
            # Edges from this morpheme's kind-2 (Y) node, which exists only
            # when three nodes were created.
            if posno == 3:
                for j in range(max(0, i - back), min(size, i + fore)):
                    if lst[j] is None:
                        continue
                    nv = self.ser.encode((lst[j].posid, lst[j].original(), 1))
                    self.graph.addEdge(node[2], nv)