Ejemplos de ParseCabochaTextToChunks en Python, ejemplos de parse_cabocha_text_to_chunks.ParseCabochaTextToChunks en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: 46.py Proyecto: jiruru/nlp100-1

    def solve(self):
        '''
        Print predicate / particle / argument lines for 'neko.txt.cabocha'.

        For every chunk that contains a verb ('動詞'), the base form of its
        first verb is used as the predicate.  Each chunk indexed by
        ``chunk.srcs`` that contains a particle ('助詞') contributes a pair
        made of the base form of its last particle and its full text.  When
        at least one pair exists, the pairs are sorted and one tab-separated
        line (predicate, particles, texts) is printed.

        Always returns None.
        '''
        parser = ParseCabochaTextToChunks()
        sentences = parser.parse('neko.txt.cabocha')

        for sentence in sentences:
            for chunk in sentence:
                verb_bases = [m.base for m in chunk.morphs if m.pos == '動詞']
                if not verb_bases:
                    continue

                pairs = []
                for src in chunk.srcs:
                    source_chunk = sentence[src]
                    particle_bases = [
                        m.base for m in source_chunk.morphs if m.pos == '助詞'
                    ]
                    if particle_bases:
                        # The last particle in the chunk is used as the case.
                        pairs.append(
                            (particle_bases[-1], source_chunk.get_text()))
                if not pairs:
                    continue

                # Tuples order by particle first, then by chunk text.
                pairs = sorted(pairs)
                particles = ' '.join([pair[0] for pair in pairs])
                texts = ' '.join([pair[1] for pair in pairs])
                print('{0}\t{1}\t{2}'.format(verb_bases[0], particles, texts))

        return None

Ejemplo n.º 2

0

Mostrar archivo

Archivo: 49.py Proyecto: lethe2211/nlp100

    def solve(self):
        '''
        Print the dependency paths linking every pair of noun chunks.

        For each pair of chunk indices ``i < j`` in a sentence where both
        chunks contain a noun ('名詞'), the method collects:

        * ``index_x`` / ``index_y``: morph positions of the first contiguous
          run of nouns in chunk ``i`` / chunk ``j`` (per the original comment,
          these are the morphs to be replaced by 'X' or 'Y'; the replacement
          itself happens in ``self.get_replaced_texts``, not shown here).
        * ``path_i`` / ``path_j``: chunk indices visited by following ``dst``
          from ``i`` / ``j`` until ``dst`` is None.

        If every node of ``path_j`` is also on ``path_i``, a single
        ' -> '-joined chain is printed.  Otherwise three chains (only on
        ``path_i``, only on ``path_j``, shared) are printed separated by
        ' | '.

        Always returns None.
        '''
        parser = ParseCabochaTextToChunks()
        sentences = parser.parse('neko.txt.cabocha')

        def has_noun(chunk):
            # True when at least one morph of the chunk is a noun.
            return any(m.pos == '名詞' for m in chunk.morphs)

        def first_noun_run(chunk):
            # Positions of the first unbroken run of noun morphs.
            run = []
            for position, morph in enumerate(chunk.morphs):
                if morph.pos == '名詞':
                    run.append(position)
                elif run:
                    # The run ended at the first non-noun after a noun.
                    break
            return run

        def path_to_root(sentence, start):
            # Chunk indices from `start` along dst links until dst is None.
            visited = []
            node = start
            while node is not None:
                visited.append(node)
                node = sentence[node].dst
            return visited

        for sentence in sentences:
            for i in range(len(sentence) - 1):
                for j in range(i + 1, len(sentence)):
                    if not (has_noun(sentence[i]) and has_noun(sentence[j])):
                        continue

                    # Get indices of morphs to be replaced by 'X' or 'Y'
                    index_x = first_noun_run(sentence[i])
                    index_y = first_noun_run(sentence[j])

                    # Get paths of syntax tree
                    nodes_i = set(path_to_root(sentence, i))
                    nodes_j = set(path_to_root(sentence, j))

                    # Display syntax tree
                    if nodes_i >= nodes_j:
                        # path_j is contained in path_i: print one chain that
                        # ends at chunk j.
                        chain = sorted(list(nodes_i - nodes_j) + [j])
                        path_texts = self.get_replaced_texts(
                            sentence, chain, i, j, index_x, index_y)
                        print(' -> '.join(path_texts))
                    else:
                        only_i_texts = self.get_replaced_texts(
                            sentence, sorted(nodes_i - nodes_j),
                            i, j, index_x, index_y)
                        only_j_texts = self.get_replaced_texts(
                            sentence, sorted(nodes_j - nodes_i),
                            i, j, index_x, index_y)
                        shared_texts = self.get_replaced_texts(
                            sentence, sorted(nodes_i & nodes_j),
                            i, j, index_x, index_y)
                        print('{0} | {1} | {2}'.format(
                            ' -> '.join(only_i_texts),
                            ' -> '.join(only_j_texts),
                            ' -> '.join(shared_texts)))

        return None

Ejemplo n.º 3

0

Mostrar archivo

Archivo: 41.py Proyecto: lethe2211/nlp100

 def solve(self):
     '''
     Parse 'neko.txt.cabocha' and print the element at index 7 of the result.

     Always returns None.
     '''
     parser = ParseCabochaTextToChunks()
     parsed = parser.parse('neko.txt.cabocha')
     eighth = parsed[7]  # zero-based index 7, i.e. the eighth element
     print(eighth)

     return None

Ejemplo n.º 4

0

Mostrar archivo

    def solve(self):
        '''
        Print the eighth element (index 7) of the parsed 'neko.txt.cabocha'.

        The file is parsed with ParseCabochaTextToChunks; nothing else is
        printed.  Always returns None.
        '''
        # Index 7 is zero-based, so this selects the eighth parsed element.
        print(ParseCabochaTextToChunks().parse('neko.txt.cabocha')[7])

        return None

Ejemplo n.º 5

0

Mostrar archivo

Archivo: 42.py Proyecto: jiruru/nlp100-1

    def solve(self):
        '''
        Print each chunk's text next to the text of the chunk it points to.

        For every chunk of every sentence parsed from 'neko.txt.cabocha' whose
        ``dst`` is not None, one line "<chunk text>\\t<dst chunk text>" is
        printed.  Chunks whose ``dst`` is None are skipped.

        Always returns None.
        '''
        parser = ParseCabochaTextToChunks()
        sentences = parser.parse('neko.txt.cabocha')

        for sentence in sentences:
            for chunk in sentence:
                target_index = chunk.dst
                if target_index is None:
                    # Nothing to pair this chunk with.
                    continue
                source_text = chunk.get_text()
                target_text = sentence[target_index].get_text()
                print('{0}\t{1}'.format(source_text, target_text))

        return None

Ejemplo n.º 6

0

Mostrar archivo

Archivo: 44.py Proyecto: lethe2211/nlp100

    def solve(self):
        '''
        Write the dependency edges of one sentence to '44.jpg' via pydot.

        Only the sentence at index 7 of the parsed 'neko.txt.cabocha' is used
        (the original author notes that the full graph becomes too complex).
        Each chunk whose ``dst`` is not None yields one edge
        (chunk text, dst chunk text).

        Always returns None.
        '''
        parser = ParseCabochaTextToChunks()
        sentences = parser.parse('neko.txt.cabocha')

        # As the dependency graph becomes too complex, only the 8th sentence
        # is visualized.
        sentence = sentences[7]
        edges = [
            (chunk.get_text(), sentence[chunk.dst].get_text())
            for chunk in sentence
            if chunk.dst is not None
        ]
        graph = pydot.graph_from_edges(edges)
        graph.write_jpeg('44.jpg', prog='dot')

        return None

Ejemplo n.º 7

0

Mostrar archivo

Archivo: 44.py Proyecto: jiruru/nlp100-1

    def solve(self):
        '''
        Render the chunk dependencies of the 8th sentence into '44.jpg'.

        The sentence at index 7 of the parsed 'neko.txt.cabocha' is turned
        into a list of (chunk text, dst chunk text) edges, skipping chunks
        whose ``dst`` is None.  The edges are handed to
        ``pydot.graph_from_edges`` and the result is written with
        ``write_jpeg('44.jpg', prog='dot')``.

        Always returns None.
        '''
        parsed = ParseCabochaTextToChunks().parse('neko.txt.cabocha')

        # As the dependency graph becomes too complex, we visualize only the
        # 8th sentence this time.
        target_sentence = parsed[7]

        # Keep only chunks that point at another chunk, then pair the texts.
        linked_chunks = [c for c in target_sentence if c.dst is not None]
        edges = [
            (c.get_text(), target_sentence[c.dst].get_text())
            for c in linked_chunks
        ]

        pydot.graph_from_edges(edges).write_jpeg('44.jpg', prog='dot')

        return None

Ejemplo n.º 8

0

Mostrar archivo

Archivo: 48.py Proyecto: jiruru/nlp100-1

    def solve(self):
        '''
        Print the dst chain starting at every chunk that contains a noun.

        For each chunk with at least one noun ('名詞') morph, the method
        follows ``dst`` links from that chunk until ``dst`` is None,
        collecting each visited chunk's text, and prints the texts joined by
        ' -> '.

        Always returns None.
        '''
        parser = ParseCabochaTextToChunks()
        sentences = parser.parse('neko.txt.cabocha')

        for sentence in sentences:
            for start, chunk in enumerate(sentence):
                if not any(m.pos == '名詞' for m in chunk.morphs):
                    continue

                texts = []
                current = start
                # `start` is an int, so the first step always runs.
                while current is not None:
                    texts.append(sentence[current].get_text())
                    current = sentence[current].dst
                print(' -> '.join(texts))
        return None

Ejemplo n.º 9

0

Mostrar archivo

Archivo: 48.py Proyecto: lethe2211/nlp100

    def solve(self):
        '''
        Print, for each noun-bearing chunk, the chunk texts along its dst links.

        A chunk qualifies when one of its morphs has ``pos == '名詞'``.  The
        printed line lists the chunk's own text followed by the text of each
        chunk reached by repeatedly following ``dst`` until it is None, joined
        by ' -> '.

        Always returns None.
        '''
        def texts_along_dst(sentence, start):
            # Yield chunk texts from `start` until the dst link is None.
            node = start
            while node is not None:
                yield sentence[node].get_text()
                node = sentence[node].dst

        parsed = ParseCabochaTextToChunks().parse('neko.txt.cabocha')

        for sentence in parsed:
            for position, chunk in enumerate(sentence):
                pos_tags = [m.pos for m in chunk.morphs]
                if '名詞' in pos_tags:
                    print(' -> '.join(texts_along_dst(sentence, position)))
        return None

Ejemplo n.º 10

0

Mostrar archivo

    def solve(self):
        '''
        Print "sahen-noun + を + verb" predicates with their particle arguments.

        Every chunk except the last one of each sentence is scanned for a
        morph with ``pos == '名詞'`` and ``pos1 == 'サ変接続'`` immediately
        followed by a particle ('助詞') whose surface is 'を'.  If the chunk
        it points to (``dst``) contains a verb ('動詞'), the predicate is the
        chunk's text concatenated with the base form of that first verb.

        Arguments are taken from the chunks listed in the ``srcs`` of both the
        matched chunk and its dst chunk (excluding the matched chunk itself):
        each one containing a particle contributes (base form of its last
        particle, chunk text).  If any exist they are sorted and printed as a
        tab-separated line.  At most one match is processed per chunk.

        Always returns None.
        '''
        parser = ParseCabochaTextToChunks()
        sentences = parser.parse('neko.txt.cabocha')

        for sentence in sentences:
            for j in range(len(sentence) - 1):
                chunk = sentence[j]
                morphs = chunk.morphs
                for k in range(len(morphs) - 1):
                    noun, particle = morphs[k], morphs[k + 1]
                    if not (noun.pos == '名詞' and noun.pos1 == 'サ変接続'):
                        continue
                    if not (particle.pos == '助詞'
                            and particle.surface == 'を'):
                        continue

                    head = sentence[chunk.dst]
                    verb_bases = [
                        m.base for m in head.morphs if m.pos == '動詞'
                    ]
                    if not verb_bases:
                        continue

                    predicate = chunk.get_text() + verb_bases[0]

                    # Sources of both chunks, minus the matched chunk itself
                    # (it is one of the dst chunk's sources).
                    candidates = set(chunk.srcs + head.srcs) - set([j])
                    arguments = []
                    for src in candidates:
                        particle_bases = [
                            m.base for m in sentence[src].morphs
                            if m.pos == '助詞'
                        ]
                        if particle_bases:
                            arguments.append(
                                (particle_bases[-1], sentence[src].get_text()))

                    if arguments:
                        arguments.sort()
                        print('{0}\t{1}\t{2}'.format(
                            predicate,
                            ' '.join([a[0] for a in arguments]),
                            ' '.join([a[1] for a in arguments])))
                    # Only the first match inside a chunk is reported.
                    break

        return None

Ejemplo n.º 11

0

Mostrar archivo

Archivo: 46.py Proyecto: lethe2211/nlp100

    def solve(self):
        '''
        Print verb case frames (predicate, particles, argument chunks).

        A chunk with a verb ('動詞') morph supplies the predicate: the base
        form of its first verb.  Among the chunks indexed by its ``srcs``,
        those holding a particle ('助詞') supply (last particle base form,
        chunk text) pairs.  If there is at least one pair, the sorted pairs
        are printed as "<predicate>\\t<particles>\\t<texts>", with particles
        and texts space-separated.

        Always returns None.
        '''
        def particle_bases(chunk):
            # Base forms of every particle morph in the chunk, in order.
            return [m.base for m in chunk.morphs if m.pos == '助詞']

        parsed = ParseCabochaTextToChunks().parse('neko.txt.cabocha')

        for sentence in parsed:
            for chunk in sentence:
                verbs = [m for m in chunk.morphs if m.pos == '動詞']
                if verbs:
                    predicate = verbs[0].base
                    arguments = sorted(
                        (particle_bases(sentence[src])[-1],
                         sentence[src].get_text())
                        for src in chunk.srcs
                        if particle_bases(sentence[src]))
                    if arguments:
                        case_column = ' '.join([case for case, _ in arguments])
                        text_column = ' '.join([text for _, text in arguments])
                        print('{0}\t{1}\t{2}'.format(
                            predicate, case_column, text_column))

        return None

Ejemplo n.º 12

0

Mostrar archivo

Archivo: 47.py Proyecto: lethe2211/nlp100

    def solve(self):
        '''
        Mine "sahen-noun を verb" constructions and print their arguments.

        For every chunk but the last in each sentence, the chunk matches when
        some adjacent morph pair is a noun ('名詞') with ``pos1 ==
        'サ変接続'`` followed by the particle ('助詞') with surface 'を', and
        the chunk referenced by ``dst`` contains a verb ('動詞').  The
        predicate is the matched chunk's text plus the base form of the first
        verb in the dst chunk.

        Argument candidates are the ``srcs`` of the matched chunk and of its
        dst chunk, without the matched chunk itself.  Candidates containing a
        particle yield (last particle base form, chunk text); a non-empty,
        sorted list of these is printed tab-separated after the predicate.
        Each chunk produces at most one output line.

        Always returns None.
        '''
        def is_sahen_wo(noun, particle):
            # A sahen-connecting noun directly followed by the particle 'を'.
            return (noun.pos == '名詞' and noun.pos1 == 'サ変接続'
                    and particle.pos == '助詞' and particle.surface == 'を')

        parsed = ParseCabochaTextToChunks().parse('neko.txt.cabocha')

        for sentence in parsed:
            for j in range(len(sentence) - 1):
                chunk = sentence[j]
                morphs = chunk.morphs

                # Nothing printed below depends on which pair matched, so it
                # is enough to know whether any adjacent pair matches.
                matched = any(
                    is_sahen_wo(morphs[k], morphs[k + 1])
                    for k in range(len(morphs) - 1))
                if not matched:
                    continue

                governor = sentence[chunk.dst]
                governor_verbs = [
                    m.base for m in governor.morphs if m.pos == '動詞'
                ]
                if not governor_verbs:
                    continue

                predicate = chunk.get_text() + governor_verbs[0]
                source_indices = set(chunk.srcs + governor.srcs) - set([j])

                arguments = []
                for src in source_indices:
                    bases = [
                        m.base for m in sentence[src].morphs
                        if m.pos == '助詞'
                    ]
                    if bases:
                        arguments.append((bases[-1], sentence[src].get_text()))

                if arguments:
                    arguments = sorted(arguments)
                    print('{0}\t{1}\t{2}'.format(
                        predicate,
                        ' '.join([case for case, _ in arguments]),
                        ' '.join([text for _, text in arguments])))

        return None

Ejemplo n.º 13

0

Mostrar archivo

Archivo: 45.py Proyecto: jiruru/nlp100-1

    def solve(self):
        '''
        Print each verb predicate with the particles of its source chunks.

        For every chunk containing a verb ('動詞'), the predicate is the base
        form of the chunk's first verb.  Every chunk indexed by
        ``chunk.srcs`` that contains a particle ('助詞') contributes the base
        form of its last particle.  If any particles were collected, they are
        sorted and printed space-separated after the predicate and a tab.

        Returns None (implicitly).
        '''
        parser = ParseCabochaTextToChunks()
        sentences = parser.parse('neko.txt.cabocha')

        for sentence in sentences:
            for chunk in sentence:
                verb_bases = [m.base for m in chunk.morphs if m.pos == '動詞']
                if not verb_bases:
                    continue

                cases = []
                for src in chunk.srcs:
                    particle_bases = [
                        m.base for m in sentence[src].morphs
                        if m.pos == '助詞'
                    ]
                    # [-1:] is empty for a chunk without particles, otherwise
                    # it holds just the last particle.
                    cases.extend(particle_bases[-1:])

                if cases:
                    print('{0}\t{1}'.format(
                        verb_bases[0], ' '.join(sorted(cases))))

Ejemplo n.º 14

0

Mostrar archivo

Archivo: 49.py Proyecto: jiruru/nlp100-1

    def solve(self):
        '''
        Print dependency paths between every pair of noun-containing chunks.

        For chunk indices ``i < j`` of a sentence, both chunks must contain a
        noun ('名詞').  ``index_x`` and ``index_y`` hold the morph positions
        of the first contiguous noun run in chunk ``i`` and chunk ``j``; per
        the original comment they mark the morphs to be replaced by 'X' or
        'Y' (the replacement is done by ``self.get_replaced_texts``, which is
        not visible here).

        The chunk indices reached by following ``dst`` from ``i`` and from
        ``j`` (until ``dst`` is None) are compared:

        * if ``j``'s path is contained in ``i``'s path, one chain is printed
          with its parts joined by ' -> ';
        * otherwise the parts only on ``i``'s path, only on ``j``'s path, and
          common to both are printed, separated by ' | '.

        Always returns None.
        '''
        def first_noun_run(morphs):
            # Positions of all noun morphs, truncated to the first unbroken
            # run of consecutive positions.
            noun_positions = [
                pos for pos, morph in enumerate(morphs) if morph.pos == '名詞'
            ]
            run = noun_positions[:1]
            for pos in noun_positions[1:]:
                if pos != run[-1] + 1:
                    break
                run.append(pos)
            return run

        parsed = ParseCabochaTextToChunks().parse('neko.txt.cabocha')

        for sentence in parsed:
            chunk_count = len(sentence)
            for i in range(chunk_count - 1):
                for j in range(i + 1, chunk_count):
                    # Get indices of morphs to be replaced by 'X' or 'Y'.
                    # An empty run means the chunk has no noun at all.
                    index_x = first_noun_run(sentence[i].morphs)
                    index_y = first_noun_run(sentence[j].morphs)
                    if not (index_x and index_y):
                        continue

                    # Get paths of syntax tree
                    paths = []
                    for start in (i, j):
                        walked = []
                        node = start
                        while node is not None:
                            walked.append(node)
                            node = sentence[node].dst
                        paths.append(walked)
                    nodes_i = set(paths[0])
                    nodes_j = set(paths[1])

                    # Display syntax tree
                    if nodes_i.issuperset(nodes_j):
                        chain = list(nodes_i.difference(nodes_j))
                        chain.append(j)
                        chain.sort()
                        print(' -> '.join(self.get_replaced_texts(
                            sentence, chain, i, j, index_x, index_y)))
                        continue

                    segments = []
                    for members in (nodes_i.difference(nodes_j),
                                    nodes_j.difference(nodes_i),
                                    nodes_i.intersection(nodes_j)):
                        texts = self.get_replaced_texts(
                            sentence, sorted(members), i, j, index_x, index_y)
                        segments.append(' -> '.join(texts))
                    print('{0} | {1} | {2}'.format(
                        segments[0], segments[1], segments[2]))

        return None