Ejemplo n.º 1
0
    def test_parsed_sents(self):
        """Compare the tree of the first ``esp.train`` parse with the expected one."""
        first_graph = conll2007.parsed_sents("esp.train")[0]
        actual = first_graph.tree()

        # Expected tree, assembled from label-named subtrees so that each
        # piece stays short enough to check by eye.
        aumento_subtree = Tree(
            "aumento",
            [
                "El",
                Tree("del", [Tree("índice", [Tree("de", [Tree("desempleo", ["estadounidense"])])])]),
            ],
        )
        mercado_subtree = Tree(
            "mercado",
            ["el", Tree("de", ["divisas"]), Tree("de", ["Fráncfort"])],
        )
        dolares_subtree = Tree(
            "0,9349_dólares",
            ["los", Tree("de", [Tree("mañana", ["esta"])])],
        )
        cotizaba_subtree = Tree(
            "cotizaba",
            [
                ",",
                "que",
                Tree("a", [Tree("15.35", ["las", "GMT"])]),
                "se",
                Tree("en", [mercado_subtree]),
                Tree("a", ["0,9452_dólares"]),
                Tree("frente_a", [",", dolares_subtree]),
            ],
        )
        al_subtree = Tree("al", [Tree("euro", [cotizaba_subtree])])

        expected = Tree(
            "fortaleció",
            [aumento_subtree, "hoy", "considerablemente", al_subtree, "."],
        )

        self.assertEqual(actual, expected)
    def test_parsed_sents(self):
        """The first ``esp.train`` dependency graph must yield the expected tree."""
        graph = conll2007.parsed_sents('esp.train')[0]
        actual = graph.tree()

        # Single-child chains are collapsed onto one line; sibling lists keep
        # one child per line.
        expected = Tree('fortaleció', [
            Tree('aumento', [
                'El',
                Tree('del', [Tree('índice', [Tree('de', [Tree('desempleo', ['estadounidense'])])])]),
            ]),
            'hoy',
            'considerablemente',
            Tree('al', [Tree('euro', [Tree('cotizaba', [
                ',',
                'que',
                Tree('a', [Tree('15.35', ['las', 'GMT'])]),
                'se',
                Tree('en', [Tree('mercado', ['el', Tree('de', ['divisas']), Tree('de', ['Fráncfort'])])]),
                Tree('a', ['0,9452_dólares']),
                Tree('frente_a', [',', Tree('0,9349_dólares', ['los', Tree('de', [Tree('mañana', ['esta'])])])]),
            ])])]),
            '.',
        ])

        self.assertEqual(actual, expected)
Ejemplo n.º 3
0
    def test_parsed_sents(self):
        """Verify ``tree()`` for the first parsed sentence of ``esp.train``."""
        sentence_graph = conll2007.parsed_sents('esp.train')[0]
        observed = sentence_graph.tree()

        # Children of the subtree labelled 'cotizaba', in order.
        cotizaba_children = [
            ',',
            'que',
            Tree('a', [Tree('15.35', ['las', 'GMT'])]),
            'se',
            Tree('en', [
                Tree('mercado', ['el', Tree('de', ['divisas']), Tree('de', ['Fráncfort'])]),
            ]),
            Tree('a', ['0,9452_dólares']),
            Tree('frente_a', [
                ',',
                Tree('0,9349_dólares', ['los', Tree('de', [Tree('mañana', ['esta'])])]),
            ]),
        ]

        # Children of the subtree labelled 'aumento': one leaf followed by a
        # chain of single-child trees.
        aumento_children = [
            'El',
            Tree('del', [Tree('índice', [Tree('de', [Tree('desempleo', ['estadounidense'])])])]),
        ]

        # Children of the root, which is labelled 'fortaleció'.
        root_children = [
            Tree('aumento', aumento_children),
            'hoy',
            'considerablemente',
            Tree('al', [Tree('euro', [Tree('cotizaba', cotizaba_children)])]),
            '.',
        ]

        self.assertEqual(observed, Tree('fortaleció', root_children))
Ejemplo n.º 4
0
    def test_parsed_sents(self):
        """Assert that the first ``esp.train`` parse converts to the expected ``Tree``."""
        dep_graph = conll2007.parsed_sents("esp.train")[0]
        result = dep_graph.tree()

        # Chain of single-child trees hanging under "aumento".
        desempleo = Tree("desempleo", ["estadounidense"])
        indice = Tree("índice", [Tree("de", [desempleo])])
        aumento = Tree("aumento", ["El", Tree("del", [indice])])

        # Pieces of the "cotizaba" subtree, named after their labels and
        # listed in child order.
        a_1535 = Tree("a", [Tree("15.35", ["las", "GMT"])])
        mercado = Tree("mercado", ["el", Tree("de", ["divisas"]), Tree("de", ["Fráncfort"])])
        en_mercado = Tree("en", [mercado])
        a_9452 = Tree("a", ["0,9452_dólares"])
        manana = Tree("mañana", ["esta"])
        frente_a = Tree(
            "frente_a",
            [",", Tree("0,9349_dólares", ["los", Tree("de", [manana])])],
        )
        cotizaba = Tree(
            "cotizaba",
            [",", "que", a_1535, "se", en_mercado, a_9452, frente_a],
        )

        # Root of the expected tree.
        al = Tree("al", [Tree("euro", [cotizaba])])
        expected = Tree("fortaleció", [aumento, "hoy", "considerablemente", al, "."])

        self.assertEqual(result, expected)
Ejemplo n.º 5
0
      [0])  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
# NOTE(review): `ptb`, `sinica_treebank`, `conll2007`, `words`, `stopwords`,
# `names` and `cmudict` are not defined in this excerpt; presumably they are
# NLTK corpus readers imported earlier in the file -- confirm.

# Penn Treebank reader: list its file ids, then print the words and the
# tagged words of a single file.
# nltk.download('ptb')
print(ptb.fileids())  # doctest: +SKIP
# Download the corpus from: https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/treebank.zip
# then extract it and place it in the following location: .../nltk_data/corpora/ptb/
print(ptb.words('treebank/combined/wsj_0003.mrg'))  # doctest: +SKIP
print(ptb.tagged_words('treebank/combined/wsj_0003.mrg'))  # doctest: +SKIP
# The category-based calls below are left disabled.
# print(ptb.categories())  # doctest: +SKIP
# print(ptb.fileids('news'))  # doctest: +SKIP
# print(ptb.words(categories=['humor', 'fiction']))  # doctest: +SKIP

# Sinica treebank reader: print its sentences and the parsed sentence at index 25.
# nltk.download('sinica_treebank')
print(sinica_treebank.sents())  # doctest: +SKIP
print(sinica_treebank.parsed_sents()[25])  # doctest: +SKIP

# CoNLL 2007 reader: print the first sentence of 'esp.train', its parsed
# form, and the result of calling tree() on that parsed form.
# nltk.download('conll2007')
print(conll2007.sents('esp.train')[0])  # doctest: +SKIP
print(conll2007.parsed_sents('esp.train')[0])  # doctest: +SKIP
print(conll2007.parsed_sents('esp.train')[0].tree())  # doctest: +SKIP
# The ycoe example is left disabled.
# for tree in ycoe.parsed_sents('cocuraC')[:4]:
#     print(tree)  # doctest: +SKIP

# Word lists and lexicons: for each reader, print the available file ids and
# then the words of one or two specific files.
print(words.fileids())
print(words.words('en'))  # doctest: +ELLIPSIS
print(stopwords.fileids())  # doctest: +ELLIPSIS
print(stopwords.words('portuguese'))  # doctest: +ELLIPSIS
# nltk.download('names')
print(names.fileids())
print(names.words('male.txt'))  # doctest: +ELLIPSIS
print(names.words('female.txt'))  # doctest: +ELLIPSIS
# cmudict: print the slice of entries from index 653 up to (not including) 659.
# nltk.download('cmudict')
print(cmudict.entries()[653:659])  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
# Load the entire cmudict corpus into a Python dictionary: