Esempi in Python per html_to_document

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: yalign.input_conversion

Metodo/funzione: html_to_document

Esempi su hotexamples.com: 10

html_to_document in Python: 10 esempi trovati. Questi sono i migliori esempi reali in Python per yalign.input_conversion.html_to_document, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Esempio n. 1

Mostra file

File: test_input_conversion.py Progetto: wannaphong/yalign

 def test_extract(self):
     """Check that paragraph text is extracted as tokenized sentences.

     A paragraph holding one sentence must produce a single token list;
     a paragraph holding two sentences must produce two token lists, each
     ending with the period as a separate token.
     """
     html = "<html><head></head><body><p>Hello Peter</p></body></html>"
     d = [list(xs) for xs in html_to_document(html, "en")]
     # assertEqual is used instead of the deprecated assertEquals alias,
     # which was removed from unittest in Python 3.12.
     self.assertEqual([u'Hello Peter'.split()], d)

     html = ("<html><head></head><body><p>Hello Peter. "
             "Go for gold.</p></body></html>")
     d = [list(xs) for xs in html_to_document(html, "en")]
     self.assertEqual([u'Hello Peter .'.split(), u'Go for gold .'.split()],
                      d)

Esempio n. 2

Mostra file

File: test_input_conversion.py Progetto: ahurriyetoglu/yalign

 def test_extract(self):
     """Verify extraction of one- and two-sentence paragraphs from HTML.

     The expected value is a list of token lists, one per sentence, with
     sentence-final periods split off as their own tokens.
     """
     single = "<html><head></head><body><p>Hello Peter</p></body></html>"
     tokens = [list(xs) for xs in html_to_document(single, "en")]
     # The assertEquals alias is deprecated and no longer exists in
     # Python 3.12+, so the canonical assertEqual is used.
     self.assertEqual([u'Hello Peter'.split()], tokens)

     double = ("<html><head></head><body><p>Hello Peter. "
               "Go for gold.</p></body></html>")
     tokens = [list(xs) for xs in html_to_document(double, "en")]
     expected = [u'Hello Peter .'.split(), u'Go for gold .'.split()]
     self.assertEqual(expected, tokens)

Esempio n. 3

Mostra file

File: test_input_conversion.py Progetto: ahurriyetoglu/yalign

 def test_sentence_splitting(self):
     """Check that sentences ending in '!!', '!' and '?' are split apart.

     Each expected sentence is a token list whose last token is the
     terminating punctuation.
     """
     html = ("<html><head></head><body><p>Wow!! "
             "I did not know! Are you sure?</p></body></html>")
     d = [list(xs) for xs in html_to_document(html, "en")]
     expected = [
         u'Wow !!'.split(),
         u'I did not know !'.split(),
         u'Are you sure ?'.split(),
     ]
     # assertEqual replaces the deprecated assertEquals alias (removed in
     # Python 3.12).
     self.assertEqual(expected, d)

Esempio n. 4

Mostra file

File: test_input_conversion.py Progetto: ahurriyetoglu/yalign

 def test_generates_something(self):
     """Check that a real HTML file converts to a multi-sentence document.

     Reads ``index.html`` from ``data_path``, converts it, and asserts that
     the result has more than one element, that every element is a
     ``Sentence``, and that every word in it is a ``unicode`` string.
     """
     # Use a context manager so the file handle is closed deterministically
     # instead of being left to the garbage collector.
     with open(os.path.join(data_path, "index.html")) as html_file:
         text = html_file.read()
     document = html_to_document(text, "en")
     self.assertGreater(len(document), 1)
     for sentence in document:
         self.assertIsInstance(sentence, Sentence)
         for word in sentence:
             self.assertIsInstance(word, unicode)

Esempio n. 5

Mostra file

File: test_input_conversion.py Progetto: wannaphong/yalign

 def test_sentence_splitting(self):
     """Verify splitting on exclamation and question marks.

     The paragraph contains three sentences terminated by '!!', '!' and
     '?'; the converted document must contain three matching token lists.
     """
     html = ("<html><head></head><body><p>Wow!! "
             "I did not know! Are you sure?</p></body></html>")
     sentences = [list(xs) for xs in html_to_document(html, "en")]
     # The deprecated assertEquals alias was removed in Python 3.12;
     # assertEqual is the supported spelling.
     self.assertEqual([
         u'Wow !!'.split(), u'I did not know !'.split(),
         u'Are you sure ?'.split()
     ], sentences)

Esempio n. 6

Mostra file

File: test_input_conversion.py Progetto: wannaphong/yalign

 def test_generates_something(self):
     """Smoke-test conversion of the ``index.html`` fixture.

     Asserts that the converted document has more than one element, each
     element is a ``Sentence`` instance, and each word is ``unicode``.
     """
     fixture = os.path.join(data_path, "index.html")
     # The with-block guarantees the fixture file is closed; the original
     # open(...).read() left the handle open until garbage collection.
     with open(fixture) as handle:
         text = handle.read()
     document = html_to_document(text, "en")
     self.assertGreater(len(document), 1)
     for sentence in document:
         self.assertIsInstance(sentence, Sentence)
         for word in sentence:
             self.assertIsInstance(word, unicode)

Esempio n. 7

Mostra file

File: test_input_conversion.py Progetto: ahurriyetoglu/yalign

 def test_remove_whitespacing(self):
     """Check that newlines and tabs inside a paragraph are discarded.

     The input mixes a newline, tabs and trailing tabs into the text; the
     expected result is one sentence made only of word and punctuation
     tokens.
     """
     html = ("<html><head></head><body><p>Wow\n\tWhat now?\t\t"
             "</p></body></html>")
     d = [list(xs) for xs in html_to_document(html, "en")]
     # assertEqual replaces the deprecated assertEquals alias (removed in
     # Python 3.12).
     self.assertEqual([u'Wow What now ?'.split()], d)

Esempio n. 8

Mostra file

File: test_input_conversion.py Progetto: ahurriyetoglu/yalign

 def test_newlines(self):
     """Check that newlines in the markup and text do not affect the result.

     Newlines appear between tags and between the two sentences; the
     expected output is still exactly two tokenized sentences.
     """
     html = ("<html><head></head>\n\n<body><p>\nHello Peter."
             "\n\n\n Go for gold.\n</p>\n</body></html>")
     d = [list(xs) for xs in html_to_document(html, "en")]
     # assertEqual is the supported name; the assertEquals alias is
     # deprecated and was removed in Python 3.12.
     self.assertEqual([u'Hello Peter .'.split(), u'Go for gold .'.split()],
                      d)

Esempio n. 9

Mostra file

File: test_input_conversion.py Progetto: wannaphong/yalign

 def test_remove_whitespacing(self):
     """Verify that tab and newline characters produce no tokens.

     The paragraph text contains '\\n\\t' between words and trailing tabs;
     the converted document must be a single sentence of four tokens.
     """
     html = ("<html><head></head><body><p>Wow\n\tWhat now?\t\t"
             "</p></body></html>")
     sentences = [list(xs) for xs in html_to_document(html, "en")]
     expected = [u'Wow What now ?'.split()]
     # The deprecated assertEquals alias no longer exists in Python 3.12+.
     self.assertEqual(expected, sentences)

Esempio n. 10

Mostra file

File: test_input_conversion.py Progetto: wannaphong/yalign

 def test_newlines(self):
     """Verify that extra newlines do not change the extracted sentences.

     The HTML has newlines between tags, around the paragraph text and
     between the two sentences; two tokenized sentences are expected.
     """
     html = ("<html><head></head>\n\n<body><p>\nHello Peter."
             "\n\n\n Go for gold.\n</p>\n</body></html>")
     sentences = [list(xs) for xs in html_to_document(html, "en")]
     expected = [u'Hello Peter .'.split(), u'Go for gold .'.split()]
     # assertEqual replaces the deprecated assertEquals alias (removed in
     # Python 3.12).
     self.assertEqual(expected, sentences)