Python scrap_body Examples, processing.load_body.scrap_body Python Examples

Example #1

0

Show file

File: test_load_body.py Project: brunousml/articles_meta

    def test_scrapt_body(self):
        # The input wraps the article markup in a <div class="index,en">
        # container; the expected value is exactly that container's content.
        html = u"""<html><header></header><body><div class="content"><div class="index,en"><div class="title">Crazy <i>Title</i></div><p>Crazy Body</p><p>Really Crazy Body</p></div></div></body></html>"""
        expected = '<div class="title">Crazy <i>Title</i></div><p>Crazy Body</p><p>Really Crazy Body</p>'

        scraped = load_body.scrap_body(html, 'en')

        self.assertEqual(scraped, expected)

Example #2

0

Show file

    def test_scrapt_body_not_found_for_a_given_language(self):
        # The document only carries an "index,en" block, so requesting 'pt'
        # is expected to produce no body. The input is passed as UTF-8 bytes.
        data = u"""<html><header></header><body><div class="content"><div class="index,en"><div class="title">Crazy <i>Title</i></div><p>Crazy Body</p><p>Really Crazy Body</p></div></div></body></html>"""

        result = load_body.scrap_body(data.encode('utf-8'), 'pt')

        # assertIsNone checks identity and gives a clearer failure message
        # than comparing against None with assertEqual.
        self.assertIsNone(result)

Example #3

0

Show file

File: test_load_body.py Project: javani/articles_meta

    def test_scrapt_body_not_found(self):
        # The "content" container is empty, so no body is expected for 'pt'.
        data = u"""<html><header></header><body><div class="content"></div></body></html>"""

        result = load_body.scrap_body(data, 'pt')

        # assertIsNone checks identity and gives a clearer failure message
        # than comparing against None with assertEqual.
        self.assertIsNone(result)

Example #4

0

Show file

File: test_load_body.py Project: javani/articles_meta

    def test_scrapt_body_not_found_for_a_given_language(self):
        # The document only carries an "index,en" block, so requesting 'pt'
        # is expected to produce no body.
        data = u"""<html><header></header><body><div class="content"><div class="index,en"><div class="title">Crazy <i>Title</i></div><p>Crazy Body</p><p>Really Crazy Body</p></div></div></body></html>"""

        result = load_body.scrap_body(data, 'pt')

        # assertIsNone checks identity and gives a clearer failure message
        # than comparing against None with assertEqual.
        self.assertIsNone(result)

Example #5

0

Show file

File: test_load_body.py Project: javani/articles_meta

    def test_scrapt_body(self):
        # The expected value is the content of the <div class="index,en">
        # container found in the input document.
        document = u"""<html><header></header><body><div class="content"><div class="index,en"><div class="title">Crazy <i>Title</i></div><p>Crazy Body</p><p>Really Crazy Body</p></div></div></body></html>"""
        expected_body = '<div class="title">Crazy <i>Title</i></div><p>Crazy Body</p><p>Really Crazy Body</p>'

        self.assertEqual(load_body.scrap_body(document, 'en'), expected_body)

Example #6

0

Show file

    def test_scrapt_body_not_found(self):
        # The "content" container is empty, so no body is expected for 'pt'.
        # The input is passed as UTF-8 bytes.
        data = u"""<html><header></header><body><div class="content"></div></body></html>"""

        result = load_body.scrap_body(data.encode('utf-8'), 'pt')

        # assertIsNone checks identity and gives a clearer failure message
        # than comparing against None with assertEqual.
        self.assertIsNone(result)

Example #7

0

Show file

File: test_load_body.py Project: brunousml/articles_meta

    def test_body_sample_7(self):
        # Build the path with os.path.join so an empty dirname(__file__)
        # does not turn the fixture path into the absolute '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_7.html')

        # Close the fixture deterministically instead of leaking the handle.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            # Strip each line and join with single spaces into one string.
            data = ' '.join([line.strip() for line in fixture.readlines()])

        result = load_body.scrap_body(data, 'en')

        # Text at the beginning of the document
        self.assertIn(u'caso da bacia    do Amazonas', result)
        # Text at the end of the document
        self.assertIn(u'com o Embasamento. Universidade Federal', result)

Example #8

0

Show file

File: test_load_body.py Project: brunousml/articles_meta

    def test_body_sample_6(self):
        # Build the path with os.path.join so an empty dirname(__file__)
        # does not turn the fixture path into the absolute '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_6.html')

        # Close the fixture deterministically instead of leaking the handle.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            # Strip each line and join with single spaces into one string.
            data = ' '.join([line.strip() for line in fixture.readlines()])

        result = load_body.scrap_body(data, 'pt')

        # Text at the beginning of the document
        self.assertIn(u'Editorial', result)
        # Text at the end of the document
        self.assertIn(u'Boa leitura!', result)

Example #9

0

Show file

File: test_load_body.py Project: brunousml/articles_meta

    def test_body_sample_5(self):
        # Build the path with os.path.join so an empty dirname(__file__)
        # does not turn the fixture path into the absolute '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_5.html')

        # Close the fixture deterministically instead of leaking the handle.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            # Strip each line and join with single spaces into one string.
            data = ' '.join([line.strip() for line in fixture.readlines()])

        result = load_body.scrap_body(data, 'pt')

        # Text at the beginning of the document
        self.assertIn(u'Molestia de Carlos Chagas', result)
        # Text at the end of the document
        self.assertIn(u'Full text available only in PDF format', result)

Example #10

0

Show file

File: test_load_body.py Project: brunousml/articles_meta

    def test_body_sample_4(self):
        # Build the path with os.path.join so an empty dirname(__file__)
        # does not turn the fixture path into the absolute '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_4.html')

        # Close the fixture deterministically instead of leaking the handle.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            # Strip each line and join with single spaces into one string.
            data = ' '.join([line.strip() for line in fixture.readlines()])

        result = load_body.scrap_body(data, 'pt')

        # Text at the beginning of the document
        self.assertIn(u'Aquarelas de um Brasil', result)
        # Text at the end of the document
        self.assertIn(u'São Paulo, Companhia das Letras.', result)

Example #11

0

Show file

File: test_load_body.py Project: brunousml/articles_meta

    def test_body_sample_3(self):
        # Build the path with os.path.join so an empty dirname(__file__)
        # does not turn the fixture path into the absolute '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_3.html')

        # Close the fixture deterministically instead of leaking the handle.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            # Strip each line and join with single spaces into one string.
            data = ' '.join([line.strip() for line in fixture.readlines()])

        result = load_body.scrap_body(data, 'pt')

        # Text at the beginning of the document
        self.assertIn(u'A TRIBUTAÇÃO NA PRODUÇÃO DE CARVÃO VEGETAL', result)
        # Text at the end of the document
        self.assertIn(u'Recebido: 03 de Fevereiro de 2012; Aceito: 14 de Abril de 2014', result)

Example #12

0

Show file

File: test_load_body.py Project: brunousml/articles_meta

    def test_body_sample_2(self):
        # Build the path with os.path.join so an empty dirname(__file__)
        # does not turn the fixture path into the absolute '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_2.html')

        # Close the fixture deterministically instead of leaking the handle.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            # Strip each line and join with single spaces into one string.
            data = ' '.join([line.strip() for line in fixture.readlines()])

        result = load_body.scrap_body(data, 'pt')

        # Text at the beginning of the document
        self.assertIn(u'meio para isolamento de', result)
        # Text at the end of the document
        self.assertIn(u'Recebido    para publicação em 31-7-1967', result)

Example #13

0

Show file

File: test_load_body.py Project: javani/articles_meta

    def test_body_sample_4(self):
        # Build the path with os.path.join so an empty dirname(__file__)
        # does not turn the fixture path into the absolute '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_4.html')

        # Close the fixture deterministically instead of leaking the handle.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            # Strip each line and join with single spaces into one string.
            data = ' '.join([line.strip() for line in fixture.readlines()])

        result = load_body.scrap_body(data, 'pt')

        # Text at the beginning of the document
        self.assertIn(u'Aquarelas de um Brasil', result)
        # Text at the end of the document
        self.assertIn(u'São Paulo, Companhia das Letras.', result)

Example #14

0

Show file

File: test_load_body.py Project: javani/articles_meta

    def test_body_sample_2(self):
        # Build the path with os.path.join so an empty dirname(__file__)
        # does not turn the fixture path into the absolute '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_2.html')

        # Close the fixture deterministically instead of leaking the handle.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            # Strip each line and join with single spaces into one string.
            data = ' '.join([line.strip() for line in fixture.readlines()])

        result = load_body.scrap_body(data, 'pt')

        # Text at the beginning of the document
        self.assertIn(u'meio para isolamento de', result)
        # Text at the end of the document
        self.assertIn(u'Recebido    para publicação em 31-7-1967', result)

Example #15

0

Show file

File: test_load_body.py Project: javani/articles_meta

    def test_body_sample_7(self):
        # Build the path with os.path.join so an empty dirname(__file__)
        # does not turn the fixture path into the absolute '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_7.html')

        # Close the fixture deterministically instead of leaking the handle.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            # Strip each line and join with single spaces into one string.
            data = ' '.join([line.strip() for line in fixture.readlines()])

        result = load_body.scrap_body(data, 'en')

        # Text at the beginning of the document
        self.assertIn(u'caso da bacia    do Amazonas', result)
        # Text at the end of the document
        self.assertIn(u'com o Embasamento. Universidade Federal', result)

Example #16

0

Show file

File: test_load_body.py Project: javani/articles_meta

    def test_body_sample_6(self):
        # Build the path with os.path.join so an empty dirname(__file__)
        # does not turn the fixture path into the absolute '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_6.html')

        # Close the fixture deterministically instead of leaking the handle.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            # Strip each line and join with single spaces into one string.
            data = ' '.join([line.strip() for line in fixture.readlines()])

        result = load_body.scrap_body(data, 'pt')

        # Text at the beginning of the document
        self.assertIn(u'Editorial', result)
        # Text at the end of the document
        self.assertIn(u'Boa leitura!', result)

Example #17

0

Show file

File: test_load_body.py Project: javani/articles_meta

    def test_body_sample_5(self):
        # Build the path with os.path.join so an empty dirname(__file__)
        # does not turn the fixture path into the absolute '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_5.html')

        # Close the fixture deterministically instead of leaking the handle.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            # Strip each line and join with single spaces into one string.
            data = ' '.join([line.strip() for line in fixture.readlines()])

        result = load_body.scrap_body(data, 'pt')

        # Text at the beginning of the document
        self.assertIn(u'Molestia de Carlos Chagas', result)
        # Text at the end of the document
        self.assertIn(u'Full text available only in PDF format', result)

Example #18

0

Show file

File: test_load_body.py Project: brunousml/articles_meta

    def test_body_sample_1(self):
        # Build the path with os.path.join so an empty dirname(__file__)
        # does not turn the fixture path into the absolute '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_1.html')

        # Close the fixture deterministically instead of leaking the handle.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            # Strip each line and join with single spaces into one string.
            data = ' '.join([line.strip() for line in fixture.readlines()])

        result = load_body.scrap_body(data, 'pt')

        # Text at the beginning of the document
        self.assertIn(u'On the one pot syntheses', result)
        # Text at the end of the document
        self.assertIn(u'Web Release Date: November 26, 2009', result)

Example #19

0

Show file

File: test_load_body.py Project: javani/articles_meta

    def test_body_sample_1(self):
        # Build the path with os.path.join so an empty dirname(__file__)
        # does not turn the fixture path into the absolute '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_1.html')

        # Close the fixture deterministically instead of leaking the handle.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            # Strip each line and join with single spaces into one string.
            data = ' '.join([line.strip() for line in fixture.readlines()])

        result = load_body.scrap_body(data, 'pt')

        # Text at the beginning of the document
        self.assertIn(u'On the one pot syntheses', result)
        # Text at the end of the document
        self.assertIn(u'Web Release Date: November 26, 2009', result)

Example #20

0

Show file

File: test_load_body.py Project: javani/articles_meta

    def test_body_sample_3(self):
        # Build the path with os.path.join so an empty dirname(__file__)
        # does not turn the fixture path into the absolute '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'body_sample_3.html')

        # Close the fixture deterministically instead of leaking the handle.
        with codecs.open(fixture_path, 'r', encoding='utf-8') as fixture:
            # Strip each line and join with single spaces into one string.
            data = ' '.join([line.strip() for line in fixture.readlines()])

        result = load_body.scrap_body(data, 'pt')

        # Text at the beginning of the document
        self.assertIn(u'A TRIBUTAÇÃO NA PRODUÇÃO DE CARVÃO VEGETAL', result)
        # Text at the end of the document
        self.assertIn(u'Recebido: 03 de Fevereiro de 2012; Aceito: 14 de Abril de 2014', result)