def test_split_body(self):
    """split_body() must find every expected article plus one 'defigured' one.

    Fix: `assertEquals` is a deprecated alias removed in Python 3.12;
    use `assertEqual`.
    """
    splitted = self.split()
    n_found = len(list(split_body(splitted[1])))
    n_sol = len(self.test_body_sols)
    # +1 accounts for the extra 'defigured' article present in the body
    self.assertEqual(n_found, n_sol + 1)
def test_parse_article(self):
    """Every parsed article dict must equal its expected solution dict.

    Fix: `assertEquals` is a deprecated alias removed in Python 3.12;
    use `assertEqual`. Commented-out debug code removed.
    """
    splitted = self.split()
    texts = split_body(splitted[1])

    arts = []
    for t in texts:
        art = parse_article(t)
        if art:
            # JSON doesn't do dates, so compare string representations
            art['date'] = str(art['date'])
            arts.append(art)

    self.assertEqual(len(arts), len(self.test_body_sols))
    for i, (found, actual) in enumerate(zip(arts, self.test_body_sols)):
        akeys = sorted(actual.keys())
        fkeys = sorted(found.keys())
        if found != actual:
            # 'debug mode': dump differing keys to make failures diagnosable
            print("Article", i, actual.get('title'))
            print("Found keys:", fkeys)
            print("Actual keys:", akeys)
            for key in sorted(set(fkeys) | set(akeys)):
                f = found.get(key)
                a = actual.get(key)
                if f != a:
                    print("i:", i, "Key:", key, " found:", repr(f),
                          " actual:", repr(a))
        self.assertEqual(fkeys, akeys)
        self.assertEqual(found, actual)
def test_parse_no_header(self):
    """A text without a header yields an empty header and exactly one article."""
    header, body = split_header(self.test_text2)
    # Remove the BOM before asserting that no header content remains.
    cleaned = header.replace(u'\ufeff', '').strip()
    self.assertFalse(bool(cleaned))
    articles = list(split_body(body))
    self.assertEqual(len(articles), 1)
def test_body_to_article(self):
    """body_to_article() must build a correct Article from parsed fields.

    Fixes: `assertEquals` is a deprecated alias removed in Python 3.12;
    use `assertEqual`. `eval()` on the metastring is replaced by
    `ast.literal_eval`, which safely parses the same literal dict repr.
    """
    import ast

    header, body = self.split()
    articles = split_body(body)
    articles = [parse_article(a) for a in articles]

    # Only testing the first article. If this contains correct
    # data, we assume the implementation is correct. However,
    # we do test the remaining articles with full_clean().
    art = body_to_article(*articles[0])
    self.assertEqual(art.length, 306)
    self.assertEqual(art.headline, "This is a headline")
    self.assertEqual(art.byline, ("with a byline. The article "
                                  "contains unicode characters."))
    self.assertEqual(art.text, articles[0][2])
    self.assertEqual(art.date, datetime.datetime(2011, 8, 31))
    self.assertEqual(art.medium.name, u"B\u00f6rsen-Zeitung")
    self.assertEqual(art.author, "MF Tokio")
    self.assertEqual(ast.literal_eval(art.metastring),
                     {u'update': u'2. September 2011',
                      u'language': u'GERMAN; DEUTSCH',
                      u'publication-type': u'Zeitung'})

    # Setup environment
    dp = amcattest.create_test_project()

    # Test remaining articles
    for art in articles[1:]:
        if art is None:
            continue
        self._create_medium(art[4])
        p = body_to_article(*art)
        p.project = dp
        p.full_clean()
def test_parse_article(self):
    """Parsed article tuples must match the expected solutions.

    Fix: `assertEquals` is a deprecated alias removed in Python 3.12;
    use `assertEqual`.
    """
    splitted = self.split()
    texts = split_body(splitted[1])

    arts = []
    for a in texts:
        art = parse_article(a)
        if art is not None:
            # JSON doesn't do dates: stringify the date field (index 3)
            art = list(art)
            art[3] = str(art[3])
            arts.append(art)

    self.assertEqual(len(arts), len(self.test_body_sols))
    for expected, found in zip(self.test_body_sols, arts):
        self.assertEqual(expected, found)
def test_body_to_article(self):
    """body_to_article() must build a correct Article from parsed fields.

    Fixes: `assertEquals` is a deprecated alias removed in Python 3.12;
    use `assertEqual`. `eval()` on the metastring is replaced by
    `ast.literal_eval`, which safely parses the same literal dict repr.
    """
    import ast

    header, body = self.split()
    articles = split_body(body)
    articles = [parse_article(a) for a in articles]

    # Only testing the first article. If this contains correct
    # data, we assume the implementation is correct. However,
    # we do test the remaining articles with full_clean().
    art = body_to_article(*articles[0])
    self.assertEqual(art.length, 306)
    self.assertEqual(art.headline, "This is a headline")
    self.assertEqual(art.byline, ("with a byline. The article "
                                  "contains unicode characters."))
    self.assertEqual(art.text, articles[0][2])
    self.assertEqual(art.date, datetime.datetime(2011, 8, 31))
    self.assertEqual(art.medium.name, u"B\u00f6rsen-Zeitung")
    self.assertEqual(art.author, "MF Tokio")
    self.assertEqual(ast.literal_eval(art.metastring),
                     {u'update': u'2. September 2011',
                      u'language': u'GERMAN; DEUTSCH',
                      u'publication-type': u'Zeitung'})

    # Setup environment
    dp = amcattest.create_test_project()

    # Test remaining articles
    for art in articles[1:]:
        if art is None:
            continue
        self._create_medium(art[4])
        p = body_to_article(*art)
        p.project = dp
        p.full_clean()
def test_kop_as_headline(self):
    """Parsing must accept "KOP: " where other files have "HEADLINE: "."""
    # Some Lexis Nexis files contain "KOP: " instead of "HEADLINE: "
    _, body = split_header(self.test_text3)
    first_text = next(split_body(body))
    parsed = parse_article(first_text)
    self.assertEqual("Gretta Duisenberg oprichtster van Palestina-groep",
                     parsed['title'])
def test_meta(self):
    """The meta dict of the first article should report a length of 306."""
    first_text = next(iter(split_body(self.split()[1])))
    # parse_article returns the meta mapping as its last element
    meta = parse_article(first_text)[-1]
    self.assertEqual(meta.pop('length').split()[0], "306")
def test_kop_as_headline(self):
    """Articles using "KOP: " instead of "HEADLINE: " still get a headline."""
    # Some Lexis Nexis files contain "KOP: " instead of "HEADLINE: "
    _, body = split_header(self.test_text3)
    fields = parse_article(next(split_body(body)))
    article = body_to_article(*fields)
    self.assertEqual("Gretta Duisenberg oprichtster van Palestina-groep",
                     article.headline)