Ejemplo n.º 1
0
    def test_get_query(self):
        # Query that get_query() is expected to extract from the parsed
        # header of the first fixture; the embedded line breaks and double
        # spaces are part of the expected value.
        expected = (
            u'(((Japan OR Fukushima) AND (Erdbeben OR nuklear OR Tsunami'
            ' OR Krise\nOR Katastrophe OR Tepco)) '
            ' AND date(geq(7/3/2011) AND leq(31/8/2011)) AND\n'
            'pub(B\xf6rsen Zeitung OR  Frankfurter Rundschau OR  '
            'taz OR  die tageszeitung))'
        )
        raw_header, _ = split_header(self.test_text)
        parsed_header = parse_header(raw_header)
        self.assertEqual(get_query(parsed_header), expected)

        # For the second fixture no query must be returned at all.
        raw_header, _ = split_header(self.test_text2)
        parsed_header = parse_header(raw_header)
        self.assertIsNone(get_query(parsed_header))
Ejemplo n.º 2
0
    def test_get_query(self):
        # Query that get_query() is expected to extract from the parsed
        # header of the first fixture; the embedded line breaks and double
        # spaces are part of the expected value.
        expected = (
            u'(((Japan OR Fukushima) AND (Erdbeben OR nuklear OR Tsunami'
            ' OR Krise\nOR Katastrophe OR Tepco)) '
            ' AND date(geq(7/3/2011) AND leq(31/8/2011)) AND\n'
            'pub(B\xf6rsen Zeitung OR  Frankfurter Rundschau OR  '
            'taz OR  die tageszeitung))'
        )
        raw_header, _ = split_header(self.test_text)
        parsed_header = parse_header(raw_header)
        self.assertEqual(get_query(parsed_header), expected)

        # For the second fixture no query must be returned at all.
        raw_header, _ = split_header(self.test_text2)
        parsed_header = parse_header(raw_header)
        self.assertIsNone(get_query(parsed_header))
Ejemplo n.º 3
0
    def test_parse_no_header(self):
        raw_header, body = split_header(self.test_text2)

        # Once the byte-order mark (U+FEFF) and surrounding whitespace are
        # removed, nothing may remain of the header part.
        cleaned_header = raw_header.replace(u'\ufeff', '').strip()
        self.assertFalse(bool(cleaned_header))

        # The body must still split into exactly one item.
        chunks = list(split_body(body))
        self.assertEqual(len(chunks), 1)
Ejemplo n.º 4
0
    def test_parse_no_header(self):
        raw_header, body = split_header(self.test_text2)

        # Once the byte-order mark (U+FEFF) and surrounding whitespace are
        # removed, nothing may remain of the header part.
        cleaned_header = raw_header.replace(u'\ufeff', '').strip()
        self.assertFalse(bool(cleaned_header))

        # The body must still split into exactly one item.
        chunks = list(split_body(body))
        self.assertEqual(len(chunks), 1)
Ejemplo n.º 5
0
 def split(self):
     """Return the result of ``split_header`` applied to ``self.test_text``."""
     parts = split_header(self.test_text)
     return parts
Ejemplo n.º 6
0
 def test_kop_as_headline(self):
     # Some LexisNexis files contain "KOP: " instead of "HEADLINE: "; the
     # value must still end up as the title of the parsed article.
     _, body = split_header(self.test_text3)
     first_chunk = next(split_body(body))
     article = parse_article(first_chunk)
     expected_title = "Gretta Duisenberg oprichtster van Palestina-groep"
     self.assertEqual(expected_title, article['title'])
Ejemplo n.º 7
0
 def split(self):
     """Return the result of ``split_header`` applied to ``self.test_text``."""
     parts = split_header(self.test_text)
     return parts
Ejemplo n.º 8
0
 def test_kop_as_headline(self):
     # Some LexisNexis files contain "KOP: " instead of "HEADLINE: "; the
     # value must still end up as the headline of the resulting article.
     _, body = split_header(self.test_text3)
     first_chunk = next(split_body(body))
     parsed_fields = parse_article(first_chunk)
     article = body_to_article(*parsed_fields)
     expected_headline = "Gretta Duisenberg oprichtster van Palestina-groep"
     self.assertEqual(expected_headline, article.headline)