Exemple #1
0
    def test_unique_uuid(self):
        """Ingest ``uuid_unique.json`` and expect two stored articles.

        Removes every NewsArticle, Tag and UploadedObject node, feeds the
        fixture through ``gather_news_results`` and ``query_webhose`` under
        a temporary "science" tag, then checks that exactly two NewsArticle
        nodes exist and that both expected ``external_id`` values match.
        """
        fixture_path = (settings.PROJECT_DIR +
                        "/sb_news/tests/sample_json/uuid_unique.json")

        # Delete the node types this test touches, with their relationships.
        purge_statements = (
            'MATCH (a:NewsArticle) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
            'MATCH (a:Tag) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
            'MATCH (a:UploadedObject) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
        )
        db.cypher_batch_query([(stmt, {}) for stmt in purge_statements])

        science_tag = Tag(name="science").save()
        query_webhose(gather_news_results(None, fixture_path), science_tag)
        science_tag.delete()
        self.assertEqual(len(NewsArticle.nodes.all()), 2)

        # Each expected article must be retrievable by its external id.
        lookups = (
            'MATCH (a:NewsArticle '
            '{external_id: "862248ce467c25d9fa20d66e43f13d7f0800882b"}) '
            'RETURN a',
            'MATCH (a:NewsArticle '
            '{external_id: "c11407c888ddbfedca4a56b1aac482d153ad6039"}) '
            'RETURN a',
        )
        for lookup in lookups:
            rows, _ = db.cypher_query(lookup)
            self.assertIsNotNone(rows.one)
Exemple #2
0
    def test_too_close_to_another_article_title(self):
        """Ingest the near-duplicate-title fixture and expect two articles.

        Removes every NewsArticle, Tag and UploadedObject node, feeds
        ``too_close_to_another_article_title.json`` through
        ``gather_news_results`` and ``query_webhose`` under a temporary
        "science" tag, then checks that exactly two NewsArticle nodes exist
        and that both expected ``external_id`` values match.
        """
        fixture_path = (settings.PROJECT_DIR +
                        "/sb_news/tests/sample_json/"
                        "too_close_to_another_article_title.json")

        # Delete the node types this test touches, with their relationships.
        purge_statements = (
            'MATCH (a:NewsArticle) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
            'MATCH (a:Tag) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
            'MATCH (a:UploadedObject) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
        )
        db.cypher_batch_query([(stmt, {}) for stmt in purge_statements])

        science_tag = Tag(name="science").save()
        query_webhose(gather_news_results(None, fixture_path), science_tag)
        science_tag.delete()
        self.assertEqual(len(NewsArticle.nodes.all()), 2)

        # Each expected article must be retrievable by its external id.
        lookups = (
            'MATCH (a:NewsArticle '
            '{external_id: "862248ce467c25d9fa20d66e43f13d7f0800882b"}) '
            'RETURN a',
            'MATCH (a:NewsArticle '
            '{external_id: "682408ea92f61d5a44de67ed2aaef7369c401bce"}) '
            'RETURN a',
        )
        for lookup in lookups:
            rows, _ = db.cypher_query(lookup)
            self.assertIsNotNone(rows.one)
Exemple #3
0
    def test_too_close_to_another_article_content(self):
        """Ingest the near-duplicate-content fixture and expect two articles.

        Removes every NewsArticle, Tag and UploadedObject node, feeds
        ``too_close_to_another_article_content.json`` through
        ``gather_news_results`` and ``query_webhose`` under a temporary
        "science" tag, then checks that exactly two NewsArticle nodes exist
        and that both expected ``external_id`` values match.
        """
        test_file = settings.PROJECT_DIR + \
            "/sb_news/tests/sample_json/" \
            "too_close_to_another_article_content.json"

        # Delete the node types this test touches, with their relationships.
        cleanup_queries = (
            'MATCH (a:NewsArticle) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
            'MATCH (a:Tag) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
            'MATCH (a:UploadedObject) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
        )
        db.cypher_batch_query([(query, {}) for query in cleanup_queries])

        tag = Tag(name="science").save()
        results = gather_news_results(None, test_file)
        query_webhose(results, tag)
        tag.delete()
        self.assertEqual(len(NewsArticle.nodes.all()), 2)

        # One lookup per expected article; a second identical lookup of the
        # same external id would add a database round-trip and no coverage.
        expected_lookups = (
            'MATCH (a:NewsArticle '
            '{external_id: "188ed9cfc6e2214c067ad8e46ec6cd10e392646e"}) '
            'RETURN a',
            'MATCH (a:NewsArticle '
            '{external_id: "c11407c888ddbfedca4a56b1aac482d153ad6039"}) '
            'RETURN a',
        )
        for query in expected_lookups:
            res, _ = db.cypher_query(query)
            self.assertIsNotNone(res.one)
Exemple #4
0
    def test_title_unique(self):
        """Ingest ``title_unique.json`` and expect one article per title.

        Removes every NewsArticle, Tag and UploadedObject node, feeds the
        fixture through ``gather_news_results`` and ``query_webhose`` under
        a temporary "science" tag, then checks that each of three titles
        matches exactly one NewsArticle node.
        """
        fixture_path = (settings.PROJECT_DIR +
                        "/sb_news/tests/sample_json/title_unique.json")

        # Delete the node types this test touches, with their relationships.
        purge_statements = (
            'MATCH (a:NewsArticle) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
            'MATCH (a:Tag) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
            'MATCH (a:UploadedObject) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
        )
        db.cypher_batch_query([(stmt, {}) for stmt in purge_statements])

        science_tag = Tag(name="science").save()
        query_webhose(gather_news_results(None, fixture_path), science_tag)
        science_tag.delete()

        # Every title below must be stored exactly once.
        title_lookups = (
            'MATCH (a:NewsArticle {title: '
            '"Sanders\' American Dream Is In Denmark"}) RETURN a',
            'MATCH (a:NewsArticle {title: '
            '"ISIS Inc: How Oil Fuels The Jihadi Terrorists - '
            'ft.com"}) RETURN a',
            'MATCH (a:NewsArticle {title: '
            '"Breaking: Donald Trump Stuns With '
            'Announcement Of Foreign Policy Dream Team"}) RETURN a',
        )
        for lookup in title_lookups:
            rows, _ = db.cypher_query(lookup)
            self.assertEqual(len(rows), 1)
 def test_detail(self):
     """Create an article through the API and read it back by its id.

     Removes every NewsArticle, Tag and UploadedObject node, authenticates
     the test client as ``self.user``, POSTs a payload to ``news-list`` and
     GETs ``news-detail`` for the returned id. The GET must answer 200 and
     return the title as 'This Is The Title' (posted as
     "This is the title").
     """
     # Delete the node types this test touches, with their relationships.
     cleanup_queries = (
         'MATCH (a:NewsArticle) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
         'MATCH (a:Tag) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
         'MATCH (a:UploadedObject) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
     )
     db.cypher_batch_query([(query, {}) for query in cleanup_queries])

     self.client.force_authenticate(user=self.user)
     url = reverse('news-list')
     data = {
         "provider": "sb_crawler",
         "external_id": str(uuid1()),
         "url": "https://www.sagebrew.com",
         "site_full": "https://www.sagebrew.com",
         "site_section": "site_section",
         "title": "This is the title",
         "content": "This is some fake content",
         "title_full": "A full title",
         "language": "en",
         "published": datetime.now(pytz.utc),
         "country": "US",
         "spam_score": 0.0,
         "image": "https://sagebrew-master.s3.amazonaws.com/"
                  "profile_pictures/"
                  "8a274be8-71ee-259c-32fc-c3269a5adf9b-198x200.png",
         "performance_score": 10,
         "crawled": datetime.now(pytz.utc),
     }
     response = self.client.post(url, data=data, format='json')

     url = reverse('news-detail',
                   kwargs={"object_uuid": response.data['id']})
     response = self.client.get(url, format='json')
     # Assert the status before reading the payload so that a failed
     # request is reported as a status mismatch, not a missing key.
     self.assertEqual(response.status_code, status.HTTP_200_OK)
     self.assertEqual(response.data['title'], 'This Is The Title')
Exemple #6
0
 def test_excluded_titles(self):
     """Ingest ``exclude_titles.json`` and expect no stored articles.

     Removes every NewsArticle, Tag and UploadedObject node, feeds the
     fixture through ``gather_news_results`` and ``query_webhose`` under a
     temporary "science" tag, then checks that no NewsArticle node exists.
     """
     fixture_path = (settings.PROJECT_DIR +
                     "/sb_news/tests/sample_json/exclude_titles.json")

     # Delete the node types this test touches, with their relationships.
     purge_statements = (
         'MATCH (a:NewsArticle) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
         'MATCH (a:Tag) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
         'MATCH (a:UploadedObject) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
     )
     db.cypher_batch_query([(stmt, {}) for stmt in purge_statements])

     science_tag = Tag(name="science").save()
     query_webhose(gather_news_results(None, fixture_path), science_tag)
     science_tag.delete()

     self.assertEqual(len(NewsArticle.nodes.all()), 0)
Exemple #7
0
    def test_title_reformat(self):
        """Ingest ``title_reformat.json`` and check the stored titles.

        Removes every NewsArticle, Tag and UploadedObject node, feeds the
        fixture through ``gather_news_results`` and ``query_webhose`` under
        a temporary "science" tag, then compares the ``title`` of four
        articles, looked up by ``external_id``, with the expected text.
        """
        fixture_path = (settings.PROJECT_DIR +
                        "/sb_news/tests/sample_json/title_reformat.json")

        # Delete the node types this test touches, with their relationships.
        purge_statements = (
            'MATCH (a:NewsArticle) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
            'MATCH (a:Tag) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
            'MATCH (a:UploadedObject) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
        )
        db.cypher_batch_query([(stmt, {}) for stmt in purge_statements])

        science_tag = Tag(name="science").save()
        query_webhose(gather_news_results(None, fixture_path), science_tag)
        science_tag.delete()

        # (lookup query, expected stored title) pairs, checked in order.
        cases = (
            ('MATCH (a:NewsArticle '
             '{external_id: "c7dd81cf775476d17fd9effe3a43d13d060eb2c8"}) '
             'RETURN a',
             "Obama's Team Welcomes Castro & Cuba To America & To Receive "
             "Criticism: 'Wouldn't Disagree'...DNC; DNC: DNC. Dncabc DNC"),
            ('MATCH (a:NewsArticle '
             '{external_id: "c294037cba0aad280b655614c5c776f1c5b453ce"}) '
             'RETURN a',
             "Friends Of Israel - The New Yorker"),
            ('MATCH (a:NewsArticle '
             '{external_id: "c6dd81cf775476d17fd9effe3a43d13d060eb2c8"}) '
             'RETURN a',
             "What's This Another New US USA U.S. U.S.A. Title It's A "
             "Miracle..."),
            ('MATCH (a:NewsArticle '
             '{external_id: "c8dd81cf775476d17fd9effe3a43d13d060eb2c8"}) '
             'RETURN a',
             "Yet Another Title! What Is This!"),
        )
        for lookup, expected_title in cases:
            rows, _ = db.cypher_query(lookup)
            self.assertEqual(rows.one['title'], expected_title)
Exemple #8
0
 def test_site_not_supported(self):
     """Ingest ``site_not_supported.json`` and expect no stored articles.

     Removes every NewsArticle, Tag and UploadedObject node, feeds the
     fixture through ``gather_news_results`` and ``query_webhose`` under a
     temporary "science" tag, then checks that matching any NewsArticle
     node yields a result whose ``one`` is None.
     """
     fixture_path = (settings.PROJECT_DIR +
                     "/sb_news/tests/sample_json/site_not_supported.json")

     # Delete the node types this test touches, with their relationships.
     purge_statements = (
         'MATCH (a:NewsArticle) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
         'MATCH (a:Tag) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
         'MATCH (a:UploadedObject) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
     )
     db.cypher_batch_query([(stmt, {}) for stmt in purge_statements])

     science_tag = Tag(name="science").save()
     query_webhose(gather_news_results(None, fixture_path), science_tag)
     science_tag.delete()

     rows, _ = db.cypher_query('MATCH (a:NewsArticle) RETURN a')
     self.assertIsNone(rows.one)
Exemple #9
0
    def test_quote_query(self):
        """Ingest ``quote_query.json`` and check one stored title.

        Removes every NewsArticle, Tag and UploadedObject node, feeds the
        fixture through ``gather_news_results`` and ``query_webhose`` under
        a temporary "science" tag, then compares the ``title`` of the
        article matched by ``external_id`` with the expected text.
        """
        fixture_path = (settings.PROJECT_DIR +
                        "/sb_news/tests/sample_json/quote_query.json")

        # Delete the node types this test touches, with their relationships.
        purge_statements = (
            'MATCH (a:NewsArticle) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
            'MATCH (a:Tag) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
            'MATCH (a:UploadedObject) OPTIONAL MATCH (a)-[r]-() DELETE a, r',
        )
        db.cypher_batch_query([(stmt, {}) for stmt in purge_statements])

        science_tag = Tag(name="science").save()
        query_webhose(gather_news_results(None, fixture_path), science_tag)
        science_tag.delete()

        lookup = ('MATCH (a:NewsArticle '
                  '{external_id: "c8a3179934b6d4609632383add7fb27ddf3d3842"}) '
                  'RETURN a')
        rows, _ = db.cypher_query(lookup)
        expected_title = ("Breaking: Donald Trump Stuns With Announcement "
                          "Of Foreign Policy Dream Team")
        self.assertEqual(rows.one['title'], expected_title)