def test_unique_uuid(self):
    """Ingest ``uuid_unique.json`` and verify the stored articles.

    Removes all NewsArticle, Tag and UploadedObject nodes, passes the
    sample file through ``gather_news_results`` and ``query_webhose``
    under a temporary "science" tag, then expects exactly two
    NewsArticle nodes and a match for each expected external id.
    """
    sample_path = (settings.PROJECT_DIR +
                   "/sb_news/tests/sample_json/uuid_unique.json")
    # Remove nodes (and their relationships) left behind by earlier tests.
    db.cypher_batch_query([
        ('MATCH (a:NewsArticle) OPTIONAL MATCH (a)-[r]-() DELETE a, r', {}),
        ('MATCH (a:Tag) OPTIONAL MATCH (a)-[r]-() DELETE a, r', {}),
        ('MATCH (a:UploadedObject) OPTIONAL MATCH (a)-[r]-() DELETE a, r', {}),
    ])

    science_tag = Tag(name="science").save()
    query_webhose(gather_news_results(None, sample_path), science_tag)
    science_tag.delete()

    self.assertEqual(len(NewsArticle.nodes.all()), 2)

    first_lookup = ('MATCH (a:NewsArticle '
                    '{external_id: "862248ce467c25d9fa20d66e43f13d7f0800882b"}) '
                    'RETURN a')
    rows, _meta = db.cypher_query(first_lookup)
    self.assertIsNotNone(rows.one)

    second_lookup = ('MATCH (a:NewsArticle '
                     '{external_id: "c11407c888ddbfedca4a56b1aac482d153ad6039"}) '
                     'RETURN a')
    rows, _meta = db.cypher_query(second_lookup)
    self.assertIsNotNone(rows.one)
def test_too_close_to_another_article_title(self):
    """Ingest the near-duplicate-title sample and verify what is stored.

    After clearing NewsArticle, Tag and UploadedObject nodes, the sample
    file ``too_close_to_another_article_title.json`` is passed through
    ``gather_news_results`` and ``query_webhose``. The test expects
    exactly two NewsArticle nodes and a match for each expected
    external id.
    """
    sample_path = (settings.PROJECT_DIR +
                   "/sb_news/tests/sample_json/"
                   "too_close_to_another_article_title.json")
    # Remove nodes (and their relationships) left behind by earlier tests.
    db.cypher_batch_query([
        ('MATCH (a:NewsArticle) OPTIONAL MATCH (a)-[r]-() DELETE a, r', {}),
        ('MATCH (a:Tag) OPTIONAL MATCH (a)-[r]-() DELETE a, r', {}),
        ('MATCH (a:UploadedObject) OPTIONAL MATCH (a)-[r]-() DELETE a, r', {}),
    ])

    science_tag = Tag(name="science").save()
    query_webhose(gather_news_results(None, sample_path), science_tag)
    science_tag.delete()

    self.assertEqual(len(NewsArticle.nodes.all()), 2)

    first_lookup = ('MATCH (a:NewsArticle '
                    '{external_id: "862248ce467c25d9fa20d66e43f13d7f0800882b"}) '
                    'RETURN a')
    rows, _meta = db.cypher_query(first_lookup)
    self.assertIsNotNone(rows.one)

    second_lookup = ('MATCH (a:NewsArticle '
                     '{external_id: "682408ea92f61d5a44de67ed2aaef7369c401bce"}) '
                     'RETURN a')
    rows, _meta = db.cypher_query(second_lookup)
    self.assertIsNotNone(rows.one)
def test_too_close_to_another_article_content(self):
    """Ingest the near-duplicate-content sample and verify what is stored.

    After clearing NewsArticle, Tag and UploadedObject nodes, the sample
    file ``too_close_to_another_article_content.json`` is passed through
    ``gather_news_results`` and ``query_webhose``. The test expects
    exactly two NewsArticle nodes and a match for each of the two
    expected external ids.
    """
    test_file = settings.PROJECT_DIR + \
        "/sb_news/tests/sample_json/" \
        "too_close_to_another_article_content.json"
    # Remove nodes (and their relationships) left behind by earlier tests.
    query = 'MATCH (a:NewsArticle) OPTIONAL MATCH (a)-[r]-() ' \
            'DELETE a, r'
    query2 = 'MATCH (a:Tag) OPTIONAL MATCH (a)-[r]-() ' \
             'DELETE a, r'
    query3 = 'MATCH (a:UploadedObject) OPTIONAL MATCH (a)-[r]-() ' \
             'DELETE a, r'
    db.cypher_batch_query([(query, {}), (query2, {}), (query3, {})])

    tag = Tag(name="science").save()
    results = gather_news_results(None, test_file)
    query_webhose(results, tag)
    tag.delete()

    self.assertEqual(len(NewsArticle.nodes.all()), 2)

    query = 'MATCH (a:NewsArticle ' \
            '{external_id: "188ed9cfc6e2214c067ad8e46ec6cd10e392646e"}) ' \
            'RETURN a'
    res, _ = db.cypher_query(query)
    self.assertIsNotNone(res.one)

    # Each expected id is looked up once; a second, identical lookup of
    # this id added nothing beyond an extra database round-trip.
    query = 'MATCH (a:NewsArticle ' \
            '{external_id: "c11407c888ddbfedca4a56b1aac482d153ad6039"}) ' \
            'RETURN a'
    res, _ = db.cypher_query(query)
    self.assertIsNotNone(res.one)
def test_title_unique(self):
    """Ingest ``title_unique.json`` and check each title is stored once.

    Clears NewsArticle, Tag and UploadedObject nodes, passes the sample
    file through ``gather_news_results`` and ``query_webhose`` under a
    temporary "science" tag, then expects each of three titles to match
    exactly one NewsArticle node.
    """
    sample_path = (settings.PROJECT_DIR +
                   "/sb_news/tests/sample_json/title_unique.json")
    # Remove nodes (and their relationships) left behind by earlier tests.
    db.cypher_batch_query([
        ('MATCH (a:NewsArticle) OPTIONAL MATCH (a)-[r]-() DELETE a, r', {}),
        ('MATCH (a:Tag) OPTIONAL MATCH (a)-[r]-() DELETE a, r', {}),
        ('MATCH (a:UploadedObject) OPTIONAL MATCH (a)-[r]-() DELETE a, r', {}),
    ])

    science_tag = Tag(name="science").save()
    query_webhose(gather_news_results(None, sample_path), science_tag)
    science_tag.delete()

    sanders_lookup = ('MATCH (a:NewsArticle {title: '
                      '"Sanders\' American Dream Is In Denmark"}) RETURN a')
    rows, _meta = db.cypher_query(sanders_lookup)
    self.assertEqual(len(rows), 1)

    isis_lookup = ('MATCH (a:NewsArticle {title: '
                   '"ISIS Inc: How Oil Fuels The Jihadi Terrorists - '
                   'ft.com"}) RETURN a')
    rows, _meta = db.cypher_query(isis_lookup)
    self.assertEqual(len(rows), 1)

    trump_lookup = ('MATCH (a:NewsArticle {title: '
                    '"Breaking: Donald Trump Stuns With '
                    'Announcement Of Foreign Policy Dream Team"}) RETURN a')
    rows, _meta = db.cypher_query(trump_lookup)
    self.assertEqual(len(rows), 1)
def test_detail(self):
    """Create an article via the list endpoint and fetch its detail view.

    Clears NewsArticle, Tag and UploadedObject nodes, authenticates the
    test client as ``self.user``, POSTs a new article to ``news-list``
    and GETs it back from ``news-detail`` using the returned id. The
    detail response must be HTTP 200 and carry the title in title case
    ('This Is The Title' for the submitted 'This is the title').
    """
    # Remove nodes (and their relationships) left behind by earlier tests.
    query = 'MATCH (a:NewsArticle) OPTIONAL MATCH (a)-[r]-() ' \
            'DELETE a, r'
    query2 = 'MATCH (a:Tag) OPTIONAL MATCH (a)-[r]-() ' \
             'DELETE a, r'
    query3 = 'MATCH (a:UploadedObject) OPTIONAL MATCH (a)-[r]-() ' \
             'DELETE a, r'
    db.cypher_batch_query([(query, {}), (query2, {}), (query3, {})])

    self.client.force_authenticate(user=self.user)
    url = reverse('news-list')
    data = {
        "provider": "sb_crawler",
        "external_id": str(uuid1()),
        "url": "https://www.sagebrew.com",
        "site_full": "https://www.sagebrew.com",
        "site_section": "site_section",
        "title": "This is the title",
        "content": "This is some fake content",
        "title_full": "A full title",
        "language": "en",
        "published": datetime.now(pytz.utc),
        "country": "US",
        "spam_score": 0.0,
        "image": "https://sagebrew-master.s3.amazonaws.com/"
                 "profile_pictures/"
                 "8a274be8-71ee-259c-32fc-c3269a5adf9b-198x200.png",
        "performance_score": 10,
        "crawled": datetime.now(pytz.utc),
    }
    response = self.client.post(url, data=data, format='json')

    url = reverse('news-detail',
                  kwargs={"object_uuid": response.data['id']})
    response = self.client.get(url, format='json')

    # Check the status before touching the payload: on a non-200 response
    # the 'title' key may be missing, and a KeyError would hide the real
    # cause of the failure.
    self.assertEqual(response.status_code, status.HTTP_200_OK)
    self.assertEqual(response.data['title'], 'This Is The Title')
def test_excluded_titles(self):
    """Ingest ``exclude_titles.json`` and expect no article to be stored.

    Clears NewsArticle, Tag and UploadedObject nodes, passes the sample
    file through ``gather_news_results`` and ``query_webhose`` under a
    temporary "science" tag, then asserts that zero NewsArticle nodes
    exist.
    """
    sample_path = (settings.PROJECT_DIR +
                   "/sb_news/tests/sample_json/exclude_titles.json")
    # Remove nodes (and their relationships) left behind by earlier tests.
    db.cypher_batch_query([
        ('MATCH (a:NewsArticle) OPTIONAL MATCH (a)-[r]-() DELETE a, r', {}),
        ('MATCH (a:Tag) OPTIONAL MATCH (a)-[r]-() DELETE a, r', {}),
        ('MATCH (a:UploadedObject) OPTIONAL MATCH (a)-[r]-() DELETE a, r', {}),
    ])

    science_tag = Tag(name="science").save()
    query_webhose(gather_news_results(None, sample_path), science_tag)
    science_tag.delete()

    stored_articles = NewsArticle.nodes.all()
    self.assertEqual(len(stored_articles), 0)
def test_title_reformat(self):
    """Ingest ``title_reformat.json`` and check the stored titles.

    Clears NewsArticle, Tag and UploadedObject nodes, passes the sample
    file through ``gather_news_results`` and ``query_webhose`` under a
    temporary "science" tag, then looks up four articles by external id
    and compares each stored ``title`` with its expected formatted value.
    """
    sample_path = (settings.PROJECT_DIR +
                   "/sb_news/tests/sample_json/title_reformat.json")
    # Remove nodes (and their relationships) left behind by earlier tests.
    db.cypher_batch_query([
        ('MATCH (a:NewsArticle) OPTIONAL MATCH (a)-[r]-() DELETE a, r', {}),
        ('MATCH (a:Tag) OPTIONAL MATCH (a)-[r]-() DELETE a, r', {}),
        ('MATCH (a:UploadedObject) OPTIONAL MATCH (a)-[r]-() DELETE a, r', {}),
    ])

    science_tag = Tag(name="science").save()
    query_webhose(gather_news_results(None, sample_path), science_tag)
    science_tag.delete()

    lookup = ('MATCH (a:NewsArticle '
              '{external_id: "c7dd81cf775476d17fd9effe3a43d13d060eb2c8"}) '
              'RETURN a')
    rows, _meta = db.cypher_query(lookup)
    expected_title = ("Obama's Team Welcomes Castro "
                      "& Cuba To America & To Receive "
                      "Criticism: 'Wouldn't Disagree'..."
                      "DNC; DNC: DNC. Dncabc DNC")
    self.assertEqual(rows.one['title'], expected_title)

    lookup = ('MATCH (a:NewsArticle '
              '{external_id: "c294037cba0aad280b655614c5c776f1c5b453ce"}) '
              'RETURN a')
    rows, _meta = db.cypher_query(lookup)
    expected_title = ("Friends Of Israel - "
                      "The New Yorker")
    self.assertEqual(rows.one['title'], expected_title)

    lookup = ('MATCH (a:NewsArticle '
              '{external_id: "c6dd81cf775476d17fd9effe3a43d13d060eb2c8"}) '
              'RETURN a')
    rows, _meta = db.cypher_query(lookup)
    expected_title = ("What's This Another New "
                      "US USA U.S. U.S.A. Title It's A "
                      "Miracle...")
    self.assertEqual(rows.one['title'], expected_title)

    lookup = ('MATCH (a:NewsArticle '
              '{external_id: "c8dd81cf775476d17fd9effe3a43d13d060eb2c8"}) '
              'RETURN a')
    rows, _meta = db.cypher_query(lookup)
    expected_title = "Yet Another Title! What Is This!"
    self.assertEqual(rows.one['title'], expected_title)
def test_site_not_supported(self):
    """Ingest ``site_not_supported.json`` and expect nothing to be stored.

    Clears NewsArticle, Tag and UploadedObject nodes, passes the sample
    file through ``gather_news_results`` and ``query_webhose`` under a
    temporary "science" tag, then asserts that a query for any
    NewsArticle node yields no result.
    """
    sample_path = (settings.PROJECT_DIR +
                   "/sb_news/tests/sample_json/site_not_supported.json")
    # Remove nodes (and their relationships) left behind by earlier tests.
    db.cypher_batch_query([
        ('MATCH (a:NewsArticle) OPTIONAL MATCH (a)-[r]-() DELETE a, r', {}),
        ('MATCH (a:Tag) OPTIONAL MATCH (a)-[r]-() DELETE a, r', {}),
        ('MATCH (a:UploadedObject) OPTIONAL MATCH (a)-[r]-() DELETE a, r', {}),
    ])

    science_tag = Tag(name="science").save()
    query_webhose(gather_news_results(None, sample_path), science_tag)
    science_tag.delete()

    any_article = 'MATCH (a:NewsArticle) RETURN a'
    rows, _meta = db.cypher_query(any_article)
    self.assertIsNone(rows.one)
def test_quote_query(self):
    """Ingest ``quote_query.json`` and check the stored article title.

    Clears NewsArticle, Tag and UploadedObject nodes, passes the sample
    file through ``gather_news_results`` and ``query_webhose`` under a
    temporary "science" tag, then looks the article up by external id
    and compares its ``title`` with the expected value.
    """
    sample_path = (settings.PROJECT_DIR +
                   "/sb_news/tests/sample_json/quote_query.json")
    # Remove nodes (and their relationships) left behind by earlier tests.
    db.cypher_batch_query([
        ('MATCH (a:NewsArticle) OPTIONAL MATCH (a)-[r]-() DELETE a, r', {}),
        ('MATCH (a:Tag) OPTIONAL MATCH (a)-[r]-() DELETE a, r', {}),
        ('MATCH (a:UploadedObject) OPTIONAL MATCH (a)-[r]-() DELETE a, r', {}),
    ])

    science_tag = Tag(name="science").save()
    query_webhose(gather_news_results(None, sample_path), science_tag)
    science_tag.delete()

    lookup = ('MATCH (a:NewsArticle '
              '{external_id: "c8a3179934b6d4609632383add7fb27ddf3d3842"}) '
              'RETURN a')
    rows, _meta = db.cypher_query(lookup)
    expected_title = ("Breaking: Donald Trump Stuns "
                      "With Announcement Of Foreign "
                      "Policy Dream Team")
    self.assertEqual(rows.one['title'], expected_title)