def test_get_articles(self):
    """Run CollectArticles on the Guardian channel and check its report.

    The collector is built with the channel module path
    "brokenpromises.channels.guardian" and the arguments "2014" and "1"
    (presumably year and month -- TODO confirm against CollectArticles).
    The test requires at least one result, a truthy ``ref_dates`` on every
    result, and a report whose ``count``, ``related_articles`` and
    ``urls_found`` meta entries are consistent with the number of results.
    """
    collector = CollectArticles(("brokenpromises.channels.guardian",), "2014", "1")
    results = collector.run()
    # Single-argument print(...) emits the same text as the former Python 2
    # print statements and is also valid syntax on Python 3.
    print("")
    print("results: %d" % len(results))
    assert len(results) > 0
    for result in results:
        assert result.ref_dates, "%s : %s" % (result, result.url)
    # Fetch the report once instead of once per assertion.
    # NOTE(review): assumes get_report() returns the same report on every
    # call -- confirm.
    report = collector.get_report()
    assert report
    assert report.collector == "brokenpromises.operations.CollectArticles", report.collector
    assert report.meta['count'] == len(results)
    assert report.meta['related_articles'] <= len(results)
    assert len(report.meta['urls_found']) == len(results)
def test_get_articles_with_storage(self):
    """Run CollectArticles twice with storage enabled and check the stored reports.

    The collector's storage is swapped for ``self.testing_storage`` before
    the first run. After the first run the test expects exactly one
    "collector" report with status "done" for the searched date. After the
    second run it expects two reports for that date, exactly one of them
    with status "escaped", and the first returned item to be an ``Article``.
    """
    from brokenpromises import Article
    searched_date = (2014, 1, None)
    collector = CollectArticles(("brokenpromises.channels.nytimes",), *searched_date, use_storage=True)
    # replace storage with custom storage (testing db)
    storage = self.testing_storage
    collector.storage = storage

    # --- first run ---
    results = collector.run()
    # Single-argument print(...) emits the same text as the former Python 2
    # print statements and is also valid syntax on Python 3.
    print("")
    print("results: %d" % len(results))
    assert len(results) > 0
    for result in results:
        assert result.ref_dates, "%s : %s" % (result, result.url)
    # Fetch the report once instead of once per assertion.
    # NOTE(review): assumes get_report() returns the same report on every
    # call -- confirm.
    report = collector.get_report()
    assert report
    assert report.collector == "brokenpromises.operations.CollectArticles"
    assert report.meta['count'] == len(results)
    assert report.meta['related_articles'] <= len(results)
    assert len(report.meta['urls_found']) == len(results)
    done_reports = storage.get_reports(name="collector", searched_date=searched_date, status="done")
    # NOTE(review): the failure message passes searched_date positionally,
    # while every other call uses searched_date=... -- confirm which
    # parameter it binds to.
    assert len(done_reports) == 1, storage.get_reports(searched_date)

    # --- second run with the same collector ---
    results = collector.run()
    assert len(results) > 0, results
    assert type(results[0]) is Article, type(results[0])
    assert len(storage.get_reports(searched_date=searched_date)) == 2
    assert len(storage.get_reports(name="collector", searched_date=searched_date)) == 2
    assert len(storage.get_reports(name="collector", searched_date=searched_date, status="escaped")) == 1
def test_get_articles_with_storage(self):
    """Run CollectArticles twice with storage enabled and check the stored reports.

    The collector's storage is swapped for ``self.testing_storage`` before
    the first run. After the first run the test expects exactly one
    "collector" report with status "done" for the searched date. After the
    second run it expects two reports for that date, exactly one of them
    with status "escaped", and the first returned item to be an ``Article``.
    """
    from brokenpromises import Article
    searched_date = (2014, 1, None)
    collector = CollectArticles(("brokenpromises.channels.nytimes",), *searched_date, use_storage=True)
    # replace storage with custom storage (testing db)
    storage = self.testing_storage
    collector.storage = storage

    # --- first run ---
    results = collector.run()
    # Single-argument print(...) emits the same text as the former Python 2
    # print statements and is also valid syntax on Python 3.
    print("")
    print("results: %d" % len(results))
    assert len(results) > 0
    for result in results:
        assert result.ref_dates, "%s : %s" % (result, result.url)
    # Fetch the report once instead of once per assertion.
    # NOTE(review): assumes get_report() returns the same report on every
    # call -- confirm.
    report = collector.get_report()
    assert report
    assert report.collector == "brokenpromises.operations.CollectArticles"
    assert report.meta['count'] == len(results)
    assert report.meta['related_articles'] <= len(results)
    assert len(report.meta['urls_found']) == len(results)
    done_reports = storage.get_reports(name="collector", searched_date=searched_date, status="done")
    # NOTE(review): the failure message passes searched_date positionally,
    # while every other call uses searched_date=... -- confirm which
    # parameter it binds to.
    assert len(done_reports) == 1, storage.get_reports(searched_date)

    # --- second run with the same collector ---
    results = collector.run()
    assert len(results) > 0, results
    assert type(results[0]) is Article, type(results[0])
    assert len(storage.get_reports(searched_date=searched_date)) == 2
    assert len(storage.get_reports(name="collector", searched_date=searched_date)) == 2
    assert len(storage.get_reports(name="collector", searched_date=searched_date, status="escaped")) == 1
def test_get_articles(self):
    """Run CollectArticles on the Guardian channel and check its report.

    The collector is built with the channel module path
    "brokenpromises.channels.guardian" and the arguments "2014" and "1"
    (presumably year and month -- TODO confirm against CollectArticles).
    The test requires at least one result, a truthy ``ref_dates`` on every
    result, and a report whose ``count``, ``related_articles`` and
    ``urls_found`` meta entries are consistent with the number of results.
    """
    collector = CollectArticles(("brokenpromises.channels.guardian",), "2014", "1")
    results = collector.run()
    # Single-argument print(...) emits the same text as the former Python 2
    # print statements and is also valid syntax on Python 3.
    print("")
    print("results: %d" % len(results))
    assert len(results) > 0
    for result in results:
        assert result.ref_dates, "%s : %s" % (result, result.url)
    # Fetch the report once instead of once per assertion.
    # NOTE(review): assumes get_report() returns the same report on every
    # call -- confirm.
    report = collector.get_report()
    assert report
    assert report.collector == "brokenpromises.operations.CollectArticles", report.collector
    assert report.meta['count'] == len(results)
    assert report.meta['related_articles'] <= len(results)
    assert len(report.meta['urls_found']) == len(results)
# Remember to update the README.md file after modifying the options
#
# Command-line driver: parse the options, pick the channels, run a
# CollectArticles collector and print the collected articles.
options, args = oparser.parse_args()
# Validate the positional arguments explicitly: an `assert` is stripped when
# Python runs with -O, which would let a wrong argument count through.
# NOTE(review): assumes oparser is an optparse/argparse-style parser exposing
# .error(), which reports the message and exits -- confirm.
if not 1 <= len(args) <= 3:
    oparser.error("expected between 1 and 3 positional arguments, got %d" % len(args))
if options.output_file:
    # From here on, everything written to stdout is appended to this file.
    # The handle is not closed explicitly in this block.
    sys.stdout = open(options.output_file, 'a')
# Channel selection: start from every available channel; a channels file
# overrides that, and an explicit comma-separated list overrides both.
channels = brokenpromises.channels.get_available_channels()
if options.channels_file:
    with open(options.channels_file) as f:
        # Strip surrounding whitespace (including the "\r" of CRLF files) and
        # skip blank lines so that no empty channel name is passed on.
        stripped_lines = (line.strip() for line in f)
        channels = [name for name in stripped_lines if name]
if options.channels_list:
    channels = options.channels_list.split(",")
# The positional arguments are forwarded unchanged to the collector.
collector = CollectArticles(channels, *args, use_storage=options.storage, force_collect=options.force_collect)
if options.mongodb_drop:
    # Drop both collections before collecting.
    # NOTE(review): relies on collector.storage being set -- confirm this
    # holds when options.storage is falsy.
    collector.storage.get_database().drop_collection("articles")
    collector.storage.get_database().drop_collection("reports")
results = collector.run()
# OUTPUT
# A parenthesised single argument prints the same text as the former Python 2
# print statement.
# NOTE(review): on Python 3 the .encode() result would print as a bytes repr;
# revisit if the script is ported.
print(dumps([article.__dict__ for article in results]).encode('utf-8'))
info("%d articles collected." % (len(results)))
# sys.exit() instead of exit(): the bare name is injected by the `site`
# module and is not guaranteed to exist (e.g. under `python -S`).
sys.exit()
# EOF
# Command-line driver: parse the options, pick the channels, run a
# CollectArticles collector and print the collected articles.
options, args = oparser.parse_args()
# Validate the positional arguments explicitly: an `assert` is stripped when
# Python runs with -O, which would let a wrong argument count through.
# NOTE(review): assumes oparser is an optparse/argparse-style parser exposing
# .error(), which reports the message and exits -- confirm.
if not 1 <= len(args) <= 3:
    oparser.error("expected between 1 and 3 positional arguments, got %d" % len(args))
if options.output_file:
    # From here on, everything written to stdout is appended to this file.
    # The handle is not closed explicitly in this block.
    sys.stdout = open(options.output_file, 'a')
# Channel selection: start from every available channel; a channels file
# overrides that, and an explicit comma-separated list overrides both.
channels = brokenpromises.channels.get_available_channels()
if options.channels_file:
    with open(options.channels_file) as f:
        # Strip surrounding whitespace (including the "\r" of CRLF files) and
        # skip blank lines so that no empty channel name is passed on.
        stripped_lines = (line.strip() for line in f)
        channels = [name for name in stripped_lines if name]
if options.channels_list:
    channels = options.channels_list.split(",")
# The positional arguments are forwarded unchanged to the collector.
collector = CollectArticles(channels, *args, use_storage=options.storage, force_collect=options.force_collect)
if options.mongodb_drop:
    # Drop both collections before collecting.
    # NOTE(review): relies on collector.storage being set -- confirm this
    # holds when options.storage is falsy.
    collector.storage.get_database().drop_collection("articles")
    collector.storage.get_database().drop_collection("reports")
results = collector.run()
# OUTPUT
# A parenthesised single argument prints the same text as the former Python 2
# print statement.
# NOTE(review): on Python 3 the .encode() result would print as a bytes repr;
# revisit if the script is ported.
print(dumps([article.__dict__ for article in results]).encode('utf-8'))
info("%d articles collected." % (len(results)))
# sys.exit() instead of exit(): the bare name is injected by the `site`
# module and is not guaranteed to exist (e.g. under `python -S`).
sys.exit()
# EOF