def run(self, **kwargs):
    """Queue one article collection for the current year and one for the next.

    Only the year component of the searched date is set; month and day are
    passed as ``None``. Keyword arguments are accepted but not used.
    """
    current_year = datetime.date.today().year
    for offset in (0, 1):
        searched_date = (current_year + offset, None, None)
        job = CollectArticles(get_available_channels(), *searched_date, use_storage=True)
        worker.run(job)
def run(self, **kwargs):
    """Queue an article collection for today's date with ``force_collect`` on.

    The searched date is today's year, month and day. Keyword arguments are
    accepted but not used.
    """
    now = datetime.date.today()
    job = CollectArticles(
        get_available_channels(),
        now.year, now.month, now.day,
        use_storage=True,
        force_collect=True
    )
    worker.run(job)
def test_get_articles_with_queue(self):
    """Hand a Guardian-only collection (no storage) to the worker."""
    # The runnable object has to be imported explicitly in this scope.
    from brokenpromises.operations import CollectArticles
    from brokenpromises.worker import worker
    guardian_only = ("brokenpromises.channels.guardian", )
    job = CollectArticles(guardian_only, "2014", 1, use_storage=False)
    worker.run(job)
def run(self, **kwargs):
    """Queue one article collection per day for the days following today.

    Keyword arguments are accepted but not used.
    """
    start = datetime.date.today()
    # NOTE(review): the original comment read "j+7", but range(1, 7) covers
    # day offsets 1 through 6 only -- confirm whether +7 should be included.
    for offset in range(1, 7):
        target = start + datetime.timedelta(days=offset)
        job = CollectArticles(
            get_available_channels(),
            target.year, target.month, target.day,
            use_storage=True
        )
        worker.run(job)
def run(self, **kwargs):
    """Queue one article collection for the current month and one for the next.

    The searched date carries a year and a month; the day is passed as
    ``None``. Keyword arguments are accepted but not used.
    """
    today = datetime.date.today()
    for offset in range(2):
        # Use a zero-based month index so that December (index 11) stays in
        # the current year and only an index of 12 or more carries over.
        # The previous formula added a year as soon as month + offset
        # reached 12, which sent December (and November's "next month") to
        # December of the following year.
        year_carry, month_index = divmod(today.month - 1 + offset, 12)
        date = (today.year + year_carry, month_index + 1, None)
        collector = CollectArticles(get_available_channels(), *date, use_storage=True)
        worker.run(collector)
def test_get_articles(self): collector = CollectArticles(("brokenpromises.channels.guardian", ), "2014", "1") results = collector.run() print print "results:", len(results) assert len(results) > 0 for result in results: assert result.ref_dates, "%s : %s" % (result, result.url) assert collector.get_report() assert collector.get_report( ).collector == "brokenpromises.operations.CollectArticles", collector.get_report( ).collector assert collector.get_report().meta['count'] == len(results) assert collector.get_report().meta['related_articles'] <= len(results) assert len(collector.get_report().meta['urls_found']) == len(results)
def test_get_articles_with_storage(self): from brokenpromises import Article searched_date = (2014, 1, None) collector = CollectArticles(("brokenpromises.channels.nytimes", ), *searched_date, use_storage=True) # replace storage with custom storage (testing db) collector.storage = self.testing_storage results = collector.run() print print "results:", len(results) assert len(results) > 0 for result in results: assert result.ref_dates, "%s : %s" % (result, result.url) assert collector.get_report() assert collector.get_report( ).collector == "brokenpromises.operations.CollectArticles" assert collector.get_report().meta['count'] == len(results) assert collector.get_report().meta['related_articles'] <= len(results) assert len(collector.get_report().meta['urls_found']) == len(results) assert len( self.testing_storage.get_reports( name="collector", searched_date=searched_date, status="done")) == 1, self.testing_storage.get_reports( searched_date) results = collector.run() assert len(results) > 0, results assert type(results[0]) is Article, type(results[0]) assert len( self.testing_storage.get_reports(searched_date=searched_date)) == 2 assert len( self.testing_storage.get_reports(name="collector", searched_date=searched_date)) == 2 assert len( self.testing_storage.get_reports(name="collector", searched_date=searched_date, status="escaped")) == 1
options, args = oparser.parse_args() assert len(args) > 0 and len(args) <= 3 if options.output_file: sys.stdout = open(options.output_file, 'a') channels = brokenpromises.channels.get_available_channels() if options.channels_file: with open(options.channels_file) as f: channels = [line.replace("\n", "") for line in f.readlines()] if options.channels_list: channels = options.channels_list.split(",") collector = CollectArticles(channels, *args, use_storage=options.storage, force_collect=options.force_collect) if options.mongodb_drop: collector.storage.get_database().drop_collection("articles") collector.storage.get_database().drop_collection("reports") results = collector.run() # OUTPUT print dumps([_.__dict__ for _ in results]).encode('utf-8') info("%d articles collected." % (len(results))) exit() # EOF