def run(self, **kwargs):
     """Run a year-wide article collection for this year and the next one.

     Only the year is set on each collection; the two remaining date
     arguments are passed as ``None``. Storage is enabled.
     """
     current_year = datetime.date.today().year
     for offset in (0, 1):
         collector = CollectArticles(get_available_channels(),
                                     current_year + offset, None, None,
                                     use_storage=True)
         worker.run(collector)
 def run(self, **kwargs):
     """Run a collection for today's exact date on every available channel.

     The collector is built with storage enabled and ``force_collect=True``,
     then handed to the worker.
     """
     now = datetime.date.today()
     collector = CollectArticles(
         get_available_channels(),
         now.year, now.month, now.day,
         use_storage=True,
         force_collect=True)
     worker.run(collector)
 def test_get_articles_with_queue(self):
     """Hand a Guardian collection for "2014" / 1 to the worker, without storage."""
     # The runnable object has to be imported explicitly, hence the local
     # imports.
     from brokenpromises.operations import CollectArticles
     from brokenpromises.worker import worker
     channels = ("brokenpromises.channels.guardian", )
     worker.run(CollectArticles(channels, "2014", 1, use_storage=False))
 def run(self, **kwargs):
     """Run one collection per day for the days following today, up to j+7.

     Each day gets its own collector (year, month and day all set, storage
     enabled) which is handed to the worker.
     """
     today = datetime.date.today()
     # range(1, 8) covers j+1 .. j+7. The previous range(1, 7) stopped at j+6
     # although the loop was annotated as reaching j+7.
     for offset in range(1, 8):  # j+7
         target = today + datetime.timedelta(days=offset)
         collector = CollectArticles(get_available_channels(),
                                     target.year, target.month, target.day,
                                     use_storage=True)
         worker.run(collector)
 def run(self, **kwargs):
     """Run a month-wide collection for the current month and the next one.

     Only year and month are set on each collection; the day is passed as
     ``None``. Storage is enabled.
     """
     today = datetime.date.today()
     for offset in range(0, 2):
         # Zero-based month index counted from January of the current year.
         # Deriving both year and month from it keeps them consistent across
         # the December -> January rollover: the year only advances once the
         # index reaches 12, and floor division keeps it an integer.
         month_index = today.month - 1 + offset
         year = today.year + month_index // 12
         month = month_index % 12 + 1
         collector = CollectArticles(get_available_channels(),
                                     year, month, None,
                                     use_storage=True)
         worker.run(collector)
 def test_get_articles(self):
     collector = CollectArticles(("brokenpromises.channels.guardian", ),
                                 "2014", "1")
     results = collector.run()
     print
     print "results:", len(results)
     assert len(results) > 0
     for result in results:
         assert result.ref_dates, "%s : %s" % (result, result.url)
     assert collector.get_report()
     assert collector.get_report(
     ).collector == "brokenpromises.operations.CollectArticles", collector.get_report(
     ).collector
     assert collector.get_report().meta['count'] == len(results)
     assert collector.get_report().meta['related_articles'] <= len(results)
     assert len(collector.get_report().meta['urls_found']) == len(results)
 def test_get_articles_with_storage(self):
     from brokenpromises import Article
     searched_date = (2014, 1, None)
     collector = CollectArticles(("brokenpromises.channels.nytimes", ),
                                 *searched_date,
                                 use_storage=True)
     # replace storage with custom storage (testing db)
     collector.storage = self.testing_storage
     results = collector.run()
     print
     print "results:", len(results)
     assert len(results) > 0
     for result in results:
         assert result.ref_dates, "%s : %s" % (result, result.url)
     assert collector.get_report()
     assert collector.get_report(
     ).collector == "brokenpromises.operations.CollectArticles"
     assert collector.get_report().meta['count'] == len(results)
     assert collector.get_report().meta['related_articles'] <= len(results)
     assert len(collector.get_report().meta['urls_found']) == len(results)
     assert len(
         self.testing_storage.get_reports(
             name="collector", searched_date=searched_date,
             status="done")) == 1, self.testing_storage.get_reports(
                 searched_date)
     results = collector.run()
     assert len(results) > 0, results
     assert type(results[0]) is Article, type(results[0])
     assert len(
         self.testing_storage.get_reports(searched_date=searched_date)) == 2
     assert len(
         self.testing_storage.get_reports(name="collector",
                                          searched_date=searched_date)) == 2
     assert len(
         self.testing_storage.get_reports(name="collector",
                                          searched_date=searched_date,
                                          status="escaped")) == 1
# Example #8
0
options, args = oparser.parse_args()
assert len(args) > 0 and len(args) <= 3

if options.output_file:
    sys.stdout = open(options.output_file, 'a')

channels = brokenpromises.channels.get_available_channels()
if options.channels_file:
    with open(options.channels_file) as f:
        channels = [line.replace("\n", "") for line in f.readlines()]
if options.channels_list:
    channels = options.channels_list.split(",")

collector = CollectArticles(channels,
                            *args,
                            use_storage=options.storage,
                            force_collect=options.force_collect)

if options.mongodb_drop:
    collector.storage.get_database().drop_collection("articles")
    collector.storage.get_database().drop_collection("reports")

results = collector.run()

# OUTPUT
print dumps([_.__dict__ for _ in results]).encode('utf-8')
info("%d articles collected." % (len(results)))
exit()

# EOF