Ejemplo n.º 1
0
 def get_item_count_for_day(self, day_directory):
     """
     Count the articles stored across all batch files of one day.

     Every name yielded by ``utils.get_json_files(day_directory)`` is joined
     to ``day_directory``, parsed as JSON, and the length of its
     ``'articles'`` entry is added to a running total.

     :param day_directory: path of the directory holding the batch files.
     :returns: the summed number of articles; 0 when no batch file is found.

     A batch file whose JSON content has no ``'articles'`` entry is not
     handled here: the lookup error propagates to the caller.
     """
     item_count = 0
     for batch_file in utils.get_json_files(day_directory):
         with open(os.path.join(day_directory, batch_file)) as f:
             items = json.load(f)
         # The file is fully parsed at this point, so the handle is already
         # released before the count is accumulated.
         item_count += len(items['articles'])
     return item_count
Ejemplo n.º 2
0
    def get_queued_items_by_batch(self, day_directory):
        """
        Load every batch file of a day and return the batches in sorted order.

        The queued items of a day live in one json file per batch, laid out
        like this:

         - 2011-26-11/
            - ...
            - 21.00.00.json
            - 22.00.00.json
            - ...

        According to the original notes, each file holds two lists of
        (title, url) pairs: one for the actual news stories, and one for the
        occasionally promoted blogposts.

        Returns a list of ``(batch_hour, items)`` tuples, where ``batch_hour``
        is the file name minus its last five characters (the ``.json``
        suffix in the layout above) and ``items`` is the parsed JSON content.
        The list is sorted on ``batch_hour`` only.
        """
        def load_batch(filename):
            # Label first, then parse: same order of operations as a plain loop.
            label = filename[:-5]
            with open(os.path.join(day_directory, filename)) as handle:
                return label, json.load(handle)

        loaded = [
            load_batch(filename)
            for filename in utils.get_json_files(day_directory)
        ]
        # Sort on the label alone so the parsed payloads are never compared.
        return sorted(loaded, key=lambda entry: entry[0])