def get_item_count_for_day(self, day_directory):
    """Return the total number of articles queued for one day.

    Every JSON batch file that ``utils.get_json_files`` reports for
    ``day_directory`` is loaded, and the length of its ``'articles'``
    list is added to the running total.

    Args:
        day_directory: Path of the directory holding the day's batch files.

    Returns:
        The summed length of the ``'articles'`` lists of all batch files
        (0 when the directory has no batch files).

    Raises:
        KeyError: If a batch file has no ``'articles'`` entry.
    """
    item_count = 0
    for batch_file in utils.get_json_files(day_directory):
        with open(os.path.join(day_directory, batch_file)) as f:
            items = json.load(f)
        # Count after the file is closed; only the parsed data is needed.
        item_count += len(items['articles'])
    return item_count
def get_queued_items_by_batch(self, day_directory):
    """
    Load every batch file of a day and return the batches ordered by hour.

    The queued items of a day live in one json file per batch, laid out
    like this:

    - 2011-26-11/
        - ...
        - 21.00.00.json
        - 22.00.00.json
        - ...

    Each file holds two lists of (title, url) pairs: one with the actual
    news stories and one with the occasionally promoted blogposts.

    Returns a list of (batch_hour, items) tuples, where batch_hour is the
    file name with its last five characters (the '.json' suffix) removed
    and items is the decoded content of that file. The list is sorted in
    ascending order of batch_hour.
    """
    batches = []
    for filename in utils.get_json_files(day_directory):
        # Drop the trailing '.json' to obtain the batch label.
        hour_label = filename[:-5]
        batch_path = os.path.join(day_directory, filename)
        with open(batch_path) as handle:
            content = json.load(handle)
        batches.append((hour_label, content))
    return sorted(batches, key=lambda entry: entry[0])