import os
from datetime import timedelta
from glob import glob

import config
import util


def load_all_articles():
    all_blob_path = os.path.join(config.NPR_DATA_DIR, "all_articles.gz")
    if not os.path.exists(all_blob_path):
        # First run: merge the per-day .gz files into a single cached blob.
        all_blobs = []
        for filename in glob(os.path.join(config.NPR_DATA_DIR, "*.gz")):
            all_blobs.extend(util.load_from_disk(filename))
        util.save_to_disk(all_blob_path, all_blobs)
    else:
        all_blobs = util.load_from_disk(all_blob_path)
    return all_blobs
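

# Keep only the top 50 scoring article matches per petition
# (assumption: to_top_n trims article_scores in place).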
to_top_n(article_scores, n=50)

# Sanity check: print the best-matched articles for a couple of petitions.
show_best_articles(all_articles, petitions, article_scores, petition_number=10)

show_best_articles(all_articles, petitions, article_scores, petition_number=100)

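# Per-petition counts (presumably signature totals), passed to get_all_json below.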
petition_counts = load_petition_counts()

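# Combine petitions, their matched articles, scores, and counts into
# JSON-serializable blobs, one dict per petition.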
all_clean_blobs = get_all_json(all_articles, petitions, article_scores, petition_counts)

# Restrict to 2013 blobs that have at least MIN_ARTICLES matching articles.
blob_paths = []

URL_OUTPUT_PATH = '/post-files/20140101-whitehouse-petitions/blobs'

i = 0
for b in all_clean_blobs:
    if not b['petition_date'].startswith('2013'):
        continue
    # A missing close date comes through as a float (presumably NaN), which
    # isn't valid JSON; normalize it to None.
    if isinstance(b['petition_close'], float):
        b['petition_close'] = None
    # Keep only the articles that were themselves published in 2013.
    b['articles'] = [a for a in b['articles'] if a['date'].startswith('2013')]
    if len(b['articles']) < MIN_ARTICLES:
        continue
    filename = 'blob-{}.json'.format(i)
    save_to_disk('blobs/' + filename, b, compress=False)
    # Use a hash of the petition title as the URL fragment for this blob
    # (note: on Python 3, string hashes vary between runs).
    blob_paths.append({'fragment': str(abs(hash(b['petition_title']))),
                       'url': '{}/{}'.format(URL_OUTPUT_PATH, filename)})
    i += 1

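# Write an index of every saved blob: one {'fragment': ..., 'url': ...} entry per petition.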
save_to_disk('blobs/all_petitions.json', blob_paths, compress=False)


def crawl_all_articles(num_days, start_date):
    """Crawl one day of articles at a time for num_days days, going forward from start_date."""
    for i in range(num_days):
        article_date = start_date + timedelta(days=i)
        filename = article_filename(article_date)
        save_to_disk(filename, articles_for_date(article_date))
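

# Example usage (hypothetical parameters): crawl every day of 2013.
#     from datetime import datetime
#     crawl_all_articles(365, datetime(2013, 1, 1))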