                help='year start to crawl')
ap.add_argument('-e', '--end', type=check_positive, required=True,
                help='year end to crawl')
ap.add_argument('-l', '--limit', type=check_positive, required=True,
                help='limit of articles to crawl')
ap.add_argument('-p', '--sleep', type=check_positive, default=10,
                help='seconds to sleep for every 10 articles')
args = vars(ap.parse_args())

from core import google_news_run
import json

# crawl Google News for the requested issue within the given year range
results = google_news_run(
    args['issue'],
    limit=args['limit'],
    year_start=args['start'],
    year_end=args['end'],
    debug=False,
    sleep_time_every_ten_articles=args['sleep'],
)

# dump the crawled articles to '<issue>.json'
with open(args['issue'] + '.json', 'w') as fopen:
    fopen.write(json.dumps(results))
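# A rough example invocation of the CLI script above, assuming it is saved as
# crawl_issue.py (the filename and the '-i'/'--issue' flag are assumptions;
# '--issue' is inferred from args['issue'], the other flags appear in the
# parser above):
#
#   python3 crawl_issue.py -i 'isu semasa' -s 2015 -e 2021 -l 1000 -p 10
#
# This would crawl up to 1000 articles for the example query between 2015 and
# 2021, sleeping 10 seconds after every 10 articles, and write the results to
# 'isu semasa.json'.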
from core import google_news_run
import json
import os

topics = ['mimpi', 'angan-angan']

for topic in topics:
    topic = topic.lower()
    # topic = 'isu ' + topic
    file = topic + '.json'
    # skip topics that already have a dump in the working directory
    if file in os.listdir(os.getcwd()):
        print('passed: ', file)
        continue
    print('crawling', topic)
    # crawl up to 100000 articles per topic between 2000 and 2021,
    # sleeping 10 seconds after every 10 articles
    results = google_news_run(
        topic,
        limit=100000,
        year_start=2000,
        year_end=2021,
        debug=False,
        sleep_time_every_ten_articles=10,
    )
    with open(file, 'w') as fopen:
        fopen.write(json.dumps(results))
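# A minimal follow-up sketch (not part of the original scripts) for loading
# the per-topic dumps back into memory once crawling finishes; it only assumes
# the '<topic>.json' naming convention used above:
#
#   import json, os
#
#   crawled = {}
#   for name in os.listdir('.'):
#       if name.endswith('.json'):
#           with open(name) as fopen:
#               crawled[name[:-len('.json')]] = json.loads(fopen.read())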