Example 1
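A command-line crawler: it parses an issue, a year range, an article limit, and a sleep interval, then runs google_news_run and writes the results to <issue>.json.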
import argparse

# NOTE: the snippet begins mid-file. The import, the check_positive
# helper, and the -i/--issue and -s/--start arguments below are
# reconstructed from how args is used later; the exact flag names
# are assumptions.
def check_positive(value):
    ivalue = int(value)
    if ivalue <= 0:
        raise argparse.ArgumentTypeError('%s is not a positive integer' % value)
    return ivalue

ap = argparse.ArgumentParser()
ap.add_argument('-i',
                '--issue',
                required=True,
                help='issue to crawl')
ap.add_argument('-s',
                '--start',
                type=check_positive,
                required=True,
                help='year start to crawl')
ap.add_argument('-e',
                '--end',
                type=check_positive,
                required=True,
                help='year end to crawl')
ap.add_argument('-l',
                '--limit',
                type=check_positive,
                required=True,
                help='limit of articles to crawl')
ap.add_argument('-p',
                '--sleep',
                type=check_positive,
                default=10,
                help='seconds to sleep for every 10 articles')
args = vars(ap.parse_args())

# deferred import: the crawler module is only loaded after the arguments parse
from core import google_news_run
import json

results = google_news_run(args['issue'],
                          limit=args['limit'],
                          year_start=args['start'],
                          year_end=args['end'],
                          debug=False,
                          sleep_time_every_ten_articles=args['sleep'])

with open(args['issue'] + '.json', 'w') as fopen:
    fopen.write(json.dumps(results))
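Assuming the reconstructed -i/--issue and -s/--start flags above, a typical invocation would look like python crawl.py -i climate -s 2015 -e 2021 -l 500; the script name and argument values here are illustrative, and -p/--sleep falls back to its default of 10 seconds.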
Example 2
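A batch variant of the same crawler: it loops over a fixed topic list, skips any topic that already has a JSON file in the working directory, and saves each result set to <topic>.json.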
from core import google_news_run
import json
import os

# Malay/Indonesian topics to crawl: 'mimpi' (dream), 'angan-angan' (daydream)
topics = ['mimpi', 'angan-angan']

for topic in topics:
    topic = topic.lower()
    # topic = 'isu ' + topic  # optional prefix ('isu' means 'issue')
    file = topic + '.json'
    # skip topics that already have a saved result file in the working directory
    if file in os.listdir(os.getcwd()):
        print('skipped:', file)
        continue
    
    print('crawling', topic)
    results = google_news_run(
        topic,
        limit=100000,
        year_start=2000,
        year_end=2021,
        debug=False,
        sleep_time_every_ten_articles=10
    )

    with open(file, 'w') as fopen:
        fopen.write(json.dumps(results))
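Both examples depend on the same core.google_news_run interface. The stub below is a hypothetical sketch inferred only from the two call sites above: the keyword parameter names match the arguments used, while the positional name, the defaults, and the return shape are assumptions.

def google_news_run(topic,
                    limit=100,
                    year_start=2000,
                    year_end=2021,
                    debug=False,
                    sleep_time_every_ten_articles=10):
    """Crawl Google News for `topic` between year_start and year_end,
    sleeping for the given number of seconds after every ten articles,
    and return a JSON-serializable collection of articles."""
    raise NotImplementedError('implemented in the core module')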