# Intersect the titles returned by wikipydia's query_current_events with the
# article files stored locally under $WIKITOPICS/data/articles.
#
# Command-line arguments, as read below:
#   sys.argv[1]  sub-directory of data/articles to scan
#                (presumably a language code -- confirm against callers)
#   sys.argv[2]  start date, formatted YYYY-MM-DD
#   sys.argv[3]  optional number of days to scan, counting backwards from the
#                start date (default 1); also passed to query_current_events
import wikipydia
from wikipydia import query_current_events
import datetime
import sys
import os

date_arg = sys.argv[2]
# Fixed-position slicing: characters 0-3 year, 5-6 month, 8-9 day.
date = datetime.date(int(date_arg[:4]), int(date_arg[5:7]), int(date_arg[8:10]))

loops = 1
if len(sys.argv) > 3:
    loops = int(sys.argv[3])

current_news = query_current_events(date, loops)
top_news = []

wikitopics_path = os.environ['WIKITOPICS']
language_path = wikitopics_path + "/data/articles/" + sys.argv[1]
# Year directory of the start date. The loop below no longer reads it; it is
# kept with its original value in case code outside this section does.
articles_path = language_path + "/" + date_arg[:4] + "/"

for i in range(loops):
    new_date = date - datetime.timedelta(days=i)
    # Take the year directory from the day being visited, not from the start
    # date: otherwise a look-back that crosses 1 January searches
    # <start-year>/<previous-year-date>, which never exists, and those days
    # are silently dropped.
    articles = os.path.join(
        language_path,
        new_date.strftime("%Y"),
        new_date.strftime("%Y-%m-%d"),
    )
    if os.path.exists(articles):
        # Keep files whose name ends in "es" and drop the last 10 characters
        # (presumably a ".sentences" suffix -- confirm) to get the title.
        top_news.extend(
            infile[:-10]
            for infile in os.listdir(articles)
            if infile[-2:] == "es"
        )

# Titles that are both current events and locally stored articles.
intersection = list(set(current_news) & set(top_news))
# Fetch Wikipedia current events for every day from START_DATE to END_DATE
# (inclusive). Each day that has events is either printed (dry run) or
# written to OUTPUT_DIR/<YYYY-MM-DD>.events as one line of JSON.
#
# Only the two positional dates are expected in sys.argv here; the options
# named in the usage text are presumably parsed and removed from sys.argv
# before this point, where DRYRUN and OUTPUT_DIR get set -- confirm.
if len(sys.argv) != 3:
    sys.stderr.write("Usage: %s [--dry-run] [-o OUTPUT_DIR] START_DATE END_DATE\n" % sys.argv[0])
    sys.exit(1)

start_date = utils.convert_date(sys.argv[1])
end_date = utils.convert_date(sys.argv[2])
if start_date > end_date:
    sys.stderr.write("START_DATE is later than END_DATE\n")
    sys.exit(1)

if os.path.exists(OUTPUT_DIR):
    if not os.path.isdir(OUTPUT_DIR):
        sys.stderr.write(OUTPUT_DIR + " is not a directory.\n")
        sys.exit(1)
elif not DRYRUN:
    # A dry run only prints, so it must not leave a directory behind.
    os.makedirs(OUTPUT_DIR)

one_day = datetime.timedelta(days=1)
date = start_date
while date <= end_date:
    events = wikipydia.query_current_events(date)
    if events:
        filename = date.isoformat() + ".events"
        if DRYRUN:
            # Single-argument print in parentheses produces the same output
            # as the Python 2 print statement.
            print(filename)
            for event in events:
                # Show each '*' in the event text as a tab.
                print(event["text"].encode('utf8').replace('*', '\t'))
        else:
            with open(os.path.join(OUTPUT_DIR, filename), 'w') as f:
                f.write(simplejson.dumps(events) + '\n')
    # Advance even when the day had no events, otherwise the loop never ends.
    date += one_day