import wikipydia
from wikipydia import query_current_events

import datetime
import sys
import os

# Collect the titles that appear both in Wikipedia's current-events listing
# and among the locally stored article files for a window of days.
#
# Command-line arguments, as read below:
#   sys.argv[1]  sub-directory name under <WIKITOPICS>/data/articles
#                (presumably a language code -- TODO confirm)
#   sys.argv[2]  most recent date of the window; the year is read from
#                characters 0-3, the month from 5-6 and the day from 8-9
#                (e.g. YYYY-MM-DD)
#   sys.argv[3]  optional number of days in the window, counted backwards
#                from sys.argv[2]; defaults to 1
# The WIKITOPICS environment variable must be set (KeyError otherwise).
date_arg = sys.argv[2]
date = datetime.date(int(date_arg[:4]), int(date_arg[5:7]), int(date_arg[8:10]))

loops = 1
if len(sys.argv) > 3:
    loops = int(sys.argv[3])

# NOTE(review): query_current_events is assumed to return an iterable of
# hashable items comparable with the names gathered in top_news -- confirm
# against wikipydia.
current_news = query_current_events(date, loops)
top_news = []

wikitopics_path = os.environ['WIKITOPICS']

# Root of the per-year article directories for the requested sub-directory.
language_path = wikitopics_path + "/data/articles/" + sys.argv[1] + "/"
# Kept with its original value (year of the start date) in case code outside
# this section still reads it.
articles_path = language_path + date_arg[:4] + "/"

for i in range(loops):
    previousdays = datetime.timedelta(days=i)
    new_date = date - previousdays
    # Use the year of the day being inspected, not the year of the start
    # date: a window that crosses 1 January would otherwise look for the
    # earlier days inside the wrong year directory and silently skip them.
    articles = (language_path + ("%04d" % new_date.year) + "/"
                + new_date.strftime("%Y-%m-%d"))
    # isdir (rather than exists) so a stray regular file with a date name
    # does not make os.listdir raise.
    if os.path.isdir(articles):
        for infile in os.listdir(articles):
            # Keep names ending in "es" and drop their last 10 characters;
            # presumably this targets a 10-character suffix such as
            # ".sentences" -- TODO confirm.
            if infile.endswith("es"):
                top_news.append(infile[:-10])

# Items present in both sources (order is not preserved by the set operation).
intersection = list(set(current_news) & set(top_news))
# Example #2
# 0
# Fetch Wikipedia current events for every day from START_DATE to END_DATE
# (inclusive) and either print them (dry run) or write one JSON file per day
# into OUTPUT_DIR.
#
# NOTE(review): OUTPUT_DIR, DRYRUN, utils and simplejson are not defined in
# this section; they are presumably set up earlier in the file, together with
# the handling of the --dry-run / -o options named in the usage text -- confirm.


def _fail(message):
    """Write *message* to stderr and terminate with exit status 1."""
    sys.stderr.write(message)
    sys.exit(1)


# Exactly two positional arguments are expected at this point.
if len(sys.argv) != 3:
    _fail("Usage: %s [--dry-run] [-o OUTPUT_DIR] START_DATE END_DATE\n" % sys.argv[0])

start_date = utils.convert_date(sys.argv[1])
end_date = utils.convert_date(sys.argv[2])
if start_date > end_date:
    _fail("START_DATE is later than END_DATE\n")

# Create the output directory when missing; refuse a non-directory path.
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)
elif not os.path.isdir(OUTPUT_DIR):
    _fail(OUTPUT_DIR + " is not a directory.\n")

one_day = datetime.timedelta(days=1)
date = start_date
while date <= end_date:
    events = wikipydia.query_current_events(date)
    # Days for which the query returns nothing falsy-empty are skipped.
    if events:
        filename = date.isoformat() + ".events"
        if not DRYRUN:
            # One line of JSON per day, in a file named after the date.
            with open(os.path.join(OUTPUT_DIR, filename), 'w') as out:
                out.write(simplejson.dumps(events) + '\n')
        else:
            # Parenthesised single-argument print behaves the same as the
            # print statement under Python 2.
            print(filename)
            for entry in events:
                print(entry["text"].encode('utf8').replace('*', '\t'))
    date += one_day