# Download NFL regular-season schedule pages and save the gathered data as
# one JSON file per week under data/.
import json
import argparse

from gatherer import Page, Fetch, Cache

with open("pages/schedule.json") as fp:
    schedule_json = json.load(fp)

c = Cache("cache")
f = Fetch(headers={'User-Agent': 'gatherer agent'}, cache=c)
schedule = Page.from_json(schedule_json)

# URL template for a single regular-season week; filled with (year, week).
BASE_URL = "http://www.nfl.com/schedules/{}/REG{}"


def get_week(year, week):
    """Fetch one week's schedule page and write the gathered data to disk.

    The page at ``BASE_URL.format(year, week)`` is fetched with the shared
    fetcher ``f``, passed through ``schedule.gather`` and dumped as indented
    JSON to ``data/<year>-<week, zero-padded to 2 digits>.json``.

    Args:
        year: season year, interpolated into the URL and the file name.
        week: week number, interpolated into the URL and the file name.
    """
    dom = f.get(BASE_URL.format(year, week))
    s = schedule.gather(dom)
    with open("data/{}-{:02d}.json".format(year, week), "w") as fp:
        json.dump(s, fp, indent=2)


def get_season(year, weeks=17):
    """Fetch and save every week of a season, starting at week 1.

    Delegates to ``get_week`` so the fetch/gather/save logic lives in one
    place.

    Args:
        year: season year, forwarded to ``get_week``.
        weeks: number of weeks to download; defaults to 17, the value the
            original loop hard-coded.
    """
    for week in range(1, weeks + 1):
        get_week(year, week)
# NOTE(review): whitespace-collapsed excerpt, truncated at both ends. The first
# statement reads from `fp`, a file handle opened before this excerpt begins,
# and the trailing `def get_coordinates(hometown):` has no body in view. The
# code is left byte-identical because the original line breaks cannot be
# restored without guessing at the missing parts.
#
# Statements visible on the line below, in order:
#   - roster_rules: JSON loaded from the already-open `fp`
#   - team_urls: JSON loaded from "team_pages.json"
#   - cache = Cache("cache"), passed to both wiki_city (city.City) and
#     fetcher (Fetch); `city` is not imported anywhere in this excerpt
#   - roster_page = Page.from_json(roster_rules); KNOWN_CITIES = {}
#   - get_roster(url): fetches `url` with fetcher.get and returns
#     roster_page.gather(dom)
#   - get_coordinates(hometown): only its first comment is visible
#
# NOTE(review): as written on a single physical line, everything after the
# first `#` is a comment to the Python tokenizer, so none of the statements
# following `roster_rules = json.load(fp)` would execute until the line
# breaks are restored.
roster_rules = json.load(fp) # load a dict with the urls for all of the FBS D1-A teams' roster urls with open("team_pages.json") as fp: team_urls = json.load(fp) cache = Cache("cache") wiki_city = city.City(city.city_rule_set, { "headers": { "User-Agent": "gatherer" }, "sleep_time": 0, "cache": cache }) fetcher = Fetch(headers={"User-Agent": "gatherer"}, cache=cache) roster_page = Page.from_json(roster_rules) KNOWN_CITIES = {} def get_roster(url): """ given the url (on espn.com) for a football team, return an array of dicts with hometown and position keys """ dom = fetcher.get(url) return roster_page.gather(dom) def get_coordinates(hometown): # if a player does not live in the US or Canada, his hometown is listed as --
# Gather actor and movie profile data using the rule sets stored under
# rules/www_rottentomatoes_com/.
import json

from gatherer import Fetch, Cache, Page

cache = Cache("cache")
fetcher = Fetch(cache=cache)

with open("rules/www_rottentomatoes_com/actor.json") as fp:
    actor_json = json.load(fp)
actor_page = Page.from_json(actor_json)

with open("rules/www_rottentomatoes_com/movie.json") as fp:
    movie_json = json.load(fp)
movie_page = Page.from_json(movie_json)


def get_actor(url):
    """Return the data gathered from an actor's profile page.

    Returns None when the fetcher yields no DOM for ``url``.
    """
    # The second positional argument is passed only for actor pages; its
    # meaning is defined by Fetch.get -- TODO confirm against gatherer.
    page_dom = fetcher.get(url, True)
    if page_dom is None:
        return None
    return actor_page.gather(page_dom)


def get_movie(url):
    """Return the data gathered from a movie's profile page.

    Returns None when the fetcher yields no DOM for ``url``.
    """
    page_dom = fetcher.get(url)
    if page_dom is None:
        return None
    return movie_page.gather(page_dom)
# NOTE(review): whitespace-collapsed excerpt; every statement shares one
# physical line and the final `parser.add_argument('-filename', ...` call is
# cut off in the middle of its argument list. The code is left byte-identical
# because the missing tail cannot be restored without guessing.
#
# Statements visible on the line below, in order:
#   - os.makedirs("data", exist_ok=True) creates the output directory
#   - sub_json is loaded from "submissions.json" and turned into the Page `p`
#   - f = Fetch(...) with the User-Agent header "gatherer"
#   - fetch_and_save(filename, subreddit=None): uses "http://www.reddit.com"
#     when subreddit is None, otherwise "http://www.reddit.com/r/<subreddit>";
#     fetches it with f.get and, only when the result is not None, gathers
#     with `p` and dumps the data as JSON to "data/<filename>"
#   - the __main__ section builds an argparse parser with a -subreddit option
#     and the start of a -filename option (truncated)
import os import json import argparse from gatherer import Page, Fetch os.makedirs("data", exist_ok=True) with open("submissions.json") as fp: sub_json = json.load(fp) f = Fetch(headers={"User-Agent": "gatherer"}) p = Page.from_json(sub_json) def fetch_and_save(filename, subreddit=None): if subreddit is None: url = "http://www.reddit.com" else: url = "http://www.reddit.com/r/{}".format(subreddit) dom = f.get(url) if dom is not None: data = p.gather(dom) path = "data/{}".format(filename) with open(path, "w") as fp: json.dump(data, fp) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('-subreddit', dest='subreddit', help='subreddit to get data from') parser.add_argument('-filename', dest='filename',
from gatherer import Fetch, Cache

# Cache constructed with the name "cache" (presumably a directory -- confirm
# against gatherer) and handed to the fetcher below.
fs_cache = Cache("cache")

# Module-level fetcher that sends a custom User-Agent header.
fetcher = Fetch(
    headers={"User-Agent": "Saturday Night Live Data"},
    cache=fs_cache,
)