예제 #1
0
import json
import argparse

from gatherer import Page, Fetch, Cache

# Load the JSON rule description for the schedule page. The path is relative
# to the current working directory, so the script must be run from the
# directory that contains "pages/".
with open("pages/schedule.json") as fp:
    schedule_json = json.load(fp)

# Module-level fetcher shared by get_season and get_week. It is handed a
# Cache constructed from the string "cache" (presumably a cache location on
# disk -- confirm against gatherer) and a custom User-Agent header.
c = Cache("cache")
f = Fetch(headers={'User-Agent': 'gatherer agent'}, cache=c)

# Page object built from the loaded rules; its gather() method is applied to
# each DOM returned by the fetcher.
schedule = Page.from_json(schedule_json)


def get_season(year, weeks=17):
    """
    fetch and save the regular season schedule for every week of a season

    Each week is handled by get_week, which fetches that week's schedule
    page and writes the gathered data to data/<year>-<week>.json (week
    zero-padded to two digits).

    year  -- season year, used in both the schedule url and the file name
    weeks -- number of weeks to fetch, counting from week 1; defaults to 17,
             the range that used to be hard-coded here
    """
    # Delegate to get_week so the url template and the output format are
    # defined in one place instead of being duplicated in both functions.
    for week in range(1, weeks + 1):
        get_week(year, week)


def get_week(year, week):
    """
    fetch the regular season schedule page for one week of a season and save
    the gathered data as json

    year -- season year, used in both the schedule url and the file name
    week -- week number, used in both the schedule url and the file name

    The data is written to data/<year>-<week>.json with the week zero-padded
    to two digits. Nothing is written when the fetcher returns None.
    """
    BASE_URL = "http://www.nfl.com/schedules/{}/REG{}"
    dom = f.get(BASE_URL.format(year, week))
    # NOTE(review): assumes Fetch.get returns None when a page cannot be
    # fetched, as the "dom is not None" checks in other gatherer scripts
    # suggest. Skip the week rather than hand None to gather().
    if dom is None:
        return
    s = schedule.gather(dom)
    with open("data/{}-{:02d}.json".format(year, week), "w") as fp:
        json.dump(s, fp, indent=2)
예제 #2
0
파일: team.py 프로젝트: pshrmn/viz-archive
    roster_rules = json.load(fp)

# Load a dict of roster-page urls for all of the FBS (D1-A) teams. The path
# is relative to the current working directory.
with open("team_pages.json") as fp:
    team_urls = json.load(fp)

# One Cache instance is shared by the city lookup and the roster fetcher.
cache = Cache("cache")
# City helper from the `city` module (imported outside this excerpt). It is
# given that module's rule set plus fetch options: a User-Agent header, a
# sleep_time of 0 and the shared cache.
# NOTE(review): how City interprets these options is not visible here.
wiki_city = city.City(city.city_rule_set, {
    "headers": {
        "User-Agent": "gatherer"
    },
    "sleep_time": 0,
    "cache": cache
})

# Fetcher and Page used by get_roster; roster_rules is loaded earlier in the
# file.
fetcher = Fetch(headers={"User-Agent": "gatherer"}, cache=cache)
roster_page = Page.from_json(roster_rules)
# Starts empty; presumably filled with already-resolved cities elsewhere in
# the file -- verify against its users.
KNOWN_CITIES = {}


def get_roster(url):
    """
    fetch a football team's roster page and return the data gathered from it

    url -- address of the team's page (on espn.com)

    The result is whatever roster_page.gather produces for the fetched page:
    an array of dicts with hometown and position keys.
    """
    return roster_page.gather(fetcher.get(url))


def get_coordinates(hometown):
    # if a player does not live in the US or Canada, his hometown is listed as --
예제 #3
0
파일: dom.py 프로젝트: pshrmn/foraging
import json
from gatherer import Fetch, Cache, Page

# Module-level fetcher shared by get_actor and get_movie, backed by a Cache
# constructed from the string "cache" (presumably a cache location on disk --
# confirm against gatherer).
cache = Cache("cache")
fetcher = Fetch(cache=cache)

# Rules for an actor's profile page, loaded from JSON and turned into a Page.
# Paths are relative to the current working directory.
with open("rules/www_rottentomatoes_com/actor.json") as fp:
    actor_json = json.load(fp)
actor_page = Page.from_json(actor_json)

# Rules for a movie's page, loaded and built the same way.
with open("rules/www_rottentomatoes_com/movie.json") as fp:
    movie_json = json.load(fp)
movie_page = Page.from_json(movie_json)


def get_actor(url):
    """
    fetch an actor's profile page and return a dict with the data gathered
    from it

    Returns None when the fetcher gives back None for the url.
    """
    # NOTE(review): the second positional argument (True) is interpreted by
    # Fetch.get; its meaning is not visible in this file.
    page = fetcher.get(url, True)
    if page is None:
        return None
    return actor_page.gather(page)


def get_movie(url):
    """
    fetch a movie's profile page and return a dict with the data gathered
    from it

    Returns None when the fetcher gives back None for the url.
    """
    page = fetcher.get(url)
    return None if page is None else movie_page.gather(page)
예제 #4
0
import os
import json
import argparse

from gatherer import Page, Fetch

# Make sure the output directory used by fetch_and_save exists; exist_ok
# keeps this from failing when it is already there.
os.makedirs("data", exist_ok=True)
# Load the JSON rule description for a submissions listing. The path is
# relative to the current working directory.
with open("submissions.json") as fp:
    sub_json = json.load(fp)

# Module-level fetcher (no cache is passed here) and the Page built from the
# loaded rules; both are used by fetch_and_save.
f = Fetch(headers={"User-Agent": "gatherer"})
p = Page.from_json(sub_json)


def fetch_and_save(filename, subreddit=None):
    """
    fetch a reddit listing page and save the gathered data as json

    filename  -- name of the file to write inside the data/ directory
    subreddit -- subreddit whose page is fetched; when None the reddit
                 front page is fetched instead

    Nothing is written when the fetcher returns None for the page.
    """
    # Start from the front page and switch to the subreddit url if one was
    # given.
    url = "http://www.reddit.com"
    if subreddit is not None:
        url = "http://www.reddit.com/r/{}".format(subreddit)

    page = f.get(url)
    if page is None:
        return

    gathered = p.gather(page)
    with open("data/{}".format(filename), "w") as out:
        json.dump(gathered, out)

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-subreddit', dest='subreddit',
                        help='subreddit to get data from')
    parser.add_argument('-filename', dest='filename',
예제 #5
0
파일: pages.py 프로젝트: pshrmn/viz-archive
from gatherer import Fetch, Cache

# Module-level fetcher with a custom User-Agent header, backed by a Cache
# constructed from the string "cache" (presumably a cache location on disk --
# confirm against gatherer).
fs_cache = Cache("cache")
fetcher = Fetch(headers={"User-Agent": "Saturday Night Live Data"}, cache=fs_cache)