def processESPNShotsPage(url):
    '''
    Handles grabbing the shot data from the ESPN shots page.

    The page at `url` is fetched with makePage and parsed by makeSoup
    using parser='xml'. Every 'Shot' element found in the parsed document
    is handed to getESPNShotDict, and that function's result is returned
    unchanged (a dict of x,y coords, per the original author's note).
    '''
    shotSoup = makeSoup(makePage(url), url, parser='xml')
    # find_all is the spelling the rest of this file uses; findAll is only
    # the legacy alias for the same search.
    # NOTE(review): assumes makeSoup returns a bs4 object here as well -- confirm.
    shots = shotSoup.find_all('Shot')
    return getESPNShotDict(shots)
def getESPNData(url, ptype):
    '''
    Handles which data is being called for.

    Fetches `url` with makePage, parses it with makeSoup, and dispatches
    on `ptype`:

      'box'   -- result of getESPNbox on the first div matching the
                 attribute/value pair stored in CONTENT_DICT['box']
      'pbp'   -- result of getESPNpbp on the second div matching
                 CONTENT_DICT['pbp'], or a placeholder dict with 'head'
                 and 'content' keys when the page text contains
                 'Play-By-Play not available'
      'extra' -- result of getESPNext(soup, EXTRA_LIST) with the page
                 text (getText of the raw page, or '' when the raw page
                 is falsy) stored under its 'text' key

    Raises ValueError for any other `ptype`. The check runs before the
    page is fetched, so a bad call no longer costs a request and no longer
    ends in an UnboundLocalError at the return statement.

    NOTE(review): a second definition of getESPNData appears later in this
    file and replaces this one when the module is loaded; one of the two
    should probably be removed.
    '''
    if ptype not in ('box', 'pbp', 'extra'):
        raise ValueError(
            "unknown ptype {0!r}; expected 'box', 'pbp' or 'extra'".format(ptype))

    raw = makePage(url)
    soup = makeSoup(raw, url)

    if ptype == 'box':
        labels = CONTENT_DICT[ptype]
        tables = soup.find_all('div', {labels[0]: labels[1]})
        data = getESPNbox(tables[0])
    elif ptype == 'pbp':
        labels = CONTENT_DICT[ptype]
        if 'Play-By-Play not available' in soup.text:  # nfid
            data = {'head': 'No PBP data for game', 'content': "!!!!!!"}
        else:
            tables = soup.find_all('div', {labels[0]: labels[1]})
            # The original author flagged this index for checking
            # ("check this plz"); it is left as written.
            data = getESPNpbp(tables[1])
    else:  # ptype == 'extra'
        text = getText(raw) if raw else ''
        data = getESPNext(soup, EXTRA_LIST)
        data['text'] = text
    return data
def getESPNData(url, ptype):
    '''
    Handles which data is being called for.

    `ptype` chooses what is extracted from the page at `url`:

      'box'   -- getESPNbox applied to the first div whose attribute
                 CONTENT_DICT['box'][0] equals CONTENT_DICT['box'][1]
      'pbp'   -- getESPNpbp applied to the second such div for
                 CONTENT_DICT['pbp']; when the page text contains
                 'Play-By-Play not available' a placeholder dict with
                 'head' and 'content' keys is returned instead
      'extra' -- getESPNext(soup, EXTRA_LIST), with data['text'] set to
                 getText(raw), or to '' when the fetched page is falsy

    Raises ValueError when `ptype` is none of the above. Previously such
    a call fetched the page and then failed with UnboundLocalError on the
    return statement; the check now happens before any fetching.

    NOTE(review): an earlier definition with the same name precedes this
    one in the file. This later definition is the one that stays bound;
    the duplicate should probably be removed.
    '''
    known_types = ('box', 'pbp', 'extra')
    if ptype not in known_types:
        raise ValueError(
            "unknown ptype {0!r}; expected one of {1!r}".format(ptype, known_types))

    raw = makePage(url)
    soup = makeSoup(raw, url)

    if ptype == 'box':
        labels = CONTENT_DICT[ptype]
        tables = soup.find_all('div', {labels[0]: labels[1]})
        return getESPNbox(tables[0])

    if ptype == 'pbp':
        labels = CONTENT_DICT[ptype]
        if 'Play-By-Play not available' in soup.text:  # nfid
            return {'head': 'No PBP data for game', 'content': "!!!!!!"}
        tables = soup.find_all('div', {labels[0]: labels[1]})
        # Index 1 was marked "check this plz" by the original author;
        # kept as written.
        return getESPNpbp(tables[1])

    # Only 'extra' is left once the validation above has passed.
    if raw:
        text = getText(raw)
    else:
        text = ''
    data = getESPNext(soup, EXTRA_LIST)
    data['text'] = text
    return data
import sys
import os

from prepworkURL import makeSoup
from prepworkURL import makePage

# Script: fetch the AP "RAW" front page and collect (title, link) pairs
# for the stories listed on it.
#
# The explanatory notes below used to be bare triple-quoted strings. One of
# them ended in four quote characters, which leaves a stray quote behind and
# stops the file from parsing, so they are plain comments now.

ap_raw_link = "http://hosted.ap.org/dynamic/fronts/RAW?SITE=AP&SECTION=HOME"
page = makePage(ap_raw_link)
soup = makeSoup(page, ap_raw_link)

# Content is in spans of class 'apCaption'.
ap_stories = soup('span', 'apCaption')  # 200 per page (original author's note)

# .string is the story title
titles = [s.string for s in ap_stories]

# Links are a bit different, since they're nested under the <a> tag inside
# each span.
links = [s.a['href'] for s in ap_stories]

# Zip them into (title, link) tuples. list() keeps `stories` a reusable list
# on Python 3 as well, where zip() alone gives a one-shot iterator.
stories = list(zip(titles, links))