def processESPNShotsPage(url):
    '''
    Handles grabbing the x,y coords from the ESPN shots page;
    returns a dict;
    '''
    shotSoup = makeSoup(makePage(url), url, parser='xml')
    shots = shotSoup.findAll('Shot')
    shotDict = getESPNShotDict(shots)
    return shotDict
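# getESPNShotDict is defined elsewhere in this project and not shown here;
# the sketch below is only a guess at its shape, assuming each <Shot>
# element carries its id and coordinates as XML attributes (the attribute
# names 'id', 'x', and 'y' are assumptions, not ESPN's documented markup).
def getESPNShotDict(shots):
    '''
    Sketch only: collects each <Shot> element's coords into a dict keyed by shot id;
    '''
    shotDict = {}
    for shot in shots:
        shotDict[shot.get('id')] = (shot.get('x'), shot.get('y'))   # assumed attribute names
    return shotDict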
def getESPNData(url, ptype):
    '''
    Dispatches on ptype ('box', 'pbp', or 'extra') and returns the parsed data;
    '''
    raw     = makePage(url)
    soup    = makeSoup(raw, url)
    if ptype=='box':
        labels  = CONTENT_DICT[ptype]
        tables  = soup.find_all('div', {labels[0]:labels[1]})
        data    = getESPNbox(tables[0])
    elif ptype=='pbp':
        labels  = CONTENT_DICT[ptype]
        if 'Play-By-Play not available' in soup.text:   # game has no PBP feed
            data = {'head':'No PBP data for game', 'content':"!!!!!!"}
        else:
            tables  = soup.find_all('div', {labels[0]:labels[1]})
            data    = getESPNpbp(tables[1])     # TODO: confirm the PBP div is the second match
    elif ptype=='extra':
        if raw: text = getText(raw)
        else: text = ''
        data    = getESPNext(soup, EXTRA_LIST)
        data['text'] = text
    else:
        raise ValueError("unknown ptype: %s" % ptype)
    return data
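# CONTENT_DICT and EXTRA_LIST are module-level lookups defined elsewhere in
# the project; from the find_all call above, CONTENT_DICT appears to map each
# ptype to an (attribute, value) pair used to locate the right <div>.  The
# pairs, keys, and URL below are placeholders for illustration only:
CONTENT_DICT = {'box': ('class', 'mod-content'),     # assumed attribute/value pair
                'pbp': ('class', 'mod-content')}     # assumed attribute/value pair
EXTRA_LIST = ['gameTime', 'venue']                   # hypothetical keys for getESPNext

game_url = 'http://example.com/espn/boxscore?gameId=0000000'    # placeholder URL
box_data = getESPNData(game_url, 'box')     # box score table
pbp_data = getESPNData(game_url, 'pbp')     # play-by-play, or the '!!!!!!' fallback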
import sys, os

from prepworkURL import makeSoup
from prepworkURL import makePage

ap_raw_link = "http://hosted.ap.org/dynamic/fronts/RAW?SITE=AP&SECTION=HOME"

page = makePage(ap_raw_link)
soup = makeSoup(page, ap_raw_link)
# Content lives in spans with class 'apCaption'
ap_stories = soup('span', 'apCaption')      # 200 per page
# .string is the story title
titles = [s.string for s in ap_stories]
# links are a bit different, since they're nested in an <a> inside the span
links = [s.a['href'] for s in ap_stories]
# zip them into (title, link) tuples
stories = list(zip(titles, links))
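
# makePage and makeSoup come from prepworkURL and are not shown here.  A
# minimal sketch of what they might look like, assuming requests plus
# BeautifulSoup (the real helpers may differ in error handling, headers,
# or parser choice):
import requests
from bs4 import BeautifulSoup

def makePage(url):
    '''Fetch the raw page body; return '' on any request failure (assumed behavior).'''
    try:
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
        return resp.text
    except requests.RequestException:
        return ''

def makeSoup(raw, url, parser='html.parser'):
    '''Parse the raw page with BeautifulSoup; the url argument is assumed to be kept only for logging.'''
    return BeautifulSoup(raw, parser)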