コード例 #1
0
def getESPNbox(data_in, mode='url'):
    '''
    Extract the box-score rows from an ESPN box-score page.

    The box score is taken to be the first 'table' element whose text
    contains 'STARTERS'; every 'tr' row of that table is then split
    into its header cells and its data cells.

    Parameters:
        data_in: page source handed to genBSandURLtools.getDataType;
            with the default mode this is a box-score url obtained
            from the score-summary ESPN page.
        mode: forwarded unchanged to genBSandURLtools.getDataType
            (presumably selects how data_in is interpreted -- confirm
            against that helper).

    Returns:
        dict with three keys:
            'details'     -- one list per table row with the text of
                             its 'th' cells (headers, team names);
            'content'     -- one list per table row with the text of
                             its 'td' cells (player data);
            'playerlinks' -- the result of getESPNplayerlinks for the
                             same rows.

    Raises:
        AttributeError: no table containing 'STARTERS' was found.
    '''
    tables = genBSandURLtools.getDataType(data_in, 'table', mode)
    # Only the first matching table is ever used, so stop at the first hit
    # instead of collecting every match.
    box_table = next((t for t in tables if 'STARTERS' in t.text), None)
    if box_table is None:
        # Call-form raise: the old "raise Exc, msg" statement is a
        # SyntaxError on Python 3; this spelling is valid on 2 and 3.
        raise AttributeError("Houston, there is a f*****g problem")
    summary = box_table.findAll('tr')
    details = []
    content = []
    for line in summary:
        # Every row appends to both lists (an empty list when it has no
        # cells of that kind), so both stay index-aligned with the rows.
        details.append([str(h.text) for h in line.findAll('th')])
        content.append([str(d.text) for d in line.findAll('td')])
    playerlink_dict = getESPNplayerlinks(summary)
    return {
        'details': details,
        'content': content,
        'playerlinks': playerlink_dict
    }
コード例 #2
0
ファイル: parseESPN.py プロジェクト: abresler/NBA-Data-Stuff
def getESPNpbp(data_in, mode='url'):
    '''
    Extract the play-by-play rows from an ESPN play-by-play page.

    The play-by-play is taken to be the first 'table' element whose
    text contains 'TIME'. Row 1 of that table supplies the column
    headers; rows from index 2 onward supply the content.

    Parameters:
        data_in: page source handed to genBSandURLtools.getDataType;
            with the default mode this is a play-by-play url obtained
            from the score-summary ESPN page.
        mode: forwarded unchanged to genBSandURLtools.getDataType
            (presumably selects how data_in is interpreted -- confirm
            against that helper).

    Returns:
        dict with two keys:
            'head'    -- text of the 'th' cells of row 1
                         (time, away, score, home);
            'content' -- one list per remaining row with the text of
                         its 'td' cells.

    Raises:
        AttributeError: no table containing 'TIME' was found.
        IndexError: the matched table has fewer than two rows.
    '''
    tables = genBSandURLtools.getDataType(data_in, 'table', mode)
    # Only the first matching table is ever used, so stop at the first hit
    # instead of collecting every match.
    pbp_table = next((t for t in tables if 'TIME' in t.text), None)
    if pbp_table is None:
        # Call-form raise: the old "raise Exc, msg" statement is a
        # SyntaxError on Python 3; this spelling is valid on 2 and 3.
        raise AttributeError("Houston, there is a f*****g problem")
    rows = pbp_table.findAll('tr')
    # Headers (e.g., home and away team for the game) come from row 1.
    header = [str(h.text) for h in rows[1].findAll('th')]  # time, away, score, home
    content = [[str(e.text) for e in line.findAll('td')] for line in rows[2:]]
    return {'head': header, 'content': content}
コード例 #3
0
def getESPNpbp(data_in, mode='url'):
    '''
    Pull the play-by-play table out of an ESPN play-by-play page.

    The first 'table' element whose text contains 'TIME' is used. Its
    row at index 1 provides the column headers, and every row from
    index 2 onward provides one entry of content.

    Parameters:
        data_in: input passed straight to genBSandURLtools.getDataType;
            with the default mode it is a play-by-play url taken from
            the score-summary ESPN page.
        mode: passed straight to genBSandURLtools.getDataType
            (assumed to describe what data_in is -- verify against
            that helper).

    Returns:
        dict of the form {'head': [...], 'content': [[...], ...]} where
        'head' is the 'th' cell text of row 1 (time, away, score, home)
        and 'content' holds the 'td' cell text of each later row.

    Raises:
        AttributeError: no table containing 'TIME' was found.
        IndexError: the matched table has fewer than two rows.
    '''
    tables = genBSandURLtools.getDataType(data_in, 'table', mode)
    # First match wins; no need to build the full list of candidates.
    pbp_table = next((t for t in tables if 'TIME' in t.text), None)
    if pbp_table is None:
        # "raise Exc, msg" does not parse on Python 3; calling the
        # exception class is accepted by both Python 2 and Python 3.
        raise AttributeError("Houston, there is a f*****g problem")
    pbp = pbp_table.findAll('tr')
    # Row 1 carries the headers (e.g., home and away team for the game).
    header = [str(h.text)
              for h in pbp[1].findAll('th')]  # time, away, score, home
    content = [
        [str(e.text) for e in line.findAll('td')]
        for line in pbp[2:]
    ]
    return {'head': header, 'content': content}
コード例 #4
0
ファイル: parseESPN.py プロジェクト: abresler/NBA-Data-Stuff
def getESPNbox(data_in, mode='url'):
    '''
    Pull the box-score table out of an ESPN box-score page.

    The first 'table' element whose text contains 'STARTERS' is used;
    each of its 'tr' rows is split into header-cell text and data-cell
    text.

    Parameters:
        data_in: input passed straight to genBSandURLtools.getDataType;
            with the default mode it is a box-score url taken from the
            score-summary ESPN page.
        mode: passed straight to genBSandURLtools.getDataType
            (assumed to describe what data_in is -- verify against
            that helper).

    Returns:
        dict with keys 'details' (per-row list of 'th' cell text:
        headers, team names), 'content' (per-row list of 'td' cell
        text: player data) and 'playerlinks' (the value returned by
        getESPNplayerlinks for the same rows).

    Raises:
        AttributeError: no table containing 'STARTERS' was found.
    '''
    tables          = genBSandURLtools.getDataType(data_in, 'table', mode)
    # First match wins; no need to build the full list of candidates.
    box_table       = next((t for t in tables if 'STARTERS' in t.text), None)
    if box_table is None:
        # "raise Exc, msg" does not parse on Python 3; calling the
        # exception class is accepted by both Python 2 and Python 3.
        raise AttributeError("Houston, there is a f*****g problem")
    summary         = box_table.findAll('tr')
    details         = []
    content         = []
    for line in summary:
        # Both lists get one entry per row, so they stay aligned with
        # the table rows even when a row has no 'th' or no 'td' cells.
        details.append([str(h.text) for h in line.findAll('th')])
        content.append([str(d.text) for d in line.findAll('td')])
    playerlink_dict = getESPNplayerlinks(summary)
    return {'details':details, 'content':content,
            'playerlinks':playerlink_dict}