/
getdraftdata.py
227 lines (178 loc) · 6.41 KB
/
getdraftdata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
module: getdraftdata.py
author: Zach Lamberty
created: 2014-08-31
Description:
Hit the web for draft data; save it locally or just return it
Usage:
    python getdraftdata.py [-x]
"""
import argparse
import csv
import datetime
import lxml.html
import os
import requests
import zachlog
#-----------------------#
# Module Constants #
#-----------------------#
# URL template of the ESPN projections page; "{}" is filled with the league id.
_ESPN_PREDICTION_BASE = "http://games.espn.go.com/ffl/tools/projections?leagueId={}"

# XPath fragments; the parsing functions in this module splice them into
# full expressions with str.format.
_X_TABLE = 'table[@id="playertable_0"]'
_X_HEADER = 'tr[@class="playerTableBgRowSubhead tableSubHead"]'
_X_ROWS = 'tr[@class="pncPlayerRow playerTableBgRow0" or @class="pncPlayerRow playerTableBgRow1"]'
_X_PAGE_NAV = 'div[@class="paginationNav"]'

# Default output file: ffl_data_<YYYYMMDD>.csv next to this script.  Only the
# file name goes through strftime -- formatting the joined path would treat
# any "%" in the directory name as a date directive and corrupt the path.
_OUT = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    datetime.datetime.now().strftime('ffl_data_%Y%m%d.csv'),
)
# Mascot name -> team abbreviation.  Looked up for rows whose player name
# contains "D/ST": those rows carry no ", TEAM" part, so the first word of
# the name is used as the key here.  Keys are in alphabetical order.
MASCOT = {
    '49ers': 'SF', 'Bears': 'Chi', 'Bengals': 'Cin', 'Bills': 'Buf',
    'Broncos': 'Den', 'Browns': 'Cle', 'Buccaneers': 'TB', 'Cardinals': 'Ari',
    'Chargers': 'SD', 'Chiefs': 'KC', 'Colts': 'Ind', 'Cowboys': 'Dal',
    'Dolphins': 'Mia', 'Eagles': 'Phi', 'Falcons': 'Atl', 'Giants': 'NYG',
    'Jaguars': 'Jax', 'Jets': 'NYJ', 'Lions': 'Det', 'Packers': 'GB',
    'Panthers': 'Car', 'Patriots': 'NE', 'Raiders': 'Oak', 'Rams': 'StL',
    'Ravens': 'Bal', 'Redskins': 'Was', 'Saints': 'NO', 'Seahawks': 'Sea',
    'Steelers': 'Pit', 'Texans': 'Hou', 'Titans': 'Ten', 'Vikings': 'Min',
}
# logging
# zachlog is a project-local module; presumably Config().configure() installs
# the logging configuration at import time -- TODO confirm against zachlog.
zachlog.Config().configure()
# module-level logger shared by the functions in this file
logger = zachlog.getLogger(__name__)
#-------------------------------#
# ESPN prediction info #
#-------------------------------#
def espn_get_prediction_data(espnPredBase=_ESPN_PREDICTION_BASE,
                             leagueId=209006, xTable=_X_TABLE,
                             xHeader=_X_HEADER, xPageNav=_X_PAGE_NAV):
    """ Fetch every projection page and flatten the rows into one listdict

    All arguments are handed unchanged to espn_prediction_pages.  Returns a
    single list holding the row dicts of all pages, in page order.
    """
    logger.info("Fetching ESPN prediction data")
    pages = espn_prediction_pages(espnPredBase, leagueId, xTable, xHeader,
                                  xPageNav)
    # each page is itself a list of row dicts; concatenate them
    return [row for pageRows in pages for row in pageRows]
def espn_prediction_pages(espnPredBase=_ESPN_PREDICTION_BASE, leagueId=209006,
                          xTable=_X_TABLE, xHeader=_X_HEADER,
                          xPageNav=_X_PAGE_NAV):
    """ Generator over the parsed prediction tables, one per result page

    Starts at espnPredBase formatted with leagueId and follows the "NEXT"
    pagination link until a page has no prediction table or no next link.

    Args:
        espnPredBase: url template with one "{}" placeholder for the league id
        leagueId: ESPN league id substituted into espnPredBase
        xTable: xpath fragment selecting the player table
        xHeader: xpath fragment selecting the table's header row
        xPageNav: xpath fragment selecting the pagination container

    Yields:
        the listdict returned by espn_get_prediction for each page
    """
    url = espnPredBase.format(leagueId)
    # An empty or missing next link ends the walk.  (Falling back to the base
    # url on an empty href would refetch page one forever.)
    while url:
        # get new page and parse to html object
        logger.debug("parsing url {}".format(url))
        page = requests.get(url)
        html = lxml.html.fromstring(page.text)

        # check if it has info
        if not espn_has_prediction_info(html, xTable, xHeader):
            # Plain return ends a generator; raising StopIteration inside one
            # is converted to RuntimeError on Python 3.7+ (PEP 479).
            return

        yield espn_get_prediction(html, xTable, xHeader)
        # honour the caller's pagination xpath instead of the default
        url = espn_get_url_next(html, xPageNav)
def espn_has_prediction_info(h, xTable=_X_TABLE, xHeader=_X_HEADER):
    """ Check and see whether this html object contains a subheader row

    Returns True only when espn_get_prediction_headers finds at least one
    header cell.  Truthiness is used rather than an ``is not None`` test
    because an xpath query that matches nothing yields an empty list, which
    must also count as "no prediction info".
    """
    return bool(espn_get_prediction_headers(h, xTable, xHeader))
def espn_get_prediction(html, xTable=_X_TABLE, xHeader=_X_HEADER, xRows=_X_ROWS):
    """ Turn the player table of one page into a listdict

    Each matching table row becomes a dict keyed by the header cell texts;
    cell texts are stripped of surrounding whitespace.  Pairing is done with
    zip, so surplus headers or cells are dropped.

    Raises:
        ValueError: if espn_get_prediction_headers returns None
    """
    columnNames = espn_get_prediction_headers(html, xTable, xHeader)
    if columnNames is None:
        raise ValueError("This is not a valid table page!")

    rowPath = '//{}/{}'.format(xTable, xRows)
    records = []
    for tr in html.xpath(rowPath):
        cells = [td.text_content().strip() for td in tr.xpath('./td')]
        records.append(dict(zip(columnNames, cells)))
    return records
def espn_get_url_next(html, xPageNav=_X_PAGE_NAV):
    """ Parse the html object for a "next" link of the expected type

    Looks at the <a> children of the pagination container and returns the
    href of the first one whose text contains "NEXT".

    Returns:
        the href string, or None when there is no such link, the link has no
        href, or the page could not be queried
    """
    try:
        anchors = html.xpath('//{}/a'.format(xPageNav))
    except Exception as err:
        # stay best-effort (unusable html object / bad xpath fragment), but
        # no longer swallow KeyboardInterrupt or SystemExit
        logger.debug("could not look up next page link: {}".format(err))
        return None

    for anchor in anchors:
        if 'NEXT' in anchor.text_content():
            # only the first matching link counts; a missing href gives None
            return anchor.attrib.get('href')
    return None
def espn_get_prediction_headers(html, xTable=_X_TABLE, xHeader=_X_HEADER):
    """ Read the column names from the header row of the player table

    Returns:
        list of the stripped text of every <td> in the header row, or None
        when the page has no such cells or could not be queried
    """
    try:
        xheaders = html.xpath('//{}//{}//td'.format(xTable, xHeader))
        headers = [h.text_content().strip() for h in xheaders]
    except Exception as err:
        # stay best-effort (unusable html object / bad xpath fragment), but
        # no longer swallow KeyboardInterrupt or SystemExit
        logger.debug("could not read prediction headers: {}".format(err))
        return None
    # A query that matches nothing returns an empty list rather than raising;
    # report that as None so callers' "is None" checks see the missing table.
    return headers or None
def _espn_split_player_cell(ptp):
    """ Split a 'PLAYER, TEAM POS' cell into (name, team, list of positions) """
    # name/team and position are separated by a non-breaking space; anything
    # after the position field is ignored
    pt, pos = ptp.split(u"\xa0")[0:2]
    if 'D/ST' in pt:
        # defense rows carry no ", TEAM" part; the first word is the mascot
        playerName = pt
        playerTeam = MASCOT[pt.split(' ')[0]]
    else:
        # split on the last ", " so a comma inside the name does not break
        # the two-way unpacking
        playerName, playerTeam = pt.rsplit(', ', 1)
    return playerName, playerTeam, pos.split(', ')


def espn_prediction_to_file(outname=_OUT, espnPredBase=_ESPN_PREDICTION_BASE,
                            leagueId=209006, xTable=_X_TABLE,
                            xHeader=_X_HEADER, xPageNav=_X_PAGE_NAV):
    """ Grab the projections from the ESPN website and write them to a csv

    Every row's 'PLAYER, TEAM POS' column is replaced by FIRST, LAST, TEAM
    and POS columns, and its 'TYPE' column is renamed to 'F TEAM'.  A player
    listed with several positions gets one row per position; the rows for
    the additional positions are written after all the primary rows.

    Args:
        outname: path of the csv file to write
        espnPredBase, leagueId, xTable, xHeader, xPageNav: passed through to
            espn_get_prediction_data

    If no rows were fetched, nothing is written and outname is left alone.
    """
    pred = espn_get_prediction_data(espnPredBase, leagueId, xTable, xHeader,
                                    xPageNav)
    if not pred:
        # without a first row there are no column names for the csv header;
        # bail out before open() truncates an existing file
        logger.info("No ESPN prediction data found; not writing {}".format(outname))
        return

    multiPosPred = []

    # clean up first
    for player in pred:
        playerName, playerTeam, posList = _espn_split_player_cell(
            player.pop('PLAYER, TEAM POS')
        )

        splitName = playerName.split(' ')
        player['FIRST'] = splitName[0]
        player['LAST'] = ' '.join(splitName[1:])
        player['TEAM'] = playerTeam

        # Assign to the appropriate fantasy team
        player['F TEAM'] = player.pop('TYPE')

        # Some players have two positions: the row itself keeps the first,
        # every further position gets its own copy of the row
        player['POS'] = posList[0]
        for extraPos in posList[1:]:
            newPlayer = player.copy()
            newPlayer['POS'] = extraPos
            multiPosPred.append(newPlayer)

    # NOTE(review): on Python 3 the csv docs recommend open(..., newline='');
    # left as-is because Python 2's open() does not accept that argument.
    with open(outname, 'w') as fOut:
        csvOut = csv.DictWriter(fOut, fieldnames=list(pred[0].keys()))
        csvOut.writeheader()
        csvOut.writerows(pred)
        csvOut.writerows(multiPosPred)
#-------------------------------#
# Main routine #
#-------------------------------#
def _parse_args():
    """ Parse the command line and return the resulting namespace

    The only option defined is the boolean flag -x / --xample.
    """
    cliParser = argparse.ArgumentParser()
    cliParser.add_argument(
        "-x", "--xample",
        action='store_true',
        help="An Example",
    )
    return cliParser.parse_args()
if __name__ == '__main__':
    # Parse first so that -h or an unknown flag exits before any network
    # access; the parsed namespace is not consumed yet.
    args = _parse_args()
    # This module defines no main(); calling it raised NameError.  Writing
    # the ESPN projections to the default csv is the module's end-to-end
    # entry point, so run that instead.
    espn_prediction_to_file()