import os
import re
import time

import boto3
import requests
from bs4 import BeautifulSoup


def from_results_line(line):
    """Parses a single line from the results txt file."""
    info = line.split(';')
    # Skip the header lines
    if info[0] == 'Week':
        return None
    ln, fn = info[3].lower().split(',') if ',' in info[3] else ('', '')
    name = (fn + " " + ln).strip().lower()
    position = get_position(info[4])
    team = get_team(info[5])
    # Lines without a parseable salary are unusable; skip them.
    try:
        float(info[9])
    except ValueError:
        return None
    return {
        'week': info[0],
        'position': position,
        'team': team,
        'name': get_name(name, team, position),
        'points': float(info[8]),
        'salary': float(info[9]),
        'opponent': get_team(info[7]),
        'home': info[6] == 'h',
    }

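# A hypothetical smoke test for from_results_line, not part of the original
# module. The sample row is invented, but its column layout follows the
# indices the parser reads (0=week, 3="Last, First", 4=position, 5=team,
# 6=home flag, 7=opponent, 8=points, 9=salary); it assumes the helper
# lookups (get_position, get_team, get_name) accept these invented values.
def _example_from_results_line():
    sample = '1;x;x;smith, john;qb;ne;h;mia;21.4;6700'
    parsed = from_results_line(sample)
    assert parsed['week'] == '1' and parsed['salary'] == 6700.0
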
def crawl():
    """Crawls the rotogrinders predictions."""
    response = requests.get('https://rotogrinders.com/projected-stats/nfl?site=draftkings')
    soup = BeautifulSoup(response.content, 'html.parser')
    ret = {}
    players = soup.find_all('tr', class_='player')
    for player in players:
        # Get Position
        _position = player.attrs['data-position'].lower()
        if _position in ['k']:
            continue
        position = get_position(_position)
        # Get Team
        _team = player.attrs['data-team']
        team = get_team(_team)
        _name = player.td.a.string.strip()
        name = get_name(_name, team, position)
        points = player.attrs['data-fpts']
        ret[name] = {
            'name': name,
            'position': position,
            'team': team,
            'points': float(points),
        }
    return ret

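# Hypothetical usage of crawl(), not part of the original module. It makes a
# live request, so the rows printed depend on the current slate.
def _example_crawl_rotogrinders():
    projections = crawl()
    for row in sorted(projections.values(), key=lambda r: -r['points'])[:5]:
        print row['name'], row['points']
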
def scrape(week, year=2015):
    """
    Scrapes the dailyfantasynerd data that has been copied from the website
    and pasted to s3. Sends the data to s3 after processing.

    :param week (int): Week of the season
    :param year (int): Year of the season
    """
    print "Scraping DailyFantasyNerd Projections"
    client = boto3.client("s3")
    players_key = os.path.join("dailyfantasynerd", str(year),
                               "week" + str(week), "players.txt")
    players_data = client.get_object(Bucket=SOURCES_BUCKET,
                                     Key=players_key)["Body"].read()
    ret = {}
    for line in players_data.splitlines():
        # Data rows are tab-delimited; anything else is skipped.
        if "\t" in line:
            data = line.strip().split("\t")
            name = data[0].lower()
            if is_team(name):
                name = get_team(name)
            else:
                name = normalize_name(name)
            points = float(data[-1])
            ret[name] = {"name": name, "points": points}
    send_json_to_s3(year, week, "dailyfantasynerd", ret)

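# Hypothetical illustration of the tab-delimited layout scrape() expects:
# the name in the first column and the projected points in the last. The
# sample line is invented.
def _example_parse_dfn_line():
    line = "john smith\tQB\t21.4"
    data = line.strip().split("\t")
    assert data[0].lower() == "john smith" and float(data[-1]) == 21.4
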
def _add_defense(player, projections):
    """Add defense to projection data."""
    projection = {}
    valid = True
    for cell in player.contents:
        if cell.div:
            # The team abbreviation is the suffix of the div's second CSS class.
            team = cell.div['class'][1].split('-')[1]
            projection['name'] = get_team(team)
            projection['position'] = 'dst'
            projection['team'] = get_team(team)
        elif 'stat' in cell['class'][0]:
            if cell['class'][1] in STAT_MAP:
                statistic = STAT_MAP[cell['class'][1]]
                projection[statistic] = float(cell.string) if cell.string.strip() != '-' else 0.0
    if valid:
        calculate_ppr(projection)
        projections[projection['name']] = projection

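# The team extraction in _add_defense reduces to the slicing below; the
# class list is invented to show the shape the code assumes.
def _example_defense_team_class():
    classes = ['defense', 'team-nyg']
    assert classes[1].split('-')[1] == 'nyg'
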
def _read_numberfire_line(line):
    """Reads a line from numberfire defense."""
    m = re.match(numberfire_pattern, line)
    if m:
        ret = {}
        rest = m.group('rest').split()
        # Teams on bye have no projection.
        if rest[0].lower() == 'bye':
            return None
        name = m.group('name').strip().lower()
        team = get_team(m.group('team'))
        position = get_position(m.group('position'))
        ret['name'] = get_name(name, team, position)
        ret['position'] = position
        ret['team'] = team
        ret['salary'] = int(rest[-2][1:])  # drop the leading currency symbol
        ret['points'] = float(rest[-4])
        ret['opponent'] = get_team(rest[0])
        return ret
    return None

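# The negative indexing above assumes a trailing-token layout roughly like
# the hypothetical one below: opponent first, points fourth-from-last, and
# a '$'-prefixed salary second-to-last. The sample tokens are invented.
def _example_numberfire_rest():
    rest = 'mia 9.1 proj $3500 val'.split()
    assert int(rest[-2][1:]) == 3500 and float(rest[-4]) == 9.1
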
def _add_player(player, projections):
    """Add player to projection data."""
    projection = {}
    valid = True
    for cell in player.contents:
        # Get player, position, and team
        if 'playerNameAndInfo' in cell['class'][0]:
            name = cell.find(class_='playerName').string
            # Searching for 'em' seems broken. Making do with this loop for now.
            team = None
            position = None
            for item in cell.div.contents:
                if item.name == 'em':
                    if '-' in item.string:
                        position, team = item.string.split(' - ')
                        break
                    else:
                        position = item.string.strip()
            if team is None:
                valid = False
                break
            projection['team'] = get_team(team)
            projection['position'] = get_position(position)
            projection['name'] = get_name(name, team, position)
        elif 'playerOpponent' in cell['class'][0]:
            # Get opponent
            if cell.string[0] == '@':
                # away game
                projection['home'] = False
                projection['opponent'] = cell.string[1:]
            else:
                projection['home'] = True
                projection['opponent'] = cell.string
        elif 'stat' in cell['class'][0]:
            if cell['class'][1] in STAT_MAP:
                statistic = STAT_MAP[cell['class'][1]]
                projection[statistic] = float(cell.string) if cell.string.strip() != '-' else 0.0
    if valid:
        calculate_ppr(projection)
        projections[projection['name']] = projection

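# The <em> parsing in _add_player splits position/team strings shaped like
# the invented one below.
def _example_position_team_split():
    position, team = 'QB - NE'.split(' - ')
    assert position == 'QB' and team == 'NE'
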
def crawl_espn_projection_page(projections=None, next_page=None, **params):
    """Crawls the ESPN Page and returns the projection data as a dict"""
    if next_page:
        response = requests.get(next_page)
    else:
        response = requests.get(ESPN_PROJECTIONS_URL, params)
    print response.url
    soup = BeautifulSoup(response.content, 'html.parser')
    pagination_nav = soup.body.find(class_='paginationNav')
    for item in pagination_nav.find_all('a'):
        if 'NEXT' in item.contents:
            next_page = item['href']
    projections = {} if projections is None else projections
    player_rows = soup.body(class_='pncPlayerRow')
    for row in player_rows:
        projection = {}
        valid = True
        for i, cell in enumerate(row.find_all('td')):
            if i == 0:
                # Find Name, Team, and Position
                name = cell.a.string
                if 'D/ST' in cell.contents[1]:
                    team = get_team(cell.contents[0].string.split()[0].strip().lower())
                    projection['name'] = get_name(name, team, 'dst')
                    projection['team'] = team  # already normalized by get_team above
                    projection['position'] = 'dst'
                else:
                    splits = cell.contents[1].split()
                    team = splits[1]
                    position = splits[2]
                    # No Free Agents
                    if team == 'FA':
                        valid = False
                        break
                    projection['name'] = get_name(name, team, position)
                    projection['team'] = get_team(team)
                    try:
                        projection['position'] = get_position(position)
                    except Exception:
                        # Remove kickers and the like.
                        valid = False
                        break
            elif i == 1:
                # Find opponent and whether or not team is home or away
                if cell.a is None:
                    valid = False
                    break
                text = cell.a.string
                if text[0] == '@':
                    projection['home'] = False
                    projection['opponent'] = get_team(text[1:])
                else:
                    projection['home'] = True
                    projection['opponent'] = get_team(text)
            elif i == 3:
                # Receptions come as "rec/targets"; keep the receptions half.
                projection['receptions'] = float(cell.string.split('/', 1)[0])
            elif i in range(4, 14):
                _populate_stats(i, cell, projection)
        if valid:
            calculate_ppr(projection)
            projections[projection['name']] = projection
    if next_page and len(projections) < 500:
        time.sleep(0.250)
        return crawl_espn_projection_page(projections=projections, next_page=next_page)
    else:
        return projections

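# Hypothetical driver, not part of the original module: crawl ESPN projection
# pages starting from the default URL and report how many players were
# parsed. The function follows NEXT links until it has at least 500 players
# or runs out of pages.
if __name__ == '__main__':
    espn_projections = crawl_espn_projection_page()
    print "%d players projected" % len(espn_projections)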