def scrape(week, year=2015):
    """Scrapes the dailyfantasynerd data that has been copied from the
    website and pasted to s3. Sends the parsed data back to s3.

    :param week (int): Week of the season
    :param year (int): Year of the season
    :return: The key on s3 where the data is sent to
    :rtype: str
    """
    print("Scraping DailyFantasyNerd Projections")
    client = boto3.client("s3")
    players_key = os.path.join("dailyfantasynerd", str(year),
                               "week" + str(week), "players.txt")
    players_data = client.get_object(Bucket=SOURCES_BUCKET,
                                     Key=players_key)["Body"].read()
    ret = {}
    for line in players_data.splitlines():
        # Lines without a tab are headers/noise from the pasted page.
        if "\t" not in line:
            continue
        data = line.strip().split("\t")
        name = data[0].lower()
        # Defenses are listed by team name; everything else is a player.
        if is_team(name):
            name = get_team(name)
        else:
            name = normalize_name(name)
        # Projected points is the last tab-separated column.
        points = float(data[-1])
        ret[name] = {"name": name, "points": points}
    # Return the destination key, consistent with the other scrapers.
    return send_json_to_s3(year, week, "dailyfantasynerd", ret)
def scrape(week, year=2015, position=0):
    """Scrapes the NFL page at the given position.

    :param week (int): Week of the season.
    :param year (int): Year of the season.
    :param position (int): The first index to start scraping through
        pagination.
    :return: The key on s3 where the data is sent to
    :rtype: str
    """
    print("Scraping NFL Projections")
    query = dict(
        statWeek=week,
        statType='weekProjectedStats',
        statSeason=year,
        statCategory='projectedStats',
        position=position,
    )
    # First pass: players, starting at the requested position.
    projections = crawl_nfl_projection_page(**query)
    # Second pass: defenses (position 8), restarting pagination at 0.
    query['offset'] = 0
    query['position'] = 8
    projections = crawl_nfl_projection_page(projections=projections, **query)
    return send_json_to_s3(year, week, 'nfl', projections)
def scrape(week, year):
    """Scrapes the pasted results from Numberfire.

    :param week (int): Week of the season.
    :param year (int): Year of the season.
    :return: The key on s3 where the data is sent to
    :rtype: str
    """
    print("Scraping Numberfire Projections")
    projections = {}
    client = boto3.client('s3')
    # Both source files live under the same week-scoped prefix.
    prefix = os.path.join('numberfire', str(year), 'week' + str(week))
    players_key = os.path.join(prefix, 'players.txt')
    defense_key = os.path.join(prefix, 'defense.txt')
    players_data = client.get_object(Bucket=SOURCES_BUCKET, Key=players_key)
    defense_data = client.get_object(Bucket=SOURCES_BUCKET, Key=defense_key)
    # Players and defenses are parsed the same way; handle them as one list.
    players = (players_data['Body'].read().splitlines() +
               defense_data['Body'].read().splitlines())
    for line in players:
        player = _read_numberfire_line(line)
        # Headers / unparseable rows come back as None and are skipped.
        if player is None:
            continue
        projections[player['name']] = player
    return send_json_to_s3(year, week, 'numberfire', projections)
def aggregate(week, year):
    """Aggregates all the projections from the different sources to a
    single dict and saves to s3.

    :param week (str): Week of the season.
    :param year (str): Year of the season.
    """
    print("aggregating data")
    # Build up a dict of all the projections keyed by the source filename.
    projections = {}
    client = boto3.client('s3')
    for key in _query_projections(year, week):
        print("Reading %s" % key)
        # splitext is safe even when the basename contains extra dots,
        # where split('.') would raise a ValueError on unpacking.
        name = os.path.splitext(os.path.basename(key))[0]
        body = client.get_object(Bucket=SCRAPED_BUCKET, Key=key)['Body'].read()
        projections[name] = json.loads(body)
    # Only keep players that show up in every source so each aggregate
    # is computed over the same number of samples.
    players = set.intersection(*[set(items.keys()) for items in projections.values()])
    print(len(players))
    # Build up data
    output = {}
    for name in players:
        ret = {'name': name}
        points = np.array([])
        for source_name, data in projections.items():
            if source_name == 'numberfire':
                # Using Numberfire as a source for player info
                ret['opponent'] = data[name]['opponent']
                ret['position'] = data[name]['position']
                ret['salary'] = data[name]['salary']
                ret['team'] = data[name]['team']
            ret[source_name] = data[name]['points']
            points = np.append(points, float(data[name]['points']))
        ret['std'] = np.std(points)
        ret['mean'] = np.mean(points)
        # Drop the single highest and lowest projection. With fewer than
        # three sources nothing is left after trimming, so fall back to
        # the plain mean instead of producing NaN.
        trimmed_points = sorted(points)[1:-1]
        ret['trimmed_mean'] = np.mean(trimmed_points) if trimmed_points else ret['mean']
        output[name] = ret
    send_json_to_s3(year, week, 'projections', output)
def scrape(week, year=2015):
    """Scrapes the Rotogrinders page.

    :param week (int): Week of the season.
    :param year (int): Year of the season.
    :return: The key on s3 where the data is sent to
    :rtype: str
    """
    print("Scraping RotoGrinders Projections")
    # crawl() does all the work; we only persist its result.
    return send_json_to_s3(year, week, 'rotogrinders', crawl())
def scrape(week, year=2015):
    """Scrapes the ESPN page.

    :param week (int): Week of the season.
    :param year (int): Year of the season.
    :return: The key on s3 where the data is sent to
    :rtype: str
    """
    print("Scraping ESPN Projections")
    # Example ESPN query string: &scoringPeriodId=5&seasonId=2015&startIndex=40
    projections = crawl_espn_projection_page(scoringPeriodId=week,
                                             seasonId=year)
    return send_json_to_s3(year, week, 'espn', projections)