def to_team(row):
    """Return the standardized team abbreviation for a player table row.

    The text node that follows the player's link holds the team name run
    together with the position, so the team is recovered by trying the
    leading three characters first and falling back to the leading two.

    Returns 'UNK' when the cleaned text is shorter than two characters.
    """
    name_cell = row.find(class_='playertablePlayerName')
    trailing_text = name_cell.a.next_sibling
    team_and_pos = trailing_text.strip(' \r\n\t*,|').upper()

    size = len(team_and_pos)
    if size < 2:
        return 'UNK'
    if size == 2:
        return nfldb.standard_team(team_and_pos)

    # Prefer a three-letter abbreviation; only if that is unknown try two.
    three_letter = nfldb.standard_team(team_and_pos[0:3])
    if three_letter != 'UNK':
        return three_letter
    return nfldb.standard_team(team_and_pos[0:2])
def schedule(bot, trigger):
    # Bot command: list the upcoming games for a team, or for the team of the
    # best-matching player when the argument is not a recognised team name.
    # (Documented with comments rather than a docstring so that nothing new is
    # exposed to whatever reads handler docstrings -- TODO confirm.)
    season_type, season_year, current_week = nfldb.current(db)
    if current_week is None:
        bot.reply('not currently in season')
        return

    name = trigger.group(2)
    team = nfldb.standard_team(name)
    player = None
    if team == 'UNK':
        # Not a team name: fall back to a fuzzy player search and use the
        # first hit's team.
        matches = nfldb.player_search2(db, name, limit=1000)
        if not matches:
            bot.reply("No player or team matching that name could be found")
            return
        player = matches[0][0]
        team = player.team

    # Window of up to five week numbers starting at the current week; slicing
    # range(18) caps the window at week 17.
    upcoming_weeks = range(18)[current_week:current_week + 5]
    query = nfldb.Query(db).game(season_type=season_type,
                                 season_year=season_year,
                                 week=upcoming_weeks,
                                 team=team)

    subject = player if player is not None else team
    lines = ['Upcoming schedule for %s' % subject]
    lines.extend(str(game) for game in query.sort(('week', 'asc')).as_games())
    say_multiline(bot, '\n'.join(lines))
def schedule(bot, trigger):
    # Bot command: list the upcoming games for a team, or for the team of the
    # best-matching player when the argument is not a recognised team name.
    # (Documented with comments rather than a docstring so that nothing new is
    # exposed to whatever reads handler docstrings -- TODO confirm.)
    season_type, season_year, current_week = nfldb.current(db)
    if current_week is None:
        bot.reply("not currently in season")
        return

    name = trigger.group(2)
    team = nfldb.standard_team(name)
    player = None
    if team == "UNK":
        # Not a team name: fall back to a fuzzy player search and use the
        # first hit's team.
        matches = nfldb.player_search2(db, name, limit=1000)
        if not matches:
            bot.reply("No player or team matching that name could be found")
            return
        player = matches[0][0]
        team = player.team

    # Window of up to five week numbers starting at the current week; slicing
    # range(18) caps the window at week 17.
    upcoming_weeks = range(18)[current_week : current_week + 5]
    query = nfldb.Query(db).game(
        season_type=season_type,
        season_year=season_year,
        week=upcoming_weeks,
        team=team,
    )

    subject = player if player is not None else team
    lines = ["Upcoming schedule for %s" % subject]
    lines.extend(str(game) for game in query.sort(("week", "asc")).as_games())
    say_multiline(bot, "\n".join(lines))
def rplayer(r, name, team, pos):
    """Create a roster slot on `r` via `r.new_player` and return the result.

    Three cases are distinguished:

    * no name and no team -> an empty slot (no team, no player id);
    * `name` is itself a recognised team name -> a slot with `team` but no
      player id;
    * otherwise -> `name` is looked up with `player_search` (restricted to
      `team` and `pos`) and the slot carries that player's id.

    The slot is flagged as bench when `pos` is 'BN'.
    """
    is_bench = (pos == 'BN')

    if name is None and team is None:
        return r.new_player(pos, None, is_bench, None)

    if nfldb.standard_team(name) != 'UNK':
        return r.new_player(pos, team, is_bench, None)

    found = player_search(name, team=team, position=pos)
    return r.new_player(pos, team, is_bench, found.player_id)
def gameforecast(bot, trigger):
    """.gameforecast denver gives the forecast for the denver game this week"""
    # The docstring above is left untouched on purpose: bot frameworks may
    # surface handler docstrings as help text -- TODO confirm.
    team = nfldb.standard_team(trigger.group(2))
    if team == 'UNK':
        bot.reply('I do not know that team')
        return

    _, season_year, current_week = nfldb.current(db)
    if current_week is None:
        bot.reply('Not currently in season')
        return

    # NOTE(review): the query is pinned to 'Regular' even though
    # nfldb.current() also reports the season type -- confirm this is intended.
    q = nfldb.Query(db).game(season_type='Regular', season_year=season_year,
                             week=current_week, team=team)
    games = q.as_games()
    if len(games) == 0:
        bot.reply('%s is on BYE' % team)
        return

    g = games[0]
    start_time = g.start_time
    stadium = stadiums[g.home_team]
    lat, lon = stadium[2]

    output = ['Kickoff forecast for %s at %s %s' %
              (g.away_team, g.home_team,
               start_time.strftime('%Y-%m-%d %I:%M:%S%p'))]

    # stadium[3] is compared with `== False` exactly as before: only a falsy
    # flag equal to False triggers a weather lookup, anything else is reported
    # as a dome.
    if stadium[3] == False:  # noqa: E712
        try:
            forecast = forecastio.load_forecast(forecastio_api_key, lat, lon,
                                                time=start_time, units='us')
            # Fetch the "currently" data point once instead of once per field.
            current = forecast.currently().d
            output.append(
                u'%s %s\u00B0F windspeed %smph from the %s chance of precip %s%%'
                % (current['summary'],
                   current['temperature'],
                   current['windSpeed'],
                   windbearing(current['windBearing']),
                   current['precipProbability']))
        except Exception:
            # Best effort: any failure (network, missing field, ...) becomes a
            # short notice.  Unlike a bare `except:`, this no longer swallows
            # KeyboardInterrupt / SystemExit.
            output.append('there was an error getting the forecast')
    else:
        output.append('Dome')

    say_multiline(bot, '\n'.join(output))
def gameforecast(bot, trigger):
    """.gameforecast denver gives the forecast for the denver game this week"""
    # The docstring above is left untouched on purpose: bot frameworks may
    # surface handler docstrings as help text -- TODO confirm.
    team = nfldb.standard_team(trigger.group(2))
    if team == "UNK":
        bot.reply("I do not know that team")
        return

    _, season_year, current_week = nfldb.current(db)
    if current_week is None:
        bot.reply("Not currently in season")
        return

    # NOTE(review): the query is pinned to "Regular" even though
    # nfldb.current() also reports the season type -- confirm this is intended.
    q = nfldb.Query(db).game(season_type="Regular", season_year=season_year, week=current_week, team=team)
    games = q.as_games()
    if len(games) == 0:
        bot.reply("%s is on BYE" % team)
        return

    g = games[0]
    start_time = g.start_time
    stadium = stadiums[g.home_team]
    lat, lon = stadium[2]

    output = [
        "Kickoff forecast for %s at %s %s"
        % (g.away_team, g.home_team, start_time.strftime("%Y-%m-%d %I:%M:%S%p"))
    ]

    # stadium[3] is compared with `== False` exactly as before: only a falsy
    # flag equal to False triggers a weather lookup, anything else is reported
    # as a dome.
    if stadium[3] == False:  # noqa: E712
        try:
            forecast = forecastio.load_forecast(forecastio_api_key, lat, lon, time=start_time, units="us")
            # Fetch the "currently" data point once instead of once per field.
            current = forecast.currently().d
            output.append(
                u"%s %s\u00B0F windspeed %smph from the %s chance of precip %s%%"
                % (
                    current["summary"],
                    current["temperature"],
                    current["windSpeed"],
                    windbearing(current["windBearing"]),
                    current["precipProbability"],
                )
            )
        except Exception:
            # Best effort: any failure (network, missing field, ...) becomes a
            # short notice.  Unlike a bare `except:`, this no longer swallows
            # KeyboardInterrupt / SystemExit.
            output.append("there was an error getting the forecast")
    else:
        output.append("Dome")

    say_multiline(bot, "\n".join(output))
def spread(bot, trigger):
    # Bot command: report the betting line (both spreads and the over/under)
    # plus the local kickoff time for the named team's game.
    # (Documented with comments rather than a docstring so that nothing new is
    # exposed to whatever reads handler docstrings -- TODO confirm.)
    vdb = vegasdb.VegasDb('vegas.db')
    team = nfldb.standard_team(trigger.group(2))
    if team == 'UNK':
        bot.reply("No team matching that name could be found")
        return

    line = vdb.get_line(format_team(team))
    if line is None:
        bot.say('sorry i do not have a line for that game at this time')
        # Fix: previously execution fell through and crashed on line[0].
        return

    ltz = reference.LocalTimezone()
    d = line[0].astimezone(ltz)

    # line[3], line[4] and line[5] are the three numeric fields of the
    # message; if none of them is known there is nothing useful to report.
    if line[3] is None and line[4] is None and line[5] is None:
        bot.say('sorry i do not have a line for that game at this time')
        return

    # Individually missing numbers are shown as 0.0.
    message = '%s (%0.1f) @ %s (%0.1f) %0.1fo/u %s' % (
        line[1],
        line[3] if line[3] is not None else 0,
        line[2],
        line[4] if line[4] is not None else 0,
        line[5] if line[5] is not None else 0,
        d.strftime('%I:%M%p %A %b %d, %Y'))
    bot.say(message)
def spread(bot, trigger):
    # Bot command: report the betting line (both spreads and the over/under)
    # plus the local kickoff time for the named team's game.
    # (Documented with comments rather than a docstring so that nothing new is
    # exposed to whatever reads handler docstrings -- TODO confirm.)
    vdb = vegasdb.VegasDb("vegas.db")
    team = nfldb.standard_team(trigger.group(2))
    if team == "UNK":
        bot.reply("No team matching that name could be found")
        return

    line = vdb.get_line(format_team(team))
    if line is None:
        bot.say("sorry i do not have a line for that game at this time")
        # Fix: previously execution fell through and crashed on line[0].
        return

    ltz = reference.LocalTimezone()
    d = line[0].astimezone(ltz)

    # line[3], line[4] and line[5] are the three numeric fields of the
    # message; if none of them is known there is nothing useful to report.
    if line[3] is None and line[4] is None and line[5] is None:
        bot.say("sorry i do not have a line for that game at this time")
        return

    # Individually missing numbers are shown as 0.0.
    message = "%s (%0.1f) @ %s (%0.1f) %0.1fo/u %s" % (
        line[1],
        line[3] if line[3] is not None else 0,
        line[2],
        line[4] if line[4] is not None else 0,
        line[5] if line[5] is not None else 0,
        d.strftime("%I:%M%p %A %b %d, %Y"),
    )
    bot.say(message)
# Dash-joined team statistics and the individual entries they are split into:
# (source description, destination keys, strip '%' signs before splitting).
# The order matches the order in which the entries were originally filled in.
_DASH_STAT_SPLITS = (
    ('third down efficiency',
     ('third downs converted', 'third downs', 'third down convert percent'),
     True),
    ('fourth down efficiency',
     ('fourth downs converted', 'fourth downs', 'fourth down convert percent'),
     True),
    ('tackles for a loss-number and yards',
     ('tackles for a loss', 'tackles for a loss yardage'),
     False),
    ('times thrown - yards lost attempting to pass',
     ('times thrown', 'yards lost attempting to pass'),
     False),
    ('pass attempts-completions-had intercepted',
     ('pass attempts', 'completions', 'had intercepted'),
     False),
    ('kickoffs number-in end zone-touchbacks',
     ('n kickoffs', 'n kickoffs in endzone', 'n kickoffs touchbacks'),
     False),
    ('punts number and average',
     ('n punts', 'avg punt'),
     False),
    ('fgs - pats had blocked',
     ('fgs had blocked', 'pats had blocked'),
     False),
    ('no. and yards punt returns',
     ('n punt returns', 'yards punt returns'),
     False),
    ('no. and yards kickoff returns',
     ('n kickoff returns', 'yards kickoff returns'),
     False),
    ('no. and yards interception returns',
     ('n interception returns', 'yards interception returns'),
     False),
    ('penalties number and yards',
     ('n penalties', 'penalty yards'),
     False),
    ('fumbles number and lost',
     ('n fumbles', 'n fumbles lost'),
     False),
    ('extra points made-attempts',
     ('extra points made', 'extra points attempts'),
     False),
    ('kicking made-attempts',
     ('kicking made', 'kicking attempts'),
     False),
    ('field goals made-attempts',
     ('field goals made', 'field goals attempts'),
     False),
    ('red zone efficiency',
     ('red zone converts', 'red zone attempts', 'red zone convert percentage'),
     True),
    ('goal to go efficiency',
     ('goal to go converts', 'goal to go attempts',
      'goal to go convert percentage'),
     True),
)

# Rare two-point-conversion statistics: (source description, "made" key,
# "attempts" key).
_TWO_POINT_SPLITS = (
    ('rushing made-attempts', '2pt conv rush made', '2pt conv rush att'),
    ('passing made-attempts', '2pt conv pass made', '2pt conv pass att'),
)

_QUARTERS = ('Q1', 'Q2', 'Q3', 'Q4', 'OT')


def _split_dash_stat(team_stats, source_key, dest_keys, strip_percent):
    """Split the dash-joined value at `source_key` into the `dest_keys` entries.

    Raises KeyError if `source_key` is missing and ValueError if the number of
    dash-separated parts differs from the number of destination keys (the same
    failure modes as a plain tuple-unpacking assignment).
    """
    raw = team_stats[source_key]
    if strip_percent:
        raw = raw.replace('%', '')
    parts = raw.split('-')
    if len(parts) != len(dest_keys):
        raise ValueError('expected %d dash-separated values for %r, got %r'
                         % (len(dest_keys), source_key, raw))
    for key, part in zip(dest_keys, parts):
        team_stats[key] = part


def parse_upsert_xmls(xml_filenames, qb_stat_descs, stat_descs,
                      stat_with_dash_descs, rare_stat_descs,
                      rare_stat_with_dash_descs):
    """Parse each gamebook XML file and upsert its game row and its drives.

    For every file the game summary attributes, full-game team statistics and
    starting-quarterback statistics are flattened into one list that is handed
    to `upsert_gamebook`; the drives of both teams are then passed to
    `parse_and_upsert_drives`.

    Parameters:
        xml_filenames: file names of the form
            `<season_year>-<week>-<season_type>-<gamekey>.xml`, resolved
            through `get_dom(gamebooks_path, ...)`.
        qb_stat_descs: keys of the starting-QB stats to emit (away value
            first, then home value, per key).
        stat_descs, rare_stat_descs: team-stat descriptions to emit (away
            value first, then home value, per description).  Rare stats
            missing from a game are filled in as '0'.
        stat_with_dash_descs, rare_stat_with_dash_descs: only used, together
            with the two lists above, to recognise known descriptions; any
            other description is reported on stdout.

    Raises:
        ValueError: if a file name does not have four dash-separated parts, a
            quarter score is not an integer, or a dash-joined statistic has an
            unexpected number of parts.
        KeyError: if an expected team statistic is absent.
    """
    for xml_file in xml_filenames:
        # ---- raw game summary data -------------------------------------
        season_year, week, season_type, gamekey = xml_file.split(
            '.xml')[0].split('-')
        sys.stderr.write("{}-{}-{}-{}.xml\n".format(season_year, week,
                                                    season_type, gamekey))
        season_type = short_to_long_stype(season_type)

        dom = get_dom(gamebooks_path, xml_file)
        gamebook_summary = dom.getElementsByTagName('GamebookSummary')[0]

        schedule_date = get_xml_attribute(gamebook_summary, 'ScheduleDate')
        home_team = get_xml_attribute(gamebook_summary, 'HomeTeam')
        away_team = get_xml_attribute(gamebook_summary, 'VisitingTeam')
        start_time = get_xml_attribute(gamebook_summary, 'StartTime')
        time_zone = get_xml_attribute(gamebook_summary, 'TimeZone')
        stadium = get_xml_attribute(gamebook_summary, 'Stadium')
        stadium_type = get_xml_attribute(gamebook_summary, 'StadiumType')
        game_weather = get_xml_attribute(gamebook_summary, 'GameWeather')
        temp = get_xml_attribute(gamebook_summary, 'Temperature')
        humidity = get_xml_attribute(gamebook_summary, 'Humidity')
        windspeed = get_xml_attribute(gamebook_summary, 'WindSpeed')
        turf_type = get_xml_attribute(gamebook_summary, 'TurfType')
        outdoor_weather = get_xml_attribute(gamebook_summary, 'OutdoorWeather')
        wind_chill = get_xml_attribute(gamebook_summary, 'WindChill')
        wind_direction = get_xml_attribute(gamebook_summary, 'WindDirection')
        referee = get_xml_attribute(gamebook_summary, 'Referee')
        umpire = get_xml_attribute(gamebook_summary, 'Umpire')
        head_linesman = get_xml_attribute(gamebook_summary, 'HeadLinesman')
        line_judge = get_xml_attribute(gamebook_summary, 'LineJudge')
        side_judge = get_xml_attribute(gamebook_summary, 'SideJudge')
        back_judge = get_xml_attribute(gamebook_summary, 'BackJudge')
        field_judge = get_xml_attribute(gamebook_summary, 'FieldJudge')
        replay_official = get_xml_attribute(gamebook_summary, 'ReplayOfficial')
        # Drop thousands separators, e.g. "70,000" -> "70000".
        attendance = get_xml_attribute(gamebook_summary,
                                       'Attendance').replace(',', '')
        game_length = get_xml_attribute(gamebook_summary, 'GameLength')

        # Per-period scores stay as the raw strings; totals are their sum.
        visitor_quarters = [
            get_xml_attribute(gamebook_summary, 'VisitorScore' + period)
            for period in _QUARTERS
        ]
        visitor_score = sum(int(points) for points in visitor_quarters)
        home_quarters = [
            get_xml_attribute(gamebook_summary, 'HomeScore' + period)
            for period in _QUARTERS
        ]
        home_score = sum(int(points) for points in home_quarters)
        last_updated = get_xml_attribute(gamebook_summary, 'LastUpdated')

        # ---- raw roster data -------------------------------------------
        offensive_starters_away = parse_players_from_xml(
            dom, 'OffensiveStarterVisitor')
        offensive_starters_home = parse_players_from_xml(
            dom, 'OffensiveStarterHome')
        # NOTE(review): the remaining roster sections are parsed but their
        # results are never used.  The calls are kept in case the helper
        # validates the document or has side effects -- confirm and remove.
        for unused_roster_tag in ('DefensiveStarterVisitor',
                                  'DefensiveStarterHome',
                                  'SubstitutionsVisitor',
                                  'SubstitutionsHome',
                                  'DidNotPlayVisitor',
                                  'DidNotPlayHome',
                                  'NotActiveVisitor',
                                  'NotActiveHome'):
            parse_players_from_xml(dom, unused_roster_tag)

        # ---- raw team statistics ---------------------------------------
        all_team_stats_xml = dom.getElementsByTagName('TeamStatistics')
        first_half_team_stats_xml = dom.getElementsByTagName(
            'FirstHalfSummary')[0].getElementsByTagName('TeamStatistics')
        # filter out first half stats to get just the full game team stats
        game_team_stats_xml = [
            node for node in all_team_stats_xml
            if node not in first_half_team_stats_xml
        ]

        known_desc_groups = (stat_descs, stat_with_dash_descs,
                             rare_stat_descs, rare_stat_with_dash_descs)
        home_team_stats = {}
        away_team_stats = {}
        for team_stat_xml in game_team_stats_xml:
            # convert all descriptions to lowercase
            team_stat_description = get_xml_attribute(
                team_stat_xml, 'Description').lower()
            # edge case error, replace all double -- with single -
            away_team_stats[team_stat_description] = re.sub(
                '--', '-', get_xml_attribute(team_stat_xml, 'VisitorStats'))
            home_team_stats[team_stat_description] = re.sub(
                '--', '-', get_xml_attribute(team_stat_xml, 'HomeStats'))
            # check for unknown stats
            if not any(team_stat_description in group
                       for group in known_desc_groups):
                # Call form prints the same text on Python 2 and Python 3.
                print("ERROR: Unknown team stat %s" % team_stat_description)

        # ---- clean and standardize the raw game summary data -----------
        home_team = standard_team(home_team)
        away_team = standard_team(away_team)
        # some stadium values are empty, manually replace each one
        if stadium == '':
            stadium = edge_case_stadium(gamekey)
        stadium = standard_stadium(stadium)

        # ---- split up the team stats with dash --------------------------
        # 'time of possession' is left as reported; it is not split here.
        for source_key, dest_keys, strip_percent in _DASH_STAT_SPLITS:
            for team_stats in (away_team_stats, home_team_stats):
                _split_dash_stat(team_stats, source_key, dest_keys,
                                 strip_percent)

        # set rare team stats to zero if they don't exist
        for rare_stat_desc in rare_stat_descs:
            if rare_stat_desc not in away_team_stats:
                away_team_stats[rare_stat_desc] = '0'
                home_team_stats[rare_stat_desc] = '0'

        # set rare team stats with dash to zero if they don't exist, fill
        # them in otherwise (presence is decided by the away team's stats;
        # both dicts are always populated together above)
        for source_key, made_key, att_key in _TWO_POINT_SPLITS:
            present = source_key in away_team_stats
            for team_stats in (away_team_stats, home_team_stats):
                if present:
                    parts = team_stats[source_key].split('-')
                    made, attempts = parts[0], parts[1]
                else:
                    made, attempts = '0', '0'
                team_stats[made_key] = made
                team_stats[att_key] = attempts

        # ---- raw individual stats ---------------------------------------
        individual_stats_dom = dom.getElementsByTagName(
            'IndividualStatistics')[0]

        # get starting qbs names
        starting_qb_away_name = get_starting_position_player_name(
            offensive_starters_away, 'QB')
        starting_qb_home_name = get_starting_position_player_name(
            offensive_starters_home, 'QB')

        # NOTE(review): the 'Total' QB stats and the interceptor stats below
        # are computed but never emitted; the calls are kept because the
        # helpers are not visible here -- confirm and remove if pure.

        # get qb stats for away starter and for the game
        away_passers_xml = individual_stats_dom.getElementsByTagName(
            'PasserVisitor')
        away_rushers_xml = individual_stats_dom.getElementsByTagName(
            'RusherVisitor')
        away_qb_stats = get_qb_stats(starting_qb_away_name, away_passers_xml,
                                     away_rushers_xml)
        get_qb_stats('Total', away_passers_xml, away_rushers_xml)

        # get qb stats for home starter and for the game
        home_passers_xml = individual_stats_dom.getElementsByTagName(
            'PasserHome')
        home_rushers_xml = individual_stats_dom.getElementsByTagName(
            'RusherHome')
        home_qb_stats = get_qb_stats(starting_qb_home_name, home_passers_xml,
                                     home_rushers_xml)
        get_qb_stats('Total', home_passers_xml, home_rushers_xml)

        # get away interceptors stats
        away_interceptors_xml = individual_stats_dom.getElementsByTagName(
            'InterceptorVisitor')
        get_interceptor_stats('Total', away_interceptors_xml)

        # get home interceptors stats
        home_interceptors_xml = individual_stats_dom.getElementsByTagName(
            'InterceptorHome')
        get_interceptor_stats('Total', home_interceptors_xml)

        # ---- assemble the row; column order matters ---------------------
        output = [
            season_year, week, season_type, gamekey, away_team, home_team,
            stadium, stadium_type, turf_type, schedule_date, start_time,
            time_zone, game_weather, temp, humidity, windspeed,
            outdoor_weather, wind_chill, wind_direction, referee, umpire,
            head_linesman, line_judge, side_judge, back_judge, field_judge,
            replay_official, attendance, game_length,
        ]
        output.append(visitor_score)
        output.extend(visitor_quarters)
        output.append(home_score)
        output.extend(home_quarters)
        output.append(last_updated)

        for qb_stat in qb_stat_descs:
            output.append(away_qb_stats[qb_stat])
            output.append(home_qb_stats[qb_stat])
        for stat in stat_descs + rare_stat_descs:
            output.append(away_team_stats[stat])
            output.append(home_team_stats[stat])
        for team_stats in (away_team_stats, home_team_stats):
            output += [
                team_stats['2pt conv rush made'],
                team_stats['2pt conv rush att'],
                team_stats['2pt conv pass made'],
                team_stats['2pt conv pass att'],
            ]

        # upsert gamebooks
        upsert_gamebook(output)

        # parse and upsert gamebook drives (third argument: True for the
        # visiting team's drives, False for the home team's)
        away_drive_stats_dom = dom.getElementsByTagName('DriveVisitor')
        home_drive_stats_dom = dom.getElementsByTagName('DriveHome')
        parse_and_upsert_drives(gamekey, away_drive_stats_dom, True,
                                away_team, home_team)
        parse_and_upsert_drives(gamekey, home_drive_stats_dom, False,
                                away_team, home_team)
def player_search2(db, full_name, team=None, position=None, limit=1,
                   soundex=False):
    """
    Given a database handle and a player's full name, this function
    searches the database for players with full names *similar* to the
    one given. Similarity is measured by the
    [Levenshtein distance](http://en.wikipedia.org/wiki/Levenshtein_distance),
    or by [Soundex similarity](http://en.wikipedia.org/wiki/Soundex).

    The name is lower-cased and split on single spaces. The first token
    that `nfldb.standard_team` recognises as a team is removed from the
    name and used as the team filter (taking precedence over `team`).
    If two or more tokens remain, the first two are matched against the
    first and last name columns respectively and only players found by
    both queries are kept; otherwise the single name is matched against
    the last name and the first name columns and the results are
    concatenated. Each column match is a prefix match
    (`LOWER(column) LIKE 'name%'`).

    Results are returned as tuples. The first element is a
    `nfldb.Player` object and the second element is the Levenshtein
    (or Soundex) distance. For names with two or more tokens the second
    element is instead a pair `(last_name_distance,
    first_name_distance)`. Results are ordered best match first.

    When `limit` is `1` (the default), then the return value is a
    tuple.  When `limit` is more than `1`, then the return value
    is a list of tuples. `limit` is applied to each underlying SQL
    query, not to the combined list.

    If no results are found, then `(None, None)` is returned when
    `limit == 1` or the empty list is returned when `limit > 1`.

    If `team` is not `None`, then only players **currently** on the
    team provided will be returned. Players with an unknown team are
    always omitted.

    If `position` is not `None`, then only players **currently**
    at that position will be returned. Any players with an unknown
    position are therefore omitted.

    In order to use this function, the PostgreSQL `levenshtein`
    function must be available. If running this functions gives
    you an error about "No function matches the given name and
    argument types", then you can install the `levenshtein` function
    into your database by running the SQL query `CREATE EXTENSION
    fuzzystrmatch` as a superuser like `postgres`. For example:

        #!bash
        psql -U postgres -c 'CREATE EXTENSION fuzzystrmatch;' nfldb

    Note that enabling the `fuzzystrmatch` extension also provides
    functions for comparing using Soundex.
    """
    from nfldb.db import Tx
    import nfldb.sql as sql
    import nfldb.types as types

    assert isinstance(limit, int) and limit >= 1

    if soundex:
        # Careful, soundex distances are sorted in reverse of Levenshtein
        # distances.
        # Difference yields an integer in [0, 4].
        # A 4 is an exact match.
        fuzzy = 'difference(%s, %%s)'
        q = '''
            SELECT {columns}
            FROM player
            WHERE {where}
            ORDER BY distance DESC
            LIMIT {limit}
        '''
    else:
        fuzzy = 'levenshtein(LOWER(%s), %%s)'
        q = '''
            SELECT {columns}
            FROM player
            WHERE {where}
            ORDER BY distance ASC
            LIMIT {limit}
        '''

    full_name = full_name.lower()
    tokens = full_name.split(' ')
    for token in tokens:
        token_team = nfldb.standard_team(token)
        if token_team != 'UNK':
            # Safe to mutate `tokens` here because we leave the loop at once.
            tokens.remove(token)
            full_name = ' '.join(tokens)
            team = token_team
            break
    # Fix: when no token names a team, the caller's `team` argument is now
    # kept instead of being silently reset to None.

    def get_results(fuzzy, q, name_type, name):
        # Run one fuzzy query against the `name_type` column and return a
        # list of (Player, distance) tuples.
        fuzzy = fuzzy % name_type
        similar = 'LOWER(%s) LIKE %%s' % name_type
        qteam, qposition = '', ''
        results = []
        with Tx(db) as cursor:
            # mogrify lets the driver quote the user-supplied values.
            if team is not None:
                qteam = cursor.mogrify('team = %s', (team, ))
            if position is not None:
                qposition = cursor.mogrify('position = %s', (position, ))

            fuzzy_filled = cursor.mogrify(fuzzy, (name, ))
            similar_filled = cursor.mogrify(similar, (name + '%', ))
            columns = types.Player._sql_select_fields(
                types.Player.sql_fields())
            columns.append('%s AS distance' % fuzzy_filled)
            q = q.format(columns=', '.join(columns),
                         where=sql.ands(similar_filled,
                                        fuzzy_filled + ' IS NOT NULL',
                                        "team != 'UNK'",
                                        qteam, qposition),
                         limit=limit)
            cursor.execute(q)

            for row in cursor.fetchall():
                results.append(
                    (types.Player.from_row_dict(db, row), row['distance']))
        return results

    name_parts = full_name.split(' ')
    if len(name_parts) > 1:
        first_name, last_name = name_parts[:2]
        results_first = get_results(fuzzy, q, 'first_name', first_name)
        results_last = get_results(fuzzy, q, 'last_name', last_name)

        # Keep only the players returned by both queries, remembering both
        # distances.  Players are matched up through their string form.
        last_dist_by_name = {}
        for player, dist in results_last:
            last_dist_by_name[str(player)] = dist
        both = {}
        for player, dist in results_first:
            if str(player) in last_dist_by_name:
                both[player] = (last_dist_by_name[str(player)], dist)

        # Order by last-name distance, then first-name distance (tuple
        # comparison).  Soundex scores are "higher is better", hence the
        # reversal so that the best match still comes first.
        results = sorted(both.items(), key=lambda item: item[1],
                         reverse=soundex)
    else:
        results = get_results(fuzzy, q, 'last_name', full_name)
        results.extend(get_results(fuzzy, q, 'first_name', full_name))
        results = sorted(results, key=lambda item: item[1], reverse=soundex)

    if limit == 1:
        if len(results) == 0:
            return (None, None)
        return results[0]
    return results
def to_team(row):
    """Return the standardized team abbreviation for a player table row.

    The text of the span inside the row's 'ysf-player-name' element starts
    with the team; its first whitespace-delimited token is passed to
    `nfldb.standard_team`.

    Returns 'UNK' when that text is empty (previously this raised
    AttributeError because the regex had nothing to match).
    """
    team_pos = row.find(class_='ysf-player-name').span.text.strip()
    # Raw string: '\S' in a plain literal is an invalid escape sequence.
    first_token = re.search(r'^\S+', team_pos)
    if first_token is None:
        return 'UNK'
    return nfldb.standard_team(first_token.group(0))
def player_search2(db, full_name, team=None, position=None, limit=1, soundex=False):
    """
    Given a database handle and a player's full name, this function
    searches the database for players with full names *similar* to the
    one given. Similarity is measured by the
    [Levenshtein distance](http://en.wikipedia.org/wiki/Levenshtein_distance),
    or by [Soundex similarity](http://en.wikipedia.org/wiki/Soundex).

    The name is lower-cased and split on single spaces. The first token
    that `nfldb.standard_team` recognises as a team is removed from the
    name and used as the team filter (taking precedence over `team`).
    If two or more tokens remain, the first two are matched against the
    first and last name columns respectively and only players found by
    both queries are kept; otherwise the single name is matched against
    the last name and the first name columns and the results are
    concatenated. Each column match is a prefix match
    (`LOWER(column) LIKE 'name%'`).

    Results are returned as tuples. The first element is a
    `nfldb.Player` object and the second element is the Levenshtein
    (or Soundex) distance. For names with two or more tokens the second
    element is instead a pair `(last_name_distance,
    first_name_distance)`. Results are ordered best match first.

    When `limit` is `1` (the default), then the return value is a
    tuple.  When `limit` is more than `1`, then the return value
    is a list of tuples. `limit` is applied to each underlying SQL
    query, not to the combined list.

    If no results are found, then `(None, None)` is returned when
    `limit == 1` or the empty list is returned when `limit > 1`.

    If `team` is not `None`, then only players **currently** on the
    team provided will be returned. Players with an unknown team are
    always omitted.

    If `position` is not `None`, then only players **currently**
    at that position will be returned. Any players with an unknown
    position are therefore omitted.

    In order to use this function, the PostgreSQL `levenshtein`
    function must be available. If running this functions gives
    you an error about "No function matches the given name and
    argument types", then you can install the `levenshtein` function
    into your database by running the SQL query `CREATE EXTENSION
    fuzzystrmatch` as a superuser like `postgres`. For example:

        #!bash
        psql -U postgres -c 'CREATE EXTENSION fuzzystrmatch;' nfldb

    Note that enabling the `fuzzystrmatch` extension also provides
    functions for comparing using Soundex.
    """
    from nfldb.db import Tx
    import nfldb.sql as sql
    import nfldb.types as types

    assert isinstance(limit, int) and limit >= 1

    if soundex:
        # Careful, soundex distances are sorted in reverse of Levenshtein
        # distances.
        # Difference yields an integer in [0, 4].
        # A 4 is an exact match.
        fuzzy = "difference(%s, %%s)"
        q = """
            SELECT {columns}
            FROM player
            WHERE {where}
            ORDER BY distance DESC
            LIMIT {limit}
        """
    else:
        fuzzy = "levenshtein(LOWER(%s), %%s)"
        q = """
            SELECT {columns}
            FROM player
            WHERE {where}
            ORDER BY distance ASC
            LIMIT {limit}
        """

    full_name = full_name.lower()
    tokens = full_name.split(" ")
    for token in tokens:
        token_team = nfldb.standard_team(token)
        if token_team != "UNK":
            # Safe to mutate `tokens` here because we leave the loop at once.
            tokens.remove(token)
            full_name = " ".join(tokens)
            team = token_team
            break
    # Fix: when no token names a team, the caller's `team` argument is now
    # kept instead of being silently reset to None.

    def get_results(fuzzy, q, name_type, name):
        # Run one fuzzy query against the `name_type` column and return a
        # list of (Player, distance) tuples.
        fuzzy = fuzzy % name_type
        similar = "LOWER(%s) LIKE %%s" % name_type
        qteam, qposition = "", ""
        results = []
        with Tx(db) as cursor:
            # mogrify lets the driver quote the user-supplied values.
            if team is not None:
                qteam = cursor.mogrify("team = %s", (team,))
            if position is not None:
                qposition = cursor.mogrify("position = %s", (position,))

            fuzzy_filled = cursor.mogrify(fuzzy, (name,))
            similar_filled = cursor.mogrify(similar, (name + "%",))
            columns = types.Player._sql_select_fields(types.Player.sql_fields())
            columns.append("%s AS distance" % fuzzy_filled)
            q = q.format(
                columns=", ".join(columns),
                where=sql.ands(similar_filled, fuzzy_filled + " IS NOT NULL", "team != 'UNK'", qteam, qposition),
                limit=limit,
            )
            cursor.execute(q)

            for row in cursor.fetchall():
                results.append((types.Player.from_row_dict(db, row), row["distance"]))
        return results

    name_parts = full_name.split(" ")
    if len(name_parts) > 1:
        first_name, last_name = name_parts[:2]
        results_first = get_results(fuzzy, q, "first_name", first_name)
        results_last = get_results(fuzzy, q, "last_name", last_name)

        # Keep only the players returned by both queries, remembering both
        # distances.  Players are matched up through their string form.
        last_dist_by_name = {}
        for player, dist in results_last:
            last_dist_by_name[str(player)] = dist
        both = {}
        for player, dist in results_first:
            if str(player) in last_dist_by_name:
                both[player] = (last_dist_by_name[str(player)], dist)

        # Order by last-name distance, then first-name distance (tuple
        # comparison).  Soundex scores are "higher is better", hence the
        # reversal so that the best match still comes first.
        results = sorted(both.items(), key=lambda item: item[1], reverse=soundex)
    else:
        results = get_results(fuzzy, q, "last_name", full_name)
        results.extend(get_results(fuzzy, q, "first_name", full_name))
        results = sorted(results, key=lambda item: item[1], reverse=soundex)

    if limit == 1:
        if len(results) == 0:
            return (None, None)
        return results[0]
    return results
def _defense_lookup(d, conn):
    """Resolve `d` as a team defense, falling back to a player lookup.

    Returns a pair `(result, is_player)`:

    * when `standard_team` recognises `d`, `result` is `(team, team)` and
      `is_player` is False;
    * otherwise `result` is whatever `_player_lookup(d, conn)` returns and
      `is_player` is True.
    """
    team = standard_team(d)
    if team == 'UNK':
        return _player_lookup(d, conn), True
    return (team, team), False