return nr[0], int(wl[0]), int(wl[1]) results = [] for line in coaches: chstr = line['coaches_text'].split(", ") chid = line['coaches_id'].split('|') chs = zip(chstr, chid) order = len(chs) for c in reversed(chs): name, wins, losses = parse_coach(c[0]) res = {} res['team'] = line['team'] res['franchise'] = line['franchise'] res['year'] = line['year'] res['coaches_text'] = line['coaches_text'] res['coach'] = name res['coach_id'] = c[1] res['wins'] = wins res['losses'] = losses res['games'] = wins + losses res['order'] = order results.append(res) order -= 1 settings.write_csv( results, 'data/coaches-ordered.csv', ['franchise','team','year','coaches_text','coach','coach_id','wins','losses','games','order'] )
# The lookup below is filled positionally, so rows have to be visited in
# (franchise, year, order) sequence.
coaches = sorted(
    coaches,
    key=lambda row: (row['franchise'], row['year'], row['order']),
)

# Build team_look[franchise][year] -> list with one coach_id per game, so that
# index (game_num - 1) gives the coach in charge of that game.
team_look = {}
for row in coaches:
    by_season = team_look.setdefault(row['franchise'], {})
    per_game = by_season.setdefault(row['year'], [])
    # repeat this coach's id once for every game they coached that season
    per_game.extend([row['coach_id']] * int(row['games']))

with open('data/team-coach-lookup.json', 'w') as fh:
    json.dump(team_look, fh, indent=2)

# Tag every game-log row with the coach_id found in the lookup.
games = settings.read_csv('scrape/sportsref-gamelogs.csv')
for game in games:
    season_coaches = team_look[game['franchise']][game['season']]
    game['coach'] = season_coaches[int(game['game_num']) - 1]

settings.write_csv(
    games,
    'data/gamelogs-coaches.csv',
    ['franchise', 'team', 'season', 'game_num', 'date', 'result', 'coach'],
)
}
# NOTE(review): the brace above closes an expression that opens above this
# chunk (presumably the `oldpoc` mapping used below -- confirm).

# Carry the previously recorded 'poc' and 'note' values over to every coach
# whose coach_id is present in `oldpoc`; everyone else gets empty strings.
for coach in rawcoaches:
    poc = note = ''
    if coach['coach_id'] in oldpoc:
        poc, note = oldpoc[coach['coach_id']]
    coach['poc'] = poc
    coach['note'] = note

# Descending sort on (poc, coach): with string values, rows whose 'poc' is the
# '' default end up at the bottom of the file.
rawcoaches = sorted(rawcoaches, key=lambda x: (x['poc'], x['coach']), reverse=True)
settings.write_csv(
    rawcoaches,
    'categorized/coach-list-poc.csv',
    ['coach', 'coach_id', 'franchises', 'poc', 'note'],
)

## update NBA player categorization
# Reload the raw coach list and index the existing 'former_nba_player' values
# by coach_id.
rawcoaches = settings.read_csv('data/coach-list.csv')
oldplayed = {
    l['coach_id']: l['former_nba_player']
    for l in settings.read_csv('categorized/coach-list-qualifications.csv')
}
for coach in rawcoaches:
    former_player = ''
    # NOTE(review): the body of this `if` lies past the end of this chunk.
    if coach['coach_id'] in oldplayed:
T.max_date = gm['date'] tenures.append(T) # summaries of a coach's entire tenure tenure_summaries = [t.summarize() for t in tenures] with open('data/tenures.json', 'w') as outfile: json.dump(tenure_summaries, outfile, indent=2) settings.write_csv( tenure_summaries, 'data/tenures-summarized.csv', [ 'slug', 'coach', 'coach_id', 'franchise', 'min_date', 'max_date', 'left_truncated' ], ) # unique coach names and franchises they coached for coach_names = set([(t.coach, t.coach_id) for t in tenures]) coach_info = [{ 'coach': n[0], 'coach_id': n[1], 'franchises': '|'.join(set(t.franchise for t in tenures if t.coach_id == n[1])) } for n in coach_names]
# NOTE(review): this chunk begins part-way through a nested if/elif chain
# inside a loop whose header is above the visible text. The nesting below is
# inferred from the statements themselves (the `y` tests nested inside the
# `s` tests, the `coachres` assignments at loop-body level, the CSV writes
# after the loop) -- confirm against the full file.
            continue
        elif y <= 2002:
            # seasons up to 2002 are filed under franchise 'NORLHP' as team 'CHH'
            f = 'NORLHP'
            s = 'CHH'
    elif s == 'NOH':
        f = 'NORLHP'
        # team code depends on the season: 'NOP' from 2014 on, 'NOK' for 2006-2007
        if y >= 2014:
            s = 'NOP'
        # (disabled branch, kept from the original)
        # elif y <= 2002:
        #     s = 'CHH'
        elif y == 2006 or y == 2007:
            s = 'NOK'

    # Record the (possibly remapped) team and franchise codes for this row.
    coachres['team'] = s
    coachres['franchise'] = f
    # Text of the element whose data-stat attribute is "coaches".
    coachres['coaches_text'] = t.find(attrs={ "data-stat": "coaches" }).text
    # One id per link in that element: the last path segment of its href minus
    # its final five characters (presumably a ".html" suffix), joined with '|'.
    coachres['coaches_id'] = '|'.join(
        c['href'].split('/')[-1][:-5]
        for c in t.find(attrs={ "data-stat": "coaches" }).find_all('a'))
    coaches.append(coachres)

# Write out the collected game logs and per-season coach rows.
settings.write_csv(
    gamelogs, 'scrape/sportsref-gamelogs.csv',
    ['franchise', 'team', 'season', 'game_num', 'date', 'result'])
settings.write_csv(coaches, 'scrape/sportsref-coaches.csv',
                   ['franchise', 'team', 'year', 'coaches_text', 'coaches_id'])