def run(run=-1, count=1, **kwargs):
    """Loop forever, playing snapshot-vs-snapshot matchups that are under-played.

    Each iteration refreshes the snapshot agents, reads the symmetric game
    counts from the database, and keeps the (black, white) pairs of distinct
    agents with fewer than `count` games. Pairs whose names sit closest together
    in the count table's index are preferred. One of the ten closest pairs is
    drawn at random, evaluated, and the results are saved.

    Args:
        run: run identifier, passed through `runs.resolve`.
        count: a pairing is eligible while its game count is below this.
        **kwargs: forwarded to `snapshot_agents`.

    This function never returns; it sleeps 15 seconds whenever there is
    nothing left to play.
    """
    run = runs.resolve(run)

    def agentfunc():
        return mcts.MCTSAgent(storage.load_raw(run, 'model'))

    agents = snapshot_agents(run, agentfunc, **kwargs)
    worlds = common.worlds(run, 256, device='cuda')

    while True:
        agents = snapshot_agents(run, agentfunc, **kwargs)
        n, _ = database.symmetric(run, agents)

        # Long-form table of pairings that still need games, minus self-play.
        counts = n.stack()
        pairs = counts.loc[counts < count].reset_index()
        zeros = pairs.loc[pairs.black_name != pairs.white_name]

        # Rank pairings by how far apart the two names are in the index.
        position = {name: idx for idx, name in enumerate(n.index)}
        black_pos = zeros.black_name.replace(position)
        white_pos = zeros.white_name.replace(position)
        diff = (black_pos - white_pos).abs()
        ordered = zeros.loc[diff.sort_values().index]

        if not len(ordered):
            log.info('No matchups to play')
            time.sleep(15)
            continue

        # Sample so there's no problems if we run in parallel
        candidates = ordered.head(10)
        matchup = candidates.sample(1).iloc[0, :2].tolist()
        log.info(f'Playing {matchup}')

        matchup = {name: agents[name] for name in matchup}
        results = common.evaluate(worlds, matchup)

        wins = int(results[0].wins[0] + results[1].wins[1])
        games = int(sum(r.games for r in results))
        log.info(
            f'Storing. {wins} wins in {games} games for {next(iter(matchup))} ')
        database.save(run, results)
def warm_start(agent, opt, scaler, parent):
    """Restore `agent`, `opt` and `scaler` from the latest state stored for `parent`.

    Args:
        agent, opt, scaler: objects exposing `load_state_dict`.
        parent: run identifier to warm-start from; a falsy value skips loading.

    Returns:
        The resolved parent run when one was given, otherwise `parent` unchanged.
    """
    if not parent:
        return parent

    resolved = runs.resolve(parent)
    state = storage.load_latest(resolved, device='cuda')
    # Same restore order as the stored keys: agent, then optimizer, then scaler.
    for component, key in ((agent, 'agent'), (opt, 'opt'), (scaler, 'scaler')):
        component.load_state_dict(state[key])
    return resolved
def save_trained(run, count=1024):
    """Collect `count` compressed samples from `generate_trained` and pickle them.

    Args:
        run: run identifier; passed as given to `generate_trained`, then
            resolved with `runs.resolve` to name the output file.
        count: number of samples to collect before writing.

    The samples are written as a single pickled list to
    `ROOT / 'batches' / '<resolved run>.pkl'`; the parent directory is created
    if needed.
    """
    buffer = []
    for obs, seats, y in tqdm(generate_trained(run), total=count):
        buffer.append(compress(obs, seats, y))
        if len(buffer) >= count:
            break

    run = runs.resolve(run)
    # The filename must be an f-string: the plain literal '{run}.pkl' made every
    # run overwrite one file that was literally called "{run}.pkl".
    path = ROOT / 'batches' / f'{run}.pkl'
    path.parent.mkdir(exist_ok=True, parents=True)
    with open(path, 'wb') as f:
        pickle.dump(buffer, f)
def elos(run, target=None, filter='.*'):
    """Solve for the Elo estimates of a run's agents and return them as a table.

    Args:
        run: run identifier, passed through `runs.resolve`.
        target: reference agent for the ratings. An int is a position in the
            solution's index, a str is an agent name; for either, the result
            comes from `difference(soln, <name>)`. Any other value (including
            None) returns the raw means with the square root of the covariance
            diagonal as the spread.
        filter: pattern handed to `mask` to select which agents are included.

    Returns:
        DataFrame with columns 'μ' and 'σ'.
    """
    run = runs.resolve(run)
    # NOTE(review): `json` must be a project module here (stdlib json has no
    # `symmetric`) -- confirm against the file's imports.
    games, wins = json.symmetric(run)
    games, wins = mask(games, wins, filter)

    soln = activelo.solve(games.values, wins.values)
    soln = pandas(soln, games.index)

    if isinstance(target, int):
        μ, σ = difference(soln, soln.μ.index[target])
    elif isinstance(target, str):
        μ, σ = difference(soln, target)
    else:
        μ, σ = soln.μ, pd.Series(np.diag(soln.Σ)**.5, games.index)

    # `axis` must be passed by keyword: positional use is deprecated and
    # rejected by pandas 2.x.
    return pd.concat({'μ': μ, 'σ': σ}, axis=1)
def snapshots(run=-1, target=None, filter=''):
    """Plot the Elo estimate (with error bars) of every snapshot in a run.

    Args:
        run: run identifier, passed through `runs.resolve`.
        target: reference agent forwarded to `analysis.elos`; None plots the
            raw ratings.
        filter: pattern forwarded to `analysis.elos`.

    Returns:
        The 'μ' column of the Elo table that was plotted.
    """
    run = runs.resolve(run)
    elos = analysis.elos(run, target, filter=filter)

    # Compare against None rather than truthiness: `target=0` is a valid
    # positional target and must not be labelled as the raw plot.
    if target is not None:
        title = f'{run} eElo v. {target}'
    else:
        title = f'{run} eElo, raw'

    fig, axes = plt.subplots(1, 1, squeeze=False)
    ax = axes[0, 0]
    ax.errorbar(np.arange(len(elos)), elos.μ, yerr=elos.σ,
                marker='.', capsize=2, linestyle='')
    ax.set_title(title)
    ax.set_xticks(np.arange(len(elos.μ)))
    ax.set_xticklabels(elos.μ.index, rotation=-90)
    ax.grid(True, axis='y')

    return elos.μ
def run_sync(run):
    """Run the rolling arena for `run` in the calling thread, forever.

    Inside the run's log and stats contexts, the agent is re-fetched with
    `common.agent` whenever more than 15 seconds have passed since the last
    fetch, and a trial is played whenever an agent is available and more than
    one second has passed since the last trial. Never returns.
    """
    log.info('Arena launched')
    run = runs.resolve(run)
    log.info(f'Running arena for "{run}"')

    with logs.to_run(run), stats.to_run(run):
        worlds = common.worlds(run, 4)
        arena = RollingArena(worlds, 128)

        agent = None
        trials = 0
        loaded_at = stepped_at = 0

        while True:
            # Refresh the agent at most once every 15 seconds.
            if time.time() - loaded_at > 15:
                loaded_at = time.time()
                agent = common.agent(run)

            if not agent:
                continue
            # Throttle trials to at most one per second.
            if time.time() - stepped_at <= 1:
                continue

            stepped_at = time.time()
            log.info('Running trial')
            arena.play(agent)
            trials += 1
def errors(run=-1, filter='.*'):
    """Plot diagnostics comparing observed win rates with the Elo model's fit.

    The figure has six panels:
      * top row: observed win-rate matrix, model-expected win-rate matrix, and
        Elo estimates relative to the first agent;
      * bottom row: raw error (actual - expected), error divided by the
        binomial standard error, and the 'elo-mohex' stat over time.

    Args:
        run: run identifier, passed through `runs.resolve`.
        filter: pattern handed to `analysis.mask` and `analysis.elos` to select
            which agents are included.
    """
    run = runs.resolve(run)
    games, wins = database.symmetric(run)
    games, wins = analysis.mask(games, wins, filter)
    soln = activelo.solve(games.values, wins.values)

    rates = wins / games
    # expected[i, j] is the logistic function of μ[i] - μ[j].
    expected = 1 / (1 + np.exp(-soln.μ[:, None] + soln.μ[None, :]))
    # Pairings with no games carry no information; mark them NaN so they are
    # drawn in the colormap's "bad" colour.
    actual = rates.where(games > 0, np.nan).values

    # Forward-fill gaps, but leave everything after the last observation empty.
    mohex = stats.pandas(run, 'elo-mohex', 'μ').pipe(
        lambda df: df.ffill().where(df.bfill().notnull()))
    # TODO: Generalise this to non-15-min snapshots
    mohex.index = (mohex.index - mohex.index[0]).total_seconds() / 900

    fig = plt.figure()
    gs = plt.GridSpec(4, 3, fig, height_ratios=[20, 1, 20, 1])
    fig.set_size_inches(18, 12)

    # Top row
    cmap = copy.copy(plt.cm.RdBu)
    cmap.set_bad('lightgrey')
    kwargs = dict(cmap=cmap, vmin=0, vmax=1, aspect=1)

    ax = plt.subplot(gs[0, 0])
    ax.imshow(actual, **kwargs)
    ax.set_title('actual')

    ax = plt.subplot(gs[0, 1])
    im = ax.imshow(expected, **kwargs)
    ax.set_title('expected')

    ax = plt.subplot(gs[1, :2])
    plt.colorbar(im, cax=ax, orientation='horizontal')

    # Top right
    ax = plt.subplot(gs[0, 2])
    # Forward the filter so this panel covers the same agents as the matrices.
    elos = analysis.elos(run, target=0, filter=filter)
    ax.errorbar(np.arange(len(elos)), elos.μ, yerr=elos.σ,
                marker='.', capsize=2, linestyle='')
    ax.set_title('elos v. first')
    ax.grid()

    # Bottom left
    ax = plt.subplot(gs[2, 0])
    im = ax.imshow(actual - expected, vmin=-1, vmax=+1, aspect=1, cmap=cmap)
    ax.set_title('error')

    ax = plt.subplot(gs[3, 0])
    plt.colorbar(im, cax=ax, orientation='horizontal')

    # Bottom middle
    ax = plt.subplot(gs[2, 1])
    # Binomial standard error of the win rate under the model.
    se = (expected * (1 - expected) / games)**.5
    im = ax.imshow((actual - expected) / se, vmin=-3, vmax=+3, aspect=1,
                   cmap='RdBu')
    ax.set_title('standard error')

    ax = plt.subplot(gs[3, 1])
    plt.colorbar(im, cax=ax, orientation='horizontal')

    # Bottom right
    ax = plt.subplot(gs[2, 2])
    im = mohex.plot(ax=ax, grid=True)
    ax.set_title('elos v. mohex')
    ax.set_xlabel('')