def get_best_move(board):
    moves = list(board.generate_legal_moves())

    # Build a bitboard for the position reached by each candidate move.
    bitboards = []
    for move in moves:
        b = board.copy()
        b.push(move)
        bitboards.append(get_bitboard(b))
    bitboards = np.array(bitboards)
    curr_bitboard = get_bitboard(board)

    # Embed the candidate positions and the current position.
    _, features = featurize(featurizer, bitboards, device)
    features = features.cpu().detach().numpy()
    _, curr_features = featurize(featurizer, curr_bitboard, device)
    curr_features = curr_features.cpu().detach().numpy()

    # Pair the current position with each candidate and score the pairs.
    to_compare = np.hstack((np.repeat(curr_features, len(moves), axis=0), features))
    scores = compare(comparator, to_compare, device)
    scores = scores[:, 0]

    best_idx = np.argmax(scores)
    board.push(moves[best_idx])
    return moves[best_idx]
def step(self, action_dict):
    if self.is_render:
        self.render()

    actions = []
    for id in range(4):
        if id in action_dict:
            actions.append(action_dict[id])
        else:
            actions.append(0)

    obs = {}
    rewards = {}
    dones = {}
    infos = {i - 10: {} for i in self.prev_obs[0]['alive']}

    _obs, _reward, _done, _info = self.env.step(actions)

    for id in self.prev_obs[0]['alive']:
        if _done or self.is_done(id - 10, _obs[0]['alive']):
            dones[id - 10] = True
            infos[id - 10]["metrics"] = self.stat[id - 10]
    dones["__all__"] = _done

    for id in range(4):
        if self.is_agent_alive(id):
            obs[id] = featurize(_obs[id])
            rewards[id] = self.reward(id, _obs, _info)
            infos[id].update(_info)

    self.prev_obs = _obs
    return obs, rewards, dones, infos
def reset(self):
    self.prev_obs = self.env.reset()
    obs = {}
    self.reset_stat()
    for i in range(4):
        if self.is_agent_alive(i):
            obs[i] = featurize(self.prev_obs[i])
    return obs
def get_all_actions():
    feature = featurize(obs[train_idx])
    action, _states = model.predict(feature)
    action = tuple(action)

    some_actions = env.act(obs)  # does not include my agent
    # If another agent's action is not a tuple (i.e. only a single action), convert it to a tuple.
    for agent_idx in range(3):
        if not isinstance(some_actions[agent_idx], tuple):
            some_actions[agent_idx] = (some_actions[agent_idx], 0, 0)

    some_actions.insert(train_idx, action)  # insert my agent's action as well
    return some_actions
def process_data(traj_1, traj_2):
    """
    Input:
        traj_1, traj_2: lists of lists, each containing one trajectory for one driver.
        Example: [[114.10437, 22.573433, '2016-07-02 00:08:45', 1],
                  [114.179665, 22.558701, '2016-07-02 00:08:52', 1]]
    Output:
        Data in any format that can be consumed by your model.
    """
    split1 = [featurize(sub, torch.device("cuda")) for sub in split_traj(traj_1, 64)]
    split2 = [featurize(sub, torch.device("cuda")) for sub in split_traj(traj_2, 64)]
    x1 = torch.stack([torch.stack(random.choices(split1, k=8)) for _ in range(8)])
    x2 = torch.stack([torch.stack(random.choices(split2, k=8)) for _ in range(8)])
    return x1, x2
def step(self, action_dict):
    if self.is_render:
        self.render()

    actions = []
    for id in range(4):
        if id in action_dict:
            actions.append(action_dict[id])
        else:
            actions.append(0)

    obs = {}
    rewards = {}
    dones = {}
    infos = {}

    _obs, _reward, _done, _info = self.env.step(actions)

    for id in range(4):
        if self.is_done(id, _obs[0]['alive']):
            dones[id] = True
            if id == 0:
                _done = True
                _info['result'] = constants.Result.Loss
    dones["__all__"] = _done

    for id in range(4):
        if self.is_agent_alive(id):
            obs[id] = featurize(_obs[id])
            rewards[id] = self.reward(id, _obs, _info)
            infos[id] = _info

    self.prev_obs = _obs
    return obs, rewards, dones, infos
# Assign best accs
metrics['best_epoch'] = best_epoch
metrics['best_val_acc'] = best_val_acc
metrics['best_val_same_acc'] = best_val_same_acc
metrics['best_test_acc'] = best_test_acc
metrics['best_test_same_acc'] = best_test_same_acc
metrics['best_test_acc_ci'] = best_test_acc_ci
metrics['best_val_ap'] = val_avg_prec
metrics['best_test_ap'] = test_avg_prec
metrics['best_val_same_ap'] = val_same_avg_prec
metrics['best_test_same_ap'] = test_same_avg_prec
metrics['has_same'] = has_same
save_defaultdict_to_fs(metrics, os.path.join(args.exp_dir, 'metrics.json'))

if not args.skip_eval:
    print('====> DONE')
    print('====> BEST EPOCH: {}'.format(best_epoch))
    print('====> {:>17}\tEpoch: {}\tAccuracy: {:.4f}'.format(
        '(best_val)', best_epoch, best_val_acc))
    print('====> {:>17}\tEpoch: {}\tAccuracy: {:.4f}'.format(
        '(best_val_same)', best_epoch, best_val_same_acc))
    print('====> {:>17}\tEpoch: {}\tAccuracy: {:.4f}'.format(
        '(best_test)', best_epoch, best_test_acc))
    print('====> {:>17}\tEpoch: {}\tAccuracy: {:.4f}'.format(
        '(best_test_same)', best_epoch, best_test_same_acc))
    print('====>')
    print('====> {:>17}\tEpoch: {}\tAccuracy: {:.4f}'.format(
        '(best_val_avg)', best_epoch, (best_val_acc + best_val_same_acc) / 2))
for fold_ix, fold in enumerate(dataset):
    start_batch = time.time()
    batch = [fold]
    fold_mut = copy.deepcopy(fold)
    print(len(fold['mutation_data']))
    num_mutations = len(fold['mutation_data'])
    rocklin_df = pd.DataFrame(columns=['seq', 'stabilityscore', 'neglogp'])
    try:
        for mut_ix, (mut_seq, effect) in enumerate(fold['mutation_data']):
            fold_mut['seq'] = mut_seq

            # Get a batch
            X, S, mask, lengths = featurize([fold_mut], device)
            elapsed_featurize = time.time() - start_batch

            log_probs = model(X, S, lengths, mask)
            loss, loss_av = _loss(S, log_probs, mask)
            neglogp = torch.sum(loss * mask, dim=1) / torch.sum(mask, dim=1)
            neglogp = neglogp.cpu().data.numpy().tolist()[0]
            print(fold['name'], neglogp, effect, mut_ix, num_mutations)
            rocklin_df.loc[mut_ix] = [mut_seq, effect, neglogp]

        rocklin_df.to_csv('rocklin/mutations/' + fold['name'] + '_' + args.features + '.tsv',
                          sep='\t')

        plt.clf()
        plt.scatter(rocklin_df['neglogp'], rocklin_df['stabilityscore'], s=5)
        plt.xlabel('Transformer Neglogp')
        plt.ylabel('Stability score in experiment')
if __name__ == '__main__':
    agent_list = [
        agents.RandomAgent(),
        agents.StaticAgent(),
        agents.StaticAgent(),
        agents.StaticAgent()
    ]
    env = pommerman.make(
        'PommeTeam-v0',
        agent_list,
        # '/home/lucius/working/projects/pomme_rllib/resources/one_line_state.json'
    )
    obs = env.reset()

    while True:
        features = featurize(obs[0])
        for i in range(17):
            print(features[i])
        print()

        actions = env.act(obs)
        print(actions)
        obs, reward, done, info = env.step(actions)
        if done:
            break

    print(obs)
    features = featurize(obs[0])
    for i in range(17):
        print(features[i])
    print()
del matches_hard[("66ef54d76ff989a91d52", "51c9e1dd498e33ecd8670892")]
del matches_hard[("edeba23f215dcc702220", "51a11cbc498e4083823909f1")]

# Extra matches
matches_hard_test = utils.read_matches("matches_test_hard.csv")
matches_easy = utils.read_matches("matches_train.csv")
matches_easy_test = utils.read_matches("matches_test.csv")

# Compiling data sets
try:
    f = open("working/locu_classifier.cache", 'rb')
    (X_tot, y_tot) = cPickle.load(f)
    sys.stderr.write("Loading data from cache.")
except IOError:
    sys.stderr.write("Featurizing easy dataset...")
    (X_easy, index_easy) = utils.featurize(locu_easy, four_easy, utils.sim)
    y_easy = utils.get_y(index_easy, matches_easy)
    sys.stderr.write("done.\n")

    sys.stderr.write("Featurizing easy test dataset...")
    (X_easy_test, index_easy_test) = utils.featurize(locu_easy_test, four_easy_test, utils.sim)
    y_easy_test = utils.get_y(index_easy_test, matches_easy_test)
    sys.stderr.write("done.\n")

    sys.stderr.write("Featurizing hard dataset...")
    (X, index) = utils.featurize(locu, four, utils.sim)
    y = utils.get_y(index, matches_hard)
    sys.stderr.write("done.\n")

    sys.stderr.write("Featurizing hard test dataset...")
    (X_hard_test, index_hard_test) = utils.featurize(locu_test, four_test, utils.sim)
    for i in range(len(matching)):
        locu = reverse_a[i]
        four = reverse_b[matching[i]]
        w = weights[i][matching[i]]
        res[(locu, four)] = w
    return res


# ----------------
# Main
# ----------------

# Load in the json files
model_file = sys.argv[1]
locu_file = sys.argv[2]
four_file = sys.argv[3]
match_out_file = sys.argv[4]

locu = utils.load_json(locu_file)
four = utils.load_json(four_file)

sys.stderr.write("Featurizing dataset...")
(X, index) = utils.featurize(locu, four, utils.sim)
sys.stderr.write("done.\n")

# Load in model
(model, thresh) = joblib.load(model_file)

# Predict on the test data and write out the matching
p = model.predict_proba(X)
res = weights_to_matching(p, index)
utils.write_matching(res, thresh, match_out_file)
# fdb733b6
checkpoint = 600
checkpoint_dir = "/home/lucius/ray_results/two_policies_vs_static_agents/PPO_RllibPomme_0_2020-06-09_23-39-347whmqdrs"
ppo_agent.restore("{}/checkpoint_{}/checkpoint-{}".format(
    checkpoint_dir, checkpoint, checkpoint))

agent_list = []
for agent_id in range(4):
    agent_list.append(agents.StaticAgent())
env = pommerman.make("PommeTeam-v0", agent_list=agent_list)

for i in range(1):
    obs = env.reset()

    done = False
    while not done:
        env.render()
        actions = env.act(obs)
        actions[0] = ppo_agent.compute_action(observation=featurize(obs[0]),
                                              policy_id="policy_0")
        actions[2] = ppo_agent.compute_action(observation=featurize(obs[2]),
                                              policy_id="policy_0")
        obs, reward, done, info = env.step(actions)

    print("reward:", reward)
    print("done:", done)
    print("info:", info)
    print("=========")

env.render(close=True)
# env.close()