Example #1
def get_best_move(board):
    moves = board.generate_legal_moves()
    moves = list(moves)

    bitboards = []
    for move in moves:
        b = board.copy()
        b.push(move)
        bitboards.append(get_bitboard(b))
    bitboards = np.array(bitboards)
    curr_bitboard = get_bitboard(board)

    _, features = featurize(featurizer, bitboards, device)
    features = features.cpu().detach().numpy()
    _, curr_features = featurize(featurizer, curr_bitboard, device)
    curr_features = curr_features.cpu().detach().numpy()

    to_compare = np.hstack((np.repeat(curr_features, len(moves),
                                      axis=0), features))
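    # Each row of to_compare pairs the current position's features with one candidate
    # move's features; the comparator scores these pairs and column 0 is used as the move score.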
    scores = compare(comparator, to_compare, device)
    scores = scores[:, 0]
    best_idx = np.argmax(scores)
    board.push(moves[best_idx])

    return moves[best_idx]
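
A minimal invocation sketch (not from the original source; it assumes python-chess plus the module-level featurize, get_bitboard, featurizer, comparator, and device used above):

import chess

board = chess.Board()
move = get_best_move(board)  # note: get_best_move also pushes the chosen move onto board
print(move.uci())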
Example #2
    def step(self, action_dict):
        if self.is_render:
            self.render()

        actions = []
        for id in range(4):
            if id in action_dict:
                actions.append(action_dict[id])
            else:
                actions.append(0)

        obs = {}
        rewards = {}
        dones = {}
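        # In Pommerman, agent ids in obs['alive'] run from 10 to 13, so subtracting 10 maps them to indices 0-3.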
        infos = {i - 10: {} for i in self.prev_obs[0]['alive']}

        _obs, _reward, _done, _info = self.env.step(actions)

        for id in self.prev_obs[0]['alive']:
            if _done or self.is_done(id - 10, _obs[0]['alive']):
                dones[id - 10] = True
                infos[id - 10]["metrics"] = self.stat[id - 10]

        dones["__all__"] = _done

        for id in range(4):
            if self.is_agent_alive(id):
                obs[id] = featurize(_obs[id])
                rewards[id] = self.reward(id, _obs, _info)
                infos[id].update(_info)

        self.prev_obs = _obs

        return obs, rewards, dones, infos
Example #3
    def reset(self):
        self.prev_obs = self.env.reset()
        obs = {}
        self.reset_stat()
        for i in range(4):
            if self.is_agent_alive(i):
                obs[i] = featurize(self.prev_obs[i])

        return obs
Example #4
    def get_all_actions():
        feature = featurize(obs[train_idx])
        action, _states = model.predict(feature)
        action = tuple(action)
        some_actions = env.act(obs)  # does not include my agent
        # if another agent's action is not a tuple (i.e. a single action), convert it to one
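        # (In Pommerman's radio variants a full action is a (movement, message, message) tuple,
        #  so plain integer actions are padded with two zero message words below.)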
        for agent_idx in range(3):
            if not isinstance(some_actions[agent_idx], tuple):
                some_actions[agent_idx] = (some_actions[agent_idx], 0, 0)
        some_actions.insert(train_idx, action)  # add my agent's action as well

        return some_actions
Example #5
def process_data(traj_1, traj_2):
    """
    Input:
      Traj: a list of list, contains one trajectory for one driver
      example:[[114.10437, 22.573433, '2016-07-02 00:08:45', 1],
         [114.179665, 22.558701, '2016-07-02 00:08:52', 1]]
    Output:
      Data: any format that can be consumed by your model.

    """
    data = []
    split1 = [
        featurize(sub, torch.device("cuda")) for sub in split_traj(traj_1, 64)
    ]
    split2 = [
        featurize(sub, torch.device("cuda")) for sub in split_traj(traj_2, 64)
    ]
    x1 = torch.stack(
        [torch.stack(random.choices(split1, k=8)) for _ in range(8)])
    x2 = torch.stack(
        [torch.stack(random.choices(split2, k=8)) for _ in range(8)])
    return x1, x2
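
A hedged usage sketch with hypothetical coordinates; it assumes the project's split_traj and featurize helpers, a CUDA device, and trajectories long enough for split_traj(traj, 64) to yield at least one segment:

traj_a = [[114.10437, 22.573433, '2016-07-02 00:08:45', 1],
          [114.179665, 22.558701, '2016-07-02 00:08:52', 1]]  # real trajectories are far longer
traj_b = [[114.105, 22.5741, '2016-07-02 01:10:00', 0],
          [114.1061, 22.5749, '2016-07-02 01:10:07', 0]]
x1, x2 = process_data(traj_a, traj_b)  # two tensors of shape (8, 8, *feature_shape)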
Example #6
    def step(self, action_dict):
        if self.is_render:
            self.render()

        actions = []
        for id in range(4):
            if id in action_dict:
                actions.append(action_dict[id])
            else:
                actions.append(0)

        obs = {}
        rewards = {}
        dones = {}
        infos = {}

        _obs, _reward, _done, _info = self.env.step(actions)

        for id in range(4):
            if self.is_done(id, _obs[0]['alive']):
                dones[id] = True

                if id == 0:
                    _done = True
                    _info['result'] = constants.Result.Loss

        dones["__all__"] = _done

        for id in range(4):
            if self.is_agent_alive(id):
                obs[id] = featurize(_obs[id])
                rewards[id] = self.reward(id, _obs, _info)
                infos[id] = _info

        self.prev_obs = _obs

        return obs, rewards, dones, infos
Example #7
        # Assign best accs
        metrics['best_epoch'] = best_epoch
        metrics['best_val_acc'] = best_val_acc
        metrics['best_val_same_acc'] = best_val_same_acc
        metrics['best_test_acc'] = best_test_acc
        metrics['best_test_same_acc'] = best_test_same_acc
        metrics['best_test_acc_ci'] = best_test_acc_ci
        metrics['best_val_ap'] = val_avg_prec
        metrics['best_test_ap'] = test_avg_prec
        metrics['best_val_same_ap'] = val_same_avg_prec
        metrics['best_test_same_ap'] = test_same_avg_prec
        metrics['has_same'] = has_same
        save_defaultdict_to_fs(metrics,
                               os.path.join(args.exp_dir, 'metrics.json'))

    if (not args.skip_eval):
        print('====> DONE')
        print('====> BEST EPOCH: {}'.format(best_epoch))
        print('====> {:>17}\tEpoch: {}\tAccuracy: {:.4f}'.format(
            '(best_val)', best_epoch, best_val_acc))
        print('====> {:>17}\tEpoch: {}\tAccuracy: {:.4f}'.format(
            '(best_val_same)', best_epoch, best_val_same_acc))
        print('====> {:>17}\tEpoch: {}\tAccuracy: {:.4f}'.format(
            '(best_test)', best_epoch, best_test_acc))
        print('====> {:>17}\tEpoch: {}\tAccuracy: {:.4f}'.format(
            '(best_test_same)', best_epoch, best_test_same_acc))
        print('====>')
        print('====> {:>17}\tEpoch: {}\tAccuracy: {:.4f}'.format(
            '(best_val_avg)', best_epoch,
            (best_val_acc + best_val_same_acc) / 2))
Example #8
    for fold_ix, fold in enumerate(dataset):
        start_batch = time.time()

        batch = [fold]
        fold_mut = copy.deepcopy(fold)
        print(len(fold['mutation_data']))

        num_mutations = len(fold['mutation_data'])
        rocklin_df = pd.DataFrame(columns=['seq', 'stabilityscore', 'neglogp'])
        try:

            for mut_ix, (mut_seq, effect) in enumerate(fold['mutation_data']):
                fold_mut['seq'] = mut_seq

                # Get a batch
                X, S, mask, lengths = featurize([fold_mut], device)
                elapsed_featurize = time.time() - start_batch

                log_probs = model(X, S, lengths, mask)
                loss, loss_av = _loss(S, log_probs, mask)
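                # Masked mean of the per-residue loss: the average negative log-probability
                # assigned to the mutant sequence over valid positions.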
                neglogp = torch.sum(loss * mask, dim=1) / torch.sum(mask, dim=1)
                neglogp = neglogp.cpu().data.numpy().tolist()[0]
                print(fold['name'], neglogp, effect, mut_ix, num_mutations)
                rocklin_df.loc[mut_ix] = [mut_seq, effect, neglogp]

            rocklin_df.to_csv('rocklin/mutations/' + fold['name'] + '_' + args.features + '.tsv', sep='\t')

            plt.clf()
            plt.scatter(rocklin_df['neglogp'], rocklin_df['stabilityscore'], s=5)
            plt.xlabel('Transformer Neglogp')
            plt.ylabel('Stability score in experiment')
Example #9
if __name__ == '__main__':
    agent_list = [
        agents.RandomAgent(),
        agents.StaticAgent(),
        agents.StaticAgent(),
        agents.StaticAgent()
    ]
    env = pommerman.make(
        'PommeTeam-v0',
        agent_list,
        # '/home/lucius/working/projects/pomme_rllib/resources/one_line_state.json'
    )
    obs = env.reset()

    while True:
        features = featurize(obs[0])
        for i in range(17):
            print(features[i])
        print()
        actions = env.act(obs)
        print(actions)
        obs, reward, done, info = env.step(actions)

        if done:
            break

    print(obs)
    features = featurize(obs[0])
    for i in range(17):
        print(features[i])
    print()
Example #10
del matches_hard[("66ef54d76ff989a91d52", "51c9e1dd498e33ecd8670892")]
del matches_hard[("edeba23f215dcc702220", "51a11cbc498e4083823909f1")]

# Extra matches
matches_hard_test = utils.read_matches("matches_test_hard.csv")
matches_easy = utils.read_matches("matches_train.csv")
matches_easy_test = utils.read_matches("matches_test.csv")

# Compiling data sets
try:
    f = open("working/locu_classifier.cache", 'rb')
    (X_tot, y_tot) = cPickle.load(f)
    sys.stderr.write("Loading data from cache.")
except IOError:
    sys.stderr.write( "Featurizing easy dataset..." )
    (X_easy, index_easy) = utils.featurize(locu_easy, four_easy, utils.sim)
    y_easy = utils.get_y(index_easy, matches_easy)
    sys.stderr.write( "done.\n" )

    sys.stderr.write( "Featurizing easy test dataset..." )
    (X_easy_test, index_easy_test) = utils.featurize(locu_easy_test, four_easy_test, utils.sim)
    y_easy_test = utils.get_y(index_easy_test, matches_easy_test) 
    sys.stderr.write( "done.\n" )

    sys.stderr.write( "Featurizing hard dataset..." )
    (X, index) = utils.featurize(locu, four, utils.sim)
    y = utils.get_y(index, matches_hard) 
    sys.stderr.write( "done.\n" )

    sys.stderr.write( "Featurizing hard test dataset..." )
    (X_hard_test, index_hard_test) = utils.featurize(locu_test, four_test, utils.sim)
Example #11
    for i in range(len(matching)):
        locu = reverse_a[i]
        four = reverse_b[matching[i]]
        w = weights[i][matching[i]]
        res[(locu, four)] = w
    return res

# ----------------
# Main
# ----------------
# Load in the json files
model_file = sys.argv[1]
locu_file = sys.argv[2]
four_file = sys.argv[3]
match_out_file = sys.argv[4]

locu = utils.load_json(locu_file)  
four = utils.load_json(four_file) 

sys.stderr.write( "Featurizing dataset..." )
(X, index) = utils.featurize(locu, four, utils.sim)
sys.stderr.write( "done.\n" )

# Load in model
(model, thresh) = joblib.load(model_file)

## Test on testing
p = model.predict_proba(X)
res = weights_to_matching(p, index)
utils.write_matching(res, thresh, match_out_file)
Example #12
# fdb733b6
checkpoint = 600
checkpoint_dir = "/home/lucius/ray_results/two_policies_vs_static_agents/PPO_RllibPomme_0_2020-06-09_23-39-347whmqdrs"
ppo_agent.restore("{}/checkpoint_{}/checkpoint-{}".format(
    checkpoint_dir, checkpoint, checkpoint))

agent_list = []
for agent_id in range(4):
    agent_list.append(agents.StaticAgent())
env = pommerman.make("PommeTeam-v0", agent_list=agent_list)

for i in range(1):
    obs = env.reset()

    done = False
    while not done:
        env.render()
        actions = env.act(obs)
        actions[0] = ppo_agent.compute_action(observation=featurize(obs[0]),
                                              policy_id="policy_0")
        actions[2] = ppo_agent.compute_action(observation=featurize(obs[2]),
                                              policy_id="policy_0")
        obs, reward, done, info = env.step(actions)
        print("reward:", reward)
        print("done:", done)
        print("info:", info)
        print("=========")
    env.render(close=True)
    # env.close()