def optimize(self, env, t):
    """Return the arms to pull at period ``t``.

    The solver is invoked as ``optimal(env, t, t)``. When ``self.method``
    is ``"cycles"`` the entry ``["matched_cycles"][t]`` is returned as a
    list of tuples, each tuple holding one cycle's members in sorted order.
    For any other method the ``["matched"][t]`` entry is returned untouched.
    """
    if self.method != "cycles":
        return optimal(env, t, t)["matched"][t]

    cycles_now = optimal(env, t, t)["matched_cycles"][t]
    # Sorting makes the tuple independent of the orientation of the cycle.
    return [tuple(sorted(cyc)) for cyc in cycles_now]
def get_features(env):
    """Build per-node features and match labels for every period of ``env``.

    The full-horizon solution ``optimal(env)`` provides the labels: a node
    is labelled 1 when it appears in ``opt["matched"][t]`` and 0 otherwise.
    Only nodes whose diagonal entry of ``A @ A`` is positive are kept
    (i.e. nodes lying on a 2-cycle, assuming ``env.A(t)`` is a non-negative
    adjacency matrix -- TODO confirm).

    After each period the matched nodes are added to
    ``env.removed_container[t]``, so ``env`` is mutated by this call.

    Returns:
        A pair ``(features, labels)``: the row-wise stack of
        ``[env.X(t) | node2vec embedding]`` over all periods, and the
        concatenated 0/1 label vector.

    Raises:
        ValueError: from ``np.vstack`` when no period produced any features.
    """
    opt = optimal(env)
    features = []
    labels = []
    for t in range(env.time_length):
        liv = np.array(env.get_living(t))
        A = env.A(t)
        has_cycle = np.diag(A @ A) > 0
        liv = liv[has_cycle]

        m = opt["matched"][t]
        labels.append(np.isin(liv, list(m)).astype(float))

        if len(liv) > 0:
            X = env.X(t)[has_cycle]
            E = run_node2vec(env.subgraph(liv))
            features.append(np.hstack([X, E]))

        # Remove the nodes matched at t. The original called update() with
        # no argument, which is a no-op and left matched nodes in place.
        env.removed_container[t].update(m)
    return np.vstack(features), np.hstack(labels)
def get_cost(self, v: np.ndarray):
    """Return the negated per-period reward of the cycle selection ``v``.

    ``v`` is read as an indicator vector over ``self.cycles``: entries equal
    to 1 select the corresponding cycle. The selected nodes are removed from
    a snapshot of the environment at ``self.t``, later periods are populated
    with a clock-based seed, and the snapshot is solved with ``optimal``.
    The reward is the solver's ``"obj"`` plus the number of selected nodes;
    the function returns ``-(reward / self.h)``.
    """
    chosen = [self.cycles[idx] for idx, flag in enumerate(v) if flag == 1]
    cycle = list(chain.from_iterable(chosen))

    snap = snapshot(self.env, self.t)
    snap.removed_container[self.t].update(cycle)
    snap.populate(self.t + 1, self.t + self.h + 1, seed=clock_seed())

    solution = optimal(snap, self.t, self.h + 1)
    reward = solution["obj"] + len(cycle)
    return -(reward / self.h)
def count_matched_today(env, t_begin, t_end, taken, n_times=10):
    """Return the fraction of simulations whose solution contains ``taken``.

    One snapshot of ``env`` at ``t_begin`` is reused for all ``n_times``
    rounds. In each round, periods ``t_begin + 1`` up to ``t_end`` are
    re-populated (only when that range is non-empty), the snapshot is
    solved, and ``taken`` is looked up in the solution's
    ``["matched_cycles"][t_begin]`` entry.
    """
    snap = snapshot(env, t_begin)
    needs_future = t_begin + 1 < t_end
    hits = 0
    for _ in range(n_times):
        if needs_future:
            snap.populate(t_begin + 1, t_end)
        cycles_today = optimal(snap, t_begin, t_end)["matched_cycles"][t_begin]
        if taken in cycles_today:
            hits += 1
    return hits / n_times
def get_actions(env, t, n_times=100):
    """Collect the distinct matchings returned by repeated solves at ``t``.

    ``optimal(env, t, t)`` is called up to ``n_times`` times; each
    ``"matched_pairs"`` result is stored as a sorted tuple. Sampling stops
    at the first empty result. The returned list holds the distinct
    matchings (in arbitrary order) followed by the empty action ``()``.
    """
    distinct = set()
    for _ in range(n_times):
        pairs = optimal(env, t, t)["matched_pairs"]
        if len(pairs) == 0:
            break
        distinct.add(tuple(sorted(pairs)))
    return list(distinct) + [()]
def get_cycles(env, t, n_times):
    """Return, in shuffled order, the distinct cycles seen at period ``t``.

    The solver is run ``n_times`` as ``optimal(env, t, t)``; every cycle in
    ``["matched_cycles"][t]`` is converted to a tuple and de-duplicated.
    """
    seen = set()
    for _ in range(n_times):
        for cyc in optimal(env, t, t)["matched_cycles"][t]:
            seen.add(tuple(cyc))
    cycles = list(seen)
    shuffle(cycles)
    return cycles
def best_cycle(net, env, t, thres=0):
    """Pick the cycle at period ``t`` with the highest mean policy score.

    Candidate cycles come from ``optimal(env, t, t)["matched_cycles"][t]``.
    Each cycle is scored by the mean of the ``evaluate_policy`` outputs of
    its nodes, where a node's output is looked up by its position in
    ``env.get_living(t)``. Cycles of any length are supported (the original
    unpacked exactly two nodes and failed on longer cycles).

    Args:
        net: Policy network passed through to ``evaluate_policy``.
        env: Environment providing ``get_living``.
        t: Period to evaluate.
        thres: Minimum score the best cycle must reach to be returned.

    Returns:
        The best cycle as a tuple, or ``None`` when nobody is living, there
        are no candidate cycles, or the best score is below ``thres``.
    """
    # Map node id -> row position in the policy output.
    liv_idx = {node: pos for pos, node in enumerate(env.get_living(t))}
    if not liv_idx:
        return None

    a_probs = evaluate_policy(net, env, t)
    cycles = [tuple(cyc) for cyc in optimal(env, t, t)["matched_cycles"][t]]
    if not cycles:
        return None

    np_probs = np.zeros(len(cycles))
    for q, cyc in enumerate(cycles):
        np_probs[q] = np.mean([a_probs[liv_idx[node]] for node in cyc])

    selected = np.argmax(np_probs)
    if np_probs[selected] >= thres:
        return cycles[selected]
    return None
def rollout(env, t_begin, t_end, taken, gamma):
    """Return the discounted mean match count of one simulated future.

    A snapshot of ``env`` at ``t_begin`` is populated from ``t_begin + 1``
    to ``t_end`` with a clock-based seed, the nodes in ``taken`` are removed
    at ``t_begin``, and the remainder is solved with ``optimal`` starting at
    ``t_begin + 1``. The first entry of the per-period match counts is
    overwritten with ``len(taken)`` before ``disc_mean`` is applied with
    ``gamma``.
    """
    snap = snapshot(env, t_begin)
    snap.populate(t_begin + 1, t_end, seed=clock_seed())
    snap.removed_container[t_begin].update(taken)

    solution = optimal(snap, t_begin + 1, t_end)
    per_period = get_n_matched(solution["matched"], t_begin, t_end)
    per_period[0] = len(taken)

    # NOTE: a greedy-baseline value used to be subtracted here; that
    # baseline is disabled, so the optimal value is returned as is.
    return disc_mean(per_period, gamma)
def get_features(env):
    """Collect per-period feature blocks and match labels from ``env``.

    For each period, nodes whose diagonal entry of ``A @ A`` is positive
    are kept (nodes on a 2-cycle, assuming ``env.A(t)`` is a non-negative
    adjacency matrix -- TODO confirm). Periods without such nodes are
    skipped entirely. For the kept nodes three feature blocks are built:
    ``env.X(t)``, a node2vec embedding of the induced adjacency matrix, and
    ``get_additional_regressors``. Labels mark which kept nodes appear in
    ``optimal(env)["matched"][t]``. Matched nodes are then added to
    ``env.removed_container[t]``, so ``env`` is mutated.

    Returns:
        ``(pair_features, networkx_features, node2vec_features, labels)``,
        four lists with one array per non-skipped period.

    Raises:
        ValueError: if the three feature blocks of a period disagree on the
            number of rows. Other errors from the helpers propagate; the
            original trapped them in an interactive ``pdb`` session.
    """
    opt = optimal(env)
    labels = []
    pair_features = []
    networkx_features = []
    node2vec_features = []
    for t in trange(env.time_length):
        liv = np.array(env.get_living(t))
        A_full = env.A(t)
        # Equivalent to diag(A_full @ A_full) without forming the product.
        has_cycle = np.einsum("ij,ji->i", A_full, A_full) > 0
        if not np.any(has_cycle):
            continue

        X = env.X(t)[has_cycle]
        A = A_full[has_cycle, :][:, has_cycle]
        E = run_node2vec(A)
        G = get_additional_regressors(env, t, dtype="numpy")[has_cycle]

        # Validate before touching any output list so the four lists can
        # never end up with different lengths.
        if not (G.shape[0] == E.shape[0] == X.shape[0]):
            raise ValueError(
                "feature blocks disagree on row count at t={}: "
                "G={}, E={}, X={}".format(
                    t, G.shape[0], E.shape[0], X.shape[0]))

        m = opt["matched"][t]
        Y = np.isin(liv[has_cycle], list(m)).astype(int)
        env.removed_container[t].update(m)

        labels.append(Y)
        pair_features.append(X)
        networkx_features.append(G)
        node2vec_features.append(E)
    return pair_features, networkx_features, node2vec_features, labels
horizon = 10 newseed = str(np.random.randint(1e8)) train = True disc = 0.1 net = torch.load("results/RNN_50-1-abo_4386504") #%% for k in [2]: print("Creating environment") env = ABOKidneyExchange(entry_rate, death_rate, time_length, seed=k) print("Solving environment") opt = optimal(env) gre = greedy(env) o = get_n_matched(opt["matched"], 0, env.time_length) g = get_n_matched(gre["matched"], 0, env.time_length) rewards = [] actions = [] t = -1 print("Beginning") #%% for t in range(env.time_length): living = np.array(env.get_living(t)) if len(living) == 1: continue
max_time = 200 n_sims = 100 #choice([1, 5, 10, 20, 50, 100, 500, 1000, 2000]) n_prior = 50 #choice([1, 5, 10, 20, 50]) seed = 123456 #clock_seed() print("Opening file", file) try: net = torch.load("results/" + file) except Exception as e: print(str(e)) continue env_type = "abo" env = ABOKidneyExchange(entry_rate, death_rate, max_time, seed=seed) opt = optimal(env) gre = greedy(env) o = get_n_matched(opt["matched"], 0, env.time_length) g = get_n_matched(gre["matched"], 0, env.time_length) rewards = np.zeros(env.time_length) #%% np.random.seed(clock_seed()) for t in range(env.time_length): probs, count = evaluate_policy(net, env, t, dtype="numpy") for i in range(count): probs, _ = evaluate_policy(net, env, t, dtype="numpy") cycles = two_cycles(env, t)
from random import choice

import numpy as np
from tqdm import trange

from matching.utils.env_utils import two_cycles
from matching.solver.kidney_solver2 import optimal, greedy
from matching.utils.data_utils import clock_seed, get_n_matched
from matching.environment.abo_environment import ABOKidneyExchange
from matching.bandits.combinatorial import CombinatorialBandit

# Script: run a combinatorial bandit over every period of one ABO
# environment and record how many nodes it matches per period.

env = ABOKidneyExchange(4, .1, 101)
rewards = np.zeros(env.time_length)

# Reference solutions over the whole horizon, kept for comparison.
opt = optimal(env)
gre = greedy(env)
o = get_n_matched(opt["matched"], 0, env.time_length)
g = get_n_matched(gre["matched"], 0, env.time_length)

for t in trange(env.time_length):
    cycles = two_cycles(env, t)
    print("len(cycles): ", len(cycles))
    if len(cycles) == 0:
        # Nothing can be matched this period; its reward stays at zero.
        continue

    algo = CombinatorialBandit(env, t, iters_per_arm=10, max_match=4)
    algo.simulate()
    best = algo.choose()
    env.removed_container[t].update(best)
    rewards[t] = len(best)
def greedy_actions(env, t, n_repeats):
    """Return the set of distinct ``"matched"`` results at period ``t``.

    ``optimal(env, t, t)`` is solved ``n_repeats`` times and each
    ``["matched"][t]`` entry is stored as a tuple.
    """
    return {
        tuple(optimal(env, t, t)["matched"][t])
        for _ in range(n_repeats)
    }
net.opt = torch.optim.Adam(net.parameters(), lr=0.0001) #%% for epoch in range(5): for seed in [0, 1, 2] * 5: print("Creating environment") env = OPTNKidneyExchange(entry_rate, death_rate, time_length, seed=seed) print("Solving environment") opt = optimal(env) gre = greedy(env) o = get_n_matched(opt["matched"], 0, env.time_length) g = get_n_matched(gre["matched"], 0, env.time_length) rewards = [] actions = [] t = 0 print("Beginning") for t in range((2 * horizon + 1) * (epoch + 1)): print("Getting living") living = np.array(env.get_living(t)) if len(living) > 1:
seed = clock_seed() args = {'entry_rate': entry_rate, 'death_rate': death_rate, 'time_length': 1001, 'seed': seed} if envtype == "abo": env = ABOKidneyExchange(, elif envtype == "saidman": env = SaidmanKidneyExchange(, elif envtype == "optn": env = OPTNKidneyExchange(**args) opt = optimal(env) gre = greedy(env) g = get_n_matched(gre["matched"], 0, env.time_length) o = get_n_matched(opt["matched"], 0, env.time_length) #%% rewards = np.zeros(env.time_length) for t in trange(env.time_length): liv = np.array(env.get_living(t)) A = env.A(t) has_cycle = np.diag(A @ A) > 0 liv_and_cycle = liv[has_cycle] yhat_full = np.zeros(len(liv), dtype=bool)