def SuperHeuristicLinearBeta(delta, X):
    '''Exploration rate t -> log(1/delta) + N * log(log(t+1)) / 2,
    where N is the feature dimension (number of rows of @X).'''
    assert is_of_type(X, "numpy.matrix")
    assert is_of_type(delta, "float")
    assert 0 < delta and delta < 1
    N = np.shape(X)[0]

    def beta(t):
        return np.log(1 / float(delta)) + N * np.log(np.log(t + 1)) / float(2)

    return beta
def beta_factory(beta, args):
    '''Factory for betas: returns a lambda function for the exploration rate
    initialized on problem values stored in @args.

    @param beta: name of the exploration-rate family (a key of the dispatch table)
    @param args: dict of problem values ("delta", "X", "alpha", "sigma",
                 "k1_diff", "eta", "S") -- only the keys needed by the chosen
                 beta are accessed
    @return: callable t -> beta(t)
    @raise ValueError: if @beta is not a known exploration-rate name
    '''
    assert is_of_type(args, "dict")
    assert is_of_type(beta, "str")
    ## Each entry is lazy: sibling *Beta constructors are only called for the
    ## selected name, so unused keys of @args are never touched
    di = {
        "AlphaDependent": (lambda _: AlphaDependentBeta(args["alpha"], args["delta"], args["X"])),
        "LUCB1": (lambda _: LUCB1Beta(args["delta"], args["X"])),
        "Hoeffding": (lambda _: HoeffdingBeta(args["delta"], args["sigma"])),
        "Heuristic": (lambda _: HeuristicBeta(args["delta"])),
        "HeuristicLinear": (lambda _: HeuristicLinearBeta(args["delta"], args["X"])),
        "SuperHeuristicLinear": (lambda _: SuperHeuristicLinearBeta(args["delta"], args["X"])),
        "KLLUCB": (lambda _: KLLUCBBeta(args["alpha"], args["k1_diff"], args["X"], args["delta"])),
        "Frequentist": (lambda _: FrequentistBeta(args["X"], args["sigma"], args["eta"], args["S"], args["delta"])),
        "Informational": (lambda _: InformationalBeta(args["X"], args["delta"])),
        "Deviational": (lambda _: DeviationalBeta(args["alpha"], args["X"], args["delta"])),
    }
    ## Fix: idiomatic membership test, and raise ValueError WITH the message
    ## (previously a bare `raise ValueError` after a print)
    if (beta not in di):
        msg = "\"" + beta + "\" not in " + str(list(di.keys()))
        print(msg)
        raise ValueError(msg)
    return di[beta](0)
def AlphaDependentBeta(alpha, delta, X):
    '''Exploration rate t -> log(K * zeta(alpha) * t^alpha / delta),
    where K is the number of arms (columns of @X).'''
    assert is_of_type(alpha, "float")
    assert alpha > 1
    assert is_of_type(X, "numpy.matrix")
    assert is_of_type(delta, "float")
    assert 0 < delta and delta < 1
    from scipy.special import zeta
    riemann_zeta = zeta(alpha)
    n_arms = np.shape(X)[1]

    def beta(t):
        return np.log(n_arms * riemann_zeta * (t**alpha) / delta)

    return beta
def DeviationalBeta(alpha, X, delta):
    '''Exploration rate t -> log((K-1) * zeta(alpha) * t^alpha / delta),
    where K is the number of arms (columns of @X).'''
    assert is_of_type(alpha, "float")
    assert alpha > 1
    assert is_of_type(X, "numpy.matrix")
    assert is_of_type(delta, "float")
    assert 0 < delta and delta < 1
    from scipy.special import zeta
    _, n_arms = np.shape(X)
    constant = (n_arms - 1) * zeta(alpha)

    def beta(t):
        return np.log(constant * (t**alpha) / float(delta))

    return beta
def KLLUCBBeta(alpha, k1_diff, X, delta):
    '''KL-LUCB exploration rate: t -> log(k1 * K * t^alpha / delta),
    with k1 = 1 + 1/(alpha - 1) + k1_diff; k1_diff > 0 keeps k1 strictly
    above the theoretical lower bound 1 + 1/(alpha - 1).

    @param alpha: float > 1
    @param k1_diff: float > 0, slack added to the bound on k1
    @param X: numpy.matrix of arm features (only K = number of columns is used)
    @param delta: confidence level in (0, 1)
    @return: callable t -> beta(t)
    '''
    assert is_of_type(alpha, "float")
    assert alpha > 1
    assert is_of_type(k1_diff, "float")
    assert k1_diff > 0
    assert is_of_type(X, "numpy.matrix")
    assert is_of_type(delta, "float")
    assert 0 < delta and delta < 1
    K = np.shape(X)[1]
    k1 = 1 + 1 / float(alpha - 1) + k1_diff
    ## Fix: removed the unused local lambda B that duplicated the returned
    ## expression (dead code)
    return (lambda t: np.log(k1 * K * (t**alpha) / delta))
def print_names(bandit, min_list=5, ndec=5):
    '''Report names/scores of the most recommended items vs the truly good ones.

    @param bandit: bandit instance (reads .problem, .m, .empirical_recommendation,
                   .empirical_means, .path_to_plots, .name)
    @param min_list: cap on the number of recommended items listed
    @param ndec: number of decimals for printed scores
    @return: the formatted report string (also written to a .txt file when a
             recommendation is available); None when no item names are known
    '''
    ## Fix: narrowed from a bare "except:" -- only a missing .names attribute
    ## should silently fall back to "no names available"
    try:
        names = bandit.problem.names
    except AttributeError:
        names = None
    if (not utils.is_of_type(bandit.empirical_recommendation, "NoneType")
            and not utils.is_of_type(names, "NoneType")):
        m = min(min_list, bandit.m)
        ids = utils.m_maximal(
            list(map(float, bandit.empirical_recommendation.tolist())), m)
        ## NOTE(review): true_ids uses m = min(min_list, bandit.m) here, but the
        ## header below claims "first bandit.m" items -- confirm intended count
        true_ids = utils.m_maximal(list(map(float, bandit.problem.oracle)), m)
        output = ""
        output += "-- Names of the first " + str(
            m) + " most recommended items across all " + str(
                args.n_simu) + " simulations:\n"
        output += str([bandit.problem.names[i] for i in ids]) + "\n"
        output += "-- Associated (empirical) scores:\n"
        output += str(
            list(
                map(lambda x: round(x, ndec),
                    [bandit.empirical_means[i] for i in ids]))) + "\n"
        output += "-- Names of the first " + str(
            bandit.m) + " 'truly good' items:\n"
        output += str([bandit.problem.names[i] for i in true_ids]) + "\n"
        output += "-- Associated (true) scores:\n"
        output += str(
            [round(bandit.problem.oracle[i], ndec) for i in true_ids]) + "\n"
        print(output)
        ## Persist the report next to the plots for this experiment
        with open(
                bandit.path_to_plots + bandit.name + "_recommendation_eps=" +
                str(args.epsilon) + "_beta=" + args.beta + ".txt", "w") as f:
            f.write(output)
        return output
    else:
        ## No recommendation yet: only report the ground-truth top-m items
        if (not utils.is_of_type(names, "NoneType")):
            assert utils.is_of_type(bandit.m, "int")
            assert utils.is_of_type_LIST(bandit.problem.oracle, "float")
            true_ids = utils.m_maximal(list(map(float, bandit.problem.oracle)),
                                       bandit.m)
            output = "-- Names of the first " + str(
                bandit.m) + " 'truly good' items:\n"
            output += str([bandit.problem.names[i] for i in true_ids]) + "\n"
            output += "-- Associated (true) scores:\n"
            output += str(
                [round(bandit.problem.oracle[i], ndec) for i in true_ids]) + "\n"
            return output
def FrequentistBeta(X, sigma, eta, S, delta):
    '''Frequentist exploration rate for linear bandits: returns
    t -> 0.5 * C(t)^2 where C(t) is the confidence-ellipsoid radius
    (in linear bandits, C = sqrt{2*beta}).'''
    assert is_of_type(sigma, "float")
    assert sigma > 0
    assert is_of_type(eta, "float")
    assert eta > 0
    assert is_of_type(X, "numpy.matrix")
    assert is_of_type(S, "float")
    assert S > 0
    assert is_of_type(delta, "float")
    assert 0 < delta and delta < 1
    N, K = np.shape(X)
    ## L = largest Euclidean norm over arm feature vectors
    arm_norms = [np.linalg.norm(X[:, a], 2) for a in range(K)]
    L = np.max(arm_norms)
    lambda_ = float(sigma / float(eta))

    def radius(t):
        return np.sqrt(2 * np.log(1 / float(delta)) + N * np.log(
            1 + (t + 1) * L**2 / float(lambda_**2 * N))) + lambda_ / float(sigma) * S

    return (lambda t: 0.5 * (radius(t))**2)
def create_scores_features(args, folder_path, normalized=False):
    '''Compute/retrieve feature matrix + normalized, if @normalized set to True, "oracle" scores from DR data @data'''
    ## NOTE(review): @normalized is only type-checked below and never read in
    ## this body -- confirm whether the normalization flag is still needed.
    ## Returns (X, scores, theta, names, df_di): X a numpy.matrix of arm
    ## features, scores the list of "oracle" arm scores, theta the linear
    ## parameter, names the item names (or None), df_di auxiliary data frames.
    data = args.data
    assert utils.is_of_type(data, "str")
    assert utils.is_of_type(normalized, "bool")
    #################################
    ## Using drug repurposing data ##
    #################################
    if (not (data in data_list)):
        assert not (utils.is_of_type(data, "NoneType"))
        assert data == "epilepsy"
        from constants import dr_folder
        ## Not considering the toy DR problem with 10 arms in the paper
        if (args.small_K != 10):
            ## Arm features
            X = import_df(dr_folder+data+"_signatures_nonbinarized.csv")
            ## Signatures that will be used to compute phenotypes through GRN
            ## S := binarize(X)
            S = import_df(dr_folder+data+"_signatures_binarized.csv")
            ## "True" drug scores
            A = import_df(dr_folder+data+"_scores.csv")
            ## Ordered by drug signature ids
            A, X, S = same_order_df(list(X.columns), [A, X, S], [0, 1, 1])
            names = list(A["drug_name"])
            scores = list(map(float, A["score"]))
            df_di = {"S": S, "X": X, "names": names}
            X = np.matrix(X.values)
        ## Subset of drugs where rewards were pre-recorded
        else:
            file_="rewards_cosine_10drugs_18samples"
            ## NOTE(review): file_features is assigned but never used below
            file_features="epilepsy_signatures.csv"
            ## Known anti-epileptics
            names = ["Hydroxyzine", "Acetazolamide", "Pentobarbital", "Topiramate", "Diazepam"]
            ## Known pro-convulsants
            names += ["Dmcm", "Brucine", "Fipronil", "Flumazenil", "Fg-7142"]
            assert len(names) == 10
            drug_ids, drug_positions = utils.get_drug_id(names, dr_folder+file_+".txt")
            assert not any([str(s) == "None" for s in drug_ids])
            A = import_df(dr_folder+data+"_scores.csv")
            ## Re-index the score table by drug name, keeping CIDs as a column
            drug_cids = A.index
            A.index = A["drug_name"]
            A["drug_cid"] = drug_cids
            drug_cids = list(map(str, A.loc[names]["drug_cid"]))
            assert len(drug_cids) == len(names)
            X = import_df(dr_folder+data+"_signatures.csv")
            S = import_df(dr_folder+data+"_signatures_binarized.csv")
            ## Ordered by drug signature ids
            X, S = same_order_df(drug_cids, [X, S], [1]*2)
            ## Empirical means of the pre-recorded rewards act as "true" scores
            rewards = pd.read_csv(dr_folder+file_+".csv", sep=" ", header=None)
            means = rewards.mean(axis=0).values
            scores = [float(means[i]) for i in drug_positions]
            df_di = {"S": S, "X": X, "names": names}
            X = np.matrix(X.values)
    #################################
    ## "Classic" linear bandit     ##
    #################################
    elif (data == "classic"):
        assert utils.is_of_type(args.omega, "float")
        print("Omega = " + str(round(args.omega, 3)))
        assert args.small_K and args.m and args.omega
        if (args.problem == "bernouilli"):
            assert np.cos(args.omega) >= 0
        ## canonical base in R^(K-1), modification from case m=1
        ## arms 1, ..., m have rewards == 1
        ## arm m+1 has reward cos(omega)
        ## arm m+2, ..., K have rewards == 0
        m, K, N, omega = args.m, args.small_K, args.small_K-1, args.omega
        assert m < N
        X = np.matrix(np.eye(N, K))
        X[0,:m] = 1
        X[:,(m+1):] = X[:,m:(K-1)]
        X[:,m] = np.cos(omega)*e(1, N)+np.sin(omega)*e(m+1, N)
        theta = e(1, N)
        scores = simulated_list["linear"](X, theta).flatten().tolist()[0]
    #################################
    ## Using simulated data        ##
    #################################
    ## same setting than the one where complexity constants are compared
    elif (data in list(simulated_list.keys())):
        max_it_gen = 500
        assert args.small_K
        assert args.small_N
        N, K = args.small_N, args.small_K
        ## Generated matrices are cached on disk and reused across runs
        matrix_file = folder_path+"generated_matrix_N="+str(N)+"_K="+str(K)+".csv"
        if (not os.path.exists(matrix_file)):
            done = False
            it = 0
            ## Rejection-sample Gaussian matrices until one has full column rank
            while (not done and it < max_it_gen):
                ## Normalizing the feature matrix
                X = np.matrix(np.random.normal(0, args.vr, (N, K)))
                X /= np.linalg.norm(X, 2)
                done = (np.linalg.matrix_rank(X) >= K)
                it += 1
            if (it == max_it_gen):
                print("Det value: "+str(np.linalg.det(np.dot(X.T, X))))
                print("Got unlucky...")
            np.savetxt(matrix_file, X)
        else:
            X = np.matrix(np.loadtxt(matrix_file), dtype=float)
        theta = e(1, N)
        scores = simulated_list[data](X, theta).flatten().tolist()[0]
    else:
        print("Data type not found!")
        raise ValueError
    if (not data in list(simulated_list.keys())):
        ## Linear regression to find the "true" theta
        theta = np.linalg.inv(X.dot(X.T)).dot(X.dot(np.array(scores).T).T)
        ## residual np.linalg.norm(X.T.dot(theta)-scores, 2)
        theta_file = folder_path+data+"_theta_K="+str(args.small_K)+".csv"
        np.savetxt(theta_file, theta)
    if (data in data_list):
        names = None
        df_di = {}
    assert theta.size == np.shape(X)[0]
    if (data in list(simulated_list.keys()) or data in ["classic"]):
        assert len(scores) == args.small_K
    ## If Bernouilli arms: means must belong to [0,1]
    if (args.problem == "bernouilli" and data in list(simulated_list.keys()) and data != "classic"):
        ## Regenerate X/theta so that all arm means land in [0,1]
        X = np.matrix(np.random.normal(0.5, 0.5, (args.small_N, args.small_K)))
        X /= np.linalg.norm(X, 2)
        theta = np.matrix(np.random.normal(0.5, 0.5, (args.small_N, 1)))
        theta /= np.linalg.norm(theta, 2)
        scores = list(map(float, theta.T.dot(X).tolist()[0]))
        assert np.all(np.array(scores) >= 0) and np.all(np.array(scores) <= 1)
    if (data == "linear"):
        ## Print Boolean test on complexity constants
        from compare_complexity_constants import compute_H_UGapE, compute_H_optimized_LinGapE
        H_LinGapE = compute_H_optimized_LinGapE(theta, X, args.epsilon, args.m)
        H_UGapE = compute_H_UGapE(theta, X, args.epsilon, args.m)
        print("Is H_LinGapE < 2*H_UGapE? : "+str(H_LinGapE < 2*H_UGapE))
        with open(folder_path+data+"_boolean_test_UGapE_LinGapE_"+data+"N="+str(N)+"_K="+str(K)+"_m="+str(args.m)+".txt", "w+") as f:
            s_ = ["H_LinGapE = "+str(H_LinGapE)]
            s_.append("H_UGapE = "+str(H_UGapE))
            s_.append("Is H_LinGapE < 2*H_UGapE? : "+str(H_LinGapE < 2*H_UGapE))
            f.write("\n".join(s_))
    return X, scores, theta, names, df_di
def HeuristicBeta(delta):
    '''Heuristic exploration rate t -> log((log(t) + 1) / delta).'''
    assert is_of_type(delta, "float")
    assert 0 < delta and delta < 1

    def beta(t):
        return np.log((np.log(t) + 1) / float(delta))

    return beta
def HoeffdingBeta(delta, sigma):
    '''Constant (time-independent) Hoeffding exploration rate t -> log(1/delta);
    @sigma is only validated here, not used in the rate.'''
    assert is_of_type(sigma, "float")
    assert is_of_type(delta, "float")
    assert 0 < delta and delta < 1
    assert sigma > 0

    def beta(t):
        return np.log(1 / float(delta))

    return beta
def LUCB1Beta(delta, X):
    '''LUCB1 exploration rate t -> log(5 * K * t^4 / (4 * delta)),
    where K is the number of arms (columns of @X).'''
    assert is_of_type(X, "numpy.matrix")
    assert is_of_type(delta, "float")
    assert 0 < delta and delta < 1
    n_arms = np.shape(X)[1]

    def beta(t):
        return np.log(5 * n_arms * (t**4) / (4 * delta))

    return beta
def InformationalBeta(X, delta):
    '''Informational exploration rate: t -> log(2 * t * (K - 1) / delta),
    where K is the number of arms (columns of @X).

    @param X: numpy.matrix of arm features (only K is used)
    @param delta: confidence level in (0, 1)
    @return: callable t -> beta(t)
    '''
    assert is_of_type(X, "numpy.matrix")
    assert is_of_type(delta, "float")
    assert 0 < delta and delta < 1
    N, K = np.shape(X)
    ## Bug fix: delta must divide the ARGUMENT of the log, not the log itself,
    ## consistently with every other exploration rate in this file
    ## (previously: np.log(2 * t * (K - 1)) / float(delta))
    return (lambda t: np.log(2 * t * (K - 1) / float(delta)))
def select_bandit(X, df_di, oracle, data=args.data, theta=None,
                  T_init=args.T_init, m=args.m, sigma=args.sigma,
                  alpha=args.alpha, eta=args.eta, k1_diff=args.k1_diff,
                  is_greedy=args.is_greedy, plot_step=args.plot_step,
                  epsilon=args.epsilon, delta=args.delta, verbose=args.verbose,
                  use_tracking=args.use_tracking, print_test=True):
    '''Creates the bandit instance associated with feature matrix @X, oracle scores @oracle, wrt. data @data, with bandit-specific parameters, for the (@epsilon, @delta)-PAC EXPLORE-m problem'''
    ## NOTE(review): @print_test is accepted but never read in this body -- confirm.
    ## Defaults are bound to the module-level `args` at definition time.
    ## --- Input validation -------------------------------------------------
    assert args.bandit
    assert utils.is_of_type(X, "numpy.matrix")
    N, K = np.shape(X)
    assert utils.is_of_type_LIST(oracle, "float")
    assert len(oracle) == K
    assert utils.is_of_type(data, "str")
    assert m > 0 and m < K
    assert utils.is_of_type(sigma, "float")
    assert utils.is_of_type(alpha, "float")
    assert alpha > 1
    assert utils.is_of_type(eta, "float")
    assert eta > 0
    assert utils.is_of_type(k1_diff, "float")
    assert k1_diff > 0
    assert utils.is_of_type(is_greedy, "bool")
    assert utils.is_of_type(epsilon, "float")
    assert epsilon >= 0
    assert utils.is_of_type(delta, "float")
    assert delta > 0
    assert utils.is_of_type(verbose, "bool")
    assert not utils.is_of_type(theta, "NoneType")
    ## S = Euclidean norm of theta, passed to betas needing a bound on ||theta||
    S = float(np.linalg.norm(theta, 2))
    assert utils.is_of_type(S, "float")
    assert args.beta
    assert S > 0
    args_problem = {
        "sigma": sigma,
        "grn_name": args.grn_name,
        "path_to_grn": args.path_to_grn
    }
    ## DR instances use data frames in order to match genes properly
    if (args.data in data_list):
        args_problem.update({"X": X, "S": None})
    else:
        args_problem.update(df_di)
    aproblem = args.problem
    if (args.problem == "epilepsy" and args.small_K == 10):
        aproblem += "Subset"
    ## Select problem object
    problem = problems.problem_factory(aproblem, oracle, data, args_problem,
                                       path_to_plots)
    ## Select threshold function
    beta = betas.beta_factory(
        args.beta, {
            "delta": delta,
            "alpha": alpha,
            "X": X,
            "sigma": sigma,
            "k1_diff": k1_diff,
            "eta": eta,
            "S": S
        })
    ## Annotate the experiment (string appended to all output file names)
    params = "_m=" + str(m) + "_delta=" + str(delta) + "_epsilon=" + str(
        epsilon) + "_problem=" + aproblem + "_sigma=" + str(sigma)
    if (args.data != "epilepsy"):
        params += "_K=" + str(args.small_K)
    params += "_alpha=" + str(alpha) + "_eta=" + str(eta)
    params += "_k1_diff=" + str(k1_diff)
    params += "_data=" + data + "_beta=" + args.beta
    ## Automatically perform finetuning on the T_unit parameter when the chosen bandit algorithm is TrueUniform without a provided value of T_unit
    if (utils.is_of_type(T_init, "NoneType") and args.bandit == "TrueUniform"):
        ## Grid over [start, end] with stride step; defaults 10/10/100
        start, step = 10 if (not args.start) else int(
            args.start), 10 if (not args.step) else int(args.step)
        end, n_simu = 100 if (not args.end) else int(args.end), args.n_simu
        print("Finetuning T_init in seq(" + str(start) + ", " + str(end) +
              ", " + str(step) + ") across " + str(n_simu) + " simulations "),
        trueunif = bandits.bandit_factory("TrueUniform", {
            "T_init": 1,
            "plot_name": args.bandit
        }, X, m, problem, theta, "feature", delta, epsilon, verbose, False,
            params, path_to_plots=path_to_plots, plot_step=plot_step)
        mat = trueunif.grid_search("T_init", start, step, end, data_name=data,
                                   n_simu=n_simu, get_plot=False)
        ## presumably column 2 of the grid-search result holds the correctness
        ## frequency per candidate T_init -- verify against grid_search
        pes = mat[:, 2]
        ## argmax = first (thus smallest) index such that the correctness frequency is maximal
        T_init = range(start, end + step + 1, step)[np.argmax(pes.T)]
        print("T_init = " + str(T_init))
        assert utils.is_of_type(T_init, "int")
        assert T_init > 0
        params += "_T_init=" + str(T_init)
    args_ = {
        "beta": beta,
        "plot_name": args.bandit + "_" + args.beta,
        "sigma": sigma,
        "alpha": alpha,
        "eta": eta,
        "k1_diff": k1_diff,
        "T_init": T_init,
        "is_greedy": is_greedy,
        "use_chernoff": args.use_chernoff,
        "use_tracking": use_tracking
    }
    bandit = bandits.bandit_factory(args.bandit, args_, X, m, problem, theta,
                                    "feature", delta, epsilon, verbose, False,
                                    params, path_to_plots, plot_step)
    return bandit
## Parsing the value of omega omega_str = "" if (args.data == "classic"): args.omega, omega_str = utils.parse_omega_str(args.omega) ## Define folder and file names associated with each experiment general_args = ["data", "bandit", "problem", "m", "n_simu", "verbose", "plot"] if (args.mode in ["small_test", "recommendation", "generate_latex"]): path_to_plots = target_folder + args.data + ("_" + omega_str if (args.data == "classic") else "") path_to_plots += ("_K" + str(args.small_K) if (args.data != "epilepsy" and ((args.mode == "small_test" or not utils.is_of_type(args.small_K, "NoneType")) and args.data not in ["classic", "epilepsy"]) or args.data in data_list) else "") path_to_plots += ("_N" + str(args.small_N) if (args.data in data_list and not args.data == "classic") else "") path_to_plots += "_" + args.problem + ("_m=" + str(args.m) if (args.m) else "") path_to_plots += "_delta=" + str(args.delta) path_to_plots += "_epsilon=" + str(args.epsilon) path_to_plots += "/" if (not os.path.isdir(path_to_plots)): sb.call("mkdir " + path_to_plots, shell=True) ## Save parameters with open(path_to_plots + "parameters.json", "w") as f: f.write(str(vars(args)))
def KL(self, i, t, args, which):
    '''Return the KL-divergence term for arm @i at time @t, between the mean
    and either its upper bound (interval [mean, 1]) or its lower bound
    (interval [0, mean]), depending on @which.'''
    assert utils.is_of_type(which, "str") and which in ["upper", "lower"]
    mean_ = self.gap(i, args)
    var_ = self.variance(i, t, args)
    if (which == "upper"):
        lower, upper = mean_, 1.
    else:
        lower, upper = 0., mean_
    return self.kl_div(mean_, var_, lower, upper)