overpull_res = []

# Set the arms: two rested rotting Gaussian arms sharing the same noise level.
arms = [
    [
        RestedRottingGaussian,
        {
            # Mean is +mu for the first quarter of the horizon (counted in
            # pulls n of this arm), then flips to -mu.
            'decayingFunction': lambda n: mu if n <= HORIZON / 4 else -mu,
            'sigma': sigma,
        }
    ],
    [
        RestedRottingGaussian,
        {
            # Constant zero-mean arm.
            'decayingFunction': lambda n: 0,
            'sigma': sigma,
        }
    ],
]

# Run the policy under test on the arms.
rew, noisy_rew, time, pulls, cumul_pulls = repetedRuns(
    policy, arms, rep=REPETITIONS, T=HORIZON, parallel=PARALLEL)

# Run the greedy oracle once (rep=1) on the same arms as the baseline.
oracle_rew, noisy_oracle_rew, oracle_time, oracle_pull, oracle_cumul_pulls = repetedRuns(
    [GreedyOracle, {}], arms, rep=1, T=HORIZON, oracle=True)

# Regret is the oracle's reward minus the policy's reward.
regret = oracle_rew - rew
regret_res.append(regret)
time_res.append(time)

logging.info("EVENT : SAVING ... ")
np.save(regret_path, np.array(regret_res))
# Save the collected timings; the previous code wrote regret_res here, so the
# timing file silently contained a copy of the regret data.
np.save(time_path, np.array(time_res))
logging.info("EVENT : END ... ")
'per_arm_restart': True, 'sig2': SIGMA**2 } ], [Exp3S, { 'alpha': 1 / T, 'gamma': (K * V / T)**(1 / 3) }], ] policy_ind = 2 if len(sys.argv) == 1 else int(sys.argv[2]) policy = policies[policy_ind] policy_name = str(policy[0](nbArms=2, **policy[1])) policy_name_nospace = policy_name.replace(' ', '_') logging.info("CONSTANT CONFIG : POLICY " + str(i) + " " + policy_name) rew, noisy_rew, time, pulls, cumul_pulls = repetedRuns(policy, arms, rep=REPETITIONS, T=HORIZON, parallel=PARALLEL) oracle_rew = df.max(axis=1).cumsum().to_numpy() regret = DRAWS * oracle_rew - rew logging.info("EVENT : SAVING ... ") path_regret = os.path.join('./data/DAY_%s_REGRET_%s_%s' % (DAY, policy_name_nospace, date)) path_time = os.path.join('./data/DAY_%s_TIME_%s_%s' % (DAY, policy_name_nospace, date)) np.save(path_regret, regret) np.save(path_time, time) logging.info("EVENT : END ... ")