def runAlgorithms(self, algorithms, real_mode=False):
    """Run every bandit algorithm for ``self.iterations`` rounds and log rewards.

    Args:
        algorithms: mapping of algorithm name -> algorithm object exposing
            ``decide()`` and ``updateParameters(S, live_nodes, live_edges, iter_)``.
        real_mode: when True, simulate with ``runReal_DC`` on the real graph
            instead of the probability-model simulators.
    """
    self.algorithms = algorithms

    # Fresh reward histories for every algorithm before the run starts.
    for name in list(algorithms):
        self.AlgReward[name] = []
        self.averageReward[name] = []

    self.resultRecord()

    for iter_ in range(self.iterations):
        for name, algorithm in list(algorithms.items()):
            S = algorithm.decide()

            # Pick the simulator: real cascade, DILinUCB-specific DC, or plain DC.
            # Algorithms whose name starts with 'DI' use the DILinUCB variant.
            if real_mode:
                reward, live_edges, live_nodes = runReal_DC(self.G, S)
            elif name.startswith('DI'):
                reward, live_edges, live_nodes = runDC_DILinUCB(self.G, self.Prob, S)
            else:
                reward, live_edges, live_nodes = runDC(self.G, self.Prob, S)

            # Feed the observed cascade back into the algorithm's model.
            algorithm.updateParameters(S, live_nodes, live_edges, iter_)

            print("rewards: " + str(reward))
            self.AlgReward[name].append(reward)

        # Record once per round (outer loop) — NOTE(review): the flattened
        # source does not encode indentation; confirm this placement.
        self.resultRecord(iter_)
def simulate(self, S):
    """Simulate one cascade from seed set ``S`` on the real probabilities.

    Returns:
        tuple: ``(live_edges, reward)`` from a single ``runDC`` run.
    """
    outcome = runDC(self.G, self.real_P, S)
    total_reward, edges, _nodes = outcome
    return edges, total_reward
def simulate(self, S):
    """Run a single DC cascade seeded with ``S`` under the true probabilities.

    Returns:
        tuple: ``(live_edges, reward)``; the set of live nodes is discarded.
    """
    result = runDC(self.G, self.real_P, S)
    gained_reward = result[0]
    activated_edges = result[1]
    return activated_edges, gained_reward
def simulate(self, S):
    """Simulate the diffusion cascade for seed set ``S``.

    Uses the ground-truth edge probabilities ``self.real_P`` and the graph
    ``self.G``; only the live edges and the reward are exposed to callers.

    Returns:
        tuple: ``(live_edges, reward)``.
    """
    cascade_reward, cascade_edges, _live = runDC(self.G, self.real_P, S)
    return cascade_edges, cascade_reward