def regret(self, action):
    """Return the ``ereward`` gap between the oracle's choice and ``action``.

    Every arm in ``self.arms`` is scored with ``self.theta.dot(self.xt[arm])``
    and the scores are handed to ``self.oracle`` (together with the unpacked
    ``self.params(None)``) to obtain the reference set of arms. The scores of
    both the reference arms and the arms in ``action`` are then mapped through
    ``score / 2 + 0.5`` and passed to ``ereward``.

    Args:
        action: Iterable of arms (keys of ``self.xt``) that was played.

    Returns:
        ``ereward`` of the oracle's arms minus ``ereward`` of ``action``,
        both evaluated with ``self.gamma`` and ``self.disj``.
    """
    # Scores the oracle optimises over, one per available arm.
    Ew = {arm: self.theta.dot(self.xt[arm]) for arm in self.arms}
    opt = self.oracle(Ew, *self.params(None))

    # score / 2 + 0.5 -- presumably rescales a score in [-1, 1] to a
    # probability in [0, 1]; TODO confirm the range of the scores.
    p = [self.theta.dot(self.xt[arm]) / 2 + 0.5 for arm in action]
    popt = [self.theta.dot(self.xt[arm]) / 2 + 0.5 for arm in opt]

    # Evaluated once: the previous version computed this difference twice and
    # left the first result in an unused local.
    return ereward(popt, self.gamma, self.disj) - ereward(p, self.gamma, self.disj)
def regret(self, action):
    """Return the ``ereward`` gap between the oracle's choice and ``action``.

    Each arm is scored as ``self.theta.dot(self.xt[arm]) + self.b``. The
    scores of all arms in ``self.arms`` are given to ``self.oracle`` (along
    with the unpacked ``self.params(True)``) to pick the reference arms. For
    the comparison, scores are mapped through ``score / 2 + 0.5`` before
    being passed to ``ereward``.

    Args:
        action: Iterable of arms (keys of ``self.xt``) that was played.

    Returns:
        ``ereward`` of the oracle's arms minus ``ereward`` of ``action``,
        both evaluated with ``self.gamma`` and ``self.disj``.
    """

    def score(arm):
        # Linear score of one arm, including the bias term.
        return self.theta.dot(self.xt[arm]) + self.b

    def rescaled(arm):
        # score / 2 + 0.5 -- presumably maps [-1, 1] onto [0, 1]; TODO confirm.
        return score(arm) / 2 + 0.5

    scores_by_arm = {arm: score(arm) for arm in self.arms}
    best_arms = self.oracle(scores_by_arm, *self.params(True))

    played = [rescaled(arm) for arm in action]
    best = [rescaled(arm) for arm in best_arms]

    best_value = ereward(best, self.gamma, self.disj)
    played_value = ereward(played, self.gamma, self.disj)
    return best_value - played_value
def isp_Coracle(U, G, u, v, k, gamma):
    """Pick, among the first ``k`` simple paths from ``u`` to ``v``, the one
    with the largest ``ereward``.

    Args:
        U: Mapping from an edge ``(x, y)`` to a numeric value. Path edges are
            looked up with their endpoints in sorted order, so keys are
            presumably stored that way -- verify against the caller.
        G: networkx graph containing every edge in ``U``. It is mutated:
            the ``'weight'`` attribute of each of those edges is set to 1.
        u: Source node.
        v: Target node.
        k: Maximum number of paths taken from
            ``networkx.shortest_simple_paths``.
        gamma: Forwarded unchanged as the second argument of ``ereward``.

    Returns:
        The best path as a list of edges, each a tuple of its two endpoints
        in sorted order.

    Raises:
        ValueError: If no path was examined (for example ``k == 0``), since
            ``max`` is then called on an empty mapping.
    """
    # Side effect kept from the original, which wrote ``max(1, 0)`` -- a
    # constant 1 -- to every edge listed in U.
    # NOTE(review): shortest_simple_paths is called without a ``weight``
    # argument below, so this attribute does not appear to influence the
    # enumeration here -- confirm whether other code reads it.
    for x, y in U:
        G[x][y]['weight'] = 1

    scores = {}
    for path in islice(networkx.shortest_simple_paths(G, u, v), k):
        # Consecutive node pairs, normalised so each edge matches its U key.
        edges = tuple(tuple(sorted(pair)) for pair in zip(path, path[1:]))
        probs = [(U[edge] + 1) / 2 for edge in edges]
        scores[edges] = ereward(probs, gamma, False)

    # On ties, max keeps the earliest inserted path, as before.
    return list(max(scores, key=scores.get))