Esempio n. 1
0
 def regret(self, action):
     """Return the expected-reward gap between the oracle's action and ``action``.

     The weight of every arm in ``self.arms`` is computed as
     ``self.theta.dot(self.xt[arm])`` and passed, together with
     ``self.params(None)``, to ``self.oracle`` to obtain the reference
     action. The weights of the arms in both actions are then mapped
     through ``w / 2 + 0.5`` and scored with ``ereward``.

     Args:
         action: iterable of arms (keys of ``self.xt``) that was played.

     Returns:
         ``ereward`` of the oracle's action minus ``ereward`` of ``action``,
         both evaluated with ``self.gamma`` and ``self.disj``.
     """
     Ew = {arm: self.theta.dot(self.xt[arm]) for arm in self.arms}
     opt = self.oracle(Ew, *self.params(None))
     # presumably rescales weights in [-1, 1] to probabilities in [0, 1] -- TODO confirm
     p = [self.theta.dot(self.xt[arm]) / 2 + 0.5 for arm in action]
     popt = [self.theta.dot(self.xt[arm]) / 2 + 0.5 for arm in opt]
     # Each expected reward is evaluated exactly once.
     return ereward(popt, self.gamma, self.disj) - ereward(p, self.gamma, self.disj)
Esempio n. 2
0
 def regret(self, action):
     """Expected-reward shortfall of ``action`` relative to the oracle's pick.

     Every arm's weight is ``self.theta.dot(self.xt[arm]) + self.b``. The
     weights of all arms in ``self.arms`` are handed to ``self.oracle``
     (with ``self.params(True)``) to select the reference action; the
     weights of both actions are then mapped through ``w / 2 + 0.5`` and
     scored with ``ereward`` using ``self.gamma`` and ``self.disj``.

     Args:
         action: iterable of arms (keys of ``self.xt``) that was played.

     Returns:
         ``ereward`` of the oracle's action minus ``ereward`` of ``action``.
     """
     def weight(arm):
         # Linear score of one arm, including the bias term.
         return self.theta.dot(self.xt[arm]) + self.b

     expected = {}
     for arm in self.arms:
         expected[arm] = weight(arm)
     best = self.oracle(expected, *self.params(True))

     chosen_probs = [weight(arm) / 2 + 0.5 for arm in action]
     best_probs = [weight(arm) / 2 + 0.5 for arm in best]

     best_value = ereward(best_probs, self.gamma, self.disj)
     chosen_value = ereward(chosen_probs, self.gamma, self.disj)
     return best_value - chosen_value
Esempio n. 3
0
def isp_Coracle(U, G, u, v, k, gamma):
    """Pick the best of the first ``k`` simple ``u``-``v`` paths by expected reward.

    Args:
        U: mapping from an edge ``(x, y)`` to its score. Path edges are
            looked up with their two endpoints in sorted order.
        G: networkx graph. Mutated in place: every edge listed in ``U``
            gets its ``'weight'`` attribute set to 1.
        u: source node.
        v: target node.
        k: maximum number of candidate paths to examine.
        gamma: forwarded unchanged to ``ereward``.

    Returns:
        The best path as a list of edges, each edge a sorted 2-tuple of
        nodes, in path order.

    Raises:
        ValueError: if no path is examined (for example ``k == 0``),
            because the maximum of an empty mapping is undefined.
    """
    # Only the edge keys are needed; every known edge gets unit weight.
    for x, y in U:
        G[x][y]['weight'] = 1

    # NOTE(review): no ``weight=`` argument is passed to
    # shortest_simple_paths, so the attribute set above does not appear to
    # influence the enumeration order -- confirm this is intended.
    rewards = {}
    for path in islice(networkx.shortest_simple_paths(G, u, v), k):
        # Consecutive node pairs form the path's edges, normalised to
        # sorted order so they match the keys of U.
        edges = tuple(tuple(sorted(pair)) for pair in zip(path, path[1:]))
        probs = [(U[edge] + 1) / 2 for edge in edges]
        rewards[edges] = ereward(probs, gamma, False)
    return list(max(rewards, key=rewards.get))