if __name__ == '__main__':
  # use rockNum == 0 to represent a test case
  rewardCandNum = 3

  """
  def r1(s, a):
    if s == (0, 0) and a == (1, 0): return 0.9
    elif s == (0, 1) and a == (0, 1): return 0.6
    else: return 0

  def r2(s, a):
    if s == (0, 1) and a == (1, 0): return 1
    elif s == (0, 1) and a == (0, 1): return 0.6
    else: return 0

  def r3(s, a):
    if s == (0, 0) and a == (1, 0): return 0.45
    elif s == (0, 1) and a == (1, 0): return 0.5
    elif s == (0, 1) and a == (0, 1): return 0.6
    else: return 0

  rewardSet = [r1, r2, r3]
  """

  # indicator rewards: each candidate pays off only at one goal state
  r1 = lambda s, a: s == (2, 10)
  r2 = lambda s, a: s == (1, 10)
  r3 = lambda s, a: s == (3, 10)
  rewardSet = [r1, r2, r3]

  # uniform prior over the reward candidates
  initialPhi = [1.0 / rewardCandNum] * rewardCandNum

  # cmp (the controlled Markov process) is assumed to be constructed earlier in this script
  experiment(cmp, rewardSet, initialPhi)
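  # A minimal sanity check of the candidates above (illustrative, assuming
  # states are (x, y) tuples as the lambdas suggest): each candidate rewards
  # exactly one goal cell, and the prior sums to one.
  assert r1((2, 10), None) and not r1((1, 10), None)
  assert abs(sum(initialPhi) - 1.0) < 1e-9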
def rewardGen(rocks, numerical):
  # returns a reward function that pays `numerical` on the given
  # state-action pairs and 0 everywhere else
  def rewardFunc(s, a):
    if (s, a) in rocks:
      return numerical
    else:
      return 0
  return rewardFunc

rewardSet = []

"""
# a case where a trajectory query (actually a state-action preference query)
# performs worse than policy queries
rewardSet.append(rewardGen([((0, 0), 0), ((0, 1), 0)], 1))
rewardSet.append(rewardGen([((0, 0), 1), ((0, 1), 0)], 1))
rewardSet.append(rewardGen([((0, 0), 0), ((0, 1), 1)], 1))
rewardSet.append(rewardGen([((0, 0), 1), ((0, 1), 1)], 1))
"""

terminalReward = util.Counter()
cmp = TabularNavigationKWay(responseTime, width, height, horizon=horizon, terminalReward=terminalReward)

# sample rockNum state-action pairs per candidate; each candidate pays a
# random magnitude on its sampled pairs
rocks = cmp.getStateActionPairs()
for candId in xrange(rewardCandNum):
  sampledRocks = random.sample(rocks, rockNum)
  rewardSet.append(rewardGen(sampledRocks, random.random()))

initialPhi = [1.0 / rewardCandNum] * rewardCandNum

experiment(cmp, rewardSet, initialPhi)
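# A quick illustration of rewardGen (assuming the membership test above):
# the returned function pays the given magnitude only on the listed pairs.
exampleReward = rewardGen([((0, 0), 0)], 0.5)
assert exampleReward((0, 0), 0) == 0.5
assert exampleReward((0, 0), 1) == 0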
for opt, arg in opts:
  if opt == '-t':
    config.TRAJECTORY_LENGTH = int(arg)
  elif opt == '-n':
    rewardCandNum = int(arg)
  elif opt == '-k':
    config.NUMBER_OF_RESPONSES = int(arg)
  elif opt == '-r':
    random.seed(int(arg))
    numpy.random.seed(int(arg))

terminalReward = util.Counter()

# three-state domain
"""
cmp = ThreeStateToy(responseTime, horizon, terminalReward)
ws = [(-1,), (1,)]
"""

# rock collection
cmp = RockCollection(responseTime, width, height, horizon, terminalReward, rockNum)
ws = [[random.random() for dim in range(config.DIMENSION)] for _ in range(rewardCandNum)]

rewardCandNum = len(ws)
initialPhi = [1.0 / rewardCandNum] * rewardCandNum

config.opts = '_'.join(map(str, [rewardCandNum, config.NUMBER_OF_RESPONSES]))

experiment(cmp, ws, initialPhi)
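# Example invocation (script name hypothetical): trajectory length 3,
# 5 reward candidates, 2 responses per query, random seed 0:
#   python rockCollectionExp.py -t 3 -n 5 -k 2 -r 0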
for _ in xrange(puddleNum):
  loc = (random.randint(0, width - puddleSize - 1), random.randint(0, height - puddleSize - 1))
  #loc = puddleLocs[_]
  puddle = [(x, y) for x in range(loc[0], loc[0] + puddleSize)
                   for y in range(loc[1], loc[1] + puddleSize)]
  puddles.append(puddle)
  for rockID in xrange(rockPerPuddle):
    rocks.append(random.choice(puddle))

# later rocks are worth more: rock i is worth i + 1
rewardBasic = util.Counter()
for id in xrange(len(rocks)):
  rewardBasic[rocks[id]] = id + 1

# TODO use reward gen
# each reward candidate encodes one belief about where the puddle is
rewardCandNum = puddleNum
rewards = []
for candId in xrange(rewardCandNum):
  reward = rewardBasic.copy()
  puddle = puddles[candId]
  for loc in puddle:
    reward[loc] = -10
  rewards.append(reward)

initialPhi = [1.0 / len(rewards)] * len(rewards)

# penalize exiting through the left and right thirds of the top row
terminalReward = util.Counter()
for x in range(width / 3) + range(width * 2 / 3, width):
  terminalReward[(x, height - 1)] = -1000

allPuddles = sum(puddles, []) # concatenate the per-puddle lists into one list of cells

experiment(Domain, width, height, responseTime, horizon, rewardCandNum, allPuddles, rewards, initialPhi, terminalReward)
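# A small check of the terminal penalties above (illustrative): only the
# middle third of the top row is left as a safe exit.
safeExits = [x for x in range(width) if (x, height - 1) not in terminalReward]
assert safeExits == range(width / 3, width * 2 / 3)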