if __name__ == '__main__':
  # use rockNum == 0 to represent a test case
  rewardCandNum = 3

  """
  def r1(s, a):
    if s == (0, 0) and a == (1, 0): return 0.9
    elif s == (0, 1) and a == (0, 1): return 0.6
    else: return 0
  def r2(s, a):
    if s == (0, 1) and a == (1, 0): return 1
    elif s == (0, 1) and a == (0, 1): return 0.6
    else: return 0
  def r3(s, a):
    if s == (0, 0) and a == (1, 0): return 0.45
    elif s == (0, 1) and a == (1, 0): return 0.5
    elif s == (0, 1) and a == (0, 1): return 0.6
    else: return 0
  rewardSet = [r1, r2, r3]
  """

  # each reward candidate pays off at a different goal cell; the boolean
  # comparison serves as a 0/1 reward
  r1 = lambda s, a: s == (2, 10)
  r2 = lambda s, a: s == (1, 10)
  r3 = lambda s, a: s == (3, 10)
  rewardSet = [r1, r2, r3]
  
  # start with a uniform belief over the reward candidates
  initialPhi = [1.0 / rewardCandNum] * rewardCandNum

  experiment(cmp, rewardSet, initialPhi)
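For reference, a belief vector like initialPhi can be collapsed with rewardSet into a single expected reward. The helper below is a minimal sketch for illustration (the name expectedReward is mine, not from the repository):

  # expected reward of (s, a) under belief phi over the reward candidates
  def expectedReward(rewardSet, phi, s, a):
    return sum(p * r(s, a) for p, r in zip(phi, rewardSet))

  # with the uniform prior above, expectedReward(rewardSet, initialPhi, (2, 10), None) evaluates to 1/3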
Example #2
        puddle = [(x, y) for x in range(loc[0], loc[0] + puddleSize)
                  for y in range(loc[1], loc[1] + puddleSize)]
        puddles.append(puddle)
        for rockID in xrange(rockPerPuddle):
            rocks.append(random.choice(puddle))

    rewardBasic = util.Counter()
    # each rock location gets a distinct positive reward (1, 2, ...)
    for idx, rock in enumerate(rocks):
        rewardBasic[rock] = idx + 1

    # TODO use reward gen
    # each reward candidate corresponds to one hypothesis about which puddle is the real one
    rewardCandNum = puddleNum
    rewards = []
    for candId in xrange(rewardCandNum):
        reward = rewardBasic.copy()
        puddle = puddles[candId]
        for loc in puddle:
            reward[loc] = -10
        rewards.append(reward)

    initialPhi = [1.0 / len(rewards)] * len(rewards)

    terminalReward = util.Counter()
    # heavily penalize the top row, except for a gap in its middle third
    for x in range(width / 3) + range(width * 2 / 3, width):
        terminalReward[(x, height - 1)] = -1000

    allPuddles = sum(puddles, [])  # concatenate its elements (which are lists)

    experiment(Domain, width, height, responseTime, horizon, rewardCandNum,
               allPuddles, rewards, initialPhi, terminalReward)
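util.Counter is not defined in these snippets; it appears to behave like a dictionary that returns 0 for missing keys (as in the Berkeley AI course's util module). A minimal stand-in, assuming only that behavior, could be:

class Counter(dict):
    # a dict whose missing keys read as 0
    def __getitem__(self, key):
        return dict.get(self, key, 0)

    def copy(self):
        return Counter(dict.copy(self))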
Example #3
                return numerical
            else:
                return 0

        return rewardFunc
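The lines above are the tail of a rewardGen helper whose beginning is cut off in this excerpt. Judging from the calls below, rewardGen(pairs, numerical) plausibly returns a reward function that pays numerical on the listed state-action pairs and 0 elsewhere; the definition below is inferred, not copied from the source:

    def rewardGen(pairs, numerical):
        # (inferred) pairs: the state-action pairs that receive reward `numerical`
        def rewardFunc(s, a):
            if (s, a) in pairs:
                return numerical
            else:
                return 0
        return rewardFunc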

    rewardSet = []
    """
  # a case where trajectory queries (actually state-action preference queries) perform worse than policy queries
  rewardSet.append(rewardGen([((0, 0), 0), ((0, 1), 0)], 1))
  rewardSet.append(rewardGen([((0, 0), 1), ((0, 1), 0)], 1))
  rewardSet.append(rewardGen([((0, 0), 0), ((0, 1), 1)], 1))
  rewardSet.append(rewardGen([((0, 0), 1), ((0, 1), 1)], 1))
  """

    terminalReward = util.Counter()

    cmp = TabularNavigationKWay(responseTime,
                                width,
                                height,
                                horizon=horizon,
                                terminalReward=terminalReward)

    rocks = cmp.getStateActionPairs()
    for candId in xrange(rewardCandNum):
        sampledRocks = random.sample(rocks, rockNum)
        rewardSet.append(rewardGen(sampledRocks, random.random()))

    initialPhi = [1.0 / rewardCandNum] * rewardCandNum
    experiment(cmp, rewardSet, initialPhi)
Example #4
  for opt, arg in opts:
    if opt == '-t':
      config.TRAJECTORY_LENGTH = int(arg)
    elif opt == '-n':
      rewardCandNum = int(arg)
    elif opt == '-k':
      config.NUMBER_OF_RESPONSES = int(arg)
    elif opt == '-r':
      random.seed(int(arg))
      numpy.random.seed(int(arg))
  
  terminalReward = util.Counter()
  
  # three-state domain
  """
  cmp = ThreeStateToy(responseTime, horizon, terminalReward)
  ws = [(-1,), (1,)]
  """

  # rock collection
  cmp = RockCollection(responseTime, width, height, horizon, terminalReward, rockNum)
  ws = [[random.random() for dim in range(config.DIMENSION)] for _ in range(rewardCandNum)]

  rewardCandNum = len(ws)

  initialPhi = [1.0 / rewardCandNum] * rewardCandNum

  config.opts = '_'.join(map(str, [rewardCandNum, config.NUMBER_OF_RESPONSES]))

  experiment(cmp, ws, initialPhi)
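The flags handled in the option loop above (-t, -n, -k, -r) suggest that opts is produced by a getopt call along these lines; the option string is inferred from those flags, not copied from the source:

  import getopt, sys

  # each of -t, -n, -k and -r takes an argument, hence the trailing colons
  opts, args = getopt.getopt(sys.argv[1:], 't:n:k:r:')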
Example #5
  for _ in xrange(puddleNum):
    loc = (random.randint(0, width - puddleSize - 1), random.randint(0, height - puddleSize - 1))
    #loc = puddleLocs[_]
    puddle = [(x, y) for x in range(loc[0], loc[0] + puddleSize) for y in range(loc[1], loc[1] + puddleSize)]
    puddles.append(puddle)
    for rockID in xrange(rockPerPuddle):
      rocks.append(random.choice(puddle))

  rewardBasic = util.Counter()
  for idx, rock in enumerate(rocks):
    rewardBasic[rock] = idx + 1

  # TODO use reward gen
  # each reward candidate corresponds to one hypothesis about which puddle is the real one
  rewardCandNum = puddleNum
  rewards = []
  for candId in xrange(rewardCandNum):
    reward = rewardBasic.copy()
    puddle = puddles[candId]
    for loc in puddle: reward[loc] = -10
    rewards.append(reward)

  initialPhi = [1.0 / len(rewards)] * len(rewards)
  
  terminalReward = util.Counter()
  for x in range(width / 3) + range(width * 2 / 3, width): terminalReward[(x, height - 1)] = -1000

  allPuddles = sum(puddles, []) # concatenate its elements (which are lists)
  
  experiment(Domain, width, height, responseTime, horizon, rewardCandNum, allPuddles, rewards, initialPhi, terminalReward)
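sum(puddles, []) flattens the list of puddle cell lists into a single list of cells; itertools.chain gives the same result without building an intermediate list on every concatenation:

  from itertools import chain

  # equivalent to sum(puddles, []) but linear in the total number of cells
  allPuddles = list(chain.from_iterable(puddles))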
Example #6
        elif opt == '-n':
            rewardCandNum = int(arg)
        elif opt == '-k':
            config.NUMBER_OF_RESPONSES = int(arg)
        elif opt == '-r':
            random.seed(int(arg))
            numpy.random.seed(int(arg))

    terminalReward = util.Counter()

    # three-state domain
    """
  cmp = ThreeStateToy(responseTime, horizon, terminalReward)
  ws = [(-1,), (1,)]
  """

    # rock collection
    cmp = RockCollection(responseTime, width, height, horizon, terminalReward,
                         rockNum)
    ws = [[random.random() for dim in range(config.DIMENSION)]
          for _ in range(rewardCandNum)]

    rewardCandNum = len(ws)

    initialPhi = [1.0 / rewardCandNum] * rewardCandNum

    config.opts = '_'.join(
        map(str, [rewardCandNum, config.NUMBER_OF_RESPONSES]))

    experiment(cmp, ws, initialPhi)
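In this example (and in Example #4), each element of ws is a weight vector over config.DIMENSION reward features. Assuming the reward is linear in those features, which is an inference from the code rather than something stated in it, evaluating one candidate would look roughly like:

    def linearReward(w, features):
        # (assumed) dot product of the weight vector with a state-action feature vector
        return sum(wi * fi for wi, fi in zip(w, features))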