Beispiel #1
0
def computeRewards(states, reward_halflife=2.0):
    """Turn a sequence of game states into discounted per-step rewards.

    The reward for the step from ``states[i]`` to ``states[i + 1]`` is
    ``kill - death + damage / 100`` where

    * ``kill`` / ``death`` are the ``processDeaths`` outputs (index ``i + 1``)
      for the ``isDying`` flags of ``players[0]`` / ``players[1]``;
    * ``damage`` is the rise in ``players[0].percent`` between the two
      states, clamped below at zero.

    Args:
        states: Non-empty indexable sequence of state objects exposing
            ``players[0]`` and ``players[1]``; ``players[0]`` must have a
            ``percent`` attribute.
        reward_halflife: Half-life of the discounting, in seconds. The
            conversion to a per-step factor assumes 60 steps per second.

    Returns:
        ``util.scanr`` applied to the per-step rewards, seeded with the best
        ``scoreActions`` value of the final state, with its last element
        dropped.
        NOTE(review): presumably ``util.scanr`` is a right-to-left scan whose
        last element is the seed itself -- confirm against ``util``.
    """
    frames_per_second = 60.0
    halflife_in_frames = frames_per_second * reward_halflife
    # Per-step factor chosen so that it compounds to 0.5 after one half-life.
    discount = 0.5**(1.0 / halflife_in_frames)

    p0_dying = [isDying(frame.players[0]) for frame in states]
    p1_dying = [isDying(frame.players[1]) for frame in states]

    kills = processDeaths(p0_dying)
    deaths = processDeaths(p1_dying)

    # Only increases in players[0].percent count; decreases are clamped to 0.
    damage_dealt = []
    for idx in range(1, len(states)):
        rise = states[idx].players[0].percent - states[idx - 1].players[0].percent
        damage_dealt.append(max(rise, 0))

    def step_reward(net_kill, damage):
        return net_kill + damage / 100

    # Skip index 0 so the kill/death entries line up with the transitions.
    net_kills = util.zipWith(lambda kill, death: kill - death,
                             kills[1:], deaths[1:])
    step_rewards = util.zipWith(step_reward, net_kills, damage_dealt)

    # Seed the scan with the best available action score in the final state.
    bootstrap = max(scoreActions(states[-1]))

    def accumulate(reward, future):
        return reward + discount * future

    return util.scanr(accumulate, bootstrap, step_rewards)[:-1]
Beispiel #2
0
def computeRewards(states, reward_halflife = 2.0):
  """Compute discounted rewards for a non-empty sequence of game states.

  Each transition between consecutive states is rewarded with
  ``kill - death + damage / 100``: ``kill`` and ``death`` come from
  ``processDeaths`` applied to the ``isDying`` flags of ``players[0]`` and
  ``players[1]`` respectively, and ``damage`` is the non-negative increase of
  ``players[0].percent`` across the transition.

  Args:
    states: Indexable sequence of states with ``players[0]`` (having a
      ``percent`` attribute) and ``players[1]``.
    reward_halflife: Discount half-life in seconds, converted to a per-step
      factor assuming 60 steps per second.

  Returns:
    The output of ``util.scanr`` over the transition rewards, seeded with
    ``max(scoreActions(states[-1]))``, minus its final element.
    NOTE(review): the exact scan semantics live in ``util.scanr`` -- verify
    there that the dropped element is the seed.
  """
  frame_rate = 60.0
  # Raising this factor to (frame_rate * reward_halflife) yields exactly 0.5.
  discount = 0.5 ** (1.0 / (frame_rate * reward_halflife))

  first_dying = [isDying(s.players[0]) for s in states]
  second_dying = [isDying(s.players[1]) for s in states]

  kills = processDeaths(first_dying)
  deaths = processDeaths(second_dying)

  # Pull the percent readings out once, then compare neighbours.
  percents = [s.players[0].percent for s in states]
  damage = [max(later - earlier, 0) for earlier, later in zip(percents, percents[1:])]

  # The first kill/death entry has no matching transition, so drop it.
  net = util.zipWith(lambda k, d: k - d, kills[1:], deaths[1:])
  rewards = util.zipWith(lambda base, dmg: base + dmg / 100, net, damage)

  # Value estimate for whatever follows the last recorded state.
  seed = max(scoreActions(states[-1]))

  discounted = util.scanr(lambda now, later: now + discount * later, seed, rewards)
  return discounted[:-1]
Beispiel #3
0
def processDeaths(deaths):
    """Keep only the first frame of each run of truthy "dying" flags.

    Entry ``i`` of the result is ``(not deaths[i - 1]) and deaths[i]``, with
    the flag before the first entry taken to be ``False``; a truthy first
    entry therefore counts as an onset.

    Args:
        deaths: Iterable of per-frame flags (any truthy/falsy values).

    Returns:
        A list with one entry per input flag: the flag itself where a run of
        truthy values starts (or where it is falsy after a falsy flag), and
        ``False`` everywhere else.
    """
    onsets = []
    # Carry the previous flag explicitly instead of building a shifted copy of
    # the input, so tuples, arrays and one-shot iterators work as well as lists.
    was_dying = False
    for dying in deaths:
        onsets.append((not was_dying) and dying)
        was_dying = dying
    return onsets
Beispiel #4
0
def processDeaths(deaths):
  """Mark, as floats, the transitions where a "dying" flag switches on.

  Entry ``i`` of the result is ``float((not deaths[i]) and deaths[i + 1])``,
  so the result has one element fewer than ``deaths`` and a truthy first flag
  is never reported as an onset.

  Args:
    deaths: Sliceable sequence of per-frame flags.

  Returns:
    A float NumPy array of length ``max(len(deaths) - 1, 0)``.
  """
  # Pair every flag with its successor; zip stops at the shorter sequence.
  onsets = [float((not before) and after) for before, after in zip(deaths, deaths[1:])]
  return np.array(onsets, dtype=float)
Beispiel #5
0
def processDamages(percents):
  """Return the non-negative step-to-step increases of ``percents``.

  Entry ``i`` of the result is ``max(percents[i + 1] - percents[i], 0)``;
  decreases are reported as zero.

  Args:
    percents: Sliceable sequence of numeric readings.

  Returns:
    A NumPy array of length ``max(len(percents) - 1, 0)``.
  """
  # Pair every reading with its successor; zip stops at the shorter sequence.
  gains = [max(after - before, 0) for before, after in zip(percents, percents[1:])]
  return np.array(gains)
Beispiel #6
0
def processDeaths(deaths):
  """Reduce runs of truthy "dying" flags to their first frame.

  Entry ``i`` of the result is ``(not deaths[i - 1]) and deaths[i]``, where
  the flag preceding the first entry is treated as ``False``, so a truthy
  first entry is reported as an onset.

  Args:
    deaths: Iterable of per-frame flags (any truthy/falsy values).

  Returns:
    A list with one entry per input flag.
  """
  onsets = []
  # Remember the previous flag rather than concatenating a shifted copy of the
  # input; this keeps the function usable with tuples, arrays and iterators.
  previous = False
  for current in deaths:
    onsets.append((not previous) and current)
    previous = current
  return onsets