Example #1
class Agent(object):

  def __init__(self, positions):
    # Encode a 1D sweep of depth readings taken at the given sensor positions.
    self.encoder = OneDDepthEncoder(positions=positions,
                                    radius=5,
                                    wrapAround=True,
                                    nPerPosition=28,
                                    wPerPosition=3,
                                    minVal=0,
                                    maxVal=1)
    self.plotter = Plotter(self.encoder)
    self.learner = QLearner(ACTIIONS, n=1008)

    self.lastState = None
    self.lastAction = None


  def sync(self, outputData):
    # Skip this timestep if the simulator output lacks the required fields.
    if not ("ForwardsSweepSensor" in outputData and
            "steer" in outputData):
      print "Warning: Missing data:", outputData
      return

    if outputData.get("reset"):
      print "Reset."

    sensor = outputData["ForwardsSweepSensor"]
    steer = outputData["steer"]
    reward = outputData.get("reward") or 0

    # Encode the raw depth readings into a sparse binary representation.
    encoding = self.encoder.encode(numpy.array(sensor))

    # Update the Q-learner with the transition out of the previous state.
    if self.lastState is not None:
      self.learner.update(self.lastState, str(self.lastAction),
                          encoding, str(steer), reward)

    value = self.learner.value(encoding)

    # Query the Q-value of every available action in the current state.
    qValues = {}
    for action in ACTIIONS:
      qValues[action] = self.learner.qValue(encoding, action)

    inputData = {}
    inputData["qValues"] = qValues
    inputData["bestAction"] = self.learner.bestAction(encoding)

    self.plotter.update(sensor, encoding, steer, reward, value, qValues)

    if outputData.get("reset"):
      self.plotter.render()

    # Remember this state and action for the next update.
    self.lastState = encoding
    self.lastAction = steer

    return inputData
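The sync() method above is meant to be driven by an outer loop that feeds it one timestep of simulator output at a time and forwards its return value back to the simulator (Example #4 shows such a loop built around a Fetcher). A minimal sketch of that wiring, assuming hypothetical fetchOutputData() and sendInputData() helpers and an illustrative positions list, is:

# Hypothetical driver for the Agent above. fetchOutputData() and
# sendInputData() are placeholders, not part of the original project.
positions = [i * 10 for i in range(36)]  # illustrative: 36 positions * 28 bits
                                         # = 1008, matching QLearner(ACTIIONS, n=1008)
agent = Agent(positions)

while True:
  outputData = fetchOutputData()      # one timestep of simulator output
  if outputData is None:
    continue
  inputData = agent.sync(outputData)  # returns qValues and bestAction, or None
  if inputData is not None:
    sendInputData(inputData)          # feed the chosen action back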
Example #2
class Agent(object):
    def __init__(self, positions):
        self.encoder = OneDDepthEncoder(positions=positions,
                                        radius=5,
                                        wrapAround=True,
                                        nPerPosition=28,
                                        wPerPosition=3,
                                        minVal=0,
                                        maxVal=1)
        self.plotter = Plotter(self.encoder)
        self.learner = QLearner(ACTIIONS, n=1008)

        self.lastState = None
        self.lastAction = None

    def sync(self, outputData):
        if not ("ForwardsSweepSensor" in outputData and "steer" in outputData):
            print "Warning: Missing data:", outputData
            return

        if outputData.get("reset"):
            print "Reset."

        sensor = outputData["ForwardsSweepSensor"]
        steer = outputData["steer"]
        reward = outputData.get("reward") or 0

        encoding = self.encoder.encode(numpy.array(sensor))

        if self.lastState is not None:
            self.learner.update(self.lastState, str(self.lastAction), encoding,
                                str(steer), reward)

        value = self.learner.value(encoding)

        qValues = {}
        for action in ACTIIONS:
            qValues[action] = self.learner.qValue(encoding, action)

        inputData = {}
        inputData["qValues"] = qValues
        inputData["bestAction"] = self.learner.bestAction(encoding)

        self.plotter.update(sensor, encoding, steer, reward, value, qValues)

        if outputData.get("reset"):
            self.plotter.render()

        self.lastState = encoding
        self.lastAction = steer

        return inputData
Example #3
    def __init__(self, positions):
        self.encoder = OneDDepthEncoder(positions=positions,
                                        radius=5,
                                        wrapAround=True,
                                        nPerPosition=28,
                                        wPerPosition=3,
                                        minVal=0,
                                        maxVal=1)
        self.plotter = Plotter(self.encoder)
        self.learner = QLearner(ACTIIONS, n=1008)

        self.lastState = None
        self.lastAction = None
Example #4
def run(positions, plotEvery=1):
  encoder = OneDDepthEncoder(positions=positions,
                             radius=5,
                             wrapAround=True,
                             nPerPosition=28,
                             wPerPosition=3,
                             minVal=0,
                             maxVal=1)
  fetcher = Fetcher()
  plotter = Plotter(encoder)
  learner = QLearner(ACTIIONS, n=1008)

  lastState = None
  lastAction = None

  # Main loop: pull one timestep of simulator output per iteration.
  while True:
    outputData = fetcher.sync()

    if outputData is None:
      continue

    if fetcher.skippedTimesteps > 0:
      print ("Warning: skipped {0} timesteps, "
             "now at {1}").format(fetcher.skippedTimesteps, fetcher.timestep)

    if not ("reset" in outputData and
            "ForwardsSweepSensor" in outputData and
            "steer" in outputData):
      print ("Warning: Missing data on timestep {0}: {1}".format(
             fetcher.timestep, outputData))
      continue

    if outputData["reset"]:
      print "Reset."

    sensor = outputData["ForwardsSweepSensor"]
    steer = outputData["steer"]
    reward = outputData.get("reward") or 0

    encoding = encoder.encode(numpy.array(sensor))

    if lastState is not None:
      learner.update(lastState, str(lastAction), encoding, str(steer), reward)

    value = learner.value(encoding)

    qValues = {}
    for action in ACTIIONS:
      qValues[action] = learner.qValue(encoding, action)

    fetcher.inputData["qValues"] = qValues
    fetcher.inputData["bestAction"] = learner.bestAction(encoding)

    plotter.update(sensor, encoding, steer, reward, value, qValues)

    # if fetcher.timestep % plotEvery == 0:
    #   plotter.render()

    lastState = encoding
    lastAction = steer
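A hedged usage sketch for run(): the entry point and the positions values below are illustrative, not taken from the original script.

if __name__ == "__main__":
  # Illustrative setup: 36 evenly spaced positions keep the encoding width at
  # the 1008 bits the QLearner expects. plotEvery only matters if the
  # commented-out plotter.render() call above is re-enabled.
  positions = [i * 10 for i in range(36)]
  run(positions, plotEvery=25)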
Example #5
  def __init__(self, positions):
    self.encoder = OneDDepthEncoder(positions=positions,
                                    radius=5,
                                    wrapAround=True,
                                    nPerPosition=28,
                                    wPerPosition=3,
                                    minVal=0,
                                    maxVal=1)
    self.plotter = Plotter(self.encoder)
    self.learner = QLearner(ACTIIONS, n=1008)

    self.lastState = None
    self.lastAction = None