Esempio n. 1
0
 def __init__(self):
     """Run the Responder initialiser, store the settings, then call createGraph()."""
     Responder.__init__(self)
     # All settings are placed on the instance before createGraph() is invoked.
     # NOTE(review): meanings are inferred from the names only (gamma is
     # presumably a reward discount factor) -- confirm against createGraph().
     self.learningRate = 0.1
     self.gamma = 0.99
     self.update_frequency = 5
     self.batch_size = 50
     self.createGraph()
Esempio n. 2
0
    def __init__(self):
        """Initialise agent bookkeeping and build a one-layer TensorFlow network.

        The graph maps a [1, numStates] input through a single weight matrix
        to a [1, numActions] output, and is trained with plain gradient
        descent on the summed squared difference to a fed-in target.
        """
        Responder.__init__(self)
        self.numActions = 71
        self.numStates = 71
        # Start from an empty default graph before adding any ops.
        tf.reset_default_graph()

        # Learning parameters and bookkeeping.
        # NOTE(review): y and e look like the discount factor and the
        # exploration rate of an epsilon-greedy policy -- confirm in runNet().
        step_size = 0.1
        self.y = .99
        self.e = 0.1
        self.currentState = 0
        self.action = 0
        self.numResets = 0
        self.resetCalled = False

        # NOTE(review): runNet() is called before the graph attributes below
        # exist. That is only safe if runNet() is a generator (its body runs
        # later) or does not touch them -- confirm before reordering.
        self.learn = self.runNet()

        # Feed-forward part of the network, used to choose actions.
        state_shape = [1, self.numStates]
        self.inputs1 = tf.placeholder(shape=state_shape, dtype=tf.float32)
        weight_shape = [self.numStates, self.numActions]
        initial_weights = tf.random_uniform(weight_shape, 0, 0.01)
        self.W = tf.Variable(initial_weights)
        self.Qout = tf.matmul(self.inputs1, self.W)
        self.predict = tf.argmax(self.Qout, 1)

        # Loss: sum of squared differences between target and predicted values.
        target_shape = [1, self.numActions]
        self.nextQ = tf.placeholder(shape=target_shape, dtype=tf.float32)
        squared_error = tf.square(self.nextQ - self.Qout)
        self.loss = tf.reduce_sum(squared_error)
        self.trainer = tf.train.GradientDescentOptimizer(
            learning_rate=step_size)
        self.updateModel = self.trainer.minimize(self.loss)
        # Op that initialises every variable created above.
        self.init = tf.global_variables_initializer()
Esempio n. 3
0
 def __init__(self):
     """Run the Responder initialiser, store the settings, then call createGraph()."""
     Responder.__init__(self)
     # All settings are placed on the instance before createGraph() is invoked.
     # NOTE(review): meanings are inferred from the names only -- confirm how
     # createGraph() and the training code consume each value.
     self.numHiddenNeurons = 1
     self.learningRate = 0.03
     self.gamma = 0.99
     self.numRewardsForUpdate = 1
     self.batch_size = 1
     self.createGraph()
Esempio n. 4
0
 def __init__(self):
     """Run the Responder initialiser and configure the flags for a pre-trained model."""
     Responder.__init__(self)
     # A trained model is used as-is: nothing is recorded, plotted, logged to
     # TensorBoard or saved; the existing model is loaded instead.
     self.recordRewards = self.plotResults = False
     self.useTensorBoard = self.saveModels = False
     self.loadModels = True
Esempio n. 5
0
 def __init__(self):
     """Initialise the instance by delegating entirely to Responder.__init__."""
     Responder.__init__(self)
Esempio n. 6
0
 def __init__(self):
     """Run the Responder initialiser and create a 256-entry table filled with -1."""
     Responder.__init__(self)
     # NOTE(review): 256 slots presumably means one per byte value, with -1
     # marking "not known yet" -- confirm against the code that fills it.
     self.correctCharacter = [-1] * 256
Esempio n. 7
0
 def __init__(self):
     """Run the Responder initialiser and reset the search state."""
     Responder.__init__(self)
     # Both flags start cleared; the index starts at -1.
     # NOTE(review): -1 presumably means "no character selected yet" -- confirm.
     self.foundCharacter = self.specialCharacterOutputted = False
     self.characterIndex = -1
Esempio n. 8
0
 def __init__(self):
     """Run the Responder initialiser and reset the search state."""
     Responder.__init__(self)
     # The flag starts cleared and the index starts at -1.
     # NOTE(review): -1 presumably means "no character selected yet" -- confirm.
     self.foundCharacter = False
     self.characterIndex = -1
Esempio n. 9
0
 def __init__(self):
     """Run the Responder initialiser and start characterIndex at 0."""
     Responder.__init__(self)
     # NOTE(review): presumably an index into the set of candidate characters
     # -- confirm against the methods that read it.
     self.characterIndex = 0
Esempio n. 10
0
 def __init__(self):
     """Run the Responder initialiser, then reset the response table and counters."""
     Responder.__init__(self)
     # self.characters is not assigned here, so it must already exist once
     # Responder.__init__ has run; the table gets one -1 entry per character.
     self.responses = [-1] * len(self.characters)
     self.counter = self.steps = 0
 def __init__(self):
     """Run the Responder initialiser and switch on the loadModels flag."""
     Responder.__init__(self)
     # NOTE(review): presumably tells the base class to restore a saved model
     # rather than start untrained -- confirm where loadModels is read.
     self.loadModels = True
Esempio n. 12
0
 def __init__(self):
     """Run the Responder initialiser, store the settings, then call createGraph()."""
     Responder.__init__(self)
     # NOTE(review): meanings are inferred from the names only (gamma is
     # presumably a reward discount factor) -- confirm against createGraph().
     self.batch_size = 50
     self.gamma = 0.99
     # Stored on the instance: the original bound this to a local variable
     # that was discarded when __init__ returned, so the setting was never
     # visible to any other method.
     self.update_frequency = 5
     self.createGraph()
Esempio n. 13
0
 def __init__(self):
   Responder.__init__(self)
   self.gamma = 0.99