# Example #1
        op_holder.append(tfVars[idx+total_vars//2].assign((var.value()*tau) + ((1-tau)*tfVars[idx+total_vars//2].value())))
    return op_holder

def updateTarget(op_holder,sess):
    """Execute every pending assign op so the target network syncs.

    Parameters:
        op_holder: iterable of TF assign operations (built elsewhere,
            e.g. by the target-graph helper above).
        sess: TF session used to run each op.
    """
    run = sess.run  # bind once; same call repeated for each op
    for assign_op in op_holder:
        run(assign_op)

def saveScore(score):
    """Append *score* as one line to the reward log file.

    Writes to the module-level ``reward_savefile`` path in append mode,
    so scores from earlier episodes/runs are preserved.

    Parameters:
        score: any value with a usable str() form (written as "%s\n").
    """
    # 'with' guarantees the handle is closed even if the write raises,
    # unlike the previous manual open()/close() pair.
    with open(reward_savefile, 'a') as my_file:  # Name and path of the reward text file
        my_file.write("%s\n" % score)

###########################################

# --- Script setup: environment, TF session, agent, checkpoint saver ---

# Create and start the game environment (GameSimulator is defined elsewhere
# in this project).
game = GameSimulator()
game.initialize()

# Number of discrete actions the agent can choose from.
ACTION_COUNT = game.get_action_size()

# Cap TensorFlow's per-process GPU memory at 33% so the card can be shared.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.33)

SESSION = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

# When resuming from a saved model, lower the exploration ceiling — the
# policy is presumably already partially trained (per the original note).
if LOAD_MODEL:
    EPSILON_MAX = 0.25 # restart after 20+ epoch

# Build the learning agent. All uppercase hyperparameters (MEMORY_CAP,
# BATCH_SIZE, RESOLUTION, ...) are module-level constants defined earlier
# in the file — not visible in this chunk.
agent = Agent(memory_cap = MEMORY_CAP, batch_size = BATCH_SIZE, resolution = RESOLUTION, action_count = ACTION_COUNT,
            session = SESSION, lr = LEARNING_RATE, gamma = GAMMA, epsilon_min = EPSILON_MIN, trace_length=TRACE_LENGTH,
            epsilon_decay_steps = EPSILON_DECAY_STEPS, epsilon_max=EPSILON_MAX, hidden_size=HIDDEN_SIZE)

# Saver for checkpointing/restoring the TF graph variables.
saver = tf.train.Saver()