def crossover_agents(p1, p2):
    """Breed two parent agents and return a pair of child agents.

    Each parent's model weights are flattened, crossed over via
    u.numpy_array_crossover, and the two resulting weight vectors are
    loaded into two freshly constructed NeuralAgents.
    """
    flat_a = p1.get_flattened_weights_of_model()
    flat_b = p2.get_flattened_weights_of_model()
    child_a_weights, child_b_weights = u.numpy_array_crossover(flat_a, flat_b)
    children = []
    for weights in (child_a_weights, child_b_weights):
        child = agents.NeuralAgent()
        child.set_weights_of_model(weights)
        children.append(child)
    return children[0], children[1]
type=str, default=None, help="weights files, only valid for --agent=neural") parser.add_argument( '--lite-weights', type=str, default=None, help="tf lite weights files, must be set for --agent=neural_lite") parser.add_argument('--trials', type=int, default=10, help='num trials to run; new agent per trial') opts = parser.parse_args() evaluator = cartpole_fitness.CartPoleFitness(render=opts.env_render) print("trial\ttotal_reward") for trial_idx in range(opts.trials): if opts.agent == 'random': agent = agents.RandomAgent() elif opts.agent == 'neural': agent = agents.NeuralAgent() if opts.weights is not None: agent.set_weights_of_model(np.load(opts.weights)) elif opts.agent == 'neural_lite': agent = agents.NeuralLiteAgent(tflite_file=opts.lite_weights) else: raise Exception("unexpected agent type [%s]" % opts.agent) print("%d\t%d" % (trial_idx, evaluator.fitness(agent))) sys.stdout.flush()
def new_member_bytes():
    """Build a fresh NeuralAgent and return its model serialised as
    tflite flatbuffer bytes (via convert_to_tflite)."""
    return convert_to_tflite.convert_to_file_bytes(agents.NeuralAgent())
plt.ylabel("Cumulative Reward") plt.show() def plotQ(Q): states = [[0, 0], [0, 1], [1, 0], [1, 1]] for state in states: for a in [0, 1]: print("Q[{},{}]={}".format(state, a, Q[env.asint(state), a])) # Number of iterations n_iter = 1000 # environment specs env = EvidenceEnv(n=2, p=0.75) agent = agents.RandomAgent(env) runAgent() # define agent agent = agents.TabularQAgent(env) plotQ(agent.Q) runAgent() plotQ(agent.Q) actualQ = agent.Q agent = agents.NeuralAgent(env, actualQ) plotQ(agent.Q) runAgent() plotQ(agent.Q)
def new_agent():
    """Factory: construct and return a new NeuralAgent."""
    agent = agents.NeuralAgent()
    return agent