def run_controller(trials=10, epochs=250, timeslice=0.002, tmax=0.2, threshold=((-pi / 2), (pi / 2))):
    """Run the GA-trained neural-network controller on an inverted pendulum.

    Experiment loop:
      0. Build the neural network and GA population.
      1. Start the pendulum with a random nonzero force.
      2. Sample the pendulum state every control period.
      3-5. The GA evaluates individuals; each individual's weights are loaded
         into the NN, which maps the state to a force.
      6. The force is applied to the pendulum simulation.
      7. Repeat from step 2 until the pole leaves `threshold` or the fitness
         cap is reached.

    Args:
        trials: number of independent experiments to run.
        epochs: GA generations per trial.
        timeslice: simulation step size, in seconds.
        tmax: simulated time per control decision, in seconds.
        threshold: (low, high) pole-angle bounds in radians; outside = fallen.

    Returns:
        list of per-trial tuples ``(reward_array, population_array)`` where
        ``reward_array[i]`` is the best individual's fitness at epoch *i* and
        ``population_array[i]`` is the population fitness at epoch *i*.
    """
    # TODO run multiple simulations, collect avg data and error and produce graphs
    MAX_REWARD = 1000  # fitness cap: stop evaluating an individual once reached

    pendulum = InvertedPendulum()
    NN = NEvoNetwork(inputs=6, outputs=1, hiddenlayers=1, hiddenneurons=12,
                     inputweights=6, activation=TanhActivation)
    ga = Population(NN=NN, size=30)

    main_array = []
    for trial in range(trials):
        ga.create(size=30)

        # Random nonzero initial push. np.random.randint's upper bound is
        # exclusive, so this draws from [-5, 4]; re-draw until nonzero.
        force = np.random.randint(-5, 5)
        while force == 0:
            force = np.random.randint(-5, 5)

        # Start with the pole pointing down (theta = pi), apply the push.
        # get_State returns (state history, elapsed time); `_elapsed` avoids
        # shadowing the stdlib `time` module (the original bound it to `time`).
        initial_state = State()
        initial_state.theta = pi
        initial_state, _elapsed = pendulum.get_State(u=force, initialstate=initial_state,
                                                     tmax=tmax, timeslice=timeslice)
        # BUG FIX: original specs were `{1:3f}`/`{0:4f}` — field widths, not
        # the intended decimal precisions `.3f`/`.4f`.
        print('Force={1:.3f} -Theta={0:.4f}'.format(initial_state[-1].theta, force))

        reward_array = []
        population_array = []
        for epoch in range(epochs):
            for individual in ga.individuals:
                NN.set_weights(individual.alleles)
                state = [initial_state[-1]]
                airborne = True
                while airborne:
                    # NN output presumably lies in (-1, 1) (tanh activation);
                    # scaled by 5 to a force — confirm against NEvoNetwork.
                    force = NN.get_outputs([state[-1].x, state[-1].xdot, state[-1].x2dot,
                                            state[-1].theta, state[-1].thetadot,
                                            state[-1].theta2dot])[0] * 5
                    state, _elapsed = pendulum.get_State(u=force, initialstate=state[-1],
                                                         tmax=tmax, timeslice=timeslice)
                    if state[-1].theta < threshold[0] or state[-1].theta > threshold[1]:
                        airborne = False  # pole fell outside the allowed band
                    else:
                        individual.set_fitness(1)  # one more balanced period
                    if individual.get_fitness() >= MAX_REWARD:
                        break  # good enough; stop evaluating this individual

            reward_array.append(ga.getFittestIndividual().get_fitness())
            population_array.append(ga.getPopulationFitness())
            print('Trial: {0} - Epoch {1} --> Best fitness score = {2}, - Pop Fitness = {3}'
                  .format(trial, epoch, ga.getFittestIndividual().get_fitness(),
                          ga.getPopulationFitness()))
            ga.evolve(epochs=1)  # advance the GA one generation per epoch

        main_array.append((reward_array, population_array))
    return main_array