# Example #1 (예제 #1)
def run_controller(trials=10, epochs=250, timeslice=0.002, tmax=0.2, threshold=((-pi / 2), (pi / 2))):
    """Train a NEvoNetwork controller to balance an inverted pendulum with a GA.

    For each trial the pendulum is kicked with a random non-zero force, then a
    genetic algorithm evolves network weights for ``epochs`` generations.  An
    individual earns +1 fitness for every control step the pole angle stays
    inside ``threshold`` (radians), capped at MAX_REWARD.

    Parameters
    ----------
    trials : int
        Number of independent simulations to run.
    epochs : int
        GA generations per trial.
    timeslice : float
        Integration step (seconds) passed to the pendulum simulator.
    tmax : float
        Simulated time horizon (seconds) per control step.
    threshold : tuple of (float, float)
        (low, high) pole-angle bounds in radians; leaving them ends the episode.

    Returns
    -------
    list of (list, list)
        One (best-fitness-per-epoch, population-fitness-per-epoch) pair
        per trial.
    """
    # Control loop outline:
    #   0. build the neural network; 1. kick the pendulum with a random force;
    #   2. sample the pendulum state; 3-4. the GA evolves weights over epochs;
    #   5-6. the NN maps the state to a force applied back to the pendulum;
    #   7. repeat from step 2.

    # TODO: run multiple simulations, collect avg data and error, produce graphs.
    MAX_REWARD = 1000  # fitness cap: stop evaluating an individual once reached
    pendulum = InvertedPendulum()
    NN = NEvoNetwork(inputs=6, outputs=1, hiddenlayers=1, hiddenneurons=12,
                     inputweights=6, activation=TanhActivation)
    ga = Population(NN=NN, size=30)

    main_array = []

    for trial in range(trials):
        ga.create(size=30)  # fresh population for each trial

        # Random non-zero initial kick.
        # NOTE(review): np.random.randint's upper bound is exclusive, so this
        # samples from [-5, 4] -- confirm whether a symmetric [-5, 5] range
        # was intended (that would be randint(-5, 6)).
        force = np.random.randint(-5, 5)
        while force == 0:
            force = np.random.randint(-5, 5)

        # Start from the inverted position (theta = pi) and apply the kick.
        initial_state = State()
        initial_state.theta = pi
        initial_state, time = pendulum.get_State(u=force, initialstate=initial_state, tmax=tmax, timeslice=timeslice)
        # Bug fix: the original specs '{1:3f}'/'{0:4f}' set field *width*, not
        # precision; '.3f'/'.4f' print 3/4 decimal places as intended.
        print('Force={1:.3f} -Theta={0:.4f}'.format(initial_state[-1].theta, force))

        reward_array = []
        population_array = []
        for epoch in range(epochs):
            for individual in ga.individuals:
                NN.set_weights(individual.alleles)
                state = [initial_state[-1]]

                airborne = True
                while airborne:
                    # Tanh output in [-1, 1], scaled to a force in [-5, 5].
                    force = NN.get_outputs([state[-1].x, state[-1].xdot, state[-1].x2dot,
                                            state[-1].theta, state[-1].thetadot, state[-1].theta2dot])[0] * 5
                    state, time = pendulum.get_State(u=force, initialstate=state[-1], tmax=tmax, timeslice=timeslice)

                    # Episode ends once the pole leaves the angular window;
                    # otherwise the individual earns +1 fitness for the step.
                    if state[-1].theta < threshold[0] or state[-1].theta > threshold[1]:
                        airborne = False
                    else:
                        individual.set_fitness(1)

                    if individual.get_fitness() >= MAX_REWARD:
                        break

            reward_array.append(ga.getFittestIndividual().get_fitness())
            population_array.append(ga.getPopulationFitness())
            print('Trial: {0} - Epoch {1} --> Best fitness score = {2}, - Pop Fitness = {3}'
                  .format(trial, epoch, ga.getFittestIndividual().get_fitness(), ga.getPopulationFitness()))
            ga.evolve(epochs=1)

        main_array.append((reward_array, population_array))

    return main_array