# Setup for replaying a previously saved NEAT genome on the ball-on-plate
# environment with the GUI enabled. Only the initialisation is visible here.
from time import sleep

# Extend the module search path with the parent directory before importing
# the environment module (presumably that is where ball_on_plate_env lives
# -- TODO confirm).
sys.path.append('../')
import ball_on_plate_env as env

# load the winner
# NOTE(review): pickle.load can execute arbitrary code; 'winner-ff' must be a
# trusted, locally produced file.
with open('winner-ff', 'rb') as f:
    c = pickle.load(f)

# The NEAT configuration file is resolved relative to this script's directory.
local_dir = os.path.dirname(__file__)
config_path = os.path.join(local_dir, 'config-feedforward')
config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation,
                     config_path)

# Simulation with the GUI shown and a randomised initial condition.
ballOnPlate = env.BallOnPlate(showGUI=True, randomInitial=True)
# Feed-forward network built from the loaded genome.
net = neat.nn.FeedForwardNetwork.create(c, config)

# Target position on the plate.
ref_point = np.array([0., 0.])
t = 0
# Two-component control input passed to the environment.
envInput = [0, 0]
dropDown = False

ballOnPlate.reset()
# The starting position is read from the environment attribute (spelled
# `intial_pos` in the environment API) rather than from reset()'s return value.
posOnPlate = ballOnPlate.intial_pos
prevPosOnPlate = posOnPlate
prev_err = [0, 0]
def work(self):
    """Run this worker's episode loop until ``COORD`` requests a stop.

    Each episode resets the worker's own ``BallOnPlate`` environment and
    steps it with actions chosen by ``self.AC``.  Transitions are buffered
    and, every ``UPDATE_GLOBAL_ITER`` steps or at episode end, turned into
    discounted value targets that are pushed to the global network, after
    which the local network pulls the global parameters.

    The state vector is
    ``[x, y, err_x / 2, err_y / 2, dx, dy, a0, a1]`` where ``dx, dy`` are the
    position change since the previous step and ``a0, a1`` the last action
    (all four are zero at episode start).

    Side effects: assigns ``self.envir``, appends each episode reward to
    ``GLOBAL_RUNNING_R``, increments ``GLOBAL_EP`` and saves a checkpoint
    every 500 episodes.
    """
    global GLOBAL_RUNNING_R, GLOBAL_EP

    total_step = 1
    states, actions, rewards = [], [], []

    # Only the worker named 'W_0' opens the GUI.
    self.envir = ball.BallOnPlate(showGUI=(self.name == 'W_0'),
                                  randomInitial=True)
    ref_point = np.array([0., 0.])

    # No episode cap is applied here; the loop runs until the coordinator stops.
    while not COORD.should_stop():
        posOnPlate = self.envir.reset()
        err = ref_point - posOnPlate
        state = np.array([posOnPlate[0], posOnPlate[1],
                          err[0] / 2., err[1] / 2.,
                          0, 0, 0, 0])
        ep_r = 0.

        while True:
            a = self.AC.choose_action(state)
            posOnPlate, done = self.envir.step(a)
            err = ref_point - posOnPlate

            # Position change since the previous state (state[0:2] hold x, y).
            dx = posOnPlate[0] - state[0]
            dy = posOnPlate[1] - state[1]
            dt = self.envir.dt

            # Reward: 4 minus a scaled sum of squared error and squared
            # displacement divided by dt.
            step_reward = 4 - (err[0]**2 + err[1]**2
                               + dx**2 / dt + dy**2 / dt) / 100.

            new_state = np.array([posOnPlate[0], posOnPlate[1],
                                  err[0] / 2., err[1] / 2.,
                                  dx, dy, a[0], a[1]])

            # If the environment did not end the episode, end it once the
            # simulation time exceeds 20.
            if not done:
                done = bool(self.envir.time > 20)

            ep_r += step_reward
            states.append(state)
            actions.append(a)
            rewards.append(step_reward)

            # update global and assign to local net
            if total_step % UPDATE_GLOBAL_ITER == 0 or done:
                if done:
                    bootstrap = 0  # terminal
                else:
                    bootstrap = SESS.run(
                        self.AC.v,
                        {self.AC.state: new_state[np.newaxis, :]})[0, 0]

                # Discounted returns, accumulated from the last step backwards.
                targets = []
                for reward in reversed(rewards):
                    bootstrap = reward + GAMMA * bootstrap
                    targets.append(bootstrap)
                targets.reverse()

                feed_dict = {
                    self.AC.state: np.vstack(states),
                    self.AC.a_his: np.vstack(actions),
                    self.AC.v_target: np.vstack(targets),
                }
                self.AC.update_global(feed_dict)
                states, actions, rewards = [], [], []
                self.AC.pull_global()

            state = new_state
            total_step += 1

            if not done:
                continue

            # Episode finished: record the raw episode reward (no smoothing).
            GLOBAL_RUNNING_R.append(ep_r)
            print(
                self.name,
                "Ep:", GLOBAL_EP,
                "| Ep_r: %i" % GLOBAL_RUNNING_R[-1],
            )
            GLOBAL_EP += 1
            if GLOBAL_EP % 500 == 0:
                save_path = saver.save(SESS, "./a3c.chkp")
                print("Model saved in path: %s" % save_path)
            break
def eval_genome(genome):
    """Score a PID gain triple on the ball-on-plate simulation.

    ``genome[0]``, ``genome[1]`` and ``genome[2]`` are used as the
    proportional, derivative and integral gains.  The controller is run on
    five fixed (start, reference) position pairs for up to
    ``simulation_seconds`` of simulated time each, and the per-scenario
    scores are averaged.

    Per scenario the score is ``(elapsed_time + result) / simulation_seconds
    * 100`` where ``result`` accumulates the negative squared error weighted
    by ``(time + 1) / 100``; ``1e4`` is subtracted when the environment
    reports the episode ended early (the ball fell).

    Returns:
        A one-element tuple ``(cost,)``.

    The simulator is always closed, even when a step raises.
    """
    # Gains do not change during evaluation, so read them once.
    prop = genome[0] * 1.
    diff = genome[1] * 1.
    integr = genome[2] * 1.

    CONST_VALUE = 0.7
    initial_positions = [[CONST_VALUE, CONST_VALUE],
                         [-CONST_VALUE, -CONST_VALUE],
                         [-CONST_VALUE, CONST_VALUE],
                         [CONST_VALUE, -CONST_VALUE],
                         [0., 0.]]
    reference_positions = [[-CONST_VALUE, -CONST_VALUE],
                           [CONST_VALUE, CONST_VALUE],
                           [CONST_VALUE, -CONST_VALUE],
                           [-CONST_VALUE, CONST_VALUE],
                           [0., 0.]]

    cost = 0
    ballOnPlate = env.BallOnPlate(showGUI=False, randomInitial=False)
    try:
        for start, target in zip(initial_positions, reference_positions):
            result = 0
            dropDown = False
            # `intial_pos` is the attribute name used by the environment.
            ballOnPlate.intial_pos = np.array(start)
            ref_point = np.array(target)
            posOnPlate = ballOnPlate.reset()
            prev_err = [0, 0]
            integr_err = 0

            while ballOnPlate.time < simulation_seconds:
                # Get error
                err = ref_point - posOnPlate
                result -= ((err[0] * err[0] + err[1] * err[1])
                           * (ballOnPlate.time + 1) / 100.)

                # PID controller: integral and derivative terms are per-step
                # sums/differences (not scaled by dt).
                integr_err += err
                d_err = err - prev_err
                prev_err = err

                # Input 0 is driven by the negated second error component,
                # input 1 by the first error component.
                envInput = np.clip(
                    [-(prop * err[1] + diff * d_err[1] + integr_err[1] * integr),
                     prop * err[0] + diff * d_err[0] + integr_err[0] * integr],
                    -1, 1)

                posOnPlate, isEnd = ballOnPlate.step(envInput)
                if isEnd:
                    # Bad penalty as fall
                    dropDown = True
                    break

            current_cost = ((ballOnPlate.time + result)
                            / simulation_seconds * 100.)
            if dropDown:
                current_cost -= 1e4
            cost += current_cost / float(len(initial_positions))
    finally:
        # Release the simulator even if a step raised.
        ballOnPlate.close()
    return cost,
def eval_genome(genome, config):
    """Score a NEAT genome as a CTRNN controller on the ball-on-plate simulation.

    A CTRNN is built from ``genome`` and ``config`` with the environment's
    ``dt`` as time constant argument, then run on five fixed
    (start, reference) position pairs for up to ``simulation_seconds`` of
    simulated time each.  The network is reset before every scenario.

    Network input per step is
    ``[err_x, err_y, x, y, prev_u0, prev_u1, speed_x, speed_y]`` where
    ``speed`` is the position change since the previous step divided by
    ``dt`` and ``prev_u`` is the previously applied (clipped) control input.

    Per scenario the score is ``(elapsed_time + result) / simulation_seconds
    * 100`` where ``result`` accumulates the negative squared error weighted
    by ``(time + 1) / 100``; ``1e4`` is subtracted when the environment
    reports the episode ended early (the ball fell).

    Returns:
        The mean score over the five scenarios.

    The simulator is always closed, even when network creation or a step
    raises.
    """
    CONST_VALUE = 0.7
    initial_positions = [[CONST_VALUE, CONST_VALUE],
                         [-CONST_VALUE, -CONST_VALUE],
                         [-CONST_VALUE, CONST_VALUE],
                         [CONST_VALUE, -CONST_VALUE],
                         [0., 0.]]
    reference_positions = [[-CONST_VALUE, -CONST_VALUE],
                           [CONST_VALUE, CONST_VALUE],
                           [CONST_VALUE, -CONST_VALUE],
                           [-CONST_VALUE, CONST_VALUE],
                           [0., 0.]]

    cost = 0
    ballOnPlate = env.BallOnPlate(showGUI=False, randomInitial=False)
    try:
        net = neat.ctrnn.CTRNN.create(genome, config, ballOnPlate.dt)

        for start, target in zip(initial_positions, reference_positions):
            net.reset()
            envInput = [0, 0]
            result = 0
            dropDown = False
            # `intial_pos` is the attribute name used by the environment.
            ballOnPlate.intial_pos = np.array(start)
            ref_point = np.array(target)
            posOnPlate = ballOnPlate.reset()
            prevPosOnPlate = posOnPlate

            while ballOnPlate.time < simulation_seconds:
                # Get error
                err = ref_point - posOnPlate
                result -= ((err[0] * err[0] + err[1] * err[1])
                           * (ballOnPlate.time + 1) / 100.)
                speed = (posOnPlate - prevPosOnPlate) / ballOnPlate.dt

                # Process control system
                netInput = np.array([err[0], err[1],
                                     posOnPlate[0], posOnPlate[1],
                                     envInput[0], envInput[1],
                                     speed[0], speed[1]])
                netOutput = net.advance(netInput, ballOnPlate.dt,
                                        ballOnPlate.dt)
                envInput = np.clip(netOutput, -1, 1)

                prevPosOnPlate = posOnPlate
                posOnPlate, isEnd = ballOnPlate.step(envInput)
                if isEnd:
                    # Bad penalty as fall
                    dropDown = True
                    break

            current_cost = ((ballOnPlate.time + result)
                            / simulation_seconds * 100.)
            if dropDown:
                current_cost -= 1e4
            cost += current_cost / float(len(initial_positions))
    finally:
        # Release the simulator even if network creation or a step raised.
        ballOnPlate.close()
    return cost
# Setup for an interactive ball-on-plate run with the GUI enabled: imports,
# environment creation, empty series lists and initial controller state.
# Only the initialisation is visible here.
import pybullet as p
from time import sleep
import math
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import ball_on_plate_env as env

# Simulation with the GUI shown.
ballOnPlate = env.BallOnPlate(showGUI=True)

# Empty series, presumably filled during the run for plotting
# (reference/actual angles, time, position, reference position) -- TODO confirm.
ref_alpha_vals = []
ref_beta_vals = []
alpha_vals = []
beta_vals = []
t_vals = []
x_vals = []
y_vals = []
r_x_vals = []
r_y_vals = []

# Controller state: previous error and accumulated error.
prev_err = [0, 0]
integr_err = 0

# Target position and initial position on the plate.
ref_point = np.array([.05, .05])
posOnPlate = np.array([0., 0.])
# NOTE(review): `input` shadows the Python builtin of the same name; left
# unchanged because code outside this view may reference it.
input = [0, 0]