def run(self):
    """Continuously estimate pitch and publish it on ``self``.

    Runs forever. On every iteration, while holding ``self.threadlock``:

    * reads the accelerometer pitch from a ``degree_gyro_q_l.acc`` instance,
    * calls ``gyro_pitch`` with the elapsed loop time, once seeded with the
      previous gyro-only angle and once seeded with the previous blended
      angle (presumably gyro integration -- confirm against
      ``degree_gyro_q_l``),
    * blends the magnitudes 0.97 (gyro) / 0.03 (accelerometer) and applies
      the sign of the gyro-derived angle,
    * stores the result in ``self.acc_gyro_pitch`` and the angular velocity
      in ``self.p_ang_vel``.

    The lock is released in a ``finally`` clause so that a failing sensor
    read cannot leave ``self.threadlock`` held and deadlock its readers.
    """
    b = degree_gyro_q_l.acc()
    acc_gyro_pitch = gyro_pitch_degree = b.pitch()
    prev_time = time.time()

    while True:
        self.threadlock.acquire()
        try:
            acc_pitch_degree = b.pitch()

            # Elapsed time since the previous iteration.
            now = time.time()
            loop_time = now - prev_time
            prev_time = now

            # Gyro-only track: not published, but still advanced each loop.
            gyro_pitch_degree, _ = b.gyro_pitch(loop_time, gyro_pitch_degree)

            get_gyro_degree, p_ang_vel = b.gyro_pitch(loop_time, acc_gyro_pitch)
            # NOTE(review): magnitudes are blended and the gyro sign is
            # re-applied, which differs from a plain weighted sum whenever
            # the two readings have opposite signs.
            acc_gyro_pitch = np.sign(get_gyro_degree) * (
                (0.97 * abs(get_gyro_degree)) + (0.03 * abs(acc_pitch_degree)))

            self.acc_gyro_pitch = acc_gyro_pitch
            self.p_ang_vel = p_ang_vel
        finally:
            self.threadlock.release()
def every5sec():
    """Raise the global ``init_pwm_1`` by 0.01 and re-arm itself.

    Prints a "motor up" banner and schedules the next call through a
    5-second ``threading.Timer``, so once started it repeats indefinitely.
    """
    global init_pwm_1

    # Only the (disabled) debug print ever used this object; it is still
    # constructed so that any side effect of creating it is preserved.
    b = degree_gyro_q_l.acc()

    init_pwm_1 = init_pwm_1 + 0.01

    banner = "\n\n\n\n\n\n\n\n\n---------------------motor up---------------------\n"
    print(banner)

    next_tick = threading.Timer(5, every5sec)
    next_tick.start()
def run(self):
    """Sample the sensor in an endless loop without any locking.

    Each pass reads the accelerometer pitch, measures the time since the
    previous pass and calls ``gyro_pitch`` twice: once seeded with the
    running gyro-only angle and once seeded with ``acc_gyro_pitch``. The
    raw tuple returned by the first call is stored in the global ``acc1``.
    """
    global acc1, acc2

    b = degree_gyro_q_l.acc()
    count = 0
    acc_gyro_pitch = gyro_pitch_degree = b.pitch()
    last_tick = time.time()

    while True:
        acc_pitch_degree = b.pitch()

        tick = time.time()
        loop_time = tick - last_tick
        last_tick = tick

        # Publish the full (angle, rate) tuple first, then unpack it.
        acc1 = b.gyro_pitch(loop_time, gyro_pitch_degree)
        gyro_pitch_degree, _ = acc1

        # acc_gyro_pitch is never updated in this loop, so this call is
        # always seeded with the initial accelerometer reading.
        get_gyro_degree, p_ang_vel = b.gyro_pitch(loop_time, acc_gyro_pitch)
        end = time.time()
def main():
    """Train a DQN that drives the two servos from the shared-memory state.

    The state ``[pitch, angular velocity]`` is read from the module-level
    ``memory_degree`` / ``memory_ang_vel`` segments under
    ``memory_semaphore``. For up to 2000 episodes the agent picks one of 9
    actions epsilon-greedily, applies it through ``step_action`` to the
    servo PWM values, records the transition in a bounded replay buffer,
    and every 10th episode (``episode % 10 == 1``) trains ``mainDQN`` on 50
    random minibatches of 10 before copying its weights to ``targetDQN``.

    Raises:
        Whatever ``memory_semaphore.acquire(10)`` raises when the semaphore
        cannot be obtained within the timeout, and ``ValueError`` if a
        shared-memory segment does not contain a parsable float.
    """
    a = Servo.servo()
    # NOTE(review): this sensor object is not used by the live code below
    # (the state comes from shared memory); it is still constructed in case
    # its constructor initialises the device -- confirm and remove if not.
    b = degree_gyro_q_l.acc()

    max_episodes = 2000

    # Replay memory; maxlen drops the oldest transition automatically once
    # REPLAY_MEMORY entries are stored.
    replay_buffer = deque(maxlen=REPLAY_MEMORY)

    def read_state():
        """Return ``np.array([pitch, angular velocity])`` from shared memory."""
        memory_semaphore.acquire(10)
        try:
            # Segments are NUL padded; strip the padding before parsing.
            pitch = float(memory_degree.read().rstrip('\x00'))
            ang_vel = float(memory_ang_vel.read().rstrip('\x00'))
        finally:
            # Always hand the semaphore back, even when parsing fails, so
            # the writer process is not blocked.
            memory_semaphore.release()
        return np.array([pitch, ang_vel])

    with tf.Session() as sess:
        mainDQN = dqn.DQN(sess, input_size, output_size, name="main")
        targetDQN = dqn.DQN(sess, input_size, output_size, name="target")
        tf.global_variables_initializer().run()

        ## initial copy q_net -> target_net
        copy_ops = get_copy_var_ops(dest_scope_name="target",
                                    src_scope_name="main")
        sess.run(copy_ops)

        for episode in range(max_episodes):
            print("new episodes initializaion")
            # Explicit floor division keeps the exploration schedule the
            # same on Python 2 and Python 3 (epsilon steps every 10 episodes).
            e = 1. / ((episode // 10) + 1)
            done = False
            step_count = 0
            pwm_left = init_pwm_1
            pwm_right = init_pwm_2

            print("\n\n")
            while not done:
                state = read_state()
                print("\t\t\t<state> degree: %s, \tangular velocity: %s" % (state[0], state[1]))

                if np.random.rand(1) < e:
                    action = np.random.randint(9)
                else:
                    # Evaluate the network once and reuse the result for
                    # both the greedy choice and the log line.
                    q_values = mainDQN.predict(state)
                    action = np.argmax(q_values)
                    print("Q: %s" % (q_values))

                pwm_left, pwm_right = step_action(action, pwm_left, pwm_right)
                print("\t\t\t\t\t\t\t\t\t\t<action-motor> left: %s, right: %s <= %s" % (pwm_left, pwm_right, action_print(action)))
                a.servo_1(pwm_left)
                a.servo_2(pwm_right)
                time.sleep(0.01)

                ## Get new state and reward from environment
                next_state = read_state()
                reward, done = reward_done_check(state, next_state)

                ## Save the experience to our buffer
                replay_buffer.append((state, action, reward, next_state, done))

                if done:
                    print("\t\t\t<finish state> degree: %s, \tangular velocity: %s" % (next_state[0], next_state[1]))
                    time.sleep(3)

                step_count += 1
                if step_count > 10000:
                    break

            print("Episode: {} steps: {}".format(episode, step_count))

            if len(replay_buffer) > 10 and episode % 10 == 1:  # train every 10 episode
                # Get a random batch of experiences.
                for _ in range(50):
                    minibatch = random.sample(replay_buffer, 10)
                    loss, _ = replay_train(mainDQN, targetDQN, minibatch)
                print("Loss: %s" % (loss))
                # copy q_net -> target_net
                sess.run(copy_ops)
def main():
    """Run REINFORCE training episodes against the shared-memory state.

    Creates the global TensorFlow session ``sess`` and a
    ``REINFORCE.REINFORCEAgnet``. Depending on the global ``model_load`` it
    either initialises the variables or restores the checkpoint
    ``./TF_Data/<sys.argv[1]>``. Each of the 2000 episodes starts a
    10-second ``threading.Timer`` on ``done_timer``; the episode loop stops
    once the global ``done_episode`` becomes true (presumably set by
    ``done_timer`` -- confirm). When the episode ends the agent is updated
    and ``[episode, loss, score]`` is appended to the global
    ``np_PG_data``.

    Raises:
        Whatever ``memory_semaphore.acquire(10)`` raises when the semaphore
        cannot be obtained within the timeout, and ``ValueError`` if a
        shared-memory segment does not contain a parsable float.
    """
    global sess
    global done_episode
    global np_PG_data

    a = Servo.servo()
    # NOTE(review): not used by the live code below; kept in case the
    # constructor initialises the sensor -- confirm and remove if not.
    b = degree_gyro_q_l.acc()

    max_episodes = 2000

    def read_state():
        """Return ``np.array([pitch, angular velocity])`` from shared memory."""
        memory_semaphore.acquire(10)
        try:
            # Segments are NUL padded; strip the padding before parsing.
            pitch = float(memory_degree.read().rstrip('\x00'))
            ang_vel = float(memory_ang_vel.read().rstrip('\x00'))
        finally:
            # Release even when parsing fails so the writer is not blocked.
            memory_semaphore.release()
        return np.array([pitch, ang_vel])

    sess = tf.Session()
    agent = REINFORCE.REINFORCEAgnet(sess, input_size, output_size, name="main")

    if not model_load:
        tf.global_variables_initializer().run(session=sess)
    else:
        saver = tf.train.Saver()
        saver.restore(sess, "./TF_Data/" + sys.argv[1])
        print("'%s' model is loaded" % (sys.argv[1]))

    for episode in range(max_episodes):
        print("new episodes initializaion")
        done = False
        done_episode = False
        score = 0
        pwm_left = init_pwm_1
        pwm_right = init_pwm_2

        # Timer.start() returns None, so there is nothing useful to keep a
        # reference to; just arm the episode timeout.
        threading.Timer(10, done_timer).start()

        print("\n\n")
        while not done:
            state = read_state()
            print("\t\t\t<state> degree: %s, \tangular velocity: %s" % (state[0], state[1]))

            action = agent.predict(state)
            pwm_left, pwm_right = step_action(action, pwm_left, pwm_right)
            pwm_left, pwm_right = safe_pwm(pwm_left, pwm_right)
            print("\t\t\t\t\t\t\t\t<action-motor> left: %s, right: %s <= %s" % (pwm_left, pwm_right, action_print(action)))
            a.servo_1(pwm_left)
            a.servo_2(pwm_right)
            time.sleep(0.05)

            ## Get new state and reward from environment
            next_state = read_state()
            print("\t\t\t<next-state> degree: %s, \tangular velocity: %s" % (next_state[0], next_state[1]))

            reward = reward_check(next_state)

            # done_episode is a module-level flag changed outside this loop.
            if done_episode:
                done = True

            agent.append_sample(state, action, reward)
            score += reward

            if done:
                loss = agent.update(keep_prob=0.7)
                if episode == 0:
                    np_PG_data = np.array([[episode, loss, score]])
                else:
                    np_PG_data = np.append(np_PG_data, [[episode, loss, score]], axis=0)
                score = round(score, 2)
                print("episode: %s loss: %s score: %s" % (episode, loss, score))
                time.sleep(3)
def main():
    """Publish filtered pitch data to SysV shared memory, forever.

    Creates (or attaches to) three 20-byte shared-memory segments with keys
    600 (blended pitch), 1024 (angular velocity) and 256 (accelerometer
    pitch), plus the semaphore with key 128 that guards them. Each loop
    iteration reads the sensor, blends the gyro-derived and accelerometer
    angles 0.97 / 0.03, passes the result through ``safeBoundary``, appends
    a row ``[elapsed, blended, accelerometer, gyro-only]`` to the global
    ``np_degree_data`` and writes the three values as ``"%0.1f"`` strings
    left-justified to 19 characters.

    NOTE(review): ``np_degree_data`` grows by one row per iteration and is
    copied by ``np.append`` each time; nothing in this function bounds it.

    Raises:
        Whatever ``smp1.acquire(10)`` raises when the semaphore cannot be
        obtained within the timeout.
    """
    global np_degree_data

    # sysv_ipc.IPC_CREAT replaces the magic octal 01000 used previously;
    # 0o600 is the same owner read/write mode spelled portably.
    share_acc_gyro_pitch = sysv_ipc.SharedMemory(
        600, flags=sysv_ipc.IPC_CREAT, size=20, mode=0o600)
    share_p_ang_vel = sysv_ipc.SharedMemory(
        1024, flags=sysv_ipc.IPC_CREAT, size=20, mode=0o600)
    share_acc_pitch_degree = sysv_ipc.SharedMemory(
        256, flags=sysv_ipc.IPC_CREAT, size=20, mode=0o600)
    smp1 = sysv_ipc.Semaphore(
        128, flags=sysv_ipc.IPC_CREAT, mode=0o600, initial_value=1)

    def as_field(value):
        """Format a number the way readers of the segments expect it."""
        return ("%0.1f" % value).ljust(19, " ")

    b = degree_gyro_q_l.acc()
    acc_gyro_pitch = gyro_pitch_degree = b.pitch()
    np_degree_data = np.array(
        [[0, acc_gyro_pitch, b.pitch(), gyro_pitch_degree]])

    start_time = time.time()
    prev_time = start_time

    while True:
        acc_pitch_degree = b.pitch()

        # Elapsed time since the previous iteration.
        now = time.time()
        loop_time = now - prev_time
        prev_time = now

        gyro_pitch_degree, _ = b.gyro_pitch(loop_time, gyro_pitch_degree)
        get_gyro_degree, p_ang_vel = b.gyro_pitch(loop_time, acc_gyro_pitch)

        degree_sign = np.sign(get_gyro_degree)
        same_sign = (degree_sign * np.sign(acc_pitch_degree)) == 1
        if same_sign or -90 < get_gyro_degree < 90:
            # Readings agree in sign, or the gyro angle is within +/-90:
            # plain weighted sum.
            acc_gyro_pitch = 0.97 * get_gyro_degree + 0.03 * acc_pitch_degree
        else:
            # Opposite signs with the gyro angle at or beyond +/-90: use
            # 360 - |accelerometer| so both terms refer to the same side.
            acc_gyro_pitch = degree_sign * (
                (0.97 * abs(get_gyro_degree))
                + (0.03 * (360 - abs(acc_pitch_degree))))
        acc_gyro_pitch = safeBoundary(acc_gyro_pitch)

        data_time = time.time() - start_time
        np_degree_data = np.append(
            np_degree_data,
            [[data_time, acc_gyro_pitch, acc_pitch_degree, gyro_pitch_degree]],
            axis=0)

        smp1.acquire(10)
        try:
            share_acc_pitch_degree.write(as_field(acc_pitch_degree))
            share_acc_gyro_pitch.write(as_field(acc_gyro_pitch))
            share_p_ang_vel.write(as_field(p_ang_vel))
        finally:
            # Release even if a write fails so readers are not blocked.
            smp1.release()
def _fuse_pitch_step(sensor, prev_time, gyro_pitch_degree, acc_gyro_pitch):
    """Take one sensor sample and advance both pitch estimates.

    Args:
        sensor: object providing ``pitch()`` and ``gyro_pitch(dt, angle)``.
        prev_time: ``time.time()`` value of the previous sample.
        gyro_pitch_degree: previous gyro-only angle.
        acc_gyro_pitch: previous blended angle.

    Returns:
        ``(now, gyro_pitch_degree, acc_gyro_pitch, p_ang_vel)`` where
        ``now`` is the timestamp to pass as ``prev_time`` next time.
    """
    now = time.time()
    loop_time = now - prev_time
    acc_pitch_degree = sensor.pitch()
    gyro_pitch_degree, _ = sensor.gyro_pitch(loop_time, gyro_pitch_degree)
    get_gyro_degree, p_ang_vel = sensor.gyro_pitch(loop_time, acc_gyro_pitch)
    # Magnitudes are blended 0.97 / 0.03 and the gyro sign is re-applied.
    acc_gyro_pitch = np.sign(get_gyro_degree) * (
        (0.97 * abs(get_gyro_degree)) + (0.03 * abs(acc_pitch_degree)))
    return now, gyro_pitch_degree, acc_gyro_pitch, p_ang_vel


def main():
    """Train the left-motor DQN while reading the sensor directly.

    Initialises the global ``np_ML_data`` log row, calls ``every1sec()``,
    then runs up to 2000 episodes. In each step the agent picks one of 3
    actions for the left motor epsilon-greedily, applies it through
    ``step_action``, drives both servos, samples the sensor again and
    stores the transition in a bounded replay buffer. When ``episode % 10
    == 1`` and more than 10 transitions are stored, ``left_mainDQN`` is
    trained on 50 random minibatches of 10 and its weights are copied to
    ``left_targetDQN``. The right motor keeps its initial PWM value.
    """
    global np_ML_data
    global start_time

    a = Servo.servo()
    b = degree_gyro_q_l.acc()

    max_episodes = 2000

    # Replay memory; maxlen drops the oldest transition automatically once
    # REPLAY_MEMORY entries are stored.
    left_replay_buffer = deque(maxlen=REPLAY_MEMORY)

    acc_gyro_pitch = gyro_pitch_degree = b.pitch()

    ## matplotlib data initialization ##
    np_ML_data = np.array([[
        0, acc_gyro_pitch, b.pitch(), gyro_pitch_degree, init_pwm_1,
        init_pwm_2
    ]])

    every1sec()

    with tf.Session() as sess:
        left_mainDQN = dqn.DQN(sess, input_size, output_size, name="left_main")
        left_targetDQN = dqn.DQN(sess, input_size, output_size,
                                 name="left_target")
        tf.global_variables_initializer().run()

        ## initial copy q_net -> target_net
        copy_left_ops = get_copy_var_ops(dest_scope_name="left_target",
                                         src_scope_name="left_main")
        sess.run(copy_left_ops)

        start_time = time.time()
        prev_time = start_time

        for episode in range(max_episodes):
            # Explicit floor division keeps the exploration schedule the
            # same on Python 2 and Python 3 (epsilon steps every 10 episodes).
            e = 1. / ((episode // 10) + 1)
            done = False
            step_count = 0
            pwm_left = init_pwm_1
            pwm_right = init_pwm_2

            prev_time, gyro_pitch_degree, acc_gyro_pitch, p_ang_vel = \
                _fuse_pitch_step(b, prev_time, gyro_pitch_degree,
                                 acc_gyro_pitch)
            state = np.array([acc_gyro_pitch, p_ang_vel])

            print("\n\n")
            while not done:
                print("\t\t\t<state> degree: %s, angular velocity: %s" % (
                    state[0], state[1]))

                if np.random.rand(1) < e:
                    action_left = np.random.randint(3)
                else:
                    action_left = np.argmax(left_mainDQN.predict(state))

                pwm_left = step_action(action_left, pwm_left)
                print("\t\t\t<action-motor> left: %s, right: %s" % (pwm_left, pwm_right))
                a.servo_1(pwm_left)
                a.servo_2(pwm_right)
                time.sleep(0.01)

                ## Get new state and reward from environment
                prev_time, gyro_pitch_degree, acc_gyro_pitch, p_ang_vel = \
                    _fuse_pitch_step(b, prev_time, gyro_pitch_degree,
                                     acc_gyro_pitch)
                next_state = np.array([acc_gyro_pitch, p_ang_vel])

                reward, done = reward_done_check(state[0], next_state[0])

                ## Save the experience to our buffer
                left_replay_buffer.append(
                    (state, action_left, reward, next_state, done))

                if done:
                    # Pause, then resample so the estimates reflect the
                    # sensor after the pause rather than before it.
                    time.sleep(3)
                    prev_time, gyro_pitch_degree, acc_gyro_pitch, p_ang_vel = \
                        _fuse_pitch_step(b, prev_time, gyro_pitch_degree,
                                         acc_gyro_pitch)
                    next_state = np.array([acc_gyro_pitch, p_ang_vel])

                state = next_state
                step_count += 1
                if step_count > 10000:
                    break

            print("Episode: {} steps: {}".format(episode, step_count))

            if len(left_replay_buffer) > 10 and episode % 10 == 1:  # train every 10 episode
                # Get a random batch of experiences.
                for _ in range(50):
                    left_minibatch = random.sample(left_replay_buffer, 10)
                    left_loss, _ = replay_train(left_mainDQN, left_targetDQN,
                                                left_minibatch)
                print("Left Loss: %s" % (left_loss))
                # copy q_net -> target_net
                sess.run(copy_left_ops)
else: ## (pitch_gyro >= 180) print "\t\t\t\t\tbefore safeboundary:", value value = -360 + value print "\t\t\t\t\tafterfore safeboundary:", value return value share_acc_gyro_pitch = sysv_ipc.SharedMemory(600, flags=01000, size=20, mode=0600) share_p_ang_vel = sysv_ipc.SharedMemory(1024, flags=01000, size=10, mode=0600) share_acc_degree = sysv_ipc.SharedMemory(256, flags=01000, size=20, mode=0600) smp1 = sysv_ipc.Semaphore(22, flags=01000, mode=0600, initial_value=1) b = degree_gyro_q_l.acc() timecheck_list = [] acc_gyro_pitch = gyro_pitch_degree = b.pitch() start_time = time.time() timecheck_list.append(start_time) while (True): acc_pitch_degree = b.pitch() timecheck_list.append(time.time()) loop_time = timecheck_list[1] - timecheck_list[0] timecheck_list.pop(0) gyro_pitch_degree, _ = b.gyro_pitch(loop_time, gyro_pitch_degree) get_gyro_degree, p_ang_vel = b.gyro_pitch(loop_time, acc_gyro_pitch)