def program_loop():
    global start_time, previous_state, evasion_start_time, evasion_period
    current_state = -1
    if (previous_state == -1):
        if ((time.time() - start_time) > init_period):
            current_state = 0
    elif ((time.time() - start_time) > loop_period):
        start_time = time.time()
        min_distance = 100
        min_bearing = 0
        distances = []
        count = 0
        for s_def in settings.ROBOT_SENSOR_MODEL:
            distance = sensors.get_distance(sensors.read_adc(count), s_def[1])
            count += 1
            distances.append([s_def[2], distance])
            if (distance < min_distance):
                min_distance = distance
                min_bearing = s_def[2]
        #logging.info(distances)
        if (min_distance > distance_threshold_high):
            current_state = 1  # No obstacle
        elif (min_distance < distance_threshold_low):
            if (previous_state == 5 and (time.time() - evasion_start_time < evasion_period)):
                current_state = 5
            else:
                current_state = 4
        else:
            # Distance falls between thresholds, so decide if it is to the left or right
            if (min_bearing > 180):
                current_state = 2
            else:
                current_state = 3
    else:
        # Set current state to previous state when we haven't passed the loop period
        current_state = previous_state
    if (previous_state != current_state):
        logging.info("avoid_obstacles: Current state %d, previous state %d" % (current_state, previous_state))
        if (current_state == 1):
            # No obstacle; full speed ahead!
            motors.forwards(target_speed)
            led.set_colour_solid(3)  # GREEN
        if (current_state == 2):
            # Obstacle to the left, head right
            motors.set_motor_speeds(target_speed, target_speed / 2)
            led.set_left_colour_pulse(1)
        if (current_state == 3):
            # Obstacle to the right, head left
            motors.set_motor_speeds(target_speed / 2, target_speed)
            led.set_right_colour_pulse(1)
        if (current_state == 4):
            # Obstacle dead ahead: start a random evasion manoeuvre
            evasion_start_time = time.time()
            evasion_period = 0.2 + (random.random())
            if (random.random() > 0.6):
                motors.set_motor_speeds(-target_speed, target_speed)
            elif (random.random() > 0.3):
                motors.set_motor_speeds(target_speed, -target_speed)
            else:
                motors.set_motor_speeds(-target_speed / 2, -target_speed / 2)
            led.animation(8)
            current_state = 5
    previous_state = current_state
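# NOTE: hedged driver sketch, not part of the original source. program_loop() reads
# module-level globals (start_time, previous_state, evasion_start_time, evasion_period)
# and settings such as init_period and loop_period, so a caller has to initialise the
# globals and then poll the function. The driver name and poll interval are hypothetical.
def run_avoid_obstacles(poll_interval=0.05):
    global start_time, previous_state
    start_time = time.time()
    previous_state = -1  # -1 means "initialising"
    while True:
        program_loop()
        time.sleep(poll_interval)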
def get_info(request):
    """Return the current height and distance readings as JSON."""
    # height will be between 50cm and 200cm
    # distance is distance from sensor (100cm to 50cm)
    height = 200 - get_distance_2()
    #height = random.randint(2, 20)
    distance = get_distance()
    #distance = random.randint(50, 195)
    return {'data_height': height, 'data_distance': distance}
def generate_range_data(current_values):
    range_data = []
    for s_def in settings.ROBOT_SENSOR_MODEL:
        distance = sensors.get_distance(int(current_values[s_def[0]]), s_def[1])
        range_data.append(
            go.Scatterpolar(r=[0, distance, distance, 0],
                            theta=[0, s_def[2] - s_def[3], s_def[2] + s_def[3], 0],
                            mode='lines',
                            fill='toself',
                            fillcolor=get_fill_colour(9, 80, distance),
                            line=dict(color='black')))
    return range_data
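# NOTE: hedged illustration, not from the original source. Judging by how the fields are
# indexed above, each entry of settings.ROBOT_SENSOR_MODEL appears to be
# [csv_column, sensor_type, bearing_degrees, half_width_degrees]; the values below are
# invented purely to show the shape generate_range_data() expects.
EXAMPLE_SENSOR_MODEL = [
    ['analog-1', '2y0a21', 0, 15],   # forward-facing sensor drawn as a 30-degree wedge
    ['analog-2', '2y0a21', 90, 15],  # right-facing sensor
]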
def run_policy(sd, loaded_q):
    q_table = loaded_q
    tilesperepisode = []
    final_reward_per_episode = []
    for epi in range(NUM_EPISODES):
        print("Episode: ", epi)
        # Initialize viewer and agent
        env = CarRacing()
        env.seed(sd)
        env.reset()
        # Start with speed
        for i in range(10):
            env.step(np.array([0.0, 1.0, 0.0]))
        for i in range(10):
            env.step(np.array([0.0, 0.0, 0.0]))
        # Initialize state
        state = [MAX_STEP - 1, MAX_STEP - 1, MAX_STEP - 1]
        # Loop for each step of the episode
        done = False
        total_reward = env.reward
        while not done and total_reward > (-10):
            env.step(np.array([0.0, 0.0001, 0.0]))
            # Choose A from S greedily
            b = q_table[:, state[0], state[1], state[2]]
            action_index = np.random.choice(np.flatnonzero(b == b.max()))
            action = ACTIONSPACE[action_index]
            # Step twice using selected action
            for i in range(2):
                s, r, done, info = env.step(action)
                if done:
                    break
            # Render
            env.render(mode='human')
            # Obtain modified state from environment defined state
            bool_state = s[1:66, :, 1] < 150
            dist_state = get_distance(bool_state, ratio=4, range=40, max_steps=MAX_STEP)
            next_state = [dist_state[0], dist_state[1], dist_state[2]]
            # Update state
            state = next_state
            total_reward = env.reward
        final_reward_per_episode.append(env.reward)
        print("Reward of Episode: ", env.reward)
        env.close()
        tilesperepisode.append(env.tile_visited_count)
        print("Tiles Visited: ", env.tile_visited_count)
    return
def right_IR():
    distance_right = sensors.get_distance(sensors.read_adc(3), '2y0a21') / 100.0
    return distance_right
def left_IR():
    distance_left = sensors.get_distance(sensors.read_adc(2), '2y0a21') / 100.0
    #print(distance_left)
    return distance_left
def top_IR():
    distance_top = sensors.get_distance(sensors.read_adc(0), '2y0a21') / 100.0
    #print(distance_top)
    return distance_top
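# NOTE: hedged example, not from the original source. Shows one way the three IR helpers
# above could be combined; the function name closest_obstacle() is hypothetical.
def closest_obstacle():
    readings = {'left': left_IR(), 'top': top_IR(), 'right': right_IR()}
    direction = min(readings, key=readings.get)
    return direction, readings[direction]  # e.g. ('left', 0.23) -> obstacle 23 cm to the left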
def update_values(n_intervals, distance_mode):
    data = pd.read_csv('/ramdisk/system.csv')
    try:
        current_values = data[[
            'system-time', 'analog-1', 'analog-2', 'analog-3', 'analog-4'
        ]].tail(1).iloc[0]
        analog_1_raw = int(current_values['analog-1'])
        analog_2_raw = int(current_values['analog-2'])
        analog_3_raw = int(current_values['analog-3'])
        analog_4_raw = int(current_values['analog-4'])
        uns = 'Raw'
        max_val = 255
        g_c = "#FF5E5E"
        g_s = 190
        g_sty = {"textAlign": "center", "width": "23%"}
        if (distance_mode):
            analog_1_raw = sensors.get_distance(analog_1_raw, '2y0a21')
            analog_2_raw = sensors.get_distance(analog_2_raw, '2y0a21')
            analog_3_raw = sensors.get_distance(analog_3_raw, '2y0a21')
            analog_4_raw = sensors.get_distance(analog_4_raw, '2y0a21')
            uns = 'CM'
            max_val = 80
        system_time = round(current_values['system-time'], 3)
        range_figure = go.Figure(data=generate_range_data(current_values),
                                 layout=range_layout)
        ret = html.Div([
            html.Div([
                daq.Gauge(showCurrentValue=True, units=uns, min=0, max=max_val,
                          value=analog_1_raw, color=g_c, label="Analog-1",
                          size=g_s, style=g_sty),
                daq.Gauge(showCurrentValue=True, units=uns, min=0, max=max_val,
                          value=analog_2_raw, color=g_c, label="Analog-2",
                          size=g_s, style=g_sty),
                daq.Gauge(showCurrentValue=True, units=uns, min=0, max=max_val,
                          value=analog_3_raw, color=g_c, label="Analog-3",
                          size=g_s, style=g_sty),
                daq.Gauge(showCurrentValue=True, units=uns, min=0, max=max_val,
                          value=analog_4_raw, color=g_c, label="Analog-4",
                          size=g_s, style=g_sty),
            ], style=flex_style),
            html.Div([
                dcc.Graph(figure=range_figure,
                          style={'height': 580},
                          id='range-graph')
            ]),
            html.Div('Last response time: {}'.format(
                utils.decode_time(system_time).rstrip("0"))),
        ])
    except IndexError:
        ret = html.Div(
            'Could not read system status file.\nIs core.py running, the ramdisk correctly set up and system polling enabled in settings.py?'
        )
    return ret
def q_learning(q_table, sd, loaded_q, explore):
    if loaded_q is not None:
        q_table = loaded_q
    tilesperepisode = []
    final_reward_per_episode = []
    for epi in range(NUM_EPISODES):
        print("Episode: ", epi)
        # Initialize S
        env = CarRacing()
        env.seed(sd)
        env.reset()
        # Start with speed
        for i in range(10):
            env.step(np.array([0.0, 1.0, 0.0]))
        for i in range(10):
            env.step(np.array([0.0, 0.0, 0.0]))
        # Initialize state
        state = [MAX_STEP - 1, MAX_STEP - 1, MAX_STEP - 1]
        # Loop for each step of the episode
        done = False
        total_reward = env.reward
        while not done and total_reward > (-10):
            env.step(np.array([0.0, 0.0001, 0.0]))
            # Choose A from S using e-greedy
            p = random.uniform(0, 1)
            if explore:
                e = EPSILON
            else:
                e = -1
            if p < e:
                # Explore
                action_index = random.choice(range(len(ACTIONSPACE)))
            else:
                # Exploit
                b = q_table[:, state[0], state[1], state[2]]
                action_index = np.random.choice(np.flatnonzero(b == b.max()))
            action = ACTIONSPACE[action_index]
            # Step twice using selected action
            for i in range(2):
                s, r, done, info = env.step(action)
                if done:
                    break
            # Modified reward
            r = abs(state[0] + 4 * state[1] + state[2]) / 10
            if done:
                r = 100
            # Large negative reward if goes off the track
            if state[1] == 0:
                r = -1000
            env.render(mode='human')
            # Obtain modified state from environment defined state
            bool_state = s[1:66, :, 1] < 150
            dist_state = get_distance(bool_state, ratio=4, range=40, max_steps=MAX_STEP)
            next_state = [dist_state[0], dist_state[1], dist_state[2]]
            # Update Q
            current_q = q_table[action_index, state[0], state[1], state[2]]
            target = r + GAMMA * (np.amax(
                q_table[:, next_state[0], next_state[1], next_state[2]]))
            q_table[action_index, state[0], state[1], state[2]] = current_q + ALPHA * (target - current_q)
            if state[1] == 0:
                break
            # Update state
            state = next_state
            total_reward = env.reward
        final_reward_per_episode.append(env.reward)
        print("Reward of Episode: ", env.reward)
        env.close()
        tilesperepisode.append(env.tile_visited_count)
        print("Tiles Visited: ", env.tile_visited_count)
        # Saves learning progress
        np.save('data.npy', q_table)
    return
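# NOTE: hedged usage sketch, not part of the original source. q_learning() saves its table
# to 'data.npy' (see above), so a trained policy could be replayed with run_policy() like
# this; the seed value is purely illustrative.
if __name__ == '__main__':
    loaded = np.load('data.npy')        # Q-table written by q_learning()
    run_policy(sd=42, loaded_q=loaded)  # greedy replay of the learned policy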