# For saving toplist objective = -1 top_UI = params.top * [] #([([],[],policies,100)])*params.top top_UI_summary = [] # []*params.top #np.zeros((params.top,3)) multi_obj_topUI = [[], [], []] ########################################## # TODO: READ UI from the text file uis/remote.txt UI = np.array([[0, 1, 1, 1], [0, 0, 1, 1], [0, 1, 0, 1], [0, 1, 1, 0]]) num_action = params.num_states - 1 # Number of unique buttons ############################# # Call RL code objective, top_UI, best_actions, best_path, klm_value = rl_optimizer( UI, num_action, top_UI, params, batch_num, logging) if objective == -1: # If not learned, skip all button combinations print "Skipping this UI" ################### # Write to file output_file = open('best_path.txt', 'w') #output_file.writelines(tags[int(best_path[i])]+"\n") for i in range(np.size(best_path)): output_file.writelines(tags[int(best_path[i])] + "\n") #print tags[int(best_path[i])] output_file.writelines(str(klm_value)) output_file.close()
# NOTE(review): whitespace-mangled paste -- indentation reconstructed.
# The first six statements read one UI element's record (tag, x, y, width,
# height) and clearly belong inside a per-record parsing loop whose header is
# outside this chunk; they are shown flat here. Restore the enclosing loop
# when un-mangling -- TODO confirm against the original script.
tags.append(item_info[0])
x_coord[idx] = float(item_info[1])
y_coord[idx] = float(item_info[2])
widths[idx] = float(item_info[3])
heights[idx] = float(item_info[4])
idx += 1

###################
# Initialize parameters from the parsed element geometry.
params = initializeParams(x_coord, y_coord, widths, heights)
num_action = params.num_states-1  # one action per state except the start state

###################
# Call RL code: returns the best interaction path and its KLM estimate.
best_path, klm_value = rl_optimizer(num_action, params)

###################
# Write to file: one tag per step of the best path, then the KLM value.
output_file = open('best_path.txt', 'w')
for i in range(np.size(best_path)):
    output_file.writelines(tags[int(best_path[i])]+"\n")
output_file.writelines(str(klm_value))
output_file.close()
# Penalty index for each action. The penalty values in params.penalties actions_penalty = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]] print "Sensor errors", params.sensor_errors print "Confusion errors", params.confusion_error ############################# ## Learning policies and calculating KLM esitemate modality_table_total = np.array([0, 0, 0]) klm_total = 0 # If sensor errors are used the number of iterations should be more than 1 to average the effect of errors. num_iters = 5 for j in range(num_iters): modality_table, klm = rl_optimizer(UIs, actionmatrix, actions_in_uis, actions_penalty, num_commands, params) modality_table_total += modality_table # KLM estimates klm_total += klm print "Tactile commands used ", modality_table_total[0], "times." print "Gesture commands used ", modality_table_total[1], "times." print "Speech commands used ", modality_table_total[2], "times." # Average KLM estimate over all iterations print "KLM estimate:", klm_total / num_iters
# NOTE(review): whitespace-mangled paste. The leading branch is the body of an
# "if <random-search mode>:" whose condition is outside this chunk, and the
# select/train sequence sits inside a search loop (hence the 'break's and
# 'search_iter += 1') -- confirm the nesting when un-mangling.
    # Random-search mode: draw a matrix index not yet tried this session.
    random_search_idx = random.randint(0, len(button_matrices)-1)
    while random_search_idx in search_past_idxes:
        random_search_idx = random.randint(0, len(button_matrices)-1)
    search_past_idxes.append(random_search_idx)
    buttonmatrix = button_matrices[random_search_idx]
else:
    # Exhaustive mode: walk button_matrices in order; stop when exhausted.
    if search_iter == len(button_matrices):
        break
    buttonmatrix = button_matrices[search_iter]

##########################
# Train RL agent
##########################
start_time_rl = time.time()
top_buttons, objective = rl_optimizer(UI, buttonmatrix, num_buttons,
                                      top_buttons, params, batch_num, logging)
total_time_rl += (time.time()-start_time_rl)  # accumulate pure-RL wall time
if objective == -1:  # If not learned, skip all button combinations
    print "Skipping this UI"
    break
search_iter += 1

# Merge this round's best candidates into the global toplist, then keep only
# the params.the_top entries with the smallest value at tuple index 3
# (presumably the objective -- TODO confirm the tuple layout).
for k in range(len(top_buttons)):
    top_UI.append(top_buttons[k])
top_UI = sorted(top_UI, key=op.itemgetter(3))[:params.the_top]
# NOTE(review): whitespace-mangled paste. The leading rows below are the TAIL
# of a UI transition-matrix literal (presumably "UI2 = np.array([" -- the
# opening is outside this chunk); they are kept verbatim.
               [0,4,5,6,7,8,0,10,11,12],
               [0,4,5,6,7,8,9,0,11,12],
               [0,4,5,6,7,8,9,10,0,12],
               [0,4,5,6,7,8,9,10,11,0]])

# Allowed actions in each state: row i lists the action ids available while
# in state i (each row omits one id -- apparently the action leading to the
# state itself; TODO confirm).
actionmatrix = np.array([[1],
                         [1,2,3,5,6,7,8,9,10,11,12],
                         [1,2,3,4,6,7,8,9,10,11,12],
                         [1,2,3,4,5,7,8,9,10,11,12],
                         [1,2,3,4,5,6,8,9,10,11,12],
                         [1,2,3,4,5,6,7,9,10,11,12],
                         [1,2,3,4,5,6,7,8,10,11,12],
                         [1,2,3,4,5,6,7,8,9,11,12],
                         [1,2,3,4,5,6,7,8,9,10,12],
                         [1,2,3,4,5,6,7,8,9,10,11]])

num_buttons = 12  # Number of unique buttons

# All UIs
UIs = [UI1, UI2]
# Which action ids belong to which UI (UI1 owns 1-3, UI2 owns 4-12).
actions_in_uis = [[1,2,3],[4,5,6,7,8,9,10,11,12]]  # [[1,2,3]] #
actions_penalty = [[],[2,3],[1,4,5,6,7,8,9,10,11,12]]

#############################
# Call RL code: returns the average KLM estimate for this UI set.
klm = rl_optimizer(UIs, actionmatrix, actions_in_uis, actions_penalty,
                   num_buttons, params)

print "KLM average:", klm