# For saving toplist objective = -1 top_UI = params.top * [] #([([],[],policies,100)])*params.top top_UI_summary = [] # []*params.top #np.zeros((params.top,3)) multi_obj_topUI = [[], [], []] ########################################## # TODO: READ UI from the text file uis/remote.txt UI = np.array([[0, 1, 1, 1], [0, 0, 1, 1], [0, 1, 0, 1], [0, 1, 1, 0]]) num_action = params.num_states - 1 # Number of unique buttons ############################# # Call RL code objective, top_UI, best_actions, best_path, klm_value = rl_optimizer( UI, num_action, top_UI, params, batch_num, logging) if objective == -1: # If not learned, skip all button combinations print "Skipping this UI" ################### # Write to file output_file = open('best_path.txt', 'w') #output_file.writelines(tags[int(best_path[i])]+"\n") for i in range(np.size(best_path)): output_file.writelines(tags[int(best_path[i])] + "\n") #print tags[int(best_path[i])] output_file.writelines(str(klm_value)) output_file.close()
# NOTE(review): whitespace-mangled paste -- indentation reconstructed.
# The first six statements read one UI element's record (tag, x, y, width,
# height) and clearly belong inside a per-record parsing loop whose header is
# outside this chunk; they are shown flat here. Restore the enclosing loop
# when un-mangling -- TODO confirm against the original script.
tags.append(item_info[0])
x_coord[idx] = float(item_info[1])
y_coord[idx] = float(item_info[2])
widths[idx] = float(item_info[3])
heights[idx] = float(item_info[4])
idx += 1

###################
# Initialize parameters from the parsed element geometry.
params = initializeParams(x_coord, y_coord, widths, heights)
num_action = params.num_states-1  # one action per state except the start state

###################
# Call RL code: returns the best interaction path and its KLM estimate.
best_path, klm_value = rl_optimizer(num_action, params)

###################
# Write to file: one tag per step of the best path, then the KLM value.
output_file = open('best_path.txt', 'w')
for i in range(np.size(best_path)):
    output_file.writelines(tags[int(best_path[i])]+"\n")
output_file.writelines(str(klm_value))
output_file.close()
# Penalty index for each action. The penalty values in params.penalties actions_penalty = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]] print "Sensor errors", params.sensor_errors print "Confusion errors", params.confusion_error ############################# ## Learning policies and calculating KLM esitemate modality_table_total = np.array([0, 0, 0]) klm_total = 0 # If sensor errors are used the number of iterations should be more than 1 to average the effect of errors. num_iters = 5 for j in range(num_iters): modality_table, klm = rl_optimizer(UIs, actionmatrix, actions_in_uis, actions_penalty, num_commands, params) modality_table_total += modality_table # KLM estimates klm_total += klm print "Tactile commands used ", modality_table_total[0], "times." print "Gesture commands used ", modality_table_total[1], "times." print "Speech commands used ", modality_table_total[2], "times." # Average KLM estimate over all iterations print "KLM estimate:", klm_total / num_iters
# NOTE(review): whitespace-mangled paste. The leading branch is the body of an
# "if <random-search mode>:" whose condition is outside this chunk, and the
# select/train sequence sits inside a search loop (hence the 'break's and
# 'search_iter += 1') -- confirm the nesting when un-mangling.
    # Random-search mode: draw a matrix index not yet tried this session.
    random_search_idx = random.randint(0, len(button_matrices)-1)
    while random_search_idx in search_past_idxes:
        random_search_idx = random.randint(0, len(button_matrices)-1)
    search_past_idxes.append(random_search_idx)
    buttonmatrix = button_matrices[random_search_idx]
else:
    # Exhaustive mode: walk button_matrices in order; stop when exhausted.
    if search_iter == len(button_matrices):
        break
    buttonmatrix = button_matrices[search_iter]

##########################
# Train RL agent
##########################
start_time_rl = time.time()
top_buttons, objective = rl_optimizer(UI, buttonmatrix, num_buttons,
                                      top_buttons, params, batch_num, logging)
total_time_rl += (time.time()-start_time_rl)  # accumulate pure-RL wall time
if objective == -1:  # If not learned, skip all button combinations
    print "Skipping this UI"
    break
search_iter += 1

# Merge this round's best candidates into the global toplist, then keep only
# the params.the_top entries with the smallest value at tuple index 3
# (presumably the objective -- TODO confirm the tuple layout).
for k in range(len(top_buttons)):
    top_UI.append(top_buttons[k])
top_UI = sorted(top_UI, key=op.itemgetter(3))[:params.the_top]
# NOTE(review): whitespace-mangled paste. The leading rows below are the TAIL
# of a UI transition-matrix literal (presumably "UI2 = np.array([" -- the
# opening is outside this chunk); they are kept verbatim.
               [0,4,5,6,7,8,0,10,11,12],
               [0,4,5,6,7,8,9,0,11,12],
               [0,4,5,6,7,8,9,10,0,12],
               [0,4,5,6,7,8,9,10,11,0]])

# Allowed actions in each state: row i lists the action ids available while
# in state i (each row omits one id -- apparently the action leading to the
# state itself; TODO confirm).
actionmatrix = np.array([[1],
                         [1,2,3,5,6,7,8,9,10,11,12],
                         [1,2,3,4,6,7,8,9,10,11,12],
                         [1,2,3,4,5,7,8,9,10,11,12],
                         [1,2,3,4,5,6,8,9,10,11,12],
                         [1,2,3,4,5,6,7,9,10,11,12],
                         [1,2,3,4,5,6,7,8,10,11,12],
                         [1,2,3,4,5,6,7,8,9,11,12],
                         [1,2,3,4,5,6,7,8,9,10,12],
                         [1,2,3,4,5,6,7,8,9,10,11]])

num_buttons = 12  # Number of unique buttons

# All UIs
UIs = [UI1, UI2]
# Which action ids belong to which UI (UI1 owns 1-3, UI2 owns 4-12).
actions_in_uis = [[1,2,3],[4,5,6,7,8,9,10,11,12]]  # [[1,2,3]] #
actions_penalty = [[],[2,3],[1,4,5,6,7,8,9,10,11,12]]

#############################
# Call RL code: returns the average KLM estimate for this UI set.
klm = rl_optimizer(UIs, actionmatrix, actions_in_uis, actions_penalty,
                   num_buttons, params)

print "KLM average:", klm