# Example #1
0
def stacking_assign_q_learning(shorter_init, longer_init):
    """Roll out a pretrained Q-table on a ``Stacking`` environment until done.

    A pretrained table is chosen from the first elements of the two inputs
    (A/U, C/G, G/C, G/U, U/A or U/G) and concatenated onto the table of a
    fresh ``QLearningTable``. For any other combination no pretrained table
    is loaded and the agent runs with whatever table ``QLearningTable``
    starts with.

    Args:
        shorter_init: Initial "shorter" value passed to ``Stacking``; its
            first element is compared against 'A', 'C', 'G' and 'U'.
        longer_init: Initial "longer" value passed to ``Stacking``; its
            first element selects the pretrained table together with
            ``shorter_init[0]``.

    Returns:
        A ``(shorter_final, longer_final)`` tuple holding the first two
        '_'-separated fields of the final observation string.
    """
    # Function-scope import keeps this block self-contained.
    import pandas as pd

    env = Stacking(shorter_init, longer_init)
    # NOTE(review): e_greedy=1 presumably makes action selection fully
    # greedy -- confirm against QLearningTable.choose_action.
    RL = QLearningTable(actions=list(range(6)), e_greedy=1)

    # Pick the pretrained table for this pair. The names are only looked up
    # in the branch that matches, exactly as before.
    pretrained = None
    if shorter_init[0] == 'A' and longer_init[0] == 'U':
        pretrained = q_table_A_U
    elif shorter_init[0] == 'C' and longer_init[0] == 'G':
        pretrained = q_table_C_G
    elif shorter_init[0] == 'G' and longer_init[0] == 'C':
        pretrained = q_table_G_C
    elif shorter_init[0] == 'G' and longer_init[0] == 'U':
        pretrained = q_table_G_U
    elif shorter_init[0] == 'U' and longer_init[0] == 'A':
        pretrained = q_table_U_A
    elif shorter_init[0] == 'U' and longer_init[0] == 'G':
        pretrained = q_table_U_G

    if pretrained is not None:
        # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
        # pd.concat is the supported equivalent with the same defaults.
        # Assumes the q_table_* objects are DataFrames -- TODO confirm.
        RL.q_table = pd.concat([RL.q_table, pretrained])

    observation = env.shorter + "_" + env.longer
    while True:
        action = RL.choose_action(observation)
        shorter_, longer_, _reward, done = env.step(action)
        # No RL.learn() call here: the table is only read, never updated,
        # so the reward is ignored.
        observation = shorter_ + "_" + longer_
        if done:
            break

    fields = observation.split('_')
    return fields[0], fields[1]
def ubp_6_assign_q_learning(shorter_init):
    """Roll out the pretrained ``q_table_ubp_6`` policy on a ``ubp_6`` environment.

    The pretrained table is concatenated onto the table of a fresh
    ``QLearningTable`` with four actions, and actions are chosen from it
    until the environment reports it is done.

    Args:
        shorter_init: Initial value passed to the ``ubp_6`` environment.

    Returns:
        The last observation, i.e. the first value returned by the final
        ``env.step`` call.
    """
    # Function-scope import keeps this block self-contained.
    import pandas as pd

    env = ubp_6(shorter_init)
    # NOTE(review): e_greedy=1 presumably makes action selection fully
    # greedy -- confirm against QLearningTable.choose_action.
    RL = QLearningTable(actions=list(range(4)), e_greedy=1)
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
    # pd.concat is the supported equivalent with the same defaults.
    # Assumes q_table_ubp_6 is a DataFrame -- TODO confirm.
    RL.q_table = pd.concat([RL.q_table, q_table_ubp_6])

    observation = env.shorter
    while True:
        action = RL.choose_action(observation)
        # No RL.learn() call here: the table is only read, never updated,
        # so the reward is ignored.
        observation, _reward, done = env.step(action)
        if done:
            break
    return observation