Example 1
def run_episode(for_training):
    """
        Runs one episode
        If for training, updates the Q function
        If for testing, computes and returns the cumulative discounted reward
    """
    epsilon = TRAINING_EP if for_training else TESTING_EP

    # initialize for each episode
    epi_reward = 0
    gamma_t = 1
    (current_room_desc, current_quest_desc, terminal) = framework.newGame()

    while not terminal:
        # Choose next action and execute
        current_state = current_room_desc + current_quest_desc
        current_state_vector = torch.FloatTensor(
            utils.extract_bow_feature_vector(current_state, dictionary))

        # choose the next action based on epsilon greedy policy
        (action_index, object_index) = epsilon_greedy(current_state_vector,
                                                      epsilon)

        # go to the next state based on the action
        (next_room_desc, next_quest_desc, reward,
         terminal) = framework.step_game(current_room_desc, current_quest_desc,
                                         action_index, object_index)

        # extract the feature vector of the next state
        next_state = next_room_desc + next_quest_desc
        next_state_vector = torch.FloatTensor(
            utils.extract_bow_feature_vector(next_state, dictionary))

        if for_training:
            # update Q-function: adjust the parameters of the deep Q-network
            deep_q_learning(current_state_vector, action_index, object_index,
                            reward, next_state_vector, terminal)

        if not for_training:
            # update reward: accumulate the discounted episodic reward
            epi_reward += gamma_t * reward

        # prepare next step

        #update current state to the next state
        current_room_desc = next_room_desc
        current_quest_desc = next_quest_desc
        gamma_t = gamma_t * GAMMA

    if not for_training:
        return epi_reward
Example 2
def run_episode(for_training):
    """ Runs one episode
    If for training, updates the Q function
    If for testing, computes and returns the cumulative discounted reward

    Args:
        for_training (bool): True if for training

    Returns:
        None
    """
    epsilon = TRAINING_EP if for_training else TESTING_EP

    # initialize for each episode
    # TODO Your code here

    (current_room_desc, current_quest_desc, terminal) = framework.newGame()

    count = 0
    epi_reward = 0

    while not terminal:

        # Choose next action and execute
        # recall
        current_state = current_room_desc + current_quest_desc
        current_state_vector = utils.extract_bow_feature_vector(
            current_state, dictionary)

        (action_index, object_index) = epsilon_greedy(current_state_vector,
                                                      theta, epsilon)

        (next_room_desc, next_quest_desc, reward, terminal) \
            = framework.step_game(current_room_desc, current_quest_desc, action_index, object_index)

        next_state = next_room_desc + next_quest_desc
        next_state_vector = utils.extract_bow_feature_vector(
            next_state, dictionary)

        if for_training:
            # update Q-function.
            # TODO Your code here
            linear_q_learning(theta, current_state_vector, action_index,
                              object_index, reward, next_state_vector,
                              terminal)

        if not for_training:
            # update reward
            # TODO Your code here
            epi_reward += np.power(GAMMA, count) * reward

        # prepare next step
        # TODO Your code here
        count += 1
        current_room_desc = next_room_desc
        current_quest_desc = next_quest_desc

    if not for_training:
        return epi_reward
Example 3
def run_episode(for_training):
    """
        Runs one episode
        If for training, updates the Q function
        If for testing, computes and returns the cumulative discounted reward
    """
    epsilon = TRAINING_EP if for_training else TESTING_EP

    # initialize for each episode
    epi_reward = 0
    t = 0

    (current_room_desc, current_quest_desc, terminal) = framework.newGame()
    while not terminal:
        # Choose next action and execute
        current_state = current_room_desc + current_quest_desc
        current_state_vector = torch.FloatTensor(
            utils.extract_bow_feature_vector(current_state, dictionary))

        # choose the next action and execute it (shared by training and testing)
        action_index, object_index = epsilon_greedy(
            current_state_vector, epsilon)
        next_room_desc, next_quest_desc, reward, terminal = framework.step_game(
            current_room_desc, current_quest_desc, action_index,
            object_index)
        next_state = next_room_desc + next_quest_desc
        next_state_vector = torch.FloatTensor(
            utils.extract_bow_feature_vector(next_state, dictionary))

        if for_training:
            # update Q-function.
            deep_q_learning(current_state_vector, action_index, object_index,
                            reward, next_state_vector, terminal)

        if not for_training:
            # update reward
            epi_reward = epi_reward + reward * (GAMMA**t)
            t = t + 1

        # prepare next step
        # TODO Your code here
        # update current_room_desc and current_quest_desc
        current_room_desc = next_room_desc
        current_quest_desc = next_quest_desc

    if not for_training:
        return epi_reward
Example 4
def run_episode(for_training):
    """
        Runs one episode
        If for training, updates the Q function
        If for testing, computes and returns the cumulative discounted reward
        Note: the model, optimizer, and dictionary are global variables.
    """
    epsilon = TRAINING_EP if for_training else TESTING_EP
    epi_reward = 0.
    t = 0
    # initialize for each episode

    (current_room_desc, current_quest_desc, terminal) = framework.newGame()
    while not terminal:
        # Choose next action and execute
        current_state = current_room_desc + current_quest_desc
        current_state_vector = torch.FloatTensor(
            utils.extract_bow_feature_vector(current_state, dictionary))
        action_index, object_index = epsilon_greedy(
            current_state_vector, epsilon
        )
        next_room_desc, next_quest_desc, reward, terminal = framework.step_game(
            current_room_desc,
            current_quest_desc,
            action_index, object_index
        )
        next_state_vector = torch.FloatTensor(
            utils.extract_bow_feature_vector(
                next_room_desc + next_quest_desc, dictionary
            )
        )

        if for_training:
            # update Q-function.
            deep_q_learning(
                current_state_vector,
                action_index, object_index,
                reward,
                next_state_vector,
                terminal
            )
        else:
            # update reward
            epi_reward += (GAMMA**t)*reward

        # prepare next step
        current_room_desc, current_quest_desc = next_room_desc, next_quest_desc
        t += 1

    if not for_training:
        return epi_reward
Example 5
def run_episode(for_training):
    """ Runs one episode
    If for training, updates the Q function
    If for testing, computes and returns the cumulative discounted reward

    Args:
        for_training (bool): True if for training

    Returns:
        None
    """
    # My solution:
    epsilon = TRAINING_EP if for_training else TESTING_EP
    epi_reward = 0

    # initialize for each episode
    current_room_desc, current_quest_desc, terminal = framework.newGame()

    t = 0
    while not terminal:
        # Choose next action and execute
        current_state = current_room_desc + current_quest_desc
        current_state_vector = utils.extract_bow_feature_vector(
            current_state, dictionary)

        action_index, object_index = epsilon_greedy(current_state_vector,
                                                    theta, epsilon)

        next_room_desc, next_quest_desc, reward, terminal = \
            framework.step_game(current_room_desc, current_quest_desc,
                                action_index, object_index)

        next_state = next_room_desc + next_quest_desc
        next_state_vector = utils.extract_bow_feature_vector(
            next_state, dictionary)

        if for_training:
            # update Q-function.
            linear_q_learning(theta, current_state_vector, action_index,
                              object_index, reward, next_state_vector,
                              terminal)

        if not for_training:
            # update reward
            epi_reward += GAMMA**t * reward
            t += 1

        # prepare next step
        current_room_desc, current_quest_desc = next_room_desc, next_quest_desc

    if not for_training:
        return epi_reward
Example 6
def run_episode(for_training):
    """ Runs one episode
    If for training, updates the Q function
    If for testing, computes and returns the cumulative discounted reward

    Args:
        for_training (bool): True if for training

    Returns:
        None
    """
    epsilon = TRAINING_EP if for_training else TESTING_EP

    # initialize for each episode

    gamma_step = 1
    epi_reward = 0

    (current_room_desc, current_quest_desc, terminal) = framework.newGame()
    while not terminal:
        # Choose next action and execute
        current_state = current_room_desc + current_quest_desc
        current_state_vector = utils.extract_bow_feature_vector(
            current_state, dictionary)

        (action_index, object_index) = epsilon_greedy(current_state_vector,
                                                      theta, epsilon)
        (next_room_desc, next_quest_desc, reward,
         terminal) = framework.step_game(current_room_desc, current_quest_desc,
                                         action_index, object_index)

        if for_training:
            # update Q-function.
            next_state = next_room_desc + next_quest_desc
            # Returns the bag-of-words vector representation of the state
            next_state_vector = utils.extract_bow_feature_vector(
                next_state, dictionary)
            linear_q_learning(theta, current_state_vector, action_index,
                              object_index, reward, next_state_vector,
                              terminal)

        if not for_training:
            # update reward
            epi_reward = epi_reward + gamma_step * reward
            gamma_step = gamma_step * GAMMA

        # prepare next step
        current_room_desc = next_room_desc
        current_quest_desc = next_quest_desc

    if not for_training:
        return epi_reward
Example 7
def run_episode(for_training):
    """ Runs one episode
    If for training, updates the Q function
    If for testing, computes and returns the cumulative discounted reward

    Args:
        for_training (bool): True if for training

    Returns:
        None
    """
    epsilon = TRAINING_EP if for_training else TESTING_EP
    
    epi_reward = 0.0 # initialize for each episode

    (current_room_desc, current_quest_desc, terminal) = framework.newGame()

    while not terminal:
        # Choose next action and execute
        current_state = current_room_desc + current_quest_desc
        current_state_vector = utils.extract_bow_feature_vector(
            current_state, dictionary)

        next_action_index, next_object_index = epsilon_greedy(current_state_vector, 
                                                              theta, 
                                                              epsilon) # Get next action, object
        
        next_room_desc, next_quest_desc, reward, terminal = framework.step_game(
                                                            current_room_desc,
                                                            current_quest_desc,
                                                            next_action_index,
                                                            next_object_index) # Take a step
        
        next_state = next_room_desc + next_quest_desc   # Build next state vector
        next_state_vector = utils.extract_bow_feature_vector(next_state, dictionary)      

        if for_training:
            # update Q-function.
            linear_q_learning(theta, current_state_vector, next_action_index, 
                          next_object_index, reward, next_state_vector, terminal) # Update theta

        if not for_training:
            # update reward
            epi_reward += (GAMMA**(framework.STEP_COUNT - 1))*reward

        # prepare next step
        current_room_desc, current_quest_desc = next_room_desc, next_quest_desc

    if not for_training:
        return epi_reward
Example 8
def run_episode(for_training):
    """
        Runs one episode
        If for training, updates the Q function
        If for testing, computes and returns the cumulative discounted reward
    """
    epsilon = TRAINING_EP if for_training else TESTING_EP
    
    # initialize for each episode
    epi_reward = 0.0
    step = 0
    (current_room_desc, current_quest_desc, terminal) = framework.newGame()

    while not terminal:
        # Choose next action and execute
        current_state = current_room_desc + current_quest_desc
        current_state_vector = torch.FloatTensor(
            utils.extract_bow_feature_vector(current_state, dictionary))

        # Get the next action and object via the epsilon-greedy policy
        next_action_i, next_object_i = epsilon_greedy(current_state_vector, epsilon)
        
        # Make a move
        step += 1
        next_room_desc, next_quest_desc, reward, terminal = framework.step_game(
                                                            current_room_desc,
                                                            current_quest_desc,
                                                            next_action_i,
                                                            next_object_i) 

        # Next state vector
        next_state = next_room_desc + next_quest_desc 
        next_state_vector = torch.FloatTensor(utils.extract_bow_feature_vector(next_state, dictionary)) 

        if for_training:
            # update Q-function.
            deep_q_learning(current_state_vector, next_action_i, 
                            next_object_i, reward, next_state_vector, terminal)

        if not for_training:
            # update reward
            epi_reward += (GAMMA**(step - 1)) * reward
            
        # prepare next step
        current_room_desc, current_quest_desc = next_room_desc, next_quest_desc

    if not for_training:
        return epi_reward
Example 9
def run_episode(for_training):
    """
        Runs one episode
        If for training, updates the Q function
        If for testing, computes and returns the cumulative discounted reward
    """
    epsilon = TRAINING_EP if for_training else TESTING_EP
    epi_reward = 0
    count = 0
    # initialize for each episode

    (current_room_desc, current_quest_desc, terminal) = framework.newGame()
    while not terminal:
        # Choose next action and execute
        current_state = current_room_desc + current_quest_desc
        current_state_vector = torch.FloatTensor(
            utils.extract_bow_feature_vector(current_state,
                                             dictionary)).type(dtype)

        (action_index, object_index) = epsilon_greedy(current_state_vector,
                                                      epsilon)

        (next_room_desc, next_quest_desc, reward, terminal) = \
            framework.step_game(current_room_desc, current_quest_desc,
                                action_index, object_index)

        next_state = next_room_desc + next_quest_desc
        next_state_vector = torch.FloatTensor(
            utils.extract_bow_feature_vector(next_state,
                                             dictionary)).type(dtype)

        if for_training:
            deep_q_learning(current_state_vector, action_index, object_index,
                            reward, next_state_vector, terminal)

        if not for_training:
            # update reward
            epi_reward += (GAMMA**count) * reward
            count += 1

        # prepare next step
        current_room_desc = next_room_desc
        current_quest_desc = next_quest_desc

    if not for_training:
        return epi_reward
Example 10
def run_episode(for_training):
    """
        Runs one episode
        If for training, updates the Q function
        If for testing, computes and returns the cumulative discounted reward
    """
    epsilon = TRAINING_EP if for_training else TESTING_EP
    gamma_step = 1
    epi_reward = 0

    (current_room_desc, current_quest_desc, terminal) = framework.newGame()
    while not terminal:
        # Choose next action and execute
        current_state = current_room_desc + current_quest_desc
        current_state_vector = torch.FloatTensor(
            utils.extract_bow_feature_vector(current_state, dictionary))

        (action_index, object_index) = epsilon_greedy(current_state_vector,
                                                      epsilon)
        (next_room_desc, next_quest_desc, reward,
         terminal) = framework.step_game(current_room_desc, current_quest_desc,
                                         action_index, object_index)

        if for_training:
            # update Q-function.
            next_state = next_room_desc + next_quest_desc
            # 32-bit floating point CPU tensor
            next_state_vector = torch.FloatTensor(
                utils.extract_bow_feature_vector(next_state, dictionary))
            deep_q_learning(current_state_vector, action_index, object_index,
                            reward, next_state_vector, terminal)

        if not for_training:
            # update reward
            epi_reward = epi_reward + gamma_step * reward
            gamma_step = gamma_step * GAMMA

        # prepare next step
        current_room_desc = next_room_desc
        current_quest_desc = next_quest_desc

    if not for_training:
        return epi_reward
Example 11
def run_episode(for_training):
    """
        Runs one episode
        If for training, updates the Q function
        If for testing, computes and returns the cumulative discounted reward
    """
    epsilon = TRAINING_EP if for_training else TESTING_EP
    epi_reward = 0.0

    (current_room_desc, current_quest_desc, terminal) = framework.newGame()

    while not terminal:
        # Choose next action and execute
        current_state = current_room_desc + current_quest_desc
        current_state_vector = torch.FloatTensor(
            utils.extract_bow_feature_vector(current_state, dictionary))

        next_action_index, next_object_index = epsilon_greedy(
            current_state_vector, epsilon)

        next_room_desc, next_quest_desc, reward, terminal = framework.step_game(
            current_room_desc, current_quest_desc, next_action_index,
            next_object_index)
        next_state = next_room_desc + next_quest_desc
        next_state_vector = torch.FloatTensor(
            utils.extract_bow_feature_vector(next_state, dictionary))

        if for_training:
            deep_q_learning(current_state_vector, next_action_index,
                            next_object_index, reward, next_state_vector,
                            terminal)

        if not for_training:
            epi_reward += (GAMMA**(framework.STEP_COUNT - 1)) * reward

        # prepare next step
        current_room_desc = next_room_desc
        current_quest_desc = next_quest_desc

    if not for_training:
        return epi_reward
Example 12
def run_episode(for_training):
    """ Runs one episode
    If for training, updates the Q function
    If for testing, computes and returns the cumulative discounted reward

    Args:
        for_training (bool): True if for training

    Returns:
        None
    """
    epsilon = TRAINING_EP if for_training else TESTING_EP
    epi_reward = 0

    # initialize for each episode
    t = 0

    (current_room_desc, current_quest_desc, terminal) = framework.newGame()
    while not terminal:
        # Choose next action and execute
        current_state = current_room_desc + current_quest_desc
        current_state_vector = utils.extract_bow_feature_vector(
            current_state, dictionary)
        # choose the next action and take a step in the game
        action_index, object_index = epsilon_greedy(current_state_vector,
                                                    theta, epsilon)
        next_room_desc, next_quest_desc, reward, terminal = framework.step_game(
            current_room_desc, current_quest_desc, action_index, object_index)

        next_state = next_room_desc + next_quest_desc
        next_state_vector = utils.extract_bow_feature_vector(
            next_state, dictionary)

        if for_training:
            # update Q-function with the linear approximation, matching the
            # bag-of-words state vectors built above
            linear_q_learning(theta, current_state_vector, action_index,
                              object_index, reward, next_state_vector,
                              terminal)

        if not for_training:
            # update reward
            epi_reward = epi_reward + (GAMMA**t) * reward

        # prepare next step
        t = t + 1
        current_room_desc, current_quest_desc = next_room_desc, next_quest_desc

    if not for_training:
        return epi_reward
Example 13
def run_episode(for_training):
    """
        Runs one episode
        If for training, updates the Q function
        If for testing, computes and returns the cumulative discounted reward
    """
    epsilon = TRAINING_EP if for_training else TESTING_EP

    # initialize for each episode
    epi_reward = 0
    gamma_t = 1

    (current_room_desc, current_quest_desc, terminal) = framework.newGame()
    while not terminal:
        # Choose next action and execute
        current_state = current_room_desc + current_quest_desc
        current_state_vector = torch.FloatTensor(
            utils.extract_bow_feature_vector(current_state, dictionary))

        # choose the next action (epsilon-greedy) and execute it
        action_index, object_index = epsilon_greedy(current_state_vector,
                                                    epsilon)
        next_room_desc, next_quest_desc, reward, terminal = framework.step_game(
            current_room_desc, current_quest_desc, action_index, object_index)

        next_state = next_room_desc + next_quest_desc
        next_state_vector = torch.FloatTensor(
            utils.extract_bow_feature_vector(next_state, dictionary))

        if for_training:
            # update Q-function.
            deep_q_learning(current_state_vector, action_index, object_index,
                            reward, next_state_vector, terminal)

        if not for_training:
            # update reward
            epi_reward += gamma_t * reward

        # prepare next step
        gamma_t *= GAMMA
        current_room_desc = next_room_desc
        current_quest_desc = next_quest_desc

    if not for_training:
        return epi_reward
Example 14
def run_episode(for_training):
    """ Runs one episode
    If for training, updates the Q function
    If for testing, computes and returns the cumulative discounted reward

    Args:
        for_training (bool): True if for training

    Returns:
        None
    """
    epsilon = TRAINING_EP if for_training else TESTING_EP
    epi_reward = 0

    # initialize for each episode
    # TODO Your code here

    (current_room_desc, current_quest_desc, terminal) = framework.newGame()
    t = 0
    while not terminal:
        # Choose next action and execute
        current_state = current_room_desc + current_quest_desc
        current_state_vector = utils.extract_bow_feature_vector(
            current_state, dictionary)
        # TODO Your code here

        # Choose an action with epsilon_greedy
        action_index, object_index = epsilon_greedy(current_state_vector,
                                                    theta, epsilon)

        # Take a game step and get the next descriptions
        next_room_desc, next_quest_desc, reward, terminal = framework.step_game(
            current_room_desc, current_quest_desc, action_index, object_index)

        next_state = next_room_desc + next_quest_desc
        next_state_vector = utils.extract_bow_feature_vector(
            next_state, dictionary)

        if for_training:
            # update Q-function.
            # TODO Your code here
            linear_q_learning(theta, current_state_vector, action_index,
                              object_index, reward, next_state_vector,
                              terminal)

        if not for_training:
            # update reward
            # TODO Your code here
            epi_reward += (GAMMA**t) * reward

        # prepare next step
        # TODO Your code here
        t += 1
        current_room_desc = next_room_desc
        current_quest_desc = next_quest_desc

    if not for_training:
        return epi_reward
Example 15
def run_episode(for_training):
    """ Runs one episode
    If for training, updates the Q function
    If for testing, computes and returns the cumulative discounted reward

    Args:
        for_training (bool): True if for training

    Returns:
        None
    """
    epsilon = TRAINING_EP if for_training else TESTING_EP

    # initialize for each episode
    # TODO Your code here

    # Look into the framework.py file for a hint:
    # A tuple where the first element is a description of the initial room,
    # the second element is a description of the quest for this new game episode, and
    # the last element is a Boolean variable with value False implying that the game is not over.
    (current_room_desc, current_quest_desc, terminal) = framework.newGame()

    # initial value
    count = 0
    epi_reward = 0

    while not terminal:
        # Choose next action and execute
        current_state = current_room_desc + current_quest_desc
        current_state_vector = utils.extract_bow_feature_vector(current_state, dictionary)
        # TODO Your code here
        (action_index, object_index) = epsilon_greedy(current_state_vector, theta, epsilon)

        # Shorter aliases
        a_idx = action_index      # action index
        o_idx = object_index      # object index
        crd = current_room_desc   # current room description
        cqd = current_quest_desc  # current quest description

        # the system next state when the selected command is applied at the current state
        (next_room_desc, next_quest_desc, reward, terminal) = framework.step_game(crd, cqd, a_idx, o_idx)

        next_state = next_room_desc + next_quest_desc
        # Look into utils.py for the bag-of-words vector representation of the state
        next_state_vector = utils.extract_bow_feature_vector(next_state, dictionary)

        if for_training:
            # update Q-function.
            # TODO Your code here
            linear_q_learning(theta, current_state_vector, a_idx, o_idx, reward, next_state_vector, terminal)

        if not for_training:
            # update reward
            # TODO Your code here
            epi_reward += np.power(GAMMA, count) * reward

        # prepare next step
        # TODO Your code here
        count += 1
        current_room_desc = next_room_desc
        current_quest_desc = next_quest_desc

    if not for_training:
        return epi_reward