Example #1
    def run_forward(self, initial_scores, transition_scores, final_scores,
                    emission_scores, length, N):
        """ Forward trellis scores."""
        # TODO: try a vectorized implementation that eliminates the loop over
        # current_state (a sketch follows this example).
        forward = np.zeros([length, N], 'f') + logzero()

        #Initialization
        forward[0, :] = emission_scores[0, :] + initial_scores

        #Forward loop
        for pos in range(1, length):
            # in log space: add the previous position's forward scores to the
            # transition scores into the current state, then logsum over the
            # previous states to obtain a scalar per state
            forward_last = (project_kbest(forward[pos - 1, :])
                            if self.approximate else forward[pos - 1, :])
            for current_state in range(N):
                forward[pos, current_state] = sselogsum(
                    forward_last +
                    transition_scores[pos - 1, current_state, :])
            #add emission
            forward[pos, :] += emission_scores[pos, :]
        #Termination
        ll = (sselogsum(project_kbest(forward[length - 1, :]) + final_scores)
              if self.approximate
              else sselogsum(forward[length - 1, :] + final_scores))

        return ll, forward
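The TODO above can be discharged with broadcasting. A minimal sketch under stated assumptions: scipy's logsumexp stands in for sselogsum along an axis, np.full(..., -np.inf) plays the role of logzero(), and the self.approximate / project_kbest branch is omitted for brevity:

import numpy as np
from scipy.special import logsumexp  # assumed stand-in for sselogsum along an axis

def run_forward_vectorized(initial_scores, transition_scores, final_scores,
                           emission_scores, length, N):
    """Forward trellis scores without the inner loop over current_state."""
    forward = np.full((length, N), -np.inf, dtype='f')
    forward[0, :] = emission_scores[0, :] + initial_scores
    for pos in range(1, length):
        # transition_scores[pos - 1] has shape (current_state, previous_state);
        # broadcast the previous forward row across it and logsum over axis 1
        forward[pos, :] = logsumexp(forward[pos - 1, :][None, :] +
                                    transition_scores[pos - 1], axis=1)
        forward[pos, :] += emission_scores[pos, :]
    ll = logsumexp(forward[length - 1, :] + final_scores)
    return ll, forward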
Example #2
    def get_loss(self, N, forward, backward, ll, norm_emission_counts):
        """
        :param forward: forward trellis
        :param backward: backward trellis
        :param ll: log likelihood of the current sequence
        :param norm_emission_counts: normalized emission counts (state posteriors
            accumulated over all sequences), used to weight the backward merges
        :param N: number of split states
        """
        logger = logging.getLogger(__name__)
        loss_seq = np.zeros(N // 2)  # there are N//2 possible merges
        assert forward.shape[1] == N

        # iterate over possible merges; merge i collapses split states 2i and 2i+1
        for i in range(len(loss_seq)):
            i_merge = 2 * i
            # prepare trellis
            forward_to_merge = forward[:, i_merge:i_merge + 2]
            sum_split_forward = np.zeros((forward.shape[0], 1), 'f') + logzero()
            for row_n, row in enumerate(forward_to_merge):
                # sum split states
                sum_split_forward[row_n] = sselogsum(row)
            forward_merge = np.hstack((forward[:, :i_merge], sum_split_forward, forward[:, i_merge + 2:]))

            backward_to_merge = backward[:, i_merge:i_merge + 2]
            sum_split_backward = np.zeros((backward.shape[0], 1), 'f') + logzero()
            # incorporate weights for each element (Petrov 2009, p. 89)
            # weights are normalized emission counts
            # accumulated from state posteriors over all sequences
            assert backward.shape[1] == N
            backward_to_merge += norm_emission_counts[i_merge:i_merge + 2]
            for row_n, row in enumerate(backward_to_merge):
                # sum weighted split states
                sum_split_backward[row_n] = sselogsum(row)
            backward_merge = np.hstack((backward[:, :i_merge], sum_split_backward, backward[:, i_merge + 2:]))

            # likelihood of the merged model at every position t
            ll_merged_positions = np.zeros(forward_merge.shape[0], 'f') + logzero()
            fb_merge = forward_merge + backward_merge
            for row_n, row in enumerate(fb_merge):
                ll_merged_positions[row_n] = sselogsum(row)

            # loss = drop in log likelihood caused by this merge;
            # ll_merged_positions should not exceed ll, up to numerical tolerance
            loss_seq[i] = (ll_merged_positions - ll).sum()

        return loss_seq
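In a Petrov-style split-merge loop, these per-sequence losses are accumulated over the corpus and only the merges that cost the least likelihood are then carried out. A hypothetical driver sketch; select_merges and merge_fraction are illustrative names, not part of this codebase:

import numpy as np

def select_merges(loss_totals, merge_fraction=0.5):
    """Pick the merges whose loss in log likelihood is smallest in magnitude.

    loss_totals: per-merge loss_seq values accumulated over all sequences.
    """
    n_merge = int(len(loss_totals) * merge_fraction)
    # losses are (ll_merged - ll) sums, i.e. non-positive; the entries closest
    # to zero are the cheapest merges, and argsort puts them last
    return np.argsort(loss_totals)[-n_merge:]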
Example #3
    def pass_msg_down(self, sender, tree, N, receiver):
        """
        The downward message is the sum over the sender's states of the product of:
         the downward message from the sender's parent,
         the upward messages to the sender from its other children,
         the sender-receiver edge potential, and
         the sender node potential.
        If the sender is the root, the calculation simplifies.

        :param sender: parent
        :param receiver: child
        """
        down_msg_temp = np.zeros(N, 'f') + logzero()
        curr_edge = tree.get_edge_by_nodes(sender, receiver)
        # zero if the sender has no other children
        product_child = sum([
            tree.get_edge_by_nodes(sender, c).up_msg
            for c in sender.get_children() if c != receiver
        ])

        if sender.is_root():
            down_msg_temp = curr_edge.potentials + product_child
        else:
            prev_edge = tree.get_edge_by_nodes(sender.get_parent(), sender)
            product = (project_kbest(prev_edge.down_msg + product_child)
                       if self.approximate
                       else prev_edge.down_msg + product_child)
            product += sender.potentials
            for curr_state in range(N):  # TODO optimize (see the sketch after this example)
                down_msg_temp[curr_state] = sselogsum(
                    product + curr_edge.potentials[:, curr_state])

        curr_edge.down_msg = down_msg_temp
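One way to discharge the TODO in the loop above, sketched under the assumption that the non-root edge potentials are N-by-N and that scipy's logsumexp matches sselogsum applied along an axis:

import numpy as np
from scipy.special import logsumexp  # assumed stand-in for sselogsum along an axis

def down_message_vectorized(product, edge_potentials):
    """Vectorized form of the curr_state loop in pass_msg_down.

    product         : (N,) log-space sum of the parent message, the other
                      children's upward messages, and the sender potential
    edge_potentials : (N, N) edge potentials, columns indexed by curr_state
    """
    # broadcast product down the columns and logsum over the sender's states
    return logsumexp(product[:, None] + edge_potentials, axis=0)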
Example #4
    def run_backward(self, initial_scores, transition_scores, final_scores, emission_scores, length, N):
        """ Backward trellis scores. """
        backward = np.zeros([length, N], 'f') + logzero()
        #Initialization
        backward[length - 1, :] = final_scores
        #Backward loop
        for pos in range(length - 2, -1, -1):
            # in log space: add the next position's backward and emission scores,
            # then logsum against the transitions out of each current state
            # (a vectorized sketch follows this example)
            product = (project_kbest(backward[pos + 1, :] + emission_scores[pos + 1, :])
                       if self.approximate
                       else backward[pos + 1, :] + emission_scores[pos + 1, :])
            for current_state in range(N):
                backward[pos, current_state] = sselogsum(product + transition_scores[pos, :, current_state])

        ll = (sselogsum(project_kbest(backward[0, :] + emission_scores[0, :]) + initial_scores)
              if self.approximate
              else sselogsum(backward[0, :] + emission_scores[0, :] + initial_scores))

        return ll, backward
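The backward recursion vectorizes the same way as the forward one. A minimal sketch under the same assumptions (scipy's logsumexp for sselogsum, -np.inf for logzero(), approximation branch omitted):

import numpy as np
from scipy.special import logsumexp  # assumed stand-in for sselogsum along an axis

def run_backward_vectorized(initial_scores, transition_scores, final_scores,
                            emission_scores, length, N):
    """Backward trellis scores without the loop over current_state."""
    backward = np.full((length, N), -np.inf, dtype='f')
    backward[length - 1, :] = final_scores
    for pos in range(length - 2, -1, -1):
        product = backward[pos + 1, :] + emission_scores[pos + 1, :]
        # transition_scores[pos] has shape (next_state, current_state); broadcast
        # the next-position product down its rows and logsum over axis 0
        backward[pos, :] = logsumexp(product[:, None] + transition_scores[pos], axis=0)
    ll = logsumexp(backward[0, :] + emission_scores[0, :] + initial_scores)
    return ll, backward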
Example #5
    def pass_msg_up(self, tree, sender, receiver, N):
        """
        The upward message is the sum over the sender's states of the product of
         the sender's upward belief and
         the sender-receiver edge potential.

        :param sender: child
        :param receiver: parent
        """
        # edge to store the message
        curr_edge = tree.get_edge_by_nodes(receiver, sender)
        up_msg_temp = np.zeros(N, 'f') + logzero()

        if receiver.is_root():
            # edge potential here only Nx1
            up_msg_temp = sselogsum(sender.up_belief + curr_edge.potentials)
        else:
            for curr_state in range(N):  # vectorize! (see the sketch after this example)
                up_msg_temp[curr_state] = sselogsum(sender.up_belief + curr_edge.potentials[curr_state, :])

        # curr_edge.up_msg = sparse.csr_matrix(up_msg_temp)
        curr_edge.up_msg = up_msg_temp
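The "vectorize!" note in the non-root branch can be addressed with the same broadcasting pattern, assuming N-by-N edge potentials and scipy's logsumexp as a stand-in for sselogsum along an axis:

import numpy as np
from scipy.special import logsumexp  # assumed stand-in for sselogsum along an axis

def up_message_vectorized(up_belief, edge_potentials):
    """Vectorized form of the curr_state loop in pass_msg_up.

    up_belief       : (N,) sender's upward belief in log space
    edge_potentials : (N, N) edge potentials, rows indexed by curr_state
    """
    # broadcast the belief across the rows and logsum over the sender's states
    return logsumexp(up_belief[None, :] + edge_potentials, axis=1)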
Example #6
def project_kbest(v, k_prop=1 / 8):
    """
    Keep only the k largest coefficients; the remaining elements are set to
    logzero. A form of regularization following Grave et al. (2013), who set
    k=16 for a 128-state model, i.e. k_prop of roughly 1/8 of the state size.

    TODO: use a different data structure (sparse array) to regain speed.
    :param v: vector
    :param k_prop: proportion of states to keep
    """
    assert isinstance(v, np.ndarray)
    k = max(1, int(k_prop * v.shape[0]))  # int: a float k would break the slice below
    k_largest = v.argsort()[-k:]
    v_approx = np.zeros(v.shape[0], 'f') + logzero()
    v_approx[k_largest] = v[k_largest]

    return rescale_projected(v_approx, sselogsum(v))
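A quick sanity check of the projection, assuming the module's sselogsum and logzero behave like scipy's logsumexp and -inf: thanks to rescale_projected (below), pruning leaves the total log mass unchanged.

import numpy as np
from scipy.special import logsumexp

v = np.log(np.random.dirichlet(np.ones(16))).astype('f')
v_proj = project_kbest(v, k_prop=1 / 8)  # keeps the 2 largest of 16 entries
# pruned entries sit at logzero, but the total mass is preserved
assert np.isclose(logsumexp(v_proj), logsumexp(v), atol=1e-3)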
Example #7
def rescale_projected(v, total):
    """Shift v in log space so that its log mass matches the original total."""
    return v + (total - sselogsum(v))
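A worked example of the shift: adding a constant in log space multiplies every probability by the same factor, so the pruned vector's mass is restored to the original total (scipy's logsumexp is assumed equivalent to sselogsum here).

import numpy as np
from scipy.special import logsumexp  # assumed equivalent to sselogsum here

v = np.log(np.array([0.5, 0.25], 'f'))   # pruned vector, total mass 0.75
total = np.log(1.0)                      # mass of the original vector
v_rescaled = v + (total - logsumexp(v))  # shift by log(1.0 / 0.75)
assert np.isclose(np.exp(v_rescaled).sum(), 1.0)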