Ejemplo n.º 1
0
    def viterbi(self, s):
        """
        Performs the Viterbi decoding and returns the most likely
        string.

        self.v[t, k] is the best score of any state sequence that ends in
        state k after observation t, and self.backptr[t, k] is the state
        preceding k on that sequence.  Scores are summed, so self.a and
        self.b are presumably log-probabilities -- confirm.

        The backtrace starts from the last state index at the final
        observation and emits only the states before it, so the result has
        len(s) - 1 characters.  An empty input yields an empty string
        (self.v and self.backptr are left untouched in that case).
        """
        # Nothing to decode; indexing index[0] below would fail.
        if not s:
            return ''

        # Turn the observed characters into integer indices.
        index = [Key.char_to_index(x) for x in s]
        n_obs = len(index)
        n_states = Key.NUMBER_OF_CHARS

        # The Viterbi matrices
        self.v = np.full((n_obs, n_states), -np.inf)
        self.backptr = np.zeros((n_obs + 1, n_states), dtype='int')

        # Initialization
        self.backptr[0, :] = Key.START_END
        self.v[0, :] = self.a[Key.START_END, :] + self.b[index[0], :]

        # Induction step
        for t in range(1, n_obs):
            obs = index[t]
            for k in range(n_states):
                # NOTE(review): the initialization indexes self.b as
                # [observation, state] while this line uses
                # [state, observation]; the two agree only if self.b is
                # symmetric -- confirm.
                scores = self.v[t - 1] + self.a[:, k] + self.b[k][obs]
                # Compute the candidates once and reuse them for both the
                # best score and its backpointer.
                best = np.argmax(scores)
                self.backptr[t][k] = best
                self.v[t][k] = scores[best]

        # Backtrace from the last state index (presumably Key.START_END --
        # confirm), collecting states from the end towards the start.
        state = n_states - 1
        decoded = []
        for t in range(n_obs - 1, 0, -1):
            state = self.backptr[t][state]
            decoded.append(Key.index_to_char(state))

        return ''.join(reversed(decoded))
Ejemplo n.º 2
0
    def viterbi(self, s):
        """
        Decodes the observed string with a trigram Viterbi search and
        returns the most likely string.

        self.v[t, j, k] is the best score of a path that is in state j at
        step t - 1 and in state k at step t; self.backptr[t, j, k] is the
        state preceding j on that path (-1 when no predecessor has a score
        above minus infinity).

        The backtrace starts from the cell (Key.START_END, Key.START_END)
        of the last row, so it presumably expects `s` to end with two
        end markers -- confirm against the caller.  The result has
        len(s) - 2 characters (none for shorter inputs).  `s` must not be
        empty.
        """
        # Observed characters as integer indices.
        index = [Key.char_to_index(ch) for ch in s]
        n_obs = len(index)
        n_states = Key.NUMBER_OF_CHARS
        end = Key.START_END
        neg_inf = -float("inf")

        # Score and backpointer tables, one (prev, current) grid per step.
        self.v = np.zeros((len(s), n_states, n_states), dtype='double')
        self.v[:, :, :] = neg_inf
        self.backptr = np.zeros((len(s), n_states, n_states), dtype='int')

        # Initialization: only paths whose previous state is the start
        # marker get a score for the first observation.
        self.backptr[0, :, :] = end
        self.v[0, end, :] = self.a[end, end, :] + self.b[index[0], :]

        # Induction step
        for t in range(1, n_obs):
            for cur in range(n_states):
                for mid in range(n_states):
                    best_score = neg_inf
                    best_prev = -1
                    # Strict comparison keeps the first of several equal
                    # maxima and leaves -1 when every candidate is -inf.
                    for prev in range(n_states):
                        score = (self.v[t - 1, prev, mid]
                                 + self.a[prev, mid, cur])
                        if score > best_score:
                            best_score, best_prev = score, prev

                    self.v[t, mid, cur] = best_score + self.b[index[t], cur]
                    self.backptr[t, mid, cur] = best_prev

        # Backtrace: `later` and `earlier` are two consecutive states of
        # the best path; each iteration emits `later` and shifts the pair
        # one step towards the start.
        later = self.backptr[n_obs - 1, end, end]
        earlier = self.backptr[n_obs - 2, later, end]
        decoded = []
        for t in range(n_obs - 3, -1, -1):
            decoded.append(Key.index_to_char(later))
            earlier, later = self.backptr[t, earlier, later], earlier

        return "".join(reversed(decoded))
    def viterbi(self, s):
        """
        Decodes the input word by word with a trigram Viterbi search and
        returns the concatenated result.

        Carriage returns are treated as spaces and the input is split on
        single spaces.  Every empty token contributes one space to the
        output.  Every other token is padded with two spaces, decoded on
        its own, and contributes len(token) + 1 characters: the
        backpointer stored for step 1 followed by one state per letter.

        self.v and self.backptr are rebuilt for each decoded token, so
        after the call they describe the last non-empty token only.
        """
        n_states = Key.NUMBER_OF_CHARS
        end = Key.START_END
        pieces = []

        for token in s.replace('\r', ' ').split(" "):
            if token == '':
                pieces.append(' ')
                continue

            padded = token + '  '
            n_obs = len(padded)

            # Observed characters as integer indices.
            index = [Key.char_to_index(ch) for ch in padded]

            # The Viterbi matrices
            self.v = np.full((n_obs, n_states, n_states), -float("inf"),
                             dtype='double')
            self.backptr = np.zeros((n_obs, n_states, n_states),
                                    dtype='int')

            # Initialization
            self.v[0, end, :] = self.a[end, end, :] + self.b[index[0], :]
            self.backptr[0, end, :] = end

            # Induction step; the last padded position is never filled in.
            for t in range(1, n_obs - 1):
                for mid in range(n_states):
                    reach = self.v[t - 1, :, mid]
                    for cur in range(n_states):
                        cand = reach + self.a[:, mid, cur]
                        best = int(np.argmax(cand))
                        self.backptr[t, mid, cur] = best
                        self.v[t, mid, cur] = self.b[index[t], cur] + cand[best]

            # Backtrace from the best cell whose current state is the end
            # marker at the last filled step.
            last = n_obs - 2
            state = np.argmax(self.v[last, :, end])
            following = end
            states = [state]
            for t in range(last, 0, -1):
                state, following = self.backptr[t, state, following], state
                states.append(state)

            pieces.append(
                ''.join(Key.index_to_char(st) for st in reversed(states)))

        return ''.join(pieces)
Ejemplo n.º 4
0
    def viterbi(self, s):
        """
        Decodes the observed string with a bigram Viterbi search and
        returns the most likely string.

        self.a[prev, new] is the transition score and
        self.b[observation, state] the emission score.  self.v[t, k] is
        the best score of a path ending in state k at step t and
        self.backptr[t, k] is the predecessor of k on that path.

        The backtrace starts in state Key.START_END at the last step and
        emits the len(s) - 1 states before it; surrounding whitespace is
        stripped from the result.  `s` must not be empty.
        """
        # Observed characters as integer indices.
        index = [Key.char_to_index(ch) for ch in s]

        # Score and backpointer tables.
        self.v = np.zeros((len(s), Key.NUMBER_OF_CHARS))
        self.v[:, :] = -float("inf")
        self.backptr = np.zeros((len(s), Key.NUMBER_OF_CHARS), dtype='int')

        # Initialization
        self.backptr[0, :] = Key.START_END
        self.v[0, :] = self.a[Key.START_END, :] + self.b[index[0], :]

        # Induction step
        state_count = len(self.a)
        for t in range(1, len(self.v)):
            reach = self.v[t - 1]
            for state in range(state_count):
                # Best way of arriving in `state` from any previous state.
                candidates = reach + self.a[:, state]
                self.backptr[t, state] = np.argmax(candidates)
                arrival = candidates[self.backptr[t, state]]
                # The emission score is added after the maximisation.
                self.v[t, state] = arrival + self.b[index[t], state]

        # Backtrace from the end marker, collecting states last-to-first.
        state = Key.START_END
        decoded = []
        for t in range(len(self.backptr) - 1, 0, -1):
            state = self.backptr[t][state]
            decoded.append(Key.index_to_char(state))

        return ''.join(reversed(decoded)).strip()
Ejemplo n.º 5
0
    def viterbi(self, s):
        """
        Decodes the observed string with a bigram Viterbi search and
        returns the most likely string.

        self.v[t, k] is the best score of a path ending in state k at
        step t; self.backptr[t, k] is the predecessor of k on that path
        (-1 when no predecessor has a score above minus infinity).

        The backtrace starts in state Key.START_END at the last step and
        emits the len(s) - 1 states before it.  `s` must not be empty.
        """
        # Observed characters as integer indices.
        index = [Key.char_to_index(ch) for ch in s]
        n_obs = len(index)
        n_states = Key.NUMBER_OF_CHARS
        neg_inf = -float("inf")

        # Score and backpointer tables.
        self.v = np.zeros((len(s), n_states))
        self.v[:, :] = neg_inf
        self.backptr = np.zeros((len(s) + 1, n_states), dtype='int')

        # Initialization
        self.backptr[0, :] = Key.START_END
        self.v[0, :] = self.a[Key.START_END, :] + self.b[index[0], :]

        # Induction step
        for t in range(1, n_obs):
            observed = index[t]
            reach = self.v[t - 1]
            for cur in range(n_states):
                best_score = neg_inf
                best_prev = -1
                # Strict comparison keeps the first of several equal
                # maxima and leaves -1 when every candidate is -inf.
                for prev in range(n_states):
                    score = reach[prev] + self.a[prev, cur]
                    if score > best_score:
                        best_score, best_prev = score, prev

                self.v[t, cur] = best_score + self.b[observed, cur]
                self.backptr[t, cur] = best_prev

        # Backtrace: follow the backpointers from the end marker and
        # collect the visited states last-to-first.
        state = self.backptr[n_obs - 1, Key.START_END]
        decoded = []
        for t in range(n_obs - 2, -1, -1):
            decoded.append(Key.index_to_char(state))
            state = self.backptr[t, state]

        return "".join(reversed(decoded))
Ejemplo n.º 6
0
    def viterbi(self, s):
        """
        Decodes the observed string with a bigram Viterbi search and
        returns the most likely string.

        self.v[t, k] is the best score of a path ending in state k at
        step t.  The predecessor of k on that path is stored one row
        further down, in self.backptr[t + 1, k]; row 1 is never written
        and keeps its zeros.

        The backtrace starts from the best-scoring state at the last step
        and emits one character per observation; surrounding whitespace
        is stripped from the result.  `s` must not be empty.
        """
        # Observed characters as integer indices.
        index = [Key.char_to_index(ch) for ch in s]
        n_obs = len(index)
        n_states = Key.NUMBER_OF_CHARS

        # Score and backpointer tables.
        self.v = np.zeros((len(s), n_states))
        self.v[:, :] = -float("inf")
        self.backptr = np.zeros((len(s) + 1, n_states), dtype='int')

        # Initialization
        self.backptr[0, :] = Key.START_END
        self.v[0, :] = self.a[Key.START_END, :] + self.b[index[0], :]

        # Induction step
        for t in range(1, n_obs):
            observed = index[t]
            reach = self.v[t - 1]
            for state in range(n_states):
                # NOTE(review): the initialization indexes self.b as
                # [observation, state] while this line uses
                # [state, observation]; the two agree only if self.b is
                # symmetric -- confirm.
                scores = reach + self.a[:, state] + self.b[state][observed]
                best = np.argmax(scores)
                self.v[t][state] = scores[best]
                self.backptr[t + 1][state] = best

        # Backtrace from the best final state, collecting last-to-first.
        state = np.argmax(self.v[n_obs - 1])
        decoded = []
        for step in range(n_obs, 0, -1):
            decoded.append(Key.index_to_char(state))
            state = self.backptr[step][state]

        return "".join(reversed(decoded)).strip()
Ejemplo n.º 7
0
    def init_b(self):
        """
        Fills the observation matrix self.b with log-probabilities.

        In row i, every key listed in Key.neighbour[i] gets log(0.1), key
        i itself gets the log of the remaining mass
        (10 - number of neighbours) / 10, and all other entries are minus
        infinity.
        """
        neighbour_score = math.log(0.1)

        for key in range(Key.NUMBER_OF_CHARS):
            nearby = Key.neighbour[key]
            row = self.b[key]

            # Start from log(0) everywhere in this row.
            for col in range(Key.NUMBER_OF_CHARS):
                row[col] = -float("inf")

            # Each neighbouring key gets probability 0.1.
            for ch in nearby:
                row[Key.char_to_index(ch)] = neighbour_score

            # Whatever mass is left goes to the key itself.
            row[key] = math.log((10 - len(nearby)) / 10.0)
Ejemplo n.º 8
0
	def viterbi(self, s):
		"""
		Decodes the observed string with a trigram Viterbi search and
		returns the most likely string.

		self.a[i, j, k] is the score of moving to state k after states
		i, j, and self.b[o, k] the score of state k emitting observation
		o.  self.v[t, j, k] is the best score of a path that is in state
		j at step t - 1 and in state k at step t; self.backptr[t, j, k]
		is the state preceding j on that path.

		The backtrace starts with the last three states fixed to
		Key.START_END and produces len(s) + 1 characters before
		surrounding whitespace is stripped.  `s` must not be empty.
		"""
		# Observed characters as integer indices.
		index = [Key.char_to_index(ch) for ch in s]

		# Score and backpointer tables.
		self.v = np.zeros((len(s), Key.NUMBER_OF_CHARS, Key.NUMBER_OF_CHARS), dtype='double')
		self.v[:, :, :] = -float("inf")
		self.backptr = np.zeros((len(s), Key.NUMBER_OF_CHARS, Key.NUMBER_OF_CHARS), dtype='int')

		# Initialization: the same first-step scores are broadcast to
		# every previous-state row.
		self.v[0, :, :] = self.a[Key.START_END, Key.START_END, :] + self.b[index[0], :]
		self.backptr[0, :, :] = Key.START_END

		# Induction step
		state_count = len(self.a)
		for t in range(1, len(s)):
			observed = index[t]
			for mid in range(state_count):
				reach = self.v[t - 1, :, mid]
				for cur in range(state_count):
					candidates = reach + self.a[:, mid, cur]
					best = np.argmax(candidates)
					self.v[t, mid, cur] = candidates[best] + self.b[observed, cur]
					self.backptr[t, mid, cur] = best

		# Backtrace: a window of three consecutive states slides towards
		# the start.  The final pass reads row -1 (the last row), but that
		# value is never emitted.
		cur = Key.START_END
		nxt = Key.START_END
		after = Key.START_END
		decoded = []
		for t in range(len(self.v) - 1, -2, -1):
			decoded.append(Key.index_to_char(cur))
			cur, nxt = nxt, after
			after = self.backptr[t, nxt, cur]

		return ''.join(reversed(decoded)).strip()
    def viterbi(self, s):
        """
        Performs the Viterbi decoding and returns the most likely
        string.

        self.v[t, j, k] is the best score of a path that is in state j at
        step t - 1 and in state k at step t; self.backptr[t, j, k] is the
        state preceding j on that path.

        The backtrace starts from the best-scoring cell of the last step
        (the first one in row-major order when several cells tie) and
        emits one character per observation; surrounding whitespace is
        stripped from the result.  An empty input yields an empty string
        (self.v and self.backptr are left untouched in that case).
        """
        # Nothing to decode; indexing index[0] below would fail.
        if not s:
            return ""

        # Turn the observed characters into integer indices.
        index = [Key.char_to_index(x) for x in s]
        n_obs = len(index)
        n_states = Key.NUMBER_OF_CHARS

        # The Viterbi matrices
        self.v = np.full((n_obs, n_states, n_states), -np.inf,
                         dtype='double')
        self.backptr = np.zeros((n_obs, n_states, n_states), dtype='int')

        # Initialization
        self.v[0, Key.START_END, :] = self.a[
            Key.START_END, Key.START_END, :] + self.b[index[0], :]
        self.backptr[0, Key.START_END, :] = Key.START_END

        # Induction step
        for t in range(1, n_obs):
            observed = index[t]
            for hidden_state in range(n_states):
                for preceding_state in range(n_states):
                    # NOTE(review): the initialization indexes self.b as
                    # [observation, state] while this line uses
                    # [state, observation]; the two agree only if self.b
                    # is symmetric -- confirm.
                    scores = (self.v[t - 1, :, preceding_state]
                              + self.a[:, preceding_state, hidden_state]
                              + self.b[hidden_state, observed])
                    best = np.argmax(scores)
                    self.v[t, preceding_state, hidden_state] = scores[best]
                    self.backptr[t, preceding_state, hidden_state] = best

        # Locate the best final cell as two plain integers.  Comparing
        # against the maximum with np.where returns index arrays, which
        # cannot be converted to a single int when the maximum is tied.
        final = self.v[n_obs - 1]
        extra_pointer, pointer = np.unravel_index(np.argmax(final),
                                                  final.shape)
        extra_pointer, pointer = int(extra_pointer), int(pointer)

        # Backtrace, collecting states from the end towards the start.
        decoded = []
        for step in range(n_obs - 1, -1, -1):
            decoded.append(Key.index_to_char(pointer))
            extra_pointer, pointer = (
                int(self.backptr[step, extra_pointer, pointer]),
                extra_pointer)

        return "".join(reversed(decoded)).strip()