from robot import Distribution  # dict subclass with renormalize(); see the assignment's robot.py

def initial_distribution():
    # returns a Distribution for the initial hidden state (uniform over 'F'/'B')
    prior = Distribution()
    prior['F'] = 1
    prior['B'] = 1
    prior.renormalize()
    return prior
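# The two module-level names below are used throughout this section but never
# bound here; a minimal sketch of the bindings, assuming the fair ('F') /
# biased ('B') coin example defined by the models in this file:
all_possible_hidden_states = ['F', 'B']
prior_distribution = initial_distribution()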
def observation_model(state):
    observed_states = Distribution()
    if state == 'F':
        observed_states['H'] = .5
        observed_states['T'] = 0.5
    elif state == 'B':
        observed_states['H'] = .25
        observed_states['T'] = .75
    observed_states.renormalize()
    return observed_states
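# myDictMin and myneglog are called below but not defined in this section.
# Minimal sketches consistent with how they are used:
import math

def myDictMin(dist):
    # return (smallest value, its key) for a dict-like Distribution
    minKey = min(dist, key=dist.get)
    return dist[minKey], minKey

def myneglog(dist):
    # elementwise negative log; zero-probability entries map to +inf
    out = Distribution()
    for key, val in dist.items():
        out[key] = float('inf') if val == 0 else -math.log(val)
    return out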
def mostLikely(neglogphi, prevMsgHat):
    # final Viterbi step: add the last node's -log potential to the incoming
    # min-sum message and return (minimum total cost, argmin hidden state)
    tmp = Distribution()
    for x1Key, x1Val in neglogphi.items():
        tmp[x1Key] = x1Val + prevMsgHat[x1Key]
    minVal, minKey = myDictMin(tmp)
    return minVal, minKey
def observation_model_weather(state):
    # observation model for a separate hot/cold toy example; renamed from
    # observation_model so it does not shadow the coin model above
    observed_states = Distribution()
    if state == '1':
        observed_states['hot'] = 1
        observed_states['cold'] = 0
    elif state == '2':
        observed_states['hot'] = 0
        observed_states['cold'] = 1
    elif state == '3':
        observed_states['hot'] = 1
        observed_states['cold'] = 0
    observed_states.renormalize()
    return observed_states
def transition_model(state):
    # given a hidden state, return the Distribution for the next hidden state
    next_states = Distribution()

    # the chain tends to stay in its current state (self-transition probability 0.75)
    if state == 'F':
        next_states['F'] = .75
        next_states['B'] = .25
    elif state == 'B':
        next_states['F'] = 0.25
        next_states['B'] = 0.75
    next_states.renormalize()
    return next_states
def ViterbiWkHorse(neglogphi, prevMsgHat):
    # one min-sum (Viterbi) message update:
    #   mHat(x2) = min over x1 of [ -log phi(x1) - log T(x1 -> x2) + prevMsgHat(x1) ]
    # tBack[x2] records the argmin x1, i.e. the traceback pointer
    mHat = Distribution()
    tBack = {}
    for x2Key in all_possible_hidden_states:
        tmp = Distribution()
        for x1Key, x1Val in neglogphi.items():
            x2Poss = transition_model(x1Key)
            neglogx2Poss = myneglog(x2Poss)
            tmp[x1Key] = x1Val + neglogx2Poss[x2Key] + prevMsgHat[x1Key]
        minVal, minKey = myDictMin(tmp)
        mHat[x2Key] = minVal
        tBack[x2Key] = minKey
    return mHat, tBack
def rev_transition_model(curState):
    # given a hidden state, return the likelihood of each previous hidden
    # state; this is a column of the transition matrix, so it is not
    # normalized in general (it happens to be for this symmetric chain)
    revModel = Distribution()
    for x in all_possible_hidden_states:
        tmp = transition_model(x)
        revModel[x] = tmp[curState]
    return revModel
def forward(alphaIn, phi_x, y):
    """Compute the next forward message.

    y is unused: the observation is already folded into the node potential phi_x.
    """
    # fold the node potential into the incoming message
    alphaPhi_X = Distribution()
    for x, alphaX in alphaIn.items():
        tmpProd = phi_x[x] * alphaX
        if tmpProd > 0:
            alphaPhi_X[x] = tmpProd

    # push the result through the transition model:
    # alphaOut(x2) = sum over x of T(x -> x2) * phi(x) * alphaIn(x)
    alphaOut = Distribution()
    for x, alphaPhi in alphaPhi_X.items():
        x2Poss = transition_model(x)
        for x2Key, x2pVal in x2Poss.items():
            alphaOut[x2Key] += x2pVal * alphaPhi
    return alphaOut
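# backward() is called from forward_backward() below but is not defined in
# this section. A minimal sketch mirroring forward(), assuming the same
# Distribution semantics (missing keys read as 0); as in forward(), y is
# unused because the observation is already folded into phi_x:
def backward(betaIn, phi_x, y):
    # fold the node potential into the incoming backward message
    betaPhi_X = Distribution()
    for x, betaX in betaIn.items():
        tmpProd = phi_x[x] * betaX
        if tmpProd > 0:
            betaPhi_X[x] = tmpProd

    # betaOut(x) = sum over x2 of T(x -> x2) * phi(x2) * betaIn(x2)
    betaOut = Distribution()
    for x in all_possible_hidden_states:
        x2Poss = transition_model(x)
        for x2Key, x2pVal in x2Poss.items():
            betaOut[x] += x2pVal * betaPhi_X[x2Key]
    return betaOut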
def buildPhi(y):
    # node potential phi(x) = p(y | x); a missing observation (y is None)
    # contributes a flat potential of 1
    phi_X = Distribution()
    for x in all_possible_hidden_states:
        if y is None:
            phi_X[x] = 1
        else:
            yPoss = observation_model(x)
            phi_X[x] = yPoss[y]
    return phi_X
def forward_backward(observations):
    """
    Input
    -----
    observations: a list of observations, one per hidden state
        (a missing observation is encoded as None)

    Output
    ------
    A list of marginal distributions, one per time step; each is encoded as
    a Distribution (see the Distribution class in robot.py and how it is
    used there and in generate_data() above). The i-th Distribution
    corresponds to time step i.
    """


    num_time_steps = len(observations)
    forward_messages = [None] * num_time_steps
    forward_messages[0] = prior_distribution

    # pre-build phi_x for all nodes
    phi_XList = [None] * num_time_steps
    for idx, y in enumerate(observations):
        phi_XList[idx] = buildPhi(y)

    # compute the forward messages
    for idx, y in enumerate(observations[0:-1]):
        alphaIn = forward_messages[idx]
        nxtFwd = forward(alphaIn, phi_XList[idx], y)
        forward_messages[idx+1] = nxtFwd

    backward_messages = [None] * num_time_steps
    # compute the backward messages, starting from an all-ones final message
    backMsg = Distribution()
    for x in all_possible_hidden_states:
        backMsg[x] = 1
    backward_messages[-1] = backMsg
    for idx, y in enumerate(observations[-1:0:-1]):
        nodeIdx = num_time_steps - idx - 1
        betaIn = backward_messages[nodeIdx]
        nxtBeta = backward(betaIn, phi_XList[nodeIdx], y)
        backward_messages[nodeIdx - 1] = nxtBeta

    marginals = []
    # combine forward message, node potential, and backward message at each node
    fbpZip = zip(forward_messages, backward_messages, phi_XList)
    for fwd, back, phi in fbpZip:        
        marg = mkMarginals(fwd, back, phi)
        marginals.append(marg)
    return marginals
def mkMarginals(fwd, back, phi):
    # node marginal: p(x_t | y_1..T) is proportional to alpha * phi * beta
    marg = Distribution()
    for x in all_possible_hidden_states:
        marg[x] = phi[x] * fwd[x] * back[x]
    marg.renormalize()
    return marg
def Viterbi(observations):
    """
    Input
    -----
    observations: a list of observations, one per hidden state
        (a missing observation is encoded as None)

    Output
    ------
    A list of estimated hidden states, one per time step (here each state is
    a plain label such as 'F' or 'B')
    """


    num_time_steps = len(observations)

    # pre-build phi_x for all nodes
    phi_XList = [None] * num_time_steps
    for idx, y in enumerate(observations):
        phi_XList[idx] = buildPhi(y)
    # fold the prior into the first node's potential
    for key in phi_XList[0]:
        phi_XList[0][key] *= prior_distribution[key]

    # compute the negative log of each node potential
    neglogphiList = [myneglog(x) for x in phi_XList]

    mHatList = [None] * num_time_steps
    tBackList = [None] * num_time_steps
    # the first min-sum message is identically zero (no incoming costs yet)
    mHatZero = Distribution()
    for x in all_possible_hidden_states:
        mHatZero[x] = 0
    mHatList[0] = mHatZero

    for idx, y in enumerate(observations[:-1]):
        mHatPrev = mHatList[idx]
        mHat, tBack = ViterbiWkHorse(neglogphiList[idx], mHatPrev)
        mHatList[idx + 1] = mHat
        tBackList[idx + 1] = tBack

    # trace back from the most likely final state; finhat is the total cost
    # (-log probability) of the best path
    finhat, finState = mostLikely(neglogphiList[-1], mHatList[-1])
    finStates = [None] * num_time_steps
    finStates[-1] = finState
    for idx in range(num_time_steps - 1, 0, -1):
        curState = finStates[idx]
        tBack = tBackList[idx]
        finStates[idx - 1] = tBack[curState]
    estimated_hidden_states = finStates
    return estimated_hidden_states
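# A hypothetical smoke test (not part of the original assignment harness),
# assuming the coin-example bindings sketched near the top of this file:
if __name__ == '__main__':
    obs = ['H', 'H', 'T', 'T', 'T']
    for t, marg in enumerate(forward_backward(obs)):
        print(t, dict(marg))
    print(Viterbi(obs))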