예제 #1
0
파일: testHMM.py 프로젝트: JGLee6/kddc2010
def hmm_tester(x, start, trans, emit):
    """
    Test the forward-backward algorithm in hmm.py on each student's last step.

    The observation sequence is split by student. For every student,
    forward-backward is run on all of that student's steps except the last
    one, a one-step-ahead prediction is formed from the final posterior, and
    that prediction is compared with the actual last step.

    Inputs
    ------
    x : ndarray
        training observation data, nx1
    start : ndarray
        starting probabilities, kx1
    trans : ndarray
        transition probabilities, kxk
    emit : ndarray
        emission probabilities, kxd

    Returns
    -------
    rmse : ndarray
        one entry per student: sqrt of the squared difference between the
        predicted probability and the actual value in column 1 of ``data``
        at the student's last step (with a single point per student this is
        the absolute error). This is currently not using the test data.

    Notes
    -----
    Besides its arguments the function reads the module-level names
    ``numStud``, ``idSplit`` and ``data`` and calls ``hmm.frwd_bkwd``.
    ``idSplit`` is used as the list of row indices at which a new student
    starts, so it is expected to hold ``numStud - 1`` entries, and ``x`` is
    expected to be row-aligned with ``data`` (assumption - confirm against
    the code that builds them).
    """
    def _lastStepError(obs, actual):
        # Run forward-backward on the student's history and predict the
        # next step from the last posterior state distribution.
        f, b, probF, probB, post = hmm.frwd_bkwd(obs, start, trans, emit)
        # Column 2 of the emission matrix is used as the outcome of interest
        # (presumably "correct on first attempt" - confirm).
        predicted = np.dot(emit[:, 2], np.dot(trans, post[-1]))
        return np.sqrt((actual - predicted) ** 2)

    #Initialize array for rmse to compare to actual data
    rmse = np.zeros(numStud)

    #First student: rows before the first split, last row held out
    rmse[0] = _lastStepError(x[:idSplit[0] - 1], data[idSplit[0] - 1, 1])

    #Last student: rows from the last split on, final row held out
    rmse[-1] = _lastStepError(x[idSplit[-1]:-1], data[-1, 1])

    #Remaining students: the slice idSplit[k]:idSplit[k+1] is student k+1,
    #so both the result slot and the held-out row must use k+1. (Writing to
    #slot k overwrote student 0 and left slot numStud-2 at zero.)
    for k in range(numStud - 2):
        begin, end = idSplit[k], idSplit[k + 1]
        rmse[k + 1] = _lastStepError(x[begin:end - 1], data[end - 1, 1])

    return rmse
예제 #2
0
def test_MCSGDBW(train, test, splitIds, outPairs, sp, tp, ep,index, binList):
    testLen = len(test)
    #binList = [0,.1,1.1,6,15,30,100]
    bins = np.array(binList)
    binVec = bins#(bins[1:]-bins[:-1])/2.+bins[:-1]
    observations = np.digitize(train[:,index],binList)
    observations -= 1.
    print np.max(observations),np.min(observations)
    for k in range(testLen):
        if outPairs[k,1] != 0:
            print k
            index0 = outPairs[k,0]
            index1 = outPairs[k,1]+outPairs[k,0]
            f,b,pF,pB,post = hmm.frwd_bkwd(observations[index0:index1], 
                                           sp, tp, ep)
            state = np.dot(ep.T,np.dot(tp.T,post[-1]))
            test[k,index] = np.dot(binVec,state)
            print test[k,index]
        else:
            test[k,index] = 0
            
    test[:,13] = np.digitize(test[:,index],binList)
    
    return test