예제 #1
0
def expectation_maximization(sequences, params):
    """Run a single re-estimation pass and write the new values into ``params``.

    The pass chains the file's helpers in this order: forward table,
    likelihood, backward table, posterior decoding, ``calculateBigVars``
    (presumably the E step) and ``calculateNewInitialValues`` (presumably
    the M step).

    Args:
        sequences: Indexable collection of sequences; the length of
            ``sequences[0]`` is passed to the posterior decoding helper.
        params: Mapping with keys ``'m'``, ``'t'`` and ``'e'``, where
            ``params['e']`` itself holds keys ``'i'`` and ``'d'``.
            It is updated in place.

    Returns:
        Tuple ``(params, likelihood)``: the same ``params`` object after the
        update, and the likelihood computed from the forward table built with
        the values ``params`` held on entry.
    """
    # Look the current parameters up once, in the order the helpers use them.
    start_probs = params['m']
    transitions = params['t']
    emissions = params['e']
    emit_i = emissions['i']
    emit_d = emissions['d']

    fwd = calculateForwardAlgoLog(start_probs, transitions, emit_i, emit_d, sequences)
    likelihood = likelihoodOfSequence(fwd)
    bwd = calculateBackwardsAlgoLog(transitions, emit_i, emit_d, sequences)

    # Only element 0 of the posterior result is consumed below.
    posterior = computePosteriorDecodingLog(fwd, bwd, len(sequences[0]), likelihood)
    big_vars = calculateBigVars(
        posterior[0], sequences, fwd, bwd,
        transitions, emit_i, emit_d, likelihood,
    )
    updated = calculateNewInitialValues(big_vars)

    # Indices 0..3 of the result map onto m, t, e.i and e.d respectively.
    params['m'] = updated[0]
    params['t'] = updated[1]
    params['e']['i'] = updated[2]
    params['e']['d'] = updated[3]
    return params, likelihood
예제 #2
0
def mainRunnerEM(inputFileName, initialProbabilities, transitionProbalities, emissionI, emissionD, converstionTable, delim):
    """Read sequences from a file and run one forward/backward re-estimation pass.

    Args:
        inputFileName: Path of the sequence file; opened in text read mode.
        initialProbabilities: Passed as the first argument to
            ``calculateForwardAlgoLog``.
        transitionProbalities: Passed to the forward, backward and
            ``calculateBigVars`` helpers.
        emissionI: Passed to the same helpers alongside ``emissionD``.
        emissionD: Passed to the same helpers alongside ``emissionI``.
        converstionTable: Not used by this function; kept so existing callers
            keep working.
        delim: Not used by this function; kept so existing callers keep
            working.

    Returns:
        Tuple ``(temp, likelihood)`` where ``temp`` is the result of
        ``calculateNewInitialValues`` and ``likelihood`` is the value returned
        by ``likelihoodOfSequence`` for the forward table.

    Raises:
        SystemExit: With exit code 1 when the input file cannot be opened.
    """
    try:
        inputFile = open(inputFileName, 'r')
    except IOError:
        # Single-argument parenthesised form prints the same text on Python 2 and 3.
        print('The input file does not exist to read')
        # Same exit status as exit(1), without depending on the site-injected helper.
        raise SystemExit(1)

    # The with-block guarantees the handle is closed even if parsing fails.
    with inputFile:
        sequences = stripAwayNewLines(inputFile)  # tuple pair of each sequence

    seqLength = len(sequences[0])

    forwardTable = calculateForwardAlgoLog(initialProbabilities, transitionProbalities, emissionI, emissionD, sequences)

    likelihood = likelihoodOfSequence(forwardTable)

    backwardTable = calculateBackwardsAlgoLog(transitionProbalities, emissionI, emissionD, sequences)

    # Tuple pair. Has all post values and most probable
    posteriorTableAndRouteLog = computePosteriorDecodingLog(forwardTable, backwardTable, seqLength, likelihood)
    # E step
    check = calculateBigVars(posteriorTableAndRouteLog[0], sequences, forwardTable, backwardTable, transitionProbalities, emissionI, emissionD, likelihood)
    # M step
    temp = calculateNewInitialValues(check)

    return (temp, likelihood)