def expectation_maximization(sequences, params):
    """Perform a single EM update of ``params`` using ``sequences``.

    The current parameters are read from ``params['m']``, ``params['t']``,
    ``params['e']['i']`` and ``params['e']['d']`` and passed to the
    forward / backward / posterior helpers.  The re-estimated values
    returned by ``calculateNewInitialValues`` are then written back into
    those same four slots.

    NOTE: ``params`` is modified in place; the returned mapping is the very
    object that was passed in.

    Returns:
        A ``(params, likelihood)`` pair, where ``likelihood`` was computed
        from the forward table built with the parameters as they were
        *before* this update.
    """
    initial = params['m']
    transitions = params['t']
    emit_i = params['e']['i']
    emit_d = params['e']['d']

    # Forward pass and the likelihood derived from it.
    fwd = calculateForwardAlgoLog(initial, transitions, emit_i, emit_d, sequences)
    likelihood = likelihoodOfSequence(fwd)

    # Backward pass, then posterior decoding over the first sequence's length.
    bwd = calculateBackwardsAlgoLog(transitions, emit_i, emit_d, sequences)
    seq_len = len(sequences[0])
    posterior_and_route = computePosteriorDecodingLog(fwd, bwd, seq_len, likelihood)
    posterior = posterior_and_route[0]

    # Collect the intermediate quantities, then re-estimate the parameters.
    big_vars = calculateBigVars(
        posterior, sequences, fwd, bwd, transitions, emit_i, emit_d, likelihood
    )
    updated = calculateNewInitialValues(big_vars)

    # Write the new estimates back, one slot at a time, in the original order.
    params['m'] = updated[0]
    params['t'] = updated[1]
    params['e']['i'] = updated[2]
    params['e']['d'] = updated[3]

    return params, likelihood
def mainRunnerEM(inputFileName, initialProbabilities, transitionProbalities,
                 emissionI, emissionD, converstionTable, delim):
    """Read sequences from ``inputFileName`` and run one E step and one M step.

    Args:
        inputFileName: Path of the text file to read the sequences from.
        initialProbabilities: Passed to ``calculateForwardAlgoLog`` as its
            first argument.
        transitionProbalities: Transition values handed to the forward,
            backward and ``calculateBigVars`` helpers.
        emissionI: First emission table handed to those same helpers.
        emissionD: Second emission table handed to those same helpers.
        converstionTable: Unused here; kept so existing callers keep working.
        delim: Unused here; kept so existing callers keep working.

    Returns:
        A ``(new_values, likelihood)`` tuple, where ``new_values`` is
        whatever ``calculateNewInitialValues`` returns and ``likelihood`` is
        computed from the forward table built with the supplied parameters.

    Raises:
        SystemExit: With exit status 1 (after printing a message) when the
            input file cannot be opened.
    """
    # Keep the try body to the single call that can raise IOError.
    try:
        inputFile = open(inputFileName, 'r')
    except IOError:
        # Single-argument parenthesised print emits the same text on
        # Python 2 and Python 3.
        print('The input file does not exist to read')
        # Raise SystemExit directly: the bare ``exit`` helper is installed
        # by the ``site`` module for interactive use and may be missing.
        raise SystemExit(1)

    # ``with`` guarantees the handle is closed even if parsing fails.
    with inputFile:
        sequences = stripAwayNewLines(inputFile)  # tuple pair of each sequence
        seqLength = len(sequences[0])

    forwardTable = calculateForwardAlgoLog(
        initialProbabilities, transitionProbalities, emissionI, emissionD,
        sequences)
    likelihood = likelihoodOfSequence(forwardTable)
    backwardTable = calculateBackwardsAlgoLog(
        transitionProbalities, emissionI, emissionD, sequences)

    # Tuple pair: has all posterior values and the most probable route.
    posteriorTableAndRouteLog = computePosteriorDecodingLog(
        forwardTable, backwardTable, seqLength, likelihood)

    # E step
    check = calculateBigVars(
        posteriorTableAndRouteLog[0], sequences, forwardTable, backwardTable,
        transitionProbalities, emissionI, emissionD, likelihood)
    # M step
    temp = calculateNewInitialValues(check)
    return (temp, likelihood)