def mainRunner(inputFileName, initialProbabilities, transitionProbalities, emissionI, emissionD, converstionTable, delim, initial):
    """Decode the sequences in ``inputFileName`` and write five result files.

    The function reads the sequences, then calls, in order, the forward
    pass, the likelihood computation, the backward pass, the Viterbi
    encoding and the posterior decoding / posterior mean helpers.  The
    results are written to files named ``DELIM/INITIAL_TAG_DELIM.txt`` where
    TAG is one of ``veterbiEncoding``, ``Posterior``, ``PosteriorMean``,
    ``ThreeCol`` and ``likelihoods``.  The ``delim`` directory is not created
    here, so it has to exist already.

    Args:
        inputFileName: path of the sequence file to read.
        initialProbabilities: forwarded to ``calculateForwardAlgoLog`` and
            ``calculateVeterbiEncodingLog``.
        transitionProbalities: forwarded to the forward, backward and
            Viterbi helpers.
        emissionI: forwarded to the forward, backward and Viterbi helpers.
        emissionD: forwarded to the forward, backward and Viterbi helpers.
        converstionTable: forwarded to ``calculatePosteriorMeanLog`` and to
            the file writers.
        delim: used both as the output directory name and as the file-name
            suffix; it is also echoed in the progress messages.
        initial: file-name prefix of every output file.

    Returns:
        None.

    Raises:
        SystemExit: with code 1 when the input file cannot be opened.
    """
    try:
        inputFile = open(inputFileName, 'r')
    except IOError:
        print('The input file does not exist to read')
        # Raise SystemExit directly; the bare ``exit`` name only exists when
        # the ``site`` module has been loaded.
        raise SystemExit(1)

    # ``with`` guarantees the handle is released even if parsing fails.
    with inputFile:
        sequences = stripAwayNewLines(inputFile)  # tuple pair of each sequence
        seqLength = len(sequences[0])

    print("Calculating Forward")
    forwardTableLog = calculateForwardAlgoLog(initialProbabilities, transitionProbalities, emissionI, emissionD, sequences)

    print("Calculatring Probability of Path")
    likelihood = likelihoodOfSequence(forwardTableLog)

    print("Calculating Backwards")
    backwardsTableLog = calculateBackwardsAlgoLog(transitionProbalities, emissionI, emissionD, sequences)

    print("Calculating Viterbi Encoding")
    veterbiEncodingLog = calculateVeterbiEncodingLog(initialProbabilities, transitionProbalities, emissionI, emissionD, sequences)
    veterbiStatesLog = hiddenStatePath(veterbiEncodingLog[1], seqLength, getLastState(veterbiEncodingLog[0], seqLength))

    print("Calculating Most Probable States Posterior Encoding")
    # Tuple pair: index 0 holds all posterior values, index 1 the most
    # probable states.
    posteriorTableAndRouteLog = computePosteriorDecodingLog(forwardTableLog, backwardsTableLog, seqLength, likelihood)
    posteriorStatesLog = posteriorTableAndRouteLog[1]
    posteriorMeanLog = calculatePosteriorMeanLog(posteriorTableAndRouteLog[0], seqLength, converstionTable)

    def outputPath(tag):
        # Every output file shares the DELIM/INITIAL_TAG_DELIM.txt layout.
        return delim + "/" + initial + "_" + tag + "_" + delim + ".txt"

    # Each file is written inside its own ``with`` block so it is flushed and
    # closed before the matching "Finished" message is printed, and is closed
    # even when a writer raises.
    with open(outputPath("veterbiEncoding"), 'w') as outputFile:
        writeToFileWithBreaks(outputFile, veterbiStatesLog, converstionTable)
    print("Finished Writing Veterb Encoding for " + delim)

    with open(outputPath("Posterior"), 'w') as outputFile2:
        writeToFileWithBreaks(outputFile2, posteriorStatesLog, converstionTable)
    print("Finished Writing Posterior Encoding for " + delim)

    with open(outputPath("PosteriorMean"), 'w') as outputFile3:
        writeToFileWithBreaksStraight(outputFile3, posteriorMeanLog)
    print("Finished Writing Posterior Mean for " + delim)

    with open(outputPath("ThreeCol"), 'w') as outPutFile4:
        writeToFileThreeCol(outPutFile4, (veterbiStatesLog, posteriorStatesLog, posteriorMeanLog), converstionTable)
    print("Finished Writing Three Columns for " + delim)

    with open(outputPath("likelihoods"), 'w') as outPutFile5:
        outPutFile5.write(str(likelihood) + '\n')
    print("Finished Writing Log-likelihoods for " + delim)

    print("Done")
def expectation_maximization(sequences, params):
    """Run one parameter re-estimation pass and update ``params`` in place.

    The forward table, likelihood, backward table and posterior decoding are
    computed from the current parameters, handed to ``calculateBigVars`` and
    the outcome of ``calculateNewInitialValues`` is stored back into
    ``params``.

    Args:
        sequences: the input sequences; ``sequences[0]`` must support
            ``len``.
        params: dict read through the keys ``'m'``, ``'t'`` and ``'e'``
            (the latter a dict with keys ``'i'`` and ``'d'``).  It is
            mutated by this call.

    Returns:
        A tuple ``(params, likelihood)`` where ``params`` is the same dict
        object that was passed in and ``likelihood`` was computed from the
        parameters as they were *before* the update.
    """
    start = params['m']
    transitions = params['t']
    emit_i = params['e']['i']
    emit_d = params['e']['d']

    fwd = calculateForwardAlgoLog(start, transitions, emit_i, emit_d, sequences)
    likelihood = likelihoodOfSequence(fwd)
    bwd = calculateBackwardsAlgoLog(transitions, emit_i, emit_d, sequences)

    posterior = computePosteriorDecodingLog(fwd, bwd, len(sequences[0]), likelihood)
    expected = calculateBigVars(
        posterior[0], sequences, fwd, bwd,
        transitions, emit_i, emit_d, likelihood,
    )
    updated = calculateNewInitialValues(expected)

    # Written back one key at a time, in the same order as before.
    params['m'] = updated[0]
    params['t'] = updated[1]
    params['e']['i'] = updated[2]
    params['e']['d'] = updated[3]

    return params, likelihood
def viterbi(sequences, params):
    """Decode ``sequences`` and return the likelihood plus a text report.

    The report starts with the header line
    ``# Viterbi_decoding posterior_decoding posterior_mean`` followed by one
    space-separated line per entry of the Viterbi state path.

    NOTE(review): ``conversion_table`` is not a parameter and is not defined
    inside this function; it is resolved as a global at call time.  Confirm
    that the module defines it.

    Args:
        sequences: the input sequences; ``sequences[0]`` must support
            ``len``.
        params: dict read through the keys ``'m'``, ``'t'`` and ``'e'``
            (the latter a dict with keys ``'i'`` and ``'d'``).

    Returns:
        A tuple ``(likelihood, report)`` where ``report`` is a single string.
    """
    start = params['m']
    transitions = params['t']
    emit_i = params['e']['i']
    emit_d = params['e']['d']

    forward_table = calculateForwardAlgoLog(start, transitions, emit_i, emit_d, sequences)
    likelihood = likelihoodOfSequence(forward_table)
    backward_table = calculateBackwardsAlgoLog(transitions, emit_i, emit_d, sequences)
    viterbi_encoding_log = calculateVeterbiEncodingLog(start, transitions, emit_i, emit_d, sequences)

    # Computed once instead of at every call site.
    seq_length = len(sequences[0])

    viterbi_states_log = hiddenStatePath(
        viterbi_encoding_log[1],
        seq_length,
        getLastState(viterbi_encoding_log[0], seq_length),
    )

    posterior_table_and_route_log = computePosteriorDecodingLog(forward_table, backward_table, seq_length, likelihood)
    posterior_states_log = posterior_table_and_route_log[1]
    posterior_mean_log = calculatePosteriorMeanLog(posterior_table_and_route_log[0], seq_length, conversion_table)

    # Collect whole lines with append(); extend() on a str would add it one
    # character at a time.
    lines = ['# Viterbi_decoding posterior_decoding posterior_mean\n']
    # Indexing (rather than zip) keeps a length mismatch between the three
    # result sequences loud instead of silently truncating the report.
    for i in range(len(viterbi_states_log)):
        lines.append('{0} {1} {2}\n'.format(
            conversion_table[viterbi_states_log[i]],
            conversion_table[posterior_states_log[i]],
            posterior_mean_log[i],
        ))

    return likelihood, ''.join(lines)
def mainRunnerEM(inputFileName, initialProbabilities, transitionProbalities, emissionI, emissionD, converstionTable, delim):
    """Read the sequences from ``inputFileName`` and run one re-estimation pass.

    The forward table, likelihood, backward table and posterior decoding are
    computed from the supplied parameters, passed to ``calculateBigVars`` and
    the result is fed to ``calculateNewInitialValues``.

    Args:
        inputFileName: path of the sequence file to read.
        initialProbabilities: forwarded to ``calculateForwardAlgoLog``.
        transitionProbalities: forwarded to the forward and backward helpers
            and to ``calculateBigVars``.
        emissionI: forwarded to the forward and backward helpers and to
            ``calculateBigVars``.
        emissionD: forwarded to the forward and backward helpers and to
            ``calculateBigVars``.
        converstionTable: accepted but not used by this function.
        delim: accepted but not used by this function.

    Returns:
        A tuple ``(temp, likelihood)`` where ``temp`` is whatever
        ``calculateNewInitialValues`` returned.

    Raises:
        SystemExit: with code 1 when the input file cannot be opened.
    """
    try:
        inputFile = open(inputFileName, 'r')
    except IOError:
        print('The input file does not exist to read')
        # Raise SystemExit directly; the bare ``exit`` name only exists when
        # the ``site`` module has been loaded.
        raise SystemExit(1)

    # ``with`` guarantees the handle is released even if parsing fails.
    with inputFile:
        sequences = stripAwayNewLines(inputFile)  # tuple pair of each sequence
        seqLength = len(sequences[0])

    forwardTable = calculateForwardAlgoLog(initialProbabilities, transitionProbalities, emissionI, emissionD, sequences)
    likelihood = likelihoodOfSequence(forwardTable)
    backwardTable = calculateBackwardsAlgoLog(transitionProbalities, emissionI, emissionD, sequences)

    # Tuple pair: index 0 holds all posterior values, index 1 the most
    # probable states.
    posteriorTableAndRouteLog = computePosteriorDecodingLog(forwardTable, backwardTable, seqLength, likelihood)

    # Marked "E" in the original notes - presumably the expectation step.
    check = calculateBigVars(posteriorTableAndRouteLog[0], sequences, forwardTable, backwardTable, transitionProbalities, emissionI, emissionD, likelihood)
    # Marked "M" in the original notes - presumably the maximization step.
    temp = calculateNewInitialValues(check)

    return (temp, likelihood)