Esempio n. 1
0
def mainRunner(inputFileName, initialProbabilities, transitionProbalities, emissionI, emissionD, converstionTable, delim, initial):
    """Run every decoding step on one input file and write the results to disk.

    The sequences are read from ``inputFileName`` with ``stripAwayNewLines``;
    the forward, backward, Viterbi and posterior helpers are then called in
    turn, printing a progress message before each step.  Five text files are
    written into the directory named by ``delim`` (the directory is not
    created here, so it must already exist):

        <delim>/<initial>_veterbiEncoding_<delim>.txt
        <delim>/<initial>_Posterior_<delim>.txt
        <delim>/<initial>_PosteriorMean_<delim>.txt
        <delim>/<initial>_ThreeCol_<delim>.txt
        <delim>/<initial>_likelihoods_<delim>.txt

    Args:
        inputFileName: Path of the sequence file to read.
        initialProbabilities: Forwarded unchanged to the forward and Viterbi
            helpers.
        transitionProbalities: Forwarded unchanged to the forward, backward
            and Viterbi helpers.
        emissionI: Forwarded unchanged to the forward, backward and Viterbi
            helpers.
        emissionD: Forwarded unchanged to the forward, backward and Viterbi
            helpers.
        converstionTable: Forwarded to ``calculatePosteriorMeanLog`` and to
            the file-writing helpers.
        delim: Used both as the output directory and as the file-name suffix.
        initial: File-name prefix of every output file.

    Raises:
        SystemExit: With status 1 when the input file cannot be opened.

    NOTE(review): the four probability arguments are presumably HMM tables;
    their exact layout is defined by the helpers, not by this function.
    """
    # Single-argument, parenthesised print calls are used throughout: they
    # emit the same text under the Python 2 print statement and the Python 3
    # print function.

    def outputPath(label):
        # All result files share one naming scheme; only the label differs.
        return delim + "/" + initial + "_" + label + "_" + delim + ".txt"

    # Keep the try body minimal so only the open() failure is translated
    # into the "file does not exist" message.
    try:
        inputFile = open(inputFileName, 'r')
    except IOError:
        print('The input file does not exist to read')
        # Same effect as the site-provided exit(1), without depending on the
        # `site` module having installed that helper.
        raise SystemExit(1)

    # The with-block closes the input even if parsing raises.
    with inputFile:
        sequences = stripAwayNewLines(inputFile)  # tuple pair of each sequence
        seqLength = len(sequences[0])

    print("Calculating Forward")
    forwardTableLog = calculateForwardAlgoLog(initialProbabilities, transitionProbalities, emissionI, emissionD, sequences)

    print("Calculatring Probability of Path")
    likelihood = likelihoodOfSequence(forwardTableLog)

    print("Calculating Backwards")
    backwardsTableLog = calculateBackwardsAlgoLog(transitionProbalities, emissionI, emissionD, sequences)

    print("Calculating Viterbi Encoding")
    veterbiEncodingLog = calculateVeterbiEncodingLog(initialProbabilities, transitionProbalities, emissionI, emissionD, sequences)
    lastState = getLastState(veterbiEncodingLog[0], seqLength)
    veterbiStatesLog = hiddenStatePath(veterbiEncodingLog[1], seqLength, lastState)

    print("Calculating Most Probable States Posterior Encoding")
    # Tuple pair: [0] holds all posterior values, [1] the most probable states.
    posteriorTableAndRouteLog = computePosteriorDecodingLog(forwardTableLog, backwardsTableLog, seqLength, likelihood)
    posteriorStatesLog = posteriorTableAndRouteLog[1]
    posteriorMeanLog = calculatePosteriorMeanLog(posteriorTableAndRouteLog[0], seqLength, converstionTable)

    # Each output file is managed by a with-block so it is flushed and closed
    # even when the writer helper raises part-way through.
    with open(outputPath("veterbiEncoding"), 'w') as outputFile:
        writeToFileWithBreaks(outputFile, veterbiStatesLog, converstionTable)
        print("Finished Writing Veterb Encoding for " + delim)

    with open(outputPath("Posterior"), 'w') as outputFile:
        writeToFileWithBreaks(outputFile, posteriorStatesLog, converstionTable)
        print("Finished Writing Posterior Encoding for " + delim)

    with open(outputPath("PosteriorMean"), 'w') as outputFile:
        writeToFileWithBreaksStraight(outputFile, posteriorMeanLog)
        print("Finished Writing Posterior Mean for " + delim)

    with open(outputPath("ThreeCol"), 'w') as outputFile:
        writeToFileThreeCol(outputFile, (veterbiStatesLog, posteriorStatesLog, posteriorMeanLog), converstionTable)
        print("Finished Writing Three Columns for " + delim)

    with open(outputPath("likelihoods"), 'w') as outputFile:
        outputFile.write(str(likelihood) + '\n')
        print("Finished Writing Log-likelihoods for " + delim)

    print("Done")
    
    
    
Esempio n. 2
0
def expectation_maximization(sequences, params):
    """Perform one update of ``params`` and report the likelihood.

    The forward and backward tables are built from the current values in
    ``params``; the posterior table derived from them is passed to
    ``calculateBigVars``, and ``calculateNewInitialValues`` turns that into
    the replacement values.

    Args:
        sequences: Forwarded to the table helpers; ``len(sequences[0])`` is
            used as the sequence length.
        params: Mapping with keys ``'m'``, ``'t'`` and ``'e'`` (the latter a
            mapping with keys ``'i'`` and ``'d'``).  It is updated in place.

    Returns:
        A ``(params, likelihood)`` tuple: the same ``params`` object, now
        holding the new values, and the likelihood computed from the values
        it held on entry.
    """
    # Snapshot the current parameters once; every helper below works on the
    # values that were in force when the function was entered.
    initial = params['m']
    transition = params['t']
    emission_i = params['e']['i']
    emission_d = params['e']['d']

    fwd = calculateForwardAlgoLog(initial, transition, emission_i, emission_d, sequences)
    likelihood = likelihoodOfSequence(fwd)
    bwd = calculateBackwardsAlgoLog(transition, emission_i, emission_d, sequences)

    seq_length = len(sequences[0])
    posterior = computePosteriorDecodingLog(fwd, bwd, seq_length, likelihood)

    big_vars = calculateBigVars(
        posterior[0], sequences, fwd, bwd,
        transition, emission_i, emission_d, likelihood,
    )
    updated = calculateNewInitialValues(big_vars)

    # Store the new values back in the order m, t, e.i, e.d.
    params['m'] = updated[0]
    params['t'] = updated[1]
    params['e']['i'] = updated[2]
    params['e']['d'] = updated[3]

    return params, likelihood
Esempio n. 3
0
def viterbi(sequences, params):
    """Decode ``sequences`` and return the likelihood plus a text report.

    The forward, backward, Viterbi and posterior helpers are run with the
    values stored in ``params``.  The report starts with a header line and
    then has one line per position of the Viterbi path, holding three
    space-separated columns: the converted Viterbi state, the converted
    posterior state and the posterior mean.

    ``conversion_table`` is not a parameter; it is read from the enclosing
    module scope.

    Args:
        sequences: Forwarded to the table helpers; ``len(sequences[0])`` is
            used as the sequence length.
        params: Mapping with keys ``'m'``, ``'t'`` and ``'e'`` (the latter a
            mapping with keys ``'i'`` and ``'d'``).

    Returns:
        A ``(likelihood, report)`` tuple where ``report`` is a single string.
    """
    initial = params['m']
    transition = params['t']
    emission_i = params['e']['i']
    emission_d = params['e']['d']
    seq_length = len(sequences[0])

    forward_table = calculateForwardAlgoLog(initial, transition, emission_i, emission_d, sequences)
    likelihood = likelihoodOfSequence(forward_table)
    backward_table = calculateBackwardsAlgoLog(transition, emission_i, emission_d, sequences)

    viterbi_encoding_log = calculateVeterbiEncodingLog(initial, transition, emission_i, emission_d, sequences)
    last_state = getLastState(viterbi_encoding_log[0], seq_length)
    viterbi_states_log = hiddenStatePath(viterbi_encoding_log[1], seq_length, last_state)

    posterior_table_and_route_log = computePosteriorDecodingLog(forward_table, backward_table, seq_length, likelihood)
    posterior_states_log = posterior_table_and_route_log[1]
    posterior_mean_log = calculatePosteriorMeanLog(posterior_table_and_route_log[0], seq_length, conversion_table)

    # append(), not extend(): extending a list with a string adds it one
    # character at a time, which only produced the right text by accident of
    # the final join and bloated the list for long sequences.
    decoding = ['# Viterbi_decoding posterior_decoding posterior_mean\n']
    # Index-based on purpose: a posterior/mean table shorter than the Viterbi
    # path is not silently truncated the way zip() would truncate it.
    for i in range(len(viterbi_states_log)):
        decoding.append('{0} {1} {2}\n'.format(
            conversion_table[viterbi_states_log[i]],
            conversion_table[posterior_states_log[i]],
            posterior_mean_log[i],
        ))

    return likelihood, ''.join(decoding)
Esempio n. 4
0
def mainRunnerEM(inputFileName, initialProbabilities, transitionProbalities, emissionI, emissionD, converstionTable, delim):
    """Run one E step and one M step on the sequences stored in a file.

    The sequences are read from ``inputFileName`` with ``stripAwayNewLines``.
    The forward and backward tables and the posterior table are computed from
    the supplied parameters, ``calculateBigVars`` performs the E step and
    ``calculateNewInitialValues`` the M step.

    Args:
        inputFileName: Path of the sequence file to read.
        initialProbabilities: Forwarded unchanged to the forward helper.
        transitionProbalities: Forwarded unchanged to the forward and
            backward helpers and to ``calculateBigVars``.
        emissionI: Forwarded unchanged to the same helpers.
        emissionD: Forwarded unchanged to the same helpers.
        converstionTable: Unused here; kept so the signature stays stable.
        delim: Unused here; kept so the signature stays stable.

    Returns:
        A ``(newValues, likelihood)`` tuple, where ``newValues`` is whatever
        ``calculateNewInitialValues`` returned and ``likelihood`` was
        computed from the parameters passed in.

    Raises:
        SystemExit: With status 1 when the input file cannot be opened.
    """
    # Keep the try body minimal so only the open() failure is translated
    # into the "file does not exist" message.
    try:
        inputFile = open(inputFileName, 'r')
    except IOError:
        # Parenthesised single-argument print emits the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('The input file does not exist to read')
        # Same effect as the site-provided exit(1), without depending on the
        # `site` module having installed that helper.
        raise SystemExit(1)

    # The with-block closes the input even if parsing raises.
    with inputFile:
        sequences = stripAwayNewLines(inputFile)  # tuple pair of each sequence
        seqLength = len(sequences[0])

    forwardTable = calculateForwardAlgoLog(initialProbabilities, transitionProbalities, emissionI, emissionD, sequences)
    likelihood = likelihoodOfSequence(forwardTable)
    backwardTable = calculateBackwardsAlgoLog(transitionProbalities, emissionI, emissionD, sequences)

    # Tuple pair: [0] holds all posterior values, [1] the most probable states.
    posteriorTableAndRouteLog = computePosteriorDecodingLog(forwardTable, backwardTable, seqLength, likelihood)

    # E step
    check = calculateBigVars(posteriorTableAndRouteLog[0], sequences, forwardTable, backwardTable, transitionProbalities, emissionI, emissionD, likelihood)
    # M step
    temp = calculateNewInitialValues(check)

    return (temp, likelihood)