Example #1
File: hmm.py Project: poneill/amic
import random

def test_viterbi():
    site = [0,1,2,3,0,1,2,3]
    background = lambda n:[random.choice(range(4)) for i in range(n)]
    obs = (site + background(1000) +
           site + background(1000) +
           site)

    states = [0,1,2,3,4,5,6,7,8] # 0 is background ("off"), 1-8 are binding-site positions
    start_p = [0.99,0.01,0,0,0,0,0,0,0]
    trans_p = [[0.99,0.01,0,0,0,0,0,0,0],
               [0,0,1,0,0,0,0,0,0],
               [0,0,0,1,0,0,0,0,0],
               [0,0,0,0,1,0,0,0,0],
               [0,0,0,0,0,1,0,0,0],
               [0,0,0,0,0,0,1,0,0],
               [0,0,0,0,0,0,0,1,0],
               [0,0,0,0,0,0,0,0,1],
               [0.99,0.01,0,0,0,0,0,0,0],
               ]
    emit_p = [[0.25,0.25,0.25,0.25],
              [1,0,0,0],
              [0,1,0,0],
              [0,0,1,0],
              [0,0,0,1],
              [1,0,0,0],
              [0,1,0,0],
              [0,0,1,0],
              [0,0,0,1]
              ]
    return viterbi(obs, states, start_p, trans_p, emit_p)
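
Each of these snippets assumes a `viterbi` implementation from its own project; the one in poneill/amic's hmm.py is not shown here. A minimal textbook sketch compatible with the call above (same argument order, returning (probability, best state path)) might look like this:

def viterbi(obs, states, start_p, trans_p, emit_p):
    # V[t][s]: probability of the best state path ending in state s at time t
    V = [{s: start_p[s] * emit_p[s][obs[0]] for s in states}]
    back = [{}]
    for t in range(1, len(obs)):
        V.append({})
        back.append({})
        for s in states:
            prev = max(states, key=lambda r: V[t - 1][r] * trans_p[r][s])
            V[t][s] = V[t - 1][prev] * trans_p[prev][s] * emit_p[s][obs[t]]
            back[t][s] = prev
    # backtrack from the best final state
    last = max(states, key=lambda s: V[-1][s])
    path = [last]
    for t in range(len(obs) - 1, 0, -1):
        path.append(back[t][path[-1]])
    return V[-1][last], path[::-1]

With sequences as long as the one built above, raw probabilities underflow to zero; a practical implementation would work in log space, as the viterbi_log variant in Example #3 does.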
Example #2
def tagger(testFile, taggedFile, n):
    dicOfWordTag, dicOfTag, dicOfTags = read_corpus("corpus/CoNLL2009-ST-English-train-pos.txt")
    f = open(testFile, 'r')
    fw = open(taggedFile, 'w')
    totalNumber = 0
    noSolution = 0
    words = []
    lines = []
    for line in f:
        line = line.strip()
        if line == '':
            # Sentences longer than n words are copied through untagged.
            if len(words) > n:
                for row in lines:
                    for item in row:
                        print(item, end=' ', file=fw)
                    print(file=fw)
                print(file=fw)
                words = []
                lines = []
                continue

            prob, path = viterbi(words, dicOfWordTag, dicOfTag, dicOfTags)

            # Zero probability means no admissible tag sequence was found:
            # copy the sentence through untagged.
            if prob == 0:
                noSolution += 1
                for row in lines:
                    for item in row:
                        print(item, end=' ', file=fw)
                    print(file=fw)
                print(file=fw)
                words = []
                lines = []
                continue

            # Write each token row with the predicted tag appended twice
            # (filling both the POS and predicted-POS columns).
            for i in range(len(path)):
                for item in lines[i]:
                    print(item, end=' ', file=fw)
                print(path[i], path[i], file=fw)
            print(file=fw)
            words = []
            lines = []

        else:
            line = line.split()
            words.append(line[1])  # column 1: word form
            lines.append(line)
            totalNumber += 1

    print(1 - float(noSolution) / totalNumber)
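
Both tagger above and evaluate in Example #4 below assume CoNLL-2009-style input: one token per line of whitespace-separated columns, with the word form in column 1 and the gold POS tag in column 4 (0-indexed), and a blank line terminating each sentence.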
Example #3
def main():
    """main function
    """
    n = 2  # Bigram HMM
    args = parse_arguments()
    treebank = TaggedCorpusReader(
        os.path.split(args.train_f)[0],
        os.path.split(args.train_f)[1])
    observation_space = [item[0] for item in treebank.tagged_words()]  # all words
    state_space = [item[1] for item in treebank.tagged_words()]  # all POS tags

    words = dict.fromkeys(observation_space)
    tags = dict.fromkeys(state_space)

    # HMM parameter estimation - initial, transition and emission probability
    start = time.time()
    init_p = [item[1] for item in comp_initial(tags, treebank)]
    trans_p = comp_transition(n, tags, state_space)
    emission_p = comp_emission(words,
                               tags,
                               state_space,
                               treebank,
                               smoothing=args.smoothing)
    end = time.time()
    print("Runtime (training): %.3f s" % (end - start))

    # Test your HMM-trained model
    treebank = TaggedCorpusReader(
        os.path.split(args.eval_f)[0],
        os.path.split(args.eval_f)[1])
    viterbi_tags = []

    start = time.time()
    for sentence in treebank.paras():
        test_words = [item[0] for item in sentence]
        O, S, Y, pi, A, B = pre_process(words, tags, test_words, init_p,
                                        trans_p, emission_p)
        # Computes Viterbi's most likely tags

        if args.log_prob:
            X = viterbi_log(O, S, Y, pi, A, B)
        else:
            X = viterbi(O, S, Y, pi, A, B)
        viterbi_tags.append(X)
    end = time.time()

    print("Runtime (viterbi): %.3f s" % (end - start))
    output_path = "./de-tagger.tt"
    post_processing(viterbi_tags, args.test_f, output_path)
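
The log_prob switch matters for long sentences: products of probabilities underflow quickly, so the log-space variant replaces multiplication with addition. The repo's viterbi_log is not shown; a minimal sketch under the assumption that pre_process returns NumPy probability arrays pi (K,), A (K, K), B (K, N) and an index sequence Y:

import numpy as np

def viterbi_log(O, S, Y, pi, A, B):
    # Sketch only: returns the most likely state-index sequence for Y.
    with np.errstate(divide='ignore'):        # log(0) -> -inf is intended
        log_pi, log_A, log_B = np.log(pi), np.log(A), np.log(B)
    K, T = len(S), len(Y)
    D = np.empty((K, T))                      # best log-probability per (state, time)
    back = np.zeros((K, T), dtype=int)
    D[:, 0] = log_pi + log_B[:, Y[0]]
    for t in range(1, T):
        scores = D[:, t - 1, None] + log_A    # scores[r, s]: r at t-1 -> s at t
        back[:, t] = np.argmax(scores, axis=0)
        D[:, t] = scores[back[:, t], np.arange(K)] + log_B[:, Y[t]]
    # backtrack from the best final state
    X = np.empty(T, dtype=int)
    X[-1] = np.argmax(D[:, -1])
    for t in range(T - 2, -1, -1):
        X[t] = back[X[t + 1], t + 1]
    return X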
Example #4
def evaluate(testFile, n):
    dicOfWordTag, dicOfTag, dicOfTags = read_corpus("corpus/CoNLL2009-ST-English-train-pos.txt")
    f = open(testFile, 'r')
    rightNumber = 0    # correctly predicted tags
    denominator = 0    # tokens that actually received a prediction
    totalNumber = 0    # all tokens seen
    noSolution = 0     # sentences whose best path has zero probability
    words = []
    right = []
    for line in f:
        line = line.strip()
        if line == '':
            # Skip sentences longer than n words.
            if len(words) > n:
                words = []
                right = []
                continue

            prob, path = viterbi(words, dicOfWordTag, dicOfTag, dicOfTags)

            if prob == 0:
                words = []
                right = []
                noSolution += 1
                continue

            for i in range(len(path)):
                if path[i] == right[i]:
                    rightNumber += 1
                denominator += 1
            words = []
            right = []

        else:
            line = line.split()
            words.append(line[1])   # column 1: word form
            right.append(line[4])   # column 4: gold POS tag
            totalNumber += 1

    # "precision" is tagging accuracy over the tokens that were tagged; the
    # value returned as "recall" is the fraction of tokens that were skipped
    # (too-long or unsolvable sentences), not recall in the usual sense.
    precision = float(rightNumber) / denominator
    recall = 1 - float(denominator) / totalNumber
    return precision, recall
Example #5
def __cut(sen):
    prob, pos_list = viterbi(sen,
                             status,
                             start_P,
                             trans_P,
                             emit_P,
                             end_status='ES')
    flag = 0
    for num, pos in enumerate(pos_list):
        # 'E' ends a multi-character word, 'S' is a single-character word;
        # either way, the current word ends at this position.
        if pos in ('E', 'S'):
            word = sen[flag:num + 1]
            flag = num + 1
            yield word
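
This is the standard BMES segmentation scheme: a cut is made wherever the decoded state is 'E' or 'S'. A tiny worked example of just the boundary logic, with a hand-supplied state sequence in place of the Viterbi output:

def cut_by_states(sen, pos_list):
    # same boundary rule as __cut, but with the state sequence given directly
    flag = 0
    for num, pos in enumerate(pos_list):
        if pos in ('E', 'S'):
            yield sen[flag:num + 1]
            flag = num + 1

print(list(cut_by_states("abcde", ['B', 'E', 'S', 'B', 'E'])))
# -> ['ab', 'c', 'de']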
Example #6
import sys

def main():
    outFileName = sys.argv[1]
    n = int(sys.argv[2])
    outFile = open(outFileName, 'w')

    data = genDieNumber(n)  # data[0]: rolls, data[1]: true die states
    prediction = viterbi(data[0])
    # Print rolls, true die states and Viterbi predictions in blocks of 50.
    for i in range(len(prediction) // 50):
        offset = 50 * i
        outFile.write("Rolls    ")
        for j in range(50):
            outFile.write(str(data[0][offset + j]))
        outFile.write("\n")
        outFile.write("Die      ")
        for j in range(50):
            outFile.write(str(data[1][offset + j]))
        outFile.write("\n")
        outFile.write("Viterbi  ")
        for j in range(50):
            outFile.write(str(prediction[offset + j]))
        outFile.write("\n")
Example #7
def test(trainfile, testfile):
    word_dict, pos_dict, tag_list, emission_probabilities, transition_probabilities, prefixp, suffixp = learnFromTraining(trainfile)
    infile = open(testfile, "r")
    unknown = count_unknown(infile)
    infile = open(testfile, "r")  # reopen to rewind to the start
    words = []
    for line in infile:
        words.append(line.rstrip())
    pos = []
    new_sentence = []
    for i in range(len(words)):
        if words[i] == '' or words[i] == '\n':
            v, p = viterbi(new_sentence, emission_probabilities, transition_probabilities, pos_dict, prefixp, suffixp, unknown)
            tags = bestPath(new_sentence, v, p, transition_probabilities.keys())
            # shift off the leading start tag and pad with '' so pos stays
            # aligned with words (blank separator lines included)
            tags.append('')
            tags = tags[1:]
            pos.extend(tags)
            new_sentence = []
        else:
            new_sentence.append(words[i])
    return pos, words
Example #8
import math

def evaluate():
    seqLen = []
    avg_accuracy = []
    avg_MCC = []
    for n in range(1000, 10100, 100):
        # per-length score accumulators, filled over 10 random sequences
        accuracy = []
        MCC = []
        for _ in range(10):
            data = genDieNumber(n)
            true_result = data[1]
            prediction = viterbi(data[0])
            TP = TN = FP = FN = 0
            for i in range(len(data[0])):
                if true_result[i] == "F":
                    if prediction[i] == "F":
                        TP += 1
                    else:
                        FN += 1
                else:
                    if prediction[i] == "F":
                        FP += 1
                    else:
                        TN += 1
            accuracy.append((TP + TN) / (TP + TN + FP + FN))
            # Matthews correlation coefficient
            denom = (TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)
            if denom != 0:
                MCC.append((TP * TN - FP * FN) / math.sqrt(denom))
            else:
                MCC.append(0.0)  # a zero factor forces a zero numerator too

        seqLen.append(n)
        avg_accuracy.append(sum(accuracy) / len(accuracy))
        avg_MCC.append(sum(MCC) / len(MCC))
    return [seqLen, avg_accuracy, avg_MCC]
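
For reference, the Matthews correlation coefficient averaged here is MCC = (TP·TN − FP·FN) / sqrt((TP+FP)(TP+FN)(TN+FP)(TN+FN)); whenever any factor in the denominator is zero the numerator is zero as well, so the 0/0 case is conventionally reported as 0.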
Example #9
File: hmm.py Project: poneill/amic
def baum_welch(obs,L):
    """Given sequence and bs length L, approximate MLE parameters for
    emission probabilities,transition rate a01 (background->site).
    TODO: non-uniform background frequencies"""
    states = range(L+1)
    a01 = random.random()
    start_p = make_start_p(a01)
    trans_p = make_trans_p(a01)
    emit_p = [simplex_sample(4) for state in states]
    hidden_states = [random.choice(states) for ob in obs]
    iterations = 0
    while True:
        # "hard" E-step: compute the most likely hidden states, given the parameters
        prob,hidden_states_new = viterbi(obs, states, start_p, trans_p, emit_p)
        # compute probs, given hidden states
        # first compute a01
        a01_new = estimate_a01(hidden_states_new)
        start_p_new = make_start_p(a01_new)
        trans_p_new = make_trans_p(a01_new)
        emit_p_new = estimate_emit_p(obs,hidden_states_new,states)
        if (start_p_new == start_p and
            trans_p_new == trans_p and
            emit_p_new == emit_p and
            hidden_states_new == hidden_states):
            break
        else:
            print(iterations, a01, l2(start_p, start_p_new),
                  l2(concat(trans_p), concat(trans_p_new)),
                  l2(hidden_states, hidden_states_new))
            a01 = a01_new
            start_p = start_p_new
            trans_p = trans_p_new
            emit_p = emit_p_new
            hidden_states = hidden_states_new
            iterations += 1
    return start_p,trans_p,emit_p,hidden_states
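
Despite the name, the loop above is Viterbi training ("hard EM"): it alternates hard decoding with re-estimation, whereas full Baum-Welch would use forward-backward expected counts. The helpers make_start_p and make_trans_p are not shown; a plausible sketch, consistent with the hand-built matrices in Example #1 (the repo versions take only a01, so the site length L is assumed to be in scope):

def make_start_p(a01, L=8):
    # background with probability 1 - a01, first site position with a01
    return [1 - a01, a01] + [0.0] * (L - 1)

def make_trans_p(a01, L=8):
    background_row = [1 - a01, a01] + [0.0] * (L - 1)
    rows = [background_row]          # state 0: stay in background or enter the site
    for s in range(1, L):            # states 1..L-1: advance deterministically
        row = [0.0] * (L + 1)
        row[s + 1] = 1.0
        rows.append(row)
    rows.append(background_row)      # state L: leave the site like background
    return rows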
Example #10
def memm(classifier, test_file, all_pos, start, transition, w):

    test = open(test_file, "r")

    tokens = []
    index = []
    pos = []
    counter = 0
    #used for validation only
    correct = 0

    seen = set()
    greedy_predictions = []
    memm_predictions = []
    memm_lex = defaultdict(lambda: defaultdict(float))

    for line in test:
        predictions = []

        if (counter % 3 == 0):
            tokens = line.split()
            for t in tokens:
                seen.add(t)
            #print ("tokens: " + "".join(tokens))
        elif (counter % 3 == 1):
            pos = line.split()
        else:
            index = line.split()
            for i in range(len(tokens)):
                features = {}

                # feature 1 = is this token capitalized
                if tokens[i][0].isupper():
                    features["caps"] = 1
                else:
                    features["caps"] = 0

                # feature 2 = this token's POS tag
                features["pos"] = pos[i]

                # feature 3 = previous BIO prediction
                if len(predictions) == 0:
                    features["prevBIO"] = "<s>"
                else:
                    features["prevBIO"] = predictions[-1]

                # feature 4 = does the previous word start in caps
                if i == 0:
                    features["prevCaps"] = 0
                elif tokens[i - 1][0].isupper():
                    features["prevCaps"] = 1
                else:
                    features["prevCaps"] = 0

                # feature 5 = previous POS tag
                if i == 0:
                    features["prevPOS"] = "<s>"
                else:
                    features["prevPOS"] = pos[i - 1]

                # feature 6 = previous word
                if i == 0:
                    features["prevWord"] = "<s>"
                else:
                    features["prevWord"] = tokens[i - 1]

                # feature 7 = next word
                if i == len(tokens) - 1:
                    features["nextWord"] = "</s>"
                else:
                    features["nextWord"] = tokens[i + 1]

                # feature 8 = next word's POS tag
                if i == len(tokens) - 1:
                    features["nextPOS"] = "</s>"
                else:
                    features["nextPOS"] = pos[i + 1]

                # feature 9 = the tag this word had in training, if seen (w[word] = tag)
                features["training"] = w.get(tokens[i], "nan")

                probs = classifier.prob_classify(features)

                #greedy predictions
                maxscore = 0
                best_tag = ""
                for tag in TAGS:
                    score = probs.prob(tag)
                    memm_lex[tag][tokens[i]] = score
                    if score > maxscore:
                        maxscore = score
                        best_tag = tag
                #print(best_tag)
                predictions.append(best_tag)

            memm_pre = viterbi(tokens, start, transition, memm_lex, seen)
            memm_predictions.append(memm_pre)
            greedy_predictions.append(predictions)
        counter += 1
    test.close()
    return greedy_predictions, memm_predictions
Example #11
def execute():
    # If there is no sentence input, asks for a sentence input
    if len(sentence_input.get()) == 0:
        canvas.delete("all")
        canvas.create_text(375, 50, text="Please Enter a Sentence")
    # If there is an illegal word in the input, tells the user
    elif len(sentence_in_corpus(word_tokenize(sentence_input.get()))) > 0:
        canvas.delete("all")
        canvas.create_text(375, 50, text="Found an Illegal Word")
    # If the user has selected "Forward Algorithm", it shows the results of running the Forward
    # Algorithm on the entire sentence with only the most likely part of speech shown
    elif v.get() == "1":
        canvas.delete("all")
        new_sentence = word_tokenize(sentence_input.get())
        non_normalized_result = forward(set(pos_tags), new_sentence,
                                        transition_matrix, emission_matrix)
        result = normalize(non_normalized_result)
        top_results = {}
        for i in range(len(new_sentence)):
            highest = 0.0
            best_pos = None
            for pos in set(pos_tags):
                if result[i][pos] >= highest:
                    best_pos = pos
                    highest = result[i][pos]
            top_results[new_sentence[i]] = best_pos, highest
        new_len = 100 * len(result[0])
        new_height = 60 * len(result) + 100
        if new_len > 750 or new_height > 750:
            canvas.config(height=new_height, width=new_len)
        for i in range(len(result)):
            canvas.create_rectangle(10 + 100 * i,
                                    25,
                                    100 + 100 * i,
                                    75,
                                    fill="white")
            canvas.create_text(55 + 100 * i, 50, text=new_sentence[i])
            canvas.create_rectangle(10 + 100 * i,
                                    90,
                                    100 + 100 * i,
                                    135,
                                    fill="lightblue")
            val = top_results[new_sentence[i]][0] + ": " + "{0:.4f}".format(
                top_results[new_sentence[i]][1])
            canvas.create_text(55 + 100 * i, 112, text=val)
    # If the user has selected "Viterbi Algorithm", it shows the results of running the Viterbi
    # Algorithm on the entire sentence
    elif v.get() == "2":
        canvas.delete("all")
        new_sentence = word_tokenize(sentence_input.get())
        result = viterbi(set(pos_tags), new_sentence, transition_matrix,
                         emission_matrix)
        new_len = 100 * len(result["predicted_tags"])
        new_height = 750
        if new_len > 750 or new_height > 750:
            canvas.config(height=new_height, width=new_len)
        for i in range(len(new_sentence)):
            canvas.create_rectangle(10 + 100 * i,
                                    25,
                                    100 + 100 * i,
                                    75,
                                    fill="white")
            canvas.create_text(55 + 100 * i, 50, text=new_sentence[i])
            canvas.create_rectangle(10 + 100 * i,
                                    90,
                                    100 + 100 * i,
                                    135,
                                    fill="lightblue")
            canvas.create_text(55 + 100 * i,
                               112,
                               text=result["predicted_tags"][i + 1])
    # If the user has selected "Forward with All Parts of Speech", then it shows a trellis diagram for
    # all the parts of speech
    elif v.get() == "3":
        canvas.delete("all")
        new_sentence = word_tokenize(sentence_input.get())
        non_normalized_result = forward(set(pos_tags), new_sentence,
                                        transition_matrix, emission_matrix)
        result = normalize(non_normalized_result)
        print(result)
        new_len = 100 * len(result)
        new_height = 60 * len(result[0]) + 100
        if new_len > 750 or new_height > 750:
            canvas.config(height=new_height, width=new_len)
        for i in range(len(result)):
            canvas.create_rectangle(10 + 100 * i,
                                    25,
                                    100 + 100 * i,
                                    75,
                                    fill="white")
            canvas.create_text(55 + 100 * i, 50, text=new_sentence[i])
            j = 0
            for pos in result[i]:
                val = str(pos) + ": " + "{0:.3f}".format(result[i][pos])
                canvas.create_rectangle(10 + 100 * i,
                                        90 + 50 * j,
                                        100 + 100 * i,
                                        135 + 50 * j,
                                        fill="lightblue")
                canvas.create_text(55 + 100 * i, 112 + 50 * j, text=val)
                j += 1
    # If the user has selected "Show All Progressions for Forward", it shows the step progression after each word
    # is added
    elif v.get() == "4":
        canvas.delete("all")
        new_sentence = word_tokenize(sentence_input.get())
        for i in range(1, len(new_sentence) + 1):
            non_normalized_result = forward(set(pos_tags), new_sentence[:i],
                                            transition_matrix, emission_matrix)
            result = normalize(non_normalized_result)
            top_results = {}
            for j in range(len(new_sentence[:i])):
                highest = 0.0
                best_pos = None
                for pos in set(pos_tags):
                    if result[j][pos] >= highest:
                        best_pos = pos
                        highest = result[j][pos]
                top_results[new_sentence[j]] = best_pos, highest
            new_len = 100 * len(result[0])
            new_height = 60 * len(result) + 100
            if new_len > 750 or new_height > 750:
                canvas.config(height=new_height, width=new_len)
            for k in range(len(result)):
                canvas.create_rectangle(10 + 100 * k,
                                        25,
                                        100 + 100 * k,
                                        75,
                                        fill="white")
                canvas.create_text(55 + 100 * k, 50, text=new_sentence[k])
                canvas.create_rectangle(10 + 100 * k,
                                        30 + 60 * i,
                                        100 + 100 * k,
                                        75 + 60 * i,
                                        fill="lightblue")
                val = top_results[
                    new_sentence[k]][0] + ": " + "{0:.4f}".format(
                        top_results[new_sentence[k]][1])
                canvas.create_text(55 + 100 * k, 52 + 60 * i, text=val)
    # If the user has selected "Show All Progressions for Viterbi", it shows the step progression after each word
    # is added to the algorithm
    elif v.get() == "5":
        canvas.delete("all")
        new_sentence = word_tokenize(sentence_input.get())
        for i in range(1, len(new_sentence) + 1):
            result = viterbi(set(pos_tags), new_sentence[:i],
                             transition_matrix, emission_matrix)
            new_len = 100 * len(result["predicted_tags"])
            new_height = 750
            if new_len > 750 or new_height > 750:
                canvas.config(height=new_height, width=new_len)
            for j in range(len(new_sentence[:i])):
                canvas.create_rectangle(10 + 100 * j,
                                        25,
                                        100 + 100 * j,
                                        75,
                                        fill="white")
                canvas.create_text(55 + 100 * j, 50, text=new_sentence[j])
                canvas.create_rectangle(10 + 100 * j,
                                        30 + 60 * i,
                                        100 + 100 * j,
                                        75 + 60 * i,
                                        fill="lightblue")
                canvas.create_text(55 + 100 * j,
                                   52 + 60 * i,
                                   text=result["predicted_tags"][j + 1])
Example #12
print(sum(B[1, :]))

# ## Question 4

# In[12]:

tags_true = []
tags_pred = []
scores = []

for sent in tqdm(testing):
    word_list = to_ids(V, [word for word, _ in sent])
    tag_list = to_ids(Q, [tag for _, tag in sent])
    tags_true.append(tag_list)

    predicted, score = viterbi((Pi, A, B), word_list)
    tags_pred.append(predicted)
    scores.append(score)

# In[13]:

predicted_set = set(flatten(tags_pred))
reference_set = set(flatten(tags_true))

print('Precision :', precision(predicted_set, reference_set))
print('Recall    :', recall(predicted_set, reference_set))
print('F1-score  :', f_measure(predicted_set, reference_set))

# ## Question 5

# We keep only the tag pairs that appear at least once in the whole set.
Example #13
        f.close()

    if (function == 'viterbi'):
        f = open(outfile, 'w')
        print("generating tables..")
        EMISSION = emissionTable(train_X, train_Y, test_X)
        print("emission done")
        TRANSITION = transitionTable(train_Y)
        print("transition done")
        unique_tags = getUniqueY(train_Y)
        print("unique tags gotten from text")
        print("All pre-requisites done, now running viterbi")
        for i in range(0, len(test_X)):
            # print(test_X[i])
            print("Writing one sentence, " + str(len(test_X) - i) + " to go.")
            viterbi_sentence = viterbi(test_X[i], len(test_X[i]), TRANSITION,
                                       EMISSION, unique_tags)
            for j in range(0, len(test_X[i])):
                towrite = str(test_X[i][j]) + " " + str(viterbi_sentence[j])
                f.write(towrite + '\n')
            f.write('\n')
        f.close()

    if (function == 'viterbi_topk'):
        f = open(outfile, 'w')
        print("generating tables..")
        EMISSION = emissionTable(train_X, train_Y, test_X)
        print("emission done")
        TRANSITION = transitionTable(train_Y)
        print("transition done")
        unique_tags = getUniqueY(train_Y)
        print("unique tags gotten from text")
Example #14
def main(
        param=0.2,
        PATH_LOAD_FILE='/home/keums/Melody/dataset/adc2004_full_set/file/pop4.wav',
        PATH_SAVE_FILE='./SAVE_RESULTS/pop4.txt'):

    #    PATH_LOAD_FILE = sys.argv[1]
    #    PATH_SAVE_FILE = sys.argv[2]

    #==================================
    # Feature Extraction
    # .wav --> spectrogram
    #==================================
    x_test_log = myFeatureExtraction(PATH_LOAD_FILE)  #path ??

    #==================================
    # making multi column spectrogram
    # for training
    #==================================
    x_test_SF = making_multi_frame(x_test_log, num_frames=1)
    x_test_MF = making_multi_frame(x_test_log, num_frames=11)

    select_res_1st = 1
    select_res_2nd = 2
    select_res_3rd = 4
    pitch_range = np.arange(min_pitch, max_pitch + 1.0 / select_res_3rd,
                            1.0 / select_res_3rd)

    #==================================
    # Melody extraction
    # using DNN
    #==================================
    y_predict_1st = MelodyExtraction_SCDNN(x_test_MF, select_res_1st)
    y_predict_2nd = MelodyExtraction_SCDNN(x_test_MF, select_res_2nd)
    y_predict_3rd = MelodyExtraction_SCDNN(x_test_MF, select_res_3rd)

    #==================================
    # merge SCDNN
    #==================================
    #    print('Merging....')
    ratio_res_1_3 = select_res_3rd // select_res_1st  # integer ratios: used below as slice indices
    ratio_res_2_3 = select_res_3rd // select_res_2nd

    y_predict_tmp_1_3 = np.zeros(y_predict_3rd.shape)
    y_predict_tmp_2_3 = np.zeros(y_predict_3rd.shape)

    for i in range(y_predict_3rd.shape[0]):
        for j in range(y_predict_1st.shape[1] - 1):
            y_predict_tmp_1_3[i, j * ratio_res_1_3:j * ratio_res_1_3 +
                              ratio_res_1_3] = y_predict_1st[i, j]
        y_predict_tmp_1_3[i, -1] = y_predict_1st[i, -1]

    for i in range(y_predict_3rd.shape[0]):
        for j in range(y_predict_2nd.shape[1] - 1):
            y_predict_tmp_2_3[i, j * ratio_res_2_3:j * ratio_res_2_3 +
                              ratio_res_2_3] = y_predict_2nd[i, j]
        y_predict_tmp_2_3[i, -1] = y_predict_2nd[i, -1]


#    y_predict = (y_predict_tmp_1_3+0.0000001) *(y_predict_tmp_2_3+0.0000001) * (y_predict_3rd +0.0000001)
    y_predict = 10**(np.log10(y_predict_tmp_1_3) +
                     np.log10(y_predict_tmp_2_3) + np.log10(y_predict_3rd))
    del y_predict_tmp_1_3
    del y_predict_tmp_2_3

    #==================================
    # singing voice detection
    #==================================
    voice_frame_vad = VAD_DNN(x_test_SF, y_predict_1st, param=param)

    #==================================
    # viterbi algorithm
    #==================================
    path_viterbi = './viterbi/'
    path_prior_matrix_file = path_viterbi + 'prior_' + str(
        select_res_3rd) + '.npy'
    path_transition_matrix_file = path_viterbi + 'transition_matrix_' + str(
        select_res_3rd) + '.npy'

    prior = np.load(path_prior_matrix_file)
    transition_matrix = np.load(path_transition_matrix_file)
    viterbi_path = viterbi(y_predict,
                           transition_matrix=transition_matrix,
                           prior=prior,
                           penalty=0,
                           scaled=True)

    pitch_MIDI = np.zeros([y_predict.shape[0], 1])
    pitch_freq = np.zeros([y_predict.shape[0], 1])

    for i in range(y_predict.shape[0]):
        # for test : original
        #        index_predict[i] = np.argmax(y_predict[i,:])
        #        pitch_MIDI[i] = pitch_range[index_predict[i]]

        #viterbi_path
        pitch_MIDI[i] = pitch_range[viterbi_path[i]]
        pitch_freq[i] = 2**((pitch_MIDI[i] - 69) / 12.) * 440

    est_pitch = np.multiply(pitch_freq, voice_frame_vad)
    #==================================
    #adjust frame
    #==================================

    idx_shift = 2
    shift_array = np.zeros(idx_shift)
    est_pitch = np.append(shift_array, est_pitch[:-idx_shift])

    #==================================
    # save result
    #==================================

    PATH_est_pitch = PATH_SAVE_FILE

    if not os.path.exists(os.path.dirname(PATH_est_pitch)):
        os.makedirs(os.path.dirname(PATH_est_pitch))
    f = open(PATH_est_pitch, 'w')

    for j in range(len(est_pitch)):
        est = "%f\t%f\n" % (0.01 * j, est_pitch[j])
        f.write(est)
    f.close()
    print(PATH_est_pitch)
Example #15
    if random.random() < P[1]:
        state = 1
    for i in range(60):
        switched_state = (state + 1) % 2
        if i > 0 and random.random() < Tm[state][switched_state]:
            state = switched_state
        lines[0] += str(random.choices(list(range(1, 7)), Em[state])[0])
        lines[1] += 'F' if state == 0 else 'L'
    return lines


probabilities_file, rolls_file = parse_args()
P, Tm, Em = parse_probabilities(probabilities_file)
if rolls_file is None:
    lines = generate_rolls()
else:
    with open(rolls_file) as infile:
        lines = infile.read().splitlines()

print('Rolls:     ' + lines[0])
print('Die:       ' + lines[1])
observations = [int(i) for i in lines[0]]
viter = viterbi(S, P, observations, Tm, Em)
forw_back = forward_backward(observations, S, P, Tm, Em)
percent = lambda obs: str(
    round(
        sum([obs[i] == lines[1][i]
             for i in range(len(observations))]) / len(observations) * 100, 2))
print('Viterbi:   ' + ''.join(viter) + ' (' + percent(viter) + '%)')
print('Posterior: ' + ''.join(forw_back) + ' (' + percent(forw_back) + '%)')
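
For comparison with the Viterbi line, forward_backward here performs posterior decoding: at each position it picks the state maximizing P(state | all observations), which can beat Viterbi on per-symbol accuracy even though the resulting sequence need not be a consistent path. A minimal sketch of that final step, assuming alpha and beta tables from standard forward and backward passes (the actual forward_backward is not shown):

def posterior_decode(alpha, beta, states):
    # alpha[t][s] * beta[t][s] is proportional to P(state at t = s | observations)
    path = []
    for t in range(len(alpha)):
        posterior = {s: alpha[t][s] * beta[t][s] for s in states}
        path.append(max(posterior, key=posterior.get))
    return path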