-
Notifications
You must be signed in to change notification settings - Fork 0
/
cost_viterbi.py
94 lines (77 loc) · 2.57 KB
/
cost_viterbi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#! /usr/bin/python
'''
Featurized Viterbi algorithm
Created on Sep 12, 2013
@author: swabha
'''
from viterbi import check
from features import extract
from collections import defaultdict
# Placeholder label (empty string): the initial value of every backpointer
# and of the running arg-max in execute() before any candidate label has
# produced a score greater than -inf.
def_label = ''
def _mismatch_cost(predicted, gold, missed_entity_penalty):
    '''Return the cost-augmentation term for labelling a token `predicted`.

    The cost is 0.0 when `predicted` equals `gold`, 1.0 for any mismatch,
    and 1.0 + `missed_entity_penalty` when a gold 'B'/'I' token is
    predicted as 'O'.
    '''
    if predicted == gold:
        return 0.0
    cost = 1.0
    if gold in ('B', 'I') and predicted == 'O':
        cost += missed_entity_penalty
    return cost


def execute(sentence, labelset, postags, weights, goldlabels, info):
    '''Cost-augmented Viterbi decoding over a featurized bigram tag model.

    Finds the label sequence maximizing the sum of get_score() values for
    every (previous label, current label) transition, plus a cost that
    rewards disagreeing with `goldlabels`.

    Parameters:
        sentence   -- sequence of tokens; its length n fixes the output length.
        labelset   -- iterable of candidate labels.  The start symbol '*' is
                      added to a private copy when absent; the caller's
                      object is NOT modified.
        postags    -- per-token POS tags, indexed in parallel with `sentence`.
        weights    -- mapping from feature to weight, passed to get_score().
        goldlabels -- reference labels, indexed in parallel with `sentence`.
        info       -- opaque value forwarded unchanged to get_score().

    Returns:
        A list of n labels, or [] when `sentence` is empty.

    Raises:
        IndexError if `postags` or `goldlabels` is shorter than `sentence`.
    '''
    n = len(sentence)
    if n == 0:
        # Nothing to tag; also avoids indexing goldlabels[-1] on empty input.
        return []

    # Work on a copy so the caller's label list is not modified.
    labels = list(labelset)
    if '*' not in labels:
        labels.append('*')

    # Extra cost for predicting 'O' where the gold label is 'B' or 'I'.
    # NOTE(review): the interior and final-token penalties differ (20.0 vs
    # 9.0); both values are preserved as found -- confirm this is intended.
    interior_miss_penalty = 20.0
    final_miss_penalty = 9.0

    neg_inf = float("-inf")
    # pi[k][u]: best score of any label prefix of length k ending in u.
    # bp[k][u]: the label at position k-1 on that best prefix.
    pi = [dict.fromkeys(labels, neg_inf) for _ in range(n + 1)]
    bp = [dict.fromkeys(labels, def_label) for _ in range(n + 1)]
    pi[0]['*'] = 0.0

    for k in range(1, n + 1):
        word = sentence[k - 1]
        postag = postags[k - 1]
        # The cost at step k is charged to the PREVIOUS label w (token k-1),
        # so it depends only on w and can be computed once per step.  At
        # k == 1 the previous position is the start symbol and costs nothing.
        if k >= 2:
            gold_prev = goldlabels[k - 2]
            prev_cost = dict(
                (w, _mismatch_cost(w, gold_prev, interior_miss_penalty))
                for w in labels)
        else:
            prev_cost = dict.fromkeys(labels, 0.0)

        for u in labels:
            max_score = neg_inf
            argmax = def_label
            for w in labels:
                local_score = get_score(word, u, w, postag, weights, info)
                score = pi[k - 1][w] + local_score + prev_cost[w]
                if score > max_score:
                    max_score = score
                    argmax = w
            pi[k][u] = max_score
            bp[k][u] = argmax

    # Choose the final label: transition into <STOP> plus the cost of the
    # last token's label.  The cost is recomputed for every candidate so the
    # gold label is always charged exactly 0.0.
    gold_last = goldlabels[-1]
    max_score = neg_inf
    best_last_label = def_label
    for w in labels:
        cost = _mismatch_cost(w, gold_last, final_miss_penalty)
        local_score = get_score('', '<STOP>', w, '', weights, info)
        score = pi[n][w] + local_score + cost
        if score > max_score:
            max_score = score
            best_last_label = w

    # Follow backpointers from position n down to position 2, collecting
    # labels last-to-first, then flip into sentence order.
    tags = [best_last_label]
    for k in range(n, 1, -1):
        tags.append(bp[k][tags[-1]])
    tags.reverse()
    return tags
def get_score(word, current_tag, prev_tag, postag, weights, info):
    '''Return the summed weight of the features extracted for one transition.

    Calls extract(word, current_tag, prev_tag, postag, info) and adds up
    weights[feature] for each returned feature that is present in
    `weights`; features absent from `weights` contribute nothing.  The
    result starts from 0.0, so it is 0.0 when no feature matches.
    '''
    known_weights = (
        weights[feat]
        for feat in extract(word, current_tag, prev_tag, postag, info)
        if feat in weights
    )
    # Plain left-to-right accumulation keeps the floating-point result
    # identical to adding the weights one at a time.
    total = 0.0
    for value in known_weights:
        total += value
    return total