forked from turian/parser-model
-
Notifications
You must be signed in to change notification settings - Fork 0
/
extract-features.py
executable file
·40 lines (32 loc) · 1.08 KB
/
extract-features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/usr/bin/python
# Script setup: read the hyperparameter set named "attardi07_english_ptb"
# and pass it to common.options.reparse().
# NOTE(review): reparse() presumably applies command-line overrides to the
# values just read -- confirm against common.options.
# NOTE(review): reparse() runs before the remaining project imports below;
# it is unclear from this file whether those imports depend on it, so keep
# the statement order unless that has been checked.
import common.hyperparameters, common.options
HYPERPARAMETERS = common.hyperparameters.read("attardi07_english_ptb")
common.options.reparse(HYPERPARAMETERS)
from common.file import myopen
import string
# Obtain two maps from the project's deprecated featuremap module, one
# requested under the name "labels" and one under the name "features".
# Later in this script they receive .id(..., can_add=True) calls, are marked
# readonly, and are dumped.
import common.deprecated.featuremap
labelmap = common.deprecated.featuremap.get(name="labels")
featuremap = common.deprecated.featuremap.get(name="features")
# Walk the original examples file and register every label and feature name
# in the two maps.
#
# Each non-blank line is split on whitespace: the first token is the
# example's label, every remaining token is one feature written either as
# "name" (value taken as 1.0) or as "name:value".
for line in myopen(HYPERPARAMETERS["original examples file"]):
    tokens = line.split()
    if not tokens:
        # Blank line: nothing to register.
        continue
    label, features = tokens[0], tokens[1:]
    labelmap.id(label, can_add=True)
    for feature in features:
        parts = feature.split(":")
        # `value` is not used again in this script; it is still computed so
        # that a non-numeric value fails here, exactly as before.
        if len(parts) == 1:
            name, value = parts[0], 1.
        elif len(parts) == 2:
            name = parts[0]
            value = float(parts[1]) / HYPERPARAMETERS["divide feature values by"]
        else:
            # Raised explicitly instead of `assert 0`: a bare assert is
            # stripped under `python -O`, which would let the loop fall
            # through and register a stale (or undefined) feature name.
            raise AssertionError(
                "malformed feature %r: expected 'name' or 'name:value'"
                % (feature,))
        featuremap.id(name, can_add=True)

# Mark both maps read-only, report their sizes, then dump them.
# NOTE(review): dump() presumably persists each map -- confirm against
# common.deprecated.featuremap.
featuremap.readonly = True
labelmap.readonly = True
# Single-argument parenthesised print: identical output under the Python 2
# print statement, and also valid as a Python 3 function call.
print("%d features" % featuremap.len)
print("%d labels " % labelmap.len)
featuremap.dump()
labelmap.dump()