/
linearSVM.py
51 lines (42 loc) · 1.25 KB
/
linearSVM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
__author__ = 'Ariel'
import util
import numpy as np
def process(X, Y, file):
    """Write a feature matrix and its labels to a text file, one sample per line.

    Each output line has the form ``<label> <col>:<value> <col>:<value> \\n``
    where ``<col>`` is the 1-based column index of a non-zero feature.
    Zero-valued features are omitted, and every ``<col>:<value>`` pair (as well
    as the label) is followed by a single space.

    Args:
        X: feature matrix; expected to be a scipy ``csr_matrix`` (rows are
            accessed as ``X[i].toarray()``).
        Y: labels, indexable by row number; ``str(Y[i])`` is written verbatim.
        file: path of the output file; it is created or overwritten.

    Returns:
        None.
    """
    with open(file, 'w') as f:
        # ``range`` instead of the Python-2-only ``xrange`` so the function
        # runs on both Python 2 and Python 3.
        for row in range(X.shape[0]):
            # Densify a single row; ``nonzero`` on the flat array yields the
            # column indices in ascending order.
            dense = X[row].toarray().flatten()
            cols = dense.nonzero()[0]
            parts = [str(Y[row]), " "]
            for col, value in zip(cols, dense[cols]):
                # Column indices are written 1-based.
                parts.append(str(col + 1))
                parts.append(":")
                parts.append(str(value))
                parts.append(" ")
            parts.append("\n")
            # Build the line once instead of repeated string concatenation.
            f.write("".join(parts))
    return
def generate(trainfile='yelp_reviews_train.json', predfile='yelp_reviews_dev.json',
             trainout='libtrainHash.txt', predout='libdevHash.txt', ifHash=True):
    """Preprocess the train and dev review files and dump both via ``process``.

    The training file is run through ``util.preprocess`` with ``ifTrain=True``;
    the third value it returns is handed back to ``util.preprocess`` as
    ``trainTop`` when the dev file is processed with ``ifTrain=False``.
    The training matrix is written with its labels. The dev matrix is written
    with an all-zero label column; the labels returned for the dev file are
    ignored.

    Args:
        trainfile: path of the training JSON file.
        predfile: path of the dev/prediction JSON file.
        trainout: output path for the training matrix.
        predout: output path for the dev matrix.
        ifHash: forwarded unchanged to ``util.preprocess`` for both files.
            NOTE(review): presumably toggles feature hashing -- confirm in util.

    Returns:
        None.
    """
    X_train, y_train, top = util.preprocess(
        trainfile, ifTrain=True, ifHash=ifHash, trainTop=[])
    X_dev, _, _ = util.preprocess(
        predfile, ifTrain=False, ifHash=ifHash, trainTop=top)
    process(X_train, y_train, trainout)
    # Dev rows get a zero label for every sample.
    process(X_dev, np.zeros(X_dev.shape[0]), predout)
    return
def duplicate(infile='liblinear-2.1/libdevHashpred',
              outfile='liblinear-2.1/devHashpred1.txt'):
    """Copy ``infile`` to ``outfile`` with every line's content written twice.

    Each input line is stripped of surrounding whitespace and emitted as
    ``"<item> <item> \\n"`` (note the space before the newline).

    Args:
        infile: path of the file to read; defaults to the original hard-coded
            location.
        outfile: path of the file to create or overwrite; defaults to the
            original hard-coded location.

    Returns:
        None.
    """
    # The input is opened first, so a missing input file raises before the
    # output file is created or truncated.
    with open(infile, 'r') as src, open(outfile, 'w') as dst:
        for line in src:
            item = line.strip()
            dst.write("%s %s \n" % (item, item))
    return
if __name__ == "__main__":
    # Only the duplicate() step runs when the script is executed.
    # The generate() step is intentionally disabled here; call generate()
    # instead when the input text files need to be (re)built.
    duplicate()