/
kCrossForACM.py
129 lines (109 loc) · 3.71 KB
/
kCrossForACM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import shutil, os ,sys, TestingForACM
from TestingForACM import *
from pymongo import Connection
from ACM import ACM
from Experiments import *
# MongoDB connection URI. No code visible in this file reads this name.
# NOTE(review): the URI embeds a username and password in source control;
# consider loading it from the environment or a config file instead.
connect = 'mongodb://project:project1234@yeda.cs.technion.ac.il/'
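# Command-line usage (see main()):
#   <data dir> <k>       - run k-fold cross validation and save the weight plot
#   LDA <data dir> <k>   - run the LDA learning pass over the k folds
# NOTE(review): this header originally also listed "num of checks" and
# "num of elements to compare" as inputs; nothing visible in this file
# reads those two values.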
def kCrossFix(data,k):
    """Average the ``weight_expr`` results of a k-fold run over ``data``.

    For every fold ``i`` in ``0 .. k-1`` the files
    ``<groupSize*i + 1>.txt`` .. ``<groupSize*(i + 1)>.txt`` are moved from
    ``data`` into the ``Test`` directory, an ``ACM`` named ``db<i+1>`` is
    created, ``dropDicts`` and ``learn(data)`` are called on it, and the
    result of ``weight_expr(acm, 'Test')`` is recorded.  Everything found in
    ``Test`` is moved back into ``data`` before the next fold starts.

    Args:
        data: Path of the directory holding the numbered ``N.txt`` files.
        k: Number of folds.  The fold size is the number of entries in
            ``data`` floor-divided by ``k``, so leftover files are never
            placed in ``Test``.

    Returns:
        A ``(weights, None)`` tuple.  ``weights`` is a list with the
        per-key average of the fold results, ordered by sorted key.  The
        second element is always ``None`` because the buffer-size
        experiment is disabled.
    """
    group_size = len(os.listdir(data)) // k

    # shutil.move() only moves *into* the destination when it is an existing
    # directory; otherwise every file would be renamed to a plain file called
    # 'Test', each one overwriting the previous.
    if not os.path.isdir('Test'):
        os.makedirs('Test')

    weight_res = []
    for fold in range(k):
        db_name = 'db' + str(fold + 1)
        try:
            for offset in range(1, group_size + 1):
                filenum = group_size * fold + offset
                shutil.move(data + '/' + str(filenum) + '.txt', 'Test')
            acm = ACM(db_name)
            acm.dropDicts(db_name)
            acm.learn(data)
            weight_res.append(weight_expr(acm, 'Test'))
        finally:
            # Put the held-out files back even when training or evaluation
            # fails, so the data directory is never left partially emptied.
            for name in os.listdir('Test'):
                shutil.move('Test/' + name, data)

    # Sum the fold results key by key.  The totals start at 0 through
    # dict.get(); a bare "+=" on the empty dict raised KeyError.
    tot_w_res = {}
    for key in weight_res[0]:
        for res in weight_res:
            tot_w_res[key] = tot_w_res.get(key, 0) + res[key]

    # Convert to float before dividing so integer totals are not truncated.
    averaged = {key: float(tot_w_res[key]) / k for key in tot_w_res}
    return [averaged[key] for key in sorted(averaged)], None
def learnAll(inDir,k,type):
    """Train one model per fold on ``inDir`` with that fold's files held out.

    For every fold ``i`` in ``0 .. k-1`` the files
    ``<groupSize*i + 1>.txt`` .. ``<groupSize*(i + 1)>.txt`` are moved from
    ``inDir`` into the ``Test`` directory, a model named ``db<i+1>`` is
    created, and ``dropDicts`` and ``learn(inDir)`` are called on it.
    Everything found in ``Test`` is then moved back into ``inDir``.

    Args:
        inDir: Path of the directory holding the numbered ``N.txt`` files.
        k: Number of folds.  The fold size is the number of entries in
            ``inDir`` floor-divided by ``k``.
        type: ``'LDA'`` selects the ``LDA`` class; any other value selects
            ``AutoCompModule``.

    Returns:
        None.  Prints ``Done learning`` once all folds have been processed.
    """
    group_size = len(os.listdir(inDir)) // k

    # Resolve the model class before any file is moved, so an unknown name
    # fails while the data directory is still intact.
    model_cls = LDA if type == 'LDA' else AutoCompModule

    # shutil.move() only moves *into* the destination when it is an existing
    # directory; otherwise every file would be renamed to a plain file called
    # 'Test', each one overwriting the previous.
    if not os.path.isdir('Test'):
        os.makedirs('Test')

    for fold in range(k):
        db_name = 'db' + str(fold + 1)
        try:
            for offset in range(1, group_size + 1):
                filenum = group_size * fold + offset
                shutil.move(inDir + '/' + str(filenum) + '.txt', 'Test')
            model = model_cls(db_name)
            model.dropDicts(db_name)
            model.learn(inDir)
        finally:
            # Restore the held-out files even if training raised.
            for name in os.listdir('Test'):
                shutil.move('Test/' + name, inDir)
    print('Done learning')
def checkLDA(inDir,k):
    """Train one ``LDA`` model per fold with that fold's files held out.

    For every fold ``i`` in ``0 .. k-1`` the files
    ``<groupSize*i + 1>.txt`` .. ``<groupSize*(i + 1)>.txt`` are moved from
    ``inDir`` into the ``Test`` directory, an ``LDA`` named ``db<i+1>`` is
    created and ``learn(inDir)`` is called on it.  Everything found in
    ``Test`` is then moved back into ``inDir``.

    Unlike ``learnAll``, ``dropDicts`` is not called here, and despite the
    function name no evaluation of the trained model is performed.

    Args:
        inDir: Path of the directory holding the numbered ``N.txt`` files.
        k: Number of folds.  The fold size is the number of entries in
            ``inDir`` floor-divided by ``k``.

    Returns:
        None.  Prints ``Done learning`` once all folds have been processed.
    """
    group_size = len(os.listdir(inDir)) // k

    # shutil.move() only moves *into* the destination when it is an existing
    # directory; otherwise every file would be renamed to a plain file called
    # 'Test', each one overwriting the previous.
    if not os.path.isdir('Test'):
        os.makedirs('Test')

    for fold in range(k):
        try:
            for offset in range(1, group_size + 1):
                filenum = group_size * fold + offset
                shutil.move(inDir + '/' + str(filenum) + '.txt', 'Test')
            lda = LDA('db' + str(fold + 1))
            lda.learn(inDir)
        finally:
            # Restore the held-out files even if training raised.
            for name in os.listdir('Test'):
                shutil.move('Test/' + name, inDir)
    print("Done learning")
def runTest(data,k):
    """Run ``kCrossFix(data, k)`` and save its weight results as a plot.

    The weight results are drawn as a black line against the x values
    10, 20, ..., 100 and written to ``<k>Cross-weights.png`` in the current
    working directory.

    Args:
        data: Data directory, passed through to ``kCrossFix``.
        k: Number of folds, passed through to ``kCrossFix`` and used in the
            plot title and the output file name.
    """
    # The cross validation runs before the plotting libraries are imported.
    weight_g, _unused = kCrossFix(data, k)

    import numpy as np
    import matplotlib.pyplot as plt

    fold_label = str(k)
    x_values = range(10, 101, 10)

    plt.title(fold_label + '-Cross validation')
    plt.xlabel('Weight of trigram')
    plt.ylabel('Success rate')
    plt.plot(x_values, weight_g, 'k')
    plt.savefig(fold_label + 'Cross-weights.png')
    # The matching buffer-size plot is disabled: kCrossFix returns None for
    # that result.
def main():
    """Dispatch on the command line, then record and announce completion.

    ``LDA <dir> <k>`` calls ``checkLDA``; otherwise ``<dir> <k>`` calls
    ``runTest``.  Afterwards the text ``finish`` is written to
    ``finish.txt`` and ``notifier.notify()`` is called.
    """
    argv = sys.argv
    first = argv[1]
    if first != 'LDA':
        runTest(first, int(argv[2]))
    else:
        checkLDA(argv[2], int(argv[3]))

    # Marker file signalling that the run has completed.
    with open('finish.txt', 'w') as marker:
        marker.write('finish')

    # Imported only at this point, after the run itself has finished.
    from notifier import notify
    notify()
if __name__ == '__main__':
    import signal

    # Ignore hang-up, interrupt and terminate signals for the whole run
    # (presumably so a long job survives a closed terminal - confirm).
    for ignored in (signal.SIGHUP, signal.SIGINT, signal.SIGTERM):
        signal.signal(ignored, signal.SIG_IGN)

    main()