forked from categulario/estadistica
/
compute_p.py
83 lines (77 loc) · 1.86 KB
/
compute_p.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import numpy as np
from gmpy2 import log, exp
if __name__ == '__main__':
    # Coefficient vector of the model. Its first entry multiplies the
    # constant 1 that is prepended to every data row further down.
    # NOTE(review): presumably produced by a fitting step that is not part
    # of this script -- confirm where these values come from.
    beta = np.array([
        -0.1304872044,
        0.0044359242,
        -0.0073997135,
        0.0049064469,
        0.0136753262,
        0.0053199764,
        0.0030747525,
        0.0058595307,
        0.0065312279,
        0.0027749817,
        0.0119486008,
        0.0084906961,
        -0.0071369348,
        0.0111621659,
        0.0107097929,
        0.0095900680,
        0.0100376958,
        0.0078948492,
        0.0101152806,
        -0.1137999010,
        0.0025213808,
        -0.0248121621,
        0.0102444762,
        0.0130512402,
        0.0225180184,
        0.0027041271,
        0.0038154447,
        -0.0019889984,
        0.0040718530,
        -0.0043652449,
        0.0035052964,
        0.0115125718,
        0.0108878716,
        0.0059364008,
        0.0106850746,
        0.0096340528,
        0.0019104727,
        0.0073117573,
        0.0067469463,
        0.0048177387,
        0.0114278735,
        0.0085407650,
        0.0029670354,
        0.0084244794,
        0.0080599022,
        0.0006081326,
        0.0059236393,
        0.0079667724,
        0.0081770204,
        0.0078493733,
        0.0052903681,
        0.0081219285,
        0.0150988575,
        0.0164040146,
        0.0023174244,
        -0.0032760498,
        -0.0011112326,
        -0.0002531620,
        0.0090297403,
    ])

    def make_P(weights):
        """Return a function P(x) bound to the coefficient vector *weights*."""

        def P(x):
            """Return exp(weights . x) / (1 + exp(weights . x))."""
            # Evaluate the exponential once and reuse it in both places.
            growth = exp(weights.dot(x))
            return growth / (1 + growth)

        return P

    P = make_P(beta)

    # Compare P(x) against the recorded outcomes: the two files are read in
    # lockstep, one line of each per iteration.
    with open('spambase.data') as datafile, \
            open('spambase.result') as resultfile:
        for index, line in enumerate(datafile):
            fields = line.strip().split(',')
            # Prepend the constant 1 so the first coefficient acts as the
            # intercept term of the dot product.
            x = np.array([1] + [float(field) for field in fields])
            probability = P(x)
            expected = resultfile.readline().strip()
            print(probability, expected)
            # Stop after the row with index 100, i.e. 101 rows in total.
            if index == 100:
                break