/
dynaesti.py
347 lines (316 loc) · 12.8 KB
/
dynaesti.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
from __future__ import division, print_function
import matplotlib.pylab as P
import torch as T
from curvfife import CurvFiFE
from tqdm import trange, tqdm
import hickle
from scipy.optimize import fmin_l_bfgs_b as BFGS_min
def norm_cdf(x):
    """Return the standard normal CDF evaluated elementwise at tensor `x`.

    Computed from the error function as ``(1 + erf(x / sqrt(2))) / 2``.
    """
    return (1 + T.erf(x / P.sqrt(2))) / 2


def norm_ppf(x):
    """Return the standard normal quantile function (inverse CDF) of `x`.

    `x` is clamped to ``[1e-9, 1 - 1e-9]`` before inversion so that the
    inverse error function is never evaluated at exactly -1 or 1.
    """
    clipped = T.clamp(x, 1e-9, 1 - 1e-9)
    return P.sqrt(2) * T.erfinv(2 * clipped - 1)
from datetime import datetime
import os
import errno
def to_tens(a, dims=None):
    """Convert `a` to a float64 pytorch tensor with at least `dims` dimensions.

    Parameters
    ----------
    a : scalar, array-like or torch.Tensor
        The value to convert. Tensors (including subclasses such as
        `torch.nn.Parameter`) are cast with `.double()` rather than being
        re-wrapped, so they stay attached to their autograd graph.
    dims : int or None
        If given, the result is broadcast against a zero tensor of shape
        `(1,) * dims`, which guarantees it has at least `dims` dimensions.
        If `None`, the shape of `a` is left unchanged.

    Returns
    -------
    torch.Tensor
        A new float64 tensor holding the values of `a`.
    """
    # isinstance (rather than an exact type comparison) lets Tensor subclasses
    # through without the detaching copy that T.tensor(...) would make.
    if not isinstance(a, T.Tensor):
        a = T.tensor(a, dtype=T.float64)
    pad_shape = () if dims is None else (1,) * dims
    # Adding zeros of the padding shape relies on broadcasting to add leading
    # singleton dimensions; it also always yields a fresh tensor.
    return a.double() + T.zeros(pad_shape, dtype=T.float64)
def trapz(y, x):
    """Integrate each column of `y` over the grid `x` with the trapezoid rule.

    Parameters
    ----------
    y : 2-D tensor
        Function values; row `k` holds the values at `x[k]`.
    x : 1-D tensor
        Sample points, one per row of `y`.

    Returns
    -------
    1-D tensor
        One integral per column of `y`.
    """
    widths = x[1:] - x[:-1]
    # Average of each pair of neighbouring rows = trapezoid mid-height.
    midpoints = (y[:-1, :] + y[1:, :]) / 2
    return widths.matmul(midpoints)
def log(x):
    """Return the elementwise natural log of `x` as a float64 tensor.

    `x` is first converted with `to_tens`, then offset by `P.spacing(1)`
    (presumably NumPy's machine epsilon re-exported by pylab) so that an
    input of exactly zero does not produce `-inf`.
    """
    tiny = P.spacing(1)
    return T.log(to_tens(x) + tiny)
class IRF(object):
    """Base class laying out the interface of an item response function (IRF).

    The base `__call__` and `log_p` are written in terms of each other, so a
    concrete subclass must override at least one of them.
    """

    def __init__(self, params):
        """Store `params` (converted with `to_tens`) as the IRF parameters."""
        self._params = to_tens(params)

    def __call__(self, theta, resp):
        """Evaluate the IRF at `theta` for item response `resp`.

        The value is the probability (density) `p` of observing `resp` on
        this item from a responder whose latent trait is `theta`.

        `theta` and `resp` may be scalars or array-likes and broadcasting is
        expected to be honoured: `theta.shape==(n, 1)` together with
        `resp.shape==(1, m)` should give `p.shape==(n, m)`.

        This default is `exp(self.log_p(theta, resp))`. It is only evaluated
        when the params, `theta` or `resp` is a `torch.Tensor`; otherwise
        `None` is returned.
        """
        kinds = [type(v) for v in (self._params, theta, resp)]
        if T.Tensor in kinds:
            return T.exp(self.log_p(theta, resp))
        return None

    def log_p(self, theta, resp):
        """Evaluate the log of the IRF at `theta` for item response `resp`.

        The value is the LOG probability (density) of observing `resp` on
        this item from a responder whose latent trait is `theta`.

        `theta` and `resp` may be scalars or array-likes and broadcasting is
        expected to be honoured: `theta.shape==(n, 1)` together with
        `resp.shape==(1, m)` should give a result of shape `(n, m)`.

        This default is `log(self(theta, resp))`. It is only evaluated when
        the params, `theta` or `resp` is a `torch.Tensor`; otherwise `None`
        is returned.
        """
        kinds = [type(v) for v in (self._params, theta, resp)]
        if T.Tensor in kinds:
            return T.log(self(theta, resp))
        return None

    def get_func(self, resps):
        "Return a function `f` with `f(theta) == self.__call__(theta, resps)`"
        return lambda theta: self(theta, resps)

    def get_log_func(self, resps):
        "Return a function `f` with `f(theta) == self.log_p(theta, resps)`"
        return lambda theta: self.log_p(theta, resps)

    def fit_params_from_marginals(self, resps, theta, marginals,
                                  bounds=None, params0=None):
        """Refit the IRF params from responses and latent-trait marginals.

        The params are chosen to maximize the expected log probability
        (density) of the observed responses, with the expectation taken over
        each responder's latent-trait distribution. The expectation is
        integrated over `theta` with the trapezoid rule, gradients come from
        autograd, and the optimization is run by L-BFGS-B.

        This is the generic implementation shared by subclasses.

        Parameters
        ----------
        resps : length n array-like
            Item responses of the `n` responders; `resps[i]` belongs to
            responder `i`.
        theta : length N array-like of floats
            Grid of latent-trait values at which `marginals` is tabulated.
        marginals : N by n array-like of floats
            `marginals[j, i]` is the marginal probability (density) that
            responder `i` has latent trait `theta[j]`.
        bounds : list of pairs, or else None
            Box constraints on the params. `bounds[i]` has the form
            `(min_value, max_value)` for `params[i]`; a `None` entry leaves
            that side unbounded. With the default `None`, every param is
            unbounded.
        params0 : same type as `params`, else None
            Starting point of the optimization. If `None`, the current
            params are used.

        Returns
        -------
        The fitted params, which are also stored on the instance.
        """
        marginals = to_tens(marginals)
        theta = to_tens(theta)
        n_resps = len(resps)
        # Unpacking also enforces that `marginals` is two-dimensional.
        _, n_cols = marginals.shape
        assert n_resps == n_cols, \
            "resps and marginals aren't the same length. {0}!={1}"\
            .format(n_resps, n_cols)

        theta = theta.view(-1, 1)            # column: one row per grid point
        resps = to_tens(resps).view(1, -1)   # row: one column per responder

        def neg_expected_ll(candidate):
            """Return (objective, gradient) at `candidate` for the optimizer."""
            self._params = to_tens(candidate)
            self._params.requires_grad_(True)
            log_probs = self.log_p(theta, resps)
            objective = -trapz(marginals*log_probs, theta.view(-1)).sum()
            objective.backward()
            gradient = self._params.grad
            self._params.requires_grad_(False)
            return objective.detach().item(), P.array(gradient)

        start = self._params if params0 is None else params0
        best_params, _final_value, _info = \
            BFGS_min(neg_expected_ll, start, bounds=bounds,
                     maxls=100, pgtol=1e-08)
        self._params = to_tens(best_params)
        return self._params

    def get_params(self):
        """Return the params of the IRF"""
        return self._params
class IRF_3PL(IRF):
    """3 Parameter Logistic IRF

    The params are the triple `(a, b, c)` and the probability of a correct
    response at latent trait `theta` is
    `c + (1-c) / (1 + exp(-a*(theta - b)))`, i.e. `a` scales the slope, `b`
    shifts the curve and `c` is the lower asymptote.

    Example usage (the call returns a tensor, so compare with a tolerance):

    >>> a = 1
    >>> b = 0.3
    >>> c = 0.1
    >>> irf = IRF_3PL((a, b, c))
    >>> theta = 0.2
    >>> response = 1
    >>> expected = c + (1-c)*1./(1 + P.exp(-a*(theta - b)))
    >>> abs(float(irf(theta, response)) - expected) < 1e-12
    True
    """

    def __call__(self, theta, resp):
        """Return the 3PL IRF evaluated at `theta` given item response `resp`.

        Parameters
        ----------
        theta : scalar or array-like of floats
            The latent traits.
        resp : scalar or array-like of 0's and 1's
            Whether the response was correct or not.

        Returns
        -------
        p : scalar or array-like (following the shape of theta+resp)
            The conditional probability of `resp` given `theta`. Broadcasting
            is respected. E.g. if `theta.shape==(n, 1)` and
            `resp.shape==(1,m)`, then p will have `p.shape==(n,m)`.
        """
        a, b, c = self._params
        make_tens = T.Tensor in map(type, [a, b, c, theta, resp])
        if make_tens:
            # Any tensor operand: do the whole computation in torch.
            theta = to_tens(theta)
            resp = to_tens(resp)
            exp = T.exp
        else:
            # Only reached when neither the params nor the inputs are tensors.
            exp = P.exp
        p = c + (1-c)*1./(1 + exp(-a*(theta-b)))
        # Probability of the observed response: p if correct, 1-p otherwise.
        return resp*p + (1-resp)*(1-p)

    def fit_params_from_marginals(self, resps, theta, marginals,
                                  a_min=0.1, a_max=5, c_max=0.30, params0=None):
        """Update IRF params given the latent traits of responders and their responses.

        Update the parameters given responses and the distributions on the
        latent traits of the responders that produced them. The update should
        be done to maximize the expected log probability (density) of
        responses, where expectation is taken over the distributions in the
        latent traits.

        Parameters
        ----------
        resps : length n array-like
            The list of item responses by the `n` responders. `resps[i]` is
            the response of the `i`th responder.
        theta : length N array-like of floats
            The values at which the marginal distributions are evaluated at.
        marginals : N by n array-like of floats
            `marginals[j, i]` is the marginal probability (density) that the
            latent trait of responder `i` is equal to `theta[j]`.
        a_min, a_max, c_max
            The min and maxes for the `a` and `c` parameters. Won't fit
            anything beyond these limits. `a` is kept in `[a_min, a_max]`,
            `c` in `[0, c_max]`, and `b` is unbounded.
        params0 : same type as `params`, else None
            The initial point at which the optimization starts for the
            parameters. If None, uses old params as the starting point.

        Returns
        -------
        Whatever the base-class `fit_params_from_marginals` returns.
        """
        bounds = ((a_min, a_max), (None, None), (0, c_max))
        return super().fit_params_from_marginals(resps, theta, marginals,
                                                 bounds=bounds, params0=params0)
def DynAEsti(items, responses, bounded=False, R=15, Nf=1001, bar=True,
             default_h=None, save_each_round=False, Theta0=None, **kwargs):
    """Dynamic Ability Estimation

    Given responders' `responses` to `items` (each response carrying the time
    at which it was given), `DynAEsti` outputs a fitted `CurvFiFE` object for
    each responder's ability over time, as well as fits the IRFs for the
    items. It alternates, for `R` rounds, between fitting every responder's
    ability curve and refitting every item's parameters.

    Parameters
    ----------
    items : length m list of IRF objects
        The `m` items that responders can respond to. The parameters of each
        IRF should be initialized to their initial guesses. These parameters
        will be optimized in place by DynAEsti. An item that received no
        responses is left untouched.
    responses : list of n lists of triplets
        The list of the `n` responders' responses to items. `responses[i]` is
        a list of triplets corresponding to responses by the `i`th responder.
        The triplets are of the form `(time, item_index, resp)`. `time` is
        the time they responded to the item. `item_index` is the index of the
        item they responded to in `items`. `resp` is their response to the
        item.
    bounded : boolean
        Whether the latent trait should be bounded (between 0 and 1), or
        unbounded (-infty, infty).
    R : positive int
        Number of rounds of EM to run.
    Nf : int
        Controls the amount of discretization for the problem distributions.
    bar : boolean
        Whether you want a progress bar to display tracking progress of
        calculations (highly recommended).
    default_h : positive float
        If all of a responder's responses to items happen at the same time,
        it's impossible to know how fast or slow their latent trait changes.
        `default_h` gives the default bandwidth `h` used to fit these cases.
        If `None`, defaults to the maximum of 0.1, and `0.19 * ((the overall
        max response time) - (the overall min response time))`; when there
        are no responses at all this is 0.1.
    save_each_round : boolean
        Whether to save the Thetas and IRF params each round to a file or
        not. Files are written as `round_<k>.hkl` inside a directory named
        after the time the call started, relative to the working directory.
    Theta0 : None (default) or list of n fitted CurvFiFE objects
        If not `None`, then this is the initial guess for the distributions
        of the latent trait curves of the responders. DynAEsti will start
        with fitting the items using these.
    kwargs
        Extra key-word arguments to pass to CurvFiFE when using the
        feed_data_CV method. These can include `hh`, `k`, `monte_samps`,
        `shuffle`, `eps`, `y_max`, `Ny`, `tol`, `dx_min`, `max_iter`, `s`,
        `auto_increase_eps`, and `eps_0_for_h_inf`. For speed, we recommend
        `k=5` and `monte_samps=1000`. For quality, `k=10` and
        `monte_samps=10000`. `y_max` defaults to 6 when not supplied.

    Returns
    -------
    Thetas : length-n list of CurvFiFE objects
        List of CurvFiFE objects for the responders, containing the
        distribution of their latent traits over time. `Theta[i]` is the
        CurvFiFE object for the `i`th student. When `Theta0` is given, this
        is that same list.
    """
    NOW = datetime.now().strftime("%B_%d_%Y_at_%I%M%p")
    dirname = NOW + "/"

    def savedata(darr, name):
        """Dump `darr` to `<dirname><name>.hkl` with gzip compression."""
        name = dirname + name + '.hkl'
        hickle.dump(darr, name, mode='w', compression='gzip')

    n = len(responses)
    m = len(items)

    if default_h is None:
        # Flatten first so that responders without any responses (or an
        # empty data set) do not make max()/min() fail on an empty sequence.
        all_times = [trip[0] for trip_list in responses for trip in trip_list]
        time_span = (max(all_times) - min(all_times)) if all_times else 0
        default_h = max(0.1, 0.19 * time_span)

    Thetas = [CurvFiFE() for _ in range(n)] if Theta0 is None else Theta0

    kwargs.setdefault('y_max', 6)
    if bounded:
        theta = T.linspace(0, 1, Nf)
    else:
        theta = T.linspace(-kwargs['y_max'], kwargs['y_max'], Nf)

    def n_loop_iter(i):
        """Learn the `i`th ability curve"""
        times = to_tens([triplet[0] for triplet in responses[i]])
        ldists = [
            items[triplet[1]].get_log_func(triplet[2])
            for triplet in responses[i]
        ]
        Thetas[i].feed_data_CV(times, ldists, bounded=bounded, bar=bar,
                               default_h=default_h, **kwargs)

    def m_loop_iter(j):
        """Learn the `j`th item parameters"""
        resps = []
        marginals = []
        # collect relevant marginals
        for i in range(n):
            for t, ind, r in responses[i]:
                if ind == j:
                    resps.append(r)
                    marginals.append(
                        T.exp(
                            Thetas[i].get_lmarginals(t, theta, bounded=bounded)[:, 0]
                        )
                    )
        if not resps:
            # Nobody answered this item: there is nothing to fit and
            # T.stack would fail on an empty list, so keep its params.
            return
        items[j].fit_params_from_marginals(resps, theta, T.stack(marginals).t())

    R_rang = trange(R, ascii=True, desc="EM rounds") if bar else range(R)
    for round_num in R_rang:
        # With a user-supplied Theta0 the first round goes straight to the
        # item fit, using those curves as they are.
        if (Theta0 is None) or (round_num != 0):
            n_rang = trange(n, ascii=True, desc="Fitting ability curves") \
                if bar else range(n)
            for i in n_rang:
                n_loop_iter(i)

        m_rang = trange(m, ascii=True, desc="Fitting Problems") if bar else range(m)
        for j in m_rang:
            m_loop_iter(j)

        if save_each_round:
            # exist_ok makes this safe to repeat every round and safe against
            # another process creating the directory concurrently.
            os.makedirs(os.path.dirname(dirname), exist_ok=True)
            Theta_bundles = [Thetas[i].export_to_bundle() for i in range(n)]
            params = [P.array(item.get_params()) for item in items]
            savedata([Theta_bundles, params], 'round_{0}'.format(round_num))

    return Thetas