def test_lut_creation(self):
    """Test the lookup table creation function."""
    lut_func = create_beta_LUT((0.5, 2, 500), (-3, 3, 500))

    # Evaluate two (alpha, beta) test pairs
    options = validate_estimation_options(None)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    theta, weight = _get_quadrature_points(quad_n, quad_start, quad_stop)
    distribution = options['distribution'](theta)

    alpha1 = 0.89
    beta1 = 1.76
    p_value1 = ((weight * distribution) /
                (1.0 + np.exp(-alpha1 * (theta - beta1)))).sum()
    estimated_beta = lut_func(alpha1, p_value1)
    self.assertAlmostEqual(beta1, estimated_beta, places=4)

    alpha1 = 1.89
    beta1 = -2.34
    p_value1 = ((weight * distribution) /
                (1.0 + np.exp(-alpha1 * (theta - beta1)))).sum()
    estimated_beta = lut_func(alpha1, p_value1)
    self.assertAlmostEqual(beta1, estimated_beta, places=4)
def __init__(self, options=None):
    """Constructor for latent estimation class."""
    options = validate_estimation_options(options)

    # Quadrature Parameters
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']
    theta, weights = _get_quadrature_points(quad_n, quad_start, quad_stop)
    self.quad_bounds = (quad_start, quad_stop)

    # The locations and weights to use by default
    self.quadrature_locations = theta
    self.weights = weights
    self.null_distribution = options['distribution'](theta)

    # Triggers to run the estimation or use default
    self.estimate_distribution = options['estimate_distribution']
    self.n_points = (options['number_of_samples']
                     if self.estimate_distribution else 3)

    # Initialize the first cubic-spline class
    # and set the distribution to be an inverted U-shape
    cubic_spline = self._init_cubic_spline()
    cubic_spline.coefficients[self.n_points // 2 + 2] = 1
    self.cubic_splines = [cubic_spline]
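# Usage sketch (not from the source): instantiating the latent-distribution
# estimator. Only the constructor is shown above, so the class name
# `LatentPDF` is an assumption; adjust it to the actual class name.
latent = LatentPDF({'estimate_distribution': True, 'number_of_samples': 9})
nodes, weights = latent.quadrature_locations, latent.weights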
def test_credit_partial_integration(self):
    """Testing the partial integral in the partial credit model."""
    theta = _get_quadrature_points(61, -5, 5)
    response_set = np.array([0, 1, 2, 2, 1, 0, 3, 1, 3, 2, 2, 2])
    betas = np.array([0, -0.4, 0.94, -.37])
    discrimination = 1.42

    # Hand calculations
    offsets = np.cumsum(betas)[1:]
    first_pos = np.ones_like(theta)
    second_pos = np.exp(discrimination * (theta - offsets[0]))
    third_pos = np.exp(2 * discrimination * (theta - offsets[1] / 2))
    last_pos = np.exp(3 * discrimination * (theta - offsets[2] / 3))
    norm_term = first_pos + second_pos + third_pos + last_pos

    probability_values = [first_pos / norm_term, second_pos / norm_term,
                          third_pos / norm_term, last_pos / norm_term]

    expected = np.zeros((response_set.size, theta.size))
    for ndx, response in enumerate(response_set):
        expected[ndx] = probability_values[response]

    result = _credit_partial_integral(theta, betas, discrimination,
                                      response_set)

    np.testing.assert_array_almost_equal(result, expected)
def onepl_mml(dataset, alpha=None, options=None):
    """Estimates parameters in a 1PL IRT model.

    Args:
        dataset: [items x participants] matrix of True/False values
        alpha: [int] discrimination constraint
        options: dictionary with updates to default options

    Returns:
        discrimination: (float) estimate of test discrimination
        difficulty: (1d array) estimates of item difficulties

    Options:
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int
    """
    options = validate_estimation_options(options)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    # Difficulty estimation parameters
    n_items = dataset.shape[0]
    n_no, n_yes = get_true_false_counts(dataset)
    scalar = n_yes / (n_yes + n_no)

    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)
    the_sign = convert_responses_to_kernel_sign(unique_sets)

    discrimination = np.ones((n_items,))
    difficulty = np.zeros((n_items,))

    # Quadrature locations
    theta = _get_quadrature_points(quad_n, quad_start, quad_stop)
    distribution = options['distribution'](theta)

    # Inline definition of cost function to minimize
    def min_func(estimate):
        discrimination[:] = estimate
        _mml_abstract(difficulty, scalar, discrimination,
                      theta, distribution, options)
        partial_int = _compute_partial_integral(theta, difficulty,
                                                discrimination, the_sign)

        # Weight by the ability distribution
        partial_int *= distribution

        otpt = integrate.fixed_quad(
            lambda x: partial_int, quad_start, quad_stop, n=quad_n)[0]
        return -np.log(otpt).dot(counts)

    # Perform the minimization
    if alpha is None:  # 1PL method
        alpha = fminbound(min_func, 0.25, 10)
    else:  # Rasch method
        min_func(alpha)

    return alpha, difficulty
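# Usage sketch (hedged): this variant returns a (scalar, array) tuple.
# `binary_data` is an illustrative stand-in for a boolean
# [items x participants] matrix; real datasets replace it.
import numpy as np

binary_data = np.random.rand(10, 500) < 0.5  # illustrative only
alpha, difficulty = onepl_mml(binary_data)               # 1PL: alpha estimated
_, rasch_difficulty = onepl_mml(binary_data, alpha=1.0)  # Rasch: alpha fixed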
def test_quadrature_points(self):
    """Testing the creation of quadrature points."""
    n_points = 11

    # A smoke test to make sure it's running properly
    quad_points = _get_quadrature_points(n_points, -1, 1)
    x, _ = roots_legendre(n_points)

    np.testing.assert_allclose(x, quad_points)
def ability_eap(dataset, difficulty, discrimination, options=None):
    """Estimates the abilities for dichotomous models.

    Estimates the ability parameters (theta) for dichotomous models via
    expected a posteriori likelihood estimation.

    Args:
        dataset: [n_items, n_participants] (2d array) of measured responses
        difficulty: (1d array) of difficulty parameters for each item
        discrimination: (1d array) of discrimination parameters for each item
        options: dictionary with updates to default options

    Returns:
        abilities: (1d array) estimated abilities

    Options:
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int
    """
    options = validate_estimation_options(options)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    if np.atleast_1d(discrimination).size == 1:
        discrimination = np.full(dataset.shape[0], discrimination,
                                 dtype='float')

    the_sign = convert_responses_to_kernel_sign(dataset)

    theta = _get_quadrature_points(quad_n, quad_start, quad_stop)
    partial_int = _compute_partial_integral(theta, difficulty,
                                            discrimination, the_sign)

    # Weight by the input ability distribution
    partial_int *= options['distribution'](theta)

    # Compute the denominator
    denominator = integrate.fixed_quad(
        lambda x: partial_int, quad_start, quad_stop, n=quad_n)[0]

    # Compute the numerator
    partial_int *= theta
    numerator = integrate.fixed_quad(
        lambda x: partial_int, quad_start, quad_stop, n=quad_n)[0]

    return numerator / denominator
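# Usage sketch (hedged): recover abilities after item parameters are known.
# create_synthetic_irt_dichotomous is used elsewhere in this suite; the
# parameter values below are illustrative only.
import numpy as np

difficulty = np.linspace(-1.5, 1.5, 10)
discrimination = 1.2  # scalar is broadcast to every item inside ability_eap
data = create_synthetic_irt_dichotomous(difficulty, discrimination,
                                        np.random.randn(300))
abilities = ability_eap(data, difficulty, discrimination)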
def test_array_LUT(self):
    """Test the creation of the array lookup table."""
    alpha = np.linspace(.2, 4, 500)
    beta = np.linspace(-6, 6, 500)

    theta, weights = _get_quadrature_points(41, -5, 5)

    output = np.zeros((alpha.size, beta.size))
    _array_LUT(alpha, beta, theta, weights, output)

    # Expected
    z = alpha[:, None, None] * (beta[None, :, None] - theta[None, None, :])
    expected = np.sum(1.0 / (1. + np.exp(z)) * weights[None, None, :], axis=2)

    np.testing.assert_allclose(output, expected, atol=1e-4, rtol=1e-3)
def test_graded_partial_integral(self):
    """Testing the partial integral in the graded model."""
    theta, _ = _get_quadrature_points(61, -5, 5)
    responses = np.random.randint(0, 3, (10, 100))
    betas = np.array([-10000, -.3, 0.1, 1.2])
    betas_roll = np.roll(betas, -1)
    betas_roll[-1] = 10000
    invalid_response_mask = np.zeros_like(responses, dtype='bool')

    output = np.ones((responses.shape[1], theta.size))
    for ndx in range(responses.shape[0]):
        output *= _graded_partial_integral(theta, betas, betas_roll,
                                           np.array([1.0]), responses[ndx],
                                           invalid_response_mask[ndx])

    # Compare to hand calculations
    hand_calc = list()
    for ndx in range(responses.shape[1]):
        left_betas = betas[responses[:, ndx]]
        right_betas = betas_roll[responses[:, ndx]]
        probability = (
            1.0 / (1.0 + np.exp(left_betas[:, None] - theta[None, :])) -
            1.0 / (1.0 + np.exp(right_betas[:, None] - theta[None, :])))
        hand_calc.append(probability.prod(0))

    hand_calc = np.asarray(hand_calc)
    np.testing.assert_array_equal(hand_calc, output)

    # Test invalid responses
    invalid_response_mask[0, 1] = True
    invalid_response_mask[0, 7] = True
    output = _graded_partial_integral(theta, betas, betas_roll,
                                      np.array([1.0]), responses[0],
                                      invalid_response_mask[0])

    np.testing.assert_equal(output[1], np.ones(61))
    np.testing.assert_equal(output[7], np.ones(61))

    with np.testing.assert_raises(AssertionError):
        for ndx in [0, 2, 3, 4, 5, 6, 8, 9]:
            np.testing.assert_equal(output[ndx], np.ones(61))
def test_integral_equations(self):
    """Tests solving for the integral given a ratio."""
    np.random.seed(786)
    theta = np.random.randn(50000)
    discrimination = 1.43
    difficulty = np.array([-.4, .1, .5])

    # Compare against dichotomous data
    syn_data = create_synthetic_irt_dichotomous(difficulty, discrimination,
                                                theta)
    n0 = np.count_nonzero(~syn_data, axis=1)
    n1 = np.count_nonzero(syn_data, axis=1)
    ratio = n1 / (n1 + n0)

    theta = _get_quadrature_points(61, -5, 5)
    distribution = np.exp(-np.square(theta) / 2) / np.sqrt(2 * np.pi)
    results = _solve_integral_equations(discrimination, ratio,
                                        distribution, theta)
    np.testing.assert_array_almost_equal(results, difficulty, decimal=2)
def test_partial_integration_single(self):
    """Tests the integration quadrature function."""
    # Set seed for repeatability
    np.random.seed(154)

    discrimination = 1.32
    difficulty = .67
    response = np.random.randint(low=0, high=2, size=(1, 10))

    quad_points, _ = _get_quadrature_points(61, -6, 6)

    value = _compute_partial_integral(
        quad_points, difficulty, discrimination, response[0],
        np.zeros_like(response, dtype='bool')[0])

    discrrm = discrimination * np.power(-1, response)
    expected = 1.0 / (1 + np.exp(np.outer(discrrm,
                                          (quad_points - difficulty))))

    np.testing.assert_allclose(value, expected)
def test_credit_partial_integration(self):
    """Testing the partial integral in the partial credit model."""
    theta, _ = _get_quadrature_points(61, -5, 5)
    response_set = np.array([0, 1, 2, 2, 1, 0, 3, 1, 3, 2, 2, 2])
    betas = np.array([0, -0.4, 0.94, -.37])
    discrimination = 1.42
    invalid_response_mask = np.zeros_like(response_set, dtype='bool')

    # Hand calculations
    offsets = np.cumsum(betas)[1:]
    first_pos = np.ones_like(theta)
    second_pos = np.exp(discrimination * (theta - offsets[0]))
    third_pos = np.exp(2 * discrimination * (theta - offsets[1] / 2))
    last_pos = np.exp(3 * discrimination * (theta - offsets[2] / 3))
    norm_term = first_pos + second_pos + third_pos + last_pos

    probability_values = [first_pos / norm_term, second_pos / norm_term,
                          third_pos / norm_term, last_pos / norm_term]

    expected = np.zeros((response_set.size, theta.size))
    for ndx, response in enumerate(response_set):
        expected[ndx] = probability_values[response]

    result = _credit_partial_integral(theta, betas, discrimination,
                                      response_set, invalid_response_mask)
    np.testing.assert_array_almost_equal(result, expected)

    # Invalid responses should yield ones
    invalid_response_mask[1] = True
    invalid_response_mask[7] = True
    result = _credit_partial_integral(theta, betas, discrimination,
                                      response_set, invalid_response_mask)

    np.testing.assert_equal(result[1], np.ones(61))
    np.testing.assert_equal(result[7], np.ones(61))

    with np.testing.assert_raises(AssertionError):
        for ndx in [0, 2, 3, 4, 5, 6, 8, 9]:
            np.testing.assert_equal(result[ndx], np.ones(61))
def test_unfold_partial_integration(self):
    """Testing the unfolding integral."""
    theta, _ = _get_quadrature_points(61, -5, 5)
    response_set = np.array([0, 1, 2, 2, 1, 0, 3, 1, 3, 2, 2, 2])
    betas = np.array([-1.3, -.4, 0.2])
    delta = -0.76
    invalid_response_mask = np.zeros_like(response_set, dtype='bool')

    # (2N - 1) / 2 - n
    folding = 3.5 - np.arange(4)
    discrimination = 1.42

    # Convert to PCM thresholds
    full = np.concatenate((betas, [0], -betas[::-1]))
    full += delta

    scratch = np.zeros((full.size + 1, theta.size))
    _unfold_func(full, discrimination, theta, scratch)

    expected = np.zeros((response_set.size, theta.size))
    for ndx, response in enumerate(response_set):
        expected[ndx] = scratch[response]

    result = _unfold_partial_integral(theta, delta, betas, discrimination,
                                      folding, response_set,
                                      invalid_response_mask)
    np.testing.assert_array_almost_equal(result, expected)

    # Invalid responses should yield ones
    invalid_response_mask[1] = True
    invalid_response_mask[7] = True
    result = _unfold_partial_integral(theta, delta, betas, discrimination,
                                      folding, response_set,
                                      invalid_response_mask)

    np.testing.assert_equal(result[1], np.ones(61))
    np.testing.assert_equal(result[7], np.ones(61))

    with np.testing.assert_raises(AssertionError):
        for ndx in [0, 2, 3, 4, 5, 6, 8, 9]:
            np.testing.assert_equal(result[ndx], np.ones(61))
def test_partial_integration_array(self):
    """Tests the integration quadrature function on an array."""
    # Set seed for repeatability
    np.random.seed(121)

    discrimination = np.random.rand(5) + 0.5
    difficulty = np.linspace(-1.3, 1.3, 5)
    the_sign = (-1)**np.random.randint(low=0, high=2, size=(5, 1))

    quad_points = _get_quadrature_points(61, -6, 6)
    dataset = _compute_partial_integral(quad_points, difficulty,
                                        discrimination, the_sign)
    value = integrate.fixed_quad(lambda x: dataset, -6, 6, n=61)[0]

    discrrm = discrimination * the_sign.squeeze() * -1
    xx = np.linspace(-6, 6, 5001)
    yy = irt_evaluation(difficulty, discrrm, xx)
    yy = yy.prod(axis=0)
    expected = yy.sum() * 12 / 5001

    self.assertAlmostEqual(value, expected, places=3)
def test_graded_partial_integral(self):
    """Testing the partial integral in the graded model."""
    theta = _get_quadrature_points(61, -5, 5)
    responses = np.random.randint(0, 3, (10, 100))
    betas = np.array([-10000, -.3, 0.1, 1.2])
    betas_roll = np.roll(betas, -1)
    betas_roll[-1] = 10000

    output = _graded_partial_integral(theta, betas, betas_roll,
                                      1.0, responses)

    # Compare to hand calculations
    hand_calc = list()
    for ndx in range(responses.shape[1]):
        left_betas = betas[responses[:, ndx]]
        right_betas = betas_roll[responses[:, ndx]]
        probability = (
            1.0 / (1.0 + np.exp(left_betas[:, None] - theta[None, :])) -
            1.0 / (1.0 + np.exp(right_betas[:, None] - theta[None, :])))
        hand_calc.append(probability.prod(0))

    hand_calc = np.asarray(hand_calc)
    np.testing.assert_array_equal(hand_calc, output)
def test_unfold_partial_integration(self):
    """Testing the unfolding integral."""
    theta = _get_quadrature_points(61, -5, 5)
    response_set = np.array([0, 1, 2, 2, 1, 0, 3, 1, 3, 2, 2, 2])
    betas = np.array([-1.3, -.4, 0.2])
    delta = -0.76

    # (2N - 1) / 2 - n
    folding = 3.5 - np.arange(4)
    discrimination = 1.42

    # Convert to PCM thresholds
    full = np.concatenate((betas, [0], -betas[::-1]))
    full += delta

    scratch = np.zeros((full.size + 1, theta.size))
    _unfold_func(full, discrimination, theta, scratch)

    expected = np.zeros((response_set.size, theta.size))
    for ndx, response in enumerate(response_set):
        expected[ndx] = scratch[response]

    result = _unfold_partial_integral(theta, delta, betas, discrimination,
                                      folding, response_set)

    np.testing.assert_array_almost_equal(result, expected)
def gum_mml(dataset, options=None):
    """Estimate parameters for the graded unfolding model.

    Estimate the discrimination, delta, and threshold parameters for the
    graded unfolding model using marginal maximum likelihood.

    Args:
        dataset: [n_items, n_participants] 2d array of measured responses
        options: dictionary with updates to default options

    Returns:
        discrimination: (1d array) estimates of item discrimination
        delta: (1d array) estimates of item folding values
        difficulty: (2d array) estimates of item thresholds

    Options:
        * max_iteration: int
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int
    """
    options = validate_estimation_options(options)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    responses, item_counts = condition_polytomous_response(dataset,
                                                           trim_ends=False,
                                                           _reference=0.0)
    n_items = responses.shape[0]

    # Quadrature locations
    theta = _get_quadrature_points(quad_n, quad_start, quad_stop)
    distribution = options['distribution'](theta)

    # Initialize item parameters for iterations
    discrimination = np.ones((n_items,))
    betas = np.full((n_items, item_counts.max() - 1), np.nan)
    delta = np.zeros((n_items,))
    partial_int = np.ones((responses.shape[1], theta.size))

    # Set initial estimates to be evenly spaced
    for ndx in range(n_items):
        item_length = item_counts[ndx] - 1
        betas[ndx, :item_length] = np.linspace(-1, 1, item_length)

    # This is the index associated with "folding" about the center
    fold_span = ((item_counts[:, None] - 0.5) -
                 np.arange(betas.shape[1] + 1)[None, :])

    #############
    # 1. Start the iteration loop
    # 2. Estimate discrimination/difficulty jointly
    # 3. Integrate over theta
    # 4. Minimize and repeat
    #############
    for iteration in range(options['max_iteration']):
        previous_discrimination = discrimination.copy()
        previous_betas = betas.copy()
        previous_delta = delta.copy()

        # Quadrature evaluation for values that do not change
        # This is done during the outer loop to address rounding errors
        # and for speed
        partial_int *= 0.0
        partial_int += distribution[None, :]
        for item_ndx in range(n_items):
            partial_int *= _unfold_partial_integral(theta, delta[item_ndx],
                                                    betas[item_ndx],
                                                    discrimination[item_ndx],
                                                    fold_span[item_ndx],
                                                    responses[item_ndx])

        # Loop over each item and solve for the alpha / beta parameters
        for item_ndx in range(n_items):
            # pylint: disable=cell-var-from-loop
            item_length = item_counts[item_ndx] - 1

            # Remove the previous output
            old_values = _unfold_partial_integral(
                theta, previous_delta[item_ndx], previous_betas[item_ndx],
                previous_discrimination[item_ndx], fold_span[item_ndx],
                responses[item_ndx])
            partial_int /= old_values

            def _local_min_func(estimate):
                new_betas = estimate[2:]
                new_values = _unfold_partial_integral(theta, estimate[1],
                                                      new_betas, estimate[0],
                                                      fold_span[item_ndx],
                                                      responses[item_ndx])
                new_values *= partial_int
                otpt = integrate.fixed_quad(
                    lambda x: new_values, quad_start, quad_stop, n=quad_n)[0]
                return -np.log(otpt).sum()

            # Initial guess of item parameters
            initial_guess = np.concatenate(([discrimination[item_ndx]],
                                            [delta[item_ndx]],
                                            betas[item_ndx]))

            otpt = fmin_slsqp(_local_min_func, initial_guess,
                              disp=False,
                              bounds=[(.25, 4)] + [(-2, 2)] +
                                     [(-6, 6)] * item_length)

            discrimination[item_ndx] = otpt[0]
            delta[item_ndx] = otpt[1]
            betas[item_ndx, :] = otpt[2:]

            new_values = _unfold_partial_integral(theta, delta[item_ndx],
                                                  betas[item_ndx],
                                                  discrimination[item_ndx],
                                                  fold_span[item_ndx],
                                                  responses[item_ndx])
            partial_int *= new_values

        if np.abs(previous_discrimination - discrimination).max() < 1e-3:
            break

    return discrimination, delta, betas
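# Usage sketch (hedged): fit the graded unfolding model to a polytomous
# response matrix. Random responses stand in for real data here.
import numpy as np

poly_data = np.random.randint(0, 4, (8, 500))  # illustrative only
discrimination, delta, thresholds = gum_mml(poly_data,
                                            {'max_iteration': 100})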
def onepl_full(dataset, alpha=None, options=None):
    """Estimates parameters in a 1PL IRT model.

    This function is slow; please use onepl_mml.

    Args:
        dataset: [items x participants] matrix of True/False values
        alpha: scalar of discrimination used in model (default to 1)
        options: dictionary with updates to default options

    Returns:
        discrimination: (float) estimate of test discrimination
        difficulty: (1d array) estimates of item difficulties

    Options:
        * max_iteration: int
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int

    Notes:
        If alpha is supplied, then this solves a Rasch model
    """
    options = validate_estimation_options(options)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    n_items = dataset.shape[0]
    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)
    the_sign = convert_responses_to_kernel_sign(unique_sets)

    theta = _get_quadrature_points(quad_n, quad_start, quad_stop)
    distribution = options['distribution'](theta)

    discrimination = np.ones((n_items,))
    difficulty = np.zeros((n_items,))

    def alpha_min_func(alpha_estimate):
        discrimination[:] = alpha_estimate

        for iteration in range(options['max_iteration']):
            previous_difficulty = difficulty.copy()

            # Quadrature evaluation for values that do not change
            partial_int = _compute_partial_integral(theta, difficulty,
                                                    discrimination, the_sign)
            partial_int *= distribution

            for item_ndx in range(n_items):
                # pylint: disable=cell-var-from-loop
                # Remove the contribution from the current item
                local_int = _compute_partial_integral(
                    theta, difficulty[item_ndx, None],
                    discrimination[item_ndx, None], the_sign[item_ndx, None])
                partial_int /= local_int

                def min_local_func(beta_estimate):
                    difficulty[item_ndx] = beta_estimate
                    estimate_int = _compute_partial_integral(
                        theta, difficulty[item_ndx, None],
                        discrimination[item_ndx, None],
                        the_sign[item_ndx, None])
                    estimate_int *= partial_int
                    otpt = integrate.fixed_quad(
                        lambda x: estimate_int, quad_start, quad_stop,
                        n=quad_n)[0]
                    return -np.log(otpt).dot(counts)

                fminbound(min_local_func, -4, 4)

                # Update the partial integral based on the newly found values
                estimate_int = _compute_partial_integral(
                    theta, difficulty[item_ndx, None],
                    discrimination[item_ndx, None], the_sign[item_ndx, None])
                partial_int *= estimate_int

            if np.abs(previous_difficulty - difficulty).max() < 1e-3:
                break

        cost = integrate.fixed_quad(
            lambda x: partial_int, quad_start, quad_stop, n=quad_n)[0]
        return -np.log(cost).dot(counts)

    if alpha is None:  # 1PL solver
        alpha = fminbound(alpha_min_func, 0.1, 4)
    else:  # Rasch solver
        alpha_min_func(alpha)

    return alpha, difficulty
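# Usage sketch (hedged): the docstring flags this solver as slow, so treat
# it as a reference implementation. Passing alpha solves a Rasch model.
import numpy as np

binary_data = np.random.rand(10, 400) < 0.5  # illustrative only
alpha, difficulty = onepl_full(binary_data)               # 1PL
_, rasch_difficulty = onepl_full(binary_data, alpha=1.0)  # Rasch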
def pcm_mml(dataset, options=None):
    """Estimate parameters for the partial credit model.

    Estimate the discrimination and difficulty parameters for the partial
    credit model using marginal maximum likelihood.

    Args:
        dataset: [n_items, n_participants] 2d array of measured responses
        options: dictionary with updates to default options

    Returns:
        discrimination: (1d array) estimates of item discrimination
        difficulty: (2d array) estimates of item difficulties x item thresholds

    Options:
        * max_iteration: int
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int
    """
    options = validate_estimation_options(options)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    responses, item_counts = condition_polytomous_response(dataset,
                                                           trim_ends=False,
                                                           _reference=0.0)
    n_items = responses.shape[0]

    # Quadrature locations
    theta = _get_quadrature_points(quad_n, quad_start, quad_stop)
    distribution = options['distribution'](theta)

    # Initialize difficulty parameters for estimation
    betas = np.full((n_items, item_counts.max()), np.nan)
    discrimination = np.ones((n_items,))
    partial_int = np.ones((responses.shape[1], theta.size))

    # Not all items need to have the same number of response categories
    betas[:, 0] = 0
    for ndx in range(n_items):
        betas[ndx, 1:item_counts[ndx]] = np.linspace(-1, 1,
                                                     item_counts[ndx] - 1)

    #############
    # 1. Start the iteration loop
    # 2. Estimate discrimination/difficulty jointly
    # 3. Integrate over theta
    # 4. Minimize and repeat
    #############
    for iteration in range(options['max_iteration']):
        previous_discrimination = discrimination.copy()
        previous_betas = betas.copy()

        # Quadrature evaluation for values that do not change
        # This is done during the outer loop to address rounding errors
        # and for speed
        partial_int *= 0.0
        partial_int += distribution[None, :]
        for item_ndx in range(n_items):
            partial_int *= _credit_partial_integral(theta, betas[item_ndx],
                                                    discrimination[item_ndx],
                                                    responses[item_ndx])

        # Loop over each item and solve for the alpha / beta parameters
        for item_ndx in range(n_items):
            # pylint: disable=cell-var-from-loop
            item_length = item_counts[item_ndx]
            new_betas = np.zeros((item_length))

            # Remove the previous output
            old_values = _credit_partial_integral(
                theta, previous_betas[item_ndx],
                previous_discrimination[item_ndx], responses[item_ndx])
            partial_int /= old_values

            def _local_min_func(estimate):
                new_betas[1:] = estimate[1:]
                new_values = _credit_partial_integral(theta, new_betas,
                                                      estimate[0],
                                                      responses[item_ndx])
                new_values *= partial_int
                otpt = integrate.fixed_quad(
                    lambda x: new_values, quad_start, quad_stop, n=quad_n)[0]
                return -np.log(otpt).sum()

            # Initial guess of item parameters
            initial_guess = np.concatenate(([discrimination[item_ndx]],
                                            betas[item_ndx, 1:item_length]))

            otpt = fmin_slsqp(_local_min_func, initial_guess,
                              disp=False,
                              bounds=[(.25, 4)] + [(-6, 6)] * (item_length - 1))

            discrimination[item_ndx] = otpt[0]
            betas[item_ndx, 1:item_length] = otpt[1:]

            new_values = _credit_partial_integral(theta, betas[item_ndx],
                                                  discrimination[item_ndx],
                                                  responses[item_ndx])
            partial_int *= new_values

        if np.abs(previous_discrimination - discrimination).max() < 1e-3:
            break

    # TODO: look where missing values are and place NaN there instead
    # of appending them to the end
    return discrimination, betas[:, 1:]
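# Usage sketch (hedged): partial credit fit. Items may use different numbers
# of response categories; unused thresholds come back as NaN.
import numpy as np

poly_data = np.random.randint(0, 4, (8, 500))  # illustrative only
discrimination, thresholds = pcm_mml(poly_data)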
def twopl_full(dataset, options=None):
    """Estimates parameters in a 2PL IRT model.

    Please use twopl_mml instead.

    Args:
        dataset: [items x participants] matrix of True/False values
        options: dictionary with updates to default options

    Returns:
        discrimination: (1d array) estimates of item discrimination
        difficulty: (1d array) estimates of item difficulties

    Options:
        * max_iteration: int
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int
    """
    options = validate_estimation_options(options)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    n_items = dataset.shape[0]
    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)
    the_sign = convert_responses_to_kernel_sign(unique_sets)

    theta = _get_quadrature_points(quad_n, quad_start, quad_stop)
    distribution = options['distribution'](theta)

    discrimination = np.ones((n_items,))
    difficulty = np.zeros((n_items,))

    for iteration in range(options['max_iteration']):
        previous_discrimination = discrimination.copy()

        # Quadrature evaluation for values that do not change
        partial_int = _compute_partial_integral(theta, difficulty,
                                                discrimination, the_sign)
        partial_int *= distribution

        for item_ndx in range(n_items):
            # pylint: disable=cell-var-from-loop
            # Remove the contribution from the current item
            local_int = _compute_partial_integral(
                theta, difficulty[item_ndx, None],
                discrimination[item_ndx, None], the_sign[item_ndx, None])
            partial_int /= local_int

            def min_func_local(estimate):
                discrimination[item_ndx] = estimate[0]
                difficulty[item_ndx] = estimate[1]
                estimate_int = _compute_partial_integral(
                    theta, difficulty[item_ndx, None],
                    discrimination[item_ndx, None], the_sign[item_ndx, None])
                estimate_int *= partial_int
                otpt = integrate.fixed_quad(
                    lambda x: estimate_int, quad_start, quad_stop,
                    n=quad_n)[0]
                return -np.log(otpt).dot(counts)

            # Two-parameter solver that doesn't need derivatives
            initial_guess = np.concatenate((discrimination[item_ndx, None],
                                            difficulty[item_ndx, None]))
            fmin_slsqp(min_func_local, initial_guess,
                       disp=False, bounds=[(0.25, 4), (-4, 4)])

            # Update the partial integral based on the newly found values
            estimate_int = _compute_partial_integral(
                theta, difficulty[item_ndx, None],
                discrimination[item_ndx, None], the_sign[item_ndx, None])
            partial_int *= estimate_int

        if np.abs(discrimination - previous_discrimination).max() < 1e-3:
            break

    return discrimination, difficulty
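# Usage sketch (hedged): joint discrimination/difficulty fit per item; the
# docstring recommends twopl_mml for real use.
import numpy as np

binary_data = np.random.rand(10, 400) < 0.5  # illustrative only
discrimination, difficulty = twopl_full(binary_data)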
def threepl_mml(dataset, options=None):
    """Estimates parameters in a 3PL IRT model.

    Args:
        dataset: [items x participants] matrix of True/False values
        options: dictionary with updates to default options

    Returns:
        results: dictionary containing:
            * Discrimination: (1d array) estimates of item discrimination
            * Difficulty: (1d array) estimates of item difficulties
            * Guessing: (1d array) estimates of item guessing

    Options:
        * max_iteration: int
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int
    """
    options = validate_estimation_options(options)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    n_items = dataset.shape[0]
    n_no, n_yes = get_true_false_counts(dataset)
    scalar = n_yes / (n_yes + n_no)

    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)
    the_sign = convert_responses_to_kernel_sign(unique_sets)

    theta, weights = _get_quadrature_points(quad_n, quad_start, quad_stop)
    distribution = options['distribution'](theta)
    distribution_x_weights = distribution * weights

    # Perform the minimization
    discrimination = np.ones((n_items,))
    difficulty = np.zeros((n_items,))
    guessing = np.zeros((n_items,))
    local_scalar = np.zeros((1, 1))

    for iteration in range(options['max_iteration']):
        previous_discrimination = discrimination.copy()

        # Quadrature evaluation for values that do not change
        # This is done during the outer loop to address rounding errors
        partial_int = _compute_partial_integral_3pl(theta, difficulty,
                                                    discrimination,
                                                    guessing, the_sign)
        partial_int *= distribution

        for ndx in range(n_items):
            # pylint: disable=cell-var-from-loop
            # Remove the contribution from the current item
            local_int = _compute_partial_integral_3pl(
                theta, difficulty[ndx, None], discrimination[ndx, None],
                guessing[ndx, None], the_sign[ndx, None])
            partial_int /= local_int

            def min_func_local(estimate):
                discrimination[ndx] = estimate[0]
                guessing[ndx] = estimate[1]
                local_scalar[0, 0] = ((scalar[ndx] - guessing[ndx]) /
                                      (1. - guessing[ndx]))
                _mml_abstract(difficulty[ndx, None], local_scalar,
                              discrimination[ndx, None], theta,
                              distribution_x_weights)
                estimate_int = _compute_partial_integral_3pl(
                    theta, difficulty[ndx, None], discrimination[ndx, None],
                    guessing[ndx, None], the_sign[ndx, None])
                estimate_int *= partial_int
                otpt = integrate.fixed_quad(
                    lambda x: estimate_int, quad_start, quad_stop,
                    n=quad_n)[0]
                return -np.log(otpt).dot(counts)

            # Solve for the discrimination and guessing parameters
            initial_guess = [discrimination[ndx], guessing[ndx]]
            fmin_slsqp(min_func_local, initial_guess,
                       bounds=([0.25, 4], [0, .33]), iprint=False)

            # Update the partial integral based on the newly found values
            estimate_int = _compute_partial_integral_3pl(
                theta, difficulty[ndx, None], discrimination[ndx, None],
                guessing[ndx, None], the_sign[ndx, None])
            partial_int *= estimate_int

        if np.abs(discrimination - previous_discrimination).max() < 1e-3:
            break

    return {'Discrimination': discrimination,
            'Difficulty': difficulty,
            'Guessing': guessing}
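# Usage sketch (hedged): the 3PL solver returns a dictionary rather than a
# tuple, keyed by parameter name.
import numpy as np

binary_data = np.random.rand(10, 400) < 0.5  # illustrative only
results = threepl_mml(binary_data)
discrimination = results['Discrimination']
guessing = results['Guessing']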
def grm_mml(dataset, options=None):
    """Estimate parameters for the graded response model.

    Estimate the discrimination and difficulty parameters for a graded
    response model using marginal maximum likelihood.

    Args:
        dataset: [n_items, n_participants] 2d array of measured responses
        options: dictionary with updates to default options

    Returns:
        discrimination: (1d array) estimates of item discrimination
        difficulty: (2d array) estimates of item difficulties by item thresholds

    Options:
        * max_iteration: int
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int
    """
    options = validate_estimation_options(options)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    responses, item_counts = condition_polytomous_response(dataset,
                                                           trim_ends=False)
    n_items = responses.shape[0]

    # Quadrature locations
    theta = _get_quadrature_points(quad_n, quad_start, quad_stop)
    distribution = options['distribution'](theta)

    # Compute the values needed for the integral equations
    integral_counts = list()
    for ndx in range(n_items):
        temp_output = _solve_for_constants(responses[ndx])
        integral_counts.append(temp_output)

    # Initialize difficulty parameters for estimation
    betas = np.full((item_counts.sum(),), -10000.0)
    discrimination = np.ones_like(betas)
    cumulative_item_counts = item_counts.cumsum()
    start_indices = np.roll(cumulative_item_counts, 1)
    start_indices[0] = 0

    for ndx in range(n_items):
        end_ndx = cumulative_item_counts[ndx]
        start_ndx = start_indices[ndx] + 1
        betas[start_ndx:end_ndx] = np.linspace(-1, 1, item_counts[ndx] - 1)
    betas_roll = np.roll(betas, -1)
    betas_roll[cumulative_item_counts - 1] = 10000

    #############
    # 1. Start the iteration loop
    # 2. Estimate discrimination
    # 3. Solve for difficulties
    # 4. Minimize and repeat
    #############
    for iteration in range(options['max_iteration']):
        previous_discrimination = discrimination.copy()
        previous_betas = betas.copy()
        previous_betas_roll = betas_roll.copy()

        # Quadrature evaluation for values that do not change
        # This is done during the outer loop to address rounding errors
        partial_int = _graded_partial_integral(theta, betas, betas_roll,
                                               discrimination, responses)
        partial_int *= distribution

        for item_ndx in range(n_items):
            # pylint: disable=cell-var-from-loop
            # Indices into the linearized difficulty parameters
            start_ndx = start_indices[item_ndx]
            end_ndx = cumulative_item_counts[item_ndx]

            old_values = _graded_partial_integral(theta, previous_betas,
                                                  previous_betas_roll,
                                                  previous_discrimination,
                                                  responses[item_ndx][None, :])
            partial_int /= old_values

            def _local_min_func(estimate):
                # Solve the integrals for the difficulty estimates
                new_betas = _solve_integral_equations(
                    estimate, integral_counts[item_ndx], distribution, theta)
                betas[start_ndx+1:end_ndx] = new_betas
                betas_roll[start_ndx:end_ndx-1] = new_betas
                discrimination[start_ndx:end_ndx] = estimate

                new_values = _graded_partial_integral(
                    theta, betas, betas_roll, discrimination,
                    responses[item_ndx][None, :])
                new_values *= partial_int
                otpt = integrate.fixed_quad(
                    lambda x: new_values, quad_start, quad_stop, n=quad_n)[0]
                return -np.log(otpt).sum()

            # Univariate minimization for the discrimination parameter
            fminbound(_local_min_func, 0.2, 5.0)

            new_values = _graded_partial_integral(theta, betas, betas_roll,
                                                  discrimination,
                                                  responses[item_ndx][None, :])
            partial_int *= new_values

        if np.abs(previous_discrimination - discrimination).max() < 1e-3:
            break

    # Trim difficulties to conform to standard output
    # TODO: look where missing values are and place NaN there instead
    # of appending them to the end
    output_betas = np.full((n_items, item_counts.max() - 1), np.nan)
    for ndx, (start_ndx, end_ndx) in enumerate(
            zip(start_indices, cumulative_item_counts)):
        output_betas[ndx, :end_ndx-start_ndx-1] = betas[start_ndx+1:end_ndx]

    return discrimination[start_indices], output_betas
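# Usage sketch (hedged): graded response fit; thresholds for items with
# fewer categories are NaN-padded on the right.
import numpy as np

poly_data = np.random.randint(0, 4, (8, 500))  # illustrative only
discrimination, thresholds = grm_mml(poly_data, {'quadrature_n': 61})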
def onepl_mml(dataset, alpha=None, options=None):
    """Estimates parameters in a 1PL IRT model.

    Args:
        dataset: [items x participants] matrix of True/False values
        alpha: [int] discrimination constraint
        options: dictionary with updates to default options

    Returns:
        results: dictionary containing:
            * Discrimination: (float) estimate of test discrimination
            * Difficulty: (1d array) estimates of item difficulties

    Options:
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int
    """
    options = validate_estimation_options(options)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    # Difficulty estimation parameters
    n_items = dataset.shape[0]
    n_no, n_yes = get_true_false_counts(dataset)
    scalar = n_yes / (n_yes + n_no)

    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)
    invalid_response_mask = unique_sets == INVALID_RESPONSE
    unique_sets[invalid_response_mask] = 0  # For indexing; fixed by the mask

    discrimination = np.ones((n_items,))
    difficulty = np.zeros((n_items,))

    # Quadrature locations
    theta, weights = _get_quadrature_points(quad_n, quad_start, quad_stop)
    distribution = options['distribution'](theta)
    distribution_x_weights = distribution * weights

    # Inline definition of cost function to minimize
    def min_func(estimate):
        discrimination[:] = estimate
        _mml_abstract(difficulty, scalar, discrimination,
                      theta, distribution_x_weights)

        partial_int = np.ones((unique_sets.shape[1], theta.size))
        for ndx in range(n_items):
            partial_int *= _compute_partial_integral(
                theta, difficulty[ndx], discrimination[ndx],
                unique_sets[ndx], invalid_response_mask[ndx])
        partial_int *= distribution_x_weights

        # Compute the integral over theta
        otpt = np.sum(partial_int, axis=1)
        return -np.log(otpt).dot(counts)

    # Perform the minimization
    if alpha is None:  # 1PL method
        alpha = fminbound(min_func, 0.25, 10)
    else:  # Rasch method
        min_func(alpha)

    return {"Discrimination": alpha,
            "Difficulty": difficulty}
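# Usage sketch (hedged): this newer variant returns a dictionary and
# tolerates INVALID_RESPONSE entries in the data.
import numpy as np

binary_data = np.random.randint(0, 2, (10, 500))  # illustrative only
result = onepl_mml(binary_data)
alpha, difficulty = result['Discrimination'], result['Difficulty']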