def test_credit_partial_integration(self):
    """Testing the partial integral in the partial credit model."""
    theta = _get_quadrature_points(61, -5, 5)
    response_set = np.array([0, 1, 2, 2, 1, 0, 3, 1, 3, 2, 2, 2])
    betas = np.array([0, -0.4, 0.94, -.37])
    discrimination = 1.42

    # Hand calculations
    offsets = np.cumsum(betas)[1:]
    first_pos = np.ones_like(theta)
    second_pos = np.exp(discrimination * (theta - offsets[0]))
    third_pos = np.exp(2 * discrimination * (theta - offsets[1] / 2))
    last_pos = np.exp(3 * discrimination * (theta - offsets[2] / 3))
    norm_term = first_pos + second_pos + third_pos + last_pos

    probability_values = [first_pos / norm_term, second_pos / norm_term,
                          third_pos / norm_term, last_pos / norm_term]

    expected = np.zeros((response_set.size, theta.size))
    for ndx, response in enumerate(response_set):
        expected[ndx] = probability_values[response]

    result = _credit_partial_integral(theta, betas, discrimination,
                                      response_set)

    np.testing.assert_array_almost_equal(result, expected)
def _local_min_func(estimate):
    new_betas[1:] = estimate[1:]
    new_values = _credit_partial_integral(theta, new_betas,
                                          estimate[0],
                                          responses[item_ndx],
                                          invalid_response_mask[item_ndx])
    new_values *= partial_int
    otpt = np.sum(new_values, axis=1)

    return -np.log(otpt).sum()
def _local_min_func(estimate):
    new_betas[1:] = estimate[1:]
    new_values = _credit_partial_integral(theta, new_betas,
                                          estimate[0],
                                          responses[item_ndx])
    new_values *= partial_int
    otpt = integrate.fixed_quad(
        lambda x: new_values, quad_start, quad_stop, n=quad_n)[0]

    return -np.log(otpt).sum()
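The two `_local_min_func` closures above differ only in how the marginal over theta is collapsed. The `fixed_quad` variant hands precomputed grid values to scipy (the lambda ignores its node argument, so the call reduces to applying Gauss-Legendre weights along the last axis), while the `np.sum` variant assumes the quadrature weights and latent density were already folded into `partial_int` through `distribution_x_weight`. A small sketch with random stand-in values illustrates the mechanics; the array shapes here are illustrative only:

import numpy as np
from scipy import integrate

values = np.random.uniform(0.1, 1.0, (12, 61))  # stand-in [participants, quad points]

# fixed_quad style: the lambda ignores its nodes, so the call just applies
# (b - a) / 2 times the Gauss-Legendre weights along the last axis
marginal_old = integrate.fixed_quad(lambda x: values, -5, 5, n=61)[0]

# plain-sum style: assumes the weights (and the latent density) were
# already multiplied into the grid values beforehand
marginal_new = values.sum(axis=1)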
def test_credit_partial_integration(self):
    """Testing the partial integral in the partial credit model."""
    theta, _ = _get_quadrature_points(61, -5, 5)
    response_set = np.array([0, 1, 2, 2, 1, 0, 3, 1, 3, 2, 2, 2])
    betas = np.array([0, -0.4, 0.94, -.37])
    discrimination = 1.42
    invalid_response_mask = np.zeros_like(response_set, dtype='bool')

    # Hand calculations
    offsets = np.cumsum(betas)[1:]
    first_pos = np.ones_like(theta)
    second_pos = np.exp(discrimination * (theta - offsets[0]))
    third_pos = np.exp(2 * discrimination * (theta - offsets[1] / 2))
    last_pos = np.exp(3 * discrimination * (theta - offsets[2] / 3))
    norm_term = first_pos + second_pos + third_pos + last_pos

    probability_values = [first_pos / norm_term, second_pos / norm_term,
                          third_pos / norm_term, last_pos / norm_term]

    expected = np.zeros((response_set.size, theta.size))
    for ndx, response in enumerate(response_set):
        expected[ndx] = probability_values[response]

    result = _credit_partial_integral(theta, betas, discrimination,
                                      response_set, invalid_response_mask)

    np.testing.assert_array_almost_equal(result, expected)

    # Masked responses should contribute a constant (ones) to the integral
    invalid_response_mask[1] = True
    invalid_response_mask[7] = True

    result = _credit_partial_integral(theta, betas, discrimination,
                                      response_set, invalid_response_mask)

    np.testing.assert_equal(result[1], np.ones(61,))
    np.testing.assert_equal(result[7], np.ones(61,))

    with np.testing.assert_raises(AssertionError):
        for ndx in [0, 2, 3, 4, 5, 6, 8, 9]:
            np.testing.assert_equal(result[ndx], np.ones(61,))
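For reference, here is a minimal sketch of what `_credit_partial_integral` is expected to compute, reconstructed from the hand calculations in the test above. The library's actual kernel may differ in layout and optimizations; the `_sketch` suffix marks the name as hypothetical.

import numpy as np

def _credit_partial_integral_sketch(theta, betas, discrimination,
                                    responses, invalid_response_mask):
    # Category k numerator: exp(discrimination * (k * theta - cumsum(betas)[k]))
    kernel = np.arange(betas.size)[:, None] * theta[None, :]
    kernel = kernel - np.cumsum(betas)[:, None]
    kernel *= discrimination
    probabilities = np.exp(kernel)
    probabilities /= probabilities.sum(axis=0)  # normalize over categories

    # Pick each respondent's observed category row; invalid responses
    # contribute a constant 1 so they drop out of the product over items
    output = probabilities[responses]
    output[invalid_response_mask] = 1.0
    return output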
def pcm_mml(dataset, options=None):
    """Estimate parameters for partial credit model.

    Estimate the discrimination and difficulty parameters for
    the partial credit model using marginal maximum likelihood.

    Args:
        dataset: [n_items, n_participants] 2d array of measured responses
        options: dictionary with updates to default options

    Returns:
        discrimination: (1d array) estimates of item discrimination
        difficulty: (2d array) estimates of item difficulties x item thresholds

    Options:
        * max_iteration: int
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int
    """
    options = validate_estimation_options(options)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    responses, item_counts = condition_polytomous_response(dataset,
                                                           trim_ends=False,
                                                           _reference=0.0)
    n_items = responses.shape[0]

    # Interpolation Locations
    theta = _get_quadrature_points(quad_n, quad_start, quad_stop)
    distribution = options['distribution'](theta)

    # Initialize difficulty parameters for estimation
    betas = np.full((n_items, item_counts.max()), np.nan)
    discrimination = np.ones((n_items,))
    partial_int = np.ones((responses.shape[1], theta.size))

    # Not all items need to have the same
    # number of response categories
    betas[:, 0] = 0
    for ndx in range(n_items):
        betas[ndx, 1:item_counts[ndx]] = np.linspace(-1, 1,
                                                     item_counts[ndx] - 1)

    #############
    # 1. Start the iteration loop
    # 2. Estimate discrimination / difficulty jointly
    # 3. Integrate over theta
    # 4. Minimize and repeat
    #############
    for iteration in range(options['max_iteration']):
        previous_discrimination = discrimination.copy()
        previous_betas = betas.copy()

        # Quadrature evaluation for values that do not change
        # This is done during the outer loop to address rounding errors
        # and for speed
        partial_int *= 0.0
        partial_int += distribution[None, :]
        for item_ndx in range(n_items):
            partial_int *= _credit_partial_integral(theta, betas[item_ndx],
                                                    discrimination[item_ndx],
                                                    responses[item_ndx])

        # Loop over each item and solve for the alpha / beta parameters
        for item_ndx in range(n_items):
            # pylint: disable=cell-var-from-loop
            item_length = item_counts[item_ndx]
            new_betas = np.zeros((item_length))

            # Remove the previous output
            old_values = _credit_partial_integral(theta,
                                                  previous_betas[item_ndx],
                                                  previous_discrimination[item_ndx],
                                                  responses[item_ndx])
            partial_int /= old_values

            def _local_min_func(estimate):
                new_betas[1:] = estimate[1:]
                new_values = _credit_partial_integral(theta, new_betas,
                                                      estimate[0],
                                                      responses[item_ndx])
                new_values *= partial_int
                otpt = integrate.fixed_quad(
                    lambda x: new_values, quad_start, quad_stop, n=quad_n)[0]

                return -np.log(otpt).sum()

            # Initial guess of item parameters
            initial_guess = np.concatenate(([discrimination[item_ndx]],
                                            betas[item_ndx, 1:item_length]))

            otpt = fmin_slsqp(_local_min_func, initial_guess,
                              disp=False,
                              bounds=[(.25, 4)] + [(-6, 6)] * (item_length - 1))

            discrimination[item_ndx] = otpt[0]
            betas[item_ndx, 1:item_length] = otpt[1:]

            new_values = _credit_partial_integral(theta, betas[item_ndx],
                                                  discrimination[item_ndx],
                                                  responses[item_ndx])
            partial_int *= new_values

        if np.abs(previous_discrimination - discrimination).max() < 1e-3:
            break

    # TODO: look where missing values are and place NaN there instead
    # of appending them to the end
    return discrimination, betas[:, 1:]
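Under this earlier revision the estimates come back as a plain tuple rather than the results dictionary returned by the later revision below; a minimal, hypothetical call (the dataset name is a stand-in) looks like:

# Hypothetical dataset: [n_items, n_participants] integer responses
discrimination, difficulty = pcm_mml(dataset)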
def pcm_mml(dataset, options=None):
    """Estimate parameters for partial credit model.

    Estimate the discrimination and difficulty parameters for
    the partial credit model using marginal maximum likelihood.

    Args:
        dataset: [n_items, n_participants] 2d array of measured responses
        options: dictionary with updates to default options

    Returns:
        results_dictionary:
        * Discrimination: (1d array) estimates of item discrimination
        * Difficulty: (2d array) estimates of item difficulties x item thresholds
        * Ability: (1d array) EAP latent ability estimates
        * LatentPDF: (object) estimated latent distribution
        * AIC: (dictionary) final, null, and delta AIC values
        * BIC: (dictionary) final, null, and delta BIC values

    Options:
        * estimate_distribution: Boolean
        * number_of_samples: int >= 5
        * max_iteration: int
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int
    """
    options = validate_estimation_options(options)

    cpr_result = condition_polytomous_response(dataset, trim_ends=False,
                                               _reference=0.0)
    responses, item_counts, valid_response_mask = cpr_result
    invalid_response_mask = ~valid_response_mask
    n_items = responses.shape[0]

    # Quadrature Locations
    latent_pdf = LatentPDF(options)
    theta = latent_pdf.quadrature_locations

    # Initialize difficulty parameters for estimation
    betas = np.full((n_items, item_counts.max()), np.nan)
    discrimination = np.ones((n_items,))
    partial_int = np.ones((responses.shape[1], theta.size))

    # Not all items need to have the same
    # number of response categories
    betas[:, 0] = 0
    for ndx in range(n_items):
        betas[ndx, 1:item_counts[ndx]] = np.linspace(-1, 1,
                                                     item_counts[ndx] - 1)

    # Set invalid indices to zero; this allows minimal changes
    # for invalid data and is corrected during integration
    responses[invalid_response_mask] = 0

    #############
    # 1. Start the iteration loop
    # 2. Estimate discrimination / difficulty jointly
    # 3. Integrate over theta
    # 4. Minimize and repeat
    #############
    for iteration in range(options['max_iteration']):
        previous_discrimination = discrimination.copy()
        previous_betas = betas.copy()

        # Quadrature evaluation for values that do not change
        # This is done during the outer loop to address rounding errors
        # and for speed
        partial_int = np.ones((responses.shape[1], theta.size))
        for item_ndx in range(n_items):
            partial_int *= _credit_partial_integral(theta, betas[item_ndx],
                                                    discrimination[item_ndx],
                                                    responses[item_ndx],
                                                    invalid_response_mask[item_ndx])

        # Estimate the distribution if requested
        distribution_x_weight = latent_pdf(partial_int, iteration)
        partial_int *= distribution_x_weight

        # Loop over each item and solve for the alpha / beta parameters
        for item_ndx in range(n_items):
            # pylint: disable=cell-var-from-loop
            item_length = item_counts[item_ndx]
            new_betas = np.zeros((item_length))

            # Remove the previous output
            old_values = _credit_partial_integral(theta,
                                                  previous_betas[item_ndx],
                                                  previous_discrimination[item_ndx],
                                                  responses[item_ndx],
                                                  invalid_response_mask[item_ndx])
            partial_int /= old_values

            def _local_min_func(estimate):
                new_betas[1:] = estimate[1:]
                new_values = _credit_partial_integral(theta, new_betas,
                                                      estimate[0],
                                                      responses[item_ndx],
                                                      invalid_response_mask[item_ndx])
                new_values *= partial_int
                otpt = np.sum(new_values, axis=1)

                return -np.log(otpt).sum()

            # Initial guess of item parameters
            initial_guess = np.concatenate(([discrimination[item_ndx]],
                                            betas[item_ndx, 1:item_length]))

            otpt = fmin_slsqp(_local_min_func, initial_guess,
                              disp=False,
                              bounds=[(.25, 4)] + [(-6, 6)] * (item_length - 1))

            discrimination[item_ndx] = otpt[0]
            betas[item_ndx, 1:item_length] = otpt[1:]

            new_values = _credit_partial_integral(theta, betas[item_ndx],
                                                  discrimination[item_ndx],
                                                  responses[item_ndx],
                                                  invalid_response_mask[item_ndx])
            partial_int *= new_values

        if np.abs(previous_discrimination - discrimination).max() < 1e-3:
            break

    # Recompute partial integral for later calculations
    partial_int = np.ones((responses.shape[1], theta.size))
    for item_ndx in range(n_items):
        partial_int *= _credit_partial_integral(theta, betas[item_ndx],
                                                discrimination[item_ndx],
                                                responses[item_ndx],
                                                invalid_response_mask[item_ndx])

    # TODO: look where missing values are and place NaN there instead
    # of appending them to the end

    # Compute statistics for final iteration
    null_metrics = latent_pdf.compute_metrics(
        partial_int, latent_pdf.null_distribution * latent_pdf.weights, 0)
    full_metrics = latent_pdf.compute_metrics(
        partial_int, distribution_x_weight, latent_pdf.n_points - 3)

    # Ability estimates
    eap_abilities = _ability_eap_abstract(partial_int, distribution_x_weight,
                                          theta)

    return {
        'Discrimination': discrimination,
        'Difficulty': betas[:, 1:],
        'Ability': eap_abilities,
        'LatentPDF': latent_pdf,
        'AIC': {
            'final': full_metrics[0],
            'null': null_metrics[0],
            'delta': null_metrics[0] - full_metrics[0]
        },
        'BIC': {
            'final': full_metrics[1],
            'null': null_metrics[1],
            'delta': null_metrics[1] - full_metrics[1]
        }
    }
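For end-to-end context, a usage sketch of the revised estimator. It assumes girth's synthetic-data helper create_synthetic_irt_polytomous and its 'PCM' model flag; treat the exact helper name, argument order, and flag as assumptions rather than a verified API.

import numpy as np
from girth import create_synthetic_irt_polytomous, pcm_mml

# Assumed helper and signature: synthetic partial-credit responses
# for 5 items x 3 thresholds and 600 participants
difficulty = np.random.uniform(-1.5, 1.5, (5, 3))
discrimination = np.random.uniform(0.8, 1.8, 5)
thetas = np.random.standard_normal(600)
synthetic_data = create_synthetic_irt_polytomous(difficulty, discrimination,
                                                 thetas, model='PCM')

results = pcm_mml(synthetic_data)
print(results['Discrimination'])   # 1d array of item discriminations
print(results['Difficulty'])       # 2d array, items x thresholds
print(results['AIC']['delta'])     # improvement over the null model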