def test_graded_partial_integral(self): """Testing the partial integral in the graded model.""" theta, _ = _get_quadrature_points(61, -5, 5) responses = np.random.randint(0, 3, (10, 100)) betas = np.array([-10000, -.3, 0.1, 1.2]) betas_roll = np.roll(betas, -1) betas_roll[-1] = 10000 invalid_response_mask = np.zeros_like(responses, dtype='bool') output = np.ones((responses.shape[1], theta.size)) for ndx in range(responses.shape[0]): output *= _graded_partial_integral(theta, betas, betas_roll, np.array([ 1, ]), responses[ndx], invalid_response_mask[ndx]) # Compare to hand calculations hand_calc = list() for ndx in range(responses.shape[1]): left_betas = betas[responses[:, ndx]] right_betas = betas_roll[responses[:, ndx]] probability = ( 1.0 / (1.0 + np.exp(left_betas[:, None] - theta[None, :])) - 1.0 / (1.0 + np.exp(right_betas[:, None] - theta[None, :]))) hand_calc.append(probability.prod(0)) hand_calc = np.asarray(hand_calc) np.testing.assert_array_equal(hand_calc, output) # Test invalid response invalid_response_mask[0, 1] = True invalid_response_mask[0, 7] = True output = _graded_partial_integral(theta, betas, betas_roll, np.array([ 1, ]), responses[0], invalid_response_mask[0]) np.testing.assert_equal(output[1], np.ones(61, )) np.testing.assert_equal(output[7], np.ones(61, )) with np.testing.assert_raises(AssertionError): for ndx in [0, 2, 3, 4, 5, 6, 8, 9]: np.testing.assert_equal(output[ndx], np.ones(61, ))
def _local_min_func(estimate): # Solve integrals for diffiulty estimates new_betas = _integral_func(estimate, integral_counts[item_ndx], distribution_x_weight, theta, _interp_func) betas[start_ndx + 1:end_ndx] = new_betas betas_roll[start_ndx:end_ndx - 1] = new_betas discrimination[start_ndx:end_ndx] = estimate new_values = _graded_partial_integral( theta, betas, betas_roll, discrimination, responses[item_ndx], invalid_response_mask[item_ndx]) new_values *= partial_int otpt = np.sum(new_values, axis=1) return np.log(otpt.clip(1e-313, np.inf)).sum()
def _local_min_func(estimate): # Solve integrals for diffiulty estimates new_betas = _solve_integral_equations(estimate, integral_counts[item_ndx], distribution, theta) betas[start_ndx+1:end_ndx] = new_betas betas_roll[start_ndx:end_ndx-1] = new_betas discrimination[start_ndx:end_ndx] = estimate new_values = _graded_partial_integral(theta, betas, betas_roll, discrimination, responses[item_ndx][None, :]) new_values *= partial_int otpt = integrate.fixed_quad( lambda x: new_values, quad_start, quad_stop, n=quad_n)[0] return -np.log(otpt).sum()
def test_graded_partial_integral(self): """Testing the partial integral in the graded model.""" theta = _get_quadrature_points(61, -5, 5) responses = np.random.randint(0, 3, (10, 100)) betas = np.array([-10000, -.3, 0.1, 1.2]) betas_roll = np.roll(betas, -1) betas_roll[-1] = 10000 output = _graded_partial_integral(theta, betas, betas_roll, 1.0, responses) # Compare to hand calculations hand_calc = list() for ndx in range(responses.shape[1]): left_betas = betas[responses[:, ndx]] right_betas = betas_roll[responses[:, ndx]] probability = (1.0 / (1.0 + np.exp(left_betas[:, None] - theta[None, :])) - 1.0 / (1.0 + np.exp(right_betas[:, None] - theta[None, :]))) hand_calc.append(probability.prod(0)) hand_calc = np.asarray(hand_calc) np.testing.assert_array_equal(hand_calc, output)
def grm_mml_eap(dataset, options=None): """Estimate parameters for graded response model. Estimate the discrimination and difficulty parameters for a graded response model using a mixed Bayesian / Marginal Maximum Likelihood algorithm, good for small sample sizes Args: dataset: [n_items, n_participants] 2d array of measured responses options: dictionary with updates to default options Returns: results_dictionary: * Discrimination: (1d array) estimate of item discriminations * Difficulty: (2d array) estimates of item difficulties by item thresholds * LatentPDF: (object) contains information about the pdf * Rayleigh_Scale: (int) Rayleigh scale value of the discrimination prior * AIC: (dictionary) null model and final model AIC value * BIC: (dictionary) null model and final model BIC value Options: * estimate_distribution: Boolean * number_of_samples: int >= 5 * max_iteration: int * distribution: callable * quadrature_bounds: (float, float) * quadrature_n: int * hyper_quadrature_n: int """ options = validate_estimation_options(options) cpr_result = condition_polytomous_response(dataset, trim_ends=False) responses, item_counts, valid_response_mask = cpr_result invalid_response_mask = ~valid_response_mask n_items = responses.shape[0] # Only use LUT _integral_func = _solve_integral_equations_LUT _interp_func = create_beta_LUT((.15, 5.05, 500), (-6, 6, 500), options) # Quadrature Locations latent_pdf = LatentPDF(options) theta = latent_pdf.quadrature_locations # Compute the values needed for integral equations integral_counts = list() for ndx in range(n_items): temp_output = _solve_for_constants(responses[ndx, valid_response_mask[ndx]]) integral_counts.append(temp_output) # Initialize difficulty parameters for estimation betas = np.full((item_counts.sum(), ), -10000.0) discrimination = np.ones_like(betas) cumulative_item_counts = item_counts.cumsum() start_indices = np.roll(cumulative_item_counts, 1) start_indices[0] = 0 for ndx in range(n_items): end_ndx = cumulative_item_counts[ndx] start_ndx = start_indices[ndx] + 1 betas[start_ndx:end_ndx] = np.linspace(-1, 1, item_counts[ndx] - 1) betas_roll = np.roll(betas, -1) betas_roll[cumulative_item_counts - 1] = 10000 # Set invalid index to zero, this allows minimal # changes for invalid data and it is corrected # during integration responses[invalid_response_mask] = 0 # Prior Parameters ray_scale = 1.0 eap_options = { 'distribution': stats.rayleigh(loc=.25, scale=ray_scale).pdf, 'quadrature_n': options['hyper_quadrature_n'], 'quadrature_bounds': (0.25, 5) } prior_pdf = LatentPDF(eap_options) alpha_evaluation = np.zeros((eap_options['quadrature_n'], )) # Meta-Prior Parameter hyper_options = { 'distribution': stats.lognorm(loc=0, s=0.25).pdf, 'quadrature_n': options['hyper_quadrature_n'], 'quadrature_bounds': (0.1, 5) } hyper_pdf = LatentPDF(hyper_options) hyper_evaluation = np.zeros((hyper_options['quadrature_n'], )) base_hyper = (hyper_pdf.weights * hyper_pdf.null_distribution).astype('float128') linear_hyper = base_hyper * hyper_pdf.quadrature_locations for iteration in range(options['max_iteration']): previous_discrimination = discrimination.copy() previous_betas = betas.copy() previous_betas_roll = betas_roll.copy() # Quadrature evaluation for values that do not change # This is done during the outer loop to address rounding errors partial_int = np.ones((responses.shape[1], theta.size)) for item_ndx in range(n_items): partial_int *= _graded_partial_integral( theta, betas, betas_roll, discrimination, responses[item_ndx], invalid_response_mask[item_ndx]) # Estimate the distribution if requested distribution_x_weight = latent_pdf(partial_int, iteration) partial_int *= distribution_x_weight # Update the lookup table if necessary if (options['estimate_distribution'] and iteration > 0): new_options = dict(options) new_options.update({'distribution': latent_pdf.cubic_splines[-1]}) _interp_func = create_beta_LUT((.15, 5.05, 500), (-6, 6, 500), new_options) # EAP Discrimination Parameter discrimination_pdf = stats.rayleigh(loc=0.25, scale=ray_scale).pdf base_alpha = (prior_pdf.weights * discrimination_pdf( prior_pdf.quadrature_locations)).astype('float128') linear_alpha = (base_alpha * prior_pdf.quadrature_locations).astype('float128') for item_ndx in range(n_items): # pylint: disable=cell-var-from-loop # Indices into linearized difficulty parameters start_ndx = start_indices[item_ndx] end_ndx = cumulative_item_counts[item_ndx] old_values = _graded_partial_integral( theta, previous_betas, previous_betas_roll, previous_discrimination, responses[item_ndx], invalid_response_mask[item_ndx]) partial_int /= old_values def _local_min_func(estimate): # Solve integrals for diffiulty estimates new_betas = _integral_func(estimate, integral_counts[item_ndx], distribution_x_weight, theta, _interp_func) betas[start_ndx + 1:end_ndx] = new_betas betas_roll[start_ndx:end_ndx - 1] = new_betas discrimination[start_ndx:end_ndx] = estimate new_values = _graded_partial_integral( theta, betas, betas_roll, discrimination, responses[item_ndx], invalid_response_mask[item_ndx]) new_values *= partial_int otpt = np.sum(new_values, axis=1) return np.log(otpt.clip(1e-313, np.inf)).sum() # Mean Discrimination Value for ndx, disc_location in enumerate( prior_pdf.quadrature_locations): alpha_evaluation[ndx] = _local_min_func(disc_location) alpha_evaluation -= alpha_evaluation.max() total_probability = np.exp(alpha_evaluation.astype('float128')) numerator = np.sum(total_probability * linear_alpha) denominator = np.sum(total_probability * base_alpha) alpha_eap = numerator / denominator # Reset the Value the updated discrimination estimation _local_min_func(alpha_eap.astype('float64')) new_values = _graded_partial_integral( theta, betas, betas_roll, discrimination, responses[item_ndx], invalid_response_mask[item_ndx]) partial_int *= new_values # Compute the Hyper prior mean value for ndx, scale_value in enumerate(hyper_pdf.quadrature_locations): temp_distribution = stats.rayleigh(loc=0.25, scale=scale_value).pdf hyper_evaluation[ndx] = np.log( temp_distribution(discrimination) + 1e-313).sum() hyper_evaluation -= hyper_evaluation.max() hyper_evaluation = np.exp(hyper_evaluation.astype('float128')) ray_scale = (np.sum(hyper_evaluation * linear_hyper) / np.sum(hyper_evaluation * base_hyper)).astype('float64') # Check Termination Criterion if np.abs(previous_discrimination - discrimination).max() < 1e-3: break # Recompute partial int for later calculations partial_int = np.ones((responses.shape[1], theta.size)) for item_ndx in range(n_items): partial_int *= _graded_partial_integral( theta, betas, betas_roll, discrimination, responses[item_ndx], invalid_response_mask[item_ndx]) # Trim difficulties to conform to standard output # TODO: look where missing values are and place NAN there instead # of appending them to the end output_betas = np.full((n_items, item_counts.max() - 1), np.nan) for ndx, (start_ndx, end_ndx) in enumerate(zip(start_indices, cumulative_item_counts)): output_betas[ndx, :end_ndx - start_ndx - 1] = betas[start_ndx + 1:end_ndx] # Compute statistics for final iteration null_metrics = latent_pdf.compute_metrics( partial_int, latent_pdf.null_distribution * latent_pdf.weights, 0) full_metrics = latent_pdf.compute_metrics(partial_int, distribution_x_weight, latent_pdf.n_points - 3) # Ability estimates eap_abilities = _ability_eap_abstract(partial_int, distribution_x_weight, theta) return { 'Discrimination': discrimination[start_indices], 'Difficulty': output_betas, 'Ability': eap_abilities, 'LatentPDF': latent_pdf, 'Rayleigh_Scale': ray_scale, 'AIC': { 'final': full_metrics[0], 'null': null_metrics[0], 'delta': null_metrics[0] - full_metrics[0] }, 'BIC': { 'final': full_metrics[1], 'null': null_metrics[1], 'delta': null_metrics[1] - full_metrics[1] } }
def grm_mml(dataset, options=None): """Estimate parameters for graded response model. Estimate the discrimination and difficulty parameters for a graded response model using marginal maximum likelihood. Args: dataset: [n_items, n_participants] 2d array of measured responses options: dictionary with updates to default options Returns: discrimination: (1d array) estimate of item discriminations difficulty: (2d array) estimates of item diffiulties by item thresholds Options: * max_iteration: int * distribution: callable * quadrature_bounds: (float, float) * quadrature_n: int """ options = validate_estimation_options(options) quad_start, quad_stop = options['quadrature_bounds'] quad_n = options['quadrature_n'] responses, item_counts = condition_polytomous_response(dataset, trim_ends=False) n_items = responses.shape[0] # Interpolation Locations theta = _get_quadrature_points(quad_n, quad_start, quad_stop) distribution = options['distribution'](theta) # Compute the values needed for integral equations integral_counts = list() for ndx in range(n_items): temp_output = _solve_for_constants(responses[ndx]) integral_counts.append(temp_output) # Initialize difficulty parameters for estimation betas = np.full((item_counts.sum(),), -10000.0) discrimination = np.ones_like(betas) cumulative_item_counts = item_counts.cumsum() start_indices = np.roll(cumulative_item_counts, 1) start_indices[0] = 0 for ndx in range(n_items): end_ndx = cumulative_item_counts[ndx] start_ndx = start_indices[ndx] + 1 betas[start_ndx:end_ndx] = np.linspace(-1, 1, item_counts[ndx] - 1) betas_roll = np.roll(betas, -1) betas_roll[cumulative_item_counts-1] = 10000 ############# # 1. Start the iteration loop # 2. estimate discrimination # 3. solve for difficulties # 4. minimize and repeat ############# for iteration in range(options['max_iteration']): previous_discrimination = discrimination.copy() previous_betas = betas.copy() previous_betas_roll = betas_roll.copy() # Quadrature evaluation for values that do not change # This is done during the outer loop to address rounding errors partial_int = _graded_partial_integral(theta, betas, betas_roll, discrimination, responses) partial_int *= distribution for item_ndx in range(n_items): # pylint: disable=cell-var-from-loop # Indices into linearized difficulty parameters start_ndx = start_indices[item_ndx] end_ndx = cumulative_item_counts[item_ndx] old_values = _graded_partial_integral(theta, previous_betas, previous_betas_roll, previous_discrimination, responses[item_ndx][None, :]) partial_int /= old_values def _local_min_func(estimate): # Solve integrals for diffiulty estimates new_betas = _solve_integral_equations(estimate, integral_counts[item_ndx], distribution, theta) betas[start_ndx+1:end_ndx] = new_betas betas_roll[start_ndx:end_ndx-1] = new_betas discrimination[start_ndx:end_ndx] = estimate new_values = _graded_partial_integral(theta, betas, betas_roll, discrimination, responses[item_ndx][None, :]) new_values *= partial_int otpt = integrate.fixed_quad( lambda x: new_values, quad_start, quad_stop, n=quad_n)[0] return -np.log(otpt).sum() # Univariate minimization for discrimination parameter fminbound(_local_min_func, 0.2, 5.0) new_values = _graded_partial_integral(theta, betas, betas_roll, discrimination, responses[item_ndx][None, :]) partial_int *= new_values if np.abs(previous_discrimination - discrimination).max() < 1e-3: break # Trim difficulties to conform to standard output # TODO: look where missing values are and place NAN there instead # of appending them to the end output_betas = np.full((n_items, item_counts.max()-1), np.nan) for ndx, (start_ndx, end_ndx) in enumerate(zip(start_indices, cumulative_item_counts)): output_betas[ndx, :end_ndx-start_ndx-1] = betas[start_ndx+1:end_ndx] return discrimination[start_indices], output_betas
def grm_mml(dataset, options=None): """Estimate parameters for graded response model. Estimate the discrimination and difficulty parameters for a graded response model using marginal maximum likelihood. Args: dataset: [n_items, n_participants] 2d array of measured responses options: dictionary with updates to default options Returns: results_dictionary: * Discrimination: (1d array) estimate of item discriminations * Difficulty: (2d array) estimates of item diffiulties by item thresholds * LatentPDF: (object) contains information about the pdf * AIC: (dictionary) null model and final model AIC value * BIC: (dictionary) null model and final model BIC value Options: * estimate_distribution: Boolean * number_of_samples: int >= 5 * use_LUT: boolean * max_iteration: int * distribution: callable * quadrature_bounds: (float, float) * quadrature_n: int """ options = validate_estimation_options(options) cpr_result = condition_polytomous_response(dataset, trim_ends=False) responses, item_counts, valid_response_mask = cpr_result invalid_response_mask = ~valid_response_mask n_items = responses.shape[0] # Should we use the LUT _integral_func = _solve_integral_equations _interp_func = None if options['use_LUT']: _integral_func = _solve_integral_equations_LUT _interp_func = create_beta_LUT((.15, 5.05, 500), (-6, 6, 500), options) # Quadrature Locations latent_pdf = LatentPDF(options) theta = latent_pdf.quadrature_locations # Compute the values needed for integral equations integral_counts = list() for ndx in range(n_items): temp_output = _solve_for_constants(responses[ndx, valid_response_mask[ndx]]) integral_counts.append(temp_output) # Initialize difficulty parameters for estimation betas = np.full((item_counts.sum(), ), -10000.0) discrimination = np.ones_like(betas) cumulative_item_counts = item_counts.cumsum() start_indices = np.roll(cumulative_item_counts, 1) start_indices[0] = 0 for ndx in range(n_items): end_ndx = cumulative_item_counts[ndx] start_ndx = start_indices[ndx] + 1 betas[start_ndx:end_ndx] = np.linspace(-1, 1, item_counts[ndx] - 1) betas_roll = np.roll(betas, -1) betas_roll[cumulative_item_counts - 1] = 10000 # Set invalid index to zero, this allows minimal # changes for invalid data and it is corrected # during integration responses[invalid_response_mask] = 0 ############# # 1. Start the iteration loop # 2. estimate discrimination # 3. solve for difficulties # 4. minimize and repeat ############# for iteration in range(options['max_iteration']): previous_discrimination = discrimination.copy() previous_betas = betas.copy() previous_betas_roll = betas_roll.copy() # Quadrature evaluation for values that do not change # This is done during the outer loop to address rounding errors partial_int = np.ones((responses.shape[1], theta.size)) for item_ndx in range(n_items): partial_int *= _graded_partial_integral( theta, betas, betas_roll, discrimination, responses[item_ndx], invalid_response_mask[item_ndx]) # Estimate the distribution if requested distribution_x_weight = latent_pdf(partial_int, iteration) partial_int *= distribution_x_weight # Update the lookup table if necessary if (options['use_LUT'] and options['estimate_distribution'] and iteration > 0): new_options = dict(options) new_options.update({'distribution': latent_pdf.cubic_splines[-1]}) _interp_func = create_beta_LUT((.15, 5.05, 500), (-6, 6, 500), new_options) for item_ndx in range(n_items): # pylint: disable=cell-var-from-loop # Indices into linearized difficulty parameters start_ndx = start_indices[item_ndx] end_ndx = cumulative_item_counts[item_ndx] old_values = _graded_partial_integral( theta, previous_betas, previous_betas_roll, previous_discrimination, responses[item_ndx], invalid_response_mask[item_ndx]) partial_int /= old_values def _local_min_func(estimate): # Solve integrals for diffiulty estimates new_betas = _integral_func(estimate, integral_counts[item_ndx], distribution_x_weight, theta, _interp_func) betas[start_ndx + 1:end_ndx] = new_betas betas_roll[start_ndx:end_ndx - 1] = new_betas discrimination[start_ndx:end_ndx] = estimate new_values = _graded_partial_integral( theta, betas, betas_roll, discrimination, responses[item_ndx], invalid_response_mask[item_ndx]) new_values *= partial_int otpt = np.sum(new_values, axis=1) return -np.log(otpt).sum() # Univariate minimization for discrimination parameter fminbound(_local_min_func, 0.2, 5.0) new_values = _graded_partial_integral( theta, betas, betas_roll, discrimination, responses[item_ndx], invalid_response_mask[item_ndx]) partial_int *= new_values if np.abs(previous_discrimination - discrimination).max() < 1e-3: break # Recompute partial int for later calculations partial_int = np.ones((responses.shape[1], theta.size)) for item_ndx in range(n_items): partial_int *= _graded_partial_integral( theta, betas, betas_roll, discrimination, responses[item_ndx], invalid_response_mask[item_ndx]) # Trim difficulties to conform to standard output # TODO: look where missing values are and place NAN there instead # of appending them to the end output_betas = np.full((n_items, item_counts.max() - 1), np.nan) for ndx, (start_ndx, end_ndx) in enumerate(zip(start_indices, cumulative_item_counts)): output_betas[ndx, :end_ndx - start_ndx - 1] = betas[start_ndx + 1:end_ndx] # Compute statistics for final iteration null_metrics = latent_pdf.compute_metrics( partial_int, latent_pdf.null_distribution * latent_pdf.weights, 0) full_metrics = latent_pdf.compute_metrics(partial_int, distribution_x_weight, latent_pdf.n_points - 3) # Ability estimates eap_abilities = _ability_eap_abstract(partial_int, distribution_x_weight, theta) return { 'Discrimination': discrimination[start_indices], 'Difficulty': output_betas, 'Ability': eap_abilities, 'LatentPDF': latent_pdf, 'AIC': { 'final': full_metrics[0], 'null': null_metrics[0], 'delta': null_metrics[0] - full_metrics[0] }, 'BIC': { 'final': full_metrics[1], 'null': null_metrics[1], 'delta': null_metrics[1] - full_metrics[1] } }