def test_lut_creation(self):
    """Test the lookup table creation function."""
    lut_func = create_beta_LUT((0.5, 2, 500), (-3, 3, 500))

    # Check two (alpha, beta) pairs against the lookup table
    options = validate_estimation_options(None)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    theta, weight = _get_quadrature_points(quad_n, quad_start, quad_stop)
    distribution = options['distribution'](theta)

    alpha1 = 0.89
    beta1 = 1.76
    p_value1 = ((weight * distribution) /
                (1.0 + np.exp(-alpha1 * (theta - beta1)))).sum()
    estimated_beta = lut_func(alpha1, p_value1)
    self.assertAlmostEqual(beta1, estimated_beta, places=4)

    alpha1 = 1.89
    beta1 = -2.34
    p_value1 = ((weight * distribution) /
                (1.0 + np.exp(-alpha1 * (theta - beta1)))).sum()
    estimated_beta = lut_func(alpha1, p_value1)
    self.assertAlmostEqual(beta1, estimated_beta, places=4)
def rasch_conditional(dataset, discrimination=1, options=None):
    """ Estimates the difficulty parameters in a Rasch IRT model

    Args:
        dataset: [items x participants] matrix of True/False Values
        discrimination: scalar of discrimination used in model (default to 1)
        options: dictionary with updates to default options

    Returns:
        difficulty: (1d array) estimates of item difficulties

    Options:
        * max_iteration: int

    Notes:
        This function sets the sum of difficulty parameters to
        zero for identification purposes
    """
    options = validate_estimation_options(options)
    n_items = dataset.shape[0]
    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)

    # Initialize all the difficulty parameters to zeros
    # Set an identifying_mean to zero
    # TODO: Add option to specify position
    betas = np.zeros((n_items,))
    identifying_mean = 0.0

    # Remove the zero and full count values
    unique_sets, counts = trim_response_set_and_counts(unique_sets, counts)

    response_set_sums = unique_sets.sum(axis=0)

    for iteration in range(options['max_iteration']):
        previous_betas = betas.copy()

        for ndx in range(n_items):
            partial_conv = _symmetric_functions(np.delete(betas, ndx))

            def min_func(estimate):
                betas[ndx] = estimate
                full_convolution = np.convolve([1, np.exp(-estimate)],
                                               partial_conv)
                denominator = full_convolution[response_set_sums]
                return (np.sum(unique_sets * betas[:, None], axis=0).dot(counts) +
                        np.log(denominator).dot(counts))

            # Solve for the difficulty parameter
            betas[ndx] = fminbound(min_func, -5, 5)

        # Recenter the difficulties for identification
        betas += (identifying_mean - betas.mean())

        # Check termination criterion
        if np.abs(betas - previous_betas).max() < 1e-3:
            break

    return {'Discrimination': discrimination,
            'Difficulty': betas / discrimination}
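# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the library): recover Rasch item
# difficulties from synthetic dichotomous data. Only the call signature and
# the returned dictionary keys ('Discrimination', 'Difficulty') come from
# rasch_conditional() above; the data generator, seed, and helper name are
# assumptions made for demonstration.
def _demo_rasch_conditional(seed=0):
    rng = np.random.default_rng(seed)
    true_difficulty = np.linspace(-1.5, 1.5, 10)          # 10 items
    theta = rng.standard_normal(500)                      # 500 participants
    prob = 1.0 / (1.0 + np.exp(-(theta[None, :] - true_difficulty[:, None])))
    dataset = (rng.uniform(size=prob.shape) < prob).astype(int)

    result = rasch_conditional(dataset)
    return result['Difficulty']          # should approximate true_difficulty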
def __init__(self, options=None):
    """Constructor for latent estimation class"""
    options = validate_estimation_options(options)

    # Quadrature Parameters
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']
    theta, weights = _get_quadrature_points(quad_n, quad_start, quad_stop)
    self.quad_bounds = (quad_start, quad_stop)

    # The locations and weights to use by default
    self.quadrature_locations = theta
    self.weights = weights
    self.null_distribution = options['distribution'](theta)

    # Triggers to run the estimation or use default
    self.estimate_distribution = options['estimate_distribution']
    self.n_points = (options['number_of_samples']
                     if self.estimate_distribution else 3)

    # Initialize the first cubic-spline class
    # and set the distribution to be an inverted U-shape
    cubic_spline = self._init_cubic_spline()
    cubic_spline.coefficients[self.n_points // 2 + 2] = 1

    self.cubic_splines = [cubic_spline]
def onepl_mml(dataset, alpha=None, options=None): """ Estimates parameters in an 1PL IRT Model. Args: dataset: [items x participants] matrix of True/False Values alpha: [int] discrimination constraint options: dictionary with updates to default options Returns: discrimination: (float) estimate of test discrimination difficulty: (1d array) estimates of item diffiulties Options: * distribution: callable * quadrature_bounds: (float, float) * quadrature_n: int """ options = validate_estimation_options(options) quad_start, quad_stop = options['quadrature_bounds'] quad_n = options['quadrature_n'] # Difficulty Estimation parameters n_items = dataset.shape[0] n_no, n_yes = get_true_false_counts(dataset) scalar = n_yes / (n_yes + n_no) unique_sets, counts = np.unique(dataset, axis=1, return_counts=True) the_sign = convert_responses_to_kernel_sign(unique_sets) discrimination = np.ones((n_items,)) difficulty = np.zeros((n_items,)) # Quadrature Locations theta = _get_quadrature_points(quad_n, quad_start, quad_stop) distribution = options['distribution'](theta) # Inline definition of cost function to minimize def min_func(estimate): discrimination[:] = estimate _mml_abstract(difficulty, scalar, discrimination, theta, distribution, options) partial_int = _compute_partial_integral(theta, difficulty, discrimination, the_sign) # add distribution partial_int *= distribution otpt = integrate.fixed_quad( lambda x: partial_int, quad_start, quad_stop, n=quad_n)[0] return -np.log(otpt).dot(counts) # Perform the minimization if alpha is None: # OnePL Method alpha = fminbound(min_func, 0.25, 10) else: # Rasch Method min_func(alpha) return alpha, difficulty
def ability_eap(dataset, difficulty, discrimination, options=None):
    """Estimates the abilities for dichotomous models.

    Estimates the ability parameters (theta) for dichotomous models via
    expected a posteriori estimation.

    Args:
        dataset: [n_items, n_participants] (2d Array) of measured responses
        difficulty: (1d Array) of difficulty parameters for each item
        discrimination: (1d Array) of discrimination parameters for each item
        options: dictionary with updates to default options

    Returns:
        abilities: (1d array) estimated abilities

    Options:
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int
    """
    options = validate_estimation_options(options)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    if np.atleast_1d(discrimination).size == 1:
        discrimination = np.full(dataset.shape[0], discrimination,
                                 dtype='float')

    the_sign = convert_responses_to_kernel_sign(dataset)

    theta = _get_quadrature_points(quad_n, quad_start, quad_stop)

    partial_int = _compute_partial_integral(theta, difficulty,
                                            discrimination, the_sign)

    # Weight by the input ability distribution
    partial_int *= options['distribution'](theta)

    # Compute the denominator
    denominator = integrate.fixed_quad(lambda x: partial_int,
                                       quad_start, quad_stop, n=quad_n)[0]

    # Compute the numerator
    partial_int *= theta
    numerator = integrate.fixed_quad(lambda x: partial_int,
                                     quad_start, quad_stop, n=quad_n)[0]

    return numerator / denominator
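# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the library): score participants
# with ability_eap() given known item parameters. The simulated responses and
# helper name are assumptions; the call signature and the returned array of
# one ability per participant come from the function above.
def _demo_ability_eap(seed=1):
    rng = np.random.default_rng(seed)
    difficulty = np.linspace(-2, 2, 12)
    discrimination = np.full(12, 1.2)
    theta = rng.standard_normal(250)
    prob = 1.0 / (1.0 + np.exp(-discrimination[:, None] *
                               (theta[None, :] - difficulty[:, None])))
    dataset = (rng.uniform(size=prob.shape) < prob).astype(int)

    abilities = ability_eap(dataset, difficulty, discrimination)
    return abilities                                      # shape (250,)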
def test_no_input(self): """Testing validation for No input.""" result = validate_estimation_options(None) x = np.linspace(-3, 3, 101) expected = stats.norm(0, 1).pdf(x) self.assertEqual(len(result.keys()), 4) self.assertEqual(result['max_iteration'], 25) self.assertEqual(result['quadrature_n'], 61) self.assertTupleEqual(result['quadrature_bounds'], (-5, 5)) result = result['distribution'](x) np.testing.assert_array_almost_equal(expected, result, decimal=6)
def ability_3pl_map(dataset, difficulty, discrimination, guessing,
                    options=None):
    """Estimates the abilities for dichotomous models.

    Estimates the ability parameters (theta) for dichotomous models via
    maximum a posteriori estimation.

    Args:
        dataset: [n_items, n_participants] (2d Array) of measured responses
        difficulty: (1d Array) of difficulty parameters for each item
        discrimination: (1d Array) of discrimination parameters for each item
        guessing: (1d Array) of guessing parameters for each item
        options: dictionary with updates to default options

    Returns:
        abilities: (1d array) estimated abilities

    Options:
        * distribution: callable

    Notes:
        If distribution is uniform, please use ability_mle instead. A large
        set of probability distributions can be found in scipy.stats
        https://docs.scipy.org/doc/scipy/reference/stats.html
    """
    options = validate_estimation_options(options)
    distribution = options['distribution']

    n_takers = dataset.shape[1]
    the_sign = convert_responses_to_kernel_sign(dataset)
    thetas = np.zeros((n_takers,))

    # Pre-Compute guessing offset
    multiplier = 1.0 - guessing
    additive = guessing[:, None] * (the_sign == -1).astype('float')

    for ndx in range(n_takers):
        # pylint: disable=cell-var-from-loop
        scalar = the_sign[:, ndx] * discrimination
        adder = additive[:, ndx]

        def _theta_min(theta):
            otpt = multiplier / (1.0 + np.exp(scalar * (theta - difficulty)))
            otpt += adder

            return -(np.log(otpt).sum() + np.log(distribution(theta)))

        # Solves for the ability for each person
        thetas[ndx] = fminbound(_theta_min, -6, 6)

    return thetas
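# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the library): MAP abilities under a
# 3PL model with a common guessing floor. The data generator and helper name
# are assumptions; the argument order matches ability_3pl_map() above.
def _demo_ability_3pl_map(seed=2):
    rng = np.random.default_rng(seed)
    difficulty = np.linspace(-2, 2, 15)
    discrimination = np.full(15, 1.0)
    guessing = np.full(15, 0.2)
    theta = rng.standard_normal(300)
    prob = guessing[:, None] + (1 - guessing[:, None]) / (
        1.0 + np.exp(-discrimination[:, None] *
                     (theta[None, :] - difficulty[:, None])))
    dataset = (rng.uniform(size=prob.shape) < prob).astype(int)

    return ability_3pl_map(dataset, difficulty, discrimination, guessing)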
def onepl_jml(dataset, options=None):
    """ Estimates parameters in a 1PL IRT Model.

    Args:
        dataset: [items x participants] matrix of True/False Values
        options: dictionary with updates to default options

    Returns:
        discrimination: (float) estimate of test discrimination
        difficulty: (1d array) estimates of item difficulties

    Options:
        * max_iteration: int
    """
    options = validate_estimation_options(options)

    # Defines item parameter update function
    def _item_min_func(n_items, alphas, thetas,
                       betas, the_sign, counts):
        # pylint: disable=cell-var-from-loop
        def _alpha_min(estimate):
            # Initialize cost evaluation to zero
            cost = 0
            for ndx in range(n_items):
                # pylint: disable=cell-var-from-loop
                scalar = the_sign[ndx, :] * estimate

                def _beta_min(beta):
                    otpt = np.exp(scalar * (thetas - beta))
                    return np.log1p(otpt).dot(counts)

                # Solves for the difficulty parameter for a given item at
                # a specific discrimination parameter
                betas[ndx] = fminbound(_beta_min, -6, 6)
                cost += _beta_min(betas[ndx])

            return cost

        min_alpha = fminbound(_alpha_min, 0.25, 5)
        alphas[:] = min_alpha

        return alphas, betas

    result = _jml_abstract(dataset, _item_min_func,
                           discrimination=1,
                           max_iter=options['max_iteration'])
    result['Discrimination'] = result['Discrimination'][0]

    return result
def test_population_update(self): """Testing update to options.""" x = np.linspace(-3, 3, 101) expected = stats.norm(2, 1).pdf(x) new_parameters = {'distribution': stats.norm(2, 1).pdf} output = validate_estimation_options(new_parameters) self.assertEqual(len(output.keys()), self.expected_length) result = output['distribution'](x) np.testing.assert_array_almost_equal(expected, result, decimal=6) new_parameters = { 'quadrature_bounds': (-7, -5), 'quadrature_n': 13, 'hyper_quadrature_n': 44, 'estimate_distribution': True } output = validate_estimation_options(new_parameters) self.assertEqual(output['max_iteration'], 25) self.assertEqual(output['quadrature_n'], 13) self.assertEqual(output['hyper_quadrature_n'], 44) self.assertEqual(output['estimate_distribution'], True) self.assertTupleEqual(output['quadrature_bounds'], (-7, -5)) self.assertEqual(len(output.keys()), self.expected_length) new_parameters = {'max_iteration': 43} output = validate_estimation_options(new_parameters) self.assertEqual(output['max_iteration'], 43) self.assertEqual(len(output.keys()), self.expected_length) new_parameters = {'use_LUT': False, 'number_of_samples': 142} output = validate_estimation_options(new_parameters) self.assertEqual(output['use_LUT'], False) self.assertEqual(output['number_of_samples'], 142) self.assertEqual(len(output.keys()), self.expected_length)
def test_population_update(self): """Testing update to options.""" x = np.linspace(-3, 3, 101) expected = stats.norm(2, 1).pdf(x) new_parameters = {'distribution': stats.norm(2, 1).pdf} output = validate_estimation_options(new_parameters) self.assertEqual(len(output.keys()), 4) result = output['distribution'](x) np.testing.assert_array_almost_equal(expected, result, decimal=6) new_parameters = {'quadrature_bounds': (-7, -5), 'quadrature_n': 13} output = validate_estimation_options(new_parameters) self.assertEqual(output['max_iteration'], 25) self.assertEqual(output['quadrature_n'], 13) self.assertTupleEqual(output['quadrature_bounds'], (-7, -5)) self.assertEqual(len(output.keys()), 4) new_parameters = {'max_iteration': 43} output = validate_estimation_options(new_parameters) self.assertEqual(output['max_iteration'], 43) self.assertEqual(len(output.keys()), 4)
def test_no_input(self): """Testing validation for No input.""" result = validate_estimation_options(None) x = np.linspace(-3, 3, 101) expected = stats.norm(0, 1).pdf(x) self.assertEqual(len(result.keys()), self.expected_length) self.assertEqual(result['max_iteration'], 25) self.assertEqual(result['quadrature_n'], 41) self.assertEqual(result['hyper_quadrature_n'], 41) self.assertEqual(result['use_LUT'], True) self.assertEqual(result['estimate_distribution'], False) self.assertEqual(result['number_of_samples'], 9) self.assertTupleEqual(result['quadrature_bounds'], (-4.5, 4.5)) result = result['distribution'](x) np.testing.assert_array_almost_equal(expected, result, decimal=6)
def twopl_jml(dataset, options=None): """ Estimates parameters in a 2PL IRT model. Args: dataset: [items x participants] matrix of True/False Values options: dictionary with updates to default options Returns: discrimination: (1d array) estimates of item discrimination difficulty: (1d array) estimates of item difficulties Options: * max_iteration: int """ options = validate_estimation_options(options) # Defines item parameter update function def _item_min_func(n_items, alphas, thetas, betas, the_sign, counts): # pylint: disable=cell-var-from-loop for ndx in range(n_items): def _alpha_beta_min(estimates): otpt = np.exp( (thetas - estimates[1]) * the_sign[ndx, :] * estimates[0]) return np.log1p(otpt).dot(counts) # Solves jointly for parameters using numerical derivatives otpt = fmin_slsqp(_alpha_beta_min, (alphas[ndx], betas[ndx]), bounds=[(0.25, 4), (-6, 6)], disp=False) alphas[ndx], betas[ndx] = otpt return alphas, betas return _jml_abstract(dataset, _item_min_func, discrimination=1, max_iter=options['max_iteration'])
def rasch_jml(dataset, discrimination=1, options=None):
    """ Estimates difficulty parameters in an IRT model

    Args:
        dataset: [items x participants] matrix of True/False Values
        discrimination: scalar of discrimination used in model (default to 1)
        options: dictionary with updates to default options

    Returns:
        difficulty: (1d array) estimates of item difficulties

    Options:
        * max_iteration: int
    """
    options = validate_estimation_options(options)

    # Defines item parameter update function
    def _item_min_func(n_items, alphas, thetas,
                       betas, the_sign, counts):
        # pylint: disable=cell-var-from-loop
        for ndx in range(n_items):
            scalar = alphas[0] * the_sign[ndx, :]

            def _beta_min(beta):
                otpt = np.exp(scalar * (thetas - beta))
                return np.log1p(otpt).dot(counts)

            # Solves for the beta parameters
            betas[ndx] = fminbound(_beta_min, -6, 6)

        return alphas, betas

    result = _jml_abstract(dataset, _item_min_func, discrimination,
                           options['max_iteration'])

    return result
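# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the library): joint maximum
# likelihood difficulty estimates at a fixed discrimination. The synthetic
# data recipe and helper name are assumptions; rasch_jml() is called exactly
# as defined above and returns whatever dictionary _jml_abstract builds.
def _demo_rasch_jml(seed=3):
    rng = np.random.default_rng(seed)
    true_difficulty = np.linspace(-1, 1, 8)
    theta = rng.standard_normal(400)
    prob = 1.0 / (1.0 + np.exp(-(theta[None, :] - true_difficulty[:, None])))
    dataset = (rng.uniform(size=prob.shape) < prob).astype(int)

    result = rasch_jml(dataset, discrimination=1)
    return result      # expected keys: 'Discrimination', 'Difficulty'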
def twopl_full(dataset, options=None): """ Estimates parameters in a 2PL IRT model. Please use twopl_mml instead. Args: dataset: [items x participants] matrix of True/False Values options: dictionary with updates to default options Returns: discrimination: (1d array) estimates of item discrimination difficulty: (1d array) estimates of item difficulties Options: * max_iteration: int * distribution: callable * quadrature_bounds: (float, float) * quadrature_n: int """ options = validate_estimation_options(options) quad_start, quad_stop = options['quadrature_bounds'] quad_n = options['quadrature_n'] n_items = dataset.shape[0] unique_sets, counts = np.unique(dataset, axis=1, return_counts=True) the_sign = convert_responses_to_kernel_sign(unique_sets) theta = _get_quadrature_points(quad_n, quad_start, quad_stop) distribution = options['distribution'](theta) discrimination = np.ones((n_items,)) difficulty = np.zeros((n_items,)) for iteration in range(options['max_iteration']): previous_discrimination = discrimination.copy() # Quadrature evaluation for values that do not change partial_int = _compute_partial_integral(theta, difficulty, discrimination, the_sign) partial_int *= distribution for item_ndx in range(n_items): # pylint: disable=cell-var-from-loop local_int = _compute_partial_integral(theta, difficulty[item_ndx, None], discrimination[item_ndx, None], the_sign[item_ndx, None]) partial_int /= local_int def min_func_local(estimate): discrimination[item_ndx] = estimate[0] difficulty[item_ndx] = estimate[1] estimate_int = _compute_partial_integral(theta, difficulty[item_ndx, None], discrimination[item_ndx, None], the_sign[item_ndx, None]) estimate_int *= partial_int otpt = integrate.fixed_quad( lambda x: estimate_int, quad_start, quad_stop, n=quad_n)[0] return -np.log(otpt).dot(counts) # Two parameter solver that doesn't need derivatives initial_guess = np.concatenate((discrimination[item_ndx, None], difficulty[item_ndx, None])) fmin_slsqp(min_func_local, initial_guess, disp=False, bounds=[(0.25, 4), (-4, 4)]) # Update the partial integral based on the new found values estimate_int = _compute_partial_integral(theta, difficulty[item_ndx, None], discrimination[item_ndx, None], the_sign[item_ndx, None]) # update partial integral partial_int *= estimate_int if(np.abs(discrimination - previous_discrimination).max() < 1e-3): break return discrimination, difficulty
def onepl_mml(dataset, alpha=None, options=None):
    """ Estimates parameters in a 1PL IRT Model.

    Args:
        dataset: [items x participants] matrix of True/False Values
        alpha: [float] discrimination constraint
        options: dictionary with updates to default options

    Returns:
        discrimination: (float) estimate of test discrimination
        difficulty: (1d array) estimates of item difficulties

    Options:
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int
    """
    options = validate_estimation_options(options)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    # Difficulty Estimation parameters
    n_items = dataset.shape[0]
    n_no, n_yes = get_true_false_counts(dataset)
    scalar = n_yes / (n_yes + n_no)

    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)
    invalid_response_mask = unique_sets == INVALID_RESPONSE
    unique_sets[invalid_response_mask] = 0  # For indexing, fixed later

    discrimination = np.ones((n_items,))
    difficulty = np.zeros((n_items,))

    # Quadrature Locations
    theta, weights = _get_quadrature_points(quad_n, quad_start, quad_stop)
    distribution = options['distribution'](theta)
    distribution_x_weights = distribution * weights

    # Inline definition of cost function to minimize
    def min_func(estimate):
        discrimination[:] = estimate
        _mml_abstract(difficulty, scalar, discrimination,
                      theta, distribution_x_weights)

        partial_int = np.ones((unique_sets.shape[1], theta.size))
        for ndx in range(n_items):
            partial_int *= _compute_partial_integral(
                theta, difficulty[ndx], discrimination[ndx],
                unique_sets[ndx], invalid_response_mask[ndx])
        partial_int *= distribution_x_weights

        # Compute the integral over theta
        otpt = np.sum(partial_int, axis=1)

        return -np.log(otpt).dot(counts)

    # Perform the minimization
    if alpha is None:
        # OnePL Method
        alpha = fminbound(min_func, 0.25, 10)
    else:
        # Rasch Method
        min_func(alpha)

    return {"Discrimination": alpha,
            "Difficulty": difficulty}
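# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the library): estimate a common
# discrimination plus per-item difficulties with onepl_mml(). The synthetic
# data and helper name are assumptions; the returned keys ('Discrimination',
# 'Difficulty') come from the function above.
def _demo_onepl_mml(seed=4):
    rng = np.random.default_rng(seed)
    true_difficulty = np.linspace(-1.5, 1.5, 10)
    true_alpha = 1.3
    theta = rng.standard_normal(600)
    prob = 1.0 / (1.0 + np.exp(-true_alpha *
                               (theta[None, :] - true_difficulty[:, None])))
    dataset = (rng.uniform(size=prob.shape) < prob).astype(int)

    result = onepl_mml(dataset)     # alpha=None -> discrimination is estimated
    return result['Discrimination'], result['Difficulty']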
def grm_mml_eap(dataset, options=None): """Estimate parameters for graded response model. Estimate the discrimination and difficulty parameters for a graded response model using a mixed Bayesian / Marginal Maximum Likelihood algorithm, good for small sample sizes Args: dataset: [n_items, n_participants] 2d array of measured responses options: dictionary with updates to default options Returns: results_dictionary: * Discrimination: (1d array) estimate of item discriminations * Difficulty: (2d array) estimates of item difficulties by item thresholds * LatentPDF: (object) contains information about the pdf * Rayleigh_Scale: (int) Rayleigh scale value of the discrimination prior * AIC: (dictionary) null model and final model AIC value * BIC: (dictionary) null model and final model BIC value Options: * estimate_distribution: Boolean * number_of_samples: int >= 5 * max_iteration: int * distribution: callable * quadrature_bounds: (float, float) * quadrature_n: int * hyper_quadrature_n: int """ options = validate_estimation_options(options) cpr_result = condition_polytomous_response(dataset, trim_ends=False) responses, item_counts, valid_response_mask = cpr_result invalid_response_mask = ~valid_response_mask n_items = responses.shape[0] # Only use LUT _integral_func = _solve_integral_equations_LUT _interp_func = create_beta_LUT((.15, 5.05, 500), (-6, 6, 500), options) # Quadrature Locations latent_pdf = LatentPDF(options) theta = latent_pdf.quadrature_locations # Compute the values needed for integral equations integral_counts = list() for ndx in range(n_items): temp_output = _solve_for_constants(responses[ndx, valid_response_mask[ndx]]) integral_counts.append(temp_output) # Initialize difficulty parameters for estimation betas = np.full((item_counts.sum(), ), -10000.0) discrimination = np.ones_like(betas) cumulative_item_counts = item_counts.cumsum() start_indices = np.roll(cumulative_item_counts, 1) start_indices[0] = 0 for ndx in range(n_items): end_ndx = cumulative_item_counts[ndx] start_ndx = start_indices[ndx] + 1 betas[start_ndx:end_ndx] = np.linspace(-1, 1, item_counts[ndx] - 1) betas_roll = np.roll(betas, -1) betas_roll[cumulative_item_counts - 1] = 10000 # Set invalid index to zero, this allows minimal # changes for invalid data and it is corrected # during integration responses[invalid_response_mask] = 0 # Prior Parameters ray_scale = 1.0 eap_options = { 'distribution': stats.rayleigh(loc=.25, scale=ray_scale).pdf, 'quadrature_n': options['hyper_quadrature_n'], 'quadrature_bounds': (0.25, 5) } prior_pdf = LatentPDF(eap_options) alpha_evaluation = np.zeros((eap_options['quadrature_n'], )) # Meta-Prior Parameter hyper_options = { 'distribution': stats.lognorm(loc=0, s=0.25).pdf, 'quadrature_n': options['hyper_quadrature_n'], 'quadrature_bounds': (0.1, 5) } hyper_pdf = LatentPDF(hyper_options) hyper_evaluation = np.zeros((hyper_options['quadrature_n'], )) base_hyper = (hyper_pdf.weights * hyper_pdf.null_distribution).astype('float128') linear_hyper = base_hyper * hyper_pdf.quadrature_locations for iteration in range(options['max_iteration']): previous_discrimination = discrimination.copy() previous_betas = betas.copy() previous_betas_roll = betas_roll.copy() # Quadrature evaluation for values that do not change # This is done during the outer loop to address rounding errors partial_int = np.ones((responses.shape[1], theta.size)) for item_ndx in range(n_items): partial_int *= _graded_partial_integral( theta, betas, betas_roll, discrimination, responses[item_ndx], 
invalid_response_mask[item_ndx]) # Estimate the distribution if requested distribution_x_weight = latent_pdf(partial_int, iteration) partial_int *= distribution_x_weight # Update the lookup table if necessary if (options['estimate_distribution'] and iteration > 0): new_options = dict(options) new_options.update({'distribution': latent_pdf.cubic_splines[-1]}) _interp_func = create_beta_LUT((.15, 5.05, 500), (-6, 6, 500), new_options) # EAP Discrimination Parameter discrimination_pdf = stats.rayleigh(loc=0.25, scale=ray_scale).pdf base_alpha = (prior_pdf.weights * discrimination_pdf( prior_pdf.quadrature_locations)).astype('float128') linear_alpha = (base_alpha * prior_pdf.quadrature_locations).astype('float128') for item_ndx in range(n_items): # pylint: disable=cell-var-from-loop # Indices into linearized difficulty parameters start_ndx = start_indices[item_ndx] end_ndx = cumulative_item_counts[item_ndx] old_values = _graded_partial_integral( theta, previous_betas, previous_betas_roll, previous_discrimination, responses[item_ndx], invalid_response_mask[item_ndx]) partial_int /= old_values def _local_min_func(estimate): # Solve integrals for diffiulty estimates new_betas = _integral_func(estimate, integral_counts[item_ndx], distribution_x_weight, theta, _interp_func) betas[start_ndx + 1:end_ndx] = new_betas betas_roll[start_ndx:end_ndx - 1] = new_betas discrimination[start_ndx:end_ndx] = estimate new_values = _graded_partial_integral( theta, betas, betas_roll, discrimination, responses[item_ndx], invalid_response_mask[item_ndx]) new_values *= partial_int otpt = np.sum(new_values, axis=1) return np.log(otpt.clip(1e-313, np.inf)).sum() # Mean Discrimination Value for ndx, disc_location in enumerate( prior_pdf.quadrature_locations): alpha_evaluation[ndx] = _local_min_func(disc_location) alpha_evaluation -= alpha_evaluation.max() total_probability = np.exp(alpha_evaluation.astype('float128')) numerator = np.sum(total_probability * linear_alpha) denominator = np.sum(total_probability * base_alpha) alpha_eap = numerator / denominator # Reset the Value the updated discrimination estimation _local_min_func(alpha_eap.astype('float64')) new_values = _graded_partial_integral( theta, betas, betas_roll, discrimination, responses[item_ndx], invalid_response_mask[item_ndx]) partial_int *= new_values # Compute the Hyper prior mean value for ndx, scale_value in enumerate(hyper_pdf.quadrature_locations): temp_distribution = stats.rayleigh(loc=0.25, scale=scale_value).pdf hyper_evaluation[ndx] = np.log( temp_distribution(discrimination) + 1e-313).sum() hyper_evaluation -= hyper_evaluation.max() hyper_evaluation = np.exp(hyper_evaluation.astype('float128')) ray_scale = (np.sum(hyper_evaluation * linear_hyper) / np.sum(hyper_evaluation * base_hyper)).astype('float64') # Check Termination Criterion if np.abs(previous_discrimination - discrimination).max() < 1e-3: break # Recompute partial int for later calculations partial_int = np.ones((responses.shape[1], theta.size)) for item_ndx in range(n_items): partial_int *= _graded_partial_integral( theta, betas, betas_roll, discrimination, responses[item_ndx], invalid_response_mask[item_ndx]) # Trim difficulties to conform to standard output # TODO: look where missing values are and place NAN there instead # of appending them to the end output_betas = np.full((n_items, item_counts.max() - 1), np.nan) for ndx, (start_ndx, end_ndx) in enumerate(zip(start_indices, cumulative_item_counts)): output_betas[ndx, :end_ndx - start_ndx - 1] = betas[start_ndx + 1:end_ndx] # Compute 
statistics for final iteration null_metrics = latent_pdf.compute_metrics( partial_int, latent_pdf.null_distribution * latent_pdf.weights, 0) full_metrics = latent_pdf.compute_metrics(partial_int, distribution_x_weight, latent_pdf.n_points - 3) # Ability estimates eap_abilities = _ability_eap_abstract(partial_int, distribution_x_weight, theta) return { 'Discrimination': discrimination[start_indices], 'Difficulty': output_betas, 'Ability': eap_abilities, 'LatentPDF': latent_pdf, 'Rayleigh_Scale': ray_scale, 'AIC': { 'final': full_metrics[0], 'null': null_metrics[0], 'delta': null_metrics[0] - full_metrics[0] }, 'BIC': { 'final': full_metrics[1], 'null': null_metrics[1], 'delta': null_metrics[1] - full_metrics[1] } }
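# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the library): graded-response
# estimation with the Bayesian/MML hybrid above. The crude ordinal data
# generator (categories coded 1..4) and the helper name are assumptions;
# the returned dictionary keys come from grm_mml_eap() above.
def _demo_grm_mml_eap(seed=5):
    rng = np.random.default_rng(seed)
    n_items, n_people = 8, 500
    item_location = np.linspace(-1, 1, n_items)
    theta = rng.standard_normal(n_people)
    latent = (theta[None, :] - item_location[:, None] +
              rng.normal(size=(n_items, n_people)))
    dataset = np.digitize(latent, bins=[-1.0, 0.0, 1.0]) + 1   # values 1..4

    result = grm_mml_eap(dataset)
    return (result['Discrimination'], result['Difficulty'],
            result['Rayleigh_Scale'])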
def threepl_mml(dataset, options=None): """ Estimates parameters in a 3PL IRT model. Args: dataset: [items x participants] matrix of True/False Values options: dictionary with updates to default options Returns: discrimination: (1d array) estimate of item discriminations difficulty: (1d array) estimates of item diffiulties guessing: (1d array) estimates of item guessing Options: * max_iteration: int * distribution: callable * quadrature_bounds: (float, float) * quadrature_n: int """ options = validate_estimation_options(options) quad_start, quad_stop = options['quadrature_bounds'] quad_n = options['quadrature_n'] n_items = dataset.shape[0] n_no, n_yes = get_true_false_counts(dataset) scalar = n_yes / (n_yes + n_no) unique_sets, counts = np.unique(dataset, axis=1, return_counts=True) the_sign = convert_responses_to_kernel_sign(unique_sets) theta, weights = _get_quadrature_points(quad_n, quad_start, quad_stop) distribution = options['distribution'](theta) distribution_x_weights = distribution * weights # Perform the minimization discrimination = np.ones((n_items,)) difficulty = np.zeros((n_items,)) guessing = np.zeros((n_items,)) local_scalar = np.zeros((1, 1)) for iteration in range(options['max_iteration']): previous_discrimination = discrimination.copy() # Quadrature evaluation for values that do not change # This is done during the outer loop to address rounding errors partial_int = _compute_partial_integral_3pl(theta, difficulty, discrimination, guessing, the_sign) partial_int *= distribution for ndx in range(n_items): # pylint: disable=cell-var-from-loop # remove contribution from current item local_int = _compute_partial_integral_3pl(theta, difficulty[ndx, None], discrimination[ndx, None], guessing[ndx, None], the_sign[ndx, None]) partial_int /= local_int def min_func_local(estimate): discrimination[ndx] = estimate[0] guessing[ndx] = estimate[1] local_scalar[0, 0] = (scalar[ndx] - guessing[ndx]) / (1. - guessing[ndx]) _mml_abstract(difficulty[ndx, None], local_scalar, discrimination[ndx, None], theta, distribution_x_weights) estimate_int = _compute_partial_integral_3pl(theta, difficulty[ndx, None], discrimination[ndx, None], guessing[ndx, None], the_sign[ndx, None]) estimate_int *= partial_int otpt = integrate.fixed_quad( lambda x: estimate_int, quad_start, quad_stop, n=quad_n)[0] return -np.log(otpt).dot(counts) # Solve for the discrimination parameters initial_guess = [discrimination[ndx], guessing[ndx]] fmin_slsqp(min_func_local, initial_guess, bounds=([0.25, 4], [0, .33]), iprint=False) # Update the partial integral based on the new found values estimate_int = _compute_partial_integral_3pl(theta, difficulty[ndx, None], discrimination[ndx, None], guessing[ndx, None], the_sign[ndx, None]) # update partial integral partial_int *= estimate_int if np.abs(discrimination - previous_discrimination).max() < 1e-3: break return {'Discrimination': discrimination, 'Difficulty': difficulty, 'Guessing': guessing}
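# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the library): three-parameter
# logistic estimation on synthetic dichotomous data. The data generator and
# helper name are assumptions; the returned keys ('Discrimination',
# 'Difficulty', 'Guessing') come from threepl_mml() above.
def _demo_threepl_mml(seed=6):
    rng = np.random.default_rng(seed)
    difficulty = np.linspace(-1.5, 1.5, 12)
    discrimination = rng.uniform(0.8, 1.8, 12)
    guessing = np.full(12, 0.15)
    theta = rng.standard_normal(1000)
    prob = guessing[:, None] + (1 - guessing[:, None]) / (
        1.0 + np.exp(-discrimination[:, None] *
                     (theta[None, :] - difficulty[:, None])))
    dataset = (rng.uniform(size=prob.shape) < prob).astype(int)

    return threepl_mml(dataset)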
def gum_mml(dataset, delta_sign=(0, 1), options=None): """Estimate parameters for graded unfolding model. Estimate the discrimination, delta and threshold parameters for the graded unfolding model using marginal maximum likelihood. Args: dataset: [n_items, n_participants] 2d array of measured responses delta_sign: (tuple) (ndx, sign: [+1 | -1]) sets the sign of the ndx delta value to positive or negative options: dictionary with updates to default options Returns: discrimination: (1d array) estimates of item discrimination delta: (1d array) estimates of item folding values difficulty: (2d array) estimates of item thresholds Options: * estimate_distribution: Boolean * number_of_samples: int >= 5 * max_iteration: int * distribution: callable * quadrature_bounds: (float, float) * quadrature_n: int """ options = validate_estimation_options(options) cpr_result = condition_polytomous_response(dataset, trim_ends=False, _reference=0.0) responses, item_counts, valid_response_mask = cpr_result invalid_response_mask = ~valid_response_mask n_items = responses.shape[0] # Interpolation Locations # Quadrature Locations latent_pdf = LatentPDF(options) theta = latent_pdf.quadrature_locations # Initialize item parameters for iterations discrimination = np.ones((n_items, )) betas = np.full((n_items, item_counts.max() - 1), np.nan) delta = np.zeros((n_items, )) partial_int = np.ones((responses.shape[1], theta.size)) # Set initial estimates to evenly spaced for ndx in range(n_items): item_length = item_counts[ndx] - 1 betas[ndx, :item_length] = np.linspace(-1, 1, item_length) # This is the index associated with "folding" about the center fold_span = ((item_counts[:, None] - 0.5) - np.arange(betas.shape[1] + 1)[None, :]) # Sets the first value for the delta_ndx = delta_sign[0] delta_multiplier = np.sign(delta_sign[1]) # Set invalid index to zero, this allows minimal # changes for invalid data and it is corrected # during integration responses[invalid_response_mask] = 0 ############# # 1. Start the iteration loop # 2. Estimate Dicriminatin/Difficulty Jointly # 3. Integrate of theta # 4. 
minimize and repeat ############# for iteration in range(options['max_iteration']): previous_discrimination = discrimination.copy() previous_betas = betas.copy() previous_delta = delta.copy() # Quadrature evaluation for values that do not change # This is done during the outer loop to address rounding errors # and for speed partial_int = np.ones((responses.shape[1], theta.size)) for item_ndx in range(n_items): partial_int *= _unfold_partial_integral( theta, delta[item_ndx], betas[item_ndx], discrimination[item_ndx], fold_span[item_ndx], responses[item_ndx], invalid_response_mask[item_ndx]) # Estimate the distribution if requested distribution_x_weight = latent_pdf(partial_int, iteration) partial_int *= distribution_x_weight # Loop over each item and solve for the alpha / beta parameters for item_ndx in range(n_items): # pylint: disable=cell-var-from-loop item_length = item_counts[item_ndx] - 1 # Remove the previous output old_values = _unfold_partial_integral( theta, previous_delta[item_ndx], previous_betas[item_ndx], previous_discrimination[item_ndx], fold_span[item_ndx], responses[item_ndx], invalid_response_mask[item_ndx]) partial_int /= old_values new_betas = np.full((betas.shape[1], ), np.nan) def _local_min_func(estimate): new_betas[:item_length] = estimate[2:] new_values = _unfold_partial_integral( theta, estimate[1], new_betas, estimate[0], fold_span[item_ndx], responses[item_ndx], invalid_response_mask[item_ndx]) new_values *= partial_int otpt = np.sum(new_values, axis=1) return -np.log(otpt).sum() # Initial Guess of Item Parameters initial_guess = np.concatenate( ([discrimination[item_ndx]], [delta[item_ndx]], betas[item_ndx, :item_length])) otpt = fmin_slsqp(_local_min_func, initial_guess, disp=False, bounds=[(.25, 4)] + [(-2, 2)] + [(-6, 6)] * item_length) discrimination[item_ndx] = otpt[0] delta[item_ndx] = otpt[1] betas[item_ndx, :item_length] = otpt[2:] new_values = _unfold_partial_integral( theta, delta[item_ndx], betas[item_ndx], discrimination[item_ndx], fold_span[item_ndx], responses[item_ndx], invalid_response_mask[item_ndx]) partial_int *= new_values if np.abs(previous_discrimination - discrimination).max() < 1e-3: break # Adjust delta values to conform to delta sign delta *= np.sign(delta[delta_ndx]) * delta_multiplier # Recompute partial int for later calculations partial_int = np.ones((responses.shape[1], theta.size)) for item_ndx in range(n_items): partial_int *= _unfold_partial_integral( theta, delta[item_ndx], betas[item_ndx], discrimination[item_ndx], fold_span[item_ndx], responses[item_ndx], invalid_response_mask[item_ndx]) # Compute statistics for final iteration null_metrics = latent_pdf.compute_metrics( partial_int, latent_pdf.null_distribution * latent_pdf.weights, 0) full_metrics = latent_pdf.compute_metrics(partial_int, distribution_x_weight, latent_pdf.n_points - 3) # Ability estimates eap_abilities = _ability_eap_abstract(partial_int, distribution_x_weight, theta) return { 'Discrimination': discrimination, 'Difficulties': np.c_[betas, np.zeros( (delta.size, )), -betas[:, ::-1]] + delta[:, None], 'Ability': eap_abilities, 'Delta': delta, 'Tau': betas, 'LatentPDF': latent_pdf, 'AIC': { 'final': full_metrics[0], 'null': null_metrics[0], 'delta': null_metrics[0] - full_metrics[0] }, 'BIC': { 'final': full_metrics[1], 'null': null_metrics[1], 'delta': null_metrics[1] - full_metrics[1] } }
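# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the library): graded unfolding model
# estimation. The ordinal data generator (categories coded 1..4, peaking when
# theta is near the item location) and the helper name are assumptions; the
# delta_sign argument and returned dictionary keys come from gum_mml() above.
def _demo_gum_mml(seed=10):
    rng = np.random.default_rng(seed)
    n_items, n_people = 6, 600
    item_location = np.linspace(-1, 1, n_items)
    theta = rng.standard_normal(n_people)
    distance = np.abs(theta[None, :] - item_location[:, None])
    latent = -distance + rng.normal(size=(n_items, n_people))
    dataset = np.digitize(latent, bins=[-2.0, -1.0, -0.5]) + 1

    result = gum_mml(dataset, delta_sign=(0, 1))
    return result['Discrimination'], result['Delta'], result['Tau']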
def pcm_mml(dataset, options=None): """Estimate parameters for partial credit model. Estimate the discrimination and difficulty parameters for the partial credit model using marginal maximum likelihood. Args: dataset: [n_items, n_participants] 2d array of measured responses options: dictionary with updates to default options Returns: discrimination: (1d array) estimates of item discrimination difficulty: (2d array) estimates of item difficulties x item thresholds Options: * estimate_distribution: Boolean * number_of_samples: int >= 5 * max_iteration: int * distribution: callable * quadrature_bounds: (float, float) * quadrature_n: int """ options = validate_estimation_options(options) cpr_result = condition_polytomous_response(dataset, trim_ends=False, _reference=0.0) responses, item_counts, valid_response_mask = cpr_result invalid_response_mask = ~valid_response_mask n_items = responses.shape[0] # Quadrature Locations latent_pdf = LatentPDF(options) theta = latent_pdf.quadrature_locations # Initialize difficulty parameters for estimation betas = np.full((n_items, item_counts.max()), np.nan) discrimination = np.ones((n_items, )) partial_int = np.ones((responses.shape[1], theta.size)) # Not all items need to have the same # number of response categories betas[:, 0] = 0 for ndx in range(n_items): betas[ndx, 1:item_counts[ndx]] = np.linspace(-1, 1, item_counts[ndx] - 1) # Set invalid index to zero, this allows minimal # changes for invalid data and it is corrected # during integration responses[invalid_response_mask] = 0 ############# # 1. Start the iteration loop # 2. Estimate Dicriminatin/Difficulty Jointly # 3. Integrate of theta # 4. minimize and repeat ############# for iteration in range(options['max_iteration']): previous_discrimination = discrimination.copy() previous_betas = betas.copy() # Quadrature evaluation for values that do not change # This is done during the outer loop to address rounding errors # and for speed partial_int = np.ones((responses.shape[1], theta.size)) for item_ndx in range(n_items): partial_int *= _credit_partial_integral( theta, betas[item_ndx], discrimination[item_ndx], responses[item_ndx], invalid_response_mask[item_ndx]) # Estimate the distribution if requested distribution_x_weight = latent_pdf(partial_int, iteration) partial_int *= distribution_x_weight # Loop over each item and solve for the alpha / beta parameters for item_ndx in range(n_items): # pylint: disable=cell-var-from-loop item_length = item_counts[item_ndx] new_betas = np.zeros((item_length)) # Remove the previous output old_values = _credit_partial_integral( theta, previous_betas[item_ndx], previous_discrimination[item_ndx], responses[item_ndx], invalid_response_mask[item_ndx]) partial_int /= old_values def _local_min_func(estimate): new_betas[1:] = estimate[1:] new_values = _credit_partial_integral( theta, new_betas, estimate[0], responses[item_ndx], invalid_response_mask[item_ndx]) new_values *= partial_int otpt = np.sum(new_values, axis=1) return -np.log(otpt).sum() # Initial Guess of Item Parameters initial_guess = np.concatenate( ([discrimination[item_ndx]], betas[item_ndx, 1:item_length])) otpt = fmin_slsqp(_local_min_func, initial_guess, disp=False, bounds=[(.25, 4)] + [(-6, 6)] * (item_length - 1)) discrimination[item_ndx] = otpt[0] betas[item_ndx, 1:item_length] = otpt[1:] new_values = _credit_partial_integral( theta, betas[item_ndx], discrimination[item_ndx], responses[item_ndx], invalid_response_mask[item_ndx]) partial_int *= new_values if np.abs(previous_discrimination - 
discrimination).max() < 1e-3: break # Recompute partial int for later calculations partial_int = np.ones((responses.shape[1], theta.size)) for item_ndx in range(n_items): partial_int *= _credit_partial_integral( theta, betas[item_ndx], discrimination[item_ndx], responses[item_ndx], invalid_response_mask[item_ndx]) # TODO: look where missing values are and place NAN there instead # of appending them to the end # Compute statistics for final iteration null_metrics = latent_pdf.compute_metrics( partial_int, latent_pdf.null_distribution * latent_pdf.weights, 0) full_metrics = latent_pdf.compute_metrics(partial_int, distribution_x_weight, latent_pdf.n_points - 3) # Ability estimates eap_abilities = _ability_eap_abstract(partial_int, distribution_x_weight, theta) return { 'Discrimination': discrimination, 'Difficulty': betas[:, 1:], 'Ability': eap_abilities, 'LatentPDF': latent_pdf, 'AIC': { 'final': full_metrics[0], 'null': null_metrics[0], 'delta': null_metrics[0] - full_metrics[0] }, 'BIC': { 'final': full_metrics[1], 'null': null_metrics[1], 'delta': null_metrics[1] - full_metrics[1] } }
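# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the library): partial credit model
# estimation on synthetic ordinal data (categories coded 1..4). The data
# generator and helper name are assumptions; the returned dictionary keys
# come from pcm_mml() above.
def _demo_pcm_mml(seed=7):
    rng = np.random.default_rng(seed)
    n_items, n_people = 6, 400
    item_location = np.linspace(-0.5, 0.5, n_items)
    theta = rng.standard_normal(n_people)
    latent = (theta[None, :] - item_location[:, None] +
              rng.normal(size=(n_items, n_people)))
    dataset = np.digitize(latent, bins=[-1.0, 0.0, 1.0]) + 1

    result = pcm_mml(dataset)
    return result['Discrimination'], result['Difficulty']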
def pcm_jml(dataset, options=None): """Estimate parameters for partial credit model. Estimate the discrimination and difficulty parameters for the partial credit model using joint maximum likelihood. Args: dataset: [n_items, n_participants] 2d array of measured responses options: dictionary with updates to default options Returns: discrimination: (1d array) estimates of item discrimination difficulty: (2d array) estimates of item difficulties x item thresholds Options: * max_iteration: int """ options = validate_estimation_options(options) cpr_result = condition_polytomous_response(dataset, _reference=0.0) responses, item_counts, valid_response_mask = cpr_result invalid_response_mask = ~valid_response_mask n_items, n_takers = responses.shape # Set initial parameter estimates to default thetas = np.zeros((n_takers, )) # Initialize item parameters for iterations discrimination = np.ones((n_items, )) betas = np.full((n_items, item_counts.max() - 1), np.nan) scratch = np.zeros((n_items, betas.shape[1] + 1)) for ndx in range(n_items): item_length = item_counts[ndx] - 1 betas[ndx, :item_length] = np.linspace(-1, 1, item_length) # Set invalid index to zero, this allows minimal # changes for invalid data and it is corrected # during integration responses[invalid_response_mask] = 0 for iteration in range(options['max_iteration']): previous_discrimination = discrimination.copy() ##################### # STEP 1 # Estimate theta, given betas / alpha # Loops over all persons ##################### for ndx in range(n_takers): # pylint: disable=cell-var-from-loop response_set = responses[:, ndx] def _theta_min(theta, scratch): # Solves for ability parameters (theta) # Graded PCM Model scratch *= 0. scratch[:, 1:] = theta - betas scratch *= discrimination[:, None] np.cumsum(scratch, axis=1, out=scratch) np.exp(scratch, out=scratch) scratch /= np.nansum(scratch, axis=1)[:, None] # Probability associated with response values = np.take_along_axis(scratch, response_set[:, None], axis=1).squeeze() return -np.log(values[valid_response_mask[:, ndx]] + 1e-313).sum() thetas[ndx] = fminbound(_theta_min, -6, 6, args=(scratch, )) # Recenter theta to identify model thetas -= thetas.mean() thetas /= thetas.std(ddof=1) ##################### # STEP 2 # Estimate Betas / alpha, given Theta # Loops over all items ##################### for ndx in range(n_items): # pylint: disable=cell-var-from-loop # Compute ML for static items response_set = responses[ndx] def _alpha_beta_min(estimates): # PCM_Model kernel = thetas[:, None] - estimates[None, :] kernel *= estimates[0] kernel[:, 0] = 0 np.cumsum(kernel, axis=1, out=kernel) np.exp(kernel, out=kernel) kernel /= np.nansum(kernel, axis=1)[:, None] # Probability associated with response values = np.take_along_axis(kernel, response_set[:, None], axis=1).squeeze() return -np.log(values[valid_response_mask[ndx]]).sum() # Solves jointly for parameters using numerical derivatives initial_guess = np.concatenate( ([discrimination[ndx]], betas[ndx, :item_counts[ndx] - 1])) otpt = fmin_slsqp(_alpha_beta_min, initial_guess, disp=False, bounds=[(.25, 4)] + [(-6, 6)] * (item_counts[ndx] - 1)) discrimination[ndx] = otpt[0] betas[ndx, :item_counts[ndx] - 1] = otpt[1:] # Check termination criterion if (np.abs(previous_discrimination - discrimination).max() < 1e-3): break return {'Discrimination': discrimination, 'Difficulty': betas}
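# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the library): joint maximum
# likelihood counterpart to the MML version, on the same style of synthetic
# ordinal data (categories coded 1..4). The generator and helper name are
# assumptions; the returned dictionary keys come from pcm_jml() above.
def _demo_pcm_jml(seed=8):
    rng = np.random.default_rng(seed)
    theta = rng.standard_normal(400)
    item_location = np.linspace(-0.5, 0.5, 6)
    latent = (theta[None, :] - item_location[:, None] +
              rng.normal(size=(6, 400)))
    dataset = np.digitize(latent, bins=[-1.0, 0.0, 1.0]) + 1

    result = pcm_jml(dataset)
    return result['Discrimination'], result['Difficulty']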
def gum_mml(dataset, options=None): """Estimate parameters for graded unfolding model. Estimate the discrimination, delta and threshold parameters for the graded unfolding model using marginal maximum likelihood. Args: dataset: [n_items, n_participants] 2d array of measured responses options: dictionary with updates to default options Returns: discrimination: (1d array) estimates of item discrimination delta: (1d array) estimates of item folding values difficulty: (2d array) estimates of item thresholds x item thresholds Options: * max_iteration: int * distribution: callable * quadrature_bounds: (float, float) * quadrature_n: int """ options = validate_estimation_options(options) quad_start, quad_stop = options['quadrature_bounds'] quad_n = options['quadrature_n'] responses, item_counts = condition_polytomous_response(dataset, trim_ends=False, _reference=0.0) n_items = responses.shape[0] # Interpolation Locations theta = _get_quadrature_points(quad_n, quad_start, quad_stop) distribution = options['distribution'](theta) # Initialize item parameters for iterations discrimination = np.ones((n_items,)) betas = np.full((n_items, item_counts.max() - 1), np.nan) delta = np.zeros((n_items,)) partial_int = np.ones((responses.shape[1], theta.size)) # Set initial estimates to evenly spaced for ndx in range(n_items): item_length = item_counts[ndx] - 1 betas[ndx, :item_length] = np.linspace(-1, 1, item_length) # This is the index associated with "folding" about the center fold_span = ((item_counts[:, None] - 0.5) - np.arange(betas.shape[1] + 1)[None, :]) ############# # 1. Start the iteration loop # 2. Estimate Dicriminatin/Difficulty Jointly # 3. Integrate of theta # 4. minimize and repeat ############# for iteration in range(options['max_iteration']): previous_discrimination = discrimination.copy() previous_betas = betas.copy() previous_delta = delta.copy() # Quadrature evaluation for values that do not change # This is done during the outer loop to address rounding errors # and for speed partial_int *= 0.0 partial_int += distribution[None, :] for item_ndx in range(n_items): partial_int *= _unfold_partial_integral(theta, delta[item_ndx], betas[item_ndx], discrimination[item_ndx], fold_span[item_ndx], responses[item_ndx]) # Loop over each item and solve for the alpha / beta parameters for item_ndx in range(n_items): # pylint: disable=cell-var-from-loop item_length = item_counts[item_ndx] - 1 # Remove the previous output old_values = _unfold_partial_integral(theta, previous_delta[item_ndx], previous_betas[item_ndx], previous_discrimination[item_ndx], fold_span[item_ndx], responses[item_ndx]) partial_int /= old_values def _local_min_func(estimate): new_betas = estimate[2:] new_values = _unfold_partial_integral(theta, estimate[1], new_betas, estimate[0], fold_span[item_ndx], responses[item_ndx]) new_values *= partial_int otpt = integrate.fixed_quad( lambda x: new_values, quad_start, quad_stop, n=quad_n)[0] return -np.log(otpt).sum() # Initial Guess of Item Parameters initial_guess = np.concatenate(([discrimination[item_ndx]], [delta[item_ndx]], betas[item_ndx])) otpt = fmin_slsqp(_local_min_func, initial_guess, disp=False, bounds=[(.25, 4)] + [(-2, 2)] + [(-6, 6)] * item_length) discrimination[item_ndx] = otpt[0] delta[item_ndx] = otpt[1] betas[item_ndx, :] = otpt[2:] new_values = _unfold_partial_integral(theta, delta[item_ndx], betas[item_ndx], discrimination[item_ndx], fold_span[item_ndx], responses[item_ndx]) partial_int *= new_values if np.abs(previous_discrimination - discrimination).max() < 1e-3: 
break return discrimination, delta, betas
def pcm_mml(dataset, options=None): """Estimate parameters for partial credit model. Estimate the discrimination and difficulty parameters for the partial credit model using marginal maximum likelihood. Args: dataset: [n_items, n_participants] 2d array of measured responses options: dictionary with updates to default options Returns: discrimination: (1d array) estimates of item discrimination difficulty: (2d array) estimates of item difficulties x item thresholds Options: * max_iteration: int * distribution: callable * quadrature_bounds: (float, float) * quadrature_n: int """ options = validate_estimation_options(options) quad_start, quad_stop = options['quadrature_bounds'] quad_n = options['quadrature_n'] responses, item_counts = condition_polytomous_response(dataset, trim_ends=False, _reference=0.0) n_items = responses.shape[0] # Interpolation Locations theta = _get_quadrature_points(quad_n, quad_start, quad_stop) distribution = options['distribution'](theta) # Initialize difficulty parameters for estimation betas = np.full((n_items, item_counts.max()), np.nan) discrimination = np.ones((n_items,)) partial_int = np.ones((responses.shape[1], theta.size)) # Not all items need to have the same # number of response categories betas[:, 0] = 0 for ndx in range(n_items): betas[ndx, 1:item_counts[ndx]] = np.linspace(-1, 1, item_counts[ndx]-1) ############# # 1. Start the iteration loop # 2. Estimate Dicriminatin/Difficulty Jointly # 3. Integrate of theta # 4. minimize and repeat ############# for iteration in range(options['max_iteration']): previous_discrimination = discrimination.copy() previous_betas = betas.copy() # Quadrature evaluation for values that do not change # This is done during the outer loop to address rounding errors # and for speed partial_int *= 0.0 partial_int += distribution[None, :] for item_ndx in range(n_items): partial_int *= _credit_partial_integral(theta, betas[item_ndx], discrimination[item_ndx], responses[item_ndx]) # Loop over each item and solve for the alpha / beta parameters for item_ndx in range(n_items): # pylint: disable=cell-var-from-loop item_length = item_counts[item_ndx] new_betas = np.zeros((item_length)) # Remove the previous output old_values = _credit_partial_integral(theta, previous_betas[item_ndx], previous_discrimination[item_ndx], responses[item_ndx]) partial_int /= old_values def _local_min_func(estimate): new_betas[1:] = estimate[1:] new_values = _credit_partial_integral(theta, new_betas, estimate[0], responses[item_ndx]) new_values *= partial_int otpt = integrate.fixed_quad( lambda x: new_values, quad_start, quad_stop, n=quad_n)[0] return -np.log(otpt).sum() # Initial Guess of Item Parameters initial_guess = np.concatenate(([discrimination[item_ndx]], betas[item_ndx, 1:item_length])) otpt = fmin_slsqp(_local_min_func, initial_guess, disp=False, bounds=[(.25, 4)] + [(-6, 6)] * (item_length - 1)) discrimination[item_ndx] = otpt[0] betas[item_ndx, 1:item_length] = otpt[1:] new_values = _credit_partial_integral(theta, betas[item_ndx], discrimination[item_ndx], responses[item_ndx]) partial_int *= new_values if np.abs(previous_discrimination - discrimination).max() < 1e-3: break # TODO: look where missing values are and place NAN there instead # of appending them to the end return discrimination, betas[:, 1:]
def grm_mml(dataset, options=None): """Estimate parameters for graded response model. Estimate the discrimination and difficulty parameters for a graded response model using marginal maximum likelihood. Args: dataset: [n_items, n_participants] 2d array of measured responses options: dictionary with updates to default options Returns: results_dictionary: * Discrimination: (1d array) estimate of item discriminations * Difficulty: (2d array) estimates of item diffiulties by item thresholds * LatentPDF: (object) contains information about the pdf * AIC: (dictionary) null model and final model AIC value * BIC: (dictionary) null model and final model BIC value Options: * estimate_distribution: Boolean * number_of_samples: int >= 5 * use_LUT: boolean * max_iteration: int * distribution: callable * quadrature_bounds: (float, float) * quadrature_n: int """ options = validate_estimation_options(options) cpr_result = condition_polytomous_response(dataset, trim_ends=False) responses, item_counts, valid_response_mask = cpr_result invalid_response_mask = ~valid_response_mask n_items = responses.shape[0] # Should we use the LUT _integral_func = _solve_integral_equations _interp_func = None if options['use_LUT']: _integral_func = _solve_integral_equations_LUT _interp_func = create_beta_LUT((.15, 5.05, 500), (-6, 6, 500), options) # Quadrature Locations latent_pdf = LatentPDF(options) theta = latent_pdf.quadrature_locations # Compute the values needed for integral equations integral_counts = list() for ndx in range(n_items): temp_output = _solve_for_constants(responses[ndx, valid_response_mask[ndx]]) integral_counts.append(temp_output) # Initialize difficulty parameters for estimation betas = np.full((item_counts.sum(), ), -10000.0) discrimination = np.ones_like(betas) cumulative_item_counts = item_counts.cumsum() start_indices = np.roll(cumulative_item_counts, 1) start_indices[0] = 0 for ndx in range(n_items): end_ndx = cumulative_item_counts[ndx] start_ndx = start_indices[ndx] + 1 betas[start_ndx:end_ndx] = np.linspace(-1, 1, item_counts[ndx] - 1) betas_roll = np.roll(betas, -1) betas_roll[cumulative_item_counts - 1] = 10000 # Set invalid index to zero, this allows minimal # changes for invalid data and it is corrected # during integration responses[invalid_response_mask] = 0 ############# # 1. Start the iteration loop # 2. estimate discrimination # 3. solve for difficulties # 4. 
minimize and repeat ############# for iteration in range(options['max_iteration']): previous_discrimination = discrimination.copy() previous_betas = betas.copy() previous_betas_roll = betas_roll.copy() # Quadrature evaluation for values that do not change # This is done during the outer loop to address rounding errors partial_int = np.ones((responses.shape[1], theta.size)) for item_ndx in range(n_items): partial_int *= _graded_partial_integral( theta, betas, betas_roll, discrimination, responses[item_ndx], invalid_response_mask[item_ndx]) # Estimate the distribution if requested distribution_x_weight = latent_pdf(partial_int, iteration) partial_int *= distribution_x_weight # Update the lookup table if necessary if (options['use_LUT'] and options['estimate_distribution'] and iteration > 0): new_options = dict(options) new_options.update({'distribution': latent_pdf.cubic_splines[-1]}) _interp_func = create_beta_LUT((.15, 5.05, 500), (-6, 6, 500), new_options) for item_ndx in range(n_items): # pylint: disable=cell-var-from-loop # Indices into linearized difficulty parameters start_ndx = start_indices[item_ndx] end_ndx = cumulative_item_counts[item_ndx] old_values = _graded_partial_integral( theta, previous_betas, previous_betas_roll, previous_discrimination, responses[item_ndx], invalid_response_mask[item_ndx]) partial_int /= old_values def _local_min_func(estimate): # Solve integrals for diffiulty estimates new_betas = _integral_func(estimate, integral_counts[item_ndx], distribution_x_weight, theta, _interp_func) betas[start_ndx + 1:end_ndx] = new_betas betas_roll[start_ndx:end_ndx - 1] = new_betas discrimination[start_ndx:end_ndx] = estimate new_values = _graded_partial_integral( theta, betas, betas_roll, discrimination, responses[item_ndx], invalid_response_mask[item_ndx]) new_values *= partial_int otpt = np.sum(new_values, axis=1) return -np.log(otpt).sum() # Univariate minimization for discrimination parameter fminbound(_local_min_func, 0.2, 5.0) new_values = _graded_partial_integral( theta, betas, betas_roll, discrimination, responses[item_ndx], invalid_response_mask[item_ndx]) partial_int *= new_values if np.abs(previous_discrimination - discrimination).max() < 1e-3: break # Recompute partial int for later calculations partial_int = np.ones((responses.shape[1], theta.size)) for item_ndx in range(n_items): partial_int *= _graded_partial_integral( theta, betas, betas_roll, discrimination, responses[item_ndx], invalid_response_mask[item_ndx]) # Trim difficulties to conform to standard output # TODO: look where missing values are and place NAN there instead # of appending them to the end output_betas = np.full((n_items, item_counts.max() - 1), np.nan) for ndx, (start_ndx, end_ndx) in enumerate(zip(start_indices, cumulative_item_counts)): output_betas[ndx, :end_ndx - start_ndx - 1] = betas[start_ndx + 1:end_ndx] # Compute statistics for final iteration null_metrics = latent_pdf.compute_metrics( partial_int, latent_pdf.null_distribution * latent_pdf.weights, 0) full_metrics = latent_pdf.compute_metrics(partial_int, distribution_x_weight, latent_pdf.n_points - 3) # Ability estimates eap_abilities = _ability_eap_abstract(partial_int, distribution_x_weight, theta) return { 'Discrimination': discrimination[start_indices], 'Difficulty': output_betas, 'Ability': eap_abilities, 'LatentPDF': latent_pdf, 'AIC': { 'final': full_metrics[0], 'null': null_metrics[0], 'delta': null_metrics[0] - full_metrics[0] }, 'BIC': { 'final': full_metrics[1], 'null': null_metrics[1], 'delta': null_metrics[1] - 
full_metrics[1] } }
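# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the library): graded response model
# via marginal maximum likelihood, passing a couple of the documented option
# keys. The crude ordinal data generator (categories coded 1..4) and the
# helper name are assumptions; option keys and returned dictionary keys come
# from grm_mml() above.
def _demo_grm_mml(seed=9):
    rng = np.random.default_rng(seed)
    n_items, n_people = 10, 800
    item_location = np.linspace(-1, 1, n_items)
    theta = rng.standard_normal(n_people)
    latent = (theta[None, :] - item_location[:, None] +
              rng.normal(size=(n_items, n_people)))
    dataset = np.digitize(latent, bins=[-1.0, 0.0, 1.0]) + 1

    result = grm_mml(dataset, options={'use_LUT': True, 'max_iteration': 50})
    return result['Discrimination'], result['Difficulty'], result['AIC']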
def onepl_full(dataset, alpha=None, options=None): """ Estimates parameters in an 1PL IRT Model. This function is slow, please use onepl_mml Args: dataset: [items x participants] matrix of True/False Values alpha: scalar of discrimination used in model (default to 1) options: dictionary with updates to default options Returns: discrimination: (float) estimate of test discrimination difficulty: (1d array) estimates of item diffiulties Options: * max_iteration: int * distribution: callable * quadrature_bounds: (float, float) * quadrature_n: int Notes: If alpha is supplied then this solves a Rasch model """ options = validate_estimation_options(options) quad_start, quad_stop = options['quadrature_bounds'] quad_n = options['quadrature_n'] n_items = dataset.shape[0] unique_sets, counts = np.unique(dataset, axis=1, return_counts=True) the_sign = convert_responses_to_kernel_sign(unique_sets) theta = _get_quadrature_points(quad_n, quad_start, quad_stop) distribution = options['distribution'](theta) discrimination = np.ones((n_items,)) difficulty = np.zeros((n_items,)) def alpha_min_func(alpha_estimate): discrimination[:] = alpha_estimate for iteration in range(options['max_iteration']): previous_difficulty = difficulty.copy() # Quadrature evaluation for values that do not change partial_int = _compute_partial_integral(theta, difficulty, discrimination, the_sign) partial_int *= distribution for item_ndx in range(n_items): # pylint: disable=cell-var-from-loop # remove contribution from current item local_int = _compute_partial_integral(theta, difficulty[item_ndx, None], discrimination[item_ndx, None], the_sign[item_ndx, None]) partial_int /= local_int def min_local_func(beta_estimate): difficulty[item_ndx] = beta_estimate estimate_int = _compute_partial_integral(theta, difficulty[item_ndx, None], discrimination[item_ndx, None], the_sign[item_ndx, None]) estimate_int *= partial_int otpt = integrate.fixed_quad( lambda x: estimate_int, quad_start, quad_stop, n=quad_n)[0] return -np.log(otpt).dot(counts) fminbound(min_local_func, -4, 4) # Update the partial integral based on the new found values estimate_int = _compute_partial_integral(theta, difficulty[item_ndx, None], discrimination[item_ndx, None], the_sign[item_ndx, None]) # update partial integral partial_int *= estimate_int if(np.abs(previous_difficulty - difficulty).max() < 1e-3): break cost = integrate.fixed_quad( lambda x: partial_int, quad_start, quad_stop, n=quad_n)[0] return -np.log(cost).dot(counts) if alpha is None: # OnePl Solver alpha = fminbound(alpha_min_func, 0.1, 4) else: # Rasch Solver alpha_min_func(alpha) return alpha, difficulty
def grm_mml(dataset, options=None):
    """Estimate parameters for graded response model.

    Estimate the discrimination and difficulty parameters for
    a graded response model using marginal maximum likelihood.

    Args:
        dataset: [n_items, n_participants] 2d array of measured responses
        options: dictionary with updates to default options

    Returns:
        discrimination: (1d array) estimate of item discriminations
        difficulty: (2d array) estimates of item difficulties by item thresholds

    Options:
        * max_iteration: int
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int
    """
    options = validate_estimation_options(options)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    responses, item_counts = condition_polytomous_response(dataset, trim_ends=False)
    n_items = responses.shape[0]

    # Interpolation Locations
    theta = _get_quadrature_points(quad_n, quad_start, quad_stop)
    distribution = options['distribution'](theta)

    # Compute the values needed for integral equations
    integral_counts = list()
    for ndx in range(n_items):
        temp_output = _solve_for_constants(responses[ndx])
        integral_counts.append(temp_output)

    # Initialize difficulty parameters for estimation
    betas = np.full((item_counts.sum(),), -10000.0)
    discrimination = np.ones_like(betas)
    cumulative_item_counts = item_counts.cumsum()
    start_indices = np.roll(cumulative_item_counts, 1)
    start_indices[0] = 0

    for ndx in range(n_items):
        end_ndx = cumulative_item_counts[ndx]
        start_ndx = start_indices[ndx] + 1
        betas[start_ndx:end_ndx] = np.linspace(-1, 1, item_counts[ndx] - 1)
    betas_roll = np.roll(betas, -1)
    betas_roll[cumulative_item_counts - 1] = 10000

    #############
    # 1. Start the iteration loop
    # 2. estimate discrimination
    # 3. solve for difficulties
    # 4. minimize and repeat
    #############
    for iteration in range(options['max_iteration']):
        previous_discrimination = discrimination.copy()
        previous_betas = betas.copy()
        previous_betas_roll = betas_roll.copy()

        # Quadrature evaluation for values that do not change
        # This is done during the outer loop to address rounding errors
        partial_int = _graded_partial_integral(theta, betas, betas_roll,
                                               discrimination, responses)
        partial_int *= distribution

        for item_ndx in range(n_items):
            # pylint: disable=cell-var-from-loop

            # Indices into linearized difficulty parameters
            start_ndx = start_indices[item_ndx]
            end_ndx = cumulative_item_counts[item_ndx]

            old_values = _graded_partial_integral(theta, previous_betas,
                                                  previous_betas_roll,
                                                  previous_discrimination,
                                                  responses[item_ndx][None, :])
            partial_int /= old_values

            def _local_min_func(estimate):
                # Solve integrals for difficulty estimates
                new_betas = _solve_integral_equations(estimate,
                                                      integral_counts[item_ndx],
                                                      distribution,
                                                      theta)
                betas[start_ndx + 1:end_ndx] = new_betas
                betas_roll[start_ndx:end_ndx - 1] = new_betas
                discrimination[start_ndx:end_ndx] = estimate

                new_values = _graded_partial_integral(theta, betas, betas_roll,
                                                      discrimination,
                                                      responses[item_ndx][None, :])
                new_values *= partial_int
                otpt = integrate.fixed_quad(
                    lambda x: new_values, quad_start, quad_stop, n=quad_n)[0]

                return -np.log(otpt).sum()

            # Univariate minimization for discrimination parameter
            fminbound(_local_min_func, 0.2, 5.0)

            new_values = _graded_partial_integral(theta, betas, betas_roll,
                                                  discrimination,
                                                  responses[item_ndx][None, :])

            partial_int *= new_values

        if np.abs(previous_discrimination - discrimination).max() < 1e-3:
            break

    # Trim difficulties to conform to standard output
    # TODO: look where missing values are and place NAN there instead
    #       of appending them to the end
    output_betas = np.full((n_items, item_counts.max() - 1), np.nan)
    for ndx, (start_ndx, end_ndx) in enumerate(zip(start_indices, cumulative_item_counts)):
        output_betas[ndx, :end_ndx - start_ndx - 1] = betas[start_ndx + 1:end_ndx]

    return discrimination[start_indices], output_betas
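

# Usage sketch (not part of the library): marginal maximum likelihood for
# graded responses. The random dataset below is purely illustrative; any
# [n_items, n_participants] integer-coded response matrix works.
def _example_grm_mml_usage():
    """Illustrative only: estimate discrimination and threshold difficulties."""
    rng = np.random.default_rng(2)
    graded = rng.integers(0, 4, size=(8, 400))   # hypothetical responses in {0, 1, 2, 3}
    discrimination, difficulty = grm_mml(graded)
    return discrimination, difficulty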
def test_warnings(self):
    """Testing validation when inputs are bad."""
    test = {'Bad Key': "Come at me Bro"}
    with self.assertRaises(KeyError):
        validate_estimation_options(test)

    test = [21.0]
    with self.assertRaises(AssertionError):
        validate_estimation_options(test)

    test = {'max_iteration': 12.0}
    with self.assertRaises(AssertionError):
        validate_estimation_options(test)

    test = {'max_iteration': -2}
    with self.assertRaises(AssertionError):
        validate_estimation_options(test)

    test = {'distribution': stats.norm(0, 1)}
    with self.assertRaises(AssertionError):
        validate_estimation_options(test)

    test = {'quadrature_bounds': 4.3}
    with self.assertRaises(AssertionError):
        validate_estimation_options(test)

    test = {'quadrature_bounds': (4, -3)}
    with self.assertRaises(AssertionError):
        validate_estimation_options(test)

    test = {'quadrature_n': 12.2}
    with self.assertRaises(AssertionError):
        validate_estimation_options(test)

    test = {'hyper_quadrature_n': 7.2}
    with self.assertRaises(AssertionError):
        validate_estimation_options(test)

    test = {'hyper_quadrature_n': 5}
    with self.assertRaises(AssertionError):
        validate_estimation_options(test)

    test = {'quadrature_bounds': 2}
    with self.assertRaises(AssertionError):
        validate_estimation_options(test)

    test = {'use_LUT': 1}
    with self.assertRaises(AssertionError):
        validate_estimation_options(test)

    test = {'estimate_distribution': 1}
    with self.assertRaises(AssertionError):
        validate_estimation_options(test)

    test = {'number_of_samples': 3}
    with self.assertRaises(AssertionError):
        validate_estimation_options(test)
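

# Sketch of a valid options dictionary (keys taken from the failing cases above
# and the estimator docstrings); the specific values are illustrative and only
# claim to pass the checks exercised in test_warnings.
def _example_valid_options():
    """Illustrative only: overrides of the correct types and ranges."""
    options = {'max_iteration': 100,            # positive int, unlike 12.0 or -2
               'quadrature_bounds': (-5, 5),    # ordered tuple, unlike (4, -3)
               'quadrature_n': 61,              # int, unlike 12.2
               'use_LUT': True,                 # bool, unlike 1
               'estimate_distribution': False}  # bool, unlike 1
    return validate_estimation_options(options)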
def grm_jml(dataset, options=None):
    """Estimate parameters for graded response model.

    Estimate the discrimination and difficulty parameters for
    a graded response model using joint maximum likelihood.

    Args:
        dataset: [n_items, n_participants] 2d array of measured responses
        options: dictionary with updates to default options

    Returns:
        discrimination: (1d array) estimate of item discriminations
        difficulty: (2d array) estimates of item difficulties by item thresholds

    Options:
        * max_iteration: int
    """
    options = validate_estimation_options(options)

    cpr_result = condition_polytomous_response(dataset)
    responses, item_counts, valid_response_mask = cpr_result
    invalid_response_mask = ~valid_response_mask
    n_items, n_takers = responses.shape

    # Set initial parameter estimates to default
    thetas = np.zeros((n_takers,))

    # Initialize difficulty parameters for iterations
    betas = np.full((item_counts.sum(),), -10000.0)
    discrimination = np.ones_like(betas)
    cumulative_item_counts = item_counts.cumsum()
    start_indices = np.roll(cumulative_item_counts, 1)
    start_indices[0] = 0

    for ndx in range(n_items):
        end_ndx = cumulative_item_counts[ndx]
        start_ndx = start_indices[ndx] + 1
        betas[start_ndx:end_ndx] = np.linspace(-1, 1, item_counts[ndx] - 1)
    betas_roll = np.roll(betas, -1)
    betas_roll[cumulative_item_counts - 1] = 10000

    # Set invalid index to zero, this allows minimal
    # changes for invalid data and it is corrected
    # during integration
    responses[invalid_response_mask] = 0

    for iteration in range(options['max_iteration']):
        previous_betas = betas.copy()

        #####################
        # STEP 1
        # Estimate theta, given betas / alpha
        # Loops over all persons
        #####################
        for ndx in range(n_takers):
            def _theta_min(theta):
                # Solves for ability parameters (theta)
                graded_prob = (irt_evaluation(betas, discrimination, theta) -
                               irt_evaluation(betas_roll, discrimination, theta))
                values = graded_prob[responses[:, ndx]]
                return -np.log(values[valid_response_mask[:, ndx]] + 1e-313).sum()

            thetas[ndx] = fminbound(_theta_min, -6, 6)

        # Recenter theta to identify model
        thetas -= thetas.mean()
        thetas /= thetas.std(ddof=1)

        #####################
        # STEP 2
        # Estimate Betas / alpha, given Theta
        # Loops over all items
        #####################
        for ndx in range(n_items):
            # pylint: disable=cell-var-from-loop

            # Compute ML for static items
            start_ndx = start_indices[ndx]
            end_ndx = cumulative_item_counts[ndx]

            def _alpha_beta_min(estimates):
                # Set the estimates into the parameter arrays
                discrimination[start_ndx:end_ndx] = estimates[0]
                betas[start_ndx + 1:end_ndx] = estimates[1:]
                betas_roll[start_ndx:end_ndx - 1] = estimates[1:]

                graded_prob = (irt_evaluation(betas, discrimination, thetas) -
                               irt_evaluation(betas_roll, discrimination, thetas))

                values = np.take_along_axis(graded_prob, responses[None, ndx], axis=0).squeeze()
                np.clip(values, 1e-23, np.inf, out=values)
                return -np.log(values[valid_response_mask[ndx]]).sum()

            # Solves jointly for parameters using numerical derivatives
            initial_guess = np.concatenate(([discrimination[start_ndx]],
                                            betas[start_ndx + 1:end_ndx]))
            otpt = fmin_slsqp(_alpha_beta_min, initial_guess,
                              disp=False, f_ieqcons=_jml_inequality,
                              bounds=[(.25, 4)] + [(-6, 6)] * (item_counts[ndx] - 1))

            discrimination[start_ndx:end_ndx] = otpt[0]
            betas[start_ndx + 1:end_ndx] = otpt[1:]
            betas_roll[start_ndx:end_ndx - 1] = otpt[1:]

        # Check termination criterion
        if np.abs(previous_betas - betas).max() < 1e-3:
            break

    # Trim difficulties to conform to standard output
    # TODO: look where missing values are and place NAN there instead
    #       of appending them to the end
    output_betas = np.full((n_items, item_counts.max() - 1), np.nan)
    for ndx, (start_ndx, end_ndx) in enumerate(zip(start_indices, cumulative_item_counts)):
        output_betas[ndx, :end_ndx - start_ndx - 1] = betas[start_ndx + 1:end_ndx]

    return {'Discrimination': discrimination[start_indices],
            'Difficulty': output_betas}
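

# Usage sketch (not part of the library): grm_jml accepts the same
# [n_items, n_participants] integer-coded dataset as grm_mml, but returns a
# dictionary rather than a (discrimination, difficulty) tuple. The random
# data below is purely illustrative.
def _example_grm_jml_usage():
    """Illustrative only: joint maximum likelihood estimates for graded data."""
    rng = np.random.default_rng(1)
    graded = rng.integers(0, 4, size=(8, 400))   # hypothetical responses in {0, 1, 2, 3}
    result = grm_jml(graded)
    return result['Discrimination'], result['Difficulty']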