def test_trim_response_set(self):
    """Testing trim of all yes/no values."""
    np.random.seed(439)
    dataset = np.random.rand(10, 300)
    counts = np.random.rand(300)

    # Pass through
    new_set, new_counts = trim_response_set_and_counts(dataset, counts)

    np.testing.assert_array_equal(dataset, new_set)
    np.testing.assert_array_equal(counts, new_counts)

    # Make first column zeros
    dataset[:, 0] = 0
    new_set, new_counts = trim_response_set_and_counts(dataset, counts)

    np.testing.assert_array_equal(dataset[:, 1:], new_set)
    np.testing.assert_array_equal(counts[1:], new_counts)

    # Make last column all 1
    dataset[:, -1] = 1
    new_set, new_counts = trim_response_set_and_counts(dataset, counts)

    np.testing.assert_array_equal(dataset[:, 1:-1], new_set)
    np.testing.assert_array_equal(counts[1:-1], new_counts)

    # Test when bad value is present
    dataset = np.ones((10, 300), dtype=int)
    dataset[0] = -1
    dataset[0, 0] = INVALID_RESPONSE
    counts = np.random.randint(0, 10, 300)
    new_set, new_counts = trim_response_set_and_counts(dataset, counts)

    self.assertEqual(new_set.shape[1], dataset.shape[1] - 1)
    np.testing.assert_array_equal(dataset[:, 1:], new_set)
    np.testing.assert_array_equal(counts[1:], new_counts)
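

# For reference, a minimal sketch of the behavior the test above exercises.
# This is NOT the library's trim_response_set_and_counts; it is a hypothetical
# stand-in that assumes a column is dropped when it has zero variance (all-no
# or all-yes response patterns) or contains the INVALID_RESPONSE sentinel.
# It assumes numpy is imported as np and INVALID_RESPONSE is in scope, as in
# the surrounding excerpts.
def _trim_response_set_sketch(response_sets, counts, invalid=INVALID_RESPONSE):
    # Zero variance over valid entries marks an all-no / all-yes column
    column_std = np.nanstd(response_sets, axis=0)
    has_invalid = (response_sets == invalid).any(axis=0)

    keep = (column_std > 0) & ~has_invalid
    return response_sets[:, keep], counts[keep]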


def rasch_conditional(dataset, discrimination=1, options=None):
    """ Estimates the difficulty parameters in a Rasch IRT model.

    Args:
        dataset: [items x participants] matrix of True/False values
        discrimination: scalar of discrimination used in model (default to 1)
        options: dictionary with updates to default options

    Returns:
        results: dictionary with keys
            'Discrimination': the fixed discrimination scalar
            'Difficulty': (1d array) estimates of item difficulties

    Options:
        * max_iteration: int

    Notes:
        This function sets the sum of the difficulty parameters to
        zero for identification purposes.
    """
    options = validate_estimation_options(options)

    n_items = dataset.shape[0]
    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)

    # Initialize all the difficulty parameters to zeros
    # Set an identifying mean of zero
    # TODO: Add option to specify position
    betas = np.zeros((n_items,))
    identifying_mean = 0.0

    # Remove the zero and full count values
    unique_sets, counts = trim_response_set_and_counts(unique_sets, counts)

    response_set_sums = unique_sets.sum(axis=0)

    for iteration in range(options['max_iteration']):
        previous_betas = betas.copy()

        for ndx in range(n_items):
            partial_conv = _symmetric_functions(np.delete(betas, ndx))

            def min_func(estimate):
                betas[ndx] = estimate
                full_convolution = np.convolve([1, np.exp(-estimate)],
                                               partial_conv)
                denominator = full_convolution[response_set_sums]
                return (np.sum(unique_sets * betas[:, None], axis=0).dot(counts) +
                        np.log(denominator).dot(counts))

            # Solve for the difficulty parameter
            betas[ndx] = fminbound(min_func, -5, 5)

        # Recenter onto the identifying mean
        betas += (identifying_mean - betas.mean())

        # Check termination criterion
        if np.abs(betas - previous_betas).max() < 1e-3:
            break

    return {'Discrimination': discrimination,
            'Difficulty': betas / discrimination}
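

# A minimal usage sketch for rasch_conditional above. The data generation is
# ad hoc (plain numpy, no library helpers), and the true difficulties here
# are made-up values for illustration only.
def _rasch_conditional_example():
    rng = np.random.default_rng(87)
    true_difficulty = np.linspace(-1.5, 1.5, 10)
    thetas = rng.standard_normal(500)

    # Rasch model: P(yes) = 1 / (1 + exp(difficulty - theta))
    probability = 1.0 / (1.0 + np.exp(true_difficulty[:, None] - thetas[None, :]))
    dataset = (rng.random((10, 500)) < probability).astype(int)

    result = rasch_conditional(dataset)
    # Estimates should roughly track true_difficulty for this sample size
    print(result['Difficulty'])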


def _jml_abstract(dataset, _item_min_func, discrimination=1, max_iter=25):
    """ Defines common framework for joint maximum likelihood
        estimation in dichotomous models.
    """
    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)
    n_items, _ = unique_sets.shape

    # Use easy model to seed guess
    alphas = np.full((n_items,), discrimination, dtype='float')  # discrimination
    betas = mml_approx(dataset, alphas)  # difficulty

    # Remove the zero and full count values
    unique_sets, counts = trim_response_set_and_counts(unique_sets, counts)
    n_takers = unique_sets.shape[1]
    the_sign = convert_responses_to_kernel_sign(unique_sets)
    thetas = np.zeros((n_takers,))

    for iteration in range(max_iter):
        previous_betas = betas.copy()

        #####################
        # STEP 1
        # Estimate theta, given betas
        # Loops over all persons
        #####################
        for ndx in range(n_takers):
            # pylint: disable=cell-var-from-loop
            scalar = the_sign[:, ndx] * alphas

            def _theta_min(theta):
                otpt = np.exp(scalar * (theta - betas))
                return np.log1p(otpt).sum()

            # Solves for the ability for each person
            thetas[ndx] = fminbound(_theta_min, -6, 6)

        # Recenter theta to identify model
        thetas -= thetas.mean()
        thetas /= thetas.std(ddof=1)

        #####################
        # STEP 2
        # Estimate Item Parameters,
        # given Theta
        #####################
        alphas, betas = _item_min_func(n_items, alphas, thetas,
                                       betas, the_sign, counts)

        if np.abs(previous_betas - betas).max() < 1e-3:
            break

    return {'Discrimination': alphas, 'Difficulty': betas}
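

# A sketch of one possible _item_min_func plugged into the framework above: a
# Rasch-style update that holds discrimination fixed and solves each item's
# difficulty by bounded scalar minimization. The signature matches the call
# site in _jml_abstract; the body is illustrative, not the library's
# implementation.
def _rasch_item_min_func(n_items, alphas, thetas, betas, the_sign, counts):
    for ndx in range(n_items):
        # pylint: disable=cell-var-from-loop
        scalar = the_sign[ndx] * alphas[ndx]

        def _beta_min(beta):
            # Negative log-likelihood contribution of item ndx, weighted
            # by how often each response pattern occurs
            otpt = np.exp(scalar * (thetas - beta))
            return np.log1p(otpt).dot(counts)

        betas[ndx] = fminbound(_beta_min, -6, 6)

    # Discrimination is left at its seeded value in this variant
    return alphas, betas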


def test_trim_response_set_with_nans(self):
    """Testing trim of response sets that contain nans."""
    np.random.seed(439)
    dataset = np.random.rand(10, 300)
    counts = np.random.rand(300)

    # Force zero-variance columns at the edges
    dataset[:, 0] = 0
    dataset[:, -1] = 1

    # Test when array contains nans
    mask = np.random.rand(*dataset.shape) < 0.1
    dataset[mask] = np.nan

    # There are responses with zero variance
    locations = np.where(np.nanstd(dataset, axis=0) == 0)
    self.assertTrue(locations[0].size > 0)

    new_set, new_counts = trim_response_set_and_counts(dataset, counts)

    # After trimming, no zero-variance responses remain
    locations = np.where(np.nanstd(new_set, axis=0) == 0)
    self.assertTrue(locations[0].size == 0)