Code Example #1
File: test_utils.py Project: eribean/girth
    def test_trim_response_set(self):
        """Testing trim of all yes/no values."""
        np.random.seed(439)
        dataset = np.random.rand(10, 300)
        counts = np.random.rand(300)

        # Pass through
        new_set, new_counts = trim_response_set_and_counts(dataset, counts)
        np.testing.assert_array_equal(dataset, new_set)
        np.testing.assert_array_equal(counts, new_counts)

        # Make first column zeros
        dataset[:, 0] = 0
        new_set, new_counts = trim_response_set_and_counts(dataset, counts)

        np.testing.assert_array_equal(dataset[:, 1:], new_set)
        np.testing.assert_array_equal(counts[1:], new_counts)

        # Make last column all 1
        dataset[:, -1] = 1
        new_set, new_counts = trim_response_set_and_counts(dataset, counts)

        np.testing.assert_array_equal(dataset[:, 1:-1], new_set)
        np.testing.assert_array_equal(counts[1:-1], new_counts)

        # Test when a bad value is present
        dataset = np.ones((10, 300), dtype=int)
        dataset[0] = -1
        dataset[0, 0] = INVALID_RESPONSE
        counts = np.random.randint(0, 10, 300)
        new_set, new_counts = trim_response_set_and_counts(dataset, counts)
        self.assertEqual(new_set.shape[1], dataset.shape[1] - 1)
        np.testing.assert_array_equal(dataset[:, 1:], new_set)
        np.testing.assert_array_equal(counts[1:], new_counts)
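
For context, a minimal sketch of the behavior exercised above: columns whose responses are all zeros or all ones are dropped, together with their counts. The import path girth.utilities is an assumption; adjust it to the actual package layout.

import numpy as np
from girth.utilities import trim_response_set_and_counts  # assumed path

np.random.seed(0)
dataset = np.random.rand(5, 4)
dataset[:, 0] = 0  # all-zero column should be trimmed
counts = np.array([3.0, 1.0, 2.0, 4.0])

trimmed, trimmed_counts = trim_response_set_and_counts(dataset, counts)
print(trimmed.shape)   # expected (5, 3), per the first trim case above
print(trimmed_counts)  # expected [1. 2. 4.]
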
Code Example #2
File: conditional_methods.py Project: eribean/girth
def rasch_conditional(dataset, discrimination=1, options=None):
    """ Estimates the difficulty parameters in a Rasch IRT model

    Args:
        dataset: [items x participants] matrix of True/False values
        discrimination: scalar of discrimination used in model (default to 1)
        options: dictionary with updates to default options

    Returns:
        difficulty: (1d array) estimates of item difficulties

    Options:
        * max_iteration: int

    Notes:
        This function sets the sum of the difficulty parameters to
        zero for identification purposes.
    """
    options = validate_estimation_options(options)
    n_items = dataset.shape[0]
    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)

    # Initialize all the difficulty parameters to zeros
    # and set the identifying mean to zero
    # TODO: Add option to specify position
    betas = np.zeros((n_items, ))
    identifying_mean = 0.0

    # Remove the zero and full count values
    unique_sets, counts = trim_response_set_and_counts(unique_sets, counts)

    response_set_sums = unique_sets.sum(axis=0)

    for iteration in range(options['max_iteration']):
        previous_betas = betas.copy()

        for ndx in range(n_items):
            partial_conv = _symmetric_functions(np.delete(betas, ndx))

            def min_func(estimate):
                betas[ndx] = estimate
                full_convolution = np.convolve([1, np.exp(-estimate)], partial_conv)

                denominator = full_convolution[response_set_sums]

                return (np.sum(unique_sets * betas[:,None], axis=0).dot(counts) + 
                        np.log(denominator).dot(counts))

            # Solve for the difficulty parameter
            betas[ndx] = fminbound(min_func, -5, 5)

            # Recenter to enforce the identification constraint
            betas += (identifying_mean - betas.mean())

        # Check termination criterion
        if np.abs(betas - previous_betas).max() < 1e-3:
            break

    return {'Discrimination': discrimination,
            'Difficulty': betas / discrimination}
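
A hedged usage sketch of rasch_conditional, following its docstring; the top-level import path and the synthetic dataset are assumptions, not girth's documented example.

import numpy as np
from girth import rasch_conditional  # assumed import path

rng = np.random.default_rng(314)
# [items x participants] matrix of 0/1 responses, as the docstring specifies
dataset = (rng.random((20, 500)) < 0.6).astype(int)

result = rasch_conditional(dataset, options={'max_iteration': 1000})
print(result['Difficulty'].shape)   # (20,): one estimate per item
print(result['Difficulty'].mean())  # ~0, by the identification constraint
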
Code Example #3
File: jml_methods.py Project: eribean/girth
def _jml_abstract(dataset, _item_min_func, discrimination=1, max_iter=25):
    """ Defines common framework for joint maximum likelihood
        estimation in dichotomous models."""
    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)
    n_items, _ = unique_sets.shape

    # Use easy model to seed guess
    alphas = np.full((n_items, ), discrimination,
                     dtype='float')  # discrimination
    betas = mml_approx(dataset, alphas)  # difficulty

    # Remove the zero and full count values
    unique_sets, counts = trim_response_set_and_counts(unique_sets, counts)

    n_takers = unique_sets.shape[1]
    the_sign = convert_responses_to_kernel_sign(unique_sets)
    thetas = np.zeros((n_takers, ))

    for iteration in range(max_iter):
        previous_betas = betas.copy()

        #####################
        # STEP 1
        # Estimate theta, given betas
        # Loops over all persons
        #####################
        for ndx in range(n_takers):
            # pylint: disable=cell-var-from-loop
            scalar = the_sign[:, ndx] * alphas

            def _theta_min(theta):
                otpt = np.exp(scalar * (theta - betas))

                return np.log1p(otpt).sum()

            # Solves for the ability for each person
            thetas[ndx] = fminbound(_theta_min, -6, 6)

        # Recenter theta to identify model
        thetas -= thetas.mean()
        thetas /= thetas.std(ddof=1)

        #####################
        # STEP 2
        # Estimate item parameters,
        # given the current thetas
        #####################
        alphas, betas = _item_min_func(n_items, alphas, thetas, betas,
                                       the_sign, counts)

        if np.abs(previous_betas - betas).max() < 1e-3:
            break

    return {'Discrimination': alphas, 'Difficulty': betas}
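
_jml_abstract delegates the item step to the _item_min_func callback. The sketch below shows the contract inferred from the call site above; the body is illustrative only (a fixed-discrimination update mirroring the theta step), not girth's actual implementation.

import numpy as np
from scipy.optimize import fminbound

def _item_min_func(n_items, alphas, thetas, betas, the_sign, counts):
    """Illustrative callback: must return updated (alphas, betas)."""
    for ndx in range(n_items):
        # pylint: disable=cell-var-from-loop
        scalar = the_sign[ndx] * alphas[ndx]

        def _beta_min(beta):
            otpt = np.exp(scalar * (thetas - beta))
            return np.log1p(otpt).dot(counts)

        # Discrimination is held fixed; only the difficulty is refit
        betas[ndx] = fminbound(_beta_min, -6, 6)
    return alphas, betas
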
Code Example #4
File: test_utils.py Project: xuek622/girth
    def test_trim_response_set(self):
        """Testing trim of all yes/no values."""
        np.random.seed(439)
        dataset = np.random.rand(10, 300)
        counts = np.random.rand(300)

        # Pass through
        new_set, new_counts = trim_response_set_and_counts(dataset, counts)
        np.testing.assert_array_equal(dataset, new_set)
        np.testing.assert_array_equal(counts, new_counts)

        # Make first column zeros
        dataset[:, 0] = 0
        new_set, new_counts = trim_response_set_and_counts(dataset, counts)

        np.testing.assert_array_equal(dataset[:, 1:], new_set)
        np.testing.assert_array_equal(counts[1:], new_counts)

        # Make last column all 1
        dataset[:, -1] = 1
        new_set, new_counts = trim_response_set_and_counts(dataset, counts)

        np.testing.assert_array_equal(dataset[:, 1:-1], new_set)
        np.testing.assert_array_equal(counts[1:-1], new_counts)

        # Test when the array contains NaNs
        mask = np.random.rand(*dataset.shape) < 0.1
        dataset[mask] = np.nan

        # There are responses with zero variance
        locations = np.where(np.nanstd(dataset, axis=0) == 0)
        self.assertTrue(locations[0].size > 0)

        new_set, new_counts = trim_response_set_and_counts(dataset, counts)
        locations = np.where(np.nanstd(new_set, axis=0) == 0)
        self.assertTrue(locations[0].size == 0)
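
This fork's variant also trims response columns whose non-NaN values have zero variance. A minimal sketch mirroring the assertion above; the import path is an assumption.

import numpy as np
from girth.utilities import trim_response_set_and_counts  # assumed path

np.random.seed(7)
dataset = np.random.rand(6, 5)
dataset[1:, 2] = np.nan  # a single finite value leaves zero nan-variance
counts = np.arange(5, dtype=float)

trimmed, trimmed_counts = trim_response_set_and_counts(dataset, counts)
# Expected from the test above: no zero-variance columns survive the trim
assert np.where(np.nanstd(trimmed, axis=0) == 0)[0].size == 0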