Example #1
    def test_lut_creation(self):
        """Test the lookup table creation function."""
        lut_func = create_beta_LUT((0.5, 2, 500), (-3, 3, 500))

        # Check the lookup table at two (alpha, beta) pairs
        options = validate_estimation_options(None)
        quad_start, quad_stop = options['quadrature_bounds']
        quad_n = options['quadrature_n']

        theta, weight = _get_quadrature_points(quad_n, quad_start, quad_stop)
        distribution = options['distribution'](theta)

        alpha1 = 0.89
        beta1 = 1.76

        p_value1 = ((weight * distribution) /
                    (1.0 + np.exp(-alpha1 * (theta - beta1)))).sum()
        estimated_beta = lut_func(alpha1, p_value1)
        self.assertAlmostEqual(beta1, estimated_beta, places=4)

        alpha1 = 1.89
        beta1 = -2.34

        p_value1 = ((weight * distribution) /
                    (1.0 + np.exp(-alpha1 * (theta - beta1)))).sum()
        estimated_beta = lut_func(alpha1, p_value1)
        self.assertAlmostEqual(beta1, estimated_beta, places=4)
Example #2
    def __init__(self, options=None):
        """Constructor for the latent estimation class."""
        options = validate_estimation_options(options)

        # Quadrature Parameters
        quad_start, quad_stop = options['quadrature_bounds']
        quad_n = options['quadrature_n']
        theta, weights = _get_quadrature_points(quad_n, quad_start, quad_stop)
        self.quad_bounds = (quad_start, quad_stop)

        # The locations and weights to use by default
        self.quadrature_locations = theta
        self.weights = weights
        self.null_distribution = options['distribution'](theta)

        # Triggers to run the estimation or use default
        self.estimate_distribution = options['estimate_distribution']
        self.n_points = (options['number_of_samples']
                         if self.estimate_distribution else 3)

        # Initialize the first cubic-spline class
        # and set the distribution to be an inverted U-shape
        cubic_spline = self._init_cubic_spline()
        cubic_spline.coefficients[self.n_points // 2 + 2] = 1
        self.cubic_splines = [cubic_spline]
Example #3
    def test_credit_partial_integration(self):
        """Testing the partial integral in the graded model."""
        theta = _get_quadrature_points(61, -5, 5)
        response_set = np.array([0, 1, 2, 2, 1, 0, 3, 1, 3, 2, 2, 2])
        betas = np.array([0, -0.4, 0.94, -.37])
        discrimination = 1.42

        # Hand calculations
        offsets = np.cumsum(betas)[1:]
        first_pos = np.ones_like(theta)
        second_pos = np.exp(discrimination * (theta - offsets[0]))
        third_pos = np.exp(2*discrimination * (theta - offsets[1]/2))
        last_pos = np.exp(3*discrimination * (theta - offsets[2]/3))
        norm_term = first_pos + second_pos + third_pos + last_pos

        probability_values = [first_pos / norm_term, second_pos / norm_term,
                              third_pos / norm_term, last_pos / norm_term]
        expected = np.zeros((response_set.size, theta.size))
        for ndx, response in enumerate(response_set):
            expected[ndx] = probability_values[response]

        result = _credit_partial_integral(theta, betas, discrimination,
                                          response_set)

        np.testing.assert_array_almost_equal(result, expected)
Example #4
def onepl_mml(dataset, alpha=None, options=None):
    """ Estimates parameters in an 1PL IRT Model.

    Args:
        dataset: [items x participants] matrix of True/False Values
        alpha: [int] discrimination constraint
        options: dictionary with updates to default options

    Returns:
        discrimination: (float) estimate of test discrimination
        difficulty: (1d array) estimates of item difficulties

    Options:
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int
    """
    options = validate_estimation_options(options)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    # Difficulty Estimation parameters
    n_items = dataset.shape[0]
    n_no, n_yes = get_true_false_counts(dataset)
    scalar = n_yes / (n_yes + n_no)

    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)
    the_sign = convert_responses_to_kernel_sign(unique_sets)

    discrimination = np.ones((n_items,))
    difficulty = np.zeros((n_items,))

    # Quadrature Locations
    theta = _get_quadrature_points(quad_n, quad_start, quad_stop)
    distribution = options['distribution'](theta)

    # Inline definition of cost function to minimize
    def min_func(estimate):
        discrimination[:] = estimate
        _mml_abstract(difficulty, scalar, discrimination,
                      theta, distribution, options)

        partial_int = _compute_partial_integral(theta, difficulty,
                                                discrimination, the_sign)

        # add distribution
        partial_int *= distribution
        otpt = integrate.fixed_quad(
            lambda x: partial_int, quad_start, quad_stop, n=quad_n)[0]

        return -np.log(otpt).dot(counts)

    # Perform the minimization
    if alpha is None:  # OnePL Method
        alpha = fminbound(min_func, 0.25, 10)
    else:  # Rasch Method
        min_func(alpha)

    return alpha, difficulty
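
A minimal usage sketch for the function above, assuming this code comes from the girth package and that create_synthetic_irt_dichotomous (used in Example #9) is importable from the same namespace; the seed and parameter values are illustrative, not from the source:

import numpy as np
from girth import create_synthetic_irt_dichotomous, onepl_mml  # assumed import path

np.random.seed(3)
theta = np.random.randn(500)             # simulated participant abilities
difficulty = np.linspace(-1.5, 1.5, 5)   # true item difficulties

# Synthesize True/False responses, [items x participants]
dataset = create_synthetic_irt_dichotomous(difficulty, 1.2, theta)

# 1PL: jointly estimate the common discrimination and the difficulties
alpha_hat, difficulty_hat = onepl_mml(dataset)

# Rasch: constrain the discrimination and only estimate difficulties
_, rasch_difficulty = onepl_mml(dataset, alpha=1.0)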
Example #5
    def test_quadrature_points(self):
        """Testing the creation of quadrtature points"""
        n_points = 11

        # A smoke test to make sure it's running properly
        quad_points = _get_quadrature_points(n_points, -1, 1)

        x, _ = roots_legendre(n_points)

        np.testing.assert_allclose(x, quad_points)
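
The test above pins _get_quadrature_points to scipy's Gauss-Legendre roots, which is enough to reconstruct what the quadrature helpers are doing. Below is a standalone sketch of the standard change-of-interval scaling and its use on the kind of integral these estimators evaluate; gauss_legendre_points is a hypothetical stand-in, not the library function:

import numpy as np
from scipy.special import roots_legendre

def gauss_legendre_points(n, a, b):
    """Gauss-Legendre nodes and weights scaled from [-1, 1] to [a, b]."""
    x, w = roots_legendre(n)
    half_width = 0.5 * (b - a)
    return half_width * x + 0.5 * (a + b), half_width * w

theta, weights = gauss_legendre_points(61, -5, 5)
normal_pdf = np.exp(-0.5 * np.square(theta)) / np.sqrt(2 * np.pi)

# The standard normal mass on [-5, 5] is recovered almost exactly
print(weights @ normal_pdf)  # ~0.9999994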
Example #6
def ability_eap(dataset, difficulty, discrimination, options=None):
    """Estimates the abilities for dichotomous models.

    Estimates the ability parameters (theta) for dichotomous models via
    expected a posteriori likelihood estimation.

    Args:
        dataset: [n_items, n_participants] (2d Array) of measured responses
        difficulty: (1d Array) of difficulty parameters for each item
        discrimination: (1d Array) of discrimination parameters for each item
        options: dictionary with updates to default options

    Returns:
        abilities: (1d array) estimated abilities

    Options:
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int

    """
    options = validate_estimation_options(options)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    if np.atleast_1d(discrimination).size == 1:
        discrimination = np.full(dataset.shape[0],
                                 discrimination,
                                 dtype='float')

    the_sign = convert_responses_to_kernel_sign(dataset)

    theta = _get_quadrature_points(quad_n, quad_start, quad_stop)
    partial_int = _compute_partial_integral(theta, difficulty, discrimination,
                                            the_sign)

    # Weight by the input ability distribution
    partial_int *= options['distribution'](theta)

    # Compute the denominator
    denominator = integrate.fixed_quad(lambda x: partial_int,
                                       quad_start,
                                       quad_stop,
                                       n=quad_n)[0]

    # compute the numerator
    partial_int *= theta

    numerator = integrate.fixed_quad(lambda x: partial_int,
                                     quad_start,
                                     quad_stop,
                                     n=quad_n)[0]

    return numerator / denominator
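
A usage sketch for ability_eap, again assuming a girth-style import and the synthetic-data helper from Example #9; in practice the item parameters would come from one of the estimation routines in this collection:

import numpy as np
from girth import create_synthetic_irt_dichotomous, ability_eap  # assumed import path

np.random.seed(7)
true_theta = np.random.randn(300)
difficulty = np.linspace(-2, 2, 10)
discrimination = np.random.rand(10) + 0.75

dataset = create_synthetic_irt_dichotomous(difficulty, discrimination, true_theta)

# EAP ability estimates given (known or previously estimated) item parameters
abilities = ability_eap(dataset, difficulty, discrimination)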
Example #7
    def test_array_LUT(self):
        """Test the creation of the array look up table."""
        alpha = np.linspace(.2, 4, 500)
        beta = np.linspace(-6, 6, 500)
        theta, weights = _get_quadrature_points(41, -5, 5)
        output = np.zeros((alpha.size, beta.size))
        _array_LUT(alpha, beta, theta, weights, output)

        # Expected
        z = alpha[:, None, None] * (beta[None, :, None] - theta[None, None, :])
        expected = np.sum(1.0 / (1. + np.exp(z)) * weights[None, None, :],
                          axis=2)

        np.testing.assert_allclose(output, expected, atol=1e-4, rtol=1e-3)
Example #8
    def test_graded_partial_integral(self):
        """Testing the partial integral in the graded model."""
        theta, _ = _get_quadrature_points(61, -5, 5)
        responses = np.random.randint(0, 3, (10, 100))
        betas = np.array([-10000, -.3, 0.1, 1.2])
        betas_roll = np.roll(betas, -1)
        betas_roll[-1] = 10000
        invalid_response_mask = np.zeros_like(responses, dtype='bool')

        output = np.ones((responses.shape[1], theta.size))
        for ndx in range(responses.shape[0]):
            output *= _graded_partial_integral(theta, betas, betas_roll,
                                               np.array([1.0]), responses[ndx],
                                               invalid_response_mask[ndx])

        # Compare to hand calculations
        hand_calc = list()
        for ndx in range(responses.shape[1]):
            left_betas = betas[responses[:, ndx]]
            right_betas = betas_roll[responses[:, ndx]]
            probability = (
                1.0 / (1.0 + np.exp(left_betas[:, None] - theta[None, :])) -
                1.0 / (1.0 + np.exp(right_betas[:, None] - theta[None, :])))
            hand_calc.append(probability.prod(0))

        hand_calc = np.asarray(hand_calc)

        np.testing.assert_array_equal(hand_calc, output)

        # Test invalid response
        invalid_response_mask[0, 1] = True
        invalid_response_mask[0, 7] = True
        output = _graded_partial_integral(theta, betas, betas_roll,
                                          np.array([1.0]), responses[0],
                                          invalid_response_mask[0])

        np.testing.assert_equal(output[1], np.ones(61, ))
        np.testing.assert_equal(output[7], np.ones(61, ))

        with np.testing.assert_raises(AssertionError):
            for ndx in [0, 2, 3, 4, 5, 6, 8, 9]:
                np.testing.assert_equal(output[ndx], np.ones(61, ))
Example #9
    def test_integral_equations(self):
        """Tests solving for integral given a ratio."""
        np.random.seed(786)
        theta = np.random.randn(50000)
        discrimination = 1.43
        difficulty = np.array([-.4, .1, .5])

        # Compare against dichotomous data
        syn_data = create_synthetic_irt_dichotomous(
            difficulty, discrimination, theta)
        n0 = np.count_nonzero(~syn_data, axis=1)
        n1 = np.count_nonzero(syn_data, axis=1)
        ratio = n1 / (n1 + n0)

        theta = _get_quadrature_points(61, -5, 5)
        distribution = np.exp(-np.square(theta) / 2) / np.sqrt(2 * np.pi)
        results = _solve_integral_equations(
            discrimination, ratio, distribution, theta)
        np.testing.assert_array_almost_equal(results, difficulty, decimal=2)
Example #10
    def test_partial_integration_single(self):
        """Tests the integration quadrature function."""

        # Set seed for repeatability
        np.random.seed(154)

        discrimination = 1.32
        difficulty = .67
        response = np.random.randint(low=0, high=2, size=(1, 10))

        quad_points, _ = _get_quadrature_points(61, -6, 6)

        value = _compute_partial_integral(
            quad_points, difficulty, discrimination, response[0],
            np.zeros_like(response, dtype='bool')[0])

        discrrm = discrimination * np.power(-1, response)
        expected = 1.0 / (1 +
                          np.exp(np.outer(discrrm,
                                          (quad_points - difficulty))))
        np.testing.assert_allclose(value, expected)
Example #11
    def test_credit_partial_integration(self):
        """Testing the partial integral in the graded model."""
        theta, _ = _get_quadrature_points(61, -5, 5)
        response_set = np.array([0, 1, 2, 2, 1, 0, 3, 1, 3, 2, 2, 2])
        betas = np.array([0, -0.4, 0.94, -.37])
        discrimination = 1.42
        invalid_response_mask = np.zeros_like(response_set, dtype='bool')

        # Hand calculations
        offsets = np.cumsum(betas)[1:]
        first_pos = np.ones_like(theta)
        second_pos = np.exp(discrimination * (theta - offsets[0]))
        third_pos = np.exp(2 * discrimination * (theta - offsets[1] / 2))
        last_pos = np.exp(3 * discrimination * (theta - offsets[2] / 3))
        norm_term = first_pos + second_pos + third_pos + last_pos

        probability_values = [
            first_pos / norm_term, second_pos / norm_term,
            third_pos / norm_term, last_pos / norm_term
        ]
        expected = np.zeros((response_set.size, theta.size))
        for ndx, response in enumerate(response_set):
            expected[ndx] = probability_values[response]

        result = _credit_partial_integral(theta, betas, discrimination,
                                          response_set, invalid_response_mask)

        np.testing.assert_array_almost_equal(result, expected)

        invalid_response_mask[1] = True
        invalid_response_mask[7] = True
        result = _credit_partial_integral(theta, betas, discrimination,
                                          response_set, invalid_response_mask)

        np.testing.assert_equal(result[1], np.ones(61, ))
        np.testing.assert_equal(result[7], np.ones(61, ))

        with np.testing.assert_raises(AssertionError):
            for ndx in [0, 2, 3, 4, 5, 6, 8, 9]:
                np.testing.assert_equal(result[ndx], np.ones(61, ))
Example #12
    def test_unfold_partial_integration(self):
        """Testing the unfolding integral."""
        theta, _ = _get_quadrature_points(61, -5, 5)
        response_set = np.array([0, 1, 2, 2, 1, 0, 3, 1, 3, 2, 2, 2])
        betas = np.array([-1.3, -.4, 0.2])
        delta = -0.76
        invalid_response_mask = np.zeros_like(response_set, dtype='bool')

        # (2N - 1) / 2 - n
        folding = 3.5 - np.arange(4)
        discrimination = 1.42

        # Convert to PCM thresholds
        full = np.concatenate((betas, [0], -betas[::-1]))
        full += delta
        scratch = np.zeros((full.size + 1, theta.size))
        _unfold_func(full, discrimination, theta, scratch)

        expected = np.zeros((response_set.size, theta.size))
        for ndx, response in enumerate(response_set):
            expected[ndx] = scratch[response]

        result = _unfold_partial_integral(theta, delta, betas, discrimination,
                                          folding, response_set,
                                          invalid_response_mask)
        np.testing.assert_array_almost_equal(result, expected)

        invalid_response_mask[1] = True
        invalid_response_mask[7] = True
        result = _unfold_partial_integral(theta, delta, betas, discrimination,
                                          folding, response_set,
                                          invalid_response_mask)

        np.testing.assert_equal(result[1], np.ones(61, ))
        np.testing.assert_equal(result[7], np.ones(61, ))

        with np.testing.assert_raises(AssertionError):
            for ndx in [0, 2, 3, 4, 5, 6, 8, 9]:
                np.testing.assert_equal(result[ndx], np.ones(61, ))
Example #13
    def test_partial_integration_array(self):
        """Tests the integration quadrature function on array."""

        # Set seed for repeatability
        np.random.seed(121)

        discrimination = np.random.rand(5) + 0.5
        difficulty = np.linspace(-1.3, 1.3, 5)
        the_sign = (-1)**np.random.randint(low=0, high=2, size=(5, 1))

        quad_points = _get_quadrature_points(61, -6, 6)
        dataset = _compute_partial_integral(quad_points, difficulty,
                                            discrimination, the_sign)

        value = integrate.fixed_quad(lambda x: dataset, -6, 6, n=61)[0]

        discrrm = discrimination * the_sign.squeeze() * -1
        xx = np.linspace(-6, 6, 5001)
        yy = irt_evaluation(difficulty, discrrm, xx)
        yy = yy.prod(axis=0)
        expected = yy.sum() * 12 / 5001

        self.assertAlmostEqual(value, expected.sum(), places=3)
Example #14
    def test_graded_partial_integral(self):
        """Testing the partial integral in the graded model."""
        theta = _get_quadrature_points(61, -5, 5)
        responses = np.random.randint(0, 3, (10, 100))
        betas = np.array([-10000, -.3, 0.1, 1.2])
        betas_roll = np.roll(betas, -1)
        betas_roll[-1] = 10000

        output = _graded_partial_integral(theta, betas, betas_roll,
                                          1.0, responses)

        # Compare to hand calculations
        hand_calc = list()
        for ndx in range(responses.shape[1]):
            left_betas = betas[responses[:, ndx]]
            right_betas = betas_roll[responses[:, ndx]]
            probability = (1.0 / (1.0 + np.exp(left_betas[:, None] - theta[None, :])) -
                           1.0 / (1.0 + np.exp(right_betas[:, None] - theta[None, :])))
            hand_calc.append(probability.prod(0))

        hand_calc = np.asarray(hand_calc)

        np.testing.assert_array_equal(hand_calc, output)
Example #15
    def test_unfold_partial_integration(self):
        """Testing the unfolding integral."""
        theta = _get_quadrature_points(61, -5, 5)
        response_set = np.array([0, 1, 2, 2, 1, 0, 3, 1, 3, 2, 2, 2])
        betas = np.array([-1.3, -.4, 0.2])
        delta = -0.76
        # (2N - 1) / 2 - n
        folding = 3.5 - np.arange(4)
        discrimination = 1.42

        # Convert to PCM thresholds
        full = np.concatenate((betas, [0], -betas[::-1]))
        full += delta
        scratch = np.zeros((full.size + 1, theta.size))
        _unfold_func(full, discrimination, theta, scratch)

        expected = np.zeros((response_set.size, theta.size))
        for ndx, response in enumerate(response_set):
            expected[ndx] = scratch[response]

        result = _unfold_partial_integral(theta, delta, betas,
                                          discrimination, folding,
                                          response_set)
        np.testing.assert_array_almost_equal(result, expected)
Example #16
def gum_mml(dataset, options=None):
    """Estimate parameters for graded unfolding model.

    Estimate the discrimination, delta and threshold parameters for
    the graded unfolding model using marginal maximum likelihood.

    Args:
        dataset: [n_items, n_participants] 2d array of measured responses
        options: dictionary with updates to default options

    Returns:
        discrimination: (1d array) estimates of item discrimination
        delta: (1d array) estimates of item folding values
        difficulty: (2d array) estimates of item thresholds [items x thresholds]

    Options:
        * max_iteration: int
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int
    """
    options = validate_estimation_options(options)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    responses, item_counts = condition_polytomous_response(dataset, trim_ends=False,
                                                           _reference=0.0)
    n_items = responses.shape[0]

    # Interpolation Locations
    theta = _get_quadrature_points(quad_n, quad_start, quad_stop)
    distribution = options['distribution'](theta)

    # Initialize item parameters for iterations
    discrimination = np.ones((n_items,))
    betas = np.full((n_items, item_counts.max() - 1), np.nan)
    delta = np.zeros((n_items,))
    partial_int = np.ones((responses.shape[1], theta.size))

    # Set initial estimates to evenly spaced
    for ndx in range(n_items):
        item_length = item_counts[ndx] - 1
        betas[ndx, :item_length] = np.linspace(-1, 1, item_length)

    # This is the index associated with "folding" about the center
    fold_span = ((item_counts[:, None] - 0.5) -
                 np.arange(betas.shape[1] + 1)[None, :])

    #############
    # 1. Start the iteration loop
    # 2. Estimate Discrimination/Difficulty jointly
    # 3. Integrate over theta
    # 4. Minimize and repeat
    #############
    for iteration in range(options['max_iteration']):
        previous_discrimination = discrimination.copy()
        previous_betas = betas.copy()
        previous_delta = delta.copy()

        # Quadrature evaluation for values that do not change
        # This is done during the outer loop to address rounding errors
        # and for speed
        partial_int *= 0.0
        partial_int += distribution[None, :]
        for item_ndx in range(n_items):
            partial_int *= _unfold_partial_integral(theta, delta[item_ndx],
                                                    betas[item_ndx],
                                                    discrimination[item_ndx],
                                                    fold_span[item_ndx],
                                                    responses[item_ndx])

        # Loop over each item and solve for the alpha / beta parameters
        for item_ndx in range(n_items):
            # pylint: disable=cell-var-from-loop
            item_length = item_counts[item_ndx] - 1

            # Remove the previous output
            old_values = _unfold_partial_integral(theta, previous_delta[item_ndx],
                                                  previous_betas[item_ndx],
                                                  previous_discrimination[item_ndx],
                                                  fold_span[item_ndx],
                                                  responses[item_ndx])
            partial_int /= old_values

            def _local_min_func(estimate):
                new_betas = estimate[2:]
                new_values = _unfold_partial_integral(theta, estimate[1],
                                                      new_betas,
                                                      estimate[0], fold_span[item_ndx],
                                                      responses[item_ndx])

                new_values *= partial_int
                otpt = integrate.fixed_quad(
                    lambda x: new_values, quad_start, quad_stop, n=quad_n)[0]

                return -np.log(otpt).sum()

            # Initial Guess of Item Parameters
            initial_guess = np.concatenate(([discrimination[item_ndx]],
                                            [delta[item_ndx]],
                                            betas[item_ndx]))

            otpt = fmin_slsqp(_local_min_func, initial_guess,
                              disp=False,
                              bounds=[(.25, 4)] + [(-2, 2)] + [(-6, 6)] * item_length)

            discrimination[item_ndx] = otpt[0]
            delta[item_ndx] = otpt[1]
            betas[item_ndx, :] = otpt[2:]

            new_values = _unfold_partial_integral(theta, delta[item_ndx],
                                                  betas[item_ndx],
                                                  discrimination[item_ndx],
                                                  fold_span[item_ndx],
                                                  responses[item_ndx])

            partial_int *= new_values

        if np.abs(previous_discrimination - discrimination).max() < 1e-3:
            break

    return discrimination, delta, betas
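
A usage sketch for gum_mml under the same girth import assumption; the responses here are random integers standing in for real polytomous data, so the recovered parameters are only a smoke test:

import numpy as np
from girth import gum_mml  # assumed import path

np.random.seed(11)
dataset = np.random.randint(0, 4, (10, 500))   # hypothetical 4-category responses

discrimination, delta, thresholds = gum_mml(dataset)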
Example #17
def onepl_full(dataset, alpha=None, options=None):
    """ Estimates parameters in an 1PL IRT Model.

    This function is slow; please use onepl_mml instead.

    Args:
        dataset: [items x participants] matrix of True/False Values
        alpha: scalar of discrimination used in model (default to 1)
        options: dictionary with updates to default options

    Returns:
        discrimination: (float) estimate of test discrimination
        difficulty: (1d array) estimates of item difficulties

    Options:
        * max_iteration: int
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int

    Notes:
        If alpha is supplied then this solves a Rasch model
    """
    options = validate_estimation_options(options)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    n_items = dataset.shape[0]
    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)
    the_sign = convert_responses_to_kernel_sign(unique_sets)

    theta = _get_quadrature_points(quad_n, quad_start, quad_stop)
    distribution = options['distribution'](theta)

    discrimination = np.ones((n_items,))
    difficulty = np.zeros((n_items,))

    def alpha_min_func(alpha_estimate):
        discrimination[:] = alpha_estimate

        for iteration in range(options['max_iteration']):
            previous_difficulty = difficulty.copy()

            # Quadrature evaluation for values that do not change
            partial_int = _compute_partial_integral(theta, difficulty,
                                                    discrimination, the_sign)
            partial_int *= distribution

            for item_ndx in range(n_items):
                # pylint: disable=cell-var-from-loop

                # remove contribution from current item
                local_int = _compute_partial_integral(theta, difficulty[item_ndx, None],
                                                      discrimination[item_ndx, None],
                                                      the_sign[item_ndx, None])

                partial_int /= local_int

                def min_local_func(beta_estimate):
                    difficulty[item_ndx] = beta_estimate

                    estimate_int = _compute_partial_integral(theta, difficulty[item_ndx, None],
                                                             discrimination[item_ndx, None],
                                                             the_sign[item_ndx, None])

                    estimate_int *= partial_int

                    otpt = integrate.fixed_quad(
                        lambda x: estimate_int, quad_start, quad_stop, n=quad_n)[0]

                    return -np.log(otpt).dot(counts)

                fminbound(min_local_func, -4, 4)

                # Update the partial integral based on the new found values
                estimate_int = _compute_partial_integral(theta, difficulty[item_ndx, None],
                                                         discrimination[item_ndx, None],
                                                         the_sign[item_ndx, None])
                # update partial integral
                partial_int *= estimate_int

            if np.abs(previous_difficulty - difficulty).max() < 1e-3:
                break

        cost = integrate.fixed_quad(
            lambda x: partial_int, quad_start, quad_stop, n=quad_n)[0]
        return -np.log(cost).dot(counts)

    if alpha is None:  # OnePl Solver
        alpha = fminbound(alpha_min_func, 0.1, 4)
    else:  # Rasch Solver
        alpha_min_func(alpha)

    return alpha, difficulty
Example #18
def pcm_mml(dataset, options=None):
    """Estimate parameters for partial credit model.

    Estimate the discrimination and difficulty parameters for
    the partial credit model using marginal maximum likelihood.

    Args:
        dataset: [n_items, n_participants] 2d array of measured responses
        options: dictionary with updates to default options

    Returns:
        discrimination: (1d array) estimates of item discrimination
        difficulty: (2d array) estimates of item difficulties x item thresholds

    Options:
        * max_iteration: int
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int
    """
    options = validate_estimation_options(options)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    responses, item_counts = condition_polytomous_response(dataset, trim_ends=False,
                                                           _reference=0.0)
    n_items = responses.shape[0]

    # Interpolation Locations
    theta = _get_quadrature_points(quad_n, quad_start, quad_stop)
    distribution = options['distribution'](theta)

    # Initialize difficulty parameters for estimation
    betas = np.full((n_items, item_counts.max()), np.nan)
    discrimination = np.ones((n_items,))
    partial_int = np.ones((responses.shape[1], theta.size))

    # Not all items need to have the same
    # number of response categories
    betas[:, 0] = 0
    for ndx in range(n_items):
        betas[ndx, 1:item_counts[ndx]] = np.linspace(-1, 1, item_counts[ndx]-1)

    #############
    # 1. Start the iteration loop
    # 2. Estimate Discrimination/Difficulty jointly
    # 3. Integrate over theta
    # 4. Minimize and repeat
    #############
    for iteration in range(options['max_iteration']):
        previous_discrimination = discrimination.copy()
        previous_betas = betas.copy()

        # Quadrature evaluation for values that do not change
        # This is done during the outer loop to address rounding errors
        # and for speed
        partial_int *= 0.0
        partial_int += distribution[None, :]
        for item_ndx in range(n_items):
            partial_int *= _credit_partial_integral(theta, betas[item_ndx],
                                                    discrimination[item_ndx],
                                                    responses[item_ndx])

        # Loop over each item and solve for the alpha / beta parameters
        for item_ndx in range(n_items):
            # pylint: disable=cell-var-from-loop
            item_length = item_counts[item_ndx]
            new_betas = np.zeros((item_length))

            # Remove the previous output
            old_values = _credit_partial_integral(theta, previous_betas[item_ndx],
                                                  previous_discrimination[item_ndx],
                                                  responses[item_ndx])
            partial_int /= old_values

            def _local_min_func(estimate):
                new_betas[1:] = estimate[1:]
                new_values = _credit_partial_integral(theta, new_betas,
                                                      estimate[0],
                                                      responses[item_ndx])

                new_values *= partial_int
                otpt = integrate.fixed_quad(
                    lambda x: new_values, quad_start, quad_stop, n=quad_n)[0]

                return -np.log(otpt).sum()

            # Initial Guess of Item Parameters
            initial_guess = np.concatenate(([discrimination[item_ndx]],
                                            betas[item_ndx, 1:item_length]))

            otpt = fmin_slsqp(_local_min_func, initial_guess,
                              disp=False,
                              bounds=[(.25, 4)] + [(-6, 6)] * (item_length - 1))

            discrimination[item_ndx] = otpt[0]
            betas[item_ndx, 1:item_length] = otpt[1:]

            new_values = _credit_partial_integral(theta, betas[item_ndx],
                                                  discrimination[item_ndx],
                                                  responses[item_ndx])

            partial_int *= new_values

        if np.abs(previous_discrimination - discrimination).max() < 1e-3:
            break

    # TODO:  look where missing values are and place NAN there instead
    # of appending them to the end
    return discrimination, betas[:, 1:]
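
A usage sketch for pcm_mml, with the same caveats (assumed girth import, hypothetical random responses):

import numpy as np
from girth import pcm_mml  # assumed import path

np.random.seed(13)
dataset = np.random.randint(0, 4, (10, 500))   # hypothetical 4-category responses

discrimination, difficulties = pcm_mml(dataset)
# difficulties is [n_items x max thresholds]; per the TODO above, items with
# fewer categories carry NaN in their trailing threshold slots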
Example #19
def twopl_full(dataset, options=None):
    """ Estimates parameters in a 2PL IRT model.

    Please use twopl_mml instead.

    Args:
        dataset: [items x participants] matrix of True/False Values
        options: dictionary with updates to default options

    Returns:
        discrimination: (1d array) estimates of item discrimination
        difficulty: (1d array) estimates of item difficulties

    Options:
        * max_iteration: int
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int
"""
    options = validate_estimation_options(options)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    n_items = dataset.shape[0]
    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)
    the_sign = convert_responses_to_kernel_sign(unique_sets)

    theta = _get_quadrature_points(quad_n, quad_start, quad_stop)
    distribution = options['distribution'](theta)

    discrimination = np.ones((n_items,))
    difficulty = np.zeros((n_items,))

    for iteration in range(options['max_iteration']):
        previous_discrimination = discrimination.copy()

        # Quadrature evaluation for values that do not change
        partial_int = _compute_partial_integral(theta, difficulty,
                                                discrimination, the_sign)
        partial_int *= distribution

        for item_ndx in range(n_items):
            # pylint: disable=cell-var-from-loop
            local_int = _compute_partial_integral(theta, difficulty[item_ndx, None],
                                                  discrimination[item_ndx, None],
                                                  the_sign[item_ndx, None])

            partial_int /= local_int

            def min_func_local(estimate):
                discrimination[item_ndx] = estimate[0]
                difficulty[item_ndx] = estimate[1]

                estimate_int = _compute_partial_integral(theta,
                                                         difficulty[item_ndx, None],
                                                         discrimination[item_ndx, None],
                                                         the_sign[item_ndx, None])

                estimate_int *= partial_int
                otpt = integrate.fixed_quad(
                    lambda x: estimate_int, quad_start, quad_stop, n=quad_n)[0]

                return -np.log(otpt).dot(counts)

            # Two parameter solver that doesn't need derivatives
            initial_guess = np.concatenate((discrimination[item_ndx, None],
                                            difficulty[item_ndx, None]))
            fmin_slsqp(min_func_local, initial_guess, disp=False,
                       bounds=[(0.25, 4), (-4, 4)])

            # Update the partial integral based on the new found values
            estimate_int = _compute_partial_integral(theta, difficulty[item_ndx, None],
                                                     discrimination[item_ndx, None],
                                                     the_sign[item_ndx, None])
            # update partial integral
            partial_int *= estimate_int

        if np.abs(discrimination - previous_discrimination).max() < 1e-3:
            break

    return discrimination, difficulty
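
The docstring above recommends twopl_mml, but the call pattern for twopl_full is the same; a sketch with the assumed girth import and synthetic data from the helper seen in Example #9:

import numpy as np
from girth import create_synthetic_irt_dichotomous, twopl_full  # assumed import path

np.random.seed(17)
theta = np.random.randn(400)
difficulty = np.linspace(-1, 1, 5)
discrimination = np.random.rand(5) + 0.5

dataset = create_synthetic_irt_dichotomous(difficulty, discrimination, theta)
discrimination_hat, difficulty_hat = twopl_full(dataset)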
Example #20
def threepl_mml(dataset, options=None):
    """ Estimates parameters in a 3PL IRT model.

    Args:
        dataset: [items x participants] matrix of True/False Values
        options: dictionary with updates to default options

    Returns:
        results: dictionary with the following keys
            * Discrimination: (1d array) estimates of item discriminations
            * Difficulty: (1d array) estimates of item difficulties
            * Guessing: (1d array) estimates of item guessing parameters
    
    Options:
        * max_iteration: int
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int
    """
    options = validate_estimation_options(options)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    n_items = dataset.shape[0]
    n_no, n_yes = get_true_false_counts(dataset)
    scalar = n_yes / (n_yes + n_no)
    
    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)
    the_sign = convert_responses_to_kernel_sign(unique_sets)

    theta, weights = _get_quadrature_points(quad_n, quad_start, quad_stop)
    distribution = options['distribution'](theta)
    distribution_x_weights = distribution * weights

    # Perform the minimization
    discrimination = np.ones((n_items,))
    difficulty = np.zeros((n_items,))
    guessing = np.zeros((n_items,))
    
    local_scalar = np.zeros((1, 1))

    for iteration in range(options['max_iteration']):
        previous_discrimination = discrimination.copy()

        # Quadrature evaluation for values that do not change
        # This is done during the outer loop to address rounding errors
        partial_int = _compute_partial_integral_3pl(theta, difficulty,
                                                    discrimination, guessing,
                                                    the_sign)
        partial_int *= distribution

        for ndx in range(n_items):
            # pylint: disable=cell-var-from-loop

            # remove contribution from current item
            local_int = _compute_partial_integral_3pl(theta, difficulty[ndx, None],
                                                      discrimination[ndx, None],
                                                      guessing[ndx, None],
                                                      the_sign[ndx, None])

            partial_int /= local_int

            def min_func_local(estimate):
                discrimination[ndx] = estimate[0]
                guessing[ndx] = estimate[1]
                
                local_scalar[0, 0] = (scalar[ndx] - guessing[ndx]) / (1. - guessing[ndx])
                _mml_abstract(difficulty[ndx, None], local_scalar,
                              discrimination[ndx, None], theta, distribution_x_weights)
                estimate_int = _compute_partial_integral_3pl(theta, difficulty[ndx, None],
                                                             discrimination[ndx, None],
                                                             guessing[ndx, None],
                                                             the_sign[ndx, None])

                estimate_int *= partial_int
                otpt = integrate.fixed_quad(
                    lambda x: estimate_int, quad_start, quad_stop, n=quad_n)[0]
                
                return -np.log(otpt).dot(counts)

            # Solve for the discrimination parameters
            initial_guess = [discrimination[ndx], guessing[ndx]]
            fmin_slsqp(min_func_local, initial_guess, 
                       bounds=([0.25, 4], [0, .33]), iprint=False)

            # Update the partial integral based on the new found values
            estimate_int = _compute_partial_integral_3pl(theta, difficulty[ndx, None],
                                                         discrimination[ndx, None],
                                                         guessing[ndx, None],
                                                         the_sign[ndx, None])
            # update partial integral
            partial_int *= estimate_int

        if np.abs(discrimination - previous_discrimination).max() < 1e-3:
            break

    return {'Discrimination': discrimination, 
            'Difficulty': difficulty, 
            'Guessing': guessing}
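
Note that this version of threepl_mml returns a dictionary rather than a tuple. A usage sketch under the usual assumptions (girth import, synthetic data; the dichotomous synthesizer used here has no guessing component, so the guessing estimates should land near zero):

import numpy as np
from girth import create_synthetic_irt_dichotomous, threepl_mml  # assumed import path

np.random.seed(19)
theta = np.random.randn(1000)
difficulty = np.linspace(-1.5, 1.5, 8)
discrimination = np.random.rand(8) + 0.75

dataset = create_synthetic_irt_dichotomous(difficulty, discrimination, theta)

results = threepl_mml(dataset)
print(results['Discrimination'], results['Difficulty'], results['Guessing'])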
Example #21
def grm_mml(dataset, options=None):
    """Estimate parameters for graded response model.

    Estimate the discrimination and difficulty parameters for
    a graded response model using marginal maximum likelihood.

    Args:
        dataset: [n_items, n_participants] 2d array of measured responses
        options: dictionary with updates to default options

    Returns:
        discrimination: (1d array) estimate of item discriminations
        difficulty: (2d array) estimates of item difficulties by item thresholds

    Options:
        * max_iteration: int
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int
    """
    options = validate_estimation_options(options)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    responses, item_counts = condition_polytomous_response(dataset, trim_ends=False)
    n_items = responses.shape[0]

    # Interpolation Locations
    theta = _get_quadrature_points(quad_n, quad_start, quad_stop)
    distribution = options['distribution'](theta)

    # Compute the values needed for integral equations
    integral_counts = list()
    for ndx in range(n_items):
        temp_output = _solve_for_constants(responses[ndx])
        integral_counts.append(temp_output)

    # Initialize difficulty parameters for estimation
    betas = np.full((item_counts.sum(),), -10000.0)
    discrimination = np.ones_like(betas)
    cumulative_item_counts = item_counts.cumsum()
    start_indices = np.roll(cumulative_item_counts, 1)
    start_indices[0] = 0

    for ndx in range(n_items):
        end_ndx = cumulative_item_counts[ndx]
        start_ndx = start_indices[ndx] + 1
        betas[start_ndx:end_ndx] = np.linspace(-1, 1,
                                               item_counts[ndx] - 1)
    betas_roll = np.roll(betas, -1)
    betas_roll[cumulative_item_counts-1] = 10000

    #############
    # 1. Start the iteration loop
    # 2. estimate discrimination
    # 3. solve for difficulties
    # 4. minimize and repeat
    #############
    for iteration in range(options['max_iteration']):
        previous_discrimination = discrimination.copy()
        previous_betas = betas.copy()
        previous_betas_roll = betas_roll.copy()

        # Quadrature evaluation for values that do not change
        # This is done during the outer loop to address rounding errors
        partial_int = _graded_partial_integral(theta, betas, betas_roll,
                                               discrimination, responses)
        partial_int *= distribution

        for item_ndx in range(n_items):
            # pylint: disable=cell-var-from-loop

            # Indices into linearized difficulty parameters
            start_ndx = start_indices[item_ndx]
            end_ndx = cumulative_item_counts[item_ndx]

            old_values = _graded_partial_integral(theta, previous_betas,
                                                  previous_betas_roll,
                                                  previous_discrimination,
                                                  responses[item_ndx][None, :])
            partial_int /= old_values

            def _local_min_func(estimate):
                # Solve integrals for difficulty estimates
                new_betas = _solve_integral_equations(estimate,
                                                      integral_counts[item_ndx],
                                                      distribution,
                                                      theta)
                betas[start_ndx+1:end_ndx] = new_betas
                betas_roll[start_ndx:end_ndx-1] = new_betas
                discrimination[start_ndx:end_ndx] = estimate

                new_values = _graded_partial_integral(theta, betas, betas_roll,
                                                      discrimination,
                                                      responses[item_ndx][None, :])

                new_values *= partial_int
                otpt = integrate.fixed_quad(
                    lambda x: new_values, quad_start, quad_stop, n=quad_n)[0]

                return -np.log(otpt).sum()

            # Univariate minimization for discrimination parameter
            fminbound(_local_min_func, 0.2, 5.0)

            new_values = _graded_partial_integral(theta, betas, betas_roll,
                                                  discrimination,
                                                  responses[item_ndx][None, :])

            partial_int *= new_values

        if np.abs(previous_discrimination - discrimination).max() < 1e-3:
            break

    # Trim difficulties to conform to standard output
    # TODO:  look where missing values are and place NAN there instead
    # of appending them to the end
    output_betas = np.full((n_items, item_counts.max()-1), np.nan)
    for ndx, (start_ndx, end_ndx) in enumerate(zip(start_indices, cumulative_item_counts)):
        output_betas[ndx, :end_ndx-start_ndx-1] = betas[start_ndx+1:end_ndx]

    return discrimination[start_indices], output_betas
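
A usage sketch for grm_mml with the same assumptions; note the graded response model expects ordered categories, so the random integers below only exercise the code path:

import numpy as np
from girth import grm_mml  # assumed import path

np.random.seed(23)
dataset = np.random.randint(0, 4, (10, 500))   # hypothetical graded responses

discrimination, difficulties = grm_mml(dataset)
# difficulties is [n_items x max thresholds], NaN-padded as described above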
Example #22
def onepl_mml(dataset, alpha=None, options=None):
    """ Estimates parameters in an 1PL IRT Model.

    Args:
        dataset: [items x participants] matrix of True/False Values
        alpha: [float] discrimination constraint
        options: dictionary with updates to default options

    Returns:
        results: dictionary with the following keys
            * Discrimination: (float) estimate of test discrimination
            * Difficulty: (1d array) estimates of item difficulties

    Options:
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int
    """
    options = validate_estimation_options(options)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    # Difficulty Estimation parameters
    n_items = dataset.shape[0]
    n_no, n_yes = get_true_false_counts(dataset)
    scalar = n_yes / (n_yes + n_no)

    unique_sets, counts = np.unique(dataset, axis=1, return_counts=True)
    invalid_response_mask = unique_sets == INVALID_RESPONSE
    unique_sets[invalid_response_mask] = 0  # For Indexing, fixed later

    discrimination = np.ones((n_items, ))
    difficulty = np.zeros((n_items, ))

    # Quadrature Locations
    theta, weights = _get_quadrature_points(quad_n, quad_start, quad_stop)
    distribution = options['distribution'](theta)
    distribution_x_weights = distribution * weights

    # Inline definition of cost function to minimize
    def min_func(estimate):
        discrimination[:] = estimate
        _mml_abstract(difficulty, scalar, discrimination, theta,
                      distribution_x_weights)

        partial_int = np.ones((unique_sets.shape[1], theta.size))
        for ndx in range(n_items):
            partial_int *= _compute_partial_integral(
                theta, difficulty[ndx], discrimination[ndx], unique_sets[ndx],
                invalid_response_mask[ndx])
        partial_int *= distribution_x_weights

        # compute_integral
        otpt = np.sum(partial_int, axis=1)
        return -np.log(otpt).dot(counts)

    # Perform the minimization
    if alpha is None:  # OnePL Method
        alpha = fminbound(min_func, 0.25, 10)
    else:  # Rasch Method
        min_func(alpha)

    return {"Discrimination": alpha, "Difficulty": difficulty}