Exemple #1
0
    def estimate(self,
                 index: int = None,
                 items: numpy.ndarray = None,
                 administered_items: list = None,
                 response_vector: list = None,
                 est_theta: float = None,
                 **kwargs) -> float:
        """Returns the theta value that maximizes the log-likelihood function, given the current state of the
         test for the given examinee.

        The maximum is located with a coarse-to-fine grid search: ten evenly spaced candidates are evaluated
        between the smallest and largest value found in column 1 of the administered items (or of the whole
        item matrix when nothing was administered yet), and the search is then repeated, up to ten passes, on a
        window of one grid step to either side of the best candidate found so far.

        :param index: index of the current examinee in the simulator
        :param items: a matrix containing item parameters in the format that `catsim` understands
                      (see: :py:func:`catsim.cat.generate_item_bank`)
        :param administered_items: a list containing the indexes of items that were already administered
        :param response_vector: a boolean list containing the examinee's answers to the administered items
        :param est_theta: a float containing the current estimated proficiency
        :returns: the current :math:`\\hat\\theta`
        :raises ValueError: if no index/simulator pair is available and any one of ``items``,
                            ``administered_items``, ``response_vector`` or ``est_theta`` is missing
        """
        standalone_args = (items, administered_items, response_vector,
                           est_theta)

        # Without a simulator to read from, *every* standalone argument is
        # required. (The previous unparenthesised and/or chain only rejected
        # the call when both `items` and `administered_items` were missing.)
        if (index is None or self.simulator is None) and any(
                arg is None for arg in standalone_args):
            raise ValueError(
                'Either pass an index for the simulator or all of the other optional parameters to use this component independently.'
            )

        # nothing was passed explicitly: read the test state from the simulator
        if all(arg is None for arg in standalone_args):
            items = self.simulator.items
            administered_items = self.simulator.administered_items[index]
            response_vector = self.simulator.response_vectors[index]
            est_theta = self.simulator.latest_estimations[index]

        self._calls += 1
        self._evaluations = 0

        # all responses are identical: optionally delegate to cat.dodd
        if len(set(response_vector)) == 1 and self._dodd:
            return cat.dodd(est_theta, items, response_vector[-1])

        # NOTE(review): this method used to compare `set(response_vector)`
        # against the integers 1 and 0 to return +inf / -inf here. A set never
        # equals an int, so those returns could not run; they were removed so
        # that uniform response vectors keep going through the grid search
        # exactly as before.

        # initial search window, taken from column 1 of the item matrix
        # (presumably the difficulty parameter -- confirm against the item
        # bank format)
        if len(administered_items) > 0:
            lower_bound = min(items[administered_items][:, 1])
            upper_bound = max(items[administered_items][:, 1])
        else:
            lower_bound = min(items[:, 1])
            upper_bound = max(items[:, 1])

        best_theta = float('-inf')
        max_ll = float('-inf')

        # two consecutive best candidates closer than this end the search
        tolerance = float('1e-' + str(self._precision))

        # the estimator starts with a rough search, which gets finer with each pass
        for granularity in range(10):

            # generate a list of candidate theta values
            candidates = numpy.linspace(lower_bound, upper_bound, 10)
            interval_size = candidates[1] - candidates[0]

            if self._verbose:
                print('Pass: {0}\n\tBounds: {1} {2}\n\tInterval size: {3}'.
                      format(granularity + 1, lower_bound, upper_bound,
                             interval_size))

            # we'll use the concave nature of the log-likelihood function
            # to program a primitive early stopping method in our search
            previous_ll = float('-inf')

            # iterate through each candidate
            for candidate_theta in candidates:
                self._evaluations += 1

                current_ll = irt.log_likelihood(candidate_theta,
                                                response_vector,
                                                items[administered_items])

                # we search the function from left to right, so when the
                # log-likelihood of the current theta is smaller than the one
                # from the previous theta we tested, it means it's all downhill
                # from then on, so we stop our search
                if current_ll < previous_ll:
                    break
                previous_ll = current_ll

                # check if the LL of the current candidate theta is larger than the best one checked as of yet
                if current_ll > max_ll:
                    if self._verbose:
                        print('\t\tTheta: {0}, LL: {1}'.format(
                            candidate_theta, current_ll))

                    if abs(best_theta - candidate_theta) < tolerance:
                        return self._getout(candidate_theta)

                    max_ll = current_ll
                    best_theta = candidate_theta

            # the bounds of the new candidates are adjusted around the current best theta value
            lower_bound = best_theta - interval_size
            upper_bound = best_theta + interval_size

        return self._getout(best_theta)
Exemple #2
0
    def estimate(
        self,
        index: int = None,
        items: numpy.ndarray = None,
        administered_items: list = None,
        response_vector: list = None,
        est_theta: float = None,
        **kwargs
    ) -> float:
        """Returns the theta value that maximizes the log-likelihood function, given the current state of the
         test for the given examinee.

        A coarse-to-fine grid search is used. Each pass evaluates ten evenly spaced candidate thetas; the first
        pass spans the smallest to the largest value in column 1 of the administered items (or of the whole item
        matrix if nothing was administered), and every later pass spans one grid step to either side of the best
        candidate so far. At most ten passes are made.

        :param index: index of the current examinee in the simulator
        :param items: a matrix containing item parameters in the format that `catsim` understands
                      (see: :py:func:`catsim.cat.generate_item_bank`)
        :param administered_items: a list containing the indexes of items that were already administered
        :param response_vector: a boolean list containing the examinee's answers to the administered items
        :param est_theta: a float containing the current estimated proficiency
        :returns: the current :math:`\\hat\\theta`
        :raises ValueError: if no index/simulator pair is available and any one of ``items``,
                            ``administered_items``, ``response_vector`` or ``est_theta`` is missing
        """
        standalone_args = (items, administered_items, response_vector, est_theta)
        any_missing = any(arg is None for arg in standalone_args)

        # Standalone use needs all four arguments. The earlier condition mixed
        # `and`/`or` without parentheses and therefore accepted calls in which
        # only one of `items` / `administered_items` was missing.
        if (index is None or self.simulator is None) and any_missing:
            raise ValueError(
                'Either pass an index for the simulator or all of the other optional parameters to use this component independently.'
            )

        # no explicit state given: pull everything from the simulator
        if all(arg is None for arg in standalone_args):
            items = self.simulator.items
            administered_items = self.simulator.administered_items[index]
            response_vector = self.simulator.response_vectors[index]
            est_theta = self.simulator.latest_estimations[index]

        self._calls += 1
        self._evaluations = 0

        # every answer so far is the same: optionally delegate to cat.dodd
        if len(set(response_vector)) == 1 and self._dodd:
            return cat.dodd(est_theta, items, response_vector[-1])

        # NOTE(review): two branches that returned +inf / -inf used to sit
        # here, guarded by `set(response_vector) == 1` and `== 0`. A set is
        # never equal to an int, so they were unreachable; they are dropped
        # to keep the actual behaviour (grid search) unchanged.

        # first search window comes from column 1 of the item matrix
        # (presumably item difficulty -- confirm against the item bank format)
        if len(administered_items) > 0:
            lower_bound = min(items[administered_items][:, 1])
            upper_bound = max(items[administered_items][:, 1])
        else:
            lower_bound = min(items[:, 1])
            upper_bound = max(items[:, 1])

        best_theta = float('-inf')
        max_ll = float('-inf')

        # convergence threshold; invariant across the whole search
        tolerance = float('1e-' + str(self._precision))

        # the estimator starts with a rough search, which gets finer with each pass
        for granularity in range(10):

            # generate a list of candidate theta values
            candidates = numpy.linspace(lower_bound, upper_bound, 10)
            interval_size = candidates[1] - candidates[0]

            if self._verbose:
                print(
                    'Pass: {0}\n\tBounds: {1} {2}\n\tInterval size: {3}'.format(
                        granularity + 1, lower_bound, upper_bound, interval_size
                    )
                )

            # we'll use the concave nature of the log-likelihood function
            # to program a primitive early stopping method in our search
            previous_ll = float('-inf')

            # iterate through each candidate
            for candidate_theta in candidates:
                self._evaluations += 1

                current_ll = irt.log_likelihood(candidate_theta, response_vector, items[administered_items])

                # we search the function from left to right, so when the
                # log-likelihood of the current theta is smaller than the one
                # from the previous theta we tested, it means it's all downhill
                # from then on, so we stop our search
                if current_ll < previous_ll:
                    break
                previous_ll = current_ll

                # check if the LL of the current candidate theta is larger than the best one checked as of yet
                if current_ll > max_ll:
                    if self._verbose:
                        print('\t\tTheta: {0}, LL: {1}'.format(candidate_theta, current_ll))

                    if abs(best_theta - candidate_theta) < tolerance:
                        return self._getout(candidate_theta)

                    max_ll = current_ll
                    best_theta = candidate_theta

            # the bounds of the new candidates are adjusted around the current best theta value
            lower_bound = best_theta - interval_size
            upper_bound = best_theta + interval_size

        return self._getout(best_theta)
Exemple #3
0
    def estimate(
        self,
        index: int = None,
        items: numpy.ndarray = None,
        administered_items: list = None,
        response_vector: list = None,
        est_theta: float = None,
        **kwargs
    ) -> float:
        """Returns the theta value that maximizes the log-likelihood function, given the current state of the
         test for the given examinee, searching inside ``self._bounds``.

        The search starts on ten evenly spaced thetas spanning ``self._bounds`` and walks them from left to
        right. As soon as the log-likelihood stops improving, the window is narrowed around the best theta and
        a new, finer pass begins. At most ten passes are made and the result is passed through
        ``self._bound_estimate``.

        :param index: index of the current examinee in the simulator
        :param items: a matrix containing item parameters in the format that `catsim` understands
                      (see: :py:func:`catsim.cat.generate_item_bank`)
        :param administered_items: a list containing the indexes of items that were already administered
        :param response_vector: a boolean list containing the examinee's answers to the administered items
        :param est_theta: a float containing the current estimated proficiency
        :returns: the current :math:`\\hat\\theta`
        :raises ValueError: if no index/simulator pair is available and any one of ``items``,
                            ``administered_items``, ``response_vector`` or ``est_theta`` is missing
        """
        standalone_args = (items, administered_items, response_vector, est_theta)

        # Standalone use needs all four arguments. The earlier condition mixed
        # `and`/`or` without parentheses and therefore accepted calls in which
        # only one of `items` / `administered_items` was missing.
        if (index is None or self.simulator is None) and any(
            arg is None for arg in standalone_args
        ):
            raise ValueError(
                'Either pass an index for the simulator or all of the other optional parameters to use this component independently.'
            )

        # no explicit state given: pull everything from the simulator
        if all(arg is None for arg in standalone_args):
            items = self.simulator.items
            administered_items = self.simulator.administered_items[index]
            response_vector = self.simulator.response_vectors[index]
            est_theta = self.simulator.latest_estimations[index]

        self._calls += 1

        # need to constrain all estimates between these bounds, rather than, e.g.
        # min / max difficulties
        lower_bound, upper_bound = self._bounds

        if len(set(response_vector)) == 1 and self._dodd:
            # append bounds in mock "items", so that the dodd procedure will
            # at least step toward the bounds we set. Note that this is stretching
            # the use of the term dodd.
            min_item = [0, lower_bound, 0, 0]
            max_item = [0, upper_bound, 0, 0]
            bound_items = numpy.vstack([min_item, max_item])
            return cat.dodd(est_theta, bound_items, response_vector[-1])

        # NOTE(review): two branches that returned +inf / -inf used to sit
        # here, guarded by `set(response_vector) == 1` and `== 0`. A set is
        # never equal to an int, so they were unreachable; they are dropped
        # to keep the actual behaviour (bounded grid search) unchanged.

        best_theta = float('-inf')
        max_ll = float('-inf')

        self._evaluations = 0

        # convergence threshold; invariant across the whole search
        tolerance = float('1e-' + str(self._precision))

        for _ in range(10):
            intervals = numpy.linspace(lower_bound, upper_bound, 10)
            if self._verbose:
                print(('Bounds: ' + str(lower_bound) + ' ' + str(upper_bound)))
                print(('Interval size: ' + str(intervals[1] - intervals[0])))

            for ii in intervals:
                self._evaluations += 1
                ll = irt.log_likelihood(ii, response_vector, items[administered_items])
                if ll > max_ll:
                    max_ll = ll

                    if self._verbose:
                        print(
                            (
                                'Iteration: {0}, Theta: {1}, LL: {2}'.format(
                                    self._evaluations, ii, ll
                                )
                            )
                        )

                    # two consecutive improving thetas are close enough: done
                    if abs(best_theta - ii) < tolerance:
                        return self._bound_estimate(ii)

                    best_theta = ii

                else:
                    # the likelihood stopped improving: narrow the window to
                    # one step left of the best theta up to the current theta
                    lower_bound = best_theta - (intervals[1] - intervals[0])
                    upper_bound = ii
                    # forget the best likelihood so the next pass can climb
                    # again from its left edge, in case the optimum is to the
                    # left of best_theta
                    max_ll = float('-inf')
                    break

        return self._bound_estimate(best_theta)
Exemple #4
0
    def estimate(self,
                 index: int = None,
                 items: numpy.ndarray = None,
                 administered_items: list = None,
                 response_vector: list = None,
                 est_theta: float = None,
                 **kwargs) -> float:
        """Returns the theta value that maximizes the log-likelihood function, given the current state of the
         test for the given examinee.

        The search starts on ten evenly spaced thetas between the smallest and largest value in column 1 of the
        administered items (or of the whole item matrix when nothing was administered) and walks them from left
        to right. As soon as the log-likelihood stops improving, the window is narrowed around the best theta
        and a new, finer pass begins. At most ten passes are made.

        :param index: index of the current examinee in the simulator
        :param items: a matrix containing item parameters in the format that `catsim` understands
                      (see: :py:func:`catsim.cat.generate_item_bank`)
        :param administered_items: a list containing the indexes of items that were already administered
        :param response_vector: a boolean list containing the examinee's answers to the administered items
        :param est_theta: a float containing the current estimated proficiency
        :returns: the current :math:`\\hat\\theta`
        :raises ValueError: if no index/simulator pair is available and any one of ``items``,
                            ``administered_items``, ``response_vector`` or ``est_theta`` is missing
        """
        standalone_args = (items, administered_items, response_vector,
                           est_theta)

        # Without a simulator to read from, *every* standalone argument is
        # required. (The previous unparenthesised and/or chain only rejected
        # the call when both `items` and `administered_items` were missing.)
        if (index is None or self.simulator is None) and any(
                arg is None for arg in standalone_args):
            raise ValueError(
                'Either pass an index for the simulator or all of the other optional parameters to use this component independently.'
            )

        # nothing was passed explicitly: read the test state from the simulator
        if all(arg is None for arg in standalone_args):
            items = self.simulator.items
            administered_items = self.simulator.administered_items[index]
            response_vector = self.simulator.response_vectors[index]
            est_theta = self.simulator.latest_estimations[index]

        self._calls += 1

        # all responses are identical: optionally delegate to cat.dodd
        if len(set(response_vector)) == 1 and self._dodd:
            return cat.dodd(est_theta, items, response_vector[-1])

        # NOTE(review): this method used to compare `set(response_vector)`
        # against the integers 1 and 0 to return +inf / -inf here. A set never
        # equals an int, so those returns could not run; they were removed so
        # that uniform response vectors keep going through the grid search
        # exactly as before.

        # initial search window, taken from column 1 of the item matrix
        # (presumably the difficulty parameter -- confirm against the item
        # bank format)
        if len(administered_items) > 0:
            lower_bound = min(items[administered_items][:, 1])
            upper_bound = max(items[administered_items][:, 1])
        else:
            lower_bound = min(items[:, 1])
            upper_bound = max(items[:, 1])

        best_theta = float('-inf')
        max_ll = float('-inf')

        self._evaluations = 0

        # two consecutive improving thetas closer than this end the search
        tolerance = float('1e-' + str(self._precision))

        for _ in range(10):
            intervals = numpy.linspace(lower_bound, upper_bound, 10)
            step = intervals[1] - intervals[0]
            if self._verbose:
                print(('Bounds: ' + str(lower_bound) + ' ' + str(upper_bound)))
                print(('Interval size: ' + str(step)))

            for ii in intervals:
                self._evaluations += 1
                ll = irt.log_likelihood(ii, response_vector,
                                        items[administered_items])
                if ll > max_ll:
                    max_ll = ll

                    if self._verbose:
                        print(('Iteration: {0}, Theta: {1}, LL: {2}'.format(
                            self._evaluations, ii, ll)))

                    if abs(best_theta - ii) < tolerance:
                        return ii

                    best_theta = ii

                else:
                    # the likelihood stopped improving: narrow the window to
                    # one step left of the best theta up to the current theta
                    lower_bound = best_theta - step
                    upper_bound = ii
                    # Forget the best likelihood before the finer pass. If it
                    # were kept, the first theta of the next pass (one step to
                    # the left of best_theta) would lose against it, this
                    # branch would run again immediately and the window would
                    # invert instead of narrowing around the optimum.
                    max_ll = float('-inf')
                    break

        return best_theta
Exemple #5
0
    def estimate(
        self,
        index: int = None,
        items: numpy.ndarray = None,
        administered_items: list = None,
        response_vector: list = None,
        est_theta: float = None,
        **kwargs
    ) -> float:
        """Returns the theta value that corresponds to the maximum a posteriori estimate, given the current state of the
         test for the given examinee. The posterior is obtained from summing the log-likelihood and the log of the normal
         density function.

        The search starts on ten evenly spaced thetas spanning ``self._bounds`` and walks them from left to
        right. As soon as the log-posterior stops improving, the window is narrowed around the best theta and a
        new, finer pass begins. At most ten passes are made and the result is passed through
        ``self._bound_estimate``.

        :param index: index of the current examinee in the simulator
        :param items: a matrix containing item parameters in the format that `catsim` understands
                      (see: :py:func:`catsim.cat.generate_item_bank`)
        :param administered_items: a list containing the indexes of items that were already administered
        :param response_vector: a boolean list containing the examinee's answers to the administered items
        :param est_theta: a float containing the current estimated proficiency
        :returns: the current theta estimate (based on the bounded MAP estimate)
        :raises ValueError: if no index/simulator pair is available and any one of ``items``,
                            ``administered_items``, ``response_vector`` or ``est_theta`` is missing
        """
        standalone_args = (items, administered_items, response_vector, est_theta)

        # Standalone use needs all four arguments. The earlier condition mixed
        # `and`/`or` without parentheses and therefore accepted calls in which
        # only one of `items` / `administered_items` was missing.
        if (index is None or self.simulator is None) and any(
            arg is None for arg in standalone_args
        ):
            raise ValueError(
                'Either pass an index for the simulator or all of the other optional parameters to use this component independently.'
            )

        # no explicit state given: pull everything from the simulator
        if all(arg is None for arg in standalone_args):
            items = self.simulator.items
            administered_items = self.simulator.administered_items[index]
            response_vector = self.simulator.response_vectors[index]
            est_theta = self.simulator.latest_estimations[index]

        self._calls += 1

        # need to constrain all estimates between these bounds, rather than, e.g.
        # min / max difficulties
        lower_bound, upper_bound = self._bounds

        best_theta = float('-inf')
        max_ll = float('-inf')

        self._evaluations = 0

        # convergence threshold; invariant across the whole search
        tolerance = float('1e-' + str(self._precision))

        for _ in range(10):
            intervals = numpy.linspace(lower_bound, upper_bound, 10)
            if self._verbose:
                print(('Bounds: ' + str(lower_bound) + ' ' + str(upper_bound)))
                print(('Interval size: ' + str(intervals[1] - intervals[0])))

            for ii in intervals:
                self._evaluations += 1
                # log-posterior: log-likelihood plus the log-density of the
                # normal prior at this theta
                ll = irt.log_likelihood(
                    ii, response_vector, items[administered_items]
                ) + norm.logpdf(ii, loc=self._prior_mean, scale=self._prior_sd)
                if ll > max_ll:
                    max_ll = ll

                    if self._verbose:
                        print(
                            (
                                'Iteration: {0}, Theta: {1}, LL: {2}'.format(
                                    self._evaluations, ii, ll
                                )
                            )
                        )

                    # two consecutive improving thetas are close enough: done
                    if abs(best_theta - ii) < tolerance:
                        return self._bound_estimate(ii)

                    best_theta = ii

                else:
                    # the posterior stopped improving: narrow the window to
                    # one step left of the best theta up to the current theta
                    lower_bound = best_theta - (intervals[1] - intervals[0])
                    upper_bound = ii
                    # forget the best value so the next pass can climb again
                    # from its left edge, in case the optimum is to the left
                    # of best_theta
                    max_ll = float('-inf')
                    break

        return self._bound_estimate(best_theta)