Ejemplo n.º 1
0
def CalcINF(dict_theta, irt_dict):
    """
    Evaluate ``inf_hpc`` for every classifier theta of every dataset.

    Input:
        dict_theta: Nested dictionary ``{dataset: {parameter: {clf: theta}}}``
        holding the theta value of each classifier.
        irt_dict: Dictionary mapping each dataset to its item parameters
        (presumably a numpy array -- confirm against the caller).

    Output: A nested dictionary ``{dataset: {parameter: {clf: values}}}``
    where ``values`` is the result of ``inf_hpc(theta, irt_dict[dataset])``
    converted to a list. A parameter that has no classifiers does not get
    an entry in the result.
    """
    result = {}
    for dataset, thetas_by_parameter in dict_theta.items():
        per_parameter = {}
        for parameter, thetas_by_clf in thetas_by_parameter.items():
            per_clf = {
                clf: list(inf_hpc(theta, irt_dict[dataset]))
                for clf, theta in thetas_by_clf.items()
            }
            # Only parameters with at least one classifier are recorded.
            if per_clf:
                per_parameter[parameter] = per_clf
        result[dataset] = per_parameter

    return result
Ejemplo n.º 2
0
    def select(self,
               index: int = None,
               items: numpy.ndarray = None,
               administered_items: list = None,
               est_theta: float = None,
               **kwargs) -> int:
        """Returns the index of the not-yet-administered item with the largest information value.

        The information of every remaining item is computed with `irt.inf_hpc` at `est_theta`
        and the item with the largest value is returned. When several items share the largest
        value, the one with the smallest index is chosen.

        :param index: the index of the current examinee in the simulator.
        :param items: a matrix containing item parameters in the format that `catsim` understands
                      (see: :py:func:`catsim.cat.generate_item_bank`)
        :param administered_items: a list containing the indexes of items that were already administered
        :param est_theta: a float containing the current estimated proficiency
        :param kwargs: accepted for interface compatibility; not used by this method.
        :returns: index of the next item to be applied or `None` if there are no more items in the item bank.
        :raises ValueError: if neither a simulator index (with a simulator attached) nor all of
                            `items`, `administered_items` and `est_theta` are available.
        """
        if (index is None
                or self.simulator is None) and (items is None
                                                or administered_items is None
                                                or est_theta is None):
            raise ValueError(
                'Either pass an index for the simulator or all of the other optional parameters to use this component independently.'
            )

        # when nothing was passed explicitly, read everything from the simulator
        if items is None and administered_items is None and est_theta is None:
            items = self.simulator.items
            administered_items = self.simulator.administered_items[index]
            est_theta = self.simulator.latest_estimations[index]

        # a set gives constant-time membership tests while filtering the bank
        administered = set(administered_items)
        valid_indexes = [
            x for x in range(items.shape[0]) if x not in administered
        ]

        # stop before computing any information value when the bank is exhausted
        if not valid_indexes:
            warn('There are no more items to be applied.')
            return None

        inf_values = irt.inf_hpc(est_theta, items[valid_indexes])

        # argmax returns the first maximum, i.e. the lowest item index among ties
        return valid_indexes[int(numpy.argmax(inf_values))]
Ejemplo n.º 3
0
    def select(self,
               index: int = None,
               items: numpy.ndarray = None,
               administered_items: list = None,
               est_theta: float = None,
               **kwargs) -> int:
        """Returns the index of the next item to be administered.

        The remaining items are ranked by their information value at `est_theta` (most
        informative first) and one item is drawn at random from the `bin_size` best ones, where
        `bin_size` is `self._test_size` minus the number of items already administered. The bin
        therefore shrinks by one at every step of the test and never gets smaller than one item.

        :param index: the index of the current examinee in the simulator.
        :param items: a matrix containing item parameters in the format that `catsim` understands
                      (see: :py:func:`catsim.cat.generate_item_bank`)
        :param administered_items: a list containing the indexes of items that were already administered
        :param est_theta: a float containing the current estimated proficiency
        :param kwargs: accepted for interface compatibility; not used by this method.
        :returns: index of the next item to be applied or `None` if there are no more items in the item bank.
        :raises ValueError: if neither a simulator index (with a simulator attached) nor all of
                            `items`, `administered_items` and `est_theta` are available.
        """
        if (index is None
                or self.simulator is None) and (items is None
                                                or administered_items is None
                                                or est_theta is None):
            raise ValueError(
                'Either pass an index for the simulator or all of the other optional parameters to use this component independently.'
            )

        # when nothing was passed explicitly, read everything from the simulator
        if items is None and administered_items is None and est_theta is None:
            items = self.simulator.items
            administered_items = self.simulator.administered_items[index]
            est_theta = self.simulator.latest_estimations[index]

        administered = set(administered_items)

        # stop before computing any information value when the bank is exhausted
        if all(x in administered for x in range(items.shape[0])):
            warn('There are no more items to apply.')
            return None

        # sort item indexes by their information value (descending) and remove
        # indexes of administered items
        ranked = irt.inf_hpc(est_theta, items).argsort()[::-1]
        organized_items = [x for x in ranked if x not in administered]

        # the bin holds the `bin_size` MOST informative items; it is clamped to 1
        # so that a test running past `_test_size` still gets the best item
        # instead of an empty candidate list
        bin_size = max(1, self._test_size - len(administered_items))

        return numpy.random.choice(organized_items[:bin_size])
Ejemplo n.º 4
0
 def sort_items(items: numpy.ndarray) -> numpy.ndarray:
     """Returns item indexes ordered by two keys derived from the item parameters.

     :param items: a matrix containing item parameters.
     :returns: the indexes that sort the items primarily by the value returned by
               `irt.max_info_hpc` and, among ties, by `irt.inf_hpc` evaluated at that value.
     """
     peak_thetas = irt.max_info_hpc(items)
     peak_infos = irt.inf_hpc(peak_thetas, items)
     # numpy.lexsort uses the LAST key of the tuple as the primary sort key
     sort_keys = (peak_infos, peak_thetas)
     return numpy.lexsort(sort_keys)
Ejemplo n.º 5
0
 def sort_items(items: numpy.ndarray) -> numpy.ndarray:
     """Returns item indexes in ascending order of `irt.inf_hpc` evaluated at `irt.max_info_hpc`.

     :param items: a matrix containing item parameters.
     :returns: the indexes that sort the items by the information value computed at the
               point returned by `irt.max_info_hpc` for each item, smallest first.
     """
     peak_thetas = irt.max_info_hpc(items)
     peak_infos = irt.inf_hpc(peak_thetas, items)
     return peak_infos.argsort()
Ejemplo n.º 6
0
    def select(self,
               index: int = None,
               items: numpy.ndarray = None,
               administered_items: list = None,
               est_theta: float = None,
               **kwargs) -> int:
        """Returns the index of the next item to be administered.

        The selection happens in two stages. First a cluster is chosen according to
        `self._method`: with `'item_info'` it is the cluster of the most informative item that
        still has non-administered items; with `'cluster_info'` or `'weighted_info'` it is the
        cluster with the largest (summed or weighted) information that still has
        non-administered items. Then an item is chosen from that cluster: the most informative
        one whose value in column 4 of `items` is below `self._r_max` or, if there is none, the
        most informative one (`self._r_control == 'passive'`) or the one with the smallest
        value in column 4 (any other `self._r_control`).

        :param index: the index of the current examinee in the simulator.
        :param items: a matrix containing item parameters in the format that `catsim` understands
                      (see: :py:func:`catsim.cat.generate_item_bank`)
        :param administered_items: a list containing the indexes of items that were already administered
        :param est_theta: a float containing the current estimated proficiency
        :param kwargs: accepted for interface compatibility; not used by this method.
        :returns: index of the next item to be applied or `None` if no cluster with
                  non-administered items could be selected.
        :raises ValueError: if neither a simulator index (with a simulator attached) nor all of
                            `items`, `administered_items` and `est_theta` are available.
        """
        if (index is None
                or self.simulator is None) and (items is None
                                                or administered_items is None
                                                or est_theta is None):
            raise ValueError(
                'Either pass an index for the simulator or all of the other optional parameters to use this component independently.'
            )

        # when nothing was passed explicitly, read everything from the simulator
        if items is None and administered_items is None and est_theta is None:
            items = self.simulator.items
            administered_items = self.simulator.administered_items[index]
            est_theta = self.simulator.latest_estimations[index]

        # a set gives constant-time membership tests and cheap subset checks
        administered = set(administered_items)

        selected_cluster = None
        existent_clusters = set(self._clusters)
        # this part of the code selects the cluster from which the item at
        # the current point of the test will be chosen
        if self._method == 'item_info':
            # get the item indexes sorted by their information value (descending)
            infos = irt.inf_hpc(est_theta, items).argsort()[::-1]

            evaluated_clusters = set()

            # iterate over every item in order of information value
            for max_info_item in infos:
                candidate_cluster = self._clusters[max_info_item]

                # if the cluster of the current item has already been fully examined, go to the next item
                if candidate_cluster in evaluated_clusters:
                    continue

                # get the indexes of all items in the same cluster as the current item
                items_in_cluster = numpy.nonzero(
                    [x == candidate_cluster for x in self._clusters])[0]

                # if all the items in the current cluster have already been administered,
                # add this cluster to the set of fully evaluated clusters
                if set(items_in_cluster) <= administered:
                    evaluated_clusters.add(candidate_cluster)

                    # if all clusters have been evaluated, the loop ends and the method returns None below
                    if evaluated_clusters == existent_clusters:
                        break

                    # else, if there are still items and clusters to be explored, keep going
                    continue

                # if the algorithm gets here, it means this cluster can be used
                selected_cluster = candidate_cluster
                break

        elif self._method in ['cluster_info', 'weighted_info']:
            # calculates the cluster information, depending on the method
            # selected
            if self._method == 'cluster_info':
                cluster_infos = ClusterSelector.sum_cluster_infos(
                    est_theta, items, self._clusters)
            else:
                cluster_infos = ClusterSelector.weighted_cluster_infos(
                    est_theta, items, self._clusters)

            # sorts clusters descending by their information values
            # NOTE(review): pairing `cluster_infos` with `set(self._clusters)` assumes the
            # helpers above report clusters in the set's iteration order -- confirm.
            sorted_clusters = numpy.array([
                cluster for (
                    inf_value,
                    cluster) in sorted(zip(cluster_infos, set(self._clusters)),
                                       key=lambda pair: pair[0],
                                       reverse=True)
            ],
                                          dtype=float)

            # walks through the sorted clusters in order; the loop ends with the
            # cluster that has a) the maximum information possible and b) at
            # least one item that has not yet been administered
            for cluster in sorted_clusters:
                # cluster membership comes from `self._clusters`; column 4 of
                # `items` is compared against `self._r_max` below and is not a
                # cluster label
                cluster_members = numpy.nonzero(
                    [c == cluster for c in self._clusters])[0]

                if not set(cluster_members) <= administered:
                    selected_cluster = cluster
                    break

        # if no cluster with available items was found, end the test
        if selected_cluster is None:
            warn("There are no more items to be applied.")
            return None

        # in this part, an item is chosen from the cluster that was
        # selected above

        # indexes of the items in the selected cluster that have not been
        # administered
        valid_indexes = [
            item_index for item_index in numpy.nonzero(
                [cluster == selected_cluster for cluster in self._clusters])[0]
            if item_index not in administered
        ]

        # among those, the items whose column 4 value is still below r_max
        valid_indexes_low_r = [
            item_index for item_index in valid_indexes
            if items[item_index, 4] < self._r_max
        ]

        if len(valid_indexes_low_r) > 0:
            # return the item with maximum information from the ones available
            inf_values = irt.inf_hpc(est_theta, items[valid_indexes_low_r])
            selected_item = valid_indexes_low_r[numpy.argmax(inf_values)]

        # every remaining item in the selected cluster has reached r_max
        elif self._r_control == 'passive':
            # ignore the r values and take the most informative item
            inf_values = irt.inf_hpc(est_theta, items[valid_indexes])
            selected_item = valid_indexes[numpy.argmax(inf_values)]
        else:
            # take the candidate with the smallest r, regardless of information
            selected_item = valid_indexes[numpy.argmin(items[valid_indexes,
                                                             4])]

        return selected_item
Ejemplo n.º 7
0
def sort_items(item_params):
    """
    Return the indices that order the items by descending information.

    The information of each item is obtained from ``inf_hpc`` evaluated at
    the point returned by ``max_info_hpc`` for the same item parameters; the
    values are negated before sorting so the largest one comes first.
    """
    peak_thetas = max_info_hpc(item_params)
    negated_infos = -inf_hpc(peak_thetas, item_params)
    return negated_infos.argsort()