def CalcINF(dict_theta, irt_dict):
    """Apply ``inf_hpc`` to every classifier's theta, dataset by dataset.

    Input:
        dict_theta: Nested dictionary ``dataset -> parameter -> classifier``
            whose leaves are the theta values of the classifiers.
        irt_dict: Dictionary mapping each dataset to its item parameters
            (the original documentation describes them as numpy arrays).

    Output:
        A dictionary with the same ``dataset -> parameter -> classifier``
        nesting, where each leaf is the list built from
        ``inf_hpc(theta, irt_dict[dataset])``.

    NOTE(review): the previous docstring described the result as the
    probability of a correct response, but the values come from ``inf_hpc``
    (presumably item information) -- confirm which one is intended.
    """
    return {
        dataset: {
            parameter: {
                clf: list(inf_hpc(theta, irt_dict[dataset]))
                for clf, theta in thetas_by_clf.items()
            }
            for parameter, thetas_by_clf in thetas_by_parameter.items()
        }
        for dataset, thetas_by_parameter in dict_theta.items()
    }
def select(self, index: int = None, items: numpy.ndarray = None, administered_items: list = None, est_theta: float = None, **kwargs) -> int:
    """Return the index of the not-yet-administered item with the largest information value.

    The method works either from the attached simulator (pass ``index``) or
    standalone (pass ``items``, ``administered_items`` and ``est_theta``).

    :param index: the index of the current examinee in the simulator.
    :param items: a matrix containing item parameters, one row per item.
    :param administered_items: a list containing the indexes of items that
                               were already administered.
    :param est_theta: a float containing the current estimated proficiency.
    :returns: index of the next item to be applied or `None` if there are no
              more items in the item bank.
    :raises ValueError: if neither a usable simulator index nor the full set
                        of standalone parameters was provided.
    """
    simulator_unusable = index is None or self.simulator is None
    standalone_incomplete = items is None or administered_items is None or est_theta is None
    if simulator_unusable and standalone_incomplete:
        raise ValueError(
            'Either pass an index for the simulator or all of the other optional parameters to use this component independently.'
        )

    # nothing was passed explicitly: pull the examinee's state from the simulator
    if items is None and administered_items is None and est_theta is None:
        simulator = self.simulator
        items = simulator.items
        administered_items = simulator.administered_items[index]
        est_theta = simulator.latest_estimations[index]

    candidates = [
        position for position in range(items.shape[0])
        if position not in administered_items
    ]
    informations = irt.inf_hpc(est_theta, items[candidates])

    # pair each candidate with its information and rank from most to least informative
    ranked = sorted(
        zip(informations, candidates),
        key=lambda pair: pair[0],
        reverse=True,
    )

    if not ranked:
        warn('There are no more items to be applied.')
        return None

    _, best_candidate = ranked[0]
    return best_candidate
def select(self, index: int = None, items: numpy.ndarray = None, administered_items: list = None, est_theta: float = None, **kwargs) -> int:
    """Return the index of the next item, drawn at random from a shrinking bin of the most informative items.

    The bin holds the ``self._test_size - len(administered_items)`` most
    informative items that were not administered yet, so it gets smaller as
    more items are administered. One item is drawn uniformly from that bin.

    :param index: the index of the current examinee in the simulator.
    :param items: a matrix containing item parameters, one row per item.
    :param administered_items: a list containing the indexes of items that
                               were already administered.
    :param est_theta: a float containing the current estimated proficiency.
    :returns: index of the next item to be applied or `None` if there are no
              more items in the item bank.
    :raises ValueError: if neither a usable simulator index nor the full set
                        of standalone parameters was provided.
    """
    if (index is None or self.simulator is None) and (items is None or administered_items is None or est_theta is None):
        raise ValueError(
            'Either pass an index for the simulator or all of the other optional parameters to use this component independently.'
        )

    # nothing was passed explicitly: pull the examinee's state from the simulator
    if items is None and administered_items is None and est_theta is None:
        items = self.simulator.items
        administered_items = self.simulator.administered_items[index]
        est_theta = self.simulator.latest_estimations[index]

    # built once so the membership test below is O(1) per item
    administered = set(administered_items)

    # item indexes ordered from most to least informative, without the
    # ones that were already administered
    organized_items = [
        x for x in reversed(irt.inf_hpc(est_theta, items).argsort())
        if x not in administered
    ]

    if len(organized_items) == 0:
        warn('There are no more items to apply.')
        return None

    # The bin must be the HEAD of the ranking (the most informative items).
    # Slicing with `[:-bin_size]` would instead discard the tail and produce
    # an empty bin whenever bin_size is 0 or >= the number of remaining items.
    # The bin is clamped to one item so a selection is still possible when
    # the number of administered items reaches or exceeds self._test_size.
    bin_size = max(1, self._test_size - len(administered_items))

    return numpy.random.choice(organized_items[:bin_size])
def sort_items(items: numpy.ndarray) -> numpy.ndarray:
    """Return item indexes ordered by ``irt.max_info_hpc(items)``, ties broken by information.

    :param items: a matrix containing item parameters, one row per item.
    :returns: the index array produced by :py:func:`numpy.lexsort`.
    """
    # presumably the theta at which each item is most informative -- confirm against irt
    peak_thetas = irt.max_info_hpc(items)
    peak_infos = irt.inf_hpc(peak_thetas, items)

    # numpy.lexsort uses the LAST key as the primary sort key
    secondary_key, primary_key = peak_infos, peak_thetas
    return numpy.lexsort((secondary_key, primary_key))
def sort_items(items: numpy.ndarray) -> numpy.ndarray:
    """Return item indexes in ascending order of ``irt.inf_hpc`` evaluated at ``irt.max_info_hpc(items)``.

    :param items: a matrix containing item parameters, one row per item.
    :returns: the ``argsort`` of the per-item information values.
    """
    # presumably the theta at which each item is most informative -- confirm against irt
    peak_thetas = irt.max_info_hpc(items)
    peak_infos = irt.inf_hpc(peak_thetas, items)
    return peak_infos.argsort()
def select(self, index: int = None, items: numpy.ndarray = None, administered_items: list = None, est_theta: float = None, **kwargs) -> int:
    """Return the index of the next item to be administered.

    Selection happens in two steps: first a cluster is chosen according to
    ``self._method`` (``'item_info'``, ``'cluster_info'`` or
    ``'weighted_info'``), then an item is chosen from inside that cluster,
    taking column 4 of ``items`` (compared against ``self._r_max``) into
    account.

    :param index: the index of the current examinee in the simulator.
    :param items: a matrix containing item parameters, one row per item.
    :param administered_items: a list containing the indexes of items that
                               were already administered.
    :param est_theta: a float containing the current estimated proficiency.
    :returns: index of the next item to be applied, or `None` if no cluster
              with a non-administered item could be selected.
    :raises ValueError: if neither a usable simulator index nor the full set
                        of standalone parameters was provided.
    """
    if (index is None or self.simulator is None) and (items is None or administered_items is None or est_theta is None):
        raise ValueError(
            'Either pass an index for the simulator or all of the other optional parameters to use this component independently.'
        )

    # nothing was passed explicitly: pull the examinee's state from the simulator
    if items is None and administered_items is None and est_theta is None:
        items = self.simulator.items
        administered_items = self.simulator.administered_items[index]
        est_theta = self.simulator.latest_estimations[index]

    # built once so every membership/subset test below is cheap
    administered = set(administered_items)

    selected_cluster = None
    existent_clusters = set(self._clusters)

    # this part of the code selects the cluster from which the item at
    # the current point of the test will be chosen
    if self._method == 'item_info':
        # item indexes sorted by their information value, most informative first
        infos = irt.inf_hpc(est_theta, items).argsort()[::-1]

        evaluated_clusters = set()

        # iterate over every item in order of information value
        for max_info_item in infos:
            cluster = self._clusters[max_info_item]

            # if the cluster of the current item has already been fully
            # examined, go to the next item
            if cluster in evaluated_clusters:
                continue

            # indexes of all items in the same cluster as the current item
            items_in_cluster = numpy.nonzero(
                [x == cluster for x in self._clusters])[0]

            # if all the items in the current cluster have already been
            # administered (it happens, theoretically), mark the cluster as
            # fully evaluated
            if set(items_in_cluster) <= administered:
                evaluated_clusters.add(cluster)

                # if all clusters have been evaluated, stop; the method
                # returns None below
                if evaluated_clusters == existent_clusters:
                    break

                # else, there are still items and clusters to be explored
                continue

            # if the algorithm gets here, it means this cluster can be used
            selected_cluster = cluster
            break

    elif self._method in ['cluster_info', 'weighted_info']:
        # calculates the cluster information, depending on the method selected
        if self._method == 'cluster_info':
            cluster_infos = ClusterSelector.sum_cluster_infos(
                est_theta, items, self._clusters)
        else:
            cluster_infos = ClusterSelector.weighted_cluster_infos(
                est_theta, items, self._clusters)

        # sorts clusters descending by their information values
        # this type of sorting was seen on
        # http://stackoverflow.com/a/6618543
        # NOTE(review): cluster_infos is paired positionally with the
        # iteration order of set(self._clusters); confirm the helpers emit
        # their values in that same order.
        sorted_clusters = numpy.array(
            [
                cluster for (inf_value, cluster) in sorted(
                    zip(cluster_infos, set(self._clusters)),
                    key=lambda pair: pair[0],
                    reverse=True)
            ],
            dtype=float)

        # walks through the sorted clusters in order
        for candidate_cluster in sorted_clusters:
            # Membership must be read from self._clusters. Column 4 of
            # `items` is the value compared against r_max further down, not
            # a cluster label, so matching it against the cluster id would
            # pick unrelated items (or none at all).
            valid_indexes = numpy.nonzero(
                [c == candidate_cluster for c in self._clusters])[0]

            # checks if at least one item from this cluster has not
            # been administered to this examinee yet
            if not (set(valid_indexes) <= administered):
                selected_cluster = candidate_cluster
                break

        # the for loop ends with the cluster that has a) the maximum
        # information possible and b) at least one item that has not
        # yet been administered

    # if the test size gets larger than the item bank size, end the test
    if selected_cluster is None:
        warn("There are no more items to be applied.")
        return None

    # in this part, an item is chosen from the cluster that was
    # selected above

    # indexes of the items in the selected cluster that have not been
    # administered
    valid_indexes = [
        item for item in numpy.nonzero(
            [cluster == selected_cluster for cluster in self._clusters])[0]
        if item not in administered
    ]

    # the subset of those items whose r (column 4) is still below r_max
    valid_indexes_low_r = [
        item for item in valid_indexes if items[item, 4] < self._r_max
    ]

    if len(valid_indexes_low_r) > 0:
        # return the item with maximum information from the ones available
        # (argmax yields the first occurrence of the maximum)
        inf_values = irt.inf_hpc(est_theta, items[valid_indexes_low_r])
        selected_item = valid_indexes_low_r[int(numpy.argmax(inf_values))]

    # every remaining item in the selected cluster has reached r_max
    elif self._r_control == 'passive':
        # ignore r and take the most informative remaining item
        inf_values = irt.inf_hpc(est_theta, items[valid_indexes])
        selected_item = valid_indexes[int(numpy.argmax(inf_values))]
    else:
        # select the remaining item with the smallest r, regardless of
        # information. The minimum is searched among the candidates only, so
        # the resulting position is a valid index into `valid_indexes`
        # (ndarray has no `.index` method, and a position in the whole bank
        # would not address this list anyway).
        selected_item = valid_indexes[int(numpy.argmin(items[valid_indexes, 4]))]

    return selected_item
def sort_items(item_params):
    """Return the indices of the items ordered from largest to smallest ``inf_hpc`` value.

    Each item's value is ``inf_hpc`` evaluated at ``max_info_hpc(item_params)``
    (presumably the item's maximum information -- confirm against the helper
    definitions).

    :param item_params: item parameters, passed unchanged to both helpers.
    :returns: the ``argsort`` of the negated values, i.e. a descending order.
    """
    peak_infos = inf_hpc(max_info_hpc(item_params), item_params)

    # negate so that an ascending argsort yields a descending ranking
    descending_keys = -peak_infos
    return descending_keys.argsort()