Beispiel #1
0
    def test_check_parameter_range(self):
        """Exercise ``check_parameter`` on bad types, bad ranges and edges.

        A string value and the return value of ``argmaxn`` must raise
        TypeError. Every other call is listed, in execution order, in
        ``cases`` together with whether it must return True or raise
        ValueError.
        """
        # values of an unsupported type are expected to raise TypeError
        with assert_raises(TypeError):
            check_parameter('f', 0, 100)

        with assert_raises(TypeError):
            check_parameter(argmaxn(value_list=[1, 2, 3], n=1), 0, 100)

        excl_both = {'include_left': False, 'include_right': False}
        incl_left = {'include_left': True, 'include_right': False}
        incl_right = {'include_left': False, 'include_right': True}
        incl_both = {'include_left': True, 'include_right': True}

        # (must_pass, positional arguments, keyword arguments)
        cases = [
            # neither low nor high supplied
            (False, (50,), {}),
            # low above high, then low equal to high
            (False, (50, 100, 99), {}),
            (False, (50, 100, 100), {}),
            # only one bound supplied
            (False, (50,), {'low': 100}),
            (False, (50,), {'high': 0}),
            (True, (50,), {'low': 10}),
            (True, (50,), {'high': 100}),
            # value outside of the [low, high] range
            (False, (-1, 0, 100), {}),
            (False, (101, 0, 100), {}),
            (False, (0.5, 0.2, 0.3), {}),
            # value inside of the range
            (True, (50, 0, 100), {}),
            (True, (0.5, 0.1, 0.8), {}),
            # value sitting exactly on the left or right bound
            (False, (100, 0, 100), excl_both),
            (True, (0, 0, 100), incl_left),
            (True, (0, 0, 100), incl_both),
            (True, (100, 0, 100), incl_right),
            (True, (100, 0, 100), incl_both),
        ]

        for must_pass, args, kwargs in cases:
            if must_pass:
                assert_equal(True, check_parameter(*args, **kwargs))
            else:
                with assert_raises(ValueError):
                    check_parameter(*args, **kwargs)
Beispiel #2
0
    def test_argmaxn(self):
        """Check ``argmaxn`` result length, selected indices and bad ``n``.

        Index sets are compared through their sums, so the order in which
        ``argmaxn`` returns the indices does not matter here.
        """
        picked = argmaxn(self.value_lists, 3)
        assert_equal(len(picked), 3)

        # default order: the indices are expected to be 4, 6 and 9
        picked = argmaxn(self.value_lists, 3)
        assert_equal(np.sum(picked), np.sum([4, 6, 9]))

        # order='asc': the indices are expected to be 3, 7 and 8
        picked_asc = argmaxn(self.value_lists, 3, order='asc')
        assert_equal(np.sum(picked_asc), np.sum([3, 7, 8]))

        # both a negative n and n=20 must be rejected
        for invalid_n in (-1, 20):
            with assert_raises(ValueError):
                argmaxn(self.value_lists, invalid_n)
Beispiel #3
0
    def _get_competent_detectors(self, scores):
        """ Identifies competent base detectors based on correlation scores

        The scores are binned into a histogram with ``self.n_bins`` bins,
        ``argmaxn`` picks ``self.n_selected`` bins from the bin counts, and
        the detectors whose score lies in one of those bins are returned.

        Note that ``self.n_bins`` is lowered to ``self.n_clf`` (with a
        warning) when it exceeds the number of classifiers.

        Parameters
        ----------
        scores : numpy array, shape (n_clf,)
            Correlation scores for each classifier (for a specific
            test instance)

        Returns
        -------
        candidates : List
            Indices for competent detectors (for given test instance).
            Each detector appears at most once.
        """

        # create histogram of correlation scores
        scores = scores.reshape(-1, 1)

        # TODO: handle when Pearson score is 0
        # if scores contain nan, change it to 0
        if np.isnan(scores).any():
            scores = np.nan_to_num(scores)

        if self.n_bins > self.n_clf:
            warnings.warn(
                "The number of histogram bins is greater than the number of "
                "classifiers, reducing n_bins to n_clf.")
            self.n_bins = self.n_clf
        hist, bin_edges = np.histogram(scores, bins=self.n_bins)

        # find n_selected largest bins
        max_bins = argmaxn(hist, n=self.n_selected)
        last_bin = len(hist) - 1
        candidates = []

        # iterate through bins
        for max_bin in max_bins:
            lower = bin_edges[max_bin]
            upper = bin_edges[max_bin + 1]

            # Mirror np.histogram: every bin is half-open [lower, upper)
            # except the last one, which also contains its right edge.
            # Closing both sides of every bin would put a score lying on an
            # interior edge into two bins and could return a detector twice.
            if max_bin == last_bin:
                in_bin = (scores >= lower) & (scores <= upper)
            else:
                in_bin = (scores >= lower) & (scores < upper)

            # add the detectors inside this bin to the list of candidates
            candidates.extend(np.where(in_bin)[0].tolist())

        return candidates
Beispiel #4
0
    def _get_competent_detectors(self, scores):
        """ Algorithm for selecting the most competent detectors

        The scores are binned into a histogram with ``self.n_bins`` bins,
        ``argmaxn`` picks ``self.n_selected`` bins from the bin counts, and
        the detectors whose score lies in one of those bins are returned.

        Parameters
        ----------
        scores : numpy array
            One score per detector; it is reshaped to a single column
            before binning.

        Returns
        -------
        candidates : list
            Row indices of the detectors whose score falls into one of the
            selected bins. Each detector appears at most once.
        """
        scores = scores.reshape(-1, 1)

        # np.histogram cannot derive a finite range from NaN values, so
        # replace them with 0 before binning
        if np.isnan(scores).any():
            scores = np.nan_to_num(scores)

        hist, bin_edges = np.histogram(scores, bins=self.n_bins)
        max_bins = argmaxn(hist, n=self.n_selected)
        last_bin = len(hist) - 1
        candidates = []

        for max_bin in max_bins:
            lower = bin_edges[max_bin]
            upper = bin_edges[max_bin + 1]

            # Mirror np.histogram: every bin is half-open [lower, upper)
            # except the last one, which also contains its right edge.
            # Closing both sides of every bin would put a score lying on an
            # interior edge into two bins and could return a detector twice.
            if max_bin == last_bin:
                in_bin = (scores >= lower) & (scores <= upper)
            else:
                in_bin = (scores >= lower) & (scores < upper)

            candidates.extend(np.where(in_bin)[0].tolist())

        return candidates
Beispiel #5
0
    def _get_competent_detectors(self, scores):
        """ Identifies competent base detectors based on correlation scores

        The scores are binned into a histogram with ``self.n_bins`` bins,
        ``argmaxn`` picks ``self.n_selected`` bins from the bin counts, and
        the detectors whose score lies in one of those bins are returned.

        Note that ``self.n_bins`` is lowered to ``self.n_clf`` (with a
        warning) when it exceeds the number of classifiers.

        Parameters
        ----------
        scores : numpy array, shape (n_clf,)
            Correlation scores for each classifier (for a specific
            test instance)

        Returns
        -------
        candidates : List
            Indices for competent detectors (for given test instance).
            Each detector appears at most once.
        """

        # create histogram of correlation scores
        scores = scores.reshape(-1, 1)

        # np.histogram cannot derive a finite range from NaN values, so
        # replace them with 0 before binning
        if np.isnan(scores).any():
            scores = np.nan_to_num(scores)

        if self.n_bins > self.n_clf:
            warnings.warn("Number of histogram bins greater than number of "
                          "classifiers, reducing n_bins to n_clf.")
            self.n_bins = self.n_clf
        hist, bin_edges = np.histogram(scores, bins=self.n_bins)

        # find n_selected largest bins
        max_bins = argmaxn(hist, n=self.n_selected)
        last_bin = len(hist) - 1
        candidates = []

        # iterate through bins
        for max_bin in max_bins:
            lower = bin_edges[max_bin]
            upper = bin_edges[max_bin + 1]

            # Mirror np.histogram: every bin is half-open [lower, upper)
            # except the last one, which also contains its right edge.
            # Closing both sides of every bin would put a score lying on an
            # interior edge into two bins and could return a detector twice.
            if max_bin == last_bin:
                in_bin = (scores >= lower) & (scores <= upper)
            else:
                in_bin = (scores >= lower) & (scores < upper)

            # add the detectors inside this bin to the list of candidates
            candidates.extend(np.where(in_bin)[0].tolist())

        return candidates
Beispiel #6
0
    U = EMF.user_vecs
    V = EMF.item_vecs
    bias_global = EMF.global_bias
    bias_user = EMF.user_bias
    bias_item = EMF.item_bias

    # print(EMF.regr_multirf.predict(test_meta).shape)
    predicted_scores = EMF.predict_new(test_meta_transformed)
    predicted_scores_max = np.nanargmax(predicted_scores, axis=1)

    for i in range(len(test_index)):
        roc_comp_mat[i, 10] = test_set[i, predicted_scores_max[i]]
        header_mat[i, 10] = config_headers[predicted_scores_max[i]]

        # get the top 3 index
        temp_index = argmaxn(predicted_scores[i, ], n=3)
        roc_comp_mat[i, 18] = test_set[i, temp_index].mean()
        header_mat[i,
                   18] = config_headers[temp_index[0]] + '|' + config_headers[
                       temp_index[1]] + '|' + config_headers[temp_index[2]]

    #################################### ISAC
    clustering = KMeans(n_clusters=5)
    clustering.fit(train_meta_transformed)
    train_clusters = clustering.labels_
    predicted_clusters = clustering.predict(test_meta_transformed)

    for i in range(len(test_index)):
        train_data_index = np.where(train_clusters == predicted_clusters[i])[0]
        train_data_performance = train_set[train_data_index, :]
        train_data_performance_individual = np.nanargmax(
Beispiel #7
0
    def _predict_internal(self, X, predict_proba):
        """Shared implementation behind predict and predict_proba

        For every sample, each base estimator is scored by its accuracy on
        the training points returned by the ``self.tree_`` query. The
        ``self.n_selected_clfs`` estimators picked by ``argmaxn`` are then
        asked for a prediction and their outputs are combined, weighted by
        that accuracy when ``self.use_weights`` is set.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        predict_proba : bool
            if True, return the result of predict_proba

        Returns
        -------
        y_predicted : numpy array
            If ``predict_proba`` is True, the output of ``score_to_proba``
            applied to an array of shape (n_samples, self._classes);
            otherwise an array of shape (n_samples,) holding the
            ``majority_vote`` result of every sample.
        """
        check_is_fitted(self, ['fitted_'])
        X = check_array(X)
        n_samples = X.shape[0]

        # Find neighbors for all test instances
        _, neighbor_inds = self.tree_.query(X, k=self.local_region_size)

        # one row of per-class values per sample, or one value per sample
        if predict_proba:
            out_shape = [n_samples, self._classes]
        else:
            out_shape = [n_samples, ]
        y_predicted = np.zeros(out_shape)

        for i in range(n_samples):
            query_point = X[i, :].reshape(1, -1)
            local_inds = neighbor_inds[i, :]

            # ground truth of the neighboring training points
            y_local_true = self.y_train_[local_inds]

            # accuracy of every base estimator on those training points
            local_accuracy = np.zeros([self.n_base_estimators_, ])
            for j, _ in enumerate(self.base_estimators):
                y_local_clf = self.y_train_predicted_[local_inds, j]
                local_accuracy[j] = accuracy_score(y_local_true, y_local_clf)

            # get the indices of the best performing clfs
            chosen_inds = argmaxn(local_accuracy, n=self.n_selected_clfs)
            n_chosen = len(chosen_inds)
            chosen_weights = local_accuracy[chosen_inds].reshape(1, n_chosen)

            all_scores = np.zeros([1, n_chosen])
            all_proba = np.zeros([1, self._classes, n_chosen])

            # collect the output of every selected estimator
            for k, clf_ind in enumerate(chosen_inds):
                estimator = self.base_estimators[clf_ind]
                if predict_proba:
                    all_proba[:, :, k] = estimator.predict_proba(query_point)
                else:
                    all_scores[:, k] = estimator.predict(query_point)

            # combine the collected outputs into one result for sample i
            if predict_proba:
                if self.use_weights:
                    combined = all_proba * chosen_weights
                else:
                    combined = all_proba
                y_predicted[i] = np.mean(combined, axis=2)
            elif self.use_weights:
                y_predicted[i] = majority_vote(all_scores,
                                               n_classes=self._classes,
                                               weights=chosen_weights)
            else:
                y_predicted[i] = majority_vote(all_scores,
                                               n_classes=self._classes)

        return score_to_proba(y_predicted) if predict_proba else y_predicted