Esempio n. 1
0
def test_cosine_positive():
    """Check ``_scale_cosine_similarity`` with ``metric='cosine-positive'``.

    Two fixed inputs are exercised: 0.5 must come back as 0.5 and
    -0.5 must come back as 0.
    """
    from freediscovery.metrics import _scale_cosine_similarity

    # (raw similarity, expected scaled value), checked in this order
    expectations = (
        (0.5, 0.5),
        (-0.5, 0),
    )
    for raw_score, expected in expectations:
        scaled = _scale_cosine_similarity(raw_score, metric='cosine-positive')
        assert scaled == expected
Esempio n. 2
0
def test_cosine_jaccard_norm(metric):
    """Scale a fixed cosine score with ``metric`` and verify the round trip.

    Parameters
    ----------
    metric : str
        metric name forwarded to ``_scale_cosine_similarity``; a forward
        value check is only performed for 'cosine' and 'jaccard'
    """
    from freediscovery.metrics import _scale_cosine_similarity

    cosine_score = 0.70710678
    scaled = _scale_cosine_similarity(cosine_score, metric=metric)

    # Forward direction: the expected value depends on the metric under test.
    if metric == 'cosine':
        assert_allclose(scaled, cosine_score)
    elif metric == 'jaccard':
        assert_allclose(scaled, 0.5, rtol=0.1)

    # Inverse direction: scaling back must recover the original score
    # within the same loose tolerance.
    restored = _scale_cosine_similarity(scaled, metric=metric, inverse=True)
    assert_allclose(restored, cosine_score, rtol=0.1)
Esempio n. 3
0
    def predict(self, chunk_size=5000, ml_output='probability', metric='cosine'):
        """
        Predict the relevance using a previously trained model

        Parameters
        ----------
        chunk_size : int
           chunk size (accepted for interface compatibility; this method
           does not currently use it)
        ml_output : str
           type of the output in ['decision_function', 'probability'],
           only affects ML methods. default: 'probability'
        metric : str
            The similarity returned by nearest neighbor classifier in
            ['cosine', 'jaccard', 'cosine_norm', 'jaccard_norm'].
            Forwarded to ``_scale_cosine_similarity`` and only used when
            the model is a NearestNeighborRanker. default: 'cosine'

        Returns
        -------
        res : ndarray [n_samples, n_classes]
           the score for each class
        nn_ind : {ndarray [n_samples, n_classes], None}
           the index of the nearest neighbor for each class
           (when the NearestNeighborRanker is used)

        Raises
        ------
        ValueError
           if ``ml_output`` is not one of the supported values, or if the
           model has neither ``decision_function`` nor ``predict_proba``
        WrongParameter
           if no trained model is available (``self.cmod`` is None)
        """
        if ml_output not in ['probability', 'decision_function']:
            raise ValueError(("Wrong input value ml_output={}, must be one of "
                              "['probability', 'decision_function']")
                             .format(ml_output))

        # From here on ml_output holds the name of the estimator method
        # that produces the requested kind of score.
        if ml_output == 'probability':
            ml_output = 'predict_proba'

        cmod = self.cmod
        if cmod is None:
            # Adjacent literals instead of a backslash continuation, which
            # would embed the source indentation in the message.
            raise WrongParameter('The model must be trained first, or sid must be '
                                 'provided to load a previously trained model!')

        ds = self.pipeline.data

        nn_ind = None
        if isinstance(cmod, NearestNeighborRanker):
            res, nn_ind_orig = cmod.kneighbors(ds)
            res = _scale_cosine_similarity(res, metric=metric)
            # Look the returned neighbor positions up in the stored index
            # (presumably mapping training-set positions to document ids).
            nn_ind = self._pars['index'][nn_ind_orig]
        elif hasattr(cmod, ml_output):
            res = getattr(cmod, ml_output)(ds)
        elif hasattr(cmod, 'decision_function'):
            # The requested method is missing, so predict_proba was wanted:
            # derive it from the decision function via expit
            # (assumed to be the logistic sigmoid, e.g. scipy.special.expit).
            res = cmod.decision_function(ds)
            res = expit(res)
        elif hasattr(cmod, 'predict_proba'):
            # The requested method is missing, so decision_function was
            # wanted: derive it from the probabilities via logit.
            res = cmod.predict_proba(ds)
            res = logit(res)
        else:
            raise ValueError('Model {} has neither decision_function nor predict_proba methods!'.format(cmod))

        # handle the case of binary categorization
        # as two classes categorization
        if res.ndim == 1:
            if ml_output == 'decision_function':
                # signed scores: the negative class gets the opposite score
                res_p = res
                res_n = - res
            else:
                # probabilities: the negative class gets the complement
                res_p = res
                res_n = 1 - res
            res = np.hstack((res_n[:, None], res_p[:, None]))
        return res, nn_ind