Python categorization_score Examples

Programming Language: Python

Namespace/Package Name: freediscovery.metrics

Method/Function: categorization_score

Examples at hotexamples.com: 4

Python categorization_score - 4 examples found. These are the top rated real world Python examples of freediscovery.metrics.categorization_score extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

def test_unique_label():
    """Check that testing works with only one label in the training test"""
    np.random.seed(10)
    Nshape = ground_truth.file_path.values.shape
    is_relevant = np.zeros(Nshape).astype(int)

    idx = np.arange(len(is_relevant), dtype='int')

    categorization_score(idx, is_relevant, idx, np.random.rand(*Nshape))

Example #2

Show file

def test_categorization(use_lsi, method, cv):

    if 'CIRCLECI' in os.environ and cv == 'fast'\
            and method in ['LinearSVC', 'xgboost']:
        raise SkipTest  # Circle CI is too slow and timesout

    if method == 'xgboost':
        try:
            import xgboost
        except ImportError:
            raise SkipTest

    if not use_lsi:
        uuid = vect_uuid
    else:
        uuid = lsi.mid

    cat = _CategorizerWrapper(cache_dir=cache_dir,
                              parent_id=uuid, cv_n_folds=2)
    cat.fe.db_.filenames_ = cat.fe.filenames_
    index = cat.fe.db_._search_filenames(ground_truth.file_path.values)

    try:
        model, Y_train = cat.fit(
                                index,
                                ground_truth.is_relevant.values,
                                method=method,
                                cv=cv)
    except OptionalDependencyMissing:
        raise SkipTest
    except WrongParameter:
        if method in ['NearestNeighbor', 'NearestCentroid']:
            return
        else:
            raise

    Y_pred, md = cat.predict()
    X_pred = np.arange(cat.fe.n_samples_, dtype='int')
    idx_gt = cat.fe.db_._search_filenames(ground_truth.file_path.values)

    scores = categorization_score(idx_gt,
                                  ground_truth.is_relevant.values,
                                  X_pred, np.argmax(Y_pred, axis=1))

    assert cat.get_params() is not None

    assert Y_pred.shape == (cat.fe.n_samples_,
                            len(np.unique(ground_truth.is_relevant.values)))

    if method == 'NearestNeighbor':
        assert md.shape == Y_pred.shape
    else:
        assert md is None

    if method in ['xgboost', 'ensemble-stacking']:
        # this parameter fail for some reason so far...
        return
    assert_allclose(scores['precision'], 1, rtol=0.5)
    assert_allclose(scores['recall'], 1, rtol=0.68)
    cat.delete()

Example #3

Show file

def test_categorization_score():
    idx = [1, 2,  3,  4,  5, 6]
    y = [1, 1, -1, -1, -1, 1]
    idx_ref = [10, 5, 3, 2, 6]
    y_ref = [0,  1, 0, 1, 1]

    scores = categorization_score(idx_ref, y_ref, idx, y)

    assert_allclose(scores['precision'], 1.0)
    assert_allclose(scores['recall'], 0.66666666, rtol=1e-4)

    # make sure permutations don't affect the result
    idx_ref2 = [10, 5, 2, 3, 6]
    y_ref2 = [0, 1, 1, 0, 1]
    scores2 = categorization_score(idx_ref2, y_ref2, idx, y)
    assert scores['average_precision'] == scores2['average_precision']

Example #4

Show file

File: test_integration.py Project: rvaughan/FreeDiscovery

def test_features_hashing(use_hashing, use_lsi, method):
    # check that models work both with and without hashing

    cache_dir = check_cache()

    n_features = 20000

    fe = FeatureVectorizer(cache_dir=cache_dir)
    uuid = fe.setup(n_features=n_features, use_hashing=use_hashing)
    fe.ingest(data_dir, file_pattern='.*\d.txt')

    ground_truth = parse_ground_truth_file(
        os.path.join(data_dir, "..", "ground_truth_file.txt"))

    lsi = _LSIWrapper(cache_dir=cache_dir, parent_id=uuid)
    lsi_res, exp_var = lsi.fit_transform(n_components=100)
    assert lsi._load_pars() is not None
    lsi._load_model()

    if method == 'Categorization':
        if use_lsi:
            parent_id = lsi.mid
            method = 'NearestNeighbor'
        else:
            parent_id = uuid
            method = 'LogisticRegression'
        cat = _CategorizerWrapper(cache_dir=cache_dir,
                                  parent_id=parent_id,
                                  cv_n_folds=2)
        cat.fe.db_.filenames_ = cat.fe.filenames_
        index = cat.fe.db_._search_filenames(ground_truth.file_path.values)

        try:
            coefs, Y_train = cat.fit(index,
                                     ground_truth.is_relevant.values,
                                     method=method)
        except OptionalDependencyMissing:
            raise SkipTest

        Y_pred, md = cat.predict()
        X_pred = np.arange(cat.fe.n_samples_, dtype='int')
        idx_gt = cat.fe.db_._search_filenames(ground_truth.file_path.values)

        scores = categorization_score(idx_gt, ground_truth.is_relevant.values,
                                      X_pred, np.argmax(Y_pred, axis=1))
        assert_allclose(scores['precision'], 1, rtol=0.5)
        assert_allclose(scores['recall'], 1, rtol=0.7)
        cat.delete()
    elif method == 'DuplicateDetection':
        dd = _DuplicateDetectionWrapper(cache_dir=cache_dir, parent_id=uuid)
        try:
            dd.fit()
        except ImportError:
            raise SkipTest
        cluster_id = dd.query(distance=10)
    elif method == 'Clustering':
        if not use_hashing:
            if use_lsi:
                parent_id = lsi.mid
                method = 'birch'
            else:
                parent_id = uuid
                method = 'k_means'
            cat = _ClusteringWrapper(cache_dir=cache_dir, parent_id=parent_id)
            cm = getattr(cat, method)
            labels = cm(2)

            htree = cat._get_htree(cat.pipeline.data)

            terms = cat.compute_labels(n_top_words=10)
        else:
            with pytest.raises(NotImplementedError):
                _ClusteringWrapper(cache_dir=cache_dir, parent_id=uuid)
    else:
        raise ValueError