Ejemplo n.º 1
0
    def test_score(self):
        """Fit DNetPRO on a tiny two-class dataset and check its score.

        The data is a 4x5 matrix of consecutive integers labelled
        ``A, A, B, B``. The model is scored on the very same samples it
        was fitted on, and the test expects that score to be exactly 0.
        """
        n_samples, n_probes = 4, 5

        # Rows are samples, columns are probes.
        data = np.arange(n_samples * n_probes).reshape((n_samples, n_probes))
        labels = np.array(['A', 'A', 'B', 'B'])

        selector = DNetPRO(estimator=GaussianNB(), max_chunk=4)
        selector.fit(data, labels)

        assert selector.score(data, labels) == 0
Ejemplo n.º 2
0
            # print('  took {:.3f} seconds'.format(kbest_toc - dnet_tic), flush=True)

            # print(dnet_perf[['feature_1', 'feature_2', 'ct', 'feature_1_ct', 'feature_2_ct', 'weights']].head(n=20))

            # Order the single-feature performance table in place, from the
            # highest to the lowest value of its 'feature_2_ct' column.
            single_perf.sort_values(by='feature_2_ct',
                                    inplace=True,
                                    ascending=False)
            # print(single_perf.head(n=20))

            # Fit DNetPRO on the training split and keep the transformed
            # matrix; its shape is unpacked as (rows, columns).
            # NOTE(review): presumably the columns are the features of the
            # selected signature - confirm against DNetPRO.fit_transform.
            Dnet_data = dnet.fit_transform(X_train, y_train)
            new_sample, new_probe = Dnet_data.shape

            # Signature (collection of feature indices) chosen by the fit.
            dnet_signature = dnet.selected_signature

            # DNetPRO parameters to save
            # Score of the fitted DNetPRO model on the held-out split.
            dnet_score = dnet.score(X_test, y_test)
            # Number of signature entries whose index is below Ninformative.
            # NOTE(review): assumes the informative features occupy the first
            # Ninformative columns of X - verify where the dataset is built.
            dnet_informative = len(
                [x for x in dnet_signature if x < Ninformative])
            dnet_size = len(dnet_signature)

            # Baseline: take as many top-ranked single features as the
            # DNetPRO signature holds; the 'feature_2' values are used below
            # as column indices into X_train / X_test.
            Kbest_data = single_perf.iloc[:dnet_size].feature_2.values.tolist()
            # Refit the plain classifier on those columns only and score it
            # on the same held-out split.
            kbest_score = classifier.fit(X_train[:, Kbest_data],
                                         y_train).score(
                                             X_test[:, Kbest_data], y_test)

            # Same index-below-Ninformative count, for the baseline features.
            kbest_informative = len(
                [x for x in Kbest_data if x < Ninformative])

            # Number of features shared by the baseline and the signature.
            common_features = len(set(Kbest_data) & set(dnet_signature))

            # join string
Ejemplo n.º 3
0
    # Create the DNetPRO feature selection object around `classifier`
    # (a GaussianNB according to the original comment; it is defined
    # outside this excerpt), scored by accuracy.
    dnet = DNetPRO(estimator=classifier,
                   scoring='accuracy',
                   n_jobs=4,
                   verbose=False)
    # Extract the filtered dataset, i.e. the signature with the highest
    # score on the training set. Its shape is unpacked as (rows, columns).
    Dnet_data = dnet.fit_transform(X_train, y_train)
    new_sample, new_probe = Dnet_data.shape

    # Best DNetPRO signature
    dnet_signature = dnet.selected_signature

    # Print some information: the sorted signature, the score on the test
    # split, and how many signature indices fall below Ninformative.
    # NOTE(review): the last count assumes the informative features are the
    # first Ninformative columns of X - verify where the dataset is built.
    print('Signature DNetPRO: {}'.format(sorted(dnet_signature)))
    print('DNetPRO score: {:.3f}'.format(dnet.score(X_test, y_test)))
    print('Informative found: {:d} / {:d}'.format(
        len([x for x in dnet_signature if x < Ninformative]), Ninformative))

    # Compare the obtained results against the K best features, with
    # K = number of columns of the DNetPRO-transformed data.
    filter_kbest = SelectKBest(k=new_probe)
    # Extract the filtered dataset
    Kbest_data = filter_kbest.fit_transform(X_train, y_train)
    # Map back to the original feature space: the features that were not
    # selected are set to zero.
    Kbest_filtered = filter_kbest.inverse_transform(Kbest_data)
    # The selected features are then recovered as the columns holding at
    # least one non-zero value.
    # NOTE(review): a selected column whose values are all exactly zero
    # would be missed here; `filter_kbest.get_support(indices=True)` returns
    # the selected indices directly - consider switching.
    Kbest_signature = set(np.nonzero(Kbest_filtered)[1])

    # Just to be sure that everything goes right...
    if not (len(Kbest_signature) == len(dnet_signature)):
        raise ValueError(