Exemple #1
0
    print('Features H_0:{}'.format(t1 - t0))

    # -----------------------------------------------------------------------------
    # ------------------------------ H_1 ------------------------------------------
    # -----------------------------------------------------------------------------

    if adptative_feature == 'cder':
        # ----------------------------------------------------------------------------
        # ------------------------------ CDER ----------------------------------------
        # ----------------------------------------------------------------------------

        print('Begin CDER...')
        t0 = time.time()
        pc_train = multidim.PointCloud.from_multisample_multilabel(
            X_train_1, F_train)
        ct_train = CoverTree(pc_train)

        cder = CDER(parsimonious=True)

        cder.fit(ct_train)

        cder_result = cder.gaussians

        ellipses = []
        for c in cder_result:
            temp = {
                key: c[key]
                for key in ['mean', 'std', 'rotation', 'radius', 'entropy']
            }
            temp['std'] = 2 * temp['std']
            #temp['std'] = np.array([temp['radius'], temp['radius']])
def adaptive_features(X_train, X_test, model, y_train, d=25):
    """Derive ellipses from the training data and featurize both splits.

    A list of ellipses is built from ``X_train`` with the method selected by
    ``model``; each ellipse is a dict with the keys ``'mean'``, ``'std'``,
    ``'rotation'``, ``'radius'`` and ``'entropy'``. The list is then passed,
    together with ``ATS.f_ellipse``, to ``ATS.get_all_features`` for the
    training and the test samples. Progress and elapsed time are printed.

    Args:
        X_train: Sequence of training samples. For ``"gmm"`` and ``"hdb"``
            the samples are stacked with ``np.vstack`` (presumably each
            sample is a 2-D array of points -- confirm with callers).
        X_test: Sequence of test samples, only used for feature extraction.
        model: ``"gmm"`` (Bayesian Gaussian mixture), ``"hdb"`` (HDBSCAN
            clusters) or ``"cder"`` (CDER gaussians).
        y_train: One label per training sample. A copy is handed to
            ``multidim.PointCloud.from_multisample_multilabel`` for
            ``"cder"``; for ``"gmm"`` it is expanded to one label per point.
        d: Number of mixture components; only used when ``model == "gmm"``.

    Returns:
        Tuple ``(X_train_features, X_test_features)`` as returned by
        ``ATS.get_all_features``.

    Raises:
        ValueError: If ``model`` is not one of the supported names.
    """
    # Fail early with a clear error; falling through used to end in an
    # UnboundLocalError at the return statement.
    if model not in ("gmm", "hdb", "cder"):
        raise ValueError("Not a valid model type: {!r}".format(model))

    ellipses = []

    if model == "gmm":
        print('Begin GMM...')
        start = time.time()
        stacked = np.vstack(X_train)
        # One label per stacked point. scikit-learn documents the ``y``
        # argument of ``fit`` as ignored; it is still passed as before.
        point_labels = np.concatenate([
            label * np.ones(len(sample))
            for sample, label in zip(X_train, y_train)
        ])

        gmm = mixture.BayesianGaussianMixture(n_components=d,
                                              covariance_type='full',
                                              max_iter=int(10e4)).fit(
                                                  stacked, point_labels)

        for mean, cov, weight in zip(gmm.means_, gmm.covariances_,
                                     gmm.weights_):
            eigvals, eigvecs = np.linalg.eig(cov)
            axes_std = np.sqrt(eigvals)
            ellipses.append({
                'mean': mean,
                # Ellipse axes span three standard deviations.
                'std': 3 * axes_std,
                'rotation': eigvecs.transpose(),
                'radius': max(axes_std),
                # The mixture weight is stored in the 'entropy' slot.
                'entropy': weight,
            })
        done_message = 'Computing gmm features took (seconds):{}'

    elif model == "hdb":
        print('Begin HDBSCAN...')
        start = time.time()
        stacked = np.vstack(X_train)

        clusterer = hdbscan.HDBSCAN()
        clusterer.fit(stacked)
        labels = clusterer.labels_

        # Cluster labels run from 0 to labels.max() inclusive, so the number
        # of clusters is max + 1; iterating range(max) skipped the last one.
        num_clusters = labels.max() + 1

        for cluster_id in range(num_clusters):
            members_mask = labels == cluster_id
            members = stacked[members_mask]

            eigvals, eigvecs = np.linalg.eig(np.cov(members.transpose()))
            axes_std = np.sqrt(eigvals)
            ellipses.append({
                'mean': np.mean(members, axis=0),
                # Ellipse axes span two standard deviations.
                'std': 2 * axes_std,
                'rotation': eigvecs.transpose(),
                'radius': max(axes_std),
                # Mean membership probability is stored in 'entropy'.
                'entropy': np.mean(clusterer.probabilities_[members_mask]),
            })
        done_message = 'Computing hdbscan features took (seconds):{}'

    else:  # model == "cder"
        y_train_cder = y_train.copy()

        print('Begin CDER...')
        start = time.time()

        pc_train = multidim.PointCloud.from_multisample_multilabel(
            X_train, y_train_cder)
        ct_train = CoverTree(pc_train)

        cder = CDER(parsimonious=True)
        cder.fit(ct_train)

        ellipse_keys = ['mean', 'std', 'rotation', 'radius', 'entropy']
        for gaussian in cder.gaussians:
            ellipse = {key: gaussian[key] for key in ellipse_keys}
            # Ellipse axes span three standard deviations.
            ellipse['std'] = 3 * ellipse['std']
            ellipses.append(ellipse)
        done_message = 'Computing features from H_1 took (seconds):{}'

    X_train_features = ATS.get_all_features(X_train, ellipses, ATS.f_ellipse)
    X_test_features = ATS.get_all_features(X_test, ellipses, ATS.f_ellipse)

    end = time.time()
    print(done_message.format(end - start))

    return X_train_features, X_test_features
Exemple #3
0
def adaptive_features(X_train, model, y_train, d=25):
    """Time the ellipse fitting and training-feature extraction for a model.

    A list of ellipses (dicts with the keys ``'mean'``, ``'std'``,
    ``'rotation'``, ``'radius'`` and ``'entropy'``) is built from
    ``X_train`` with the method selected by ``model`` and passed, together
    with ``ATS.f_ellipse``, to ``ATS.get_all_features``. Only the elapsed
    wall-clock time is returned; the features themselves are discarded.

    Args:
        X_train: Sequence of training samples. For ``"gmm"`` the samples are
            stacked with ``np.vstack`` (presumably each sample is a 2-D
            array of points -- confirm with callers).
        model: ``"gmm"`` (Bayesian Gaussian mixture) or ``"cder"`` (CDER
            gaussians).
        y_train: One label per training sample. A copy is handed to
            ``multidim.PointCloud.from_multisample_multilabel`` for
            ``"cder"``; for ``"gmm"`` it is expanded to one label per point.
        d: Number of mixture components; only used when ``model == "gmm"``.

    Returns:
        Elapsed time in seconds (``time.time()`` difference) from just after
        the "Begin ..." message until feature extraction finished.

    Raises:
        ValueError: If ``model`` is not one of the supported names.
    """
    # Fail early with a clear error; an unknown model used to end in an
    # UnboundLocalError on ``timing`` at the return statement.
    if model not in ("gmm", "cder"):
        raise ValueError("Not a valid model type: {!r}".format(model))

    ellipses = []

    if model == "gmm":
        print('Begin GMM...')
        start = time.time()
        stacked = np.vstack(X_train)
        # One label per stacked point. scikit-learn documents the ``y``
        # argument of ``fit`` as ignored; it is still passed as before.
        point_labels = np.concatenate([
            label * np.ones(len(sample))
            for sample, label in zip(X_train, y_train)
        ])

        gmm = mixture.BayesianGaussianMixture(n_components=d,
                                              covariance_type='full',
                                              max_iter=int(10e4)).fit(
                                                  stacked, point_labels)

        for mean, cov, weight in zip(gmm.means_, gmm.covariances_,
                                     gmm.weights_):
            eigvals, eigvecs = np.linalg.eig(cov)
            axes_std = np.sqrt(eigvals)
            ellipses.append({
                'mean': mean,
                # Ellipse axes span three standard deviations.
                'std': 3 * axes_std,
                'rotation': eigvecs.transpose(),
                'radius': max(axes_std),
                # The mixture weight is stored in the 'entropy' slot.
                'entropy': weight,
            })

    else:  # model == "cder"
        y_train_cder = y_train.copy()

        print('Begin CDER...')
        start = time.time()

        pc_train = multidim.PointCloud.from_multisample_multilabel(
            X_train, y_train_cder)
        ct_train = CoverTree(pc_train)

        cder = CDER(parsimonious=True)
        cder.fit(ct_train)

        ellipse_keys = ['mean', 'std', 'rotation', 'radius', 'entropy']
        for gaussian in cder.gaussians:
            ellipse = {key: gaussian[key] for key in ellipse_keys}
            # Ellipse axes span three standard deviations.
            ellipse['std'] = 3 * ellipse['std']
            ellipses.append(ellipse)

    # The result is intentionally dropped: this call is the work being timed.
    ATS.get_all_features(X_train, ellipses, ATS.f_ellipse)

    timing = time.time() - start
    return timing