print('Features H_0:{}'.format(t1 - t0)) # ----------------------------------------------------------------------------- # ------------------------------ H_1 ------------------------------------------ # ----------------------------------------------------------------------------- if adptative_feature == 'cder': # ---------------------------------------------------------------------------- # ------------------------------ CDER ---------------------------------------- # ---------------------------------------------------------------------------- print('Begin CDER...') t0 = time.time() pc_train = multidim.PointCloud.from_multisample_multilabel( X_train_1, F_train) ct_train = CoverTree(pc_train) cder = CDER(parsimonious=True) cder.fit(ct_train) cder_result = cder.gaussians ellipses = [] for c in cder_result: temp = { key: c[key] for key in ['mean', 'std', 'rotation', 'radius', 'entropy'] } temp['std'] = 2 * temp['std'] #temp['std'] = np.array([temp['radius'], temp['radius']])
def _ellipse_from_covariance(mean, covariance, entropy, scale):
    """Return one ellipse dict built from the eigen-decomposition of ``covariance``.

    ``std`` holds the per-axis standard deviations multiplied by ``scale``;
    ``radius`` is the largest *unscaled* standard deviation.
    """
    # NOTE(review): covariance matrices are symmetric, so np.linalg.eigh would
    # guarantee real eigenvalues; eig is kept to preserve the existing axis order.
    eigenvalues, eigenvectors = np.linalg.eig(covariance)
    axis_std = np.sqrt(eigenvalues)
    return {
        'mean': mean,
        'std': scale * axis_std,
        'rotation': eigenvectors.transpose(),
        'radius': max(axis_std),
        'entropy': entropy,
    }


def _gmm_ellipses(X_train, y_train, d):
    """Fit a Bayesian Gaussian mixture on the stacked points; one ellipse per component."""
    stacked_points = np.vstack(X_train)
    # One label per stacked point: each sample's label repeated for all its points.
    point_labels = np.concatenate(
        [y_train[i] * np.ones(len(points)) for i, points in enumerate(X_train)]
    )
    # NOTE(review): scikit-learn documents the ``y`` argument of
    # BayesianGaussianMixture.fit as ignored; it is still passed as before.
    gmm = mixture.BayesianGaussianMixture(
        n_components=d, covariance_type='full', max_iter=int(10e4)
    ).fit(stacked_points, point_labels)
    return [
        _ellipse_from_covariance(mean, covariance, weight, scale=3)
        for mean, covariance, weight in zip(
            gmm.means_, gmm.covariances_, gmm.weights_)
    ]


def _hdbscan_ellipses(X_train):
    """Cluster the stacked points with HDBSCAN; one ellipse per cluster found."""
    stacked_points = np.vstack(X_train)
    clusterer = hdbscan.HDBSCAN()
    clusterer.fit(stacked_points)
    # HDBSCAN numbers clusters 0..max (noise is -1 per its documentation), so
    # the cluster count is max + 1. Using max alone skipped the last cluster.
    num_clusters = clusterer.labels_.max() + 1
    ellipses = []
    for cluster_id in range(num_clusters):
        members = clusterer.labels_ == cluster_id
        cluster_points = stacked_points[members]
        ellipses.append(_ellipse_from_covariance(
            mean=np.mean(cluster_points, axis=0),
            covariance=np.cov(cluster_points.transpose()),
            # Mean membership probability is stored under the 'entropy' key.
            entropy=np.mean(clusterer.probabilities_[members]),
            scale=2,
        ))
    return ellipses


def _cder_ellipses(X_train, y_train):
    """Run CDER on the labelled training samples; one ellipse per returned gaussian."""
    # The labels are copied before being handed to the point-cloud builder
    # (presumably because it may modify them - TODO confirm).
    labels = y_train.copy()
    pc_train = multidim.PointCloud.from_multisample_multilabel(X_train, labels)
    cover_tree = CoverTree(pc_train)
    cder = CDER(parsimonious=True)
    cder.fit(cover_tree)
    wanted_keys = ['mean', 'std', 'rotation', 'radius', 'entropy']
    ellipses = []
    for gaussian in cder.gaussians:
        ellipse = {key: gaussian[key] for key in wanted_keys}
        ellipse['std'] = 3 * ellipse['std']
        ellipses.append(ellipse)
    return ellipses


def adaptive_features(X_train, X_test, model, y_train, d=25):
    """Build ellipses from the training data and featurize both splits with them.

    The ellipses are obtained from ``X_train`` with the chosen ``model`` and
    then passed, together with ``ATS.f_ellipse``, to ``ATS.get_all_features``
    for ``X_train`` and ``X_test``. The elapsed time is printed.

    NOTE(review): a later definition in this file has the same name but the
    signature ``(X_train, model, y_train, d=25)``; if both live in the same
    module the later one shadows this one - confirm which is intended.

    Args:
        X_train: Sequence of per-sample point arrays (stacked with
            ``np.vstack`` for the "gmm" and "hdb" models).
        X_test: Test samples, featurized with the ellipses fitted on
            ``X_train``.
        model: One of ``"gmm"``, ``"hdb"`` or ``"cder"``.
        y_train: Per-sample labels, indexable by sample position and
            providing ``.copy()`` (used by "gmm" and "cder").
        d: Number of mixture components for the "gmm" model.

    Returns:
        Tuple ``(X_train_features, X_test_features)`` as returned by
        ``ATS.get_all_features``.

    Raises:
        ValueError: If ``model`` is not a supported model type. Previously the
            message was only printed and the function then failed with an
            ``UnboundLocalError``.
    """
    if model == "gmm":
        print('Begin GMM...')
        start = time.perf_counter()
        ellipses = _gmm_ellipses(X_train, y_train, d)
        done_message = 'Computing gmm features took (seconds):{}'
    elif model == "hdb":
        print('Begin HDBSCAN...')
        start = time.perf_counter()
        ellipses = _hdbscan_ellipses(X_train)
        done_message = 'Computing hdbscan features took (seconds):{}'
    elif model == "cder":
        print('Begin CDER...')
        start = time.perf_counter()
        ellipses = _cder_ellipses(X_train, y_train)
        done_message = 'Computing features from H_1 took (seconds):{}'
    else:
        raise ValueError("Not a valid model type: {!r}".format(model))

    X_train_features = ATS.get_all_features(X_train, ellipses, ATS.f_ellipse)
    X_test_features = ATS.get_all_features(X_test, ellipses, ATS.f_ellipse)
    print(done_message.format(time.perf_counter() - start))
    return X_train_features, X_test_features
def adaptive_features(X_train, model, y_train, d=25):
    """Time how long it takes to build ellipses and featurize the training data.

    The ellipses are obtained from ``X_train`` with the chosen ``model`` and
    passed, together with ``ATS.f_ellipse``, to ``ATS.get_all_features``. The
    features themselves are discarded; only the elapsed time is returned.

    NOTE(review): an earlier definition in this file has the same name but the
    signature ``(X_train, X_test, model, y_train, d=25)``; if both live in the
    same module this one shadows it - confirm which is intended.

    Args:
        X_train: Sequence of per-sample point arrays (stacked with
            ``np.vstack`` for the "gmm" model).
        model: Either ``"gmm"`` or ``"cder"``.
        y_train: Per-sample labels, indexable by sample position and
            providing ``.copy()``.
        d: Number of mixture components for the "gmm" model.

    Returns:
        Elapsed time in seconds for fitting plus featurization.

    Raises:
        ValueError: If ``model`` is not a supported model type. Previously
            this fell through to ``return timing`` and failed with an
            ``UnboundLocalError``.
    """
    if model == "gmm":
        print('Begin GMM...')
        start = time.perf_counter()
        stacked_points = np.vstack(X_train)
        # One label per stacked point: each sample's label repeated for all its points.
        point_labels = np.concatenate(
            [y_train[i] * np.ones(len(points)) for i, points in enumerate(X_train)]
        )
        # NOTE(review): scikit-learn documents the ``y`` argument of
        # BayesianGaussianMixture.fit as ignored; it is still passed as before.
        gmm = mixture.BayesianGaussianMixture(
            n_components=d, covariance_type='full', max_iter=int(10e4)
        ).fit(stacked_points, point_labels)
        ellipses = []
        for mean, covariance, weight in zip(
                gmm.means_, gmm.covariances_, gmm.weights_):
            eigenvalues, eigenvectors = np.linalg.eig(covariance)
            axis_std = np.sqrt(eigenvalues)
            ellipses.append({
                'mean': mean,
                # Ellipse axes are three standard deviations long.
                'std': 3 * axis_std,
                'rotation': eigenvectors.transpose(),
                # Radius is the largest unscaled standard deviation.
                'radius': max(axis_std),
                'entropy': weight,
            })
    elif model == "cder":
        # The labels are copied before being handed to the point-cloud builder
        # (presumably because it may modify them - TODO confirm).
        labels = y_train.copy()
        print('Begin CDER...')
        start = time.perf_counter()
        pc_train = multidim.PointCloud.from_multisample_multilabel(
            X_train, labels)
        cover_tree = CoverTree(pc_train)
        cder = CDER(parsimonious=True)
        cder.fit(cover_tree)
        wanted_keys = ['mean', 'std', 'rotation', 'radius', 'entropy']
        ellipses = []
        for gaussian in cder.gaussians:
            ellipse = {key: gaussian[key] for key in wanted_keys}
            ellipse['std'] = 3 * ellipse['std']
            ellipses.append(ellipse)
    else:
        raise ValueError("Not a valid model type: {!r}".format(model))

    # Featurization is part of the timed work; its result is not needed here.
    ATS.get_all_features(X_train, ellipses, ATS.f_ellipse)
    return time.perf_counter() - start