def modelnet_transforms(args):
    import kaolin.transforms as tfs

    if args.msda_mode in [
            'mixup', 'cutmix', 'alt_mixup_fmix', 'alt_mixup_cutmix',
            'alt_fmix_cutmix'
    ]:
        raise RuntimeError(
            'Mixup and CutMix callbacks not designed for 3D classification.')

    fmix_transform = tfs.Compose([
        tfs.TriangleMeshToVoxelGrid(args.pointcloud_resolution,
                                    normalize=True),
    ])
    test_transform = tfs.Compose([
        tfs.TriangleMeshToPointCloud(num_samples=1000),
        tfs.NormalizePointCloud()
    ])

    if args.msda_mode == 'fmix':
        transform = fmix_transform
    else:
        transform = test_transform

    return transform, test_transform
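# A minimal usage sketch (not from the original source): `modelnet_transforms`
# only reads `msda_mode` and `pointcloud_resolution` from its argument, so a
# hand-built argparse Namespace is enough for a quick check. The resolution
# value below is an arbitrary placeholder.
#
# from argparse import Namespace
# train_transform, test_transform = modelnet_transforms(
#     Namespace(msda_mode='fmix', pointcloud_resolution=32))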
def show_example(path, results, output, num_models):
    classes = None
    tf = tfs.Compose([
        tfs.UnitSpherePointCloud(),
        tfs.RandomRotatePointCloud(type='upright')
    ])
    ds = ModelNetPointCloud(basedir=path,
                            split='test',
                            categories=classes,
                            device='cuda',
                            transform=tf,
                            num_points=2**10,
                            sample_points=2**12)

    # Get a single shuffled batch
    loader = DataLoader(ds, batch_size=num_models, shuffle=True)
    for item in loader:
        break

    # Set up figure: one row per model, three columns (input, samples, prediction)
    cols = 3
    fig = plt.figure(figsize=(4 * cols, 4 * num_models), dpi=200)

    # Draw inputs
    for i, pts in enumerate(item.clone().cpu()):
        ax = fig.add_subplot(num_models, cols, i * cols + 1, projection='3d')
        if i == 0:
            ax.set_title('Input')
        ax_points(ax, pts, s=3)

    # Predict & draw
    measure, net = results
    net.cuda().eval()
    s1, s2 = net.hack_forward(item)
    for i, (samples, preds) in enumerate(zip(s1, s2)):
        pts_ax = fig.add_subplot(num_models, cols, i * cols + 2,
                                 projection='3d')
        pred_ax = fig.add_subplot(num_models, cols, i * cols + 3,
                                  projection='3d')
        if i == 0:
            pts_ax.set_title('')
            pred_ax.set_title('Prediction')
        colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
        for j in range(samples.size(0)):
            col = colors[j % len(colors)]
            p = samples[j, :].unsqueeze(0)
            ax_points(pts_ax, p.tolist(), s=8, c=col)
        for j in range(preds.size(0)):
            col = colors[j % len(colors)]
            lcl_pred = preds[j, :, :]
            ax_points(pred_ax, lcl_pred.tolist(), s=3, c=col)

    plt.tight_layout()
    plt.savefig(output)
def load_item(path, num_models):
    classes = ['chair', 'bathtub', 'toilet', 'night_stand']
    tf = tfs.Compose([tfs.UnitSpherePointCloud()])
    # Alternative transform with a random upright rotation:
    # tf = tfs.Compose([tfs.UnitSpherePointCloud(),
    #                   tfs.RandomRotatePointCloud(type='upright')])
    ds = ModelNetPointCloud(basedir=path,
                            split='test',
                            categories=classes,
                            device='cuda',
                            transform=tf,
                            num_points=2**10,
                            sample_points=2**12)

    # Return the first shuffled batch
    loader = DataLoader(ds, batch_size=num_models, shuffle=True)
    for item in loader:
        return item
def __init__(self, data_path, rotate, classes=None, jitter=None):
    t = tfs.Compose([
        tfs.UnitSpherePointCloud(),
        tfs.RandomRotatePointCloud(type=rotate)
    ])
    self.train_dataset = ModelNetPointCloud(basedir=data_path,
                                            split='train',
                                            categories=classes,
                                            transform=t,
                                            num_points=2**10,
                                            sample_points=2**12)
    self.valid_dataset = ModelNetPointCloud(basedir=data_path,
                                            split='test',
                                            categories=classes,
                                            transform=t,
                                            num_points=2**10,
                                            sample_points=2**12)
    self.dataset = self.train_dataset
def load_item(path):
    classes = None
    tf = tfs.Compose([
        tfs.UnitSpherePointCloud(),
        tfs.RandomRotatePointCloud(type='upright')
    ])
    ds = ModelNetPointCloud(basedir=path,
                            split='test',
                            categories=classes,
                            device='cuda',
                            transform=tf,
                            num_points=2**10,
                            sample_points=2**12)

    # Return the first shuffled single-item batch
    loader = DataLoader(ds, batch_size=1, shuffle=True)
    for item in loader:
        return item
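# A minimal usage sketch (not part of the original source): `load_item` returns
# the first shuffled test batch as a single-item batch of points. The
# ModelNet10 path below is a placeholder assumption.
#
# item = load_item('./ModelNet10')
# print(item.shape)  # typically (1, 1024, 3) given num_points=2**10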
                    help='Number of train epochs.')
parser.add_argument('-lr', '--learning-rate', type=float, default=1e-3,
                    help='Learning rate.')
parser.add_argument('--batch-size', type=int, default=12,
                    help='Batch size.')
parser.add_argument('--device', type=str, default='cuda',
                    help='Device to use.')
args = parser.parse_args()

transform = tfs.Compose([
    tfs.TriangleMeshToPointCloud(num_samples=args.num_points),
    tfs.NormalizePointCloud()
])

train_loader = DataLoader(ModelNet(args.modelnet_root,
                                   categories=args.categories,
                                   split='train',
                                   transform=transform,
                                   device=args.device),
                          batch_size=args.batch_size,
                          shuffle=True)
val_loader = DataLoader(ModelNet(args.modelnet_root,
                                 categories=args.categories,
                                 split='test',
                                 transform=transform,
                                 device=args.device),
def run_feature_selector_algo(args, S, X_train, X_test, T_train, T_test, i,
                              model_fpsr, model_fnsr, model_msfe, model_mspe,
                              model_card, model_nme_train, model_nme_test):
    log_params = False
    file_path_prefix = "./parameters/"
    feature_percentage = args.feature_percentage
    start_time = time.time()

    if args.algo == "RF":
        file_path = file_path_prefix + args.data + "/" + args.algo + "-" + str(
            i) + ".joblib"
        model = RandomForestRegressor(n_estimators=100)
        model = create_model(args, file_path, model, X_train, T_train)
        importance_vals = model.feature_importances_
        # Keep features with more than 1% importance, following
        # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6660200/
        S_hat = np.argwhere(importance_vals > 0.01).flatten()

        if args.data == "MNIST" or args.data == "CIFAR-10":
            # Keep only `feature_percentage` percent of the features
            file_path = file_path_prefix + args.data + "/" + args.algo + "-" + feature_percentage + "_percent_features-" + str(
                i) + ".joblib"
            n_sub_feat_size = int(X_train.shape[1] *
                                  int(feature_percentage) / 100)
            # For RF use this because of the already trained saved model in Sandipan's laptop
            # n_sub_feat_size = 315
            S_hat = np.argsort(
                importance_vals)[::-1][:n_sub_feat_size].flatten()
            model = RandomForestRegressor(n_estimators=100)
            model = create_model(args, file_path, model, X_train[:, S_hat],
                                 T_train)
            X_train = X_train[:, S_hat]
            X_test = X_test[:, S_hat]
        log_params = True

    elif args.algo == "DEEPLIFT":
        # Implemented using DeepExplainer in SHAP: https://github.com/slundberg/shap
        # -------------------------------------------------------------------------#
        x_train = X_train
        x_test = X_test
        X_train = X_train.reshape(X_train.shape[0], 28, 28)
        X_test = X_test.reshape(X_test.shape[0], 28, 28)
        # Make sure images have shape (28, 28, 1)
        X_train = np.expand_dims(X_train, -1)
        X_test = np.expand_dims(X_test, -1)
        print("X_train shape:", X_train.shape)
        print(X_train.shape[0], "train samples")
        print(X_test.shape[0], "test samples")

        # Model / data parameters
        num_classes = 10
        input_shape = (28, 28, 1)

        # Build the model
        model = CNNModel(num_classes, input_shape).create_cnn_model()
        model.summary()
        file_path = file_path_prefix + args.data + "/" + args.algo + "-" + str(
            i) + ".h5"

        # Train the model
        batch_size = 128
        epochs = 15
        model.compile(loss="categorical_crossentropy",
                      optimizer="adam",
                      metrics=["accuracy"])
        model = create_model(args, file_path, model, X_train, T_train)

        # Sanity checks
        score_train = model.evaluate(X_train, T_train, verbose=0)
        score_test = model.evaluate(X_test, T_test, verbose=0)
        print("Test loss:", score_test[0])
        print("Test accuracy:", score_test[1])

        background = X_train[np.random.choice(X_train.shape[0], 100,
                                              replace=False)]
        # Explain the model's predictions on a random sample of test images
        e = shap.DeepExplainer(model, background)
        x_test_sample = X_test[np.random.choice(
            X_test.shape[0], int(args.deeplift_sample_size),
            replace=False), :]
        shap_values = e.shap_values(x_test_sample)
        total_val = np.sum(np.sum(np.abs(shap_values), axis=0),
                           axis=0).flatten()
        S_hat = total_val.argsort()[::-1]

        if args.data == "MNIST" or args.data == "CIFAR-10":
            # Keep only `feature_percentage` percent of the features
            X_train = x_train[:, S_hat]
            X_test = x_test[:, S_hat]
            X_train = X_train.reshape(X_train.shape[0], 28, 28)
            X_test = X_test.reshape(X_test.shape[0], 28, 28)
            # Make sure images have shape (28, 28, 1)
            X_train = np.expand_dims(X_train, -1)
            X_test = np.expand_dims(X_test, -1)
            file_path = file_path_prefix + args.data + "/" + args.algo + "-" + feature_percentage + "percent_features-" + str(
                i) + ".h5"
            n_sub_feat_size = int(X_train.shape[1] *
                                  int(feature_percentage) / 100)
            S_hat = total_val.argsort()[::-1][:n_sub_feat_size]
            model_new = CNNModel(num_classes, input_shape).create_cnn_model()
            model_new.compile(loss="categorical_crossentropy",
                              optimizer="adam",
                              metrics=["accuracy"])
            model = create_model(args, file_path, model_new, X_train, T_train)
            # Just to compare which global features SHAP with DeepLift chooses
            # X_train_ori = loadmat("./mat_files/MNIST.mat")["train_x"].astype(np.float32)
            # show_image([X_train_ori[:,1],X_train_ori[:,20],X_train_ori[:,30]],S_hat[0:len(S)], (args.algo+str(i)))
            # show_image(x_train[1,:].flatten(),x_train[20,:].flatten(),x_train[30,:].flatten(),S_hat, (args.algo+str(i)))
        log_params = True

    elif args.algo == "BART":
        # Implemented using XBART: https://github.com/JingyuHe/XBART
        # ----------------------------------------------------------#
        x_train = X_train
        x_test = X_test
        X_train = pd.DataFrame(X_train)
        X_test = pd.DataFrame(X_test)
        # Ugly hack, otherwise xbart fit does not work
        T_train = T_train.flatten()
        T_test = T_test.flatten()
        file_path = file_path_prefix + args.data + "/" + args.algo + str(
            args.tree_size) + "-" + str(i) + ".joblib"
        # model = XBART(num_trees=int(args.tree_size), num_sweeps=20, burnin=15, verbose=True, parallel=True)
        model = XBART(num_trees=int(args.tree_size),
                      num_sweeps=20,
                      burnin=15,
                      verbose=True,
                      parallel=True)
        model = create_model(args, file_path, model, X_train, T_train)
        S_hat = sorted(model.importance, key=model.importance.get)[::-1]
        imp_vals = np.array(S_hat)
        S_hat = imp_vals[imp_vals > 0.01]

        if args.data == "MNIST" or args.data == "CIFAR-10":
            # Keep only `feature_percentage` percent of the features
            file_path = file_path_prefix + args.data + "/" + args.algo + "-" + feature_percentage + "_percent_features-" + str(
                i) + ".joblib"
            n_sub_feat_size = int(X_train.shape[1] *
                                  int(feature_percentage) / 100)
            S_hat = sorted(model.importance,
                           key=model.importance.get)[::-1][:n_sub_feat_size]
            model = XBART(num_trees=int(args.tree_size),
                          num_sweeps=20,
                          burnin=15,
                          verbose=True,
                          parallel=True)
            X_train = pd.DataFrame(x_train[:, S_hat])
            X_test = pd.DataFrame(x_test[:, S_hat])
            model = create_model(args, file_path, model, X_train, T_train)
        # Ugly hack, otherwise xbart predict does not work
        T_train = T_train.reshape(X_train.shape[0], 1)
        T_test = T_test.reshape(X_test.shape[0], 1)
        log_params = True

    elif args.algo == "POINTNET":
        import torch
        from torch.utils.data import DataLoader
        import kaolin as kal
        from kaolin import ClassificationEngine
        from kaolin.datasets import ModelNet
        from kaolin.models.PointNet import PointNetClassifier as PointNet
        import kaolin.transforms as tfs

        modelnet_path = './mat_files/ModelNet10'
        categories = ['chair', 'sofa']
        num_points = 1024
        device = 'cuda'

        transform = tfs.Compose([
            tfs.TriangleMeshToPointCloud(num_samples=num_points),
            tfs.NormalizePointCloud()
        ])
        train_loader = DataLoader(ModelNet(modelnet_path,
                                           categories=categories,
                                           split='train',
                                           transform=transform,
                                           device=device),
                                  batch_size=12,
                                  shuffle=True)

    elif args.algo == "GAM":
        # Note: GAM does not work properly on MNIST
        file_path = file_path_prefix + args.data + "/" + args.algo + "-" + str(
            i) + ".joblib"
        threshold = 0.01
        gam_fn_form = s(0, n_splines=5)
        for feature in range(1, X_train.shape[1]):
            gam_fn_form += s(feature, n_splines=5)
        # Regression in GAM
        # https://pygam.readthedocs.io/en/latest/notebooks/tour_of_pygam.html#Regression
        model = GAM(gam_fn_form,
                    distribution='normal',
                    link='identity',
                    max_iter=10,
                    tol=0.001)
        model = create_model(args, file_path, model, X_train, T_train)
        feature_vals = np.array(model.statistics_['p_values'])
        imp_vals = feature_vals[feature_vals > threshold]
        S_hat = np.argsort(imp_vals).flatten()
        # S_hat = np.argsort(model.statistics_['p_values'])
        log_params = True

    elif args.algo == "LASSO":
        file_path = file_path_prefix + args.data + "/" + args.algo + "-" + str(
            i) + ".joblib"
        threshold = 0.01
        # T_train = np.argmax(T_train, axis=1)
        # T_test = np.argmax(T_test, axis=1)
        model = linear_model.Lasso(alpha=0.01, max_iter=5000)
        model = create_model(args, file_path, model, X_train, T_train)
        imp_vals = model.coef_[model.coef_ > threshold]
        S_hat = np.argsort(imp_vals).flatten()

        if args.data == "MNIST" or args.data == "CIFAR-10":
            # Keep only `feature_percentage` percent of the features
            file_path = file_path_prefix + args.data + "/" + args.algo + "-" + feature_percentage + "_percent_features-" + str(
                i) + ".joblib"
            n_sub_feat_size = int(X_train.shape[1] *
                                  int(feature_percentage) / 100)
            S_hat = np.argsort(
                model.coef_)[::-1][:n_sub_feat_size].flatten()
            model = linear_model.Lasso(alpha=0.01, max_iter=5000)
            model = create_model(args, file_path, model, X_train[:, S_hat],
                                 T_train)
            X_train = X_train[:, S_hat]
            X_test = X_test[:, S_hat]
        # Ugly hack, otherwise the vector norm is not calculated
        # T_train = T_train.reshape(X_train.shape[0], 1)
        # T_test = T_test.reshape(X_test.shape[0], 1)
        log_params = True

    elif args.algo == "E-NET":
        file_path = file_path_prefix + args.data + "/" + args.algo + "-" + str(
            i) + ".joblib"
        T_train = np.argmax(T_train, axis=1)
        T_test = np.argmax(T_test, axis=1)
        model = ElasticNet(alpha=0.01, l1_ratio=0.7)
        model = create_model(args, file_path, model, X_train, T_train)
        S_hat = np.argsort(model.coef_)
        log_params = False

    elif args.algo == "CORR":
        threshold = 0.01
        importance_vals = abs(np.dot((X_train.T), T_train).T)[::-1]
        S_hat = np.argsort(importance_vals > threshold).flatten()
        model_fpsr[0, i] = FPSR(S, S_hat)
        model_fnsr[0, i] = FNSR(S, S_hat)
        log_params = False

    elif args.algo == "SPINN":
        # https://github.com/jjfeng/spinn
        log_params = False
        print("Not yet implemented!")

    else:
        print("Sorry! No such evaluation exists.")

    if log_params:
        # Mean squared fitting error (MSFE) and prediction error (MSPE)
        model_msfe[0, i] = compute_mse_compare(
            model.predict(X_train).reshape(T_train.shape), T_train)
        model_mspe[0, i] = compute_mse_compare(
            model.predict(X_test).reshape(T_test.shape), T_test)

        # Selection rate errors
        model_fpsr[0, i] = FPSR(S, S_hat)
        model_fnsr[0, i] = FNSR(S, S_hat)

        # Cardinality of the selected feature set
        model_card[0, i] = len(S_hat)

        # Normalized model error (NME)
        model_nme_train[0, i] = compute_nme(
            model.predict(X_train).reshape(T_train.shape), T_train)
        model_nme_test[0, i] = compute_nme(
            model.predict(X_test).reshape(T_test.shape), T_test)

        if args.algo == "BART":
            val = model.predict(X_train)
            normalized = (val - min(val)) / (max(val) - min(val))
            accuracy = np.sum(
                [abs(0.9 * normalized - T_train.flatten()) < 0.2]) / len(
                    T_train.flatten())
            print("**********The train accuracy is: ", accuracy)
        else:
            print(
                "**********The train accuracy is: ",
                calculate_accuracy(
                    model.predict(X_train).reshape(T_train.shape).T,
                    T_train.T))

        if args.algo == "BART":
            val = model.predict(X_test)
            normalized = (val - min(val)) / (max(val) - min(val))
            accuracy = np.sum(
                [abs(0.9 * normalized - T_test.flatten()) < 0.2]) / len(
                    T_test.flatten())
            print("**********The test accuracy is: ", accuracy)
        else:
            print(
                "**********The test accuracy is: ",
                calculate_accuracy(
                    model.predict(X_test).reshape(T_test.shape).T, T_test.T))

    print("Time taken for this MC iteration: ", time.time() - start_time)
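# A self-contained sketch (not part of the original source) of the
# "feature_importances_ > 0.01" selection rule used in the RF branch above, on
# synthetic regression data; the shapes, seed, and toy target are illustrative
# assumptions only.
#
# import numpy as np
# from sklearn.ensemble import RandomForestRegressor
#
# rng = np.random.default_rng(0)
# X = rng.normal(size=(200, 20))
# y = X[:, 0] + 2.0 * X[:, 3] + 0.1 * rng.normal(size=200)  # only features 0 and 3 matter
# rf = RandomForestRegressor(n_estimators=100).fit(X, y)
# S_hat = np.argwhere(rf.feature_importances_ > 0.01).flatten()
# print(S_hat)  # expected to contain indices 0 and 3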
print("DEVICE: CPU") device = "cpu" for _npoints in npoint_arr: pcloud_arr = [ (T.ScalePointCloud(torch.Tensor([.5]).to(device), inplace=False), "ScalePointCloud"), (T.RotatePointCloud(ROT_MATRIX.to(device), inplace=False), "RotatePointCloud"), (T.RealignPointCloud(realign_point_cloud(_npoints, device), inplace=False), "ReAlignPointCloud"), (T.NormalizePointCloud(inplace=False), "NormalizePointCloud"), (T.Compose([ T.ScalePointCloud(torch.Tensor([.5]).to(device), inplace=False), T.RotatePointCloud(torch.randn(3, 3).to(device), inplace=False), T.RealignPointCloud(realign_point_cloud(_npoints, device), inplace=False), T.NormalizePointCloud(inplace=False) ]), "Chain") ] trimesh_arr = [ (T.ScaleMesh(.5, inplace=True), "ScaleTriMesh"), (T.RotateMesh(ROT_MATRIX.to(device), inplace=True), "RotateTriMesh"), (T.RealignMesh(realign_trimesh(_npoints, device).vertices), "ReAlignTriMesh"), (T.NormalizeMesh(inplace=True), "NormalizeTriMesh"), (T.Compose([ T.ScaleMesh(.5, inplace=True), T.RotateMesh(ROT_MATRIX.to(device), inplace=True), T.RealignMesh(realign_trimesh(_npoints, device).vertices),