Example no. 1
def modelnet_transforms(args):
    import kaolin.transforms as tfs

    if args.msda_mode in [
            'mixup', 'cutmix', 'alt_mixup_fmix', 'alt_mixup_cutmix',
            'alt_fmix_cutmix'
    ]:
        raise RuntimeError(
            'Mixup and CutMix callbacks not designed for 3D classification.')

    fmix_transform = tfs.Compose([
        tfs.TriangleMeshToVoxelGrid(args.pointcloud_resolution,
                                    normalize=True),
    ])
    test_transform = tfs.Compose([
        tfs.TriangleMeshToPointCloud(num_samples=1000),
        tfs.NormalizePointCloud()
    ])

    if args.msda_mode == 'fmix':
        transform = fmix_transform
    else:
        transform = test_transform

    return transform, test_transform
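
# The fragment below is the accompanying training script. Its argparse setup
# and imports are not shown in the original, so a plausible minimal version
# is sketched here; the argument names are inferred from their uses below.
import argparse

import kaolin.transforms as tfs
from kaolin.datasets import ModelNet
from torch.utils.data import DataLoader

parser = argparse.ArgumentParser()
parser.add_argument('--modelnet-root',
                    type=str,
                    help='Root directory of the ModelNet dataset.')
parser.add_argument('--categories',
                    type=str,
                    nargs='+',
                    default=['chair', 'sofa'],
                    help='Shape categories to classify.')
parser.add_argument('--num-points',
                    type=int,
                    default=1024,
                    help='Number of points sampled from each mesh.')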
parser.add_argument('-lr',
                    '--learning-rate',
                    type=float,
                    default=1e-3,
                    help='Learning rate.')
parser.add_argument('--batch-size', type=int, default=12, help='Batch size.')
parser.add_argument('--device',
                    type=str,
                    default='cuda',
                    help='Device to use.')

args = parser.parse_args()

transform = tfs.Compose([
    tfs.TriangleMeshToPointCloud(num_samples=args.num_points),
    tfs.NormalizePointCloud()
])

train_loader = DataLoader(ModelNet(args.modelnet_root,
                                   categories=args.categories,
                                   split='train',
                                   transform=transform,
                                   device=args.device),
                          batch_size=args.batch_size,
                          shuffle=True)

val_loader = DataLoader(ModelNet(args.modelnet_root,
                                 categories=args.categories,
                                 split='test',
                                 transform=transform,
                                 device=args.device),
                        batch_size=args.batch_size)
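
# A plausible continuation, not shown in the original fragment: kaolin 0.1
# provided a ClassificationEngine (imported elsewhere in this repo as
# `from kaolin import ClassificationEngine`) that wraps the training loop.
# The constructor arguments below are an assumption, sketched from that
# import and the loaders built above:
#
#     engine = ClassificationEngine(PointNet(device=args.device),
#                                   train_loader,
#                                   val_loader,
#                                   device=args.device)
#     engine.fit()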
Example no. 3
def run_feature_selector_algo(args, S, X_train, X_test, T_train, T_test, i,
                              model_fpsr, model_fnsr, model_msfe, model_mspe,
                              model_card, model_nme_train, model_nme_test):
    log_params = False
    file_path_prefix = "./parameters/"
    feature_percentage = args.feature_percentage

    start_time = time.time()
    if args.algo == "RF":
        file_path = file_path_prefix + args.data + "/" + args.algo + "-" + str(
            i) + ".joblib"

        model = RandomForestRegressor(n_estimators=100)
        model = create_model(args, file_path, model, X_train, T_train)
        importance_vals = model.feature_importances_

        # Choose features which have more than 1% importance, per the paper: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6660200/
        S_hat = np.argwhere(importance_vals > 0.01).flatten()

        if args.data == "MNIST" or args.data == "CIFAR-10":  # Take 40% features of MNIST only
            file_path = file_path_prefix + args.data + "/" + args.algo + "-" + feature_percentage + "_percent_features-" + str(
                i) + ".joblib"
            n_sub_feat_size = int(X_train.shape[1] * int(feature_percentage) /
                                  100)
            # For RF use this because of the already-trained saved model on Sandipan's laptop
            # n_sub_feat_size = 315
            # Keep the top feature_percentage% most important features
            S_hat = np.argsort(
                importance_vals)[::-1][:n_sub_feat_size].flatten()
            model = RandomForestRegressor(n_estimators=100)
            model = create_model(args, file_path, model, X_train[:, S_hat],
                                 T_train)
            X_train = X_train[:, S_hat]
            X_test = X_test[:, S_hat]
        log_params = True

    elif args.algo == "DEEPLIFT":
        # Implemented using DeepExplainer in SHAP: https://github.com/slundberg/shap
        #-------------------------------------------------------------------------#
        x_train = X_train
        x_test = X_test

        X_train = X_train.reshape(X_train.shape[0], 28, 28)
        X_test = X_test.reshape(X_test.shape[0], 28, 28)
        # Make sure images have shape (28, 28, 1)
        X_train = np.expand_dims(X_train, -1)
        X_test = np.expand_dims(X_test, -1)
        print("X_train shape:", X_train.shape)
        print(X_train.shape[0], "train samples")
        print(X_test.shape[0], "test samples")

        # Model / data parameters
        num_classes = 10
        input_shape = (28, 28, 1)
        """
        ## Build the model
        """

        model = CNNModel(num_classes, input_shape).create_cnn_model()
        model.summary()

        file_path = file_path_prefix + args.data + "/" + args.algo + "-" + str(
            i) + ".h5"
        """
        ## Train the model
        """

        batch_size = 128
        epochs = 15

        model.compile(loss="categorical_crossentropy",
                      optimizer="adam",
                      metrics=["accuracy"])
        model = create_model(args, file_path, model, X_train, T_train)

        # Sanity checks
        score_train = model.evaluate(X_train, T_train, verbose=0)
        score_test = model.evaluate(X_test, T_test, verbose=0)
        print("Train loss:", score_train[0])
        print("Train accuracy:", score_train[1])
        print("Test loss:", score_test[0])
        print("Test accuracy:", score_test[1])

        background = X_train[np.random.choice(X_train.shape[0],
                                              100,
                                              replace=False)]
        # Explain the model's predictions on a random sample of test images
        e = shap.DeepExplainer(model, background)

        x_test_sample = X_test[np.random.choice(
            X_test.shape[0], int(args.deeplift_sample_size), replace=False), :]

        shap_values = e.shap_values(x_test_sample)

        # Aggregate absolute SHAP values over classes and samples, then rank
        # features (pixels) by total attribution
        total_val = np.sum(np.sum(np.abs(shap_values), axis=0),
                           axis=0).flatten()
        S_hat = total_val.argsort()[::-1]

        if args.data == "MNIST" or args.data == "CIFAR-10":  # Take 40% features of MNIST only
            X_train = x_train[:, S_hat]
            X_test = x_test[:, S_hat]
            X_train = X_train.reshape(X_train.shape[0], 28, 28)
            X_test = X_test.reshape(X_test.shape[0], 28, 28)
            # Make sure images have shape (28, 28, 1)
            X_train = np.expand_dims(X_train, -1)
            X_test = np.expand_dims(X_test, -1)
            file_path = file_path_prefix + args.data + "/" + args.algo + "-" + feature_percentage + "_percent_features-" + str(
                i) + ".h5"
            # Use the flat feature count (x_train), not the width of the
            # reshaped images, to size the feature subset
            n_sub_feat_size = int(x_train.shape[1] * int(feature_percentage) /
                                  100)
            S_hat = total_val.argsort()[::-1][:n_sub_feat_size]  # top feature_percentage% features
            model_new = CNNModel(num_classes, input_shape).create_cnn_model()
            model_new.compile(loss="categorical_crossentropy",
                              optimizer="adam",
                              metrics=["accuracy"])
            model = create_model(args, file_path, model_new, X_train, T_train)

        # Just to compare what global features SHAP with DeepLift choose
        # X_train_ori =  loadmat("./mat_files/MNIST.mat")["train_x"].astype(np.float32)
        # show_image([X_train_ori[:,1],X_train_ori[:,20],X_train_ori[:,30]],S_hat[0:len(S)], (args.algo+str(i)))

        # show_image(x_train[1,:].flatten(),x_train[20,:].flatten(),x_train[30,:].flatten(),S_hat, (args.algo+str(i)))

        log_params = True

    elif args.algo == "BART":
        # Implemented using XBART: https://github.com/JingyuHe/XBART
        #----------------------------------------------------------#
        x_train = X_train
        x_test = X_test

        X_train = pd.DataFrame(X_train)
        X_test = pd.DataFrame(X_test)

        # Ugly hack otherwise xbart fit does not work
        T_train = T_train.flatten()
        T_test = T_test.flatten()

        file_path = file_path_prefix + args.data + "/" + args.algo + str(
            args.tree_size) + "-" + str(i) + ".joblib"
        model = XBART(num_trees=int(args.tree_size),
                      num_sweeps=20,
                      burnin=15,
                      verbose=True,
                      parallel=True)
        model = create_model(args, file_path, model, X_train, T_train)

        # Rank features by importance and keep those whose importance exceeds
        # the 1% threshold
        feat_order = np.array(
            sorted(model.importance, key=model.importance.get)[::-1])
        imp_vals = np.array([model.importance[k] for k in feat_order])
        S_hat = feat_order[imp_vals > 0.01]

        if args.data == "MNIST" or args.data == "CIFAR-10":  # Take 40% features of MNIST only
            file_path = file_path_prefix + args.data + "/" + args.algo + "-" + feature_percentage + "_percent_features-" + str(
                i) + ".joblib"
            n_sub_feat_size = int(X_train.shape[1] * int(feature_percentage) /
                                  100)
            # Keep the top feature_percentage% most important features
            S_hat = sorted(model.importance,
                           key=model.importance.get)[::-1][:n_sub_feat_size]
            model = XBART(num_trees=int(args.tree_size),
                          num_sweeps=20,
                          burnin=15,
                          verbose=True,
                          parallel=True)
            X_train = pd.DataFrame(x_train[:, S_hat])
            X_test = pd.DataFrame(x_test[:, S_hat])
            model = create_model(args, file_path, model, X_train, T_train)

        # Ugly hack otherwise xbart predict does not work
        T_train = T_train.reshape(X_train.shape[0], 1)
        T_test = T_test.reshape(X_test.shape[0], 1)

        log_params = True

    elif args.algo == "POINTNET":
        import torch
        from torch.utils.data import DataLoader
        import kaolin as kal
        from kaolin import ClassificationEngine
        from kaolin.datasets import ModelNet
        from kaolin.models.PointNet import PointNetClassifier as PointNet
        import kaolin.transforms as tfs

        modelnet_path = './mat_files/ModelNet10'
        categories = ['chair', 'sofa']
        num_points = 1024
        device = 'cuda'

        transform = tfs.Compose([
            tfs.TriangleMeshToPointCloud(num_samples=num_points),
            tfs.NormalizePointCloud()
        ])

        train_loader = DataLoader(ModelNet(modelnet_path,
                                           categories=categories,
                                           split='train',
                                           transform=transform,
                                           device=device),
                                  batch_size=12,
                                  shuffle=True)
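
        # NOTE: this branch only builds the training data loader; no model is
        # fitted and S_hat is never assigned, so log_params stays False and
        # the metric logging below is skipped for POINTNET.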

    elif args.algo == "GAM":  # Note GAM doesn't work on MNIST properly
        file_path = file_path_prefix + args.data + "/" + args.algo + "-" + str(
            i) + ".joblib"
        threshold = 0.01

        gam_fn_form = s(0, n_splines=5)
        for feature in range(1, X_train.shape[1]):
            gam_fn_form += s(feature, n_splines=5)
        # Regression in GAM
        # https://pygam.readthedocs.io/en/latest/notebooks/tour_of_pygam.html#Regression
        model = GAM(gam_fn_form,
                    distribution='normal',
                    link='identity',
                    max_iter=10,
                    tol=0.001)
        model = create_model(args, file_path, model, X_train, T_train)

        # Indices of features whose p-values exceed the threshold
        feature_vals = np.array(model.statistics_['p_values'])
        S_hat = np.argwhere(feature_vals > threshold).flatten()

        #S_hat = np.argsort(model.statistics_['p_values'])

        log_params = True

    elif args.algo == "LASSO":
        file_path = file_path_prefix + args.data + "/" + args.algo + "-" + str(
            i) + ".joblib"

        threshold = 0.01
        #T_train = np.argmax(T_train, axis=1)
        #T_test = np.argmax(T_test, axis=1)

        model = linear_model.Lasso(alpha=0.01, max_iter=5000)
        model = create_model(args, file_path, model, X_train, T_train)

        # Indices of coefficients above the threshold
        S_hat = np.argwhere(model.coef_ > threshold).flatten()
        if args.data == "MNIST" or args.data == "CIFAR-10":  # Take 40% features of MNIST only
            file_path = file_path_prefix + args.data + "/" + args.algo + "-" + feature_percentage + "_percent_features-" + str(
                i) + ".joblib"
            n_sub_feat_size = int(X_train.shape[1] * int(feature_percentage) /
                                  100)
            # Keep the top feature_percentage% largest coefficients
            S_hat = np.argsort(
                model.coef_)[::-1][:n_sub_feat_size].flatten()
            model = linear_model.Lasso(alpha=0.01, max_iter=5000)
            model = create_model(args, file_path, model, X_train[:, S_hat],
                                 T_train)
            X_train = X_train[:, S_hat]
            X_test = X_test[:, S_hat]

        # Ugly hack otherwise vector norm not calculated
        #T_train = T_train.reshape(X_train.shape[0], 1)
        #T_test = T_test.reshape(X_test.shape[0], 1)

        log_params = True

    elif args.algo == "E-NET":
        file_path = file_path_prefix + args.data + "/" + args.algo + "-" + str(
            i) + ".joblib"

        T_train = np.argmax(T_train, axis=1)
        T_test = np.argmax(T_test, axis=1)

        model = ElasticNet(alpha=0.01, l1_ratio=0.7)
        model = create_model(args, file_path, model, X_train, T_train)

        S_hat = np.argsort(model.coef_)

        log_params = False

    elif args.algo == "CORR":
        threshold = 0.01
        # Sum the absolute feature-target correlations across target columns,
        # then keep the features whose score exceeds the threshold
        importance_vals = np.abs(np.dot(X_train.T, T_train)).reshape(
            X_train.shape[1], -1).sum(axis=1)
        S_hat = np.argwhere(importance_vals > threshold).flatten()
        model_fpsr[0, i] = FPSR(S, S_hat)
        model_fnsr[0, i] = FNSR(S, S_hat)

        log_params = False
    elif args.algo == "SPINN":
        # https://github.com/jjfeng/spinn
        log_params = False
        print("Not yet implemented!")

    else:
        print("Sorry! No such evaluation exists.")

    if log_params:
        # Mean squared errors
        model_msfe[0, i] = compute_mse_compare(
            model.predict(X_train).reshape(T_train.shape), T_train)
        model_mspe[0, i] = compute_mse_compare(
            model.predict(X_test).reshape(T_test.shape), T_test)
        # Selection rate errors
        model_fpsr[0, i] = FPSR(S, S_hat)
        model_fnsr[0, i] = FNSR(S, S_hat)
        # Cardinality of the model
        model_card[0, i] = len(S_hat)
        # Normalized Error (NME)
        model_nme_train[0, i] = compute_nme(
            model.predict(X_train).reshape(T_train.shape), T_train)
        model_nme_test[0, i] = compute_nme(
            model.predict(X_test).reshape(T_test.shape), T_test)
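
        # FPSR/FNSR above are false positive/negative selection rates; under
        # the usual definitions (an assumption about the helper functions),
        # FPSR = |S_hat \ S| / |S_hat| and FNSR = |S \ S_hat| / |S|.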

        if args.algo == "BART":
            val = model.predict(X_train)
            normalized = (val - min(val)) / (max(val) - min(val))
            accuracy = np.sum([
                abs(0.9 * normalized - T_train.flatten()) < 0.2
            ]) / len(T_train.flatten())
            print("**********The train accuracy is: ", accuracy)
        else:
            print(
                "**********The train accuracy is: ",
                calculate_accuracy(
                    model.predict(X_train).reshape(T_train.shape).T,
                    T_train.T))

        if args.algo == "BART":
            val = model.predict(X_test)
            normalized = (val - min(val)) / (max(val) - min(val))
            accuracy = np.sum([abs(0.9 * normalized - T_test.flatten()) < 0.2
                               ]) / len(T_test.flatten())
            print("**********The test accuracy is: ", accuracy)
        else:
            print(
                "**********The test accuracy is: ",
                calculate_accuracy(
                    model.predict(X_test).reshape(T_test.shape).T, T_test.T))

    print("Time taken for this MC iteration: ", time.time() - start_time)
Example no. 4
]

cpu_bm_trimesh = setup_benchmark_record(names)
gpu_bm_trimesh = setup_benchmark_record(names)

print("DEVICE: CPU")
device = "cpu"
for _npoints in npoint_arr:
    pcloud_arr = [
        (T.ScalePointCloud(torch.Tensor([.5]).to(device),
                           inplace=False), "ScalePointCloud"),
        (T.RotatePointCloud(ROT_MATRIX.to(device),
                            inplace=False), "RotatePointCloud"),
        (T.RealignPointCloud(realign_point_cloud(_npoints, device),
                             inplace=False), "ReAlignPointCloud"),
        (T.NormalizePointCloud(inplace=False), "NormalizePointCloud"),
        (T.Compose([
            T.ScalePointCloud(torch.Tensor([.5]).to(device), inplace=False),
            T.RotatePointCloud(torch.randn(3, 3).to(device), inplace=False),
            T.RealignPointCloud(realign_point_cloud(_npoints, device),
                                inplace=False),
            T.NormalizePointCloud(inplace=False)
        ]), "Chain")
    ]

    trimesh_arr = [
        (T.ScaleMesh(.5, inplace=True), "ScaleTriMesh"),
        (T.RotateMesh(ROT_MATRIX.to(device), inplace=True), "RotateTriMesh"),
        (T.RealignMesh(realign_trimesh(_npoints,
                                       device).vertices), "ReAlignTriMesh"),
        (T.NormalizeMesh(inplace=True), "NormalizeTriMesh"),