Example #1
def prune(args, model, percent, train_loader, val_loader, hyperparams,
          df_column_entry_dict):
    """
    Uses parameter pruning to remove the connections from the model that are least relevant to the neurons.
    The concrete procedure traverses the model's modules one by one (https://arxiv.org/abs/1506.02626).
    # todo elaborate on the CONCRETE pruning procedure, especially when implementing alternatives or before varying this
    Code from https://github.com/larry0123du/PyTorch-Deep-Compression
    Code explained at https://jacobgil.github.io/deeplearning/pruning-deep-learning
    :param model: the actual model (Module subclass), not a path, not just the weights
    :return: the saved model's path
    """

    # Set additional parameters required for pruning.
    # todo future work: consider passing all of these via args instead of splitting them between the args and hyperparams parameters
    hyperparams['topk'] = [1, 5]  # Top k precision metrics
    hyperparams['interval'] = int(args.prune_epochs)  # checkpointing interval
    hyperparams['momentum'] = 0.9
    hyperparams['weight_decay'] = 0.005
    torch.cuda.empty_cache()
    print("emptied cache\n")
    print_memory_metrics("start of pruning", df_column_entry_dict)
    start_mem_measurement()
    start = time.time()
    iter_prune(args=args,
               train_loader=train_loader,
               val_loader=val_loader,
               the_model=model,
               stop_percent=percent,
               df_column_entry_dict=df_column_entry_dict,
               **hyperparams)
    time_elapse = time.time() - start

    event = 'iterative pruning'
    formatted_time = str(timedelta(seconds=time_elapse))
    df_column_entry_dict['Time measurement at ' + event + ' [s]'] = time_elapse

    print("\n" + event + " took " + formatted_time + " seconds\n")
    event = "end of pruning"
    stop_mem_measurement(event, df_column_entry_dict)
    print_memory_metrics(event, df_column_entry_dict)
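The docstring above points to magnitude-based pruning (Han et al., https://arxiv.org/abs/1506.02626), but the repository's iter_prune itself is not shown. Below is a minimal sketch of the underlying idea, assuming per-layer magnitude thresholding; the magnitude_prune helper is illustrative and not taken from PyTorch-Deep-Compression.

# Minimal sketch, not the repository's iter_prune: zero out the smallest-magnitude
# weights of every Conv2d/Linear module, layer by layer (magnitude pruning).
import torch
import torch.nn as nn

def magnitude_prune(model: nn.Module, percent: float) -> None:
    """Zero the `percent`% smallest-magnitude weights of each Conv2d/Linear layer."""
    for module in model.modules():
        if isinstance(module, (nn.Conv2d, nn.Linear)):
            weight = module.weight.data
            threshold = torch.quantile(weight.abs().flatten(), percent / 100.0)
            module.weight.data.mul_((weight.abs() > threshold).float())

# usage on a toy model: prune roughly half of the connections
toy_model = nn.Sequential(nn.Linear(20, 10), nn.ReLU(), nn.Linear(10, 2))
magnitude_prune(toy_model, 50.0)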
Example #2
def test(test_loader, model, criterion, loggers, activations_collectors, args):
    """Model Test"""
    msglogger.info('--- test ---------------------')
    if activations_collectors is None:
        activations_collectors = create_activation_stats_collectors(
            model, None)
    with collectors_context(activations_collectors["test"]) as collectors:

        df_column_entry_dict = {}
        event = "validation"
        if torch.cuda.is_available():
            print_memory_metrics("before " + event, df_column_entry_dict)
        start_mem_measurement()
        start = time.time()

        top1, top5, losses = _validate(test_loader, model, criterion, loggers,
                                       args)

        time_elapse = time.time() - start
        df_column_entry_dict['Time measurement at ' + event +
                             ' [s]'] = time_elapse
        stop_mem_measurement(event, df_column_entry_dict)
        if torch.cuda.is_available():
            print_memory_metrics("after " + event, df_column_entry_dict)

        import pandas as pd
        frame = pd.DataFrame([list(df_column_entry_dict.values())],
                             columns=list(df_column_entry_dict.keys()))
        frame.to_excel(DISTILLER_PATH + msglogger.logdir + ".xlsx",
                       index=False)

        distiller.log_activation_statsitics(-1,
                                            "test",
                                            loggers,
                                            collector=collectors['sparsity'])
        save_collectors_data(collectors, msglogger.logdir)
    return top1, top5, losses
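The timing and memory bookkeeping here follows the same pattern as the other snippets on this page: measure an event, store it under 'Time measurement at <event> [s]' in df_column_entry_dict, then dump the dict to an Excel sheet. A minimal stand-alone sketch of that pattern as a context manager; timed_event is an illustrative helper, not part of the repository.

import time
from contextlib import contextmanager
from datetime import timedelta

import pandas as pd

@contextmanager
def timed_event(event, df_column_entry_dict):
    """Time the wrapped block and record it the way these examples do."""
    start = time.time()
    yield
    time_elapse = time.time() - start
    df_column_entry_dict['Time measurement at ' + event + ' [s]'] = time_elapse
    print("\n" + event + " took " + str(timedelta(seconds=time_elapse)) + " seconds\n")

# usage
df_column_entry_dict = {}
with timed_event("validation", df_column_entry_dict):
    time.sleep(0.1)  # stand-in for _validate(...)
pd.DataFrame([df_column_entry_dict]).to_excel("validation_metrics.xlsx", index=False)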
Example #3
def apply_band_selection(technique, dataset, predictions, mode, n_components,
                         df_column_entry_dict):
    if df_column_entry_dict is None:
        df_column_entry_dict = {}  # fallback so the metric writes below still work when no dict is passed

    print("Dataset current shape: " + str(dataset.shape))

    print_memory_metrics("before applying band selection method " + technique,
                         df_column_entry_dict)

    from DeepHyperX.batch import PARAMETER_JSON
    parameterFile = open(PARAMETER_JSON, "r")
    import json
    data = json.load(parameterFile)
    parameterFile.close()

    if technique in ["IncrementalPCA"]:  # requires special method
        dataset, _ = applyIncrementalPCA(dataset, n_components)

    elif technique in data["image_compression"]["extraction"]["techniques"]:

        extraction_object = None
        if technique == "PCA":
            from sklearn.decomposition import PCA
            """ HybridSN: Exploring 3D-2D CNN Feature Hierarchy for Hyperspectral Image Classification
            Source code used: https://github.com/gokriznastic/HybridSN/blob/master/Hybrid-Spectral-Net.ipynb
            Paper: https://arxiv.org/abs/1902.06701
            Good parameters: 30 components for Indian Pines, 15 for Salinas and Pavia University
            """
            extraction_object = PCA(n_components=n_components, whiten=True)
        elif technique == "KernelPCA":
            from sklearn.decomposition import KernelPCA
            extraction_object = KernelPCA(kernel="rbf",
                                          n_components=n_components,
                                          gamma=None,
                                          fit_inverse_transform=True,
                                          n_jobs=1)
        elif technique == "SparsePCA":
            """Sparse PCA uses the links between the ACP and the SVD to extract the main components by solving a lower-order matrix approximation problem."""
            from sklearn.decomposition import SparsePCA
            extraction_object = SparsePCA(n_components=n_components,
                                          alpha=0.0001,
                                          n_jobs=-1)
        elif technique == "LDA":  # only supervised is supported, y is required
            if mode != "supervised":
                print(
                    "warning: mode other than supervised detected for lda, setting it to supervised...\n"
                )
                mode = "supervised"
            # maximally n_classes - 1 columns, https://stackoverflow.com/questions/26963454/lda-ignoring-n-components
            from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
            extraction_object = LinearDiscriminantAnalysis(
                n_components=n_components)
        elif technique == "SVD":
            from sklearn.decomposition import TruncatedSVD
            extraction_object = TruncatedSVD(n_components=n_components,
                                             algorithm='randomized',
                                             n_iter=5)
        elif technique == "GRP":
            from sklearn.random_projection import GaussianRandomProjection
            extraction_object = GaussianRandomProjection(
                n_components=n_components, eps=0.5)
        elif technique == "SRP":
            from sklearn.random_projection import SparseRandomProjection
            extraction_object = SparseRandomProjection(
                n_components=n_components,
                density='auto',
                eps=0.5,
                dense_output=False)
        elif technique == "MDS":
            """O(n^3), uses lots of memory for distance matrix (doesn't fit in 48GB), doesn't fit in GPU memory either, so basically unusable"""
            from sklearn.manifold import MDS
            extraction_object = MDS(n_components=n_components,
                                    n_init=12,
                                    max_iter=200,
                                    metric=True,
                                    n_jobs=16)
        elif technique == "MiniBatch":
            """takes too long"""
            from sklearn.decomposition import MiniBatchDictionaryLearning
            extraction_object = MiniBatchDictionaryLearning(
                n_components=n_components, batch_size=200, alpha=1, n_iter=1)
        elif technique == "LLE":
            # modified LLE requires n_neighbors >= n_components
            """execution takes 20 minutes or so, but it does work, just takes a long time"""
            from sklearn.manifold import LocallyLinearEmbedding
            extraction_object = LocallyLinearEmbedding(
                n_components=n_components,
                n_neighbors=100,
                method='modified',
                n_jobs=4)
        elif technique == "ICA":
            from sklearn.decomposition import FastICA
            extraction_object = FastICA(n_components=n_components,
                                        algorithm='parallel',
                                        whiten=True,
                                        max_iter=100)
        elif technique == "FactorAnalysis":
            from sklearn.decomposition import FactorAnalysis
            extraction_object = FactorAnalysis(n_components=n_components)  #75
        elif technique == "ISOMAP":
            from sklearn import manifold
            extraction_object = manifold.Isomap(n_neighbors=5,
                                                n_components=n_components,
                                                n_jobs=-1)
        elif technique == "t-SNE":
            # non-linear counterpart to (linear) PCA
            from sklearn.manifold import TSNE
            extraction_object = TSNE(n_components=n_components,
                                     learning_rate=300,
                                     perplexity=30,
                                     early_exaggeration=12,
                                     init='random')
        elif technique == "UMAP":
            # install umap-learn for this to work
            import umap
            extraction_object = umap.UMAP(n_neighbors=50,
                                          min_dist=0.3,
                                          n_components=n_components)
        elif technique == "NMF":
            # https://www.kaggle.com/remidi/dimensionality-reduction-techniques
            from sklearn.decomposition import NMF
            extraction_object = NMF(n_components=n_components,
                                    init='nndsvdar',
                                    random_state=420)
        elif technique == "F*G":
            # super fast and nice
            from sklearn.cluster import FeatureAgglomeration
            extraction_object = FeatureAgglomeration(n_clusters=n_components,
                                                     linkage='ward')
        else:
            raise ValueError("Unknown feature extraction technique: " +
                             technique)

        start_mem_measurement()
        start = time.time()

        dataset, _ = applyFeatureExtraction(
            dataset,
            predictions,
            extraction_object,
            mode,
            merged=(len(dataset.shape) == 4 and len(predictions.shape) == 3))

        time_elapse = time.time() - start

        event = 'applying band selection method (EXTRACTION) ' + technique
        formatted_time = str(timedelta(seconds=time_elapse))
        df_column_entry_dict['Time measurement at ' + event +
                             ' [s]'] = time_elapse

        print("\n" + event + " took " + formatted_time + " seconds\n")

        event = "after applying band selection method " + technique
        stop_mem_measurement(event, df_column_entry_dict)
        print_memory_metrics(event, df_column_entry_dict)

    elif technique in data["image_compression"]["selection"]["techniques"]:

        selection_object = None
        if technique == "RandomForest":
            # Random forests are an ensemble learning method that builds many decision trees at training time and outputs the mode of the classes (classification)
            # or the mean prediction (regression) of the individual trees, correcting for a single decision tree's habit of overfitting to its training set. https://en.wikipedia.org/wiki/Random_forest
            from sklearn.ensemble import RandomForestClassifier
            selection_object = RandomForestClassifier()
        elif technique == "LogisticRegression":
            from sklearn.linear_model import LogisticRegression
            selection_object = LogisticRegression()
        elif technique == "LinearRegression":
            from sklearn.linear_model import LinearRegression
            selection_object = LinearRegression()
        elif technique == "LightGBM":
            from lightgbm import LGBMClassifier
            selection_object = LGBMClassifier()
        else:
            raise ValueError("Unknown feature selection technique: " +
                             technique)

        start_mem_measurement()
        start = time.time()

        dataset, _ = applyFeatureSelection(
            dataset,
            predictions,
            selection_object,
            n_components,
            mode,
            merged=(len(dataset.shape) == 4 and len(predictions.shape) == 3))

        time_elapse = time.time() - start

        event = 'applying band selection method (SELECTION) ' + technique
        formatted_time = str(timedelta(seconds=time_elapse))
        df_column_entry_dict['Time measurement at ' + event +
                             ' [s]'] = time_elapse

        print("\n" + event + " took " + formatted_time + " seconds\n")

        event = "after applying band selection method " + technique
        stop_mem_measurement(event, df_column_entry_dict)
        print_memory_metrics(event, df_column_entry_dict)

    print("Dataset new shape: " + str(dataset.shape))

    return dataset
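applyFeatureExtraction and applyFeatureSelection are not shown in this snippet. As a hedged sketch of what applying one of the extraction objects above to a hyperspectral cube typically involves, the (H, W, B) cube is flattened to one row per pixel, reduced along the band axis, and reshaped back; reduce_bands and the PCA settings below are illustrative.

import numpy as np
from sklearn.decomposition import PCA

def reduce_bands(cube, n_components):
    """Flatten an (H, W, B) cube to (H*W, B), reduce the band dimension, reshape back."""
    h, w, b = cube.shape
    pixels = cube.reshape(-1, b)               # one row per pixel, one column per band
    reduced = PCA(n_components=n_components, whiten=True).fit_transform(pixels)
    return reduced.reshape(h, w, n_components)

cube = np.random.rand(145, 145, 200)           # Indian Pines-sized dummy cube
print(reduce_bands(cube, 30).shape)            # -> (145, 145, 30)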
Example #4
def pb_inference(MODEL, path, quantize_afterwards=False):
    import tensorflow as tf  # Default graph is initialized when the library is imported
    import os
    from tensorflow.python.platform import gfile
    from PIL import Image
    import numpy as np
    import scipy
    from scipy import misc
    import matplotlib.pyplot as plt

    DATASET = "IndianPines"
    print("Doing PB inference for model " + MODEL + "...")

    # path = "D:/Experiments/winmltoolsQuantized/"
    GRAPH_PB_PATH = path + MODEL + ".pb"  # path to your .pb file

    with tf.Graph().as_default() as graph:  # Set default graph as graph

        with tf.Session() as sess:
            # Load the graph in graph_def
            print("load graph")

            # We load the protobuf file from disk and parse it to retrieve the unserialized graph_def
            with gfile.FastGFile(GRAPH_PB_PATH, 'rb') as f:

                # Load IndianPines dataset
                from DeepHyperX.datasets import get_dataset
                img, gt, LABEL_VALUES, IGNORED_LABELS, RGB_BANDS, palette = get_dataset(
                    DATASET, "./DeepHyperX/Datasets/")

                from DeepHyperX.utils import sample_gt
                _, test_gt = sample_gt(gt, 0.8, mode='random')

                hyperparams = {}
                from DeepHyperX.utils import get_device
                hyperparams.update({
                    'n_classes': 17,
                    'n_bands': 200,
                    'ignored_labels': IGNORED_LABELS,
                    'device': torch.device("cpu"),  #get_device(0),
                    'dataset': "IndianPines"
                })
                hyperparams['supervision'] = 'full'
                hyperparams['flip_augmentation'] = False
                hyperparams['radiation_augmentation'] = False
                hyperparams['mixture_augmentation'] = False
                hyperparams['center_pixel'] = True

                # model-specific params
                if MODEL == "cao":
                    hyperparams['patch_size'] = 9  # patch_size
                    hyperparams['batch_size'] = 100

                elif MODEL == "hu":
                    hyperparams['patch_size'] = 1  # patch_size
                    hyperparams['batch_size'] = 100
                    # output_tensor = 'mul_5:0'
                elif MODEL == "he":
                    hyperparams['patch_size'] = 7  # patch_size
                    hyperparams['batch_size'] = 40
                    # output_tensor = 'add_17:0'#bs
                    # output_tensor = 'MatMul:0'#bs
                    # output_tensor = 'mul:0'#bs
                elif MODEL == "santara":
                    hyperparams['patch_size'] = 3  # patch_size
                    hyperparams['batch_size'] = 200
                    # output_tensor = 'add_65:0'#bs
                    # output_tensor = 'LogSoftmax:0'
                    # output_tensor = 'MatMul_1:0'#bs
                    # output_tensor = 'transpose_124:0'
                    # output_tensor = 'mul_2:0'#bs
                elif MODEL == "luo_cnn":
                    hyperparams['patch_size'] = 3  # patch_size
                    hyperparams['batch_size'] = 100
                    # output_tensor = 'add_5:0'

                output_tensor = output_tensors[MODEL]

                hyperparams['test_stride'] = 1  # default is 1

                from DeepHyperX.datasets import HyperX
                img_dataset = HyperX(data=img, gt=gt, hyperparams=hyperparams)

                # Set FCN graph to the default graph
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())
                sess.graph.as_default()

                # Import a graph_def into the current default Graph (In this case, the weights are (typically) embedded in the graph)

                tf.import_graph_def(graph_def,
                                    input_map=None,
                                    return_elements=None,
                                    name="",
                                    op_dict=None,
                                    producer_op_list=None)

                # Print the name of operations in the session
                # for op in graph.get_operations():
                #     print("Operation Name :", op.name)  # Operation name
                #     print("Tensor Stats :", str(op.values()))  # Tensor name

                # INFERENCE Here
                l_input = graph.get_tensor_by_name('0:0')  # Input Tensor
                l_output = graph.get_tensor_by_name(
                    output_tensor)  # Output Tensor

                # print("Shape of input : ", tf.shape(l_input))
                # initialize variables (a frozen graph has its weights baked into constants, so this is effectively a no-op)
                sess.run(tf.global_variables_initializer())

                df_column_entry_dict = {}
                print_memory_metrics("before PB Inference",
                                     df_column_entry_dict)
                start_mem_measurement()
                start = time.time()

                # get the right input data shape and run model
                probs = test(img_dataset.data, hyperparams, sess, l_output,
                             l_input, MODEL)

                time_elapse = time.time() - start
                event = 'PB Inference'
                formatted_time = str(timedelta(seconds=time_elapse))
                df_column_entry_dict['Time measurement at ' + event +
                                     ' [s]'] = time_elapse
                print("\n" + event + " took " + formatted_time + " seconds\n")
                event = "after PB Inference"
                stop_mem_measurement(event, df_column_entry_dict)
                print_memory_metrics(event, df_column_entry_dict)

                prediction = np.argmax(probs, axis=-1)

                # goal: display accuracy metrics, incl. confusion matrix
                from DeepHyperX.utils import metrics
                run_results = metrics(
                    prediction,
                    test_gt,
                    ignored_labels=hyperparams['ignored_labels'],
                    n_classes=hyperparams['n_classes'])

                mask = np.zeros(gt.shape, dtype='bool')
                for l in IGNORED_LABELS:
                    mask[gt == l] = True
                prediction[mask] = 0

                results = []
                results.append(run_results)
                from DeepHyperX.utils import show_results
                import visdom
                viz = visdom.Visdom(env=MODEL + "_" + DATASET)

                dataframe_grid = []
                show_results(run_results,
                             viz,
                             label_values=LABEL_VALUES,
                             df_column_entry_dict=df_column_entry_dict)

                dataframe_grid.append(list(df_column_entry_dict.values()))
                import pandas as pd
                frame = pd.DataFrame(dataframe_grid,
                                     columns=list(df_column_entry_dict.keys()))
                means = frame.mean()
                frame = frame.append(means, ignore_index=True)

                from DeepHyperX.batch import STORE_EXPERIMENT_LOCATION
                frame.to_excel(path + MODEL + "_" + DATASET + ".xlsx",
                               index=False)

            if quantize_afterwards:
                print("Quantizing model " + MODEL + " after inference...\n")

                img = tf.identity(tf.get_variable(
                    name="0",
                    dtype=tf.float32,
                    shape=tuple(expected_input_shapes[MODEL])),
                                  name="0")
                # img = tf.identity(tf.get_variable(name="Const_53", dtype=tf.float32, shape=tuple(expected_input_shapes[MODEL])), name="Const_53")
                # img = tf.identity(tf.get_variable(name="foo", dtype=tf.float32, shape=tuple(expected_input_shapes[MODEL])), name="0")
                out = tf.identity(tf.get_variable(
                    name=output_tensors[MODEL][:len(output_tensors[MODEL]) -
                                               2],
                    dtype=tf.float32,
                    shape=tuple(expected_output_shapes[MODEL])),
                                  name=output_tensors[MODEL]
                                  [:len(output_tensors[MODEL]) -
                                   2])  # cut out ":0" for valid tensor name
                # out = tf.identity(tf.get_variable(name="bar", dtype=tf.float32, shape=tuple(expected_output_shapes[MODEL])), name=output_tensors[MODEL][:len(output_tensors[MODEL])-2]) # cut out ":0" for valid tensor name

                sess.run(tf.global_variables_initializer())
                converter = tf.lite.TFLiteConverter.from_session(
                    sess, [img], [out])
                tflite_model = converter.convert()
                open("converted_model.tflite", "wb").write(tflite_model)
                """
Example #5
        print_memory_metrics("got model/before training", df_column_entry_dict)
        start_mem_measurement()
        start = time.time()

        clf.fit(X_train, y_train)

        time_elapse = time.time() - start

        event = 'model.fit'
        formatted_time = str(timedelta(seconds=time_elapse))
        df_column_entry_dict['Time measurement at ' + event + ' [s]'] = time_elapse

        print("\n" + event + " took " + formatted_time + " seconds\n")

        event = "after training"
        stop_mem_measurement(event, df_column_entry_dict)
        print_memory_metrics(event, df_column_entry_dict)

        print("SVM best parameters : {}".format(clf.best_params_))
        print_memory_metrics("before inference", df_column_entry_dict)
        start_mem_measurement()
        start = time.time()
        prediction = clf.predict(img.reshape(-1, N_BANDS))
        time_elapse = time.time() - start

        event = 'model.predict'
        formatted_time = str(timedelta(seconds=time_elapse))
        df_column_entry_dict['Time measurement at ' + event + ' [s]'] = time_elapse

        print("\n" + event + " took " + formatted_time + " seconds\n")