def prune(args, model, percent, train_loader, val_loader, hyperparams, df_column_entry_dict):
    """
    Iteratively prune ``model`` and record the time and memory the run consumed.

    Parameter pruning removes the connections of the model that are least
    relevant for the neurons; the models modules are traversed module by
    module (https://arxiv.org/abs/1506.02626).
    # todo elaborate on the CONCRETE pruning procedure, especially when
    # implementing alternatives or before varying this

    Code from https://github.com/larry0123du/PyTorch-Deep-Compression
    Code explained at https://jacobgil.github.io/deeplearning/pruning-deep-learning

    :param args: parsed arguments; ``args.prune_epochs`` is read here and the
        whole object is forwarded to ``iter_prune``
    :param model: the actual model (Module subclass), not a path, not just the weights
    :param percent: forwarded to ``iter_prune`` as ``stop_percent``
    :param train_loader: forwarded to ``iter_prune``
    :param val_loader: forwarded to ``iter_prune``
    :param hyperparams: dict that is extended IN PLACE with ``topk``,
        ``interval``, ``momentum`` and ``weight_decay`` and then expanded
        into keyword arguments of ``iter_prune``
    :param df_column_entry_dict: dict collecting the measurements; the elapsed
        time is stored under ``'Time measurement at iterative pruning [s]'``
        and the dict is also handed to the memory-measurement helpers
    :return: None (this function has no return statement)
    """
    # Set additional parameters required for pruning.
    # todo future work: might want to include all of these in args only,
    # instead of passing arguments in two parameters, args and hyperparams
    hyperparams.update({
        'topk': [1, 5],  # Top k precision metrics
        'interval': int(args.prune_epochs),  # checkpointing interval
        'momentum': 0.9,
        'weight_decay': 0.005,
    })

    torch.cuda.empty_cache()
    print("emptied cache\n")
    print_memory_metrics("start of pruning", df_column_entry_dict)

    # Memory tracking is started before the clock so the timed span is
    # bracketed by the start/stop measurement calls.
    start_mem_measurement()
    began_at = time.time()
    iter_prune(
        args=args,
        train_loader=train_loader,
        val_loader=val_loader,
        the_model=model,
        stop_percent=percent,
        df_column_entry_dict=df_column_entry_dict,
        **hyperparams
    )
    elapsed_seconds = time.time() - began_at

    timed_event = 'iterative pruning'
    readable_duration = str(timedelta(seconds=elapsed_seconds))
    df_column_entry_dict['Time measurement at ' + timed_event + ' [s]'] = elapsed_seconds
    print("\n" + timed_event + " took " + readable_duration + " seconds\n")

    closing_event = "end of pruning"
    stop_mem_measurement(closing_event, df_column_entry_dict)
    print_memory_metrics(closing_event, df_column_entry_dict)
def test(test_loader, model, criterion, loggers, activations_collectors, args):
    """Run the model on the test set and log time/memory measurements.

    The evaluation itself is delegated to ``_validate``. The elapsed time and
    whatever the memory helpers store in the measurement dict are written as a
    single-row Excel sheet to ``DISTILLER_PATH + msglogger.logdir + ".xlsx"``.
    Activation statistics are logged and the collector data is saved
    afterwards.

    :param test_loader: forwarded to ``_validate``
    :param model: forwarded to ``_validate``; also used to create the
        activation collectors when none are supplied
    :param criterion: forwarded to ``_validate``
    :param loggers: forwarded to ``_validate`` and to the activation-statistics logger
    :param activations_collectors: mapping with a ``"test"`` entry, or ``None``
        to have one created via ``create_activation_stats_collectors``
    :param args: forwarded to ``_validate``
    :return: the ``(top1, top5, losses)`` triple produced by ``_validate``
    """
    msglogger.info('--- test ---------------------')
    if activations_collectors is None:
        activations_collectors = create_activation_stats_collectors(model, None)

    with collectors_context(activations_collectors["test"]) as collectors:
        measurements = {}
        phase = "validation"
        cuda_present = torch.cuda.is_available

        # NOTE(review): the source was whitespace-collapsed; only the
        # print_memory_metrics calls are assumed to be CUDA-guarded, because
        # the timer has to start unconditionally - confirm against the
        # original layout.
        if cuda_present():
            print_memory_metrics("before " + phase, measurements)
        start_mem_measurement()
        began_at = time.time()
        top1, top5, lossses = _validate(test_loader, model, criterion, loggers, args)
        measurements['Time measurement at ' + phase + ' [s]'] = time.time() - began_at
        stop_mem_measurement(phase, measurements)
        if cuda_present():
            print_memory_metrics("after " + phase, measurements)

        # Persist the measurements as one spreadsheet row (one column per key).
        import pandas as pd
        column_names = list(measurements.keys())
        single_row = list(measurements.values())
        sheet = pd.DataFrame([single_row], columns=column_names)
        sheet.to_excel(DISTILLER_PATH + msglogger.logdir + ".xlsx", index=False)

        distiller.log_activation_statsitics(-1, "test", loggers,
                                            collector=collectors['sparsity'])
        save_collectors_data(collectors, msglogger.logdir)
    return top1, top5, lossses
def _build_extraction_object(technique, n_components):
    """Instantiate the feature-extraction estimator registered under ``technique``.

    The imports stay inside the branches so that optional packages (e.g.
    ``umap``) are only needed when the matching technique is requested.

    :param technique: name of the extraction technique
    :param n_components: target number of components (``n_clusters`` for "F*G")
    :return: the configured estimator instance
    :raises ValueError: if ``technique`` has no branch in this function
    """
    if technique == "PCA":
        # HybridSN: Exploring 3D-2D CNN Feature Hierarchy for Hyperspectral Image Classification
        # Source code used: https://github.com/gokriznastic/HybridSN/blob/master/Hybrid-Spectral-Net.ipynb
        # Paper: https://arxiv.org/abs/1902.06701
        # Good parameters: 30 components for Indian Pines, 15 for Salinas and Pavia University
        from sklearn.decomposition import PCA
        return PCA(n_components=n_components, whiten=True)
    if technique == "KernelPCA":
        from sklearn.decomposition import KernelPCA
        return KernelPCA(kernel="rbf", n_components=n_components, gamma=None,
                         fit_inverse_transform=True, n_jobs=1)
    if technique == "SparsePCA":
        # Sparse PCA uses the links between PCA and the SVD to extract the main
        # components by solving a lower-order matrix approximation problem.
        from sklearn.decomposition import SparsePCA
        return SparsePCA(n_components=n_components, alpha=0.0001, n_jobs=-1)
    if technique == "LDA":
        # maximally n_classes - 1 columns,
        # https://stackoverflow.com/questions/26963454/lda-ignoring-n-components
        from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
        return LinearDiscriminantAnalysis(n_components=n_components)
    if technique == "SVD":
        from sklearn.decomposition import TruncatedSVD
        return TruncatedSVD(n_components=n_components, algorithm='randomized', n_iter=5)
    if technique == "GRP":
        from sklearn.random_projection import GaussianRandomProjection
        return GaussianRandomProjection(n_components=n_components, eps=0.5)
    if technique == "SRP":
        from sklearn.random_projection import SparseRandomProjection
        return SparseRandomProjection(n_components=n_components, density='auto',
                                      eps=0.5, dense_output=False)
    if technique == "MDS":
        # O(n^3), uses lots of memory for distance matrix (doesn't fit in 48GB),
        # doesn't fit in GPU memory either, so basically unusable
        from sklearn.manifold import MDS
        return MDS(n_components=n_components, n_init=12, max_iter=200,
                   metric=True, n_jobs=16)
    if technique == "MiniBatch":
        # takes too long
        from sklearn.decomposition import MiniBatchDictionaryLearning
        return MiniBatchDictionaryLearning(n_components=n_components, batch_size=200,
                                           alpha=1, n_iter=1)
    if technique == "LLE":
        # modified LLE requires n_neighbors >= n_components
        # execution takes 20 minutes or so, but it does work, just takes a long time
        from sklearn.manifold import LocallyLinearEmbedding
        return LocallyLinearEmbedding(n_components=n_components, n_neighbors=100,
                                      method='modified', n_jobs=4)
    if technique == "ICA":
        from sklearn.decomposition import FastICA
        return FastICA(n_components=n_components, algorithm='parallel',
                       whiten=True, max_iter=100)
    if technique == "FactorAnalysis":
        # original author's note: "75" (presumably a component count - unverified)
        from sklearn.decomposition import FactorAnalysis
        return FactorAnalysis(n_components=n_components)
    if technique == "ISOMAP":
        from sklearn import manifold
        return manifold.Isomap(n_neighbors=5, n_components=n_components, n_jobs=-1)
    if technique == "t-SNE":
        # like PCA, but non-linear (pca is linear)
        from sklearn.manifold import TSNE
        return TSNE(n_components=n_components, learning_rate=300, perplexity=30,
                    early_exaggeration=12, init='random')
    if technique == "UMAP":
        # install umap-learn for this to work
        import umap
        return umap.UMAP(n_neighbors=50, min_dist=0.3, n_components=n_components)
    if technique == "NMF":
        # https://www.kaggle.com/remidi/dimensionality-reduction-techniques
        from sklearn.decomposition import NMF
        return NMF(n_components=n_components, init='nndsvdar', random_state=420)
    if technique == "F*G":
        # super fast and nice
        from sklearn.cluster import FeatureAgglomeration
        return FeatureAgglomeration(n_clusters=n_components, linkage='ward')
    raise ValueError("Unknown feature extraction technique: " + technique)


def _build_selection_object(technique):
    """Instantiate the estimator used for feature selection under ``technique``.

    :param technique: name of the selection technique
    :return: the estimator instance, created with its default arguments
    :raises ValueError: if ``technique`` has no branch in this function
    """
    if technique == "RandomForest":
        # Random forests are an ensemble learning method that constructs a
        # multitude of decision trees at training time and outputs the mode
        # (classification) or mean prediction (regression) of the individual
        # trees; they correct for decision trees' habit of overfitting to
        # their training set. https://en.wikipedia.org/wiki/Random_forest
        from sklearn.ensemble import RandomForestClassifier
        return RandomForestClassifier()
    if technique == "LogisticRegression":
        from sklearn.linear_model import LogisticRegression
        return LogisticRegression()
    if technique == "LinearRegression":
        from sklearn.linear_model import LinearRegression
        return LinearRegression()
    if technique == "LightGBM":
        from lightgbm import LGBMClassifier
        return LGBMClassifier()
    raise ValueError("Unknown feature selection technique: " + technique)


def _apply_and_measure(kind, technique, df_column_entry_dict, apply_fn):
    """Call ``apply_fn`` between the measurement helpers and log the outcome.

    :param kind: label placed in the event name ("EXTRACTION" or "SELECTION")
    :param technique: technique name, appended to the event names
    :param df_column_entry_dict: dict receiving the elapsed time; also passed
        to the memory-measurement helpers
    :param apply_fn: zero-argument callable returning a 2-tuple whose first
        element is the transformed dataset
    :return: the first element of ``apply_fn()``'s result
    """
    start_mem_measurement()
    start = time.time()
    dataset, _ = apply_fn()
    time_elapse = time.time() - start

    event = 'applying band selection method (' + kind + ') ' + technique
    formatted_time = str(timedelta(seconds=time_elapse))
    df_column_entry_dict['Time measurement at ' + event + ' [s]'] = time_elapse
    print("\n" + event + " took " + formatted_time + " seconds\n")

    event = "after applying band selection method " + technique
    stop_mem_measurement(event, df_column_entry_dict)
    print_memory_metrics(event, df_column_entry_dict)
    return dataset


def apply_band_selection(technique, dataset, predictions, mode, n_components, df_column_entry_dict):
    """Reduce the bands of ``dataset`` with the requested technique.

    The lists of known extraction and selection techniques are read from the
    JSON file at ``DeepHyperX.batch.PARAMETER_JSON`` (keys
    ``image_compression -> extraction/selection -> techniques``).
    "IncrementalPCA" is handled by ``applyIncrementalPCA``, extraction
    techniques by ``applyFeatureExtraction`` and selection techniques by
    ``applyFeatureSelection``. A technique that is in none of these groups
    leaves the dataset untouched.

    :param technique: name of the band selection/extraction technique
    :param dataset: array-like with a ``shape`` attribute
    :param predictions: labels passed to the apply helpers; its ``shape`` is
        inspected when ``dataset`` is 4-dimensional
    :param mode: forwarded to the apply helpers; forced to ``"supervised"``
        (with a warning) for "LDA"
    :param n_components: number of components/features to keep
    :param df_column_entry_dict: dict collecting measurements, or ``None`` to
        use a throw-away dict
    :return: the (possibly transformed) dataset
    :raises ValueError: if the technique is listed in the JSON file but has no
        implementation branch
    """
    if df_column_entry_dict is None:
        # lazy way to make all later accesses work when the caller does not
        # care about the measurements
        df_column_entry_dict = {}

    print("Dataset current shape: " + str(dataset.shape))
    print_memory_metrics("before applying band selection method " + technique,
                         df_column_entry_dict)

    import json
    from DeepHyperX.batch import PARAMETER_JSON
    # Context manager so the handle is released even if json.load raises.
    with open(PARAMETER_JSON, "r") as parameter_file:
        data = json.load(parameter_file)

    if technique == "IncrementalPCA":
        # requires special method
        dataset, _ = applyIncrementalPCA(dataset, n_components)
    elif technique in data["image_compression"]["extraction"]["techniques"]:
        if technique == "LDA" and mode != "supervised":
            # only supervised is supported, y is required
            print(
                "warning: mode other than supervised detected for lda, setting it to supervised...\n"
            )
            mode = "supervised"
        extraction_object = _build_extraction_object(technique, n_components)
        dataset = _apply_and_measure(
            "EXTRACTION", technique, df_column_entry_dict,
            lambda: applyFeatureExtraction(
                dataset, predictions, extraction_object, mode,
                merged=(len(dataset.shape) == 4 and len(predictions.shape) == 3)))
    elif technique in data["image_compression"]["selection"]["techniques"]:
        selection_object = _build_selection_object(technique)
        dataset = _apply_and_measure(
            "SELECTION", technique, df_column_entry_dict,
            lambda: applyFeatureSelection(
                dataset, predictions, selection_object, n_components, mode,
                merged=(len(dataset.shape) == 4 and len(predictions.shape) == 3)))

    print("Dataset new shape: " + str(dataset.shape))
    return dataset
def pb_inference(MODEL, path, quantize_afterwards=False): import tensorflow as tf # Default graph is initialized when the library is imported import os from tensorflow.python.platform import gfile from PIL import Image import numpy as np import scipy from scipy import misc import matplotlib.pyplot as plt DATASET = "IndianPines" print("Doing PB inference for model " + MODEL + "...") # path = "D:/Experiments/winmltoolsQuantized/" GRAPH_PB_PATH = path + MODEL + ".pb" #path to your .pb file with tf.Graph().as_default() as graph: # Set default graph as graph with tf.Session() as sess: # Load the graph in graph_def print("load graph") # We load the protobuf file from the disk and parse it to retrive the unserialized graph_drf with gfile.FastGFile(GRAPH_PB_PATH, 'rb') as f: # Load IndianPines dataset from DeepHyperX.datasets import get_dataset img, gt, LABEL_VALUES, IGNORED_LABELS, RGB_BANDS, palette = get_dataset( DATASET, "./DeepHyperX/Datasets/") from DeepHyperX.utils import sample_gt _, test_gt = sample_gt(gt, 0.8, mode='random') hyperparams = {} from DeepHyperX.utils import get_device hyperparams.update({ 'n_classes': 17, 'n_bands': 200, 'ignored_labels': IGNORED_LABELS, 'device': torch.device("cpu"), #get_device(0), 'dataset': "IndianPines" }) hyperparams['supervision'] = 'full' hyperparams['flip_augmentation'] = False hyperparams['radiation_augmentation'] = False hyperparams['mixture_augmentation'] = False hyperparams['center_pixel'] = True # model-specific params if MODEL == "cao": hyperparams['patch_size'] = 9 # patch_size hyperparams['batch_size'] = 100 elif MODEL == "hu": hyperparams['patch_size'] = 1 # patch_size hyperparams['batch_size'] = 100 # output_tensor = 'mul_5:0' elif MODEL == "he": hyperparams['patch_size'] = 7 # patch_size hyperparams['batch_size'] = 40 # output_tensor = 'add_17:0'#bs # output_tensor = 'MatMul:0'#bs # output_tensor = 'mul:0'#bs elif MODEL == "santara": hyperparams['patch_size'] = 3 # patch_size hyperparams['batch_size'] = 200 # 
output_tensor = 'add_65:0'#bs # output_tensor = 'LogSoftmax:0' # output_tensor = 'MatMul_1:0'#bs # output_tensor = 'transpose_124:0' # output_tensor = 'mul_2:0'#bs elif MODEL == "luo_cnn": hyperparams['patch_size'] = 3 # patch_size hyperparams['batch_size'] = 100 # output_tensor = 'add_5:0' output_tensor = output_tensors[MODEL] hyperparams['test_stride'] = 1 # default is 1 from DeepHyperX.datasets import HyperX img_dataset = HyperX(data=img, gt=gt, hyperparams=hyperparams) # Set FCN graph to the default graph graph_def = tf.GraphDef() graph_def.ParseFromString(f.read()) sess.graph.as_default() # Import a graph_def into the current default Graph (In this case, the weights are (typically) embedded in the graph) tf.import_graph_def(graph_def, input_map=None, return_elements=None, name="", op_dict=None, producer_op_list=None) # Print the name of operations in the session # for op in graph.get_operations(): # print("Operation Name :", op.name) # Operation name # print("Tensor Stats :", str(op.values())) # Tensor name # INFERENCE Here l_input = graph.get_tensor_by_name('0:0') # Input Tensor l_output = graph.get_tensor_by_name( output_tensor) # Output Tensor # print("Shape of input : ", tf.shape(l_input)) # initialize_all_variables tf.global_variables_initializer() df_column_entry_dict = {} print_memory_metrics("before PB Inference", df_column_entry_dict) start_mem_measurement() start = time.time() # get the right input data shape and run model probs = test(img_dataset.data, hyperparams, sess, l_output, l_input, MODEL) time_elapse = time.time() - start event = 'PB Inference' formatted_time = str(timedelta(seconds=time_elapse)) df_column_entry_dict['Time measurement at ' + event + ' [s]'] = time_elapse print("\n" + event + " took " + formatted_time + " seconds\n") event = "after PB Inference" stop_mem_measurement(event, df_column_entry_dict) print_memory_metrics(event, df_column_entry_dict) prediction = np.argmax(probs, axis=-1) # goal: display accuracy metrics, incl. 
confusion matrix from DeepHyperX.utils import metrics run_results = metrics( prediction, test_gt, ignored_labels=hyperparams['ignored_labels'], n_classes=hyperparams['n_classes']) mask = np.zeros(gt.shape, dtype='bool') for l in IGNORED_LABELS: mask[gt == l] = True prediction[mask] = 0 results = [] results.append(run_results) from DeepHyperX.utils import show_results import visdom viz = visdom.Visdom(env=MODEL + "_" + DATASET) dataframe_grid = [] show_results(run_results, viz, label_values=LABEL_VALUES, df_column_entry_dict=df_column_entry_dict) dataframe_grid.append(list(df_column_entry_dict.values())) import pandas as pd frame = pd.DataFrame(dataframe_grid, columns=list(df_column_entry_dict.keys())) means = frame.mean() frame = frame.append(means, ignore_index=True) from DeepHyperX.batch import STORE_EXPERIMENT_LOCATION frame.to_excel(path + MODEL + "_" + DATASET + ".xlsx", index=False) if quantize_afterwards: print("Quantizing model " + MODEL + " after inference...\n") img = tf.identity(tf.get_variable( name="0", dtype=tf.float32, shape=tuple(expected_input_shapes[MODEL])), name="0") # img = tf.identity(tf.get_variable(name="Const_53", dtype=tf.float32, shape=tuple(expected_input_shapes[MODEL])), name="Const_53") # img = tf.identity(tf.get_variable(name="foo", dtype=tf.float32, shape=tuple(expected_input_shapes[MODEL])), name="0") out = tf.identity(tf.get_variable( name=output_tensors[MODEL][:len(output_tensors[MODEL]) - 2], dtype=tf.float32, shape=tuple(expected_output_shapes[MODEL])), name=output_tensors[MODEL] [:len(output_tensors[MODEL]) - 2]) # cut out ":0" for valid tensor name # out = tf.identity(tf.get_variable(name="bar", dtype=tf.float32, shape=tuple(expected_output_shapes[MODEL])), name=output_tensors[MODEL][:len(output_tensors[MODEL])-2]) # cut out ":0" for valid tensor name sess.run(tf.global_variables_initializer()) converter = tf.lite.TFLiteConverter.from_session( sess, [img], [out]) tflite_model = converter.convert() 
open("converted_model.tflite", "wb").write(tflite_model) """
print_memory_metrics("got model/before training", df_column_entry_dict) start_mem_measurement() start = time.time() clf.fit(X_train, y_train) time_elapse = time.time() - start event = 'model.predict' formatted_time = str(timedelta(seconds=time_elapse)) df_column_entry_dict['Time measurement at '+event+' [s]'] = time_elapse print("\n"+event+" took "+ formatted_time + " seconds\n") event = "after training" stop_mem_measurement(event, df_column_entry_dict) print_memory_metrics(event, df_column_entry_dict) print("SVM best parameters : {}".format(clf.best_params_)) print_memory_metrics("before inference", df_column_entry_dict) start_mem_measurement() start = time.time() prediction = clf.predict(img.reshape(-1, N_BANDS)) time_elapse = time.time() - start event = 'model.predict' formatted_time = str(timedelta(seconds=time_elapse)) df_column_entry_dict['Time measurement at '+event+' [s]'] = time_elapse print("\n"+event+" took "+ formatted_time + " seconds\n")