def CalculateDist(self):
    print(f"calculation begins at {time.asctime()}")
    ckpt_path = os.path.join(self.model_path, self._opt.ckpt_dir)
    epoch_files = os.listdir(ckpt_path)
    # initialize plotter
    plotter = PlotFigure(self._opt, self.model_name, IS_HIDDEN_DIST=True)
    for epoch_file in epoch_files:
        if not epoch_file.endswith('.pth'):
            continue
        # load model epoch weight
        indicators = self._model.load_model(epoch_file, CKECK_LOG=True)
        if not indicators["NEED_LOG"]:
            # skip epochs that do not need to be logged
            continue
        epoch = indicators["epoch"]
        # set model to eval
        self._model.eval()
        # container for activations, features and labels
        layer_activity = []
        # inference on test set to get layer activations
        for j, (inputs, labels) in enumerate(self.dataset):
            outputs = self._model.predict(inputs)
            # add each layer activation to the container
            for i in range(len(outputs)):
                data = outputs[i].detach().numpy()
                if len(layer_activity) < len(outputs):
                    layer_activity.append(data)
                else:
                    # layer_activity[i] = torch.cat((layer_activity[i], data), dim=0)
                    layer_activity[i] = np.concatenate((layer_activity[i], data), axis=0)
        # plot hidden output distribution for each epoch
        plotter.plot_dist(epoch, layer_activity, plot_type='hidden')
    # generate gif for hidden output distribution
    plotter.generate_dist_gif(plot_type='hidden')
def EVMethod(self):
    start = time.time()
    progress = 0
    IX_dic = {}
    IY_dic = {}
    # prepare sample indices
    Nrepeats = 10
    random_indexes = self.random_index((Nrepeats, 400))
    print("len dataset : ", len(self._test_set))
    epoch_files = os.listdir(self.path)
    for epoch_file in epoch_files:
        # update progress record
        progress += 1
        progress_ratio = float(progress / len(epoch_files)) * 100.0
        self.progress_bar = int(round(progress_ratio))
        print("\rprogress : " + str(round(progress_ratio)) + "%", end="", flush=True)
        if not epoch_file.endswith('.pth'):
            continue
        # load ckpt
        ckpt = torch.load(os.path.join(self.path, epoch_file))
        epoch = ckpt['epoch']
        # check whether this epoch needs to be calculated
        if not self.needLog(epoch):
            continue
        # load model epoch weight
        self._model.load_state_dict(ckpt['model_state_dict'])
        # set model to eval
        self._model.eval()
        # containers for activations, features and labels
        layer_activity = []
        X = []
        Y = []
        # inference on test set to get layer activations
        for j, (inputs, labels) in enumerate(self._test_set):
            outputs = self._model(inputs)
            Y.append(labels.clone().numpy())
            X.append(inputs.clone().squeeze(0).numpy())
            # add each layer activation to the container
            for i in range(len(outputs)):
                data = outputs[i]
                if len(layer_activity) < len(outputs):
                    layer_activity.append(data)
                else:
                    layer_activity[i] = torch.cat((layer_activity[i], data), dim=0)
        # for each layer, compute averaged MI
        X = np.array(X)
        Y = np.array(Y)
        IX_epoch = []
        IY_epoch = []
        for layer in layer_activity:
            layer = layer.detach().numpy()
            avg_IX, avg_IY = self._compute_averaged_IX_IY(X, Y, layer, random_indexes)
            IX_epoch.append(avg_IX)
            IY_epoch.append(avg_IY)
        if epoch not in IX_dic and epoch not in IY_dic:
            IX_dic[epoch] = IX_epoch
            IY_dic[epoch] = IY_epoch
        else:
            raise RuntimeError('epoch is duplicated')
    plotter = PlotFigure(self._opt, self.model_name)
    plotter.plot_MI_plane(IX_dic, IY_dic)
    # save plot data
    self._save_plot_data("IX_dic_data.pkl", IX_dic)
    self._save_plot_data("IY_dic_data.pkl", IY_dic)
    end = time.time()
    print(" ")
    print("total time cost : ", end - start)
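# The helper `_compute_averaged_IX_IY` used above is defined elsewhere in the
# project. For orientation, here is a minimal sketch of a binning-based
# estimator it could correspond to: activations are discretized into bins,
# each distinct (discretized) row is treated as one symbol, and discrete
# I(X;T) / I(Y;T) are averaged over the subsample index sets in
# `random_indexes`. Everything below (the function name, the 30-bin choice,
# hashing rows to symbols) is an illustrative assumption, not the project's
# actual code.
import numpy as np

def _compute_averaged_IX_IY_sketch(X, Y, layer, random_indexes, n_bins=30):
    """Estimate I(X;T) and I(Y;T) by discretizing activations T = `layer`."""

    def entropy(ids):
        # Shannon entropy (bits) of a discrete sample
        _, counts = np.unique(ids, return_counts=True, axis=0)
        p = counts / counts.sum()
        return -np.sum(p * np.log2(p))

    def discrete_mi(a_ids, b_ids):
        # I(A;B) = H(A) + H(B) - H(A,B) for discrete variables
        joint = np.stack((a_ids, b_ids), axis=1)
        return entropy(a_ids) + entropy(b_ids) - entropy(joint)

    # discretize each activation entry, then hash each row to one symbol
    bins = np.linspace(layer.min(), layer.max(), n_bins + 1)
    digitized = np.digitize(layer, bins)
    t_ids = np.array([hash(row.tobytes()) for row in digitized])
    x_ids = np.array([hash(row.tobytes()) for row in np.asarray(X)])
    y_ids = np.asarray(Y).reshape(-1)

    # average the MI estimates over the random subsamples
    IX_vals = [discrete_mi(x_ids[idx], t_ids[idx]) for idx in random_indexes]
    IY_vals = [discrete_mi(y_ids[idx], t_ids[idx]) for idx in random_indexes]
    return float(np.mean(IX_vals)), float(np.mean(IY_vals))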
def kdeMethod(self):
    saved_labelixs, label_probs = self.get_saved_labelixs_and_labelprobs()
    epoch_files = os.listdir(self.path)
    start = time.time()
    IX = {}
    IY = {}
    nats2bits = 1.0 / np.log(2)
    progress = 0
    for epoch_file in epoch_files:
        # update progress record
        progress += 1
        progress_ratio = float(progress / len(epoch_files)) * 100.0
        self.progress_bar = int(round(progress_ratio))
        print("\rprogress : " + str(round(progress_ratio)) + "%", end="", flush=True)
        if not epoch_file.endswith('.pth'):
            continue
        # load ckpt
        ckpt = torch.load(os.path.join(self.path, epoch_file))
        epoch = ckpt['epoch']
        # check whether this epoch needs to be calculated
        if not self.needLog(epoch):
            continue
        # load model epoch weight
        self._model.load_state_dict(ckpt['model_state_dict'])
        # set model to eval
        self._model.eval()
        # containers for activations, features and labels
        layer_activity = []
        X = []
        Y = []
        # inference on test set to get layer activations
        for j, (inputs, labels) in enumerate(self._test_set):
            outputs = self._model(inputs)
            Y.append(labels)
            X.append(inputs)
            # add each layer activation to the container
            for i in range(len(outputs)):
                data = outputs[i]
                if len(layer_activity) < len(outputs):
                    layer_activity.append(data)
                else:
                    layer_activity[i] = torch.cat((layer_activity[i], data), dim=0)
        # for each layer compute IX and IY
        IX_epoch = []
        IY_epoch = []
        for layer in layer_activity:
            upper = self.measure.entropy_estimator_kl(layer, 0.001)
            hM_given_X = self.measure.kde_condentropy(layer, 0.001)
            mutual_info_X = upper - hM_given_X  # IX
            IX_epoch.append(mutual_info_X.item() * nats2bits)
            # condition on each label y
            hM_given_Y_upper = 0.
            for i, key in enumerate(sorted(saved_labelixs.keys())):
                hcond_upper = self.measure.entropy_estimator_kl(layer[saved_labelixs[key]], 0.001)
                hM_given_Y_upper += label_probs[i] * hcond_upper
            mutual_info_Y = upper - hM_given_Y_upper
            IY_epoch.append(mutual_info_Y.item() * nats2bits)
        if epoch not in IX and epoch not in IY:
            IX[epoch] = IX_epoch
            IY[epoch] = IY_epoch
        else:
            raise RuntimeError('epoch is duplicated')
    plotter = PlotFigure(self._opt, self.model_name)
    plotter.plot_MI_plane(IX, IY)
    # save plot data
    self._save_plot_data("IX_dic_data.pkl", IX)
    self._save_plot_data("IY_dic_data.pkl", IY)
    end = time.time()
    print(" ")
    print("total time cost : ", end - start)
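# `self.measure.entropy_estimator_kl` and `self.measure.kde_condentropy` are
# not defined in this file. For reference, a minimal NumPy sketch of the usual
# KDE bounds they correspond to (the pairwise-distance upper bound of
# Kolchinsky & Tracey on the entropy of a Gaussian mixture); the names mirror
# the calls above, but the bodies here are illustrative assumptions. Both
# return nats, which is why the caller multiplies by nats2bits = 1/ln(2).
import numpy as np
from scipy.special import logsumexp

def entropy_estimator_kl(x, var):
    """Upper bound on H(T) for a Gaussian mixture with one component of
    variance `var` centered on each row of `x` (result in nats)."""
    x = np.asarray(x)
    n, dims = x.shape
    # pairwise squared distances between all samples
    dists = np.sum((x[:, None, :] - x[None, :, :]) ** 2, axis=-1)
    lprobs = (logsumexp(-dists / (2 * var), axis=1)
              - np.log(n) - (dims / 2.0) * np.log(2 * np.pi * var))
    return dims / 2.0 - np.mean(lprobs)

def kde_condentropy(x, var):
    """H(T|X) when each conditional is an isotropic Gaussian of variance `var`."""
    dims = np.asarray(x).shape[1]
    return (dims / 2.0) * (np.log(2 * np.pi * var) + 1)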
class Logger(object):
    def __init__(self, opt, plot_name):
        self._opt = opt
        self.plot_name = plot_name
        self.data = self.createDataDict()
        self.weight_grad = []   # to store W'
        self.weight_value = []  # to store W
        self.bias_grad = []     # to store bias'
        self.bias_value = []    # to store bias
        self.full_epoch_list = []  # record the whole list of epochs
        self.loss = []       # to record loss
        self.acc_train = []  # to record training accuracy
        self.acc_test = []   # to record test accuracy
        self.plotter = PlotFigure(self._opt, self.plot_name)
        self.recorded_epochs = []
        # first list for original SVD, second for normalized SVD
        self.svds = [[], []]
        self._process_param_names = re.compile(r"^D.+(weight|bias)$")
        self.layer_weight_mean = []
        self.weight_dist_images = []
        self.grad_dist_images = []

    def createDataDict(self):
        layer_size = len(self._opt.layer_dims) - 1
        epoch_num = self._opt.max_epoch
        source_keys = ["weight_value", "weight_grad", "bias", "bias_grad"]
        type_keys = ["mean", "std", "l2n"]
        epoch_keys = ["epoch" + str(i) for i in range(epoch_num)]
        layer_keys = ["layer" + str(i) for i in range(layer_size)]
        data = {}
        for source_key in source_keys:
            data[source_key] = {}
            for type_key in type_keys:
                data[source_key][type_key] = {}
                for epoch_key in epoch_keys:
                    data[source_key][type_key][epoch_key] = {}
                    for layer_key in layer_keys:
                        data[source_key][type_key][epoch_key][layer_key] = 0
        return data

    def log(self, model):
        # record model parameters; on the first call of an epoch the lists are
        # empty, so create one entry per layer, otherwise concatenate along the
        # batch dimension
        if (len(self.weight_grad) == 0 and len(self.weight_value) == 0
                and len(self.bias_grad) == 0 and len(self.bias_value) == 0):
            for name, param in model.named_parameters():
                if self._process_param_names.match(name):
                    grad = param.grad.clone().detach().unsqueeze(0)
                    data = param.data.clone().detach().unsqueeze(0)
                    if name.endswith('weight'):
                        self.weight_grad.append(grad)
                        self.weight_value.append(data)
                    if name.endswith('bias'):
                        self.bias_grad.append(grad)
                        self.bias_value.append(data)
        else:
            index = 0
            for name, param in model.named_parameters():
                if self._process_param_names.match(name):
                    grad = param.grad.clone().detach().unsqueeze(0)
                    data = param.data.clone().detach().unsqueeze(0)
                    if name.endswith('weight'):
                        self.weight_grad[index] = torch.cat(
                            (self.weight_grad[index], grad), dim=0)
                        self.weight_value[index] = torch.cat(
                            (self.weight_value[index], data), dim=0)
                    if name.endswith('bias'):
                        self.bias_grad[index] = torch.cat(
                            (self.bias_grad[index], grad), dim=0)
                        self.bias_value[index] = torch.cat(
                            (self.bias_value[index], data), dim=0)
                        index += 1  # weight and bias of one layer share an index

    def log_acc_loss(self, epoch, record_type, acc, loss=None):
        if record_type == 'train':
            # record acc and loss for the training process
            self.acc_train.append(acc)
            self.loss.append(loss)
            self.full_epoch_list.append(epoch)  # append epoch only in the train case
        elif record_type == 'test':
            # record acc for the test process
            self.acc_test.append(acc)
        else:
            raise ValueError('not a valid record type')

    def update(self, epoch):
        self.recorded_epochs.append(epoch)
        epoch_key = "epoch" + str(epoch)
        for i in range(len(self.weight_grad)):
            layer_key = "layer" + str(i)
            if self._opt.mean:
                self.data["weight_value"]["mean"][epoch_key][layer_key] = \
                    self.dataParser(i, "mean", isWeight=True, isGrad=False)
                self.data["weight_grad"]["mean"][epoch_key][layer_key] = \
                    self.dataParser(i, "mean", isWeight=True, isGrad=True)
                self.data["bias"]["mean"][epoch_key][layer_key] = \
                    self.dataParser(i, "mean", isWeight=False, isGrad=False)
                self.data["bias_grad"]["mean"][epoch_key][layer_key] = \
                    self.dataParser(i, "mean", isWeight=False, isGrad=True)
            if self._opt.std:
                self.data["weight_value"]["std"][epoch_key][layer_key] = \
                    self.dataParser(i, "std", isWeight=True, isGrad=False)
                self.data["weight_grad"]["std"][epoch_key][layer_key] = \
                    self.dataParser(i, "std", isWeight=True, isGrad=True)
                self.data["bias"]["std"][epoch_key][layer_key] = \
                    self.dataParser(i, "std", isWeight=False, isGrad=False)
                self.data["bias_grad"]["std"][epoch_key][layer_key] = \
                    self.dataParser(i, "std", isWeight=False, isGrad=True)
            if self._opt.l2n:
                self.data["weight_value"]["l2n"][epoch_key][layer_key] = \
                    self.dataParser(i, "l2n", isWeight=True, isGrad=False)
                self.data["weight_grad"]["l2n"][epoch_key][layer_key] = \
                    self.dataParser(i, "l2n", isWeight=True, isGrad=True)
                self.data["bias"]["l2n"][epoch_key][layer_key] = \
                    self.dataParser(i, "l2n", isWeight=False, isGrad=False)
                self.data["bias_grad"]["l2n"][epoch_key][layer_key] = \
                    self.dataParser(i, "l2n", isWeight=False, isGrad=True)
        self.plot_dist_epoch(epoch)
        self.calculate_svd()
        self.clear()

    def dataParser(self, layer, _type="mean", isWeight=True, isGrad=True, method=1):
        if isWeight and isGrad:
            tensor = self.weight_grad[layer]
        elif isWeight and not isGrad:
            tensor = self.weight_value[layer]
        elif not isWeight and isGrad:
            tensor = self.bias_grad[layer]
        else:
            tensor = self.bias_value[layer]

        if _type == "mean":
            if method == 1:
                # METHOD 1: batch-average, then take the norm for each layer
                mean = torch.mean(tensor, dim=0)
                if isWeight and not isGrad:
                    self.layer_weight_mean.append(mean)
                return torch.norm(mean).item()
            if method == 2:
                # METHOD 2: average the abs of all w in a layer in an epoch
                mean = torch.mean(tensor.abs())
                # mean = torch.mean(tensor)
                return mean.item()
            if method == 3:
                # METHOD 3: average within each layer, then norm along the batch
                reshaped_tensor = torch.reshape(tensor, (tensor.shape[0], -1))
                mean = torch.mean(reshaped_tensor, dim=1)
                return torch.norm(mean).item()
        elif _type == "std":
            if method == 1:
                # METHOD 1: std along batches, then take the norm for each layer
                std = torch.std(tensor, dim=0)
                return torch.norm(std).item()
            if method == 2:
                # METHOD 2: std of the batch-averaged values of a layer in an epoch
                mean = torch.mean(tensor, dim=0)
                return torch.std(mean).item()
            if method == 3:
                # METHOD 3: std within each layer, then norm along the batch
                reshaped_tensor = torch.reshape(tensor, (tensor.shape[0], -1))
                std = torch.std(reshaped_tensor, dim=1)
                return torch.norm(std).item()
        elif _type == "l2n":
            return torch.norm(tensor).item()
        else:
            raise RuntimeError('error in calculating weight and gradient data')

    def calculate_svd(self):
        one_epoch_original_weight = []
        one_epoch_normalized_weight = []
        # calculate the weight svd; both the original and the normalized
        # singular values are kept
        for i in range(len(self.weight_value)):
            mean_weight = self.layer_weight_mean[i]
            _, weight_sigma, _ = torch.svd(mean_weight, compute_uv=False)
            weight_sigma_tmp = weight_sigma.numpy()
            one_epoch_normalized_weight.append(weight_sigma_tmp / weight_sigma_tmp[0])
            one_epoch_original_weight.append(weight_sigma_tmp)
        self.svds[0].append(one_epoch_original_weight)    # [Nepoch][Nlayer][singular values]
        self.svds[1].append(one_epoch_normalized_weight)  # [Nepoch][Nlayer][singular values]
        # NOTE: svd for grad, not used presently
        # for grad in self.weight_grad:
        #     mean_grad = torch.mean(grad, dim=0)
        #     _, grad_sigma, _ = torch.svd(mean_grad, compute_uv=False)
        #     one_epoch_grad.append(grad_sigma.numpy())
        # self.svds[1].append(one_epoch_grad)

    def clear(self):
        self.weight_grad = []
        self.weight_value = []
        self.bias_grad = []
        self.bias_value = []
        self.layer_weight_mean = []

    def plot_figures(self, mean_and_std=True, sv=True, acc_loss=True, dist_gif=True):
        # save epoch data
        if mean_and_std or sv or acc_loss or dist_gif:
            self.plotter.save_plot_data("recorded_epochs_data.pkl", self.recorded_epochs)
        # plot mean and std and save the data
        if mean_and_std:
            epoch_mean, epoch_std = self.get_mean_std()
            self.plotter.plot_mean_std(self.recorded_epochs, epoch_mean, epoch_std)
            self.plotter.save_plot_data("mean_data.pkl", epoch_mean)
            self.plotter.save_plot_data("std_data.pkl", epoch_std)
        # plot singular values and save the data
        if sv:
            self.plotter.plot_svd(self.recorded_epochs, self.svds)
            self.plotter.save_plot_data("svds_data.pkl", self.svds)
        # plot acc and loss and save the data
        if acc_loss:
            self.plotter.plot_acc_loss(self.full_epoch_list, self.acc_train,
                                       self.acc_test, self.loss)
            self.plotter.save_plot_data("full_epoch_list_data.pkl", self.full_epoch_list)
            self.plotter.save_plot_data("acc_train_data.pkl", self.acc_train)
            self.plotter.save_plot_data("acc_test_data.pkl", self.acc_test)
            self.plotter.save_plot_data("loss_data.pkl", self.loss)
        # generate gifs for the dist plots of weights and weight grads, in parallel
        if dist_gif:
            import threading
            threads = []
            threads.append(threading.Thread(
                target=self.plotter.generate_dist_gif_by_images,
                args=('weight', self.weight_dist_images)))
            threads.append(threading.Thread(
                target=self.plotter.generate_dist_gif_by_images,
                args=('grad', self.grad_dist_images)))
            for t in threads:
                t.daemon = True
                t.start()
            for t in threads:
                t.join()

    def plot_dist_epoch(self, epoch):
        mean_weight, mean_grad = self.cal_batch_mean()
        weight_image = self.plotter.plot_dist(epoch, mean_weight, plot_type='weight')
        grad_image = self.plotter.plot_dist(epoch, mean_grad, plot_type='grad')
        self.weight_dist_images.append(weight_image)
        self.grad_dist_images.append(grad_image)

    def cal_batch_mean(self):
        mean_weight = []
        mean_grad = []
        for layer in range(len(self.weight_value)):
            tmp_weight = self.layer_weight_mean[layer].numpy()
            tmp_grad = torch.mean(self.weight_grad[layer], dim=0).numpy()
            mean_weight.append(tmp_weight)
            mean_grad.append(tmp_grad)
        return mean_weight, mean_grad

    def get_mean_std(self):
        epoch_std = []
        epoch_mean = []
        for epoch in self.recorded_epochs:
            epoch_key = 'epoch' + str(epoch)
            layer_std = []
            layer_mean = []
            for layer in range(len(self._opt.layer_dims) - 1):
                layer_key = 'layer' + str(layer)
                layer_mean.append(self.data["weight_grad"]["mean"][epoch_key][layer_key])
                layer_std.append(self.data["weight_grad"]["std"][epoch_key][layer_key])
            epoch_mean.append(layer_mean)
            epoch_std.append(layer_std)
        epoch_mean = np.array(epoch_mean)
        epoch_std = np.array(epoch_std)
        return epoch_mean, epoch_std

    def __str__(self):
        pprint.pprint(self.data)
        return " "
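# A minimal sketch of how this Logger is intended to be driven from a training
# loop. `opt`, `model`, `train_loader`, `test_loader`, and the helpers
# `train_one_batch` / `evaluate` are hypothetical placeholders for whatever the
# surrounding project provides; `log` must be called after backward() so that
# param.grad is populated.
logger = Logger(opt, plot_name="example_run")
for epoch in range(opt.max_epoch):
    for inputs, labels in train_loader:
        loss = train_one_batch(model, inputs, labels)  # forward + backward + step
        logger.log(model)  # accumulate per-batch weights and grads
    logger.log_acc_loss(epoch, 'train', acc=evaluate(model, train_loader), loss=loss)
    logger.log_acc_loss(epoch, 'test', acc=evaluate(model, test_loader))
    logger.update(epoch)  # reduce batch records into self.data, plot dists, then clear
logger.plot_figures()  # mean/std curves, singular values, acc/loss, dist gifs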
def EVMethod(self):
    start = time.time()
    print(f"calculation begins at {time.asctime()}")
    IX_dic = {}
    IY_dic = {}
    # prepare sample indices
    Nrepeats = 1
    random_indexes = self.random_index((Nrepeats, 1000))
    print("len dataset : ", len(self.dataset.dataset))
    ckpt_path = os.path.join(self.model_path, self._opt.ckpt_dir)
    epoch_files = os.listdir(ckpt_path)
    num_epoch_files = len(epoch_files)
    progress = 0
    for epoch_file in epoch_files:
        if not epoch_file.endswith('.pth'):
            continue
        # update running progress record
        progress += 1
        progress_ratio = float(progress / num_epoch_files) * 100.0
        # self.progress_bar = int(progress_ratio)
        print(f"\rprogress : {progress_ratio:.4f}%", end="", flush=True)
        # load model epoch weight
        indicators = self._model.load_model(epoch_file, CKECK_LOG=True)
        if not indicators["NEED_LOG"]:
            # skip epochs that do not need to be logged
            continue
        epoch = indicators["epoch"]
        # set model to eval
        self._model.eval()
        # containers for activations, features and labels
        layer_activity = []
        X = np.array([])
        Y = np.array([])
        # inference on test set to get layer activations
        for j, (inputs, labels) in enumerate(self.dataset):
            outputs = self._model.predict(inputs)
            np_labels = labels.clone().numpy().reshape(-1, 1)
            np_inputs = inputs.clone().squeeze(0).numpy()
            X = np.vstack((X, np_inputs)) if len(X) != 0 else np_inputs
            Y = np.vstack((Y, np_labels)) if len(Y) != 0 else np_labels
            # add each layer activation to the container
            for i in range(len(outputs)):
                data = outputs[i]
                if len(layer_activity) < len(outputs):
                    layer_activity.append(data)
                else:
                    layer_activity[i] = torch.cat((layer_activity[i], data), dim=0)
        # for each layer, compute averaged MI
        IX_epoch = []
        IY_epoch = []
        for layer in layer_activity:
            layer = layer.detach().numpy()
            avg_IX, avg_IY = self._compute_averaged_IX_IY(X, Y, layer, random_indexes)
            IX_epoch.append(avg_IX)
            IY_epoch.append(avg_IY)
        if epoch not in IX_dic and epoch not in IY_dic:
            IX_dic[epoch] = IX_epoch
            IY_dic[epoch] = IY_epoch
        else:
            raise RuntimeError('epoch is duplicated')
    # save data, then plot
    plotter = PlotFigure(self._opt, self.model_name)
    plotter.save_plot_data("IX_dic_data.pkl", IX_dic)
    plotter.save_plot_data("IY_dic_data.pkl", IY_dic)
    plotter.plot_MI_plane(IX_dic, IY_dic)
    end = time.time()
    print(" ")
    print("total time cost : ", end - start)
def kdeMethod(self):
    start = time.time()
    saved_labelixs, label_probs = self.get_saved_labelixs_and_labelprobs()
    ckpt_path = os.path.join(self.model_path, self._opt.ckpt_dir)
    epoch_files = os.listdir(ckpt_path)
    num_epoch_files = len(epoch_files)
    IX = {}
    IY = {}
    nats2bits = 1.0 / np.log(2)
    progress = 0
    for epoch_file in epoch_files:
        if not epoch_file.endswith('.pth'):
            continue
        # update running progress record
        progress += 1
        progress_ratio = float(progress / num_epoch_files) * 100.0
        # self.progress_bar = int(progress_ratio)
        print(f"\rprogress : {progress_ratio:.4f}%", end="", flush=True)
        # load model epoch weight
        indicators = self._model.load_model(epoch_file, CKECK_LOG=True)
        if not indicators["NEED_LOG"]:
            # skip epochs that do not need to be logged
            continue
        epoch = indicators["epoch"]
        # set model to eval
        self._model.eval()
        # containers for activations, features and labels
        layer_activity = []
        X = []
        Y = []
        # inference on test set to get layer activations
        for j, (inputs, labels) in enumerate(self.dataset):
            outputs = self._model.predict(inputs)
            Y.append(labels)
            X.append(inputs)
            # add each layer activation to the container
            for i in range(len(outputs)):
                data = outputs[i]
                if len(layer_activity) < len(outputs):
                    layer_activity.append(data)
                else:
                    layer_activity[i] = torch.cat((layer_activity[i], data), dim=0)
        # for each layer compute IX and IY
        IX_epoch = []
        IY_epoch = []
        for layer in layer_activity:
            upper = measure.entropy_estimator_kl(layer, 0.001)
            hM_given_X = measure.kde_condentropy(layer, 0.001)
            mutual_info_X = upper - hM_given_X  # IX
            IX_epoch.append(mutual_info_X.item() * nats2bits)
            # condition on each label y
            hM_given_Y_upper = 0.
            for i, key in enumerate(sorted(saved_labelixs.keys())):
                hcond_upper = measure.entropy_estimator_kl(layer[saved_labelixs[key]], 0.001)
                hM_given_Y_upper += label_probs[i] * hcond_upper
            mutual_info_Y = upper - hM_given_Y_upper
            IY_epoch.append(mutual_info_Y.item() * nats2bits)
        if epoch not in IX and epoch not in IY:
            IX[epoch] = IX_epoch
            IY[epoch] = IY_epoch
        else:
            raise RuntimeError('epoch is duplicated')
    # save data, then plot
    plotter = PlotFigure(self._opt, self.model_name)
    plotter.save_plot_data("IX_dic_data.pkl", IX)
    plotter.save_plot_data("IY_dic_data.pkl", IY)
    plotter.plot_MI_plane(IX, IY)
    end = time.time()
    print(" ")
    print("total time cost : ", end - start)
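# `get_saved_labelixs_and_labelprobs` is referenced above but defined
# elsewhere. The sketch below shows the shape of data kdeMethod expects from
# it: one boolean index mask per class over the test set (used to slice
# `layer[saved_labelixs[key]]`) plus the empirical label probabilities,
# ordered by sorted label. The body is an illustrative assumption.
import numpy as np

def get_saved_labelixs_and_labelprobs_sketch(labels):
    """labels: 1-D integer array of class labels for the whole test set."""
    saved_labelixs = {int(y): labels == y for y in np.unique(labels)}
    counts = np.array([mask.sum() for _, mask in sorted(saved_labelixs.items())])
    label_probs = counts / counts.sum()  # empirical P(Y = y), sorted by label
    return saved_labelixs, label_probs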
class Logger(object):
    def __init__(self, opt, plot_name):
        self._opt = opt
        self.plot_name = plot_name
        self.log_seperator = self._opt.log_seperator
        self.log_frequency = self._opt.log_frequency
        self.data = self.createDataDict()
        self.weight_grad = []   # to store W'
        self.weight_value = []  # to store W
        self.bias_grad = []     # to store bias'
        self.bias_value = []    # to store bias
        self.plotter = PlotFigure(self._opt, self.plot_name)
        self.recorded_epochs = []
        # first list for original SVD, second for normalized SVD
        self.svds = [[], []]

    def createDataDict(self):
        layer_size = len(self._opt.layer_dims) - 1
        epoch_num = self._opt.max_epoch
        source_keys = ["weight_value", "weight_grad", "bias", "bias_grad"]
        type_keys = ["mean", "std", "l2n"]
        epoch_keys = ["epoch" + str(i) for i in range(epoch_num)]
        layer_keys = ["layer" + str(i) for i in range(layer_size)]
        data = {}
        for source_key in source_keys:
            data[source_key] = {}
            for type_key in type_keys:
                data[source_key][type_key] = {}
                for epoch_key in epoch_keys:
                    data[source_key][type_key][epoch_key] = {}
                    for layer_key in layer_keys:
                        data[source_key][type_key][epoch_key][layer_key] = 0
        return data

    def update(self, epoch):
        if self.needLog(epoch):
            self.recorded_epochs.append(epoch)
            epoch_key = "epoch" + str(epoch)
            for i in range(len(self.weight_grad)):
                layer_key = "layer" + str(i)
                if self._opt.mean:
                    self.data["weight_value"]["mean"][epoch_key][layer_key] = \
                        self.dataParser(i, "mean", isWeight=True, isGrad=False)
                    self.data["weight_grad"]["mean"][epoch_key][layer_key] = \
                        self.dataParser(i, "mean", isWeight=True, isGrad=True)
                    self.data["bias"]["mean"][epoch_key][layer_key] = \
                        self.dataParser(i, "mean", isWeight=False, isGrad=False)
                    self.data["bias_grad"]["mean"][epoch_key][layer_key] = \
                        self.dataParser(i, "mean", isWeight=False, isGrad=True)
                if self._opt.std:
                    self.data["weight_value"]["std"][epoch_key][layer_key] = \
                        self.dataParser(i, "std", isWeight=True, isGrad=False)
                    self.data["weight_grad"]["std"][epoch_key][layer_key] = \
                        self.dataParser(i, "std", isWeight=True, isGrad=True)
                    self.data["bias"]["std"][epoch_key][layer_key] = \
                        self.dataParser(i, "std", isWeight=False, isGrad=False)
                    self.data["bias_grad"]["std"][epoch_key][layer_key] = \
                        self.dataParser(i, "std", isWeight=False, isGrad=True)
                if self._opt.l2n:
                    self.data["weight_value"]["l2n"][epoch_key][layer_key] = \
                        self.dataParser(i, "l2n", isWeight=True, isGrad=False)
                    self.data["weight_grad"]["l2n"][epoch_key][layer_key] = \
                        self.dataParser(i, "l2n", isWeight=True, isGrad=True)
                    self.data["bias"]["l2n"][epoch_key][layer_key] = \
                        self.dataParser(i, "l2n", isWeight=False, isGrad=False)
                    self.data["bias_grad"]["l2n"][epoch_key][layer_key] = \
                        self.dataParser(i, "l2n", isWeight=False, isGrad=True)
            self.calculate_svd()
        # clear accumulated batch records whether or not this epoch was logged
        self.clear()

    def calculate_svd(self):
        one_epoch_original_weight = []
        one_epoch_normalized_weight = []
        # calculate the weight svd; both the original and the normalized
        # singular values are kept
        for weight in self.weight_value:
            mean_weight = torch.mean(weight, dim=0)
            _, weight_sigma, _ = torch.svd(mean_weight, compute_uv=False)
            weight_sigma_tmp = weight_sigma.numpy()
            one_epoch_normalized_weight.append(weight_sigma_tmp / weight_sigma_tmp[0])
            one_epoch_original_weight.append(weight_sigma_tmp)
        self.svds[0].append(one_epoch_original_weight)    # [Nepoch][Nlayer][singular values]
        self.svds[1].append(one_epoch_normalized_weight)  # [Nepoch][Nlayer][singular values]
        # NOTE: svd for grad, not used presently
        # for grad in self.weight_grad:
        #     mean_grad = torch.mean(grad, dim=0)
        #     _, grad_sigma, _ = torch.svd(mean_grad, compute_uv=False)
        #     one_epoch_grad.append(grad_sigma.numpy())
        # self.svds[1].append(one_epoch_grad)

    def clear(self):
        self.weight_grad = []
        self.weight_value = []
        self.bias_grad = []
        self.bias_value = []

    def log(self, model):
        # on the first call of an epoch the lists are empty, so create one
        # entry per layer; otherwise concatenate along the batch dimension
        if (len(self.weight_grad) == 0 and len(self.weight_value) == 0
                and len(self.bias_grad) == 0 and len(self.bias_value) == 0):
            for name, param in model.named_parameters():
                grad = param.grad.clone().detach().unsqueeze(0)
                data = param.data.clone().detach().unsqueeze(0)
                if name.endswith('weight'):
                    self.weight_grad.append(grad)
                    self.weight_value.append(data)
                if name.endswith('bias'):
                    self.bias_grad.append(grad)
                    self.bias_value.append(data)
        else:
            index = 0
            for name, param in model.named_parameters():
                grad = param.grad.clone().detach().unsqueeze(0)
                data = param.data.clone().detach().unsqueeze(0)
                if name.endswith('weight'):
                    self.weight_grad[index] = torch.cat(
                        (self.weight_grad[index], grad), dim=0)
                    self.weight_value[index] = torch.cat(
                        (self.weight_value[index], data), dim=0)
                if name.endswith('bias'):
                    self.bias_grad[index] = torch.cat(
                        (self.bias_grad[index], grad), dim=0)
                    self.bias_value[index] = torch.cat(
                        (self.bias_value[index], data), dim=0)
                    index += 1  # weight and bias of one layer share an index

    def dataParser(self, layer, _type="mean", isWeight=True, isGrad=True, method=1):
        if isWeight and isGrad:
            tensor = self.weight_grad[layer]
        elif isWeight and not isGrad:
            tensor = self.weight_value[layer]
        elif not isWeight and isGrad:
            tensor = self.bias_grad[layer]
        else:
            tensor = self.bias_value[layer]

        if _type == "mean":
            if method == 1:
                # METHOD 1: batch-average, then take the norm for each layer
                mean = torch.mean(tensor, dim=0)
                return torch.norm(mean).item()
            if method == 2:
                # METHOD 2: average the abs of all w in a layer in an epoch
                mean = torch.mean(tensor.abs())
                # mean = torch.mean(tensor)
                return mean.item()
            if method == 3:
                # METHOD 3: average within each layer, then norm along the batch
                reshaped_tensor = torch.reshape(tensor, (tensor.shape[0], -1))
                mean = torch.mean(reshaped_tensor, dim=1)
                return torch.norm(mean).item()
        elif _type == "std":
            if method == 1:
                # METHOD 1: std along batches, then take the norm for each layer
                std = torch.std(tensor, dim=0)
                return torch.norm(std).item()
            if method == 2:
                # METHOD 2: std of the batch-averaged values of a layer in an epoch
                mean = torch.mean(tensor, dim=0)
                return torch.std(mean).item()
            if method == 3:
                # METHOD 3: std within each layer, then norm along the batch
                reshaped_tensor = torch.reshape(tensor, (tensor.shape[0], -1))
                std = torch.std(reshaped_tensor, dim=1)
                return torch.norm(std).item()
        elif _type == "l2n":
            return torch.norm(tensor).item()
        else:
            raise RuntimeError('error in calculating weight and gradient data')

    def needLog(self, epoch):
        # Only log activity for some epochs, mainly to make things run faster.
        assert len(self.log_seperator) == len(self.log_frequency), \
            "log_seperator and log_frequency must have the same length"
        for idx, val in enumerate(self.log_seperator):
            if epoch < val:
                return epoch % self.log_frequency[idx] == 0
        return False  # epochs beyond the last separator are never logged

    def get_mean_std(self):
        epoch_std = []
        epoch_mean = []
        for epoch in self.recorded_epochs:
            epoch_key = 'epoch' + str(epoch)
            layer_std = []
            layer_mean = []
            for layer in range(len(self._opt.layer_dims) - 1):
                layer_key = 'layer' + str(layer)
                layer_mean.append(self.data["weight_grad"]["mean"][epoch_key][layer_key])
                layer_std.append(self.data["weight_grad"]["std"][epoch_key][layer_key])
            epoch_mean.append(layer_mean)
            epoch_std.append(layer_std)
        epoch_mean = np.array(epoch_mean)
        epoch_std = np.array(epoch_std)
        return epoch_mean, epoch_std

    def plot_figures(self, mean_and_std=True, svd=True):
        if mean_and_std:
            epoch_mean, epoch_std = self.get_mean_std()
            self.plotter.plot_mean_std(self.recorded_epochs, epoch_mean, epoch_std)
            # save plot data
            self._save_plot_data("recorded_epochs_data.pkl", self.recorded_epochs)
            self._save_plot_data("mean_data.pkl", epoch_mean)
            self._save_plot_data("std_data.pkl", epoch_std)
        if svd:
            self.plotter.plot_svd(self.recorded_epochs, self.svds)
            # save plot data
            if not mean_and_std:
                self._save_plot_data("recorded_epochs_data.pkl", self.recorded_epochs)
            self._save_plot_data("svds_data.pkl", self.svds)

    def _save_plot_data(self, fname, data):
        save_root = "./saved_plot_data"
        save_path = os.path.join(save_root, fname)
        if not os.path.exists(save_root):
            os.mkdir(save_root)
        with open(save_path, "wb") as f:
            pickle.dump(data, f)

    def __str__(self):
        pprint.pprint(self.data)
        return " "
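# Worked example of the schedule encoded by `log_seperator` / `log_frequency`
# (values here are hypothetical). Epochs below the first separator are logged
# every epoch, epochs in [100, 1000) every 10th, epochs in [1000, 10000) every
# 100th, and anything past the last separator is never logged:
#
#   opt.log_seperator = [100, 1000, 10000]
#   opt.log_frequency = [1, 10, 100]
#
#   needLog(5)     -> True   (5 < 100 and 5 % 1 == 0)
#   needLog(105)   -> False  (105 < 1000 but 105 % 10 != 0)
#   needLog(1200)  -> True   (1200 < 10000 and 1200 % 100 == 0)
#   needLog(20000) -> False  (past the last separator)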