def project_trajectory(dir_file, w, s, dataset, model_name, model_files,
                       dir_type='weights', proj_method='cos'):
    """
    Project the optimization trajectory onto the given two directions.

    Args:
      dir_file: the h5 file that contains the directions
      w: weights of the final model
      s: states of the final model
      dataset: dataset identifier forwarded to model_loader.load
      model_name: the name of the model
      model_files: the checkpoint files
      dir_type: the type of the direction, 'weights' or 'states'
      proj_method: projection method forwarded to project_2D (default 'cos');
                   it is also embedded in the output filename

    Returns:
      proj_file: the projection filename

    Raises:
      ValueError: if dir_type is neither 'weights' nor 'states'
    """
    proj_file = dir_file + '_proj_' + proj_method + '.h5'
    if os.path.exists(proj_file):
        print('The projection file exists! No projection is performed unless %s is deleted' % proj_file)
        return proj_file

    # Fail fast with a clear message; otherwise an unknown dir_type would only
    # surface as an unbound `d` in the middle of the checkpoint loop.
    if dir_type not in ('weights', 'states'):
        raise ValueError("dir_type must be 'weights' or 'states', got %r" % (dir_type,))

    # read directions and convert them to vectors
    directions = net_plotter.load_directions(dir_file)
    dx = nplist_to_tensor(directions[0])
    dy = nplist_to_tensor(directions[1])

    xcoord, ycoord = [], []
    for model_file in model_files:
        net2 = model_loader.load(dataset, model_name, model_file)
        # difference between this checkpoint and the reference model
        if dir_type == 'weights':
            w2 = net_plotter.get_weights(net2)
            d = net_plotter.get_diff_weights(w, w2)
        else:
            s2 = net2.state_dict()
            d = net_plotter.get_diff_states(s, s2)
        d = tensorlist_to_tensor(d)

        x, y = project_2D(d, dx, dy, proj_method)
        print("%s (%.4f, %.4f)" % (model_file, x, y))

        xcoord.append(x)
        ycoord.append(y)

    # The context manager closes the file even if writing a dataset fails.
    with h5py.File(proj_file, 'w') as f:
        f['proj_xcoord'] = np.array(xcoord)
        f['proj_ycoord'] = np.array(ycoord)

    return proj_file
def convert_matlab_pca_data(args, direction_matlab_name, direction_python_name):
    """
    Convert PCA results stored in a MATLAB-produced h5 file into a direction
    file whose directions have the same shape as the model.

    The input file must contain the datasets 'explained_variance_ratio_',
    'explained_variance_', 'directionx' and 'directiony'. The output file
    receives the two variance datasets unchanged plus the lists 'xdirection'
    and 'ydirection' written through h5_util.write_list.

    Args:
      args: namespace providing model_folder, prefix, max_epoch and suffix
            (used to locate the final checkpoint), dataset and model
            (forwarded to model_loader.load), dir_type ('weights' or
            'states') and ignore ('biasbn' to apply net_plotter.ignore_biasbn)
      direction_matlab_name: path of the h5 file holding the matlab pca results
      direction_python_name: path of the h5 file to create

    Raises:
      ValueError: if args.dir_type is neither 'weights' nor 'states'
    """
    if args.dir_type not in ('weights', 'states'):
        raise ValueError("dir_type must be 'weights' or 'states', got %r" % (args.dir_type,))

    last_model_file = args.model_folder + '/' + args.prefix + str(args.max_epoch) + args.suffix
    net = model_loader.load(args.dataset, args.model, last_model_file)

    # read in matlab pca results; everything is copied into memory so the
    # input can be closed before the output file is created
    with h5py.File(direction_matlab_name, 'r') as f:
        explained_variance_ratio = np.array(f['explained_variance_ratio_'])
        explained_variance = np.array(f['explained_variance_'])
        pc1 = np.array(f['directionx'])
        pc2 = np.array(f['directiony'])

    # The reference is taken from the model loaded above. Previously the
    # 'states' branch referenced a name `s` that was never defined here.
    if args.dir_type == 'weights':
        reference = net_plotter.get_weights(net)
    else:
        reference = net.state_dict()

    # convert vectorized directions to the same shape as models to save in h5 file.
    xdirection = npvec_to_tensorlist(pc1, reference)
    ydirection = npvec_to_tensorlist(pc2, reference)

    if args.ignore == 'biasbn':
        net_plotter.ignore_biasbn(xdirection)
        net_plotter.ignore_biasbn(ydirection)

    # Create the output only after the conversion succeeded, so a failure
    # above does not leave a truncated direction file behind.
    with h5py.File(direction_python_name, 'w') as fpy:
        fpy['explained_variance_ratio_'] = explained_variance_ratio
        fpy['explained_variance_'] = explained_variance
        h5_util.write_list(fpy, 'xdirection', xdirection)
        h5_util.write_list(fpy, 'ydirection', ydirection)

    print('PCA directions saved in: %s' % direction_python_name)
def project_trajectory(dir_file, w, s, dataset, model_name, model_files,
                       dir_type='weights', proj_method='cos'):
    """
    Project the optimization trajectory onto the given two directions.

    Args:
      dir_file: the h5 file that contains the directions
      w: weights of the final model
      s: states of the final model
      dataset: dataset identifier forwarded to model_loader.load
      model_name: the name of the model
      model_files: the checkpoint files
      dir_type: the type of the direction, 'weights' or 'states'
      proj_method: projection method forwarded to project_2D (default 'cos');
                   it is also embedded in the output filename

    Returns:
      proj_file: the projection filename

    Raises:
      ValueError: if dir_type is neither 'weights' nor 'states'
    """
    proj_file = dir_file + '_proj_' + proj_method + '.h5'
    if os.path.exists(proj_file):
        print('The projection file exists! No projection is performed unless %s is deleted' % proj_file)
        return proj_file

    # Reject an unsupported dir_type before any checkpoint is loaded; without
    # this check the loop below fails on an unbound `d`.
    if dir_type not in ('weights', 'states'):
        raise ValueError("dir_type must be 'weights' or 'states', got %r" % (dir_type,))

    # read directions and convert them to vectors
    directions = net_plotter.load_directions(dir_file)
    dx = nplist_to_tensor(directions[0])
    dy = nplist_to_tensor(directions[1])

    use_weights = dir_type == 'weights'
    xcoord, ycoord = [], []
    for model_file in model_files:
        net2 = model_loader.load(dataset, model_name, model_file)
        if use_weights:
            d = net_plotter.get_diff_weights(w, net_plotter.get_weights(net2))
        else:
            d = net_plotter.get_diff_states(s, net2.state_dict())
        d = tensorlist_to_tensor(d)

        x, y = project_2D(d, dx, dy, proj_method)
        print("%s (%.4f, %.4f)" % (model_file, x, y))

        xcoord.append(x)
        ycoord.append(y)

    # `with` guarantees the handle is released even when a write raises.
    with h5py.File(proj_file, 'w') as f:
        f['proj_xcoord'] = np.array(xcoord)
        f['proj_ycoord'] = np.array(ycoord)

    return proj_file
def setup_PCA_directions(args, model_files, w=None, s=None):
    """
    Find PCA directions for the optimization path from the initial model to
    the final trained model.

    Args:
      args: namespace providing model_folder, dir_type, ignore and save_epoch
            (used to name the output folder) and dataset, model and
            data_parallel (forwarded to model_loader.load)
      model_files: the checkpoint files that form the optimization path
      w: reference weights every checkpoint is diffed against when
         dir_type is 'weights'
      s: reference states every checkpoint is diffed against when
         dir_type is 'states'
         If the reference needed for args.dir_type is None, both are taken
         from the last entry of model_files.

    Returns:
      dir_name: the h5 file that stores the directions.

    Raises:
      ValueError: if args.dir_type is neither 'weights' nor 'states', or if a
                  reference has to be derived but model_files is empty
    """
    if args.dir_type not in ('weights', 'states'):
        raise ValueError("dir_type must be 'weights' or 'states', got %r" % (args.dir_type,))
    use_weights = args.dir_type == 'weights'

    # Name the .h5 file that stores the PCA directions.
    folder_name = args.model_folder + '/PCA_' + args.dir_type
    if args.ignore:
        folder_name += '_ignore=' + args.ignore
    folder_name += '_save_epoch=' + str(args.save_epoch)
    # os.makedirs instead of a shell 'mkdir': no shell quoting issues with
    # unusual folder names and no error output when the folder already exists.
    os.makedirs(folder_name, exist_ok=True)
    dir_name = folder_name + '/directions.h5'

    # skip if the direction file exists
    if os.path.exists(dir_name):
        # Close the handle before going on: the file is reopened in 'w' mode
        # below, which cannot work while an earlier handle is still open.
        with h5py.File(dir_name, 'r') as f:
            already_computed = 'explained_variance_' in f
        if already_computed:
            return dir_name

    # This function used to read `w` and `s` as free names; when the caller
    # does not pass the one that is needed, derive both from the last
    # checkpoint (assumed to be the final trained model -- confirm with caller).
    if (w if use_weights else s) is None:
        if not model_files:
            raise ValueError('model_files is empty; cannot derive the reference model')
        final_net = model_loader.load(args.dataset, args.model, model_files[-1], args.data_parallel)
        w = net_plotter.get_weights(final_net)
        s = final_net.state_dict()

    # load models and prepare the optimization path matrix
    matrix = []
    for model_file in model_files:
        print(model_file)
        net2 = model_loader.load(args.dataset, args.model, model_file, args.data_parallel)
        if use_weights:
            w2 = net_plotter.get_weights(net2)
            d = net_plotter.get_diff_weights(w, w2)
        else:
            s2 = net2.state_dict()
            d = net_plotter.get_diff_states(s, s2)
        if args.ignore == 'biasbn':
            net_plotter.ignore_biasbn(d)
        d = weights_to_vec(d)
        matrix.append(d.numpy())

    # Perform PCA on the optimization path matrix
    print("Perform PCA on the models")
    pca = PCA(n_components=2)
    pca.fit(np.array(matrix))
    pc1 = np.array(pca.components_[0])
    pc2 = np.array(pca.components_[1])
    print("angle between pc1 and pc2: " + str(cal_angle(pc1, pc2)))
    print(pca.explained_variance_ratio_)

    # convert vectorized directions to the same shape as models to save in h5 file.
    if use_weights:
        xdirection = vec_to_weights(pc1, w)
        ydirection = vec_to_weights(pc2, w)
    else:
        xdirection = vec_to_states(pc1, s)
        ydirection = vec_to_states(pc2, s)

    if args.ignore == 'biasbn':
        net_plotter.ignore_biasbn(xdirection)
        net_plotter.ignore_biasbn(ydirection)

    with h5py.File(dir_name, 'w') as f:
        h5_util.write_list(f, 'xdirection', xdirection)
        h5_util.write_list(f, 'ydirection', ydirection)
        f['explained_variance_ratio_'] = pca.explained_variance_ratio_
        f['singular_values_'] = pca.singular_values_
        f['explained_variance_'] = pca.explained_variance_

    print('PCA directions saved in ' + dir_name)

    return dir_name
def project_trajectory(dir_file, w, s, dataset, model_name, model_files,
                       dir_type='weights', proj_method='cos', data_parallel=False):
    """
    Project the optimization trajectory onto the given two directions.

    Args:
      dir_file: the h5 file that contains the directions
      w: weights of the final model
      s: states of the final model
      dataset: dataset identifier forwarded to model_loader.load
      model_name: the name of the model
      model_files: the checkpoint files to project
      dir_type: the type of the direction, 'weights' or 'states'
      proj_method: projection method forwarded to project_2D (default 'cos');
                   it is also embedded in the output filename
      data_parallel: forwarded to model_loader.load for every checkpoint

    Returns:
      proj_file: the projection filename

    Raises:
      ValueError: if dir_type is neither 'weights' nor 'states'
    """
    proj_file = dir_file + '_proj_' + proj_method + '.h5'
    if os.path.exists(proj_file):
        print(proj_file + ' exists! No projection is performed.')
        return proj_file

    # Fail fast; an unknown dir_type would otherwise hit an unbound `d` below.
    if dir_type not in ('weights', 'states'):
        raise ValueError("dir_type must be 'weights' or 'states', got %r" % (dir_type,))

    # read directions and convert them to vectors; the vectors are built while
    # the file is still open, as load_directions receives the open handle
    with h5py.File(dir_file, 'r') as f:
        directions = net_plotter.load_directions(f)
        dx = list_to_vec(directions[0])
        dy = list_to_vec(directions[1])

    xcoord, ycoord = [], []
    for model_file in model_files:
        net2 = model_loader.load(dataset, model_name, model_file, data_parallel)
        if dir_type == 'weights':
            w2 = net_plotter.get_weights(net2)
            d = net_plotter.get_diff_weights(w, w2)
        else:
            s2 = net2.state_dict()
            d = net_plotter.get_diff_states(s, s2)
        d = weights_to_vec(d)

        x, y = project_2D(d, dx, dy, proj_method)
        print(model_file, x, y)

        xcoord.append(x)
        ycoord.append(y)

    # The context manager closes the file even if writing a dataset fails.
    with h5py.File(proj_file, 'w') as f:
        f['proj_xcoord'] = np.array(xcoord)
        f['proj_ycoord'] = np.array(ycoord)

    return proj_file
def setup_PCA_directions(args, model_files, w, s):
    """
    Find PCA directions for the optimization path from the initial model to
    the final trained model.

    Args:
      args: namespace providing model_folder, dir_type, ignore and save_epoch
            (used to name the output folder) and dataset and model
            (forwarded to model_loader.load)
      model_files: the checkpoint files that form the optimization path
      w: reference weights every checkpoint is diffed against when
         dir_type is 'weights'
      s: reference states every checkpoint is diffed against when
         dir_type is 'states'

    Returns:
      dir_name: the h5 file that stores the directions.

    Raises:
      ValueError: if args.dir_type is neither 'weights' nor 'states'
    """
    if args.dir_type not in ('weights', 'states'):
        raise ValueError("dir_type must be 'weights' or 'states', got %r" % (args.dir_type,))
    use_weights = args.dir_type == 'weights'

    # Name the .h5 file that stores the PCA directions.
    folder_name = args.model_folder + '/PCA_' + args.dir_type
    if args.ignore:
        folder_name += '_ignore=' + args.ignore
    folder_name += '_save_epoch=' + str(args.save_epoch)
    # os.makedirs instead of a shell 'mkdir': no shell quoting issues with
    # unusual folder names and no error output when the folder already exists.
    os.makedirs(folder_name, exist_ok=True)
    dir_name = folder_name + '/directions.h5'

    # skip if the direction file exists
    if os.path.exists(dir_name):
        # Close the handle before going on: the file is reopened in 'w' mode
        # below, which cannot work while an earlier handle is still open.
        with h5py.File(dir_name, 'r') as f:
            already_computed = 'explained_variance_' in f
        if already_computed:
            return dir_name

    # load models and prepare the optimization path matrix
    matrix = []
    for model_file in model_files:
        print (model_file)
        net2 = model_loader.load(args.dataset, args.model, model_file)
        if use_weights:
            w2 = net_plotter.get_weights(net2)
            d = net_plotter.get_diff_weights(w, w2)
        else:
            s2 = net2.state_dict()
            d = net_plotter.get_diff_states(s, s2)
        if args.ignore == 'biasbn':
            net_plotter.ignore_biasbn(d)
        d = tensorlist_to_tensor(d)
        matrix.append(d.numpy())

    # Perform PCA on the optimization path matrix
    print ("Perform PCA on the models")
    pca = PCA(n_components=2)
    pca.fit(np.array(matrix))
    pc1 = np.array(pca.components_[0])
    pc2 = np.array(pca.components_[1])
    print("angle between pc1 and pc2: %f" % cal_angle(pc1, pc2))
    print("pca.explained_variance_ratio_: %s" % str(pca.explained_variance_ratio_))

    # convert vectorized directions to the same shape as models to save in h5 file.
    reference = w if use_weights else s
    xdirection = npvec_to_tensorlist(pc1, reference)
    ydirection = npvec_to_tensorlist(pc2, reference)

    if args.ignore == 'biasbn':
        net_plotter.ignore_biasbn(xdirection)
        net_plotter.ignore_biasbn(ydirection)

    with h5py.File(dir_name, 'w') as f:
        h5_util.write_list(f, 'xdirection', xdirection)
        h5_util.write_list(f, 'ydirection', ydirection)
        f['explained_variance_ratio_'] = pca.explained_variance_ratio_
        f['singular_values_'] = pca.singular_values_
        f['explained_variance_'] = pca.explained_variance_

    print ('PCA directions saved in: %s' % dir_name)

    return dir_name
if args.y: args.ymin, args.ymax, args.ynum = [ float(a) for a in args.y.split(':') ] assert args.ymin and args.ymax and args.ynum, \ 'You specified some arguments for the y axis, but not all' except: raise Exception( 'Improper format for x- or y-coordinates. Try something like -1:1:51' ) #-------------------------------------------------------------------------- # Load models and extract parameters #-------------------------------------------------------------------------- net = model_loader.load(args.dataset, args.model, args.model_file) w = net_plotter.get_weights(net) # initial parameters s = copy.deepcopy( net.state_dict()) # deepcopy since state_dict are references if args.ngpu > 1: # data parallel with multiple GPUs on a single node net = nn.DataParallel(net, device_ids=range(torch.cuda.device_count())) #-------------------------------------------------------------------------- # Setup the direction file and the surface file #-------------------------------------------------------------------------- dir_file = net_plotter.name_direction_file(args) # name the direction file if rank == 0: net_plotter.setup_direction(args, dir_file, net) surf_file = name_surface_file(args, dir_file) if rank == 0:
#--------------------------------------------------------------------------
# Parse the 'min:max:num' axis specifications into floats on args.
try:
    args.xmin, args.xmax, args.xnum = [float(a) for a in args.x.split(':')]
    args.ymin, args.ymax, args.ynum = (None, None, None)
    if args.y:
        # Unpacking into exactly three names already rejects an incomplete
        # spec (ValueError). The former truthiness assert added nothing to
        # that check but wrongly rejected valid zero bounds such as '0:1:51'.
        args.ymin, args.ymax, args.ynum = [float(a) for a in args.y.split(':')]
except (AttributeError, ValueError) as err:
    # AttributeError: the option is not a string; ValueError: wrong number of
    # fields or a non-numeric field. Anything else (e.g. KeyboardInterrupt)
    # is no longer swallowed.
    raise Exception('Improper format for x- or y-coordinates. Try something like -1:1:51') from err

#--------------------------------------------------------------------------
# Load models and extract parameters
#--------------------------------------------------------------------------
net = model_loader.load(args.dataset, args.model, args.model_file)
w = net_plotter.get_weights(net) # initial parameters
s = copy.deepcopy(net.state_dict()) # deepcopy since state_dict are references
if args.ngpu > 1:
    # data parallel with multiple GPUs on a single node
    net = nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))

#--------------------------------------------------------------------------
# Setup the direction file and the surface file
#--------------------------------------------------------------------------
dir_file = net_plotter.name_direction_file(args) # name the direction file
# only rank 0 creates the files; `rank` is defined outside this section
if rank == 0:
    net_plotter.setup_direction(args, dir_file, net)

surf_file = name_surface_file(args, dir_file)
if rank == 0:
    setup_surface_file(args, surf_file, dir_file)
parser.add_argument('--ignore', default='', help='ignore bias and BN paras: biasbn (no bias or bn)') parser.add_argument('--prefix', default='model_', help='prefix for the checkpint model') parser.add_argument('--suffix', default='.t7', help='prefix for the checkpint model') parser.add_argument('--start_epoch', default=0, type=int, help='min index of epochs') parser.add_argument('--max_epoch', default=300, type=int, help='max number of epochs') parser.add_argument('--save_epoch', default=1, type=int, help='save models every few epochs') parser.add_argument('--dir_file', default='', help='load the direction file for projection') args = parser.parse_args() #-------------------------------------------------------------------------- # load the final model #-------------------------------------------------------------------------- last_model_file = args.model_folder + '/' + args.prefix + str(args.max_epoch) + args.suffix net = model_loader.load(args.dataset, args.model, last_model_file) w = net_plotter.get_weights(net) s = net.state_dict() #-------------------------------------------------------------------------- # collect models to be projected #-------------------------------------------------------------------------- model_files = [] for epoch in range(args.start_epoch, args.max_epoch + args.save_epoch, args.save_epoch): model_file = args.model_folder + '/' + args.prefix + str(epoch) + args.suffix assert os.path.exists(model_file), 'model %s does not exist' % model_file model_files.append(model_file) #-------------------------------------------------------------------------- # load or create projection directions #-------------------------------------------------------------------------- if args.dir_file:
def _snapshot_weights(net):
    """Return the current weights of `net` as a list of independent NumPy arrays."""
    # Tensor.numpy() shares memory with a CPU tensor, so copy explicitly.
    # Presumably get_weights returns the live parameter tensors; without the
    # copy every CPU snapshot would silently follow later optimizer steps.
    return [t.cpu().numpy().copy() for t in net_plotter.get_weights(net)]


def train_save(trainloader, net, criterion, optimizer, use_cuda=True):
    """
    Train `net` over `trainloader` once, recording per-batch statistics.

    Args:
      trainloader: iterable yielding (inputs, targets) batches
      net: the model to train; it is switched to training mode
      criterion: nn.CrossEntropyLoss or nn.MSELoss. With MSELoss the targets
                 are one-hot encoded over 10 classes and the loss is taken on
                 the softmax of the model output
      optimizer: optimizer stepped once per batch
      use_cuda: move each batch to the GPU before the forward pass

    Returns:
      A tuple of
        - the loss averaged over all samples
        - the error rate in percent (100 - accuracy)
        - hist_noise: [per-batch norm of (gradient - mean gradient),
          result of alpha_estimator on the flattened centered gradients],
          both as NumPy arrays
        - layerWise_norms: the pair returned by get_layerWise_norms(net),
          as a list
        - sub_weights: one weight snapshot (list of NumPy arrays) per batch,
          taken after the optimizer step
        - sub_loss: the loss value of every batch

    Raises:
      ValueError: if the criterion is not one of the two supported losses or
                  the loader yields no batches
    """
    is_cross_entropy = isinstance(criterion, nn.CrossEntropyLoss)
    is_mse = isinstance(criterion, nn.MSELoss)
    if not (is_cross_entropy or is_mse):
        raise ValueError('train_save supports nn.CrossEntropyLoss and nn.MSELoss, got %s'
                         % type(criterion).__name__)

    net.train()
    train_loss = 0
    correct = 0
    total = 0
    grads = []
    sub_loss = []
    sub_weights = []

    for inputs, targets in trainloader:
        batch_size = inputs.size(0)
        total += batch_size

        if is_cross_entropy:
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            loss_targets = targets
        else:
            # NOTE: the number of classes is hard-coded to 10
            one_hot_targets = torch.FloatTensor(batch_size, 10).zero_()
            one_hot_targets = one_hot_targets.scatter_(1, targets.view(batch_size, 1), 1.0)
            one_hot_targets = one_hot_targets.float()
            if use_cuda:
                inputs, one_hot_targets = inputs.cuda(), one_hot_targets.cuda()
            loss_targets = one_hot_targets

        # Reset gradients for every batch. The MSE path used to skip this, so
        # each step (and each recorded gradient) accumulated all earlier ones.
        optimizer.zero_grad()
        inputs, loss_targets = Variable(inputs), Variable(loss_targets)
        outputs = net(inputs)
        if is_mse:
            # explicit class dimension; the implicit-dim form is deprecated
            outputs = F.softmax(outputs, dim=1)
        loss = criterion(outputs, loss_targets)
        loss.backward()

        # get gradient
        grads.append(get_grads(net).cpu())

        optimizer.step()

        # record tiny steps in every epoch
        sub_loss.append(loss.item())
        sub_weights.append(_snapshot_weights(net))

        train_loss += loss.item() * batch_size
        _, predicted = torch.max(outputs.data, 1)
        # compare on the CPU: targets stay on the CPU in the MSE path
        correct += predicted.cpu().eq(targets.cpu()).sum().item()

    if not grads:
        raise ValueError('trainloader yielded no batches')

    num_batches = len(grads)
    M = len(grads[0])  # total number of parameters
    grads = torch.cat(grads).view(-1, M)
    mean_grad = grads.sum(0) / num_batches  # divided by # batchs
    noise = grads - mean_grad
    del grads
    del mean_grad
    noise_norm = noise.norm(dim=1)

    # m is the largest divisor of N that does not exceed sqrt(N)
    N = M * num_batches
    m = 1
    for i in range(1, 1 + int(math.sqrt(N))):
        if N % i == 0:
            m = i
    alpha = alpha_estimator(m, noise.view(-1, 1))
    del noise

    hist_noise = [noise_norm.numpy(), alpha.numpy()]
    norms_w, norms_g = get_layerWise_norms(net)
    layerWise_norms = [norms_w, norms_g]

    return train_loss/total, 100 - 100.*correct/total, hist_noise, layerWise_norms, sub_weights, sub_loss
parser.add_argument('--ignore', default='', help='ignore bias and BN paras: biasbn (no bias or bn)') parser.add_argument('--prefix', default='model_', help='prefix for the checkpint model') parser.add_argument('--suffix', default='.t7', help='prefix for the checkpint model') parser.add_argument('--start_epoch', default=0, type=int, help='min index of epochs') parser.add_argument('--max_epoch', default=300, type=int, help='max number of epochs') parser.add_argument('--save_epoch', default=1, type=int, help='save models every few epochs') parser.add_argument('--dir_file', default='', help='load the direction file for projection') args = parser.parse_args() #-------------------------------------------------------------------------- # load the final model #-------------------------------------------------------------------------- last_model_file = args.model_folder + '/' + args.prefix + str(args.max_epoch) + args.suffix net = model_loader.load(args.dataset, args.model, last_model_file) w = net_plotter.get_weights(net) s = net.state_dict() #-------------------------------------------------------------------------- # collect models to be projected #-------------------------------------------------------------------------- model_files = [] for epoch in range(args.start_epoch, args.max_epoch + args.save_epoch, args.save_epoch): model_file = args.model_folder + '/' + args.prefix + str(epoch) + args.suffix assert os.path.exists(model_file), 'model %s does not exist' % model_file model_files.append(model_file) #-------------------------------------------------------------------------- # load or create projection directions #-------------------------------------------------------------------------- if args.dir_file:
args.ymin, args.ymax, args.ynum = (None, None, None) if args.y: args.ymin, args.ymax, args.ynum = [ float(a) for a in args.y.split(':') ] assert args.ymin and args.ymax and args.ynum, 'You specified some arguments for the y axis, but not all' except: raise Exception( 'Improper format for x- or y-coordinates. Try something like -1:1:51' ) #-------------------------------------------------------------------------- # Load models and extract parameters #-------------------------------------------------------------------------- model = load_model(args.model_file) w = net_plotter.get_weights(model) # initial parameters #-------------------------------------------------------------------------- # Setup the direction file and the surface file #-------------------------------------------------------------------------- dir_file = net_plotter.name_direction_file(args) # name the direction file if rank == 0: net_plotter.setup_direction(args, dir_file, model) surf_file = name_surface_file(args, dir_file) if rank == 0: setup_surface_file(args, surf_file, dir_file) # load directions d = net_plotter.load_directions(dir_file) # calculate the consine similarity of the two directions