def train_model(wrapped_model, model, model_path, train_loader, test_loader,
                init_lr, epochs, args):
    train_loss_f = refinery_loss.RefineryLoss()
    val_loss_f = nn.CrossEntropyLoss()
    best_model_path = '.'.join(model_path.split('.')[:-1]) + '.best.pth'

    # tracking stats
    if not hasattr(model, 'stats'):
        model.stats = {'train_loss': [], 'test_acc': [], 'test_loss': [],
                       'weight': [], 'lr': [], 'macs': [], 'efficiency': []}
        start_epoch = 1
        best_acc = 0
    else:
        # resuming from a checkpoint that already carries training stats
        start_epoch = len(model.stats['test_loss'])
        best_acc = max(model.stats['test_acc']).item()

    curr_weights, _ = util.num_nonzeros(model)
    if hasattr(model, 'packed_layer_size'):
        macs = np.sum([x * y for x, y in model.packed_layer_size])
    else:
        macs = curr_weights

    # optimizer
    optimizer = optim.RMSprop(util.group_weight(model), lr=init_lr,
                              momentum=0.9, alpha=0.9, weight_decay=0,
                              eps=1.0)
    print("Optimizer:")
    print(optimizer)

    # training loop
    for epoch in range(start_epoch, epochs + 1):
        print('[Epoch {}]'.format(epoch))
        for g in optimizer.param_groups:
            lr = g['lr']
            break

        train_loss = util.train(train_loader, wrapped_model, train_loss_f,
                                optimizer, epoch - 1, args)
        test_loss, test_acc = util.validate(test_loader, model, val_loss_f,
                                            epoch - 1, args)

        print('LR :: {}'.format(lr))
        print('Train Loss:: {}'.format(train_loss))
        print('Test Loss:: {}'.format(test_loss))
        print('Test Acc.:: {}'.format(test_acc))
        print('Nonzeros :: {}'.format(curr_weights))
        print('')
        print('')

        model.stats['lr'].append(lr)
        model.optimizer = optimizer.state_dict()

        # checkpoint on CPU so the saved model loads without CUDA
        model.cpu()
        torch.save(model, model_path)
        if test_acc > best_acc:
            print('New best model found')
            torch.save(model, best_model_path)
            best_acc = test_acc
        model.cuda()
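# For reference, a minimal sketch of what util.num_nonzeros might look like.
# The helper itself is assumed (its body is not shown in this snippet); from
# its call sites it returns (nonzero weight count, total weight count), which
# is how the sparsity progress above is reported.
def num_nonzeros_sketch(model):
    nonzeros, total = 0, 0
    for param in model.parameters():
        nonzeros += int((param != 0).sum().item())
        total += param.numel()
    return nonzeros, total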
def _cur_tag_X_and_y(self, tag, test_random=False, all_ground_truth_binary=True):
    """Build the design matrix X and response vector y for a single tag."""
    sorted_sources = self.sorted_sources[tag]
    cur_song_list = self.song_lists[tag]
    features_dict = self.features[tag]
    y = rc.matrix(self._dict_to_vec(self.ground_truth[tag], self.song_lists[tag]))
    if util.num_nonzeros(y) < self.min_tag_count:
        self._remove_tag(tag, verbosity=0)
        return

    # Possibly add intercept.
    ncol = 0
    x_vec = []
    if "intercept" in sorted_sources:
        ncol += 1
        x_vec.extend([1.0 for songid in cur_song_list])

    # Add scrobble counts.
    scrobble_vec = numpy.array([self.log_scrobble_counts.get(songid, self.avg_scrobble)
                                for songid in cur_song_list])
    if "scrobble" in sorted_sources:
        ncol += 1
        x_vec.extend(self._standardize(scrobble_vec))

    # Add remaining features.
    for source in sorted_sources:
        if source.endswith("_interaction"):
            main_source = util.remove_trailing_string(source, "_interaction")
            main_vec = self._dict_to_vec(features_dict.get(main_source, None),
                                         cur_song_list)
            feature_vec = numpy.multiply(main_vec, scrobble_vec)  # pointwise product
        elif source not in ["intercept", "scrobble"]:
            feature_vec = self._dict_to_vec(features_dict.get(source, None),
                                            cur_song_list)
        else:
            continue
        x_vec.extend(self._standardize(feature_vec))
        ncol += 1

    try:
        X = rc.matrix(x_vec, ncol=ncol)
    except Exception:
        pdb.set_trace()  # Temporary fix: rmme!
        self._remove_tag(tag)
        return

    if test_random and not self.production_run:
        # Erase all of the above and use random numbers for testing.
        n_songs = len(X)
        X = numpy.random.standard_normal((n_songs, ncol))
        if ncol == 1:
            y = 3 * X[:, 0] + 0.5 * numpy.random.standard_normal((1, n_songs))
        else:
            y = 3 * X[:, 0] + X[:, 1] + 0.5 * numpy.random.standard_normal((1, n_songs))
        y = y.transpose()

    if (all_ground_truth_binary or self.regtype == "Independent Logistic"
            or self.regtype == "Hierarchical Logistic"):
        # Convert y to 0's and 1's (multiply by 1.0 to make float).
        y = 1.0 * (numpy.array(y) > 0)

    self.X[tag] = X
    self.y[tag] = y
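# A minimal sketch of the assumed _standardize helper used above (its body is
# not shown in this snippet): z-scoring a feature vector so every column of
# the design matrix is zero-mean and unit-variance, with a guard so constant
# columns do not divide by zero. The real helper may differ in detail.
import numpy

def _standardize_sketch(vec):
    vec = numpy.asarray(vec, dtype=float)
    std = vec.std()
    if std == 0:
        return (vec - vec.mean()).tolist()
    return ((vec - vec.mean()) / std).tolist()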
def train_model(wrapped_model, model, train_loss_f, model_path, train_loader,
                test_loader, init_lr, epochs, args):
    val_loss_f = nn.CrossEntropyLoss()
    best_model_path = '.'.join(model_path.split('.')[:-1]) + '.best.pth'

    # tracking stats
    if not hasattr(model, 'stats'):
        model.stats = {
            'train_loss': [],
            'test_acc': [],
            'test_loss': [],
            'weight': [],
            'lr': [],
            'macs': [],
            'efficiency': []
        }
        start_epoch = 1
    else:
        start_epoch = len(model.stats['test_loss'])

    curr_weights, _ = util.num_nonzeros(model)
    if hasattr(model, 'packed_layer_size'):
        macs = np.sum([x * y for x, y in model.packed_layer_size])
    else:
        macs = curr_weights

    # optimizer
    optimizer = optim.RMSprop(util.group_weight(model), lr=init_lr,
                              momentum=0.9, alpha=0.9, weight_decay=4e-5,
                              eps=1.0)
    print("Optimizer:")
    print(optimizer)

    best_acc = 0

    # cubic pruning schedule (computed but unused here; this function prunes
    # on a fixed cycle below instead)
    prune_epoch = 0
    max_prune_rate = 0.8
    final_prune_epoch = int(0.9 * args.epochs)
    num_prune_epochs = 10
    prune_rates = [
        max_prune_rate * (1 - (1 - (i / num_prune_epochs))**3)
        for i in range(num_prune_epochs)
    ]
    prune_rates[-1] = max_prune_rate
    prune_epochs = np.linspace(0, final_prune_epoch,
                               num_prune_epochs).astype('i').tolist()

    # fixed-cycle pruning: raise total sparsity by prune_rate every
    # prune_cycle epochs until max_prune is reached
    prune_rate = 0.1
    prune_total = 0.0
    prune_cycle = 8
    max_prune = 0.7

    # pruning stage
    for epoch in range(start_epoch, epochs + 1):
        print('[Epoch {}]'.format(epoch))
        for g in optimizer.param_groups:
            lr = g['lr']
            break

        if epoch % prune_cycle == 0 and prune_total < max_prune:
            prune_total += prune_rate
            print('Prune Total: {:2.2f}'.format(100. * prune_total))
            util.prune(model, prune_total)
            packing.pack_model(model, args.gamma)
            macs = np.sum([x * y for x, y in model.packed_layer_size])
            curr_weights, num_weights = util.num_nonzeros(model)

        train_loss = util.train(train_loader, wrapped_model, train_loss_f,
                                optimizer, epoch - 1, args)
        test_loss, test_acc = util.validate(test_loader, model, val_loss_f,
                                            epoch - 1, args)

        print('LR :: {}'.format(lr))
        print('Train Loss:: {}'.format(train_loss))
        print('Test Loss:: {}'.format(test_loss))
        print('Test Acc.:: {}'.format(test_acc))
        print('Nonzeros :: {}'.format(curr_weights))
        print('')
        print('')

        model.stats['lr'].append(lr)
        model.optimizer = optimizer.state_dict()

        # checkpoint on CPU; only track "best" once target sparsity is reached
        model.cpu()
        torch.save(model, model_path)
        if test_acc > best_acc and prune_total >= max_prune:
            print('New best model found')
            torch.save(model, best_model_path)
            best_acc = test_acc
        model.cuda()
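# A hedged sketch of the assumed util.prune(model, rate): global magnitude
# pruning that zeroes the smallest-magnitude fraction `rate` of all weights.
# The repo's real helper may prune per layer or keep explicit masks; this is
# only illustrative of the call above.
import torch

def prune_sketch(model, rate):
    # gather all weight magnitudes to find a single global threshold
    all_weights = torch.cat([p.data.abs().flatten() for p in model.parameters()])
    k = int(rate * all_weights.numel())
    if k == 0:
        return
    threshold = all_weights.kthvalue(k).values
    with torch.no_grad():
        for p in model.parameters():
            p.data[p.data.abs() <= threshold] = 0.0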
# load dataset
data = datasets.get_dataset(args.dataset_root, args.dataset, args.batch_size,
                            args.cuda, args.aug, in_memory=args.in_memory,
                            input_size=args.input_size)
train_dataset, train_loader, test_dataset, test_loader = data

# teacher model: without a teacher, train on plain cross-entropy;
# otherwise distill from a pretrained ResNet-50 via the refinery loss
if args.teacher_path is None:
    wrapped_model = net.WrappedModel(model)
    train_loss_f = nn.CrossEntropyLoss()
else:
    teacher = ResNet50()
    teacher.load_state_dict(torch.load(args.teacher_path))
    teacher.cuda()
    wrapped_model = ModelRefineryWrapper(model, teacher)
    train_loss_f = refinery_loss.RefineryLoss()

if args.cuda:
    model = model.cuda()

print(model)
print(util.num_nonzeros(model))
print('Target Nonzeros:', util.target_nonzeros(model))

train_model(wrapped_model, model, train_loss_f, args.save_path, train_loader,
            test_loader, args.lr, args.epochs, args)
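# A minimal sketch of what the assumed ModelRefineryWrapper could look like:
# it runs the frozen teacher alongside the student so the refinery loss can
# compare their outputs. The class name is from the snippet above, but this
# body is an assumption, not the repo's actual implementation.
import torch
import torch.nn as nn

class ModelRefineryWrapperSketch(nn.Module):
    def __init__(self, model, teacher):
        super().__init__()
        self.model = model
        self.teacher = teacher
        for p in self.teacher.parameters():
            p.requires_grad = False  # teacher stays frozen

    def forward(self, x):
        with torch.no_grad():
            teacher_out = self.teacher(x)
        return self.model(x), teacher_out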
import torch
import numpy as np
import util
import matplotlib
import matplotlib.pyplot as plt
from config import init_mpl_settings
init_mpl_settings()

colors = ['r', 'g', 'b']
alpha = 8
beta = 0.7

# load one checkpoint per gamma and collect its training stats
gammas = [0.0, 0.25, 0.5, 0.75, 1.0, 1.25, 1.5, 1.75]
gstats = []
for gamma in gammas:
    model_path = 'models/cifar10/cifar10-shift-{}.pth'.format(gamma)
    model = torch.load(model_path).cpu()
    macs = np.sum([x * y for x, y in model.packed_layer_size])
    curr_weights, _ = util.num_nonzeros(model)
    gstats.append(model.stats)

# final-epoch accuracy, efficiency, and weight count for each gamma
accs, effs, weights = [], [], []
for i, gamma in enumerate(gammas):
    accs.append(gstats[i]['test_acc'][-1])
    effs.append(gstats[i]['efficiency'][-1])
    weights.append(gstats[i]['weight'][-1])

fig, ax1 = plt.subplots()
ax1.set_title('CIFAR-10 VGG-19 (1x1)')
ln1 = ax1.plot(gammas, accs, '-o', color=colors[2], linewidth=3)
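# The snippet above is truncated after the accuracy curve. Since gstats also
# records 'efficiency', a natural continuation is a second curve on a twin
# y-axis with a shared legend. This is a hedged sketch; the labels, legend
# text, and output path are assumptions, not the original script's.
ax1.set_xlabel('gamma')
ax1.set_ylabel('Test Accuracy (%)')
ax2 = ax1.twinx()
ln2 = ax2.plot(gammas, effs, '-s', color=colors[0], linewidth=3)
ax2.set_ylabel('Efficiency (%)')
lns = ln1 + ln2
ax1.legend(lns, ['Accuracy', 'Efficiency'], loc='best')
plt.tight_layout()
plt.savefig('figures/cifar10-shift-gamma.pdf')  # hypothetical output path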
def train(model, train_loader, val_loader, args):
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epochs)

    # cubic sparsity schedule: prune in num_prune_epochs steps, reaching
    # max_prune_rate halfway through training
    prune_epoch = 0
    max_prune_rate = 0.8
    final_prune_epoch = int(0.5 * args.epochs)
    num_prune_epochs = 10
    prune_rates = [
        max_prune_rate * (1 - (1 - (i / num_prune_epochs))**3)
        for i in range(num_prune_epochs)
    ]
    prune_rates[-1] = max_prune_rate
    prune_epochs = np.linspace(0, final_prune_epoch,
                               num_prune_epochs).astype('i').tolist()
    print("Pruning Epochs: {}".format(prune_epochs))
    print("Pruning Rates: {}".format(prune_rates))

    curr_weights, num_weights = util.num_nonzeros(model)
    macs = curr_weights
    model.stats = {
        'train_loss': [],
        'test_acc': [],
        'test_loss': [],
        'weight': [],
        'lr': [],
        'macs': [],
        'efficiency': []
    }
    best_path = args.save_path.split('.pth')[0] + '.best.pth'
    best_test_acc = 0

    for epoch in range(1, args.epochs + 1):
        scheduler.step()
        for g in optimizer.param_groups:
            lr = g['lr']
            break

        # prune smallest weights up to a set prune_rate
        if epoch in prune_epochs:
            util.prune(model, prune_rates[prune_epoch])
            curr_weights, num_weights = util.num_nonzeros(model)
            packing.pack_model(model, args.gamma)
            macs = np.sum([x * y for x, y in model.packed_layer_size])
            curr_weights, num_weights = util.num_nonzeros(model)
            prune_epoch += 1

        if epoch == prune_epochs[-1]:
            # disable l1 penalty, as target sparsity is reached
            args.l1_penalty = 0

        print(' :: [{}]\tLR {:.4f}\tNonzeros ({}/{})'.format(
            epoch, lr, curr_weights, num_weights))
        train_loss = util.train(train_loader, model, criterion, optimizer,
                                epoch, args)
        test_loss, test_acc = util.validate(val_loader, model, criterion,
                                            epoch, args)

        is_best = test_acc > best_test_acc
        best_test_acc = max(test_acc, best_test_acc)

        model.stats['lr'].append(lr)
        model.stats['macs'].append(macs)
        model.stats['weight'].append(curr_weights)
        model.stats['efficiency'].append(100.0 * (curr_weights / macs))
        model.optimizer = optimizer.state_dict()
        model.epoch = epoch

        # checkpoint on CPU so the saved model loads without CUDA
        model.cpu()
        torch.save(model, args.save_path)
        if is_best:
            torch.save(model, best_path)
        model.cuda()
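# Worked example of the cubic schedule above: with max_prune_rate = 0.8 and
# num_prune_epochs = 10, each step's target sparsity is
# 0.8 * (1 - (1 - i/10)**3), so pruning is front-loaded while the dense
# network still has slack and tapers off as the target is approached.
if __name__ == '__main__':
    rates = [0.8 * (1 - (1 - i / 10)**3) for i in range(10)]
    rates[-1] = 0.8
    print([round(r, 3) for r in rates])
    # -> [0.0, 0.217, 0.39, 0.526, 0.627, 0.7, 0.749, 0.778, 0.794, 0.8]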